Files
mlx-swift-examples/Libraries/LLM/Tokenizer.swift
Pedro Cuenca 6c270a9d12 Remove Tokenizer wrapper (#28)
* Remove Tokenizer wrapper

Depends on https://github.com/huggingface/swift-transformers/pull/70

* Update swift-transformers revision
2024-03-18 09:07:52 -07:00

33 lines
1.1 KiB
Swift

// Copyright © 2024 Apple Inc.
import Foundation
import Hub
import Tokenizers
/// Loads the tokenizer described by `configuration`.
///
/// The tokenizer is looked up under `configuration.tokenizerId` when it is set,
/// otherwise under `configuration.id`. If the loaded tokenizer config names a
/// tokenizer class that has an entry in `replacementTokenizers`, that class name
/// is swapped for the replacement before the tokenizer is constructed.
///
/// - Parameter configuration: Model configuration supplying the tokenizer or model id.
/// - Returns: A `PreTrainedTokenizer` built from the (possibly patched) tokenizer
///   config and the tokenizer data.
/// - Throws: `LLMError` with message "missing config" when no tokenizer config is
///   available; rethrows any error raised while loading the config or data, or
///   while constructing the tokenizer.
public func loadTokenizer(configuration: ModelConfiguration) async throws -> Tokenizer {
    // Follows AutoTokenizer.from(), done by hand so that parts of the tokenizer
    // configuration can be overridden before the tokenizer is created.
    let modelName = configuration.tokenizerId ?? configuration.id
    let hubConfiguration = LanguageModelConfigurationFromHub(modelName: modelName)

    guard let loadedConfig = try await hubConfiguration.tokenizerConfig else {
        throw LLMError(message: "missing config")
    }
    let loadedData = try await hubConfiguration.tokenizerData

    // Workaround: substitute tokenizer classes that swift-transformers does not handle.
    let effectiveConfig: Config
    if let className = loadedConfig.tokenizerClass?.stringValue,
        let substitute = replacementTokenizers[className]
    {
        var entries = loadedConfig.dictionary
        entries["tokenizer_class"] = substitute
        effectiveConfig = Config(entries)
    } else {
        effectiveConfig = loadedConfig
    }

    return try PreTrainedTokenizer(
        tokenizerConfig: effectiveConfig, tokenizerData: loadedData)
}
/// Overrides for TokenizerModel/knownTokenizers.
///
/// Keys are tokenizer class names as they appear in a tokenizer config; values
/// are the class names to use in their place when the tokenizer is loaded.
let replacementTokenizers: [String: String] = [
    "Qwen2Tokenizer": "PreTrainedTokenizer",
]