diff --git a/Libraries/LLM/Tokenizer.swift b/Libraries/LLM/Tokenizer.swift index 100b5d2..bcd6fb1 100644 --- a/Libraries/LLM/Tokenizer.swift +++ b/Libraries/LLM/Tokenizer.swift @@ -4,51 +4,6 @@ import Foundation import Hub import Tokenizers -/// Wrapper for `Tokenizers.Tokenizer` that provides access to config -/// like ``eosToken``. -public struct Tokenizer: Tokenizers.Tokenizer { - - let tokenizer: Tokenizers.Tokenizer - - public let eosToken: String? - public let eosTokenId: Int? - - internal init(tokenizer: Tokenizers.Tokenizer, tokenizerConfig: Config) { - self.tokenizer = tokenizer - self.eosToken = tokenizerConfig.eosToken?.stringValue - if let eosToken { - self.eosTokenId = tokenizer.convertTokenToId(eosToken) - } else { - self.eosTokenId = nil - } - } - - public func tokenize(text: String) -> [String] { - tokenizer.tokenize(text: text) - } - - public func encode(text: String) -> [Int] { - tokenizer.encode(text: text) - } - - public func decode(tokens: [Int]) -> String { - tokenizer.decode(tokens: tokens) - } - - public func convertTokenToId(_ token: String) -> Int? { - tokenizer.convertTokenToId(token) - } - - public func convertIdToToken(_ id: Int) -> String? { - tokenizer.convertIdToToken(id) - } - - public var unknownToken: String? { tokenizer.unknownToken } - - public var unknownTokenId: Int? { tokenizer.unknownTokenId } - -} - public func loadTokenizer(configuration: ModelConfiguration) async throws -> Tokenizer { // from AutoTokenizer.from() -- this lets us override parts of the configuration let config = LanguageModelConfigurationFromHub( @@ -56,7 +11,7 @@ public func loadTokenizer(configuration: ModelConfiguration) async throws -> Tok guard var tokenizerConfig = try await config.tokenizerConfig else { throw LLMError(message: "missing config") } - var tokenizerData = try await config.tokenizerData + let tokenizerData = try await config.tokenizerData // workaround: replacement tokenizers for unhandled values in swift-transform if let tokenizerClass = tokenizerConfig.tokenizerClass?.stringValue, @@ -67,10 +22,8 @@ public func loadTokenizer(configuration: ModelConfiguration) async throws -> Tok tokenizerConfig = Config(dictionary) } - let impl = try PreTrainedTokenizer( + return try PreTrainedTokenizer( tokenizerConfig: tokenizerConfig, tokenizerData: tokenizerData) - - return Tokenizer(tokenizer: impl, tokenizerConfig: tokenizerConfig) } /// overrides for TokenizerModel/knownTokenizers diff --git a/Tools/llm-tool/LLMTool.swift b/Tools/llm-tool/LLMTool.swift index a563a6b..98be194 100644 --- a/Tools/llm-tool/LLMTool.swift +++ b/Tools/llm-tool/LLMTool.swift @@ -5,6 +5,7 @@ import Foundation import LLM import MLX import MLXRandom +import Tokenizers @main struct LLMTool: AsyncParsableCommand { diff --git a/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 75af8d9..2ce06df 100644 --- a/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -60,7 +60,7 @@ "location" : "https://github.com/huggingface/swift-transformers", "state" : { "branch" : "main", - "revision" : "24605a8c0cc974bec5b94a6752eb687bae77db31" + "revision" : "3bd02269b7797ade67c15679a575cd5c6f203ce6" } } ],