From 7b746cb89ca289ee4c1f9b32e491aea9dcb0afd8 Mon Sep 17 00:00:00 2001
From: David Koski
Date: Fri, 1 Mar 2024 23:27:03 -0800
Subject: [PATCH] allow alternate location for tokenizer

---
 Libraries/LLM/Models.swift    | 7 ++++++-
 Libraries/LLM/Tokenizer.swift | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/Libraries/LLM/Models.swift b/Libraries/LLM/Models.swift
index a478e43..5564c4a 100644
--- a/Libraries/LLM/Models.swift
+++ b/Libraries/LLM/Models.swift
@@ -11,6 +11,9 @@ import Foundation
 public struct ModelConfiguration {
     public let id: String
 
+    /// pull the tokenizer from an alternate id
+    public let tokenizerId: String?
+
     /// overrides for TokenizerModel/knownTokenizers -- useful before swift-transformers is updated
     public let overrideTokenizer: String?
 
@@ -20,9 +23,11 @@ public struct ModelConfiguration {
     private let preparePrompt: ((String) -> String)?
 
     public init(
-        id: String, overrideTokenizer: String? = nil, preparePrompt: ((String) -> String)? = nil
+        id: String, tokenizerId: String? = nil, overrideTokenizer: String? = nil,
+        preparePrompt: ((String) -> String)? = nil
     ) {
         self.id = id
+        self.tokenizerId = tokenizerId
         self.overrideTokenizer = overrideTokenizer
         self.preparePrompt = preparePrompt
     }
diff --git a/Libraries/LLM/Tokenizer.swift b/Libraries/LLM/Tokenizer.swift
index e8c6446..ce8b291 100644
--- a/Libraries/LLM/Tokenizer.swift
+++ b/Libraries/LLM/Tokenizer.swift
@@ -51,7 +51,8 @@ public struct Tokenizer: Tokenizers.Tokenizer {
 
 public func loadTokenizer(configuration: ModelConfiguration) async throws -> Tokenizer {
     // from AutoTokenizer.from() -- this lets us override parts of the configuration
-    let config = LanguageModelConfigurationFromHub(modelName: configuration.id)
+    let config = LanguageModelConfigurationFromHub(
+        modelName: configuration.tokenizerId ?? configuration.id)
     guard var tokenizerConfig = try await config.tokenizerConfig else {
         throw LLMError(message: "missing config")
     }