LLM improvements

- document the tokenizer used (https://github.com/huggingface/swift-transformers)
- provide a hook for tokenizer configuration and prompt augmentation
	- this isn't as rich as the Python equivalents, but it helps a little
This commit is contained in:
David Koski
2024-03-01 14:46:32 -08:00
parent 599661774a
commit 82f6a969d4
8 changed files with 250 additions and 22 deletions

View File

@@ -49,9 +49,9 @@ public struct Tokenizer: Tokenizers.Tokenizer {
}
public func loadTokenizer(name: String) async throws -> Tokenizer {
public func loadTokenizer(configuration: ModelConfiguration) async throws -> Tokenizer {
// from AutoTokenizer.from() -- this lets us override parts of the configuration
let config = LanguageModelConfigurationFromHub(modelName: name)
let config = LanguageModelConfigurationFromHub(modelName: configuration.id)
guard var tokenizerConfig = try await config.tokenizerConfig else {
throw LLMError(message: "missing config")
}