handle partially quantized models (#76)

* handle partially quantized models

- fixes #53, #69, #71, #74
- in order to test the models:
	- added a default prompt of an appropriate form for each model (see the first sketch below)
	- while working on the model configuration, also added extra stop tokens (#74)
- fixed the repetitionPenalty code (#71); a sketch of that technique follows below
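As context for the stop-token change, here is a minimal sketch of a model configuration that carries extra EOS tokens next to a default prompt. The names ModelConfiguration, defaultPrompt, and extraEOSTokens mirror the diff below, but the struct layout, initializer, and example values are assumptions, not the repository's exact API.

// Minimal sketch, assuming a plain struct; not the actual library type.
struct ModelConfiguration {
    /// Model identifier (the example value below is hypothetical).
    let id: String
    /// Prompt used when the UI has no user input yet.
    var defaultPrompt: String = ""
    /// Token strings that end generation in addition to the tokenizer's
    /// built-in EOS token.
    var extraEOSTokens: Set<String> = []
}

// Example: a chat model whose template emits a custom end marker.
let example = ModelConfiguration(
    id: "mlx-community/example-model-4bit",
    defaultPrompt: "compare python and swift",
    extraEOSTokens: ["<|end|>"]
)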
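The repetitionPenalty fix itself is not visible in the hunks below, so the following is only a generic sketch of the common formulation (from the CTRL paper): logits of already-generated tokens are divided by the penalty when positive and multiplied by it when negative, lowering their probability either way. It illustrates the technique on a plain array, not the exact MLX code.

// Generic repetition-penalty sketch; penalty > 1 discourages repeats.
func applyRepetitionPenalty(
    to logits: inout [Float], seen: Set<Int>, penalty: Float
) {
    guard penalty != 1 else { return }
    for token in seen where logits.indices.contains(token) {
        let score = logits[token]
        // Divide positive scores, multiply negative ones: both moves
        // reduce the token's final softmax probability.
        logits[token] = score > 0 ? score / penalty : score * penalty
    }
}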
Author: David Koski (committed by GitHub)
Date: 2024-05-28 16:35:11 -07:00
parent 65f4968e5f
commit 9d74afd119
12 changed files with 139 additions and 67 deletions

@@ -10,7 +10,7 @@ import Tokenizers
 struct ContentView: View {
-    @State var prompt = "compare python and swift"
+    @State var prompt = ""
     @State var llm = LLMEvaluator()
     @Environment(DeviceStat.self) private var deviceStat
@@ -125,6 +125,8 @@ struct ContentView: View {
             }
             .task {
+                self.prompt = llm.modelConfiguration.defaultPrompt
+
                 // pre-load the weights on launch to speed up the first generation
                 _ = try? await llm.load()
             }
@@ -224,7 +226,7 @@ class LLMEvaluator {
         let result = await LLM.generate(
             promptTokens: promptTokens, parameters: generateParameters, model: model,
-            tokenizer: tokenizer
+            tokenizer: tokenizer, extraEOSTokens: modelConfiguration.extraEOSTokens
         ) { tokens in
             // update the output -- this will make the view show the text as it generates
             if tokens.count % displayEveryNTokens == 0 {
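
To show where extraEOSTokens plausibly takes effect, here is a hedged sketch of a stop check inside a generation loop. The Tokenizer calls (decode(tokens:) and eosTokenId) follow swift-transformers, but the isStopToken helper and its decode-and-compare strategy are assumptions for illustration, not the code added by this commit.

import Tokenizers

// Hypothetical helper; not part of this commit's diff.
func isStopToken(
    _ token: Int, tokenizer: Tokenizer, extraEOSTokens: Set<String>
) -> Bool {
    // The tokenizer's own EOS id always ends generation.
    if token == tokenizer.eosTokenId { return true }
    // Otherwise decode the single token and compare it against the
    // extra stop strings carried by the model configuration.
    return extraEOSTokens.contains(tokenizer.decode(tokens: [token]))
}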