handle partially quantized models (#76)

* handle partially quantized models

- fixes #53, #69, #71, #74
- in order to test the models:
	- added a default prompt of an appropriate form for each model (see the first sketch below)
	- while working on the model configuration, also added extra stop tokens (#74)
- fixed the repetitionPenalty code (#71); a sketch of that technique follows below
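As context for the stop-token change, here is a minimal sketch of a model configuration that carries extra EOS tokens next to a default prompt. The names ModelConfiguration, defaultPrompt, and extraEOSTokens mirror the diff below, but the struct layout, initializer, and example values are assumptions, not the repository's exact API.

// Minimal sketch, assuming a plain struct; not the actual library type.
struct ModelConfiguration {
    /// Model identifier (the example value below is hypothetical).
    let id: String
    /// Prompt used when the UI has no user input yet.
    var defaultPrompt: String = ""
    /// Token strings that end generation in addition to the tokenizer's
    /// built-in EOS token.
    var extraEOSTokens: Set<String> = []
}

// Example: a chat model whose template emits a custom end marker.
let example = ModelConfiguration(
    id: "mlx-community/example-model-4bit",
    defaultPrompt: "compare python and swift",
    extraEOSTokens: ["<|end|>"]
)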
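The repetitionPenalty fix itself is not visible in the hunks below, so the following is only a generic sketch of the common formulation (from the CTRL paper): logits of already-generated tokens are divided by the penalty when positive and multiplied by it when negative, lowering their probability either way. It illustrates the technique on a plain array, not the exact MLX code.

// Generic repetition-penalty sketch; penalty > 1 discourages repeats.
func applyRepetitionPenalty(
    to logits: inout [Float], seen: Set<Int>, penalty: Float
) {
    guard penalty != 1 else { return }
    for token in seen where logits.indices.contains(token) {
        let score = logits[token]
        // Divide positive scores, multiply negative ones: both moves
        // reduce the token's final softmax probability.
        logits[token] = score > 0 ? score / penalty : score * penalty
    }
}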
Author: David Koski (committed by GitHub)
Date: 2024-05-28 16:35:11 -07:00
parent 65f4968e5f
commit 9d74afd119
12 changed files with 139 additions and 67 deletions

@@ -10,7 +10,7 @@ import Tokenizers
 struct ContentView: View {
-    @State var prompt = "compare python and swift"
+    @State var prompt = ""
     @State var llm = LLMEvaluator()
     @Environment(DeviceStat.self) private var deviceStat
@@ -125,6 +125,8 @@ struct ContentView: View {
             }
             .task {
+                self.prompt = llm.modelConfiguration.defaultPrompt
+
                 // pre-load the weights on launch to speed up the first generation
                 _ = try? await llm.load()
             }
@@ -224,7 +226,7 @@ class LLMEvaluator {
         let result = await LLM.generate(
             promptTokens: promptTokens, parameters: generateParameters, model: model,
-            tokenizer: tokenizer
+            tokenizer: tokenizer, extraEOSTokens: modelConfiguration.extraEOSTokens
         ) { tokens in
             // update the output -- this will make the view show the text as it generates
             if tokens.count % displayEveryNTokens == 0 {
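
To show where extraEOSTokens plausibly takes effect, here is a hedged sketch of a stop check inside a generation loop. The Tokenizer calls (decode(tokens:) and eosTokenId) follow swift-transformers, but the isStopToken helper and its decode-and-compare strategy are assumptions for illustration, not the code added by this commit.

import Tokenizers

// Hypothetical helper; not part of this commit's diff.
func isStopToken(
    _ token: Int, tokenizer: Tokenizer, extraEOSTokens: Set<String>
) -> Bool {
    // The tokenizer's own EOS id always ends generation.
    if token == tokenizer.eosTokenId { return true }
    // Otherwise decode the single token and compare it against the
    // extra stop strings carried by the model configuration.
    return extraEOSTokens.contains(tokenizer.decode(tokens: [token]))
}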