handle partially quantized models (#76)

* handle partially quantized models

- fixes for #53, #69, #71, #74
- in order to test the models:
	- added a default prompt of an appropriate form
	- while working on the model configuration, also added additional stop tokens (#74)
- fixed the repetitionPenalty code (#71)
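The repetitionPenalty fix referenced above is not shown in this excerpt; as a rough illustration of the technique, a repetition penalty rescales the logits of tokens that have already been generated so they are less likely to be sampled again. The function below is a minimal sketch under assumed semantics (divide positive logits, multiply negative ones, as in the CTRL-style formulation) — it is not the actual code changed by #71, and the names are hypothetical.

```swift
import Foundation

/// Sketch only: dampen the logits of previously generated tokens.
/// A penalty > 1 discourages repeats; 1.0 is a no-op.
func applyRepetitionPenalty(logits: [Float], previousTokens: [Int], penalty: Float) -> [Float] {
    var adjusted = logits
    // Visit each distinct prior token once; ignore ids outside the vocabulary.
    for token in Set(previousTokens) where token >= 0 && token < adjusted.count {
        let value = adjusted[token]
        // Dividing a positive logit (or multiplying a negative one) by the
        // penalty lowers that token's probability after softmax.
        adjusted[token] = value < 0 ? value * penalty : value / penalty
    }
    return adjusted
}
```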
David Koski
2024-05-28 16:35:11 -07:00
committed by GitHub
parent 65f4968e5f
commit 9d74afd119
12 changed files with 139 additions and 67 deletions

@@ -266,6 +266,7 @@ class LoRAEvaluator {
         let result = await LLM.generate(
             promptTokens: promptTokens, parameters: generateParameters, model: model,
             tokenizer: tokenizer,
+            extraEOSTokens: modelConfiguration.extraEOSTokens,
             didGenerate: { tokens in
                 if tokens.count % evaluateShowEvery == 0 {
                     let fullOutput = tokenizer.decode(tokens: tokens)
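The hunk above threads `extraEOSTokens` from the model configuration into `generate`, so models whose chat templates emit non-standard terminators (e.g. the stop tokens added for #74) can end generation cleanly. A minimal sketch of how such a check might work inside a generation loop — with hypothetical names, not the library's actual API — is:

```swift
import Foundation

/// Sketch only: decide whether generation should stop on this token.
/// `eosTokenId` is the tokenizer's built-in terminator; `extraEOSTokenIds`
/// are additional stop tokens supplied by the model configuration.
func shouldStop(token: Int, eosTokenId: Int, extraEOSTokenIds: Set<Int>) -> Bool {
    token == eosTokenId || extraEOSTokenIds.contains(token)
}
```

In the real code the extra stop tokens are strings on the configuration and would be converted to token ids via the tokenizer before the loop; the set lookup keeps the per-token check O(1).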