handle partially quantized models (#76)

* handle partially quantized models - fix for #53, #71, #69, #74 - in order to test the models, I added a default prompt of an appropriate form - while working on the model configuration, I also added additional stop tokens (#74) - fixed the repetitionPenalty code (#71)
2024-05-28 16:35:11 -07:00
parent 65f4968e5f
commit 9d74afd119
12 changed files with 139 additions and 67 deletions
--- a/Tools/llm-tool/LoraCommands.swift
+++ b/Tools/llm-tool/LoraCommands.swift
@@ -275,7 +275,8 @@ struct LoRAEvalCommand: AsyncParsableCommand {

        // generate and print the result
        let _ = await generate.generate(
-            promptTokens: promptTokens, model: model, tokenizer: tokenizer)
+            promptTokens: promptTokens, model: model, tokenizer: tokenizer,
+            extraEOSTokens: modelConfiguration.extraEOSTokens)
        print()
    }
 }