handle partially quantized models (#76)
* handle partially quantized models (fix for #53, #71, #69, #74)
* added a default prompt of an appropriate form to each model configuration so the models can be tested
* while working on the model configuration, also added additional stop tokens (#74)
* fixed the repetitionPenalty code (#71)
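The repetitionPenalty fix referenced in #71 is not part of the hunks shown below. As background, a minimal sketch of the common formulation (penalizing tokens that have already been generated) is included here; the function name and use of a plain `[Float]` logits array are illustrative assumptions, not the project's actual code.

```swift
// Illustrative sketch only: scales logits of already-generated tokens so they
// become less likely to repeat. Positive logits are divided by the penalty,
// negative logits are multiplied, so the adjustment always lowers probability.
func applyRepetitionPenalty(logits: inout [Float], generatedTokens: [Int], penalty: Float) {
    guard penalty != 1.0 else { return }
    for token in Set(generatedTokens) where token >= 0 && token < logits.count {
        let value = logits[token]
        logits[token] = value > 0 ? value / penalty : value * penalty
    }
}
```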
@@ -10,7 +10,7 @@ import Tokenizers
 struct ContentView: View {
 
-    @State var prompt = "compare python and swift"
+    @State var prompt = ""
     @State var llm = LLMEvaluator()
     @Environment(DeviceStat.self) private var deviceStat
@@ -125,6 +125,8 @@ struct ContentView: View {
             }
             .task {
+                self.prompt = llm.modelConfiguration.defaultPrompt
+
                 // pre-load the weights on launch to speed up the first generation
                 _ = try? await llm.load()
             }
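The hunk above reads `llm.modelConfiguration.defaultPrompt`, and the hunks below read `modelConfiguration.extraEOSTokens`. A minimal sketch of the configuration fields this implies is below; the property names come from the diff, while the struct shape, the `id` field, and the types are assumptions rather than the actual `ModelConfiguration` definition.

```swift
// Sketch of per-model configuration fields implied by this change (not the real type).
struct ModelConfigurationSketch {
    /// Placeholder model identifier (assumption).
    var id: String
    /// Prompt used to pre-fill the UI so each model can be exercised with a sensible input;
    /// the example string is the prompt that was previously hard-coded in ContentView.
    var defaultPrompt: String = "compare python and swift"
    /// Additional strings treated as end-of-sequence markers on top of the tokenizer's EOS.
    var extraEOSTokens: Set<String> = []
}
```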
@@ -224,7 +226,7 @@ class LLMEvaluator {
         let result = await LLM.generate(
             promptTokens: promptTokens, parameters: generateParameters, model: model,
-            tokenizer: tokenizer
+            tokenizer: tokenizer, extraEOSTokens: modelConfiguration.extraEOSTokens
         ) { tokens in
             // update the output -- this will make the view show the text as it generates
             if tokens.count % displayEveryNTokens == 0 {
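A hedged sketch of what passing `extraEOSTokens` to `LLM.generate` could amount to inside the generation loop: stop early when the most recently decoded token matches one of the configured stop strings. This is an assumption about how the parameter is used, not the library's actual implementation; the decode closure stands in for the real tokenizer API.

```swift
// Assumed stop check: terminate generation when the newest token decodes to an
// extra EOS string from the model configuration.
func shouldStop(tokens: [Int], decodeToken: (Int) -> String, extraEOSTokens: Set<String>) -> Bool {
    guard let last = tokens.last else { return false }
    return extraEOSTokens.contains(decodeToken(last))
}
```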
@@ -266,6 +266,7 @@ class LoRAEvaluator {
         let result = await LLM.generate(
             promptTokens: promptTokens, parameters: generateParameters, model: model,
             tokenizer: tokenizer,
+            extraEOSTokens: modelConfiguration.extraEOSTokens,
             didGenerate: { tokens in
                 if tokens.count % evaluateShowEvery == 0 {
                     let fullOutput = tokenizer.decode(tokens: tokens)
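Both callbacks above use the same throttling pattern: the text is decoded and published only every N tokens (`displayEveryNTokens` / `evaluateShowEvery`) rather than on every token, which keeps the view from re-rendering constantly during generation. A self-contained sketch of that pattern, with closures standing in for the real tokenizer and UI update:

```swift
// Decode and publish output only every N tokens to limit UI updates during generation.
func handleTokens(
    _ tokens: [Int],
    showEvery: Int,
    decode: ([Int]) -> String,
    publish: (String) -> Void
) {
    if tokens.count % showEvery == 0 {
        publish(decode(tokens))
    }
}
```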