update documentation

David Koski
2024-03-01 16:33:49 -08:00
parent 807c8136c0
commit 0374e4b073
2 changed files with 14 additions and 1 deletion

View File: ContentView.swift

@@ -13,12 +13,14 @@ struct ContentView: View {
     var body: some View {
         VStack {
+            // show the model output
             ScrollView(.vertical) {
                 if llm.running {
                     ProgressView()
                 }
                 Text(llm.output)
             }

             HStack {
                 TextField("prompt", text: $prompt)
                     .onSubmit(generate)
@@ -29,6 +31,7 @@ struct ContentView: View {
         }
         .padding()
         .task {
+            // pre-load the weights on launch to speed up the first generation
             _ = try? await llm.load()
         }
     }
@@ -48,7 +51,11 @@ class LLMEvaluator {
     var output = ""

+    /// this controls which model loads -- phi4bit is one of the smaller ones so this will fit on
+    /// more devices
     let modelConfiguration = ModelConfiguration.phi4bit

+    /// parameters controlling the output
     let temperature: Float = 0.0
     let maxTokens = 100
@@ -59,6 +66,8 @@ class LLMEvaluator {
     var loadState = LoadState.idle

+    /// load and return the model -- can be called multiple times, subsequent calls will
+    /// just return the loaded model
     func load() async throws -> (LLMModel, LLM.Tokenizer) {
         switch loadState {
         case .idle:
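
The doc comment added above describes a load-once pattern: `loadState` caches the model after the first call, and later calls return the cached value. A minimal sketch of how that switch typically completes; `loadModel(configuration:)` is a hypothetical helper standing in for the download/weight-loading step this hunk does not show:

enum LoadState {
    case idle
    case loaded(LLMModel, LLM.Tokenizer)
}

var loadState = LoadState.idle

func load() async throws -> (LLMModel, LLM.Tokenizer) {
    switch loadState {
    case .idle:
        // first call: perform the expensive download and weight load once
        let (model, tokenizer) = try await loadModel(configuration: modelConfiguration)  // hypothetical helper
        loadState = .loaded(model, tokenizer)
        return (model, tokenizer)

    case .loaded(let model, let tokenizer):
        // subsequent calls: return the cached instances immediately
        return (model, tokenizer)
    }
}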
@@ -86,6 +95,7 @@ class LLMEvaluator {
             self.output = ""
         }

+        // augment the prompt as needed
        let prompt = modelConfiguration.prepare(prompt: prompt)
        let promptTokens = MLXArray(tokenizer.encode(text: prompt))
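
The "augment the prompt" step applies model-specific templating before tokenization. The real template lives in `ModelConfiguration` and is not part of this diff; as a sketch only, an instruct-style preparation might look like:

func prepare(prompt: String) -> String {
    // hypothetical template: phi-style instruct models commonly frame the
    // input as "Instruct: ... Output:" so the model answers the question
    // rather than continuing the user's text
    "Instruct: \(prompt)\nOutput: "
}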
@@ -94,12 +104,14 @@ class LLMEvaluator {
        for token in TokenIterator(prompt: promptTokens, model: model, temp: temperature) {
            let tokenId = token.item(Int.self)

-            if tokenId == tokenizer.unknownTokenId {
+            if tokenId == tokenizer.unknownTokenId || tokenId == tokenizer.eosTokenId {
                break
            }

            outputTokens.append(tokenId)
            let text = tokenizer.decode(tokens: outputTokens)

+            // update the output -- this will make the view show the text as it generates
            await MainActor.run {
                self.output = text
            }
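
The one behavioral change in this commit is the stop condition: generation now also ends when the model emits its end-of-sequence token, not only on an unknown token. A reduced sketch of that loop, with `nextToken()` as a stand-in for the `TokenIterator` above:

var outputTokens = [Int]()

while let token = nextToken() {  // stand-in for TokenIterator
    let tokenId = token.item(Int.self)

    // without the eosTokenId check, the loop would keep sampling past the
    // natural end of the answer until maxTokens, often producing repeated text
    if tokenId == tokenizer.unknownTokenId || tokenId == tokenizer.eosTokenId {
        break
    }
    outputTokens.append(tokenId)

    // decode the whole array each iteration: tokens do not map 1:1 to
    // characters, so decoding incrementally could split multi-byte text
    let text = tokenizer.decode(tokens: outputTokens)
    await MainActor.run { self.output = text }  // stream partial output to the UI
}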

View File: README.md

@@ -17,6 +17,7 @@ Some notes about the setup:
 - this downloads models from hugging face so LLMEval -> Signing & Capabilities has the "Outgoing Connections (Client)" set in the App Sandbox
 - LLM models are large so this uses the Increased Memory Limit entitlement on iOS to allow ... increased memory limits for devices that have more memory
 - The Phi2 4 bit model is small enough to run on some iPhone models
+    - this can be changed by editing `let modelConfiguration = ModelConfiguration.phi4bit`

 ### Troubleshooting
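
The new README bullet points at the single line that selects the model. Swapping models is a one-line edit; any preset name below other than `phi4bit` is illustrative, since the available set of `ModelConfiguration` presets depends on the library version:

// as shipped: a 4-bit Phi model, small enough to fit on more devices
let modelConfiguration = ModelConfiguration.phi4bit

// illustrative swap -- another preset would load the same way, but larger
// models need more memory than some iPhones provide
// let modelConfiguration = ModelConfiguration.mistral7B4bit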