diff --git a/Applications/LLMEval/ContentView.swift b/Applications/LLMEval/ContentView.swift index 806264e..176c1ff 100644 --- a/Applications/LLMEval/ContentView.swift +++ b/Applications/LLMEval/ContentView.swift @@ -13,12 +13,14 @@ struct ContentView: View { var body: some View { VStack { + // show the model output ScrollView(.vertical) { if llm.running { ProgressView() } Text(llm.output) } + HStack { TextField("prompt", text: $prompt) .onSubmit(generate) @@ -29,6 +31,7 @@ struct ContentView: View { } .padding() .task { + // pre-load the weights on launch to speed up the first generation _ = try? await llm.load() } } @@ -48,7 +51,11 @@ class LLMEvaluator { var output = "" + /// this controls which model loads -- phi4bit is one of the smaller ones so this will fit on + /// more devices let modelConfiguration = ModelConfiguration.phi4bit + + /// parameters controlling the output let temperature: Float = 0.0 let maxTokens = 100 @@ -59,6 +66,8 @@ class LLMEvaluator { var loadState = LoadState.idle + /// load and return the model -- can be called multiple times, subsequent calls will + /// just return the loaded model func load() async throws -> (LLMModel, LLM.Tokenizer) { switch loadState { case .idle: @@ -86,6 +95,7 @@ class LLMEvaluator { self.output = "" } + // augment the prompt as needed let prompt = modelConfiguration.prepare(prompt: prompt) let promptTokens = MLXArray(tokenizer.encode(text: prompt)) @@ -94,12 +104,14 @@ class LLMEvaluator { for token in TokenIterator(prompt: promptTokens, model: model, temp: temperature) { let tokenId = token.item(Int.self) - if tokenId == tokenizer.unknownTokenId { + if tokenId == tokenizer.unknownTokenId || tokenId == tokenizer.eosTokenId { break } outputTokens.append(tokenId) let text = tokenizer.decode(tokens: outputTokens) + + // update the output -- this will make the view show the text as it generates await MainActor.run { self.output = text } diff --git a/Applications/LLMEval/README.md b/Applications/LLMEval/README.md 
index ab7243c..2735b1a 100644 --- a/Applications/LLMEval/README.md +++ b/Applications/LLMEval/README.md @@ -17,6 +17,7 @@ Some notes about the setup: - this downloads models from hugging face so LLMEval -> Signing & Capabilities has the "Outgoing Connections (Client)" set in the App Sandbox - LLM models are large so this uses the Increased Memory Limit entitlement on iOS to allow ... increased memory limits for devices that have more memory - The Phi2 4 bit model is small enough to run on some iPhone models + - the model can be changed by editing `let modelConfiguration = ModelConfiguration.phi4bit` in `ContentView.swift` ### Troubleshooting