update documentation

David Koski
2024-03-01 16:33:49 -08:00
parent 807c8136c0
commit 0374e4b073
2 changed files with 14 additions and 1 deletion

View File: ContentView.swift

@@ -13,12 +13,14 @@ struct ContentView: View {
     var body: some View {
         VStack {
+            // show the model output
             ScrollView(.vertical) {
                 if llm.running {
                     ProgressView()
                 }
                 Text(llm.output)
             }

             HStack {
                 TextField("prompt", text: $prompt)
                     .onSubmit(generate)
@@ -29,6 +31,7 @@ struct ContentView: View {
         }
         .padding()
         .task {
+            // pre-load the weights on launch to speed up the first generation
             _ = try? await llm.load()
         }
     }
@@ -48,7 +51,11 @@ class LLMEvaluator {
     var output = ""

+    /// this controls which model loads -- phi4bit is one of the smaller ones so this will fit on
+    /// more devices
     let modelConfiguration = ModelConfiguration.phi4bit

+    /// parameters controlling the output
     let temperature: Float = 0.0
     let maxTokens = 100
@@ -59,6 +66,8 @@ class LLMEvaluator {
     var loadState = LoadState.idle

+    /// load and return the model -- can be called multiple times, subsequent calls will
+    /// just return the loaded model
     func load() async throws -> (LLMModel, LLM.Tokenizer) {
         switch loadState {
         case .idle:
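
The doc comment added above describes a load-once pattern: `loadState` caches the model after the first call, and later calls return the cached value. A minimal sketch of how that switch typically completes; `loadModel(configuration:)` is a hypothetical helper standing in for the download/weight-loading step this hunk does not show:

enum LoadState {
    case idle
    case loaded(LLMModel, LLM.Tokenizer)
}

var loadState = LoadState.idle

func load() async throws -> (LLMModel, LLM.Tokenizer) {
    switch loadState {
    case .idle:
        // first call: perform the expensive download and weight load once
        let (model, tokenizer) = try await loadModel(configuration: modelConfiguration)  // hypothetical helper
        loadState = .loaded(model, tokenizer)
        return (model, tokenizer)

    case .loaded(let model, let tokenizer):
        // subsequent calls: return the cached instances immediately
        return (model, tokenizer)
    }
}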
@@ -86,6 +95,7 @@ class LLMEvaluator {
             self.output = ""
         }

+        // augment the prompt as needed
        let prompt = modelConfiguration.prepare(prompt: prompt)
        let promptTokens = MLXArray(tokenizer.encode(text: prompt))
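
The "augment the prompt" step applies model-specific templating before tokenization. The real template lives in `ModelConfiguration` and is not part of this diff; as a sketch only, an instruct-style preparation might look like:

func prepare(prompt: String) -> String {
    // hypothetical template: phi-style instruct models commonly frame the
    // input as "Instruct: ... Output:" so the model answers the question
    // rather than continuing the user's text
    "Instruct: \(prompt)\nOutput: "
}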
@@ -94,12 +104,14 @@ class LLMEvaluator {
        for token in TokenIterator(prompt: promptTokens, model: model, temp: temperature) {
            let tokenId = token.item(Int.self)

-            if tokenId == tokenizer.unknownTokenId {
+            if tokenId == tokenizer.unknownTokenId || tokenId == tokenizer.eosTokenId {
                break
            }

            outputTokens.append(tokenId)
            let text = tokenizer.decode(tokens: outputTokens)

+            // update the output -- this will make the view show the text as it generates
            await MainActor.run {
                self.output = text
            }
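
The one behavioral change in this commit is the stop condition: generation now also ends when the model emits its end-of-sequence token, not only on an unknown token. A reduced sketch of that loop, with `nextToken()` as a stand-in for the `TokenIterator` above:

var outputTokens = [Int]()

while let token = nextToken() {  // stand-in for TokenIterator
    let tokenId = token.item(Int.self)

    // without the eosTokenId check, the loop would keep sampling past the
    // natural end of the answer until maxTokens, often producing repeated text
    if tokenId == tokenizer.unknownTokenId || tokenId == tokenizer.eosTokenId {
        break
    }
    outputTokens.append(tokenId)

    // decode the whole array each iteration: tokens do not map 1:1 to
    // characters, so decoding incrementally could split multi-byte text
    let text = tokenizer.decode(tokens: outputTokens)
    await MainActor.run { self.output = text }  // stream partial output to the UI
}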

View File: README.md

@@ -17,6 +17,7 @@ Some notes about the setup:
 - this downloads models from hugging face so LLMEval -> Signing & Capabilities has the "Outgoing Connections (Client)" set in the App Sandbox
 - LLM models are large so this uses the Increased Memory Limit entitlement on iOS to allow ... increased memory limits for devices that have more memory
 - The Phi2 4 bit model is small enough to run on some iPhone models
+    - this can be changed by editing `let modelConfiguration = ModelConfiguration.phi4bit`

 ### Troubleshooting
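
The new README bullet points at the single line that selects the model. Swapping models is a one-line edit; any preset name below other than `phi4bit` is illustrative, since the available set of `ModelConfiguration` presets depends on the library version:

// as shipped: a 4-bit Phi model, small enough to fit on more devices
let modelConfiguration = ModelConfiguration.phi4bit

// illustrative swap -- another preset would load the same way, but larger
// models need more memory than some iPhones provide
// let modelConfiguration = ModelConfiguration.mistral7B4bit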