update documentation
This commit is contained in:
@@ -13,12 +13,14 @@ struct ContentView: View {
|
|||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
VStack {
|
VStack {
|
||||||
|
// show the model output
|
||||||
ScrollView(.vertical) {
|
ScrollView(.vertical) {
|
||||||
if llm.running {
|
if llm.running {
|
||||||
ProgressView()
|
ProgressView()
|
||||||
}
|
}
|
||||||
Text(llm.output)
|
Text(llm.output)
|
||||||
}
|
}
|
||||||
|
|
||||||
HStack {
|
HStack {
|
||||||
TextField("prompt", text: $prompt)
|
TextField("prompt", text: $prompt)
|
||||||
.onSubmit(generate)
|
.onSubmit(generate)
|
||||||
@@ -29,6 +31,7 @@ struct ContentView: View {
|
|||||||
}
|
}
|
||||||
.padding()
|
.padding()
|
||||||
.task {
|
.task {
|
||||||
|
// pre-load the weights on launch to speed up the first generation
|
||||||
_ = try? await llm.load()
|
_ = try? await llm.load()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -48,7 +51,11 @@ class LLMEvaluator {
|
|||||||
|
|
||||||
var output = ""
|
var output = ""
|
||||||
|
|
||||||
|
/// this controls which model loads -- phi4bit is one of the smaller ones, so it will fit on
|
||||||
|
/// more devices
|
||||||
let modelConfiguration = ModelConfiguration.phi4bit
|
let modelConfiguration = ModelConfiguration.phi4bit
|
||||||
|
|
||||||
|
/// parameters controlling the output
|
||||||
let temperature: Float = 0.0
|
let temperature: Float = 0.0
|
||||||
let maxTokens = 100
|
let maxTokens = 100
|
||||||
|
|
||||||
@@ -59,6 +66,8 @@ class LLMEvaluator {
|
|||||||
|
|
||||||
var loadState = LoadState.idle
|
var loadState = LoadState.idle
|
||||||
|
|
||||||
|
/// load and return the model -- can be called multiple times; subsequent calls will
|
||||||
|
/// just return the loaded model
|
||||||
func load() async throws -> (LLMModel, LLM.Tokenizer) {
|
func load() async throws -> (LLMModel, LLM.Tokenizer) {
|
||||||
switch loadState {
|
switch loadState {
|
||||||
case .idle:
|
case .idle:
|
||||||
@@ -86,6 +95,7 @@ class LLMEvaluator {
|
|||||||
self.output = ""
|
self.output = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// augment the prompt as needed
|
||||||
let prompt = modelConfiguration.prepare(prompt: prompt)
|
let prompt = modelConfiguration.prepare(prompt: prompt)
|
||||||
let promptTokens = MLXArray(tokenizer.encode(text: prompt))
|
let promptTokens = MLXArray(tokenizer.encode(text: prompt))
|
||||||
|
|
||||||
@@ -94,12 +104,14 @@ class LLMEvaluator {
|
|||||||
for token in TokenIterator(prompt: promptTokens, model: model, temp: temperature) {
|
for token in TokenIterator(prompt: promptTokens, model: model, temp: temperature) {
|
||||||
let tokenId = token.item(Int.self)
|
let tokenId = token.item(Int.self)
|
||||||
|
|
||||||
if tokenId == tokenizer.unknownTokenId {
|
if tokenId == tokenizer.unknownTokenId || tokenId == tokenizer.eosTokenId {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
outputTokens.append(tokenId)
|
outputTokens.append(tokenId)
|
||||||
let text = tokenizer.decode(tokens: outputTokens)
|
let text = tokenizer.decode(tokens: outputTokens)
|
||||||
|
|
||||||
|
// update the output -- this will make the view show the text as it generates
|
||||||
await MainActor.run {
|
await MainActor.run {
|
||||||
self.output = text
|
self.output = text
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ Some notes about the setup:
|
|||||||
- this downloads models from hugging face so LLMEval -> Signing & Capabilities has the "Outgoing Connections (Client)" set in the App Sandbox
|
- this downloads models from hugging face so LLMEval -> Signing & Capabilities has the "Outgoing Connections (Client)" set in the App Sandbox
|
||||||
- LLMs are large, so this uses the Increased Memory Limit entitlement on iOS to allow a higher memory limit on devices that have more memory
|
- LLMs are large, so this uses the Increased Memory Limit entitlement on iOS to allow a higher memory limit on devices that have more memory
|
||||||
- The Phi-2 4-bit model is small enough to run on some iPhone models
|
- The Phi-2 4-bit model is small enough to run on some iPhone models
|
||||||
|
- the model can be changed by editing `let modelConfiguration = ModelConfiguration.phi4bit` in `LLMEvaluator`
|
||||||
|
|
||||||
### Troubleshooting
|
### Troubleshooting
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user