add LLM evaluator example
- runs on iOS and macOS - downloads a model / tokenizer from hugging face - evaluates the given prompt
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"colors" : [
|
||||
{
|
||||
"idiom" : "universal"
|
||||
}
|
||||
],
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"images" : [
|
||||
{
|
||||
"idiom" : "universal",
|
||||
"platform" : "ios",
|
||||
"size" : "1024x1024"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "16x16"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "16x16"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "32x32"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "32x32"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "128x128"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "128x128"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "256x256"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "256x256"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "512x512"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "512x512"
|
||||
}
|
||||
],
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
6
Applications/LLMEval/Assets.xcassets/Contents.json
Normal file
6
Applications/LLMEval/Assets.xcassets/Contents.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
121
Applications/LLMEval/ContentView.swift
Normal file
121
Applications/LLMEval/ContentView.swift
Normal file
@@ -0,0 +1,121 @@
|
||||
// Copyright © 2024 Apple Inc.
|
||||
|
||||
import LLM
|
||||
import MLX
|
||||
import SwiftUI
|
||||
import Tokenizers
|
||||
import Metal
|
||||
|
||||
/// Prompt entry plus streaming generated-text display for the LLM evaluator.
struct ContentView: View {

    /// Editable prompt text; a default is provided so "generate" works immediately.
    @State var prompt = "compare python and swift"
    /// Owns model loading and generation state (`running`, `output`).
    @State var llm = LLMEvaluator()

    var body: some View {
        VStack {
            ScrollView(.vertical) {
                // Spinner while a generation is in flight.
                if llm.running {
                    ProgressView()
                }
                Text(llm.output)
            }
            HStack {
                TextField("prompt", text: $prompt)
                    .onSubmit(generate)
                    .disabled(llm.running)
                Button("generate", action: generate)
                    .disabled(llm.running)
            }
        }
        .padding()
        .task {
            // Pre-load the model when the view appears. Surface failures in the
            // output area instead of silently discarding them with `try?` —
            // otherwise a download/load error leaves the UI stuck with no feedback.
            do {
                _ = try await llm.load()
            } catch {
                llm.output = "Failed to load model: \(error)"
            }
        }
    }

    /// Kicks off generation on a task so the UI stays responsive.
    private func generate() {
        Task {
            await llm.generate(prompt: prompt)
        }
    }
}
|
||||
|
||||
/// Downloads a model/tokenizer from Hugging Face, then evaluates prompts and
/// streams decoded text to `output` for display.
@Observable
class LLMEvaluator {

    /// True while a generation is in flight; drives the ProgressView and
    /// disables the prompt controls. Main-actor isolated because it feeds the UI.
    @MainActor
    var running = false

    /// Text shown in the UI: download progress, then generated output, or an error.
    var output = ""

    /// Which model/tokenizer to download and evaluate with.
    let modelConfiguration = ModelConfiguration.phi4bit

    /// Sampling temperature; 0.0 means greedy (deterministic) decoding.
    let temperature: Float = 0.0

    /// Hard cap on the number of generated tokens per prompt.
    let maxTokens = 100

    /// Caches the loaded model so repeated generations skip the download/load.
    enum LoadState {
        case idle
        case loaded(LLMModel, LLM.Tokenizer)
    }

    var loadState = LoadState.idle

    /// Downloads (if needed) and loads the model and tokenizer, caching the
    /// result so subsequent calls return immediately.
    /// - Returns: the loaded model and its tokenizer.
    /// - Throws: any error from the download or model load.
    func load() async throws -> (LLMModel, LLM.Tokenizer) {
        switch loadState {
        case .idle:
            let (model, tokenizer) = try await LLM.load(configuration: modelConfiguration) {
                [modelConfiguration] progress in
                // The progress callback is synchronous and may arrive on any
                // thread. Hop to the main actor with a Task rather than
                // DispatchQueue.main.sync, which would deadlock if the callback
                // ever fires on the main thread.
                Task { @MainActor in
                    self.output =
                        "Downloading \(modelConfiguration.id): \(Int(progress.fractionCompleted * 100))%"
                }
            }
            loadState = .loaded(model, tokenizer)
            return (model, tokenizer)

        case .loaded(let model, let tokenizer):
            return (model, tokenizer)
        }
    }

    /// Evaluates `prompt`, streaming the decoded text into `output` token by token.
    func generate(prompt: String) async {
        do {
            let (model, tokenizer) = try await load()

            await MainActor.run {
                running = true
                self.output = ""
            }

            // Apply the model-specific prompt template before tokenizing.
            let prompt = modelConfiguration.prepare(prompt: prompt)
            let promptTokens = MLXArray(tokenizer.encode(text: prompt))

            var outputTokens = [Int]()

            for token in TokenIterator(prompt: promptTokens, model: model, temp: temperature) {
                let tokenId = token.item(Int.self)

                // NOTE(review): this reads like an end-of-sequence stop; confirm
                // whether eosTokenId (rather than unknownTokenId) is intended.
                if tokenId == tokenizer.unknownTokenId {
                    break
                }

                outputTokens.append(tokenId)

                // Re-decode the whole sequence each step so partial multi-byte
                // tokens render correctly, then publish on the main actor.
                let text = tokenizer.decode(tokens: outputTokens)
                await MainActor.run {
                    self.output = text
                }

                // >= rather than == so the cap holds even if the count could
                // ever step past the exact limit.
                if outputTokens.count >= maxTokens {
                    break
                }
            }

            await MainActor.run {
                running = false
            }

        } catch {
            // Reset the running flag and show the failure instead of hiding it.
            await MainActor.run {
                running = false
                output = "Failed: \(error)"
            }
        }
    }
}
|
||||
16
Applications/LLMEval/LLMEval.entitlements
Normal file
16
Applications/LLMEval/LLMEval.entitlements
Normal file
@@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.developer.kernel.increased-memory-limit</key>
|
||||
<true/>
|
||||
<key>com.apple.security.app-sandbox</key>
|
||||
<true/>
|
||||
<key>com.apple.security.device.usb</key>
|
||||
<true/>
|
||||
<key>com.apple.security.files.user-selected.read-only</key>
|
||||
<true/>
|
||||
<key>com.apple.security.network.client</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
12
Applications/LLMEval/LLMEvalApp.swift
Normal file
12
Applications/LLMEval/LLMEvalApp.swift
Normal file
@@ -0,0 +1,12 @@
|
||||
// Copyright © 2024 Apple Inc.
|
||||
|
||||
import SwiftUI
|
||||
|
||||
/// App entry point: hosts a single window containing `ContentView` on both
/// iOS and macOS.
@main
struct LLMEvalApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
35
Applications/LLMEval/README.md
Normal file
35
Applications/LLMEval/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# LLMEval
|
||||
|
||||
An example that:
|
||||
|
||||
- downloads a Hugging Face model (phi-2) and tokenizer
|
||||
- evaluates a prompt
|
||||
- displays the output as it generates text
|
||||
|
||||
> Note: this _must_ be built Release, otherwise you will encounter
|
||||
stack overflows.
|
||||
|
||||
You will need to set the Team on the LLMEval target in order to build and
|
||||
run on iOS.
|
||||
|
||||
Some notes about the setup:
|
||||
|
||||
- this downloads models from Hugging Face, so LLMEval -> Signing & Capabilities has the "Outgoing Connections (Client)" option set in the App Sandbox
|
||||
- LLM models are large so this uses the Increased Memory Limit entitlement on iOS to allow ... increased memory limits for devices that have more memory
|
||||
- The Phi2 4 bit model is small enough to run on some iPhone models
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
If the program crashes with a very deep stack trace you may need to build
|
||||
in Release configuration. This seems to depend on the size of the model.
|
||||
|
||||
There are a couple options:
|
||||
|
||||
- build Release
|
||||
- force the model evaluation to run on the main thread, e.g. using @MainActor
|
||||
- build `Cmlx` with optimizations by modifying `mlx/Package.swift` and adding `.unsafeFlags(["-O3"]),` around line 87
|
||||
|
||||
Building in Release / optimizations will remove a lot of tail calls in the C++
|
||||
layer. These lead to the stack overflows.
|
||||
|
||||
See discussion here: https://github.com/ml-explore/mlx-swift-examples/issues/3
|
||||
Reference in New Issue
Block a user