add LLM evaluator example
- runs on iOS and macOS - downloads a model / tokenizer from hugging face - evaluates the given prompt
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"colors" : [
|
||||
{
|
||||
"idiom" : "universal"
|
||||
}
|
||||
],
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"images" : [
|
||||
{
|
||||
"idiom" : "universal",
|
||||
"platform" : "ios",
|
||||
"size" : "1024x1024"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "16x16"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "16x16"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "32x32"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "32x32"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "128x128"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "128x128"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "256x256"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "256x256"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "1x",
|
||||
"size" : "512x512"
|
||||
},
|
||||
{
|
||||
"idiom" : "mac",
|
||||
"scale" : "2x",
|
||||
"size" : "512x512"
|
||||
}
|
||||
],
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
6
Applications/LLMEval/Assets.xcassets/Contents.json
Normal file
6
Applications/LLMEval/Assets.xcassets/Contents.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
121
Applications/LLMEval/ContentView.swift
Normal file
121
Applications/LLMEval/ContentView.swift
Normal file
@@ -0,0 +1,121 @@
|
||||
// Copyright © 2024 Apple Inc.
|
||||
|
||||
import LLM
|
||||
import MLX
|
||||
import SwiftUI
|
||||
import Tokenizers
|
||||
import Metal
|
||||
|
||||
/// Prompt entry plus streaming generated-text display for the LLM evaluator.
struct ContentView: View {

    /// Editable prompt text; a default is provided so "generate" works immediately.
    @State var prompt = "compare python and swift"
    /// Owns model loading and generation state (`running`, `output`).
    @State var llm = LLMEvaluator()

    var body: some View {
        VStack {
            ScrollView(.vertical) {
                // Spinner while a generation is in flight.
                if llm.running {
                    ProgressView()
                }
                Text(llm.output)
            }
            HStack {
                TextField("prompt", text: $prompt)
                    .onSubmit(generate)
                    .disabled(llm.running)
                Button("generate", action: generate)
                    .disabled(llm.running)
            }
        }
        .padding()
        .task {
            // Pre-load the model when the view appears. Surface failures in the
            // output area instead of silently discarding them with `try?` —
            // otherwise a download/load error leaves the UI stuck with no feedback.
            do {
                _ = try await llm.load()
            } catch {
                llm.output = "Failed to load model: \(error)"
            }
        }
    }

    /// Kicks off generation on a task so the UI stays responsive.
    private func generate() {
        Task {
            await llm.generate(prompt: prompt)
        }
    }
}
|
||||
|
||||
/// Downloads a model/tokenizer from Hugging Face, then evaluates prompts and
/// streams decoded text to `output` for display.
@Observable
class LLMEvaluator {

    /// True while a generation is in flight; drives the ProgressView and
    /// disables the prompt controls. Main-actor isolated because it feeds the UI.
    @MainActor
    var running = false

    /// Text shown in the UI: download progress, then generated output, or an error.
    var output = ""

    /// Which model/tokenizer to download and evaluate with.
    let modelConfiguration = ModelConfiguration.phi4bit

    /// Sampling temperature; 0.0 means greedy (deterministic) decoding.
    let temperature: Float = 0.0

    /// Hard cap on the number of generated tokens per prompt.
    let maxTokens = 100

    /// Caches the loaded model so repeated generations skip the download/load.
    enum LoadState {
        case idle
        case loaded(LLMModel, LLM.Tokenizer)
    }

    var loadState = LoadState.idle

    /// Downloads (if needed) and loads the model and tokenizer, caching the
    /// result so subsequent calls return immediately.
    /// - Returns: the loaded model and its tokenizer.
    /// - Throws: any error from the download or model load.
    func load() async throws -> (LLMModel, LLM.Tokenizer) {
        switch loadState {
        case .idle:
            let (model, tokenizer) = try await LLM.load(configuration: modelConfiguration) {
                [modelConfiguration] progress in
                // The progress callback is synchronous and may arrive on any
                // thread. Hop to the main actor with a Task rather than
                // DispatchQueue.main.sync, which would deadlock if the callback
                // ever fires on the main thread.
                Task { @MainActor in
                    self.output =
                        "Downloading \(modelConfiguration.id): \(Int(progress.fractionCompleted * 100))%"
                }
            }
            loadState = .loaded(model, tokenizer)
            return (model, tokenizer)

        case .loaded(let model, let tokenizer):
            return (model, tokenizer)
        }
    }

    /// Evaluates `prompt`, streaming the decoded text into `output` token by token.
    func generate(prompt: String) async {
        do {
            let (model, tokenizer) = try await load()

            await MainActor.run {
                running = true
                self.output = ""
            }

            // Apply the model-specific prompt template before tokenizing.
            let prompt = modelConfiguration.prepare(prompt: prompt)
            let promptTokens = MLXArray(tokenizer.encode(text: prompt))

            var outputTokens = [Int]()

            for token in TokenIterator(prompt: promptTokens, model: model, temp: temperature) {
                let tokenId = token.item(Int.self)

                // NOTE(review): this reads like an end-of-sequence stop; confirm
                // whether eosTokenId (rather than unknownTokenId) is intended.
                if tokenId == tokenizer.unknownTokenId {
                    break
                }

                outputTokens.append(tokenId)

                // Re-decode the whole sequence each step so partial multi-byte
                // tokens render correctly, then publish on the main actor.
                let text = tokenizer.decode(tokens: outputTokens)
                await MainActor.run {
                    self.output = text
                }

                // >= rather than == so the cap holds even if the count could
                // ever step past the exact limit.
                if outputTokens.count >= maxTokens {
                    break
                }
            }

            await MainActor.run {
                running = false
            }

        } catch {
            // Reset the running flag and show the failure instead of hiding it.
            await MainActor.run {
                running = false
                output = "Failed: \(error)"
            }
        }
    }
}
|
||||
16
Applications/LLMEval/LLMEval.entitlements
Normal file
16
Applications/LLMEval/LLMEval.entitlements
Normal file
@@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.developer.kernel.increased-memory-limit</key>
|
||||
<true/>
|
||||
<key>com.apple.security.app-sandbox</key>
|
||||
<true/>
|
||||
<key>com.apple.security.device.usb</key>
|
||||
<true/>
|
||||
<key>com.apple.security.files.user-selected.read-only</key>
|
||||
<true/>
|
||||
<key>com.apple.security.network.client</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
12
Applications/LLMEval/LLMEvalApp.swift
Normal file
12
Applications/LLMEval/LLMEvalApp.swift
Normal file
@@ -0,0 +1,12 @@
|
||||
// Copyright © 2024 Apple Inc.
|
||||
|
||||
import SwiftUI
|
||||
|
||||
/// App entry point: hosts a single window containing `ContentView` on both
/// iOS and macOS.
@main
struct LLMEvalApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
35
Applications/LLMEval/README.md
Normal file
35
Applications/LLMEval/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# LLMEval
|
||||
|
||||
An example that:
|
||||
|
||||
- downloads a Hugging Face model (phi-2) and tokenizer
|
||||
- evaluates a prompt
|
||||
- displays the output as it generates text
|
||||
|
||||
> Note: this _must_ be built Release, otherwise you will encounter
|
||||
stack overflows.
|
||||
|
||||
You will need to set the Team on the LLMEval target in order to build and
|
||||
run on iOS.
|
||||
|
||||
Some notes about the setup:
|
||||
|
||||
- this downloads models from Hugging Face, so LLMEval -> Signing & Capabilities has the "Outgoing Connections (Client)" option set in the App Sandbox
|
||||
- LLM models are large so this uses the Increased Memory Limit entitlement on iOS to allow ... increased memory limits for devices that have more memory
|
||||
- The Phi2 4 bit model is small enough to run on some iPhone models
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
If the program crashes with a very deep stack trace you may need to build
|
||||
in Release configuration. This seems to depend on the size of the model.
|
||||
|
||||
There are a couple options:
|
||||
|
||||
- build Release
|
||||
- force the model evaluation to run on the main thread, e.g. using @MainActor
|
||||
- build `Cmlx` with optimizations by modifying `mlx/Package.swift` and adding `.unsafeFlags(["-O3"]),` around line 87
|
||||
|
||||
Building in Release / optimizations will remove a lot of tail calls in the C++
|
||||
layer. These lead to the stack overflows.
|
||||
|
||||
See discussion here: https://github.com/ml-explore/mlx-swift-examples/issues/3
|
||||
Reference in New Issue
Block a user