add MNIST training example
This commit is contained in:
116
Applications/MNISTTrainer/ContentView.swift
Normal file
116
Applications/MNISTTrainer/ContentView.swift
Normal file
@@ -0,0 +1,116 @@
|
||||
// Copyright © 2024 Apple Inc.
|
||||
|
||||
import MLX
|
||||
import MLXNN
|
||||
import MLXOptimizers
|
||||
import MLXRandom
|
||||
import MNIST
|
||||
import SwiftUI
|
||||
|
||||
/// Root view for the MNIST trainer example: shows a scrolling log of
/// per-epoch training messages, a button to start training, and a toggle
/// to choose CPU vs GPU execution.
struct ContentView: View {

    // the training loop (observable -- `messages` updates drive the UI)
    @State var trainer = Trainer()

    // toggle for cpu/gpu training
    @State var cpu = true

    var body: some View {
        VStack {
            Spacer()

            // scrolling log of progress messages appended by the trainer
            ScrollView(.vertical) {
                ForEach(trainer.messages, id: \.self) {
                    Text($0)
                }
            }

            HStack {
                Spacer()

                Button("Train") {
                    Task {
                        // Fix: the original used `try!`, which crashes the app
                        // if training throws (e.g. the dataset download fails).
                        // Surface the error in the message log instead.
                        do {
                            try await trainer.run(device: cpu ? .cpu : .gpu)
                        } catch {
                            trainer.messages.append("Training failed: \(error)")
                        }
                    }
                }

                Toggle("CPU", isOn: $cpu)
                    .frame(maxWidth: 150)

                Spacer()
            }
            Spacer()
        }
        .padding()
    }
}
|
||||
|
||||
/// Drives MNIST training and publishes per-epoch progress messages for the UI.
///
/// This mirrors the `mnist-tool` command-line example, wrapped in an
/// `@Observable` class so SwiftUI can display progress as it happens.
@Observable
class Trainer {

    // one entry per completed epoch (plus any status text); observed by the view
    var messages = [String]()

    /// Downloads the MNIST dataset (into the temporary directory), trains a
    /// small MLP for 10 epochs with SGD, and appends an accuracy/time summary
    /// to `messages` after each epoch.
    ///
    /// - Parameter device: compute device to use for all MLX operations
    /// - Throws: errors from downloading or loading the dataset
    func run(device: Device = .cpu) async throws {
        Device.setDefault(device: device)

        // fetch & decode the training data into the temporary directory
        let dataDirectory = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
        try await download(into: dataDirectory)
        let dataset = try load(from: dataDirectory)

        let trainImages = dataset[.init(.training, .images)]!
        let trainLabels = dataset[.init(.training, .labels)]!
        let testImages = dataset[.init(.test, .images)]!
        let testLabels = dataset[.init(.test, .labels)]!

        // build a 2-layer MLP with random weights and force evaluation so
        // initialization cost is not attributed to the first epoch
        let model = MLP(
            layers: 2, inputDimensions: trainImages.dim(-1), hiddenDimensions: 32,
            outputDimensions: 10)
        eval(model.parameters())

        // loss+gradient function and plain SGD optimizer
        let lossAndGrad = valueAndGrad(model: model, loss)
        let optimizer = SGD(learningRate: 0.1)

        // fixed seeds so each run of the app behaves identically
        MLXRandom.seed(0)
        var rng: RandomNumberGenerator = SplitMix64(seed: 0)

        for epoch in 0 ..< 10 {
            let epochStart = Date.timeIntervalSinceReferenceDate

            for (batchImages, batchLabels) in iterateBatches(
                batchSize: 256, x: trainImages, y: trainLabels, using: &rng)
            {
                // compute gradients and apply an SGD step
                let (_, gradients) = lossAndGrad(model, batchImages, batchLabels)
                optimizer.update(model: model, gradients: gradients)

                // materialize parameters so each iteration is independent
                eval(model, optimizer)
            }

            let testAccuracy = eval(model: model, x: testImages, y: testLabels)
            let epochEnd = Date.timeIntervalSinceReferenceDate

            // publish on the main actor -- `messages` is observed by SwiftUI
            await MainActor.run {
                messages.append(
                    """
                    Epoch \(epoch): test accuracy \(testAccuracy.item(Float.self).formatted())
                    Time: \((epochEnd - epochStart).formatted())

                    """
                )
            }
        }
    }
}
|
||||
Reference in New Issue
Block a user