diff --git a/Applications/LLMEval/ContentView.swift b/Applications/LLMEval/ContentView.swift
index 806264e..176c1ff 100644
--- a/Applications/LLMEval/ContentView.swift
+++ b/Applications/LLMEval/ContentView.swift
@@ -13,12 +13,14 @@ struct ContentView: View {
 
     var body: some View {
         VStack {
+            // show the model output
             ScrollView(.vertical) {
                 if llm.running {
                     ProgressView()
                 }
                 Text(llm.output)
             }
+
             HStack {
                 TextField("prompt", text: $prompt)
                     .onSubmit(generate)
@@ -29,6 +31,7 @@ struct ContentView: View {
         }
         .padding()
         .task {
+            // pre-load the weights on launch to speed up the first generation
             _ = try? await llm.load()
         }
     }
@@ -48,7 +51,11 @@ class LLMEvaluator {
 
     var output = ""
 
+    /// this controls which model loads -- phi4bit is one of the smaller ones so this will fit on
+    /// more devices
     let modelConfiguration = ModelConfiguration.phi4bit
+
+    /// parameters controlling the output
     let temperature: Float = 0.0
     let maxTokens = 100
 
@@ -59,6 +66,8 @@ class LLMEvaluator {
 
     var loadState = LoadState.idle
 
+    /// load and return the model -- can be called multiple times, subsequent calls will
+    /// just return the loaded model
     func load() async throws -> (LLMModel, LLM.Tokenizer) {
         switch loadState {
         case .idle:
@@ -86,6 +95,7 @@ class LLMEvaluator {
                 self.output = ""
             }
 
+            // augment the prompt as needed
             let prompt = modelConfiguration.prepare(prompt: prompt)
             let promptTokens = MLXArray(tokenizer.encode(text: prompt))
 
@@ -94,12 +104,14 @@ class LLMEvaluator {
             for token in TokenIterator(prompt: promptTokens, model: model, temp: temperature) {
                 let tokenId = token.item(Int.self)
 
-                if tokenId == tokenizer.unknownTokenId {
+                if tokenId == tokenizer.unknownTokenId || tokenId == tokenizer.eosTokenId {
                     break
                 }
 
                 outputTokens.append(tokenId)
                 let text = tokenizer.decode(tokens: outputTokens)
+
+                // update the output -- this will make the view show the text as it generates
                 await MainActor.run {
                     self.output = text
                 }
diff --git a/Applications/LLMEval/README.md b/Applications/LLMEval/README.md
index ab7243c..2735b1a 100644
--- a/Applications/LLMEval/README.md
+++ b/Applications/LLMEval/README.md
@@ -17,6 +17,7 @@ Some notes about the setup:
 - this downloads models from hugging face so LLMEval -> Signing & Capabilities has the "Outgoing Connections (Client)" set in the App Sandbox
 - LLM models are large so this uses the Increased Memory Limit entitlement on iOS to allow ... increased memory limits for devices that have more memory
 - The Phi2 4 bit model is small enough to run on some iPhone models
+    - the model can be changed by editing `let modelConfiguration = ModelConfiguration.phi4bit` in `ContentView.swift`
 
 ### Troubleshooting