fix for #2 -- CodeLlama crashes

- add replacement tokenizer class for unknown tokenizers
- fix quantization for models that don't have lm_head quantized

Requires https://github.com/ml-explore/mlx-swift/pull/28
2024-02-26 10:38:05 -08:00
parent 8870b0d386
commit bb7bacc077
5 changed files with 80 additions and 54 deletions
--- a/Libraries/LLM/Llama.swift
+++ b/Libraries/LLM/Llama.swift
@@ -187,11 +187,13 @@ public class LlamaModelInner: Module {

 public class LlamaModel: Module, LLMModel {

+    public let vocabularySize: Int
    let model: LlamaModelInner

    @ModuleInfo(key: "lm_head") var lmHead: Linear

    /// Creates the model from a Llama configuration.
    ///
    /// Records the configured vocabulary size, builds the inner model from
    /// `args`, and sets the `lm_head` layer to a `Linear` constructed with
    /// `args.hiddenSize`, `args.vocabularySize`, and `bias: false`.
    ///
    /// - Parameter args: Configuration supplying `hiddenSize` and
    ///   `vocabularySize`; it is also passed unchanged to `LlamaModelInner`.
    public init(_ args: LlamaConfiguration) {
+        self.vocabularySize = args.vocabularySize
        self.model = LlamaModelInner(args)
        // Assigned through the property wrapper's backing storage (`_lmHead`).
        self._lmHead.wrappedValue = Linear(args.hiddenSize, args.vocabularySize, bias: false)
    }