fix for #2 -- CodeLlama crashes

- add replacement tokenizer class for unknown tokenizers
- fix quantization for models that don't have lm_head quantized

Requires https://github.com/ml-explore/mlx-swift/pull/28
2024-02-26 10:38:05 -08:00
parent 8870b0d386
commit bb7bacc077
5 changed files with 80 additions and 54 deletions
--- a/Libraries/LLM/LLMModel.swift
+++ b/Libraries/LLM/LLMModel.swift
@@ -6,6 +6,9 @@ import MLXNN

 // Interface for all LLM Models
 public protocol LLMModel: Module {
+
+    var vocabularySize: Int { get }
+
    func callAsFunction(_ inputs: MLXArray, cache: [(MLXArray, MLXArray)]?) -> (
        MLXArray, [(MLXArray, MLXArray)]
    )