handle partially quantized models (#76)
* handle partially quantized models - fix for #53, #71, #69, #74 - in order to test the models, I added a default prompt of an appropriate form - while working on the model configuration, I also added additional stop tokens (#74) - fixed the repetitionPenalty code (#71)
This commit is contained in:
@@ -12,4 +12,15 @@ public protocol LLMModel: Module {
|
||||
func callAsFunction(_ inputs: MLXArray, cache: [(MLXArray, MLXArray)]?) -> (
|
||||
MLXArray, [(MLXArray, MLXArray)]
|
||||
)
|
||||
|
||||
/// Optionally preprocess the weights and modify / remove values as needed.
|
||||
func sanitize(weights: [String: MLXArray]) -> [String: MLXArray]
|
||||
}
|
||||
|
||||
extension LLMModel {

    /// Default `sanitize(weights:)` implementation: a pass-through that
    /// performs no preprocessing.
    ///
    /// - Parameter weights: the weights dictionary, keyed by string.
    /// - Returns: the same `weights` dictionary, with no entries modified or removed.
    public func sanitize(weights: [String: MLXArray]) -> [String: MLXArray] {
        return weights
    }

}
|
||||
|
||||
Reference in New Issue
Block a user