* handle partially quantized models - fix for #53 #71 #69 #74 - in order to test the models - I added a default prompt of an appropriate form - while working on the model configuration also added additional stop tokens (#74) - fixed the repetitionPenalty code (#71)
27 lines
587 B
Swift
27 lines
587 B
Swift
// Copyright © 2024 Apple Inc.
|
|
|
|
import Foundation
|
|
import MLX
|
|
import MLXNN
|
|
|
|
/// Interface for all LLM Models.
///
/// A conforming type is a `Module` that reports its vocabulary size, can be
/// called like a function with an input array and an optional cache, and can
/// sanitize a weights dictionary.
public protocol LLMModel: Module {

    /// The size of the model's vocabulary.
    var vocabularySize: Int { get }

    /// Invokes the model on `inputs`.
    ///
    /// - Parameters:
    ///   - inputs: The input array.
    ///   - cache: An optional list of array pairs; pass `nil` when there is none.
    /// - Returns: A tuple of an output array and a list of array pairs.
    ///
    /// NOTE(review): the array pairs are presumably per-layer (key, value)
    /// caches and the returned list the updated cache — confirm against the
    /// conforming models.
    func callAsFunction(_ inputs: MLXArray, cache: [(MLXArray, MLXArray)]?) -> (
        MLXArray, [(MLXArray, MLXArray)]
    )

    /// Optionally preprocess the weights and modify / remove values as needed.
    func sanitize(weights: [String: MLXArray]) -> [String: MLXArray]
}
|
|
|
|
extension LLMModel {

    /// Default implementation of `sanitize(weights:)`: returns `weights`
    /// unchanged.
    ///
    /// - Parameter weights: The weights dictionary, keyed by string.
    /// - Returns: The same dictionary that was passed in.
    public func sanitize(weights: [String: MLXArray]) -> [String: MLXArray] {
        weights
    }

}
|