1. Error handler thread safety: last_mlx_error now uses _Atomic(const char*) with atomic_store_explicit/atomic_exchange_explicit (release/acquire). 2. macOS version minimum: -mmacosx-version-min changed from 26.0 to 13.3 (MLX's own minimum), no longer locks out macOS 14/15 users. 3. LoadOption applied in metalBackend.LoadModel(): calls ApplyLoadOpts(), passes ContextLen through to Model which replaces unbounded KVCache with RotatingKVCache when set. GPULayers=0 logs a warning. Co-Authored-By: Virgil <virgil@lethean.io> Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
29 lines
699 B
Go
29 lines
699 B
Go
//go:build darwin && arm64
|
|
|
|
package metal
|
|
|
|
import "fmt"
|
|
|
|
// LoadConfig carries the optional settings that LoadAndInit applies while
// loading a model.
type LoadConfig struct {
	// ContextLen is the context window size. Zero selects the model default
	// (unbounded KV cache).
	ContextLen int
}
|
|
|
|
// LoadAndInit initialises Metal and loads a model from the given path.
|
|
// Returns a *Model ready for generation.
|
|
func LoadAndInit(path string, cfg ...LoadConfig) (*Model, error) {
|
|
Init()
|
|
im, err := loadModel(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("metal: %w", err)
|
|
}
|
|
m := &Model{
|
|
model: im,
|
|
tokenizer: im.Tokenizer(),
|
|
modelType: im.ModelType(),
|
|
}
|
|
if len(cfg) > 0 && cfg[0].ContextLen > 0 {
|
|
m.contextLen = cfg[0].ContextLen
|
|
}
|
|
return m, nil
|
|
}
|