InferenceAdapter wraps inference.TextModel (iter.Seq[Token]) to satisfy
ml.Backend (string returns) and ml.StreamingBackend (TokenCallback).

- adapter.go: InferenceAdapter with Generate/Chat/Stream/Close
- adapter_test.go: 13 test cases with mock TextModel (all pass)
- backend_mlx.go: rewritten from 253 LOC to ~35 LOC using go-inference
- go.mod: add forge.lthn.ai/core/go-inference dependency
- TODO.md: mark Phase 1 steps 1.1-1.3 complete

Co-Authored-By: Virgil <virgil@lethean.io>
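To make the adapter shape concrete, here is a minimal sketch of what the commit message describes. Only the names TextModel, iter.Seq[Token], TokenCallback, InferenceAdapter, NewInferenceAdapter, and the Generate/Stream/Close methods come from the message above; every signature is an assumption for illustration (Chat is omitted for brevity), not the actual adapter.go.

// Sketch only: signatures are assumed, not confirmed go-inference API.
package ml

import (
	"context"
	"iter"
	"strings"
)

// Token stands in for go-inference's token type (assumed shape).
type Token struct{ Text string }

// TextModel approximates the interface the adapter wraps: it yields
// tokens lazily as an iterator sequence.
type TextModel interface {
	Generate(ctx context.Context, prompt string) iter.Seq[Token]
	Close() error
}

// TokenCallback receives each token as it is produced (ml.StreamingBackend).
type TokenCallback func(Token) error

// InferenceAdapter adapts the iterator-based TextModel to the
// string-returning ml.Backend and the callback-based ml.StreamingBackend.
type InferenceAdapter struct {
	model TextModel
	name  string
}

func NewInferenceAdapter(m TextModel, name string) *InferenceAdapter {
	return &InferenceAdapter{model: m, name: name}
}

// Generate drains the token stream and returns it as one string.
func (a *InferenceAdapter) Generate(ctx context.Context, prompt string) (string, error) {
	var sb strings.Builder
	for tok := range a.model.Generate(ctx, prompt) {
		sb.WriteString(tok.Text)
	}
	return sb.String(), nil
}

// Stream forwards each token to cb as it arrives, stopping on the first error.
func (a *InferenceAdapter) Stream(ctx context.Context, prompt string, cb TokenCallback) error {
	for tok := range a.model.Generate(ctx, prompt) {
		if err := cb(tok); err != nil {
			return err
		}
	}
	return nil
}

// Close releases the underlying model.
func (a *InferenceAdapter) Close() error { return a.model.Close() }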
// SPDX-License-Identifier: EUPL-1.2

//go:build darwin && arm64

package ml

import (
	"fmt"
	"log/slog"

	"forge.lthn.ai/core/go-inference"
	_ "forge.lthn.ai/core/go-mlx" // registers "metal" backend via init()
)

// NewMLXBackend loads a model via go-inference's Metal backend and wraps it
// in an InferenceAdapter for use as ml.Backend/StreamingBackend.
//
// The blank import of go-mlx registers the "metal" backend, so
// inference.LoadModel() will automatically use Metal on Apple Silicon.
//
// Load options (context length, etc.) are forwarded directly to go-inference.
func NewMLXBackend(modelPath string, loadOpts ...inference.LoadOption) (*InferenceAdapter, error) {
	slog.Info("mlx: loading model via go-inference", "path", modelPath)

	m, err := inference.LoadModel(modelPath, loadOpts...)
	if err != nil {
		return nil, fmt.Errorf("mlx: %w", err)
	}

	info := m.Info()
	slog.Info("mlx: model loaded",
		"arch", info.Architecture,
		"layers", info.NumLayers,
		"quant", info.QuantBits,
	)

	return NewInferenceAdapter(m, "mlx"), nil
}
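The blank import works because Go runs each imported package's init() before any code that depends on it. Below is an entirely hypothetical sketch of the registry mechanism the "registers via init()" comment implies; none of these names are confirmed go-inference or go-mlx API.

// Hypothetical registry inside go-inference (names invented for illustration).
package inference

import "fmt"

type TextModel interface{ Close() error } // trimmed for the sketch

type LoadOption func(*config)
type config struct{}

// Loader constructs a TextModel from a model path.
type Loader func(path string, opts ...LoadOption) (TextModel, error)

var backends = map[string]Loader{}

// RegisterBackend is what go-mlx would call from its init():
//
//	func init() { inference.RegisterBackend("metal", loadMetal) }
//
// Importing go-mlx for side effects (the blank import) runs that init()
// first, so the backend is already registered when LoadModel is called.
func RegisterBackend(name string, l Loader) { backends[name] = l }

// LoadModel looks up a registered backend; the real selection logic is unknown.
func LoadModel(path string, opts ...LoadOption) (TextModel, error) {
	l, ok := backends["metal"]
	if !ok {
		return nil, fmt.Errorf("inference: no backend registered")
	}
	return l(path, opts...)
}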
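For context, a possible call site for NewMLXBackend. This is a sketch only: the model path is illustrative, load options are omitted, and Generate's signature follows the adapter sketch near the top of this page (an assumption, not confirmed API).

//go:build darwin && arm64

package ml

import (
	"context"
	"fmt"
	"log"
)

// exampleNewMLXBackend is a hypothetical call site, not part of the file above.
func exampleNewMLXBackend() {
	// Model path is illustrative; go-inference's option constructors
	// are not shown in this file, so none are passed here.
	backend, err := NewMLXBackend("models/example-4bit")
	if err != nil {
		log.Fatal(err)
	}
	defer backend.Close()

	out, err := backend.Generate(context.Background(), "Explain Metal in one sentence.")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out)
}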