go-ml/backend_mlx.go
Snider a4d7686147 feat(adapter): bridge go-inference TextModel to ml.Backend/StreamingBackend
InferenceAdapter wraps inference.TextModel (iter.Seq[Token]) to satisfy
ml.Backend (string returns) and ml.StreamingBackend (TokenCallback).

- adapter.go: InferenceAdapter with Generate/Chat/Stream/Close
- adapter_test.go: 13 test cases with mock TextModel (all pass)
- backend_mlx.go: rewritten from 253 LOC to ~35 LOC using go-inference
- go.mod: add forge.lthn.ai/core/go-inference dependency
- TODO.md: mark Phase 1 steps 1.1-1.3 complete

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-20 00:52:34 +00:00

38 lines
1 KiB
Go

// SPDX-License-Identifier: EUPL-1.2
//go:build darwin && arm64
package ml
import (
"fmt"
"log/slog"
"forge.lthn.ai/core/go-inference"
_ "forge.lthn.ai/core/go-mlx" // registers "metal" backend via init()
)
// NewMLXBackend loads a model via go-inference's Metal backend and wraps it
// in an InferenceAdapter for use as ml.Backend/StreamingBackend.
//
// The blank import of go-mlx registers the "metal" backend, so
// inference.LoadModel() will automatically use Metal on Apple Silicon.
//
// Load options (context length, etc.) are forwarded directly to go-inference.
// On failure, the returned error wraps the underlying cause and names the
// model path so callers loading several models can tell which one failed.
func NewMLXBackend(modelPath string, loadOpts ...inference.LoadOption) (*InferenceAdapter, error) {
	slog.Info("mlx: loading model via go-inference", "path", modelPath)

	m, err := inference.LoadModel(modelPath, loadOpts...)
	if err != nil {
		// Include the path in the wrap: the underlying loader error does
		// not always identify which file was being opened.
		return nil, fmt.Errorf("mlx: loading model %q: %w", modelPath, err)
	}

	// Log a one-line summary of what was loaded; fields come straight
	// from the model's own metadata.
	info := m.Info()
	slog.Info("mlx: model loaded",
		"arch", info.Architecture,
		"layers", info.NumLayers,
		"quant", info.QuantBits,
	)

	// "mlx" is the backend name the adapter reports to callers.
	return NewInferenceAdapter(m, "mlx"), nil
}