feat: add ML inference, scoring, and training pipeline (pkg/ml)
Port LEM scoring/training pipeline into CoreGo as pkg/ml with:
- Inference abstraction with HTTP, llama-server, and Ollama backends
- 3-tier scoring engine (heuristic, exact, LLM judge)
- Capability and content probes for model evaluation
- GGUF/safetensors format converters, MLX to PEFT adapter conversion (see the sketch below this message)
- DuckDB integration for training data pipeline
- InfluxDB metrics for lab dashboard
- Training data export (JSONL + Parquet)
- Expansion generation pipeline with distributed workers
- 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker)
- 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends)
All 37 ML tests passing. Binary builds at 138MB with all commands.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 00:34:53 +00:00
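The MLX-to-PEFT conversion listed above is the path exercised by the command file below. As a minimal sketch, assuming only the four-argument ConvertMLXtoPEFT(input, config, outputDir, baseModel string) error signature that runConvert uses, the same conversion could be driven from Go without the CLI; every path and the base-model name here are placeholders, not values from the commit:

// Hypothetical standalone driver for the converter; assumes the
// ConvertMLXtoPEFT signature visible in runConvert below.
package main

import (
	"log"

	"forge.lthn.ai/core/go/pkg/ml"
)

func main() {
	if err := ml.ConvertMLXtoPEFT(
		"adapters/adapters.safetensors", // MLX LoRA weights (placeholder path)
		"adapters/adapter_config.json",  // MLX adapter config JSON (placeholder path)
		"out/peft-adapter",              // destination directory for the PEFT files
		"example-org/base-model",        // base model recorded in adapter_config.json (placeholder)
	); err != nil {
		log.Fatalf("convert to PEFT: %v", err)
	}
	log.Println("PEFT adapter written to out/peft-adapter")
}

The committed command file follows.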
package ml

import (
	"fmt"

	"forge.lthn.ai/core/go/pkg/cli"
	"forge.lthn.ai/core/go/pkg/ml"
)

// Flag values for the convert command.
var (
	convertInput     string
	convertConfig    string
	convertOutputDir string
	convertBaseModel string
)

// convertCmd is the 'core ml convert' subcommand.
var convertCmd = &cli.Command{
	Use:   "convert",
	Short: "Convert MLX LoRA adapter to PEFT format",
	Long:  "Converts an MLX safetensors LoRA adapter to HuggingFace PEFT format for Ollama.",
	RunE:  runConvert,
}

func init() {
	convertCmd.Flags().StringVar(&convertInput, "input", "", "Input safetensors file (required)")
	convertCmd.Flags().StringVar(&convertConfig, "config", "", "Adapter config JSON (required)")
	convertCmd.Flags().StringVar(&convertOutputDir, "output-dir", "", "Output directory (required)")
	convertCmd.Flags().StringVar(&convertBaseModel, "base-model", "", "Base model name for adapter_config.json")
	convertCmd.MarkFlagRequired("input")
	convertCmd.MarkFlagRequired("config")
	convertCmd.MarkFlagRequired("output-dir")
}

// runConvert converts the MLX safetensors adapter named by --input into a
// HuggingFace PEFT adapter directory under --output-dir.
func runConvert(cmd *cli.Command, args []string) error {
	if err := ml.ConvertMLXtoPEFT(convertInput, convertConfig, convertOutputDir, convertBaseModel); err != nil {
		return fmt.Errorf("convert to PEFT: %w", err)
	}
	fmt.Printf("PEFT adapter written to %s\n", convertOutputDir)
	return nil
}
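Usage note: per the flag registrations above, --input, --config, and --output-dir are required while --base-model is optional. Assuming the command is mounted as 'core ml convert' as the commit message states, an invocation would look like the following, with placeholder file names:

core ml convert --input adapter.safetensors --config adapter_config.json --output-dir ./peft-out --base-model <base-model-id>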