go/pkg/ml/compare.go
Claude 548256312d feat: add ML inference, scoring, and training pipeline (pkg/ml)
Port LEM scoring/training pipeline into CoreGo as pkg/ml with:
- Inference abstraction with HTTP, llama-server, and Ollama backends
- 3-tier scoring engine (heuristic, exact, LLM judge)
- Capability and content probes for model evaluation
- GGUF/safetensors format converters, MLX to PEFT adapter conversion
- DuckDB integration for training data pipeline
- InfluxDB metrics for lab dashboard
- Training data export (JSONL + Parquet)
- Expansion generation pipeline with distributed workers
- 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker)
- 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends)

All 37 ML tests passing. Binary builds at 138MB with all commands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 05:53:52 +00:00

75 lines
1.7 KiB
Go

package ml
import (
"fmt"
"sort"
)
// RunCompare loads the scorer output stored at oldPath and newPath and prints
// one comparison table per model to standard output. Each table has a row for
// every metric found in either file, giving the Old value, the New value, and
// the signed Delta (new minus old). Models and metrics are listed in sorted
// order. An error is returned if either file cannot be read.
func RunCompare(oldPath, newPath string) error {
	before, err := ReadScorerOutput(oldPath)
	if err != nil {
		return fmt.Errorf("read old file: %w", err)
	}

	after, err := ReadScorerOutput(newPath)
	if err != nil {
		return fmt.Errorf("read new file: %w", err)
	}

	// Build the union of model names: every model from the old file, plus
	// the ones that only appear in the new file.
	modelNames := make([]string, 0, len(before.ModelAverages)+len(after.ModelAverages))
	for name := range before.ModelAverages {
		modelNames = append(modelNames, name)
	}
	for name := range after.ModelAverages {
		if _, dup := before.ModelAverages[name]; !dup {
			modelNames = append(modelNames, name)
		}
	}
	// Map iteration order is random; sorting keeps the output stable.
	sort.Strings(modelNames)

	for _, name := range modelNames {
		prev := before.ModelAverages[name]
		next := after.ModelAverages[name]
		if prev == nil && next == nil {
			// Nothing recorded for this model on either side.
			continue
		}

		fmt.Printf("\nModel: %s\n", name)
		fmt.Printf("%-25s %11s %11s %6s\n", "", "Old", "New", "Delta")

		// Same union-then-sort approach for the metric names of this model.
		metricNames := make([]string, 0, len(prev)+len(next))
		for key := range prev {
			metricNames = append(metricNames, key)
		}
		for key := range next {
			if _, dup := prev[key]; !dup {
				metricNames = append(metricNames, key)
			}
		}
		sort.Strings(metricNames)

		for _, key := range metricNames {
			// A metric absent from one side reads as the zero value, so it
			// is shown as 0.00 and the delta is taken against zero.
			was, now := prev[key], next[key]
			fmt.Printf("%-25s %11.2f %11.2f %6s\n", key, was, now, fmt.Sprintf("%+.2f", now-was))
		}
	}

	return nil
}