// Package ml: CLI command for ML probing.
//
// History (from original commit message):
// feat: add ML inference, scoring, and training pipeline (pkg/ml).
// Port LEM scoring/training pipeline into CoreGo as pkg/ml with:
//   - Inference abstraction with HTTP, llama-server, and Ollama backends
//   - 3-tier scoring engine (heuristic, exact, LLM judge)
//   - Capability and content probes for model evaluation
//   - GGUF/safetensors format converters, MLX to PEFT adapter conversion
//   - DuckDB integration for training data pipeline
//   - InfluxDB metrics for lab dashboard
//   - Training data export (JSONL + Parquet)
//   - Expansion generation pipeline with distributed workers
//   - 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker)
//   - 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends)
//
// All 37 ML tests passing. Binary builds at 138MB with all commands.
package ml
|
|
|
|
|
|
|
|
|
|
import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"sort"

	"forge.lthn.ai/core/go-ai/ml"
	"forge.lthn.ai/core/go/pkg/cli"
)
|
|
|
|
|
|
|
|
|
|
var (
	// probeOutput is the path given via --output; when non-empty,
	// runProbe also writes the probe results to this file as JSON.
	probeOutput string
)
|
|
|
|
|
|
|
|
|
|
var probeCmd = &cli.Command{
|
|
|
|
|
Use: "probe",
|
|
|
|
|
Short: "Run capability and content probes against a model",
|
|
|
|
|
Long: "Runs 23 capability probes and 6 content probes against an OpenAI-compatible API.",
|
|
|
|
|
RunE: runProbe,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
probeCmd.Flags().StringVar(&probeOutput, "output", "", "Output JSON file for probe results")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func runProbe(cmd *cli.Command, args []string) error {
|
|
|
|
|
if apiURL == "" {
|
|
|
|
|
return fmt.Errorf("--api-url is required")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
model := modelName
|
|
|
|
|
if model == "" {
|
|
|
|
|
model = "default"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
|
backend := ml.NewHTTPBackend(apiURL, model)
|
|
|
|
|
|
|
|
|
|
fmt.Printf("Running %d capability probes against %s...\n", len(ml.CapabilityProbes), apiURL)
|
|
|
|
|
results := ml.RunCapabilityProbes(ctx, backend)
|
|
|
|
|
|
|
|
|
|
fmt.Printf("\nResults: %.1f%% (%d/%d)\n", results.Accuracy, results.Correct, results.Total)
|
|
|
|
|
|
|
|
|
|
for cat, data := range results.ByCategory {
|
|
|
|
|
catAcc := 0.0
|
|
|
|
|
if data.Total > 0 {
|
|
|
|
|
catAcc = float64(data.Correct) / float64(data.Total) * 100
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf(" %-20s %d/%d (%.0f%%)\n", cat, data.Correct, data.Total, catAcc)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if probeOutput != "" {
|
|
|
|
|
data, err := json.MarshalIndent(results, "", " ")
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("marshal results: %w", err)
|
|
|
|
|
}
|
|
|
|
|
if err := os.WriteFile(probeOutput, data, 0644); err != nil {
|
|
|
|
|
return fmt.Errorf("write output: %w", err)
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\nResults written to %s\n", probeOutput)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|