cli/pkg/ml/export.go
Claude 5ff4b8a2eb feat: add ML inference, scoring, and training pipeline (pkg/ml)
Port LEM scoring/training pipeline into CoreGo as pkg/ml with:
- Inference abstraction with HTTP, llama-server, and Ollama backends
- 3-tier scoring engine (heuristic, exact, LLM judge)
- Capability and content probes for model evaluation
- GGUF/safetensors format converters, MLX to PEFT adapter conversion
- DuckDB integration for training data pipeline
- InfluxDB metrics for lab dashboard
- Training data export (JSONL + Parquet)
- Expansion generation pipeline with distributed workers
- 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker)
- 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends)

All 37 ML tests passing. Binary builds at 138MB with all commands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 05:53:52 +00:00

112 lines
2.8 KiB
Go

package ml
import (
"bufio"
"encoding/json"
"fmt"
"math/rand"
"os"
"strings"
)
// ChatMessage is a single message in the chat training format.
type ChatMessage struct {
	// Role identifies the speaker, e.g. "user" or "assistant"
	// (see WriteTrainingJSONL, which emits exactly those two).
	Role string `json:"role"`
	// Content is the message text.
	Content string `json:"content"`
}
// TrainingExample is a single training example in chat JSONL format.
// Each example is serialized as one JSON object per line, holding the
// full message exchange for one prompt/response pair.
type TrainingExample struct {
	Messages []ChatMessage `json:"messages"`
}
// ValidatePercentages checks that train+valid+test percentages sum to 100
// and that none are negative. It returns nil when the split is valid.
func ValidatePercentages(trainPct, validPct, testPct int) error {
	total := trainPct + validPct + testPct
	switch {
	case trainPct < 0 || validPct < 0 || testPct < 0:
		return fmt.Errorf("percentages must be non-negative: train=%d, valid=%d, test=%d", trainPct, validPct, testPct)
	case total != 100:
		return fmt.Errorf("percentages must sum to 100, got %d (train=%d + valid=%d + test=%d)", total, trainPct, validPct, testPct)
	}
	return nil
}
// FilterResponses removes responses with empty content, "ERROR:" prefix,
// or response length < 50 characters.
func FilterResponses(responses []Response) []Response {
	// A response is kept only when it is non-empty, is not an error
	// marker, and carries at least 50 bytes of content.
	keep := func(r Response) bool {
		return r.Response != "" &&
			!strings.HasPrefix(r.Response, "ERROR:") &&
			len(r.Response) >= 50
	}
	var kept []Response
	for _, r := range responses {
		if keep(r) {
			kept = append(kept, r)
		}
	}
	return kept
}
// SplitData shuffles responses with a deterministic seed and splits them
// into train, valid, and test sets by the given percentages.
//
// The test set receives the remainder after the train and valid cuts
// rather than n*testPct/100, so integer truncation can never drop
// elements; testPct therefore does not enter the arithmetic. Callers
// should validate the percentages with ValidatePercentages first.
// The input slice is copied and left unmodified.
func SplitData(responses []Response, trainPct, validPct, testPct int, seed int64) (train, valid, test []Response) {
	// Shuffle a copy so the caller's slice order is untouched; the
	// fixed seed makes the split reproducible across runs.
	shuffled := make([]Response, len(responses))
	copy(shuffled, responses)
	rng := rand.New(rand.NewSource(seed))
	rng.Shuffle(len(shuffled), func(i, j int) {
		shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
	})

	n := len(shuffled)
	trainN := n * trainPct / 100
	validN := n * validPct / 100

	train = shuffled[:trainN]
	valid = shuffled[trainN : trainN+validN]
	test = shuffled[trainN+validN:] // remainder, see note above
	return train, valid, test
}
// WriteTrainingJSONL writes responses in chat JSONL format suitable for
// MLX LoRA fine-tuning. Each response becomes one line holding a
// user/assistant message pair. The file at path is created (or
// truncated) and both the buffered-writer flush and the file close are
// error-checked so a full disk or write failure is reported rather than
// silently losing buffered data.
func WriteTrainingJSONL(path string, responses []Response) error {
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("create %s: %w", path, err)
	}
	// Safety net for early returns; the success path closes explicitly
	// below and a double Close on *os.File is harmless.
	defer f.Close()

	w := bufio.NewWriter(f)
	for _, r := range responses {
		example := TrainingExample{
			Messages: []ChatMessage{
				{Role: "user", Content: r.Prompt},
				{Role: "assistant", Content: r.Response},
			},
		}
		data, err := json.Marshal(example)
		if err != nil {
			return fmt.Errorf("marshal example: %w", err)
		}
		if _, err := w.Write(data); err != nil {
			return fmt.Errorf("write line: %w", err)
		}
		if _, err := w.WriteString("\n"); err != nil {
			return fmt.Errorf("write newline: %w", err)
		}
	}

	// The original deferred Flush discarded its error; flush explicitly
	// so buffered data lost to a write failure surfaces to the caller.
	if err := w.Flush(); err != nil {
		return fmt.Errorf("flush %s: %w", path, err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("close %s: %w", path, err)
	}
	return nil
}