LEM/pkg/lem/distill.go
Snider c701c2e0af feat(lem): integrate Poindexter for spatial score indexing and analytics
- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-22 21:26:06 +00:00

386 lines
11 KiB
Go

package lem
import (
"context"
"encoding/json"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"runtime"
"strings"
"time"
"forge.lthn.ai/core/go-i18n/reversal"
"forge.lthn.ai/core/go-ml"
"forge.lthn.ai/core/go-mlx"
)
// DistillProbe is a single input prompt for distillation.
type DistillProbe struct {
// ID uniquely identifies the probe within its probe set.
ID string `json:"id"`
// Domain optionally groups probes by subject area.
Domain string `json:"domain,omitempty"`
// Prompt is the bare user prompt sent to the model.
Prompt string `json:"prompt"`
// Source is the basename of the probe file this entry was read from;
// excluded from JSON serialisation.
Source string `json:"-"`
}
// distillCandidate holds a single generation attempt with its scores.
type distillCandidate struct {
// Response is the raw model output text.
Response string
// Grammar is the go-i18n/reversal grammar score for Response.
Grammar GrammarScore
// Delta compares Response against the prompt (uplift, echo, enrichment).
Delta DeltaScore
// Elapsed is the wall-clock time taken to generate Response.
Elapsed time.Duration
}
// RunDistill is the CLI entry point for the distill command.
// It generates responses via native Metal inference, scores them with
// go-i18n/reversal grammar v3, filters near-duplicate grammar profiles via a
// spatial ScoreIndex, and appends passing examples as training JSONL.
//
// Fix over previous revision: json.Marshal and out.Write errors on kept
// examples are now checked — previously a serialisation or disk failure
// silently dropped training data while still counting it as kept.
func RunDistill(args []string) {
	fs := flag.NewFlagSet("distill", flag.ExitOnError)
	modelFlag := fs.String("model", "gemma3/27b", "Model config path (relative to .core/ai/models/)")
	probesFlag := fs.String("probes", "", "Probe set name from probes.yaml, or path to JSON file")
	outputFlag := fs.String("output", "", "Output JSONL path (defaults to model training dir)")
	lessonFlag := fs.Int("lesson", -1, "Lesson number to append to (defaults to probe set phase)")
	minScore := fs.Float64("min-score", 0, "Min grammar composite (0 = use ai.yaml default)")
	runs := fs.Int("runs", 0, "Generations per probe (0 = use ai.yaml default)")
	dryRun := fs.Bool("dry-run", false, "Show plan and exit without generating")
	root := fs.String("root", ".", "Project root (for .core/ai/ config)")
	cacheLimit := fs.Int("cache-limit", 0, "Metal cache limit in GB (0 = use ai.yaml default)")
	memLimit := fs.Int("mem-limit", 0, "Metal memory limit in GB (0 = use ai.yaml default)")
	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	// Load configs: ai.yaml supplies project defaults, the model config
	// overrides generation parameters.
	aiCfg, err := LoadAIConfig(*root)
	if err != nil {
		log.Fatalf("load ai config: %v", err)
	}
	modelCfg, err := LoadModelConfig(*root, *modelFlag)
	if err != nil {
		log.Fatalf("load model config: %v", err)
	}
	genCfg := MergeGenerate(aiCfg.Generate, modelCfg.Generate)

	// Apply flag overrides; a zero flag value means "use the ai.yaml default".
	if *minScore == 0 {
		*minScore = aiCfg.Scorer.MinScore
	}
	if *runs == 0 {
		*runs = aiCfg.Distill.Runs
	}
	cacheLimitGB := aiCfg.Distill.CacheLimit
	if *cacheLimit > 0 {
		cacheLimitGB = *cacheLimit
	}
	memLimitGB := aiCfg.Distill.MemoryLimit
	if *memLimit > 0 {
		memLimitGB = *memLimit
	}

	// Load probes from a named set or a direct file path.
	probes, phase, err := loadDistillProbes(*root, *probesFlag)
	if err != nil {
		log.Fatalf("load probes: %v", err)
	}
	log.Printf("loaded %d probes", len(probes))

	// Determine output path: explicit flag, else the lesson file for the
	// requested (or probe-set default) phase under the model's training dir.
	outputPath := *outputFlag
	if outputPath == "" {
		lesson := *lessonFlag
		if lesson < 0 {
			lesson = phase
		}
		lessonFile, ok := modelCfg.Lessons[lesson]
		if !ok {
			lessonFile = fmt.Sprintf("lesson-%d.jsonl", lesson)
		}
		outputPath = filepath.Join(modelCfg.Training, lessonFile)
	}

	// Load kernel (LEK-1) — prepended to every saved training prompt.
	kernel, err := os.ReadFile(modelCfg.Kernel)
	if err != nil {
		log.Fatalf("read kernel: %v", err)
	}
	log.Printf("kernel: %d chars from %s", len(kernel), modelCfg.Kernel)

	// Load signature (LEK-1-Sig), appended after the probe when configured.
	var sig string
	if modelCfg.Signature != "" {
		sigBytes, err := os.ReadFile(modelCfg.Signature)
		if err != nil {
			log.Fatalf("read signature: %v", err)
		}
		sig = strings.TrimSpace(string(sigBytes))
		log.Printf("signature: %d chars from %s", len(sig), modelCfg.Signature)
	}

	// Dry run: print the plan and exit before touching Metal or the model.
	if *dryRun {
		fmt.Printf("Model: %s (%s)\n", modelCfg.Name, modelCfg.Paths.Base)
		fmt.Printf("Backend: %s\n", aiCfg.Backend)
		fmt.Printf("Probes: %d\n", len(probes))
		fmt.Printf("Runs: %d per probe (%d total generations)\n", *runs, len(probes)* *runs)
		fmt.Printf("Gate: grammar v3 composite >= %.1f\n", *minScore)
		fmt.Printf("Generate: temp=%.2f max_tokens=%d top_p=%.2f\n",
			genCfg.Temperature, genCfg.MaxTokens, genCfg.TopP)
		fmt.Printf("Memory: cache=%dGB limit=%dGB\n", cacheLimitGB, memLimitGB)
		fmt.Printf("Output: %s\n", outputPath)
		fmt.Println()
		for i, p := range probes {
			if i >= 10 {
				fmt.Printf(" ... and %d more\n", len(probes)-10)
				break
			}
			prompt := p.Prompt
			if len(prompt) > 80 {
				prompt = prompt[:80] + "..."
			}
			fmt.Printf(" %s: %s\n", p.ID, prompt)
		}
		return
	}

	// Set Metal memory limits before loading the model.
	if cacheLimitGB > 0 {
		mlx.SetCacheLimit(uint64(cacheLimitGB) * 1024 * 1024 * 1024)
		log.Printf("metal cache limit: %dGB", cacheLimitGB)
	}
	if memLimitGB > 0 {
		mlx.SetMemoryLimit(uint64(memLimitGB) * 1024 * 1024 * 1024)
		log.Printf("metal memory limit: %dGB", memLimitGB)
	}

	// Load model via go-ml Backend (wraps go-inference with memory management).
	log.Printf("loading model: %s", modelCfg.Paths.Base)
	backend, err := ml.NewMLXBackend(modelCfg.Paths.Base)
	if err != nil {
		log.Fatalf("load model: %v", err)
	}
	defer backend.Close()
	log.Printf("model loaded via %s backend", backend.Name())

	// Build generation options from the merged config.
	genOpts := ml.GenOpts{
		MaxTokens:     genCfg.MaxTokens,
		Temperature:   genCfg.Temperature,
		TopP:          genCfg.TopP,
		TopK:          genCfg.TopK,
		RepeatPenalty: genCfg.RepeatPenalty,
	}

	// Initialise grammar scorer.
	tok := reversal.NewTokeniser()

	// Open output for append so repeated runs accumulate into one lesson file.
	out, err := os.OpenFile(outputPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		log.Fatalf("open output: %v", err)
	}
	defer out.Close()

	kept := 0
	skipped := 0
	deduped := 0
	totalStart := time.Now()
	ctx := context.Background()
	kernelStr := strings.TrimSpace(string(kernel))

	// Running duplicate index for grammar-profile deduplication; lazily
	// created on the first kept example.
	var dedupIdx *ScoreIndex

	for i, probe := range probes {
		var best *distillCandidate

		// Build sandwich prompt for output: LEK-1 + Prompt + LEK-1-Sig.
		sandwichPrompt := kernelStr + "\n\n" + probe.Prompt
		if sig != "" {
			sandwichPrompt += "\n\n" + sig
		}

		for run := range *runs {
			fmt.Fprintf(os.Stderr, " [%d/%d] %s run %d/%d",
				i+1, len(probes), probe.ID, run+1, *runs)

			// Inference uses the bare probe — the model generates from its
			// weights. Sandwich wrapping is only for the training output format.
			messages := []ml.Message{
				{Role: "user", Content: probe.Prompt},
			}

			// Generate via go-ml Backend (memory-managed Metal inference).
			start := time.Now()
			result, err := backend.Chat(ctx, messages, genOpts)
			if err != nil {
				fmt.Fprintf(os.Stderr, " → ERROR: %v\n", err)
				continue
			}
			response := result.Text
			elapsed := time.Since(start)

			// Quick reject: empty/degenerate output below the minimum length.
			if len(strings.TrimSpace(response)) < aiCfg.Distill.MinChars {
				fmt.Fprintf(os.Stderr, " → %d chars, EMPTY, %.1fs\n", len(response), elapsed.Seconds())
				continue
			}

			// Score with go-i18n/reversal.
			grammar := ScoreResponse(tok, response)
			delta := ComputeDelta(tok, probe.Prompt, response,
				aiCfg.Scorer.SycophancyEcho, aiCfg.Scorer.SycophancyUplift)
			tokPerSec := 0.0
			if result.Metrics != nil {
				tokPerSec = result.Metrics.DecodeTokensPerSec
			}
			fmt.Fprintf(os.Stderr, " → %d chars, g=%.1f up=%+.1f echo=%.2f enr=%+.1f, %.1fs (%.0f tok/s)\n",
				len(response), grammar.Composite,
				delta.Uplift, delta.Echo, delta.Enrichment,
				elapsed.Seconds(), tokPerSec)

			candidate := &distillCandidate{
				Response: response,
				Grammar:  grammar,
				Delta:    delta,
				Elapsed:  elapsed,
			}
			// Keep the highest grammar composite across runs for this probe.
			if best == nil || grammar.Composite > best.Grammar.Composite {
				best = candidate
			}
		}

		// Quality gate.
		if best != nil && best.Grammar.Composite >= *minScore {
			// Duplicate filter: reject if the grammar profile is too similar
			// to an already-kept entry.
			bestFeatures := GrammarFeatures(best.Grammar)
			if dedupIdx != nil && dedupIdx.IsDuplicate(bestFeatures, 0.02) {
				deduped++
				fmt.Fprintf(os.Stderr, " ~ DEDUP %s (grammar profile too similar to existing)\n", probe.ID)
				// Release GPU memory between probes to prevent incremental leak.
				runtime.GC()
				continue
			}

			// Save with sandwich prompt — kernel wraps the bare probe for training.
			example := TrainingExample{
				Messages: []ChatMessage{
					{Role: "user", Content: sandwichPrompt},
					{Role: "assistant", Content: best.Response},
				},
			}
			// A kept example that cannot be serialised or written is silent
			// training-data loss; fail loudly instead of ignoring the error.
			line, merr := json.Marshal(example)
			if merr != nil {
				log.Fatalf("marshal example %s: %v", probe.ID, merr)
			}
			if _, werr := out.Write(append(line, '\n')); werr != nil {
				log.Fatalf("write output %s: %v", outputPath, werr)
			}

			// Add to the dedup index so later probes compare against it.
			entry := ScoredEntry{ID: probe.ID, Domain: probe.Domain, Grammar: best.Grammar}
			if dedupIdx == nil {
				dedupIdx, _ = NewScoreIndex([]ScoredEntry{entry})
			} else {
				_ = dedupIdx.Insert(entry)
			}
			kept++
			fmt.Fprintf(os.Stderr, " ✓ KEPT %s (g=%.1f, verbs=%d, nouns=%d, enr=%+.1f)\n",
				probe.ID, best.Grammar.Composite,
				best.Grammar.VerbDiversity, best.Grammar.NounDiversity,
				best.Delta.Enrichment)
		} else {
			skipped++
			score := 0.0
			if best != nil {
				score = best.Grammar.Composite
			}
			fmt.Fprintf(os.Stderr, " ✗ SKIP %s (best g=%.1f < %.1f)\n",
				probe.ID, score, *minScore)
		}

		// Release GPU memory between probes to prevent incremental leak.
		runtime.GC()
	}

	duration := time.Since(totalStart)
	fmt.Fprintf(os.Stderr, "\n=== Distillation Complete ===\n")
	fmt.Fprintf(os.Stderr, "Model: %s (%s)\n", modelCfg.Name, backend.Name())
	fmt.Fprintf(os.Stderr, "Probes: %d\n", len(probes))
	fmt.Fprintf(os.Stderr, "Runs: %d per probe (%d total generations)\n", *runs, len(probes)* *runs)
	fmt.Fprintf(os.Stderr, "Scorer: go-i18n/reversal grammar v3, gate >= %.1f\n", *minScore)
	fmt.Fprintf(os.Stderr, "Kept: %d\n", kept)
	fmt.Fprintf(os.Stderr, "Deduped: %d\n", deduped)
	fmt.Fprintf(os.Stderr, "Skipped: %d\n", skipped)
	total := kept + deduped + skipped
	if total > 0 {
		fmt.Fprintf(os.Stderr, "Pass rate: %.0f%%\n", float64(kept)/float64(total)*100)
	}
	fmt.Fprintf(os.Stderr, "Output: %s\n", outputPath)
	fmt.Fprintf(os.Stderr, "Duration: %.0fs (%.1fm)\n", duration.Seconds(), duration.Minutes())
}
// loadDistillProbes loads probes from a named set or a file path.
// Returns the probes and the default phase number for output routing.
func loadDistillProbes(root, spec string) ([]DistillProbe, int, error) {
	// Prefer interpreting spec as a probe-set name from probes.yaml.
	if cfg, cfgErr := LoadProbesConfig(root); cfgErr == nil {
		if set, found := cfg.Sets[spec]; found {
			defaultPhase := 0
			if set.Phase != nil {
				defaultPhase = *set.Phase
			}
			var all []DistillProbe
			for _, name := range set.Files {
				// Files are relative to the training root.
				batch, err := readProbeFile(filepath.Join(root, "training", "lem", name))
				if err != nil {
					return nil, 0, fmt.Errorf("read %s: %w", name, err)
				}
				all = append(all, batch...)
			}
			return all, defaultPhase, nil
		}
	}

	// No matching set (or no config) — treat spec as a direct file path.
	fromFile, err := readProbeFile(spec)
	if err != nil {
		return nil, 0, err
	}
	return fromFile, 0, nil
}
// readProbeFile reads probes from a JSON array file.
func readProbeFile(path string) ([]DistillProbe, error) {
data, err := os.ReadFile(path)
if err != nil {
return nil, err
}
var raw []struct {
ID string `json:"id"`
Domain string `json:"domain"`
Prompt string `json:"prompt"`
}
if err := json.Unmarshal(data, &raw); err != nil {
return nil, fmt.Errorf("parse %s: %w", filepath.Base(path), err)
}
probes := make([]DistillProbe, len(raw))
for i, r := range raw {
probes[i] = DistillProbe{
ID: r.ID,
Domain: r.Domain,
Prompt: r.Prompt,
Source: filepath.Base(path),
}
}
return probes, nil
}