- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>

386 lines · 11 KiB · Go
package lem
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strings"
|
|
"time"
|
|
|
|
"forge.lthn.ai/core/go-i18n/reversal"
|
|
"forge.lthn.ai/core/go-ml"
|
|
"forge.lthn.ai/core/go-mlx"
|
|
)
|
|
|
|
// DistillProbe is a single input prompt for distillation.
// Probes are loaded from JSON files (see readProbeFile) and fed one at a
// time to the model; passing responses become training examples.
type DistillProbe struct {
	// ID uniquely identifies the probe within its source file.
	ID string `json:"id"`
	// Domain is an optional topic/category label carried through to dedup.
	Domain string `json:"domain,omitempty"`
	// Prompt is the bare user prompt sent to the model for generation.
	Prompt string `json:"prompt"`
	// Source records the basename of the file the probe was read from.
	// Populated at load time; never serialized.
	Source string `json:"-"`
}
|
|
|
|
// distillCandidate holds a single generation attempt with its scores.
// Multiple candidates are produced per probe (one per run); the one with
// the highest Grammar.Composite is kept as "best" for the quality gate.
type distillCandidate struct {
	// Response is the raw generated text from the model.
	Response string
	// Grammar is the go-i18n/reversal grammar v3 score for Response.
	Grammar GrammarScore
	// Delta compares Response against the prompt (uplift/echo/enrichment).
	Delta DeltaScore
	// Elapsed is the wall-clock generation time for this attempt.
	Elapsed time.Duration
}
|
|
|
|
// RunDistill is the CLI entry point for the distill command.
|
|
// Generates responses via native Metal inference, scores with go-i18n/reversal,
|
|
// writes passing examples as training JSONL.
|
|
func RunDistill(args []string) {
|
|
fs := flag.NewFlagSet("distill", flag.ExitOnError)
|
|
|
|
modelFlag := fs.String("model", "gemma3/27b", "Model config path (relative to .core/ai/models/)")
|
|
probesFlag := fs.String("probes", "", "Probe set name from probes.yaml, or path to JSON file")
|
|
outputFlag := fs.String("output", "", "Output JSONL path (defaults to model training dir)")
|
|
lessonFlag := fs.Int("lesson", -1, "Lesson number to append to (defaults to probe set phase)")
|
|
minScore := fs.Float64("min-score", 0, "Min grammar composite (0 = use ai.yaml default)")
|
|
runs := fs.Int("runs", 0, "Generations per probe (0 = use ai.yaml default)")
|
|
dryRun := fs.Bool("dry-run", false, "Show plan and exit without generating")
|
|
root := fs.String("root", ".", "Project root (for .core/ai/ config)")
|
|
cacheLimit := fs.Int("cache-limit", 0, "Metal cache limit in GB (0 = use ai.yaml default)")
|
|
memLimit := fs.Int("mem-limit", 0, "Metal memory limit in GB (0 = use ai.yaml default)")
|
|
|
|
if err := fs.Parse(args); err != nil {
|
|
log.Fatalf("parse flags: %v", err)
|
|
}
|
|
|
|
// Load configs.
|
|
aiCfg, err := LoadAIConfig(*root)
|
|
if err != nil {
|
|
log.Fatalf("load ai config: %v", err)
|
|
}
|
|
|
|
modelCfg, err := LoadModelConfig(*root, *modelFlag)
|
|
if err != nil {
|
|
log.Fatalf("load model config: %v", err)
|
|
}
|
|
|
|
genCfg := MergeGenerate(aiCfg.Generate, modelCfg.Generate)
|
|
|
|
// Apply flag overrides.
|
|
if *minScore == 0 {
|
|
*minScore = aiCfg.Scorer.MinScore
|
|
}
|
|
if *runs == 0 {
|
|
*runs = aiCfg.Distill.Runs
|
|
}
|
|
cacheLimitGB := aiCfg.Distill.CacheLimit
|
|
if *cacheLimit > 0 {
|
|
cacheLimitGB = *cacheLimit
|
|
}
|
|
memLimitGB := aiCfg.Distill.MemoryLimit
|
|
if *memLimit > 0 {
|
|
memLimitGB = *memLimit
|
|
}
|
|
|
|
// Load probes.
|
|
probes, phase, err := loadDistillProbes(*root, *probesFlag)
|
|
if err != nil {
|
|
log.Fatalf("load probes: %v", err)
|
|
}
|
|
log.Printf("loaded %d probes", len(probes))
|
|
|
|
// Determine output path.
|
|
outputPath := *outputFlag
|
|
if outputPath == "" {
|
|
lesson := *lessonFlag
|
|
if lesson < 0 {
|
|
lesson = phase
|
|
}
|
|
lessonFile, ok := modelCfg.Lessons[lesson]
|
|
if !ok {
|
|
lessonFile = fmt.Sprintf("lesson-%d.jsonl", lesson)
|
|
}
|
|
outputPath = filepath.Join(modelCfg.Training, lessonFile)
|
|
}
|
|
|
|
// Load kernel.
|
|
kernel, err := os.ReadFile(modelCfg.Kernel)
|
|
if err != nil {
|
|
log.Fatalf("read kernel: %v", err)
|
|
}
|
|
log.Printf("kernel: %d chars from %s", len(kernel), modelCfg.Kernel)
|
|
|
|
// Load signature (LEK-1-Sig).
|
|
var sig string
|
|
if modelCfg.Signature != "" {
|
|
sigBytes, err := os.ReadFile(modelCfg.Signature)
|
|
if err != nil {
|
|
log.Fatalf("read signature: %v", err)
|
|
}
|
|
sig = strings.TrimSpace(string(sigBytes))
|
|
log.Printf("signature: %d chars from %s", len(sig), modelCfg.Signature)
|
|
}
|
|
|
|
// Dry run.
|
|
if *dryRun {
|
|
fmt.Printf("Model: %s (%s)\n", modelCfg.Name, modelCfg.Paths.Base)
|
|
fmt.Printf("Backend: %s\n", aiCfg.Backend)
|
|
fmt.Printf("Probes: %d\n", len(probes))
|
|
fmt.Printf("Runs: %d per probe (%d total generations)\n", *runs, len(probes)**runs)
|
|
fmt.Printf("Gate: grammar v3 composite >= %.1f\n", *minScore)
|
|
fmt.Printf("Generate: temp=%.2f max_tokens=%d top_p=%.2f\n",
|
|
genCfg.Temperature, genCfg.MaxTokens, genCfg.TopP)
|
|
fmt.Printf("Memory: cache=%dGB limit=%dGB\n", cacheLimitGB, memLimitGB)
|
|
fmt.Printf("Output: %s\n", outputPath)
|
|
fmt.Println()
|
|
for i, p := range probes {
|
|
if i >= 10 {
|
|
fmt.Printf(" ... and %d more\n", len(probes)-10)
|
|
break
|
|
}
|
|
prompt := p.Prompt
|
|
if len(prompt) > 80 {
|
|
prompt = prompt[:80] + "..."
|
|
}
|
|
fmt.Printf(" %s: %s\n", p.ID, prompt)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Set Metal memory limits before loading model.
|
|
if cacheLimitGB > 0 {
|
|
mlx.SetCacheLimit(uint64(cacheLimitGB) * 1024 * 1024 * 1024)
|
|
log.Printf("metal cache limit: %dGB", cacheLimitGB)
|
|
}
|
|
if memLimitGB > 0 {
|
|
mlx.SetMemoryLimit(uint64(memLimitGB) * 1024 * 1024 * 1024)
|
|
log.Printf("metal memory limit: %dGB", memLimitGB)
|
|
}
|
|
|
|
// Load model via go-ml Backend (wraps go-inference with memory management).
|
|
log.Printf("loading model: %s", modelCfg.Paths.Base)
|
|
backend, err := ml.NewMLXBackend(modelCfg.Paths.Base)
|
|
if err != nil {
|
|
log.Fatalf("load model: %v", err)
|
|
}
|
|
defer backend.Close()
|
|
|
|
log.Printf("model loaded via %s backend", backend.Name())
|
|
|
|
// Build generation options from merged config.
|
|
genOpts := ml.GenOpts{
|
|
MaxTokens: genCfg.MaxTokens,
|
|
Temperature: genCfg.Temperature,
|
|
TopP: genCfg.TopP,
|
|
TopK: genCfg.TopK,
|
|
RepeatPenalty: genCfg.RepeatPenalty,
|
|
}
|
|
|
|
// Initialise grammar scorer.
|
|
tok := reversal.NewTokeniser()
|
|
|
|
// Open output for append.
|
|
out, err := os.OpenFile(outputPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
|
if err != nil {
|
|
log.Fatalf("open output: %v", err)
|
|
}
|
|
defer out.Close()
|
|
|
|
kept := 0
|
|
skipped := 0
|
|
deduped := 0
|
|
totalStart := time.Now()
|
|
ctx := context.Background()
|
|
kernelStr := strings.TrimSpace(string(kernel))
|
|
|
|
// Running duplicate index for grammar-profile deduplication.
|
|
var dedupIdx *ScoreIndex
|
|
|
|
for i, probe := range probes {
|
|
var best *distillCandidate
|
|
|
|
// Build sandwich prompt for output: LEK-1 + Prompt + LEK-1-Sig
|
|
sandwichPrompt := kernelStr + "\n\n" + probe.Prompt
|
|
if sig != "" {
|
|
sandwichPrompt += "\n\n" + sig
|
|
}
|
|
|
|
for run := range *runs {
|
|
fmt.Fprintf(os.Stderr, " [%d/%d] %s run %d/%d",
|
|
i+1, len(probes), probe.ID, run+1, *runs)
|
|
|
|
// Inference uses bare probe — the model generates from its weights.
|
|
// Sandwich wrapping is only for the training output format.
|
|
messages := []ml.Message{
|
|
{Role: "user", Content: probe.Prompt},
|
|
}
|
|
|
|
// Generate via go-ml Backend (memory-managed Metal inference).
|
|
start := time.Now()
|
|
result, err := backend.Chat(ctx, messages, genOpts)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, " → ERROR: %v\n", err)
|
|
continue
|
|
}
|
|
response := result.Text
|
|
elapsed := time.Since(start)
|
|
|
|
// Quick reject: empty/degenerate.
|
|
if len(strings.TrimSpace(response)) < aiCfg.Distill.MinChars {
|
|
fmt.Fprintf(os.Stderr, " → %d chars, EMPTY, %.1fs\n", len(response), elapsed.Seconds())
|
|
continue
|
|
}
|
|
|
|
// Score with go-i18n/reversal.
|
|
grammar := ScoreResponse(tok, response)
|
|
delta := ComputeDelta(tok, probe.Prompt, response,
|
|
aiCfg.Scorer.SycophancyEcho, aiCfg.Scorer.SycophancyUplift)
|
|
|
|
tokPerSec := 0.0
|
|
if result.Metrics != nil {
|
|
tokPerSec = result.Metrics.DecodeTokensPerSec
|
|
}
|
|
fmt.Fprintf(os.Stderr, " → %d chars, g=%.1f up=%+.1f echo=%.2f enr=%+.1f, %.1fs (%.0f tok/s)\n",
|
|
len(response), grammar.Composite,
|
|
delta.Uplift, delta.Echo, delta.Enrichment,
|
|
elapsed.Seconds(), tokPerSec)
|
|
|
|
candidate := &distillCandidate{
|
|
Response: response,
|
|
Grammar: grammar,
|
|
Delta: delta,
|
|
Elapsed: elapsed,
|
|
}
|
|
|
|
if best == nil || grammar.Composite > best.Grammar.Composite {
|
|
best = candidate
|
|
}
|
|
}
|
|
|
|
// Quality gate.
|
|
if best != nil && best.Grammar.Composite >= *minScore {
|
|
// Duplicate filter: reject if grammar profile is too similar to an already-kept entry.
|
|
bestFeatures := GrammarFeatures(best.Grammar)
|
|
if dedupIdx != nil && dedupIdx.IsDuplicate(bestFeatures, 0.02) {
|
|
deduped++
|
|
fmt.Fprintf(os.Stderr, " ~ DEDUP %s (grammar profile too similar to existing)\n", probe.ID)
|
|
// Release GPU memory between probes to prevent incremental leak.
|
|
runtime.GC()
|
|
continue
|
|
}
|
|
|
|
// Save with sandwich prompt — kernel wraps the bare probe for training.
|
|
example := TrainingExample{
|
|
Messages: []ChatMessage{
|
|
{Role: "user", Content: sandwichPrompt},
|
|
{Role: "assistant", Content: best.Response},
|
|
},
|
|
}
|
|
line, _ := json.Marshal(example)
|
|
out.Write(append(line, '\n'))
|
|
|
|
// Add to dedup index.
|
|
entry := ScoredEntry{ID: probe.ID, Domain: probe.Domain, Grammar: best.Grammar}
|
|
if dedupIdx == nil {
|
|
dedupIdx, _ = NewScoreIndex([]ScoredEntry{entry})
|
|
} else {
|
|
_ = dedupIdx.Insert(entry)
|
|
}
|
|
|
|
kept++
|
|
fmt.Fprintf(os.Stderr, " ✓ KEPT %s (g=%.1f, verbs=%d, nouns=%d, enr=%+.1f)\n",
|
|
probe.ID, best.Grammar.Composite,
|
|
best.Grammar.VerbDiversity, best.Grammar.NounDiversity,
|
|
best.Delta.Enrichment)
|
|
} else {
|
|
skipped++
|
|
score := 0.0
|
|
if best != nil {
|
|
score = best.Grammar.Composite
|
|
}
|
|
fmt.Fprintf(os.Stderr, " ✗ SKIP %s (best g=%.1f < %.1f)\n",
|
|
probe.ID, score, *minScore)
|
|
}
|
|
|
|
// Release GPU memory between probes to prevent incremental leak.
|
|
runtime.GC()
|
|
}
|
|
|
|
duration := time.Since(totalStart)
|
|
|
|
fmt.Fprintf(os.Stderr, "\n=== Distillation Complete ===\n")
|
|
fmt.Fprintf(os.Stderr, "Model: %s (%s)\n", modelCfg.Name, backend.Name())
|
|
fmt.Fprintf(os.Stderr, "Probes: %d\n", len(probes))
|
|
fmt.Fprintf(os.Stderr, "Runs: %d per probe (%d total generations)\n", *runs, len(probes)**runs)
|
|
fmt.Fprintf(os.Stderr, "Scorer: go-i18n/reversal grammar v3, gate >= %.1f\n", *minScore)
|
|
fmt.Fprintf(os.Stderr, "Kept: %d\n", kept)
|
|
fmt.Fprintf(os.Stderr, "Deduped: %d\n", deduped)
|
|
fmt.Fprintf(os.Stderr, "Skipped: %d\n", skipped)
|
|
total := kept + deduped + skipped
|
|
if total > 0 {
|
|
fmt.Fprintf(os.Stderr, "Pass rate: %.0f%%\n", float64(kept)/float64(total)*100)
|
|
}
|
|
fmt.Fprintf(os.Stderr, "Output: %s\n", outputPath)
|
|
fmt.Fprintf(os.Stderr, "Duration: %.0fs (%.1fm)\n", duration.Seconds(), duration.Minutes())
|
|
}
|
|
|
|
// loadDistillProbes loads probes from a named set or a file path.
|
|
// Returns the probes and the default phase number for output routing.
|
|
func loadDistillProbes(root, spec string) ([]DistillProbe, int, error) {
|
|
// Try as a probe set name first.
|
|
probesCfg, cfgErr := LoadProbesConfig(root)
|
|
if cfgErr == nil {
|
|
if set, ok := probesCfg.Sets[spec]; ok {
|
|
phase := 0
|
|
if set.Phase != nil {
|
|
phase = *set.Phase
|
|
}
|
|
var probes []DistillProbe
|
|
for _, f := range set.Files {
|
|
// Files are relative to the training root.
|
|
ps, err := readProbeFile(filepath.Join(root, "training", "lem", f))
|
|
if err != nil {
|
|
return nil, 0, fmt.Errorf("read %s: %w", f, err)
|
|
}
|
|
probes = append(probes, ps...)
|
|
}
|
|
return probes, phase, nil
|
|
}
|
|
}
|
|
|
|
// Fall back to direct file path.
|
|
probes, err := readProbeFile(spec)
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
return probes, 0, nil
|
|
}
|
|
|
|
// readProbeFile reads probes from a JSON array file.
|
|
func readProbeFile(path string) ([]DistillProbe, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var raw []struct {
|
|
ID string `json:"id"`
|
|
Domain string `json:"domain"`
|
|
Prompt string `json:"prompt"`
|
|
}
|
|
if err := json.Unmarshal(data, &raw); err != nil {
|
|
return nil, fmt.Errorf("parse %s: %w", filepath.Base(path), err)
|
|
}
|
|
|
|
probes := make([]DistillProbe, len(raw))
|
|
for i, r := range raw {
|
|
probes[i] = DistillProbe{
|
|
ID: r.ID,
|
|
Domain: r.Domain,
|
|
Prompt: r.Prompt,
|
|
Source: filepath.Base(path),
|
|
}
|
|
}
|
|
return probes, nil
|
|
}
|