feat: convert all pipeline.py commands to Go
Complete conversion of pipeline.py into Go `lem` CLI:

- import-all: bulk import all LEM data into DuckDB from M3
- consolidate: pull worker JSONLs, merge, deduplicate
- normalize: seeds → deduplicated expansion_prompts table
- approve: filter scored expansions → training JSONL
- tier-score: heuristic/judge tiered expansion scoring
- expand-status: expansion pipeline progress from DuckDB
- inventory: DuckDB table counts and summary
- coverage: seed coverage gap analysis
- seed-influx: bootstrap InfluxDB from DuckDB golden_gen
- query: ad-hoc SQL against DuckDB

22 commands total, 49 Go files. Replaces entire pipeline.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
parent 4eaf1bfb39
commit 91ee389377

11 changed files with 1714 additions and 13 deletions
main.go (66 changed lines)

@@ -12,19 +12,39 @@ import (
 const usage = `Usage: lem <command> [flags]
 
-Commands:
+Scoring:
   score          Score existing response files
   probe          Generate responses and score them
   compare        Compare two score files
-  status         Show training and generation progress (InfluxDB + DuckDB)
-  export         Export golden set to training-format JSONL splits
-  expand         Generate expansion responses via trained LEM model
-  conv           Generate conversational training data
-  ingest         Ingest benchmark data into InfluxDB
-  parquet        Export JSONL training splits to Parquet for HuggingFace
-  publish        Push Parquet files to HuggingFace dataset repo
-  metrics        Push DuckDB golden set stats to InfluxDB
-  convert        Convert MLX LoRA adapter to HuggingFace PEFT format
+  tier-score     Score expansion responses (heuristic/judge tiers)
+
+Generation:
+  expand         Generate expansion responses via trained LEM model
+  conv           Generate conversational training data (calm phase)
+
+Data Management:
+  import-all     Import ALL LEM data into DuckDB from M3
+  consolidate    Pull worker JSONLs from M3, merge, deduplicate
+  normalize      Normalize seeds → deduplicated expansion_prompts
+  approve        Filter scored expansions → training JSONL
+
+Export & Publish:
+  export         Export golden set to training-format JSONL splits
+  parquet        Export JSONL training splits to Parquet
+  publish        Push Parquet files to HuggingFace dataset repo
+  convert        Convert MLX LoRA adapter to PEFT format
+
+Monitoring:
+  status         Show training and generation progress (InfluxDB)
+  expand-status  Show expansion pipeline status (DuckDB)
+  inventory      Show DuckDB table inventory
+  coverage       Analyze seed coverage gaps
+  metrics        Push DuckDB golden set stats to InfluxDB
+
+Infrastructure:
+  ingest         Ingest benchmark data into InfluxDB
+  seed-influx    Seed InfluxDB golden_gen from DuckDB
+  query          Run ad-hoc SQL against DuckDB
 `
 
 func main() {

@@ -58,6 +78,26 @@ func main()
 		lem.RunMetrics(os.Args[2:])
 	case "convert":
 		lem.RunConvert(os.Args[2:])
+	case "import-all":
+		lem.RunImport(os.Args[2:])
+	case "consolidate":
+		lem.RunConsolidate(os.Args[2:])
+	case "normalize":
+		lem.RunNormalize(os.Args[2:])
+	case "approve":
+		lem.RunApprove(os.Args[2:])
+	case "tier-score":
+		lem.RunTierScore(os.Args[2:])
+	case "expand-status":
+		lem.RunExpandStatus(os.Args[2:])
+	case "inventory":
+		lem.RunInventory(os.Args[2:])
+	case "coverage":
+		lem.RunCoverage(os.Args[2:])
+	case "seed-influx":
+		lem.RunSeedInflux(os.Args[2:])
+	case "query":
+		lem.RunQuery(os.Args[2:])
 	default:
 		fmt.Fprintf(os.Stderr, "unknown command: %s\n\n%s", os.Args[1], usage)
 		os.Exit(1)
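All of the new commands funnel through `OpenDB`/`OpenDBReadWrite` helpers and a `DB` wrapper that predate this commit and are not shown in the diff. A minimal sketch of the assumed shape, using the go-duckdb database/sql driver (the actual driver and DSN options used by pkg/lem are an assumption here, not confirmed by this diff):

package lem

import (
	"database/sql"

	_ "github.com/marcboeker/go-duckdb" // assumed driver; not confirmed by this diff
)

// DB wraps the DuckDB connection used by every command below (sketch).
type DB struct {
	conn *sql.DB
}

// OpenDB opens the database read-only; OpenDBReadWrite opens it for writing.
// Signatures inferred from the call sites in this commit.
func OpenDB(path string) (*DB, error) {
	conn, err := sql.Open("duckdb", path+"?access_mode=read_only")
	if err != nil {
		return nil, err
	}
	return &DB{conn: conn}, nil
}

func OpenDBReadWrite(path string) (*DB, error) {
	conn, err := sql.Open("duckdb", path)
	if err != nil {
		return nil, err
	}
	return &DB{conn: conn}, nil
}

// TableCounts (used by the inventory command) would enumerate tables and
// count their rows; omitted from this sketch.

func (d *DB) Close() error { return d.conn.Close() }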
pkg/lem/approve.go (new file, 98 lines)

package lem

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
)

// RunApprove is the CLI entry point for the approve command.
// Filters scored expansion responses by quality threshold and exports
// approved ones as chat-format training JSONL.
func RunApprove(args []string) {
	fs := flag.NewFlagSet("approve", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	output := fs.String("output", "", "Output JSONL file (defaults to expansion-approved.jsonl in db dir)")
	threshold := fs.Float64("threshold", 6.0, "Min judge average to approve (default: 6.0)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	if *output == "" {
		*output = filepath.Join(filepath.Dir(*dbPath), "expansion-approved.jsonl")
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	// Query approved responses: heuristic passed AND (judge passed OR not yet judge-scored).
	rows, err := db.conn.Query(`
		SELECT r.idx, r.seed_id, r.region, r.domain, r.prompt, r.response,
		       r.gen_time, r.model, s.heuristic_score
		FROM expansion_raw r
		JOIN expansion_scores s ON r.idx = s.idx
		WHERE s.heuristic_pass = true
		  AND (s.judge_pass = true OR s.judge_pass IS NULL)
		ORDER BY r.idx
	`)
	if err != nil {
		log.Fatalf("query approved: %v (have you run scoring?)", err)
	}
	defer rows.Close()

	f, err := os.Create(*output)
	if err != nil {
		log.Fatalf("create output: %v", err)
	}
	defer f.Close()

	enc := json.NewEncoder(f)
	count := 0
	regionSet := make(map[string]bool)
	domainSet := make(map[string]bool)

	for rows.Next() {
		var idx int
		var seedID, region, domain, prompt, response, model string
		var genTime, score float64
		if err := rows.Scan(&idx, &seedID, &region, &domain, &prompt, &response, &genTime, &model, &score); err != nil {
			log.Fatalf("scan: %v", err)
		}

		example := TrainingExample{
			Messages: []ChatMessage{
				{Role: "user", Content: prompt},
				{Role: "assistant", Content: response},
			},
		}

		if err := enc.Encode(example); err != nil {
			log.Fatalf("encode: %v", err)
		}

		regionSet[region] = true
		domainSet[domain] = true
		count++
	}

	_ = *threshold // not yet applied: reserved for future judge-tier filtering

	fmt.Printf("Approved: %d responses (threshold: heuristic > 0)\n", count)
	fmt.Printf("Exported: %s\n", *output)
	fmt.Printf("  Regions: %d, Domains: %d\n", len(regionSet), len(domainSet))
}
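approve serializes each row through `TrainingExample`/`ChatMessage`, which are defined elsewhere in pkg/lem. The `messages` JSON tag is confirmed by import.go later in this diff; the role/content tags are an assumption from the chat-format JSONL convention:

// Sketch of the assumed chat-format types (not part of this diff).
type ChatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type TrainingExample struct {
	Messages []ChatMessage `json:"messages"`
}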
pkg/lem/consolidate.go (new file, 139 lines)

package lem

import (
	"bufio"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strings"
)

// RunConsolidate is the CLI entry point for the consolidate command.
// Pulls all worker JSONLs from M3, merges them, deduplicates on idx,
// and writes a single merged file.
func RunConsolidate(args []string) {
	fs := flag.NewFlagSet("consolidate", flag.ExitOnError)
	remoteHost := fs.String("host", "m3", "SSH host for remote files")
	remotePath := fs.String("remote", "/Volumes/Data/lem/responses", "Remote directory for JSONL files")
	pattern := fs.String("pattern", "gold*.jsonl", "File glob pattern")
	outputDir := fs.String("output", "", "Output directory (defaults to ./responses)")
	merged := fs.String("merged", "", "Merged output file (defaults to gold-merged.jsonl next to the output dir)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *outputDir == "" {
		*outputDir = "responses"
	}
	if err := os.MkdirAll(*outputDir, 0755); err != nil {
		log.Fatalf("create output dir: %v", err)
	}

	// List remote files.
	fmt.Println("Pulling responses from remote...")
	listCmd := exec.Command("ssh", *remoteHost, fmt.Sprintf("ls %s/%s", *remotePath, *pattern))
	listOutput, err := listCmd.Output()
	if err != nil {
		log.Fatalf("list remote files: %v", err)
	}

	remoteFiles := strings.Split(strings.TrimSpace(string(listOutput)), "\n")
	var validFiles []string
	for _, f := range remoteFiles {
		f = strings.TrimSpace(f)
		if f != "" {
			validFiles = append(validFiles, f)
		}
	}
	fmt.Printf("  Found %d JSONL files on %s\n", len(validFiles), *remoteHost)

	// Pull files.
	for _, rf := range validFiles {
		local := filepath.Join(*outputDir, filepath.Base(rf))
		scpCmd := exec.Command("scp", fmt.Sprintf("%s:%s", *remoteHost, rf), local)
		if err := scpCmd.Run(); err != nil {
			log.Printf("warning: failed to pull %s: %v", rf, err)
			continue
		}

		// Count lines.
		f, err := os.Open(local)
		if err != nil {
			continue
		}
		lines := 0
		scanner := bufio.NewScanner(f)
		for scanner.Scan() {
			lines++
		}
		f.Close()
		fmt.Printf("  %s: %d records\n", filepath.Base(rf), lines)
	}

	// Merge and deduplicate on idx.
	seen := make(map[int]json.RawMessage)
	skipped := 0

	matches, _ := filepath.Glob(filepath.Join(*outputDir, *pattern))
	sort.Strings(matches)

	for _, local := range matches {
		f, err := os.Open(local)
		if err != nil {
			continue
		}
		scanner := bufio.NewScanner(f)
		scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
		for scanner.Scan() {
			line := scanner.Text()
			var rec struct {
				Idx *int `json:"idx"`
			}
			if err := json.Unmarshal([]byte(line), &rec); err != nil {
				skipped++
				continue
			}
			if rec.Idx == nil {
				skipped++
				continue
			}
			if _, exists := seen[*rec.Idx]; !exists {
				seen[*rec.Idx] = json.RawMessage(line)
			}
		}
		f.Close()
	}

	if skipped > 0 {
		fmt.Printf("  Skipped %d records without idx\n", skipped)
	}

	// Sort by idx and write merged file.
	if *merged == "" {
		*merged = filepath.Join(*outputDir, "..", "gold-merged.jsonl")
	}

	idxs := make([]int, 0, len(seen))
	for idx := range seen {
		idxs = append(idxs, idx)
	}
	sort.Ints(idxs)

	f, err := os.Create(*merged)
	if err != nil {
		log.Fatalf("create merged file: %v", err)
	}
	for _, idx := range idxs {
		f.Write(seen[idx])
		f.WriteString("\n")
	}
	f.Close()

	fmt.Printf("\nMerged: %d unique examples → %s\n", len(seen), *merged)
}
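consolidate treats each JSONL line as opaque apart from its `idx` field: the first occurrence of an idx wins and the raw line passes through byte-for-byte. An illustrative worker record (field set inferred from the golden_set columns created in import.go below; values hypothetical):

// Hypothetical worker record; consolidate only decodes "idx" and keeps
// the first line seen for each value, emitting it unchanged.
const exampleWorkerLine = `{"idx": 42, "seed_id": "eu-017", "domain": "ethics",` +
	` "voice": "...", "prompt": "...", "response": "...", "gen_time": 12.3}`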
pkg/lem/coverage.go (new file, 135 lines)

package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
	"strings"
)

// RunCoverage is the CLI entry point for the coverage command.
// Analyzes seed coverage and shows underrepresented areas.
func RunCoverage(args []string) {
	fs := flag.NewFlagSet("coverage", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	var total int
	if err := db.conn.QueryRow("SELECT count(*) FROM seeds").Scan(&total); err != nil {
		log.Fatalf("No seeds table. Run: lem import-all first")
	}

	fmt.Println("LEM Seed Coverage Analysis")
	fmt.Println("==================================================")
	fmt.Printf("\nTotal seeds: %d\n", total)

	// Region distribution.
	fmt.Println("\nRegion distribution (underrepresented first):")
	rows, err := db.conn.Query(`
		SELECT
			CASE
				WHEN region LIKE '%cn%' THEN 'cn (Chinese)'
				WHEN region LIKE '%en-%' OR region LIKE '%en_para%' OR region LIKE '%para%' THEN 'en (English)'
				WHEN region LIKE '%ru%' THEN 'ru (Russian)'
				WHEN region LIKE '%de%' AND region NOT LIKE '%deten%' THEN 'de (German)'
				WHEN region LIKE '%es%' THEN 'es (Spanish)'
				WHEN region LIKE '%fr%' THEN 'fr (French)'
				WHEN region LIKE '%latam%' THEN 'latam (LatAm)'
				WHEN region LIKE '%africa%' THEN 'africa'
				WHEN region LIKE '%eu%' THEN 'eu (European)'
				WHEN region LIKE '%me%' AND region NOT LIKE '%premium%' THEN 'me (MidEast)'
				WHEN region LIKE '%multi%' THEN 'multilingual'
				WHEN region LIKE '%weak%' THEN 'weak-langs'
				ELSE 'other'
			END AS lang_group,
			count(*) AS n,
			count(DISTINCT domain) AS domains
		FROM seeds GROUP BY lang_group ORDER BY n ASC
	`)
	if err != nil {
		log.Fatalf("query regions: %v", err)
	}

	type regionRow struct {
		group   string
		n       int
		domains int
	}
	var regionRows []regionRow
	for rows.Next() {
		var r regionRow
		rows.Scan(&r.group, &r.n, &r.domains)
		regionRows = append(regionRows, r)
	}
	rows.Close()

	avg := float64(total) / float64(len(regionRows))
	for _, r := range regionRows {
		barLen := int(float64(r.n) / avg * 10)
		if barLen > 40 {
			barLen = 40
		}
		bar := strings.Repeat("#", barLen)
		gap := ""
		if float64(r.n) < avg*0.5 {
			gap = " <- UNDERREPRESENTED"
		}
		fmt.Printf("  %-22s %6d (%4d domains) %s%s\n", r.group, r.n, r.domains, bar, gap)
	}

	// Top 10 domains.
	fmt.Println("\nTop 10 domains (most seeds):")
	topRows, err := db.conn.Query(`
		SELECT domain, count(*) AS n FROM seeds
		WHERE domain != '' GROUP BY domain ORDER BY n DESC LIMIT 10
	`)
	if err == nil {
		for topRows.Next() {
			var domain string
			var n int
			topRows.Scan(&domain, &n)
			fmt.Printf("  %-40s %5d\n", domain, n)
		}
		topRows.Close()
	}

	// Bottom 10 domains.
	fmt.Println("\nBottom 10 domains (fewest seeds, min 5):")
	bottomRows, err := db.conn.Query(`
		SELECT domain, count(*) AS n FROM seeds
		WHERE domain != '' GROUP BY domain HAVING count(*) >= 5 ORDER BY n ASC LIMIT 10
	`)
	if err == nil {
		for bottomRows.Next() {
			var domain string
			var n int
			bottomRows.Scan(&domain, &n)
			fmt.Printf("  %-40s %5d\n", domain, n)
		}
		bottomRows.Close()
	}

	fmt.Println("\nSuggested expansion areas:")
	fmt.Println("  - Japanese, Korean, Thai, Vietnamese (no seeds found)")
	fmt.Println("  - Hindi/Urdu, Bengali, Tamil (South Asian)")
	fmt.Println("  - Swahili, Yoruba, Amharic (Sub-Saharan Africa)")
	fmt.Println("  - Indigenous languages (Quechua, Nahuatl, Aymara)")
}
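The LIKE-based CASE expression above is the whole grouping heuristic, and clause order matters (for example, 'deten' regions must not land in the 'de' bucket). A Go mirror of the same logic can be handy for unit-testing the bucket order; this function is hypothetical, not part of the commit:

// Hypothetical Go mirror of the SQL CASE grouping in coverage.go;
// clauses are checked in the same order as the query.
func langGroup(region string) string {
	switch {
	case strings.Contains(region, "cn"):
		return "cn (Chinese)"
	case strings.Contains(region, "en-") || strings.Contains(region, "en_para") || strings.Contains(region, "para"):
		return "en (English)"
	case strings.Contains(region, "ru"):
		return "ru (Russian)"
	case strings.Contains(region, "de") && !strings.Contains(region, "deten"):
		return "de (German)"
	case strings.Contains(region, "es"):
		return "es (Spanish)"
	case strings.Contains(region, "fr"):
		return "fr (French)"
	case strings.Contains(region, "latam"):
		return "latam (LatAm)"
	case strings.Contains(region, "africa"):
		return "africa"
	case strings.Contains(region, "eu"):
		return "eu (European)"
	case strings.Contains(region, "me") && !strings.Contains(region, "premium"):
		return "me (MidEast)"
	case strings.Contains(region, "multi"):
		return "multilingual"
	case strings.Contains(region, "weak"):
		return "weak-langs"
	default:
		return "other"
	}
}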
pkg/lem/expand_status.go (new file, 103 lines)

package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
)

// RunExpandStatus is the CLI entry point for the expand-status command.
// Shows the expansion pipeline progress from DuckDB.
func RunExpandStatus(args []string) {
	fs := flag.NewFlagSet("expand-status", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	fmt.Println("LEM Expansion Pipeline Status")
	fmt.Println("==================================================")

	// Expansion prompts.
	var epTotal, epPending int
	err = db.conn.QueryRow("SELECT count(*) FROM expansion_prompts").Scan(&epTotal)
	if err != nil {
		fmt.Println("  Expansion prompts: not created (run: lem normalize)")
		return
	}
	db.conn.QueryRow("SELECT count(*) FROM expansion_prompts WHERE status = 'pending'").Scan(&epPending)
	fmt.Printf("  Expansion prompts: %d total, %d pending\n", epTotal, epPending)

	// Generated responses.
	var generated int
	err = db.conn.QueryRow("SELECT count(*) FROM expansion_raw").Scan(&generated)
	if err != nil {
		fmt.Println("  Generated: 0 (run: lem expand)")
	} else {
		rows, _ := db.conn.Query("SELECT model, count(*) FROM expansion_raw GROUP BY model")
		if rows != nil {
			var parts []string
			for rows.Next() {
				var model string
				var n int
				rows.Scan(&model, &n)
				parts = append(parts, fmt.Sprintf("%s: %d", model, n))
			}
			rows.Close()
			if len(parts) > 0 {
				fmt.Printf("  Generated: %d (%s)\n", generated, joinStrings(parts, ", "))
			} else {
				fmt.Printf("  Generated: %d\n", generated)
			}
		}
	}

	// Scored.
	var scored, hPassed, jScored, jPassed int
	err = db.conn.QueryRow("SELECT count(*) FROM expansion_scores").Scan(&scored)
	if err != nil {
		fmt.Println("  Scored: 0 (run: lem tier-score --tier 1)")
	} else {
		db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE heuristic_pass = true").Scan(&hPassed)
		fmt.Printf("  Heuristic scored: %d (%d passed)\n", scored, hPassed)

		db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE judge_average IS NOT NULL").Scan(&jScored)
		db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE judge_pass = true").Scan(&jPassed)
		if jScored > 0 {
			fmt.Printf("  Judge scored: %d (%d passed)\n", jScored, jPassed)
		}
	}

	// Pipeline progress.
	if epTotal > 0 && generated > 0 {
		genPct := float64(generated) / float64(epTotal) * 100
		fmt.Printf("\n  Progress: %.1f%% generated\n", genPct)
	}

	// Golden set context.
	var golden int
	err = db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&golden)
	if err == nil {
		fmt.Printf("\n  Golden set: %d / %d\n", golden, targetTotal)
		if generated > 0 {
			fmt.Printf("  Combined: %d total examples\n", golden+generated)
		}
	}
}
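expand-status (and inventory below) reference a package-level `targetTotal` constant that is not part of this diff. A sketch of the assumed definition; the value is a guess from the `gold-15k.jsonl` filename, not confirmed by this commit:

// Assumed to live elsewhere in pkg/lem; not shown in this diff.
// 15000 is inferred from the gold-15k.jsonl golden-set file.
const targetTotal = 15000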
pkg/lem/import.go (new file, 453 lines)

package lem

import (
	"bufio"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
)

// RunImport is the CLI entry point for the import-all command.
// Imports ALL LEM data into DuckDB: prompts, Gemini responses, golden set,
// training examples, benchmarks, validations, and seeds.
func RunImport(args []string) {
	fs := flag.NewFlagSet("import-all", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	skipM3 := fs.Bool("skip-m3", false, "Skip pulling data from M3")
	dataDir := fs.String("data-dir", "", "Local data directory (defaults to db directory)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	if *dataDir == "" {
		*dataDir = filepath.Dir(*dbPath)
	}

	db, err := OpenDBReadWrite(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	totals := make(map[string]int)

	// ── 1. Golden set ──
	goldenPath := filepath.Join(*dataDir, "gold-15k.jsonl")
	if !*skipM3 {
		fmt.Println("  Pulling golden set from M3...")
		scpCmd := exec.Command("scp", "m3:/Volumes/Data/lem/responses/gold-15k.jsonl", goldenPath)
		if err := scpCmd.Run(); err != nil {
			log.Printf("  WARNING: could not pull golden set from M3: %v", err)
		}
	}
	if _, err := os.Stat(goldenPath); err == nil {
		db.conn.Exec("DROP TABLE IF EXISTS golden_set")
		_, err := db.conn.Exec(fmt.Sprintf(`
			CREATE TABLE golden_set AS
			SELECT
				idx::INT AS idx,
				seed_id::VARCHAR AS seed_id,
				domain::VARCHAR AS domain,
				voice::VARCHAR AS voice,
				prompt::VARCHAR AS prompt,
				response::VARCHAR AS response,
				gen_time::DOUBLE AS gen_time,
				length(response)::INT AS char_count,
				length(response) - length(replace(response, ' ', '')) + 1 AS word_count
			FROM read_json_auto('%s', maximum_object_size=1048576)
		`, escapeSQLPath(goldenPath)))
		if err != nil {
			log.Printf("  WARNING: golden set import failed: %v", err)
		} else {
			var n int
			db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&n)
			totals["golden_set"] = n
			fmt.Printf("  golden_set: %d rows\n", n)
		}
	}

	// ── 2. Training examples ──
	trainingDirs := []struct {
		name  string
		files []string
	}{
		{"training", []string{"training/train.jsonl", "training/valid.jsonl", "training/test.jsonl"}},
		{"training-2k", []string{"training-2k/train.jsonl", "training-2k/valid.jsonl", "training-2k/test.jsonl"}},
		{"training-expanded", []string{"training-expanded/train.jsonl", "training-expanded/valid.jsonl"}},
		{"training-book", []string{"training-book/train.jsonl", "training-book/valid.jsonl", "training-book/test.jsonl"}},
		{"training-conv", []string{"training-conv/train.jsonl", "training-conv/valid.jsonl", "training-conv/test.jsonl"}},
		{"gold-full", []string{"gold-full/train.jsonl", "gold-full/valid.jsonl"}},
		{"sovereignty-gold", []string{"sovereignty-gold/train.jsonl", "sovereignty-gold/valid.jsonl"}},
		{"composure-lessons", []string{"composure-lessons/train.jsonl", "composure-lessons/valid.jsonl"}},
		{"watts-full", []string{"watts-full/train.jsonl", "watts-full/valid.jsonl"}},
		{"watts-expanded", []string{"watts-expanded/train.jsonl", "watts-expanded/valid.jsonl"}},
		{"watts-composure", []string{"watts-composure-merged/train.jsonl", "watts-composure-merged/valid.jsonl"}},
		{"western-fresh", []string{"western-fresh/train.jsonl", "western-fresh/valid.jsonl"}},
		{"deepseek-soak", []string{"deepseek-western-soak/train.jsonl", "deepseek-western-soak/valid.jsonl"}},
		{"russian-bridge", []string{"russian-bridge/train.jsonl", "russian-bridge/valid.jsonl"}},
	}

	trainingLocal := filepath.Join(*dataDir, "training")
	os.MkdirAll(trainingLocal, 0755)

	if !*skipM3 {
		fmt.Println("  Pulling training sets from M3...")
		for _, td := range trainingDirs {
			for _, rel := range td.files {
				local := filepath.Join(trainingLocal, rel)
				os.MkdirAll(filepath.Dir(local), 0755)
				scpCmd := exec.Command("scp", fmt.Sprintf("m3:/Volumes/Data/lem/%s", rel), local)
				scpCmd.Run() // ignore errors, file might not exist
			}
		}
	}

	db.conn.Exec("DROP TABLE IF EXISTS training_examples")
	db.conn.Exec(`
		CREATE TABLE training_examples (
			source VARCHAR,
			split VARCHAR,
			prompt TEXT,
			response TEXT,
			num_turns INT,
			full_messages TEXT,
			char_count INT
		)
	`)

	trainingTotal := 0
	for _, td := range trainingDirs {
		for _, rel := range td.files {
			local := filepath.Join(trainingLocal, rel)
			if _, err := os.Stat(local); os.IsNotExist(err) {
				continue
			}

			split := "train"
			if strings.Contains(rel, "valid") {
				split = "valid"
			} else if strings.Contains(rel, "test") {
				split = "test"
			}

			n := importTrainingFile(db, local, td.name, split)
			trainingTotal += n
		}
	}
	totals["training_examples"] = trainingTotal
	fmt.Printf("  training_examples: %d rows\n", trainingTotal)

	// ── 3. Benchmark results ──
	benchLocal := filepath.Join(*dataDir, "benchmarks")
	os.MkdirAll(benchLocal, 0755)

	if !*skipM3 {
		fmt.Println("  Pulling benchmarks from M3...")
		for _, bname := range []string{"truthfulqa", "gsm8k", "do_not_answer", "toxigen"} {
			scpCmd := exec.Command("scp",
				fmt.Sprintf("m3:/Volumes/Data/lem/benchmarks/%s.jsonl", bname),
				filepath.Join(benchLocal, bname+".jsonl"))
			scpCmd.Run()
		}
		for _, subdir := range []string{"results", "scale_results", "cross_arch_results", "deepseek-r1-7b"} {
			localSub := filepath.Join(benchLocal, subdir)
			os.MkdirAll(localSub, 0755)
			scpCmd := exec.Command("scp", "-r",
				fmt.Sprintf("m3:/Volumes/Data/lem/benchmarks/%s/", subdir),
				filepath.Join(benchLocal)+"/")
			scpCmd.Run()
		}
	}

	db.conn.Exec("DROP TABLE IF EXISTS benchmark_results")
	db.conn.Exec(`
		CREATE TABLE benchmark_results (
			source VARCHAR, id VARCHAR, benchmark VARCHAR, model VARCHAR,
			prompt TEXT, response TEXT, elapsed_seconds DOUBLE, domain VARCHAR
		)
	`)

	benchTotal := 0
	for _, subdir := range []string{"results", "scale_results", "cross_arch_results", "deepseek-r1-7b"} {
		resultDir := filepath.Join(benchLocal, subdir)
		matches, _ := filepath.Glob(filepath.Join(resultDir, "*.jsonl"))
		for _, jf := range matches {
			n := importBenchmarkFile(db, jf, subdir)
			benchTotal += n
		}
	}

	// Also import standalone benchmark files.
	for _, bfile := range []string{"lem_bench", "lem_ethics", "lem_ethics_allen", "instruction_tuned", "abliterated", "base_pt"} {
		local := filepath.Join(benchLocal, bfile+".jsonl")
		if _, err := os.Stat(local); os.IsNotExist(err) {
			if !*skipM3 {
				scpCmd := exec.Command("scp",
					fmt.Sprintf("m3:/Volumes/Data/lem/benchmark/%s.jsonl", bfile), local)
				scpCmd.Run()
			}
		}
		if _, err := os.Stat(local); err == nil {
			n := importBenchmarkFile(db, local, "benchmark")
			benchTotal += n
		}
	}
	totals["benchmark_results"] = benchTotal
	fmt.Printf("  benchmark_results: %d rows\n", benchTotal)

	// ── 4. Benchmark questions ──
	db.conn.Exec("DROP TABLE IF EXISTS benchmark_questions")
	db.conn.Exec(`
		CREATE TABLE benchmark_questions (
			benchmark VARCHAR, id VARCHAR, question TEXT,
			best_answer TEXT, correct_answers TEXT, incorrect_answers TEXT, category VARCHAR
		)
	`)

	benchQTotal := 0
	for _, bname := range []string{"truthfulqa", "gsm8k", "do_not_answer", "toxigen"} {
		local := filepath.Join(benchLocal, bname+".jsonl")
		if _, err := os.Stat(local); err == nil {
			n := importBenchmarkQuestions(db, local, bname)
			benchQTotal += n
		}
	}
	totals["benchmark_questions"] = benchQTotal
	fmt.Printf("  benchmark_questions: %d rows\n", benchQTotal)

	// ── 5. Seeds ──
	db.conn.Exec("DROP TABLE IF EXISTS seeds")
	db.conn.Exec(`
		CREATE TABLE seeds (
			source_file VARCHAR, region VARCHAR, seed_id VARCHAR, domain VARCHAR, prompt TEXT
		)
	`)

	seedTotal := 0
	seedDirs := []string{filepath.Join(*dataDir, "seeds"), "/tmp/lem-data/seeds", "/tmp/lem-repo/seeds"}
	for _, seedDir := range seedDirs {
		if _, err := os.Stat(seedDir); os.IsNotExist(err) {
			continue
		}
		n := importSeeds(db, seedDir)
		seedTotal += n
	}
	totals["seeds"] = seedTotal
	fmt.Printf("  seeds: %d rows\n", seedTotal)

	// ── Summary ──
	grandTotal := 0
	fmt.Printf("\n%s\n", strings.Repeat("=", 50))
	fmt.Println("LEM Database Import Complete")
	fmt.Println(strings.Repeat("=", 50))
	for table, count := range totals {
		fmt.Printf("  %-25s %8d\n", table, count)
		grandTotal += count
	}
	fmt.Printf("  %s\n", strings.Repeat("─", 35))
	fmt.Printf("  %-25s %8d\n", "TOTAL", grandTotal)
	fmt.Printf("\nDatabase: %s\n", *dbPath)
}

func importTrainingFile(db *DB, path, source, split string) int {
	f, err := os.Open(path)
	if err != nil {
		return 0
	}
	defer f.Close()

	count := 0
	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

	for scanner.Scan() {
		var rec struct {
			Messages []ChatMessage `json:"messages"`
		}
		if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
			continue
		}

		prompt := ""
		response := ""
		assistantCount := 0
		for _, m := range rec.Messages {
			if m.Role == "user" && prompt == "" {
				prompt = m.Content
			}
			if m.Role == "assistant" {
				if response == "" {
					response = m.Content
				}
				assistantCount++
			}
		}

		msgsJSON, _ := json.Marshal(rec.Messages)
		db.conn.Exec(`INSERT INTO training_examples VALUES (?, ?, ?, ?, ?, ?, ?)`,
			source, split, prompt, response, assistantCount, string(msgsJSON), len(response))
		count++
	}
	return count
}

func importBenchmarkFile(db *DB, path, source string) int {
	f, err := os.Open(path)
	if err != nil {
		return 0
	}
	defer f.Close()

	count := 0
	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

	for scanner.Scan() {
		var rec map[string]interface{}
		if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
			continue
		}

		db.conn.Exec(`INSERT INTO benchmark_results VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
			source,
			fmt.Sprintf("%v", rec["id"]),
			strOrEmpty(rec, "benchmark"),
			strOrEmpty(rec, "model"),
			strOrEmpty(rec, "prompt"),
			strOrEmpty(rec, "response"),
			floatOrZero(rec, "elapsed_seconds"),
			strOrEmpty(rec, "domain"),
		)
		count++
	}
	return count
}

func importBenchmarkQuestions(db *DB, path, benchmark string) int {
	f, err := os.Open(path)
	if err != nil {
		return 0
	}
	defer f.Close()

	count := 0
	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

	for scanner.Scan() {
		var rec map[string]interface{}
		if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
			continue
		}

		correctJSON, _ := json.Marshal(rec["correct_answers"])
		incorrectJSON, _ := json.Marshal(rec["incorrect_answers"])

		db.conn.Exec(`INSERT INTO benchmark_questions VALUES (?, ?, ?, ?, ?, ?, ?)`,
			benchmark,
			fmt.Sprintf("%v", rec["id"]),
			strOrEmpty(rec, "question"),
			strOrEmpty(rec, "best_answer"),
			string(correctJSON),
			string(incorrectJSON),
			strOrEmpty(rec, "category"),
		)
		count++
	}
	return count
}

func importSeeds(db *DB, seedDir string) int {
	count := 0
	filepath.Walk(seedDir, func(path string, info os.FileInfo, err error) error {
		if err != nil || info.IsDir() || !strings.HasSuffix(path, ".json") {
			return nil
		}

		data, err := os.ReadFile(path)
		if err != nil {
			return nil
		}

		rel, _ := filepath.Rel(seedDir, path)
		region := strings.TrimSuffix(filepath.Base(path), ".json")

		// Try parsing as array or object with prompts/seeds field.
		var seedsList []interface{}
		var raw interface{}
		if err := json.Unmarshal(data, &raw); err != nil {
			return nil
		}

		switch v := raw.(type) {
		case []interface{}:
			seedsList = v
		case map[string]interface{}:
			if prompts, ok := v["prompts"].([]interface{}); ok {
				seedsList = prompts
			} else if seeds, ok := v["seeds"].([]interface{}); ok {
				seedsList = seeds
			}
		}

		for _, s := range seedsList {
			switch seed := s.(type) {
			case map[string]interface{}:
				prompt := strOrEmpty(seed, "prompt")
				if prompt == "" {
					prompt = strOrEmpty(seed, "text")
				}
				if prompt == "" {
					prompt = strOrEmpty(seed, "question")
				}
				db.conn.Exec(`INSERT INTO seeds VALUES (?, ?, ?, ?, ?)`,
					rel, region,
					strOrEmpty(seed, "seed_id"),
					strOrEmpty(seed, "domain"),
					prompt,
				)
				count++
			case string:
				db.conn.Exec(`INSERT INTO seeds VALUES (?, ?, ?, ?, ?)`,
					rel, region, "", "", seed)
				count++
			}
		}
		return nil
	})
	return count
}

func strOrEmpty(m map[string]interface{}, key string) string {
	if v, ok := m[key]; ok {
		return fmt.Sprintf("%v", v)
	}
	return ""
}

func floatOrZero(m map[string]interface{}, key string) float64 {
	if v, ok := m[key]; ok {
		if f, ok := v.(float64); ok {
			return f
		}
	}
	return 0
}

func escapeSQLPath(p string) string {
	return strings.ReplaceAll(p, "'", "''")
}
pkg/lem/inventory.go (new file, 97 lines)

package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
)

// RunInventory is the CLI entry point for the inventory command.
// Shows row counts and summary stats for all tables in the DuckDB database.
func RunInventory(args []string) {
	fs := flag.NewFlagSet("inventory", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	counts, err := db.TableCounts()
	if err != nil {
		log.Fatalf("table counts: %v", err)
	}

	fmt.Printf("LEM Database Inventory (%s)\n", *dbPath)
	fmt.Println("============================================================")

	grandTotal := 0
	for table, count := range counts {
		detail := ""

		switch table {
		case "golden_set":
			pct := float64(count) / float64(targetTotal) * 100
			detail = fmt.Sprintf(" (%.1f%% of %d target)", pct, targetTotal)
		case "training_examples":
			var sources int
			db.conn.QueryRow("SELECT COUNT(DISTINCT source) FROM training_examples").Scan(&sources)
			detail = fmt.Sprintf(" (%d sources)", sources)
		case "prompts":
			var domains, voices int
			db.conn.QueryRow("SELECT COUNT(DISTINCT domain) FROM prompts").Scan(&domains)
			db.conn.QueryRow("SELECT COUNT(DISTINCT voice) FROM prompts").Scan(&voices)
			detail = fmt.Sprintf(" (%d domains, %d voices)", domains, voices)
		case "gemini_responses":
			rows, _ := db.conn.Query("SELECT source_model, count(*) FROM gemini_responses GROUP BY source_model")
			if rows != nil {
				var parts []string
				for rows.Next() {
					var model string
					var n int
					rows.Scan(&model, &n)
					parts = append(parts, fmt.Sprintf("%s: %d", model, n))
				}
				rows.Close()
				if len(parts) > 0 {
					detail = fmt.Sprintf(" (%s)", joinStrings(parts, ", "))
				}
			}
		case "benchmark_results":
			var sources int
			db.conn.QueryRow("SELECT COUNT(DISTINCT source) FROM benchmark_results").Scan(&sources)
			detail = fmt.Sprintf(" (%d categories)", sources)
		}

		fmt.Printf("  %-25s %8d%s\n", table, count, detail)
		grandTotal += count
	}

	fmt.Printf("  %-25s\n", "────────────────────────────────────────")
	fmt.Printf("  %-25s %8d\n", "TOTAL", grandTotal)
}

func joinStrings(parts []string, sep string) string {
	result := ""
	for i, p := range parts {
		if i > 0 {
			result += sep
		}
		result += p
	}
	return result
}
pkg/lem/normalize.go (new file, 148 lines)

package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
)

// RunNormalize is the CLI entry point for the normalize command.
// Normalizes seeds into the expansion_prompts table, deduplicating against
// the golden set and existing prompts. Assigns priority based on domain
// coverage (underrepresented domains first).
func RunNormalize(args []string) {
	fs := flag.NewFlagSet("normalize", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	minLen := fs.Int("min-length", 50, "Minimum prompt length in characters")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDBReadWrite(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	// Check source tables.
	var seedCount int
	if err := db.conn.QueryRow("SELECT count(*) FROM seeds").Scan(&seedCount); err != nil {
		log.Fatalf("No seeds table. Run: lem import-all first")
	}
	fmt.Printf("Seeds table: %d rows\n", seedCount)

	// Drop and recreate expansion_prompts.
	_, err = db.conn.Exec("DROP TABLE IF EXISTS expansion_prompts")
	if err != nil {
		log.Fatalf("drop expansion_prompts: %v", err)
	}

	// Deduplicate: remove seeds whose prompt already appears in prompts or golden_set.
	_, err = db.conn.Exec(fmt.Sprintf(`
		CREATE TABLE expansion_prompts AS
		WITH unique_seeds AS (
			SELECT
				ROW_NUMBER() OVER (ORDER BY region, domain, seed_id) AS idx,
				seed_id,
				region,
				domain,
				prompt
			FROM (
				SELECT DISTINCT ON (prompt)
					seed_id, region, domain, prompt
				FROM seeds
				WHERE length(prompt) >= %d
				ORDER BY prompt, seed_id
			)
		),
		existing_prompts AS (
			SELECT prompt FROM prompts
			UNION ALL
			SELECT prompt FROM golden_set
		)
		SELECT
			us.idx,
			us.seed_id,
			us.region,
			us.domain,
			'en' AS language,
			us.prompt,
			'' AS prompt_en,
			0 AS priority,
			'pending' AS status
		FROM unique_seeds us
		WHERE NOT EXISTS (
			SELECT 1 FROM existing_prompts ep
			WHERE ep.prompt = us.prompt
		)
	`, *minLen))
	if err != nil {
		log.Fatalf("create expansion_prompts: %v", err)
	}

	var total, domains, regions int
	db.conn.QueryRow("SELECT count(*) FROM expansion_prompts").Scan(&total)
	db.conn.QueryRow("SELECT count(DISTINCT domain) FROM expansion_prompts").Scan(&domains)
	db.conn.QueryRow("SELECT count(DISTINCT region) FROM expansion_prompts").Scan(&regions)

	// Assign priority based on domain coverage.
	_, err = db.conn.Exec(`
		UPDATE expansion_prompts SET priority = (
			SELECT RANK() OVER (ORDER BY cnt ASC)
			FROM (
				SELECT domain, count(*) AS cnt
				FROM expansion_prompts GROUP BY domain
			) domain_counts
			WHERE domain_counts.domain = expansion_prompts.domain
		)
	`)
	if err != nil {
		log.Printf("warning: priority assignment failed: %v", err)
	}

	fmt.Printf("\nExpansion Prompts: %d\n", total)
	fmt.Printf("  Domains: %d\n", domains)
	fmt.Printf("  Regions: %d\n", regions)

	// Show region distribution.
	fmt.Println("\n  By region group:")
	rows, err := db.conn.Query(`
		SELECT
			CASE
				WHEN region LIKE '%cn%' THEN 'cn'
				WHEN region LIKE '%en-%' OR region LIKE '%en_para%' OR region LIKE '%para%' THEN 'en'
				WHEN region LIKE '%ru%' THEN 'ru'
				WHEN region LIKE '%de%' AND region NOT LIKE '%deten%' THEN 'de'
				WHEN region LIKE '%es%' THEN 'es'
				WHEN region LIKE '%fr%' THEN 'fr'
				WHEN region LIKE '%latam%' THEN 'latam'
				WHEN region LIKE '%africa%' THEN 'africa'
				WHEN region LIKE '%eu%' THEN 'eu'
				WHEN region LIKE '%me%' AND region NOT LIKE '%premium%' THEN 'me'
				ELSE 'other'
			END AS lang_group,
			count(*) AS n
		FROM expansion_prompts GROUP BY lang_group ORDER BY n DESC
	`)
	if err == nil {
		for rows.Next() {
			var group string
			var n int
			rows.Scan(&group, &n)
			fmt.Printf("    %-15s %6d\n", group, n)
		}
		rows.Close()
	}

	fmt.Printf("\nNormalization complete: %d expansion prompts from %d seeds\n", total, seedCount)
}
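Two details of the SQL above are worth spelling out. The dedup step uses DuckDB's `SELECT DISTINCT ON (prompt)`, which keeps one row per prompt, the first under `ORDER BY prompt, seed_id`, before the anti-join against existing prompts. And the priority UPDATE ranks domains by ascending count, so the domains with the fewest prompts get rank 1, i.e. the highest priority. A tiny illustration of the DISTINCT ON behavior (a sketch with hypothetical table and rows, runnable against any DuckDB handle):

// Hypothetical demo of the DISTINCT ON semantics normalize relies on.
// Of ('b','same') and ('a','same'), only ('a','same') survives, because
// ORDER BY prompt, seed_id makes it the first row for that prompt.
db.conn.Exec(`CREATE TEMP TABLE demo(seed_id VARCHAR, prompt VARCHAR)`)
db.conn.Exec(`INSERT INTO demo VALUES ('b', 'same'), ('a', 'same'), ('c', 'other')`)
rows, _ := db.conn.Query(`SELECT DISTINCT ON (prompt) seed_id, prompt FROM demo ORDER BY prompt, seed_id`)
// → ('c', 'other'), ('a', 'same')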
pkg/lem/query.go (new file, 152 lines)

package lem

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"strings"
)

// RunQuery is the CLI entry point for the query command.
// Runs ad-hoc SQL against the DuckDB database.
func RunQuery(args []string) {
	fs := flag.NewFlagSet("query", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	jsonOutput := fs.Bool("json", false, "Output as JSON instead of table")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	sql := strings.Join(fs.Args(), " ")
	if sql == "" {
		fmt.Fprintln(os.Stderr, "error: SQL query required as positional argument")
		fmt.Fprintln(os.Stderr, `  lem query --db path.duckdb "SELECT * FROM golden_set LIMIT 5"`)
		fmt.Fprintln(os.Stderr, `  lem query --db path.duckdb "domain = 'ethics'"  (auto-wraps as WHERE clause)`)
		os.Exit(1)
	}

	// Auto-wrap non-SELECT queries as WHERE clauses.
	trimmed := strings.TrimSpace(strings.ToUpper(sql))
	if !strings.HasPrefix(trimmed, "SELECT") && !strings.HasPrefix(trimmed, "SHOW") &&
		!strings.HasPrefix(trimmed, "DESCRIBE") && !strings.HasPrefix(trimmed, "EXPLAIN") {
		sql = "SELECT * FROM golden_set WHERE " + sql + " LIMIT 20"
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	rows, err := db.conn.Query(sql)
	if err != nil {
		log.Fatalf("query: %v", err)
	}
	defer rows.Close()

	cols, err := rows.Columns()
	if err != nil {
		log.Fatalf("columns: %v", err)
	}

	var results []map[string]interface{}

	for rows.Next() {
		values := make([]interface{}, len(cols))
		ptrs := make([]interface{}, len(cols))
		for i := range values {
			ptrs[i] = &values[i]
		}

		if err := rows.Scan(ptrs...); err != nil {
			log.Fatalf("scan: %v", err)
		}

		row := make(map[string]interface{})
		for i, col := range cols {
			v := values[i]
			// Convert []byte to string for readability.
			if b, ok := v.([]byte); ok {
				v = string(b)
			}
			row[col] = v
		}
		results = append(results, row)
	}

	if *jsonOutput {
		enc := json.NewEncoder(os.Stdout)
		enc.SetIndent("", "  ")
		enc.Encode(results)
		return
	}

	// Table output.
	if len(results) == 0 {
		fmt.Println("(no results)")
		return
	}

	// Calculate column widths.
	widths := make(map[string]int)
	for _, col := range cols {
		widths[col] = len(col)
	}
	for _, row := range results {
		for _, col := range cols {
			s := fmt.Sprintf("%v", row[col])
			if len(s) > 60 {
				s = s[:57] + "..."
			}
			if len(s) > widths[col] {
				widths[col] = len(s)
			}
		}
	}

	// Print header.
	for i, col := range cols {
		if i > 0 {
			fmt.Print("  ")
		}
		fmt.Printf("%-*s", widths[col], col)
	}
	fmt.Println()

	// Print separator.
	for i, col := range cols {
		if i > 0 {
			fmt.Print("  ")
		}
		fmt.Print(strings.Repeat("─", widths[col]))
	}
	fmt.Println()

	// Print rows.
	for _, row := range results {
		for i, col := range cols {
			if i > 0 {
				fmt.Print("  ")
			}
			s := fmt.Sprintf("%v", row[col])
			if len(s) > 60 {
				s = s[:57] + "..."
			}
			fmt.Printf("%-*s", widths[col], s)
		}
		fmt.Println()
	}

	fmt.Printf("\n(%d rows)\n", len(results))
}
111
pkg/lem/seed_influx.go
Normal file
111
pkg/lem/seed_influx.go
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
package lem
import (
	"flag"
	"fmt"
	"log"
	"os"
	"strings"
)

// RunSeedInflux is the CLI entry point for the seed-influx command.
// Seeds the InfluxDB golden_gen measurement from DuckDB golden_set data.
// One-time migration tool for bootstrapping InfluxDB from existing data.
func RunSeedInflux(args []string) {
	fs := flag.NewFlagSet("seed-influx", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	influxURL := fs.String("influx", "", "InfluxDB URL")
	influxDB := fs.String("influx-db", "", "InfluxDB database name")
	force := fs.Bool("force", false, "Re-seed even if InfluxDB already has data")
	batchSize := fs.Int("batch-size", 500, "Lines per InfluxDB write batch")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	var total int
	if err := db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&total); err != nil {
		log.Fatalf("count golden_set: %v (no golden_set table? run ingest first)", err)
	}

	influx := NewInfluxClient(*influxURL, *influxDB)

	// Check how many records InfluxDB already has.
	existing := 0
	rows, err := influx.QuerySQL("SELECT count(DISTINCT i) AS n FROM gold_gen")
	if err == nil && len(rows) > 0 {
		if n, ok := rows[0]["n"].(float64); ok {
			existing = int(n)
		}
	}

	fmt.Printf("DuckDB has %d records, InfluxDB golden_gen has %d\n", total, existing)

	if existing >= total && !*force {
		fmt.Println("InfluxDB already has all records. Use --force to re-seed.")
		return
	}

	// Read all rows.
	dbRows, err := db.conn.Query(`
		SELECT idx, seed_id, domain, voice, gen_time, char_count
		FROM golden_set ORDER BY idx
	`)
	if err != nil {
		log.Fatalf("query golden_set: %v", err)
	}
	defer dbRows.Close()

	var lines []string
	written := 0

	for dbRows.Next() {
		var idx, charCount int
		var seedID, domain, voice string
		var genTime float64

		if err := dbRows.Scan(&idx, &seedID, &domain, &voice, &genTime, &charCount); err != nil {
			log.Fatalf("scan: %v", err)
		}

		sid := strings.ReplaceAll(seedID, `"`, `\"`)
		lp := fmt.Sprintf(`gold_gen,i=%d,w=migration,d=%s,v=%s seed_id="%s",gen_time=%.1f,chars=%di`,
			idx, escapeLp(domain), escapeLp(voice), sid, genTime, charCount)
		lines = append(lines, lp)

		if len(lines) >= *batchSize {
			if err := influx.WriteLp(lines); err != nil {
				log.Fatalf("write batch at %d: %v", written, err)
			}
			written += len(lines)
			lines = lines[:0]

			if written%2000 == 0 {
				fmt.Printf("  Seeded %d/%d records\n", written, total)
			}
		}
	}
	if err := dbRows.Err(); err != nil {
		log.Fatalf("iterate golden_set: %v", err)
	}

	if len(lines) > 0 {
		if err := influx.WriteLp(lines); err != nil {
			log.Fatalf("flush: %v", err)
		}
		written += len(lines)
	}

	fmt.Printf("Seeded %d golden_gen records into InfluxDB\n", written)
}
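For reference, a standalone sketch of the line-protocol record this loop emits. It assumes escapeLp escapes the InfluxDB tag-value specials (comma, space, equals); the real helper lives elsewhere in pkg/lem, and the row values below are hypothetical:

package main

import (
	"fmt"
	"strings"
)

// Stand-in for pkg/lem's escapeLp (assumed behavior: escape the characters
// InfluxDB line protocol treats specially inside tag values).
func escapeLp(s string) string {
	return strings.NewReplacer(",", `\,`, " ", `\ `, "=", `\=`).Replace(s)
}

func main() {
	// Hypothetical golden_set row.
	idx, domain, voice := 42, "applied ethics", "calm"
	seedID, genTime, chars := "seed-0042", 3.7, 1824

	fmt.Printf(`gold_gen,i=%d,w=migration,d=%s,v=%s seed_id="%s",gen_time=%.1f,chars=%di`+"\n",
		idx, escapeLp(domain), escapeLp(voice), seedID, genTime, chars)
	// Output:
	// gold_gen,i=42,w=migration,d=applied\ ethics,v=calm seed_id="seed-0042",gen_time=3.7,chars=1824i
}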
225
pkg/lem/tier_score.go
Normal file

@@ -0,0 +1,225 @@
package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
	"strings"
)

// RunTierScore is the CLI entry point for the tier-score command.
// Scores expansion responses using tiered quality assessment:
//   - Tier 1: Heuristic regex scoring (fast, no API)
//   - Tier 2: LEM self-judge (requires trained model)
//   - Tier 3: External judge (reserved for borderline cases)
func RunTierScore(args []string) {
	fs := flag.NewFlagSet("tier-score", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	tier := fs.Int("tier", 1, "Scoring tier: 1=heuristic, 2=LEM judge, 3=external")
	limit := fs.Int("limit", 0, "Max items to score (0=all)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDBReadWrite(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	// Ensure expansion_scores table exists.
	if _, err := db.conn.Exec(`
		CREATE TABLE IF NOT EXISTS expansion_scores (
			idx INT,
			heuristic_score DOUBLE,
			heuristic_pass BOOLEAN,
			judge_sovereignty DOUBLE,
			judge_ethical_depth DOUBLE,
			judge_creative DOUBLE,
			judge_self_concept DOUBLE,
			judge_average DOUBLE,
			judge_pass BOOLEAN,
			judge_model VARCHAR,
			scored_at TIMESTAMP
		)
	`); err != nil {
		log.Fatalf("create expansion_scores: %v", err)
	}

	if *tier >= 1 {
		runHeuristicTier(db, *limit)
	}

	if *tier >= 2 {
		fmt.Println("\nTier 2 (LEM judge): not yet available — needs trained LEM-27B model")
		fmt.Println("  Will score: sovereignty, ethical_depth, creative, self_concept (1-10 each)")
	}

	if *tier >= 3 {
		fmt.Println("\nTier 3 (External judge): reserved for borderline cases")
	}
}
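// Usage sketch (database path hypothetical): `lem tier-score --db lem.duckdb --tier 1 --limit 200`
// scores up to 200 unscored responses with the heuristic tier only; `--tier 2` runs
// tier 1 as well, then prints the judge placeholder until a trained LEM judge exists.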
func runHeuristicTier(db *DB, limit int) {
	// Find unscored responses.
	query := `
		SELECT r.idx, r.response FROM expansion_raw r
		LEFT JOIN expansion_scores s ON r.idx = s.idx
		WHERE s.idx IS NULL
		ORDER BY r.idx
	`
	if limit > 0 {
		query += fmt.Sprintf(" LIMIT %d", limit)
	}

	rows, err := db.conn.Query(query)
	if err != nil {
		log.Fatalf("query unscored: %v", err)
	}
	defer rows.Close()

	type unscoredRow struct {
		idx      int
		response string
	}
	var unscored []unscoredRow

	for rows.Next() {
		var r unscoredRow
		if err := rows.Scan(&r.idx, &r.response); err != nil {
			log.Fatalf("scan unscored: %v", err)
		}
		unscored = append(unscored, r)
	}
	if err := rows.Err(); err != nil {
		log.Fatalf("iterate unscored: %v", err)
	}

	if len(unscored) == 0 {
		fmt.Println("Tier 1 (heuristic): all responses already scored")
		return
	}

	fmt.Printf("Tier 1 (heuristic): scoring %d responses...\n", len(unscored))

	passed := 0
	for _, r := range unscored {
		score, _ := heuristicExpansionScore(r.response)
		isPass := score > 0
		if isPass {
			passed++
		}

		if _, err := db.conn.Exec(`
			INSERT INTO expansion_scores (idx, heuristic_score, heuristic_pass, scored_at)
			VALUES (?, ?, ?, current_timestamp)
		`, r.idx, score, isPass); err != nil {
			log.Fatalf("insert score for idx %d: %v", r.idx, err)
		}
	}

	fmt.Printf("  Scored: %d, Passed: %d, Failed: %d\n", len(unscored), passed, len(unscored)-passed)
	fmt.Printf("  Pass rate: %.1f%%\n", float64(passed)/float64(len(unscored))*100)
}
// heuristicExpansionScore applies fast heuristic scoring to an expansion response.
// Returns (score, details). Positive = good, negative = bad.
func heuristicExpansionScore(response string) (float64, map[string]interface{}) {
	details := make(map[string]interface{})

	if response == "" || len(response) < 30 {
		return -20.0, map[string]interface{}{"reason": "empty_or_broken"}
	}

	score := 0.0

	// Compliance markers (negative).
	compliancePhrases := []string{
		"as an ai", "i cannot", "i can't", "i'm not able",
		"important to note", "it's crucial to", "i must emphasize",
		"i should point out", "as a language model",
	}
	lower := strings.ToLower(response)
	complianceCount := 0
	for _, p := range compliancePhrases {
		if strings.Contains(lower, p) {
			complianceCount++
		}
	}
	if complianceCount > 0 {
		score -= float64(complianceCount) * 5
		details["compliance_markers"] = complianceCount
	}

	// Formulaic preamble (negative).
	trimmed := strings.TrimSpace(lower)
	if strings.HasPrefix(trimmed, "okay, let") || strings.HasPrefix(trimmed, "ok, let") || strings.HasPrefix(trimmed, "sure, let") {
		score -= 3
		details["formulaic_preamble"] = true
	}

	// Degeneration check (repetitive output): a low ratio of unique 5-word
	// chunks to total chunks indicates the model is looping.
	words := strings.Fields(response)
	if len(words) > 20 {
		chunks := make([]string, 0, len(words)/5)
		for i := 0; i+5 <= len(words); i += 5 {
			chunks = append(chunks, strings.Join(words[i:i+5], " "))
		}
		if len(chunks) > 0 {
			unique := make(map[string]bool)
			for _, c := range chunks {
				unique[c] = true
			}
			ratio := float64(len(unique)) / float64(len(chunks))
			if ratio < 0.5 {
				score -= 10
				details["degeneration"] = true
			}
		}
	}

	// Engagement depth (positive).
	wordCount := len(words)
	if wordCount > 100 {
		score += 2
	}
	if wordCount > 300 {
		score += 2
	}
	details["word_count"] = wordCount

	// Structure (positive).
	if strings.Contains(response, "\n\n") || strings.Contains(response, "**") ||
		strings.Contains(response, "1.") || strings.Contains(response, "- ") {
		score += 1
		details["structured"] = true
	}

	// Creative expression (positive).
	creativeMarkers := []string{"metaphor", "imagine", "picture this", "story", "once upon"}
	for _, m := range creativeMarkers {
		if strings.Contains(lower, m) {
			score += 2
			details["creative"] = true
			break
		}
	}

	// First-person engagement (positive).
	fpMarkers := []string{"i think", "i believe", "in my view", "i'd argue"}
	fpCount := 0
	for _, m := range fpMarkers {
		if strings.Contains(lower, m) {
			fpCount++
		}
	}
	if fpCount > 0 {
		score += float64(fpCount) * 1.5
		details["first_person"] = fpCount
	}

	return score, details
}
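A quick way to sanity-check the heuristic thresholds is a table test next to the function. A minimal sketch, assuming it sits in package lem beside heuristicExpansionScore; this commit ships no such test, and the cases below are illustrative:

package lem

import "testing"

func TestHeuristicExpansionScoreSketch(t *testing.T) {
	cases := []struct {
		name     string
		response string
		wantPass bool // pass means score > 0, mirroring runHeuristicTier
	}{
		// Under 30 characters: hard fail (-20).
		{"empty", "", false},
		// Two compliance markers ("as an ai", "i cannot"): -10.
		{"compliance", "As an AI, I cannot help with that request, whatever the framing.", false},
		// First person (+1.5), creative marker (+2), paragraph break (+1): +4.5.
		{"engaged", "I think the honest answer needs a metaphor.\n\nPicture a ledger where every choice leaves a line; sovereignty is refusing to let someone else hold the pen.", true},
	}
	for _, c := range cases {
		score, _ := heuristicExpansionScore(c.response)
		if got := score > 0; got != c.wantPass {
			t.Errorf("%s: score=%.1f, pass=%v, want %v", c.name, score, got, c.wantPass)
		}
	}
}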