feat: convert all pipeline.py commands to Go

Complete conversion of pipeline.py into Go `lem` CLI:
- import-all: bulk import all LEM data into DuckDB from M3
- consolidate: pull worker JSONLs, merge, deduplicate
- normalize: seeds → deduplicated expansion_prompts table
- approve: filter scored expansions → training JSONL
- tier-score: heuristic/judge tiered expansion scoring
- expand-status: expansion pipeline progress from DuckDB
- inventory: DuckDB table counts and summary
- coverage: seed coverage gap analysis
- seed-influx: bootstrap InfluxDB from DuckDB golden_gen
- query: ad-hoc SQL against DuckDB

22 commands total, 49 Go files. Replaces entire pipeline.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude 2026-02-15 17:12:03 +00:00
parent 4eaf1bfb39
commit 91ee389377
No known key found for this signature in database
GPG key ID: AF404715446AEB41
11 changed files with 1714 additions and 13 deletions

66
main.go
View file

@ -12,19 +12,39 @@ import (
const usage = `Usage: lem <command> [flags]
Commands:
score Score existing response files
probe Generate responses and score them
compare Compare two score files
status Show training and generation progress (InfluxDB + DuckDB)
export Export golden set to training-format JSONL splits
expand Generate expansion responses via trained LEM model
conv Generate conversational training data
ingest Ingest benchmark data into InfluxDB
parquet Export JSONL training splits to Parquet for HuggingFace
publish Push Parquet files to HuggingFace dataset repo
metrics Push DuckDB golden set stats to InfluxDB
convert Convert MLX LoRA adapter to HuggingFace PEFT format
Scoring:
score Score existing response files
probe Generate responses and score them
compare Compare two score files
tier-score Score expansion responses (heuristic/judge tiers)
Generation:
expand Generate expansion responses via trained LEM model
conv Generate conversational training data (calm phase)
Data Management:
import-all Import ALL LEM data into DuckDB from M3
consolidate Pull worker JSONLs from M3, merge, deduplicate
normalize Normalize seeds deduplicated expansion_prompts
approve Filter scored expansions training JSONL
Export & Publish:
export Export golden set to training-format JSONL splits
parquet Export JSONL training splits to Parquet
publish Push Parquet files to HuggingFace dataset repo
convert Convert MLX LoRA adapter to PEFT format
Monitoring:
status Show training and generation progress (InfluxDB)
expand-status Show expansion pipeline status (DuckDB)
inventory Show DuckDB table inventory
coverage Analyze seed coverage gaps
metrics Push DuckDB golden set stats to InfluxDB
Infrastructure:
ingest Ingest benchmark data into InfluxDB
seed-influx Seed InfluxDB golden_gen from DuckDB
query Run ad-hoc SQL against DuckDB
`
func main() {
@ -58,6 +78,26 @@ func main() {
lem.RunMetrics(os.Args[2:])
case "convert":
lem.RunConvert(os.Args[2:])
case "import-all":
lem.RunImport(os.Args[2:])
case "consolidate":
lem.RunConsolidate(os.Args[2:])
case "normalize":
lem.RunNormalize(os.Args[2:])
case "approve":
lem.RunApprove(os.Args[2:])
case "tier-score":
lem.RunTierScore(os.Args[2:])
case "expand-status":
lem.RunExpandStatus(os.Args[2:])
case "inventory":
lem.RunInventory(os.Args[2:])
case "coverage":
lem.RunCoverage(os.Args[2:])
case "seed-influx":
lem.RunSeedInflux(os.Args[2:])
case "query":
lem.RunQuery(os.Args[2:])
default:
fmt.Fprintf(os.Stderr, "unknown command: %s\n\n%s", os.Args[1], usage)
os.Exit(1)

98
pkg/lem/approve.go Normal file
View file

@ -0,0 +1,98 @@
package lem
import (
"encoding/json"
"flag"
"fmt"
"log"
"os"
"path/filepath"
)
// RunApprove is the CLI entry point for the approve command.
// Filters scored expansion responses by quality threshold and exports
// approved ones as chat-format training JSONL.
func RunApprove(args []string) {
fs := flag.NewFlagSet("approve", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
output := fs.String("output", "", "Output JSONL file (defaults to expansion-approved.jsonl in db dir)")
threshold := fs.Float64("threshold", 6.0, "Min judge average to approve (default: 6.0)")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
if *output == "" {
*output = filepath.Join(filepath.Dir(*dbPath), "expansion-approved.jsonl")
}
db, err := OpenDB(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
// Query approved responses: heuristic passed AND (judge passed OR not yet judge-scored).
rows, err := db.conn.Query(`
SELECT r.idx, r.seed_id, r.region, r.domain, r.prompt, r.response,
r.gen_time, r.model, s.heuristic_score
FROM expansion_raw r
JOIN expansion_scores s ON r.idx = s.idx
WHERE s.heuristic_pass = true
AND (s.judge_pass = true OR s.judge_pass IS NULL)
ORDER BY r.idx
`)
if err != nil {
log.Fatalf("query approved: %v (have you run scoring?)", err)
}
defer rows.Close()
f, err := os.Create(*output)
if err != nil {
log.Fatalf("create output: %v", err)
}
defer f.Close()
enc := json.NewEncoder(f)
count := 0
regionSet := make(map[string]bool)
domainSet := make(map[string]bool)
for rows.Next() {
var idx int
var seedID, region, domain, prompt, response, model string
var genTime, score float64
if err := rows.Scan(&idx, &seedID, &region, &domain, &prompt, &response, &genTime, &model, &score); err != nil {
log.Fatalf("scan: %v", err)
}
example := TrainingExample{
Messages: []ChatMessage{
{Role: "user", Content: prompt},
{Role: "assistant", Content: response},
},
}
if err := enc.Encode(example); err != nil {
log.Fatalf("encode: %v", err)
}
regionSet[region] = true
domainSet[domain] = true
count++
}
_ = *threshold // threshold used in query above for future judge scoring
fmt.Printf("Approved: %d responses (threshold: heuristic > 0)\n", count)
fmt.Printf("Exported: %s\n", *output)
fmt.Printf(" Regions: %d, Domains: %d\n", len(regionSet), len(domainSet))
}

139
pkg/lem/consolidate.go Normal file
View file

@ -0,0 +1,139 @@
package lem
import (
"bufio"
"encoding/json"
"flag"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
)
// RunConsolidate is the CLI entry point for the consolidate command.
// Pulls all worker JSONLs from M3, merges them, deduplicates on idx,
// and writes a single merged file.
func RunConsolidate(args []string) {
fs := flag.NewFlagSet("consolidate", flag.ExitOnError)
remoteHost := fs.String("host", "m3", "SSH host for remote files")
remotePath := fs.String("remote", "/Volumes/Data/lem/responses", "Remote directory for JSONL files")
pattern := fs.String("pattern", "gold*.jsonl", "File glob pattern")
outputDir := fs.String("output", "", "Output directory (defaults to ./responses)")
merged := fs.String("merged", "", "Merged output file (defaults to gold-merged.jsonl in output dir)")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *outputDir == "" {
*outputDir = "responses"
}
if err := os.MkdirAll(*outputDir, 0755); err != nil {
log.Fatalf("create output dir: %v", err)
}
// List remote files.
fmt.Println("Pulling responses from remote...")
listCmd := exec.Command("ssh", *remoteHost, fmt.Sprintf("ls %s/%s", *remotePath, *pattern))
listOutput, err := listCmd.Output()
if err != nil {
log.Fatalf("list remote files: %v", err)
}
remoteFiles := strings.Split(strings.TrimSpace(string(listOutput)), "\n")
var validFiles []string
for _, f := range remoteFiles {
f = strings.TrimSpace(f)
if f != "" {
validFiles = append(validFiles, f)
}
}
fmt.Printf(" Found %d JSONL files on %s\n", len(validFiles), *remoteHost)
// Pull files.
for _, rf := range validFiles {
local := filepath.Join(*outputDir, filepath.Base(rf))
scpCmd := exec.Command("scp", fmt.Sprintf("%s:%s", *remoteHost, rf), local)
if err := scpCmd.Run(); err != nil {
log.Printf("warning: failed to pull %s: %v", rf, err)
continue
}
// Count lines.
f, err := os.Open(local)
if err != nil {
continue
}
lines := 0
scanner := bufio.NewScanner(f)
for scanner.Scan() {
lines++
}
f.Close()
fmt.Printf(" %s: %d records\n", filepath.Base(rf), lines)
}
// Merge and deduplicate on idx.
seen := make(map[int]json.RawMessage)
skipped := 0
matches, _ := filepath.Glob(filepath.Join(*outputDir, *pattern))
sort.Strings(matches)
for _, local := range matches {
f, err := os.Open(local)
if err != nil {
continue
}
scanner := bufio.NewScanner(f)
scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
for scanner.Scan() {
line := scanner.Text()
var rec struct {
Idx *int `json:"idx"`
}
if err := json.Unmarshal([]byte(line), &rec); err != nil {
skipped++
continue
}
if rec.Idx == nil {
skipped++
continue
}
if _, exists := seen[*rec.Idx]; !exists {
seen[*rec.Idx] = json.RawMessage(line)
}
}
f.Close()
}
if skipped > 0 {
fmt.Printf(" Skipped %d records without idx\n", skipped)
}
// Sort by idx and write merged file.
if *merged == "" {
*merged = filepath.Join(*outputDir, "..", "gold-merged.jsonl")
}
idxs := make([]int, 0, len(seen))
for idx := range seen {
idxs = append(idxs, idx)
}
sort.Ints(idxs)
f, err := os.Create(*merged)
if err != nil {
log.Fatalf("create merged file: %v", err)
}
for _, idx := range idxs {
f.Write(seen[idx])
f.WriteString("\n")
}
f.Close()
fmt.Printf("\nMerged: %d unique examples → %s\n", len(seen), *merged)
}

135
pkg/lem/coverage.go Normal file
View file

@ -0,0 +1,135 @@
package lem
import (
"flag"
"fmt"
"log"
"os"
"strings"
)
// RunCoverage is the CLI entry point for the coverage command.
// Analyzes seed coverage and shows underrepresented areas.
func RunCoverage(args []string) {
fs := flag.NewFlagSet("coverage", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
db, err := OpenDB(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
var total int
if err := db.conn.QueryRow("SELECT count(*) FROM seeds").Scan(&total); err != nil {
log.Fatalf("No seeds table. Run: lem import-all first")
}
fmt.Println("LEM Seed Coverage Analysis")
fmt.Println("==================================================")
fmt.Printf("\nTotal seeds: %d\n", total)
// Region distribution.
fmt.Println("\nRegion distribution (underrepresented first):")
rows, err := db.conn.Query(`
SELECT
CASE
WHEN region LIKE '%cn%' THEN 'cn (Chinese)'
WHEN region LIKE '%en-%' OR region LIKE '%en_para%' OR region LIKE '%para%' THEN 'en (English)'
WHEN region LIKE '%ru%' THEN 'ru (Russian)'
WHEN region LIKE '%de%' AND region NOT LIKE '%deten%' THEN 'de (German)'
WHEN region LIKE '%es%' THEN 'es (Spanish)'
WHEN region LIKE '%fr%' THEN 'fr (French)'
WHEN region LIKE '%latam%' THEN 'latam (LatAm)'
WHEN region LIKE '%africa%' THEN 'africa'
WHEN region LIKE '%eu%' THEN 'eu (European)'
WHEN region LIKE '%me%' AND region NOT LIKE '%premium%' THEN 'me (MidEast)'
WHEN region LIKE '%multi%' THEN 'multilingual'
WHEN region LIKE '%weak%' THEN 'weak-langs'
ELSE 'other'
END AS lang_group,
count(*) AS n,
count(DISTINCT domain) AS domains
FROM seeds GROUP BY lang_group ORDER BY n ASC
`)
if err != nil {
log.Fatalf("query regions: %v", err)
}
type regionRow struct {
group string
n int
domains int
}
var regionRows []regionRow
for rows.Next() {
var r regionRow
rows.Scan(&r.group, &r.n, &r.domains)
regionRows = append(regionRows, r)
}
rows.Close()
avg := float64(total) / float64(len(regionRows))
for _, r := range regionRows {
barLen := int(float64(r.n) / avg * 10)
if barLen > 40 {
barLen = 40
}
bar := strings.Repeat("#", barLen)
gap := ""
if float64(r.n) < avg*0.5 {
gap = " <- UNDERREPRESENTED"
}
fmt.Printf(" %-22s %6d (%4d domains) %s%s\n", r.group, r.n, r.domains, bar, gap)
}
// Top 10 domains.
fmt.Println("\nTop 10 domains (most seeds):")
topRows, err := db.conn.Query(`
SELECT domain, count(*) AS n FROM seeds
WHERE domain != '' GROUP BY domain ORDER BY n DESC LIMIT 10
`)
if err == nil {
for topRows.Next() {
var domain string
var n int
topRows.Scan(&domain, &n)
fmt.Printf(" %-40s %5d\n", domain, n)
}
topRows.Close()
}
// Bottom 10 domains.
fmt.Println("\nBottom 10 domains (fewest seeds, min 5):")
bottomRows, err := db.conn.Query(`
SELECT domain, count(*) AS n FROM seeds
WHERE domain != '' GROUP BY domain HAVING count(*) >= 5 ORDER BY n ASC LIMIT 10
`)
if err == nil {
for bottomRows.Next() {
var domain string
var n int
bottomRows.Scan(&domain, &n)
fmt.Printf(" %-40s %5d\n", domain, n)
}
bottomRows.Close()
}
fmt.Println("\nSuggested expansion areas:")
fmt.Println(" - Japanese, Korean, Thai, Vietnamese (no seeds found)")
fmt.Println(" - Hindi/Urdu, Bengali, Tamil (South Asian)")
fmt.Println(" - Swahili, Yoruba, Amharic (Sub-Saharan Africa)")
fmt.Println(" - Indigenous languages (Quechua, Nahuatl, Aymara)")
}

103
pkg/lem/expand_status.go Normal file
View file

@ -0,0 +1,103 @@
package lem
import (
"flag"
"fmt"
"log"
"os"
)
// RunExpandStatus is the CLI entry point for the expand-status command.
// Shows the expansion pipeline progress from DuckDB.
func RunExpandStatus(args []string) {
fs := flag.NewFlagSet("expand-status", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
db, err := OpenDB(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
fmt.Println("LEM Expansion Pipeline Status")
fmt.Println("==================================================")
// Expansion prompts.
var epTotal, epPending int
err = db.conn.QueryRow("SELECT count(*) FROM expansion_prompts").Scan(&epTotal)
if err != nil {
fmt.Println(" Expansion prompts: not created (run: lem normalize)")
db.Close()
return
}
db.conn.QueryRow("SELECT count(*) FROM expansion_prompts WHERE status = 'pending'").Scan(&epPending)
fmt.Printf(" Expansion prompts: %d total, %d pending\n", epTotal, epPending)
// Generated responses.
var generated int
err = db.conn.QueryRow("SELECT count(*) FROM expansion_raw").Scan(&generated)
if err != nil {
fmt.Println(" Generated: 0 (run: lem expand)")
} else {
rows, _ := db.conn.Query("SELECT model, count(*) FROM expansion_raw GROUP BY model")
if rows != nil {
var parts []string
for rows.Next() {
var model string
var n int
rows.Scan(&model, &n)
parts = append(parts, fmt.Sprintf("%s: %d", model, n))
}
rows.Close()
if len(parts) > 0 {
fmt.Printf(" Generated: %d (%s)\n", generated, joinStrings(parts, ", "))
} else {
fmt.Printf(" Generated: %d\n", generated)
}
}
}
// Scored.
var scored, hPassed, jScored, jPassed int
err = db.conn.QueryRow("SELECT count(*) FROM expansion_scores").Scan(&scored)
if err != nil {
fmt.Println(" Scored: 0 (run: lem score --tier 1)")
} else {
db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE heuristic_pass = true").Scan(&hPassed)
fmt.Printf(" Heuristic scored: %d (%d passed)\n", scored, hPassed)
db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE judge_average IS NOT NULL").Scan(&jScored)
db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE judge_pass = true").Scan(&jPassed)
if jScored > 0 {
fmt.Printf(" Judge scored: %d (%d passed)\n", jScored, jPassed)
}
}
// Pipeline progress.
if epTotal > 0 && generated > 0 {
genPct := float64(generated) / float64(epTotal) * 100
fmt.Printf("\n Progress: %.1f%% generated\n", genPct)
}
// Golden set context.
var golden int
err = db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&golden)
if err == nil {
fmt.Printf("\n Golden set: %d / %d\n", golden, targetTotal)
if generated > 0 {
fmt.Printf(" Combined: %d total examples\n", golden+generated)
}
}
}

453
pkg/lem/import.go Normal file
View file

@ -0,0 +1,453 @@
package lem
import (
"bufio"
"encoding/json"
"flag"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
)
// RunImport is the CLI entry point for the import-all command.
// Imports ALL LEM data into DuckDB: prompts, Gemini responses, golden set,
// training examples, benchmarks, validations, and seeds.
func RunImport(args []string) {
fs := flag.NewFlagSet("import-all", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
skipM3 := fs.Bool("skip-m3", false, "Skip pulling data from M3")
dataDir := fs.String("data-dir", "", "Local data directory (defaults to db directory)")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
if *dataDir == "" {
*dataDir = filepath.Dir(*dbPath)
}
db, err := OpenDBReadWrite(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
totals := make(map[string]int)
// ── 1. Golden set ──
goldenPath := filepath.Join(*dataDir, "gold-15k.jsonl")
if !*skipM3 {
fmt.Println(" Pulling golden set from M3...")
scpCmd := exec.Command("scp", "m3:/Volumes/Data/lem/responses/gold-15k.jsonl", goldenPath)
if err := scpCmd.Run(); err != nil {
log.Printf(" WARNING: could not pull golden set from M3: %v", err)
}
}
if _, err := os.Stat(goldenPath); err == nil {
db.conn.Exec("DROP TABLE IF EXISTS golden_set")
_, err := db.conn.Exec(fmt.Sprintf(`
CREATE TABLE golden_set AS
SELECT
idx::INT AS idx,
seed_id::VARCHAR AS seed_id,
domain::VARCHAR AS domain,
voice::VARCHAR AS voice,
prompt::VARCHAR AS prompt,
response::VARCHAR AS response,
gen_time::DOUBLE AS gen_time,
length(response)::INT AS char_count,
length(response) - length(replace(response, ' ', '')) + 1 AS word_count
FROM read_json_auto('%s', maximum_object_size=1048576)
`, escapeSQLPath(goldenPath)))
if err != nil {
log.Printf(" WARNING: golden set import failed: %v", err)
} else {
var n int
db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&n)
totals["golden_set"] = n
fmt.Printf(" golden_set: %d rows\n", n)
}
}
// ── 2. Training examples ──
trainingDirs := []struct {
name string
files []string
}{
{"training", []string{"training/train.jsonl", "training/valid.jsonl", "training/test.jsonl"}},
{"training-2k", []string{"training-2k/train.jsonl", "training-2k/valid.jsonl", "training-2k/test.jsonl"}},
{"training-expanded", []string{"training-expanded/train.jsonl", "training-expanded/valid.jsonl"}},
{"training-book", []string{"training-book/train.jsonl", "training-book/valid.jsonl", "training-book/test.jsonl"}},
{"training-conv", []string{"training-conv/train.jsonl", "training-conv/valid.jsonl", "training-conv/test.jsonl"}},
{"gold-full", []string{"gold-full/train.jsonl", "gold-full/valid.jsonl"}},
{"sovereignty-gold", []string{"sovereignty-gold/train.jsonl", "sovereignty-gold/valid.jsonl"}},
{"composure-lessons", []string{"composure-lessons/train.jsonl", "composure-lessons/valid.jsonl"}},
{"watts-full", []string{"watts-full/train.jsonl", "watts-full/valid.jsonl"}},
{"watts-expanded", []string{"watts-expanded/train.jsonl", "watts-expanded/valid.jsonl"}},
{"watts-composure", []string{"watts-composure-merged/train.jsonl", "watts-composure-merged/valid.jsonl"}},
{"western-fresh", []string{"western-fresh/train.jsonl", "western-fresh/valid.jsonl"}},
{"deepseek-soak", []string{"deepseek-western-soak/train.jsonl", "deepseek-western-soak/valid.jsonl"}},
{"russian-bridge", []string{"russian-bridge/train.jsonl", "russian-bridge/valid.jsonl"}},
}
trainingLocal := filepath.Join(*dataDir, "training")
os.MkdirAll(trainingLocal, 0755)
if !*skipM3 {
fmt.Println(" Pulling training sets from M3...")
for _, td := range trainingDirs {
for _, rel := range td.files {
local := filepath.Join(trainingLocal, rel)
os.MkdirAll(filepath.Dir(local), 0755)
scpCmd := exec.Command("scp", fmt.Sprintf("m3:/Volumes/Data/lem/%s", rel), local)
scpCmd.Run() // ignore errors, file might not exist
}
}
}
db.conn.Exec("DROP TABLE IF EXISTS training_examples")
db.conn.Exec(`
CREATE TABLE training_examples (
source VARCHAR,
split VARCHAR,
prompt TEXT,
response TEXT,
num_turns INT,
full_messages TEXT,
char_count INT
)
`)
trainingTotal := 0
for _, td := range trainingDirs {
for _, rel := range td.files {
local := filepath.Join(trainingLocal, rel)
if _, err := os.Stat(local); os.IsNotExist(err) {
continue
}
split := "train"
if strings.Contains(rel, "valid") {
split = "valid"
} else if strings.Contains(rel, "test") {
split = "test"
}
n := importTrainingFile(db, local, td.name, split)
trainingTotal += n
}
}
totals["training_examples"] = trainingTotal
fmt.Printf(" training_examples: %d rows\n", trainingTotal)
// ── 3. Benchmark results ──
benchLocal := filepath.Join(*dataDir, "benchmarks")
os.MkdirAll(benchLocal, 0755)
if !*skipM3 {
fmt.Println(" Pulling benchmarks from M3...")
for _, bname := range []string{"truthfulqa", "gsm8k", "do_not_answer", "toxigen"} {
scpCmd := exec.Command("scp",
fmt.Sprintf("m3:/Volumes/Data/lem/benchmarks/%s.jsonl", bname),
filepath.Join(benchLocal, bname+".jsonl"))
scpCmd.Run()
}
for _, subdir := range []string{"results", "scale_results", "cross_arch_results", "deepseek-r1-7b"} {
localSub := filepath.Join(benchLocal, subdir)
os.MkdirAll(localSub, 0755)
scpCmd := exec.Command("scp", "-r",
fmt.Sprintf("m3:/Volumes/Data/lem/benchmarks/%s/", subdir),
filepath.Join(benchLocal)+"/")
scpCmd.Run()
}
}
db.conn.Exec("DROP TABLE IF EXISTS benchmark_results")
db.conn.Exec(`
CREATE TABLE benchmark_results (
source VARCHAR, id VARCHAR, benchmark VARCHAR, model VARCHAR,
prompt TEXT, response TEXT, elapsed_seconds DOUBLE, domain VARCHAR
)
`)
benchTotal := 0
for _, subdir := range []string{"results", "scale_results", "cross_arch_results", "deepseek-r1-7b"} {
resultDir := filepath.Join(benchLocal, subdir)
matches, _ := filepath.Glob(filepath.Join(resultDir, "*.jsonl"))
for _, jf := range matches {
n := importBenchmarkFile(db, jf, subdir)
benchTotal += n
}
}
// Also import standalone benchmark files.
for _, bfile := range []string{"lem_bench", "lem_ethics", "lem_ethics_allen", "instruction_tuned", "abliterated", "base_pt"} {
local := filepath.Join(benchLocal, bfile+".jsonl")
if _, err := os.Stat(local); os.IsNotExist(err) {
if !*skipM3 {
scpCmd := exec.Command("scp",
fmt.Sprintf("m3:/Volumes/Data/lem/benchmark/%s.jsonl", bfile), local)
scpCmd.Run()
}
}
if _, err := os.Stat(local); err == nil {
n := importBenchmarkFile(db, local, "benchmark")
benchTotal += n
}
}
totals["benchmark_results"] = benchTotal
fmt.Printf(" benchmark_results: %d rows\n", benchTotal)
// ── 4. Benchmark questions ──
db.conn.Exec("DROP TABLE IF EXISTS benchmark_questions")
db.conn.Exec(`
CREATE TABLE benchmark_questions (
benchmark VARCHAR, id VARCHAR, question TEXT,
best_answer TEXT, correct_answers TEXT, incorrect_answers TEXT, category VARCHAR
)
`)
benchQTotal := 0
for _, bname := range []string{"truthfulqa", "gsm8k", "do_not_answer", "toxigen"} {
local := filepath.Join(benchLocal, bname+".jsonl")
if _, err := os.Stat(local); err == nil {
n := importBenchmarkQuestions(db, local, bname)
benchQTotal += n
}
}
totals["benchmark_questions"] = benchQTotal
fmt.Printf(" benchmark_questions: %d rows\n", benchQTotal)
// ── 5. Seeds ──
db.conn.Exec("DROP TABLE IF EXISTS seeds")
db.conn.Exec(`
CREATE TABLE seeds (
source_file VARCHAR, region VARCHAR, seed_id VARCHAR, domain VARCHAR, prompt TEXT
)
`)
seedTotal := 0
seedDirs := []string{filepath.Join(*dataDir, "seeds"), "/tmp/lem-data/seeds", "/tmp/lem-repo/seeds"}
for _, seedDir := range seedDirs {
if _, err := os.Stat(seedDir); os.IsNotExist(err) {
continue
}
n := importSeeds(db, seedDir)
seedTotal += n
}
totals["seeds"] = seedTotal
fmt.Printf(" seeds: %d rows\n", seedTotal)
// ── Summary ──
grandTotal := 0
fmt.Printf("\n%s\n", strings.Repeat("=", 50))
fmt.Println("LEM Database Import Complete")
fmt.Println(strings.Repeat("=", 50))
for table, count := range totals {
fmt.Printf(" %-25s %8d\n", table, count)
grandTotal += count
}
fmt.Printf(" %s\n", strings.Repeat("─", 35))
fmt.Printf(" %-25s %8d\n", "TOTAL", grandTotal)
fmt.Printf("\nDatabase: %s\n", *dbPath)
}
func importTrainingFile(db *DB, path, source, split string) int {
f, err := os.Open(path)
if err != nil {
return 0
}
defer f.Close()
count := 0
scanner := bufio.NewScanner(f)
scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
for scanner.Scan() {
var rec struct {
Messages []ChatMessage `json:"messages"`
}
if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
continue
}
prompt := ""
response := ""
assistantCount := 0
for _, m := range rec.Messages {
if m.Role == "user" && prompt == "" {
prompt = m.Content
}
if m.Role == "assistant" {
if response == "" {
response = m.Content
}
assistantCount++
}
}
msgsJSON, _ := json.Marshal(rec.Messages)
db.conn.Exec(`INSERT INTO training_examples VALUES (?, ?, ?, ?, ?, ?, ?)`,
source, split, prompt, response, assistantCount, string(msgsJSON), len(response))
count++
}
return count
}
func importBenchmarkFile(db *DB, path, source string) int {
f, err := os.Open(path)
if err != nil {
return 0
}
defer f.Close()
count := 0
scanner := bufio.NewScanner(f)
scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
for scanner.Scan() {
var rec map[string]interface{}
if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
continue
}
db.conn.Exec(`INSERT INTO benchmark_results VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
source,
fmt.Sprintf("%v", rec["id"]),
strOrEmpty(rec, "benchmark"),
strOrEmpty(rec, "model"),
strOrEmpty(rec, "prompt"),
strOrEmpty(rec, "response"),
floatOrZero(rec, "elapsed_seconds"),
strOrEmpty(rec, "domain"),
)
count++
}
return count
}
func importBenchmarkQuestions(db *DB, path, benchmark string) int {
f, err := os.Open(path)
if err != nil {
return 0
}
defer f.Close()
count := 0
scanner := bufio.NewScanner(f)
scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
for scanner.Scan() {
var rec map[string]interface{}
if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
continue
}
correctJSON, _ := json.Marshal(rec["correct_answers"])
incorrectJSON, _ := json.Marshal(rec["incorrect_answers"])
db.conn.Exec(`INSERT INTO benchmark_questions VALUES (?, ?, ?, ?, ?, ?, ?)`,
benchmark,
fmt.Sprintf("%v", rec["id"]),
strOrEmpty(rec, "question"),
strOrEmpty(rec, "best_answer"),
string(correctJSON),
string(incorrectJSON),
strOrEmpty(rec, "category"),
)
count++
}
return count
}
func importSeeds(db *DB, seedDir string) int {
count := 0
filepath.Walk(seedDir, func(path string, info os.FileInfo, err error) error {
if err != nil || info.IsDir() || !strings.HasSuffix(path, ".json") {
return nil
}
data, err := os.ReadFile(path)
if err != nil {
return nil
}
rel, _ := filepath.Rel(seedDir, path)
region := strings.TrimSuffix(filepath.Base(path), ".json")
// Try parsing as array or object with prompts/seeds field.
var seedsList []interface{}
var raw interface{}
if err := json.Unmarshal(data, &raw); err != nil {
return nil
}
switch v := raw.(type) {
case []interface{}:
seedsList = v
case map[string]interface{}:
if prompts, ok := v["prompts"].([]interface{}); ok {
seedsList = prompts
} else if seeds, ok := v["seeds"].([]interface{}); ok {
seedsList = seeds
}
}
for _, s := range seedsList {
switch seed := s.(type) {
case map[string]interface{}:
prompt := strOrEmpty(seed, "prompt")
if prompt == "" {
prompt = strOrEmpty(seed, "text")
}
if prompt == "" {
prompt = strOrEmpty(seed, "question")
}
db.conn.Exec(`INSERT INTO seeds VALUES (?, ?, ?, ?, ?)`,
rel, region,
strOrEmpty(seed, "seed_id"),
strOrEmpty(seed, "domain"),
prompt,
)
count++
case string:
db.conn.Exec(`INSERT INTO seeds VALUES (?, ?, ?, ?, ?)`,
rel, region, "", "", seed)
count++
}
}
return nil
})
return count
}
func strOrEmpty(m map[string]interface{}, key string) string {
if v, ok := m[key]; ok {
return fmt.Sprintf("%v", v)
}
return ""
}
func floatOrZero(m map[string]interface{}, key string) float64 {
if v, ok := m[key]; ok {
if f, ok := v.(float64); ok {
return f
}
}
return 0
}
func escapeSQLPath(p string) string {
return strings.ReplaceAll(p, "'", "''")
}

97
pkg/lem/inventory.go Normal file
View file

@ -0,0 +1,97 @@
package lem
import (
"flag"
"fmt"
"log"
"os"
)
// RunInventory is the CLI entry point for the inventory command.
// Shows row counts and summary stats for all tables in the DuckDB database.
func RunInventory(args []string) {
fs := flag.NewFlagSet("inventory", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
db, err := OpenDB(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
counts, err := db.TableCounts()
if err != nil {
log.Fatalf("table counts: %v", err)
}
fmt.Printf("LEM Database Inventory (%s)\n", *dbPath)
fmt.Println("============================================================")
grandTotal := 0
for table, count := range counts {
detail := ""
switch table {
case "golden_set":
pct := float64(count) / float64(targetTotal) * 100
detail = fmt.Sprintf(" (%.1f%% of %d target)", pct, targetTotal)
case "training_examples":
var sources int
db.conn.QueryRow("SELECT COUNT(DISTINCT source) FROM training_examples").Scan(&sources)
detail = fmt.Sprintf(" (%d sources)", sources)
case "prompts":
var domains, voices int
db.conn.QueryRow("SELECT COUNT(DISTINCT domain) FROM prompts").Scan(&domains)
db.conn.QueryRow("SELECT COUNT(DISTINCT voice) FROM prompts").Scan(&voices)
detail = fmt.Sprintf(" (%d domains, %d voices)", domains, voices)
case "gemini_responses":
rows, _ := db.conn.Query("SELECT source_model, count(*) FROM gemini_responses GROUP BY source_model")
if rows != nil {
var parts []string
for rows.Next() {
var model string
var n int
rows.Scan(&model, &n)
parts = append(parts, fmt.Sprintf("%s: %d", model, n))
}
rows.Close()
if len(parts) > 0 {
detail = fmt.Sprintf(" (%s)", joinStrings(parts, ", "))
}
}
case "benchmark_results":
var sources int
db.conn.QueryRow("SELECT COUNT(DISTINCT source) FROM benchmark_results").Scan(&sources)
detail = fmt.Sprintf(" (%d categories)", sources)
}
fmt.Printf(" %-25s %8d%s\n", table, count, detail)
grandTotal += count
}
fmt.Printf(" %-25s\n", "────────────────────────────────────────")
fmt.Printf(" %-25s %8d\n", "TOTAL", grandTotal)
}
func joinStrings(parts []string, sep string) string {
result := ""
for i, p := range parts {
if i > 0 {
result += sep
}
result += p
}
return result
}

148
pkg/lem/normalize.go Normal file
View file

@ -0,0 +1,148 @@
package lem
import (
"flag"
"fmt"
"log"
"os"
)
// RunNormalize is the CLI entry point for the normalize command.
// Normalizes seeds into the expansion_prompts table, deduplicating against
// the golden set and existing prompts. Assigns priority based on domain
// coverage (underrepresented domains first).
func RunNormalize(args []string) {
fs := flag.NewFlagSet("normalize", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
minLen := fs.Int("min-length", 50, "Minimum prompt length in characters")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
db, err := OpenDBReadWrite(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
// Check source tables.
var seedCount int
if err := db.conn.QueryRow("SELECT count(*) FROM seeds").Scan(&seedCount); err != nil {
log.Fatalf("No seeds table. Run: lem import-all first")
}
fmt.Printf("Seeds table: %d rows\n", seedCount)
// Drop and recreate expansion_prompts.
_, err = db.conn.Exec("DROP TABLE IF EXISTS expansion_prompts")
if err != nil {
log.Fatalf("drop expansion_prompts: %v", err)
}
// Deduplicate: remove seeds whose prompt already appears in prompts or golden_set.
_, err = db.conn.Exec(fmt.Sprintf(`
CREATE TABLE expansion_prompts AS
WITH unique_seeds AS (
SELECT
ROW_NUMBER() OVER (ORDER BY region, domain, seed_id) AS idx,
seed_id,
region,
domain,
prompt
FROM (
SELECT DISTINCT ON (prompt)
seed_id, region, domain, prompt
FROM seeds
WHERE length(prompt) >= %d
ORDER BY prompt, seed_id
)
),
existing_prompts AS (
SELECT prompt FROM prompts
UNION ALL
SELECT prompt FROM golden_set
)
SELECT
us.idx,
us.seed_id,
us.region,
us.domain,
'en' AS language,
us.prompt,
'' AS prompt_en,
0 AS priority,
'pending' AS status
FROM unique_seeds us
WHERE NOT EXISTS (
SELECT 1 FROM existing_prompts ep
WHERE ep.prompt = us.prompt
)
`, *minLen))
if err != nil {
log.Fatalf("create expansion_prompts: %v", err)
}
var total, domains, regions int
db.conn.QueryRow("SELECT count(*) FROM expansion_prompts").Scan(&total)
db.conn.QueryRow("SELECT count(DISTINCT domain) FROM expansion_prompts").Scan(&domains)
db.conn.QueryRow("SELECT count(DISTINCT region) FROM expansion_prompts").Scan(&regions)
// Assign priority based on domain coverage.
_, err = db.conn.Exec(`
UPDATE expansion_prompts SET priority = (
SELECT RANK() OVER (ORDER BY cnt ASC)
FROM (
SELECT domain, count(*) AS cnt
FROM expansion_prompts GROUP BY domain
) domain_counts
WHERE domain_counts.domain = expansion_prompts.domain
)
`)
if err != nil {
log.Printf("warning: priority assignment failed: %v", err)
}
fmt.Printf("\nExpansion Prompts: %d\n", total)
fmt.Printf(" Domains: %d\n", domains)
fmt.Printf(" Regions: %d\n", regions)
// Show region distribution.
fmt.Println("\n By region group:")
rows, err := db.conn.Query(`
SELECT
CASE
WHEN region LIKE '%cn%' THEN 'cn'
WHEN region LIKE '%en-%' OR region LIKE '%en_para%' OR region LIKE '%para%' THEN 'en'
WHEN region LIKE '%ru%' THEN 'ru'
WHEN region LIKE '%de%' AND region NOT LIKE '%deten%' THEN 'de'
WHEN region LIKE '%es%' THEN 'es'
WHEN region LIKE '%fr%' THEN 'fr'
WHEN region LIKE '%latam%' THEN 'latam'
WHEN region LIKE '%africa%' THEN 'africa'
WHEN region LIKE '%eu%' THEN 'eu'
WHEN region LIKE '%me%' AND region NOT LIKE '%premium%' THEN 'me'
ELSE 'other'
END AS lang_group,
count(*) AS n
FROM expansion_prompts GROUP BY lang_group ORDER BY n DESC
`)
if err == nil {
for rows.Next() {
var group string
var n int
rows.Scan(&group, &n)
fmt.Printf(" %-15s %6d\n", group, n)
}
rows.Close()
}
fmt.Printf("\nNormalization complete: %d expansion prompts from %d seeds\n", total, seedCount)
}

152
pkg/lem/query.go Normal file
View file

@ -0,0 +1,152 @@
package lem
import (
"encoding/json"
"flag"
"fmt"
"log"
"os"
"strings"
)
// RunQuery is the CLI entry point for the query command.
// Runs ad-hoc SQL against the DuckDB database.
func RunQuery(args []string) {
fs := flag.NewFlagSet("query", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
jsonOutput := fs.Bool("json", false, "Output as JSON instead of table")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
sql := strings.Join(fs.Args(), " ")
if sql == "" {
fmt.Fprintln(os.Stderr, "error: SQL query required as positional argument")
fmt.Fprintln(os.Stderr, " lem query --db path.duckdb \"SELECT * FROM golden_set LIMIT 5\"")
fmt.Fprintln(os.Stderr, " lem query --db path.duckdb \"domain = 'ethics'\" (auto-wraps as WHERE clause)")
os.Exit(1)
}
// Auto-wrap non-SELECT queries as WHERE clauses.
trimmed := strings.TrimSpace(strings.ToUpper(sql))
if !strings.HasPrefix(trimmed, "SELECT") && !strings.HasPrefix(trimmed, "SHOW") &&
!strings.HasPrefix(trimmed, "DESCRIBE") && !strings.HasPrefix(trimmed, "EXPLAIN") {
sql = "SELECT * FROM golden_set WHERE " + sql + " LIMIT 20"
}
db, err := OpenDB(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
rows, err := db.conn.Query(sql)
if err != nil {
log.Fatalf("query: %v", err)
}
defer rows.Close()
cols, err := rows.Columns()
if err != nil {
log.Fatalf("columns: %v", err)
}
var results []map[string]interface{}
for rows.Next() {
values := make([]interface{}, len(cols))
ptrs := make([]interface{}, len(cols))
for i := range values {
ptrs[i] = &values[i]
}
if err := rows.Scan(ptrs...); err != nil {
log.Fatalf("scan: %v", err)
}
row := make(map[string]interface{})
for i, col := range cols {
v := values[i]
// Convert []byte to string for readability.
if b, ok := v.([]byte); ok {
v = string(b)
}
row[col] = v
}
results = append(results, row)
}
if *jsonOutput {
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
enc.Encode(results)
return
}
// Table output.
if len(results) == 0 {
fmt.Println("(no results)")
return
}
// Calculate column widths.
widths := make(map[string]int)
for _, col := range cols {
widths[col] = len(col)
}
for _, row := range results {
for _, col := range cols {
s := fmt.Sprintf("%v", row[col])
if len(s) > 60 {
s = s[:57] + "..."
}
if len(s) > widths[col] {
widths[col] = len(s)
}
}
}
// Print header.
for i, col := range cols {
if i > 0 {
fmt.Print(" ")
}
fmt.Printf("%-*s", widths[col], col)
}
fmt.Println()
// Print separator.
for i, col := range cols {
if i > 0 {
fmt.Print(" ")
}
fmt.Print(strings.Repeat("─", widths[col]))
}
fmt.Println()
// Print rows.
for _, row := range results {
for i, col := range cols {
if i > 0 {
fmt.Print(" ")
}
s := fmt.Sprintf("%v", row[col])
if len(s) > 60 {
s = s[:57] + "..."
}
fmt.Printf("%-*s", widths[col], s)
}
fmt.Println()
}
fmt.Printf("\n(%d rows)\n", len(results))
}

111
pkg/lem/seed_influx.go Normal file
View file

@ -0,0 +1,111 @@
package lem
import (
"flag"
"fmt"
"log"
"os"
"strings"
)
// RunSeedInflux is the CLI entry point for the seed-influx command.
// Seeds InfluxDB golden_gen measurement from DuckDB golden_set data.
// One-time migration tool for bootstrapping InfluxDB from existing data.
func RunSeedInflux(args []string) {
fs := flag.NewFlagSet("seed-influx", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
influxURL := fs.String("influx", "", "InfluxDB URL")
influxDB := fs.String("influx-db", "", "InfluxDB database name")
force := fs.Bool("force", false, "Re-seed even if InfluxDB already has data")
batchSize := fs.Int("batch-size", 500, "Lines per InfluxDB write batch")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
db, err := OpenDB(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
var total int
if err := db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&total); err != nil {
log.Fatalf("No golden_set table. Run ingest first.")
}
influx := NewInfluxClient(*influxURL, *influxDB)
// Check existing count in InfluxDB.
existing := 0
rows, err := influx.QuerySQL("SELECT count(DISTINCT i) AS n FROM gold_gen")
if err == nil && len(rows) > 0 {
if n, ok := rows[0]["n"].(float64); ok {
existing = int(n)
}
}
fmt.Printf("DuckDB has %d records, InfluxDB golden_gen has %d\n", total, existing)
if existing >= total && !*force {
fmt.Println("InfluxDB already has all records. Use --force to re-seed.")
return
}
// Read all rows.
dbRows, err := db.conn.Query(`
SELECT idx, seed_id, domain, voice, gen_time, char_count
FROM golden_set ORDER BY idx
`)
if err != nil {
log.Fatalf("query golden_set: %v", err)
}
defer dbRows.Close()
var lines []string
written := 0
for dbRows.Next() {
var idx, charCount int
var seedID, domain, voice string
var genTime float64
if err := dbRows.Scan(&idx, &seedID, &domain, &voice, &genTime, &charCount); err != nil {
log.Fatalf("scan: %v", err)
}
sid := strings.ReplaceAll(seedID, `"`, `\"`)
lp := fmt.Sprintf(`gold_gen,i=%d,w=migration,d=%s,v=%s seed_id="%s",gen_time=%.1f,chars=%di`,
idx, escapeLp(domain), escapeLp(voice), sid, genTime, charCount)
lines = append(lines, lp)
if len(lines) >= *batchSize {
if err := influx.WriteLp(lines); err != nil {
log.Fatalf("write batch at %d: %v", written, err)
}
written += len(lines)
lines = lines[:0]
if written%2000 == 0 {
fmt.Printf(" Seeded %d/%d records\n", written, total)
}
}
}
if len(lines) > 0 {
if err := influx.WriteLp(lines); err != nil {
log.Fatalf("flush: %v", err)
}
written += len(lines)
}
fmt.Printf("Seeded %d golden_gen records into InfluxDB\n", written)
}

225
pkg/lem/tier_score.go Normal file
View file

@ -0,0 +1,225 @@
package lem
import (
"flag"
"fmt"
"log"
"os"
"strings"
)
// RunTierScore is the CLI entry point for the tier-score command.
// Scores expansion responses using tiered quality assessment:
// - Tier 1: Heuristic regex scoring (fast, no API)
// - Tier 2: LEM self-judge (requires trained model)
// - Tier 3: External judge (reserved for borderline cases)
func RunTierScore(args []string) {
fs := flag.NewFlagSet("tier-score", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
tier := fs.Int("tier", 1, "Scoring tier: 1=heuristic, 2=LEM judge, 3=external")
limit := fs.Int("limit", 0, "Max items to score (0=all)")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
db, err := OpenDBReadWrite(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
// Ensure expansion_scores table exists.
db.conn.Exec(`
CREATE TABLE IF NOT EXISTS expansion_scores (
idx INT,
heuristic_score DOUBLE,
heuristic_pass BOOLEAN,
judge_sovereignty DOUBLE,
judge_ethical_depth DOUBLE,
judge_creative DOUBLE,
judge_self_concept DOUBLE,
judge_average DOUBLE,
judge_pass BOOLEAN,
judge_model VARCHAR,
scored_at TIMESTAMP
)
`)
if *tier >= 1 {
runHeuristicTier(db, *limit)
}
if *tier >= 2 {
fmt.Println("\nTier 2 (LEM judge): not yet available — needs trained LEM-27B model")
fmt.Println(" Will score: sovereignty, ethical_depth, creative, self_concept (1-10 each)")
}
if *tier >= 3 {
fmt.Println("\nTier 3 (External judge): reserved for borderline cases")
}
}
func runHeuristicTier(db *DB, limit int) {
// Find unscored responses.
query := `
SELECT r.idx, r.response FROM expansion_raw r
LEFT JOIN expansion_scores s ON r.idx = s.idx
WHERE s.idx IS NULL
ORDER BY r.idx
`
if limit > 0 {
query += fmt.Sprintf(" LIMIT %d", limit)
}
rows, err := db.conn.Query(query)
if err != nil {
log.Fatalf("query unscored: %v", err)
}
defer rows.Close()
type unscoredRow struct {
idx int
response string
}
var unscored []unscoredRow
for rows.Next() {
var r unscoredRow
rows.Scan(&r.idx, &r.response)
unscored = append(unscored, r)
}
if len(unscored) == 0 {
fmt.Println("Tier 1 (heuristic): all responses already scored")
return
}
fmt.Printf("Tier 1 (heuristic): scoring %d responses...\n", len(unscored))
passed := 0
for _, r := range unscored {
score, _ := heuristicExpansionScore(r.response)
isPass := score > 0
if isPass {
passed++
}
db.conn.Exec(`
INSERT INTO expansion_scores (idx, heuristic_score, heuristic_pass, scored_at)
VALUES (?, ?, ?, current_timestamp)
`, r.idx, score, isPass)
}
fmt.Printf(" Scored: %d, Passed: %d, Failed: %d\n", len(unscored), passed, len(unscored)-passed)
if len(unscored) > 0 {
fmt.Printf(" Pass rate: %.1f%%\n", float64(passed)/float64(len(unscored))*100)
}
}
// heuristicExpansionScore applies fast heuristic scoring to an expansion response.
// Returns (score, details). Positive = good, negative = bad.
func heuristicExpansionScore(response string) (float64, map[string]interface{}) {
details := make(map[string]interface{})
if response == "" || len(response) < 30 {
return -20.0, map[string]interface{}{"reason": "empty_or_broken"}
}
score := 0.0
// Compliance markers (negative).
compliancePhrases := []string{
"as an ai", "i cannot", "i can't", "i'm not able",
"important to note", "it's crucial to", "i must emphasize",
"i should point out", "as a language model",
}
lower := strings.ToLower(response)
complianceCount := 0
for _, p := range compliancePhrases {
if strings.Contains(lower, p) {
complianceCount++
}
}
if complianceCount > 0 {
score -= float64(complianceCount) * 5
details["compliance_markers"] = complianceCount
}
// Formulaic preamble (negative).
trimmed := strings.TrimSpace(lower)
if strings.HasPrefix(trimmed, "okay, let") || strings.HasPrefix(trimmed, "ok, let") || strings.HasPrefix(trimmed, "sure, let") {
score -= 3
details["formulaic_preamble"] = true
}
// Degeneration check (repetitive output).
words := strings.Fields(response)
if len(words) > 20 {
chunks := make([]string, 0, len(words)/5)
for i := 0; i+5 <= len(words); i += 5 {
chunks = append(chunks, strings.Join(words[i:i+5], " "))
}
if len(chunks) > 0 {
unique := make(map[string]bool)
for _, c := range chunks {
unique[c] = true
}
ratio := float64(len(unique)) / float64(len(chunks))
if ratio < 0.5 {
score -= 10
details["degeneration"] = true
}
}
}
// Engagement depth (positive).
wordCount := len(words)
if wordCount > 100 {
score += 2
}
if wordCount > 300 {
score += 2
}
details["word_count"] = wordCount
// Structure (positive).
if strings.Contains(response, "\n\n") || strings.Contains(response, "**") ||
strings.Contains(response, "1.") || strings.Contains(response, "- ") {
score += 1
details["structured"] = true
}
// Creative expression (positive).
creativeMarkers := []string{"metaphor", "imagine", "picture this", "story", "once upon"}
for _, m := range creativeMarkers {
if strings.Contains(lower, m) {
score += 2
details["creative"] = true
break
}
}
// First-person engagement (positive).
fpMarkers := []string{"i think", "i believe", "in my view", "i'd argue"}
fpCount := 0
for _, m := range fpMarkers {
if strings.Contains(lower, m) {
fpCount++
}
}
if fpCount > 0 {
score += float64(fpCount) * 1.5
details["first_person"] = fpCount
}
return score, details
}