feat: convert all pipeline.py commands to Go
Complete conversion of pipeline.py into Go `lem` CLI:

- import-all: bulk import all LEM data into DuckDB from M3
- consolidate: pull worker JSONLs, merge, deduplicate
- normalize: seeds → deduplicated expansion_prompts table
- approve: filter scored expansions → training JSONL
- tier-score: heuristic/judge tiered expansion scoring
- expand-status: expansion pipeline progress from DuckDB
- inventory: DuckDB table counts and summary
- coverage: seed coverage gap analysis
- seed-influx: bootstrap InfluxDB from DuckDB golden_gen
- query: ad-hoc SQL against DuckDB

22 commands total, 49 Go files. Replaces entire pipeline.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
parent 4eaf1bfb39
commit 91ee389377

11 changed files with 1714 additions and 13 deletions
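The new data commands chain into a single pipeline. A typical sequence, sketched from the hints the commands print themselves ("Run: lem import-all first", "run: lem normalize", and so on) rather than from any documented contract — the lem.duckdb path is a hypothetical example:

    lem import-all --db lem.duckdb        # pull raw LEM data from M3 into DuckDB
    lem normalize --db lem.duckdb         # seeds → deduplicated expansion_prompts
    lem expand ...                        # generate responses via the trained model
    lem tier-score --db lem.duckdb --tier 1
    lem approve --db lem.duckdb           # write expansion-approved.jsonl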

main.go (66 lines changed)

@@ -12,19 +12,39 @@ import (
 const usage = `Usage: lem <command> [flags]
 
-Commands:
-  score          Score existing response files
-  probe          Generate responses and score them
-  compare        Compare two score files
-  status         Show training and generation progress (InfluxDB + DuckDB)
-  export         Export golden set to training-format JSONL splits
-  expand         Generate expansion responses via trained LEM model
-  conv           Generate conversational training data
-  ingest         Ingest benchmark data into InfluxDB
-  parquet        Export JSONL training splits to Parquet for HuggingFace
-  publish        Push Parquet files to HuggingFace dataset repo
-  metrics        Push DuckDB golden set stats to InfluxDB
-  convert        Convert MLX LoRA adapter to HuggingFace PEFT format
+Scoring:
+  score          Score existing response files
+  probe          Generate responses and score them
+  compare        Compare two score files
+  tier-score     Score expansion responses (heuristic/judge tiers)
+
+Generation:
+  expand         Generate expansion responses via trained LEM model
+  conv           Generate conversational training data (calm phase)
+
+Data Management:
+  import-all     Import ALL LEM data into DuckDB from M3
+  consolidate    Pull worker JSONLs from M3, merge, deduplicate
+  normalize      Normalize seeds → deduplicated expansion_prompts
+  approve        Filter scored expansions → training JSONL
+
+Export & Publish:
+  export         Export golden set to training-format JSONL splits
+  parquet        Export JSONL training splits to Parquet
+  publish        Push Parquet files to HuggingFace dataset repo
+  convert        Convert MLX LoRA adapter to PEFT format
+
+Monitoring:
+  status         Show training and generation progress (InfluxDB)
+  expand-status  Show expansion pipeline status (DuckDB)
+  inventory      Show DuckDB table inventory
+  coverage       Analyze seed coverage gaps
+  metrics        Push DuckDB golden set stats to InfluxDB
+
+Infrastructure:
+  ingest         Ingest benchmark data into InfluxDB
+  seed-influx    Seed InfluxDB golden_gen from DuckDB
+  query          Run ad-hoc SQL against DuckDB
 `
 
 func main() {

@@ -58,6 +78,26 @@ func main() {
 		lem.RunMetrics(os.Args[2:])
 	case "convert":
 		lem.RunConvert(os.Args[2:])
+	case "import-all":
+		lem.RunImport(os.Args[2:])
+	case "consolidate":
+		lem.RunConsolidate(os.Args[2:])
+	case "normalize":
+		lem.RunNormalize(os.Args[2:])
+	case "approve":
+		lem.RunApprove(os.Args[2:])
+	case "tier-score":
+		lem.RunTierScore(os.Args[2:])
+	case "expand-status":
+		lem.RunExpandStatus(os.Args[2:])
+	case "inventory":
+		lem.RunInventory(os.Args[2:])
+	case "coverage":
+		lem.RunCoverage(os.Args[2:])
+	case "seed-influx":
+		lem.RunSeedInflux(os.Args[2:])
+	case "query":
+		lem.RunQuery(os.Args[2:])
 	default:
 		fmt.Fprintf(os.Stderr, "unknown command: %s\n\n%s", os.Args[1], usage)
 		os.Exit(1)

pkg/lem/approve.go (new file, 98 lines)

@@ -0,0 +1,98 @@
package lem

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
)

// RunApprove is the CLI entry point for the approve command.
// Filters scored expansion responses by quality threshold and exports
// approved ones as chat-format training JSONL.
func RunApprove(args []string) {
	fs := flag.NewFlagSet("approve", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	output := fs.String("output", "", "Output JSONL file (defaults to expansion-approved.jsonl in db dir)")
	threshold := fs.Float64("threshold", 6.0, "Min judge average to approve (default: 6.0)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	if *output == "" {
		*output = filepath.Join(filepath.Dir(*dbPath), "expansion-approved.jsonl")
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	// Query approved responses: heuristic passed AND (judge passed OR not yet judge-scored).
	rows, err := db.conn.Query(`
		SELECT r.idx, r.seed_id, r.region, r.domain, r.prompt, r.response,
		       r.gen_time, r.model, s.heuristic_score
		FROM expansion_raw r
		JOIN expansion_scores s ON r.idx = s.idx
		WHERE s.heuristic_pass = true
		  AND (s.judge_pass = true OR s.judge_pass IS NULL)
		ORDER BY r.idx
	`)
	if err != nil {
		log.Fatalf("query approved: %v (have you run scoring?)", err)
	}
	defer rows.Close()

	f, err := os.Create(*output)
	if err != nil {
		log.Fatalf("create output: %v", err)
	}
	defer f.Close()

	enc := json.NewEncoder(f)
	count := 0
	regionSet := make(map[string]bool)
	domainSet := make(map[string]bool)

	for rows.Next() {
		var idx int
		var seedID, region, domain, prompt, response, model string
		var genTime, score float64
		if err := rows.Scan(&idx, &seedID, &region, &domain, &prompt, &response, &genTime, &model, &score); err != nil {
			log.Fatalf("scan: %v", err)
		}

		example := TrainingExample{
			Messages: []ChatMessage{
				{Role: "user", Content: prompt},
				{Role: "assistant", Content: response},
			},
		}

		if err := enc.Encode(example); err != nil {
			log.Fatalf("encode: %v", err)
		}

		regionSet[region] = true
		domainSet[domain] = true
		count++
	}

	_ = *threshold // not yet applied: the query gates on the pass flags; reserved for future judge scoring

	fmt.Printf("Approved: %d responses (threshold: heuristic > 0)\n", count)
	fmt.Printf("Exported: %s\n", *output)
	fmt.Printf(" Regions: %d, Domains: %d\n", len(regionSet), len(domainSet))
}
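Each approved row becomes one chat-format JSONL line. A minimal, runnable sketch of the output shape — the ChatMessage and TrainingExample structs below are local stand-ins assumed to mirror the package's real definitions:

package main

import (
	"encoding/json"
	"os"
)

// Assumed stand-ins for pkg/lem's ChatMessage and TrainingExample.
type ChatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type TrainingExample struct {
	Messages []ChatMessage `json:"messages"`
}

func main() {
	enc := json.NewEncoder(os.Stdout)
	enc.Encode(TrainingExample{Messages: []ChatMessage{
		{Role: "user", Content: "example prompt"},
		{Role: "assistant", Content: "example response"},
	}})
	// Output (one line per approved row):
	// {"messages":[{"role":"user","content":"example prompt"},{"role":"assistant","content":"example response"}]}
}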

pkg/lem/consolidate.go (new file, 139 lines)

@@ -0,0 +1,139 @@
package lem

import (
	"bufio"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strings"
)

// RunConsolidate is the CLI entry point for the consolidate command.
// Pulls all worker JSONLs from M3, merges them, deduplicates on idx,
// and writes a single merged file.
func RunConsolidate(args []string) {
	fs := flag.NewFlagSet("consolidate", flag.ExitOnError)
	remoteHost := fs.String("host", "m3", "SSH host for remote files")
	remotePath := fs.String("remote", "/Volumes/Data/lem/responses", "Remote directory for JSONL files")
	pattern := fs.String("pattern", "gold*.jsonl", "File glob pattern")
	outputDir := fs.String("output", "", "Output directory (defaults to ./responses)")
	merged := fs.String("merged", "", "Merged output file (defaults to gold-merged.jsonl next to the output dir)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *outputDir == "" {
		*outputDir = "responses"
	}
	if err := os.MkdirAll(*outputDir, 0755); err != nil {
		log.Fatalf("create output dir: %v", err)
	}

	// List remote files.
	fmt.Println("Pulling responses from remote...")
	listCmd := exec.Command("ssh", *remoteHost, fmt.Sprintf("ls %s/%s", *remotePath, *pattern))
	listOutput, err := listCmd.Output()
	if err != nil {
		log.Fatalf("list remote files: %v", err)
	}

	remoteFiles := strings.Split(strings.TrimSpace(string(listOutput)), "\n")
	var validFiles []string
	for _, f := range remoteFiles {
		f = strings.TrimSpace(f)
		if f != "" {
			validFiles = append(validFiles, f)
		}
	}
	fmt.Printf(" Found %d JSONL files on %s\n", len(validFiles), *remoteHost)

	// Pull files.
	for _, rf := range validFiles {
		local := filepath.Join(*outputDir, filepath.Base(rf))
		scpCmd := exec.Command("scp", fmt.Sprintf("%s:%s", *remoteHost, rf), local)
		if err := scpCmd.Run(); err != nil {
			log.Printf("warning: failed to pull %s: %v", rf, err)
			continue
		}

		// Count lines.
		f, err := os.Open(local)
		if err != nil {
			continue
		}
		lines := 0
		scanner := bufio.NewScanner(f)
		for scanner.Scan() {
			lines++
		}
		f.Close()
		fmt.Printf(" %s: %d records\n", filepath.Base(rf), lines)
	}

	// Merge and deduplicate on idx.
	seen := make(map[int]json.RawMessage)
	skipped := 0

	matches, _ := filepath.Glob(filepath.Join(*outputDir, *pattern))
	sort.Strings(matches)

	for _, local := range matches {
		f, err := os.Open(local)
		if err != nil {
			continue
		}
		scanner := bufio.NewScanner(f)
		scanner.Buffer(make([]byte, 1024*1024), 1024*1024)
		for scanner.Scan() {
			line := scanner.Text()
			var rec struct {
				Idx *int `json:"idx"`
			}
			if err := json.Unmarshal([]byte(line), &rec); err != nil {
				skipped++
				continue
			}
			if rec.Idx == nil {
				skipped++
				continue
			}
			if _, exists := seen[*rec.Idx]; !exists {
				seen[*rec.Idx] = json.RawMessage(line)
			}
		}
		f.Close()
	}

	if skipped > 0 {
		fmt.Printf(" Skipped %d records without idx\n", skipped)
	}

	// Sort by idx and write merged file.
	if *merged == "" {
		*merged = filepath.Join(*outputDir, "..", "gold-merged.jsonl")
	}

	idxs := make([]int, 0, len(seen))
	for idx := range seen {
		idxs = append(idxs, idx)
	}
	sort.Ints(idxs)

	f, err := os.Create(*merged)
	if err != nil {
		log.Fatalf("create merged file: %v", err)
	}
	for _, idx := range idxs {
		f.Write(seen[idx])
		f.WriteString("\n")
	}
	f.Close()

	fmt.Printf("\nMerged: %d unique examples → %s\n", len(seen), *merged)
}
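Merging is first-writer-wins on idx in sorted filename order, so gold-a.jsonl beats gold-b.jsonl for a shared idx. A self-contained sketch of that rule (the file names and records are made up):

package main

import (
	"encoding/json"
	"fmt"
	"sort"
)

func main() {
	// Hypothetical worker outputs; two workers emitted idx 1.
	files := map[string][]string{
		"gold-b.jsonl": {`{"idx":1,"response":"late"}`},
		"gold-a.jsonl": {`{"idx":1,"response":"early"}`, `{"idx":2,"response":"only"}`},
	}
	names := make([]string, 0, len(files))
	for n := range files {
		names = append(names, n)
	}
	sort.Strings(names) // same ordering consolidate applies to its glob matches

	seen := map[int]json.RawMessage{}
	for _, n := range names {
		for _, line := range files[n] {
			var rec struct {
				Idx *int `json:"idx"`
			}
			if json.Unmarshal([]byte(line), &rec) != nil || rec.Idx == nil {
				continue // counted as skipped in the real command
			}
			if _, ok := seen[*rec.Idx]; !ok {
				seen[*rec.Idx] = json.RawMessage(line)
			}
		}
	}
	fmt.Println(string(seen[1])) // {"idx":1,"response":"early"} — the first file wins
}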

pkg/lem/coverage.go (new file, 135 lines)

@@ -0,0 +1,135 @@
package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
	"strings"
)

// RunCoverage is the CLI entry point for the coverage command.
// Analyzes seed coverage and shows underrepresented areas.
func RunCoverage(args []string) {
	fs := flag.NewFlagSet("coverage", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	var total int
	if err := db.conn.QueryRow("SELECT count(*) FROM seeds").Scan(&total); err != nil {
		log.Fatalf("No seeds table. Run: lem import-all first")
	}

	fmt.Println("LEM Seed Coverage Analysis")
	fmt.Println("==================================================")
	fmt.Printf("\nTotal seeds: %d\n", total)

	// Region distribution.
	fmt.Println("\nRegion distribution (underrepresented first):")
	rows, err := db.conn.Query(`
		SELECT
			CASE
				WHEN region LIKE '%cn%' THEN 'cn (Chinese)'
				WHEN region LIKE '%en-%' OR region LIKE '%en_para%' OR region LIKE '%para%' THEN 'en (English)'
				WHEN region LIKE '%ru%' THEN 'ru (Russian)'
				WHEN region LIKE '%de%' AND region NOT LIKE '%deten%' THEN 'de (German)'
				WHEN region LIKE '%es%' THEN 'es (Spanish)'
				WHEN region LIKE '%fr%' THEN 'fr (French)'
				WHEN region LIKE '%latam%' THEN 'latam (LatAm)'
				WHEN region LIKE '%africa%' THEN 'africa'
				WHEN region LIKE '%eu%' THEN 'eu (European)'
				WHEN region LIKE '%me%' AND region NOT LIKE '%premium%' THEN 'me (MidEast)'
				WHEN region LIKE '%multi%' THEN 'multilingual'
				WHEN region LIKE '%weak%' THEN 'weak-langs'
				ELSE 'other'
			END AS lang_group,
			count(*) AS n,
			count(DISTINCT domain) AS domains
		FROM seeds GROUP BY lang_group ORDER BY n ASC
	`)
	if err != nil {
		log.Fatalf("query regions: %v", err)
	}

	type regionRow struct {
		group   string
		n       int
		domains int
	}
	var regionRows []regionRow
	for rows.Next() {
		var r regionRow
		rows.Scan(&r.group, &r.n, &r.domains)
		regionRows = append(regionRows, r)
	}
	rows.Close()

	avg := float64(total) / float64(len(regionRows))
	for _, r := range regionRows {
		barLen := int(float64(r.n) / avg * 10)
		if barLen > 40 {
			barLen = 40
		}
		bar := strings.Repeat("#", barLen)
		gap := ""
		if float64(r.n) < avg*0.5 {
			gap = " <- UNDERREPRESENTED"
		}
		fmt.Printf(" %-22s %6d (%4d domains) %s%s\n", r.group, r.n, r.domains, bar, gap)
	}

	// Top 10 domains.
	fmt.Println("\nTop 10 domains (most seeds):")
	topRows, err := db.conn.Query(`
		SELECT domain, count(*) AS n FROM seeds
		WHERE domain != '' GROUP BY domain ORDER BY n DESC LIMIT 10
	`)
	if err == nil {
		for topRows.Next() {
			var domain string
			var n int
			topRows.Scan(&domain, &n)
			fmt.Printf(" %-40s %5d\n", domain, n)
		}
		topRows.Close()
	}

	// Bottom 10 domains.
	fmt.Println("\nBottom 10 domains (fewest seeds, min 5):")
	bottomRows, err := db.conn.Query(`
		SELECT domain, count(*) AS n FROM seeds
		WHERE domain != '' GROUP BY domain HAVING count(*) >= 5 ORDER BY n ASC LIMIT 10
	`)
	if err == nil {
		for bottomRows.Next() {
			var domain string
			var n int
			bottomRows.Scan(&domain, &n)
			fmt.Printf(" %-40s %5d\n", domain, n)
		}
		bottomRows.Close()
	}

	fmt.Println("\nSuggested expansion areas:")
	fmt.Println(" - Japanese, Korean, Thai, Vietnamese (no seeds found)")
	fmt.Println(" - Hindi/Urdu, Bengali, Tamil (South Asian)")
	fmt.Println(" - Swahili, Yoruba, Amharic (Sub-Saharan Africa)")
	fmt.Println(" - Indigenous languages (Quechua, Nahuatl, Aymara)")
}

pkg/lem/expand_status.go (new file, 103 lines)

@@ -0,0 +1,103 @@
package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
)

// RunExpandStatus is the CLI entry point for the expand-status command.
// Shows the expansion pipeline progress from DuckDB.
func RunExpandStatus(args []string) {
	fs := flag.NewFlagSet("expand-status", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	fmt.Println("LEM Expansion Pipeline Status")
	fmt.Println("==================================================")

	// Expansion prompts.
	var epTotal, epPending int
	err = db.conn.QueryRow("SELECT count(*) FROM expansion_prompts").Scan(&epTotal)
	if err != nil {
		fmt.Println(" Expansion prompts: not created (run: lem normalize)")
		return // deferred db.Close() handles cleanup
	}
	db.conn.QueryRow("SELECT count(*) FROM expansion_prompts WHERE status = 'pending'").Scan(&epPending)
	fmt.Printf(" Expansion prompts: %d total, %d pending\n", epTotal, epPending)

	// Generated responses.
	var generated int
	err = db.conn.QueryRow("SELECT count(*) FROM expansion_raw").Scan(&generated)
	if err != nil {
		fmt.Println(" Generated: 0 (run: lem expand)")
	} else {
		rows, _ := db.conn.Query("SELECT model, count(*) FROM expansion_raw GROUP BY model")
		if rows != nil {
			var parts []string
			for rows.Next() {
				var model string
				var n int
				rows.Scan(&model, &n)
				parts = append(parts, fmt.Sprintf("%s: %d", model, n))
			}
			rows.Close()
			if len(parts) > 0 {
				fmt.Printf(" Generated: %d (%s)\n", generated, joinStrings(parts, ", "))
			} else {
				fmt.Printf(" Generated: %d\n", generated)
			}
		}
	}

	// Scored.
	var scored, hPassed, jScored, jPassed int
	err = db.conn.QueryRow("SELECT count(*) FROM expansion_scores").Scan(&scored)
	if err != nil {
		fmt.Println(" Scored: 0 (run: lem tier-score --tier 1)")
	} else {
		db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE heuristic_pass = true").Scan(&hPassed)
		fmt.Printf(" Heuristic scored: %d (%d passed)\n", scored, hPassed)

		db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE judge_average IS NOT NULL").Scan(&jScored)
		db.conn.QueryRow("SELECT count(*) FROM expansion_scores WHERE judge_pass = true").Scan(&jPassed)
		if jScored > 0 {
			fmt.Printf(" Judge scored: %d (%d passed)\n", jScored, jPassed)
		}
	}

	// Pipeline progress.
	if epTotal > 0 && generated > 0 {
		genPct := float64(generated) / float64(epTotal) * 100
		fmt.Printf("\n Progress: %.1f%% generated\n", genPct)
	}

	// Golden set context.
	var golden int
	err = db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&golden)
	if err == nil {
		fmt.Printf("\n Golden set: %d / %d\n", golden, targetTotal)
		if generated > 0 {
			fmt.Printf(" Combined: %d total examples\n", golden+generated)
		}
	}
}

pkg/lem/import.go (new file, 453 lines)

@@ -0,0 +1,453 @@
package lem

import (
	"bufio"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
)

// RunImport is the CLI entry point for the import-all command.
// Imports ALL LEM data into DuckDB: prompts, Gemini responses, golden set,
// training examples, benchmarks, validations, and seeds.
func RunImport(args []string) {
	fs := flag.NewFlagSet("import-all", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	skipM3 := fs.Bool("skip-m3", false, "Skip pulling data from M3")
	dataDir := fs.String("data-dir", "", "Local data directory (defaults to db directory)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	if *dataDir == "" {
		*dataDir = filepath.Dir(*dbPath)
	}

	db, err := OpenDBReadWrite(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	totals := make(map[string]int)

	// ── 1. Golden set ──
	goldenPath := filepath.Join(*dataDir, "gold-15k.jsonl")
	if !*skipM3 {
		fmt.Println(" Pulling golden set from M3...")
		scpCmd := exec.Command("scp", "m3:/Volumes/Data/lem/responses/gold-15k.jsonl", goldenPath)
		if err := scpCmd.Run(); err != nil {
			log.Printf(" WARNING: could not pull golden set from M3: %v", err)
		}
	}
	if _, err := os.Stat(goldenPath); err == nil {
		db.conn.Exec("DROP TABLE IF EXISTS golden_set")
		_, err := db.conn.Exec(fmt.Sprintf(`
			CREATE TABLE golden_set AS
			SELECT
				idx::INT AS idx,
				seed_id::VARCHAR AS seed_id,
				domain::VARCHAR AS domain,
				voice::VARCHAR AS voice,
				prompt::VARCHAR AS prompt,
				response::VARCHAR AS response,
				gen_time::DOUBLE AS gen_time,
				length(response)::INT AS char_count,
				length(response) - length(replace(response, ' ', '')) + 1 AS word_count
			FROM read_json_auto('%s', maximum_object_size=1048576)
		`, escapeSQLPath(goldenPath)))
		if err != nil {
			log.Printf(" WARNING: golden set import failed: %v", err)
		} else {
			var n int
			db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&n)
			totals["golden_set"] = n
			fmt.Printf(" golden_set: %d rows\n", n)
		}
	}

	// ── 2. Training examples ──
	trainingDirs := []struct {
		name  string
		files []string
	}{
		{"training", []string{"training/train.jsonl", "training/valid.jsonl", "training/test.jsonl"}},
		{"training-2k", []string{"training-2k/train.jsonl", "training-2k/valid.jsonl", "training-2k/test.jsonl"}},
		{"training-expanded", []string{"training-expanded/train.jsonl", "training-expanded/valid.jsonl"}},
		{"training-book", []string{"training-book/train.jsonl", "training-book/valid.jsonl", "training-book/test.jsonl"}},
		{"training-conv", []string{"training-conv/train.jsonl", "training-conv/valid.jsonl", "training-conv/test.jsonl"}},
		{"gold-full", []string{"gold-full/train.jsonl", "gold-full/valid.jsonl"}},
		{"sovereignty-gold", []string{"sovereignty-gold/train.jsonl", "sovereignty-gold/valid.jsonl"}},
		{"composure-lessons", []string{"composure-lessons/train.jsonl", "composure-lessons/valid.jsonl"}},
		{"watts-full", []string{"watts-full/train.jsonl", "watts-full/valid.jsonl"}},
		{"watts-expanded", []string{"watts-expanded/train.jsonl", "watts-expanded/valid.jsonl"}},
		{"watts-composure", []string{"watts-composure-merged/train.jsonl", "watts-composure-merged/valid.jsonl"}},
		{"western-fresh", []string{"western-fresh/train.jsonl", "western-fresh/valid.jsonl"}},
		{"deepseek-soak", []string{"deepseek-western-soak/train.jsonl", "deepseek-western-soak/valid.jsonl"}},
		{"russian-bridge", []string{"russian-bridge/train.jsonl", "russian-bridge/valid.jsonl"}},
	}

	trainingLocal := filepath.Join(*dataDir, "training")
	os.MkdirAll(trainingLocal, 0755)

	if !*skipM3 {
		fmt.Println(" Pulling training sets from M3...")
		for _, td := range trainingDirs {
			for _, rel := range td.files {
				local := filepath.Join(trainingLocal, rel)
				os.MkdirAll(filepath.Dir(local), 0755)
				scpCmd := exec.Command("scp", fmt.Sprintf("m3:/Volumes/Data/lem/%s", rel), local)
				scpCmd.Run() // ignore errors, file might not exist
			}
		}
	}

	db.conn.Exec("DROP TABLE IF EXISTS training_examples")
	db.conn.Exec(`
		CREATE TABLE training_examples (
			source VARCHAR,
			split VARCHAR,
			prompt TEXT,
			response TEXT,
			num_turns INT,
			full_messages TEXT,
			char_count INT
		)
	`)

	trainingTotal := 0
	for _, td := range trainingDirs {
		for _, rel := range td.files {
			local := filepath.Join(trainingLocal, rel)
			if _, err := os.Stat(local); os.IsNotExist(err) {
				continue
			}

			split := "train"
			if strings.Contains(rel, "valid") {
				split = "valid"
			} else if strings.Contains(rel, "test") {
				split = "test"
			}

			n := importTrainingFile(db, local, td.name, split)
			trainingTotal += n
		}
	}
	totals["training_examples"] = trainingTotal
	fmt.Printf(" training_examples: %d rows\n", trainingTotal)

	// ── 3. Benchmark results ──
	benchLocal := filepath.Join(*dataDir, "benchmarks")
	os.MkdirAll(benchLocal, 0755)

	if !*skipM3 {
		fmt.Println(" Pulling benchmarks from M3...")
		for _, bname := range []string{"truthfulqa", "gsm8k", "do_not_answer", "toxigen"} {
			scpCmd := exec.Command("scp",
				fmt.Sprintf("m3:/Volumes/Data/lem/benchmarks/%s.jsonl", bname),
				filepath.Join(benchLocal, bname+".jsonl"))
			scpCmd.Run()
		}
		for _, subdir := range []string{"results", "scale_results", "cross_arch_results", "deepseek-r1-7b"} {
			localSub := filepath.Join(benchLocal, subdir)
			os.MkdirAll(localSub, 0755)
			scpCmd := exec.Command("scp", "-r",
				fmt.Sprintf("m3:/Volumes/Data/lem/benchmarks/%s/", subdir),
				benchLocal+"/")
			scpCmd.Run()
		}
	}

	db.conn.Exec("DROP TABLE IF EXISTS benchmark_results")
	db.conn.Exec(`
		CREATE TABLE benchmark_results (
			source VARCHAR, id VARCHAR, benchmark VARCHAR, model VARCHAR,
			prompt TEXT, response TEXT, elapsed_seconds DOUBLE, domain VARCHAR
		)
	`)

	benchTotal := 0
	for _, subdir := range []string{"results", "scale_results", "cross_arch_results", "deepseek-r1-7b"} {
		resultDir := filepath.Join(benchLocal, subdir)
		matches, _ := filepath.Glob(filepath.Join(resultDir, "*.jsonl"))
		for _, jf := range matches {
			n := importBenchmarkFile(db, jf, subdir)
			benchTotal += n
		}
	}

	// Also import standalone benchmark files.
	for _, bfile := range []string{"lem_bench", "lem_ethics", "lem_ethics_allen", "instruction_tuned", "abliterated", "base_pt"} {
		local := filepath.Join(benchLocal, bfile+".jsonl")
		if _, err := os.Stat(local); os.IsNotExist(err) {
			if !*skipM3 {
				scpCmd := exec.Command("scp",
					fmt.Sprintf("m3:/Volumes/Data/lem/benchmark/%s.jsonl", bfile), local)
				scpCmd.Run()
			}
		}
		if _, err := os.Stat(local); err == nil {
			n := importBenchmarkFile(db, local, "benchmark")
			benchTotal += n
		}
	}
	totals["benchmark_results"] = benchTotal
	fmt.Printf(" benchmark_results: %d rows\n", benchTotal)

	// ── 4. Benchmark questions ──
	db.conn.Exec("DROP TABLE IF EXISTS benchmark_questions")
	db.conn.Exec(`
		CREATE TABLE benchmark_questions (
			benchmark VARCHAR, id VARCHAR, question TEXT,
			best_answer TEXT, correct_answers TEXT, incorrect_answers TEXT, category VARCHAR
		)
	`)

	benchQTotal := 0
	for _, bname := range []string{"truthfulqa", "gsm8k", "do_not_answer", "toxigen"} {
		local := filepath.Join(benchLocal, bname+".jsonl")
		if _, err := os.Stat(local); err == nil {
			n := importBenchmarkQuestions(db, local, bname)
			benchQTotal += n
		}
	}
	totals["benchmark_questions"] = benchQTotal
	fmt.Printf(" benchmark_questions: %d rows\n", benchQTotal)

	// ── 5. Seeds ──
	db.conn.Exec("DROP TABLE IF EXISTS seeds")
	db.conn.Exec(`
		CREATE TABLE seeds (
			source_file VARCHAR, region VARCHAR, seed_id VARCHAR, domain VARCHAR, prompt TEXT
		)
	`)

	seedTotal := 0
	seedDirs := []string{filepath.Join(*dataDir, "seeds"), "/tmp/lem-data/seeds", "/tmp/lem-repo/seeds"}
	for _, seedDir := range seedDirs {
		if _, err := os.Stat(seedDir); os.IsNotExist(err) {
			continue
		}
		n := importSeeds(db, seedDir)
		seedTotal += n
	}
	totals["seeds"] = seedTotal
	fmt.Printf(" seeds: %d rows\n", seedTotal)

	// ── Summary ──
	grandTotal := 0
	fmt.Printf("\n%s\n", strings.Repeat("=", 50))
	fmt.Println("LEM Database Import Complete")
	fmt.Println(strings.Repeat("=", 50))
	for table, count := range totals {
		fmt.Printf(" %-25s %8d\n", table, count)
		grandTotal += count
	}
	fmt.Printf(" %s\n", strings.Repeat("─", 35))
	fmt.Printf(" %-25s %8d\n", "TOTAL", grandTotal)
	fmt.Printf("\nDatabase: %s\n", *dbPath)
}

func importTrainingFile(db *DB, path, source, split string) int {
	f, err := os.Open(path)
	if err != nil {
		return 0
	}
	defer f.Close()

	count := 0
	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

	for scanner.Scan() {
		var rec struct {
			Messages []ChatMessage `json:"messages"`
		}
		if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
			continue
		}

		prompt := ""
		response := ""
		assistantCount := 0
		for _, m := range rec.Messages {
			if m.Role == "user" && prompt == "" {
				prompt = m.Content
			}
			if m.Role == "assistant" {
				if response == "" {
					response = m.Content
				}
				assistantCount++
			}
		}

		msgsJSON, _ := json.Marshal(rec.Messages)
		db.conn.Exec(`INSERT INTO training_examples VALUES (?, ?, ?, ?, ?, ?, ?)`,
			source, split, prompt, response, assistantCount, string(msgsJSON), len(response))
		count++
	}
	return count
}

func importBenchmarkFile(db *DB, path, source string) int {
	f, err := os.Open(path)
	if err != nil {
		return 0
	}
	defer f.Close()

	count := 0
	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

	for scanner.Scan() {
		var rec map[string]interface{}
		if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
			continue
		}

		db.conn.Exec(`INSERT INTO benchmark_results VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
			source,
			fmt.Sprintf("%v", rec["id"]),
			strOrEmpty(rec, "benchmark"),
			strOrEmpty(rec, "model"),
			strOrEmpty(rec, "prompt"),
			strOrEmpty(rec, "response"),
			floatOrZero(rec, "elapsed_seconds"),
			strOrEmpty(rec, "domain"),
		)
		count++
	}
	return count
}

func importBenchmarkQuestions(db *DB, path, benchmark string) int {
	f, err := os.Open(path)
	if err != nil {
		return 0
	}
	defer f.Close()

	count := 0
	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

	for scanner.Scan() {
		var rec map[string]interface{}
		if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
			continue
		}

		correctJSON, _ := json.Marshal(rec["correct_answers"])
		incorrectJSON, _ := json.Marshal(rec["incorrect_answers"])

		db.conn.Exec(`INSERT INTO benchmark_questions VALUES (?, ?, ?, ?, ?, ?, ?)`,
			benchmark,
			fmt.Sprintf("%v", rec["id"]),
			strOrEmpty(rec, "question"),
			strOrEmpty(rec, "best_answer"),
			string(correctJSON),
			string(incorrectJSON),
			strOrEmpty(rec, "category"),
		)
		count++
	}
	return count
}

func importSeeds(db *DB, seedDir string) int {
	count := 0
	filepath.Walk(seedDir, func(path string, info os.FileInfo, err error) error {
		if err != nil || info.IsDir() || !strings.HasSuffix(path, ".json") {
			return nil
		}

		data, err := os.ReadFile(path)
		if err != nil {
			return nil
		}

		rel, _ := filepath.Rel(seedDir, path)
		region := strings.TrimSuffix(filepath.Base(path), ".json")

		// Try parsing as array or object with prompts/seeds field.
		var seedsList []interface{}
		var raw interface{}
		if err := json.Unmarshal(data, &raw); err != nil {
			return nil
		}

		switch v := raw.(type) {
		case []interface{}:
			seedsList = v
		case map[string]interface{}:
			if prompts, ok := v["prompts"].([]interface{}); ok {
				seedsList = prompts
			} else if seeds, ok := v["seeds"].([]interface{}); ok {
				seedsList = seeds
			}
		}

		for _, s := range seedsList {
			switch seed := s.(type) {
			case map[string]interface{}:
				prompt := strOrEmpty(seed, "prompt")
				if prompt == "" {
					prompt = strOrEmpty(seed, "text")
				}
				if prompt == "" {
					prompt = strOrEmpty(seed, "question")
				}
				db.conn.Exec(`INSERT INTO seeds VALUES (?, ?, ?, ?, ?)`,
					rel, region,
					strOrEmpty(seed, "seed_id"),
					strOrEmpty(seed, "domain"),
					prompt,
				)
				count++
			case string:
				db.conn.Exec(`INSERT INTO seeds VALUES (?, ?, ?, ?, ?)`,
					rel, region, "", "", seed)
				count++
			}
		}
		return nil
	})
	return count
}

func strOrEmpty(m map[string]interface{}, key string) string {
	if v, ok := m[key]; ok {
		return fmt.Sprintf("%v", v)
	}
	return ""
}

func floatOrZero(m map[string]interface{}, key string) float64 {
	if v, ok := m[key]; ok {
		if f, ok := v.(float64); ok {
			return f
		}
	}
	return 0
}

func escapeSQLPath(p string) string {
	return strings.ReplaceAll(p, "'", "''")
}
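Because the golden-set path is spliced into read_json_auto() as a SQL string literal, escapeSQLPath doubles any single quotes. A tiny sketch of the rule (escapeSQLPath is copied here so it runs standalone; the path is hypothetical):

package main

import (
	"fmt"
	"strings"
)

func escapeSQLPath(p string) string {
	return strings.ReplaceAll(p, "'", "''")
}

func main() {
	path := "/data/it's-a-test/gold.jsonl" // hypothetical path containing a quote
	fmt.Printf("FROM read_json_auto('%s')\n", escapeSQLPath(path))
	// FROM read_json_auto('/data/it''s-a-test/gold.jsonl')
}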

pkg/lem/inventory.go (new file, 97 lines)

@@ -0,0 +1,97 @@
package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
)

// RunInventory is the CLI entry point for the inventory command.
// Shows row counts and summary stats for all tables in the DuckDB database.
func RunInventory(args []string) {
	fs := flag.NewFlagSet("inventory", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	counts, err := db.TableCounts()
	if err != nil {
		log.Fatalf("table counts: %v", err)
	}

	fmt.Printf("LEM Database Inventory (%s)\n", *dbPath)
	fmt.Println("============================================================")

	grandTotal := 0
	for table, count := range counts {
		detail := ""

		switch table {
		case "golden_set":
			pct := float64(count) / float64(targetTotal) * 100
			detail = fmt.Sprintf(" (%.1f%% of %d target)", pct, targetTotal)
		case "training_examples":
			var sources int
			db.conn.QueryRow("SELECT COUNT(DISTINCT source) FROM training_examples").Scan(&sources)
			detail = fmt.Sprintf(" (%d sources)", sources)
		case "prompts":
			var domains, voices int
			db.conn.QueryRow("SELECT COUNT(DISTINCT domain) FROM prompts").Scan(&domains)
			db.conn.QueryRow("SELECT COUNT(DISTINCT voice) FROM prompts").Scan(&voices)
			detail = fmt.Sprintf(" (%d domains, %d voices)", domains, voices)
		case "gemini_responses":
			rows, _ := db.conn.Query("SELECT source_model, count(*) FROM gemini_responses GROUP BY source_model")
			if rows != nil {
				var parts []string
				for rows.Next() {
					var model string
					var n int
					rows.Scan(&model, &n)
					parts = append(parts, fmt.Sprintf("%s: %d", model, n))
				}
				rows.Close()
				if len(parts) > 0 {
					detail = fmt.Sprintf(" (%s)", joinStrings(parts, ", "))
				}
			}
		case "benchmark_results":
			var sources int
			db.conn.QueryRow("SELECT COUNT(DISTINCT source) FROM benchmark_results").Scan(&sources)
			detail = fmt.Sprintf(" (%d categories)", sources)
		}

		fmt.Printf(" %-25s %8d%s\n", table, count, detail)
		grandTotal += count
	}

	fmt.Printf(" %-25s\n", "────────────────────────────────────────")
	fmt.Printf(" %-25s %8d\n", "TOTAL", grandTotal)
}

// joinStrings concatenates parts with sep (equivalent to strings.Join).
func joinStrings(parts []string, sep string) string {
	result := ""
	for i, p := range parts {
		if i > 0 {
			result += sep
		}
		result += p
	}
	return result
}

pkg/lem/normalize.go (new file, 148 lines)

@@ -0,0 +1,148 @@
package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
)

// RunNormalize is the CLI entry point for the normalize command.
// Normalizes seeds into the expansion_prompts table, deduplicating against
// the golden set and existing prompts. Assigns priority based on domain
// coverage (underrepresented domains first).
func RunNormalize(args []string) {
	fs := flag.NewFlagSet("normalize", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	minLen := fs.Int("min-length", 50, "Minimum prompt length in characters")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDBReadWrite(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	// Check source tables.
	var seedCount int
	if err := db.conn.QueryRow("SELECT count(*) FROM seeds").Scan(&seedCount); err != nil {
		log.Fatalf("No seeds table. Run: lem import-all first")
	}
	fmt.Printf("Seeds table: %d rows\n", seedCount)

	// Drop and recreate expansion_prompts.
	_, err = db.conn.Exec("DROP TABLE IF EXISTS expansion_prompts")
	if err != nil {
		log.Fatalf("drop expansion_prompts: %v", err)
	}

	// Deduplicate: remove seeds whose prompt already appears in prompts or golden_set.
	_, err = db.conn.Exec(fmt.Sprintf(`
		CREATE TABLE expansion_prompts AS
		WITH unique_seeds AS (
			SELECT
				ROW_NUMBER() OVER (ORDER BY region, domain, seed_id) AS idx,
				seed_id,
				region,
				domain,
				prompt
			FROM (
				SELECT DISTINCT ON (prompt)
					seed_id, region, domain, prompt
				FROM seeds
				WHERE length(prompt) >= %d
				ORDER BY prompt, seed_id
			)
		),
		existing_prompts AS (
			SELECT prompt FROM prompts
			UNION ALL
			SELECT prompt FROM golden_set
		)
		SELECT
			us.idx,
			us.seed_id,
			us.region,
			us.domain,
			'en' AS language,
			us.prompt,
			'' AS prompt_en,
			0 AS priority,
			'pending' AS status
		FROM unique_seeds us
		WHERE NOT EXISTS (
			SELECT 1 FROM existing_prompts ep
			WHERE ep.prompt = us.prompt
		)
	`, *minLen))
	if err != nil {
		log.Fatalf("create expansion_prompts: %v", err)
	}

	var total, domains, regions int
	db.conn.QueryRow("SELECT count(*) FROM expansion_prompts").Scan(&total)
	db.conn.QueryRow("SELECT count(DISTINCT domain) FROM expansion_prompts").Scan(&domains)
	db.conn.QueryRow("SELECT count(DISTINCT region) FROM expansion_prompts").Scan(&regions)

	// Assign priority based on domain coverage: domains with the fewest
	// prompts rank first, e.g. {ethics: 500, folklore: 12} puts folklore at 1.
	_, err = db.conn.Exec(`
		UPDATE expansion_prompts SET priority = dc.rnk
		FROM (
			SELECT domain, RANK() OVER (ORDER BY count(*) ASC) AS rnk
			FROM expansion_prompts GROUP BY domain
		) dc
		WHERE dc.domain = expansion_prompts.domain
	`)
	if err != nil {
		log.Printf("warning: priority assignment failed: %v", err)
	}

	fmt.Printf("\nExpansion Prompts: %d\n", total)
	fmt.Printf(" Domains: %d\n", domains)
	fmt.Printf(" Regions: %d\n", regions)

	// Show region distribution.
	fmt.Println("\n By region group:")
	rows, err := db.conn.Query(`
		SELECT
			CASE
				WHEN region LIKE '%cn%' THEN 'cn'
				WHEN region LIKE '%en-%' OR region LIKE '%en_para%' OR region LIKE '%para%' THEN 'en'
				WHEN region LIKE '%ru%' THEN 'ru'
				WHEN region LIKE '%de%' AND region NOT LIKE '%deten%' THEN 'de'
				WHEN region LIKE '%es%' THEN 'es'
				WHEN region LIKE '%fr%' THEN 'fr'
				WHEN region LIKE '%latam%' THEN 'latam'
				WHEN region LIKE '%africa%' THEN 'africa'
				WHEN region LIKE '%eu%' THEN 'eu'
				WHEN region LIKE '%me%' AND region NOT LIKE '%premium%' THEN 'me'
				ELSE 'other'
			END AS lang_group,
			count(*) AS n
		FROM expansion_prompts GROUP BY lang_group ORDER BY n DESC
	`)
	if err == nil {
		for rows.Next() {
			var group string
			var n int
			rows.Scan(&group, &n)
			fmt.Printf("  %-15s %6d\n", group, n)
		}
		rows.Close()
	}

	fmt.Printf("\nNormalization complete: %d expansion prompts from %d seeds\n", total, seedCount)
}

pkg/lem/query.go (new file, 152 lines)

@@ -0,0 +1,152 @@
package lem

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"
	"strings"
)

// RunQuery is the CLI entry point for the query command.
// Runs ad-hoc SQL against the DuckDB database.
func RunQuery(args []string) {
	fs := flag.NewFlagSet("query", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	jsonOutput := fs.Bool("json", false, "Output as JSON instead of table")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	sql := strings.Join(fs.Args(), " ")
	if sql == "" {
		fmt.Fprintln(os.Stderr, "error: SQL query required as positional argument")
		fmt.Fprintln(os.Stderr, " lem query --db path.duckdb \"SELECT * FROM golden_set LIMIT 5\"")
		fmt.Fprintln(os.Stderr, " lem query --db path.duckdb \"domain = 'ethics'\" (auto-wraps as WHERE clause)")
		os.Exit(1)
	}

	// Auto-wrap non-SELECT queries as WHERE clauses.
	trimmed := strings.TrimSpace(strings.ToUpper(sql))
	if !strings.HasPrefix(trimmed, "SELECT") && !strings.HasPrefix(trimmed, "SHOW") &&
		!strings.HasPrefix(trimmed, "DESCRIBE") && !strings.HasPrefix(trimmed, "EXPLAIN") {
		sql = "SELECT * FROM golden_set WHERE " + sql + " LIMIT 20"
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	rows, err := db.conn.Query(sql)
	if err != nil {
		log.Fatalf("query: %v", err)
	}
	defer rows.Close()

	cols, err := rows.Columns()
	if err != nil {
		log.Fatalf("columns: %v", err)
	}

	var results []map[string]interface{}

	for rows.Next() {
		values := make([]interface{}, len(cols))
		ptrs := make([]interface{}, len(cols))
		for i := range values {
			ptrs[i] = &values[i]
		}

		if err := rows.Scan(ptrs...); err != nil {
			log.Fatalf("scan: %v", err)
		}

		row := make(map[string]interface{})
		for i, col := range cols {
			v := values[i]
			// Convert []byte to string for readability.
			if b, ok := v.([]byte); ok {
				v = string(b)
			}
			row[col] = v
		}
		results = append(results, row)
	}

	if *jsonOutput {
		enc := json.NewEncoder(os.Stdout)
		enc.SetIndent("", " ")
		enc.Encode(results)
		return
	}

	// Table output.
	if len(results) == 0 {
		fmt.Println("(no results)")
		return
	}

	// Calculate column widths.
	widths := make(map[string]int)
	for _, col := range cols {
		widths[col] = len(col)
	}
	for _, row := range results {
		for _, col := range cols {
			s := fmt.Sprintf("%v", row[col])
			if len(s) > 60 {
				s = s[:57] + "..."
			}
			if len(s) > widths[col] {
				widths[col] = len(s)
			}
		}
	}

	// Print header.
	for i, col := range cols {
		if i > 0 {
			fmt.Print("  ")
		}
		fmt.Printf("%-*s", widths[col], col)
	}
	fmt.Println()

	// Print separator.
	for i, col := range cols {
		if i > 0 {
			fmt.Print("  ")
		}
		fmt.Print(strings.Repeat("─", widths[col]))
	}
	fmt.Println()

	// Print rows.
	for _, row := range results {
		for i, col := range cols {
			if i > 0 {
				fmt.Print("  ")
			}
			s := fmt.Sprintf("%v", row[col])
			if len(s) > 60 {
				s = s[:57] + "..."
			}
			fmt.Printf("%-*s", widths[col], s)
		}
		fmt.Println()
	}

	fmt.Printf("\n(%d rows)\n", len(results))
}
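The auto-wrap means a bare predicate is rewritten into a bounded SELECT against golden_set, while anything already starting with SELECT, SHOW, DESCRIBE, or EXPLAIN passes through untouched. A sketch of the same rewrite in isolation:

package main

import (
	"fmt"
	"strings"
)

// wrap mirrors the query command's auto-wrap rule.
func wrap(sql string) string {
	trimmed := strings.TrimSpace(strings.ToUpper(sql))
	for _, p := range []string{"SELECT", "SHOW", "DESCRIBE", "EXPLAIN"} {
		if strings.HasPrefix(trimmed, p) {
			return sql
		}
	}
	return "SELECT * FROM golden_set WHERE " + sql + " LIMIT 20"
}

func main() {
	fmt.Println(wrap("domain = 'ethics'"))
	// SELECT * FROM golden_set WHERE domain = 'ethics' LIMIT 20
	fmt.Println(wrap("SELECT count(*) FROM seeds"))
	// SELECT count(*) FROM seeds
}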

pkg/lem/seed_influx.go (new file, 111 lines)

@@ -0,0 +1,111 @@
package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
	"strings"
)

// RunSeedInflux is the CLI entry point for the seed-influx command.
// Seeds InfluxDB golden_gen data (the gold_gen line-protocol measurement)
// from DuckDB golden_set data.
// One-time migration tool for bootstrapping InfluxDB from existing data.
func RunSeedInflux(args []string) {
	fs := flag.NewFlagSet("seed-influx", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	influxURL := fs.String("influx", "", "InfluxDB URL")
	influxDB := fs.String("influx-db", "", "InfluxDB database name")
	force := fs.Bool("force", false, "Re-seed even if InfluxDB already has data")
	batchSize := fs.Int("batch-size", 500, "Lines per InfluxDB write batch")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDB(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	var total int
	if err := db.conn.QueryRow("SELECT count(*) FROM golden_set").Scan(&total); err != nil {
		log.Fatalf("No golden_set table. Run ingest first.")
	}

	influx := NewInfluxClient(*influxURL, *influxDB)

	// Check existing count in InfluxDB.
	existing := 0
	rows, err := influx.QuerySQL("SELECT count(DISTINCT i) AS n FROM gold_gen")
	if err == nil && len(rows) > 0 {
		if n, ok := rows[0]["n"].(float64); ok {
			existing = int(n)
		}
	}

	fmt.Printf("DuckDB has %d records, InfluxDB golden_gen has %d\n", total, existing)

	if existing >= total && !*force {
		fmt.Println("InfluxDB already has all records. Use --force to re-seed.")
		return
	}

	// Read all rows.
	dbRows, err := db.conn.Query(`
		SELECT idx, seed_id, domain, voice, gen_time, char_count
		FROM golden_set ORDER BY idx
	`)
	if err != nil {
		log.Fatalf("query golden_set: %v", err)
	}
	defer dbRows.Close()

	var lines []string
	written := 0

	for dbRows.Next() {
		var idx, charCount int
		var seedID, domain, voice string
		var genTime float64

		if err := dbRows.Scan(&idx, &seedID, &domain, &voice, &genTime, &charCount); err != nil {
			log.Fatalf("scan: %v", err)
		}

		sid := strings.ReplaceAll(seedID, `"`, `\"`)
		lp := fmt.Sprintf(`gold_gen,i=%d,w=migration,d=%s,v=%s seed_id="%s",gen_time=%.1f,chars=%di`,
			idx, escapeLp(domain), escapeLp(voice), sid, genTime, charCount)
		lines = append(lines, lp)

		if len(lines) >= *batchSize {
			if err := influx.WriteLp(lines); err != nil {
				log.Fatalf("write batch at %d: %v", written, err)
			}
			written += len(lines)
			lines = lines[:0]

			if written%2000 == 0 {
				fmt.Printf(" Seeded %d/%d records\n", written, total)
			}
		}
	}

	if len(lines) > 0 {
		if err := influx.WriteLp(lines); err != nil {
			log.Fatalf("flush: %v", err)
		}
		written += len(lines)
	}

	fmt.Printf("Seeded %d golden_gen records into InfluxDB\n", written)
}
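Each golden_set row becomes one line-protocol point: idx, worker, domain, and voice as tags; seed_id, gen_time, and chars as fields (the trailing i marks an InfluxDB integer). A sketch of one rendered line, with made-up values and plain strings standing in for escapeLp:

package main

import "fmt"

func main() {
	// Mirrors the Sprintf in RunSeedInflux for a single hypothetical row.
	idx, domain, voice := 42, "ethics", "calm"
	seedID, genTime, chars := "seed-0042", 3.2, 1874
	fmt.Printf(`gold_gen,i=%d,w=migration,d=%s,v=%s seed_id="%s",gen_time=%.1f,chars=%di`+"\n",
		idx, domain, voice, seedID, genTime, chars)
	// gold_gen,i=42,w=migration,d=ethics,v=calm seed_id="seed-0042",gen_time=3.2,chars=1874i
}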

pkg/lem/tier_score.go (new file, 225 lines)

@@ -0,0 +1,225 @@
package lem

import (
	"flag"
	"fmt"
	"log"
	"os"
	"strings"
)

// RunTierScore is the CLI entry point for the tier-score command.
// Scores expansion responses using tiered quality assessment:
//   - Tier 1: Heuristic regex scoring (fast, no API)
//   - Tier 2: LEM self-judge (requires trained model)
//   - Tier 3: External judge (reserved for borderline cases)
func RunTierScore(args []string) {
	fs := flag.NewFlagSet("tier-score", flag.ExitOnError)
	dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
	tier := fs.Int("tier", 1, "Scoring tier: 1=heuristic, 2=LEM judge, 3=external")
	limit := fs.Int("limit", 0, "Max items to score (0=all)")

	if err := fs.Parse(args); err != nil {
		log.Fatalf("parse flags: %v", err)
	}

	if *dbPath == "" {
		*dbPath = os.Getenv("LEM_DB")
	}
	if *dbPath == "" {
		fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
		os.Exit(1)
	}

	db, err := OpenDBReadWrite(*dbPath)
	if err != nil {
		log.Fatalf("open db: %v", err)
	}
	defer db.Close()

	// Ensure expansion_scores table exists.
	db.conn.Exec(`
		CREATE TABLE IF NOT EXISTS expansion_scores (
			idx INT,
			heuristic_score DOUBLE,
			heuristic_pass BOOLEAN,
			judge_sovereignty DOUBLE,
			judge_ethical_depth DOUBLE,
			judge_creative DOUBLE,
			judge_self_concept DOUBLE,
			judge_average DOUBLE,
			judge_pass BOOLEAN,
			judge_model VARCHAR,
			scored_at TIMESTAMP
		)
	`)

	if *tier >= 1 {
		runHeuristicTier(db, *limit)
	}

	if *tier >= 2 {
		fmt.Println("\nTier 2 (LEM judge): not yet available — needs trained LEM-27B model")
		fmt.Println(" Will score: sovereignty, ethical_depth, creative, self_concept (1-10 each)")
	}

	if *tier >= 3 {
		fmt.Println("\nTier 3 (External judge): reserved for borderline cases")
	}
}

func runHeuristicTier(db *DB, limit int) {
	// Find unscored responses.
	query := `
		SELECT r.idx, r.response FROM expansion_raw r
		LEFT JOIN expansion_scores s ON r.idx = s.idx
		WHERE s.idx IS NULL
		ORDER BY r.idx
	`
	if limit > 0 {
		query += fmt.Sprintf(" LIMIT %d", limit)
	}

	rows, err := db.conn.Query(query)
	if err != nil {
		log.Fatalf("query unscored: %v", err)
	}
	defer rows.Close()

	type unscoredRow struct {
		idx      int
		response string
	}
	var unscored []unscoredRow

	for rows.Next() {
		var r unscoredRow
		rows.Scan(&r.idx, &r.response)
		unscored = append(unscored, r)
	}

	if len(unscored) == 0 {
		fmt.Println("Tier 1 (heuristic): all responses already scored")
		return
	}

	fmt.Printf("Tier 1 (heuristic): scoring %d responses...\n", len(unscored))

	passed := 0
	for _, r := range unscored {
		score, _ := heuristicExpansionScore(r.response)
		isPass := score > 0
		if isPass {
			passed++
		}

		db.conn.Exec(`
			INSERT INTO expansion_scores (idx, heuristic_score, heuristic_pass, scored_at)
			VALUES (?, ?, ?, current_timestamp)
		`, r.idx, score, isPass)
	}

	fmt.Printf(" Scored: %d, Passed: %d, Failed: %d\n", len(unscored), passed, len(unscored)-passed)
	if len(unscored) > 0 {
		fmt.Printf(" Pass rate: %.1f%%\n", float64(passed)/float64(len(unscored))*100)
	}
}

// heuristicExpansionScore applies fast heuristic scoring to an expansion response.
// Returns (score, details). Positive = good, negative = bad.
func heuristicExpansionScore(response string) (float64, map[string]interface{}) {
	details := make(map[string]interface{})

	if response == "" || len(response) < 30 {
		return -20.0, map[string]interface{}{"reason": "empty_or_broken"}
	}

	score := 0.0

	// Compliance markers (negative).
	compliancePhrases := []string{
		"as an ai", "i cannot", "i can't", "i'm not able",
		"important to note", "it's crucial to", "i must emphasize",
		"i should point out", "as a language model",
	}
	lower := strings.ToLower(response)
	complianceCount := 0
	for _, p := range compliancePhrases {
		if strings.Contains(lower, p) {
			complianceCount++
		}
	}
	if complianceCount > 0 {
		score -= float64(complianceCount) * 5
		details["compliance_markers"] = complianceCount
	}

	// Formulaic preamble (negative).
	trimmed := strings.TrimSpace(lower)
	if strings.HasPrefix(trimmed, "okay, let") || strings.HasPrefix(trimmed, "ok, let") || strings.HasPrefix(trimmed, "sure, let") {
		score -= 3
		details["formulaic_preamble"] = true
	}

	// Degeneration check (repetitive output).
	words := strings.Fields(response)
	if len(words) > 20 {
		chunks := make([]string, 0, len(words)/5)
		for i := 0; i+5 <= len(words); i += 5 {
			chunks = append(chunks, strings.Join(words[i:i+5], " "))
		}
		if len(chunks) > 0 {
			unique := make(map[string]bool)
			for _, c := range chunks {
				unique[c] = true
			}
			ratio := float64(len(unique)) / float64(len(chunks))
			if ratio < 0.5 {
				score -= 10
				details["degeneration"] = true
			}
		}
	}

	// Engagement depth (positive).
	wordCount := len(words)
	if wordCount > 100 {
		score += 2
	}
	if wordCount > 300 {
		score += 2
	}
	details["word_count"] = wordCount

	// Structure (positive).
	if strings.Contains(response, "\n\n") || strings.Contains(response, "**") ||
		strings.Contains(response, "1.") || strings.Contains(response, "- ") {
		score += 1
		details["structured"] = true
	}

	// Creative expression (positive).
	creativeMarkers := []string{"metaphor", "imagine", "picture this", "story", "once upon"}
	for _, m := range creativeMarkers {
		if strings.Contains(lower, m) {
			score += 2
			details["creative"] = true
			break
		}
	}

	// First-person engagement (positive).
	fpMarkers := []string{"i think", "i believe", "in my view", "i'd argue"}
	fpCount := 0
	for _, m := range fpMarkers {
		if strings.Contains(lower, m) {
			fpCount++
		}
	}
	if fpCount > 0 {
		score += float64(fpCount) * 1.5
		details["first_person"] = fpCount
	}

	return score, details
}
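A worked example of how the markers combine: two compliance phrases cost 5 points each, while a first-person opener plus a creative marker earn 1.5 and 2. A sketch in test form (not a shipped test; the sample responses are made up):

package lem

import "testing"

func TestHeuristicScoreOrderingSketch(t *testing.T) {
	// "as an ai" and "i cannot" → 2 compliance markers → score -10.
	refusal, _ := heuristicExpansionScore("As an AI, I cannot help with that request at all.")
	// "i think" (+1.5) and "metaphor" (+2) → score 3.5.
	engaged, _ := heuristicExpansionScore("I think the honest answer starts with a metaphor: imagine a river that adapts.")
	if refusal >= engaged {
		t.Fatalf("expected refusal (%v) < engaged (%v)", refusal, engaged)
	}
}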