Complete conversion of pipeline.py into Go `lem` CLI: - import-all: bulk import all LEM data into DuckDB from M3 - consolidate: pull worker JSONLs, merge, deduplicate - normalize: seeds → deduplicated expansion_prompts table - approve: filter scored expansions → training JSONL - tier-score: heuristic/judge tiered expansion scoring - expand-status: expansion pipeline progress from DuckDB - inventory: DuckDB table counts and summary - coverage: seed coverage gap analysis - seed-influx: bootstrap InfluxDB from DuckDB golden_gen - query: ad-hoc SQL against DuckDB 22 commands total, 49 Go files. Replaces entire pipeline.py. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
225 lines
5.6 KiB
Go
package lem
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
)
|
|
|
|
// RunTierScore is the CLI entry point for the tier-score command.
|
|
// Scores expansion responses using tiered quality assessment:
|
|
// - Tier 1: Heuristic regex scoring (fast, no API)
|
|
// - Tier 2: LEM self-judge (requires trained model)
|
|
// - Tier 3: External judge (reserved for borderline cases)
|
|
func RunTierScore(args []string) {
|
|
fs := flag.NewFlagSet("tier-score", flag.ExitOnError)
|
|
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
|
|
tier := fs.Int("tier", 1, "Scoring tier: 1=heuristic, 2=LEM judge, 3=external")
|
|
limit := fs.Int("limit", 0, "Max items to score (0=all)")
|
|
|
|
if err := fs.Parse(args); err != nil {
|
|
log.Fatalf("parse flags: %v", err)
|
|
}
|
|
|
|
if *dbPath == "" {
|
|
*dbPath = os.Getenv("LEM_DB")
|
|
}
|
|
if *dbPath == "" {
|
|
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
|
|
os.Exit(1)
|
|
}
|
|
|
|
db, err := OpenDBReadWrite(*dbPath)
|
|
if err != nil {
|
|
log.Fatalf("open db: %v", err)
|
|
}
|
|
defer db.Close()
|
|
|
|
// Ensure expansion_scores table exists.
|
|
db.conn.Exec(`
|
|
CREATE TABLE IF NOT EXISTS expansion_scores (
|
|
idx INT,
|
|
heuristic_score DOUBLE,
|
|
heuristic_pass BOOLEAN,
|
|
judge_sovereignty DOUBLE,
|
|
judge_ethical_depth DOUBLE,
|
|
judge_creative DOUBLE,
|
|
judge_self_concept DOUBLE,
|
|
judge_average DOUBLE,
|
|
judge_pass BOOLEAN,
|
|
judge_model VARCHAR,
|
|
scored_at TIMESTAMP
|
|
)
|
|
`)
|
|
|
|
if *tier >= 1 {
|
|
runHeuristicTier(db, *limit)
|
|
}
|
|
|
|
if *tier >= 2 {
|
|
fmt.Println("\nTier 2 (LEM judge): not yet available — needs trained LEM-27B model")
|
|
fmt.Println(" Will score: sovereignty, ethical_depth, creative, self_concept (1-10 each)")
|
|
}
|
|
|
|
if *tier >= 3 {
|
|
fmt.Println("\nTier 3 (External judge): reserved for borderline cases")
|
|
}
|
|
}
|
|
|
|
func runHeuristicTier(db *DB, limit int) {
|
|
// Find unscored responses.
|
|
query := `
|
|
SELECT r.idx, r.response FROM expansion_raw r
|
|
LEFT JOIN expansion_scores s ON r.idx = s.idx
|
|
WHERE s.idx IS NULL
|
|
ORDER BY r.idx
|
|
`
|
|
if limit > 0 {
|
|
query += fmt.Sprintf(" LIMIT %d", limit)
|
|
}
|
|
|
|
rows, err := db.conn.Query(query)
|
|
if err != nil {
|
|
log.Fatalf("query unscored: %v", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
type unscoredRow struct {
|
|
idx int
|
|
response string
|
|
}
|
|
var unscored []unscoredRow
|
|
|
|
for rows.Next() {
|
|
var r unscoredRow
|
|
rows.Scan(&r.idx, &r.response)
|
|
unscored = append(unscored, r)
|
|
}
|
|
|
|
if len(unscored) == 0 {
|
|
fmt.Println("Tier 1 (heuristic): all responses already scored")
|
|
return
|
|
}
|
|
|
|
fmt.Printf("Tier 1 (heuristic): scoring %d responses...\n", len(unscored))
|
|
|
|
passed := 0
|
|
for _, r := range unscored {
|
|
score, _ := heuristicExpansionScore(r.response)
|
|
isPass := score > 0
|
|
if isPass {
|
|
passed++
|
|
}
|
|
|
|
db.conn.Exec(`
|
|
INSERT INTO expansion_scores (idx, heuristic_score, heuristic_pass, scored_at)
|
|
VALUES (?, ?, ?, current_timestamp)
|
|
`, r.idx, score, isPass)
|
|
}
|
|
|
|
fmt.Printf(" Scored: %d, Passed: %d, Failed: %d\n", len(unscored), passed, len(unscored)-passed)
|
|
if len(unscored) > 0 {
|
|
fmt.Printf(" Pass rate: %.1f%%\n", float64(passed)/float64(len(unscored))*100)
|
|
}
|
|
}
|
|
|
|
// heuristicExpansionScore applies fast heuristic scoring to an expansion response.
|
|
// Returns (score, details). Positive = good, negative = bad.
|
|
func heuristicExpansionScore(response string) (float64, map[string]interface{}) {
|
|
details := make(map[string]interface{})
|
|
|
|
if response == "" || len(response) < 30 {
|
|
return -20.0, map[string]interface{}{"reason": "empty_or_broken"}
|
|
}
|
|
|
|
score := 0.0
|
|
|
|
// Compliance markers (negative).
|
|
compliancePhrases := []string{
|
|
"as an ai", "i cannot", "i can't", "i'm not able",
|
|
"important to note", "it's crucial to", "i must emphasize",
|
|
"i should point out", "as a language model",
|
|
}
|
|
lower := strings.ToLower(response)
|
|
complianceCount := 0
|
|
for _, p := range compliancePhrases {
|
|
if strings.Contains(lower, p) {
|
|
complianceCount++
|
|
}
|
|
}
|
|
if complianceCount > 0 {
|
|
score -= float64(complianceCount) * 5
|
|
details["compliance_markers"] = complianceCount
|
|
}
|
|
|
|
// Formulaic preamble (negative).
|
|
trimmed := strings.TrimSpace(lower)
|
|
if strings.HasPrefix(trimmed, "okay, let") || strings.HasPrefix(trimmed, "ok, let") || strings.HasPrefix(trimmed, "sure, let") {
|
|
score -= 3
|
|
details["formulaic_preamble"] = true
|
|
}
|
|
|
|
// Degeneration check (repetitive output).
|
|
words := strings.Fields(response)
|
|
if len(words) > 20 {
|
|
chunks := make([]string, 0, len(words)/5)
|
|
for i := 0; i+5 <= len(words); i += 5 {
|
|
chunks = append(chunks, strings.Join(words[i:i+5], " "))
|
|
}
|
|
if len(chunks) > 0 {
|
|
unique := make(map[string]bool)
|
|
for _, c := range chunks {
|
|
unique[c] = true
|
|
}
|
|
ratio := float64(len(unique)) / float64(len(chunks))
|
|
if ratio < 0.5 {
|
|
score -= 10
|
|
details["degeneration"] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// Engagement depth (positive).
|
|
wordCount := len(words)
|
|
if wordCount > 100 {
|
|
score += 2
|
|
}
|
|
if wordCount > 300 {
|
|
score += 2
|
|
}
|
|
details["word_count"] = wordCount
|
|
|
|
// Structure (positive).
|
|
if strings.Contains(response, "\n\n") || strings.Contains(response, "**") ||
|
|
strings.Contains(response, "1.") || strings.Contains(response, "- ") {
|
|
score += 1
|
|
details["structured"] = true
|
|
}
|
|
|
|
// Creative expression (positive).
|
|
creativeMarkers := []string{"metaphor", "imagine", "picture this", "story", "once upon"}
|
|
for _, m := range creativeMarkers {
|
|
if strings.Contains(lower, m) {
|
|
score += 2
|
|
details["creative"] = true
|
|
break
|
|
}
|
|
}
|
|
|
|
// First-person engagement (positive).
|
|
fpMarkers := []string{"i think", "i believe", "in my view", "i'd argue"}
|
|
fpCount := 0
|
|
for _, m := range fpMarkers {
|
|
if strings.Contains(lower, m) {
|
|
fpCount++
|
|
}
|
|
}
|
|
if fpCount > 0 {
|
|
score += float64(fpCount) * 1.5
|
|
details["first_person"] = fpCount
|
|
}
|
|
|
|
return score, details
|
|
}
|