LEM/pkg/lem/approve.go
Snider 56eda1a081 refactor: migrate all 25 commands from passthrough to cobra framework
Replace passthrough() + stdlib flag.FlagSet anti-pattern with proper
cobra integration. Every Run* function now takes a typed *Opts struct
and returns error. Flags registered via cli.StringFlag/IntFlag/etc.
Commands participate in Core lifecycle with full cobra flag parsing.

- 6 command groups: gen, score, data, export, infra, mon
- 25 commands converted, 0 passthrough() calls remain
- Delete passthrough() helper from lem.go
- Update export_test.go to use ExportOpts struct

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-23 03:32:53 +00:00

96 lines
2.5 KiB
Go

package lem
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
)
// ApproveOpts carries the settings consumed by the approve command.
type ApproveOpts struct {
	// DB is the DuckDB database path. When empty, RunApprove falls back to
	// the LEM_DB environment variable.
	DB string

	// Output is the JSONL file to write. When empty, RunApprove uses
	// expansion-approved.jsonl in the same directory as the database.
	Output string

	// Threshold is the minimum judge average required for approval
	// (documented default: 6.0).
	// NOTE(review): the default is not set in this file; confirm it where
	// the flag is registered.
	Threshold float64
}
// RunApprove is the CLI entry point for the approve command.
//
// It selects expansion responses whose heuristic check passed and whose judge
// check either passed or has not been recorded yet, then writes each one to a
// JSONL file as a two-message training example (user prompt followed by
// assistant response). On success a short summary is printed to stdout.
//
// The database path is taken from cfg.DB, falling back to the LEM_DB
// environment variable; an error is returned when neither is set. The output
// path is taken from cfg.Output, falling back to expansion-approved.jsonl in
// the directory that contains the database.
//
// cfg.Threshold is accepted but not applied: the query filters only on the
// heuristic_pass and judge_pass flags.
//
// Failures opening the database, querying, scanning, iterating rows,
// encoding, and creating or closing the output file are returned wrapped
// with %w so callers can inspect the underlying cause.
func RunApprove(cfg ApproveOpts) error {
	dbPath := cfg.DB
	if dbPath == "" {
		dbPath = os.Getenv("LEM_DB")
	}
	if dbPath == "" {
		return fmt.Errorf("--db or LEM_DB required")
	}

	output := cfg.Output
	if output == "" {
		output = filepath.Join(filepath.Dir(dbPath), "expansion-approved.jsonl")
	}

	db, err := OpenDB(dbPath)
	if err != nil {
		return fmt.Errorf("open db: %w", err)
	}
	defer db.Close()

	// Query approved responses: heuristic passed AND (judge passed OR not yet judge-scored).
	rows, err := db.conn.Query(`
		SELECT r.idx, r.seed_id, r.region, r.domain, r.prompt, r.response,
		r.gen_time, r.model, s.heuristic_score
		FROM expansion_raw r
		JOIN expansion_scores s ON r.idx = s.idx
		WHERE s.heuristic_pass = true
		AND (s.judge_pass = true OR s.judge_pass IS NULL)
		ORDER BY r.idx
	`)
	if err != nil {
		return fmt.Errorf("query approved: %w (have you run scoring?)", err)
	}
	defer rows.Close()

	f, err := os.Create(output)
	if err != nil {
		return fmt.Errorf("create output: %w", err)
	}
	// Safety net for early returns. The success path closes explicitly below
	// so the close error can be reported; the second Close is then a no-op
	// that returns an ignored error.
	defer f.Close()

	enc := json.NewEncoder(f)
	count := 0
	regionSet := make(map[string]struct{})
	domainSet := make(map[string]struct{})

	for rows.Next() {
		// Every selected column needs a Scan destination even though only
		// region, domain, prompt and response are used below.
		var idx int
		var seedID, region, domain, prompt, response, model string
		var genTime, score float64
		if err := rows.Scan(&idx, &seedID, &region, &domain, &prompt, &response, &genTime, &model, &score); err != nil {
			return fmt.Errorf("scan: %w", err)
		}

		example := TrainingExample{
			Messages: []ChatMessage{
				{Role: "user", Content: prompt},
				{Role: "assistant", Content: response},
			},
		}
		if err := enc.Encode(example); err != nil {
			return fmt.Errorf("encode: %w", err)
		}

		regionSet[region] = struct{}{}
		domainSet[domain] = struct{}{}
		count++
	}
	// Next returning false can mean an iteration error rather than the end of
	// the result set; without this check a truncated export looks successful.
	if err := rows.Err(); err != nil {
		return fmt.Errorf("iterate rows: %w", err)
	}

	// Close before reporting success so a failed final write is not lost.
	if err := f.Close(); err != nil {
		return fmt.Errorf("close output: %w", err)
	}

	fmt.Printf("Approved: %d responses (threshold: heuristic > 0)\n", count)
	fmt.Printf("Exported: %s\n", output)
	fmt.Printf(" Regions: %d, Domains: %d\n", len(regionSet), len(domainSet))
	return nil
}