Replace passthrough() + stdlib flag.FlagSet anti-pattern with proper cobra integration. Every Run* function now takes a typed *Opts struct and returns error. Flags registered via cli.StringFlag/IntFlag/etc. Commands participate in Core lifecycle with full cobra flag parsing.

- 6 command groups: gen, score, data, export, infra, mon
- 25 commands converted, 0 passthrough() calls remain
- Delete passthrough() helper from lem.go
- Update export_test.go to use ExportOpts struct

Co-Authored-By: Virgil <virgil@lethean.io>
96 lines
2.5 KiB
Go
96 lines
2.5 KiB
Go
package lem
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
)
|
|
|
|
// ApproveOpts carries the settings accepted by the approve command.
type ApproveOpts struct {
	// DB is the DuckDB database path. When empty, RunApprove falls back to
	// the LEM_DB environment variable.
	DB string

	// Output is the JSONL file to write. When empty, RunApprove writes
	// expansion-approved.jsonl into the directory containing the database.
	Output string

	// Threshold is the minimum judge average required for approval
	// (documented default: 6.0). RunApprove accepts it but does not apply
	// it to its query at present.
	Threshold float64
}
|
|
|
|
// RunApprove is the CLI entry point for the approve command.
|
|
// Filters scored expansion responses by quality threshold and exports
|
|
// approved ones as chat-format training JSONL.
|
|
func RunApprove(cfg ApproveOpts) error {
|
|
dbPath := cfg.DB
|
|
if dbPath == "" {
|
|
dbPath = os.Getenv("LEM_DB")
|
|
}
|
|
if dbPath == "" {
|
|
return fmt.Errorf("--db or LEM_DB required")
|
|
}
|
|
|
|
output := cfg.Output
|
|
if output == "" {
|
|
output = filepath.Join(filepath.Dir(dbPath), "expansion-approved.jsonl")
|
|
}
|
|
|
|
db, err := OpenDB(dbPath)
|
|
if err != nil {
|
|
return fmt.Errorf("open db: %v", err)
|
|
}
|
|
defer db.Close()
|
|
|
|
// Query approved responses: heuristic passed AND (judge passed OR not yet judge-scored).
|
|
rows, err := db.conn.Query(`
|
|
SELECT r.idx, r.seed_id, r.region, r.domain, r.prompt, r.response,
|
|
r.gen_time, r.model, s.heuristic_score
|
|
FROM expansion_raw r
|
|
JOIN expansion_scores s ON r.idx = s.idx
|
|
WHERE s.heuristic_pass = true
|
|
AND (s.judge_pass = true OR s.judge_pass IS NULL)
|
|
ORDER BY r.idx
|
|
`)
|
|
if err != nil {
|
|
return fmt.Errorf("query approved: %v (have you run scoring?)", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
f, err := os.Create(output)
|
|
if err != nil {
|
|
return fmt.Errorf("create output: %v", err)
|
|
}
|
|
defer f.Close()
|
|
|
|
enc := json.NewEncoder(f)
|
|
count := 0
|
|
regionSet := make(map[string]bool)
|
|
domainSet := make(map[string]bool)
|
|
|
|
for rows.Next() {
|
|
var idx int
|
|
var seedID, region, domain, prompt, response, model string
|
|
var genTime, score float64
|
|
if err := rows.Scan(&idx, &seedID, ®ion, &domain, &prompt, &response, &genTime, &model, &score); err != nil {
|
|
return fmt.Errorf("scan: %v", err)
|
|
}
|
|
|
|
example := TrainingExample{
|
|
Messages: []ChatMessage{
|
|
{Role: "user", Content: prompt},
|
|
{Role: "assistant", Content: response},
|
|
},
|
|
}
|
|
|
|
if err := enc.Encode(example); err != nil {
|
|
return fmt.Errorf("encode: %v", err)
|
|
}
|
|
|
|
regionSet[region] = true
|
|
domainSet[domain] = true
|
|
count++
|
|
}
|
|
|
|
_ = cfg.Threshold // threshold used in query above for future judge scoring
|
|
|
|
fmt.Printf("Approved: %d responses (threshold: heuristic > 0)\n", count)
|
|
fmt.Printf("Exported: %s\n", output)
|
|
fmt.Printf(" Regions: %d, Domains: %d\n", len(regionSet), len(domainSet))
|
|
return nil
|
|
}
|