LEM/cmd/lemcmd/data.go
Snider 56eda1a081 refactor: migrate all 25 commands from passthrough to cobra framework
Replace passthrough() + stdlib flag.FlagSet anti-pattern with proper
cobra integration. Every Run* function now takes a typed *Opts struct
and returns error. Flags registered via cli.StringFlag/IntFlag/etc.
Commands participate in Core lifecycle with full cobra flag parsing.

- 6 command groups: gen, score, data, export, infra, mon
- 25 commands converted, 0 passthrough() calls remain
- Delete passthrough() helper from lem.go
- Update export_test.go to use ExportOpts struct

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-23 03:32:53 +00:00

61 lines
3 KiB
Go

package lemcmd
import (
"forge.lthn.ai/core/cli/pkg/cli"
"forge.lthn.ai/lthn/lem/pkg/lem"
)
// addDataCommands attaches the "data" command group to root. The group holds
// four subcommands (import-all, consolidate, normalize and approve). Each one
// has its own lem options struct whose fields are handed by pointer to the cli
// flag helpers, and whose value is passed to the corresponding lem.Run*
// function when the command executes.
func addDataCommands(root *cli.Command) {
	// One options value per subcommand. The run closures below capture these
	// variables, so they see whatever the flag helpers stored in them.
	var (
		importOpts      lem.ImportOpts
		consolidateOpts lem.ConsolidateOpts
		normalizeOpts   lem.NormalizeOpts
		approveOpts     lem.ApproveOpts
	)

	group := cli.NewGroup("data", "Data management commands", "Import, consolidate, normalise, and approve training data.")

	// import-all: load all LEM data into DuckDB, optionally skipping the M3 pull.
	importAll := cli.NewCommand("import-all", "Import ALL LEM data into DuckDB from M3", "",
		func(_ *cli.Command, _ []string) error {
			return lem.RunImport(importOpts)
		},
	)
	cli.StringFlag(importAll, &importOpts.DB, "db", "", "", "DuckDB database path (defaults to LEM_DB env)")
	cli.BoolFlag(importAll, &importOpts.SkipM3, "skip-m3", "", false, "Skip pulling data from M3")
	cli.StringFlag(importAll, &importOpts.DataDir, "data-dir", "", "", "Local data directory (defaults to db directory)")
	group.AddCommand(importAll)

	// consolidate: fetch worker JSONL files from the remote host, merge and deduplicate.
	consolidate := cli.NewCommand("consolidate", "Pull worker JSONLs from M3, merge, deduplicate", "",
		func(_ *cli.Command, _ []string) error {
			return lem.RunConsolidate(consolidateOpts)
		},
	)
	cli.StringFlag(consolidate, &consolidateOpts.Host, "host", "", "m3", "SSH host for remote files")
	cli.StringFlag(consolidate, &consolidateOpts.Remote, "remote", "", "/Volumes/Data/lem/responses", "Remote directory for JSONL files")
	cli.StringFlag(consolidate, &consolidateOpts.Pattern, "pattern", "", "gold*.jsonl", "File glob pattern")
	cli.StringFlag(consolidate, &consolidateOpts.OutputDir, "output", "o", "", "Output directory (defaults to ./responses)")
	cli.StringFlag(consolidate, &consolidateOpts.Merged, "merged", "", "", "Merged output file (defaults to gold-merged.jsonl in output dir)")
	group.AddCommand(consolidate)

	// normalize: turn seeds into deduplicated expansion prompts.
	normalize := cli.NewCommand("normalize", "Normalise seeds to deduplicated expansion prompts", "",
		func(_ *cli.Command, _ []string) error {
			return lem.RunNormalize(normalizeOpts)
		},
	)
	cli.StringFlag(normalize, &normalizeOpts.DB, "db", "", "", "DuckDB database path (defaults to LEM_DB env)")
	cli.IntFlag(normalize, &normalizeOpts.MinLen, "min-length", "", 50, "Minimum prompt length in characters")
	group.AddCommand(normalize)

	// approve: filter scored expansions into a training JSONL file.
	approve := cli.NewCommand("approve", "Filter scored expansions to training JSONL", "",
		func(_ *cli.Command, _ []string) error {
			return lem.RunApprove(approveOpts)
		},
	)
	cli.StringFlag(approve, &approveOpts.DB, "db", "", "", "DuckDB database path (defaults to LEM_DB env)")
	cli.StringFlag(approve, &approveOpts.Output, "output", "o", "", "Output JSONL file (defaults to expansion-approved.jsonl in db dir)")
	cli.Float64Flag(approve, &approveOpts.Threshold, "threshold", "", 6.0, "Min judge average to approve")
	group.AddCommand(approve)

	root.AddCommand(group)
}