Replace passthrough() + stdlib flag.FlagSet anti-pattern with proper cobra integration.

Every Run* function now takes a typed *Opts struct and returns error. Flags are registered via cli.StringFlag/IntFlag/etc. Commands participate in the Core lifecycle with full cobra flag parsing.

- 6 command groups: gen, score, data, export, infra, mon
- 25 commands converted, 0 passthrough() calls remain
- Delete passthrough() helper from lem.go
- Update export_test.go to use ExportOpts struct

Co-Authored-By: Virgil <virgil@lethean.io>
61 lines
3 KiB
Go
61 lines
3 KiB
Go
package lemcmd
|
|
|
|
import (
|
|
"forge.lthn.ai/core/cli/pkg/cli"
|
|
"forge.lthn.ai/lthn/lem/pkg/lem"
|
|
)
|
|
|
|
func addDataCommands(root *cli.Command) {
|
|
dataGroup := cli.NewGroup("data", "Data management commands", "Import, consolidate, normalise, and approve training data.")
|
|
|
|
// import-all — Import ALL LEM data into DuckDB from M3.
|
|
var importCfg lem.ImportOpts
|
|
importCmd := cli.NewCommand("import-all", "Import ALL LEM data into DuckDB from M3", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunImport(importCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(importCmd, &importCfg.DB, "db", "", "", "DuckDB database path (defaults to LEM_DB env)")
|
|
cli.BoolFlag(importCmd, &importCfg.SkipM3, "skip-m3", "", false, "Skip pulling data from M3")
|
|
cli.StringFlag(importCmd, &importCfg.DataDir, "data-dir", "", "", "Local data directory (defaults to db directory)")
|
|
dataGroup.AddCommand(importCmd)
|
|
|
|
// consolidate — Pull worker JSONLs from M3, merge, deduplicate.
|
|
var consolidateCfg lem.ConsolidateOpts
|
|
consolidateCmd := cli.NewCommand("consolidate", "Pull worker JSONLs from M3, merge, deduplicate", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunConsolidate(consolidateCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(consolidateCmd, &consolidateCfg.Host, "host", "", "m3", "SSH host for remote files")
|
|
cli.StringFlag(consolidateCmd, &consolidateCfg.Remote, "remote", "", "/Volumes/Data/lem/responses", "Remote directory for JSONL files")
|
|
cli.StringFlag(consolidateCmd, &consolidateCfg.Pattern, "pattern", "", "gold*.jsonl", "File glob pattern")
|
|
cli.StringFlag(consolidateCmd, &consolidateCfg.OutputDir, "output", "o", "", "Output directory (defaults to ./responses)")
|
|
cli.StringFlag(consolidateCmd, &consolidateCfg.Merged, "merged", "", "", "Merged output file (defaults to gold-merged.jsonl in output dir)")
|
|
dataGroup.AddCommand(consolidateCmd)
|
|
|
|
// normalize — Normalise seeds to deduplicated expansion prompts.
|
|
var normalizeCfg lem.NormalizeOpts
|
|
normalizeCmd := cli.NewCommand("normalize", "Normalise seeds to deduplicated expansion prompts", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunNormalize(normalizeCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(normalizeCmd, &normalizeCfg.DB, "db", "", "", "DuckDB database path (defaults to LEM_DB env)")
|
|
cli.IntFlag(normalizeCmd, &normalizeCfg.MinLen, "min-length", "", 50, "Minimum prompt length in characters")
|
|
dataGroup.AddCommand(normalizeCmd)
|
|
|
|
// approve — Filter scored expansions to training JSONL.
|
|
var approveCfg lem.ApproveOpts
|
|
approveCmd := cli.NewCommand("approve", "Filter scored expansions to training JSONL", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunApprove(approveCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(approveCmd, &approveCfg.DB, "db", "", "", "DuckDB database path (defaults to LEM_DB env)")
|
|
cli.StringFlag(approveCmd, &approveCfg.Output, "output", "o", "", "Output JSONL file (defaults to expansion-approved.jsonl in db dir)")
|
|
cli.Float64Flag(approveCmd, &approveCfg.Threshold, "threshold", "", 6.0, "Min judge average to approve")
|
|
dataGroup.AddCommand(approveCmd)
|
|
|
|
root.AddCommand(dataGroup)
|
|
}
|