Replace passthrough() + stdlib flag.FlagSet anti-pattern with proper cobra integration.

Every Run* function now takes a typed *Opts struct and returns error. Flags are registered via cli.StringFlag/IntFlag/etc. Commands participate in the Core lifecycle with full cobra flag parsing.

- 6 command groups: gen, score, data, export, infra, mon
- 25 commands converted, 0 passthrough() calls remain
- Delete passthrough() helper from lem.go
- Update export_test.go to use ExportOpts struct

Co-Authored-By: Virgil <virgil@lethean.io>
79 lines
4.3 KiB
Go
79 lines
4.3 KiB
Go
package lemcmd
|
|
|
|
import (
|
|
"time"
|
|
|
|
"forge.lthn.ai/core/cli/pkg/cli"
|
|
"forge.lthn.ai/lthn/lem/pkg/lem"
|
|
)
|
|
|
|
func addInfraCommands(root *cli.Command) {
|
|
infraGroup := cli.NewGroup("infra", "Infrastructure commands", "InfluxDB ingestion, DuckDB queries, and distributed workers.")
|
|
|
|
// ingest — push benchmark data into InfluxDB.
|
|
var ingestCfg lem.IngestOpts
|
|
ingestCmd := cli.NewCommand("ingest", "Ingest benchmark data into InfluxDB", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunIngest(ingestCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(ingestCmd, &ingestCfg.Content, "content", "", "", "Content scores JSONL file")
|
|
cli.StringFlag(ingestCmd, &ingestCfg.Capability, "capability", "", "", "Capability scores JSONL file")
|
|
cli.StringFlag(ingestCmd, &ingestCfg.TrainingLog, "training-log", "", "", "MLX LoRA training log file")
|
|
cli.StringFlag(ingestCmd, &ingestCfg.Model, "model", "m", "", "Model name tag (required)")
|
|
cli.StringFlag(ingestCmd, &ingestCfg.RunID, "run-id", "", "", "Run ID tag (defaults to model name)")
|
|
cli.StringFlag(ingestCmd, &ingestCfg.InfluxURL, "influx", "", "", "InfluxDB URL")
|
|
cli.StringFlag(ingestCmd, &ingestCfg.InfluxDB, "influx-db", "", "", "InfluxDB database name")
|
|
cli.IntFlag(ingestCmd, &ingestCfg.BatchSize, "batch-size", "", 100, "Lines per InfluxDB write batch")
|
|
infraGroup.AddCommand(ingestCmd)
|
|
|
|
// seed-influx — seed InfluxDB golden_gen from DuckDB.
|
|
var seedCfg lem.SeedInfluxOpts
|
|
seedCmd := cli.NewCommand("seed-influx", "Seed InfluxDB golden_gen from DuckDB", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunSeedInflux(seedCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(seedCmd, &seedCfg.DB, "db", "", "", "DuckDB database path (defaults to LEM_DB env)")
|
|
cli.StringFlag(seedCmd, &seedCfg.InfluxURL, "influx", "", "", "InfluxDB URL")
|
|
cli.StringFlag(seedCmd, &seedCfg.InfluxDB, "influx-db", "", "", "InfluxDB database name")
|
|
cli.BoolFlag(seedCmd, &seedCfg.Force, "force", "", false, "Re-seed even if InfluxDB already has data")
|
|
cli.IntFlag(seedCmd, &seedCfg.BatchSize, "batch-size", "", 500, "Lines per InfluxDB write batch")
|
|
infraGroup.AddCommand(seedCmd)
|
|
|
|
// query — run ad-hoc SQL against DuckDB.
|
|
var queryCfg lem.QueryOpts
|
|
queryCmd := cli.NewCommand("query", "Run ad-hoc SQL against DuckDB", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunQuery(queryCfg, args)
|
|
},
|
|
)
|
|
cli.StringFlag(queryCmd, &queryCfg.DB, "db", "", "", "DuckDB database path (defaults to LEM_DB env)")
|
|
cli.BoolFlag(queryCmd, &queryCfg.JSON, "json", "j", false, "Output as JSON instead of table")
|
|
infraGroup.AddCommand(queryCmd)
|
|
|
|
// worker — distributed inference worker node.
|
|
var workerCfg lem.WorkerOpts
|
|
workerCmd := cli.NewCommand("worker", "Run as distributed inference worker node", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunWorker(workerCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(workerCmd, &workerCfg.APIBase, "api", "", "", "LEM API base URL (or LEM_API env)")
|
|
cli.StringFlag(workerCmd, &workerCfg.WorkerID, "id", "", "", "Worker ID (or LEM_WORKER_ID env, defaults to machine UUID)")
|
|
cli.StringFlag(workerCmd, &workerCfg.Name, "name", "n", "", "Worker display name (or LEM_WORKER_NAME env)")
|
|
cli.StringFlag(workerCmd, &workerCfg.APIKey, "key", "k", "", "API key (or LEM_API_KEY env)")
|
|
cli.StringFlag(workerCmd, &workerCfg.GPUType, "gpu", "", "", "GPU type (e.g. 'RTX 3090', or LEM_GPU env)")
|
|
cli.IntFlag(workerCmd, &workerCfg.VRAMGb, "vram", "", 0, "GPU VRAM in GB (or LEM_VRAM_GB env)")
|
|
cli.StringFlag(workerCmd, &workerCfg.Languages, "languages", "", "", "Comma-separated language codes (or LEM_LANGUAGES env)")
|
|
cli.StringFlag(workerCmd, &workerCfg.Models, "models", "", "", "Comma-separated supported model names (or LEM_MODELS env)")
|
|
cli.StringFlag(workerCmd, &workerCfg.InferURL, "infer", "", "", "Local inference endpoint (or LEM_INFER_URL env)")
|
|
cli.StringFlag(workerCmd, &workerCfg.TaskType, "type", "t", "", "Filter by task type (expand, score, translate, seed)")
|
|
cli.IntFlag(workerCmd, &workerCfg.BatchSize, "batch", "b", 5, "Number of tasks to fetch per poll")
|
|
cli.DurationFlag(workerCmd, &workerCfg.PollInterval, "poll", "", 30*time.Second, "Poll interval")
|
|
cli.BoolFlag(workerCmd, &workerCfg.OneShot, "one-shot", "", false, "Process one batch and exit")
|
|
cli.BoolFlag(workerCmd, &workerCfg.DryRun, "dry-run", "", false, "Fetch tasks but don't run inference")
|
|
infraGroup.AddCommand(workerCmd)
|
|
|
|
root.AddCommand(infraGroup)
|
|
}
|