LEM/cmd/lemcmd/export.go
Snider 56eda1a081 refactor: migrate all 25 commands from passthrough to cobra framework
Replace passthrough() + stdlib flag.FlagSet anti-pattern with proper
cobra integration. Every Run* function now takes a typed *Opts struct
and returns error. Flags registered via cli.StringFlag/IntFlag/etc.
Commands participate in Core lifecycle with full cobra flag parsing.

- 6 command groups: gen, score, data, export, infra, mon
- 25 commands converted, 0 passthrough() calls remain
- Delete passthrough() helper from lem.go
- Update export_test.go to use ExportOpts struct

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-23 03:32:53 +00:00

67 lines
3.6 KiB
Go

package lemcmd
import (
"forge.lthn.ai/core/cli/pkg/cli"
"forge.lthn.ai/lthn/lem/pkg/lem"
)
// addExportCommands builds the "export" command group and attaches it to root.
//
// The group carries four subcommands — jsonl, parquet, publish and convert.
// Each subcommand binds its flags directly to the fields of a dedicated
// options struct from the lem package; the command's run function then passes
// that struct to the matching lem.Run* entry point and returns its error.
func addExportCommands(root *cli.Command) {
	group := cli.NewGroup("export", "Export and publish commands", "Export training data to JSONL, Parquet, HuggingFace, and PEFT formats.")

	// export jsonl: golden set -> training-format JSONL splits.
	var jsonlOpts lem.ExportOpts
	runJSONL := func(_ *cli.Command, _ []string) error {
		return lem.RunExport(jsonlOpts)
	}
	jsonl := cli.NewCommand("jsonl", "Export golden set to training-format JSONL splits", "", runJSONL)
	cli.StringFlag(jsonl, &jsonlOpts.DBPath, "db", "", "", "DuckDB database path (primary source; defaults to LEM_DB env)")
	cli.StringFlag(jsonl, &jsonlOpts.Input, "input", "i", "", "Input golden set JSONL file (fallback if --db not set)")
	cli.StringFlag(jsonl, &jsonlOpts.OutputDir, "output-dir", "o", "", "Output directory for training files (required)")
	cli.IntFlag(jsonl, &jsonlOpts.TrainPct, "train-pct", "", 90, "Training set percentage")
	cli.IntFlag(jsonl, &jsonlOpts.ValidPct, "valid-pct", "", 5, "Validation set percentage")
	cli.IntFlag(jsonl, &jsonlOpts.TestPct, "test-pct", "", 5, "Test set percentage")
	cli.Int64Flag(jsonl, &jsonlOpts.Seed, "seed", "", 42, "Random seed for shuffling")
	cli.IntFlag(jsonl, &jsonlOpts.MinChars, "min-chars", "", 50, "Minimum response character count")
	group.AddCommand(jsonl)

	// export parquet: JSONL training splits -> Parquet files.
	var parquetOpts lem.ParquetOpts
	runParquet := func(_ *cli.Command, _ []string) error {
		return lem.RunParquet(parquetOpts)
	}
	parquet := cli.NewCommand("parquet", "Export JSONL training splits to Parquet", "", runParquet)
	cli.StringFlag(parquet, &parquetOpts.Input, "input", "i", "", "Directory containing train.jsonl, valid.jsonl, test.jsonl (required)")
	cli.StringFlag(parquet, &parquetOpts.Output, "output", "o", "", "Output directory for Parquet files (defaults to input/parquet)")
	group.AddCommand(parquet)

	// export publish: Parquet files -> HuggingFace dataset repo.
	var publishOpts lem.PublishOpts
	runPublish := func(_ *cli.Command, _ []string) error {
		return lem.RunPublish(publishOpts)
	}
	publish := cli.NewCommand("publish", "Push Parquet files to HuggingFace dataset repo", "", runPublish)
	cli.StringFlag(publish, &publishOpts.Input, "input", "i", "", "Directory containing Parquet files (required)")
	cli.StringFlag(publish, &publishOpts.Repo, "repo", "", "lthn/LEM-golden-set", "HuggingFace dataset repo ID")
	cli.BoolFlag(publish, &publishOpts.Public, "public", "", false, "Make dataset public")
	cli.StringFlag(publish, &publishOpts.Token, "token", "", "", "HuggingFace API token (defaults to HF_TOKEN env)")
	cli.BoolFlag(publish, &publishOpts.DryRun, "dry-run", "", false, "Show what would be uploaded without uploading")
	group.AddCommand(publish)

	// export convert: MLX LoRA adapter -> PEFT format.
	var convertOpts lem.ConvertOpts
	runConvert := func(_ *cli.Command, _ []string) error {
		return lem.RunConvert(convertOpts)
	}
	convert := cli.NewCommand("convert", "Convert MLX LoRA adapter to PEFT format", "", runConvert)
	cli.StringFlag(convert, &convertOpts.Input, "input", "i", "", "Path to MLX .safetensors file (required)")
	cli.StringFlag(convert, &convertOpts.Config, "config", "c", "", "Path to MLX adapter_config.json (required)")
	cli.StringFlag(convert, &convertOpts.Output, "output", "o", "./peft_output", "Output directory for PEFT adapter")
	cli.StringFlag(convert, &convertOpts.BaseModel, "base-model", "", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "HuggingFace base model ID")
	group.AddCommand(convert)

	root.AddCommand(group)
}