Replace passthrough() + stdlib flag.FlagSet anti-pattern with proper cobra integration. Every Run* function now takes a typed *Opts struct and returns error. Flags registered via cli.StringFlag/IntFlag/etc. Commands participate in Core lifecycle with full cobra flag parsing. - 6 command groups: gen, score, data, export, infra, mon - 25 commands converted, 0 passthrough() calls remain - Delete passthrough() helper from lem.go - Update export_test.go to use ExportOpts struct Co-Authored-By: Virgil <virgil@lethean.io>
67 lines
3.6 KiB
Go
67 lines
3.6 KiB
Go
package lemcmd
|
|
|
|
import (
|
|
"forge.lthn.ai/core/cli/pkg/cli"
|
|
"forge.lthn.ai/lthn/lem/pkg/lem"
|
|
)
|
|
|
|
func addExportCommands(root *cli.Command) {
|
|
exportGroup := cli.NewGroup("export", "Export and publish commands", "Export training data to JSONL, Parquet, HuggingFace, and PEFT formats.")
|
|
|
|
// jsonl — export golden set to training-format JSONL splits.
|
|
var exportCfg lem.ExportOpts
|
|
jsonlCmd := cli.NewCommand("jsonl", "Export golden set to training-format JSONL splits", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunExport(exportCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(jsonlCmd, &exportCfg.DBPath, "db", "", "", "DuckDB database path (primary source; defaults to LEM_DB env)")
|
|
cli.StringFlag(jsonlCmd, &exportCfg.Input, "input", "i", "", "Input golden set JSONL file (fallback if --db not set)")
|
|
cli.StringFlag(jsonlCmd, &exportCfg.OutputDir, "output-dir", "o", "", "Output directory for training files (required)")
|
|
cli.IntFlag(jsonlCmd, &exportCfg.TrainPct, "train-pct", "", 90, "Training set percentage")
|
|
cli.IntFlag(jsonlCmd, &exportCfg.ValidPct, "valid-pct", "", 5, "Validation set percentage")
|
|
cli.IntFlag(jsonlCmd, &exportCfg.TestPct, "test-pct", "", 5, "Test set percentage")
|
|
cli.Int64Flag(jsonlCmd, &exportCfg.Seed, "seed", "", 42, "Random seed for shuffling")
|
|
cli.IntFlag(jsonlCmd, &exportCfg.MinChars, "min-chars", "", 50, "Minimum response character count")
|
|
exportGroup.AddCommand(jsonlCmd)
|
|
|
|
// parquet — export JSONL training splits to Parquet.
|
|
var parquetCfg lem.ParquetOpts
|
|
parquetCmd := cli.NewCommand("parquet", "Export JSONL training splits to Parquet", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunParquet(parquetCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(parquetCmd, &parquetCfg.Input, "input", "i", "", "Directory containing train.jsonl, valid.jsonl, test.jsonl (required)")
|
|
cli.StringFlag(parquetCmd, &parquetCfg.Output, "output", "o", "", "Output directory for Parquet files (defaults to input/parquet)")
|
|
exportGroup.AddCommand(parquetCmd)
|
|
|
|
// publish — push Parquet files to HuggingFace dataset repo.
|
|
var publishCfg lem.PublishOpts
|
|
publishCmd := cli.NewCommand("publish", "Push Parquet files to HuggingFace dataset repo", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunPublish(publishCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(publishCmd, &publishCfg.Input, "input", "i", "", "Directory containing Parquet files (required)")
|
|
cli.StringFlag(publishCmd, &publishCfg.Repo, "repo", "", "lthn/LEM-golden-set", "HuggingFace dataset repo ID")
|
|
cli.BoolFlag(publishCmd, &publishCfg.Public, "public", "", false, "Make dataset public")
|
|
cli.StringFlag(publishCmd, &publishCfg.Token, "token", "", "", "HuggingFace API token (defaults to HF_TOKEN env)")
|
|
cli.BoolFlag(publishCmd, &publishCfg.DryRun, "dry-run", "", false, "Show what would be uploaded without uploading")
|
|
exportGroup.AddCommand(publishCmd)
|
|
|
|
// convert — convert MLX LoRA adapter to PEFT format.
|
|
var convertCfg lem.ConvertOpts
|
|
convertCmd := cli.NewCommand("convert", "Convert MLX LoRA adapter to PEFT format", "",
|
|
func(cmd *cli.Command, args []string) error {
|
|
return lem.RunConvert(convertCfg)
|
|
},
|
|
)
|
|
cli.StringFlag(convertCmd, &convertCfg.Input, "input", "i", "", "Path to MLX .safetensors file (required)")
|
|
cli.StringFlag(convertCmd, &convertCfg.Config, "config", "c", "", "Path to MLX adapter_config.json (required)")
|
|
cli.StringFlag(convertCmd, &convertCfg.Output, "output", "o", "./peft_output", "Output directory for PEFT adapter")
|
|
cli.StringFlag(convertCmd, &convertCfg.BaseModel, "base-model", "", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "HuggingFace base model ID")
|
|
exportGroup.AddCommand(convertCmd)
|
|
|
|
root.AddCommand(exportGroup)
|
|
}
|