feat: port 11 LEM data management commands into core ml
Ports all remaining LEM pipeline commands from pkg/lem into core ml,
eliminating the standalone LEM CLI dependency. Each command is split
into reusable business logic (pkg/ml/) and a thin cobra wrapper
(internal/cmd/ml/).
New commands: query, inventory, metrics, ingest, normalize, seed-influx,
consolidate, import-all, approve, publish, coverage.
Adds Path(), Exec(), QueryRowScan() convenience methods to DB type.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 04:02:28 +00:00
|
|
|
package ml
|
|
|
|
|
|
|
|
|
|
import (
|
2026-02-16 14:24:37 +00:00
|
|
|
"forge.lthn.ai/core/go/pkg/cli"
|
2026-02-17 19:19:40 +00:00
|
|
|
"forge.lthn.ai/core/go-ai/ml"
|
feat: port 11 LEM data management commands into core ml
Ports all remaining LEM pipeline commands from pkg/lem into core ml,
eliminating the standalone LEM CLI dependency. Each command is split
into reusable business logic (pkg/ml/) and a thin cobra wrapper
(internal/cmd/ml/).
New commands: query, inventory, metrics, ingest, normalize, seed-influx,
consolidate, import-all, approve, publish, coverage.
Adds Path(), Exec(), QueryRowScan() convenience methods to DB type.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 04:02:28 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Backing variables for the flags of the publish command. Each one is bound
// to its flag in init and read by runPublish.
var (
	// publishInputDir backs --input-dir (marked required in init).
	publishInputDir string
	// publishRepo backs --repo.
	publishRepo string
	// publishPublic backs --public.
	publishPublic bool
	// publishToken backs --token.
	publishToken string
	// publishDryRun backs --dry-run.
	publishDryRun bool
)
|
|
|
|
|
|
|
|
|
|
var publishCmd = &cli.Command{
|
|
|
|
|
Use: "publish",
|
|
|
|
|
Short: "Upload Parquet dataset to HuggingFace Hub",
|
|
|
|
|
Long: "Uploads train/valid/test Parquet files and an optional dataset card to a HuggingFace dataset repository.",
|
|
|
|
|
RunE: runPublish,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
publishCmd.Flags().StringVar(&publishInputDir, "input-dir", "", "Directory containing Parquet files (required)")
|
|
|
|
|
publishCmd.Flags().StringVar(&publishRepo, "repo", "lthn/LEM-golden-set", "HuggingFace dataset repo ID")
|
|
|
|
|
publishCmd.Flags().BoolVar(&publishPublic, "public", false, "Make dataset public")
|
|
|
|
|
publishCmd.Flags().StringVar(&publishToken, "token", "", "HuggingFace API token (defaults to HF_TOKEN env)")
|
|
|
|
|
publishCmd.Flags().BoolVar(&publishDryRun, "dry-run", false, "Show what would be uploaded without uploading")
|
|
|
|
|
_ = publishCmd.MarkFlagRequired("input-dir")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func runPublish(cmd *cli.Command, args []string) error {
|
|
|
|
|
return ml.Publish(ml.PublishConfig{
|
|
|
|
|
InputDir: publishInputDir,
|
|
|
|
|
Repo: publishRepo,
|
|
|
|
|
Public: publishPublic,
|
|
|
|
|
Token: publishToken,
|
|
|
|
|
DryRun: publishDryRun,
|
|
|
|
|
}, cmd.OutOrStdout())
|
|
|
|
|
}
|