cli/internal/cmd/ml/cmd_publish.go
Claude c5bc97de19 feat: port 11 LEM data management commands into core ml
Ports all remaining LEM pipeline commands from pkg/lem into core ml,
eliminating the standalone LEM CLI dependency. Each command is split
into reusable business logic (pkg/ml/) and a thin cobra wrapper
(internal/cmd/ml/).

New commands: query, inventory, metrics, ingest, normalize, seed-influx,
consolidate, import-all, approve, publish, coverage.

Adds Path(), Exec(), QueryRowScan() convenience methods to DB type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 05:53:52 +00:00

40 lines
1.3 KiB
Go

package ml
import (
"forge.lthn.ai/core/cli/pkg/cli"
"forge.lthn.ai/core/cli/pkg/ml"
)
// Destination variables for the publish command's flags. They are bound
// to the flag set in init and read by runPublish.
var (
	// String-valued flags: --input-dir, --repo and --token.
	publishInputDir, publishRepo, publishToken string

	// Boolean flags: --public and --dry-run.
	publishPublic, publishDryRun bool
)
// publishCmd is the "publish" subcommand. Its flags are registered in init
// and its work is delegated to runPublish.
var publishCmd = &cli.Command{
	RunE:  runPublish,
	Use:   "publish",
	Short: "Upload Parquet dataset to HuggingFace Hub",
	Long:  "Uploads train/valid/test Parquet files and an optional dataset card to a HuggingFace dataset repository.",
}
// init binds the publish command's flags to the package-level publish*
// variables and marks --input-dir as required.
func init() {
	flags := publishCmd.Flags()

	flags.StringVar(&publishInputDir, "input-dir", "", "Directory containing Parquet files (required)")
	flags.StringVar(&publishRepo, "repo", "lthn/LEM-golden-set", "HuggingFace dataset repo ID")
	flags.BoolVar(&publishPublic, "public", false, "Make dataset public")
	flags.StringVar(&publishToken, "token", "", "HuggingFace API token (defaults to HF_TOKEN env)")
	flags.BoolVar(&publishDryRun, "dry-run", false, "Show what would be uploaded without uploading")

	// The error is discarded on purpose: "input-dir" is registered just
	// above, so the lookup is not expected to fail.
	// NOTE(review): presumably the only failure mode is an unknown flag
	// name — confirm against the command library's documentation.
	_ = publishCmd.MarkFlagRequired("input-dir")
}
// runPublish is the RunE handler for publishCmd. It copies the current flag
// values into an ml.PublishConfig and calls ml.Publish with that config and
// cmd.OutOrStdout(), returning whatever error ml.Publish reports. The
// positional args are not used.
//
// NOTE(review): the --token help text promises an HF_TOKEN fallback, but this
// wrapper never reads the environment; presumably ml.Publish applies the
// fallback when Token is empty — confirm.
func runPublish(cmd *cli.Command, args []string) error {
	cfg := ml.PublishConfig{
		InputDir: publishInputDir,
		Repo:     publishRepo,
		Public:   publishPublic,
		Token:    publishToken,
		DryRun:   publishDryRun,
	}
	out := cmd.OutOrStdout()
	return ml.Publish(cfg, out)
}