feat: port 11 LEM data management commands into core ml
Ports all remaining LEM pipeline commands from pkg/lem into core ml,
eliminating the standalone LEM CLI dependency. Each command is split
into reusable business logic (pkg/ml/) and a thin cobra wrapper
(internal/cmd/ml/).
New commands: query, inventory, metrics, ingest, normalize, seed-influx,
consolidate, import-all, approve, publish, coverage.
Adds Path(), Exec(), QueryRowScan() convenience methods to DB type.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 04:02:28 +00:00
|
|
|
package ml
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"fmt"
|
|
|
|
|
"os"
|
|
|
|
|
|
2026-02-16 14:24:37 +00:00
|
|
|
"forge.lthn.ai/core/go/pkg/cli"
|
2026-02-17 19:19:40 +00:00
|
|
|
"forge.lthn.ai/core/go-ai/ml"
|
feat: port 11 LEM data management commands into core ml
Ports all remaining LEM pipeline commands from pkg/lem into core ml,
eliminating the standalone LEM CLI dependency. Each command is split
into reusable business logic (pkg/ml/) and a thin cobra wrapper
(internal/cmd/ml/).
New commands: query, inventory, metrics, ingest, normalize, seed-influx,
consolidate, import-all, approve, publish, coverage.
Adds Path(), Exec(), QueryRowScan() convenience methods to DB type.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 04:02:28 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
var ingestCmd = &cli.Command{
|
|
|
|
|
Use: "ingest",
|
|
|
|
|
Short: "Ingest benchmark scores and training logs into InfluxDB",
|
|
|
|
|
Long: "Reads content score, capability score, and training log files and writes measurements to InfluxDB for the lab dashboard.",
|
|
|
|
|
RunE: runIngest,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Flag-backed settings for the ingest command. Each variable is bound to a
// flag on ingestCmd in init and read by runIngest.
var (
	ingestContent    string // --content: content scores JSONL file
	ingestCapability string // --capability: capability scores JSONL file
	ingestTraining   string // --training-log: MLX LoRA training log file
	ingestRunID      string // --run-id: run ID tag
	ingestBatchSize  int    // --batch-size: lines per InfluxDB write batch (flag default 100)
)
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
ingestCmd.Flags().StringVar(&ingestContent, "content", "", "Content scores JSONL file")
|
|
|
|
|
ingestCmd.Flags().StringVar(&ingestCapability, "capability", "", "Capability scores JSONL file")
|
|
|
|
|
ingestCmd.Flags().StringVar(&ingestTraining, "training-log", "", "MLX LoRA training log file")
|
|
|
|
|
ingestCmd.Flags().StringVar(&ingestRunID, "run-id", "", "Run ID tag (defaults to model name)")
|
|
|
|
|
ingestCmd.Flags().IntVar(&ingestBatchSize, "batch-size", 100, "Lines per InfluxDB write batch")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func runIngest(cmd *cli.Command, args []string) error {
|
|
|
|
|
if modelName == "" {
|
|
|
|
|
return fmt.Errorf("--model is required")
|
|
|
|
|
}
|
|
|
|
|
if ingestContent == "" && ingestCapability == "" && ingestTraining == "" {
|
|
|
|
|
return fmt.Errorf("at least one of --content, --capability, or --training-log is required")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
influx := ml.NewInfluxClient(influxURL, influxDB)
|
|
|
|
|
|
|
|
|
|
cfg := ml.IngestConfig{
|
|
|
|
|
ContentFile: ingestContent,
|
|
|
|
|
CapabilityFile: ingestCapability,
|
|
|
|
|
TrainingLog: ingestTraining,
|
|
|
|
|
Model: modelName,
|
|
|
|
|
RunID: ingestRunID,
|
|
|
|
|
BatchSize: ingestBatchSize,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ml.Ingest(influx, cfg, os.Stdout)
|
|
|
|
|
}
|