cli/internal/cmd/ml/cmd_worker.go
Claude 5ff4b8a2eb feat: add ML inference, scoring, and training pipeline (pkg/ml)
Port LEM scoring/training pipeline into CoreGo as pkg/ml with:
- Inference abstraction with HTTP, llama-server, and Ollama backends
- 3-tier scoring engine (heuristic, exact, LLM judge)
- Capability and content probes for model evaluation
- GGUF/safetensors format converters, MLX to PEFT adapter conversion
- DuckDB integration for training data pipeline
- InfluxDB metrics for lab dashboard
- Training data export (JSONL + Parquet)
- Expansion generation pipeline with distributed workers
- 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker)
- 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends)

All 37 ML tests passing. Binary builds at 138MB with all commands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 05:53:52 +00:00


package ml

import (
	"time"

	"forge.lthn.ai/core/cli/pkg/cli"
	"forge.lthn.ai/core/cli/pkg/ml"
)

// Flag values for the worker command. Defaults are resolved in init from
// LEM_* environment variables, falling back to built-in values.
var (
	workerAPIBase   string
	workerID        string
	workerName      string
	workerAPIKey    string
	workerGPU       string
	workerVRAM      int
	workerLangs     string
	workerModels    string
	workerInferURL  string
	workerTaskType  string
	workerBatchSize int
	workerPoll      time.Duration
	workerOneShot   bool
	workerDryRun    bool
)
// workerCmd implements "core ml worker": a node that polls the LEM API for
// tasks, runs them against a local inference endpoint, and reports results.
var workerCmd = &cli.Command{
	Use:   "worker",
	Short: "Run a distributed worker node",
	Long:  "Polls the LEM API for tasks, runs local inference, and submits results.",
	RunE:  runWorker,
}
func init() {
	// Each flag resolves its default from a LEM_* environment variable first.
	workerCmd.Flags().StringVar(&workerAPIBase, "api", ml.EnvOr("LEM_API", "https://infer.lthn.ai"), "LEM API base URL")
	workerCmd.Flags().StringVar(&workerID, "id", ml.EnvOr("LEM_WORKER_ID", ml.MachineID()), "Worker ID")
	workerCmd.Flags().StringVar(&workerName, "name", ml.EnvOr("LEM_WORKER_NAME", ml.Hostname()), "Worker display name")
	workerCmd.Flags().StringVar(&workerAPIKey, "key", ml.EnvOr("LEM_API_KEY", ""), "API key")
	workerCmd.Flags().StringVar(&workerGPU, "gpu", ml.EnvOr("LEM_GPU", ""), "GPU type")
	workerCmd.Flags().IntVar(&workerVRAM, "vram", ml.IntEnvOr("LEM_VRAM_GB", 0), "GPU VRAM in GB")
	workerCmd.Flags().StringVar(&workerLangs, "languages", ml.EnvOr("LEM_LANGUAGES", ""), "Comma-separated language codes")
	workerCmd.Flags().StringVar(&workerModels, "models", ml.EnvOr("LEM_MODELS", ""), "Comma-separated model names")
	workerCmd.Flags().StringVar(&workerInferURL, "infer", ml.EnvOr("LEM_INFER_URL", "http://localhost:8090"), "Local inference endpoint")
	workerCmd.Flags().StringVar(&workerTaskType, "type", "", "Filter by task type")
	workerCmd.Flags().IntVar(&workerBatchSize, "batch", 5, "Tasks per poll")
	workerCmd.Flags().DurationVar(&workerPoll, "poll", 30*time.Second, "Poll interval")
	workerCmd.Flags().BoolVar(&workerOneShot, "one-shot", false, "Process one batch and exit")
	workerCmd.Flags().BoolVar(&workerDryRun, "dry-run", false, "Fetch tasks but don't run inference")
}
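
// Example invocation (the values are illustrative, not defaults). Each flag
// also reads its LEM_* environment variable, via ml.EnvOr and ml.IntEnvOr,
// before the built-in default applies:
//
//	core ml worker --gpu rtx4090 --vram 24 --languages en,de --batch 10 --one-shot
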
func runWorker(cmd *cli.Command, args []string) error {
	// No --key flag and no LEM_API_KEY: fall back to the on-disk key file.
	if workerAPIKey == "" {
		workerAPIKey = ml.ReadKeyFile()
	}
	cfg := &ml.WorkerConfig{
		APIBase:      workerAPIBase,
		WorkerID:     workerID,
		Name:         workerName,
		APIKey:       workerAPIKey,
		GPUType:      workerGPU,
		VRAMGb:       workerVRAM,
		InferURL:     workerInferURL,
		TaskType:     workerTaskType,
		BatchSize:    workerBatchSize,
		PollInterval: workerPoll,
		OneShot:      workerOneShot,
		DryRun:       workerDryRun,
	}
	// Empty strings mean "no filter"; only populate the slices when set.
	if workerLangs != "" {
		cfg.Languages = ml.SplitComma(workerLangs)
	}
	if workerModels != "" {
		cfg.Models = ml.SplitComma(workerModels)
	}
	ml.RunWorkerLoop(cfg)
	return nil
}
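
The loop itself lives in pkg/ml rather than in this command file. Based on the command's Long description ("Polls the LEM API for tasks, runs local inference, and submits results") and the config fields wired up above, the control flow is roughly the following self-contained sketch. Everything here is an assumption: workerConfig is a local copy of the relevant fields, and fetchTasks, runInference, and submitResult are illustrative stubs, not the real pkg/ml API.

package main

import (
	"fmt"
	"time"
)

// workerConfig mirrors the subset of ml.WorkerConfig fields the loop needs
// (field names taken from runWorker above; this copy exists only for the sketch).
type workerConfig struct {
	BatchSize    int
	PollInterval time.Duration
	OneShot      bool
	DryRun       bool
}

// Stubs standing in for the API client and local inference calls.
func fetchTasks(batch int) []string    { return []string{"task-1", "task-2"} } // ignores batch
func runInference(task string) string  { return "result for " + task }
func submitResult(task, result string) { fmt.Println(task, "->", result) }

func runWorkerLoopSketch(cfg *workerConfig) {
	for {
		for _, t := range fetchTasks(cfg.BatchSize) { // poll for a batch of tasks
			if cfg.DryRun {
				continue // --dry-run: fetch tasks but skip inference
			}
			submitResult(t, runInference(t))
		}
		if cfg.OneShot {
			return // --one-shot: process one batch and exit
		}
		time.Sleep(cfg.PollInterval) // --poll: wait before the next batch
	}
}

func main() {
	runWorkerLoopSketch(&workerConfig{BatchSize: 2, PollInterval: 30 * time.Second, OneShot: true})
}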