feat: add ML inference, scoring, and training pipeline (pkg/ml)
Port LEM scoring/training pipeline into CoreGo as pkg/ml with:
- Inference abstraction with HTTP, llama-server, and Ollama backends
- 3-tier scoring engine (heuristic, exact, LLM judge)
- Capability and content probes for model evaluation
- GGUF/safetensors format converters, MLX to PEFT adapter conversion
- DuckDB integration for training data pipeline
- InfluxDB metrics for lab dashboard
- Training data export (JSONL + Parquet)
- Expansion generation pipeline with distributed workers
- 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker); an example invocation is shown below
- 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends)
All 37 ML tests passing. Binary builds at 138MB with all commands.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
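
Example invocation of the expand command (values are hypothetical; --model
and --db are assumed to be shared flags registered on the parent 'core ml'
command, as the validation in runExpand below suggests):

    core ml expand --model my-model --db lem.duckdb --limit 100 --dry-run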

package ml

import (
	"context"
	"fmt"
	"os"

	"forge.lthn.ai/core/go/pkg/cli"
	"forge.lthn.ai/core/go/pkg/ml"
)
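
// modelName, dbPath, apiURL, influxURL, and influxDB are referenced below but
// not declared in this file; they are assumed to be package-level flag
// variables shared across the 'core ml' subcommands and defined in a sibling
// file.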

var (
	expandWorker string
	expandOutput string
	expandLimit  int
	expandDryRun bool
)

// expandCmd implements "core ml expand", the expansion-generation subcommand.
var expandCmd = &cli.Command{
	Use:   "expand",
	Short: "Generate expansion responses from pending prompts",
	Long:  "Reads pending expansion prompts from DuckDB and generates responses via an OpenAI-compatible API.",
	RunE:  runExpand,
}

func init() {
	expandCmd.Flags().StringVar(&expandWorker, "worker", "", "Worker hostname (defaults to os.Hostname())")
	expandCmd.Flags().StringVar(&expandOutput, "output", ".", "Output directory for JSONL files")
	expandCmd.Flags().IntVar(&expandLimit, "limit", 0, "Max prompts to process (0 = all)")
	expandCmd.Flags().BoolVar(&expandDryRun, "dry-run", false, "Print plan and exit without generating")
}

func runExpand(cmd *cli.Command, args []string) error {
	if modelName == "" {
		return fmt.Errorf("--model is required")
	}

	// Resolve the DuckDB path from the shared --db flag, falling back to
	// the LEM_DB environment variable.
	path := dbPath
	if path == "" {
		path = os.Getenv("LEM_DB")
	}
	if path == "" {
		return fmt.Errorf("--db or LEM_DB env is required")
	}

	// Default the worker ID to the local hostname when --worker is unset.
	if expandWorker == "" {
		h, _ := os.Hostname()
		expandWorker = h
	}

	db, err := ml.OpenDBReadWrite(path)
	if err != nil {
		return fmt.Errorf("open db: %w", err)
	}
	defer db.Close()

	rows, err := db.QueryExpansionPrompts("pending", expandLimit)
	if err != nil {
		return fmt.Errorf("query expansion_prompts: %w", err)
	}
	fmt.Printf("Loaded %d pending prompts from %s\n", len(rows), path)

	// Build the generation queue, preferring the native prompt and falling
	// back to the English variant when the native one is empty.
	var prompts []ml.Response
	for _, r := range rows {
		prompt := r.Prompt
		if prompt == "" && r.PromptEn != "" {
			prompt = r.PromptEn
		}
		prompts = append(prompts, ml.Response{
			ID:     r.SeedID,
			Domain: r.Domain,
			Prompt: prompt,
		})
	}

	ctx := context.Background()
	backend := ml.NewHTTPBackend(apiURL, modelName)
	influx := ml.NewInfluxClient(influxURL, influxDB)

	return ml.ExpandPrompts(ctx, backend, influx, prompts, modelName, expandWorker, expandOutput, expandDryRun, expandLimit)
}
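
// A minimal programmatic sketch of the same pipeline, bypassing the CLI.
// Endpoint, model, database path, and worker values are hypothetical, error
// handling is elided, and only the pkg/ml calls already used above are assumed:
//
//	db, _ := ml.OpenDBReadWrite("lem.duckdb")
//	defer db.Close()
//	rows, _ := db.QueryExpansionPrompts("pending", 10)
//	prompts := make([]ml.Response, 0, len(rows))
//	for _, r := range rows {
//		prompts = append(prompts, ml.Response{ID: r.SeedID, Domain: r.Domain, Prompt: r.Prompt})
//	}
//	backend := ml.NewHTTPBackend("http://localhost:8080/v1", "my-model")
//	influx := ml.NewInfluxClient("http://localhost:8086", "lab")
//	_ = ml.ExpandPrompts(context.Background(), backend, influx, prompts,
//		"my-model", "worker-1", ".", true, 10)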