cli/cmd/ml/cmd_probe.go

package ml

import (
	"context"
	"encoding/json"
	"fmt"
	"os"

	"forge.lthn.ai/core/go/pkg/cli"
	"forge.lthn.ai/core/go-ai/ml"
)

var (
	probeOutput string
)

var probeCmd = &cli.Command{
	Use:   "probe",
	Short: "Run capability and content probes against a model",
	Long:  "Runs 23 capability probes and 6 content probes against an OpenAI-compatible API.",
	RunE:  runProbe,
}

func init() {
	probeCmd.Flags().StringVar(&probeOutput, "output", "", "Output JSON file for probe results")
}

func runProbe(cmd *cli.Command, args []string) error {
	if apiURL == "" {
		return fmt.Errorf("--api-url is required")
	}

	model := modelName
	if model == "" {
		model = "default"
	}

	ctx := context.Background()
	backend := ml.NewHTTPBackend(apiURL, model)

	fmt.Printf("Running %d capability probes against %s...\n", len(ml.CapabilityProbes), apiURL)
	results := ml.RunCapabilityProbes(ctx, backend)

	fmt.Printf("\nResults: %.1f%% (%d/%d)\n", results.Accuracy, results.Correct, results.Total)

	for cat, data := range results.ByCategory {
		catAcc := 0.0
		if data.Total > 0 {
			catAcc = float64(data.Correct) / float64(data.Total) * 100
		}
		fmt.Printf("  %-20s %d/%d (%.0f%%)\n", cat, data.Correct, data.Total, catAcc)
	}

	if probeOutput != "" {
		data, err := json.MarshalIndent(results, "", "  ")
		if err != nil {
			return fmt.Errorf("marshal results: %w", err)
		}
		if err := os.WriteFile(probeOutput, data, 0644); err != nil {
			return fmt.Errorf("write output: %w", err)
		}
		fmt.Printf("\nResults written to %s\n", probeOutput)
	}

	return nil
}
feat: add ML inference, scoring, and training pipeline (pkg/ml) Port LEM scoring/training pipeline into CoreGo as pkg/ml with: - Inference abstraction with HTTP, llama-server, and Ollama backends - 3-tier scoring engine (heuristic, exact, LLM judge) - Capability and content probes for model evaluation - GGUF/safetensors format converters, MLX to PEFT adapter conversion - DuckDB integration for training data pipeline - InfluxDB metrics for lab dashboard - Training data export (JSONL + Parquet) - Expansion generation pipeline with distributed workers - 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker) - 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends) All 37 ML tests passing. Binary builds at 138MB with all commands. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-16 00:34:53 +00:00			`package ml`

			`import (`
			`"context"`
			`"encoding/json"`
			`"fmt"`
			`"os"`

refactor: split CLI from monorepo, import core/go as library (#1) - Change module from forge.lthn.ai/core/go to forge.lthn.ai/core/cli - Remove pkg/ directory (now served from core/go) - Add require + replace for forge.lthn.ai/core/go => ../go - Update go.work to include ../go workspace module - Fix all internal/cmd/* imports: pkg/ refs → forge.lthn.ai/core/go/pkg/ - Rename internal/cmd/sdk package to sdkcmd (avoids conflict with pkg/sdk) - Remove SDK library files from internal/cmd/sdk/ (now in core/go/pkg/sdk/) - Remove duplicate RAG helper functions from internal/cmd/rag/ - Remove stale cmd/core-ide/ (now in core/ide repo) - Update IDE variant to remove core-ide import - Fix test assertion for new module name - Run go mod tidy to sync dependencies core/cli is now a pure CLI application importing core/go for packages. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Claude <developers@lethean.io> Reviewed-on: https://forge.lthn.ai/core/cli/pulls/1 2026-02-16 14:24:37 +00:00			`"forge.lthn.ai/core/go/pkg/cli"`
chore: update module paths and daemon refactor Sync CLI module imports across all command packages. Refactor daemon command with expanded functionality. Update go.mod and go.work dependencies. Co-Authored-By: Virgil <virgil@lethean.io> 2026-02-17 19:19:40 +00:00			`"forge.lthn.ai/core/go-ai/ml"`
feat: add ML inference, scoring, and training pipeline (pkg/ml) Port LEM scoring/training pipeline into CoreGo as pkg/ml with: - Inference abstraction with HTTP, llama-server, and Ollama backends - 3-tier scoring engine (heuristic, exact, LLM judge) - Capability and content probes for model evaluation - GGUF/safetensors format converters, MLX to PEFT adapter conversion - DuckDB integration for training data pipeline - InfluxDB metrics for lab dashboard - Training data export (JSONL + Parquet) - Expansion generation pipeline with distributed workers - 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker) - 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends) All 37 ML tests passing. Binary builds at 138MB with all commands. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-16 00:34:53 +00:00			`)`

			`var (`
			`probeOutput string`
			`)`

			`var probeCmd = &cli.Command{`
			`Use: "probe",`
			`Short: "Run capability and content probes against a model",`
			`Long: "Runs 23 capability probes and 6 content probes against an OpenAI-compatible API.",`
			`RunE: runProbe,`
			`}`

			`func init() {`
			`probeCmd.Flags().StringVar(&probeOutput, "output", "", "Output JSON file for probe results")`
			`}`

			`func runProbe(cmd *cli.Command, args []string) error {`
			`if apiURL == "" {`
			`return fmt.Errorf("--api-url is required")`
			`}`

			`model := modelName`
			`if model == "" {`
			`model = "default"`
			`}`

			`ctx := context.Background()`
			`backend := ml.NewHTTPBackend(apiURL, model)`

			`fmt.Printf("Running %d capability probes against %s...\n", len(ml.CapabilityProbes), apiURL)`
			`results := ml.RunCapabilityProbes(ctx, backend)`

			`fmt.Printf("\nResults: %.1f%% (%d/%d)\n", results.Accuracy, results.Correct, results.Total)`

			`for cat, data := range results.ByCategory {`
			`catAcc := 0.0`
			`if data.Total > 0 {`
			`catAcc = float64(data.Correct) / float64(data.Total) * 100`
			`}`
			`fmt.Printf(" %-20s %d/%d (%.0f%%)\n", cat, data.Correct, data.Total, catAcc)`
			`}`

			`if probeOutput != "" {`
			`data, err := json.MarshalIndent(results, "", " ")`
			`if err != nil {`
			`return fmt.Errorf("marshal results: %w", err)`
			`}`
			`if err := os.WriteFile(probeOutput, data, 0644); err != nil {`
			`return fmt.Errorf("write output: %w", err)`
			`}`
			`fmt.Printf("\nResults written to %s\n", probeOutput)`
			`}`

			`return nil`
			`}`