go-ai/mcp/tools_ml_test.go
Snider 85e02297cc fix: update ml tool handlers for Result return type
go-ml's Backend.Generate() and Backend.Chat() now return ml.Result{Text, Metrics}
instead of (string, error). Updated mlGenerate and mlProbe handlers to unwrap
result.Text, and updated test mocks to match the new interface.

Added local replace directives for go-inference, go-ml, go-mlx while
these changes propagate through the dependency chain.

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-22 19:24:32 +00:00

479 lines
13 KiB
Go

package mcp
import (
"context"
"fmt"
"strings"
"testing"
"forge.lthn.ai/core/go-inference"
"forge.lthn.ai/core/go-ml"
"forge.lthn.ai/core/go/pkg/framework"
"forge.lthn.ai/core/go/pkg/log"
)
// --- Mock backend for inference registry ---
// mockInferenceBackend is a minimal inference.Backend used to exercise the
// ml_backends handler in CI. It reports a fixed name and availability and
// never loads a model.
type mockInferenceBackend struct {
	name      string // value returned by Name
	available bool   // value returned by Available
}

// Compile-time check that the mock satisfies inference.Backend, so an
// interface change is reported here rather than at a distant call site.
var _ inference.Backend = (*mockInferenceBackend)(nil)

// Name returns the configured backend name.
func (m *mockInferenceBackend) Name() string { return m.name }

// Available returns the configured availability flag.
func (m *mockInferenceBackend) Available() bool { return m.available }

// LoadModel always fails with a "not implemented" error and a nil model;
// both arguments are ignored.
func (m *mockInferenceBackend) LoadModel(_ string, _ ...inference.LoadOption) (inference.TextModel, error) {
	return nil, fmt.Errorf("mock backend: LoadModel not implemented")
}
// --- Mock ml.Backend for Generate ---
// mockMLBackend is a canned ml.Backend for CI testing. Generate and Chat both
// return the same preconfigured text and error.
type mockMLBackend struct {
	name         string // value returned by Name
	available    bool   // value returned by Available
	generateResp string // text placed in ml.Result.Text by Generate and Chat
	generateErr  error  // error returned by Generate and Chat (nil for success)
}

// Compile-time check that the mock satisfies ml.Backend, so an interface
// change is reported at the type definition.
var _ ml.Backend = (*mockMLBackend)(nil)

// Name returns the configured backend name.
func (m *mockMLBackend) Name() string { return m.name }

// Available returns the configured availability flag.
func (m *mockMLBackend) Available() bool { return m.available }

// Generate ignores its arguments and returns the canned text wrapped in an
// ml.Result together with the canned error.
func (m *mockMLBackend) Generate(_ context.Context, _ string, _ ml.GenOpts) (ml.Result, error) {
	return ml.Result{Text: m.generateResp}, m.generateErr
}

// Chat ignores its arguments and returns exactly what Generate returns: the
// canned text wrapped in an ml.Result together with the canned error.
func (m *mockMLBackend) Chat(_ context.Context, _ []ml.Message, _ ml.GenOpts) (ml.Result, error) {
	return ml.Result{Text: m.generateResp}, m.generateErr
}
// newTestMLSubsystem builds an MLSubsystem for tests. It creates a framework
// core containing an ml.Service registered as "ml", fetches that service back
// from the core, registers every supplied backend on it under the backend's
// own Name(), and pairs the service with the default logger. Any setup
// failure aborts the calling test via t.Fatalf.
func newTestMLSubsystem(t *testing.T, backends ...ml.Backend) *MLSubsystem {
	t.Helper()

	core, err := framework.New(framework.WithName("ml", ml.NewService(ml.Options{})))
	if err != nil {
		t.Fatalf("Failed to create framework core: %v", err)
	}

	service, err := framework.ServiceFor[*ml.Service](core, "ml")
	if err != nil {
		t.Fatalf("Failed to get ML service: %v", err)
	}

	// Register each mock backend under its own name.
	for i := range backends {
		service.RegisterBackend(backends[i].Name(), backends[i])
	}

	sub := &MLSubsystem{
		service: service,
		logger:  log.Default(),
	}
	return sub
}
// --- Input/Output struct tests ---
// TestMLGenerateInput_Good verifies that every field of MLGenerateInput
// (Prompt, Backend, Model, Temperature, MaxTokens) holds the value it was
// constructed with.
func TestMLGenerateInput_Good(t *testing.T) {
	input := MLGenerateInput{
		Prompt:      "Hello world",
		Backend:     "test",
		Model:       "test-model",
		Temperature: 0.7,
		MaxTokens:   100,
	}
	if input.Prompt != "Hello world" {
		t.Errorf("Expected prompt 'Hello world', got %q", input.Prompt)
	}
	// Backend and Model were populated but previously never asserted.
	if input.Backend != "test" {
		t.Errorf("Expected backend 'test', got %q", input.Backend)
	}
	if input.Model != "test-model" {
		t.Errorf("Expected model 'test-model', got %q", input.Model)
	}
	if input.Temperature != 0.7 {
		t.Errorf("Expected temperature 0.7, got %f", input.Temperature)
	}
	if input.MaxTokens != 100 {
		t.Errorf("Expected max_tokens 100, got %d", input.MaxTokens)
	}
}
// TestMLScoreInput_Good verifies that every field of MLScoreInput (Prompt,
// Response, Suites) holds the value it was constructed with.
func TestMLScoreInput_Good(t *testing.T) {
	input := MLScoreInput{
		Prompt:   "test prompt",
		Response: "test response",
		Suites:   "heuristic,semantic",
	}
	if input.Prompt != "test prompt" {
		t.Errorf("Expected prompt 'test prompt', got %q", input.Prompt)
	}
	if input.Response != "test response" {
		t.Errorf("Expected response 'test response', got %q", input.Response)
	}
	// Suites was populated but previously never asserted.
	if input.Suites != "heuristic,semantic" {
		t.Errorf("Expected suites 'heuristic,semantic', got %q", input.Suites)
	}
}
// TestMLProbeInput_Good verifies that every field of MLProbeInput (Backend,
// Categories) holds the value it was constructed with.
func TestMLProbeInput_Good(t *testing.T) {
	input := MLProbeInput{
		Backend:    "test",
		Categories: "reasoning,code",
	}
	if input.Backend != "test" {
		t.Errorf("Expected backend 'test', got %q", input.Backend)
	}
	// Categories was populated but previously never asserted.
	if input.Categories != "reasoning,code" {
		t.Errorf("Expected categories 'reasoning,code', got %q", input.Categories)
	}
}
// TestMLStatusInput_Good verifies that every field of MLStatusInput
// (InfluxURL, InfluxDB) holds the value it was constructed with.
func TestMLStatusInput_Good(t *testing.T) {
	input := MLStatusInput{
		InfluxURL: "http://localhost:8086",
		InfluxDB:  "lem",
	}
	if input.InfluxURL != "http://localhost:8086" {
		t.Errorf("Expected InfluxURL 'http://localhost:8086', got %q", input.InfluxURL)
	}
	// InfluxDB was populated but previously never asserted.
	if input.InfluxDB != "lem" {
		t.Errorf("Expected InfluxDB 'lem', got %q", input.InfluxDB)
	}
}
// TestMLBackendsInput_Good checks that a zero-value MLBackendsInput can be
// declared; there is nothing further to assert.
func TestMLBackendsInput_Good(t *testing.T) {
	var input MLBackendsInput
	_ = input
}
// TestMLBackendsOutput_Good verifies struct fields.
func TestMLBackendsOutput_Good(t *testing.T) {
output := MLBackendsOutput{
Backends: []MLBackendInfo{
{Name: "ollama", Available: true},
{Name: "llama", Available: false},
},
Default: "ollama",
}
if len(output.Backends) != 2 {
t.Fatalf("Expected 2 backends, got %d", len(output.Backends))
}
if output.Default != "ollama" {
t.Errorf("Expected default 'ollama', got %q", output.Default)
}
if !output.Backends[0].Available {
t.Error("Expected first backend to be available")
}
}
// TestMLProbeOutput_Good verifies struct fields.
func TestMLProbeOutput_Good(t *testing.T) {
output := MLProbeOutput{
Total: 2,
Results: []MLProbeResultItem{
{ID: "probe-1", Category: "reasoning", Response: "test"},
{ID: "probe-2", Category: "code", Response: "test2"},
},
}
if output.Total != 2 {
t.Errorf("Expected total 2, got %d", output.Total)
}
if output.Results[0].ID != "probe-1" {
t.Errorf("Expected ID 'probe-1', got %q", output.Results[0].ID)
}
}
// TestMLStatusOutput_Good checks that MLStatusOutput.Status holds the value it
// was constructed with.
func TestMLStatusOutput_Good(t *testing.T) {
	const want = "OK: 5 training runs"

	output := MLStatusOutput{Status: want}
	if got := output.Status; got != want {
		t.Errorf("Unexpected status: %q", got)
	}
}
// TestMLGenerateOutput_Good builds an MLGenerateOutput and checks that its
// Response field holds the value it was constructed with.
func TestMLGenerateOutput_Good(t *testing.T) {
	const want = "Generated text here"

	output := MLGenerateOutput{
		Response: want,
		Backend:  "ollama",
		Model:    "qwen3:8b",
	}
	if got := output.Response; got != want {
		t.Errorf("Unexpected response: %q", got)
	}
}
// TestMLScoreOutput_Good sets only the Heuristic field of MLScoreOutput and
// checks that it is non-nil while Semantic stays nil.
func TestMLScoreOutput_Good(t *testing.T) {
	var output MLScoreOutput
	output.Heuristic = &ml.HeuristicScores{}

	if heuristic := output.Heuristic; heuristic == nil {
		t.Error("Expected Heuristic to be set")
	}
	if semantic := output.Semantic; semantic != nil {
		t.Error("Expected Semantic to be nil")
	}
}
// --- Handler validation tests ---
// TestMLGenerate_Bad_EmptyPrompt calls mlGenerate with a zero-value input and
// expects an error whose text mentions "prompt cannot be empty".
func TestMLGenerate_Bad_EmptyPrompt(t *testing.T) {
	sub := newTestMLSubsystem(t)

	_, _, err := sub.mlGenerate(context.Background(), nil, MLGenerateInput{})
	switch {
	case err == nil:
		t.Fatal("Expected error for empty prompt")
	case !strings.Contains(err.Error(), "prompt cannot be empty"):
		t.Errorf("Unexpected error: %v", err)
	}
}
// TestMLGenerate_Good_WithMockBackend registers a mock backend named
// "test-mock", asks mlGenerate to use it, and expects the mock's canned text
// in the output's Response field.
func TestMLGenerate_Good_WithMockBackend(t *testing.T) {
	backend := &mockMLBackend{
		name:         "test-mock",
		available:    true,
		generateResp: "mock response",
	}
	sub := newTestMLSubsystem(t, backend)

	request := MLGenerateInput{Prompt: "test", Backend: "test-mock"}
	_, out, err := sub.mlGenerate(context.Background(), nil, request)
	if err != nil {
		t.Fatalf("mlGenerate failed: %v", err)
	}
	if got := out.Response; got != "mock response" {
		t.Errorf("Expected 'mock response', got %q", got)
	}
}
// TestMLGenerate_Bad_NoBackend requests a backend name that was never
// registered and expects an error whose text mentions "no backend available".
func TestMLGenerate_Bad_NoBackend(t *testing.T) {
	sub := newTestMLSubsystem(t)

	request := MLGenerateInput{Prompt: "test", Backend: "nonexistent"}
	_, _, err := sub.mlGenerate(context.Background(), nil, request)
	switch {
	case err == nil:
		t.Fatal("Expected error for missing backend")
	case !strings.Contains(err.Error(), "no backend available"):
		t.Errorf("Unexpected error: %v", err)
	}
}
// TestMLScore_Bad_EmptyPrompt calls mlScore with a response but no prompt and
// expects an error.
func TestMLScore_Bad_EmptyPrompt(t *testing.T) {
	sub := newTestMLSubsystem(t)

	request := MLScoreInput{Response: "some"}
	if _, _, err := sub.mlScore(context.Background(), nil, request); err == nil {
		t.Fatal("Expected error for empty prompt")
	}
}
// TestMLScore_Bad_EmptyResponse calls mlScore with a prompt but no response
// and expects an error.
func TestMLScore_Bad_EmptyResponse(t *testing.T) {
	sub := newTestMLSubsystem(t)

	request := MLScoreInput{Prompt: "some"}
	if _, _, err := sub.mlScore(context.Background(), nil, request); err == nil {
		t.Fatal("Expected error for empty response")
	}
}
// TestMLScore_Good_Heuristic requests the "heuristic" suite explicitly, with
// no backends registered, and expects mlScore to succeed with a non-nil
// Heuristic result.
func TestMLScore_Good_Heuristic(t *testing.T) {
	sub := newTestMLSubsystem(t)

	request := MLScoreInput{
		Prompt:   "What is Go?",
		Response: "Go is a statically typed, compiled programming language designed at Google.",
		Suites:   "heuristic",
	}
	_, out, err := sub.mlScore(context.Background(), nil, request)
	if err != nil {
		t.Fatalf("mlScore failed: %v", err)
	}
	if scores := out.Heuristic; scores == nil {
		t.Fatal("Expected heuristic scores to be set")
	}
}
// TestMLScore_Good_DefaultSuite leaves Suites empty and expects mlScore to
// succeed with a non-nil Heuristic result, i.e. heuristic scoring runs by
// default.
func TestMLScore_Good_DefaultSuite(t *testing.T) {
	sub := newTestMLSubsystem(t)

	request := MLScoreInput{
		Prompt:   "What is Go?",
		Response: "Go is a statically typed, compiled programming language designed at Google.",
	}
	_, out, err := sub.mlScore(context.Background(), nil, request)
	if err != nil {
		t.Fatalf("mlScore failed: %v", err)
	}
	if scores := out.Heuristic; scores == nil {
		t.Fatal("Expected heuristic scores (default suite)")
	}
}
// TestMLScore_Bad_SemanticNoJudge requests the "semantic" suite on a subsystem
// built without any backends and expects an error whose text mentions
// "requires a judge".
func TestMLScore_Bad_SemanticNoJudge(t *testing.T) {
	sub := newTestMLSubsystem(t)

	request := MLScoreInput{Prompt: "test", Response: "test", Suites: "semantic"}
	_, _, err := sub.mlScore(context.Background(), nil, request)
	switch {
	case err == nil:
		t.Fatal("Expected error for semantic scoring without judge")
	case !strings.Contains(err.Error(), "requires a judge"):
		t.Errorf("Unexpected error: %v", err)
	}
}
// TestMLScore_Bad_ContentSuite requests the "content" suite and expects
// mlScore to reject it with an error whose text mentions "ContentProbe".
func TestMLScore_Bad_ContentSuite(t *testing.T) {
	sub := newTestMLSubsystem(t)

	request := MLScoreInput{Prompt: "test", Response: "test", Suites: "content"}
	_, _, err := sub.mlScore(context.Background(), nil, request)
	switch {
	case err == nil:
		t.Fatal("Expected error for content suite")
	case !strings.Contains(err.Error(), "ContentProbe"):
		t.Errorf("Unexpected error: %v", err)
	}
}
// TestMLProbe_Good_WithMockBackend runs mlProbe against a mock backend with
// the category filter "reasoning". Every returned result must carry that
// category and the mock's canned response, and Total must equal the number of
// results.
//
// NOTE(review): if mlProbe returns zero results for this filter, the
// per-result checks never execute and the test passes vacuously — confirm
// that "reasoning" matches at least one probe.
func TestMLProbe_Good_WithMockBackend(t *testing.T) {
	backend := &mockMLBackend{
		name:         "probe-mock",
		available:    true,
		generateResp: "probe response",
	}
	sub := newTestMLSubsystem(t, backend)

	request := MLProbeInput{Backend: "probe-mock", Categories: "reasoning"}
	_, out, err := sub.mlProbe(context.Background(), nil, request)
	if err != nil {
		t.Fatalf("mlProbe failed: %v", err)
	}

	// Each result must match the requested category and echo the mock text.
	for i := range out.Results {
		item := out.Results[i]
		if item.Category != "reasoning" {
			t.Errorf("Expected category 'reasoning', got %q", item.Category)
		}
		if item.Response != "probe response" {
			t.Errorf("Expected 'probe response', got %q", item.Response)
		}
	}

	if want := len(out.Results); out.Total != want {
		t.Errorf("Expected total %d, got %d", want, out.Total)
	}
}
// TestMLProbe_Good_NoCategory runs mlProbe without a category filter and
// expects Total to equal the number of entries in ml.CapabilityProbes.
func TestMLProbe_Good_NoCategory(t *testing.T) {
	backend := &mockMLBackend{
		name:         "all-probe-mock",
		available:    true,
		generateResp: "ok",
	}
	sub := newTestMLSubsystem(t, backend)

	request := MLProbeInput{Backend: "all-probe-mock"}
	_, out, err := sub.mlProbe(context.Background(), nil, request)
	if err != nil {
		t.Fatalf("mlProbe failed: %v", err)
	}

	// With no filter, every capability probe should have been run.
	if want := len(ml.CapabilityProbes); out.Total != want {
		t.Errorf("Expected %d probes, got %d", want, out.Total)
	}
}
// TestMLBackends_Good_EmptyRegistry calls mlBackends on a subsystem with no ml
// backends registered and only requires that the call returns without error.
//
// The output is deliberately not inspected: per the original note, the handler
// reports global inference registry state, whose contents this test cannot
// control.
func TestMLBackends_Good_EmptyRegistry(t *testing.T) {
	sub := newTestMLSubsystem(t)

	if _, _, err := sub.mlBackends(context.Background(), nil, MLBackendsInput{}); err != nil {
		t.Fatalf("mlBackends failed: %v", err)
	}
}
// TestMLBackends_Good_WithMockInferenceBackend verifies registered backend appears.
func TestMLBackends_Good_WithMockInferenceBackend(t *testing.T) {
// Register a mock backend in the global inference registry
mock := &mockInferenceBackend{name: "test-ci-mock", available: true}
inference.Register(mock)
m := newTestMLSubsystem(t)
ctx := context.Background()
_, out, err := m.mlBackends(ctx, nil, MLBackendsInput{})
if err != nil {
t.Fatalf("mlBackends failed: %v", err)
}
found := false
for _, b := range out.Backends {
if b.Name == "test-ci-mock" {
found = true
if !b.Available {
t.Error("Expected mock backend to be available")
}
}
}
if !found {
t.Error("Expected to find 'test-ci-mock' in backends list")
}
}
// TestMLSubsystem_Good_Name checks that the subsystem reports its name as "ml".
func TestMLSubsystem_Good_Name(t *testing.T) {
	sub := newTestMLSubsystem(t)

	if got := sub.Name(); got != "ml" {
		t.Errorf("Expected name 'ml', got %q", got)
	}
}
// TestNewMLSubsystem_Good builds an ml.Service through a framework core, hands
// it to NewMLSubsystem, and checks that the result is non-nil, holds that same
// service, and has a logger set.
func TestNewMLSubsystem_Good(t *testing.T) {
	core, err := framework.New(framework.WithName("ml", ml.NewService(ml.Options{})))
	if err != nil {
		t.Fatalf("Failed to create core: %v", err)
	}

	service, err := framework.ServiceFor[*ml.Service](core, "ml")
	if err != nil {
		t.Fatalf("Failed to get service: %v", err)
	}

	subsystem := NewMLSubsystem(service)
	// Fatal on nil: the field checks below would dereference it.
	if subsystem == nil {
		t.Fatal("Expected non-nil subsystem")
	}
	if got := subsystem.service; got != service {
		t.Error("Expected service to be set")
	}
	if logger := subsystem.logger; logger == nil {
		t.Error("Expected logger to be set")
	}
}