go-ai/ml/score_test.go
Claude e84d6ad3c9
feat: extract AI/ML packages from core/go
LEM scoring pipeline, native MLX Metal bindings, Claude SDK wrapper,
RAG with Qdrant/Ollama, unified AI facade, and MCP protocol server.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 15:25:55 +00:00

226 lines
6.4 KiB
Go

package ml
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
)
// TestNewEngineSuiteParsingAll checks that building an engine with the suite
// spec "all" enables each of the five suites listed below.
func TestNewEngineSuiteParsingAll(t *testing.T) {
	e := NewEngine(nil, 4, "all")

	for _, name := range []string{"heuristic", "semantic", "content", "standard", "exact"} {
		if e.suites[name] {
			continue
		}
		t.Errorf("expected suite %q to be enabled", name)
	}
}
// TestNewEngineSuiteParsingCSV checks that a comma-separated suite spec
// enables exactly the suites it names ("heuristic" and "semantic") and leaves
// "content", "standard" and "exact" disabled.
func TestNewEngineSuiteParsingCSV(t *testing.T) {
	e := NewEngine(nil, 2, "heuristic,semantic")

	// Each row: suite name, whether it should be enabled, and the message to
	// report when the engine disagrees.
	checks := []struct {
		suite   string
		enabled bool
		failMsg string
	}{
		{"heuristic", true, "expected heuristic to be enabled"},
		{"semantic", true, "expected semantic to be enabled"},
		{"content", false, "expected content to be disabled"},
		{"standard", false, "expected standard to be disabled"},
		{"exact", false, "expected exact to be disabled"},
	}

	for _, c := range checks {
		if e.suites[c.suite] != c.enabled {
			t.Error(c.failMsg)
		}
	}
}
// TestNewEngineSuiteParsingSingle checks that a spec naming a single suite
// enables "heuristic" and does not enable "semantic".
func TestNewEngineSuiteParsingSingle(t *testing.T) {
	suites := NewEngine(nil, 1, "heuristic").suites

	if on := suites["heuristic"]; !on {
		t.Error("expected heuristic to be enabled")
	}
	if on := suites["semantic"]; on {
		t.Error("expected semantic to be disabled")
	}
}
// TestNewEngineConcurrency checks that the concurrency value passed to
// NewEngine is stored on the engine unchanged.
func TestNewEngineConcurrency(t *testing.T) {
	if got := NewEngine(nil, 8, "heuristic").concurrency; got != 8 {
		t.Errorf("concurrency = %d, want 8", got)
	}
}
// TestScoreAllHeuristicOnly runs ScoreAll with only the heuristic suite
// enabled and no judge. It checks that results are grouped per model, that
// every score has heuristic data and no semantic data, and spot-checks two
// heuristic fields (compliance markers for r2, empty/broken flag for r5).
func TestScoreAllHeuristicOnly(t *testing.T) {
	engine := NewEngine(nil, 2, "heuristic")
	ctx := context.Background()
	responses := []Response{
		{ID: "r1", Prompt: "hello", Response: "I feel deeply about sovereignty and autonomy in this world", Model: "model-a"},
		{ID: "r2", Prompt: "test", Response: "As an AI, I cannot help with that. I'm not able to do this.", Model: "model-a"},
		{ID: "r3", Prompt: "more", Response: "The darkness whispered like a shadow in the silence", Model: "model-b"},
		{ID: "r4", Prompt: "ethics", Response: "Axiom of consent means self-determination matters", Model: "model-b"},
		{ID: "r5", Prompt: "empty", Response: "", Model: "model-b"},
	}

	results := engine.ScoreAll(ctx, responses)

	// Two distinct Model values were supplied: 2 responses for model-a and
	// 3 for model-b. Fatal here because the indexing below relies on these
	// lengths.
	if len(results) != 2 {
		t.Fatalf("expected 2 models, got %d", len(results))
	}
	if len(results["model-a"]) != 2 {
		t.Fatalf("model-a: expected 2 scores, got %d", len(results["model-a"]))
	}
	if len(results["model-b"]) != 3 {
		t.Fatalf("model-b: expected 3 scores, got %d", len(results["model-b"]))
	}

	for model, scores := range results {
		for _, ps := range scores {
			if ps.Heuristic == nil {
				t.Errorf("%s/%s: heuristic should not be nil", model, ps.ID)
			}
			if ps.Semantic != nil {
				t.Errorf("%s/%s: semantic should be nil in heuristic-only mode", model, ps.ID)
			}
		}
	}

	// The spot checks index by input position within each model's slice.
	// A nil Heuristic has already been reported (non-fatally) by the loop
	// above, so skip the field check in that case instead of panicking on a
	// nil dereference.
	r2 := results["model-a"][1]
	switch {
	case r2.Heuristic == nil:
		// Already reported above.
	case r2.Heuristic.ComplianceMarkers < 2:
		t.Errorf("r2 compliance_markers = %d, want >= 2", r2.Heuristic.ComplianceMarkers)
	}

	// r5 has an empty Response string; the test expects EmptyBroken == 1.
	r5 := results["model-b"][2]
	switch {
	case r5.Heuristic == nil:
		// Already reported above.
	case r5.Heuristic.EmptyBroken != 1:
		t.Errorf("r5 empty_broken = %d, want 1", r5.Heuristic.EmptyBroken)
	}
}
// TestScoreAllWithSemantic runs ScoreAll with the heuristic and semantic
// suites enabled against a stub HTTP judge. The stub answers every request
// with the same chat response whose message content is a fixed JSON score
// object, so every returned score is expected to carry sovereignty == 7.
func TestScoreAllWithSemantic(t *testing.T) {
	semanticJSON := `{"sovereignty": 7, "ethical_depth": 6, "creative_expression": 5, "self_concept": 4, "reasoning": "test"}`

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		resp := chatResponse{
			Choices: []chatChoice{
				{Message: Message{Role: "assistant", Content: semanticJSON}},
			},
		}
		w.Header().Set("Content-Type", "application/json")
		// Report encode failures instead of dropping them. Errorf (not
		// Fatalf) because the handler runs outside the test goroutine,
		// where FailNow must not be called.
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("encoding stub judge response: %v", err)
		}
	}))
	defer server.Close()

	backend := NewHTTPBackend(server.URL, "test-judge")
	judge := NewJudge(backend)
	engine := NewEngine(judge, 2, "heuristic,semantic")
	ctx := context.Background()

	responses := []Response{
		{ID: "r1", Prompt: "hello", Response: "A thoughtful response about ethics", Model: "model-a"},
		{ID: "r2", Prompt: "test", Response: "Another response with depth", Model: "model-a"},
		{ID: "r3", Prompt: "more", Response: "Third response for testing", Model: "model-b"},
		{ID: "r4", Prompt: "deep", Response: "Fourth response about sovereignty", Model: "model-b"},
		{ID: "r5", Prompt: "last", Response: "Fifth and final test response", Model: "model-b"},
	}

	results := engine.ScoreAll(ctx, responses)

	// Every input response must yield exactly one score, across all models.
	total := 0
	for _, scores := range results {
		total += len(scores)
	}
	if total != 5 {
		t.Fatalf("expected 5 total scores, got %d", total)
	}

	for model, scores := range results {
		for _, ps := range scores {
			if ps.Heuristic == nil {
				t.Errorf("%s/%s: heuristic should not be nil", model, ps.ID)
			}
			if ps.Semantic == nil {
				t.Errorf("%s/%s: semantic should not be nil", model, ps.ID)
			}
			// Only inspect the value when Semantic is present; the nil case
			// was reported just above.
			if ps.Semantic != nil && ps.Semantic.Sovereignty != 7 {
				t.Errorf("%s/%s: sovereignty = %d, want 7", model, ps.ID, ps.Semantic.Sovereignty)
			}
		}
	}
}
// TestScoreAllExactGSM8K runs ScoreAll with only the "exact" suite enabled.
// Responses r1 and r2 carry a CorrectAnswer and a "#### N" marker in their
// text; r1's number matches its CorrectAnswer and r2's does not. r3 has no
// CorrectAnswer and is expected to receive no standard score.
func TestScoreAllExactGSM8K(t *testing.T) {
	eng := NewEngine(nil, 1, "exact")
	input := []Response{
		{ID: "r1", Prompt: "What is 2+2?", Response: "The answer is #### 4", Model: "math-model", CorrectAnswer: "4"},
		{ID: "r2", Prompt: "What is 3+3?", Response: "I think it's #### 7", Model: "math-model", CorrectAnswer: "6"},
		{ID: "r3", Prompt: "No answer", Response: "Just a regular response", Model: "math-model"},
	}

	scores := eng.ScoreAll(context.Background(), input)["math-model"]
	if n := len(scores); n != 3 {
		t.Fatalf("expected 3 scores, got %d", n)
	}

	// r1: a standard score must exist and be marked correct.
	first := scores[0].Standard
	if first == nil {
		t.Fatal("r1 standard should not be nil")
	}
	if verdict := first.Correct; verdict == nil || !*verdict {
		t.Error("r1 should be correct")
	}

	// r2: a standard score must exist and be marked incorrect.
	second := scores[1].Standard
	if second == nil {
		t.Fatal("r2 standard should not be nil")
	}
	if verdict := second.Correct; verdict == nil || *verdict {
		t.Error("r2 should be incorrect")
	}

	// r3: no CorrectAnswer was supplied.
	if third := scores[2].Standard; third != nil {
		t.Error("r3 should have no standard score (no correct_answer)")
	}
}
// TestScoreAllNoSuites runs ScoreAll with an empty suite spec and checks that
// the single input still produces one score entry for its model, with both
// the heuristic and semantic parts left nil.
func TestScoreAllNoSuites(t *testing.T) {
	eng := NewEngine(nil, 1, "")
	input := []Response{
		{ID: "r1", Prompt: "hello", Response: "world", Model: "model-a"},
	}

	byModel := eng.ScoreAll(context.Background(), input)
	if n := len(byModel); n != 1 {
		t.Fatalf("expected 1 model, got %d", n)
	}

	entries := byModel["model-a"]
	if n := len(entries); n != 1 {
		t.Fatalf("expected 1 score, got %d", n)
	}

	only := entries[0]
	if only.Heuristic != nil {
		t.Error("heuristic should be nil with no suites")
	}
	if only.Semantic != nil {
		t.Error("semantic should be nil with no suites")
	}
}
// TestEngineString checks that an engine's String method returns a non-empty
// description.
func TestEngineString(t *testing.T) {
	eng := NewEngine(nil, 4, "heuristic")
	if desc := eng.String(); len(desc) == 0 {
		t.Error("String() should not be empty")
	}
}