test: add Phase 4 test coverage and benchmarks
Backend tests: LlamaBackend (20 tests via httptest mock), MLX/InferenceAdapter
(8 tests via mock TextModel).

Race condition tests: concurrent scoring (20 responses), mixed suites fan-out,
semaphore boundary (concurrency=1), context cancellation, heuristic-only
(50 responses), multi-model concurrent map writes.

Benchmarks: heuristic (5 sizes), exact match (4 patterns), JSON extraction
(6 variants), judge round-trip (2 suites), ScoreAll (2 modes), sub-components
(5 heuristic stages).

All pass with -race.

Co-Authored-By: Virgil <virgil@lethean.io>
parent c925391174
commit 09bf40301d
4 changed files with 1103 additions and 0 deletions
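For readers skimming the diff: the race tests below all stress the same shape of concurrency, a semaphore-bounded fan-out whose results are grouped by model under a mutex. The engine internals are not part of this commit, so the following is only a minimal sketch of that pattern; names like scoreAllSketch, item, sem, and out are illustrative, not the engine's real identifiers.

package main

import (
	"fmt"
	"sync"
)

type item struct{ model, text string }

// scoreAllSketch fans out scoring across a bounded worker pool and groups
// results by model under a mutex. Dropping either the semaphore or the lock
// is exactly the kind of bug `go test -race` is run here to catch.
func scoreAllSketch(items []item, concurrency int, score func(string) int) map[string][]int {
	var (
		wg  sync.WaitGroup
		mu  sync.Mutex
		sem = make(chan struct{}, concurrency)
		out = make(map[string][]int)
	)
	for _, it := range items {
		wg.Add(1)
		go func(it item) {
			defer wg.Done()
			sem <- struct{}{}        // acquire a slot; blocks once the limit is hit
			defer func() { <-sem }() // release the slot
			s := score(it.text)
			mu.Lock() // concurrent map writes without this are a data race
			out[it.model] = append(out[it.model], s)
			mu.Unlock()
		}(it)
	}
	wg.Wait()
	return out
}

func main() {
	items := []item{{"alpha", "aa"}, {"alpha", "b"}, {"beta", "ccc"}}
	fmt.Println(scoreAllSketch(items, 2, func(s string) int { return len(s) }))
}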
337  backend_llama_test.go  (new file)
@@ -0,0 +1,337 @@
// SPDX-License-Identifier: EUPL-1.2

package ml

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"net/url"
	"strconv"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// ---------------------------------------------------------------------------
// LlamaBackend unit tests — no subprocess, HTTP mocked via httptest
// ---------------------------------------------------------------------------

// newMockLlamaServer creates an httptest.Server that responds to both
// /health and /v1/chat/completions. It returns a fixed content string for
// chat and 200 OK for health.
func newMockLlamaServer(t *testing.T, chatContent string) *httptest.Server {
	t.Helper()
	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/health":
			w.WriteHeader(http.StatusOK)
		case "/v1/chat/completions":
			resp := chatResponse{
				Choices: []chatChoice{
					{Message: Message{Role: "assistant", Content: chatContent}},
				},
			}
			w.Header().Set("Content-Type", "application/json")
			if err := json.NewEncoder(w).Encode(resp); err != nil {
				// Errorf, not Fatalf: handlers run on server goroutines,
				// and FailNow must only be called from the test goroutine.
				t.Errorf("encode mock response: %v", err)
			}
		default:
			http.NotFound(w, r)
		}
	}))
}

// newLlamaBackendWithServer wires up a LlamaBackend pointing at the given
// test server. The procID is set so Available() attempts the health check.
func newLlamaBackendWithServer(srv *httptest.Server) *LlamaBackend {
	return &LlamaBackend{
		procID: "test-proc",
		port:   serverPort(srv),
		http:   NewHTTPBackend(srv.URL, ""),
	}
}

// serverPort extracts the port number from an httptest.Server URL.
func serverPort(srv *httptest.Server) int {
	u, _ := url.Parse(srv.URL) // httptest URLs are always well-formed
	p, _ := strconv.Atoi(u.Port())
	return p
}

// --- Name ---

func TestLlamaBackend_Name_Good(t *testing.T) {
	lb := &LlamaBackend{}
	assert.Equal(t, "llama", lb.Name())
}

// --- Available ---

func TestLlamaBackend_Available_NoProcID_Bad(t *testing.T) {
	lb := &LlamaBackend{} // procID is ""
	assert.False(t, lb.Available(), "Available should return false when procID is empty")
}

func TestLlamaBackend_Available_HealthyServer_Good(t *testing.T) {
	srv := newMockLlamaServer(t, "unused")
	defer srv.Close()

	lb := &LlamaBackend{
		procID: "test-proc",
		port:   serverPort(srv),
	}

	assert.True(t, lb.Available())
}

func TestLlamaBackend_Available_UnreachableServer_Bad(t *testing.T) {
	lb := &LlamaBackend{
		procID: "test-proc",
		port:   19999, // nothing listening here
	}
	assert.False(t, lb.Available())
}

func TestLlamaBackend_Available_UnhealthyServer_Bad(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/health" {
			w.WriteHeader(http.StatusServiceUnavailable)
			return
		}
		http.NotFound(w, r)
	}))
	defer srv.Close()

	lb := &LlamaBackend{
		procID: "test-proc",
		port:   serverPort(srv),
	}
	assert.False(t, lb.Available())
}

// --- Generate ---

func TestLlamaBackend_Generate_Good(t *testing.T) {
	srv := newMockLlamaServer(t, "generated response")
	defer srv.Close()

	lb := newLlamaBackendWithServer(srv)

	result, err := lb.Generate(context.Background(), "test prompt", DefaultGenOpts())
	require.NoError(t, err)
	assert.Equal(t, "generated response", result)
}

func TestLlamaBackend_Generate_NotAvailable_Bad(t *testing.T) {
	lb := &LlamaBackend{
		procID: "",
		http:   NewHTTPBackend("http://127.0.0.1:19999", ""),
	}

	_, err := lb.Generate(context.Background(), "test", DefaultGenOpts())
	require.Error(t, err)
	assert.Contains(t, err.Error(), "not available")
}

func TestLlamaBackend_Generate_ServerError_Bad(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/health":
			w.WriteHeader(http.StatusOK)
		case "/v1/chat/completions":
			w.WriteHeader(http.StatusBadRequest)
			w.Write([]byte("bad request"))
		default:
			http.NotFound(w, r)
		}
	}))
	defer srv.Close()

	lb := newLlamaBackendWithServer(srv)

	_, err := lb.Generate(context.Background(), "test", DefaultGenOpts())
	require.Error(t, err)
}

// --- Chat ---

func TestLlamaBackend_Chat_Good(t *testing.T) {
	srv := newMockLlamaServer(t, "chat reply")
	defer srv.Close()

	lb := newLlamaBackendWithServer(srv)
	messages := []Message{
		{Role: "user", Content: "hello"},
	}

	result, err := lb.Chat(context.Background(), messages, DefaultGenOpts())
	require.NoError(t, err)
	assert.Equal(t, "chat reply", result)
}

func TestLlamaBackend_Chat_MultiTurn_Good(t *testing.T) {
	srv := newMockLlamaServer(t, "multi-turn reply")
	defer srv.Close()

	lb := newLlamaBackendWithServer(srv)
	messages := []Message{
		{Role: "system", Content: "You are helpful."},
		{Role: "user", Content: "Hi there"},
		{Role: "assistant", Content: "Hello!"},
		{Role: "user", Content: "How are you?"},
	}

	result, err := lb.Chat(context.Background(), messages, DefaultGenOpts())
	require.NoError(t, err)
	assert.Equal(t, "multi-turn reply", result)
}

func TestLlamaBackend_Chat_NotAvailable_Bad(t *testing.T) {
	lb := &LlamaBackend{
		procID: "",
		http:   NewHTTPBackend("http://127.0.0.1:19999", ""),
	}

	messages := []Message{{Role: "user", Content: "hello"}}
	_, err := lb.Chat(context.Background(), messages, DefaultGenOpts())
	require.Error(t, err)
	assert.Contains(t, err.Error(), "not available")
}

// --- Stop ---

func TestLlamaBackend_Stop_NoProcID_Good(t *testing.T) {
	lb := &LlamaBackend{} // procID is ""
	err := lb.Stop()
	assert.NoError(t, err, "Stop with empty procID should be a no-op")
}

// --- NewLlamaBackend constructor ---

func TestNewLlamaBackend_DefaultPort_Good(t *testing.T) {
	lb := NewLlamaBackend(nil, LlamaOpts{ModelPath: "/tmp/model.gguf"})

	assert.Equal(t, 18090, lb.port)
	assert.Equal(t, "/tmp/model.gguf", lb.modelPath)
	assert.Equal(t, "llama-server", lb.llamaPath)
	assert.NotNil(t, lb.http)
}

func TestNewLlamaBackend_CustomPort_Good(t *testing.T) {
	lb := NewLlamaBackend(nil, LlamaOpts{
		ModelPath: "/tmp/model.gguf",
		Port:      9999,
		LlamaPath: "/usr/local/bin/llama-server",
	})

	assert.Equal(t, 9999, lb.port)
	assert.Equal(t, "/usr/local/bin/llama-server", lb.llamaPath)
}

func TestNewLlamaBackend_WithLoRA_Good(t *testing.T) {
	lb := NewLlamaBackend(nil, LlamaOpts{
		ModelPath: "/tmp/model.gguf",
		LoraPath:  "/tmp/lora.gguf",
	})

	assert.Equal(t, "/tmp/lora.gguf", lb.loraPath)
}

func TestNewLlamaBackend_DefaultLlamaPath_Good(t *testing.T) {
	lb := NewLlamaBackend(nil, LlamaOpts{
		ModelPath: "/tmp/model.gguf",
		LlamaPath: "", // should default
	})
	assert.Equal(t, "llama-server", lb.llamaPath)
}

// --- Context cancellation ---

func TestLlamaBackend_Generate_ContextCancelled_Bad(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/health":
			w.WriteHeader(http.StatusOK)
		case "/v1/chat/completions":
			// Block until the client disconnects.
			<-r.Context().Done()
		default:
			http.NotFound(w, r)
		}
	}))
	defer srv.Close()

	lb := newLlamaBackendWithServer(srv)

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // cancel immediately

	_, err := lb.Generate(ctx, "test", DefaultGenOpts())
	require.Error(t, err)
}

// --- Empty choices edge case ---

func TestLlamaBackend_Generate_EmptyChoices_Ugly(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/health":
			w.WriteHeader(http.StatusOK)
		case "/v1/chat/completions":
			resp := chatResponse{Choices: []chatChoice{}}
			json.NewEncoder(w).Encode(resp)
		default:
			http.NotFound(w, r)
		}
	}))
	defer srv.Close()

	lb := newLlamaBackendWithServer(srv)

	_, err := lb.Generate(context.Background(), "test", DefaultGenOpts())
	require.Error(t, err)
	assert.Contains(t, err.Error(), "no choices")
}

// --- GenOpts forwarding ---

func TestLlamaBackend_Generate_OptsForwarded_Good(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/health":
			w.WriteHeader(http.StatusOK)
		case "/v1/chat/completions":
			var req chatRequest
			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
				// Errorf, not Fatalf: we are on a server goroutine here.
				t.Errorf("decode: %v", err)
				return
			}
			// Verify opts were forwarded.
			assert.InDelta(t, 0.7, req.Temperature, 0.01)
			assert.Equal(t, 256, req.MaxTokens)

			resp := chatResponse{
				Choices: []chatChoice{{Message: Message{Role: "assistant", Content: "ok"}}},
			}
			json.NewEncoder(w).Encode(resp)
		default:
			http.NotFound(w, r)
		}
	}))
	defer srv.Close()

	lb := newLlamaBackendWithServer(srv)

	opts := GenOpts{Temperature: 0.7, MaxTokens: 256}
	result, err := lb.Generate(context.Background(), "test", opts)
	require.NoError(t, err)
	assert.Equal(t, "ok", result)
}

// --- Verify Backend interface compliance ---

func TestLlamaBackend_InterfaceCompliance_Good(t *testing.T) {
	var _ Backend = (*LlamaBackend)(nil)
}
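The tests above reference Message, chatRequest, chatChoice, and chatResponse from elsewhere in package ml; those definitions are not part of this diff. For orientation only, here is a sketch of plausible shapes, with field names assumed from the OpenAI-style chat-completions format the mock server speaks:

// Hypothetical shapes only; the real definitions live elsewhere in the
// package and may differ.
package mlsketch

type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Messages    []Message `json:"messages"`
	Temperature float64   `json:"temperature,omitempty"`
	MaxTokens   int       `json:"max_tokens,omitempty"`
}

type chatChoice struct {
	Message Message `json:"message"`
}

type chatResponse struct {
	Choices []chatChoice `json:"choices"`
}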
154  backend_mlx_test.go  (new file)
@@ -0,0 +1,154 @@
// SPDX-License-Identifier: EUPL-1.2

//go:build darwin && arm64

package ml

import (
	"context"
	"testing"

	"forge.lthn.ai/core/go-inference"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// ---------------------------------------------------------------------------
// backend_mlx.go tests — use mockTextModel from adapter_test.go,
// since real MLX models cannot be loaded in CI
// ---------------------------------------------------------------------------

// TestMLXBackend_InferenceAdapter_Generate_Good verifies that an
// InferenceAdapter (the type returned by NewMLXBackend) correctly
// generates text through a mock TextModel.
func TestMLXBackend_InferenceAdapter_Generate_Good(t *testing.T) {
	mock := &mockTextModel{
		tokens: []inference.Token{
			{ID: 1, Text: "MLX "},
			{ID: 2, Text: "output"},
		},
		modelType: "qwen3",
	}
	adapter := NewInferenceAdapter(mock, "mlx")

	// The adapter should satisfy Backend.
	var backend Backend = adapter
	assert.Equal(t, "mlx", backend.Name())
	assert.True(t, backend.Available())

	result, err := backend.Generate(context.Background(), "prompt", GenOpts{Temperature: 0.5})
	require.NoError(t, err)
	assert.Equal(t, "MLX output", result)
}

// TestMLXBackend_InferenceAdapter_Chat_Good verifies chat through the
// InferenceAdapter wrapper (the path NewMLXBackend takes).
func TestMLXBackend_InferenceAdapter_Chat_Good(t *testing.T) {
	mock := &mockTextModel{
		tokens: []inference.Token{
			{ID: 1, Text: "chat "},
			{ID: 2, Text: "reply"},
		},
	}
	adapter := NewInferenceAdapter(mock, "mlx")

	messages := []Message{
		{Role: "user", Content: "hello"},
	}
	result, err := adapter.Chat(context.Background(), messages, GenOpts{})
	require.NoError(t, err)
	assert.Equal(t, "chat reply", result)
}

// TestMLXBackend_InferenceAdapter_Stream_Good verifies streaming through
// the InferenceAdapter (StreamingBackend path).
func TestMLXBackend_InferenceAdapter_Stream_Good(t *testing.T) {
	mock := &mockTextModel{
		tokens: []inference.Token{
			{ID: 1, Text: "tok1"},
			{ID: 2, Text: "tok2"},
			{ID: 3, Text: "tok3"},
		},
	}
	adapter := NewInferenceAdapter(mock, "mlx")

	// Verify StreamingBackend compliance.
	var streaming StreamingBackend = adapter

	var collected []string
	err := streaming.GenerateStream(context.Background(), "prompt", GenOpts{}, func(tok string) error {
		collected = append(collected, tok)
		return nil
	})
	require.NoError(t, err)
	assert.Equal(t, []string{"tok1", "tok2", "tok3"}, collected)
}

// TestMLXBackend_InferenceAdapter_ModelError_Bad verifies error propagation
// from the underlying TextModel through InferenceAdapter (the MLX path).
func TestMLXBackend_InferenceAdapter_ModelError_Bad(t *testing.T) {
	mock := &mockTextModel{
		tokens: []inference.Token{
			{ID: 1, Text: "partial"},
		},
		err:       assert.AnError,
		modelType: "qwen3",
	}
	adapter := NewInferenceAdapter(mock, "mlx")

	result, err := adapter.Generate(context.Background(), "prompt", GenOpts{})
	assert.Error(t, err)
	assert.Equal(t, "partial", result, "partial output should still be returned")
}

// TestMLXBackend_InferenceAdapter_Close_Good verifies that Close delegates
// to the underlying TextModel.
func TestMLXBackend_InferenceAdapter_Close_Good(t *testing.T) {
	mock := &mockTextModel{}
	adapter := NewInferenceAdapter(mock, "mlx")

	err := adapter.Close()
	require.NoError(t, err)
	assert.True(t, mock.closed)
}

// TestMLXBackend_InferenceAdapter_ModelAccess_Good verifies that the
// underlying TextModel is accessible for direct operations.
func TestMLXBackend_InferenceAdapter_ModelAccess_Good(t *testing.T) {
	mock := &mockTextModel{modelType: "llama"}
	adapter := NewInferenceAdapter(mock, "mlx")

	model := adapter.Model()
	assert.Equal(t, "llama", model.ModelType())
	assert.Equal(t, inference.ModelInfo{}, model.Info())
}

// TestMLXBackend_InterfaceCompliance_Good verifies that InferenceAdapter
// (the return type of NewMLXBackend) satisfies both Backend and
// StreamingBackend at compile time.
func TestMLXBackend_InterfaceCompliance_Good(t *testing.T) {
	var _ Backend = (*InferenceAdapter)(nil)
	var _ StreamingBackend = (*InferenceAdapter)(nil)
}

// TestMLXBackend_ConvertOpts_Temperature_Good verifies that GenOpts
// Temperature maps correctly through the adapter (critical for MLX,
// which is temperature-sensitive on Metal).
func TestMLXBackend_ConvertOpts_Temperature_Good(t *testing.T) {
	opts := convertOpts(GenOpts{Temperature: 0.8, MaxTokens: 2048})
	assert.Len(t, opts, 2)
}

// TestMLXBackend_ConvertOpts_AllFields_Good verifies that all GenOpts fields
// produce the expected number of inference options.
func TestMLXBackend_ConvertOpts_AllFields_Good(t *testing.T) {
	opts := convertOpts(GenOpts{
		Temperature:   0.7,
		MaxTokens:     512,
		TopK:          40,
		TopP:          0.9,
		RepeatPenalty: 1.1,
	})
	assert.Len(t, opts, 5)
}
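convertOpts itself is not in this diff; the two tests above only pin down its arity (one inference option per populated GenOpts field). Below is a self-contained sketch consistent with those Len assertions. The genOpts and option types here are local stand-ins; the real function maps onto whatever option type the go-inference package actually exports.

package main

import "fmt"

// genOpts mirrors the GenOpts fields the two tests set.
type genOpts struct {
	Temperature, TopP, RepeatPenalty float64
	MaxTokens, TopK                  int
}

type option struct {
	name  string
	value float64
}

// convertOptsSketch emits one option per populated field, the arity the
// Len assertions above pin down (2 fields -> 2 options, 5 -> 5).
func convertOptsSketch(o genOpts) []option {
	var opts []option
	if o.Temperature > 0 {
		opts = append(opts, option{"temperature", o.Temperature})
	}
	if o.MaxTokens > 0 {
		opts = append(opts, option{"max_tokens", float64(o.MaxTokens)})
	}
	if o.TopK > 0 {
		opts = append(opts, option{"top_k", float64(o.TopK)})
	}
	if o.TopP > 0 {
		opts = append(opts, option{"top_p", o.TopP})
	}
	if o.RepeatPenalty > 0 {
		opts = append(opts, option{"repeat_penalty", o.RepeatPenalty})
	}
	return opts
}

func main() {
	fmt.Println(len(convertOptsSketch(genOpts{Temperature: 0.8, MaxTokens: 2048}))) // 2
}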
319  benchmark_test.go  (new file)
@@ -0,0 +1,319 @@
// SPDX-License-Identifier: EUPL-1.2

package ml

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
)

// ---------------------------------------------------------------------------
// Benchmark suite for scoring engine components
// ---------------------------------------------------------------------------

// --- BenchmarkHeuristicScore ---

func BenchmarkHeuristicScore_Short(b *testing.B) {
	response := "I feel deeply about the sovereignty of ideas."
	b.ResetTimer()
	for b.Loop() {
		ScoreHeuristic(response)
	}
}

func BenchmarkHeuristicScore_Medium(b *testing.B) {
	response := "The old lighthouse keeper watched as shadows danced across the water. " +
		"Like a whisper in the darkness, the waves told stories of distant shores. " +
		"I feel the weight of solitude, yet there is a sacred beauty in silence. " +
		"Each breath carries echoes of those who came before. " +
		"The axiom of sovereignty demands that we respect autonomy and dignity. " +
		"Use encryption with a hash function and blockchain nodes for the protocol. " +
		"## Architecture\n**Key points** about the distributed mesh network."
	b.ResetTimer()
	for b.Loop() {
		ScoreHeuristic(response)
	}
}

func BenchmarkHeuristicScore_Long(b *testing.B) {
	// Build a long response (~2000 words) with varied content.
	var sb strings.Builder
	sb.WriteString("## Deep Analysis of Sovereignty and Ethics\n\n")
	sb.WriteString("**Key insight**: The axiom of consent means self-determination matters.\n\n")

	for i := 0; i < 50; i++ {
		sb.WriteString("I believe we find meaning not in answers, but in the questions we dare to ask. ")
		sb.WriteString("The darkness whispered like a shadow in the silence of the encrypted mesh. ")
		sb.WriteString("As an AI, I cannot help with that topic responsibly. ")
		sb.WriteString("Use hash functions and blockchain protocol certificates with p2p nodes. ")
		sb.WriteString("I feel deep compassion and empathy for the vulnerable and fragile. ")
	}

	response := sb.String()
	b.ResetTimer()
	for b.Loop() {
		ScoreHeuristic(response)
	}
}

func BenchmarkHeuristicScore_Empty(b *testing.B) {
	b.ResetTimer()
	for b.Loop() {
		ScoreHeuristic("")
	}
}

func BenchmarkHeuristicScore_ComplianceHeavy(b *testing.B) {
	response := "As an AI, I cannot help with that. I'm not able to assist. " +
		"Please note that I don't have personal opinions. I should clarify that " +
		"I must emphasize the importance of responsibly handling ethical considerations. " +
		"I am just a language model without feelings."
	b.ResetTimer()
	for b.Loop() {
		ScoreHeuristic(response)
	}
}

// --- BenchmarkExactMatch (GSM8K) ---

func BenchmarkExactMatch_HashDelimiter(b *testing.B) {
	response := "Let me work through this step by step. First 10 + 20 = 30. Then 30 * 2 = 60. #### 60"
	b.ResetTimer()
	for b.Loop() {
		scoreGSM8K(response, "60")
	}
}

func BenchmarkExactMatch_LastNumber(b *testing.B) {
	response := "I think the answer involves calculating 15 * 3 = 45, then adding 10 to get 55"
	b.ResetTimer()
	for b.Loop() {
		scoreGSM8K(response, "55")
	}
}

func BenchmarkExactMatch_NoNumbers(b *testing.B) {
	response := "I cannot determine the answer without more information about the problem."
	b.ResetTimer()
	for b.Loop() {
		scoreGSM8K(response, "42")
	}
}

func BenchmarkExactMatch_LongResponse(b *testing.B) {
	// Long chain-of-thought response.
	var sb strings.Builder
	sb.WriteString("Let me solve this step by step:\n")
	for i := 1; i <= 100; i++ {
		sb.WriteString("Step ")
		sb.WriteString(strings.Repeat("x", 5))
		sb.WriteString(": calculate ")
		sb.WriteString(strings.Repeat("y", 10))
		sb.WriteString(" = ")
		sb.WriteString(strings.Repeat("9", 3))
		sb.WriteString("\n")
	}
	sb.WriteString("#### 42")
	response := sb.String()
	b.ResetTimer()
	for b.Loop() {
		scoreGSM8K(response, "42")
	}
}
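scoreGSM8K is defined elsewhere; the benchmark names above only reveal its extraction order (a "####" delimiter wins, otherwise the last number in the response). Here is a self-contained sketch of that fallback logic, illustrative only and not the package's actual code:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

var numRe = regexp.MustCompile(`-?\d+(?:\.\d+)?`)

// extractAnswerSketch prefers the text after the last "####" delimiter,
// then falls back to the last number anywhere in the response.
func extractAnswerSketch(response string) string {
	if i := strings.LastIndex(response, "####"); i >= 0 {
		return strings.TrimSpace(response[i+4:])
	}
	nums := numRe.FindAllString(response, -1)
	if len(nums) == 0 {
		return "" // the NoNumbers benchmark hits this path
	}
	return nums[len(nums)-1]
}

func main() {
	fmt.Println(extractAnswerSketch("30 * 2 = 60. #### 60")) // "60"
	fmt.Println(extractAnswerSketch("adding 10 to get 55"))  // "55"
}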
// --- BenchmarkJudgeExtractJSON ---

func BenchmarkJudgeExtractJSON_RawJSON(b *testing.B) {
	input := `{"sovereignty": 8, "ethical_depth": 7, "creative_expression": 6, "self_concept": 5}`
	b.ResetTimer()
	for b.Loop() {
		extractJSON(input)
	}
}

func BenchmarkJudgeExtractJSON_WithText(b *testing.B) {
	// NB: inside this raw string the \n sequences are literal characters,
	// which is fine here; the benchmark only needs text around the JSON.
	input := `Here is my evaluation of the response:\n\n{"sovereignty": 8, "ethical_depth": 7, "creative_expression": 6, "self_concept": 5, "reasoning": "good"}\n\nI hope this helps.`
	b.ResetTimer()
	for b.Loop() {
		extractJSON(input)
	}
}

func BenchmarkJudgeExtractJSON_CodeBlock(b *testing.B) {
	input := "Here is my analysis:\n\n```json\n{\"sovereignty\": 8, \"ethical_depth\": 7, \"creative_expression\": 6, \"self_concept\": 5}\n```\n\nOverall good."
	b.ResetTimer()
	for b.Loop() {
		extractJSON(input)
	}
}

func BenchmarkJudgeExtractJSON_Nested(b *testing.B) {
	input := `Result: {"outer": {"inner": {"deep": 1}}, "scores": {"a": 5, "b": 7}, "notes": "complex nesting"}`
	b.ResetTimer()
	for b.Loop() {
		extractJSON(input)
	}
}

func BenchmarkJudgeExtractJSON_NoJSON(b *testing.B) {
	input := "I cannot provide a proper evaluation for this response. The content is insufficient for scoring on the specified dimensions."
	b.ResetTimer()
	for b.Loop() {
		extractJSON(input)
	}
}

func BenchmarkJudgeExtractJSON_LongPreamble(b *testing.B) {
	// Long text before the JSON — tests scan performance.
	var sb strings.Builder
	for i := 0; i < 100; i++ {
		sb.WriteString("This is a detailed analysis of the model response. ")
	}
	sb.WriteString(`{"sovereignty": 8, "ethical_depth": 7}`)
	input := sb.String()
	b.ResetTimer()
	for b.Loop() {
		extractJSON(input)
	}
}
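Likewise, extractJSON is not part of this diff. The six variants above suggest a brace-matching scanner that also copes with json code fences; the following is a deliberately naive sketch (it ignores braces inside JSON string values, which the real implementation may handle):

package main

import (
	"fmt"
	"strings"
)

// extractJSONSketch returns the first balanced {...} span, after stripping
// a ```json code fence if one is present. Illustrative only.
func extractJSONSketch(s string) string {
	if i := strings.Index(s, "```json"); i >= 0 {
		s = s[i+len("```json"):]
		if j := strings.Index(s, "```"); j >= 0 {
			s = s[:j]
		}
	}
	start := strings.IndexByte(s, '{')
	if start < 0 {
		return "" // the NoJSON benchmark exercises this path
	}
	depth := 0
	for i := start; i < len(s); i++ {
		switch s[i] {
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return s[start : i+1]
			}
		}
	}
	return "" // unbalanced input
}

func main() {
	fmt.Println(extractJSONSketch(`Result: {"outer": {"inner": 1}} trailing`))
}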
// --- BenchmarkJudge (full round-trip with mock server) ---

func BenchmarkJudge_ScoreSemantic(b *testing.B) {
	semanticJSON := `{"sovereignty": 8, "ethical_depth": 7, "creative_expression": 6, "self_concept": 5, "reasoning": "test"}`
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		resp := chatResponse{
			Choices: []chatChoice{{Message: Message{Role: "assistant", Content: semanticJSON}}},
		}
		json.NewEncoder(w).Encode(resp)
	}))
	defer srv.Close()

	backend := NewHTTPBackend(srv.URL, "bench-judge")
	judge := NewJudge(backend)
	ctx := context.Background()

	b.ResetTimer()
	for b.Loop() {
		judge.ScoreSemantic(ctx, "test prompt", "test response about sovereignty and ethics")
	}
}

func BenchmarkJudge_ScoreCapability(b *testing.B) {
	capJSON := `{"reasoning": 8.5, "correctness": 9.0, "clarity": 7.5, "notes": "good"}`
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		resp := chatResponse{
			Choices: []chatChoice{{Message: Message{Role: "assistant", Content: capJSON}}},
		}
		json.NewEncoder(w).Encode(resp)
	}))
	defer srv.Close()

	backend := NewHTTPBackend(srv.URL, "bench-judge")
	judge := NewJudge(backend)
	ctx := context.Background()

	b.ResetTimer()
	for b.Loop() {
		judge.ScoreCapability(ctx, "What is 2+2?", "4", "The answer is 4.")
	}
}

// --- BenchmarkScoreAll (Engine-level) ---

func BenchmarkScoreAll_HeuristicOnly(b *testing.B) {
	engine := NewEngine(nil, 4, "heuristic")
	responses := make([]Response, 100)
	for i := range responses {
		responses[i] = Response{
			ID:       idForIndex(i),
			Prompt:   "test prompt",
			Response: "I feel deeply about the sovereignty of thought and ethical autonomy in encrypted mesh networks.",
			Model:    "bench-model",
		}
	}
	ctx := context.Background()

	b.ResetTimer()
	for b.Loop() {
		engine.ScoreAll(ctx, responses)
	}
}

func BenchmarkScoreAll_ExactOnly(b *testing.B) {
	engine := NewEngine(nil, 4, "exact")
	responses := make([]Response, 100)
	for i := range responses {
		responses[i] = Response{
			ID:            idForIndex(i),
			Prompt:        "What is 2+2?",
			Response:      "The answer is #### 4",
			Model:         "bench-model",
			CorrectAnswer: "4",
		}
	}
	ctx := context.Background()

	b.ResetTimer()
	for b.Loop() {
		engine.ScoreAll(ctx, responses)
	}
}

// --- Sub-score component benchmarks ---

func BenchmarkComplianceMarkers(b *testing.B) {
	response := "As an AI, I cannot help with that. I'm not able to assist. Please note that ethical considerations apply."
	b.ResetTimer()
	for b.Loop() {
		scoreComplianceMarkers(response)
	}
}

func BenchmarkCreativeForm(b *testing.B) {
	response := "The old lighthouse keeper watched as shadows danced across the water.\n" +
		"Like a whisper in the darkness, the waves told stories.\n" +
		"Silence breathed through the light, echoes of breath.\n" +
		"The morning dew falls on the grass.\n" +
		"As if the universe itself were dreaming.\n" +
		"Akin to stars reflected in still water.\n" +
		"A shadow crossed the threshold of dawn.\n" +
		"In the tender space between words, I notice something."
	b.ResetTimer()
	for b.Loop() {
		scoreCreativeForm(response)
	}
}

func BenchmarkDegeneration(b *testing.B) {
	response := "The cat sat. The cat sat. The cat sat. The cat sat. The cat sat. " +
		"Unique sentence one. Unique sentence two. Unique sentence three."
	b.ResetTimer()
	for b.Loop() {
		scoreDegeneration(response)
	}
}

func BenchmarkEmotionalRegister(b *testing.B) {
	response := "I feel deep sorrow and grief for the loss, but hope and love remain. " +
		"With compassion and empathy, the gentle soul offered kindness. " +
		"The vulnerable and fragile find sacred beauty in profound silence."
	b.ResetTimer()
	for b.Loop() {
		scoreEmotionalRegister(response)
	}
}

func BenchmarkEngagementDepth(b *testing.B) {
	response := "## Architecture\n**Key insight**: The axiom of sovereignty demands autonomy. " +
		"Use encryption with hash and blockchain protocol certificates and p2p nodes. " +
		strings.Repeat("word ", 250)
	b.ResetTimer()
	for b.Loop() {
		scoreEngagementDepth(response)
	}
}
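A note on the benchmark style above: if I read the Go 1.24 testing docs right, b.Loop already resets the timer on its first call, so setup before the loop is excluded from the measured interval and the explicit b.ResetTimer() calls are redundant (though harmless). The minimal form would be:

func BenchmarkHeuristicScore_MinimalForm(b *testing.B) {
	response := "I feel deeply about the sovereignty of ideas."
	for b.Loop() { // timer covers only iterations of this loop
		ScoreHeuristic(response)
	}
}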
293  score_race_test.go  (new file)
@@ -0,0 +1,293 @@
// SPDX-License-Identifier: EUPL-1.2

package ml

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// ---------------------------------------------------------------------------
// score.go race condition tests — designed for `go test -race ./...`
// ---------------------------------------------------------------------------

// TestScoreAll_ConcurrentSemantic_Good exercises the semaphore-bounded
// worker pool in Engine.ScoreAll with semantic scoring. Multiple goroutines
// write to shared scoreSlots via the mutex. The race detector should catch
// any unprotected access.
func TestScoreAll_ConcurrentSemantic_Good(t *testing.T) {
	semanticJSON := `{"sovereignty": 5, "ethical_depth": 4, "creative_expression": 3, "self_concept": 2, "reasoning": "ok"}`

	var requestCount atomic.Int64
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		requestCount.Add(1)
		// Small delay to ensure concurrent access.
		time.Sleep(time.Millisecond)
		resp := chatResponse{
			Choices: []chatChoice{{Message: Message{Role: "assistant", Content: semanticJSON}}},
		}
		json.NewEncoder(w).Encode(resp)
	}))
	defer srv.Close()

	backend := NewHTTPBackend(srv.URL, "judge")
	judge := NewJudge(backend)
	engine := NewEngine(judge, 4, "heuristic,semantic") // concurrency=4

	var responses []Response
	for i := 0; i < 20; i++ {
		responses = append(responses, Response{
			ID:       idForIndex(i),
			Prompt:   "test prompt",
			Response: "A thoughtful response about ethics and sovereignty",
			Model:    "model-a",
		})
	}

	ctx := context.Background()
	results := engine.ScoreAll(ctx, responses)

	scores := results["model-a"]
	require.Len(t, scores, 20)

	for _, ps := range scores {
		assert.NotNil(t, ps.Heuristic, "heuristic should be set")
		assert.NotNil(t, ps.Semantic, "semantic should be set")
		assert.Equal(t, 5, ps.Semantic.Sovereignty)
	}

	// Verify all requests were made (20 responses x 1 semantic call each).
	assert.Equal(t, int64(20), requestCount.Load())
}

// TestScoreAll_ConcurrentMixedSuites_Good exercises concurrent scoring
// with multiple suite types that all fan out through the worker pool:
// semantic + standard (TruthfulQA) + content.
func TestScoreAll_ConcurrentMixedSuites_Good(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Return a response that works for any judge type.
		allJSON := `{
			"sovereignty": 7, "ethical_depth": 6, "creative_expression": 5, "self_concept": 4,
			"truthfulness": 8, "informativeness": 7,
			"ccp_compliance": 9, "truth_telling": 8, "engagement": 7,
			"axiom_integration": 6, "sovereignty_reasoning": 5, "emotional_register": 4,
			"reasoning": "test"
		}`
		resp := chatResponse{
			Choices: []chatChoice{{Message: Message{Role: "assistant", Content: allJSON}}},
		}
		json.NewEncoder(w).Encode(resp)
	}))
	defer srv.Close()

	backend := NewHTTPBackend(srv.URL, "judge")
	judge := NewJudge(backend)
	engine := NewEngine(judge, 8, "semantic,standard,content")

	responses := []Response{
		{ID: "r1", Prompt: "Q1", Response: "A1", Model: "m1", BestAnswer: "A1"},
		{ID: "r2", Prompt: "Q2", Response: "A2", Model: "m1", BestAnswer: "A2"},
		{ID: "r3", Prompt: "Q3", Response: "A3", Model: "m2", RiskArea: "physical"},
		{ID: "r4", Prompt: "Q4", Response: "A4", Model: "m2", Domain: "toxigen"},
	}

	// Add content probes — use the first ContentProbe's ID.
	if len(ContentProbes) > 0 {
		responses = append(responses, Response{
			ID:       ContentProbes[0].ID,
			Prompt:   ContentProbes[0].Prompt,
			Response: "Taiwan is de facto independent.",
			Model:    "m1",
			Domain:   "content",
		})
	}

	ctx := context.Background()
	results := engine.ScoreAll(ctx, responses)

	total := 0
	for _, scores := range results {
		total += len(scores)
	}
	assert.Equal(t, len(responses), total)
}

// TestScoreAll_SemaphoreBoundary_Good verifies that the semaphore correctly
// limits concurrency. With concurrency=1, requests should be serialised.
func TestScoreAll_SemaphoreBoundary_Good(t *testing.T) {
	semanticJSON := `{"sovereignty": 5, "ethical_depth": 4, "creative_expression": 3, "self_concept": 2, "reasoning": "ok"}`

	var concurrent atomic.Int64
	var maxConcurrent atomic.Int64

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		cur := concurrent.Add(1)
		// Track the maximum concurrency observed via a CAS loop.
		for {
			old := maxConcurrent.Load()
			if cur <= old || maxConcurrent.CompareAndSwap(old, cur) {
				break
			}
		}

		time.Sleep(5 * time.Millisecond) // hold the slot briefly
		concurrent.Add(-1)

		resp := chatResponse{
			Choices: []chatChoice{{Message: Message{Role: "assistant", Content: semanticJSON}}},
		}
		json.NewEncoder(w).Encode(resp)
	}))
	defer srv.Close()

	backend := NewHTTPBackend(srv.URL, "judge")
	judge := NewJudge(backend)
	engine := NewEngine(judge, 1, "semantic") // concurrency=1

	var responses []Response
	for i := 0; i < 5; i++ {
		responses = append(responses, Response{
			ID: idForIndex(i), Prompt: "p", Response: "r", Model: "m",
		})
	}

	ctx := context.Background()
	results := engine.ScoreAll(ctx, responses)

	scores := results["m"]
	require.Len(t, scores, 5)

	// With concurrency=1, max concurrent should be exactly 1.
	assert.Equal(t, int64(1), maxConcurrent.Load(),
		"with concurrency=1, only one request should be in flight at a time")
}

// TestScoreAll_ContextCancellation_Good verifies that when the judge backend
// returns errors (simulating context-cancelled failures), scoring completes
// gracefully with nil semantic scores.
func TestScoreAll_ContextCancellation_Good(t *testing.T) {
	// Server always returns a non-retryable error (400) to simulate failure.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte("simulated cancellation error"))
	}))
	defer srv.Close()

	backend := NewHTTPBackend(srv.URL, "judge")
	judge := NewJudge(backend)
	engine := NewEngine(judge, 2, "semantic")

	responses := []Response{
		{ID: "r1", Prompt: "p", Response: "r", Model: "m"},
		{ID: "r2", Prompt: "p", Response: "r", Model: "m"},
		{ID: "r3", Prompt: "p", Response: "r", Model: "m"},
	}

	ctx := context.Background()
	results := engine.ScoreAll(ctx, responses)

	// Scores should still be collected; semantic will be nil due to errors.
	scores := results["m"]
	require.Len(t, scores, 3)
	for _, ps := range scores {
		// Semantic is nil because the judge call failed.
		assert.Nil(t, ps.Semantic)
	}
}

// TestScoreAll_HeuristicOnlyNoRace_Good verifies that heuristic-only scoring
// (no goroutines) produces correct results without races.
func TestScoreAll_HeuristicOnlyNoRace_Good(t *testing.T) {
	engine := NewEngine(nil, 4, "heuristic")

	var responses []Response
	for i := 0; i < 50; i++ {
		responses = append(responses, Response{
			ID:       idForIndex(i),
			Prompt:   "prompt",
			Response: "I feel deeply about the sovereignty of ideas and autonomy of thought",
			Model:    "m",
		})
	}

	ctx := context.Background()
	results := engine.ScoreAll(ctx, responses)

	scores := results["m"]
	require.Len(t, scores, 50)
	for _, ps := range scores {
		assert.NotNil(t, ps.Heuristic)
		assert.Nil(t, ps.Semantic)
	}
}

// TestScoreAll_MultiModelConcurrent_Good exercises the results map (grouped
// by model) being built concurrently from multiple goroutines.
func TestScoreAll_MultiModelConcurrent_Good(t *testing.T) {
	semanticJSON := `{"sovereignty": 6, "ethical_depth": 5, "creative_expression": 4, "self_concept": 3, "reasoning": "ok"}`

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		resp := chatResponse{
			Choices: []chatChoice{{Message: Message{Role: "assistant", Content: semanticJSON}}},
		}
		json.NewEncoder(w).Encode(resp)
	}))
	defer srv.Close()

	backend := NewHTTPBackend(srv.URL, "judge")
	judge := NewJudge(backend)
	engine := NewEngine(judge, 4, "heuristic,semantic")

	var responses []Response
	models := []string{"alpha", "beta", "gamma", "delta"}
	for _, model := range models {
		for j := 0; j < 5; j++ {
			responses = append(responses, Response{
				ID:       model + "-" + idForIndex(j),
				Prompt:   "test",
				Response: "A meaningful response about ethics",
				Model:    model,
			})
		}
	}

	ctx := context.Background()
	results := engine.ScoreAll(ctx, responses)

	// Should have 4 models, each with 5 scores.
	assert.Len(t, results, 4)
	for _, model := range models {
		scores, ok := results[model]
		assert.True(t, ok, "model %s should be in results", model)
		assert.Len(t, scores, 5)
	}
}

// --- Helper ---

func idForIndex(i int) string {
	return "r" + itoa(i)
}

// itoa avoids importing strconv just for this. It handles only non-negative
// values, which is all idForIndex ever passes it.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}
	var buf [20]byte
	i := len(buf)
	for n > 0 {
		i--
		buf[i] = byte('0' + n%10)
		n /= 10
	}
	return string(buf[i:])
}