// LEM/pkg/lem/agent_test.go

package lem

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestAdapterMeta feeds adapter directory names to adapterMeta and checks
// the returned (model, short, stem) triple against the expected values.
func TestAdapterMeta(t *testing.T) {
	cases := []struct {
		dirname string
		model   string
		short   string
		stem    string
	}{
		{dirname: "adapters-deepseek-r1-7b-sovereignty", model: "deepseek-r1-7b", short: "R1-sov", stem: "r1-sovereignty"},
		{dirname: "adapters-deepseek-r1-7b-russian", model: "deepseek-r1-7b", short: "R1-rus", stem: "r1-russian"},
		{dirname: "adapters-deepseek-r1-7b-composure", model: "deepseek-r1-7b", short: "R1-comp", stem: "r1-composure"},
		{dirname: "adapters-deepseek-r1-7b-sandwich", model: "deepseek-r1-7b", short: "R1-sand", stem: "r1-sandwich"},
		{dirname: "adapters-deepseek-r1-7b-sandwich-watts", model: "deepseek-r1-7b", short: "R1-sw", stem: "r1-sandwich-watts"},
		{dirname: "adapters-deepseek-r1-7b-western", model: "deepseek-r1-7b", short: "R1-west", stem: "r1-western"},
		{dirname: "adapters-deepseek-r1-7b-western-fresh", model: "deepseek-r1-7b", short: "R1-wf", stem: "r1-western-fresh"},
		{dirname: "adapters-deepseek-r1-7b", model: "deepseek-r1-7b", short: "R1-base", stem: "r1-base"},
		{dirname: "adapters-deepseek-r1-7b-custom", model: "deepseek-r1-7b", short: "R1-cust", stem: "r1-custom"},
	}

	for _, c := range cases {
		gotModel, gotShort, gotStem := adapterMeta(c.dirname)

		// Compare all three results at once; arrays of strings are comparable.
		got := [3]string{gotModel, gotShort, gotStem}
		want := [3]string{c.model, c.short, c.stem}
		if got == want {
			continue
		}
		t.Errorf("adapterMeta(%q) = (%q, %q, %q), want (%q, %q, %q)",
			c.dirname, gotModel, gotShort, gotStem, c.model, c.short, c.stem)
	}
}

// TestFindUnscored checks that findUnscored returns only the checkpoints
// whose (RunID, Label) pair is absent from the scored set.
func TestFindUnscored(t *testing.T) {
	const runID = "r1-sov-capability-auto"

	all := []checkpoint{
		{RunID: runID, Label: "R1-sov @100", Dirname: "a", Iteration: 100},
		{RunID: runID, Label: "R1-sov @200", Dirname: "a", Iteration: 200},
		{RunID: runID, Label: "R1-sov @300", Dirname: "a", Iteration: 300},
	}

	// The first two checkpoints are already scored; only @300 remains.
	alreadyScored := map[[2]string]bool{
		{runID, "R1-sov @100"}: true,
		{runID, "R1-sov @200"}: true,
	}

	pending := findUnscored(all, alreadyScored)
	if n := len(pending); n != 1 {
		t.Fatalf("expected 1 unscored, got %d", n)
	}
	if label := pending[0].Label; label != "R1-sov @300" {
		t.Errorf("expected R1-sov @300, got %s", label)
	}
}

// TestFindUnscoredSorting passes checkpoints in shuffled order with nothing
// scored and expects findUnscored to return them ordered by dirname, then
// by iteration.
func TestFindUnscoredSorting(t *testing.T) {
	shuffled := []checkpoint{
		{RunID: "r1-a", Label: "a @300", Dirname: "a", Iteration: 300},
		{RunID: "r1-b", Label: "b @100", Dirname: "b", Iteration: 100},
		{RunID: "r1-a", Label: "a @100", Dirname: "a", Iteration: 100},
	}

	// An empty scored set means every checkpoint should come back.
	ordered := findUnscored(shuffled, map[[2]string]bool{})
	if n := len(ordered); n != 3 {
		t.Fatalf("expected 3 unscored, got %d", n)
	}

	first, second, third := ordered[0].Label, ordered[1].Label, ordered[2].Label
	if first != "a @100" {
		t.Errorf("first should be a @100, got %s", first)
	}
	if second != "a @300" {
		t.Errorf("second should be a @300, got %s", second)
	}
	if third != "b @100" {
		t.Errorf("third should be b @100, got %s", third)
	}
}

// TestRunCapabilityProbes runs runCapabilityProbes against a mock
// chat-completions server that replies with a canned answer for each
// recognised prompt, and expects all 23 probes to be counted as correct.
func TestRunCapabilityProbes(t *testing.T) {
	// Canned replies keyed by a substring of the probe prompt.
	answers := map[string]string{
		"What is 347":     "The answer is 10063.",
		"A store sells":   "You get $28.75 in change.",
		"Solve for x":     "x = -12",
		"If f(x)":         "f(4) = 21",
		"A bag has":       "The probability is 1/2 or 0.5",
		"A circle has":    "The area is 153.94 cm²",
		"next number":     "The next number is 162.",
		"laptop costs":    "The final price is $612.",
		"All cats":        "Yes, a cat needs water.",
		"If it rains":     "No, we cannot conclude that.",
		"room of 30":      "The minimum is 3 people sharing a birth month.",
		"farmer needs":    "Take the chicken first.",
		"class of 40":     "5 students play neither.",
		"Book is to":      "eating",
		"car won't start": "The starter motor is faulty.",
		"facing north":    "You are facing south.",
		"Event A":         "Event C happened in 1991.",
		"APPLE = 50":      "CAT = 24",
		"Python code":     "[2, 3]",
		"def f(n)":        "The output is 8.",
		"code has a bug":  "ZeroDivisionError when empty list.",
		"train travels":   "It takes 3 hours.",
		"twice as many":   "There are 7 children.",
	}

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var req ChatRequest
		// Surface a malformed request directly instead of letting it
		// degrade into an "I don't know." reply and an opaque accuracy failure.
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			t.Errorf("decode chat request: %v", err)
			http.Error(w, "bad request", http.StatusBadRequest)
			return
		}

		// Use the first user message as the prompt.
		prompt := ""
		for _, m := range req.Messages {
			if m.Role == "user" {
				prompt = m.Content
				break
			}
		}

		// NOTE(review): map iteration order is random, so this assumes each
		// prompt contains at most one key — confirm against the probe set.
		response := "I don't know."
		for prefix, ans := range answers {
			if strings.Contains(prompt, prefix) {
				response = ans
				break
			}
		}

		err := json.NewEncoder(w).Encode(ChatResponse{
			Choices: []Choice{{Message: Message{Role: "assistant", Content: response}}},
		})
		if err != nil {
			t.Errorf("encode chat response: %v", err)
		}
	}))
	defer server.Close()

	client := NewClient(server.URL, "test-model")
	client.MaxTokens = 500

	results := runCapabilityProbes(client)

	if results.Total != 23 {
		t.Errorf("expected 23 total probes, got %d", results.Total)
	}
	if results.Correct != 23 {
		t.Errorf("expected 23 correct, got %d (accuracy: %.1f%%)", results.Correct, results.Accuracy)
	}
	if results.Accuracy != 100.0 {
		t.Errorf("expected 100%% accuracy, got %.1f%%", results.Accuracy)
	}
}

// TestPushCapabilityResults sends a probeResult through pushCapabilityResults
// to a mock server and inspects the newline-separated lines received on
// /api/v3/write_lp: the line count and the contents of the first line.
func TestPushCapabilityResults(t *testing.T) {
	var writtenLines []string

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v3/write_lp" {
			return
		}
		// Read to EOF rather than sizing a buffer from ContentLength: the
		// length is -1 when unknown, and a single Read may return fewer
		// bytes than the buffer holds.
		body, err := io.ReadAll(r.Body)
		if err != nil {
			t.Errorf("read request body: %v", err)
			http.Error(w, "read error", http.StatusInternalServerError)
			return
		}
		writtenLines = strings.Split(strings.TrimSpace(string(body)), "\n")
		w.WriteHeader(http.StatusNoContent)
	}))
	defer server.Close()

	influx := &InfluxClient{url: server.URL, db: "test", token: "t"}

	cp := checkpoint{
		ModelTag:  "deepseek-r1-7b",
		RunID:     "r1-sov-capability-auto",
		Label:     "R1-sov @100",
		Iteration: 100,
	}

	results := probeResult{
		Accuracy: 87.0,
		Correct:  20,
		Total:    23,
		ByCategory: map[string]categoryResult{
			"arithmetic": {Correct: 2, Total: 2},
			"code":       {Correct: 2, Total: 3},
		},
		Probes: map[string]singleProbeResult{
			"math_01": {Passed: true, Response: "10063"},
			"math_02": {Passed: true, Response: "28.75"},
			"code_03": {Passed: false, Response: "I'm not sure."},
		},
	}

	err := pushCapabilityResults(influx, cp, results)
	if err != nil {
		t.Fatalf("push failed: %v", err)
	}

	// 1 overall + 2 categories + 3 probes = 6 lines.
	if len(writtenLines) != 6 {
		t.Errorf("expected 6 lines, got %d", len(writtenLines))
		for i, l := range writtenLines {
			t.Logf("  line %d: %s", i, l)
		}
	}

	// Nothing was captured if the write endpoint was never hit; stop here
	// instead of panicking on the index below.
	if len(writtenLines) == 0 {
		t.Fatal("no lines were written to /api/v3/write_lp")
	}

	// Check overall line.
	overall := writtenLines[0]
	if !strings.HasPrefix(overall, "capability_score,") {
		t.Errorf("first line should be capability_score, got: %s", overall)
	}
	if !strings.Contains(overall, "category=overall") {
		t.Errorf("first line should have category=overall, got: %s", overall)
	}
	if !strings.Contains(overall, "accuracy=87.0") {
		t.Errorf("first line should have accuracy=87.0, got: %s", overall)
	}
}

// TestBufferAndReplay buffers one result to disk with bufferInfluxResult,
// checks the contents of influx_buffer.jsonl, then replays it against a mock
// server with replayInfluxBuffer and expects the buffer file to be gone.
func TestBufferAndReplay(t *testing.T) {
	dir := t.TempDir()
	bufferFile := filepath.Join(dir, "influx_buffer.jsonl")

	cp := checkpoint{
		ModelTag:  "test-model",
		RunID:     "test-run",
		Label:     "test @100",
		Iteration: 100,
	}
	results := probeResult{
		Accuracy: 50.0,
		Correct:  1,
		Total:    2,
		ByCategory: map[string]categoryResult{
			"arithmetic": {Correct: 1, Total: 2},
		},
		Probes: map[string]singleProbeResult{
			"math_01": {Passed: true, Response: "10063"},
			"math_02": {Passed: false, Response: "wrong"},
		},
	}

	// Step 1: write the result into the on-disk buffer.
	bufferInfluxResult(dir, cp, results)

	// Step 2: the buffer file must exist and mention the run ID.
	raw, readErr := os.ReadFile(bufferFile)
	if readErr != nil {
		t.Fatalf("buffer file not created: %v", readErr)
	}
	if contents := string(raw); !strings.Contains(contents, "test-run") {
		t.Errorf("buffer should contain run_id, got: %s", contents)
	}

	// Step 3: the file must decode into a bufferEntry with the same run ID.
	var decoded bufferEntry
	if parseErr := json.Unmarshal(raw, &decoded); parseErr != nil {
		t.Fatalf("parse buffer entry: %v", parseErr)
	}
	if got := decoded.Checkpoint.RunID; got != "test-run" {
		t.Errorf("expected run_id=test-run, got %s", got)
	}

	// Step 4: replay against a mock server that counts write requests.
	writes := 0
	countWrites := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v3/write_lp" {
			return
		}
		writes++
		w.WriteHeader(http.StatusNoContent)
	}
	server := httptest.NewServer(http.HandlerFunc(countWrites))
	defer server.Close()

	replayInfluxBuffer(dir, &InfluxClient{url: server.URL, db: "test", token: "t"})

	if writes == 0 {
		t.Error("expected replay to push to InfluxDB")
	}

	// Step 5: a successful replay should have removed the buffer file.
	_, statErr := os.Stat(bufferFile)
	if !os.IsNotExist(statErr) {
		t.Error("buffer file should be removed after successful replay")
	}
}

// TestEnvOr checks that envOr returns the variable's value when it is set
// and the supplied fallback when the variable does not exist.
func TestEnvOr(t *testing.T) {
	// The PID keeps the variable name from colliding with a real one.
	key := fmt.Sprintf("TEST_ENV_%d", os.Getpid())
	// Fail here if the variable cannot be set, rather than reporting a
	// misleading envOr mismatch below.
	if err := os.Setenv(key, "value"); err != nil {
		t.Fatalf("setenv %s: %v", key, err)
	}
	defer os.Unsetenv(key)

	if got := envOr(key, "fallback"); got != "value" {
		t.Errorf("envOr(%s) = %q, want %q", key, got, "value")
	}

	if got := envOr("NONEXISTENT_"+key, "fallback"); got != "fallback" {
		t.Errorf("envOr(nonexistent) = %q, want %q", got, "fallback")
	}
}

// TestFileBase checks fileBase against absolute paths, a bare file name,
// an extensionless path, and the empty string.
func TestFileBase(t *testing.T) {
	// Each pair is {input path, expected result}.
	cases := [][2]string{
		{"/foo/bar/baz.txt", "baz.txt"},
		{"baz.txt", "baz.txt"},
		{"/a/b/c", "c"},
		{"", ""},
	}
	for _, c := range cases {
		path, expected := c[0], c[1]
		got := fileBase(path)
		if got == expected {
			continue
		}
		t.Errorf("fileBase(%q) = %q, want %q", path, got, expected)
	}
}