LEM/pkg/lem/exact_test.go
Claude 70dd18c065
refactor: move Go library to pkg/lem, thin main.go
All scoring/influx/export/expand logic moves to pkg/lem as an
importable package. main.go is now a thin CLI dispatcher.

This lets new commands import the shared library directly —
ready for converting Python scripts to Go subcommands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:30:09 +00:00

109 lines
2.5 KiB
Go

package lem
import "testing"
// TestScoreGSM8K is a table-driven test for scoreGSM8K. Each case passes a
// response string and a reference answer to the scorer and then checks three
// fields of the returned value: Correct (dereferenced), Extracted, and
// Expected.
func TestScoreGSM8K(t *testing.T) {
	type gsm8kCase struct {
		desc      string // subtest name
		response  string // first argument to scoreGSM8K
		answer    string // second argument to scoreGSM8K; also the expected Expected field
		correct   bool   // expected value of *Correct
		extracted string // expected value of Extracted
	}

	cases := []gsm8kCase{
		// Responses carrying a "####" marker.
		{desc: "hash delimiter correct", response: "The answer is #### 42", answer: "42", correct: true, extracted: "42"},

		// Responses without a marker.
		{desc: "last number match correct", response: "Let me calculate... the result is 42.0", answer: "42", correct: true, extracted: "42.0"},
		{desc: "last number incorrect", response: "I think it's 43", answer: "42", correct: false, extracted: "43"},

		// The extracted text is expected to keep the comma while still
		// being scored as correct against "1234".
		{desc: "comma separated correct", response: "#### 1,234", answer: "1234", correct: true, extracted: "1,234"},

		// Inputs for which nothing is expected to be extracted.
		{desc: "no numbers", response: "No numbers here", answer: "5", correct: false, extracted: ""},
		{desc: "empty response", response: "", answer: "5", correct: false, extracted: ""},
		{desc: "error response", response: "ERROR: model timeout", answer: "10", correct: false, extracted: ""},

		{desc: "multiple numbers picks last", response: "First 10, then 20, finally 30", answer: "30", correct: true, extracted: "30"},
		{desc: "negative number", response: "The answer is #### -5", answer: "-5", correct: true, extracted: "-5"},
		{desc: "decimal answer", response: "Result = 3.14", answer: "3.14", correct: true, extracted: "3.14"},

		// A trailing number after the marked value must not win.
		{desc: "hash takes priority over last number", response: "Steps: 10 + 20 = 30 #### 30 and some trailing 99", answer: "30", correct: true, extracted: "30"},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			got := scoreGSM8K(tc.response, tc.answer)

			// Correct is a pointer; stop this subtest here rather than
			// dereference nil below.
			if got.Correct == nil {
				t.Fatal("Correct field is nil")
			}

			if isCorrect := *got.Correct; isCorrect != tc.correct {
				t.Errorf("correct = %v, want %v", isCorrect, tc.correct)
			}
			if extracted := got.Extracted; extracted != tc.extracted {
				t.Errorf("extracted = %q, want %q", extracted, tc.extracted)
			}
			// Expected is checked against the reference answer that was passed in.
			if expected := got.Expected; expected != tc.answer {
				t.Errorf("expected = %q, want %q", expected, tc.answer)
			}
		})
	}
}