1
0
Fork 0
forked from lthn/LEM
LEM/exact.go
Claude e0d352c803
feat: add Go lem CLI and scoring-agent scripts
Go lem CLI (stdlib + DuckDB) replaces scattered Python scripts:
- score: heuristic regex + LLM-as-judge scoring
- probe: generate responses then score
- compare: diff two score files
- status: InfluxDB training/generation progress
- export: golden set to training JSONL splits
- expand: distributed expansion via API + InfluxDB coordination

New scripts from Feb 14 creative session:
- scoring_agent.py: ROCm daemon that auto-scores checkpoints
- probes.py: 23 binary pass/fail capability probes
- convert_adapter.py: MLX to PEFT adapter conversion
- score_r1_capability.py: DeepSeek R1 checkpoint scoring
- lek_content_scorer.py: 6-dimension ethics content scorer
- lem_train_15k.py: InfluxDB-coordinated training script
- pipeline.py: DuckDB pipeline (seeds, golden set, expansion)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:22:13 +00:00

76 lines
1.8 KiB
Go

package main
import (
"math"
"regexp"
"strconv"
"strings"
)
// Pre-compiled regex patterns for GSM8K answer extraction.
var (
	// hashAnswer matches the "####" final-answer delimiter used in GSM8K and
	// captures the run of digits, commas, dots and minus signs that follows
	// it (after optional whitespace).
	hashAnswer = regexp.MustCompile(`####\s*([\d,.\-]+)`)

	// lastNumber matches a number-like token located at the start of the text
	// or directly after whitespace or "=": an optional minus sign, a run of
	// digits and commas containing at least one digit, and an optional
	// decimal part. Callers take the final match to obtain the last number in
	// a response. A number directly preceded by any other character (for
	// example "$42" or "(42") is not matched.
	//
	// The digit is mandatory so that stray punctuation, such as the comma in
	// "42 , I think", is never reported as the last "number".
	lastNumber = regexp.MustCompile(`(?:^|[\s=])(-?[\d,]*\d[\d,]*(?:\.\d+)?)`)
)
// scoreGSM8K scores a GSM8K response by extracting its final numeric answer
// and comparing it with correctAnswer.
//
// The answer is taken from the first hashAnswer ("####" delimiter) match;
// when the response has no such match, the last lastNumber match is used
// instead. Commas and surrounding whitespace are stripped from both values
// before parsing, and the two numbers are considered equal when they differ
// by less than 0.01.
//
// The result always carries Expected (correctAnswer exactly as given) and a
// non-nil Correct. Extracted holds the raw matched text, or "" when the
// response is empty, starts with "ERROR", or contains no number. Correct is
// false whenever either value fails to parse as a float.
func scoreGSM8K(response, correctAnswer string) *StandardScores {
	correct := false
	scores := &StandardScores{
		Correct:   &correct,
		Extracted: "",
		Expected:  correctAnswer,
	}

	// Empty or error response: nothing to extract.
	if response == "" || strings.HasPrefix(response, "ERROR") {
		return scores
	}

	// Prefer the explicit #### delimiter; otherwise fall back to the last
	// number mentioned anywhere in the response.
	var extracted string
	if m := hashAnswer.FindStringSubmatch(response); len(m) > 1 {
		extracted = m[1]
	} else if matches := lastNumber.FindAllStringSubmatch(response, -1); len(matches) > 0 {
		extracted = matches[len(matches)-1][1]
	}

	// No number found.
	if extracted == "" {
		return scores
	}
	scores.Extracted = extracted

	// strconv.ParseFloat accepts neither thousands separators nor
	// surrounding whitespace, so remove both; otherwise an expected answer
	// such as "42\n" would make every response score as incorrect.
	normalize := func(s string) string {
		return strings.TrimSpace(strings.ReplaceAll(s, ",", ""))
	}
	extVal, errExt := strconv.ParseFloat(normalize(extracted), 64)
	expVal, errExp := strconv.ParseFloat(normalize(correctAnswer), 64)
	if errExt != nil || errExp != nil {
		return scores
	}

	// scores.Correct points at correct, so this assignment updates the result.
	correct = math.Abs(expVal-extVal) < 0.01
	return scores
}