LEM/pkg/lem/coverage_test.go
Snider c701c2e0af feat(lem): integrate Poindexter for spatial score indexing and analytics
- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-22 21:26:06 +00:00

82 lines
3.2 KiB
Go

package lem
import (
"testing"
)
// TestFindGaps_UniformCoverage runs FindGaps over three entries whose grammar
// scores are evenly spaced and requires a non-nil, non-empty result.
func TestFindGaps_UniformCoverage(t *testing.T) {
	spread := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.1, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 1.0, QuestionRatio: 0.3, DomainDepth: 6, VerbDiversity: 20, NounDiversity: 25}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 9, VerbDiversity: 30, NounDiversity: 35}},
	}

	reports := FindGaps(spread, 3)

	switch {
	case reports == nil:
		// A nil result aborts the test immediately.
		t.Fatal("expected non-nil gaps")
	case len(reports) == 0:
		// A non-nil but empty result is reported as a plain failure.
		t.Error("expected some gap reports")
	}
}
// TestFindGaps_ClusteredData runs FindGaps over three nearly identical entries
// and requires at least one gap report whose first element has a strictly
// positive AvgDistance.
func TestFindGaps_ClusteredData(t *testing.T) {
	// All entries clustered in one corner — grid probes far from cluster should show gaps.
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.10, TenseEntropy: 0.50, QuestionRatio: 0.1, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.11, TenseEntropy: 0.51, QuestionRatio: 0.11, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.12, TenseEntropy: 0.52, QuestionRatio: 0.12, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
	}

	gaps := FindGaps(entries, 2)

	// Fatal rather than Error: the gaps[0] access below would panic with an
	// index-out-of-range on an empty result, masking the real failure.
	if len(gaps) == 0 {
		t.Fatal("expected gaps in clustered data")
	}

	// Top gap should have positive distance.
	if gaps[0].AvgDistance <= 0 {
		t.Errorf("expected positive distance for worst gap, got %v", gaps[0].AvgDistance)
	}
}
// TestFindGaps_SortedByWorst runs FindGaps over three widely separated entries
// and requires at least two gap reports ordered by non-increasing AvgDistance.
func TestFindGaps_SortedByWorst(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.3, QuestionRatio: 0.1, DomainDepth: 2, VerbDiversity: 5, NounDiversity: 8}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.5, TenseEntropy: 1.0, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 12, VerbDiversity: 30, NounDiversity: 40}},
	}

	gaps := FindGaps(entries, 2)
	if len(gaps) < 2 {
		t.Fatalf("expected at least 2 gaps, got %d", len(gaps))
	}

	// Descending order. Every adjacent pair is checked, not just the first and
	// last elements, so a mis-ordered element in the middle is also caught.
	for i := 1; i < len(gaps); i++ {
		prev, cur := gaps[i-1].AvgDistance, gaps[i].AvgDistance
		if prev < cur {
			t.Errorf("expected gaps sorted descending by AvgDistance: gaps[%d]=%v < gaps[%d]=%v", i-1, prev, i, cur)
		}
	}
}
// TestFindGaps_TooFewEntries requires FindGaps to return nil when it is given
// a single entry.
func TestFindGaps_TooFewEntries(t *testing.T) {
	lone := []ScoredEntry{
		{ID: "solo", Grammar: GrammarScore{VocabRichness: 0.1}},
	}

	// The assertion is on nil specifically, not merely on an empty result.
	if got := FindGaps(lone, 1); got != nil {
		t.Error("expected nil for single entry")
	}
}
// TestGapReport_HasFields inspects every report FindGaps returns for a pair of
// far-apart entries: AvgDistance must be non-negative, Probe must have six
// components, and NearestIDs must be populated.
func TestGapReport_HasFields(t *testing.T) {
	pair := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 12, VerbDiversity: 35, NounDiversity: 45}},
	}

	// NOTE(review): if FindGaps returns no reports, the loop body never runs
	// and the test passes without checking anything — confirm that is intended.
	for _, report := range FindGaps(pair, 1) {
		if report.AvgDistance < 0 {
			t.Error("AvgDistance should be non-negative")
		}
		if dims := len(report.Probe); dims != 6 {
			t.Errorf("Probe should be 6D, got %d", dims)
		}
		if len(report.NearestIDs) < 1 {
			t.Error("NearestIDs should not be empty")
		}
	}
}