LEM/pkg/lem/cluster_test.go
Snider c701c2e0af feat(lem): integrate Poindexter for spatial score indexing and analytics
- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-22 21:26:06 +00:00

163 lines
6.3 KiB
Go

package lem
import (
"testing"
)
// TestNewScoreIndex_Empty checks that NewScoreIndex rejects a nil entry
// slice: the call must report an error and must not return an index.
func TestNewScoreIndex_Empty(t *testing.T) {
	got, buildErr := NewScoreIndex(nil)

	// The error is checked first; t.Fatal stops the test, so the index
	// check only runs once an error has actually been reported.
	switch {
	case buildErr == nil:
		t.Fatal("expected error for nil input")
	case got != nil:
		t.Fatal("expected nil index")
	}
}
func TestNewScoreIndex_Build(t *testing.T) {
entries := []ScoredEntry{
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 1.0, QuestionRatio: 0.4, DomainDepth: 7, VerbDiversity: 20, NounDiversity: 25}},
{ID: "c", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
}
idx, err := NewScoreIndex(entries)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if idx.Len() != 3 {
t.Fatalf("expected 3 points, got %d", idx.Len())
}
}
func TestScoreIndex_Nearest(t *testing.T) {
entries := []ScoredEntry{
{ID: "low", Grammar: GrammarScore{VocabRichness: 0.05, TenseEntropy: 0.2, QuestionRatio: 0.1, DomainDepth: 1, VerbDiversity: 5, NounDiversity: 5}},
{ID: "mid", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
{ID: "high", Grammar: GrammarScore{VocabRichness: 0.25, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 10, VerbDiversity: 30, NounDiversity: 35}},
}
idx, err := NewScoreIndex(entries)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
query := GrammarFeatures(GrammarScore{VocabRichness: 0.14, TenseEntropy: 0.7, QuestionRatio: 0.28, DomainDepth: 4, VerbDiversity: 14, NounDiversity: 18})
nearest, dist, ok := idx.Nearest(query)
if !ok {
t.Fatal("expected a nearest match")
}
if nearest.ID != "mid" {
t.Errorf("nearest = %q, want mid", nearest.ID)
}
if dist < 0 {
t.Errorf("distance should be non-negative, got %f", dist)
}
}
func TestScoreIndex_KNearest(t *testing.T) {
entries := []ScoredEntry{
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.3, QuestionRatio: 0.1, DomainDepth: 2, VerbDiversity: 5, NounDiversity: 8}},
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 0.6, QuestionRatio: 0.2, DomainDepth: 4, VerbDiversity: 10, NounDiversity: 15}},
{ID: "c", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 0.9, QuestionRatio: 0.3, DomainDepth: 6, VerbDiversity: 15, NounDiversity: 22}},
{ID: "d", Grammar: GrammarScore{VocabRichness: 0.4, TenseEntropy: 1.2, QuestionRatio: 0.4, DomainDepth: 8, VerbDiversity: 20, NounDiversity: 30}},
}
idx, err := NewScoreIndex(entries)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
query := GrammarFeatures(GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.45, QuestionRatio: 0.15, DomainDepth: 3, VerbDiversity: 7, NounDiversity: 11})
results, dists := idx.KNearest(query, 2)
if len(results) != 2 {
t.Fatalf("expected 2 results, got %d", len(results))
}
if len(dists) != 2 {
t.Fatalf("expected 2 distances, got %d", len(dists))
}
}
func TestScoreIndex_Radius(t *testing.T) {
entries := []ScoredEntry{
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.11, TenseEntropy: 0.51, QuestionRatio: 0.21, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
{ID: "far", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 20, VerbDiversity: 40, NounDiversity: 50}},
}
idx, err := NewScoreIndex(entries)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
query := GrammarFeatures(GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15})
results, _ := idx.Radius(query, 0.01)
// "a" and "b" should be within radius, "far" should not.
if len(results) < 1 {
t.Errorf("expected at least 1 result within radius, got %d", len(results))
}
}
func TestIsDuplicate_HighSimilarity(t *testing.T) {
entries := []ScoredEntry{
{ID: "existing", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
}
idx, err := NewScoreIndex(entries)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
nearDup := GrammarFeatures(GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20})
if !idx.IsDuplicate(nearDup, 0.05) {
t.Error("expected near-identical vector to be flagged as duplicate")
}
}
func TestIsDuplicate_LowSimilarity(t *testing.T) {
// High vocab/tense, low verb/noun — one angular profile.
entries := []ScoredEntry{
{ID: "existing", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 1, VerbDiversity: 2, NounDiversity: 3}},
}
idx, err := NewScoreIndex(entries)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Low vocab/tense, high verb/noun — genuinely different angular profile.
different := GrammarFeatures(GrammarScore{VocabRichness: 0.01, TenseEntropy: 0.05, QuestionRatio: 0.01, DomainDepth: 20, VerbDiversity: 40, NounDiversity: 50})
if idx.IsDuplicate(different, 0.05) {
t.Error("expected different angular profile to NOT be flagged as duplicate")
}
}
func TestScoreIndex_Insert(t *testing.T) {
entries := []ScoredEntry{
{ID: "seed", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
}
idx, err := NewScoreIndex(entries)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
err = idx.Insert(ScoredEntry{
ID: "new",
Grammar: GrammarScore{VocabRichness: 0.25, TenseEntropy: 1.2, QuestionRatio: 0.5, DomainDepth: 8, VerbDiversity: 22, NounDiversity: 30},
})
if err != nil {
t.Fatalf("insert error: %v", err)
}
if idx.Len() != 2 {
t.Fatalf("expected 2 entries, got %d", idx.Len())
}
}
func TestScoreIndex_Points(t *testing.T) {
entries := []ScoredEntry{
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1}},
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2}},
}
idx, err := NewScoreIndex(entries)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
pts := idx.Points()
if len(pts) != 2 {
t.Fatalf("expected 2 points, got %d", len(pts))
}
}