package lem

import (
	"testing"
)

// TestNewScoreIndex_Empty verifies that NewScoreIndex called with nil
// returns a non-nil error together with a nil index.
func TestNewScoreIndex_Empty(t *testing.T) {
	index, buildErr := NewScoreIndex(nil)
	if buildErr == nil {
		t.Fatal("expected error for nil input")
	}
	if index != nil {
		t.Fatal("expected nil index")
	}
}

// TestNewScoreIndex_Build verifies that an index built from three entries
// reports a length of three.
func TestNewScoreIndex_Build(t *testing.T) {
	seed := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 1.0, QuestionRatio: 0.4, DomainDepth: 7, VerbDiversity: 20, NounDiversity: 25}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
	}

	index, buildErr := NewScoreIndex(seed)
	if buildErr != nil {
		t.Fatalf("unexpected error: %v", buildErr)
	}

	if size := index.Len(); size != 3 {
		t.Fatalf("expected 3 points, got %d", size)
	}
}

// TestScoreIndex_Nearest verifies that a query whose scores sit close to the
// "mid" entry resolves to "mid" and that the reported distance is not
// negative.
func TestScoreIndex_Nearest(t *testing.T) {
	seed := []ScoredEntry{
		{ID: "low", Grammar: GrammarScore{VocabRichness: 0.05, TenseEntropy: 0.2, QuestionRatio: 0.1, DomainDepth: 1, VerbDiversity: 5, NounDiversity: 5}},
		{ID: "mid", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
		{ID: "high", Grammar: GrammarScore{VocabRichness: 0.25, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 10, VerbDiversity: 30, NounDiversity: 35}},
	}

	index, buildErr := NewScoreIndex(seed)
	if buildErr != nil {
		t.Fatalf("unexpected error: %v", buildErr)
	}

	probe := GrammarScore{VocabRichness: 0.14, TenseEntropy: 0.7, QuestionRatio: 0.28, DomainDepth: 4, VerbDiversity: 14, NounDiversity: 18}
	match, distance, found := index.Nearest(GrammarFeatures(probe))
	if !found {
		t.Fatal("expected a nearest match")
	}
	if match.ID != "mid" {
		t.Errorf("nearest = %q, want mid", match.ID)
	}
	if distance < 0 {
		t.Errorf("distance should be non-negative, got %f", distance)
	}
}

// TestScoreIndex_KNearest verifies that asking for the two nearest neighbours
// of a four-entry index yields exactly two results and two distances. The
// identity of the returned entries is not checked.
func TestScoreIndex_KNearest(t *testing.T) {
	seed := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.3, QuestionRatio: 0.1, DomainDepth: 2, VerbDiversity: 5, NounDiversity: 8}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 0.6, QuestionRatio: 0.2, DomainDepth: 4, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 0.9, QuestionRatio: 0.3, DomainDepth: 6, VerbDiversity: 15, NounDiversity: 22}},
		{ID: "d", Grammar: GrammarScore{VocabRichness: 0.4, TenseEntropy: 1.2, QuestionRatio: 0.4, DomainDepth: 8, VerbDiversity: 20, NounDiversity: 30}},
	}

	index, buildErr := NewScoreIndex(seed)
	if buildErr != nil {
		t.Fatalf("unexpected error: %v", buildErr)
	}

	probe := GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.45, QuestionRatio: 0.15, DomainDepth: 3, VerbDiversity: 7, NounDiversity: 11}
	neighbours, distances := index.KNearest(GrammarFeatures(probe), 2)

	if count := len(neighbours); count != 2 {
		t.Fatalf("expected 2 results, got %d", count)
	}
	if count := len(distances); count != 2 {
		t.Fatalf("expected 2 distances, got %d", count)
	}
}

// TestScoreIndex_Radius verifies that a radius query centred on the scores of
// entry "a" returns at least one result.
func TestScoreIndex_Radius(t *testing.T) {
	seed := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.11, TenseEntropy: 0.51, QuestionRatio: 0.21, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "far", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 20, VerbDiversity: 40, NounDiversity: 50}},
	}

	index, buildErr := NewScoreIndex(seed)
	if buildErr != nil {
		t.Fatalf("unexpected error: %v", buildErr)
	}

	// The probe carries exactly the same scores as entry "a".
	probe := GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}
	hits, _ := index.Radius(GrammarFeatures(probe), 0.01)

	// Intent: "a" and "b" fall inside the radius while "far" does not. Only
	// the weaker "at least one hit" condition is actually asserted here.
	if count := len(hits); count < 1 {
		t.Errorf("expected at least 1 result within radius, got %d", count)
	}
}

// TestIsDuplicate_HighSimilarity verifies that a query with scores identical
// to the single indexed entry is reported as a duplicate.
func TestIsDuplicate_HighSimilarity(t *testing.T) {
	seed := []ScoredEntry{
		{ID: "existing", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
	}

	index, buildErr := NewScoreIndex(seed)
	if buildErr != nil {
		t.Fatalf("unexpected error: %v", buildErr)
	}

	twin := GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}
	if flagged := index.IsDuplicate(GrammarFeatures(twin), 0.05); !flagged {
		t.Error("expected near-identical vector to be flagged as duplicate")
	}
}

// TestIsDuplicate_LowSimilarity verifies that a query with a clearly
// different score profile from the single indexed entry is not reported as a
// duplicate.
func TestIsDuplicate_LowSimilarity(t *testing.T) {
	// Indexed entry: large vocab/tense values paired with small verb/noun
	// values, giving one angular profile.
	seed := []ScoredEntry{
		{ID: "existing", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 1, VerbDiversity: 2, NounDiversity: 3}},
	}

	index, buildErr := NewScoreIndex(seed)
	if buildErr != nil {
		t.Fatalf("unexpected error: %v", buildErr)
	}

	// Query: the opposite shape, with small vocab/tense values and large
	// verb/noun values, so the angular profile genuinely differs.
	opposite := GrammarScore{VocabRichness: 0.01, TenseEntropy: 0.05, QuestionRatio: 0.01, DomainDepth: 20, VerbDiversity: 40, NounDiversity: 50}
	if flagged := index.IsDuplicate(GrammarFeatures(opposite), 0.05); flagged {
		t.Error("expected different angular profile to NOT be flagged as duplicate")
	}
}

// TestScoreIndex_Insert verifies that inserting one entry into a single-entry
// index succeeds and raises the reported length to two.
func TestScoreIndex_Insert(t *testing.T) {
	seed := []ScoredEntry{
		{ID: "seed", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
	}

	index, buildErr := NewScoreIndex(seed)
	if buildErr != nil {
		t.Fatalf("unexpected error: %v", buildErr)
	}

	added := ScoredEntry{ID: "new", Grammar: GrammarScore{VocabRichness: 0.25, TenseEntropy: 1.2, QuestionRatio: 0.5, DomainDepth: 8, VerbDiversity: 22, NounDiversity: 30}}
	if insertErr := index.Insert(added); insertErr != nil {
		t.Fatalf("insert error: %v", insertErr)
	}

	if size := index.Len(); size != 2 {
		t.Fatalf("expected 2 entries, got %d", size)
	}
}

// TestScoreIndex_Points verifies that Points returns one element per indexed
// entry for a two-entry index.
func TestScoreIndex_Points(t *testing.T) {
	seed := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2}},
	}

	index, buildErr := NewScoreIndex(seed)
	if buildErr != nil {
		t.Fatalf("unexpected error: %v", buildErr)
	}

	if count := len(index.Points()); count != 2 {
		t.Fatalf("expected 2 points, got %d", count)
	}
}