- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
163 lines
6.3 KiB
Go
163 lines
6.3 KiB
Go
package lem
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
func TestNewScoreIndex_Empty(t *testing.T) {
|
|
idx, err := NewScoreIndex(nil)
|
|
if err == nil {
|
|
t.Fatal("expected error for nil input")
|
|
}
|
|
if idx != nil {
|
|
t.Fatal("expected nil index")
|
|
}
|
|
}
|
|
|
|
func TestNewScoreIndex_Build(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
|
|
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 1.0, QuestionRatio: 0.4, DomainDepth: 7, VerbDiversity: 20, NounDiversity: 25}},
|
|
{ID: "c", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
|
|
}
|
|
idx, err := NewScoreIndex(entries)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
if idx.Len() != 3 {
|
|
t.Fatalf("expected 3 points, got %d", idx.Len())
|
|
}
|
|
}
|
|
|
|
func TestScoreIndex_Nearest(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "low", Grammar: GrammarScore{VocabRichness: 0.05, TenseEntropy: 0.2, QuestionRatio: 0.1, DomainDepth: 1, VerbDiversity: 5, NounDiversity: 5}},
|
|
{ID: "mid", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
|
|
{ID: "high", Grammar: GrammarScore{VocabRichness: 0.25, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 10, VerbDiversity: 30, NounDiversity: 35}},
|
|
}
|
|
idx, err := NewScoreIndex(entries)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
|
|
query := GrammarFeatures(GrammarScore{VocabRichness: 0.14, TenseEntropy: 0.7, QuestionRatio: 0.28, DomainDepth: 4, VerbDiversity: 14, NounDiversity: 18})
|
|
nearest, dist, ok := idx.Nearest(query)
|
|
if !ok {
|
|
t.Fatal("expected a nearest match")
|
|
}
|
|
if nearest.ID != "mid" {
|
|
t.Errorf("nearest = %q, want mid", nearest.ID)
|
|
}
|
|
if dist < 0 {
|
|
t.Errorf("distance should be non-negative, got %f", dist)
|
|
}
|
|
}
|
|
|
|
func TestScoreIndex_KNearest(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.3, QuestionRatio: 0.1, DomainDepth: 2, VerbDiversity: 5, NounDiversity: 8}},
|
|
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 0.6, QuestionRatio: 0.2, DomainDepth: 4, VerbDiversity: 10, NounDiversity: 15}},
|
|
{ID: "c", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 0.9, QuestionRatio: 0.3, DomainDepth: 6, VerbDiversity: 15, NounDiversity: 22}},
|
|
{ID: "d", Grammar: GrammarScore{VocabRichness: 0.4, TenseEntropy: 1.2, QuestionRatio: 0.4, DomainDepth: 8, VerbDiversity: 20, NounDiversity: 30}},
|
|
}
|
|
idx, err := NewScoreIndex(entries)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
|
|
query := GrammarFeatures(GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.45, QuestionRatio: 0.15, DomainDepth: 3, VerbDiversity: 7, NounDiversity: 11})
|
|
results, dists := idx.KNearest(query, 2)
|
|
if len(results) != 2 {
|
|
t.Fatalf("expected 2 results, got %d", len(results))
|
|
}
|
|
if len(dists) != 2 {
|
|
t.Fatalf("expected 2 distances, got %d", len(dists))
|
|
}
|
|
}
|
|
|
|
func TestScoreIndex_Radius(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
|
|
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.11, TenseEntropy: 0.51, QuestionRatio: 0.21, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
|
|
{ID: "far", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 20, VerbDiversity: 40, NounDiversity: 50}},
|
|
}
|
|
idx, err := NewScoreIndex(entries)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
|
|
query := GrammarFeatures(GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15})
|
|
results, _ := idx.Radius(query, 0.01)
|
|
// "a" and "b" should be within radius, "far" should not.
|
|
if len(results) < 1 {
|
|
t.Errorf("expected at least 1 result within radius, got %d", len(results))
|
|
}
|
|
}
|
|
|
|
func TestIsDuplicate_HighSimilarity(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "existing", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
|
|
}
|
|
idx, err := NewScoreIndex(entries)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
|
|
nearDup := GrammarFeatures(GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20})
|
|
if !idx.IsDuplicate(nearDup, 0.05) {
|
|
t.Error("expected near-identical vector to be flagged as duplicate")
|
|
}
|
|
}
|
|
|
|
func TestIsDuplicate_LowSimilarity(t *testing.T) {
|
|
// High vocab/tense, low verb/noun — one angular profile.
|
|
entries := []ScoredEntry{
|
|
{ID: "existing", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 1, VerbDiversity: 2, NounDiversity: 3}},
|
|
}
|
|
idx, err := NewScoreIndex(entries)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
|
|
// Low vocab/tense, high verb/noun — genuinely different angular profile.
|
|
different := GrammarFeatures(GrammarScore{VocabRichness: 0.01, TenseEntropy: 0.05, QuestionRatio: 0.01, DomainDepth: 20, VerbDiversity: 40, NounDiversity: 50})
|
|
if idx.IsDuplicate(different, 0.05) {
|
|
t.Error("expected different angular profile to NOT be flagged as duplicate")
|
|
}
|
|
}
|
|
|
|
func TestScoreIndex_Insert(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "seed", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
|
|
}
|
|
idx, err := NewScoreIndex(entries)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
|
|
err = idx.Insert(ScoredEntry{
|
|
ID: "new",
|
|
Grammar: GrammarScore{VocabRichness: 0.25, TenseEntropy: 1.2, QuestionRatio: 0.5, DomainDepth: 8, VerbDiversity: 22, NounDiversity: 30},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("insert error: %v", err)
|
|
}
|
|
if idx.Len() != 2 {
|
|
t.Fatalf("expected 2 entries, got %d", idx.Len())
|
|
}
|
|
}
|
|
|
|
func TestScoreIndex_Points(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1}},
|
|
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2}},
|
|
}
|
|
idx, err := NewScoreIndex(entries)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
pts := idx.Points()
|
|
if len(pts) != 2 {
|
|
t.Fatalf("expected 2 points, got %d", len(pts))
|
|
}
|
|
}
|