- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
82 lines
3.2 KiB
Go
82 lines
3.2 KiB
Go
package lem
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
func TestFindGaps_UniformCoverage(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.1, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
|
|
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 1.0, QuestionRatio: 0.3, DomainDepth: 6, VerbDiversity: 20, NounDiversity: 25}},
|
|
{ID: "c", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 9, VerbDiversity: 30, NounDiversity: 35}},
|
|
}
|
|
gaps := FindGaps(entries, 3)
|
|
if gaps == nil {
|
|
t.Fatal("expected non-nil gaps")
|
|
}
|
|
if len(gaps) == 0 {
|
|
t.Error("expected some gap reports")
|
|
}
|
|
}
|
|
|
|
func TestFindGaps_ClusteredData(t *testing.T) {
|
|
// All entries clustered in one corner — grid probes far from cluster should show gaps.
|
|
entries := []ScoredEntry{
|
|
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.10, TenseEntropy: 0.50, QuestionRatio: 0.1, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
|
|
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.11, TenseEntropy: 0.51, QuestionRatio: 0.11, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
|
|
{ID: "c", Grammar: GrammarScore{VocabRichness: 0.12, TenseEntropy: 0.52, QuestionRatio: 0.12, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
|
|
}
|
|
gaps := FindGaps(entries, 2)
|
|
if len(gaps) == 0 {
|
|
t.Error("expected gaps in clustered data")
|
|
}
|
|
// Top gap should have positive distance.
|
|
if gaps[0].AvgDistance <= 0 {
|
|
t.Error("expected positive distance for worst gap")
|
|
}
|
|
}
|
|
|
|
func TestFindGaps_SortedByWorst(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.3, QuestionRatio: 0.1, DomainDepth: 2, VerbDiversity: 5, NounDiversity: 8}},
|
|
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.5, TenseEntropy: 1.0, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
|
|
{ID: "c", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 12, VerbDiversity: 30, NounDiversity: 40}},
|
|
}
|
|
gaps := FindGaps(entries, 2)
|
|
if len(gaps) < 2 {
|
|
t.Fatalf("expected at least 2 gaps, got %d", len(gaps))
|
|
}
|
|
// Descending order.
|
|
if gaps[0].AvgDistance < gaps[len(gaps)-1].AvgDistance {
|
|
t.Error("expected gaps sorted descending by AvgDistance")
|
|
}
|
|
}
|
|
|
|
func TestFindGaps_TooFewEntries(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "solo", Grammar: GrammarScore{VocabRichness: 0.1}},
|
|
}
|
|
gaps := FindGaps(entries, 1)
|
|
if gaps != nil {
|
|
t.Error("expected nil for single entry")
|
|
}
|
|
}
|
|
|
|
func TestGapReport_HasFields(t *testing.T) {
|
|
entries := []ScoredEntry{
|
|
{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
|
|
{ID: "b", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 12, VerbDiversity: 35, NounDiversity: 45}},
|
|
}
|
|
gaps := FindGaps(entries, 1)
|
|
for _, g := range gaps {
|
|
if g.AvgDistance < 0 {
|
|
t.Error("AvgDistance should be non-negative")
|
|
}
|
|
if len(g.Probe) != 6 {
|
|
t.Errorf("Probe should be 6D, got %d", len(g.Probe))
|
|
}
|
|
if len(g.NearestIDs) == 0 {
|
|
t.Error("NearestIDs should not be empty")
|
|
}
|
|
}
|
|
}
|