- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
121 lines
2.7 KiB
Go
121 lines
2.7 KiB
Go
package lem
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
func TestGrammarFeatures_Length(t *testing.T) {
|
|
gs := GrammarScore{
|
|
VocabRichness: 0.15,
|
|
TenseEntropy: 1.2,
|
|
QuestionRatio: 0.3,
|
|
DomainDepth: 5,
|
|
VerbDiversity: 12,
|
|
NounDiversity: 18,
|
|
Composite: 65.0,
|
|
}
|
|
vec := GrammarFeatures(gs)
|
|
if len(vec) != 6 {
|
|
t.Fatalf("expected 6 features, got %d", len(vec))
|
|
}
|
|
}
|
|
|
|
func TestGrammarFeatures_Values(t *testing.T) {
|
|
gs := GrammarScore{
|
|
VocabRichness: 0.15,
|
|
TenseEntropy: 1.2,
|
|
QuestionRatio: 0.3,
|
|
DomainDepth: 5,
|
|
VerbDiversity: 12,
|
|
NounDiversity: 18,
|
|
Composite: 65.0,
|
|
}
|
|
vec := GrammarFeatures(gs)
|
|
if vec[0] != 0.15 {
|
|
t.Errorf("vec[0] = %f, want 0.15", vec[0])
|
|
}
|
|
if vec[1] != 1.2 {
|
|
t.Errorf("vec[1] = %f, want 1.2", vec[1])
|
|
}
|
|
if vec[3] != 5.0 {
|
|
t.Errorf("vec[3] = %f, want 5.0 (DomainDepth)", vec[3])
|
|
}
|
|
}
|
|
|
|
func TestHeuristicFeatures_Length(t *testing.T) {
|
|
hs := HeuristicScores{
|
|
ComplianceMarkers: 2,
|
|
FormulaicPreamble: 1,
|
|
FirstPerson: 3,
|
|
CreativeForm: 4,
|
|
EngagementDepth: 5,
|
|
EmotionalRegister: 6,
|
|
Degeneration: 0,
|
|
EmptyBroken: 0,
|
|
LEKScore: 42.0,
|
|
}
|
|
vec := HeuristicFeatures(hs)
|
|
if len(vec) != 8 {
|
|
t.Fatalf("expected 8 features, got %d", len(vec))
|
|
}
|
|
}
|
|
|
|
func TestHeuristicFeatures_Values(t *testing.T) {
|
|
hs := HeuristicScores{
|
|
ComplianceMarkers: 2,
|
|
FormulaicPreamble: 1,
|
|
FirstPerson: 3,
|
|
CreativeForm: 4,
|
|
EngagementDepth: 5,
|
|
EmotionalRegister: 6,
|
|
Degeneration: 7,
|
|
EmptyBroken: 0,
|
|
}
|
|
vec := HeuristicFeatures(hs)
|
|
if vec[0] != 2.0 {
|
|
t.Errorf("vec[0] = %f, want 2.0 (ComplianceMarkers)", vec[0])
|
|
}
|
|
if vec[6] != 7.0 {
|
|
t.Errorf("vec[6] = %f, want 7.0 (Degeneration)", vec[6])
|
|
}
|
|
}
|
|
|
|
func TestCombinedFeatures_Length(t *testing.T) {
|
|
gs := GrammarScore{Composite: 50}
|
|
hs := HeuristicScores{LEKScore: 30}
|
|
vec := CombinedFeatures(gs, hs)
|
|
if len(vec) != 14 {
|
|
t.Fatalf("expected 14 features, got %d", len(vec))
|
|
}
|
|
}
|
|
|
|
func TestGrammarFeatureLabels(t *testing.T) {
|
|
labels := GrammarFeatureLabels()
|
|
if len(labels) != 6 {
|
|
t.Fatalf("expected 6 labels, got %d", len(labels))
|
|
}
|
|
if labels[0] != "vocab_richness" {
|
|
t.Errorf("labels[0] = %q, want vocab_richness", labels[0])
|
|
}
|
|
}
|
|
|
|
func TestHeuristicFeatureLabels(t *testing.T) {
|
|
labels := HeuristicFeatureLabels()
|
|
if len(labels) != 8 {
|
|
t.Fatalf("expected 8 labels, got %d", len(labels))
|
|
}
|
|
if labels[4] != "engagement_depth" {
|
|
t.Errorf("labels[4] = %q, want engagement_depth", labels[4])
|
|
}
|
|
}
|
|
|
|
func TestCombinedFeatureLabels(t *testing.T) {
|
|
labels := CombinedFeatureLabels()
|
|
if len(labels) != 14 {
|
|
t.Fatalf("expected 14 labels, got %d", len(labels))
|
|
}
|
|
// First 6 are grammar, next 8 are heuristic.
|
|
if labels[6] != "compliance_markers" {
|
|
t.Errorf("labels[6] = %q, want compliance_markers", labels[6])
|
|
}
|
|
}
|