LEM/pkg/lem/features.go
Snider c701c2e0af feat(lem): integrate Poindexter for spatial score indexing and analytics
- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-22 21:26:06 +00:00

70 lines
2 KiB
Go

package lem
// GrammarFeatures flattens a GrammarScore into a 6-element float64 vector.
// The axes appear in this fixed order: VocabRichness, TenseEntropy,
// QuestionRatio, DomainDepth, VerbDiversity, NounDiversity.
// Composite is deliberately left out: it is a derived weighted sum rather
// than an independent feature.
func GrammarFeatures(gs GrammarScore) []float64 {
	vec := make([]float64, 0, 6)
	// These fields are used as-is, without conversion.
	vec = append(vec, gs.VocabRichness, gs.TenseEntropy, gs.QuestionRatio)
	// These fields are converted to float64 explicitly.
	vec = append(vec,
		float64(gs.DomainDepth),
		float64(gs.VerbDiversity),
		float64(gs.NounDiversity),
	)
	return vec
}
// GrammarFeatureLabels returns the six axis names for the grammar vector,
// listed in the same order that GrammarFeatures emits its values.
// Each call returns a newly built slice.
func GrammarFeatureLabels() []string {
	labels := [...]string{
		"vocab_richness",
		"tense_entropy",
		"question_ratio",
		"domain_depth",
		"verb_diversity",
		"noun_diversity",
	}
	return labels[:]
}
// HeuristicFeatures flattens HeuristicScores into an 8-element float64 vector.
// The axes appear in this fixed order: ComplianceMarkers, FormulaicPreamble,
// FirstPerson, CreativeForm, EngagementDepth, EmotionalRegister, Degeneration,
// EmptyBroken.
// LEKScore is deliberately left out: it is a derived weighted sum.
func HeuristicFeatures(hs HeuristicScores) []float64 {
	vec := make([]float64, 8)
	vec[0] = float64(hs.ComplianceMarkers)
	vec[1] = float64(hs.FormulaicPreamble)
	vec[2] = float64(hs.FirstPerson)
	vec[3] = float64(hs.CreativeForm)
	vec[4] = float64(hs.EngagementDepth)
	vec[5] = float64(hs.EmotionalRegister)
	vec[6] = float64(hs.Degeneration)
	vec[7] = float64(hs.EmptyBroken)
	return vec
}
// HeuristicFeatureLabels returns the eight axis names for the heuristic
// vector, listed in the same order that HeuristicFeatures emits its values.
// Each call returns a newly built slice.
func HeuristicFeatureLabels() []string {
	labels := make([]string, 0, 8)
	labels = append(labels,
		"compliance_markers",
		"formulaic_preamble",
		"first_person",
		"creative_form",
	)
	labels = append(labels,
		"engagement_depth",
		"emotional_register",
		"degeneration",
		"empty_broken",
	)
	return labels
}
// CombinedFeatures builds the 14D vector by placing the 6 grammar features
// first, followed by the 8 heuristic features.
func CombinedFeatures(gs GrammarScore, hs HeuristicScores) []float64 {
	grammar := GrammarFeatures(gs)
	heuristic := HeuristicFeatures(hs)

	combined := make([]float64, 0, len(grammar)+len(heuristic))
	combined = append(combined, grammar...)
	combined = append(combined, heuristic...)
	return combined
}
// CombinedFeatureLabels returns the axis names for the 14D combined vector:
// the grammar labels first, followed by the heuristic labels.
func CombinedFeatureLabels() []string {
	grammar := GrammarFeatureLabels()
	heuristic := HeuristicFeatureLabels()

	combined := make([]string, 0, len(grammar)+len(heuristic))
	combined = append(combined, grammar...)
	combined = append(combined, heuristic...)
	return combined
}