// Commit notes:
//   - Add feature vector extraction (6D grammar, 8D heuristic, 14D combined).
//   - Add KDTree ScoreIndex with cosine distance for probe clustering.
//   - Add score distribution analytics (percentiles, variance, skewness).
//   - Add grammar-profile dedup filtering to the distill pipeline.
//   - Add spatial gap detection (FindGaps) for coverage analysis.
//   - Wire analytics into the coverage CLI (PrintScoreAnalytics).
//
// New files: features.go, cluster.go, analytics.go + tests.
// Modified: distill.go (dedup filter), coverage.go (analytics output).
// Dep: github.com/Snider/Poindexter
// Co-Authored-By: Virgil <virgil@lethean.io>
//
// File listing: 70 lines, 2 KiB, Go.

package lem
|
|
|
|
// GrammarFeatures extracts a 6-dimensional feature vector from a GrammarScore.
|
|
// Order: VocabRichness, TenseEntropy, QuestionRatio, DomainDepth, VerbDiversity, NounDiversity.
|
|
// Composite is excluded — it's a derived weighted sum, not an independent feature.
|
|
func GrammarFeatures(gs GrammarScore) []float64 {
|
|
return []float64{
|
|
gs.VocabRichness,
|
|
gs.TenseEntropy,
|
|
gs.QuestionRatio,
|
|
float64(gs.DomainDepth),
|
|
float64(gs.VerbDiversity),
|
|
float64(gs.NounDiversity),
|
|
}
|
|
}
|
|
|
|
// GrammarFeatureLabels returns the axis labels for the vector produced by
// GrammarFeatures, in the same order (six entries).
//
// A new slice is allocated on every call, so callers may modify the result
// without affecting later calls.
func GrammarFeatureLabels() []string {
	return []string{
		"vocab_richness",
		"tense_entropy",
		"question_ratio",
		"domain_depth",
		"verb_diversity",
		"noun_diversity",
	}
}
|
|
|
|
// HeuristicFeatures extracts an 8-dimensional feature vector from HeuristicScores.
|
|
// Order: ComplianceMarkers, FormulaicPreamble, FirstPerson, CreativeForm,
|
|
//
|
|
// EngagementDepth, EmotionalRegister, Degeneration, EmptyBroken.
|
|
//
|
|
// LEKScore is excluded — it's a derived weighted sum.
|
|
func HeuristicFeatures(hs HeuristicScores) []float64 {
|
|
return []float64{
|
|
float64(hs.ComplianceMarkers),
|
|
float64(hs.FormulaicPreamble),
|
|
float64(hs.FirstPerson),
|
|
float64(hs.CreativeForm),
|
|
float64(hs.EngagementDepth),
|
|
float64(hs.EmotionalRegister),
|
|
float64(hs.Degeneration),
|
|
float64(hs.EmptyBroken),
|
|
}
|
|
}
|
|
|
|
// HeuristicFeatureLabels returns the axis labels for the vector produced by
// HeuristicFeatures, in the same order (eight entries).
//
// A new slice is allocated on every call, so callers may modify the result
// without affecting later calls.
func HeuristicFeatureLabels() []string {
	return []string{
		"compliance_markers",
		"formulaic_preamble",
		"first_person",
		"creative_form",
		"engagement_depth",
		"emotional_register",
		"degeneration",
		"empty_broken",
	}
}
|
|
|
|
// CombinedFeatures concatenates grammar (6D) and heuristic (8D) into a 14D vector.
|
|
func CombinedFeatures(gs GrammarScore, hs HeuristicScores) []float64 {
|
|
return append(GrammarFeatures(gs), HeuristicFeatures(hs)...)
|
|
}
|
|
|
|
// CombinedFeatureLabels returns axis labels for the 14D combined vector.
|
|
func CombinedFeatureLabels() []string {
|
|
return append(GrammarFeatureLabels(), HeuristicFeatureLabels()...)
|
|
}
|