LEM/pkg/lem/analytics.go
Snider c701c2e0af feat(lem): integrate Poindexter for spatial score indexing and analytics
- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-22 21:26:06 +00:00

62 lines
1.9 KiB
Go

package lem
import (
poindexter "github.com/Snider/Poindexter"
)
// Aliases for the Poindexter statistics types used by LEM analytics. Both are
// true aliases (declared with "="), so values are interchangeable with the
// underlying Poindexter types without conversion.
type (
	// ScoreDistribution is an alias for poindexter.DistributionStats, used for
	// LEM score populations.
	ScoreDistribution = poindexter.DistributionStats

	// GrammarAxisStats is an alias for poindexter.AxisDistribution, used for
	// per-feature analysis.
	GrammarAxisStats = poindexter.AxisDistribution
)
// ComputeScoreDistribution collects the Composite value of every grammar score,
// in input order, and returns the distribution statistics that
// poindexter.ComputeDistributionStats computes over those values.
func ComputeScoreDistribution(scores []GrammarScore) ScoreDistribution {
	composites := make([]float64, 0, len(scores))
	for idx := range scores {
		composites = append(composites, scores[idx].Composite)
	}
	stats := poindexter.ComputeDistributionStats(composites)
	return stats
}
// ComputeLEKDistribution collects the LEKScore of every heuristic score, in
// input order, and returns the distribution statistics that
// poindexter.ComputeDistributionStats computes over those values.
//
// Nil entries in scores are skipped: the slice holds pointers, and
// dereferencing a nil element would otherwise panic. Skipped entries do not
// contribute a value to the distribution.
func ComputeLEKDistribution(scores []*HeuristicScores) ScoreDistribution {
	vals := make([]float64, 0, len(scores))
	for _, s := range scores {
		if s == nil {
			// No heuristic result for this entry; nothing to record.
			continue
		}
		vals = append(vals, s.LEKScore)
	}
	return poindexter.ComputeDistributionStats(vals)
}
// ComputeGrammarAxisStats turns each scored entry into a Poindexter KD point
// (ID from the entry, coordinates from GrammarFeatures of its grammar score,
// and the entry itself as the payload) and returns the per-axis distributions
// that poindexter.ComputeAxisDistributions computes for those points, labelled
// with GrammarFeatureLabels.
func ComputeGrammarAxisStats(entries []ScoredEntry) []GrammarAxisStats {
	pts := make([]poindexter.KDPoint[ScoredEntry], 0, len(entries))
	for idx := range entries {
		entry := entries[idx]

		var pt poindexter.KDPoint[ScoredEntry]
		pt.ID = entry.ID
		pt.Coords = GrammarFeatures(entry.Grammar)
		pt.Value = entry

		pts = append(pts, pt)
	}
	labels := GrammarFeatureLabels()
	return poindexter.ComputeAxisDistributions(pts, labels)
}
// SummaryReport holds aggregate analytics for a scored population.
// ScoreSummary builds one from a slice of ScoredEntry values.
type SummaryReport struct {
// Total is the number of scored entries the report was built from.
Total int
// CompositeStats is the distribution over the entries' grammar Composite values.
CompositeStats ScoreDistribution
// AxisStats is the per-axis grammar feature distribution, as returned by
// ComputeGrammarAxisStats.
AxisStats []GrammarAxisStats
}
// ScoreSummary builds a SummaryReport for the given scored entries: the entry
// count, the distribution of their grammar composites (via
// ComputeScoreDistribution), and the per-axis grammar feature statistics (via
// ComputeGrammarAxisStats).
func ScoreSummary(entries []ScoredEntry) SummaryReport {
	grammars := make([]GrammarScore, 0, len(entries))
	for idx := range entries {
		grammars = append(grammars, entries[idx].Grammar)
	}

	var report SummaryReport
	report.Total = len(entries)
	report.CompositeStats = ComputeScoreDistribution(grammars)
	report.AxisStats = ComputeGrammarAxisStats(entries)
	return report
}