- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined) - Add KDTree ScoreIndex with cosine distance for probe clustering - Add score distribution analytics (percentiles, variance, skewness) - Add grammar-profile dedup filtering to distill pipeline - Add spatial gap detection (FindGaps) for coverage analysis - Wire analytics into coverage CLI (PrintScoreAnalytics) New files: features.go, cluster.go, analytics.go + tests Modified: distill.go (dedup filter), coverage.go (analytics output) Dep: github.com/Snider/Poindexter Co-Authored-By: Virgil <virgil@lethean.io>
62 lines
1.9 KiB
Go
62 lines
1.9 KiB
Go
package lem
|
|
|
|
import (
|
|
poindexter "github.com/Snider/Poindexter"
|
|
)
|
|
|
|
// ScoreDistribution wraps Poindexter's DistributionStats for LEM score populations.
|
|
type ScoreDistribution = poindexter.DistributionStats
|
|
|
|
// GrammarAxisStats wraps Poindexter's AxisDistribution for per-feature analysis.
|
|
type GrammarAxisStats = poindexter.AxisDistribution
|
|
|
|
// ComputeScoreDistribution calculates percentile/variance stats over grammar composites.
|
|
func ComputeScoreDistribution(scores []GrammarScore) ScoreDistribution {
|
|
vals := make([]float64, len(scores))
|
|
for i, s := range scores {
|
|
vals[i] = s.Composite
|
|
}
|
|
return poindexter.ComputeDistributionStats(vals)
|
|
}
|
|
|
|
// ComputeLEKDistribution calculates percentile/variance stats over LEK scores.
|
|
func ComputeLEKDistribution(scores []*HeuristicScores) ScoreDistribution {
|
|
vals := make([]float64, len(scores))
|
|
for i, s := range scores {
|
|
vals[i] = s.LEKScore
|
|
}
|
|
return poindexter.ComputeDistributionStats(vals)
|
|
}
|
|
|
|
// ComputeGrammarAxisStats returns per-axis distribution stats for grammar features.
|
|
func ComputeGrammarAxisStats(entries []ScoredEntry) []GrammarAxisStats {
|
|
points := make([]poindexter.KDPoint[ScoredEntry], len(entries))
|
|
for i, e := range entries {
|
|
points[i] = poindexter.KDPoint[ScoredEntry]{
|
|
ID: e.ID,
|
|
Coords: GrammarFeatures(e.Grammar),
|
|
Value: e,
|
|
}
|
|
}
|
|
return poindexter.ComputeAxisDistributions(points, GrammarFeatureLabels())
|
|
}
|
|
|
|
// SummaryReport holds aggregate analytics for a scored population.
|
|
type SummaryReport struct {
|
|
Total int
|
|
CompositeStats ScoreDistribution
|
|
AxisStats []GrammarAxisStats
|
|
}
|
|
|
|
// ScoreSummary computes a full analytics report from scored entries.
|
|
func ScoreSummary(entries []ScoredEntry) SummaryReport {
|
|
scores := make([]GrammarScore, len(entries))
|
|
for i, e := range entries {
|
|
scores[i] = e.Grammar
|
|
}
|
|
return SummaryReport{
|
|
Total: len(entries),
|
|
CompositeStats: ComputeScoreDistribution(scores),
|
|
AxisStats: ComputeGrammarAxisStats(entries),
|
|
}
|
|
}
|