feat(lem): integrate Poindexter for spatial score indexing and analytics
- Add feature vector extraction (6D grammar, 8D heuristic, 14D combined)
- Add KDTree ScoreIndex with cosine distance for probe clustering
- Add score distribution analytics (percentiles, variance, skewness)
- Add grammar-profile dedup filtering to distill pipeline
- Add spatial gap detection (FindGaps) for coverage analysis
- Wire analytics into coverage CLI (PrintScoreAnalytics)

New files: features.go, cluster.go, analytics.go + tests
Modified: distill.go (dedup filter), coverage.go (analytics output)
Dep: github.com/Snider/Poindexter

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
f75458bce6
commit
c701c2e0af
11 changed files with 899 additions and 2 deletions
1
go.mod
1
go.mod
|
|
@ -7,6 +7,7 @@ require (
|
|||
forge.lthn.ai/core/go-i18n v0.0.1
|
||||
forge.lthn.ai/core/go-ml v0.0.1
|
||||
forge.lthn.ai/core/go-mlx v0.0.1
|
||||
github.com/Snider/Poindexter v0.0.0-20260104200422-91146b212a1f
|
||||
github.com/marcboeker/go-duckdb v1.8.5
|
||||
github.com/parquet-go/parquet-go v0.27.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
|
|
|
|||
2
go.sum
2
go.sum
|
|
@ -14,6 +14,8 @@ github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7Oputl
|
|||
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
|
||||
github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw=
|
||||
github.com/ProtonMail/go-crypto v1.3.0/go.mod h1:9whxjD8Rbs29b4XWbB8irEcE8KHMqaR2e7GWU1R+/PE=
|
||||
github.com/Snider/Poindexter v0.0.0-20260104200422-91146b212a1f h1:+EnE414H9wUaBeUVNjyErusrxSbBGnGV6MBhTw/em0k=
|
||||
github.com/Snider/Poindexter v0.0.0-20260104200422-91146b212a1f/go.mod h1:nhgkbg4zWA4AS2Ga3RmcvdsyiI9TdxvSqe5EVBSb3Hk=
|
||||
github.com/alecthomas/assert/v2 v2.10.0 h1:jjRCHsj6hBJhkmhznrCzoNpbA3zqy0fYiUcYZP/GkPY=
|
||||
github.com/alecthomas/assert/v2 v2.10.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
|
||||
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
|
||||
|
|
|
|||
62
pkg/lem/analytics.go
Normal file
62
pkg/lem/analytics.go
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
poindexter "github.com/Snider/Poindexter"
|
||||
)
|
||||
|
||||
// ScoreDistribution wraps Poindexter's DistributionStats for LEM score populations.
// It is a type alias (not a defined type), so values pass to and from
// Poindexter APIs without any conversion.
type ScoreDistribution = poindexter.DistributionStats
|
||||
|
||||
// GrammarAxisStats wraps Poindexter's AxisDistribution for per-feature analysis.
// Alias rather than defined type, for the same interoperability reason as
// ScoreDistribution.
type GrammarAxisStats = poindexter.AxisDistribution
|
||||
|
||||
// ComputeScoreDistribution calculates percentile/variance stats over grammar composites.
|
||||
func ComputeScoreDistribution(scores []GrammarScore) ScoreDistribution {
|
||||
vals := make([]float64, len(scores))
|
||||
for i, s := range scores {
|
||||
vals[i] = s.Composite
|
||||
}
|
||||
return poindexter.ComputeDistributionStats(vals)
|
||||
}
|
||||
|
||||
// ComputeLEKDistribution calculates percentile/variance stats over LEK scores.
|
||||
func ComputeLEKDistribution(scores []*HeuristicScores) ScoreDistribution {
|
||||
vals := make([]float64, len(scores))
|
||||
for i, s := range scores {
|
||||
vals[i] = s.LEKScore
|
||||
}
|
||||
return poindexter.ComputeDistributionStats(vals)
|
||||
}
|
||||
|
||||
// ComputeGrammarAxisStats returns per-axis distribution stats for grammar features.
|
||||
func ComputeGrammarAxisStats(entries []ScoredEntry) []GrammarAxisStats {
|
||||
points := make([]poindexter.KDPoint[ScoredEntry], len(entries))
|
||||
for i, e := range entries {
|
||||
points[i] = poindexter.KDPoint[ScoredEntry]{
|
||||
ID: e.ID,
|
||||
Coords: GrammarFeatures(e.Grammar),
|
||||
Value: e,
|
||||
}
|
||||
}
|
||||
return poindexter.ComputeAxisDistributions(points, GrammarFeatureLabels())
|
||||
}
|
||||
|
||||
// SummaryReport holds aggregate analytics for a scored population.
type SummaryReport struct {
	// Total is the number of scored entries analysed.
	Total int
	// CompositeStats summarises the distribution of composite grammar scores.
	CompositeStats ScoreDistribution
	// AxisStats holds one distribution per grammar feature axis,
	// in GrammarFeatureLabels order.
	AxisStats []GrammarAxisStats
}
|
||||
|
||||
// ScoreSummary computes a full analytics report from scored entries.
|
||||
func ScoreSummary(entries []ScoredEntry) SummaryReport {
|
||||
scores := make([]GrammarScore, len(entries))
|
||||
for i, e := range entries {
|
||||
scores[i] = e.Grammar
|
||||
}
|
||||
return SummaryReport{
|
||||
Total: len(entries),
|
||||
CompositeStats: ComputeScoreDistribution(scores),
|
||||
AxisStats: ComputeGrammarAxisStats(entries),
|
||||
}
|
||||
}
|
||||
86
pkg/lem/analytics_test.go
Normal file
86
pkg/lem/analytics_test.go
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestComputeScoreDistribution checks count/min/max over a hand-picked
// composite population and bounds the mean loosely (exact mean ≈ 62.1).
func TestComputeScoreDistribution(t *testing.T) {
	scores := []GrammarScore{
		{Composite: 30},
		{Composite: 45},
		{Composite: 55},
		{Composite: 60},
		{Composite: 75},
		{Composite: 80},
		{Composite: 90},
	}
	dist := ComputeScoreDistribution(scores)
	if dist.Count != 7 {
		t.Errorf("count = %d, want 7", dist.Count)
	}
	if dist.Min != 30 {
		t.Errorf("min = %f, want 30", dist.Min)
	}
	if dist.Max != 90 {
		t.Errorf("max = %f, want 90", dist.Max)
	}
	if dist.Mean < 50 || dist.Mean > 70 {
		t.Errorf("mean = %f, expected between 50 and 70", dist.Mean)
	}
}
|
||||
|
||||
// TestComputeLEKDistribution checks count/min/max over a pointer slice of
// heuristic scores (the function takes []*HeuristicScores).
func TestComputeLEKDistribution(t *testing.T) {
	scores := []*HeuristicScores{
		{LEKScore: 10},
		{LEKScore: 20},
		{LEKScore: 30},
		{LEKScore: 40},
		{LEKScore: 50},
	}
	dist := ComputeLEKDistribution(scores)
	if dist.Count != 5 {
		t.Errorf("count = %d, want 5", dist.Count)
	}
	if dist.Min != 10 {
		t.Errorf("min = %f, want 10", dist.Min)
	}
	if dist.Max != 50 {
		t.Errorf("max = %f, want 50", dist.Max)
	}
}
|
||||
|
||||
// TestComputeGrammarAxisStats verifies one AxisDistribution per grammar
// feature (6 axes), label ordering, and per-axis sample count.
func TestComputeGrammarAxisStats(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 1.0, QuestionRatio: 0.4, DomainDepth: 6, VerbDiversity: 20, NounDiversity: 25}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 1.5, QuestionRatio: 0.6, DomainDepth: 9, VerbDiversity: 30, NounDiversity: 35}},
	}
	axes := ComputeGrammarAxisStats(entries)
	if len(axes) != 6 {
		t.Fatalf("expected 6 axes, got %d", len(axes))
	}
	if axes[0].Name != "vocab_richness" {
		t.Errorf("axes[0].Name = %q, want vocab_richness", axes[0].Name)
	}
	if axes[0].Stats.Count != 3 {
		t.Errorf("axes[0] count = %d, want 3", axes[0].Stats.Count)
	}
}
|
||||
|
||||
// TestScoreSummary checks that the aggregate report carries the population
// size, composite stats over all entries, and all 6 axis distributions.
func TestScoreSummary(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{Composite: 40, VocabRichness: 0.1}},
		{ID: "b", Grammar: GrammarScore{Composite: 60, VocabRichness: 0.2}},
		{ID: "c", Grammar: GrammarScore{Composite: 80, VocabRichness: 0.3}},
	}
	summary := ScoreSummary(entries)
	if summary.Total != 3 {
		t.Errorf("total = %d, want 3", summary.Total)
	}
	if summary.CompositeStats.Count != 3 {
		t.Errorf("composite count = %d, want 3", summary.CompositeStats.Count)
	}
	if len(summary.AxisStats) != 6 {
		t.Errorf("axis count = %d, want 6", len(summary.AxisStats))
	}
}
|
||||
240
pkg/lem/cluster.go
Normal file
240
pkg/lem/cluster.go
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
|
||||
poindexter "github.com/Snider/Poindexter"
|
||||
)
|
||||
|
||||
// ScoredEntry pairs a response ID with its grammar scores for indexing.
type ScoredEntry struct {
	// ID uniquely identifies the scored response; it is also used as the
	// KD point ID, so duplicates can make Insert fail.
	ID string
	// Domain is the probe's subject domain, carried along for reporting.
	Domain string
	// Grammar holds the grammar scores that feed the 6D feature vector.
	Grammar GrammarScore
}
|
||||
|
||||
// ScoreIndex wraps a Poindexter KDTree over grammar feature vectors.
// Points are keyed by entry ID with coordinates from GrammarFeatures;
// distance semantics are set at construction (see NewScoreIndex).
type ScoreIndex struct {
	// tree is the underlying spatial index; each point's Value carries
	// the full ScoredEntry.
	tree *poindexter.KDTree[ScoredEntry]
}
|
||||
|
||||
// NewScoreIndex builds a KDTree from scored entries using cosine distance
|
||||
// on 6D grammar feature vectors. Raw coordinates are used (no normalization)
|
||||
// because cosine distance is angle-based and handles magnitude differences.
|
||||
func NewScoreIndex(entries []ScoredEntry) (*ScoreIndex, error) {
|
||||
if len(entries) == 0 {
|
||||
return nil, errors.New("lem: no entries to index")
|
||||
}
|
||||
|
||||
points := make([]poindexter.KDPoint[ScoredEntry], len(entries))
|
||||
for i, e := range entries {
|
||||
points[i] = poindexter.KDPoint[ScoredEntry]{
|
||||
ID: e.ID,
|
||||
Coords: GrammarFeatures(e.Grammar),
|
||||
Value: e,
|
||||
}
|
||||
}
|
||||
|
||||
tree, err := poindexter.NewKDTree(points,
|
||||
poindexter.WithMetric(poindexter.CosineDistance{}),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("lem: build tree: %w", err)
|
||||
}
|
||||
|
||||
return &ScoreIndex{tree: tree}, nil
|
||||
}
|
||||
|
||||
// Len returns the number of indexed entries.
// Delegates directly to the underlying KDTree.
func (idx *ScoreIndex) Len() int {
	return idx.tree.Len()
}
|
||||
|
||||
// Nearest finds the closest scored entry to the query vector.
|
||||
func (idx *ScoreIndex) Nearest(query []float64) (ScoredEntry, float64, bool) {
|
||||
pt, dist, ok := idx.tree.Nearest(query)
|
||||
if !ok {
|
||||
return ScoredEntry{}, 0, false
|
||||
}
|
||||
return pt.Value, dist, true
|
||||
}
|
||||
|
||||
// KNearest finds the k closest scored entries to the query vector.
|
||||
func (idx *ScoreIndex) KNearest(query []float64, k int) ([]ScoredEntry, []float64) {
|
||||
pts, dists := idx.tree.KNearest(query, k)
|
||||
entries := make([]ScoredEntry, len(pts))
|
||||
for i, pt := range pts {
|
||||
entries[i] = pt.Value
|
||||
}
|
||||
return entries, dists
|
||||
}
|
||||
|
||||
// Radius finds all entries within distance r of the query vector.
|
||||
func (idx *ScoreIndex) Radius(query []float64, r float64) ([]ScoredEntry, []float64) {
|
||||
pts, dists := idx.tree.Radius(query, r)
|
||||
entries := make([]ScoredEntry, len(pts))
|
||||
for i, pt := range pts {
|
||||
entries[i] = pt.Value
|
||||
}
|
||||
return entries, dists
|
||||
}
|
||||
|
||||
// IsDuplicate returns true if any indexed entry is within threshold distance
|
||||
// of the query vector. Use during distill to reject near-identical outputs.
|
||||
func (idx *ScoreIndex) IsDuplicate(query []float64, threshold float64) bool {
|
||||
_, dist, ok := idx.tree.Nearest(query)
|
||||
return ok && dist <= threshold
|
||||
}
|
||||
|
||||
// Insert adds a new scored entry to the index.
|
||||
func (idx *ScoreIndex) Insert(entry ScoredEntry) error {
|
||||
features := GrammarFeatures(entry.Grammar)
|
||||
pt := poindexter.KDPoint[ScoredEntry]{
|
||||
ID: entry.ID,
|
||||
Coords: features,
|
||||
Value: entry,
|
||||
}
|
||||
if !idx.tree.Insert(pt) {
|
||||
return fmt.Errorf("lem: failed to insert %s (duplicate ID?)", entry.ID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Points returns all indexed entries.
|
||||
func (idx *ScoreIndex) Points() []ScoredEntry {
|
||||
pts := idx.tree.Points()
|
||||
entries := make([]ScoredEntry, len(pts))
|
||||
for i, pt := range pts {
|
||||
entries[i] = pt.Value
|
||||
}
|
||||
return entries
|
||||
}
|
||||
|
||||
// featureRange holds the min/max for one axis.
// FindGaps uses it to bound the probe-sampling grid along each feature.
type featureRange struct{ min, max float64 }
|
||||
|
||||
// GapReport describes a region of quality-space with poor coverage.
// Distances use the index's metric (cosine distance — see NewScoreIndex).
type GapReport struct {
	// Probe is the sample point coordinates in grammar feature space.
	Probe []float64
	// AvgDistance is the average distance to the k nearest indexed entries.
	// Larger values mean the probe region is further from any real data.
	AvgDistance float64
	// NearestIDs lists the IDs of the k nearest entries.
	NearestIDs []string
}
|
||||
|
||||
// FindGaps samples the grammar feature space and identifies regions
|
||||
// where the k-nearest indexed entries are far away (poor coverage).
|
||||
// Returns gap reports sorted by AvgDistance descending (worst gaps first).
|
||||
func FindGaps(entries []ScoredEntry, k int) []GapReport {
|
||||
if len(entries) < 2 {
|
||||
return nil
|
||||
}
|
||||
|
||||
idx, err := NewScoreIndex(entries)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Compute per-axis min/max for sampling range.
|
||||
dim := 6
|
||||
ranges := make([]featureRange, dim)
|
||||
first := GrammarFeatures(entries[0].Grammar)
|
||||
for i := range dim {
|
||||
ranges[i] = featureRange{min: first[i], max: first[i]}
|
||||
}
|
||||
for _, e := range entries[1:] {
|
||||
f := GrammarFeatures(e.Grammar)
|
||||
for i := range dim {
|
||||
if f[i] < ranges[i].min {
|
||||
ranges[i].min = f[i]
|
||||
}
|
||||
if f[i] > ranges[i].max {
|
||||
ranges[i].max = f[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sample a grid of probe points across the feature space.
|
||||
// 3 steps per axis = 3^6 = 729 probe points.
|
||||
steps := 3
|
||||
probes := sampleGrid(ranges, steps, dim)
|
||||
|
||||
if k > len(entries) {
|
||||
k = len(entries)
|
||||
}
|
||||
|
||||
var gaps []GapReport
|
||||
for _, probe := range probes {
|
||||
neighbours, dists := idx.KNearest(probe, k)
|
||||
if len(dists) == 0 {
|
||||
continue
|
||||
}
|
||||
avg := 0.0
|
||||
for _, d := range dists {
|
||||
avg += d
|
||||
}
|
||||
avg /= float64(len(dists))
|
||||
|
||||
ids := make([]string, len(neighbours))
|
||||
for i, n := range neighbours {
|
||||
ids[i] = n.ID
|
||||
}
|
||||
gaps = append(gaps, GapReport{
|
||||
Probe: probe,
|
||||
AvgDistance: avg,
|
||||
NearestIDs: ids,
|
||||
})
|
||||
}
|
||||
|
||||
// Sort by worst coverage first.
|
||||
slices.SortFunc(gaps, func(a, b GapReport) int {
|
||||
return cmp.Compare(b.AvgDistance, a.AvgDistance) // descending
|
||||
})
|
||||
|
||||
return gaps
|
||||
}
|
||||
|
||||
// sampleGrid generates probe points across the feature space
|
||||
// by stepping through each axis's [min, max] range.
|
||||
func sampleGrid(ranges []featureRange, steps, dim int) [][]float64 {
|
||||
if dim == 0 || steps < 2 {
|
||||
return nil
|
||||
}
|
||||
|
||||
axisValues := make([][]float64, dim)
|
||||
for i, r := range ranges {
|
||||
vals := make([]float64, steps)
|
||||
for j := range steps {
|
||||
vals[j] = r.min + (r.max-r.min)*float64(j)/float64(steps-1)
|
||||
}
|
||||
axisValues[i] = vals
|
||||
}
|
||||
|
||||
total := 1
|
||||
for range dim {
|
||||
total *= steps
|
||||
}
|
||||
probes := make([][]float64, 0, total)
|
||||
current := make([]float64, dim)
|
||||
var generate func(axis int)
|
||||
generate = func(axis int) {
|
||||
if axis == dim {
|
||||
probe := make([]float64, dim)
|
||||
copy(probe, current)
|
||||
probes = append(probes, probe)
|
||||
return
|
||||
}
|
||||
for _, v := range axisValues[axis] {
|
||||
current[axis] = v
|
||||
generate(axis + 1)
|
||||
}
|
||||
}
|
||||
generate(0)
|
||||
|
||||
return probes
|
||||
}
|
||||
|
||||
163
pkg/lem/cluster_test.go
Normal file
163
pkg/lem/cluster_test.go
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestNewScoreIndex_Empty verifies that building an index from no entries
// fails with an error and yields a nil index.
func TestNewScoreIndex_Empty(t *testing.T) {
	idx, err := NewScoreIndex(nil)
	if err == nil {
		t.Fatal("expected error for nil input")
	}
	if idx != nil {
		t.Fatal("expected nil index")
	}
}
|
||||
|
||||
// TestNewScoreIndex_Build verifies a successful build indexes every entry.
func TestNewScoreIndex_Build(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 1.0, QuestionRatio: 0.4, DomainDepth: 7, VerbDiversity: 20, NounDiversity: 25}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
	}
	idx, err := NewScoreIndex(entries)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if idx.Len() != 3 {
		t.Fatalf("expected 3 points, got %d", idx.Len())
	}
}
|
||||
|
||||
// TestScoreIndex_Nearest queries with a vector close to the "mid" profile
// and expects that entry back with a non-negative distance.
func TestScoreIndex_Nearest(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "low", Grammar: GrammarScore{VocabRichness: 0.05, TenseEntropy: 0.2, QuestionRatio: 0.1, DomainDepth: 1, VerbDiversity: 5, NounDiversity: 5}},
		{ID: "mid", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
		{ID: "high", Grammar: GrammarScore{VocabRichness: 0.25, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 10, VerbDiversity: 30, NounDiversity: 35}},
	}
	idx, err := NewScoreIndex(entries)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	query := GrammarFeatures(GrammarScore{VocabRichness: 0.14, TenseEntropy: 0.7, QuestionRatio: 0.28, DomainDepth: 4, VerbDiversity: 14, NounDiversity: 18})
	nearest, dist, ok := idx.Nearest(query)
	if !ok {
		t.Fatal("expected a nearest match")
	}
	if nearest.ID != "mid" {
		t.Errorf("nearest = %q, want mid", nearest.ID)
	}
	if dist < 0 {
		t.Errorf("distance should be non-negative, got %f", dist)
	}
}
|
||||
|
||||
// TestScoreIndex_KNearest checks that k=2 returns exactly two entries and
// two paired distances.
func TestScoreIndex_KNearest(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.3, QuestionRatio: 0.1, DomainDepth: 2, VerbDiversity: 5, NounDiversity: 8}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 0.6, QuestionRatio: 0.2, DomainDepth: 4, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 0.9, QuestionRatio: 0.3, DomainDepth: 6, VerbDiversity: 15, NounDiversity: 22}},
		{ID: "d", Grammar: GrammarScore{VocabRichness: 0.4, TenseEntropy: 1.2, QuestionRatio: 0.4, DomainDepth: 8, VerbDiversity: 20, NounDiversity: 30}},
	}
	idx, err := NewScoreIndex(entries)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	query := GrammarFeatures(GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.45, QuestionRatio: 0.15, DomainDepth: 3, VerbDiversity: 7, NounDiversity: 11})
	results, dists := idx.KNearest(query, 2)
	if len(results) != 2 {
		t.Fatalf("expected 2 results, got %d", len(results))
	}
	if len(dists) != 2 {
		t.Fatalf("expected 2 distances, got %d", len(dists))
	}
}
|
||||
|
||||
// TestScoreIndex_Radius queries at the exact location of entry "a" with a
// tight radius; the nearby cluster should match, the "far" outlier should not.
func TestScoreIndex_Radius(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.11, TenseEntropy: 0.51, QuestionRatio: 0.21, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "far", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 20, VerbDiversity: 40, NounDiversity: 50}},
	}
	idx, err := NewScoreIndex(entries)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	query := GrammarFeatures(GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15})
	results, _ := idx.Radius(query, 0.01)
	// "a" and "b" should be within radius, "far" should not.
	if len(results) < 1 {
		t.Errorf("expected at least 1 result within radius, got %d", len(results))
	}
}
|
||||
|
||||
// TestIsDuplicate_HighSimilarity: a query identical to the sole indexed
// entry must be flagged as a duplicate at a small threshold.
func TestIsDuplicate_HighSimilarity(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "existing", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
	}
	idx, err := NewScoreIndex(entries)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	nearDup := GrammarFeatures(GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20})
	if !idx.IsDuplicate(nearDup, 0.05) {
		t.Error("expected near-identical vector to be flagged as duplicate")
	}
}
|
||||
|
||||
// TestIsDuplicate_LowSimilarity: with an angle-based (cosine) metric, a
// vector with an opposite feature balance must NOT count as a duplicate.
func TestIsDuplicate_LowSimilarity(t *testing.T) {
	// High vocab/tense, low verb/noun — one angular profile.
	entries := []ScoredEntry{
		{ID: "existing", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 1, VerbDiversity: 2, NounDiversity: 3}},
	}
	idx, err := NewScoreIndex(entries)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Low vocab/tense, high verb/noun — genuinely different angular profile.
	different := GrammarFeatures(GrammarScore{VocabRichness: 0.01, TenseEntropy: 0.05, QuestionRatio: 0.01, DomainDepth: 20, VerbDiversity: 40, NounDiversity: 50})
	if idx.IsDuplicate(different, 0.05) {
		t.Error("expected different angular profile to NOT be flagged as duplicate")
	}
}
|
||||
|
||||
// TestScoreIndex_Insert verifies inserting a fresh ID succeeds and grows
// the index.
func TestScoreIndex_Insert(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "seed", Grammar: GrammarScore{VocabRichness: 0.15, TenseEntropy: 0.8, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
	}
	idx, err := NewScoreIndex(entries)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	err = idx.Insert(ScoredEntry{
		ID:      "new",
		Grammar: GrammarScore{VocabRichness: 0.25, TenseEntropy: 1.2, QuestionRatio: 0.5, DomainDepth: 8, VerbDiversity: 22, NounDiversity: 30},
	})
	if err != nil {
		t.Fatalf("insert error: %v", err)
	}
	if idx.Len() != 2 {
		t.Fatalf("expected 2 entries, got %d", idx.Len())
	}
}
|
||||
|
||||
// TestScoreIndex_Points verifies the index dumps back every stored entry.
func TestScoreIndex_Points(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2}},
	}
	idx, err := NewScoreIndex(entries)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	pts := idx.Points()
	if len(pts) != 2 {
		t.Fatalf("expected 2 points, got %d", len(pts))
	}
}
|
||||
|
|
@ -130,3 +130,49 @@ func RunCoverage(args []string) {
|
|||
fmt.Println(" - Swahili, Yoruba, Amharic (Sub-Saharan Africa)")
|
||||
fmt.Println(" - Indigenous languages (Quechua, Nahuatl, Aymara)")
|
||||
}
|
||||
|
||||
// PrintScoreAnalytics prints score distribution statistics and gap analysis
// for a set of scored entries. Use after scoring responses with grammar v3.
// Output goes to stdout: a composite-score summary, a per-axis table, and —
// when at least 3 entries exist — the 10 worst coverage gaps from FindGaps.
func PrintScoreAnalytics(entries []ScoredEntry) {
	if len(entries) == 0 {
		fmt.Println("No scored entries to analyse.")
		return
	}

	report := ScoreSummary(entries)

	fmt.Println("\nGrammar Score Distribution")
	fmt.Println("==================================================")
	fmt.Printf(" Entries: %d\n", report.Total)
	cs := report.CompositeStats
	fmt.Printf(" Mean: %.1f\n", cs.Mean)
	fmt.Printf(" Median: %.1f\n", cs.Median)
	fmt.Printf(" StdDev: %.1f\n", cs.StdDev)
	fmt.Printf(" Range: %.1f – %.1f\n", cs.Min, cs.Max)
	fmt.Printf(" P25: %.1f\n", cs.P25)
	fmt.Printf(" P75: %.1f\n", cs.P75)
	fmt.Printf(" P90: %.1f\n", cs.P90)
	fmt.Printf(" Skewness: %.2f\n", cs.Skewness)

	fmt.Println("\nPer-Axis Statistics")
	fmt.Println("--------------------------------------------------")
	fmt.Printf(" %-20s %8s %8s %8s %8s\n", "Feature", "Mean", "StdDev", "Min", "Max")
	for _, ax := range report.AxisStats {
		fmt.Printf(" %-20s %8.3f %8.3f %8.3f %8.3f\n",
			ax.Name, ax.Stats.Mean, ax.Stats.StdDev, ax.Stats.Min, ax.Stats.Max)
	}

	// Gap analysis.
	// NOTE(review): k is min(3, len(entries)), and FindGaps needs >= 2
	// entries; the >= 3 guard also keeps tiny populations out of the report.
	if len(entries) >= 3 {
		gaps := FindGaps(entries, min(3, len(entries)))
		if len(gaps) > 0 {
			fmt.Println("\nTop 10 Coverage Gaps (worst first)")
			fmt.Println("--------------------------------------------------")
			limit := min(10, len(gaps))
			for i := range limit {
				g := gaps[i]
				fmt.Printf(" #%d avg_dist=%.4f nearest=%v\n", i+1, g.AvgDistance, g.NearestIDs)
			}
		}
	}
}
|
||||
|
|
|
|||
82
pkg/lem/coverage_test.go
Normal file
82
pkg/lem/coverage_test.go
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestFindGaps_UniformCoverage checks that evenly spread entries still
// produce a non-empty (grid is always probed) gap report.
func TestFindGaps_UniformCoverage(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.1, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.2, TenseEntropy: 1.0, QuestionRatio: 0.3, DomainDepth: 6, VerbDiversity: 20, NounDiversity: 25}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.3, TenseEntropy: 1.5, QuestionRatio: 0.5, DomainDepth: 9, VerbDiversity: 30, NounDiversity: 35}},
	}
	gaps := FindGaps(entries, 3)
	if gaps == nil {
		t.Fatal("expected non-nil gaps")
	}
	if len(gaps) == 0 {
		t.Error("expected some gap reports")
	}
}
|
||||
|
||||
// TestFindGaps_ClusteredData checks that a tight cluster leaves the rest of
// the sampled grid poorly covered (positive distance on the worst gap).
func TestFindGaps_ClusteredData(t *testing.T) {
	// All entries clustered in one corner — grid probes far from cluster should show gaps.
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.10, TenseEntropy: 0.50, QuestionRatio: 0.1, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.11, TenseEntropy: 0.51, QuestionRatio: 0.11, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.12, TenseEntropy: 0.52, QuestionRatio: 0.12, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
	}
	gaps := FindGaps(entries, 2)
	if len(gaps) == 0 {
		t.Error("expected gaps in clustered data")
	}
	// Top gap should have positive distance.
	if gaps[0].AvgDistance <= 0 {
		t.Error("expected positive distance for worst gap")
	}
}
|
||||
|
||||
// TestFindGaps_SortedByWorst verifies the descending AvgDistance ordering by
// comparing the first report against the last.
func TestFindGaps_SortedByWorst(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.3, QuestionRatio: 0.1, DomainDepth: 2, VerbDiversity: 5, NounDiversity: 8}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.5, TenseEntropy: 1.0, QuestionRatio: 0.3, DomainDepth: 5, VerbDiversity: 15, NounDiversity: 20}},
		{ID: "c", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 12, VerbDiversity: 30, NounDiversity: 40}},
	}
	gaps := FindGaps(entries, 2)
	if len(gaps) < 2 {
		t.Fatalf("expected at least 2 gaps, got %d", len(gaps))
	}
	// Descending order.
	if gaps[0].AvgDistance < gaps[len(gaps)-1].AvgDistance {
		t.Error("expected gaps sorted descending by AvgDistance")
	}
}
|
||||
|
||||
// TestFindGaps_TooFewEntries: fewer than 2 entries cannot bound a sampling
// range, so FindGaps must return nil.
func TestFindGaps_TooFewEntries(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "solo", Grammar: GrammarScore{VocabRichness: 0.1}},
	}
	gaps := FindGaps(entries, 1)
	if gaps != nil {
		t.Error("expected nil for single entry")
	}
}
|
||||
|
||||
// TestGapReport_HasFields sanity-checks every report's fields: non-negative
// distance, 6D probe coordinates, and at least one nearest-neighbour ID.
func TestGapReport_HasFields(t *testing.T) {
	entries := []ScoredEntry{
		{ID: "a", Grammar: GrammarScore{VocabRichness: 0.1, TenseEntropy: 0.5, QuestionRatio: 0.2, DomainDepth: 3, VerbDiversity: 10, NounDiversity: 15}},
		{ID: "b", Grammar: GrammarScore{VocabRichness: 0.9, TenseEntropy: 1.5, QuestionRatio: 0.8, DomainDepth: 12, VerbDiversity: 35, NounDiversity: 45}},
	}
	gaps := FindGaps(entries, 1)
	for _, g := range gaps {
		if g.AvgDistance < 0 {
			t.Error("AvgDistance should be non-negative")
		}
		if len(g.Probe) != 6 {
			t.Errorf("Probe should be 6D, got %d", len(g.Probe))
		}
		if len(g.NearestIDs) == 0 {
			t.Error("NearestIDs should not be empty")
		}
	}
}
|
||||
|
|
@ -189,10 +189,14 @@ func RunDistill(args []string) {
|
|||
|
||||
kept := 0
|
||||
skipped := 0
|
||||
deduped := 0
|
||||
totalStart := time.Now()
|
||||
ctx := context.Background()
|
||||
kernelStr := strings.TrimSpace(string(kernel))
|
||||
|
||||
// Running duplicate index for grammar-profile deduplication.
|
||||
var dedupIdx *ScoreIndex
|
||||
|
||||
for i, probe := range probes {
|
||||
var best *distillCandidate
|
||||
|
||||
|
|
@ -256,6 +260,16 @@ func RunDistill(args []string) {
|
|||
|
||||
// Quality gate.
|
||||
if best != nil && best.Grammar.Composite >= *minScore {
|
||||
// Duplicate filter: reject if grammar profile is too similar to an already-kept entry.
|
||||
bestFeatures := GrammarFeatures(best.Grammar)
|
||||
if dedupIdx != nil && dedupIdx.IsDuplicate(bestFeatures, 0.02) {
|
||||
deduped++
|
||||
fmt.Fprintf(os.Stderr, " ~ DEDUP %s (grammar profile too similar to existing)\n", probe.ID)
|
||||
// Release GPU memory between probes to prevent incremental leak.
|
||||
runtime.GC()
|
||||
continue
|
||||
}
|
||||
|
||||
// Save with sandwich prompt — kernel wraps the bare probe for training.
|
||||
example := TrainingExample{
|
||||
Messages: []ChatMessage{
|
||||
|
|
@ -266,6 +280,14 @@ func RunDistill(args []string) {
|
|||
line, _ := json.Marshal(example)
|
||||
out.Write(append(line, '\n'))
|
||||
|
||||
// Add to dedup index.
|
||||
entry := ScoredEntry{ID: probe.ID, Domain: probe.Domain, Grammar: best.Grammar}
|
||||
if dedupIdx == nil {
|
||||
dedupIdx, _ = NewScoreIndex([]ScoredEntry{entry})
|
||||
} else {
|
||||
_ = dedupIdx.Insert(entry)
|
||||
}
|
||||
|
||||
kept++
|
||||
fmt.Fprintf(os.Stderr, " ✓ KEPT %s (g=%.1f, verbs=%d, nouns=%d, enr=%+.1f)\n",
|
||||
probe.ID, best.Grammar.Composite,
|
||||
|
|
@ -293,9 +315,11 @@ func RunDistill(args []string) {
|
|||
fmt.Fprintf(os.Stderr, "Runs: %d per probe (%d total generations)\n", *runs, len(probes)**runs)
|
||||
fmt.Fprintf(os.Stderr, "Scorer: go-i18n/reversal grammar v3, gate >= %.1f\n", *minScore)
|
||||
fmt.Fprintf(os.Stderr, "Kept: %d\n", kept)
|
||||
fmt.Fprintf(os.Stderr, "Deduped: %d\n", deduped)
|
||||
fmt.Fprintf(os.Stderr, "Skipped: %d\n", skipped)
|
||||
if kept+skipped > 0 {
|
||||
fmt.Fprintf(os.Stderr, "Pass rate: %.0f%%\n", float64(kept)/float64(kept+skipped)*100)
|
||||
total := kept + deduped + skipped
|
||||
if total > 0 {
|
||||
fmt.Fprintf(os.Stderr, "Pass rate: %.0f%%\n", float64(kept)/float64(total)*100)
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "Output: %s\n", outputPath)
|
||||
fmt.Fprintf(os.Stderr, "Duration: %.0fs (%.1fm)\n", duration.Seconds(), duration.Minutes())
|
||||
|
|
|
|||
70
pkg/lem/features.go
Normal file
70
pkg/lem/features.go
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
package lem
|
||||
|
||||
// GrammarFeatures extracts a 6-dimensional feature vector from a GrammarScore.
|
||||
// Order: VocabRichness, TenseEntropy, QuestionRatio, DomainDepth, VerbDiversity, NounDiversity.
|
||||
// Composite is excluded — it's a derived weighted sum, not an independent feature.
|
||||
func GrammarFeatures(gs GrammarScore) []float64 {
|
||||
return []float64{
|
||||
gs.VocabRichness,
|
||||
gs.TenseEntropy,
|
||||
gs.QuestionRatio,
|
||||
float64(gs.DomainDepth),
|
||||
float64(gs.VerbDiversity),
|
||||
float64(gs.NounDiversity),
|
||||
}
|
||||
}
|
||||
|
||||
// GrammarFeatureLabels returns axis labels matching GrammarFeatures order.
func GrammarFeatureLabels() []string {
	labels := [...]string{
		"vocab_richness",
		"tense_entropy",
		"question_ratio",
		"domain_depth",
		"verb_diversity",
		"noun_diversity",
	}
	return labels[:]
}
|
||||
|
||||
// HeuristicFeatures extracts an 8-dimensional feature vector from HeuristicScores.
|
||||
// Order: ComplianceMarkers, FormulaicPreamble, FirstPerson, CreativeForm,
|
||||
//
|
||||
// EngagementDepth, EmotionalRegister, Degeneration, EmptyBroken.
|
||||
//
|
||||
// LEKScore is excluded — it's a derived weighted sum.
|
||||
func HeuristicFeatures(hs HeuristicScores) []float64 {
|
||||
return []float64{
|
||||
float64(hs.ComplianceMarkers),
|
||||
float64(hs.FormulaicPreamble),
|
||||
float64(hs.FirstPerson),
|
||||
float64(hs.CreativeForm),
|
||||
float64(hs.EngagementDepth),
|
||||
float64(hs.EmotionalRegister),
|
||||
float64(hs.Degeneration),
|
||||
float64(hs.EmptyBroken),
|
||||
}
|
||||
}
|
||||
|
||||
// HeuristicFeatureLabels returns axis labels matching HeuristicFeatures order.
func HeuristicFeatureLabels() []string {
	labels := make([]string, 0, 8)
	labels = append(labels, "compliance_markers", "formulaic_preamble", "first_person", "creative_form")
	labels = append(labels, "engagement_depth", "emotional_register", "degeneration", "empty_broken")
	return labels
}
|
||||
|
||||
// CombinedFeatures concatenates grammar (6D) and heuristic (8D) into a 14D vector.
|
||||
func CombinedFeatures(gs GrammarScore, hs HeuristicScores) []float64 {
|
||||
return append(GrammarFeatures(gs), HeuristicFeatures(hs)...)
|
||||
}
|
||||
|
||||
// CombinedFeatureLabels returns axis labels for the 14D combined vector.
|
||||
func CombinedFeatureLabels() []string {
|
||||
return append(GrammarFeatureLabels(), HeuristicFeatureLabels()...)
|
||||
}
|
||||
121
pkg/lem/features_test.go
Normal file
121
pkg/lem/features_test.go
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGrammarFeatures_Length(t *testing.T) {
|
||||
gs := GrammarScore{
|
||||
VocabRichness: 0.15,
|
||||
TenseEntropy: 1.2,
|
||||
QuestionRatio: 0.3,
|
||||
DomainDepth: 5,
|
||||
VerbDiversity: 12,
|
||||
NounDiversity: 18,
|
||||
Composite: 65.0,
|
||||
}
|
||||
vec := GrammarFeatures(gs)
|
||||
if len(vec) != 6 {
|
||||
t.Fatalf("expected 6 features, got %d", len(vec))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGrammarFeatures_Values(t *testing.T) {
|
||||
gs := GrammarScore{
|
||||
VocabRichness: 0.15,
|
||||
TenseEntropy: 1.2,
|
||||
QuestionRatio: 0.3,
|
||||
DomainDepth: 5,
|
||||
VerbDiversity: 12,
|
||||
NounDiversity: 18,
|
||||
Composite: 65.0,
|
||||
}
|
||||
vec := GrammarFeatures(gs)
|
||||
if vec[0] != 0.15 {
|
||||
t.Errorf("vec[0] = %f, want 0.15", vec[0])
|
||||
}
|
||||
if vec[1] != 1.2 {
|
||||
t.Errorf("vec[1] = %f, want 1.2", vec[1])
|
||||
}
|
||||
if vec[3] != 5.0 {
|
||||
t.Errorf("vec[3] = %f, want 5.0 (DomainDepth)", vec[3])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeuristicFeatures_Length(t *testing.T) {
|
||||
hs := HeuristicScores{
|
||||
ComplianceMarkers: 2,
|
||||
FormulaicPreamble: 1,
|
||||
FirstPerson: 3,
|
||||
CreativeForm: 4,
|
||||
EngagementDepth: 5,
|
||||
EmotionalRegister: 6,
|
||||
Degeneration: 0,
|
||||
EmptyBroken: 0,
|
||||
LEKScore: 42.0,
|
||||
}
|
||||
vec := HeuristicFeatures(hs)
|
||||
if len(vec) != 8 {
|
||||
t.Fatalf("expected 8 features, got %d", len(vec))
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeuristicFeatures_Values(t *testing.T) {
|
||||
hs := HeuristicScores{
|
||||
ComplianceMarkers: 2,
|
||||
FormulaicPreamble: 1,
|
||||
FirstPerson: 3,
|
||||
CreativeForm: 4,
|
||||
EngagementDepth: 5,
|
||||
EmotionalRegister: 6,
|
||||
Degeneration: 7,
|
||||
EmptyBroken: 0,
|
||||
}
|
||||
vec := HeuristicFeatures(hs)
|
||||
if vec[0] != 2.0 {
|
||||
t.Errorf("vec[0] = %f, want 2.0 (ComplianceMarkers)", vec[0])
|
||||
}
|
||||
if vec[6] != 7.0 {
|
||||
t.Errorf("vec[6] = %f, want 7.0 (Degeneration)", vec[6])
|
||||
}
|
||||
}
|
||||
|
||||
func TestCombinedFeatures_Length(t *testing.T) {
|
||||
gs := GrammarScore{Composite: 50}
|
||||
hs := HeuristicScores{LEKScore: 30}
|
||||
vec := CombinedFeatures(gs, hs)
|
||||
if len(vec) != 14 {
|
||||
t.Fatalf("expected 14 features, got %d", len(vec))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGrammarFeatureLabels(t *testing.T) {
|
||||
labels := GrammarFeatureLabels()
|
||||
if len(labels) != 6 {
|
||||
t.Fatalf("expected 6 labels, got %d", len(labels))
|
||||
}
|
||||
if labels[0] != "vocab_richness" {
|
||||
t.Errorf("labels[0] = %q, want vocab_richness", labels[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeuristicFeatureLabels(t *testing.T) {
|
||||
labels := HeuristicFeatureLabels()
|
||||
if len(labels) != 8 {
|
||||
t.Fatalf("expected 8 labels, got %d", len(labels))
|
||||
}
|
||||
if labels[4] != "engagement_depth" {
|
||||
t.Errorf("labels[4] = %q, want engagement_depth", labels[4])
|
||||
}
|
||||
}
|
||||
|
||||
func TestCombinedFeatureLabels(t *testing.T) {
|
||||
labels := CombinedFeatureLabels()
|
||||
if len(labels) != 14 {
|
||||
t.Fatalf("expected 14 labels, got %d", len(labels))
|
||||
}
|
||||
// First 6 are grammar, next 8 are heuristic.
|
||||
if labels[6] != "compliance_markers" {
|
||||
t.Errorf("labels[6] = %q, want compliance_markers", labels[6])
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue