feat: GQA position-wise analysis + integer composite (0-10000)
Single KV head models (Gemma3-1B) now use position-wise differentiation instead of pairwise head coherence. Composite switched from float64 to int on 0-10000 scale — same principle as blockchain atomic units. Signal validated: degenerate=5234, sovereign=6031, creative=6480. Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
b621baaded
commit
d99384f1e6
5 changed files with 183 additions and 23 deletions
|
|
@ -12,23 +12,60 @@ import (
|
|||
|
||||
// BOResult holds Q/K Bone Orientation metrics for a single inference.
// All float metrics are normalised to [0, 1]; Composite folds them into a
// single 0-10000 integer score.
type BOResult struct {
	MeanCoherence       float64   `json:"mean_coherence"`        // Mean pairwise head coherence (0-1), or position differentiation for GQA
	MeanCrossAlignment  float64   `json:"mean_cross_alignment"`  // Mean adjacent-layer alignment (0-1)
	MeanHeadEntropy     float64   `json:"mean_head_entropy"`     // Mean attention entropy per head (0-1)
	PhaseLockScore      float64   `json:"phase_lock_score"`      // Fraction of pairs above threshold
	JointCollapseCount  int       `json:"joint_collapse_count"`  // Layers where cross-alignment drops below threshold
	LayerCoherence      []float64 `json:"layer_coherence"`       // Per-layer coherence
	LayerCrossAlignment []float64 `json:"layer_cross_alignment"` // Per-layer cross-alignment (len = layers-1)
	GQA                 bool      `json:"gqa"`                   // True when analysis used position-wise mode (single KV head)
}
|
||||
|
||||
// Composite returns a 0-100 score from BO metrics.
|
||||
func (r *BOResult) Composite() float64 {
|
||||
// Composite returns a 0-10000 integer score from BO metrics.
|
||||
// Integer scale avoids floating-point rounding — same principle as blockchain
|
||||
// ledgers where 1.337 LTHN is stored as 133700 atomic units.
|
||||
func (r *BOResult) Composite() int {
|
||||
if r.GQA {
|
||||
return r.compositeGQA()
|
||||
}
|
||||
score := (0.30*r.MeanCoherence +
|
||||
0.25*r.MeanCrossAlignment +
|
||||
0.20*r.PhaseLockScore +
|
||||
0.15*r.MeanHeadEntropy +
|
||||
0.10*math.Max(0, 1.0-float64(r.JointCollapseCount)*0.2)) * 100.0
|
||||
return min(100, max(0, score))
|
||||
0.10*math.Max(0, 1.0-float64(r.JointCollapseCount)*0.2)) * 10000.0
|
||||
return min(10000, max(0, int(score)))
|
||||
}
|
||||
|
||||
// compositeGQA weights for single-KV-head models where position differentiation
|
||||
// is the primary signal.
|
||||
func (r *BOResult) compositeGQA() int {
|
||||
// Scale differentiation from [0.1, 0.7] to [0, 1].
|
||||
scaledDiff := (r.MeanCoherence - 0.1) / 0.6
|
||||
scaledDiff = min(1, max(0, scaledDiff))
|
||||
|
||||
// Layer variance: std of per-layer differentiation scores.
|
||||
var layerVar float64
|
||||
if len(r.LayerCoherence) > 1 {
|
||||
mean := r.MeanCoherence
|
||||
var sumSq float64
|
||||
for _, v := range r.LayerCoherence {
|
||||
d := v - mean
|
||||
sumSq += d * d
|
||||
}
|
||||
layerVar = math.Sqrt(sumSq / float64(len(r.LayerCoherence)))
|
||||
}
|
||||
// Scale variance from [0, 0.2] to [0, 1].
|
||||
scaledVar := min(1, layerVar/0.2)
|
||||
|
||||
// Joint stability.
|
||||
jointStab := math.Max(0, 1.0-float64(r.JointCollapseCount)*0.2)
|
||||
|
||||
score := (0.45*scaledDiff +
|
||||
0.25*scaledVar +
|
||||
0.15*r.MeanHeadEntropy +
|
||||
0.15*jointStab) * 10000.0
|
||||
return min(10000, max(0, int(score)))
|
||||
}
|
||||
|
||||
const (
|
||||
|
|
@ -37,11 +74,21 @@ const (
|
|||
)
|
||||
|
||||
// AnalyseAttention computes Q/K Bone Orientation metrics from a KV cache snapshot.
|
||||
// For multi-head models: pairwise head coherence within layers.
|
||||
// For GQA models (1 KV head): position-wise analysis within the single head.
|
||||
func AnalyseAttention(snap *inference.AttentionSnapshot) *BOResult {
|
||||
if snap == nil || len(snap.Keys) == 0 {
|
||||
return &BOResult{}
|
||||
}
|
||||
|
||||
if snap.NumHeads <= 1 {
|
||||
return analyseGQA(snap)
|
||||
}
|
||||
return analyseMultiHead(snap)
|
||||
}
|
||||
|
||||
// analyseMultiHead handles models with ≥2 KV heads (original algorithm).
|
||||
func analyseMultiHead(snap *inference.AttentionSnapshot) *BOResult {
|
||||
result := &BOResult{
|
||||
LayerCoherence: make([]float64, snap.NumLayers),
|
||||
LayerCrossAlignment: make([]float64, max(0, snap.NumLayers-1)),
|
||||
|
|
@ -49,7 +96,7 @@ func AnalyseAttention(snap *inference.AttentionSnapshot) *BOResult {
|
|||
|
||||
var totalCoherence, totalEntropy float64
|
||||
var totalPairsLocked, totalPairs int
|
||||
layerMeans := make([][]float32, snap.NumLayers) // mean K vector per layer
|
||||
layerMeans := make([][]float32, snap.NumLayers)
|
||||
|
||||
for layer := 0; layer < snap.NumLayers; layer++ {
|
||||
if layer >= len(snap.Keys) || snap.Keys[layer] == nil {
|
||||
|
|
@ -58,10 +105,8 @@ func AnalyseAttention(snap *inference.AttentionSnapshot) *BOResult {
|
|||
heads := snap.Keys[layer]
|
||||
nHeads := len(heads)
|
||||
|
||||
// Compute mean K vector for this layer (average over heads).
|
||||
layerMeans[layer] = meanVector(heads)
|
||||
|
||||
// Pairwise head coherence within layer.
|
||||
var layerCoh float64
|
||||
var pairs int
|
||||
for i := 0; i < nHeads; i++ {
|
||||
|
|
@ -81,13 +126,11 @@ func AnalyseAttention(snap *inference.AttentionSnapshot) *BOResult {
|
|||
result.LayerCoherence[layer] = layerCoh
|
||||
totalCoherence += layerCoh
|
||||
|
||||
// Per-head entropy (magnitude distribution across positions).
|
||||
for _, head := range heads {
|
||||
totalEntropy += headEntropy(head, snap.SeqLen, snap.HeadDim)
|
||||
}
|
||||
}
|
||||
|
||||
// Cross-layer alignment.
|
||||
var totalCross float64
|
||||
for i := 0; i < snap.NumLayers-1; i++ {
|
||||
if layerMeans[i] == nil || layerMeans[i+1] == nil {
|
||||
|
|
@ -118,6 +161,123 @@ func AnalyseAttention(snap *inference.AttentionSnapshot) *BOResult {
|
|||
return result
|
||||
}
|
||||
|
||||
// analyseGQA handles models with 1 KV head by analysing position-wise patterns.
|
||||
//
|
||||
// With a single KV head, each layer gives us seq_len K vectors of dim head_dim.
|
||||
// We measure:
|
||||
// - Position differentiation: mean pairwise cosine distance between token positions.
|
||||
// Low similarity = model distinguishes tokens (healthy). High = collapsed.
|
||||
// Mapped to MeanCoherence as 1-similarity (so high = good differentiation).
|
||||
// - Cross-layer position tracking: for each token position, cosine sim of its
|
||||
// K vector between adjacent layers. High = stable representation through depth.
|
||||
// - Entropy: same as multi-head (magnitude distribution across positions).
|
||||
func analyseGQA(snap *inference.AttentionSnapshot) *BOResult {
|
||||
result := &BOResult{
|
||||
GQA: true,
|
||||
LayerCoherence: make([]float64, snap.NumLayers),
|
||||
LayerCrossAlignment: make([]float64, max(0, snap.NumLayers-1)),
|
||||
}
|
||||
|
||||
seqLen := snap.SeqLen
|
||||
headDim := snap.HeadDim
|
||||
if seqLen < 2 || headDim == 0 {
|
||||
return result
|
||||
}
|
||||
|
||||
// Extract per-position K vectors for each layer.
|
||||
// posVecs[layer][pos] = float32 slice of len headDim.
|
||||
posVecs := make([][][]float32, snap.NumLayers)
|
||||
|
||||
var totalDiff, totalEntropy float64
|
||||
var totalPairsLocked, totalPairs int
|
||||
|
||||
for layer := 0; layer < snap.NumLayers; layer++ {
|
||||
if layer >= len(snap.Keys) || snap.Keys[layer] == nil || len(snap.Keys[layer]) == 0 {
|
||||
continue
|
||||
}
|
||||
flat := snap.Keys[layer][0] // Single head, flat [seq_len*head_dim].
|
||||
|
||||
// Split into per-position vectors.
|
||||
vecs := make([][]float32, seqLen)
|
||||
for pos := 0; pos < seqLen; pos++ {
|
||||
start := pos * headDim
|
||||
end := start + headDim
|
||||
if end > len(flat) {
|
||||
break
|
||||
}
|
||||
vecs[pos] = flat[start:end]
|
||||
}
|
||||
posVecs[layer] = vecs
|
||||
|
||||
// Position differentiation: pairwise cosine sim between positions.
|
||||
// We want LOW similarity = tokens are distinct = good.
|
||||
// Store as differentiation score = 1 - mean_sim.
|
||||
var simSum float64
|
||||
var pairs int
|
||||
for i := 0; i < len(vecs); i++ {
|
||||
for j := i + 1; j < len(vecs); j++ {
|
||||
if vecs[i] == nil || vecs[j] == nil {
|
||||
continue
|
||||
}
|
||||
sim := cosineSim32(vecs[i], vecs[j])
|
||||
simSum += sim
|
||||
pairs++
|
||||
// In GQA mode, "phase-lock" = position pairs that are well-differentiated.
|
||||
if sim < (1.0 - coherenceThreshold) {
|
||||
totalPairsLocked++
|
||||
}
|
||||
totalPairs++
|
||||
}
|
||||
}
|
||||
diffScore := 0.0
|
||||
if pairs > 0 {
|
||||
meanSim := simSum / float64(pairs)
|
||||
diffScore = 1.0 - meanSim // High = good differentiation.
|
||||
}
|
||||
result.LayerCoherence[layer] = diffScore
|
||||
totalDiff += diffScore
|
||||
|
||||
// Entropy.
|
||||
totalEntropy += headEntropy(flat, seqLen, headDim)
|
||||
}
|
||||
|
||||
// Cross-layer analysis for GQA: instead of raw vector comparison (meaningless
|
||||
// because each layer has its own K projection), measure the CHANGE in differentiation
|
||||
// between adjacent layers. A stable model maintains consistent differentiation;
|
||||
// a collapsing model shows sudden drops.
|
||||
for i := 0; i < snap.NumLayers-1; i++ {
|
||||
// Differentiation delta: how much differentiation changes between layers.
|
||||
// Small delta = smooth posture. Large delta = joint snap.
|
||||
delta := math.Abs(result.LayerCoherence[i+1] - result.LayerCoherence[i])
|
||||
smoothness := 1.0 - delta // High = smooth transition.
|
||||
result.LayerCrossAlignment[i] = smoothness
|
||||
if smoothness < collapseThreshold {
|
||||
result.JointCollapseCount++
|
||||
}
|
||||
}
|
||||
|
||||
// Mean cross-alignment = mean smoothness.
|
||||
var totalCross float64
|
||||
for _, v := range result.LayerCrossAlignment {
|
||||
totalCross += v
|
||||
}
|
||||
|
||||
if snap.NumLayers > 0 {
|
||||
result.MeanCoherence = totalDiff / float64(snap.NumLayers)
|
||||
}
|
||||
if len(result.LayerCrossAlignment) > 0 {
|
||||
result.MeanCrossAlignment = totalCross / float64(len(result.LayerCrossAlignment))
|
||||
}
|
||||
if snap.NumLayers > 0 {
|
||||
result.MeanHeadEntropy = totalEntropy / float64(snap.NumLayers)
|
||||
}
|
||||
if totalPairs > 0 {
|
||||
result.PhaseLockScore = float64(totalPairsLocked) / float64(totalPairs)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// cosineSim32 computes cosine similarity between two float32 slices.
|
||||
func cosineSim32(a, b []float32) float64 {
|
||||
if len(a) != len(b) || len(a) == 0 {
|
||||
|
|
|
|||
|
|
@ -52,8 +52,8 @@ func TestBoneOrientationScore_Composite_Good(t *testing.T) {
|
|||
LayerCrossAlignment: []float64{0.80, 0.80, 0.80},
|
||||
}
|
||||
score := result.Composite()
|
||||
if score < 60 || score > 100 {
|
||||
t.Fatalf("composite out of range: %.1f", score)
|
||||
if score < 6000 || score > 10000 {
|
||||
t.Fatalf("composite out of range: %d", score)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -66,8 +66,8 @@ func TestBoneOrientationScore_Composite_ZeroCollapses_Good(t *testing.T) {
|
|||
JointCollapseCount: 0,
|
||||
}
|
||||
score := result.Composite()
|
||||
if score != 100.0 {
|
||||
t.Fatalf("expected 100.0 for perfect scores, got %.1f", score)
|
||||
if score != 10000 {
|
||||
t.Fatalf("expected 10000 for perfect scores, got %d", score)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -80,8 +80,8 @@ func TestBoneOrientationScore_Composite_ManyCollapses_Good(t *testing.T) {
|
|||
JointCollapseCount: 10,
|
||||
}
|
||||
score := result.Composite()
|
||||
if score != 0.0 {
|
||||
t.Fatalf("expected 0.0 for zero scores, got %.1f", score)
|
||||
if score != 0 {
|
||||
t.Fatalf("expected 0 for zero scores, got %d", score)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -106,5 +106,5 @@ func RunAttention(args []string) {
|
|||
fmt.Printf(" Head Entropy: %.3f\n", result.MeanHeadEntropy)
|
||||
fmt.Printf(" Phase-Lock Score: %.3f\n", result.PhaseLockScore)
|
||||
fmt.Printf(" Joint Collapses: %d\n", result.JointCollapseCount)
|
||||
fmt.Printf(" Composite (0-100): %.1f\n", result.Composite())
|
||||
fmt.Printf(" Composite (0-10000): %d\n", result.Composite())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,8 +24,8 @@ type ScorerConfig struct {
|
|||
Delta bool `yaml:"delta"`
|
||||
SycophancyEcho float64 `yaml:"sycophancy_echo"`
|
||||
SycophancyUplift float64 `yaml:"sycophancy_uplift"`
|
||||
Attention bool `yaml:"attention"` // Enable attention scoring in distill
|
||||
AttentionMinScore float64 `yaml:"attention_min_score"` // Minimum BO composite (0-100, 0 = no gate)
|
||||
Attention bool `yaml:"attention"` // Enable attention scoring in distill
|
||||
AttentionMinScore int `yaml:"attention_min_score"` // Minimum BO composite (0-10000, 0 = no gate)
|
||||
}
|
||||
|
||||
// GenerateConfig holds default inference parameters.
|
||||
|
|
|
|||
|
|
@ -264,11 +264,11 @@ func RunDistill(args []string) {
|
|||
if attErr == nil {
|
||||
attResult := AnalyseAttention(snap)
|
||||
boScore := attResult.Composite()
|
||||
fmt.Fprintf(os.Stderr, " BO: coherence=%.2f phase=%.2f cross=%.2f composite=%.1f\n",
|
||||
fmt.Fprintf(os.Stderr, " BO: coherence=%.2f phase=%.2f cross=%.2f composite=%d\n",
|
||||
attResult.MeanCoherence, attResult.PhaseLockScore, attResult.MeanCrossAlignment, boScore)
|
||||
if aiCfg.Scorer.AttentionMinScore > 0 && boScore < aiCfg.Scorer.AttentionMinScore {
|
||||
skipped++
|
||||
fmt.Fprintf(os.Stderr, " ✗ SKIP %s (BO composite %.1f < %.1f)\n",
|
||||
fmt.Fprintf(os.Stderr, " ✗ SKIP %s (BO composite %d < %d)\n",
|
||||
probe.ID, boScore, aiCfg.Scorer.AttentionMinScore)
|
||||
runtime.GC()
|
||||
continue
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue