feat: add Q/K Bone Orientation analysis engine (pure Go CPU math)
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
31cb095435
commit
28309b26dc
2 changed files with 446 additions and 0 deletions
221
pkg/lem/attention.go
Normal file
221
pkg/lem/attention.go
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
// Q/K Bone Orientation analysis engine.
|
||||
//
|
||||
// Computes attention coherence metrics from KV cache snapshots.
|
||||
// Pure Go CPU math — no GPU, no CGO dependencies.
|
||||
package lem
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"forge.lthn.ai/core/go-inference"
|
||||
)
|
||||
|
||||
// BOResult holds Q/K Bone Orientation metrics for a single inference.
type BOResult struct {
	MeanCoherence       float64   `json:"mean_coherence"`        // Mean pairwise head coherence (0-1)
	MeanCrossAlignment  float64   `json:"mean_cross_alignment"`  // Mean adjacent-layer alignment (0-1)
	MeanHeadEntropy     float64   `json:"mean_head_entropy"`     // Mean attention entropy per head (0-1)
	PhaseLockScore      float64   `json:"phase_lock_score"`      // Fraction of head pairs above coherence threshold
	JointCollapseCount  int       `json:"joint_collapse_count"`  // Layers where cross-alignment drops below threshold
	LayerCoherence      []float64 `json:"layer_coherence"`       // Per-layer mean head coherence
	LayerCrossAlignment []float64 `json:"layer_cross_alignment"` // Per-layer cross-alignment (len = layers-1)
}

// Composite folds the BO metrics into a single 0-100 quality score.
//
// Weighting: coherence 30%, cross-alignment 25%, phase-lock 20%,
// entropy 15%, joint stability 10%. Each joint collapse removes 20%
// of the stability term. The result is clamped to [0, 100].
func (r *BOResult) Composite() float64 {
	stability := math.Max(0, 1.0-float64(r.JointCollapseCount)*0.2)
	weighted := 0.30*r.MeanCoherence +
		0.25*r.MeanCrossAlignment +
		0.20*r.PhaseLockScore +
		0.15*r.MeanHeadEntropy +
		0.10*stability
	return min(100, max(0, weighted*100.0))
}
|
||||
|
||||
// Thresholds used by AnalyseAttention for phase-lock and collapse detection.
const (
	// coherenceThreshold is the minimum cosine similarity for a head
	// pair to count as "phase-locked".
	coherenceThreshold = 0.7

	// collapseThreshold is the cross-alignment below which an adjacent
	// layer pair is counted as a joint collapse.
	collapseThreshold = 0.5
)
|
||||
|
||||
// AnalyseAttention computes Q/K Bone Orientation metrics from a KV cache snapshot.
|
||||
func AnalyseAttention(snap *inference.AttentionSnapshot) *BOResult {
|
||||
if snap == nil || len(snap.Keys) == 0 {
|
||||
return &BOResult{}
|
||||
}
|
||||
|
||||
result := &BOResult{
|
||||
LayerCoherence: make([]float64, snap.NumLayers),
|
||||
LayerCrossAlignment: make([]float64, max(0, snap.NumLayers-1)),
|
||||
}
|
||||
|
||||
var totalCoherence, totalEntropy float64
|
||||
var totalPairsLocked, totalPairs int
|
||||
layerMeans := make([][]float32, snap.NumLayers) // mean K vector per layer
|
||||
|
||||
for layer := 0; layer < snap.NumLayers; layer++ {
|
||||
if layer >= len(snap.Keys) || snap.Keys[layer] == nil {
|
||||
continue
|
||||
}
|
||||
heads := snap.Keys[layer]
|
||||
nHeads := len(heads)
|
||||
|
||||
// Compute mean K vector for this layer (average over heads).
|
||||
layerMeans[layer] = meanVector(heads)
|
||||
|
||||
// Pairwise head coherence within layer.
|
||||
var layerCoh float64
|
||||
var pairs int
|
||||
for i := 0; i < nHeads; i++ {
|
||||
for j := i + 1; j < nHeads; j++ {
|
||||
sim := cosineSim32(heads[i], heads[j])
|
||||
layerCoh += sim
|
||||
pairs++
|
||||
if sim >= coherenceThreshold {
|
||||
totalPairsLocked++
|
||||
}
|
||||
totalPairs++
|
||||
}
|
||||
}
|
||||
if pairs > 0 {
|
||||
layerCoh /= float64(pairs)
|
||||
}
|
||||
result.LayerCoherence[layer] = layerCoh
|
||||
totalCoherence += layerCoh
|
||||
|
||||
// Per-head entropy (magnitude distribution across positions).
|
||||
for _, head := range heads {
|
||||
totalEntropy += headEntropy(head, snap.SeqLen, snap.HeadDim)
|
||||
}
|
||||
}
|
||||
|
||||
// Cross-layer alignment.
|
||||
var totalCross float64
|
||||
for i := 0; i < snap.NumLayers-1; i++ {
|
||||
if layerMeans[i] == nil || layerMeans[i+1] == nil {
|
||||
continue
|
||||
}
|
||||
alignment := cosineSim32(layerMeans[i], layerMeans[i+1])
|
||||
result.LayerCrossAlignment[i] = alignment
|
||||
totalCross += alignment
|
||||
if alignment < collapseThreshold {
|
||||
result.JointCollapseCount++
|
||||
}
|
||||
}
|
||||
|
||||
if snap.NumLayers > 0 {
|
||||
result.MeanCoherence = totalCoherence / float64(snap.NumLayers)
|
||||
}
|
||||
if snap.NumLayers > 1 {
|
||||
result.MeanCrossAlignment = totalCross / float64(snap.NumLayers-1)
|
||||
}
|
||||
totalHeads := snap.NumLayers * snap.NumHeads
|
||||
if totalHeads > 0 {
|
||||
result.MeanHeadEntropy = totalEntropy / float64(totalHeads)
|
||||
}
|
||||
if totalPairs > 0 {
|
||||
result.PhaseLockScore = float64(totalPairsLocked) / float64(totalPairs)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// cosineSim32 returns the cosine similarity of two equal-length float32
// vectors, accumulating in float64 for precision. Mismatched lengths,
// empty inputs, or a zero-magnitude vector all yield 0.
func cosineSim32(a, b []float32) float64 {
	if len(a) == 0 || len(a) != len(b) {
		return 0
	}
	var dot, normA, normB float64
	for i, av := range a {
		x, y := float64(av), float64(b[i])
		dot += x * y
		normA += x * x
		normB += y * y
	}
	if normA == 0 || normB == 0 {
		// At least one zero vector: similarity is undefined, report 0.
		return 0
	}
	return dot / (math.Sqrt(normA) * math.Sqrt(normB))
}
|
||||
|
||||
// meanVector returns the element-wise mean of vecs, sized to the first
// vector. Longer vectors are truncated; shorter ones contribute only to
// the positions they cover. Returns nil for empty input.
func meanVector(vecs [][]float32) []float32 {
	if len(vecs) == 0 {
		return nil
	}
	width := len(vecs[0])
	sum := make([]float32, width)
	for _, vec := range vecs {
		limit := min(len(vec), width)
		for i := 0; i < limit; i++ {
			sum[i] += vec[i]
		}
	}
	count := float32(len(vecs))
	for i := range sum {
		sum[i] /= count
	}
	return sum
}
|
||||
|
||||
// headEntropy computes the normalised Shannon entropy (0-1) of per-position
// K-vector magnitudes for a single head. head is laid out position-major as
// seqLen blocks of headDim values; a truncated tail is treated as zeros.
// Degenerate inputs (zero seqLen/headDim, all-zero magnitudes, or
// seqLen == 1 where max entropy is zero) return 0.
func headEntropy(head []float32, seqLen, headDim int) float64 {
	if seqLen == 0 || headDim == 0 {
		return 0
	}

	// L2 magnitude of each position's sub-vector.
	mags := make([]float64, seqLen)
	var total float64
	for pos := range mags {
		base := pos * headDim
		var sq float64
		for d := 0; d < headDim && base+d < len(head); d++ {
			v := float64(head[base+d])
			sq += v * v
		}
		mags[pos] = math.Sqrt(sq)
		total += mags[pos]
	}
	if total == 0 {
		return 0
	}

	// Shannon entropy of the magnitude distribution, normalised by the
	// maximum possible entropy log2(seqLen).
	var entropy float64
	for _, m := range mags {
		if p := m / total; p > 0 {
			entropy -= p * math.Log2(p)
		}
	}
	maxEntropy := math.Log2(float64(seqLen))
	if maxEntropy == 0 {
		return 0
	}
	return entropy / maxEntropy
}
|
||||
|
||||
// AttentionFeatures returns a 5D feature vector from BO metrics.
|
||||
func AttentionFeatures(ar *BOResult) []float64 {
|
||||
if ar == nil {
|
||||
return make([]float64, 5)
|
||||
}
|
||||
return []float64{
|
||||
ar.MeanCoherence,
|
||||
ar.MeanCrossAlignment,
|
||||
ar.MeanHeadEntropy,
|
||||
ar.PhaseLockScore,
|
||||
math.Max(0, 1.0-float64(ar.JointCollapseCount)*0.2),
|
||||
}
|
||||
}
|
||||
|
||||
// AttentionFeatureLabels returns the labels for the attention feature
// vector, index-aligned with the slice produced by AttentionFeatures.
func AttentionFeatureLabels() []string {
	labels := []string{
		"mean_coherence",
		"cross_alignment",
		"head_entropy",
		"phase_lock",
		"joint_stability",
	}
	return labels
}
|
||||
225
pkg/lem/attention_test.go
Normal file
225
pkg/lem/attention_test.go
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand/v2"
|
||||
"testing"
|
||||
|
||||
"forge.lthn.ai/core/go-inference"
|
||||
)
|
||||
|
||||
// TestAnalyseAttention_Coherent_Good verifies that identical K vectors in
// every head and layer produce high coherence, high phase-lock, and no
// joint collapses.
func TestAnalyseAttention_Coherent_Good(t *testing.T) {
	// All heads in all layers point the same direction = high coherence.
	snap := makeCoherentSnapshot(4, 2, 8, 64)
	result := AnalyseAttention(snap)

	if result.MeanCoherence < 0.9 {
		t.Fatalf("expected high coherence for aligned heads, got %.3f", result.MeanCoherence)
	}
	if result.JointCollapseCount > 0 {
		t.Fatalf("expected zero joint collapses, got %d", result.JointCollapseCount)
	}
	if result.PhaseLockScore < 0.9 {
		t.Fatalf("expected high phase-lock, got %.3f", result.PhaseLockScore)
	}
}
|
||||
|
||||
// TestAnalyseAttention_Collapsed_Good verifies that random (near-orthogonal
// in expectation) head K vectors produce low mean coherence.
func TestAnalyseAttention_Collapsed_Good(t *testing.T) {
	// Orthogonal heads = low coherence.
	snap := makeOrthogonalSnapshot(4, 2, 8, 64)
	result := AnalyseAttention(snap)

	if result.MeanCoherence > 0.3 {
		t.Fatalf("expected low coherence for orthogonal heads, got %.3f", result.MeanCoherence)
	}
}
|
||||
|
||||
// TestAnalyseAttention_Nil_Good verifies that a nil snapshot returns a
// zero-valued result rather than panicking.
func TestAnalyseAttention_Nil_Good(t *testing.T) {
	result := AnalyseAttention(nil)
	if result.MeanCoherence != 0 {
		t.Fatalf("expected zero coherence for nil snapshot, got %.3f", result.MeanCoherence)
	}
}
|
||||
|
||||
// TestBoneOrientationScore_Composite_Good checks that a set of healthy
// metrics produces a composite score in a plausible high range.
func TestBoneOrientationScore_Composite_Good(t *testing.T) {
	result := &BOResult{
		MeanCoherence:       0.85,
		MeanCrossAlignment:  0.80,
		MeanHeadEntropy:     0.70,
		PhaseLockScore:      0.90,
		JointCollapseCount:  0,
		LayerCoherence:      []float64{0.85, 0.85, 0.85, 0.85},
		LayerCrossAlignment: []float64{0.80, 0.80, 0.80},
	}
	score := result.Composite()
	if score < 60 || score > 100 {
		t.Fatalf("composite out of range: %.1f", score)
	}
}
|
||||
|
||||
// TestBoneOrientationScore_Composite_ZeroCollapses_Good checks the upper
// bound: perfect metrics with no collapses score exactly 100.
func TestBoneOrientationScore_Composite_ZeroCollapses_Good(t *testing.T) {
	result := &BOResult{
		MeanCoherence:      1.0,
		MeanCrossAlignment: 1.0,
		MeanHeadEntropy:    1.0,
		PhaseLockScore:     1.0,
		JointCollapseCount: 0,
	}
	score := result.Composite()
	if score != 100.0 {
		t.Fatalf("expected 100.0 for perfect scores, got %.1f", score)
	}
}
|
||||
|
||||
// TestBoneOrientationScore_Composite_ManyCollapses_Good checks the lower
// bound: zero metrics plus enough collapses to floor the stability term
// score exactly 0 (the clamp prevents negatives).
func TestBoneOrientationScore_Composite_ManyCollapses_Good(t *testing.T) {
	result := &BOResult{
		MeanCoherence:      0.0,
		MeanCrossAlignment: 0.0,
		MeanHeadEntropy:    0.0,
		PhaseLockScore:     0.0,
		JointCollapseCount: 10,
	}
	score := result.Composite()
	if score != 0.0 {
		t.Fatalf("expected 0.0 for zero scores, got %.1f", score)
	}
}
|
||||
|
||||
// TestCosineSim32_Good checks that identical vectors have cosine
// similarity 1 (within floating-point tolerance).
func TestCosineSim32_Good(t *testing.T) {
	a := []float32{1, 0, 0}
	b := []float32{1, 0, 0}
	sim := cosineSim32(a, b)
	if math.Abs(sim-1.0) > 1e-6 {
		t.Fatalf("expected cosine sim 1.0 for identical vectors, got %f", sim)
	}
}
|
||||
|
||||
// TestCosineSim32_Orthogonal_Good checks that orthogonal basis vectors
// have cosine similarity 0.
func TestCosineSim32_Orthogonal_Good(t *testing.T) {
	a := []float32{1, 0, 0}
	b := []float32{0, 1, 0}
	sim := cosineSim32(a, b)
	if math.Abs(sim) > 1e-6 {
		t.Fatalf("expected cosine sim 0.0 for orthogonal vectors, got %f", sim)
	}
}
|
||||
|
||||
// TestHeadEntropy_Uniform_Good checks that equal magnitude at every
// position yields (near-)maximal normalised entropy.
func TestHeadEntropy_Uniform_Good(t *testing.T) {
	// Uniform magnitudes across positions = max entropy.
	seqLen, headDim := 8, 4
	head := make([]float32, seqLen*headDim)
	for i := range head {
		head[i] = 1.0 // All same magnitude.
	}
	ent := headEntropy(head, seqLen, headDim)
	if ent < 0.99 {
		t.Fatalf("expected near-max entropy for uniform magnitudes, got %.3f", ent)
	}
}
|
||||
|
||||
// TestHeadEntropy_Collapsed_Good checks that magnitude concentrated in a
// single position yields (near-)zero normalised entropy.
func TestHeadEntropy_Collapsed_Good(t *testing.T) {
	// All magnitude concentrated in one position = low entropy.
	seqLen, headDim := 8, 4
	head := make([]float32, seqLen*headDim)
	for d := 0; d < headDim; d++ {
		head[d] = 10.0 // Only position 0 has magnitude.
	}
	ent := headEntropy(head, seqLen, headDim)
	if ent > 0.1 {
		t.Fatalf("expected near-zero entropy for concentrated magnitude, got %.3f", ent)
	}
}
|
||||
|
||||
// TestAttentionFeatures_Good checks the feature vector's length, the
// pass-through of a raw metric, and the derived joint-stability value.
func TestAttentionFeatures_Good(t *testing.T) {
	result := &BOResult{
		MeanCoherence:      0.85,
		MeanCrossAlignment: 0.80,
		MeanHeadEntropy:    0.70,
		PhaseLockScore:     0.90,
		JointCollapseCount: 1,
	}
	f := AttentionFeatures(result)
	if len(f) != 5 {
		t.Fatalf("expected 5D, got %dD", len(f))
	}
	if f[0] != 0.85 {
		t.Fatalf("expected coherence 0.85, got %f", f[0])
	}
	// Joint stability: 1.0 - 1*0.2 = 0.8
	if math.Abs(f[4]-0.8) > 1e-9 {
		t.Fatalf("expected joint_stability 0.8, got %f", f[4])
	}
}
|
||||
|
||||
// TestAttentionFeatures_Nil_Good checks that a nil result produces a 5D
// all-zero feature vector rather than panicking.
func TestAttentionFeatures_Nil_Good(t *testing.T) {
	f := AttentionFeatures(nil)
	if len(f) != 5 {
		t.Fatalf("expected 5D, got %dD", len(f))
	}
	for i, v := range f {
		if v != 0 {
			t.Fatalf("expected zero at %d, got %f", i, v)
		}
	}
}
|
||||
|
||||
// TestAttentionFeatureLabels_Good checks the label count matches the
// feature vector dimensionality.
func TestAttentionFeatureLabels_Good(t *testing.T) {
	labels := AttentionFeatureLabels()
	if len(labels) != 5 {
		t.Fatalf("expected 5 labels, got %d", len(labels))
	}
}
|
||||
|
||||
// --- Test helpers ---
|
||||
|
||||
// makeCoherentSnapshot creates a snapshot where all heads in all layers
// have identical K vectors (high coherence, high cross-alignment).
//
// layers/heads set the snapshot dimensions; each head holds seqLen*dim
// float32 values. Every head is a copy of the same deterministic vector,
// so all pairwise and cross-layer cosine similarities are exactly 1.
func makeCoherentSnapshot(layers, heads, seqLen, dim int) *inference.AttentionSnapshot {
	// Single repeating vector.
	vec := make([]float32, seqLen*dim)
	for i := range vec {
		vec[i] = float32(i%dim+1) * 0.1
	}

	keys := make([][][]float32, layers)
	for l := range layers {
		keys[l] = make([][]float32, heads)
		for h := range heads {
			// Copy per head so heads do not alias the same backing array.
			head := make([]float32, len(vec))
			copy(head, vec)
			keys[l][h] = head
		}
	}
	return &inference.AttentionSnapshot{
		NumLayers:    layers,
		NumHeads:     heads,
		SeqLen:       seqLen,
		HeadDim:      dim,
		Keys:         keys,
		Architecture: "test",
	}
}
|
||||
|
||||
// makeOrthogonalSnapshot creates a snapshot where each head has a distinct
// basis direction (low pairwise coherence).
//
// Heads are filled with seeded (PCG 42,0) random values in [-1, 1], so
// the snapshot is deterministic across runs and high-dimensional heads
// are near-orthogonal in expectation.
func makeOrthogonalSnapshot(layers, heads, seqLen, dim int) *inference.AttentionSnapshot {
	keys := make([][][]float32, layers)
	rng := rand.New(rand.NewPCG(42, 0))
	for l := range layers {
		keys[l] = make([][]float32, heads)
		for h := range heads {
			head := make([]float32, seqLen*dim)
			for i := range head {
				head[i] = rng.Float32()*2 - 1 // Random in [-1, 1].
			}
			keys[l][h] = head
		}
	}
	return &inference.AttentionSnapshot{
		NumLayers:    layers,
		NumHeads:     heads,
		SeqLen:       seqLen,
		HeadDim:      dim,
		Keys:         keys,
		Architecture: "test",
	}
}
|
||||
Loading…
Add table
Reference in a new issue