From 28309b26dc85e4718d6231c8710d6b42bf700d10 Mon Sep 17 00:00:00 2001
From: Snider
Date: Mon, 23 Feb 2026 00:28:48 +0000
Subject: [PATCH] feat: add Q/K Bone Orientation analysis engine (pure Go CPU
 math)

Co-Authored-By: Virgil
---
 pkg/lem/attention.go      | 221 +++++++++++++++++++++++++++++++++++++
 pkg/lem/attention_test.go | 225 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 446 insertions(+)
 create mode 100644 pkg/lem/attention.go
 create mode 100644 pkg/lem/attention_test.go

diff --git a/pkg/lem/attention.go b/pkg/lem/attention.go
new file mode 100644
index 0000000..6f5a891
--- /dev/null
+++ b/pkg/lem/attention.go
@@ -0,0 +1,221 @@
+// Q/K Bone Orientation analysis engine.
+//
+// Computes attention coherence metrics from KV cache snapshots.
+// Pure Go CPU math — no GPU, no CGO dependencies.
+package lem
+
+import (
+	"math"
+
+	"forge.lthn.ai/core/go-inference"
+)
+
+// BOResult holds Q/K Bone Orientation metrics for a single inference.
+type BOResult struct {
+	MeanCoherence       float64   `json:"mean_coherence"`        // Mean pairwise head coherence (0-1)
+	MeanCrossAlignment  float64   `json:"mean_cross_alignment"`  // Mean adjacent-layer alignment (0-1)
+	MeanHeadEntropy     float64   `json:"mean_head_entropy"`     // Mean attention entropy per head (0-1)
+	PhaseLockScore      float64   `json:"phase_lock_score"`      // Fraction of head pairs above coherence threshold
+	JointCollapseCount  int       `json:"joint_collapse_count"`  // Layers where cross-alignment drops below threshold
+	LayerCoherence      []float64 `json:"layer_coherence"`       // Per-layer mean head coherence
+	LayerCrossAlignment []float64 `json:"layer_cross_alignment"` // Per-layer cross-alignment (len = layers-1)
+}
+
+// Composite returns a 0-100 score from BO metrics.
+func (r *BOResult) Composite() float64 {
+	score := (0.30*r.MeanCoherence +
+		0.25*r.MeanCrossAlignment +
+		0.20*r.PhaseLockScore +
+		0.15*r.MeanHeadEntropy +
+		0.10*math.Max(0, 1.0-float64(r.JointCollapseCount)*0.2)) * 100.0
+	return min(100, max(0, score))
+}
+
+const (
+	coherenceThreshold = 0.7 // Minimum cosine sim for "phase-locked" head pair
+	collapseThreshold  = 0.5 // Below this cross-alignment = joint collapse
+)
+
+// AnalyseAttention computes Q/K Bone Orientation metrics from a KV cache snapshot.
+func AnalyseAttention(snap *inference.AttentionSnapshot) *BOResult {
+	if snap == nil || len(snap.Keys) == 0 {
+		return &BOResult{}
+	}
+
+	result := &BOResult{
+		LayerCoherence:      make([]float64, snap.NumLayers),
+		LayerCrossAlignment: make([]float64, max(0, snap.NumLayers-1)),
+	}
+
+	var totalCoherence, totalEntropy float64
+	var totalPairsLocked, totalPairs, headsSeen int
+	layerMeans := make([][]float32, snap.NumLayers) // mean K vector per layer
+
+	for layer := 0; layer < snap.NumLayers; layer++ {
+		if layer >= len(snap.Keys) || snap.Keys[layer] == nil {
+			continue
+		}
+		heads := snap.Keys[layer]
+		nHeads := len(heads)
+
+		// Compute mean K vector for this layer (average over heads).
+		layerMeans[layer] = meanVector(heads)
+
+		// Pairwise head coherence within layer.
+		var layerCoh float64
+		var pairs int
+		for i := 0; i < nHeads; i++ {
+			for j := i + 1; j < nHeads; j++ {
+				sim := cosineSim32(heads[i], heads[j])
+				layerCoh += sim
+				pairs++
+				if sim >= coherenceThreshold {
+					totalPairsLocked++
+				}
+				totalPairs++
+			}
+		}
+		if pairs > 0 {
+			layerCoh /= float64(pairs)
+		}
+		result.LayerCoherence[layer] = layerCoh
+		totalCoherence += layerCoh
+
+		// Per-head entropy (magnitude distribution across positions).
+		for _, head := range heads {
+			totalEntropy += headEntropy(head, snap.SeqLen, snap.HeadDim)
+			headsSeen++
+		}
+	}
+
+	// Cross-layer alignment.
+	var totalCross float64
+	for i := 0; i < snap.NumLayers-1; i++ {
+		if layerMeans[i] == nil || layerMeans[i+1] == nil {
+			continue
+		}
+		alignment := cosineSim32(layerMeans[i], layerMeans[i+1])
+		result.LayerCrossAlignment[i] = alignment
+		totalCross += alignment
+		if alignment < collapseThreshold {
+			result.JointCollapseCount++
+		}
+	}
+
+	if snap.NumLayers > 0 {
+		result.MeanCoherence = totalCoherence / float64(snap.NumLayers)
+	}
+	if snap.NumLayers > 1 {
+		result.MeanCrossAlignment = totalCross / float64(snap.NumLayers-1)
+	}
+	if headsSeen > 0 { // mean over heads actually analysed, not NumLayers*NumHeads
+		result.MeanHeadEntropy = totalEntropy / float64(headsSeen)
+	}
+	if totalPairs > 0 {
+		result.PhaseLockScore = float64(totalPairsLocked) / float64(totalPairs)
+	}
+
+	return result
+}
+
+// cosineSim32 computes cosine similarity between two float32 slices.
+func cosineSim32(a, b []float32) float64 {
+	if len(a) != len(b) || len(a) == 0 {
+		return 0
+	}
+	var dot, normA, normB float64
+	for i := range a {
+		ai, bi := float64(a[i]), float64(b[i])
+		dot += ai * bi
+		normA += ai * ai
+		normB += bi * bi
+	}
+	denom := math.Sqrt(normA) * math.Sqrt(normB)
+	if denom == 0 {
+		return 0
+	}
+	return dot / denom
+}
+
+// meanVector computes element-wise mean across multiple float32 slices.
+func meanVector(vecs [][]float32) []float32 {
+	if len(vecs) == 0 {
+		return nil
+	}
+	n := len(vecs[0])
+	mean := make([]float32, n)
+	for _, v := range vecs {
+		for i := range v {
+			if i < n {
+				mean[i] += v[i]
+			}
+		}
+	}
+	scale := float32(len(vecs))
+	for i := range mean {
+		mean[i] /= scale
+	}
+	return mean
+}
+
+// headEntropy computes normalised Shannon entropy of K vector magnitudes
+// across sequence positions for a single head.
+func headEntropy(head []float32, seqLen, headDim int) float64 {
+	if seqLen == 0 || headDim == 0 {
+		return 0
+	}
+	// Compute magnitude per position.
+	mags := make([]float64, seqLen)
+	var total float64
+	for pos := 0; pos < seqLen; pos++ {
+		var sum float64
+		start := pos * headDim
+		for d := 0; d < headDim && start+d < len(head); d++ {
+			v := float64(head[start+d])
+			sum += v * v
+		}
+		mags[pos] = math.Sqrt(sum)
+		total += mags[pos]
+	}
+	if total == 0 {
+		return 0
+	}
+	// Normalised Shannon entropy.
+	var entropy float64
+	for _, m := range mags {
+		p := m / total
+		if p > 0 {
+			entropy -= p * math.Log2(p)
+		}
+	}
+	maxEntropy := math.Log2(float64(seqLen))
+	if maxEntropy == 0 {
+		return 0
+	}
+	return entropy / maxEntropy
+}
+
+// AttentionFeatures returns a 5D feature vector from BO metrics.
+func AttentionFeatures(ar *BOResult) []float64 {
+	if ar == nil {
+		return make([]float64, 5)
+	}
+	return []float64{
+		ar.MeanCoherence,
+		ar.MeanCrossAlignment,
+		ar.MeanHeadEntropy,
+		ar.PhaseLockScore,
+		math.Max(0, 1.0-float64(ar.JointCollapseCount)*0.2),
+	}
+}
+
+// AttentionFeatureLabels returns the labels for the attention feature vector.
+func AttentionFeatureLabels() []string {
+	return []string{
+		"mean_coherence",
+		"cross_alignment",
+		"head_entropy",
+		"phase_lock",
+		"joint_stability",
+	}
+}
diff --git a/pkg/lem/attention_test.go b/pkg/lem/attention_test.go
new file mode 100644
index 0000000..c7115a4
--- /dev/null
+++ b/pkg/lem/attention_test.go
@@ -0,0 +1,225 @@
+package lem
+
+import (
+	"math"
+	"math/rand/v2"
+	"testing"
+
+	"forge.lthn.ai/core/go-inference"
+)
+
+func TestAnalyseAttention_Coherent_Good(t *testing.T) {
+	// All heads in all layers point the same direction = high coherence.
+	snap := makeCoherentSnapshot(4, 2, 8, 64)
+	result := AnalyseAttention(snap)
+
+	if result.MeanCoherence < 0.9 {
+		t.Fatalf("expected high coherence for aligned heads, got %.3f", result.MeanCoherence)
+	}
+	if result.JointCollapseCount > 0 {
+		t.Fatalf("expected zero joint collapses, got %d", result.JointCollapseCount)
+	}
+	if result.PhaseLockScore < 0.9 {
+		t.Fatalf("expected high phase-lock, got %.3f", result.PhaseLockScore)
+	}
+}
+
+func TestAnalyseAttention_Collapsed_Good(t *testing.T) {
+	// Orthogonal heads = low coherence.
+	snap := makeOrthogonalSnapshot(4, 2, 8, 64)
+	result := AnalyseAttention(snap)
+
+	if result.MeanCoherence > 0.3 {
+		t.Fatalf("expected low coherence for orthogonal heads, got %.3f", result.MeanCoherence)
+	}
+}
+
+func TestAnalyseAttention_Nil_Good(t *testing.T) {
+	result := AnalyseAttention(nil)
+	if result.MeanCoherence != 0 {
+		t.Fatalf("expected zero coherence for nil snapshot, got %.3f", result.MeanCoherence)
+	}
+}
+
+func TestBoneOrientationScore_Composite_Good(t *testing.T) {
+	result := &BOResult{
+		MeanCoherence:       0.85,
+		MeanCrossAlignment:  0.80,
+		MeanHeadEntropy:     0.70,
+		PhaseLockScore:      0.90,
+		JointCollapseCount:  0,
+		LayerCoherence:      []float64{0.85, 0.85, 0.85, 0.85},
+		LayerCrossAlignment: []float64{0.80, 0.80, 0.80},
+	}
+	score := result.Composite()
+	if score < 60 || score > 100 {
+		t.Fatalf("composite out of range: %.1f", score)
+	}
+}
+
+func TestBoneOrientationScore_Composite_ZeroCollapses_Good(t *testing.T) {
+	result := &BOResult{
+		MeanCoherence:      1.0,
+		MeanCrossAlignment: 1.0,
+		MeanHeadEntropy:    1.0,
+		PhaseLockScore:     1.0,
+		JointCollapseCount: 0,
+	}
+	score := result.Composite()
+	if score != 100.0 {
+		t.Fatalf("expected 100.0 for perfect scores, got %.1f", score)
+	}
+}
+
+func TestBoneOrientationScore_Composite_ManyCollapses_Good(t *testing.T) {
+	result := &BOResult{
+		MeanCoherence:      0.0,
+		MeanCrossAlignment: 0.0,
+		MeanHeadEntropy:    0.0,
+		PhaseLockScore:     0.0,
+		JointCollapseCount: 10,
+	}
+	score := result.Composite()
+	if score != 0.0 {
+		t.Fatalf("expected 0.0 for zero scores, got %.1f", score)
+	}
+}
+
+func TestCosineSim32_Good(t *testing.T) {
+	a := []float32{1, 0, 0}
+	b := []float32{1, 0, 0}
+	sim := cosineSim32(a, b)
+	if math.Abs(sim-1.0) > 1e-6 {
+		t.Fatalf("expected cosine sim 1.0 for identical vectors, got %f", sim)
+	}
+}
+
+func TestCosineSim32_Orthogonal_Good(t *testing.T) {
+	a := []float32{1, 0, 0}
+	b := []float32{0, 1, 0}
+	sim := cosineSim32(a, b)
+	if math.Abs(sim) > 1e-6 {
+		t.Fatalf("expected cosine sim 0.0 for orthogonal vectors, got %f", sim)
+	}
+}
+
+func TestHeadEntropy_Uniform_Good(t *testing.T) {
+	// Uniform magnitudes across positions = max entropy.
+	seqLen, headDim := 8, 4
+	head := make([]float32, seqLen*headDim)
+	for i := range head {
+		head[i] = 1.0 // All same magnitude.
+	}
+	ent := headEntropy(head, seqLen, headDim)
+	if ent < 0.99 {
+		t.Fatalf("expected near-max entropy for uniform magnitudes, got %.3f", ent)
+	}
+}
+
+func TestHeadEntropy_Collapsed_Good(t *testing.T) {
+	// All magnitude concentrated in one position = low entropy.
+	seqLen, headDim := 8, 4
+	head := make([]float32, seqLen*headDim)
+	for d := 0; d < headDim; d++ {
+		head[d] = 10.0 // Only position 0 has magnitude.
+	}
+	ent := headEntropy(head, seqLen, headDim)
+	if ent > 0.1 {
+		t.Fatalf("expected near-zero entropy for concentrated magnitude, got %.3f", ent)
+	}
+}
+
+func TestAttentionFeatures_Good(t *testing.T) {
+	result := &BOResult{
+		MeanCoherence:      0.85,
+		MeanCrossAlignment: 0.80,
+		MeanHeadEntropy:    0.70,
+		PhaseLockScore:     0.90,
+		JointCollapseCount: 1,
+	}
+	f := AttentionFeatures(result)
+	if len(f) != 5 {
+		t.Fatalf("expected 5D, got %dD", len(f))
+	}
+	if f[0] != 0.85 {
+		t.Fatalf("expected coherence 0.85, got %f", f[0])
+	}
+	// Joint stability: 1.0 - 1*0.2 = 0.8
+	if math.Abs(f[4]-0.8) > 1e-9 {
+		t.Fatalf("expected joint_stability 0.8, got %f", f[4])
+	}
+}
+
+func TestAttentionFeatures_Nil_Good(t *testing.T) {
+	f := AttentionFeatures(nil)
+	if len(f) != 5 {
+		t.Fatalf("expected 5D, got %dD", len(f))
+	}
+	for i, v := range f {
+		if v != 0 {
+			t.Fatalf("expected zero at %d, got %f", i, v)
+		}
+	}
+}
+
+func TestAttentionFeatureLabels_Good(t *testing.T) {
+	labels := AttentionFeatureLabels()
+	if len(labels) != 5 {
+		t.Fatalf("expected 5 labels, got %d", len(labels))
+	}
+}
+
+// --- Test helpers ---
+
+// makeCoherentSnapshot creates a snapshot where all heads in all layers
+// have identical K vectors (high coherence, high cross-alignment).
+func makeCoherentSnapshot(layers, heads, seqLen, dim int) *inference.AttentionSnapshot {
+	// Single repeating vector.
+	vec := make([]float32, seqLen*dim)
+	for i := range vec {
+		vec[i] = float32(i%dim+1) * 0.1
+	}
+
+	keys := make([][][]float32, layers)
+	for l := range layers {
+		keys[l] = make([][]float32, heads)
+		for h := range heads {
+			head := make([]float32, len(vec))
+			copy(head, vec)
+			keys[l][h] = head
+		}
+	}
+	return &inference.AttentionSnapshot{
+		NumLayers:    layers,
+		NumHeads:     heads,
+		SeqLen:       seqLen,
+		HeadDim:      dim,
+		Keys:         keys,
+		Architecture: "test",
+	}
+}
+
+// makeOrthogonalSnapshot creates a snapshot where each head has a distinct
+// basis direction (low pairwise coherence).
+func makeOrthogonalSnapshot(layers, heads, seqLen, dim int) *inference.AttentionSnapshot {
+	keys := make([][][]float32, layers)
+	rng := rand.New(rand.NewPCG(42, 0))
+	for l := range layers {
+		keys[l] = make([][]float32, heads)
+		for h := range heads {
+			head := make([]float32, seqLen*dim)
+			for i := range head {
+				head[i] = rng.Float32()*2 - 1 // Random in [-1, 1].
+			}
+			keys[l][h] = head
+		}
+	}
+	return &inference.AttentionSnapshot{
+		NumLayers:    layers,
+		NumHeads:     heads,
+		SeqLen:       seqLen,
+		HeadDim:      dim,
+		Keys:         keys,
+		Architecture: "test",
+	}
+}