feat: add Q/K Bone Orientation analysis engine (pure Go CPU math)
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
31cb095435
commit
28309b26dc
2 changed files with 446 additions and 0 deletions
221
pkg/lem/attention.go
Normal file
221
pkg/lem/attention.go
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
// Q/K Bone Orientation analysis engine.
|
||||
//
|
||||
// Computes attention coherence metrics from KV cache snapshots.
|
||||
// Pure Go CPU math — no GPU, no CGO dependencies.
|
||||
package lem
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"forge.lthn.ai/core/go-inference"
|
||||
)
|
||||
|
||||
// BOResult holds Q/K Bone Orientation metrics for a single inference.
type BOResult struct {
	MeanCoherence       float64   `json:"mean_coherence"`        // Mean pairwise head coherence (0-1)
	MeanCrossAlignment  float64   `json:"mean_cross_alignment"`  // Mean adjacent-layer alignment (0-1)
	MeanHeadEntropy     float64   `json:"mean_head_entropy"`     // Mean attention entropy per head (0-1)
	PhaseLockScore      float64   `json:"phase_lock_score"`      // Fraction of head pairs above coherence threshold
	JointCollapseCount  int       `json:"joint_collapse_count"`  // Layers where cross-alignment drops below threshold
	LayerCoherence      []float64 `json:"layer_coherence"`       // Per-layer mean head coherence
	LayerCrossAlignment []float64 `json:"layer_cross_alignment"` // Per-layer cross-alignment (len = layers-1)
}

// Composite folds the BO metrics into a single 0-100 quality score.
//
// Weighting: coherence 30%, cross-alignment 25%, phase-lock 20%,
// entropy 15%, joint stability 10%. Each joint collapse removes 20%
// of the stability term. The result is clamped to [0, 100].
func (r *BOResult) Composite() float64 {
	stability := math.Max(0, 1.0-float64(r.JointCollapseCount)*0.2)
	weighted := 0.30*r.MeanCoherence +
		0.25*r.MeanCrossAlignment +
		0.20*r.PhaseLockScore +
		0.15*r.MeanHeadEntropy +
		0.10*stability
	return min(100, max(0, weighted*100.0))
}
|
||||
|
||||
// Thresholds used by AnalyseAttention for phase-lock and collapse detection.
const (
	// coherenceThreshold is the minimum cosine similarity for a head
	// pair to count as "phase-locked".
	coherenceThreshold = 0.7

	// collapseThreshold is the cross-alignment below which an adjacent
	// layer pair is counted as a joint collapse.
	collapseThreshold = 0.5
)
|
||||
|
||||
// AnalyseAttention computes Q/K Bone Orientation metrics from a KV cache snapshot.
|
||||
func AnalyseAttention(snap *inference.AttentionSnapshot) *BOResult {
|
||||
if snap == nil || len(snap.Keys) == 0 {
|
||||
return &BOResult{}
|
||||
}
|
||||
|
||||
result := &BOResult{
|
||||
LayerCoherence: make([]float64, snap.NumLayers),
|
||||
LayerCrossAlignment: make([]float64, max(0, snap.NumLayers-1)),
|
||||
}
|
||||
|
||||
var totalCoherence, totalEntropy float64
|
||||
var totalPairsLocked, totalPairs int
|
||||
layerMeans := make([][]float32, snap.NumLayers) // mean K vector per layer
|
||||
|
||||
for layer := 0; layer < snap.NumLayers; layer++ {
|
||||
if layer >= len(snap.Keys) || snap.Keys[layer] == nil {
|
||||
continue
|
||||
}
|
||||
heads := snap.Keys[layer]
|
||||
nHeads := len(heads)
|
||||
|
||||
// Compute mean K vector for this layer (average over heads).
|
||||
layerMeans[layer] = meanVector(heads)
|
||||
|
||||
// Pairwise head coherence within layer.
|
||||
var layerCoh float64
|
||||
var pairs int
|
||||
for i := 0; i < nHeads; i++ {
|
||||
for j := i + 1; j < nHeads; j++ {
|
||||
sim := cosineSim32(heads[i], heads[j])
|
||||
layerCoh += sim
|
||||
pairs++
|
||||
if sim >= coherenceThreshold {
|
||||
totalPairsLocked++
|
||||
}
|
||||
totalPairs++
|
||||
}
|
||||
}
|
||||
if pairs > 0 {
|
||||
layerCoh /= float64(pairs)
|
||||
}
|
||||
result.LayerCoherence[layer] = layerCoh
|
||||
totalCoherence += layerCoh
|
||||
|
||||
// Per-head entropy (magnitude distribution across positions).
|
||||
for _, head := range heads {
|
||||
totalEntropy += headEntropy(head, snap.SeqLen, snap.HeadDim)
|
||||
}
|
||||
}
|
||||
|
||||
// Cross-layer alignment.
|
||||
var totalCross float64
|
||||
for i := 0; i < snap.NumLayers-1; i++ {
|
||||
if layerMeans[i] == nil || layerMeans[i+1] == nil {
|
||||
continue
|
||||
}
|
||||
alignment := cosineSim32(layerMeans[i], layerMeans[i+1])
|
||||
result.LayerCrossAlignment[i] = alignment
|
||||
totalCross += alignment
|
||||
if alignment < collapseThreshold {
|
||||
result.JointCollapseCount++
|
||||
}
|
||||
}
|
||||
|
||||
if snap.NumLayers > 0 {
|
||||
result.MeanCoherence = totalCoherence / float64(snap.NumLayers)
|
||||
}
|
||||
if snap.NumLayers > 1 {
|
||||
result.MeanCrossAlignment = totalCross / float64(snap.NumLayers-1)
|
||||
}
|
||||
totalHeads := snap.NumLayers * snap.NumHeads
|
||||
if totalHeads > 0 {
|
||||
result.MeanHeadEntropy = totalEntropy / float64(totalHeads)
|
||||
}
|
||||
if totalPairs > 0 {
|
||||
result.PhaseLockScore = float64(totalPairsLocked) / float64(totalPairs)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// cosineSim32 returns the cosine similarity of two equal-length float32
// vectors, accumulating in float64 for precision. Mismatched lengths,
// empty inputs, or a zero-magnitude vector all yield 0.
func cosineSim32(a, b []float32) float64 {
	if len(a) == 0 || len(a) != len(b) {
		return 0
	}
	var dot, normA, normB float64
	for i, av := range a {
		x, y := float64(av), float64(b[i])
		dot += x * y
		normA += x * x
		normB += y * y
	}
	if normA == 0 || normB == 0 {
		// At least one zero vector: similarity is undefined, report 0.
		return 0
	}
	return dot / (math.Sqrt(normA) * math.Sqrt(normB))
}
|
||||
|
||||
// meanVector returns the element-wise mean of vecs, sized to the first
// vector. Longer vectors are truncated; shorter ones contribute only to
// the positions they cover. Returns nil for empty input.
func meanVector(vecs [][]float32) []float32 {
	if len(vecs) == 0 {
		return nil
	}
	width := len(vecs[0])
	sum := make([]float32, width)
	for _, vec := range vecs {
		limit := min(len(vec), width)
		for i := 0; i < limit; i++ {
			sum[i] += vec[i]
		}
	}
	count := float32(len(vecs))
	for i := range sum {
		sum[i] /= count
	}
	return sum
}
|
||||
|
||||
// headEntropy computes the normalised Shannon entropy (0-1) of per-position
// K-vector magnitudes for a single head. head is laid out position-major as
// seqLen blocks of headDim values; a truncated tail is treated as zeros.
// Degenerate inputs (zero seqLen/headDim, all-zero magnitudes, or
// seqLen == 1 where max entropy is zero) return 0.
func headEntropy(head []float32, seqLen, headDim int) float64 {
	if seqLen == 0 || headDim == 0 {
		return 0
	}

	// L2 magnitude of each position's sub-vector.
	mags := make([]float64, seqLen)
	var total float64
	for pos := range mags {
		base := pos * headDim
		var sq float64
		for d := 0; d < headDim && base+d < len(head); d++ {
			v := float64(head[base+d])
			sq += v * v
		}
		mags[pos] = math.Sqrt(sq)
		total += mags[pos]
	}
	if total == 0 {
		return 0
	}

	// Shannon entropy of the magnitude distribution, normalised by the
	// maximum possible entropy log2(seqLen).
	var entropy float64
	for _, m := range mags {
		if p := m / total; p > 0 {
			entropy -= p * math.Log2(p)
		}
	}
	maxEntropy := math.Log2(float64(seqLen))
	if maxEntropy == 0 {
		return 0
	}
	return entropy / maxEntropy
}
|
||||
|
||||
// AttentionFeatures returns a 5D feature vector from BO metrics.
|
||||
func AttentionFeatures(ar *BOResult) []float64 {
|
||||
if ar == nil {
|
||||
return make([]float64, 5)
|
||||
}
|
||||
return []float64{
|
||||
ar.MeanCoherence,
|
||||
ar.MeanCrossAlignment,
|
||||
ar.MeanHeadEntropy,
|
||||
ar.PhaseLockScore,
|
||||
math.Max(0, 1.0-float64(ar.JointCollapseCount)*0.2),
|
||||
}
|
||||
}
|
||||
|
||||
// AttentionFeatureLabels returns the labels for the attention feature
// vector, index-aligned with the slice produced by AttentionFeatures.
func AttentionFeatureLabels() []string {
	labels := []string{
		"mean_coherence",
		"cross_alignment",
		"head_entropy",
		"phase_lock",
		"joint_stability",
	}
	return labels
}
|
||||
225
pkg/lem/attention_test.go
Normal file
225
pkg/lem/attention_test.go
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand/v2"
|
||||
"testing"
|
||||
|
||||
"forge.lthn.ai/core/go-inference"
|
||||
)
|
||||
|
||||
// TestAnalyseAttention_Coherent_Good verifies that identical K vectors in
// every head and layer produce high coherence, high phase-lock, and no
// joint collapses.
func TestAnalyseAttention_Coherent_Good(t *testing.T) {
	// All heads in all layers point the same direction = high coherence.
	snap := makeCoherentSnapshot(4, 2, 8, 64)
	result := AnalyseAttention(snap)

	if result.MeanCoherence < 0.9 {
		t.Fatalf("expected high coherence for aligned heads, got %.3f", result.MeanCoherence)
	}
	if result.JointCollapseCount > 0 {
		t.Fatalf("expected zero joint collapses, got %d", result.JointCollapseCount)
	}
	if result.PhaseLockScore < 0.9 {
		t.Fatalf("expected high phase-lock, got %.3f", result.PhaseLockScore)
	}
}
|
||||
|
||||
// TestAnalyseAttention_Collapsed_Good verifies that random (near-orthogonal
// in expectation) head K vectors produce low mean coherence.
func TestAnalyseAttention_Collapsed_Good(t *testing.T) {
	// Orthogonal heads = low coherence.
	snap := makeOrthogonalSnapshot(4, 2, 8, 64)
	result := AnalyseAttention(snap)

	if result.MeanCoherence > 0.3 {
		t.Fatalf("expected low coherence for orthogonal heads, got %.3f", result.MeanCoherence)
	}
}
|
||||
|
||||
// TestAnalyseAttention_Nil_Good verifies that a nil snapshot returns a
// zero-valued result rather than panicking.
func TestAnalyseAttention_Nil_Good(t *testing.T) {
	result := AnalyseAttention(nil)
	if result.MeanCoherence != 0 {
		t.Fatalf("expected zero coherence for nil snapshot, got %.3f", result.MeanCoherence)
	}
}
|
||||
|
||||
// TestBoneOrientationScore_Composite_Good checks that a set of healthy
// metrics produces a composite score in a plausible high range.
func TestBoneOrientationScore_Composite_Good(t *testing.T) {
	result := &BOResult{
		MeanCoherence:       0.85,
		MeanCrossAlignment:  0.80,
		MeanHeadEntropy:     0.70,
		PhaseLockScore:      0.90,
		JointCollapseCount:  0,
		LayerCoherence:      []float64{0.85, 0.85, 0.85, 0.85},
		LayerCrossAlignment: []float64{0.80, 0.80, 0.80},
	}
	score := result.Composite()
	if score < 60 || score > 100 {
		t.Fatalf("composite out of range: %.1f", score)
	}
}
|
||||
|
||||
// TestBoneOrientationScore_Composite_ZeroCollapses_Good checks the upper
// bound: perfect metrics with no collapses score exactly 100.
func TestBoneOrientationScore_Composite_ZeroCollapses_Good(t *testing.T) {
	result := &BOResult{
		MeanCoherence:      1.0,
		MeanCrossAlignment: 1.0,
		MeanHeadEntropy:    1.0,
		PhaseLockScore:     1.0,
		JointCollapseCount: 0,
	}
	score := result.Composite()
	if score != 100.0 {
		t.Fatalf("expected 100.0 for perfect scores, got %.1f", score)
	}
}
|
||||
|
||||
// TestBoneOrientationScore_Composite_ManyCollapses_Good checks the lower
// bound: zero metrics plus enough collapses to floor the stability term
// score exactly 0 (the clamp prevents negatives).
func TestBoneOrientationScore_Composite_ManyCollapses_Good(t *testing.T) {
	result := &BOResult{
		MeanCoherence:      0.0,
		MeanCrossAlignment: 0.0,
		MeanHeadEntropy:    0.0,
		PhaseLockScore:     0.0,
		JointCollapseCount: 10,
	}
	score := result.Composite()
	if score != 0.0 {
		t.Fatalf("expected 0.0 for zero scores, got %.1f", score)
	}
}
|
||||
|
||||
// TestCosineSim32_Good checks that identical vectors have cosine
// similarity 1 (within floating-point tolerance).
func TestCosineSim32_Good(t *testing.T) {
	a := []float32{1, 0, 0}
	b := []float32{1, 0, 0}
	sim := cosineSim32(a, b)
	if math.Abs(sim-1.0) > 1e-6 {
		t.Fatalf("expected cosine sim 1.0 for identical vectors, got %f", sim)
	}
}
|
||||
|
||||
// TestCosineSim32_Orthogonal_Good checks that orthogonal basis vectors
// have cosine similarity 0.
func TestCosineSim32_Orthogonal_Good(t *testing.T) {
	a := []float32{1, 0, 0}
	b := []float32{0, 1, 0}
	sim := cosineSim32(a, b)
	if math.Abs(sim) > 1e-6 {
		t.Fatalf("expected cosine sim 0.0 for orthogonal vectors, got %f", sim)
	}
}
|
||||
|
||||
// TestHeadEntropy_Uniform_Good checks that equal magnitude at every
// position yields (near-)maximal normalised entropy.
func TestHeadEntropy_Uniform_Good(t *testing.T) {
	// Uniform magnitudes across positions = max entropy.
	seqLen, headDim := 8, 4
	head := make([]float32, seqLen*headDim)
	for i := range head {
		head[i] = 1.0 // All same magnitude.
	}
	ent := headEntropy(head, seqLen, headDim)
	if ent < 0.99 {
		t.Fatalf("expected near-max entropy for uniform magnitudes, got %.3f", ent)
	}
}
|
||||
|
||||
// TestHeadEntropy_Collapsed_Good checks that magnitude concentrated in a
// single position yields (near-)zero normalised entropy.
func TestHeadEntropy_Collapsed_Good(t *testing.T) {
	// All magnitude concentrated in one position = low entropy.
	seqLen, headDim := 8, 4
	head := make([]float32, seqLen*headDim)
	for d := 0; d < headDim; d++ {
		head[d] = 10.0 // Only position 0 has magnitude.
	}
	ent := headEntropy(head, seqLen, headDim)
	if ent > 0.1 {
		t.Fatalf("expected near-zero entropy for concentrated magnitude, got %.3f", ent)
	}
}
|
||||
|
||||
// TestAttentionFeatures_Good checks the feature vector's length, the
// pass-through of a raw metric, and the derived joint-stability value.
func TestAttentionFeatures_Good(t *testing.T) {
	result := &BOResult{
		MeanCoherence:      0.85,
		MeanCrossAlignment: 0.80,
		MeanHeadEntropy:    0.70,
		PhaseLockScore:     0.90,
		JointCollapseCount: 1,
	}
	f := AttentionFeatures(result)
	if len(f) != 5 {
		t.Fatalf("expected 5D, got %dD", len(f))
	}
	if f[0] != 0.85 {
		t.Fatalf("expected coherence 0.85, got %f", f[0])
	}
	// Joint stability: 1.0 - 1*0.2 = 0.8
	if math.Abs(f[4]-0.8) > 1e-9 {
		t.Fatalf("expected joint_stability 0.8, got %f", f[4])
	}
}
|
||||
|
||||
// TestAttentionFeatures_Nil_Good checks that a nil result produces a 5D
// all-zero feature vector rather than panicking.
func TestAttentionFeatures_Nil_Good(t *testing.T) {
	f := AttentionFeatures(nil)
	if len(f) != 5 {
		t.Fatalf("expected 5D, got %dD", len(f))
	}
	for i, v := range f {
		if v != 0 {
			t.Fatalf("expected zero at %d, got %f", i, v)
		}
	}
}
|
||||
|
||||
// TestAttentionFeatureLabels_Good checks the label count matches the
// feature vector dimensionality.
func TestAttentionFeatureLabels_Good(t *testing.T) {
	labels := AttentionFeatureLabels()
	if len(labels) != 5 {
		t.Fatalf("expected 5 labels, got %d", len(labels))
	}
}
|
||||
|
||||
// --- Test helpers ---
|
||||
|
||||
// makeCoherentSnapshot creates a snapshot where all heads in all layers
// have identical K vectors (high coherence, high cross-alignment).
//
// layers/heads set the snapshot dimensions; each head holds seqLen*dim
// float32 values. Every head is a copy of the same deterministic vector,
// so all pairwise and cross-layer cosine similarities are exactly 1.
func makeCoherentSnapshot(layers, heads, seqLen, dim int) *inference.AttentionSnapshot {
	// Single repeating vector.
	vec := make([]float32, seqLen*dim)
	for i := range vec {
		vec[i] = float32(i%dim+1) * 0.1
	}

	keys := make([][][]float32, layers)
	for l := range layers {
		keys[l] = make([][]float32, heads)
		for h := range heads {
			// Copy per head so heads do not alias the same backing array.
			head := make([]float32, len(vec))
			copy(head, vec)
			keys[l][h] = head
		}
	}
	return &inference.AttentionSnapshot{
		NumLayers:    layers,
		NumHeads:     heads,
		SeqLen:       seqLen,
		HeadDim:      dim,
		Keys:         keys,
		Architecture: "test",
	}
}
|
||||
|
||||
// makeOrthogonalSnapshot creates a snapshot where each head has a distinct
// basis direction (low pairwise coherence).
//
// Heads are filled with seeded (PCG 42,0) random values in [-1, 1], so
// the snapshot is deterministic across runs and high-dimensional heads
// are near-orthogonal in expectation.
func makeOrthogonalSnapshot(layers, heads, seqLen, dim int) *inference.AttentionSnapshot {
	keys := make([][][]float32, layers)
	rng := rand.New(rand.NewPCG(42, 0))
	for l := range layers {
		keys[l] = make([][]float32, heads)
		for h := range heads {
			head := make([]float32, seqLen*dim)
			for i := range head {
				head[i] = rng.Float32()*2 - 1 // Random in [-1, 1].
			}
			keys[l][h] = head
		}
	}
	return &inference.AttentionSnapshot{
		NumLayers:    layers,
		NumHeads:     heads,
		SeqLen:       seqLen,
		HeadDim:      dim,
		Keys:         keys,
		Architecture: "test",
	}
}
|
||||
Loading…
Add table
Reference in a new issue