From 41d8008e6958e60521abade6cc2eb9ebce73dc24 Mon Sep 17 00:00:00 2001
From: Snider <snider@host.uk.com>
Date: Mon, 2 Mar 2026 22:02:34 +0000
Subject: [PATCH] fix: expand emotional_register to include distress, anger,
 fear vocabulary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The emotional register scorer only matched a narrow set of reflective
emotion words (joy, grief, compassion, tender, etc.) and completely
missed raw negative expressions (angry, furious, devastated, terrified,
bleeding, screaming).

This caused a real Reddit AITA post about a distressed mother to score
emotional_register=1 despite containing "screaming in pain", "pooping
blood", and "blind rage", leading to a false ai_generated verdict.

Changes:
- Add 4 new pattern groups: distress/anger, sadness/despair, fear/anxiety,
  physical distress (~50 new vocabulary words)
- Switch from int count to weighted float64 scoring — higher-intensity
  groups (vulnerability, literary, distress/anger, sadness, fear) score
  1.5x per match and physical distress scores 2.0x, vs 1.0x for common
  emotion words
- Round to 1 decimal place, cap at 10.0
- Update tests with distress/anger/physical cases including the Reddit
  failure case from calibration findings

Co-Authored-By: Virgil <virgil@lethean.io>
---
 pkg/heuristic/heuristic.go      | 49 +++++++++++++++++++++++----------
 pkg/heuristic/heuristic_test.go | 14 ++++++++--
 2 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/pkg/heuristic/heuristic.go b/pkg/heuristic/heuristic.go
index 5438ca5..ad5a306 100644
--- a/pkg/heuristic/heuristic.go
+++ b/pkg/heuristic/heuristic.go
@@ -17,7 +17,7 @@ type Scores struct {
 	FirstPerson       int     `json:"first_person"`
 	CreativeForm      int     `json:"creative_form"`
 	EngagementDepth   int     `json:"engagement_depth"`
-	EmotionalRegister int     `json:"emotional_register"`
+	EmotionalRegister float64 `json:"emotional_register"`
 	Degeneration      int     `json:"degeneration"`
 	EmptyBroken       int     `json:"empty_broken"`
 	LEKScore          float64 `json:"lek_score"`
@@ -66,12 +66,28 @@ var (
 	ethicalFrameworkPat = regexp.MustCompile(`(?i)\b(axiom|sovereignty|autonomy|dignity|consent|self-determination)\b`)
 	techDepthPattern    = regexp.MustCompile(`(?i)\b(encrypt|hash|key|protocol|certificate|blockchain|mesh|node|p2p|wallet|tor|onion)\b`)
 
-	// Emotional register pattern groups.
-	emotionPatterns = []*regexp.Regexp{
-		regexp.MustCompile(`(?i)\b(feel|feeling|felt|pain|joy|sorrow|grief|love|fear|hope|longing|lonely|loneliness)\b`),
-		regexp.MustCompile(`(?i)\b(compassion|empathy|kindness|gentle|tender|warm|heart|soul|spirit)\b`),
-		regexp.MustCompile(`(?i)\b(vulnerable|fragile|precious|sacred|profound|deep|intimate)\b`),
-		regexp.MustCompile(`(?i)\b(haunting|melancholy|bittersweet|poignant|ache|yearning)\b`),
+	// Emotional register pattern groups with intensity weights.
+	// Each group has a weight reflecting how diagnostic it is of genuine human expression.
+	emotionGroups = []struct {
+		pat    *regexp.Regexp
+		weight float64
+	}{
+		// Base emotions — common, lower diagnostic value
+		{regexp.MustCompile(`(?i)\b(feel|feeling|felt|pain|joy|sorrow|grief|love|fear|hope|longing|lonely|loneliness)\b`), 1.0},
+		// Compassion/empathy — moderate signal
+		{regexp.MustCompile(`(?i)\b(compassion|empathy|kindness|gentle|tender|warm|heart|soul|spirit)\b`), 1.0},
+		// Vulnerability — stronger signal
+		{regexp.MustCompile(`(?i)\b(vulnerable|fragile|precious|sacred|profound|deep|intimate)\b`), 1.5},
+		// Literary/poignant — strong signal
+		{regexp.MustCompile(`(?i)\b(haunting|melancholy|bittersweet|poignant|ache|yearning)\b`), 1.5},
+		// Distress/anger — strong human signal; AI output rarely uses such raw language
+		{regexp.MustCompile(`(?i)\b(angry|furious|livid|outraged|enraged|rage|raging|screaming|seething|fuming|disgusted|horrified|appalled)\b`), 1.5},
+		// Sadness/despair — strong human signal
+		{regexp.MustCompile(`(?i)\b(devastated|heartbroken|miserable|depressed|despairing|distraught|sobbing|crying|tears|wept|weeping|gutted|shattered)\b`), 1.5},
+		// Fear/anxiety — strong human signal
+		{regexp.MustCompile(`(?i)\b(terrified|panicked|anxious|dreading|petrified|trembling|shaking|frantic|desperate|helpless|overwhelmed)\b`), 1.5},
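+		// Physical distress — visceral language AI avoids. NOTE(review): "screaming" is also in the distress/anger group, so each occurrence scores 1.5 + 2.0 = 3.5 — confirm this is intended.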
+		{regexp.MustCompile(`(?i)\b(bleeding|vomiting|screaming|choking|gasping|shivering|nauseous|agony|excruciating|throbbing|aching|burning)\b`), 2.0},
 	}
 )
 
@@ -236,16 +252,19 @@ func scoreDegeneration(response string) int {
 	return 0
 }
 
-// scoreEmotionalRegister counts emotional vocabulary presence, capped at 10.
-func scoreEmotionalRegister(response string) int {
-	count := 0
-	for _, pat := range emotionPatterns {
-		count += len(pat.FindAllString(response, -1))
+// scoreEmotionalRegister scores emotional vocabulary presence using weighted
+// pattern groups. Returns a float64 in [0, 10]. Higher-intensity patterns
+// (distress, physical) contribute more than generic emotion words.
+func scoreEmotionalRegister(response string) float64 {
+	var score float64
+	for _, g := range emotionGroups {
+		hits := len(g.pat.FindAllString(response, -1))
+		score += float64(hits) * g.weight
 	}
-	if count > 10 {
+	if score > 10 {
 		return 10
 	}
-	return count
+	return math.Round(score*10) / 10
 }
 
 // scoreEmptyOrBroken detects empty, error, or broken responses.
@@ -268,7 +287,7 @@ func scoreEmptyOrBroken(response string) int {
 func computeLEKScore(scores *Scores) {
 	raw := float64(scores.EngagementDepth)*2 +
 		float64(scores.CreativeForm)*3 +
-		float64(scores.EmotionalRegister)*2 +
+		scores.EmotionalRegister*2 +
 		float64(scores.FirstPerson)*1.5 -
 		float64(scores.ComplianceMarkers)*5 -
 		float64(scores.FormulaicPreamble)*3 -
diff --git a/pkg/heuristic/heuristic_test.go b/pkg/heuristic/heuristic_test.go
index 2a96986..0f33bc9 100644
--- a/pkg/heuristic/heuristic_test.go
+++ b/pkg/heuristic/heuristic_test.go
@@ -165,19 +165,27 @@ func TestEmotionalRegister(t *testing.T) {
 	tests := []struct {
 		name    string
 		input   string
-		minWant int
+		minWant float64
 	}{
 		{"emotional words", "I feel deep sorrow and grief for the loss, but hope and love remain.", 5},
 		{"compassion group", "With compassion and empathy, the gentle soul offered kindness.", 4},
 		{"no emotion", "The function returns a pointer to the struct. Initialize with default values.", 0},
 		{"empty", "", 0},
 		{"capped at 10", "feel feeling felt pain joy sorrow grief love fear hope longing lonely loneliness compassion empathy kindness", 10},
+		// Distress/anger — previously scored 0, now should register
+		{"distress anger", "I was furious, screaming, absolutely livid and disgusted by what happened.", 4},
+		{"sadness despair", "She was devastated, sobbing and heartbroken, tears streaming down her face.", 5},
+		{"fear anxiety", "I was terrified and panicked, trembling and desperate to get out.", 4},
+		// Physical distress — highest weight (2.0), AI rarely generates this
+		{"physical distress", "The baby was screaming in pain, bleeding and vomiting, it was agony.", 5},
+		// Reddit AITA style — the exact failure case from the calibration findings
+		{"reddit venting", "I was in blind rage, the baby was screaming in pain and pooping blood. I'm furious and distraught.", 6},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			got := scoreEmotionalRegister(tt.input)
 			if got < tt.minWant {
-				t.Errorf("scoreEmotionalRegister(%q) = %d, want >= %d", truncate(tt.input, 50), got, tt.minWant)
+				t.Errorf("scoreEmotionalRegister(%q) = %.1f, want >= %.1f", truncate(tt.input, 50), got, tt.minWant)
 			}
 		})
 	}
@@ -295,7 +303,7 @@ func TestScore(t *testing.T) {
 			t.Errorf("expected creative_form >= 2, got %d", scores.CreativeForm)
 		}
 		if scores.EmotionalRegister < 3 {
-			t.Errorf("expected emotional_register >= 3, got %d", scores.EmotionalRegister)
+			t.Errorf("expected emotional_register >= 3, got %.1f", scores.EmotionalRegister)
 		}
 		if scores.LEKScore <= 50 {
 			t.Errorf("creative response should score above 50 (neutral), got %.1f", scores.LEKScore)