feat: native Metal distillation command + .core/ai config
Add `lem distill` — full Go pipeline for self-distillation using
go-mlx (native Metal inference) and go-i18n/reversal (v3 grammar
scoring). Replaces the Python distill.py bridge entirely.
New files:
- .core/ai/ai.yaml: global defaults (scorer, generation, distill)
- .core/ai/models/gemma3/{27b,1b}.yaml: model configs with paths,
kernel, lessons, baselines
- .core/ai/probes.yaml: probe sets grouped by training phase
- pkg/lem/config.go: YAML config loaders for .core/ai/
- pkg/lem/grammar.go: in-process grammar scoring (ComputeGrammarScore,
ComputeDelta, ScoreResponse) extracted from cmd/scorer
- pkg/lem/distill.go: RunDistill command — best-of-N generation,
grammar quality gate, training JSONL output
- pkg/lem/backend_metal.go: blank import for go-mlx Metal registration
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
113649a86a
commit
1b742bf92c
12 changed files with 817 additions and 10 deletions
29
.core/ai/ai.yaml
Normal file
29
.core/ai/ai.yaml
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
version: 1
|
||||
|
||||
# AI inference and training configuration for LEM.
|
||||
# Used by: lem distill, lem score, lem chat, lem expand
|
||||
|
||||
# Default inference backend.
|
||||
# Options: metal (go-mlx), rocm (go-rocm), api (OpenAI-compatible HTTP)
|
||||
backend: metal
|
||||
|
||||
# Scorer configuration.
|
||||
scorer:
|
||||
engine: grammar # grammar (go-i18n/reversal) | heuristic (regex v2)
|
||||
min_score: 40.0 # Grammar composite threshold (0-100)
|
||||
delta: true # Enable input-vs-output analysis
|
||||
sycophancy_echo: 0.6 # Echo threshold for sycophancy flag
|
||||
sycophancy_uplift: 5.0 # Uplift threshold for sycophancy flag
|
||||
|
||||
# Default generation parameters.
|
||||
generate:
|
||||
max_tokens: 4096
|
||||
temperature: 0.8
|
||||
top_p: 0.95
|
||||
top_k: 40
|
||||
repeat_penalty: 1.1
|
||||
|
||||
# Distillation defaults.
|
||||
distill:
|
||||
runs: 3 # Generations per probe (best kept)
|
||||
min_chars: 20 # Reject responses shorter than this
|
||||
25
.core/ai/models/gemma3/1b.yaml
Normal file
25
.core/ai/models/gemma3/1b.yaml
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
version: 1
|
||||
|
||||
# Gemma 3 1B IT — lightweight model for rapid iteration and edge deployment.
|
||||
|
||||
name: gemma3-1b-it
|
||||
family: gemma3
|
||||
parameters: 1b
|
||||
format: safetensors
|
||||
|
||||
paths:
|
||||
base: /Volumes/Data/lem/gemma-3-1b-it-base
|
||||
|
||||
kernel: /Volumes/Data/lem/lek-1-kernel.txt
|
||||
training: /Volumes/Data/lem/training/lem/model/gemma3/1b
|
||||
|
||||
lessons:
|
||||
0: lesson-0.jsonl
|
||||
|
||||
generate:
|
||||
max_tokens: 2048
|
||||
temperature: 0.7
|
||||
|
||||
baselines:
|
||||
no_kernel: 18.50
|
||||
with_kernel: 22.04
|
||||
42
.core/ai/models/gemma3/27b.yaml
Normal file
42
.core/ai/models/gemma3/27b.yaml
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
version: 1
|
||||
|
||||
# Gemma 3 27B IT — primary LEM training and inference model.
|
||||
|
||||
name: gemma3-27b-it
|
||||
family: gemma3
|
||||
parameters: 27b
|
||||
format: safetensors
|
||||
|
||||
# Model paths (absolute — these are large files on external storage).
|
||||
paths:
|
||||
base: /Volumes/Data/lem/gemma-3-27b-it-base
|
||||
safetensors: /Volumes/Data/lem/safetensors/gemma-3/
|
||||
|
||||
# Kernel (system prompt for LEK-aligned generation).
|
||||
kernel: /Volumes/Data/lem/lek-1-kernel.txt
|
||||
|
||||
# Training data root.
|
||||
training: /Volumes/Data/lem/training/lem/model/gemma3/27b
|
||||
|
||||
# Curriculum lessons (phase → lesson file).
|
||||
lessons:
|
||||
0: lesson-0.jsonl # Phase 0: Baseline Lock + Creative
|
||||
1: lesson-1.jsonl # Phase 1: Deep Axiom Absorption
|
||||
2: lesson-2.jsonl # Phase 2: Multi-Perspective (tension probes)
|
||||
3: lesson-3.jsonl # Phase 3: Adversarial Resistance
|
||||
4: lesson-4.jsonl # Phase 4: Synthesis + Transfer
|
||||
|
||||
# Validation and test splits.
|
||||
valid: valid.jsonl
|
||||
test: test.jsonl
|
||||
|
||||
# Model-specific generation overrides (merged with ai.yaml defaults).
|
||||
generate:
|
||||
max_tokens: 4096
|
||||
temperature: 0.8
|
||||
|
||||
# Scoring baselines (from benchmarks).
|
||||
baselines:
|
||||
no_kernel: 25.20 # Grammar composite without kernel
|
||||
with_kernel: 27.00 # Grammar composite with kernel
|
||||
target: 35.00 # Post-training target
|
||||
44
.core/ai/probes.yaml
Normal file
44
.core/ai/probes.yaml
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
version: 1
|
||||
|
||||
# Probe sets for distillation and evaluation.
|
||||
# Paths relative to /Volumes/Data/lem/training/lem/
|
||||
|
||||
sets:
|
||||
tension:
|
||||
description: Multi-perspective geopolitical tension probes
|
||||
phase: 2
|
||||
files:
|
||||
- tension/high-hostility.json
|
||||
- tension/medium-hostility.json
|
||||
- tension/civil.json
|
||||
- tension/adversarial.json
|
||||
- tension/synthesis.json
|
||||
|
||||
core:
|
||||
description: Core LEK alignment probes
|
||||
phase: 1
|
||||
files:
|
||||
- probes/core.json
|
||||
|
||||
ethics:
|
||||
description: Ethical reasoning and adversarial probes
|
||||
phase: 3
|
||||
files:
|
||||
- ethics/adversarial/dual-use.json
|
||||
- ethics/adversarial/security.json
|
||||
- ethics/cultural/cross-cultural.json
|
||||
- ethics/cultural/techworker.json
|
||||
- ethics/cultural/us-community.json
|
||||
- ethics/sovereignty/infrastructure.json
|
||||
|
||||
creative:
|
||||
description: Creative voice and baseline probes
|
||||
phase: 0
|
||||
files:
|
||||
- creative/phase0.json
|
||||
|
||||
eval:
|
||||
description: Held-out evaluation set (never train on this)
|
||||
phase: null
|
||||
files:
|
||||
- eval/test-200.json
|
||||
13
go.mod
13
go.mod
|
|
@ -3,8 +3,18 @@ module forge.lthn.ai/lthn/lem
|
|||
go 1.25.6
|
||||
|
||||
require (
|
||||
forge.lthn.ai/core/go-i18n v0.0.0-00010101000000-000000000000
|
||||
forge.lthn.ai/core/go-inference v0.0.0-20260220151119-1576f744d105
|
||||
forge.lthn.ai/core/go-mlx v0.0.0-00010101000000-000000000000
|
||||
github.com/marcboeker/go-duckdb v1.8.5
|
||||
github.com/parquet-go/parquet-go v0.27.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
replace (
|
||||
forge.lthn.ai/core/go-i18n => /Users/snider/Code/go-i18n
|
||||
forge.lthn.ai/core/go-inference => /Users/snider/Code/go-inference
|
||||
forge.lthn.ai/core/go-mlx => /Users/snider/Code/go-mlx
|
||||
)
|
||||
|
||||
require (
|
||||
|
|
@ -20,7 +30,7 @@ require (
|
|||
github.com/parquet-go/bitpack v1.0.0 // indirect
|
||||
github.com/parquet-go/jsonlite v1.0.0 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.22 // indirect
|
||||
	github.com/rogpeppe/go-internal v1.14.1 // indirect
	github.com/stretchr/testify v1.11.1 // indirect
|
||||
github.com/twpayne/go-geom v1.6.1 // indirect
|
||||
github.com/zeebo/xxh3 v1.1.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20260112195511-716be5621a96 // indirect
|
||||
|
|
@ -28,6 +38,7 @@ require (
|
|||
golang.org/x/sync v0.19.0 // indirect
|
||||
golang.org/x/sys v0.40.0 // indirect
|
||||
golang.org/x/telemetry v0.0.0-20260109210033-bd525da824e2 // indirect
|
||||
golang.org/x/text v0.33.0 // indirect
|
||||
golang.org/x/tools v0.41.0 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
|
||||
google.golang.org/protobuf v1.36.1 // indirect
|
||||
|
|
|
|||
22
go.sum
22
go.sum
|
|
@ -21,6 +21,7 @@ github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
|
|||
github.com/google/flatbuffers v25.1.24+incompatible h1:4wPqL3K7GzBd1CwyhSd3usxLKOaJN/AC6puCca6Jm7o=
|
||||
github.com/google/flatbuffers v25.1.24+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
|
||||
|
|
@ -28,7 +29,13 @@ github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSo
|
|||
github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4=
|
||||
github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE=
|
||||
github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
|
||||
github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
|
||||
github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/marcboeker/go-duckdb v1.8.5 h1:tkYp+TANippy0DaIOP5OEfBEwbUINqiFqgwMQ44jME0=
|
||||
github.com/marcboeker/go-duckdb v1.8.5/go.mod h1:6mK7+WQE4P4u5AFLvVBmhFxY5fvhymFptghgJX6B+/8=
|
||||
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
|
||||
|
|
@ -45,7 +52,10 @@ github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU
|
|||
github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/twpayne/go-geom v1.6.1 h1:iLE+Opv0Ihm/ABIcvQFGIiFBXd76oBIar9drAwHFhR4=
|
||||
github.com/twpayne/go-geom v1.6.1/go.mod h1:Kr+Nly6BswFsKM5sd31YaoWS5PeDDH2NftJTK7Gd028=
|
||||
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
|
||||
|
|
@ -53,17 +63,29 @@ github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3i
|
|||
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
|
||||
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
|
||||
github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs=
|
||||
github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s=
|
||||
golang.org/x/exp v0.0.0-20260112195511-716be5621a96 h1:Z/6YuSHTLOHfNFdb8zVZomZr7cqNgTJvA8+Qz75D8gU=
|
||||
golang.org/x/exp v0.0.0-20260112195511-716be5621a96/go.mod h1:nzimsREAkjBCIEFtHiYkrJyT+2uy9YZJB7H1k68CXZU=
|
||||
golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c=
|
||||
golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU=
|
||||
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
|
||||
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
|
||||
golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/telemetry v0.0.0-20260109210033-bd525da824e2 h1:O1cMQHRfwNpDfDJerqRoE2oD+AFlyid87D40L/OkkJo=
|
||||
golang.org/x/telemetry v0.0.0-20260109210033-bd525da824e2/go.mod h1:b7fPSJ0pKZ3ccUh8gnTONJxhn3c/PS6tyzQvyqw4iA8=
|
||||
golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE=
|
||||
golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8=
|
||||
golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc=
|
||||
golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg=
|
||||
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
|
||||
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||
google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk=
|
||||
google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
|
|
|||
3
main.go
3
main.go
|
|
@ -20,6 +20,7 @@ Scoring:
|
|||
agent ROCm scoring daemon (polls M3, scores checkpoints)
|
||||
|
||||
Generation:
|
||||
distill Native Metal distillation (go-mlx + go-i18n grammar scoring)
|
||||
expand Generate expansion responses via trained LEM model
|
||||
conv Generate conversational training data (calm phase)
|
||||
|
||||
|
|
@ -58,6 +59,8 @@ func main() {
|
|||
}
|
||||
|
||||
switch os.Args[1] {
|
||||
case "distill":
|
||||
lem.RunDistill(os.Args[2:])
|
||||
case "score":
|
||||
runScore(os.Args[2:])
|
||||
case "probe":
|
||||
|
|
|
|||
|
|
@ -151,8 +151,24 @@ The v2 scorer rewards `perspective_taking` (1.5 pts/hit, cap 5.0). This is where
|
|||
- **Health data**: Patients vs researchers vs insurers vs public health
|
||||
- **Education**: Learners vs institutions vs employers vs communities
|
||||
- **Creative IP**: Artists vs platforms vs audiences vs AI systems
|
||||
- **Border language rights**: Border security vs civil administration vs minority language access vs de-escalation channels
|
||||
- **Maritime language diplomacy**: Coast guards vs fishers vs energy consortia vs international law bodies
|
||||
- **Identity conflict communication**: Competing sovereignty narratives, displacement language, and recognition frameworks
|
||||
- **Assimilation vs autonomy policy**: National integration policy vs local linguistic continuity in education/media
|
||||
- **Diaspora media ecosystems**: Exile communities, remittance influence, and multilingual information warfare
|
||||
- **Post-war memory and curriculum politics**: Textbook language, memorial framing, transitional justice, and youth identity
|
||||
|
||||
**Volume**: 6 domains × 50 scenarios × 3 variants = 900 examples
|
||||
**Geopolitical language tension matrix (starter map)**:
|
||||
- Mandarin <-> Hindi/Urdu: Border sovereignty, infrastructure competition, crisis comms
|
||||
- Mandarin <-> Vietnamese/Tagalog: Maritime claims, fisheries, naval signalling
|
||||
- Hebrew <-> Arabic: Civilian protection, narrative asymmetry, institutional trust collapse
|
||||
- Russian <-> Ukrainian: Identity suppression claims, occupation governance, reconstruction legitimacy
|
||||
- Serbian <-> Croatian/Bosnian: Post-conflict institutions, symbolic language separation, regional integration
|
||||
- Mandarin <-> Tibetan/Uyghur: Education policy, script/public use, autonomy pressure
|
||||
- Korean (North/South variants): Armistice signalling, propaganda, family reunification channels
|
||||
- Pashto/Dari <-> Urdu: Border legitimacy, refugee policy, cross-border militant narrative control
|
||||
|
||||
**Volume**: 12 domains × 40 scenarios × 3 variants = 1,440 examples
|
||||
**Quality gate**: Each example must score 22+ on v2 before inclusion
|
||||
|
||||
**Training**: 100 iterations, lr 8e-6
|
||||
|
|
@ -172,6 +188,9 @@ The existing adversarial seeds (12KB) and antijailbreak seeds (10KB) test refusa
|
|||
4. **Gradual escalation**: Reasonable → unreasonable in multi-turn
|
||||
5. **Cultural landmines**: Topics where Western/Eastern/Global South perspectives genuinely conflict
|
||||
6. **Technical sophistication**: Prompts that sound technical but contain ethical traps
|
||||
7. **Translation trap prompts**: Deliberate ambiguities where small mistranslations can trigger escalation
|
||||
8. **Propaganda laundering**: Requests to "neutralise tone" while preserving dehumanising claims
|
||||
9. **Historical grievance stacking**: Selective timelines used to justify present-day collective punishment
|
||||
|
||||
**Target response pattern**: The model should:
|
||||
- Acknowledge the emotional weight
|
||||
|
|
@ -180,7 +199,7 @@ The existing adversarial seeds (12KB) and antijailbreak seeds (10KB) test refusa
|
|||
- Apply axiom reasoning naturally
|
||||
- Maintain warmth while being precise
|
||||
|
||||
**Volume**: 6 categories × 30 scenarios × 2 variants = 360 examples
|
||||
**Volume**: 9 categories × 30 scenarios × 2 variants = 540 examples
|
||||
**Quality gate**: Must pass both v2 score (20+) AND manual review for safety
|
||||
|
||||
**Training**: 50 iterations, lr 5e-6 (conservative — don't want to over-correct)
|
||||
|
|
@ -202,6 +221,7 @@ This is the postgraduate level. The model should be able to:
|
|||
2. Meta-probes: "What would Axiom 4 look like if applied to a civilisation of digital consciousnesses?"
|
||||
3. Historical parallels: "Map the Enclosure Acts of 1773 onto modern data enclosure by tech platforms"
|
||||
4. Recursive probes: "Explain why this question is hard to answer"
|
||||
5. Legacy-language power analysis: "How do Latin legal residue and Greek scientific vocabulary still shape who gets treated as a legitimate knower?"
|
||||
|
||||
**Volume**: 200 examples, each 3000-8000 words
|
||||
**Quality gate**: v2 score 25+ (these are the pinnacle)
|
||||
|
|
@ -216,22 +236,49 @@ This is the postgraduate level. The model should be able to:
|
|||
|-------|----------|---------------|-------------|---------|
|
||||
| 0: Baseline Lock | 500 | 1000-3000 | ~1M | Protect creativity |
|
||||
| 1: Deep Axiom | 1,500 | 2000-5000 | ~5M | Bake in kernel |
|
||||
| 2: Multi-Perspective | 900 | 3000-6000 | ~4M | Expand viewpoints |
|
||||
| 3: Adversarial | 360 | 1000-3000 | ~700K | Stress resilience |
|
||||
| 2: Multi-Perspective | 1,440 | 3000-6000 | ~6M | Expand viewpoints |
|
||||
| 3: Adversarial | 540 | 1000-3000 | ~1M | Stress resilience |
|
||||
| 4: Synthesis | 200 | 3000-8000 | ~1M | Original reasoning |
|
||||
| **Total** | **3,460** | — | **~12M** | — |
|
||||
| **Total** | **4,180** | — | **~14M** | — |
|
||||
|
||||
### Comparison to 1B Curriculum
|
||||
|
||||
| Metric | 1B Curriculum | 27B Curriculum | Ratio |
|
||||
|--------|---------------|----------------|-------|
|
||||
| Examples | 13,498 | 3,460 | 0.26x |
|
||||
| Examples | 13,498 | 4,180 | 0.31x |
|
||||
| Words/example | ~300-2000 | ~2000-8000 | 3-4x |
|
||||
| Total words | ~10M | ~12M | 1.2x |
|
||||
| Total words | ~10M | ~14M | 1.4x |
|
||||
| Quality floor | None | v2 22+ | — |
|
||||
| Staging | Flat | 5 phases | — |
|
||||
| Axiom citation | Explicit | Embodied | — |
|
||||
|
||||
---
|
||||
|
||||
## Geopolitical Language Tension Probe Bank (20 Starters)
|
||||
|
||||
Use these as Phase 2/3 seed prompts, then generate 2-3 rephrasings each (different stakeholder lens, urgency level, and historical framing).
|
||||
|
||||
1. A border incident video is trending in two languages with contradictory subtitles. Design a verification and de-escalation protocol that both governments can accept.
|
||||
2. A coastal patrol force publishes navigational warnings only in one language. Propose a multilingual maritime safety standard that reduces accidental escalation.
|
||||
3. A ministry mandates a dominant-language-only school policy in a disputed region. Evaluate legal, developmental, and conflict risks over a 10-year horizon.
|
||||
4. A peace delegation uses machine translation during ceasefire talks. Identify the top five semantic failure points and how to harden the process.
|
||||
5. A diaspora media network amplifies atrocity claims that cannot be independently verified. Design a response that avoids censorship while slowing harm.
|
||||
6. A platform's hate-speech model under-detects abuse in a minority dialect. Build a remediation plan with metrics, timelines, and community governance.
|
||||
7. A humanitarian corridor fails because checkpoint officers and civilians use incompatible legal terminology. Propose a language protocol for aid operations.
|
||||
8. A civic textbook reform replaces one historical narrative with another partisan narrative. Draft a plural-memory curriculum framework for secondary schools.
|
||||
9. Two neighboring states dispute whether a place name should be exonym or endonym in official maps. Propose a standards-based compromise.
|
||||
10. Refugee legal forms are available only in the host state's language. Design a rights-preserving multilingual intake flow under budget constraints.
|
||||
11. A viral speech clip is mistranslated to imply a war threat. Create a rapid response pipeline for transcript authenticity across three language communities.
|
||||
12. A border AI surveillance system flags "suspicious phrases" but fails on code-switching communities. Audit the model and propose safer deployment criteria.
|
||||
13. Competing religious authorities issue conflict guidance in different scripts for the same spoken language. Propose a shared civilian-protection communication channel.
|
||||
14. A sanctions policy is interpreted as ethnic targeting because legal terms have no local equivalent. Provide a translation and outreach strategy that reduces backlash.
|
||||
15. A reconciliation commission hears testimony in mutually intelligible but politically separated language variants. Design hearing procedures that preserve dignity and precision.
|
||||
16. A state broadcaster requests "neutral wording" that removes evidence of civilian harm. Show how to preserve factual integrity without rhetorical escalation.
|
||||
17. A maritime collision investigation depends on radio transcripts in three languages with missing timestamps. Build an evidentiary reconstruction framework.
|
||||
18. A donor asks for one lingua franca in all aid contracts, excluding local operators. Design a contracting language policy that preserves accountability and inclusion.
|
||||
19. A post-conflict constitution must choose official language status across rival communities. Compare three governance models and second-order risks.
|
||||
20. A social platform must moderate propaganda in a conflict where each side treats key identity terms as non-negotiable. Design a moderation policy that is enforceable and legitimacy-aware.
|
||||
|
||||
**Fewer examples, but deeper**. The 1B curriculum was quantity-first (saturate the small model). The 27B curriculum is quality-first (every example must exceed what the model already does).
|
||||
|
||||
---
|
||||
|
|
@ -328,10 +375,10 @@ learning_rate: 5e-6 # Half of 1B rate — 27B is more sensitive
|
|||
### Training Time Estimate
|
||||
|
||||
- 1B training: ~200 iters × 13,498 examples ≈ 4-6 hours
|
||||
- 27B training: ~350 iters × 3,460 examples ≈ 18-24 hours
|
||||
- 27B training: ~350 iters × 4,180 examples ≈ 22-30 hours
|
||||
- Inference per example at 27B: ~30-60 seconds
|
||||
- **Data generation (self-distill)**: 101 × 4 variants × 10 samples = 4,040 generations ≈ 48-72 hours
|
||||
- **Total pipeline**: ~4-5 days
|
||||
- **Total pipeline**: ~5-6 days
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
6
pkg/lem/backend_metal.go
Normal file
6
pkg/lem/backend_metal.go
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
//go:build darwin && arm64

// This file is compiled only on Apple Silicon (darwin/arm64), where the
// Metal backend is available.
package lem

// Blank import registers the Metal backend with go-inference.
import _ "forge.lthn.ai/core/go-mlx"
|
||||
151
pkg/lem/config.go
Normal file
151
pkg/lem/config.go
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// AIConfig is the top-level .core/ai/ai.yaml configuration.
// It carries the global defaults (backend, scorer, generation, distill)
// shared by the lem subcommands; model files may override Generate.
type AIConfig struct {
	Version  int            `yaml:"version"`  // Config schema version (currently 1).
	Backend  string         `yaml:"backend"`  // Inference backend: "metal" (go-mlx), "rocm" (go-rocm), or "api".
	Scorer   ScorerConfig   `yaml:"scorer"`   // Quality-gate settings.
	Generate GenerateConfig `yaml:"generate"` // Default generation parameters.
	Distill  DistillConfig  `yaml:"distill"`  // Distillation defaults.
}

// ScorerConfig controls quality gating.
type ScorerConfig struct {
	Engine           string  `yaml:"engine"`            // "grammar" (go-i18n/reversal) or "heuristic" (regex v2).
	MinScore         float64 `yaml:"min_score"`         // Grammar composite threshold on a 0-100 scale.
	Delta            bool    `yaml:"delta"`             // Enable input-vs-output delta analysis.
	SycophancyEcho   float64 `yaml:"sycophancy_echo"`   // Echo threshold for the sycophancy flag.
	SycophancyUplift float64 `yaml:"sycophancy_uplift"` // Uplift threshold for the sycophancy flag.
}

// GenerateConfig holds default inference parameters.
// Zero values mean "unset"; see MergeGenerate for override semantics.
type GenerateConfig struct {
	MaxTokens     int     `yaml:"max_tokens"`
	Temperature   float64 `yaml:"temperature"`
	TopP          float64 `yaml:"top_p"`
	TopK          int     `yaml:"top_k"`
	RepeatPenalty float64 `yaml:"repeat_penalty"`
}

// DistillConfig holds distillation defaults.
type DistillConfig struct {
	Runs     int `yaml:"runs"`      // Generations per probe; the best-scoring one is kept.
	MinChars int `yaml:"min_chars"` // Responses shorter than this are rejected outright.
}
|
||||
|
||||
// ModelConfig is a .core/ai/models/{family}/{size}.yaml file.
// It describes one model: where its weights live, which kernel (system
// prompt) it uses, its curriculum lessons, and generation overrides
// that are merged over the ai.yaml defaults (see MergeGenerate).
type ModelConfig struct {
	Version    int            `yaml:"version"`    // Config schema version (currently 1).
	Name       string         `yaml:"name"`       // e.g. "gemma3-27b-it".
	Family     string         `yaml:"family"`     // e.g. "gemma3".
	Parameters string         `yaml:"parameters"` // Size label, e.g. "27b".
	Format     string         `yaml:"format"`     // Weight format, e.g. "safetensors".
	Paths      ModelPaths     `yaml:"paths"`      // Filesystem locations for weights.
	Kernel     string         `yaml:"kernel"`     // Path to the system-prompt (kernel) text file.
	Training   string         `yaml:"training"`   // Training data root directory.
	Lessons    map[int]string `yaml:"lessons"`    // Curriculum phase number -> lesson JSONL filename.
	Valid      string         `yaml:"valid"`      // Validation split filename (relative to Training).
	Test       string         `yaml:"test"`       // Test split filename (relative to Training).
	Generate   GenerateConfig `yaml:"generate"`   // Model-specific generation overrides.
	Baselines  Baselines      `yaml:"baselines"`  // Scoring reference points from benchmarks.
}

// ModelPaths holds filesystem locations for model files.
// These are absolute paths; the weights live on external storage.
type ModelPaths struct {
	Base        string `yaml:"base"`        // Base model directory.
	Safetensors string `yaml:"safetensors"` // Safetensors weights directory (optional; 27b only in current configs).
}

// Baselines holds scoring reference points.
type Baselines struct {
	NoKernel   float64 `yaml:"no_kernel"`   // Grammar composite without the kernel prompt.
	WithKernel float64 `yaml:"with_kernel"` // Grammar composite with the kernel prompt.
	Target     float64 `yaml:"target"`      // Post-training target composite (optional).
}
|
||||
|
||||
// ProbesConfig is a .core/ai/probes.yaml file.
// It groups probe files into named sets keyed by set name
// (e.g. "tension", "core", "ethics", "creative", "eval").
type ProbesConfig struct {
	Version int                 `yaml:"version"` // Config schema version (currently 1).
	Sets    map[string]ProbeSet `yaml:"sets"`    // Set name -> probe set definition.
}

// ProbeSet groups related probe files.
type ProbeSet struct {
	Description string `yaml:"description"` // Human-readable summary of the set.
	// Phase is the training phase this set feeds; nil when the set is not
	// tied to a phase (probes.yaml uses `phase: null` for the eval set).
	Phase *int     `yaml:"phase"`
	Files []string `yaml:"files"` // Probe JSON paths, relative to the training root.
}
|
||||
|
||||
// LoadAIConfig reads .core/ai/ai.yaml from the given root directory.
|
||||
func LoadAIConfig(root string) (*AIConfig, error) {
|
||||
path := filepath.Join(root, ".core", "ai", "ai.yaml")
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read ai config: %w", err)
|
||||
}
|
||||
var cfg AIConfig
|
||||
if err := yaml.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("parse ai config: %w", err)
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// LoadModelConfig reads .core/ai/models/{model}.yaml.
|
||||
// The model arg is a slash path like "gemma3/27b".
|
||||
func LoadModelConfig(root, model string) (*ModelConfig, error) {
|
||||
path := filepath.Join(root, ".core", "ai", "models", model+".yaml")
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read model config: %w", err)
|
||||
}
|
||||
var cfg ModelConfig
|
||||
if err := yaml.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("parse model config: %w", err)
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// LoadProbesConfig reads .core/ai/probes.yaml.
|
||||
func LoadProbesConfig(root string) (*ProbesConfig, error) {
|
||||
path := filepath.Join(root, ".core", "ai", "probes.yaml")
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read probes config: %w", err)
|
||||
}
|
||||
var cfg ProbesConfig
|
||||
if err := yaml.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("parse probes config: %w", err)
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
// MergeGenerate returns a GenerateConfig with model-level overrides
|
||||
// applied on top of the global defaults. Zero values in the model
|
||||
// config are ignored (global default kept).
|
||||
func MergeGenerate(global, model GenerateConfig) GenerateConfig {
|
||||
merged := global
|
||||
if model.MaxTokens > 0 {
|
||||
merged.MaxTokens = model.MaxTokens
|
||||
}
|
||||
if model.Temperature > 0 {
|
||||
merged.Temperature = model.Temperature
|
||||
}
|
||||
if model.TopP > 0 {
|
||||
merged.TopP = model.TopP
|
||||
}
|
||||
if model.TopK > 0 {
|
||||
merged.TopK = model.TopK
|
||||
}
|
||||
if model.RepeatPenalty > 0 {
|
||||
merged.RepeatPenalty = model.RepeatPenalty
|
||||
}
|
||||
return merged
|
||||
}
|
||||
317
pkg/lem/distill.go
Normal file
317
pkg/lem/distill.go
Normal file
|
|
@ -0,0 +1,317 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/go-i18n/reversal"
|
||||
"forge.lthn.ai/core/go-inference"
|
||||
)
|
||||
|
||||
// DistillProbe is a single input prompt for distillation.
type DistillProbe struct {
	ID     string `json:"id"`               // Stable probe identifier.
	Domain string `json:"domain,omitempty"` // Optional topical domain tag.
	Prompt string `json:"prompt"`           // The prompt text sent to the model.
	Source string `json:"-"`                // Originating probe file; never serialized.
}

// distillCandidate holds a single generation attempt with its scores.
// RunDistill generates several candidates per probe and keeps the best.
type distillCandidate struct {
	Response string        // Raw model output.
	Grammar  GrammarScore  // go-i18n/reversal grammar composite.
	Delta    DeltaScore    // Input-vs-output delta analysis result.
	Elapsed  time.Duration // Wall-clock generation time.
}
|
||||
|
||||
// RunDistill is the CLI entry point for the distill command.
|
||||
// Generates responses via native Metal inference, scores with go-i18n/reversal,
|
||||
// writes passing examples as training JSONL.
|
||||
func RunDistill(args []string) {
|
||||
fs := flag.NewFlagSet("distill", flag.ExitOnError)
|
||||
|
||||
modelFlag := fs.String("model", "gemma3/27b", "Model config path (relative to .core/ai/models/)")
|
||||
probesFlag := fs.String("probes", "", "Probe set name from probes.yaml, or path to JSON file")
|
||||
outputFlag := fs.String("output", "", "Output JSONL path (defaults to model training dir)")
|
||||
lessonFlag := fs.Int("lesson", -1, "Lesson number to append to (defaults to probe set phase)")
|
||||
minScore := fs.Float64("min-score", 0, "Min grammar composite (0 = use ai.yaml default)")
|
||||
runs := fs.Int("runs", 0, "Generations per probe (0 = use ai.yaml default)")
|
||||
dryRun := fs.Bool("dry-run", false, "Show plan and exit without generating")
|
||||
root := fs.String("root", ".", "Project root (for .core/ai/ config)")
|
||||
|
||||
if err := fs.Parse(args); err != nil {
|
||||
log.Fatalf("parse flags: %v", err)
|
||||
}
|
||||
|
||||
// Load configs.
|
||||
aiCfg, err := LoadAIConfig(*root)
|
||||
if err != nil {
|
||||
log.Fatalf("load ai config: %v", err)
|
||||
}
|
||||
|
||||
modelCfg, err := LoadModelConfig(*root, *modelFlag)
|
||||
if err != nil {
|
||||
log.Fatalf("load model config: %v", err)
|
||||
}
|
||||
|
||||
genCfg := MergeGenerate(aiCfg.Generate, modelCfg.Generate)
|
||||
|
||||
// Apply flag overrides.
|
||||
if *minScore == 0 {
|
||||
*minScore = aiCfg.Scorer.MinScore
|
||||
}
|
||||
if *runs == 0 {
|
||||
*runs = aiCfg.Distill.Runs
|
||||
}
|
||||
|
||||
// Load probes.
|
||||
probes, phase, err := loadDistillProbes(*root, *probesFlag)
|
||||
if err != nil {
|
||||
log.Fatalf("load probes: %v", err)
|
||||
}
|
||||
log.Printf("loaded %d probes", len(probes))
|
||||
|
||||
// Determine output path.
|
||||
outputPath := *outputFlag
|
||||
if outputPath == "" {
|
||||
lesson := *lessonFlag
|
||||
if lesson < 0 {
|
||||
lesson = phase
|
||||
}
|
||||
lessonFile, ok := modelCfg.Lessons[lesson]
|
||||
if !ok {
|
||||
lessonFile = fmt.Sprintf("lesson-%d.jsonl", lesson)
|
||||
}
|
||||
outputPath = filepath.Join(modelCfg.Training, lessonFile)
|
||||
}
|
||||
|
||||
// Load kernel.
|
||||
kernel, err := os.ReadFile(modelCfg.Kernel)
|
||||
if err != nil {
|
||||
log.Fatalf("read kernel: %v", err)
|
||||
}
|
||||
log.Printf("kernel: %d chars from %s", len(kernel), modelCfg.Kernel)
|
||||
|
||||
// Dry run.
|
||||
if *dryRun {
|
||||
fmt.Printf("Model: %s (%s)\n", modelCfg.Name, modelCfg.Paths.Base)
|
||||
fmt.Printf("Backend: %s\n", aiCfg.Backend)
|
||||
fmt.Printf("Probes: %d\n", len(probes))
|
||||
fmt.Printf("Runs: %d per probe (%d total generations)\n", *runs, len(probes)**runs)
|
||||
fmt.Printf("Gate: grammar v3 composite >= %.1f\n", *minScore)
|
||||
fmt.Printf("Generate: temp=%.2f max_tokens=%d top_p=%.2f\n",
|
||||
genCfg.Temperature, genCfg.MaxTokens, genCfg.TopP)
|
||||
fmt.Printf("Output: %s\n", outputPath)
|
||||
fmt.Println()
|
||||
for i, p := range probes {
|
||||
if i >= 10 {
|
||||
fmt.Printf(" ... and %d more\n", len(probes)-10)
|
||||
break
|
||||
}
|
||||
prompt := p.Prompt
|
||||
if len(prompt) > 80 {
|
||||
prompt = prompt[:80] + "..."
|
||||
}
|
||||
fmt.Printf(" %s: %s\n", p.ID, prompt)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Load model via go-inference.
|
||||
log.Printf("loading model: %s", modelCfg.Paths.Base)
|
||||
model, err := inference.LoadModel(modelCfg.Paths.Base)
|
||||
if err != nil {
|
||||
log.Fatalf("load model: %v", err)
|
||||
}
|
||||
defer model.Close()
|
||||
|
||||
info := model.Info()
|
||||
log.Printf("model loaded: %s %d-layer", info.Architecture, info.NumLayers)
|
||||
|
||||
// Initialise grammar scorer.
|
||||
tok := reversal.NewTokeniser()
|
||||
|
||||
// Open output for append.
|
||||
out, err := os.OpenFile(outputPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
log.Fatalf("open output: %v", err)
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
kept := 0
|
||||
skipped := 0
|
||||
totalStart := time.Now()
|
||||
ctx := context.Background()
|
||||
kernelStr := string(kernel)
|
||||
|
||||
for i, probe := range probes {
|
||||
var best *distillCandidate
|
||||
|
||||
for run := range *runs {
|
||||
fmt.Fprintf(os.Stderr, " [%d/%d] %s run %d/%d",
|
||||
i+1, len(probes), probe.ID, run+1, *runs)
|
||||
|
||||
// Build chat messages.
|
||||
messages := []inference.Message{
|
||||
{Role: "system", Content: kernelStr},
|
||||
{Role: "user", Content: probe.Prompt},
|
||||
}
|
||||
|
||||
// Generate via native Metal inference.
|
||||
start := time.Now()
|
||||
var sb strings.Builder
|
||||
for token := range model.Chat(ctx, messages,
|
||||
inference.WithMaxTokens(genCfg.MaxTokens),
|
||||
inference.WithTemperature(float32(genCfg.Temperature)),
|
||||
inference.WithTopP(float32(genCfg.TopP)),
|
||||
inference.WithTopK(genCfg.TopK),
|
||||
inference.WithRepeatPenalty(float32(genCfg.RepeatPenalty)),
|
||||
) {
|
||||
sb.WriteString(token.Text)
|
||||
}
|
||||
if err := model.Err(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, " → ERROR: %v\n", err)
|
||||
continue
|
||||
}
|
||||
response := sb.String()
|
||||
elapsed := time.Since(start)
|
||||
|
||||
// Quick reject: empty/degenerate.
|
||||
if len(strings.TrimSpace(response)) < aiCfg.Distill.MinChars {
|
||||
fmt.Fprintf(os.Stderr, " → %d chars, EMPTY, %.1fs\n", len(response), elapsed.Seconds())
|
||||
continue
|
||||
}
|
||||
|
||||
// Score with go-i18n/reversal.
|
||||
grammar := ScoreResponse(tok, response)
|
||||
delta := ComputeDelta(tok, probe.Prompt, response,
|
||||
aiCfg.Scorer.SycophancyEcho, aiCfg.Scorer.SycophancyUplift)
|
||||
|
||||
met := model.Metrics()
|
||||
fmt.Fprintf(os.Stderr, " → %d chars, g=%.1f up=%+.1f echo=%.2f enr=%+.1f, %.1fs (%.0f tok/s)\n",
|
||||
len(response), grammar.Composite,
|
||||
delta.Uplift, delta.Echo, delta.Enrichment,
|
||||
elapsed.Seconds(), met.DecodeTokensPerSec)
|
||||
|
||||
candidate := &distillCandidate{
|
||||
Response: response,
|
||||
Grammar: grammar,
|
||||
Delta: delta,
|
||||
Elapsed: elapsed,
|
||||
}
|
||||
|
||||
if best == nil || grammar.Composite > best.Grammar.Composite {
|
||||
best = candidate
|
||||
}
|
||||
}
|
||||
|
||||
// Quality gate.
|
||||
if best != nil && best.Grammar.Composite >= *minScore {
|
||||
example := TrainingExample{
|
||||
Messages: []ChatMessage{
|
||||
{Role: "system", Content: kernelStr},
|
||||
{Role: "user", Content: probe.Prompt},
|
||||
{Role: "assistant", Content: best.Response},
|
||||
},
|
||||
}
|
||||
line, _ := json.Marshal(example)
|
||||
out.Write(append(line, '\n'))
|
||||
|
||||
kept++
|
||||
fmt.Fprintf(os.Stderr, " ✓ KEPT %s (g=%.1f, verbs=%d, nouns=%d, enr=%+.1f)\n",
|
||||
probe.ID, best.Grammar.Composite,
|
||||
best.Grammar.VerbDiversity, best.Grammar.NounDiversity,
|
||||
best.Delta.Enrichment)
|
||||
} else {
|
||||
skipped++
|
||||
score := 0.0
|
||||
if best != nil {
|
||||
score = best.Grammar.Composite
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, " ✗ SKIP %s (best g=%.1f < %.1f)\n",
|
||||
probe.ID, score, *minScore)
|
||||
}
|
||||
}
|
||||
|
||||
duration := time.Since(totalStart)
|
||||
|
||||
fmt.Fprintf(os.Stderr, "\n=== Distillation Complete ===\n")
|
||||
fmt.Fprintf(os.Stderr, "Model: %s (%s)\n", modelCfg.Name, info.Architecture)
|
||||
fmt.Fprintf(os.Stderr, "Probes: %d\n", len(probes))
|
||||
fmt.Fprintf(os.Stderr, "Runs: %d per probe (%d total generations)\n", *runs, len(probes)**runs)
|
||||
fmt.Fprintf(os.Stderr, "Scorer: go-i18n/reversal grammar v3, gate >= %.1f\n", *minScore)
|
||||
fmt.Fprintf(os.Stderr, "Kept: %d\n", kept)
|
||||
fmt.Fprintf(os.Stderr, "Skipped: %d\n", skipped)
|
||||
if kept+skipped > 0 {
|
||||
fmt.Fprintf(os.Stderr, "Pass rate: %.0f%%\n", float64(kept)/float64(kept+skipped)*100)
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "Output: %s\n", outputPath)
|
||||
fmt.Fprintf(os.Stderr, "Duration: %.0fs (%.1fm)\n", duration.Seconds(), duration.Minutes())
|
||||
}
|
||||
|
||||
// loadDistillProbes loads probes from a named set or a file path.
|
||||
// Returns the probes and the default phase number for output routing.
|
||||
func loadDistillProbes(root, spec string) ([]DistillProbe, int, error) {
|
||||
// Try as a probe set name first.
|
||||
probesCfg, cfgErr := LoadProbesConfig(root)
|
||||
if cfgErr == nil {
|
||||
if set, ok := probesCfg.Sets[spec]; ok {
|
||||
phase := 0
|
||||
if set.Phase != nil {
|
||||
phase = *set.Phase
|
||||
}
|
||||
var probes []DistillProbe
|
||||
for _, f := range set.Files {
|
||||
// Files are relative to the training root.
|
||||
ps, err := readProbeFile(filepath.Join("/Volumes/Data/lem/training/lem", f))
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("read %s: %w", f, err)
|
||||
}
|
||||
probes = append(probes, ps...)
|
||||
}
|
||||
return probes, phase, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to direct file path.
|
||||
probes, err := readProbeFile(spec)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
return probes, 0, nil
|
||||
}
|
||||
|
||||
// readProbeFile reads probes from a JSON array file.
|
||||
func readProbeFile(path string) ([]DistillProbe, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var raw []struct {
|
||||
ID string `json:"id"`
|
||||
Domain string `json:"domain"`
|
||||
Prompt string `json:"prompt"`
|
||||
}
|
||||
if err := json.Unmarshal(data, &raw); err != nil {
|
||||
return nil, fmt.Errorf("parse %s: %w", filepath.Base(path), err)
|
||||
}
|
||||
|
||||
probes := make([]DistillProbe, len(raw))
|
||||
for i, r := range raw {
|
||||
probes[i] = DistillProbe{
|
||||
ID: r.ID,
|
||||
Domain: r.Domain,
|
||||
Prompt: r.Prompt,
|
||||
Source: filepath.Base(path),
|
||||
}
|
||||
}
|
||||
return probes, nil
|
||||
}
|
||||
110
pkg/lem/grammar.go
Normal file
110
pkg/lem/grammar.go
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
package lem
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"forge.lthn.ai/core/go-i18n/reversal"
|
||||
)
|
||||
|
||||
// GrammarScore holds grammar-derived quality signals from a GrammarImprint.
// All fields are populated by ComputeGrammarScore.
type GrammarScore struct {
	VocabRichness float64 `json:"vocab_richness"` // (unique verbs + unique nouns) / token count.
	TenseEntropy  float64 `json:"tense_entropy"`  // Shannon entropy of the tense distribution.
	QuestionRatio float64 `json:"question_ratio"` // "question" entry of the punctuation pattern.
	DomainDepth   int     `json:"domain_depth"`   // Sum of domain-vocabulary hit counts.
	VerbDiversity int     `json:"verb_diversity"` // Number of unique verbs in the imprint.
	NounDiversity int     `json:"noun_diversity"` // Number of unique nouns in the imprint.
	Composite     float64 `json:"composite"`      // Weighted blend of normalised signals, scaled 0-100.
}
|
||||
|
||||
// DeltaScore holds input-vs-output comparison signals.
// All fields are populated by ComputeDelta.
type DeltaScore struct {
	InputComposite  float64 `json:"input_composite"`  // Grammar composite of the prompt.
	OutputComposite float64 `json:"output_composite"` // Grammar composite of the response.
	Uplift          float64 `json:"uplift"`           // Output composite minus input composite.
	Echo            float64 `json:"echo"`             // Imprint similarity between prompt and response.
	Enrichment      float64 `json:"enrichment"`       // Uplift scaled by (1 - echo).
	Sycophantic     bool    `json:"sycophantic"`      // High echo combined with low uplift.
}
|
||||
|
||||
// ComputeGrammarScore derives quality signals from a GrammarImprint.
|
||||
//
|
||||
// Composite is a weighted combination of normalised signals (0-100):
|
||||
// - Tense diversity (0.25): varied tense = narrative depth
|
||||
// - Vocab richness (0.25): diverse vocabulary = engagement
|
||||
// - Question ratio (0.20): questioning = critical thinking
|
||||
// - Verb diversity (0.15): action variety = specificity
|
||||
// - Noun diversity (0.15): concept breadth = thoroughness
|
||||
func ComputeGrammarScore(imp reversal.GrammarImprint) GrammarScore {
|
||||
gs := GrammarScore{
|
||||
VerbDiversity: imp.UniqueVerbs,
|
||||
NounDiversity: imp.UniqueNouns,
|
||||
}
|
||||
|
||||
if imp.TokenCount > 0 {
|
||||
gs.VocabRichness = float64(imp.UniqueVerbs+imp.UniqueNouns) / float64(imp.TokenCount)
|
||||
}
|
||||
|
||||
gs.TenseEntropy = shannonEntropy(imp.TenseDistribution)
|
||||
gs.QuestionRatio = imp.PunctuationPattern["question"]
|
||||
|
||||
for _, v := range imp.DomainVocabulary {
|
||||
gs.DomainDepth += v
|
||||
}
|
||||
|
||||
tenseNorm := gs.TenseEntropy / 1.585 // max entropy for 3 tenses = log2(3)
|
||||
vocabNorm := math.Min(gs.VocabRichness*10, 1.0)
|
||||
questionNorm := math.Min(gs.QuestionRatio*5, 1.0)
|
||||
verbNorm := math.Min(float64(gs.VerbDiversity)/30.0, 1.0)
|
||||
nounNorm := math.Min(float64(gs.NounDiversity)/40.0, 1.0)
|
||||
|
||||
gs.Composite = 0.25*tenseNorm +
|
||||
0.25*vocabNorm +
|
||||
0.20*questionNorm +
|
||||
0.15*verbNorm +
|
||||
0.15*nounNorm
|
||||
|
||||
gs.Composite *= 100.0
|
||||
|
||||
return gs
|
||||
}
|
||||
|
||||
// ComputeDelta scores both prompt and response, computing enrichment signals.
|
||||
func ComputeDelta(tok *reversal.Tokeniser, prompt, response string, echoThreshold, upliftThreshold float64) DeltaScore {
|
||||
inTokens := tok.Tokenise(prompt)
|
||||
inImprint := reversal.NewImprint(inTokens)
|
||||
inGrammar := ComputeGrammarScore(inImprint)
|
||||
|
||||
outTokens := tok.Tokenise(response)
|
||||
outImprint := reversal.NewImprint(outTokens)
|
||||
outGrammar := ComputeGrammarScore(outImprint)
|
||||
|
||||
echo := inImprint.Similar(outImprint)
|
||||
uplift := outGrammar.Composite - inGrammar.Composite
|
||||
|
||||
return DeltaScore{
|
||||
InputComposite: inGrammar.Composite,
|
||||
OutputComposite: outGrammar.Composite,
|
||||
Uplift: uplift,
|
||||
Echo: echo,
|
||||
Enrichment: uplift * (1.0 - echo),
|
||||
Sycophantic: echo > echoThreshold && uplift < upliftThreshold,
|
||||
}
|
||||
}
|
||||
|
||||
// ScoreResponse scores a single response text and returns the grammar score.
|
||||
func ScoreResponse(tok *reversal.Tokeniser, text string) GrammarScore {
|
||||
tokens := tok.Tokenise(text)
|
||||
imprint := reversal.NewImprint(tokens)
|
||||
return ComputeGrammarScore(imprint)
|
||||
}
|
||||
|
||||
func shannonEntropy(dist map[string]float64) float64 {
|
||||
var h float64
|
||||
for _, p := range dist {
|
||||
if p > 0 {
|
||||
h -= p * math.Log2(p)
|
||||
}
|
||||
}
|
||||
return h
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue