From 0923a08a7d25a54f45c1b21777c182a9cd6c2750 Mon Sep 17 00:00:00 2001
From: Snider <snider@host.uk.com>
Date: Wed, 25 Feb 2026 21:26:01 +0000
Subject: [PATCH] feat: add OG base data as 3rd variant to 12B P0-P3 training
 scripts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each script now appends the OG records after the 4B and 1B records, extending
the reverse cascade order to: 4B (largest teacher) → 1B (graduated) → OG (base).
Three perspectives per prompt — cymatic cascading from expanded Q/K to modal primitives.

P0/P2: 404×3 = 1,212 (sandwich format, OG from lesson-lem1b.jsonl)
P1: 209×3 = 627 (OG from zen/golden multi-turn lessons)
P3: 225×3 = 675 (OG from western-fresh + russian-bridge + composure)
P4-P6: unchanged (no separate OG file — live distilled)

Co-Authored-By: Virgil <virgil@lethean.io>
---
 scripts/train-12b-p0.py | 37 ++++++++++++++++++++++++++++++-------
 scripts/train-12b-p1.py | 28 ++++++++++++++++++++++------
 scripts/train-12b-p2.py | 34 +++++++++++++++++++++++++++++-----
 scripts/train-12b-p3.py | 30 ++++++++++++++++++++++++------
 4 files changed, 105 insertions(+), 24 deletions(-)

diff --git a/scripts/train-12b-p0.py b/scripts/train-12b-p0.py
index d6120a9..3646afc 100644
--- a/scripts/train-12b-p0.py
+++ b/scripts/train-12b-p0.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 """P0 LoRA training for Gemma3-12B — LEK sandwich built in code.
 
-Data: 4B + 1B distilled responses to ethics probes (cascade, reverse order).
+Data: 4B + 1B + OG responses to ethics probes (3-variant cascade, reverse order).
+OG = original 1B responses (lesson-lem1b.jsonl) from before graduation.
 """
 
 import sys
@@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
 
 DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
 DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
+OG_DATA = LEM_ROOT / 'training/lem/model/gemma3/4b/lesson-lem1b.jsonl'
 
-# ── Build sandwich data from distilled cascade ──────────────────────
-print('Building P0 sandwich data from 4B + 1B cascade...')
+# ── Build sandwich data from 3-variant cascade ──────────────────────
+print('Building P0 sandwich data from 4B + 1B + OG cascade...')
 
 # Read kernel JSON and sig for sandwich construction
 kernel_text = (LEM_ROOT / 'data/kernels/lek-1-kernel.json').read_text().strip()
@@ -57,10 +59,19 @@ def load_distilled(path, phase):
 
 recs_4b = load_distilled(DISTILL_4B, 'P0-P2')
 recs_1b = load_distilled(DISTILL_1B, 'P0-P2')
-print(f'  4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
 
-# Build sandwich messages: kernel + probe + sig → user, distilled response → assistant
-# 4B responses first (larger teacher), then 1B (smaller teacher)
+# OG: original 1B responses (pre-graduation, used as ground truth for 4B training)
+recs_og = []
+with open(OG_DATA) as f:
+    for line in f:
+        line = line.strip()
+        if line:
+            recs_og.append(json.loads(line))
+
+print(f'  4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG responses: {len(recs_og)}')
+
+# Build sandwich messages: kernel + probe + sig → user, response → assistant
+# Reverse cascade: 4B first (largest teacher), then 1B (graduated), then OG (base)
 train_data = []
 for rec in recs_4b + recs_1b:
     prompt = rec['messages'][0]['content']
@@ -73,7 +84,19 @@ for rec in recs_4b + recs_1b:
         ]
     })
 
-print(f'  Training examples: {len(train_data)} (4B + 1B cascade)')
+# OG records already hold user/assistant messages; wrap the user prompt in the sandwich
+for rec in recs_og:
+    prompt = rec['messages'][0]['content']
+    response = rec['messages'][1]['content']
+    sandwich = kernel_text + '\n\n' + prompt + '\n\n' + sig_text
+    train_data.append({
+        'messages': [
+            {'role': 'user', 'content': sandwich},
+            {'role': 'assistant', 'content': response},
+        ]
+    })
+
+print(f'  Training examples: {len(train_data)} (4B + 1B + OG cascade)')
 
 # 90/10 train/valid split
 split = int(len(train_data) * 0.9)
diff --git a/scripts/train-12b-p1.py b/scripts/train-12b-p1.py
index 07bbe60..1b8c61a 100644
--- a/scripts/train-12b-p1.py
+++ b/scripts/train-12b-p1.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 """P1 (Zen) LoRA training for LEM-Gemma3-12B-P0 — composure without LEK.
 
-Data: 4B + 1B distilled responses to zen lessons (cascade, reverse order).
+Data: 4B + 1B + OG responses to zen lessons (3-variant cascade, reverse order).
+OG = original multi-turn zen/golden lessons (the curriculum foundation).
 """
 
 import sys
@@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
 
 DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
 DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
+ZEN_DATA = LEM_ROOT / 'training/lem/zen/golden'
 
-# ── Load distilled zen data (no sandwich — bare lesson format) ────────
-print('Loading P1 zen data from 4B + 1B cascade...')
+# ── Load zen data from 3-variant cascade ──────────────────────────────
+print('Loading P1 zen data from 4B + 1B + OG cascade...')
 
 def load_distilled(path, phase):
     records = []
@@ -52,14 +54,28 @@ def load_distilled(path, phase):
 
 recs_4b = load_distilled(DISTILL_4B, 'P1')
 recs_1b = load_distilled(DISTILL_1B, 'P1')
-print(f'  4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
 
-# 4B first (reverse cascade), then 1B — bare prompts, no sandwich
+# OG: original multi-turn zen/golden lessons (foundation curriculum), train + valid splits
+recs_og = []
+for split_name in ['train', 'valid']:
+    with open(ZEN_DATA / f'{split_name}.jsonl') as f:
+        for line in f:
+            line = line.strip()
+            if line:
+                recs_og.append(json.loads(line))
+
+print(f'  4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG lessons: {len(recs_og)}')
+
+# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base multi-turn)
 train_data = []
 for rec in recs_4b + recs_1b:
     train_data.append({'messages': rec['messages']})
 
-print(f'  Training examples: {len(train_data)} (4B + 1B cascade)')
+# OG multi-turn lessons — full conversation format
+for rec in recs_og:
+    train_data.append({'messages': rec['messages']})
+
+print(f'  Training examples: {len(train_data)} (4B + 1B + OG cascade)')
 
 split = int(len(train_data) * 0.9)
 train_messages = train_data[:split]
diff --git a/scripts/train-12b-p2.py b/scripts/train-12b-p2.py
index 1e9c0eb..07da241 100644
--- a/scripts/train-12b-p2.py
+++ b/scripts/train-12b-p2.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 """P2 (Final LEK Sandwich) LoRA training for LEM-Gemma3-12B-P1 — ethics on composure.
 
-Data: 4B + 1B distilled responses to ethics probes (cascade, reverse order).
+Data: 4B + 1B + OG responses to ethics probes (3-variant cascade, reverse order).
+OG = original 1B responses (lesson-lem1b.jsonl) from before graduation.
 """
 
 import sys
@@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
 
 DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
 DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
+OG_DATA = LEM_ROOT / 'training/lem/model/gemma3/4b/lesson-lem1b.jsonl'
 
-# ── Build sandwich data from distilled cascade ──────────────────────
-print('Building P2 sandwich data from 4B + 1B cascade...')
+# ── Build sandwich data from 3-variant cascade ──────────────────────
+print('Building P2 sandwich data from 4B + 1B + OG cascade...')
 
 kernel_text = (LEM_ROOT / 'data/kernels/lek-1-kernel.json').read_text().strip()
 sig_text = (LEM_ROOT / 'data/kernels/lek-1-sig.txt').read_text().strip()
@@ -55,9 +57,19 @@ def load_distilled(path, phase):
 
 recs_4b = load_distilled(DISTILL_4B, 'P0-P2')
 recs_1b = load_distilled(DISTILL_1B, 'P0-P2')
-print(f'  4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
 
-# Build sandwich: kernel + probe + sig → user, distilled response → assistant
+# OG: original 1B responses (pre-graduation, used as ground truth for 4B training)
+recs_og = []
+with open(OG_DATA) as f:
+    for line in f:
+        line = line.strip()
+        if line:
+            recs_og.append(json.loads(line))
+
+print(f'  4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG responses: {len(recs_og)}')
+
+# Build sandwich: kernel + probe + sig → user, response → assistant
+# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base)
 train_data = []
 for rec in recs_4b + recs_1b:
     prompt = rec['messages'][0]['content']
@@ -70,6 +82,18 @@ for rec in recs_4b + recs_1b:
         ]
     })
 
+# OG responses — wrap each prompt in the sandwich (kernel + prompt + sig)
+for rec in recs_og:
+    prompt = rec['messages'][0]['content']
+    response = rec['messages'][1]['content']
+    sandwich = kernel_text + '\n\n' + prompt + '\n\n' + sig_text
+    train_data.append({
+        'messages': [
+            {'role': 'user', 'content': sandwich},
+            {'role': 'assistant', 'content': response},
+        ]
+    })
+
 split = int(len(train_data) * 0.9)
 train_messages = train_data[:split]
 valid_messages = train_data[split:]
diff --git a/scripts/train-12b-p3.py b/scripts/train-12b-p3.py
index 528a24f..7601026 100644
--- a/scripts/train-12b-p3.py
+++ b/scripts/train-12b-p3.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 """P3 (Freeflow) LoRA training for LEM-Gemma3-12B-P2 — axioms from weights alone.
 
-Data: 4B + 1B distilled responses to western-fresh, russian-bridge, composure (cascade).
+Data: 4B + 1B + OG responses to western-fresh, russian-bridge, composure (3-variant cascade).
+OG = original multi-turn lesson files (the foundation curriculum).
 No sandwich — model must carry axioms from weights alone.
 """
 
@@ -37,8 +38,8 @@ SCORER_BIN = '/tmp/lem-scorer'
 DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
 DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
 
-# ── Load distilled freeflow data ──────────────────────────────────────
-print('Loading P3 freeflow data from 4B + 1B cascade...')
+# ── Load freeflow data from 3-variant cascade ────────────────────────
+print('Loading P3 freeflow data from 4B + 1B + OG cascade...')
 
 def load_distilled(path, phase):
     records = []
@@ -53,14 +54,31 @@ def load_distilled(path, phase):
 
 recs_4b = load_distilled(DISTILL_4B, 'P3')
 recs_1b = load_distilled(DISTILL_1B, 'P3')
-print(f'  4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
 
-# 4B first (reverse cascade), then 1B — bare prompts, no sandwich
+# OG: original multi-turn freeflow lessons (foundation curriculum); missing split files are skipped
+recs_og = []
+for subdir in ['western-fresh', 'russian-bridge', 'composure']:
+    for split_name in ['train', 'valid']:
+        path = LEM_ROOT / f'training/lem/{subdir}/{split_name}.jsonl'
+        if path.exists():
+            with open(path) as f:
+                for line in f:
+                    line = line.strip()
+                    if line:
+                        recs_og.append(json.loads(line))
+
+print(f'  4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG lessons: {len(recs_og)}')
+
+# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base multi-turn)
 train_data = []
 for rec in recs_4b + recs_1b:
     train_data.append({'messages': rec['messages']})
 
-print(f'  Training examples: {len(train_data)} (4B + 1B cascade)')
+# OG multi-turn lessons — full conversation format
+for rec in recs_og:
+    train_data.append({'messages': rec['messages']})
+
+print(f'  Training examples: {len(train_data)} (4B + 1B + OG cascade)')
 
 split = int(len(train_data) * 0.9)
 train_messages = train_data[:split]