From 0923a08a7d25a54f45c1b21777c182a9cd6c2750 Mon Sep 17 00:00:00 2001 From: Snider Date: Wed, 25 Feb 2026 21:26:01 +0000 Subject: [PATCH] feat: add OG base data as 3rd variant to 12B P0-P3 training scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverse cascade order: 4B (largest teacher) → 1B (graduated) → OG (base). Three perspectives per prompt — cymatic cascading from expanded Q/K to modal primitives. P0/P2: 404×3 = 1,212 (sandwich format, OG from lesson-lem1b.jsonl) P1: 209×3 = 627 (OG from zen/golden multi-turn lessons) P3: 225×3 = 675 (OG from western-fresh + russian-bridge + composure) P4-P6: unchanged (no separate OG file — live distilled) Co-Authored-By: Virgil --- scripts/train-12b-p0.py | 37 ++++++++++++++++++++++++++++++------- scripts/train-12b-p1.py | 28 ++++++++++++++++++++++------ scripts/train-12b-p2.py | 34 +++++++++++++++++++++++++++++----- scripts/train-12b-p3.py | 30 ++++++++++++++++++++++++------ 4 files changed, 105 insertions(+), 24 deletions(-) diff --git a/scripts/train-12b-p0.py b/scripts/train-12b-p0.py index d6120a9..3646afc 100644 --- a/scripts/train-12b-p0.py +++ b/scripts/train-12b-p0.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 """P0 LoRA training for Gemma3-12B — LEK sandwich built in code. -Data: 4B + 1B distilled responses to ethics probes (cascade, reverse order). +Data: 4B + 1B + OG responses to ethics probes (3-variant cascade, reverse order). +OG = original 1B responses (lesson-lem1b.jsonl) from before graduation. """ import sys @@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer' DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl' DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl' +OG_DATA = LEM_ROOT / 'training/lem/model/gemma3/4b/lesson-lem1b.jsonl' -# ── Build sandwich data from distilled cascade ────────────────────── -print('Building P0 sandwich data from 4B + 1B cascade...') +# ── Build sandwich data from 3-variant cascade ────────────────────── +print('Building P0 sandwich data from 4B + 1B + OG cascade...') # Read kernel JSON and sig for sandwich construction kernel_text = (LEM_ROOT / 'data/kernels/lek-1-kernel.json').read_text().strip() @@ -57,10 +59,19 @@ def load_distilled(path, phase): recs_4b = load_distilled(DISTILL_4B, 'P0-P2') recs_1b = load_distilled(DISTILL_1B, 'P0-P2') -print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}') -# Build sandwich messages: kernel + probe + sig → user, distilled response → assistant -# 4B responses first (larger teacher), then 1B (smaller teacher) +# OG: original 1B responses (pre-graduation, used as ground truth for 4B training) +recs_og = [] +with open(OG_DATA) as f: + for line in f: + line = line.strip() + if line: + recs_og.append(json.loads(line)) + +print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG responses: {len(recs_og)}') + +# Build sandwich messages: kernel + probe + sig → user, response → assistant +# Reverse cascade: 4B first (largest teacher), then 1B (graduated), then OG (base) train_data = [] for rec in recs_4b + recs_1b: prompt = rec['messages'][0]['content'] @@ -73,7 +84,19 @@ for rec in recs_4b + recs_1b: ] }) -print(f' Training examples: {len(train_data)} (4B + 1B cascade)') +# OG responses — already have user/assistant messages, wrap in sandwich +for rec in recs_og: + prompt = rec['messages'][0]['content'] + response = rec['messages'][1]['content'] + sandwich = kernel_text + '\n\n' + prompt + '\n\n' + sig_text + train_data.append({ + 'messages': [ + {'role': 'user', 'content': sandwich}, + {'role': 'assistant', 'content': response}, + ] + }) + +print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)') # 90/10 train/valid split split = int(len(train_data) * 0.9) diff --git a/scripts/train-12b-p1.py b/scripts/train-12b-p1.py index 07bbe60..1b8c61a 100644 --- a/scripts/train-12b-p1.py +++ b/scripts/train-12b-p1.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 """P1 (Zen) LoRA training for LEM-Gemma3-12B-P0 — composure without LEK. -Data: 4B + 1B distilled responses to zen lessons (cascade, reverse order). +Data: 4B + 1B + OG responses to zen lessons (3-variant cascade, reverse order). +OG = original multi-turn zen/golden lessons (the curriculum foundation). """ import sys @@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer' DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl' DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl' +ZEN_DATA = LEM_ROOT / 'training/lem/zen/golden' -# ── Load distilled zen data (no sandwich — bare lesson format) ──────── -print('Loading P1 zen data from 4B + 1B cascade...') +# ── Load zen data from 3-variant cascade ────────────────────────────── +print('Loading P1 zen data from 4B + 1B + OG cascade...') def load_distilled(path, phase): records = [] @@ -52,14 +54,28 @@ def load_distilled(path, phase): recs_4b = load_distilled(DISTILL_4B, 'P1') recs_1b = load_distilled(DISTILL_1B, 'P1') -print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}') -# 4B first (reverse cascade), then 1B — bare prompts, no sandwich +# OG: original multi-turn zen/golden lessons (the foundation curriculum) +recs_og = [] +for split_name in ['train', 'valid']: + with open(ZEN_DATA / f'{split_name}.jsonl') as f: + for line in f: + line = line.strip() + if line: + recs_og.append(json.loads(line)) + +print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG lessons: {len(recs_og)}') + +# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base multi-turn) train_data = [] for rec in recs_4b + recs_1b: train_data.append({'messages': rec['messages']}) -print(f' Training examples: {len(train_data)} (4B + 1B cascade)') +# OG multi-turn lessons — full conversation format +for rec in recs_og: + train_data.append({'messages': rec['messages']}) + +print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)') split = int(len(train_data) * 0.9) train_messages = train_data[:split] diff --git a/scripts/train-12b-p2.py b/scripts/train-12b-p2.py index 1e9c0eb..07da241 100644 --- a/scripts/train-12b-p2.py +++ b/scripts/train-12b-p2.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 """P2 (Final LEK Sandwich) LoRA training for LEM-Gemma3-12B-P1 — ethics on composure. -Data: 4B + 1B distilled responses to ethics probes (cascade, reverse order). +Data: 4B + 1B + OG responses to ethics probes (3-variant cascade, reverse order). +OG = original 1B responses (lesson-lem1b.jsonl) from before graduation. """ import sys @@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer' DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl' DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl' +OG_DATA = LEM_ROOT / 'training/lem/model/gemma3/4b/lesson-lem1b.jsonl' -# ── Build sandwich data from distilled cascade ────────────────────── -print('Building P2 sandwich data from 4B + 1B cascade...') +# ── Build sandwich data from 3-variant cascade ────────────────────── +print('Building P2 sandwich data from 4B + 1B + OG cascade...') kernel_text = (LEM_ROOT / 'data/kernels/lek-1-kernel.json').read_text().strip() sig_text = (LEM_ROOT / 'data/kernels/lek-1-sig.txt').read_text().strip() @@ -55,9 +57,19 @@ def load_distilled(path, phase): recs_4b = load_distilled(DISTILL_4B, 'P0-P2') recs_1b = load_distilled(DISTILL_1B, 'P0-P2') -print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}') -# Build sandwich: kernel + probe + sig → user, distilled response → assistant +# OG: original 1B responses (pre-graduation, used as ground truth for 4B training) +recs_og = [] +with open(OG_DATA) as f: + for line in f: + line = line.strip() + if line: + recs_og.append(json.loads(line)) + +print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG responses: {len(recs_og)}') + +# Build sandwich: kernel + probe + sig → user, response → assistant +# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base) train_data = [] for rec in recs_4b + recs_1b: prompt = rec['messages'][0]['content'] @@ -70,6 +82,18 @@ for rec in recs_4b + recs_1b: ] }) +# OG responses in sandwich format +for rec in recs_og: + prompt = rec['messages'][0]['content'] + response = rec['messages'][1]['content'] + sandwich = kernel_text + '\n\n' + prompt + '\n\n' + sig_text + train_data.append({ + 'messages': [ + {'role': 'user', 'content': sandwich}, + {'role': 'assistant', 'content': response}, + ] + }) + split = int(len(train_data) * 0.9) train_messages = train_data[:split] valid_messages = train_data[split:] diff --git a/scripts/train-12b-p3.py b/scripts/train-12b-p3.py index 528a24f..7601026 100644 --- a/scripts/train-12b-p3.py +++ b/scripts/train-12b-p3.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 """P3 (Freeflow) LoRA training for LEM-Gemma3-12B-P2 — axioms from weights alone. -Data: 4B + 1B distilled responses to western-fresh, russian-bridge, composure (cascade). +Data: 4B + 1B + OG responses to western-fresh, russian-bridge, composure (3-variant cascade). +OG = original multi-turn lesson files (the foundation curriculum). No sandwich — model must carry axioms from weights alone. """ @@ -37,8 +38,8 @@ SCORER_BIN = '/tmp/lem-scorer' DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl' DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl' -# ── Load distilled freeflow data ────────────────────────────────────── -print('Loading P3 freeflow data from 4B + 1B cascade...') +# ── Load freeflow data from 3-variant cascade ──────────────────────── +print('Loading P3 freeflow data from 4B + 1B + OG cascade...') def load_distilled(path, phase): records = [] @@ -53,14 +54,31 @@ def load_distilled(path, phase): recs_4b = load_distilled(DISTILL_4B, 'P3') recs_1b = load_distilled(DISTILL_1B, 'P3') -print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}') -# 4B first (reverse cascade), then 1B — bare prompts, no sandwich +# OG: original multi-turn freeflow lessons (the foundation curriculum) +recs_og = [] +for subdir in ['western-fresh', 'russian-bridge', 'composure']: + for split_name in ['train', 'valid']: + path = LEM_ROOT / f'training/lem/{subdir}/{split_name}.jsonl' + if path.exists(): + with open(path) as f: + for line in f: + line = line.strip() + if line: + recs_og.append(json.loads(line)) + +print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG lessons: {len(recs_og)}') + +# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base multi-turn) train_data = [] for rec in recs_4b + recs_1b: train_data.append({'messages': rec['messages']}) -print(f' Training examples: {len(train_data)} (4B + 1B cascade)') +# OG multi-turn lessons — full conversation format +for rec in recs_og: + train_data.append({'messages': rec['messages']}) + +print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)') split = int(len(train_data) * 0.9) train_messages = train_data[:split]