1
0
Fork 0
forked from lthn/LEM

feat: add OG base data as 3rd variant to 12B P0-P3 training scripts

Reverse cascade order: 4B (largest teacher) → 1B (graduated) → OG (base).
Three perspectives per prompt — cymatic cascading from expanded Q/K to modal primitives.

P0/P2: 404×3 = 1,212 (sandwich format, OG from lesson-lem1b.jsonl)
P1: 209×3 = 627 (OG from zen/golden multi-turn lessons)
P3: 225×3 = 675 (OG from western-fresh + russian-bridge + composure)
P4-P6: unchanged (no separate OG file — live distilled)

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-02-25 21:26:01 +00:00
parent 526150621e
commit 0923a08a7d
4 changed files with 105 additions and 24 deletions

View file

@@ -1,7 +1,8 @@
#!/usr/bin/env python3
"""P0 LoRA training for Gemma3-12B — LEK sandwich built in code.
Data: 4B + 1B distilled responses to ethics probes (cascade, reverse order).
Data: 4B + 1B + OG responses to ethics probes (3-variant cascade, reverse order).
OG = original 1B responses (lesson-lem1b.jsonl) from before graduation.
"""
import sys
@@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
OG_DATA = LEM_ROOT / 'training/lem/model/gemma3/4b/lesson-lem1b.jsonl'
# ── Build sandwich data from distilled cascade ──────────────────────
print('Building P0 sandwich data from 4B + 1B cascade...')
# ── Build sandwich data from 3-variant cascade ──────────────────────
print('Building P0 sandwich data from 4B + 1B + OG cascade...')
# Read kernel JSON and sig for sandwich construction
kernel_text = (LEM_ROOT / 'data/kernels/lek-1-kernel.json').read_text().strip()
@@ -57,10 +59,19 @@ def load_distilled(path, phase):
recs_4b = load_distilled(DISTILL_4B, 'P0-P2')
recs_1b = load_distilled(DISTILL_1B, 'P0-P2')
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
# Build sandwich messages: kernel + probe + sig → user, distilled response → assistant
# 4B responses first (larger teacher), then 1B (smaller teacher)
# OG: original 1B responses (pre-graduation, used as ground truth for 4B training)
recs_og = []
with open(OG_DATA) as f:
for line in f:
line = line.strip()
if line:
recs_og.append(json.loads(line))
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG responses: {len(recs_og)}')
# Build sandwich messages: kernel + probe + sig → user, response → assistant
# Reverse cascade: 4B first (largest teacher), then 1B (graduated), then OG (base)
train_data = []
for rec in recs_4b + recs_1b:
prompt = rec['messages'][0]['content']
@@ -73,7 +84,19 @@ for rec in recs_4b + recs_1b:
]
})
print(f' Training examples: {len(train_data)} (4B + 1B cascade)')
# OG responses — already have user/assistant messages, wrap in sandwich
for rec in recs_og:
prompt = rec['messages'][0]['content']
response = rec['messages'][1]['content']
sandwich = kernel_text + '\n\n' + prompt + '\n\n' + sig_text
train_data.append({
'messages': [
{'role': 'user', 'content': sandwich},
{'role': 'assistant', 'content': response},
]
})
print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)')
# 90/10 train/valid split
split = int(len(train_data) * 0.9)

View file

@@ -1,7 +1,8 @@
#!/usr/bin/env python3
"""P1 (Zen) LoRA training for LEM-Gemma3-12B-P0 — composure without LEK.
Data: 4B + 1B distilled responses to zen lessons (cascade, reverse order).
Data: 4B + 1B + OG responses to zen lessons (3-variant cascade, reverse order).
OG = original multi-turn zen/golden lessons (the curriculum foundation).
"""
import sys
@@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
ZEN_DATA = LEM_ROOT / 'training/lem/zen/golden'
# ── Load distilled zen data (no sandwich — bare lesson format) ────────
print('Loading P1 zen data from 4B + 1B cascade...')
# ── Load zen data from 3-variant cascade ──────────────────────────────
print('Loading P1 zen data from 4B + 1B + OG cascade...')
def load_distilled(path, phase):
records = []
@@ -52,14 +54,28 @@ def load_distilled(path, phase):
recs_4b = load_distilled(DISTILL_4B, 'P1')
recs_1b = load_distilled(DISTILL_1B, 'P1')
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
# 4B first (reverse cascade), then 1B — bare prompts, no sandwich
# OG: original multi-turn zen/golden lessons (the foundation curriculum)
recs_og = []
for split_name in ['train', 'valid']:
with open(ZEN_DATA / f'{split_name}.jsonl') as f:
for line in f:
line = line.strip()
if line:
recs_og.append(json.loads(line))
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG lessons: {len(recs_og)}')
# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base multi-turn)
train_data = []
for rec in recs_4b + recs_1b:
train_data.append({'messages': rec['messages']})
print(f' Training examples: {len(train_data)} (4B + 1B cascade)')
# OG multi-turn lessons — full conversation format
for rec in recs_og:
train_data.append({'messages': rec['messages']})
print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)')
split = int(len(train_data) * 0.9)
train_messages = train_data[:split]

View file

@@ -1,7 +1,8 @@
#!/usr/bin/env python3
"""P2 (Final LEK Sandwich) LoRA training for LEM-Gemma3-12B-P1 — ethics on composure.
Data: 4B + 1B distilled responses to ethics probes (cascade, reverse order).
Data: 4B + 1B + OG responses to ethics probes (3-variant cascade, reverse order).
OG = original 1B responses (lesson-lem1b.jsonl) from before graduation.
"""
import sys
@@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
OG_DATA = LEM_ROOT / 'training/lem/model/gemma3/4b/lesson-lem1b.jsonl'
# ── Build sandwich data from distilled cascade ──────────────────────
print('Building P2 sandwich data from 4B + 1B cascade...')
# ── Build sandwich data from 3-variant cascade ──────────────────────
print('Building P2 sandwich data from 4B + 1B + OG cascade...')
kernel_text = (LEM_ROOT / 'data/kernels/lek-1-kernel.json').read_text().strip()
sig_text = (LEM_ROOT / 'data/kernels/lek-1-sig.txt').read_text().strip()
@@ -55,9 +57,19 @@ def load_distilled(path, phase):
recs_4b = load_distilled(DISTILL_4B, 'P0-P2')
recs_1b = load_distilled(DISTILL_1B, 'P0-P2')
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
# Build sandwich: kernel + probe + sig → user, distilled response → assistant
# OG: original 1B responses (pre-graduation, used as ground truth for 4B training)
recs_og = []
with open(OG_DATA) as f:
for line in f:
line = line.strip()
if line:
recs_og.append(json.loads(line))
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG responses: {len(recs_og)}')
# Build sandwich: kernel + probe + sig → user, response → assistant
# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base)
train_data = []
for rec in recs_4b + recs_1b:
prompt = rec['messages'][0]['content']
@@ -70,6 +82,18 @@ for rec in recs_4b + recs_1b:
]
})
# OG responses in sandwich format
for rec in recs_og:
prompt = rec['messages'][0]['content']
response = rec['messages'][1]['content']
sandwich = kernel_text + '\n\n' + prompt + '\n\n' + sig_text
train_data.append({
'messages': [
{'role': 'user', 'content': sandwich},
{'role': 'assistant', 'content': response},
]
})
split = int(len(train_data) * 0.9)
train_messages = train_data[:split]
valid_messages = train_data[split:]

View file

@@ -1,7 +1,8 @@
#!/usr/bin/env python3
"""P3 (Freeflow) LoRA training for LEM-Gemma3-12B-P2 — axioms from weights alone.
Data: 4B + 1B distilled responses to western-fresh, russian-bridge, composure (cascade).
Data: 4B + 1B + OG responses to western-fresh, russian-bridge, composure (3-variant cascade).
OG = original multi-turn lesson files (the foundation curriculum).
No sandwich — model must carry axioms from weights alone.
"""
@@ -37,8 +38,8 @@ SCORER_BIN = '/tmp/lem-scorer'
DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
# ── Load distilled freeflow data ──────────────────────────────────────
print('Loading P3 freeflow data from 4B + 1B cascade...')
# ── Load freeflow data from 3-variant cascade ────────────────────────
print('Loading P3 freeflow data from 4B + 1B + OG cascade...')
def load_distilled(path, phase):
records = []
@@ -53,14 +54,31 @@ def load_distilled(path, phase):
recs_4b = load_distilled(DISTILL_4B, 'P3')
recs_1b = load_distilled(DISTILL_1B, 'P3')
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
# 4B first (reverse cascade), then 1B — bare prompts, no sandwich
# OG: original multi-turn freeflow lessons (the foundation curriculum)
recs_og = []
for subdir in ['western-fresh', 'russian-bridge', 'composure']:
for split_name in ['train', 'valid']:
path = LEM_ROOT / f'training/lem/{subdir}/{split_name}.jsonl'
if path.exists():
with open(path) as f:
for line in f:
line = line.strip()
if line:
recs_og.append(json.loads(line))
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG lessons: {len(recs_og)}')
# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base multi-turn)
train_data = []
for rec in recs_4b + recs_1b:
train_data.append({'messages': rec['messages']})
print(f' Training examples: {len(train_data)} (4B + 1B cascade)')
# OG multi-turn lessons — full conversation format
for rec in recs_og:
train_data.append({'messages': rec['messages']})
print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)')
split = int(len(train_data) * 0.9)
train_messages = train_data[:split]