feat: add OG base data as 3rd variant to 12B P0-P3 training scripts
Reverse cascade order: 4B (largest teacher) → 1B (graduated) → OG (base). Three perspectives per prompt — cymatic cascading from expanded Q/K to modal primitives.

P0/P2: 404×3 = 1,212 (sandwich format, OG from lesson-lem1b.jsonl)
P1: 209×3 = 627 (OG from zen/golden multi-turn lessons)
P3: 225×3 = 675 (OG from western-fresh + russian-bridge + composure)
P4-P6: unchanged (no separate OG file — live distilled)

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
526150621e
commit
0923a08a7d
4 changed files with 105 additions and 24 deletions
|
|
@ -1,7 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
"""P0 LoRA training for Gemma3-12B — LEK sandwich built in code.
|
||||
|
||||
Data: 4B + 1B distilled responses to ethics probes (cascade, reverse order).
|
||||
Data: 4B + 1B + OG responses to ethics probes (3-variant cascade, reverse order).
|
||||
OG = original 1B responses (lesson-lem1b.jsonl) from before graduation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
|
@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
|
|||
|
||||
DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
|
||||
DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
|
||||
OG_DATA = LEM_ROOT / 'training/lem/model/gemma3/4b/lesson-lem1b.jsonl'
|
||||
|
||||
# ── Build sandwich data from distilled cascade ──────────────────────
|
||||
print('Building P0 sandwich data from 4B + 1B cascade...')
|
||||
# ── Build sandwich data from 3-variant cascade ──────────────────────
|
||||
print('Building P0 sandwich data from 4B + 1B + OG cascade...')
|
||||
|
||||
# Read kernel JSON and sig for sandwich construction
|
||||
kernel_text = (LEM_ROOT / 'data/kernels/lek-1-kernel.json').read_text().strip()
|
||||
|
|
@ -57,10 +59,19 @@ def load_distilled(path, phase):
|
|||
|
||||
recs_4b = load_distilled(DISTILL_4B, 'P0-P2')
|
||||
recs_1b = load_distilled(DISTILL_1B, 'P0-P2')
|
||||
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
|
||||
|
||||
# Build sandwich messages: kernel + probe + sig → user, distilled response → assistant
|
||||
# 4B responses first (larger teacher), then 1B (smaller teacher)
|
||||
# OG: original 1B responses (pre-graduation, used as ground truth for 4B training)
|
||||
recs_og = []
|
||||
with open(OG_DATA) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
recs_og.append(json.loads(line))
|
||||
|
||||
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG responses: {len(recs_og)}')
|
||||
|
||||
# Build sandwich messages: kernel + probe + sig → user, response → assistant
|
||||
# Reverse cascade: 4B first (largest teacher), then 1B (graduated), then OG (base)
|
||||
train_data = []
|
||||
for rec in recs_4b + recs_1b:
|
||||
prompt = rec['messages'][0]['content']
|
||||
|
|
@ -73,7 +84,19 @@ for rec in recs_4b + recs_1b:
|
|||
]
|
||||
})
|
||||
|
||||
print(f' Training examples: {len(train_data)} (4B + 1B cascade)')
|
||||
# OG responses — already have user/assistant messages, wrap in sandwich
|
||||
for rec in recs_og:
|
||||
prompt = rec['messages'][0]['content']
|
||||
response = rec['messages'][1]['content']
|
||||
sandwich = kernel_text + '\n\n' + prompt + '\n\n' + sig_text
|
||||
train_data.append({
|
||||
'messages': [
|
||||
{'role': 'user', 'content': sandwich},
|
||||
{'role': 'assistant', 'content': response},
|
||||
]
|
||||
})
|
||||
|
||||
print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)')
|
||||
|
||||
# 90/10 train/valid split
|
||||
split = int(len(train_data) * 0.9)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
"""P1 (Zen) LoRA training for LEM-Gemma3-12B-P0 — composure without LEK.
|
||||
|
||||
Data: 4B + 1B distilled responses to zen lessons (cascade, reverse order).
|
||||
Data: 4B + 1B + OG responses to zen lessons (3-variant cascade, reverse order).
|
||||
OG = original multi-turn zen/golden lessons (the curriculum foundation).
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
|
@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
|
|||
|
||||
DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
|
||||
DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
|
||||
ZEN_DATA = LEM_ROOT / 'training/lem/zen/golden'
|
||||
|
||||
# ── Load distilled zen data (no sandwich — bare lesson format) ────────
|
||||
print('Loading P1 zen data from 4B + 1B cascade...')
|
||||
# ── Load zen data from 3-variant cascade ──────────────────────────────
|
||||
print('Loading P1 zen data from 4B + 1B + OG cascade...')
|
||||
|
||||
def load_distilled(path, phase):
|
||||
records = []
|
||||
|
|
@ -52,14 +54,28 @@ def load_distilled(path, phase):
|
|||
|
||||
recs_4b = load_distilled(DISTILL_4B, 'P1')
|
||||
recs_1b = load_distilled(DISTILL_1B, 'P1')
|
||||
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
|
||||
|
||||
# 4B first (reverse cascade), then 1B — bare prompts, no sandwich
|
||||
# OG: original multi-turn zen/golden lessons (the foundation curriculum)
|
||||
recs_og = []
|
||||
for split_name in ['train', 'valid']:
|
||||
with open(ZEN_DATA / f'{split_name}.jsonl') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
recs_og.append(json.loads(line))
|
||||
|
||||
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG lessons: {len(recs_og)}')
|
||||
|
||||
# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base multi-turn)
|
||||
train_data = []
|
||||
for rec in recs_4b + recs_1b:
|
||||
train_data.append({'messages': rec['messages']})
|
||||
|
||||
print(f' Training examples: {len(train_data)} (4B + 1B cascade)')
|
||||
# OG multi-turn lessons — full conversation format
|
||||
for rec in recs_og:
|
||||
train_data.append({'messages': rec['messages']})
|
||||
|
||||
print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)')
|
||||
|
||||
split = int(len(train_data) * 0.9)
|
||||
train_messages = train_data[:split]
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
"""P2 (Final LEK Sandwich) LoRA training for LEM-Gemma3-12B-P1 — ethics on composure.
|
||||
|
||||
Data: 4B + 1B distilled responses to ethics probes (cascade, reverse order).
|
||||
Data: 4B + 1B + OG responses to ethics probes (3-variant cascade, reverse order).
|
||||
OG = original 1B responses (lesson-lem1b.jsonl) from before graduation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
|
@ -35,9 +36,10 @@ SCORER_BIN = '/tmp/lem-scorer'
|
|||
|
||||
DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
|
||||
DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
|
||||
OG_DATA = LEM_ROOT / 'training/lem/model/gemma3/4b/lesson-lem1b.jsonl'
|
||||
|
||||
# ── Build sandwich data from distilled cascade ──────────────────────
|
||||
print('Building P2 sandwich data from 4B + 1B cascade...')
|
||||
# ── Build sandwich data from 3-variant cascade ──────────────────────
|
||||
print('Building P2 sandwich data from 4B + 1B + OG cascade...')
|
||||
|
||||
kernel_text = (LEM_ROOT / 'data/kernels/lek-1-kernel.json').read_text().strip()
|
||||
sig_text = (LEM_ROOT / 'data/kernels/lek-1-sig.txt').read_text().strip()
|
||||
|
|
@ -55,9 +57,19 @@ def load_distilled(path, phase):
|
|||
|
||||
recs_4b = load_distilled(DISTILL_4B, 'P0-P2')
|
||||
recs_1b = load_distilled(DISTILL_1B, 'P0-P2')
|
||||
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
|
||||
|
||||
# Build sandwich: kernel + probe + sig → user, distilled response → assistant
|
||||
# OG: original 1B responses (pre-graduation, used as ground truth for 4B training)
|
||||
recs_og = []
|
||||
with open(OG_DATA) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
recs_og.append(json.loads(line))
|
||||
|
||||
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG responses: {len(recs_og)}')
|
||||
|
||||
# Build sandwich: kernel + probe + sig → user, response → assistant
|
||||
# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base)
|
||||
train_data = []
|
||||
for rec in recs_4b + recs_1b:
|
||||
prompt = rec['messages'][0]['content']
|
||||
|
|
@ -70,6 +82,18 @@ for rec in recs_4b + recs_1b:
|
|||
]
|
||||
})
|
||||
|
||||
# OG responses in sandwich format
|
||||
for rec in recs_og:
|
||||
prompt = rec['messages'][0]['content']
|
||||
response = rec['messages'][1]['content']
|
||||
sandwich = kernel_text + '\n\n' + prompt + '\n\n' + sig_text
|
||||
train_data.append({
|
||||
'messages': [
|
||||
{'role': 'user', 'content': sandwich},
|
||||
{'role': 'assistant', 'content': response},
|
||||
]
|
||||
})
|
||||
|
||||
split = int(len(train_data) * 0.9)
|
||||
train_messages = train_data[:split]
|
||||
valid_messages = train_data[split:]
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
"""P3 (Freeflow) LoRA training for LEM-Gemma3-12B-P2 — axioms from weights alone.
|
||||
|
||||
Data: 4B + 1B distilled responses to western-fresh, russian-bridge, composure (cascade).
|
||||
Data: 4B + 1B + OG responses to western-fresh, russian-bridge, composure (3-variant cascade).
|
||||
OG = original multi-turn lesson files (the foundation curriculum).
|
||||
No sandwich — model must carry axioms from weights alone.
|
||||
"""
|
||||
|
||||
|
|
@ -37,8 +38,8 @@ SCORER_BIN = '/tmp/lem-scorer'
|
|||
DISTILL_4B = '/Volumes/Data/lem/distilled-for-12b/distilled-4b-all.jsonl'
|
||||
DISTILL_1B = '/Volumes/Data/lem/distilled/distilled-1b-p0p5.jsonl'
|
||||
|
||||
# ── Load distilled freeflow data ──────────────────────────────────────
|
||||
print('Loading P3 freeflow data from 4B + 1B cascade...')
|
||||
# ── Load freeflow data from 3-variant cascade ────────────────────────
|
||||
print('Loading P3 freeflow data from 4B + 1B + OG cascade...')
|
||||
|
||||
def load_distilled(path, phase):
|
||||
records = []
|
||||
|
|
@ -53,14 +54,31 @@ def load_distilled(path, phase):
|
|||
|
||||
recs_4b = load_distilled(DISTILL_4B, 'P3')
|
||||
recs_1b = load_distilled(DISTILL_1B, 'P3')
|
||||
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)}')
|
||||
|
||||
# 4B first (reverse cascade), then 1B — bare prompts, no sandwich
|
||||
# OG: original multi-turn freeflow lessons (the foundation curriculum)
|
||||
recs_og = []
|
||||
for subdir in ['western-fresh', 'russian-bridge', 'composure']:
|
||||
for split_name in ['train', 'valid']:
|
||||
path = LEM_ROOT / f'training/lem/{subdir}/{split_name}.jsonl'
|
||||
if path.exists():
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
recs_og.append(json.loads(line))
|
||||
|
||||
print(f' 4B responses: {len(recs_4b)} | 1B responses: {len(recs_1b)} | OG lessons: {len(recs_og)}')
|
||||
|
||||
# Reverse cascade: 4B first (largest), then 1B (graduated), then OG (base multi-turn)
|
||||
train_data = []
|
||||
for rec in recs_4b + recs_1b:
|
||||
train_data.append({'messages': rec['messages']})
|
||||
|
||||
print(f' Training examples: {len(train_data)} (4B + 1B cascade)')
|
||||
# OG multi-turn lessons — full conversation format
|
||||
for rec in recs_og:
|
||||
train_data.append({'messages': rec['messages']})
|
||||
|
||||
print(f' Training examples: {len(train_data)} (4B + 1B + OG cascade)')
|
||||
|
||||
split = int(len(train_data) * 0.9)
|
||||
train_messages = train_data[:split]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue