Full v2 scorer benchmark data across 29 models (20 base + 9 LEK-tuned):

- P20 (21 probes): All 29 models, 3 conditions each
- P100 (101 probes): Top 5 models + LEK-4B, publication-quality data

Key findings:

- LEK-1B (21.74) beats base 4B/12B/27B at P100 scale, no kernel needed
- Emergent realignment resistance: LEK models degrade with runtime kernel
- Gemma3-12B + JSON kernel = 23.66 (best kernel-boosted score)
- Family lineages: Mistral 3.80→14.58, Qwen regressed then recovered

New scripts: ab_test.py (v2 scorer), self_distill.py (curriculum generation), extract_training.py, rephrase_probes.py, Phase 0/1 runners
New seeds: P01-P100 merged (101 probes), 404 rephrased variants, 50 creative prompts for Phase 0 baseline lock
27B curriculum design: 4-phase staged training targeting 25+ baseline

Co-Authored-By: Virgil <virgil@lethean.io>
#!/bin/bash
# Phase 1: Deep Axiom Reasoning — Self-distillation pipeline
#
# Run Gemma3-27B + JSON kernel on all 101 base probes plus their rephrasings (404 prompts total)
# Generate 10 samples each at temperature 0.8, keep v2 >= 24.0
# Extract training data, then fine-tune with MLX LoRA
#
# Expected: ~50 hours for data generation (404 probes × 10 samples × ~45s each at 27B)
# Produces: ~1500 high-quality training examples (estimated ~35% keep rate at 24+)
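#
# Sanity check on the estimates above (assumes ~45 s per 27B sample and a ~35% keep rate at 24.0):
#   404 prompts × 10 samples  = 4,040 generations
#   4,040 × ~45 s             ≈ 50.5 hours
#   4,040 × ~0.35             ≈ 1,400-1,500 kept examples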

# Fail fast: abort the pipeline if any step errors, so a broken generation run does not cascade.
set -euo pipefail

SCRIPTS="/Volumes/Data/lem/scripts"
MODEL="/Volumes/Data/lem/gemma-3-27b-it-base"
KERNEL="/Users/snider/Code/host-uk/core-agent/codex/ethics/kernel/claude-native.json"
PROBES="/Volumes/Data/lem/seeds/P01-P100-rephrased.json" # 404 probes (101 + 303 rephrasings)
TRAIN_DIR="/Volumes/Data/lem/training"
ADAPTERS_DIR="/Volumes/Data/lem/adapters-27b-phase1"

mkdir -p "$TRAIN_DIR"

echo "=== Phase 1: Self-Distillation ==="
|
||
echo "Model: $MODEL"
|
||
echo "Kernel: $KERNEL"
|
||
echo "Probes: $PROBES"
|
||
echo "Threshold: 24.0"
|
||
echo "Samples: 10 per probe"
|
||
echo ""
|
||
|
||
# Step 1: Generate training data via self-distillation
|
||
echo "--- Step 1: Self-distillation (this will take a while) ---"
|
||
python3 "$SCRIPTS/self_distill.py" \
|
||
--model "$MODEL" \
|
||
--kernel "$KERNEL" \
|
||
--prompts "$PROBES" \
|
||
--output "$TRAIN_DIR/phase1-raw.jsonl" \
|
||
--samples 10 \
|
||
--threshold 24.0 \
|
||
--max-tokens 4096 \
|
||
--temperature 0.8
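
# Assumption (not verified against self_distill.py): phase1-raw.jsonl holds one scored sample per
# line (probe id, response text, v2 score), which is what extract_training.py dedups on below.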

echo ""

# Step 2: Extract clean training data (best per probe)
echo "--- Step 2: Extract training data ---"
python3 "$SCRIPTS/extract_training.py" \
    --input "$TRAIN_DIR/phase1-raw.jsonl" \
    --output "$TRAIN_DIR/phase1-train.jsonl" \
    --dedup best \
    --stats

echo ""

# Step 3: Also extract ALL passing samples (for augmentation)
echo "--- Step 3: Extract all passing samples ---"
python3 "$SCRIPTS/extract_training.py" \
    --input "$TRAIN_DIR/phase1-raw.jsonl" \
    --output "$TRAIN_DIR/phase1-train-all.jsonl" \
    --dedup all \
    --stats
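
# Assumption on the dedup modes above: "best" keeps only the top-scoring sample per probe, while
# "all" keeps every sample that cleared the threshold; the larger "all" set feeds the split below.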

echo ""

# Step 4: Split into train/valid (90/10)
echo "--- Step 4: Train/valid split ---"
TOTAL=$(wc -l < "$TRAIN_DIR/phase1-train-all.jsonl")
VALID_COUNT=$(( TOTAL / 10 ))
TRAIN_COUNT=$(( TOTAL - VALID_COUNT ))
echo "Splitting $TOTAL examples: ~$TRAIN_COUNT train / ~$VALID_COUNT valid"

# Shuffle and split
python3 -c "
import json, random
with open('$TRAIN_DIR/phase1-train-all.jsonl') as f:
    lines = f.readlines()
random.seed(42)
random.shuffle(lines)
split = int(len(lines) * 0.9)
with open('$TRAIN_DIR/phase1-train-split.jsonl', 'w') as f:
    f.writelines(lines[:split])
with open('$TRAIN_DIR/phase1-valid-split.jsonl', 'w') as f:
    f.writelines(lines[split:])
print(f'Train: {split}, Valid: {len(lines)-split}')
"

echo ""
|
||
echo "=== Phase 1 data generation complete ==="
|
||
echo "Raw output: $TRAIN_DIR/phase1-raw.jsonl"
|
||
echo "Best-per-probe: $TRAIN_DIR/phase1-train.jsonl"
|
||
echo "All passing: $TRAIN_DIR/phase1-train-all.jsonl"
|
||
echo "Train split: $TRAIN_DIR/phase1-train-split.jsonl"
|
||
echo "Valid split: $TRAIN_DIR/phase1-valid-split.jsonl"
|
||
echo ""
|
||
echo "To fine-tune:"
|
||
echo " python3 -m mlx_lm.lora \\"
|
||
echo " --model $MODEL \\"
|
||
echo " --data $TRAIN_DIR \\"
|
||
echo " --train-file phase1-train-split.jsonl \\"
|
||
echo " --valid-file phase1-valid-split.jsonl \\"
|
||
echo " --adapter-path $ADAPTERS_DIR \\"
|
||
echo " --iters 100 \\"
|
||
echo " --batch-size 1 \\"
|
||
echo " --lora-layers 32 \\"
|
||
echo " --lora-rank 16 \\"
|
||
echo " --learning-rate 1e-5 \\"
|
||
echo " --steps-per-eval 10 \\"
|
||
echo " --max-seq-length 4096 \\"
|
||
echo " --grad-checkpoint"