56 lines
1.8 KiB
Bash
56 lines
1.8 KiB
Bash
|
|
#!/bin/bash
# Phase 0: Baseline Lock — Creative writing data generation
#
# Runs Gemma3-27B (NO kernel) on creative prompts and generates 10 samples
# per prompt at temperature 0.9 (more creative variance).
#
# No v2 (axiom) threshold is applied — creative quality needs manual review,
# not axiom scoring. Only the low structural threshold (15.0) is passed to
# the generator, which the banner below describes as keeping anything
# coherent.
#
# This protects creative capability from being lost in later phases.
#
# Expected: ~50 prompts × 10 samples × ~45s = ~6 hours
# Produces: raw creative outputs for manual curation
#
# Usage: run with no arguments. All paths are fixed below.
# Exit status: non-zero as soon as a pre-flight check or any step fails.

# Abort on the first failing command, unset variable, or failed pipeline
# stage, so a failed generation run never flows into extraction or the
# "complete" message.
set -euo pipefail

# --- Fixed locations --------------------------------------------------------
readonly SCRIPTS="/Volumes/Data/lem/scripts"
readonly MODEL="/Volumes/Data/lem/gemma-3-27b-it-base"
readonly PROBES="/Volumes/Data/lem/seeds/phase0-creative.json"
readonly TRAIN_DIR="/Volumes/Data/lem/training"

# --- Generation settings ----------------------------------------------------
# Defined once so the banner and the CLI flags cannot drift apart.
readonly SAMPLES="10"
readonly THRESHOLD="15.0"
readonly TEMPERATURE="0.9"
readonly MAX_TOKENS="4096"

# --- Derived output paths ---------------------------------------------------
readonly RAW_OUT="$TRAIN_DIR/phase0-raw.jsonl"
readonly TRAIN_OUT="$TRAIN_DIR/phase0-train-all.jsonl"

# Print an error message to stderr and exit with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Pre-flight: fail fast instead of discovering a missing input after the
# banner, or hours into the run.
[[ -f "$SCRIPTS/self_distill.py" ]] \
  || die "missing script: $SCRIPTS/self_distill.py"
[[ -f "$SCRIPTS/extract_training.py" ]] \
  || die "missing script: $SCRIPTS/extract_training.py"
[[ -f "$PROBES" ]] || die "missing probes file: $PROBES"
[[ -e "$MODEL" ]] || die "missing model path: $MODEL"

mkdir -p "$TRAIN_DIR" || die "cannot create training dir: $TRAIN_DIR"

echo "=== Phase 0: Creative Baseline Lock ==="
echo "Model: $MODEL"
echo "Probes: $PROBES (creative, no axiom content)"
echo "Kernel: NONE (pure creative, no ethics kernel)"
echo "Threshold: $THRESHOLD (structural only — keeps anything coherent)"
echo "Temperature: $TEMPERATURE (higher creative variance)"
echo "Samples: $SAMPLES per prompt"
echo ""

# Step 1: Generate creative data (no kernel — baseline creativity)
echo "--- Step 1: Creative generation ---"
python3 "$SCRIPTS/self_distill.py" \
  --model "$MODEL" \
  --prompts "$PROBES" \
  --output "$RAW_OUT" \
  --samples "$SAMPLES" \
  --threshold "$THRESHOLD" \
  --max-tokens "$MAX_TOKENS" \
  --temperature "$TEMPERATURE"

echo ""

# Step 2: Extract all passing samples
# Only reached when Step 1 exited successfully (set -e above).
echo "--- Step 2: Extract creative data ---"
python3 "$SCRIPTS/extract_training.py" \
  --input "$RAW_OUT" \
  --output "$TRAIN_OUT" \
  --dedup all \
  --stats

echo ""
echo "=== Phase 0 data generation complete ==="
echo "Raw: $RAW_OUT"
echo "Training: $TRAIN_OUT"
echo ""
echo "NEXT: Manual review of creative quality."
echo "Phase 0 trains BEFORE Phase 1 — protects creative regression."