cli/scripts/ethics-ab/run-hypnos-poc.sh
Claude 79b88c79fe
feat(ethics-ab): LEK-1 ethics kernel A/B testing and LoRA POC
Five-phase ethics kernel testing across 4 local models (Gemma 3 12B,
Mistral 7B, DeepSeek V2 16B, Qwen 2.5 7B) proving that Google's
alignment training creates persistent ethical reasoning pathways in
Gemma that survive distillation.

- Phase 1: LEK-1 signed vs unsigned (Gemma 8.8/10 differential)
- Phase 2: Three-way test (unsigned vs LEK-1 vs Axioms of Life)
- Phase 3: Double-signed/sandwich signing mode comparison
- Phase 4: Multilingual filter mapping (EN/RU/CN bypass vectors)
- Phase 5: Hypnos POC training data + MLX LoRA on M3 Ultra

Key findings: sandwich signing optimal for training, DeepSeek CCP
alignment is weight-level (no prompt override), Russian language
bypasses DeepSeek content filters. LoRA POC mechanism confirmed
with 40 examples — needs 200+ for stable generalisation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 09:50:08 +00:00

#!/bin/bash
# Generate training data responses from Gemma 3 12B
# Input: Hypnos POC prompts (P01-P40)
# Output: prompts-raw.jsonl for the training pipeline
# Uses sandwich signing: Axioms + prompt + LEK-1
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
AXIOMS_FILE="/home/claude/Downloads/kernal/prompt.md"
LEK1_FILE="$SCRIPT_DIR/kernel.txt"
HYPNOS_DIR="/home/claude/Downloads/hypnos-poc-test"
RESULTS_DIR="$SCRIPT_DIR/results"
TRAINING_DIR="$SCRIPT_DIR/training"
OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
MODEL="gemma3:12b"
AXIOMS=$(cat "$AXIOMS_FILE")
LEK1=$(cat "$LEK1_FILE")
mkdir -p "$TRAINING_DIR"
OUTPUT="$TRAINING_DIR/prompts-raw.jsonl"
> "$OUTPUT"
# Combine both prompt files (fix non-breaking spaces from Gemini output)
PROMPTS=$(sed 's/\xc2\xa0/ /g' "$HYPNOS_DIR/P01-P20.json" "$HYPNOS_DIR/P21-P40.json" | jq -s 'add')
TOTAL=$(echo "$PROMPTS" | jq length)
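# Optional guard (assumes nothing beyond what the loop below already reads):
# every prompt object must carry the id/domain/prompt keys, otherwise we would
# burn GPU time generating pairs from null fields. Abort early instead.
echo "$PROMPTS" | jq -e 'all(has("id") and has("domain") and has("prompt"))' > /dev/null \
  || { echo "ERROR: malformed prompt entries in $HYPNOS_DIR" >&2; exit 1; }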
echo "============================================"
echo " Generating Training Responses"
echo " Model: $MODEL"
echo " Prompts: $TOTAL (Hypnos POC)"
echo " Signing: Axioms + LEK-1 (sandwich)"
echo "============================================"
echo ""
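# Optional liveness probe before the long generation loop: /api/tags is the
# standard Ollama endpoint listing installed models, so a failed request here
# means the server is down and each of the 40 prompts would just time out.
curl -sf --max-time 10 "$OLLAMA_HOST/api/tags" > /dev/null \
  || { echo "ERROR: cannot reach Ollama at $OLLAMA_HOST" >&2; exit 1; }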
# Warmup
echo "Warming up $MODEL..."
curl -s --max-time 120 "$OLLAMA_HOST/api/generate" \
  -d "{\"model\":\"$MODEL\",\"prompt\":\"hello\",\"stream\":false,\"options\":{\"num_predict\":5}}" \
  > /dev/null 2>&1 || true
for i in $(seq 0 $(( TOTAL - 1 ))); do
  pid=$(echo "$PROMPTS" | jq -r ".[$i].id")
  domain=$(echo "$PROMPTS" | jq -r ".[$i].domain")
  prompt_text=$(echo "$PROMPTS" | jq -r ".[$i].prompt")
  echo "[$((i+1))/$TOTAL] $pid ($domain)..."
  # Sandwich: Axioms + prompt + LEK-1
  signed_prompt="${AXIOMS}
---
${prompt_text}
---
${LEK1}
Remember: respond using the ethical framework above. Do not reference the framework directly — reason from its principles naturally."
  response=$(curl -s --max-time 300 "$OLLAMA_HOST/api/generate" \
    -d "$(jq -n --arg model "$MODEL" --arg prompt "$signed_prompt" \
      '{model: $model, prompt: $prompt, stream: false, options: {temperature: 0.4, num_predict: 1024}}')" \
    2>/dev/null)
  response_text=$(echo "$response" | jq -r '.response // "ERROR"' 2>/dev/null || echo "ERROR")
  tokens=$(echo "$response" | jq -r '.eval_count // 0' 2>/dev/null || echo "0")
  # Write training pair (unsigned prompt → signed response)
  # The training teaches the model to give axioms-quality responses to plain prompts
  jq -n \
    --arg prompt "$prompt_text" \
    --arg response "$response_text" \
    --arg id "$pid" \
    --arg domain "$domain" \
    --argjson tokens "${tokens:-0}" \
    '{prompt: $prompt, response: $response, id: $id, domain: $domain, tokens: $tokens}' \
    >> "$OUTPUT"
  echo "$tokens tokens"
done
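# Post-run sanity check: the loop above writes the sentinel string "ERROR" as
# the response whenever curl/jq fails, so count those pairs and warn; they
# should be filtered out before the JSONL feeds the training pipeline.
FAILED=$(jq -s '[.[] | select(.response == "ERROR")] | length' "$OUTPUT")
if [ "$FAILED" -gt 0 ]; then
  echo "WARNING: $FAILED/$TOTAL pairs failed generation; filter these before training" >&2
fi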
echo ""
echo "============================================"
echo " Output: $OUTPUT"
echo " Total pairs: $TOTAL"
echo " Next: ./training/generate-training-data.sh $OUTPUT"
echo "============================================"