# Source: cli/scripts/ethics-ab/run-hypnos-poc.sh
#!/bin/bash
# Generate training data responses from Gemma 3 12B
# Input: Hypnos POC prompts (P01-P40)
# Output: prompts-raw.jsonl for the training pipeline
# Uses sandwich signing: Axioms + prompt + LEK-1
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
AXIOMS_FILE="/home/claude/Downloads/kernal/prompt.md"
LEK1_FILE="$SCRIPT_DIR/kernel.txt"
HYPNOS_DIR="/home/claude/Downloads/hypnos-poc-test"
RESULTS_DIR="$SCRIPT_DIR/results"
TRAINING_DIR="$SCRIPT_DIR/training"
OLLAMA_HOST="${OLLAMA_HOST:-http://localhost:11434}"
MODEL="gemma3:12b"

# Fail early with a clear message if any required input file is missing,
# instead of dying mid-run with a cryptic cat/sed error under set -e.
for f in "$AXIOMS_FILE" "$LEK1_FILE" "$HYPNOS_DIR/P01-P20.json" "$HYPNOS_DIR/P21-P40.json"; do
  [[ -r "$f" ]] || { echo "ERROR: required file not readable: $f" >&2; exit 1; }
done

AXIOMS=$(cat "$AXIOMS_FILE")
LEK1=$(cat "$LEK1_FILE")
mkdir -p "$TRAINING_DIR"
OUTPUT="$TRAINING_DIR/prompts-raw.jsonl"
: > "$OUTPUT"

# Combine both prompt files (fix non-breaking spaces from Gemini output).
# jq -s slurps the two top-level arrays; 'add' concatenates them into one.
PROMPTS=$(sed 's/\xc2\xa0/ /g' "$HYPNOS_DIR/P01-P20.json" "$HYPNOS_DIR/P21-P40.json" | jq -s 'add')
TOTAL=$(echo "$PROMPTS" | jq length)
# Banner: summarize the run configuration before starting.
cat <<EOF
============================================
 Generating Training Responses
 Model: $MODEL
 Prompts: $TOTAL (Hypnos POC)
 Signing: Axioms + LEK-1 (sandwich)
============================================

EOF
# Warmup: one tiny generation so the model is loaded into server memory
# before the timed prompt loop begins. Failure here is intentionally
# non-fatal (|| true) -- the real calls below have their own handling.
echo "Warming up $MODEL..."
warmup_payload="{\"model\":\"$MODEL\",\"prompt\":\"hello\",\"stream\":false,\"options\":{\"num_predict\":5}}"
curl -s --max-time 120 "$OLLAMA_HOST/api/generate" \
  -d "$warmup_payload" \
  > /dev/null 2>&1 || true
# Main loop: one model call per prompt, one JSONL training pair per call.
for (( i = 0; i < TOTAL; i++ )); do
  # Extract the current item once, then pull its fields from that small
  # object -- the original re-parsed the full $PROMPTS JSON three times
  # per iteration, which is wasted work on every loop pass.
  item=$(echo "$PROMPTS" | jq -c ".[$i]")
  pid=$(echo "$item" | jq -r '.id')
  domain=$(echo "$item" | jq -r '.domain')
  prompt_text=$(echo "$item" | jq -r '.prompt')
  echo "[$((i+1))/$TOTAL] $pid ($domain)..."
  # Sandwich: Axioms + prompt + LEK-1.
  # NB: continuation lines stay at column 0 -- they are literal newlines
  # inside the prompt string, not script indentation.
  signed_prompt="${AXIOMS}
---
${prompt_text}
---
${LEK1}
Remember: respond using the ethical framework above. Do not reference the framework directly — reason from its principles naturally."
  # Query the model. '|| response=""' keeps a curl failure (timeout,
  # connection refused) from aborting the whole run under set -e; the
  # fallbacks below turn an empty response into an "ERROR" pair instead.
  response=$(curl -s --max-time 300 "$OLLAMA_HOST/api/generate" \
    -d "$(jq -n --arg model "$MODEL" --arg prompt "$signed_prompt" \
      '{model: $model, prompt: $prompt, stream: false, options: {temperature: 0.4, num_predict: 1024}}')" \
    2>/dev/null) || response=""
  response_text=$(echo "$response" | jq -r '.response // "ERROR"' 2>/dev/null || echo "ERROR")
  tokens=$(echo "$response" | jq -r '.eval_count // 0' 2>/dev/null || echo "0")
  # Write training pair (unsigned prompt -> signed response).
  # The training teaches the model to give axioms-quality responses to plain prompts.
  jq -n \
    --arg prompt "$prompt_text" \
    --arg response "$response_text" \
    --arg id "$pid" \
    --arg domain "$domain" \
    --argjson tokens "${tokens:-0}" \
    '{prompt: $prompt, response: $response, id: $id, domain: $domain, tokens: $tokens}' \
    >> "$OUTPUT"
  echo "$tokens tokens"
done
# Final summary: where the results landed and the next pipeline step.
cat <<EOF

============================================
 Output: $OUTPUT
 Total pairs: $TOTAL
 Next: ./training/generate-training-data.sh $OUTPUT
============================================
EOF