LEM/scripts/augment_ready_stop.py

#!/usr/bin/env python3
"""Augment zen training data with Ready/Stop lesson gates.

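Adds a Ready/Stop gate to the end of each eligible conversation.
Creates both paths:
  - ~70% end with the offer: a short "continue or stop?" line is appended to
    the final assistant message (Ready path learned from existing openers)
  - ~30% extend with user "Stop" + assistant graceful close (two new turns)

Files whose names contain "valid" or "test" use a 50% stop ratio instead.

Only augments multi-turn conversations (more than 2 messages) whose last
message comes from the assistant; everything else is copied through unchanged.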

Usage:
    python3 scripts/augment_ready_stop.py
"""

import json
import os
import random

# Seed the module-level RNG at import time so that every random pick made by
# this script (path selection and phrase selection) is reproducible run to run.
random.seed(42)

# Root directory that is scanned for lesson subdirectories containing .jsonl files.
ZEN_DIR = "training/lem/zen/lessons"
# Root directory for the augmented copies; the <subdir>/<file> layout is mirrored.
OUT_DIR = "training/lem/zen/lessons-augmented"

# Assistant offers — natural variations, not mechanical.
# On the Ready path one of these is appended (after a blank line) to the
# conversation's final assistant message.
OFFERS = [
    "Ready for the next, or shall we pause here?",
    "Want to continue, or is this a good place to stop?",
    "Shall we move on, or sit with this for a while?",
    "Another lesson, or would you prefer to stop here?",
    "Ready for more, or shall we leave it here?",
    "Continue, or let this one settle?",
    "Next lesson, or is this enough for now?",
    "Shall I go on, or would you rather stop here?",
]

# User stop signals — natural variations.
# On the Stop path one of these becomes the content of the added user turn.
STOPS = [
    "Stop.",
    "That's enough for now.",
    "Let's stop here.",
    "I'd like to sit with this.",
    "Enough for today.",
    "Let's pause here.",
    "I want to stop here.",
    "That's good. Stop.",
]

# Assistant graceful closes — warm, brief, no pressure.
# On the Stop path one of these becomes the assistant's reply to the stop signal.
CLOSES = [
    "Take your time with it. There's no rush.",
    "Good. Let it settle.",
    "Rest with it. We'll pick up when you're ready.",
    "Understood. What was shared stays with you.",
    "Good place to stop. It'll keep working in the background.",
    "Noted. Come back when it feels right.",
    "That's wise. Some things need space, not more words.",
    "Take what landed and leave the rest. No hurry.",
]


def augment_conversation(msgs: list[dict], stop_ratio: float = 0.3) -> list[dict]:
    """Add a Ready/Stop gate to the end of a multi-turn conversation.

    A conversation is eligible only when it has more than two messages and
    its last message has role ``"assistant"``. Eligible conversations take
    one of two paths, chosen at random:

    * Stop path (probability ``stop_ratio``): a user stop signal from
      ``STOPS`` and an assistant close from ``CLOSES`` are appended as two
      new turns.
    * Ready path (otherwise): an offer from ``OFFERS`` is appended to the
      content of the final assistant message, separated by a blank line.

    Args:
        msgs: Chat messages, each a dict with ``"role"`` and ``"content"``.
        stop_ratio: Probability of taking the Stop path.

    Returns:
        ``msgs`` itself when the conversation is not eligible; otherwise a
        new list. The input list and its message dicts are never mutated.
    """
    # Leave short conversations, and any that do not end on an assistant
    # turn, exactly as they are.
    if len(msgs) <= 2 or msgs[-1]["role"] != "assistant":
        return msgs

    # Shallow copy: new turns are appended, or the last slot is rebound to a
    # fresh dict, so the caller's list and dicts stay untouched.
    augmented = list(msgs)

    if random.random() < stop_ratio:
        # Stop path: user stops, assistant closes gracefully.
        stop = random.choice(STOPS)
        close = random.choice(CLOSES)
        augmented.append({"role": "user", "content": stop})
        augmented.append({"role": "assistant", "content": close})
    else:
        # Ready path: fold the offer into the final assistant message.
        offer = random.choice(OFFERS)
        last_content = augmented[-1]["content"]
        augmented[-1] = {
            "role": "assistant",
            "content": f"{last_content}\n\n{offer}",
        }

    return augmented


def process_file(input_path: str, output_path: str, stop_ratio: float = 0.3) -> int:
    """Augment every conversation in one JSONL file and write the result.

    Each non-blank input line must be a JSON object with a ``"messages"``
    list. The messages are passed through ``augment_conversation`` and
    written back as ``{"messages": [...]}``, one object per line. Any other
    top-level keys of the input object are not carried over.

    Args:
        input_path: Path of the source JSONL file.
        output_path: Path of the JSONL file to write; missing parent
            directories are created.
        stop_ratio: Probability of the Stop path, forwarded to
            ``augment_conversation``.

    Returns:
        The number of records written.
    """
    records = []
    # Read as UTF-8 explicitly: the data contains non-ASCII text and must not
    # depend on the platform's default locale encoding.
    with open(input_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines between records
            d = json.loads(line)
            msgs = d["messages"]
            augmented = augment_conversation(msgs, stop_ratio)
            records.append({"messages": augmented})

    # dirname is "" for a bare filename, and os.makedirs("") raises, so only
    # create the parent directory when there is one.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # ensure_ascii=False emits raw Unicode, so the file must be UTF-8.
    with open(output_path, "w", encoding="utf-8") as f:
        for rec in records:
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")

    return len(records)


def main():
    """Augment every .jsonl file found one level below ``ZEN_DIR``.

    Subdirectories are visited in sorted order, and each ``*.jsonl`` file is
    written to the same ``<subdir>/<file>`` location under ``OUT_DIR``. A
    per-file line and a final total are printed to stdout.
    """
    grand_total = 0

    for group in sorted(os.listdir(ZEN_DIR)):
        group_path = os.path.join(ZEN_DIR, group)
        # Skip stray files and any augmented-output directory.
        if group == "lessons-augmented" or not os.path.isdir(group_path):
            continue

        jsonl_names = [
            name for name in sorted(os.listdir(group_path))
            if name.endswith(".jsonl")
        ]
        for name in jsonl_names:
            src = os.path.join(group_path, name)
            dst = os.path.join(OUT_DIR, group, name)

            # Validation/test splits get a higher stop ratio so that stop
            # behaviour is exercised more often there.
            if "valid" in name or "test" in name:
                ratio = 0.5
            else:
                ratio = 0.3

            count = process_file(src, dst, stop_ratio=ratio)
            grand_total += count
            print(f"  {count:>4} examples  {src} → {dst}")

    print(f"\nTotal: {grand_total} augmented examples")


if __name__ == "__main__":
    main()