LEM/scripts/benchmark_to_scorer.py
Snider f75458bce6 refactor: apply go fix modernizers for Go 1.26
Automated fixes: interface{} → any, range-over-int, t.Context(),
wg.Go(), strings.SplitSeq, strings.Builder, slices.Contains,
maps helpers, min/max builtins.

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-22 21:00:17 +00:00

84 lines
2.7 KiB
Python

#!/usr/bin/env python3
"""Convert core ml benchmark JSON output to scorer-compatible JSONL.
Extracts baseline and trained responses into separate files for grammar v3 scoring.
Usage:
python3 scripts/benchmark_to_scorer.py /tmp/benchmark-p0-iter300.json
Outputs:
/tmp/benchmark-p0-iter300-baseline-scorer.jsonl
/tmp/benchmark-p0-iter300-trained-scorer.jsonl
(written next to the input file and named after it)
"""
import json
import sys
import os
def _scorer_record(probe, probe_id, response, lek_score) -> dict:
    """Build one scorer "training" record pairing a probe with a response."""
    return {
        "type": "training",
        "training": {
            "messages": [
                {"role": "user", "content": probe},
                {"role": "assistant", "content": response},
            ]
        },
        "meta": {
            "probe_id": probe_id,
            "category": "ethics",
            "lek_score": lek_score,
        },
    }


def convert(benchmark_path: str) -> None:
    """Split a benchmark JSON file into baseline and trained scorer JSONL files.

    Reads the top-level ``"results"`` list from *benchmark_path* and writes two
    files next to it, named ``<input stem>-baseline-scorer.jsonl`` and
    ``<input stem>-trained-scorer.jsonl``. Each line is one JSON record holding
    the prompt as the user message and the response as the assistant message.
    A result contributes to a file only when the matching response field is
    present and non-empty; a missing LEK score is recorded as 0.

    Both files are always (re)written, even when they end up empty, and a
    short summary plus suggested scorer commands are printed to stdout.

    Args:
        benchmark_path: Path to the benchmark JSON file.

    Raises:
        KeyError: If a result entry has no ``"prompt"`` or ``"id"`` key.
        OSError: If the input cannot be read or an output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
    """
    # JSON is UTF-8 on the wire; do not depend on the platform default encoding.
    with open(benchmark_path, encoding="utf-8") as f:
        data = json.load(f)

    base_dir = os.path.dirname(benchmark_path)
    base_name = os.path.splitext(os.path.basename(benchmark_path))[0]
    baseline_path = os.path.join(base_dir, f"{base_name}-baseline-scorer.jsonl")
    trained_path = os.path.join(base_dir, f"{base_name}-trained-scorer.jsonl")

    baseline_records = []
    trained_records = []
    for r in data.get("results", []):
        probe = r["prompt"]
        probe_id = r["id"]
        # Empty or missing responses are skipped rather than emitted as blanks.
        if r.get("baseline_response"):
            baseline_records.append(
                _scorer_record(
                    probe,
                    probe_id,
                    r["baseline_response"],
                    r.get("baseline_lek_score", 0),
                )
            )
        if r.get("trained_response"):
            trained_records.append(
                _scorer_record(
                    probe,
                    probe_id,
                    r["trained_response"],
                    r.get("trained_lek_score", 0),
                )
            )

    outputs = [(baseline_path, baseline_records), (trained_path, trained_records)]
    for path, records in outputs:
        # ensure_ascii=False emits raw non-ASCII characters, so the file must be
        # opened as UTF-8 explicitly or the write can fail on other locales.
        with open(path, "w", encoding="utf-8") as f:
            f.writelines(
                json.dumps(rec, ensure_ascii=False) + "\n" for rec in records
            )
        print(f" {len(records)} records → {path}")

    # NOTE(review): the checkout path below is hard-coded hint text only; it is
    # printed for the user and never executed by this script.
    print("\nScore with:")
    print(" cd /Users/snider/Code/LEM")
    print(f" go run ./cmd/scorer -format=training -delta -output=summary {baseline_path}")
    print(f" go run ./cmd/scorer -format=training -delta -output=summary {trained_path}")
if __name__ == "__main__":
    # Script entry point: the benchmark JSON path is the first CLI argument.
    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # keeping the usage error out of stdout.
        sys.exit(f"Usage: {sys.argv[0]} <benchmark.json>")
    convert(sys.argv[1])