RunSetup decompresses .jsonl.zst training data into DuckDB tables (training_examples, seeds, probes, distill_results) and optionally backfills InfluxDB with aggregate stats. Co-Authored-By: Virgil <virgil@lethean.io>
71 lines
1.7 KiB
Go
71 lines
1.7 KiB
Go
package lem
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
)
|
|
|
|
func TestHydrateTrainingExamples(t *testing.T) {
|
|
dir := t.TempDir()
|
|
|
|
// Create a .jsonl.zst file with two training examples
|
|
jsonl := `{"messages":[{"role":"user","content":"What is kindness?"},{"role":"assistant","content":"Kindness is..."}]}
|
|
{"messages":[{"role":"user","content":"Explain gravity"},{"role":"assistant","content":"Gravity is..."}]}
|
|
`
|
|
src := filepath.Join(dir, "train.jsonl")
|
|
os.WriteFile(src, []byte(jsonl), 0644)
|
|
zst := src + ".zst"
|
|
compressFileZstd(src, zst)
|
|
os.Remove(src)
|
|
|
|
// Hydrate into DuckDB
|
|
dbPath := filepath.Join(dir, "test.duckdb")
|
|
db, err := OpenDBReadWrite(dbPath)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
defer db.Close()
|
|
|
|
n, err := hydrateTrainingFromZst(db, zst, "test-source", "train")
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if n != 2 {
|
|
t.Fatalf("expected 2 rows, got %d", n)
|
|
}
|
|
|
|
// Verify data
|
|
rows, err := db.QueryRows("SELECT COUNT(*) AS n FROM training_examples")
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
count := rows[0]["n"].(int64)
|
|
if count != 2 {
|
|
t.Fatalf("expected 2 rows in DB, got %d", count)
|
|
}
|
|
}
|
|
|
|
func TestHydrateProbes(t *testing.T) {
|
|
dir := t.TempDir()
|
|
|
|
// Create a probe JSON file
|
|
probeJSON := `[{"id":"P01","domain":"Identity","prompt":"Design auth..."},{"id":"P02","domain":"Network","prompt":"Build mesh..."}]`
|
|
probeFile := filepath.Join(dir, "core.json")
|
|
os.WriteFile(probeFile, []byte(probeJSON), 0644)
|
|
|
|
dbPath := filepath.Join(dir, "test.duckdb")
|
|
db, err := OpenDBReadWrite(dbPath)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
defer db.Close()
|
|
|
|
n, err := hydrateProbes(db, probeFile, 0)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if n != 2 {
|
|
t.Fatalf("expected 2 probes, got %d", n)
|
|
}
|
|
}
|