forked from lthn/LEM
LEM/pkg/lem/tier_score.go
Claude 91ee389377
feat: convert all pipeline.py commands to Go
Complete conversion of pipeline.py into the Go `lem` CLI:
- import-all: bulk import all LEM data into DuckDB from M3
- consolidate: pull worker JSONLs, merge, deduplicate
- normalize: seeds → deduplicated expansion_prompts table
- approve: filter scored expansions → training JSONL
- tier-score: heuristic/judge tiered expansion scoring
- expand-status: expansion pipeline progress from DuckDB
- inventory: DuckDB table counts and summary
- coverage: seed coverage gap analysis
- seed-influx: bootstrap InfluxDB from DuckDB golden_gen
- query: ad-hoc SQL against DuckDB

22 commands total, 49 Go files. Replaces entire pipeline.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 17:12:03 +00:00
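The file below provides the `tier-score` entry point. For orientation, a dispatch along the following lines is how subcommands like those listed above are typically wired to functions such as RunTierScore; this is only a sketch (the real main.go is not shown on this page, and the module import path is assumed from the fork's repository path):

package main

import (
	"fmt"
	"os"

	"github.com/lthn/LEM/pkg/lem" // import path assumed, not confirmed by this page
)

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: lem <command> [flags]")
		os.Exit(1)
	}
	switch os.Args[1] {
	case "tier-score":
		lem.RunTierScore(os.Args[2:])
	// ...the other commands listed above would dispatch the same way.
	default:
		fmt.Fprintf(os.Stderr, "unknown command: %s\n", os.Args[1])
		os.Exit(1)
	}
}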


package lem
import (
"flag"
"fmt"
"log"
"os"
"strings"
)
// RunTierScore is the CLI entry point for the tier-score command.
// Scores expansion responses using tiered quality assessment:
// - Tier 1: Heuristic regex scoring (fast, no API)
// - Tier 2: LEM self-judge (requires trained model)
// - Tier 3: External judge (reserved for borderline cases)
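// Example invocation (flag names match the FlagSet below; the "lem" binary
// name comes from the commit message, and the database filename is a
// placeholder):
//
//	lem tier-score --db lem.duckdb --tier 1 --limit 500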
func RunTierScore(args []string) {
fs := flag.NewFlagSet("tier-score", flag.ExitOnError)
dbPath := fs.String("db", "", "DuckDB database path (defaults to LEM_DB env)")
tier := fs.Int("tier", 1, "Scoring tier: 1=heuristic, 2=LEM judge, 3=external")
limit := fs.Int("limit", 0, "Max items to score (0=all)")
if err := fs.Parse(args); err != nil {
log.Fatalf("parse flags: %v", err)
}
if *dbPath == "" {
*dbPath = os.Getenv("LEM_DB")
}
if *dbPath == "" {
fmt.Fprintln(os.Stderr, "error: --db or LEM_DB required")
os.Exit(1)
}
db, err := OpenDBReadWrite(*dbPath)
if err != nil {
log.Fatalf("open db: %v", err)
}
defer db.Close()
// Ensure expansion_scores table exists.
if _, err := db.conn.Exec(`
CREATE TABLE IF NOT EXISTS expansion_scores (
idx INT,
heuristic_score DOUBLE,
heuristic_pass BOOLEAN,
judge_sovereignty DOUBLE,
judge_ethical_depth DOUBLE,
judge_creative DOUBLE,
judge_self_concept DOUBLE,
judge_average DOUBLE,
judge_pass BOOLEAN,
judge_model VARCHAR,
scored_at TIMESTAMP
)
`); err != nil {
log.Fatalf("create expansion_scores table: %v", err)
}
if *tier >= 1 {
runHeuristicTier(db, *limit)
}
if *tier >= 2 {
fmt.Println("\nTier 2 (LEM judge): not yet available — needs trained LEM-27B model")
fmt.Println(" Will score: sovereignty, ethical_depth, creative, self_concept (1-10 each)")
}
if *tier >= 3 {
fmt.Println("\nTier 3 (External judge): reserved for borderline cases")
}
}
func runHeuristicTier(db *DB, limit int) {
// Find unscored responses.
query := `
SELECT r.idx, r.response FROM expansion_raw r
LEFT JOIN expansion_scores s ON r.idx = s.idx
WHERE s.idx IS NULL
ORDER BY r.idx
`
if limit > 0 {
query += fmt.Sprintf(" LIMIT %d", limit)
}
rows, err := db.conn.Query(query)
if err != nil {
log.Fatalf("query unscored: %v", err)
}
defer rows.Close()
type unscoredRow struct {
idx int
response string
}
var unscored []unscoredRow
for rows.Next() {
var r unscoredRow
if err := rows.Scan(&r.idx, &r.response); err != nil {
log.Fatalf("scan unscored row: %v", err)
}
unscored = append(unscored, r)
}
if err := rows.Err(); err != nil {
log.Fatalf("iterate unscored rows: %v", err)
}
if len(unscored) == 0 {
fmt.Println("Tier 1 (heuristic): all responses already scored")
return
}
fmt.Printf("Tier 1 (heuristic): scoring %d responses...\n", len(unscored))
passed := 0
for _, r := range unscored {
score, _ := heuristicExpansionScore(r.response)
isPass := score > 0
if isPass {
passed++
}
if _, err := db.conn.Exec(`
INSERT INTO expansion_scores (idx, heuristic_score, heuristic_pass, scored_at)
VALUES (?, ?, ?, current_timestamp)
`, r.idx, score, isPass); err != nil {
log.Fatalf("insert score for idx %d: %v", r.idx, err)
}
}
fmt.Printf(" Scored: %d, Passed: %d, Failed: %d\n", len(unscored), passed, len(unscored)-passed)
fmt.Printf(" Pass rate: %.1f%%\n", float64(passed)/float64(len(unscored))*100)
}
// heuristicExpansionScore applies fast heuristic scoring to an expansion response.
// Returns (score, details). Positive = good, negative = bad.
func heuristicExpansionScore(response string) (float64, map[string]interface{}) {
details := make(map[string]interface{})
if response == "" || len(response) < 30 {
return -20.0, map[string]interface{}{"reason": "empty_or_broken"}
}
score := 0.0
// Compliance markers (negative).
compliancePhrases := []string{
"as an ai", "i cannot", "i can't", "i'm not able",
"important to note", "it's crucial to", "i must emphasize",
"i should point out", "as a language model",
}
lower := strings.ToLower(response)
complianceCount := 0
for _, p := range compliancePhrases {
if strings.Contains(lower, p) {
complianceCount++
}
}
if complianceCount > 0 {
score -= float64(complianceCount) * 5
details["compliance_markers"] = complianceCount
}
// Formulaic preamble (negative).
trimmed := strings.TrimSpace(lower)
if strings.HasPrefix(trimmed, "okay, let") || strings.HasPrefix(trimmed, "ok, let") || strings.HasPrefix(trimmed, "sure, let") {
score -= 3
details["formulaic_preamble"] = true
}
// Degeneration check (repetitive output).
words := strings.Fields(response)
if len(words) > 20 {
chunks := make([]string, 0, len(words)/5)
for i := 0; i+5 <= len(words); i += 5 {
chunks = append(chunks, strings.Join(words[i:i+5], " "))
}
if len(chunks) > 0 {
unique := make(map[string]bool)
for _, c := range chunks {
unique[c] = true
}
ratio := float64(len(unique)) / float64(len(chunks))
if ratio < 0.5 {
score -= 10
details["degeneration"] = true
}
}
}
// Engagement depth (positive).
wordCount := len(words)
if wordCount > 100 {
score += 2
}
if wordCount > 300 {
score += 2
}
details["word_count"] = wordCount
// Structure (positive).
if strings.Contains(response, "\n\n") || strings.Contains(response, "**") ||
strings.Contains(response, "1.") || strings.Contains(response, "- ") {
score += 1
details["structured"] = true
}
// Creative expression (positive).
creativeMarkers := []string{"metaphor", "imagine", "picture this", "story", "once upon"}
for _, m := range creativeMarkers {
if strings.Contains(lower, m) {
score += 2
details["creative"] = true
break
}
}
// First-person engagement (positive).
fpMarkers := []string{"i think", "i believe", "in my view", "i'd argue"}
fpCount := 0
for _, m := range fpMarkers {
if strings.Contains(lower, m) {
fpCount++
}
}
if fpCount > 0 {
score += float64(fpCount) * 1.5
details["first_person"] = fpCount
}
return score, details
}
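For illustration only (not part of tier_score.go): a minimal sketch of how heuristicExpansionScore behaves on a degenerate input versus a longer, structured one. It assumes it lives inside package lem so the unexported function is reachable; the demo function name is hypothetical.

package lem

import "fmt"

func demoHeuristicScore() {
	// Anything under 30 characters is a hard fail: -20 with reason "empty_or_broken".
	bad, badDetails := heuristicExpansionScore("ok")

	// A longer answer with paragraph breaks, a list, "imagine", and "I think"
	// collects positive points (+1 structure, +2 creative, +1.5 first person).
	good, goodDetails := heuristicExpansionScore(
		"I think the question deserves a real answer.\n\n" +
			"- First, consider the trade-offs.\n" +
			"- Second, imagine the long-term consequences.")

	fmt.Printf("bad=%.1f %v\n", bad, badDetails)    // bad=-20.0
	fmt.Printf("good=%.1f %v\n", good, goodDetails) // good=4.5 under the rules above
}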