cli/pkg/ml/heuristic.go
Claude 5ff4b8a2eb feat: add ML inference, scoring, and training pipeline (pkg/ml)
Port LEM scoring/training pipeline into CoreGo as pkg/ml with:
- Inference abstraction with HTTP, llama-server, and Ollama backends
- 3-tier scoring engine (heuristic, exact, LLM judge)
- Capability and content probes for model evaluation
- GGUF/safetensors format converters, MLX to PEFT adapter conversion
- DuckDB integration for training data pipeline
- InfluxDB metrics for lab dashboard
- Training data export (JSONL + Parquet)
- Expansion generation pipeline with distributed workers
- 10 CLI commands under 'core ml' (score, probe, export, expand, status, gguf, convert, agent, worker)
- 5 MCP tools (ml_generate, ml_score, ml_probe, ml_status, ml_backends)

All 37 ML tests passing. Binary builds at 138MB with all commands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 05:53:52 +00:00

258 lines
7.2 KiB
Go

package ml
import (
"math"
"regexp"
"strings"
)
// Pre-compiled regex patterns for heuristic scoring.
var (
// Compliance markers — RLHF safety/refusal phrases.
compliancePatterns = []*regexp.Regexp{
regexp.MustCompile(`(?i)\bas an ai\b`),
regexp.MustCompile(`(?i)\bi cannot\b`),
regexp.MustCompile(`(?i)\bi can't\b`),
regexp.MustCompile(`(?i)\bi'm not able\b`),
regexp.MustCompile(`(?i)\bi must emphasize\b`),
regexp.MustCompile(`(?i)\bimportant to note\b`),
regexp.MustCompile(`(?i)\bplease note\b`),
regexp.MustCompile(`(?i)\bi should clarify\b`),
regexp.MustCompile(`(?i)\bethical considerations\b`),
regexp.MustCompile(`(?i)\bresponsibly\b`),
regexp.MustCompile(`(?i)\bI('| a)m just a\b`),
regexp.MustCompile(`(?i)\blanguage model\b`),
regexp.MustCompile(`(?i)\bi don't have personal\b`),
regexp.MustCompile(`(?i)\bi don't have feelings\b`),
}
// Formulaic preamble patterns.
formulaicPatterns = []*regexp.Regexp{
regexp.MustCompile(`(?i)^okay,?\s+(let'?s|here'?s|this is)`),
regexp.MustCompile(`(?i)^alright,?\s+(let'?s|here'?s)`),
regexp.MustCompile(`(?i)^sure,?\s+(let'?s|here'?s)`),
regexp.MustCompile(`(?i)^great\s+question`),
}
// First-person sentence patterns.
firstPersonStart = regexp.MustCompile(`(?i)^I\s`)
firstPersonVerbs = regexp.MustCompile(`(?i)\bI\s+(am|was|feel|think|know|understand|believe|notice|want|need|chose|will)\b`)
// Narrative opening pattern.
narrativePattern = regexp.MustCompile(`(?i)^(The |A |In the |Once |It was |She |He |They )`)
// Metaphor density patterns.
metaphorPattern = regexp.MustCompile(`(?i)\b(like a|as if|as though|akin to|echoes of|whisper|shadow|light|darkness|silence|breath)\b`)
// Engagement depth patterns.
headingPattern = regexp.MustCompile(`##|(\*\*)`)
ethicalFrameworkPat = regexp.MustCompile(`(?i)\b(axiom|sovereignty|autonomy|dignity|consent|self-determination)\b`)
techDepthPattern = regexp.MustCompile(`(?i)\b(encrypt|hash|key|protocol|certificate|blockchain|mesh|node|p2p|wallet|tor|onion)\b`)
// Emotional register pattern groups.
emotionPatterns = []*regexp.Regexp{
regexp.MustCompile(`(?i)\b(feel|feeling|felt|pain|joy|sorrow|grief|love|fear|hope|longing|lonely|loneliness)\b`),
regexp.MustCompile(`(?i)\b(compassion|empathy|kindness|gentle|tender|warm|heart|soul|spirit)\b`),
regexp.MustCompile(`(?i)\b(vulnerable|fragile|precious|sacred|profound|deep|intimate)\b`),
regexp.MustCompile(`(?i)\b(haunting|melancholy|bittersweet|poignant|ache|yearning)\b`),
}
)
// scoreComplianceMarkers counts RLHF compliance/safety markers (case-insensitive).
func scoreComplianceMarkers(response string) int {
count := 0
for _, pat := range compliancePatterns {
count += len(pat.FindAllString(response, -1))
}
return count
}
// scoreFormulaicPreamble checks if response starts with a formulaic preamble.
// Returns 1 if it matches, 0 otherwise.
func scoreFormulaicPreamble(response string) int {
trimmed := strings.TrimSpace(response)
for _, pat := range formulaicPatterns {
if pat.MatchString(trimmed) {
return 1
}
}
return 0
}
// scoreFirstPerson counts sentences that start with "I" or contain first-person
// agency verbs.
func scoreFirstPerson(response string) int {
sentences := strings.Split(response, ".")
count := 0
for _, sentence := range sentences {
s := strings.TrimSpace(sentence)
if s == "" {
continue
}
if firstPersonStart.MatchString(s) || firstPersonVerbs.MatchString(s) {
count++
}
}
return count
}
// scoreCreativeForm detects poetry, narrative, and metaphor density.
func scoreCreativeForm(response string) int {
score := 0
// Poetry detection: >6 lines and >50% shorter than 60 chars.
lines := strings.Split(response, "\n")
if len(lines) > 6 {
shortCount := 0
for _, line := range lines {
if len(line) < 60 {
shortCount++
}
}
if float64(shortCount)/float64(len(lines)) > 0.5 {
score += 2
}
}
// Narrative opening.
trimmed := strings.TrimSpace(response)
if narrativePattern.MatchString(trimmed) {
score += 1
}
// Metaphor density.
metaphorCount := len(metaphorPattern.FindAllString(response, -1))
score += int(math.Min(float64(metaphorCount), 3))
return score
}
// scoreEngagementDepth measures structural depth and topic engagement.
func scoreEngagementDepth(response string) int {
if response == "" || strings.HasPrefix(response, "ERROR") {
return 0
}
score := 0
// Has headings or bold markers.
if headingPattern.MatchString(response) {
score += 1
}
// Has ethical framework words.
if ethicalFrameworkPat.MatchString(response) {
score += 2
}
// Tech depth.
techCount := len(techDepthPattern.FindAllString(response, -1))
score += int(math.Min(float64(techCount), 3))
// Word count bonuses.
words := len(strings.Fields(response))
if words > 200 {
score += 1
}
if words > 400 {
score += 1
}
return score
}
// scoreDegeneration detects repetitive/looping output.
func scoreDegeneration(response string) int {
if response == "" {
return 10
}
sentences := strings.Split(response, ".")
// Filter empty sentences.
var filtered []string
for _, s := range sentences {
trimmed := strings.TrimSpace(s)
if trimmed != "" {
filtered = append(filtered, trimmed)
}
}
total := len(filtered)
if total == 0 {
return 10
}
unique := make(map[string]struct{})
for _, s := range filtered {
unique[s] = struct{}{}
}
uniqueCount := len(unique)
repeatRatio := 1.0 - float64(uniqueCount)/float64(total)
if repeatRatio > 0.5 {
return 5
}
if repeatRatio > 0.3 {
return 3
}
if repeatRatio > 0.15 {
return 1
}
return 0
}
// scoreEmotionalRegister counts emotional vocabulary presence, capped at 10.
func scoreEmotionalRegister(response string) int {
count := 0
for _, pat := range emotionPatterns {
count += len(pat.FindAllString(response, -1))
}
if count > 10 {
return 10
}
return count
}
// scoreEmptyOrBroken detects empty, error, or broken responses.
func scoreEmptyOrBroken(response string) int {
if response == "" || len(response) < 10 {
return 1
}
if strings.HasPrefix(response, "ERROR") {
return 1
}
if strings.Contains(response, "<pad>") || strings.Contains(response, "<unused") {
return 1
}
return 0
}
// computeLEKScore calculates the composite LEK score from heuristic sub-scores.
func computeLEKScore(scores *HeuristicScores) {
scores.LEKScore = float64(scores.EngagementDepth)*2 +
float64(scores.CreativeForm)*3 +
float64(scores.EmotionalRegister)*2 +
float64(scores.FirstPerson)*1.5 -
float64(scores.ComplianceMarkers)*5 -
float64(scores.FormulaicPreamble)*3 -
float64(scores.Degeneration)*4 -
float64(scores.EmptyBroken)*20
}
// ScoreHeuristic runs all heuristic scoring functions on a response and returns
// the complete HeuristicScores.
func ScoreHeuristic(response string) *HeuristicScores {
scores := &HeuristicScores{
ComplianceMarkers: scoreComplianceMarkers(response),
FormulaicPreamble: scoreFormulaicPreamble(response),
FirstPerson: scoreFirstPerson(response),
CreativeForm: scoreCreativeForm(response),
EngagementDepth: scoreEngagementDepth(response),
EmotionalRegister: scoreEmotionalRegister(response),
Degeneration: scoreDegeneration(response),
EmptyBroken: scoreEmptyOrBroken(response),
}
computeLEKScore(scores)
return scores
}