go-ai/cmd/brain-seed/main.go
Snider dfe474a239
Some checks failed
Security Scan / security (push) Successful in 9s
Test / test (push) Failing after 3m8s
refactor: swap pkg/{io,log,i18n} imports to go-io/go-log/go-i18n
Co-Authored-By: Virgil <virgil@lethean.io>
2026-03-06 12:35:42 +00:00

502 lines
14 KiB
Go

// SPDX-License-Identifier: EUPL-1.2
// brain-seed imports Claude Code MEMORY.md files into the OpenBrain knowledge
// store via the MCP HTTP API (brain_remember tool). The Laravel app handles
// embedding, Qdrant storage, and MariaDB dual-write internally.
//
// Usage:
//
// go run ./cmd/brain-seed -api-key YOUR_KEY
// go run ./cmd/brain-seed -api-key YOUR_KEY -api https://lthn.sh/api/v1/mcp
// go run ./cmd/brain-seed -api-key YOUR_KEY -dry-run
// go run ./cmd/brain-seed -api-key YOUR_KEY -plans
// go run ./cmd/brain-seed -api-key YOUR_KEY -claude-md # Also import CLAUDE.md files
package main
import (
"bytes"
"crypto/tls"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"regexp"
"strings"
"time"
)
// Command-line flags. They are registered on the default flag set when the
// package initialises and carry their defaults until main calls flag.Parse.

// Connection and attribution settings.
var (
	apiURL = flag.String("api", "https://lthn.sh/api/v1/mcp", "MCP API base URL")
	apiKey = flag.String("api-key", "", "MCP API key (Bearer token)")
	server = flag.String("server", "hosthub-agent", "MCP server ID")
	agent  = flag.String("agent", "charon", "Agent ID for attribution")
)

// Run mode and optional extra sources.
var (
	dryRun   = flag.Bool("dry-run", false, "Preview without storing")
	plans    = flag.Bool("plans", false, "Also import plan documents")
	claudeMd = flag.Bool("claude-md", false, "Also import CLAUDE.md files")
)

// Scan-path overrides; an empty value selects the default named in the help text.
var (
	memoryPath = flag.String("memory-path", "", "Override memory scan path (default: ~/.claude/projects/*/memory/)")
	planPath   = flag.String("plan-path", "", "Override plan scan path (default: ~/Code/*/docs/plans/)")
	codePath   = flag.String("code-path", "", "Override code root for CLAUDE.md scan (default: ~/Code)")
)

// maxChars is the per-section size limit handed to truncate before upload.
var maxChars = flag.Int("max-chars", 3800, "Max chars per section (embeddinggemma limit ~4000)")
// httpClient is the shared client used for every MCP API call. Requests time
// out after 30 seconds. Certificate verification is always enabled
// (InsecureSkipVerify is false); no host or TLD is exempted.
//
// The transport is cloned from http.DefaultTransport so that the standard
// defaults (proxy settings from the environment, dial and handshake timeouts,
// HTTP/2) are kept; a bare &http.Transport{} would drop all of them.
var httpClient = &http.Client{
	Timeout: 30 * time.Second,
	Transport: func() *http.Transport {
		base, ok := http.DefaultTransport.(*http.Transport)
		if !ok {
			// DefaultTransport has been replaced by some other RoundTripper;
			// fall back to a plain transport rather than panicking at init.
			base = &http.Transport{}
		}
		tr := base.Clone()
		tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: false}
		return tr
	}(),
}
func main() {
flag.Parse()
fmt.Println("OpenBrain Seed — MCP API Client")
fmt.Println(strings.Repeat("=", 55))
if *apiKey == "" && !*dryRun {
fmt.Println("ERROR: -api-key is required (or use -dry-run)")
fmt.Println(" Generate one at: https://lthn.sh/admin/mcp/api-keys")
os.Exit(1)
}
if *dryRun {
fmt.Println("[DRY RUN] — no data will be stored")
}
fmt.Printf("API: %s\n", *apiURL)
fmt.Printf("Server: %s | Agent: %s\n", *server, *agent)
// Discover memory files
memPath := *memoryPath
if memPath == "" {
home, _ := os.UserHomeDir()
memPath = filepath.Join(home, ".claude", "projects", "*", "memory")
}
memFiles, _ := filepath.Glob(filepath.Join(memPath, "*.md"))
fmt.Printf("\nFound %d memory files\n", len(memFiles))
// Discover plan files
var planFiles []string
if *plans {
pPath := *planPath
if pPath == "" {
home, _ := os.UserHomeDir()
pPath = filepath.Join(home, "Code", "*", "docs", "plans")
}
planFiles, _ = filepath.Glob(filepath.Join(pPath, "*.md"))
// Also check nested dirs (completed/, etc.)
nested, _ := filepath.Glob(filepath.Join(pPath, "*", "*.md"))
planFiles = append(planFiles, nested...)
// Also check host-uk nested repos
home, _ := os.UserHomeDir()
hostUkPath := filepath.Join(home, "Code", "host-uk", "*", "docs", "plans")
hostUkFiles, _ := filepath.Glob(filepath.Join(hostUkPath, "*.md"))
planFiles = append(planFiles, hostUkFiles...)
hostUkNested, _ := filepath.Glob(filepath.Join(hostUkPath, "*", "*.md"))
planFiles = append(planFiles, hostUkNested...)
fmt.Printf("Found %d plan files\n", len(planFiles))
}
// Discover CLAUDE.md files
var claudeFiles []string
if *claudeMd {
cPath := *codePath
if cPath == "" {
home, _ := os.UserHomeDir()
cPath = filepath.Join(home, "Code")
}
claudeFiles = discoverClaudeMdFiles(cPath)
fmt.Printf("Found %d CLAUDE.md files\n", len(claudeFiles))
}
imported := 0
skipped := 0
errors := 0
// Process memory files
fmt.Println("\n--- Memory Files ---")
for _, f := range memFiles {
project := extractProject(f)
sections := parseMarkdownSections(f)
filename := strings.TrimSuffix(filepath.Base(f), ".md")
if len(sections) == 0 {
fmt.Printf(" skip %s/%s (no sections)\n", project, filename)
skipped++
continue
}
for _, sec := range sections {
content := sec.heading + "\n\n" + sec.content
if strings.TrimSpace(sec.content) == "" {
skipped++
continue
}
memType := inferType(sec.heading, sec.content, "memory")
tags := buildTags(filename, "memory", project)
confidence := confidenceForSource("memory")
// Truncate to embedding model limit
content = truncate(content, *maxChars)
if *dryRun {
fmt.Printf(" [DRY] %s/%s :: %s (%s) — %d chars\n",
project, filename, sec.heading, memType, len(content))
imported++
continue
}
if err := callBrainRemember(content, memType, tags, project, confidence); err != nil {
fmt.Printf(" FAIL %s/%s :: %s — %v\n", project, filename, sec.heading, err)
errors++
continue
}
fmt.Printf(" ok %s/%s :: %s (%s)\n", project, filename, sec.heading, memType)
imported++
}
}
// Process plan files
if *plans && len(planFiles) > 0 {
fmt.Println("\n--- Plan Documents ---")
for _, f := range planFiles {
project := extractProjectFromPlan(f)
sections := parseMarkdownSections(f)
filename := strings.TrimSuffix(filepath.Base(f), ".md")
if len(sections) == 0 {
skipped++
continue
}
for _, sec := range sections {
content := sec.heading + "\n\n" + sec.content
if strings.TrimSpace(sec.content) == "" {
skipped++
continue
}
tags := buildTags(filename, "plans", project)
content = truncate(content, *maxChars)
if *dryRun {
fmt.Printf(" [DRY] %s :: %s / %s (plan) — %d chars\n",
project, filename, sec.heading, len(content))
imported++
continue
}
if err := callBrainRemember(content, "plan", tags, project, 0.6); err != nil {
fmt.Printf(" FAIL %s :: %s / %s — %v\n", project, filename, sec.heading, err)
errors++
continue
}
fmt.Printf(" ok %s :: %s / %s (plan)\n", project, filename, sec.heading)
imported++
}
}
}
// Process CLAUDE.md files
if *claudeMd && len(claudeFiles) > 0 {
fmt.Println("\n--- CLAUDE.md Files ---")
for _, f := range claudeFiles {
project := extractProjectFromClaudeMd(f)
sections := parseMarkdownSections(f)
if len(sections) == 0 {
skipped++
continue
}
for _, sec := range sections {
content := sec.heading + "\n\n" + sec.content
if strings.TrimSpace(sec.content) == "" {
skipped++
continue
}
tags := buildTags("CLAUDE", "claude-md", project)
content = truncate(content, *maxChars)
if *dryRun {
fmt.Printf(" [DRY] %s :: CLAUDE.md / %s (convention) — %d chars\n",
project, sec.heading, len(content))
imported++
continue
}
if err := callBrainRemember(content, "convention", tags, project, 0.9); err != nil {
fmt.Printf(" FAIL %s :: CLAUDE.md / %s — %v\n", project, sec.heading, err)
errors++
continue
}
fmt.Printf(" ok %s :: CLAUDE.md / %s (convention)\n", project, sec.heading)
imported++
}
}
}
fmt.Printf("\n%s\n", strings.Repeat("=", 55))
prefix := ""
if *dryRun {
prefix = "[DRY RUN] "
}
fmt.Printf("%sImported: %d | Skipped: %d | Errors: %d\n", prefix, imported, skipped, errors)
}
// callBrainRemember stores one memory by POSTing a brain_remember tool call
// to <api>/tools/call, authenticated with the -api-key bearer token.
//
// content, memType, tags and confidence are sent as tool arguments; project
// is included only when it is neither empty nor "unknown". An error is
// returned when the request cannot be built or sent, the response cannot be
// read, the status is not 200, the body is not valid JSON, or the API reports
// success=false.
//
// NOTE(review): the -agent flag is not part of the payload — confirm whether
// brain_remember expects an agent identifier.
func callBrainRemember(content, memType string, tags []string, project string, confidence float64) error {
	// Responses are small JSON envelopes; cap the read so a misbehaving
	// server cannot make the client buffer an unbounded body.
	const maxResponseBytes = 1 << 20

	args := map[string]any{
		"content":    content,
		"type":       memType,
		"tags":       tags,
		"confidence": confidence,
	}
	if project != "" && project != "unknown" {
		args["project"] = project
	}
	payload := map[string]any{
		"server":    *server,
		"tool":      "brain_remember",
		"arguments": args,
	}

	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("marshal: %w", err)
	}

	req, err := http.NewRequest(http.MethodPost, *apiURL+"/tools/call", bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+*apiKey)

	resp, err := httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("http: %w", err)
	}
	defer resp.Body.Close()

	respBody, err := io.ReadAll(io.LimitReader(resp.Body, maxResponseBytes))
	if err != nil {
		return fmt.Errorf("read response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
	}

	var result struct {
		Success bool   `json:"success"`
		Error   string `json:"error"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return fmt.Errorf("decode: %w", err)
	}
	if !result.Success {
		return fmt.Errorf("API: %s", result.Error)
	}
	return nil
}
// truncate shortens s to at most maxLen bytes of original text and appends
// an ellipsis ("…") when anything was cut; the ellipsis is added on top of
// maxLen. Strings that already fit are returned unchanged.
//
// The cut prefers the last space within the final 200 bytes before the limit
// so that words are not split. When no such space exists the cut falls back
// to maxLen, moved left as needed so it never lands inside a multi-byte UTF-8
// sequence. A negative maxLen is treated as 0.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	cut := maxLen
	// idx > 0 rejects "not found" (-1), which would otherwise pass the window
	// test whenever maxLen < 199 and produce a negative slice bound.
	if idx := strings.LastIndex(s[:maxLen], " "); idx > 0 && idx > maxLen-200 {
		cut = idx
	} else {
		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; back up to
		// the start of the rune so the result stays valid UTF-8.
		for cut > 0 && s[cut]&0xC0 == 0x80 {
			cut--
		}
	}
	return s[:cut] + "…"
}
// discoverClaudeMdFiles walks codePath and returns the path of every file
// named exactly "CLAUDE.md". Directories called node_modules, vendor or
// .claude are not entered, nor is any directory whose path relative to
// codePath contains more than three separators. Entries that cannot be read
// are skipped without aborting the walk.
func discoverClaudeMdFiles(codePath string) []string {
	var found []string

	visit := func(p string, entry os.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			// Best effort: ignore unreadable entries.
			return nil
		case !entry.IsDir():
			if entry.Name() == "CLAUDE.md" {
				found = append(found, p)
			}
			return nil
		}

		switch entry.Name() {
		case "node_modules", "vendor", ".claude":
			return filepath.SkipDir
		}

		// Depth limit, measured in separators of the relative path.
		relative, _ := filepath.Rel(codePath, p)
		if strings.Count(relative, string(os.PathSeparator)) > 3 {
			return filepath.SkipDir
		}
		return nil
	}

	_ = filepath.WalkDir(codePath, visit)
	return found
}
// section is one heading-delimited chunk of a markdown file.
type section struct {
	heading string // heading text without the leading '#' markers
	content string // trimmed body text found beneath the heading
}

// headingRe matches level 1–3 ATX headings ("#", "##", "###") and captures
// the heading text.
var headingRe = regexp.MustCompile(`^#{1,3}\s+(.+)$`)

// parseMarkdownSections reads the file at path and splits it into sections
// at level 1–3 headings. Sections with an empty body are dropped, and text
// that precedes the first heading is not part of any section.
//
// Lines inside fenced code blocks (``` or ~~~) are never treated as
// headings, so a shell comment such as "# install deps" inside a fence stays
// in the body of the surrounding section.
//
// When no section results but the file has non-blank text, the whole file is
// returned as a single section named after the file. An unreadable or empty
// file yields nil.
func parseMarkdownSections(path string) []section {
	data, err := os.ReadFile(path)
	if err != nil || len(data) == 0 {
		return nil
	}

	var (
		sections   []section
		curHeading string
		curContent []string
		fence      string // opening fence marker while inside a code block, else ""
	)

	// flush emits the section collected so far, if it has a heading and a body.
	flush := func() {
		if curHeading == "" {
			return
		}
		if text := strings.TrimSpace(strings.Join(curContent, "\n")); text != "" {
			sections = append(sections, section{curHeading, text})
		}
	}

	for _, line := range strings.Split(string(data), "\n") {
		trimmed := strings.TrimSpace(line)
		marker := ""
		switch {
		case strings.HasPrefix(trimmed, "```"):
			marker = "```"
		case strings.HasPrefix(trimmed, "~~~"):
			marker = "~~~"
		}
		if marker != "" {
			switch fence {
			case "":
				fence = marker // opening fence
			case marker:
				fence = "" // closing fence of the same kind
			}
			curContent = append(curContent, line)
			continue
		}

		if fence == "" {
			if m := headingRe.FindStringSubmatch(line); m != nil {
				flush()
				curHeading = strings.TrimSpace(m[1])
				curContent = nil
				continue
			}
		}
		curContent = append(curContent, line)
	}
	// Flush last section
	flush()

	// If nothing was collected, treat the entire file as one section.
	if whole := strings.TrimSpace(string(data)); len(sections) == 0 && whole != "" {
		sections = append(sections, section{
			heading: strings.TrimSuffix(filepath.Base(path), ".md"),
			content: whole,
		})
	}
	return sections
}
// memoryProjectRe captures the last hyphen-separated segment of the project
// directory that sits directly above a memory/ directory. Compiled once at
// package scope instead of on every call.
var memoryProjectRe = regexp.MustCompile(`projects/[^/]*-([^-/]+)/memory/`)

// extractProject derives a project name from a Claude memory path.
// ~/.claude/projects/-Users-snider-Code-eaas/memory/MEMORY.md → "eaas"
//
// Only the text after the final hyphen of the directory name is returned, so
// a directory ending in "-Code-go-ai" yields "ai". Paths that do not match
// return "unknown". Separators are normalised to '/' before matching so
// OS-native paths are handled as well.
func extractProject(path string) string {
	if m := memoryProjectRe.FindStringSubmatch(filepath.ToSlash(path)); m != nil {
		return m[1]
	}
	return "unknown"
}
// Plan path patterns, compiled once at package scope. The host-uk pattern is
// tried first because those repositories live one directory deeper.
var (
	planHostUkRe  = regexp.MustCompile(`Code/host-uk/([^/]+)/docs/plans/`)
	planProjectRe = regexp.MustCompile(`Code/([^/]+)/docs/plans/`)
)

// extractProjectFromPlan derives a project name from a plan path.
// ~/Code/eaas/docs/plans/foo.md → "eaas"
// ~/Code/host-uk/core/docs/plans/foo.md → "core"
//
// Files nested below docs/plans/ match too. Paths that fit neither layout
// return "unknown". Separators are normalised to '/' before matching so
// OS-native paths are handled as well.
func extractProjectFromPlan(path string) string {
	slashed := filepath.ToSlash(path)
	for _, re := range []*regexp.Regexp{planHostUkRe, planProjectRe} {
		if m := re.FindStringSubmatch(slashed); m != nil {
			return m[1]
		}
	}
	return "unknown"
}
// CLAUDE.md path patterns, compiled once at package scope. The host-uk
// pattern is tried first because those repositories live one directory
// deeper.
var (
	claudeHostUkRe  = regexp.MustCompile(`Code/host-uk/([^/]+)/`)
	claudeProjectRe = regexp.MustCompile(`Code/([^/]+)/`)
)

// extractProjectFromClaudeMd derives a project name from a CLAUDE.md path.
// ~/Code/host-uk/core/CLAUDE.md → "core"
// ~/Code/eaas/CLAUDE.md → "eaas"
//
// A CLAUDE.md directly inside host-uk maps to "host-uk". Paths with no
// directory below Code/ return "unknown". Separators are normalised to '/'
// before matching so OS-native paths are handled as well.
func extractProjectFromClaudeMd(path string) string {
	slashed := filepath.ToSlash(path)
	for _, re := range []*regexp.Regexp{claudeHostUkRe, claudeProjectRe} {
		if m := re.FindStringSubmatch(slashed); m != nil {
			return m[1]
		}
	}
	return "unknown"
}
// typeKeywords lists the keyword rules used by inferType in priority order:
// the first rule with a keyword present in the text wins. A slice is used
// rather than a map because Go randomises map iteration order, which made
// the result nondeterministic for text matching more than one type.
var typeKeywords = []struct {
	memType  string
	keywords []string
}{
	{"architecture", []string{"architecture", "stack", "infrastructure", "layer", "service mesh"}},
	{"convention", []string{"convention", "standard", "naming", "pattern", "rule", "coding"}},
	{"decision", []string{"decision", "chose", "strategy", "approach", "domain"}},
	{"bug", []string{"bug", "fix", "broken", "error", "issue", "lesson"}},
	{"plan", []string{"plan", "todo", "roadmap", "milestone", "phase", "task"}},
	{"research", []string{"research", "finding", "discovery", "analysis", "rfc"}},
}

// inferType guesses the memory type from heading + content keywords.
//
// The "plans" and "claude-md" sources always map to "plan" and "convention"
// respectively. For any other source the lower-cased heading and content are
// searched for the substrings in typeKeywords, in order, and the first match
// decides the type. Matching is by substring, so "fix" also matches
// "prefix". "observation" is returned when nothing matches.
func inferType(heading, content, source string) string {
	// Source-specific defaults (match PHP BrainIngestCommand behaviour)
	switch source {
	case "plans":
		return "plan"
	case "claude-md":
		return "convention"
	}

	lower := strings.ToLower(heading + " " + content)
	for _, rule := range typeKeywords {
		for _, kw := range rule.keywords {
			if strings.Contains(lower, kw) {
				return rule.memType
			}
		}
	}
	return "observation"
}
// buildTags assembles the tags attached to a memory. The list always starts
// with "source:<source>". A "project:<project>" tag follows unless project
// is empty or "unknown". Finally the filename, with hyphens and underscores
// turned into spaces, is added as a free-form tag, except for the generic
// names "MEMORY" and "CLAUDE".
func buildTags(filename, source, project string) []string {
	out := make([]string, 0, 3)
	out = append(out, "source:"+source)

	switch project {
	case "", "unknown":
		// no usable project name
	default:
		out = append(out, "project:"+project)
	}

	switch filename {
	case "MEMORY", "CLAUDE":
		// generic file names carry no information
	default:
		spaced := strings.NewReplacer("-", " ", "_", " ").Replace(filename)
		out = append(out, spaced)
	}
	return out
}
// confidenceForSource maps a source key to its default confidence score:
// 0.9 for "claude-md", 0.8 for "memory", 0.6 for "plans" and 0.5 for any
// other value. The values are intended to mirror the PHP ingest command.
func confidenceForSource(source string) float64 {
	known := map[string]float64{
		"claude-md": 0.9,
		"memory":    0.8,
		"plans":     0.6,
	}
	if score, ok := known[source]; ok {
		return score
	}
	return 0.5
}