502 lines
14 KiB
Go
502 lines
14 KiB
Go
// SPDX-License-Identifier: EUPL-1.2
|
|
|
|
// brain-seed imports Claude Code MEMORY.md files into the OpenBrain knowledge
|
|
// store via the MCP HTTP API (brain_remember tool). The Laravel app handles
|
|
// embedding, Qdrant storage, and MariaDB dual-write internally.
|
|
//
|
|
// Usage:
|
|
//
|
|
// go run ./cmd/brain-seed -api-key YOUR_KEY
|
|
// go run ./cmd/brain-seed -api-key YOUR_KEY -api https://lthn.sh/api/v1/mcp
|
|
// go run ./cmd/brain-seed -api-key YOUR_KEY -dry-run
|
|
// go run ./cmd/brain-seed -api-key YOUR_KEY -plans
|
|
// go run ./cmd/brain-seed -api-key YOUR_KEY -claude-md # Also import CLAUDE.md files
|
|
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/tls"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
var (
|
|
apiURL = flag.String("api", "https://lthn.sh/api/v1/mcp", "MCP API base URL")
|
|
apiKey = flag.String("api-key", "", "MCP API key (Bearer token)")
|
|
server = flag.String("server", "hosthub-agent", "MCP server ID")
|
|
agent = flag.String("agent", "charon", "Agent ID for attribution")
|
|
dryRun = flag.Bool("dry-run", false, "Preview without storing")
|
|
plans = flag.Bool("plans", false, "Also import plan documents")
|
|
claudeMd = flag.Bool("claude-md", false, "Also import CLAUDE.md files")
|
|
memoryPath = flag.String("memory-path", "", "Override memory scan path (default: ~/.claude/projects/*/memory/)")
|
|
planPath = flag.String("plan-path", "", "Override plan scan path (default: ~/Code/*/docs/plans/)")
|
|
codePath = flag.String("code-path", "", "Override code root for CLAUDE.md scan (default: ~/Code)")
|
|
maxChars = flag.Int("max-chars", 3800, "Max chars per section (embeddinggemma limit ~4000)")
|
|
)
|
|
|
|
// httpClient with TLS skip for non-public TLDs (.lthn.sh has real certs, but
|
|
// allow .lan/.local if someone has legacy config).
|
|
var httpClient = &http.Client{
|
|
Timeout: 30 * time.Second,
|
|
Transport: &http.Transport{
|
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: false},
|
|
},
|
|
}
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
|
|
fmt.Println("OpenBrain Seed — MCP API Client")
|
|
fmt.Println(strings.Repeat("=", 55))
|
|
|
|
if *apiKey == "" && !*dryRun {
|
|
fmt.Println("ERROR: -api-key is required (or use -dry-run)")
|
|
fmt.Println(" Generate one at: https://lthn.sh/admin/mcp/api-keys")
|
|
os.Exit(1)
|
|
}
|
|
|
|
if *dryRun {
|
|
fmt.Println("[DRY RUN] — no data will be stored")
|
|
}
|
|
|
|
fmt.Printf("API: %s\n", *apiURL)
|
|
fmt.Printf("Server: %s | Agent: %s\n", *server, *agent)
|
|
|
|
// Discover memory files
|
|
memPath := *memoryPath
|
|
if memPath == "" {
|
|
home, _ := os.UserHomeDir()
|
|
memPath = filepath.Join(home, ".claude", "projects", "*", "memory")
|
|
}
|
|
memFiles, _ := filepath.Glob(filepath.Join(memPath, "*.md"))
|
|
fmt.Printf("\nFound %d memory files\n", len(memFiles))
|
|
|
|
// Discover plan files
|
|
var planFiles []string
|
|
if *plans {
|
|
pPath := *planPath
|
|
if pPath == "" {
|
|
home, _ := os.UserHomeDir()
|
|
pPath = filepath.Join(home, "Code", "*", "docs", "plans")
|
|
}
|
|
planFiles, _ = filepath.Glob(filepath.Join(pPath, "*.md"))
|
|
// Also check nested dirs (completed/, etc.)
|
|
nested, _ := filepath.Glob(filepath.Join(pPath, "*", "*.md"))
|
|
planFiles = append(planFiles, nested...)
|
|
|
|
// Also check host-uk nested repos
|
|
home, _ := os.UserHomeDir()
|
|
hostUkPath := filepath.Join(home, "Code", "host-uk", "*", "docs", "plans")
|
|
hostUkFiles, _ := filepath.Glob(filepath.Join(hostUkPath, "*.md"))
|
|
planFiles = append(planFiles, hostUkFiles...)
|
|
hostUkNested, _ := filepath.Glob(filepath.Join(hostUkPath, "*", "*.md"))
|
|
planFiles = append(planFiles, hostUkNested...)
|
|
|
|
fmt.Printf("Found %d plan files\n", len(planFiles))
|
|
}
|
|
|
|
// Discover CLAUDE.md files
|
|
var claudeFiles []string
|
|
if *claudeMd {
|
|
cPath := *codePath
|
|
if cPath == "" {
|
|
home, _ := os.UserHomeDir()
|
|
cPath = filepath.Join(home, "Code")
|
|
}
|
|
claudeFiles = discoverClaudeMdFiles(cPath)
|
|
fmt.Printf("Found %d CLAUDE.md files\n", len(claudeFiles))
|
|
}
|
|
|
|
imported := 0
|
|
skipped := 0
|
|
errors := 0
|
|
|
|
// Process memory files
|
|
fmt.Println("\n--- Memory Files ---")
|
|
for _, f := range memFiles {
|
|
project := extractProject(f)
|
|
sections := parseMarkdownSections(f)
|
|
filename := strings.TrimSuffix(filepath.Base(f), ".md")
|
|
|
|
if len(sections) == 0 {
|
|
fmt.Printf(" skip %s/%s (no sections)\n", project, filename)
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
for _, sec := range sections {
|
|
content := sec.heading + "\n\n" + sec.content
|
|
if strings.TrimSpace(sec.content) == "" {
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
memType := inferType(sec.heading, sec.content, "memory")
|
|
tags := buildTags(filename, "memory", project)
|
|
confidence := confidenceForSource("memory")
|
|
|
|
// Truncate to embedding model limit
|
|
content = truncate(content, *maxChars)
|
|
|
|
if *dryRun {
|
|
fmt.Printf(" [DRY] %s/%s :: %s (%s) — %d chars\n",
|
|
project, filename, sec.heading, memType, len(content))
|
|
imported++
|
|
continue
|
|
}
|
|
|
|
if err := callBrainRemember(content, memType, tags, project, confidence); err != nil {
|
|
fmt.Printf(" FAIL %s/%s :: %s — %v\n", project, filename, sec.heading, err)
|
|
errors++
|
|
continue
|
|
}
|
|
fmt.Printf(" ok %s/%s :: %s (%s)\n", project, filename, sec.heading, memType)
|
|
imported++
|
|
}
|
|
}
|
|
|
|
// Process plan files
|
|
if *plans && len(planFiles) > 0 {
|
|
fmt.Println("\n--- Plan Documents ---")
|
|
for _, f := range planFiles {
|
|
project := extractProjectFromPlan(f)
|
|
sections := parseMarkdownSections(f)
|
|
filename := strings.TrimSuffix(filepath.Base(f), ".md")
|
|
|
|
if len(sections) == 0 {
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
for _, sec := range sections {
|
|
content := sec.heading + "\n\n" + sec.content
|
|
if strings.TrimSpace(sec.content) == "" {
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
tags := buildTags(filename, "plans", project)
|
|
content = truncate(content, *maxChars)
|
|
|
|
if *dryRun {
|
|
fmt.Printf(" [DRY] %s :: %s / %s (plan) — %d chars\n",
|
|
project, filename, sec.heading, len(content))
|
|
imported++
|
|
continue
|
|
}
|
|
|
|
if err := callBrainRemember(content, "plan", tags, project, 0.6); err != nil {
|
|
fmt.Printf(" FAIL %s :: %s / %s — %v\n", project, filename, sec.heading, err)
|
|
errors++
|
|
continue
|
|
}
|
|
fmt.Printf(" ok %s :: %s / %s (plan)\n", project, filename, sec.heading)
|
|
imported++
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process CLAUDE.md files
|
|
if *claudeMd && len(claudeFiles) > 0 {
|
|
fmt.Println("\n--- CLAUDE.md Files ---")
|
|
for _, f := range claudeFiles {
|
|
project := extractProjectFromClaudeMd(f)
|
|
sections := parseMarkdownSections(f)
|
|
|
|
if len(sections) == 0 {
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
for _, sec := range sections {
|
|
content := sec.heading + "\n\n" + sec.content
|
|
if strings.TrimSpace(sec.content) == "" {
|
|
skipped++
|
|
continue
|
|
}
|
|
|
|
tags := buildTags("CLAUDE", "claude-md", project)
|
|
content = truncate(content, *maxChars)
|
|
|
|
if *dryRun {
|
|
fmt.Printf(" [DRY] %s :: CLAUDE.md / %s (convention) — %d chars\n",
|
|
project, sec.heading, len(content))
|
|
imported++
|
|
continue
|
|
}
|
|
|
|
if err := callBrainRemember(content, "convention", tags, project, 0.9); err != nil {
|
|
fmt.Printf(" FAIL %s :: CLAUDE.md / %s — %v\n", project, sec.heading, err)
|
|
errors++
|
|
continue
|
|
}
|
|
fmt.Printf(" ok %s :: CLAUDE.md / %s (convention)\n", project, sec.heading)
|
|
imported++
|
|
}
|
|
}
|
|
}
|
|
|
|
fmt.Printf("\n%s\n", strings.Repeat("=", 55))
|
|
prefix := ""
|
|
if *dryRun {
|
|
prefix = "[DRY RUN] "
|
|
}
|
|
fmt.Printf("%sImported: %d | Skipped: %d | Errors: %d\n", prefix, imported, skipped, errors)
|
|
}
|
|
|
|
// callBrainRemember sends a memory to the MCP API via brain_remember tool.
|
|
func callBrainRemember(content, memType string, tags []string, project string, confidence float64) error {
|
|
args := map[string]any{
|
|
"content": content,
|
|
"type": memType,
|
|
"tags": tags,
|
|
"confidence": confidence,
|
|
}
|
|
if project != "" && project != "unknown" {
|
|
args["project"] = project
|
|
}
|
|
|
|
payload := map[string]any{
|
|
"server": *server,
|
|
"tool": "brain_remember",
|
|
"arguments": args,
|
|
}
|
|
|
|
body, err := json.Marshal(payload)
|
|
if err != nil {
|
|
return fmt.Errorf("marshal: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequest("POST", *apiURL+"/tools/call", bytes.NewReader(body))
|
|
if err != nil {
|
|
return fmt.Errorf("request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Authorization", "Bearer "+*apiKey)
|
|
|
|
resp, err := httpClient.Do(req)
|
|
if err != nil {
|
|
return fmt.Errorf("http: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
respBody, _ := io.ReadAll(resp.Body)
|
|
|
|
if resp.StatusCode != 200 {
|
|
return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
|
|
}
|
|
|
|
var result struct {
|
|
Success bool `json:"success"`
|
|
Error string `json:"error"`
|
|
}
|
|
if err := json.Unmarshal(respBody, &result); err != nil {
|
|
return fmt.Errorf("decode: %w", err)
|
|
}
|
|
if !result.Success {
|
|
return fmt.Errorf("API: %s", result.Error)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// truncate caps content to maxLen chars, appending an ellipsis if truncated.
|
|
func truncate(s string, maxLen int) string {
|
|
if len(s) <= maxLen {
|
|
return s
|
|
}
|
|
// Find last space before limit to avoid splitting mid-word
|
|
cut := maxLen
|
|
if idx := strings.LastIndex(s[:maxLen], " "); idx > maxLen-200 {
|
|
cut = idx
|
|
}
|
|
return s[:cut] + "…"
|
|
}
|
|
|
|
// discoverClaudeMdFiles finds CLAUDE.md files across a code directory.
|
|
func discoverClaudeMdFiles(codePath string) []string {
|
|
var files []string
|
|
|
|
// Walk up to 4 levels deep, skip node_modules/vendor/.claude
|
|
_ = filepath.WalkDir(codePath, func(path string, d os.DirEntry, err error) error {
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
if d.IsDir() {
|
|
name := d.Name()
|
|
if name == "node_modules" || name == "vendor" || name == ".claude" {
|
|
return filepath.SkipDir
|
|
}
|
|
// Limit depth
|
|
rel, _ := filepath.Rel(codePath, path)
|
|
if strings.Count(rel, string(os.PathSeparator)) > 3 {
|
|
return filepath.SkipDir
|
|
}
|
|
return nil
|
|
}
|
|
if d.Name() == "CLAUDE.md" {
|
|
files = append(files, path)
|
|
}
|
|
return nil
|
|
})
|
|
|
|
return files
|
|
}
|
|
|
|
// section is a parsed markdown section.
|
|
type section struct {
|
|
heading string
|
|
content string
|
|
}
|
|
|
|
var headingRe = regexp.MustCompile(`^#{1,3}\s+(.+)$`)
|
|
|
|
// parseMarkdownSections splits a markdown file by headings.
|
|
func parseMarkdownSections(path string) []section {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil || len(data) == 0 {
|
|
return nil
|
|
}
|
|
|
|
var sections []section
|
|
lines := strings.Split(string(data), "\n")
|
|
var curHeading string
|
|
var curContent []string
|
|
|
|
for _, line := range lines {
|
|
if m := headingRe.FindStringSubmatch(line); m != nil {
|
|
if curHeading != "" && len(curContent) > 0 {
|
|
text := strings.TrimSpace(strings.Join(curContent, "\n"))
|
|
if text != "" {
|
|
sections = append(sections, section{curHeading, text})
|
|
}
|
|
}
|
|
curHeading = strings.TrimSpace(m[1])
|
|
curContent = nil
|
|
} else {
|
|
curContent = append(curContent, line)
|
|
}
|
|
}
|
|
|
|
// Flush last section
|
|
if curHeading != "" && len(curContent) > 0 {
|
|
text := strings.TrimSpace(strings.Join(curContent, "\n"))
|
|
if text != "" {
|
|
sections = append(sections, section{curHeading, text})
|
|
}
|
|
}
|
|
|
|
// If no headings found, treat entire file as one section
|
|
if len(sections) == 0 && strings.TrimSpace(string(data)) != "" {
|
|
sections = append(sections, section{
|
|
heading: strings.TrimSuffix(filepath.Base(path), ".md"),
|
|
content: strings.TrimSpace(string(data)),
|
|
})
|
|
}
|
|
|
|
return sections
|
|
}
|
|
|
|
// extractProject derives a project name from a Claude memory path.
|
|
// ~/.claude/projects/-Users-snider-Code-eaas/memory/MEMORY.md → "eaas"
|
|
func extractProject(path string) string {
|
|
re := regexp.MustCompile(`projects/[^/]*-([^-/]+)/memory/`)
|
|
if m := re.FindStringSubmatch(path); m != nil {
|
|
return m[1]
|
|
}
|
|
return "unknown"
|
|
}
|
|
|
|
// extractProjectFromPlan derives a project name from a plan path.
|
|
// ~/Code/eaas/docs/plans/foo.md → "eaas"
|
|
// ~/Code/host-uk/core/docs/plans/foo.md → "core"
|
|
func extractProjectFromPlan(path string) string {
|
|
// Check host-uk nested repos first
|
|
re := regexp.MustCompile(`Code/host-uk/([^/]+)/docs/plans/`)
|
|
if m := re.FindStringSubmatch(path); m != nil {
|
|
return m[1]
|
|
}
|
|
re = regexp.MustCompile(`Code/([^/]+)/docs/plans/`)
|
|
if m := re.FindStringSubmatch(path); m != nil {
|
|
return m[1]
|
|
}
|
|
return "unknown"
|
|
}
|
|
|
|
// extractProjectFromClaudeMd derives a project name from a CLAUDE.md path.
|
|
// ~/Code/host-uk/core/CLAUDE.md → "core"
|
|
// ~/Code/eaas/CLAUDE.md → "eaas"
|
|
func extractProjectFromClaudeMd(path string) string {
|
|
re := regexp.MustCompile(`Code/host-uk/([^/]+)/`)
|
|
if m := re.FindStringSubmatch(path); m != nil {
|
|
return m[1]
|
|
}
|
|
re = regexp.MustCompile(`Code/([^/]+)/`)
|
|
if m := re.FindStringSubmatch(path); m != nil {
|
|
return m[1]
|
|
}
|
|
return "unknown"
|
|
}
|
|
|
|
// inferType guesses the memory type from heading + content keywords.
|
|
func inferType(heading, content, source string) string {
|
|
// Source-specific defaults (match PHP BrainIngestCommand behaviour)
|
|
if source == "plans" {
|
|
return "plan"
|
|
}
|
|
if source == "claude-md" {
|
|
return "convention"
|
|
}
|
|
|
|
lower := strings.ToLower(heading + " " + content)
|
|
patterns := map[string][]string{
|
|
"architecture": {"architecture", "stack", "infrastructure", "layer", "service mesh"},
|
|
"convention": {"convention", "standard", "naming", "pattern", "rule", "coding"},
|
|
"decision": {"decision", "chose", "strategy", "approach", "domain"},
|
|
"bug": {"bug", "fix", "broken", "error", "issue", "lesson"},
|
|
"plan": {"plan", "todo", "roadmap", "milestone", "phase", "task"},
|
|
"research": {"research", "finding", "discovery", "analysis", "rfc"},
|
|
}
|
|
for t, keywords := range patterns {
|
|
for _, kw := range keywords {
|
|
if strings.Contains(lower, kw) {
|
|
return t
|
|
}
|
|
}
|
|
}
|
|
return "observation"
|
|
}
|
|
|
|
// buildTags creates the tag list for a memory.
|
|
func buildTags(filename, source, project string) []string {
|
|
tags := []string{"source:" + source}
|
|
if project != "" && project != "unknown" {
|
|
tags = append(tags, "project:"+project)
|
|
}
|
|
if filename != "MEMORY" && filename != "CLAUDE" {
|
|
tags = append(tags, strings.ReplaceAll(strings.ReplaceAll(filename, "-", " "), "_", " "))
|
|
}
|
|
return tags
|
|
}
|
|
|
|
// confidenceForSource returns a default confidence level matching the PHP ingest command.
|
|
func confidenceForSource(source string) float64 {
|
|
switch source {
|
|
case "claude-md":
|
|
return 0.9
|
|
case "memory":
|
|
return 0.8
|
|
case "plans":
|
|
return 0.6
|
|
default:
|
|
return 0.5
|
|
}
|
|
}
|