diff --git a/Boot.php b/Boot.php
index e99c2d5..0d87e72 100644
--- a/Boot.php
+++ b/Boot.php
@@ -131,6 +131,7 @@ class Boot extends ServiceProvider
         $event->command(Console\Commands\GenerateCommand::class);
         $event->command(Console\Commands\PlanRetentionCommand::class);
         $event->command(Console\Commands\BrainSeedMemoryCommand::class);
+        $event->command(Console\Commands\BrainIngestCommand::class);
     }
 
     /**
diff --git a/Console/Commands/BrainIngestCommand.php b/Console/Commands/BrainIngestCommand.php
new file mode 100644
index 0000000..3b3170b
--- /dev/null
+++ b/Console/Commands/BrainIngestCommand.php
@@ -0,0 +1,418 @@
+ */
+    private array $stats = ['imported' => 0, 'skipped' => 0, 'errors' => 0];
+
+    /** Values accepted by --source, in addition to the "all" shorthand. */
+    private const SOURCES = ['memory', 'plans', 'claude-md', 'tasks'];
+
+    /**
+     * Discover markdown files for the requested sources and store every
+     * section of every file as a memory through the given BrainService.
+     *
+     * Options read: workspace (required), source, code-path, dry-run, fresh
+     * and agent. With --dry-run nothing is cleared or written; each section
+     * is only printed and counted. With --fresh the existing collection is
+     * deleted before ingesting.
+     *
+     * @return int self::FAILURE when an option is invalid or the collection
+     *             could not be cleared, self::SUCCESS otherwise (per-section
+     *             write errors are counted and reported, not fatal)
+     */
+    public function handle(BrainService $brain): int
+    {
+        $workspaceId = $this->option('workspace');
+        if (! $workspaceId) {
+            $this->error('--workspace is required.');
+
+            return self::FAILURE;
+        }
+
+        // Without this check a non-numeric value is cast to workspace 0 further down.
+        if (filter_var($workspaceId, FILTER_VALIDATE_INT) === false) {
+            $this->error('--workspace must be an integer.');
+
+            return self::FAILURE;
+        }
+
+        $source = strtolower($this->option('source') ?? 'all');
+        if ($source !== 'all' && ! in_array($source, self::SOURCES, true)) {
+            $this->error(sprintf(
+                'Unknown --source "%s". Expected one of: all, %s.',
+                $source,
+                implode(', ', self::SOURCES),
+            ));
+
+            return self::FAILURE;
+        }
+
+        // expandHome() returns non-tilde paths untouched, so it is safe for user input too.
+        $codePath = $this->expandHome($this->option('code-path') ?? '~/Code');
+        $isDryRun = (bool) $this->option('dry-run');
+        $agentId = $this->option('agent') ?? 'virgil';
+
+        $sources = $source === 'all' ? self::SOURCES : [$source];
+
+        // Gather all files first
+        $filesBySource = [];
+        foreach ($sources as $src) {
+            // No default arm: $src was validated against self::SOURCES above.
+            $files = match ($src) {
+                'memory' => $this->discoverMemoryFiles(),
+                'plans' => $this->discoverPlanFiles($codePath),
+                'claude-md' => $this->discoverClaudeMdFiles($codePath),
+                'tasks' => $this->discoverTaskFiles(),
+            };
+            $filesBySource[$src] = $files;
+            $this->info(sprintf(' [%s] %d file(s)', $src, count($files)));
+        }
+
+        $totalFiles = array_sum(array_map('count', $filesBySource));
+        $this->newLine();
+        $this->info("Total: {$totalFiles} file(s) to process.");
+
+        if ($totalFiles === 0) {
+            return self::SUCCESS;
+        }
+
+        if (! $isDryRun) {
+            if ($this->option('fresh')) {
+                $this->warn('Clearing existing collection...');
+
+                // Ingesting after a failed clear would mix new memories into the old collection.
+                try {
+                    $this->clearCollection($brain);
+                } catch (\Throwable $e) {
+                    $this->error("Could not clear collection: {$e->getMessage()}");
+
+                    return self::FAILURE;
+                }
+            }
+            $brain->ensureCollection();
+        }
+
+        foreach ($filesBySource as $src => $files) {
+            $this->newLine();
+            $this->comment("--- {$src} ---");
+
+            foreach ($files as $file) {
+                $this->processFile($brain, $file, $src, (int) $workspaceId, $agentId, $isDryRun);
+            }
+        }
+
+        $this->newLine();
+        $prefix = $isDryRun ? '[DRY RUN] ' : '';
+        $this->info("{$prefix}Done. Imported: {$this->stats['imported']}, Skipped: {$this->stats['skipped']}, Errors: {$this->stats['errors']}");
+
+        return self::SUCCESS;
+    }
+
+    /**
+     * Process a single file into sectioned memories.
+     *
+     * Each non-empty section is passed to BrainService::remember() with a
+     * type, tags, project and confidence derived from the file and source.
+     * Updates the imported / skipped / errors counters in $this->stats. In
+     * dry-run mode the section is only printed and counted as imported.
+     * An exception thrown by remember() is reported as a warning and counted
+     * as an error; the remaining sections are still processed.
+     */
+    private function processFile(BrainService $brain, string $file, string $source, int $workspaceId, string $agentId, bool $isDryRun): void
+    {
+        $sections = $this->parseMarkdownSections($file);
+        $filename = basename($file, '.md');
+        $project = $this->extractProject($file, $source);
+
+        if (empty($sections)) {
+            $this->stats['skipped']++;
+
+            return;
+        }
+
+        foreach ($sections as $section) {
+            if (trim($section['content']) === '') {
+                $this->stats['skipped']++;
+
+                continue;
+            }
+
+            $type = $this->inferType($section['heading'], $section['content'], $source);
+            $tags = $this->buildTags($section['heading'], $filename, $source, $project);
+
+            if ($isDryRun) {
+                $this->line(sprintf(
+                    ' %s :: %s (%s) — %d chars [%s]',
+                    $filename,
+                    $section['heading'],
+                    $type,
+                    // Characters, not bytes: the label says "chars".
+                    mb_strlen($section['content']),
+                    implode(', ', $tags),
+                ));
+                $this->stats['imported']++;
+
+                continue;
+            }
+
+            try {
+                $brain->remember([
+                    'workspace_id' => $workspaceId,
+                    'agent_id' => $agentId,
+                    'type' => $type,
+                    'content' => $section['heading']."\n\n".$section['content'],
+                    'tags' => $tags,
+                    'project' => $project,
+                    'confidence' => $this->confidenceForSource($source),
+                ]);
+                $this->stats['imported']++;
+            } catch (\Throwable $e) {
+                $this->warn(" Error: {$filename} :: {$section['heading']} — {$e->getMessage()}");
+                $this->stats['errors']++;
+            }
+        }
+    }
+
+    // -------------------------------------------------------------------------
+    // File discovery
+    // -------------------------------------------------------------------------
+
+    /**
+     * Markdown files directly inside any ~/.claude/projects/{name}/memory directory.
+     *
+     * @return string[]
+     */
+    private function discoverMemoryFiles(): array
+    {
+        $pattern = $this->expandHome('~/.claude/projects/*/memory/*.md');
+
+        return glob($pattern) ?: [];
+    }
+
+    /**
+     * Plan documents: markdown under ~/.claude/plans (one subdirectory level
+     * deep) plus every *.md below a docs/plans/ directory inside $codePath.
+     *
+     * @return string[]
+     */
+    private function discoverPlanFiles(string $codePath): array
+    {
+        $files = [];
+
+        // ~/.claude/plans (superpowers plans)
+        $claudePlans = $this->expandHome('~/.claude/plans');
+        if (is_dir($claudePlans)) {
+            $files = array_merge($files, $this->findMd($claudePlans));
+        }
+
+        // docs/plans across all repos in ~/Code
+        if (is_dir($codePath)) {
+            $finder = Finder::create()
+                ->files()
+                ->name('*.md')
+                ->in($codePath)
+                // exclude() stops the traversal from entering dependency trees;
+                // the notPath() filters below only reject results after the walk.
+                ->exclude(['node_modules', 'vendor'])
+                ->path('/docs\/plans\//')
+                ->notPath('node_modules')
+                ->notPath('vendor')
+                ->sortByName();
+
+            $files = array_merge($files, $this->realPaths($finder));
+        }
+
+        return $files;
+    }
+
+    /**
+     * CLAUDE.md files found less than four directory levels below $codePath.
+     *
+     * @return string[]
+     */
+    private function discoverClaudeMdFiles(string $codePath): array
+    {
+        if (! is_dir($codePath)) {
+            return [];
+        }
+
+        $finder = Finder::create()
+            ->files()
+            ->name('CLAUDE.md')
+            ->in($codePath)
+            ->depth('< 4')
+            // Prune dependency trees during traversal; notPath() alone still walks them.
+            ->exclude(['node_modules', 'vendor'])
+            ->notPath('node_modules')
+            ->notPath('vendor')
+            ->notPath('.claude')
+            ->sortByName();
+
+        return $this->realPaths($finder);
+    }
+
+    /**
+     * Markdown files below ~/Code/host-uk/core/tasks, skipping the
+     * recovered-hostuk and recovered-root paths. The location is fixed and
+     * does not follow the --code-path option.
+     *
+     * @return string[]
+     */
+    private function discoverTaskFiles(): array
+    {
+        $tasksDir = $this->expandHome('~/Code/host-uk/core/tasks');
+        if (! is_dir($tasksDir)) {
+            return [];
+        }
+
+        $finder = Finder::create()
+            ->files()
+            ->name('*.md')
+            ->in($tasksDir)
+            ->notPath('recovered-hostuk')
+            ->notPath('recovered-root')
+            ->sortByName();
+
+        return $this->realPaths($finder);
+    }
+
+    /**
+     * Markdown files directly in $dir and in its immediate subdirectories
+     * (not recursive beyond one level).
+     *
+     * @return string[]
+     */
+    private function findMd(string $dir): array
+    {
+        $files = [];
+        foreach (glob("{$dir}/*.md") ?: [] as $f) {
+            $files[] = $f;
+        }
+        // Include subdirectories (e.g. completed/)
+        foreach (glob("{$dir}/*/*.md") ?: [] as $f) {
+            $files[] = $f;
+        }
+
+        return $files;
+    }
+
+    /**
+     * Collect the real path of every item in a Finder result, leaving out
+     * items whose real path cannot be resolved (getRealPath() returns false).
+     *
+     * @param  iterable  $finder  items exposing getRealPath()
+     * @return string[]
+     */
+    private function realPaths(iterable $finder): array
+    {
+        $paths = [];
+        foreach ($finder as $file) {
+            $path = $file->getRealPath();
+            if ($path !== false) {
+                $paths[] = $path;
+            }
+        }
+
+        return $paths;
+    }
+
+    // -------------------------------------------------------------------------
+    // Parsing
+    // -------------------------------------------------------------------------
+
+    /**
+     * Split a markdown file into sections at level 1-3 ATX headings.
+     *
+     * Lines inside fenced code blocks (``` or ~~~) are never treated as
+     * headings, so "# comment" lines in code samples stay in their section.
+     * Text before the first heading and headings without body text are not
+     * returned. When no section results, the whole file becomes one section
+     * named after the file. Unreadable or blank files yield an empty list.
+     *
+     * @return array<int, array{heading: string, content: string}>
+     */
+    private function parseMarkdownSections(string $filePath): array
+    {
+        // Checked up front so an unreadable path is skipped without a PHP warning.
+        $content = is_readable($filePath) ? file_get_contents($filePath) : false;
+        if ($content === false || trim($content) === '') {
+            return [];
+        }
+
+        $sections = [];
+        $currentHeading = '';
+        $currentContent = [];
+        $openFence = null; // marker that opened the current code fence, or null outside one
+
+        // Store the section collected so far when it has both a heading and body text.
+        $flush = static function () use (&$sections, &$currentHeading, &$currentContent): void {
+            $text = trim(implode("\n", $currentContent));
+            if ($currentHeading !== '' && $text !== '') {
+                $sections[] = ['heading' => $currentHeading, 'content' => $text];
+            }
+        };
+
+        foreach (explode("\n", $content) as $line) {
+            if (preg_match('/^ {0,3}(`{3,}|~{3,})(.*)$/', $line, $fence)) {
+                if ($openFence === null) {
+                    $openFence = $fence[1];
+                } elseif (
+                    $fence[1][0] === $openFence[0]
+                    && strlen($fence[1]) >= strlen($openFence)
+                    && trim($fence[2]) === ''
+                ) {
+                    // A closing fence uses the same character, is at least as long
+                    // as the opener and carries no info string.
+                    $openFence = null;
+                }
+                $currentContent[] = $line;
+
+                continue;
+            }
+
+            if ($openFence === null && preg_match('/^#{1,3}\s+(.+)$/', $line, $matches)) {
+                $flush();
+                $currentHeading = trim($matches[1]);
+                $currentContent = [];
+
+                continue;
+            }
+
+            $currentContent[] = $line;
+        }
+
+        // Flush last section
+        $flush();
+
+        // If no headings found, treat entire file as one section
+        if (empty($sections) && trim($content) !== '') {
+            $sections[] = [
+                'heading' => basename($filePath, '.md'),
+                'content' => trim($content),
+            ];
+        }
+
+        return $sections;
+    }
+
+    // -------------------------------------------------------------------------
+    // Metadata
+    // -------------------------------------------------------------------------
+
+    /**
+     * Derive a project name from the file path, or null when no rule matches.
+     *
+     * For memory files the name is the text after the last hyphen of the
+     * projects/{dir}/memory/ directory, so a hyphenated project name yields
+     * only its final segment. For code files it is the directory following
+     * /Code/host-uk/ or, failing that, /Code/. $source is currently unused.
+     */
+    private function extractProject(string $filePath, string $source): ?string
+    {
+        // Memory files: ~/.claude/projects/-Users-snider-Code-{project}/memory/
+        if (preg_match('/projects\/[^\/]*-([^-\/]+)\/memory\//', $filePath, $m)) {
+            return $m[1];
+        }
+
+        // Code repos: ~/Code/{project}/ or ~/Code/host-uk/{project}/
+        if (preg_match('#/Code/host-uk/([^/]+)/#', $filePath, $m)) {
+            return $m[1];
+        }
+        if (preg_match('#/Code/([^/]+)/#', $filePath, $m)) {
+            return $m[1];
+        }
+
+        return null;
+    }
+
+    /**
+     * Choose a memory type for a section.
+     *
+     * The plans and claude-md sources always map to "plan" and "convention".
+     * Otherwise the lower-cased heading and content are searched for the
+     * keywords below; the first type (in declaration order) with any keyword
+     * occurring as a substring wins, and "observation" is the fallback.
+     * Matching is by substring, so a keyword also matches inside longer words.
+     */
+    private function inferType(string $heading, string $content, string $source): string
+    {
+        // Source-specific defaults
+        if ($source === 'plans') {
+            return 'plan';
+        }
+        if ($source === 'claude-md') {
+            return 'convention';
+        }
+
+        $lower = strtolower($heading.' '.$content);
+
+        $patterns = [
+            'architecture' => ['architecture', 'stack', 'infrastructure', 'layer', 'service mesh'],
+            'convention' => ['convention', 'standard', 'naming', 'pattern', 'rule', 'coding'],
+            'decision' => ['decision', 'chose', 'strategy', 'approach', 'domain'],
+            'bug' => ['bug', 'fix', 'broken', 'error', 'issue', 'lesson'],
+            'plan' => ['plan', 'todo', 'roadmap', 'milestone', 'phase', 'task'],
+            'research' => ['research', 'finding', 'discovery', 'analysis', 'rfc'],
+        ];
+
+        foreach ($patterns as $type => $keywords) {
+            foreach ($keywords as $keyword) {
+                if (str_contains($lower, $keyword)) {
+                    return $type;
+                }
+            }
+        }
+
+        return 'observation';
+    }
+
+    /**
+     * Build the tag list for a section: "source:{source}", "project:{project}"
+     * when a project is known, and the file name with dashes and underscores
+     * turned into spaces unless the file is MEMORY or CLAUDE.
+     * $heading is accepted but not used.
+     *
+     * @return string[]
+     */
+    private function buildTags(string $heading, string $filename, string $source, ?string $project): array
+    {
+        $tags = ["source:{$source}"];
+
+        if ($project) {
+            $tags[] = "project:{$project}";
+        }
+
+        if ($filename !== 'MEMORY' && $filename !== 'CLAUDE') {
+            $tags[] = str_replace(['-', '_'], ' ', $filename);
+        }
+
+        return $tags;
+    }
+
+    /**
+     * Confidence value stored with memories from the given source;
+     * sources not listed get 0.5.
+     */
+    private function confidenceForSource(string $source): float
+    {
+        return match ($source) {
+            'memory' => 0.8,
+            'claude-md' => 0.9,
+            'plans' => 0.6,
+            'tasks' => 0.5,
+            default => 0.5,
+        };
+    }
+
+    // -------------------------------------------------------------------------
+    // Helpers
+    // -------------------------------------------------------------------------
+
+    /**
+     * Delete the collection the BrainService points at.
+     *
+     * The URL and collection name are read from the service's qdrantUrl and
+     * collection properties via reflection, then a DELETE request is sent to
+     * {qdrantUrl}/collections/{collection}.
+     *
+     * @throws \RuntimeException when the server answers with a failure status other than 404
+     */
+    private function clearCollection(BrainService $brain): void
+    {
+        $reflection = new \ReflectionClass($brain);
+        $qdrantUrl = $reflection->getProperty('qdrantUrl')->getValue($brain);
+        $collection = $reflection->getProperty('collection')->getValue($brain);
+
+        // NOTE(review): TLS certificate verification is switched off for this
+        // request - confirm this is only ever used against a trusted endpoint.
+        $response = \Illuminate\Support\Facades\Http::withoutVerifying()
+            ->timeout(10)
+            ->delete("{$qdrantUrl}/collections/{$collection}");
+
+        // A missing collection (404) is already cleared; any other failure must
+        // surface, otherwise the caller would ingest into the old data.
+        if ($response->failed() && $response->status() !== 404) {
+            throw new \RuntimeException(sprintf(
+                'HTTP %d while deleting collection "%s".',
+                $response->status(),
+                $collection,
+            ));
+        }
+    }
+
+    /**
+     * Replace a leading "~" ("~" alone or "~/...") with the home directory.
+     *
+     * The home directory comes from the HOME environment variable, falling
+     * back to /Users/{current user} when it is unset or empty. Any other
+     * path is returned unchanged.
+     */
+    private function expandHome(string $path): string
+    {
+        if ($path !== '~' && ! str_starts_with($path, '~/')) {
+            return $path;
+        }
+
+        $home = getenv('HOME') ?: ('/Users/'.get_current_user());
+
+        return $home.substr($path, 1);
+    }
+}