From b0ed221cfad7b185242d72370989383e6e64cbd1 Mon Sep 17 00:00:00 2001 From: Snider Date: Wed, 4 Mar 2026 15:58:01 +0000 Subject: [PATCH] =?UTF-8?q?feat(brain):=20add=20wiki=20source=20type=20?= =?UTF-8?q?=E2=80=94=20ingest=20Forge=20repo=20wikis=20via=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fetches wiki pages from all core/* repos on Forge, parses into sections, and stores as type:service with repo/lang tags. Gives the PHP orchestrator contextual knowledge about the Go services it coordinates. 71+ pages across 22+ repos, ~770 vectorised sections. Co-Authored-By: Virgil --- Console/Commands/BrainIngestCommand.php | 246 +++++++++++++++++++++++- 1 file changed, 240 insertions(+), 6 deletions(-) diff --git a/Console/Commands/BrainIngestCommand.php b/Console/Commands/BrainIngestCommand.php index 547fbff..17612fb 100644 --- a/Console/Commands/BrainIngestCommand.php +++ b/Console/Commands/BrainIngestCommand.php @@ -6,6 +6,7 @@ namespace Core\Mod\Agentic\Console\Commands; use Core\Mod\Agentic\Services\BrainService; use Illuminate\Console\Command; +use Illuminate\Support\Facades\Http; use Symfony\Component\Finder\Finder; /** @@ -20,7 +21,7 @@ class BrainIngestCommand extends Command protected $signature = 'brain:ingest {--workspace= : Workspace ID to import into (required)} {--agent=virgil : Agent ID to attribute memories to} - {--source=all : Source type: memory, plans, claude-md, tasks, docs, all} + {--source=all : Source type: memory, plans, claude-md, tasks, docs, wiki, all} {--code-path= : Root code directory (default: ~/Code)} {--dry-run : Preview what would be imported without storing} {--fresh : Clear the Qdrant collection before ingesting}'; @@ -44,12 +45,16 @@ class BrainIngestCommand extends Command $isDryRun = (bool) $this->option('dry-run'); $sources = $source === 'all' - ? ['memory', 'plans', 'claude-md', 'tasks', 'docs'] + ? 
['memory', 'plans', 'claude-md', 'tasks', 'docs', 'wiki'] : [strtolower($source)]; - // Gather all files first + // Separate file-based and API-based sources + $fileSources = array_filter($sources, fn ($s) => $s !== 'wiki'); + $apiSources = array_filter($sources, fn ($s) => $s === 'wiki'); + + // Gather file-based sources $filesBySource = []; - foreach ($sources as $src) { + foreach ($fileSources as $src) { $files = match ($src) { 'memory' => $this->discoverMemoryFiles(), 'plans' => $this->discoverPlanFiles($codePath), @@ -62,9 +67,16 @@ class BrainIngestCommand extends Command $this->info(sprintf(' [%s] %d file(s)', $src, count($files))); } - $totalFiles = array_sum(array_map('count', $filesBySource)); + // Discover wiki pages from Forge API + $wikiPages = []; + if (in_array('wiki', $apiSources, true)) { + $wikiPages = $this->discoverWikiPages(); + $this->info(sprintf(' [wiki] %d page(s) across %d repo(s)', count($wikiPages), count(array_unique(array_column($wikiPages, 'repo'))))); + } + + $totalFiles = array_sum(array_map('count', $filesBySource)) + count($wikiPages); $this->newLine(); - $this->info("Total: {$totalFiles} file(s) to process."); + $this->info("Total: {$totalFiles} item(s) to process."); if ($totalFiles === 0) { return self::SUCCESS; @@ -87,6 +99,12 @@ class BrainIngestCommand extends Command } } + if (! empty($wikiPages)) { + $this->newLine(); + $this->comment('--- wiki ---'); + $this->processWikiPages($brain, $wikiPages, (int) $workspaceId, $this->option('agent') ?? 'virgil', $isDryRun); + } + $this->newLine(); $prefix = $isDryRun ? '[DRY RUN] ' : ''; $this->info("{$prefix}Done. 
Imported: {$this->stats['imported']}, Skipped: {$this->stats['skipped']}, Errors: {$this->stats['errors']}");
@@ -280,6 +298,222 @@ class BrainIngestCommand extends Command
         return $files;
     }
 
+    // -------------------------------------------------------------------------
+    // Wiki (Forge API)
+    // -------------------------------------------------------------------------
+
+    /**
+     * Discover wiki pages from all repos in the Forge org.
+     *
+     * Lists the org's repos 50 at a time, then fetches each wiki page; failed requests and blank pages are skipped.
+     * Returns an empty array (after a warning) when no Forge token is configured.
+     * @return array<int, array{repo: string, title: string, content: string}>
+     */
+    private function discoverWikiPages(): array
+    {
+        $baseUrl = config('upstream.gitea.url', 'https://forge.lthn.ai');
+        $token = config('upstream.gitea.token');
+        $org = config('upstream.gitea.org', 'core');
+
+        if (! $token) {
+            $this->warn('No Forge token — skipping wiki source.');
+
+            return [];
+        }
+
+        // Fetch all repos in org, 50 per request; a short page means it was the last one
+        $repos = [];
+        $page = 1;
+
+        do {
+            $response = Http::withHeaders(['Authorization' => 'token ' . $token])
+                ->timeout(15)
+                ->get("{$baseUrl}/api/v1/orgs/{$org}/repos", ['page' => $page, 'limit' => 50]);
+
+            if (! $response->successful()) {
+                $this->warn('Failed to fetch repos: ' . $response->status());
+                break;
+            }
+
+            $batch = $response->json();
+            if (empty($batch)) {
+                break;
+            }
+
+            foreach ($batch as $r) {
+                $repos[] = $r['name'];
+            }
+            $page++;
+        } while (count($batch) === 50);
+
+        // Fetch wiki pages for each repo; repos whose page list fails to load or is empty are skipped
+        $pages = [];
+
+        foreach ($repos as $repo) {
+            $response = Http::withHeaders(['Authorization' => 'token ' . $token])
+                ->timeout(10)
+                ->get("{$baseUrl}/api/v1/repos/{$org}/{$repo}/wiki/pages");
+
+            if (! $response->successful()) {
+                continue;
+            }
+
+            $wikiList = $response->json();
+
+            if (empty($wikiList)) {
+                continue;
+            }
+
+            foreach ($wikiList as $wiki) {
+                $title = $wiki['title'] ?? 'Untitled';
+
+                // Fetch full page content; encode the title so spaces, '/', '?' and '#' stay inside the path segment
+                $pageResponse = Http::withHeaders(['Authorization' => 'token ' . $token])
+                    ->timeout(10)
+                    ->get("{$baseUrl}/api/v1/repos/{$org}/{$repo}/wiki/page/" . rawurlencode($title));
+
+                if (! $pageResponse->successful()) {
+                    continue;
+                }
+
+                $content = $pageResponse->json('content_base64');
+                if ($content) {
+                    $content = base64_decode($content, true) ?: '';
+                } else {
+                    $content = '';
+                }
+
+                if (trim($content) === '') {
+                    continue;
+                }
+
+                $pages[] = [
+                    'repo' => $repo,
+                    'title' => $title,
+                    'content' => $content,
+                ];
+            }
+        }
+
+        return $pages;
+    }
+
+    /**
+     * Process wiki pages into contextual memories.
+     *
+     * Each page is split into sections; every non-empty section is stored as type "service" with
+     * source/repo/lang tags (or only printed when $isDryRun), and $this->stats is updated per section.
+     *
+     * @param  array<int, array{repo: string, title: string, content: string}>  $pages
+     */
+    private function processWikiPages(BrainService $brain, array $pages, int $workspaceId, string $agentId, bool $isDryRun): void
+    {
+        foreach ($pages as $page) {
+            $sections = $this->parseMarkdownFromString($page['content'], $page['title']);
+            $repo = $page['repo'];
+
+            // Detect language from repo name: php-* => php, go-* or exactly "go" => go, anything else => mixed
+            $lang = str_starts_with($repo, 'php-') ? 'php' : (str_starts_with($repo, 'go-') || $repo === 'go' ? 'go' : 'mixed');
+
+            foreach ($sections as $section) {
+                if (trim($section['content']) === '') {
+                    $this->stats['skipped']++;
+
+                    continue;
+                }
+
+                $tags = [
+                    'source:wiki',
+                    'repo:' . $repo,
+                    'lang:' . $lang,
+                    str_replace(['-', '_'], ' ', $page['title']),
+                ];
+
+                if ($isDryRun) {
+                    $this->line(sprintf(
+                        ' %s/%s :: %s — %d chars [%s]',
+                        $repo,
+                        $page['title'],
+                        $section['heading'],
+                        mb_strlen($section['content']),
+                        implode(', ', $tags),
+                    ));
+                    $this->stats['imported']++;
+
+                    continue;
+                }
+
+                try {
+                    // Prefix with repo context so embeddings understand the service
+                    $text = "[{$repo}] {$section['heading']}\n\n{$section['content']}";
+
+                    if (mb_strlen($text) > 3800) { // count characters, matching the mb_substr cut below
+                        $text = mb_substr($text, 0, 3800) . 
'…';
+                    }
+
+                    $brain->remember([
+                        'workspace_id' => $workspaceId,
+                        'agent_id' => $agentId,
+                        'type' => 'service',
+                        'content' => $text,
+                        'tags' => $tags,
+                        'project' => $repo,
+                        'confidence' => 0.8,
+                    ]);
+                    $this->stats['imported']++;
+                } catch (\Throwable $e) {
+                    $this->warn(' Error: ' . $repo . '/' . $page['title'] . ' :: ' . $section['heading'] . ' — ' . $e->getMessage());
+                    $this->stats['errors']++;
+                }
+            }
+        }
+    }
+
+    /**
+     * Parse markdown sections from a string (not a file), splitting at every #, ## or ### heading line.
+     * Text before the first heading is kept under $fallbackHeading. NOTE(review): '#' lines in code fences also split.
+     * @return array<int, array{heading: string, content: string}>
+     */
+    private function parseMarkdownFromString(string $content, string $fallbackHeading): array
+    {
+        if (trim($content) === '') {
+            return [];
+        }
+
+        $sections = [];
+        $lines = explode("\n", $content);
+        $currentHeading = $fallbackHeading; // text before the first heading is filed under the fallback heading
+        $currentContent = [];
+
+        foreach ($lines as $line) {
+            if (preg_match('/^#{1,3}\s+(.+)$/', $line, $matches)) {
+                if ($currentHeading !== '' && ! empty($currentContent)) {
+                    $text = trim(implode("\n", $currentContent));
+                    if ($text !== '') {
+                        $sections[] = ['heading' => $currentHeading, 'content' => $text];
+                    }
+                }
+                $currentHeading = trim($matches[1]);
+                $currentContent = [];
+            } else {
+                $currentContent[] = $line;
+            }
+        }
+
+        if ($currentHeading !== '' && ! empty($currentContent)) {
+            $text = trim(implode("\n", $currentContent));
+            if ($text !== '') {
+                $sections[] = ['heading' => $currentHeading, 'content' => $text];
+            }
+        }
+
+        if (empty($sections) && trim($content) !== '') {
+            $sections[] = ['heading' => $fallbackHeading, 'content' => trim($content)];
+        }
+
+        return $sections;
+    }
+
     /** @return array */
     private function findMd(string $dir): array
     {