feat(brain): add wiki source type — ingest Forge repo wikis via API
Fetches wiki pages from all core/* repos on Forge, parses into sections, and stores as type:service with repo/lang tags. Gives the PHP orchestrator contextual knowledge about the Go services it coordinates. 71+ pages across 22+ repos, ~770 vectorised sections. Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
01826bc5e9
commit
b0ed221cfa
1 changed file with 240 additions and 6 deletions
|
|
@ -6,6 +6,7 @@ namespace Core\Mod\Agentic\Console\Commands;
|
|||
|
||||
use Core\Mod\Agentic\Services\BrainService;
|
||||
use Illuminate\Console\Command;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Symfony\Component\Finder\Finder;
|
||||
|
||||
/**
|
||||
|
|
@ -20,7 +21,7 @@ class BrainIngestCommand extends Command
|
|||
protected $signature = 'brain:ingest
|
||||
{--workspace= : Workspace ID to import into (required)}
|
||||
{--agent=virgil : Agent ID to attribute memories to}
|
||||
{--source=all : Source type: memory, plans, claude-md, tasks, docs, all}
|
||||
{--source=all : Source type: memory, plans, claude-md, tasks, docs, wiki, all}
|
||||
{--code-path= : Root code directory (default: ~/Code)}
|
||||
{--dry-run : Preview what would be imported without storing}
|
||||
{--fresh : Clear the Qdrant collection before ingesting}';
|
||||
|
|
@ -44,12 +45,16 @@ class BrainIngestCommand extends Command
|
|||
$isDryRun = (bool) $this->option('dry-run');
|
||||
|
||||
$sources = $source === 'all'
|
||||
? ['memory', 'plans', 'claude-md', 'tasks', 'docs']
|
||||
? ['memory', 'plans', 'claude-md', 'tasks', 'docs', 'wiki']
|
||||
: [strtolower($source)];
|
||||
|
||||
// Gather all files first
|
||||
// Separate file-based and API-based sources
|
||||
$fileSources = array_filter($sources, fn ($s) => $s !== 'wiki');
|
||||
$apiSources = array_filter($sources, fn ($s) => $s === 'wiki');
|
||||
|
||||
// Gather file-based sources
|
||||
$filesBySource = [];
|
||||
foreach ($sources as $src) {
|
||||
foreach ($fileSources as $src) {
|
||||
$files = match ($src) {
|
||||
'memory' => $this->discoverMemoryFiles(),
|
||||
'plans' => $this->discoverPlanFiles($codePath),
|
||||
|
|
@ -62,9 +67,16 @@ class BrainIngestCommand extends Command
|
|||
$this->info(sprintf(' [%s] %d file(s)', $src, count($files)));
|
||||
}
|
||||
|
||||
$totalFiles = array_sum(array_map('count', $filesBySource));
|
||||
// Discover wiki pages from Forge API
|
||||
$wikiPages = [];
|
||||
if (in_array('wiki', $apiSources, true)) {
|
||||
$wikiPages = $this->discoverWikiPages();
|
||||
$this->info(sprintf(' [wiki] %d page(s) across %d repo(s)', count($wikiPages), count(array_unique(array_column($wikiPages, 'repo')))));
|
||||
}
|
||||
|
||||
$totalFiles = array_sum(array_map('count', $filesBySource)) + count($wikiPages);
|
||||
$this->newLine();
|
||||
$this->info("Total: {$totalFiles} file(s) to process.");
|
||||
$this->info("Total: {$totalFiles} item(s) to process.");
|
||||
|
||||
if ($totalFiles === 0) {
|
||||
return self::SUCCESS;
|
||||
|
|
@ -87,6 +99,12 @@ class BrainIngestCommand extends Command
|
|||
}
|
||||
}
|
||||
|
||||
if (! empty($wikiPages)) {
|
||||
$this->newLine();
|
||||
$this->comment('--- wiki ---');
|
||||
$this->processWikiPages($brain, $wikiPages, (int) $workspaceId, $this->option('agent') ?? 'virgil', $isDryRun);
|
||||
}
|
||||
|
||||
$this->newLine();
|
||||
$prefix = $isDryRun ? '[DRY RUN] ' : '';
|
||||
$this->info("{$prefix}Done. Imported: {$this->stats['imported']}, Skipped: {$this->stats['skipped']}, Errors: {$this->stats['errors']}");
|
||||
|
|
@ -280,6 +298,222 @@ class BrainIngestCommand extends Command
|
|||
return $files;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Wiki (Forge API)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
 * Discover wiki pages from all repos in the Forge org.
 *
 * Fetches the org's repository list (paginated, 50 per page), then each
 * repo's wiki page index, then each page's full content. Repos without a
 * wiki (404 on the index) and pages with empty content are skipped.
 * Returns an empty array — with a warning — when no API token is configured.
 *
 * @return array<array{repo: string, title: string, content: string}>
 */
private function discoverWikiPages(): array
{
    $baseUrl = config('upstream.gitea.url', 'https://forge.lthn.ai');
    $token = config('upstream.gitea.token');
    $org = config('upstream.gitea.org', 'core');

    if (! $token) {
        $this->warn('No Forge token — skipping wiki source.');

        return [];
    }

    $headers = ['Authorization' => 'token ' . $token];

    // Page through the org's repositories; Gitea caps list endpoints per page.
    $repos = [];
    $page = 1;

    do {
        $response = Http::withHeaders($headers)
            ->timeout(15)
            ->get("{$baseUrl}/api/v1/orgs/{$org}/repos", ['page' => $page, 'limit' => 50]);

        if (! $response->successful()) {
            $this->warn('Failed to fetch repos: ' . $response->status());
            break;
        }

        $batch = $response->json();
        if (empty($batch)) {
            break;
        }

        foreach ($batch as $r) {
            $repos[] = $r['name'];
        }
        $page++;
    } while (count($batch) === 50);

    // Fetch the wiki page index, then each page's content, for every repo.
    // NOTE(review): the /wiki/pages index may itself be paginated on very
    // large wikis — confirm against the Gitea API before relying on this.
    $pages = [];

    foreach ($repos as $repo) {
        $response = Http::withHeaders($headers)
            ->timeout(10)
            ->get("{$baseUrl}/api/v1/repos/{$org}/{$repo}/wiki/pages");

        // A 404 just means the repo has no wiki; successful() already
        // excludes it, so no separate status check is needed.
        if (! $response->successful()) {
            continue;
        }

        $wikiList = $response->json();

        if (empty($wikiList)) {
            continue;
        }

        foreach ($wikiList as $wiki) {
            $title = $wiki['title'] ?? 'Untitled';

            // Encode the title: wiki page names routinely contain spaces and
            // other characters that are unsafe in a raw URL path segment.
            $pageResponse = Http::withHeaders($headers)
                ->timeout(10)
                ->get("{$baseUrl}/api/v1/repos/{$org}/{$repo}/wiki/page/" . rawurlencode($title));

            if (! $pageResponse->successful()) {
                continue;
            }

            // Gitea returns page bodies base64-encoded; strict-decode and
            // fall back to empty on any failure.
            $content = $pageResponse->json('content_base64');
            $content = $content ? (base64_decode($content, true) ?: '') : '';

            if (trim($content) === '') {
                continue;
            }

            $pages[] = [
                'repo' => $repo,
                'title' => $title,
                'content' => $content,
            ];
        }
    }

    return $pages;
}
|
||||
|
||||
/**
 * Process wiki pages into contextual memories.
 *
 * Each page is split into markdown sections; every non-empty section is
 * stored via BrainService as a type:service memory tagged with its source,
 * repo, detected language, and humanised page title, so the PHP orchestrator
 * can reason about the Go services it coordinates. In dry-run mode the
 * sections are listed instead of stored.
 *
 * @param array<array{repo: string, title: string, content: string}> $pages
 */
private function processWikiPages(BrainService $brain, array $pages, int $workspaceId, string $agentId, bool $isDryRun): void
{
    foreach ($pages as $page) {
        $sections = $this->parseMarkdownFromString($page['content'], $page['title']);
        $repo = $page['repo'];

        // Detect language from the repo naming convention (php-*, go-*, or the bare 'go' repo).
        $lang = str_starts_with($repo, 'php-') ? 'php' : (str_starts_with($repo, 'go-') || $repo === 'go' ? 'go' : 'mixed');

        foreach ($sections as $section) {
            if (trim($section['content']) === '') {
                $this->stats['skipped']++;

                continue;
            }

            $tags = [
                'source:wiki',
                'repo:' . $repo,
                'lang:' . $lang,
                str_replace(['-', '_'], ' ', $page['title']),
            ];

            if ($isDryRun) {
                $this->line(sprintf(
                    ' %s/%s :: %s — %d chars [%s]',
                    $repo,
                    $page['title'],
                    $section['heading'],
                    // mb_strlen so the "chars" label reports characters, not bytes.
                    mb_strlen($section['content']),
                    implode(', ', $tags),
                ));
                $this->stats['imported']++;

                continue;
            }

            try {
                // Prefix with repo context so embeddings understand the service.
                $text = "[{$repo}] {$section['heading']}\n\n{$section['content']}";

                // Use mb_* consistently: the original checked byte length but
                // truncated by character count, so multibyte text over 3800
                // bytes yet under 3800 chars got an ellipsis appended without
                // actually being shortened.
                if (mb_strlen($text) > 3800) {
                    $text = mb_substr($text, 0, 3800) . '…';
                }

                $brain->remember([
                    'workspace_id' => $workspaceId,
                    'agent_id' => $agentId,
                    'type' => 'service',
                    'content' => $text,
                    'tags' => $tags,
                    'project' => $repo,
                    'confidence' => 0.8,
                ]);
                $this->stats['imported']++;
            } catch (\Throwable $e) {
                $this->warn(' Error: ' . $repo . '/' . $page['title'] . ' :: ' . $section['heading'] . ' — ' . $e->getMessage());
                $this->stats['errors']++;
            }
        }
    }
}
|
||||
|
||||
/**
 * Parse markdown sections from a string (not a file).
 *
 * Splits on H1–H3 headings; the text under each heading becomes one section.
 * Text appearing before the first heading is not captured — unless the
 * document has no headings at all, in which case the entire trimmed content
 * becomes a single section under the fallback heading.
 *
 * @return array<array{heading: string, content: string}>
 */
private function parseMarkdownFromString(string $content, string $fallbackHeading): array
{
    if (trim($content) === '') {
        return [];
    }

    $sections = [];
    $heading = '';
    $buffer = [];

    // Flush the buffered body under the current heading, if both are non-empty.
    $flush = static function () use (&$sections, &$heading, &$buffer): void {
        if ($heading === '' || $buffer === []) {
            return;
        }
        $body = trim(implode("\n", $buffer));
        if ($body !== '') {
            $sections[] = ['heading' => $heading, 'content' => $body];
        }
    };

    foreach (explode("\n", $content) as $line) {
        if (preg_match('/^#{1,3}\s+(.+)$/', $line, $m) === 1) {
            // New heading: close out the previous section and start fresh.
            $flush();
            $heading = trim($m[1]);
            $buffer = [];
        } else {
            $buffer[] = $line;
        }
    }

    // Close out the final section.
    $flush();

    // Heading-less documents become one section under the fallback heading.
    if ($sections === [] && trim($content) !== '') {
        $sections = [['heading' => $fallbackHeading, 'content' => trim($content)]];
    }

    return $sections;
}
|
||||
|
||||
/** @return array<string> */
|
||||
private function findMd(string $dir): array
|
||||
{
|
||||
|
|
|
|||
Reference in a new issue