2026-03-03 11:10:37 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
|
|
|
|
|
|
namespace Core\Mod\Agentic\Console\Commands;
|
|
|
|
|
|
|
|
|
|
use Core\Mod\Agentic\Services\BrainService;
|
|
|
|
|
use Illuminate\Console\Command;
|
|
|
|
|
use Symfony\Component\Finder\Finder;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Comprehensive knowledge ingestion into OpenBrain.
|
|
|
|
|
*
|
|
|
|
|
* Discovers markdown files across multiple source types and ingests
|
|
|
|
|
* them as sectioned memories with embedded vectors. Designed to
|
|
|
|
|
* archive scattered knowledge before filesystem cleanup.
|
|
|
|
|
*/
|
|
|
|
|
class BrainIngestCommand extends Command
{
    /** Source types accepted by --source, in processing order ("all" expands to this list). */
    private const SOURCES = ['memory', 'plans', 'claude-md', 'tasks'];

    /**
     * Maximum number of characters sent to the embedder per memory.
     *
     * embeddinggemma has a 2048-token context (~4K chars); longer input
     * makes Ollama answer with a 500, so oversized sections are truncated.
     */
    private const MAX_EMBED_CHARS = 3800;

    protected $signature = 'brain:ingest
        {--workspace= : Workspace ID to import into (required)}
        {--agent=virgil : Agent ID to attribute memories to}
        {--source=all : Source type: memory, plans, claude-md, tasks, all}
        {--code-path= : Root code directory (default: ~/Code)}
        {--dry-run : Preview what would be imported without storing}
        {--fresh : Clear the Qdrant collection before ingesting}';

    protected $description = 'Ingest markdown knowledge from across the filesystem into OpenBrain';

    /** @var array<string, int> */
    private array $stats = ['imported' => 0, 'skipped' => 0, 'errors' => 0];

    /**
     * Discover the markdown files for the requested source(s) and ingest them.
     *
     * Returns FAILURE when the options are invalid, when --fresh could not
     * clear the collection, or when at least one section failed to store;
     * SUCCESS otherwise (including "nothing to do" and dry runs).
     */
    public function handle(BrainService $brain): int
    {
        $workspaceOption = $this->option('workspace');
        if (! $workspaceOption) {
            $this->error('--workspace is required.');

            return self::FAILURE;
        }

        // A plain (int) cast would silently turn "abc" into workspace 0.
        if (! ctype_digit((string) $workspaceOption) || (int) $workspaceOption < 1) {
            $this->error('--workspace must be a positive integer ID.');

            return self::FAILURE;
        }
        $workspaceId = (int) $workspaceOption;

        // Normalise case before comparing so "--source=ALL" behaves like "all".
        $source = strtolower((string) ($this->option('source') ?? 'all'));
        if ($source !== 'all' && ! in_array($source, self::SOURCES, true)) {
            $this->error(sprintf(
                'Unknown --source "%s". Expected one of: %s, all.',
                $source,
                implode(', ', self::SOURCES),
            ));

            return self::FAILURE;
        }
        $sources = $source === 'all' ? self::SOURCES : [$source];

        // Expand "~" ourselves: shells do not expand it inside "--code-path=~/x".
        $codePathOption = $this->option('code-path');
        $codePath = $this->expandHome(
            is_string($codePathOption) && $codePathOption !== '' ? $codePathOption : '~/Code'
        );

        $agentId = (string) ($this->option('agent') ?? 'virgil');
        $isDryRun = (bool) $this->option('dry-run');

        // Gather all files first
        $filesBySource = [];
        foreach ($sources as $src) {
            $files = match ($src) {
                'memory' => $this->discoverMemoryFiles(),
                'plans' => $this->discoverPlanFiles($codePath),
                'claude-md' => $this->discoverClaudeMdFiles($codePath),
                'tasks' => $this->discoverTaskFiles($codePath),
                default => [],
            };
            $filesBySource[$src] = $files;
            $this->info(sprintf('  [%s] %d file(s)', $src, count($files)));
        }

        $totalFiles = array_sum(array_map('count', $filesBySource));
        $this->newLine();
        $this->info("Total: {$totalFiles} file(s) to process.");

        if ($totalFiles === 0) {
            return self::SUCCESS;
        }

        if (! $isDryRun) {
            if ($this->option('fresh')) {
                $this->warn('Clearing existing collection...');

                try {
                    $this->clearCollection($brain);
                } catch (\RuntimeException $e) {
                    $this->error($e->getMessage());

                    return self::FAILURE;
                }
            }
            $brain->ensureCollection();
        }

        foreach ($filesBySource as $src => $files) {
            $this->newLine();
            $this->comment("--- {$src} ---");

            foreach ($files as $file) {
                $this->processFile($brain, $file, $src, $workspaceId, $agentId, $isDryRun);
            }
        }

        $this->newLine();
        $prefix = $isDryRun ? '[DRY RUN] ' : '';
        $this->info("{$prefix}Done. Imported: {$this->stats['imported']}, Skipped: {$this->stats['skipped']}, Errors: {$this->stats['errors']}");

        // This command is meant to run before source files are cleaned up, so a
        // partial ingest must not look like a clean one to calling scripts.
        return $this->stats['errors'] > 0 ? self::FAILURE : self::SUCCESS;
    }

    /**
     * Process a single file into sectioned memories.
     *
     * Each non-empty section becomes one memory. In dry-run mode the section
     * is only printed (and still counted as imported). Storage failures are
     * reported as warnings and counted in the error stats rather than
     * aborting the run.
     */
    private function processFile(BrainService $brain, string $file, string $source, int $workspaceId, string $agentId, bool $isDryRun): void
    {
        $sections = $this->parseMarkdownSections($file);
        $filename = basename($file, '.md');
        $project = $this->extractProject($file, $source);

        if (empty($sections)) {
            $this->stats['skipped']++;

            return;
        }

        foreach ($sections as $section) {
            if (trim($section['content']) === '') {
                $this->stats['skipped']++;

                continue;
            }

            $type = $this->inferType($section['heading'], $section['content'], $source);
            $tags = $this->buildTags($section['heading'], $filename, $source, $project);

            if ($isDryRun) {
                $this->line(sprintf(
                    '  %s :: %s (%s) — %d chars [%s]',
                    $filename,
                    $section['heading'],
                    $type,
                    mb_strlen($section['content']),
                    implode(', ', $tags),
                ));
                $this->stats['imported']++;

                continue;
            }

            try {
                $text = $section['heading']."\n\n".$section['content'];

                // Measure and cut in the same unit (characters). Mixing a byte
                // length test with a character cut appended "…" to multibyte
                // text that had not actually been shortened.
                if (mb_strlen($text) > self::MAX_EMBED_CHARS) {
                    $text = mb_substr($text, 0, self::MAX_EMBED_CHARS).'…';
                }

                $brain->remember([
                    'workspace_id' => $workspaceId,
                    'agent_id' => $agentId,
                    'type' => $type,
                    'content' => $text,
                    'tags' => $tags,
                    'project' => $project,
                    'confidence' => $this->confidenceForSource($source),
                ]);
                $this->stats['imported']++;
            } catch (\Throwable $e) {
                // Deliberate best effort: one bad section must not stop the run.
                $this->warn("  Error: {$filename} :: {$section['heading']} — {$e->getMessage()}");
                $this->stats['errors']++;
            }
        }
    }

    // -------------------------------------------------------------------------
    // File discovery
    // -------------------------------------------------------------------------

    /**
     * Markdown files matching ~/.claude/projects/<project>/memory/*.md.
     *
     * @return array<string>
     */
    private function discoverMemoryFiles(): array
    {
        $pattern = $this->expandHome('~/.claude/projects/*/memory/*.md');

        return glob($pattern) ?: [];
    }

    /**
     * Plan documents: ~/.claude/plans plus every docs/plans/ directory found
     * under the code root.
     *
     * @return array<string>
     */
    private function discoverPlanFiles(string $codePath): array
    {
        $files = [];

        // ~/.claude/plans (superpowers plans)
        $claudePlans = $this->expandHome('~/.claude/plans');
        if (is_dir($claudePlans)) {
            $files = array_merge($files, $this->findMd($claudePlans));
        }

        // docs/plans across all repos under the code root
        if (is_dir($codePath)) {
            $finder = Finder::create()
                ->files()
                ->ignoreUnreadableDirs()
                ->name('*.md')
                ->in($codePath)
                ->path('/docs\/plans\//')
                ->notPath('node_modules')
                ->notPath('vendor')
                ->sortByName();

            $files = array_merge($files, $this->realPaths($finder));
        }

        return $files;
    }

    /**
     * CLAUDE.md files under the code root (search depth limited to "< 4"),
     * skipping node_modules, vendor and .claude paths.
     *
     * @return array<string>
     */
    private function discoverClaudeMdFiles(string $codePath): array
    {
        if (! is_dir($codePath)) {
            return [];
        }

        $finder = Finder::create()
            ->files()
            ->ignoreUnreadableDirs()
            ->name('CLAUDE.md')
            ->in($codePath)
            ->depth('< 4')
            ->notPath('node_modules')
            ->notPath('vendor')
            ->notPath('.claude')
            ->sortByName();

        return $this->realPaths($finder);
    }

    /**
     * Task markdown files under <code root>/host-uk/core/tasks, excluding the
     * recovered-hostuk and recovered-root paths.
     *
     * @param  string|null  $codePath  Root code directory; null means ~/Code.
     * @return array<string>
     */
    private function discoverTaskFiles(?string $codePath = null): array
    {
        $root = $codePath ?? $this->expandHome('~/Code');
        $tasksDir = rtrim($root, '/').'/host-uk/core/tasks';
        if (! is_dir($tasksDir)) {
            return [];
        }

        $finder = Finder::create()
            ->files()
            ->ignoreUnreadableDirs()
            ->name('*.md')
            ->in($tasksDir)
            ->notPath('recovered-hostuk')
            ->notPath('recovered-root')
            ->sortByName();

        return $this->realPaths($finder);
    }

    /**
     * Resolve a Finder result set to real paths.
     *
     * getRealPath() returns false for entries that cannot be resolved (for
     * example a dangling symlink); those are dropped so callers only ever
     * receive strings.
     *
     * @return array<string>
     */
    private function realPaths(Finder $finder): array
    {
        $paths = [];
        foreach ($finder as $file) {
            $realPath = $file->getRealPath();
            if ($realPath !== false) {
                $paths[] = $realPath;
            }
        }

        return $paths;
    }

    /**
     * Markdown files directly inside $dir and one directory level below it
     * (e.g. completed/).
     *
     * @return array<string>
     */
    private function findMd(string $dir): array
    {
        return array_merge(
            glob("{$dir}/*.md") ?: [],
            glob("{$dir}/*/*.md") ?: [],
        );
    }

    // -------------------------------------------------------------------------
    // Parsing
    // -------------------------------------------------------------------------

    /**
     * Split a markdown file into sections at level 1-3 ATX headings.
     *
     * - Heading-like lines inside fenced code blocks (``` or ~~~) are treated
     *   as content, so a shell comment such as "# install" does not start a
     *   new section.
     * - Text that precedes the first heading is kept as a section titled with
     *   the file's base name instead of being discarded.
     * - Sections whose body is blank are omitted.
     * - If that leaves nothing (e.g. a file made only of headings), the whole
     *   file is returned as one section titled with the file's base name.
     *
     * An unreadable or blank file yields an empty array.
     *
     * @return array<array{heading: string, content: string}>
     */
    private function parseMarkdownSections(string $filePath): array
    {
        $content = file_get_contents($filePath);
        if ($content === false || trim($content) === '') {
            return [];
        }

        $fallbackHeading = basename($filePath, '.md');
        $sections = [];
        $currentHeading = '';
        $currentContent = [];
        $openFence = null; // '`' or '~' while inside a fenced code block

        foreach (explode("\n", $content) as $line) {
            if (preg_match('/^ {0,3}(`{3,}|~{3,})/', $line, $fence) === 1) {
                $marker = $fence[1][0];
                if ($openFence === null) {
                    $openFence = $marker;
                } elseif ($openFence === $marker) {
                    $openFence = null;
                }
                $currentContent[] = $line;

                continue;
            }

            if ($openFence === null && preg_match('/^#{1,3}\s+(.+)$/', $line, $matches) === 1) {
                $section = $this->buildSection(
                    $currentHeading !== '' ? $currentHeading : $fallbackHeading,
                    $currentContent,
                );
                if ($section !== null) {
                    $sections[] = $section;
                }

                $currentHeading = trim($matches[1]);
                $currentContent = [];

                continue;
            }

            $currentContent[] = $line;
        }

        // Flush last section
        $section = $this->buildSection(
            $currentHeading !== '' ? $currentHeading : $fallbackHeading,
            $currentContent,
        );
        if ($section !== null) {
            $sections[] = $section;
        }

        // Nothing had a body: treat the entire file as one section
        if (empty($sections)) {
            $sections[] = [
                'heading' => $fallbackHeading,
                'content' => trim($content),
            ];
        }

        return $sections;
    }

    /**
     * Join buffered lines into a section, or return null when the body is blank.
     *
     * @param  array<string>  $lines
     * @return array{heading: string, content: string}|null
     */
    private function buildSection(string $heading, array $lines): ?array
    {
        $text = trim(implode("\n", $lines));

        return $text === '' ? null : ['heading' => $heading, 'content' => $text];
    }

    // -------------------------------------------------------------------------
    // Metadata
    // -------------------------------------------------------------------------

    /**
     * Derive a project name from a file path, or null when none can be found.
     *
     * NOTE(review): the memory pattern captures only the text after the last
     * "-" of the directory name, so a hyphenated project name is cut to its
     * final segment. The repo patterns look for a literal "/Code/" segment
     * and therefore do not follow a custom --code-path. Confirm both are
     * intended. $source is currently unused.
     */
    private function extractProject(string $filePath, string $source): ?string
    {
        // Memory files: ~/.claude/projects/-Users-snider-Code-{project}/memory/
        if (preg_match('/projects\/[^\/]*-([^-\/]+)\/memory\//', $filePath, $m)) {
            return $m[1];
        }

        // Code repos: ~/Code/{project}/ or ~/Code/host-uk/{project}/
        if (preg_match('#/Code/host-uk/([^/]+)/#', $filePath, $m)) {
            return $m[1];
        }
        if (preg_match('#/Code/([^/]+)/#', $filePath, $m)) {
            return $m[1];
        }

        return null;
    }

    /**
     * Pick a memory type for a section.
     *
     * The "plans" and "claude-md" sources map to fixed types. Otherwise the
     * lower-cased heading and content are scanned for keywords and the first
     * group (in declaration order) with a hit wins; "observation" is the
     * fallback.
     *
     * NOTE(review): matching is by substring, so e.g. "fix" also matches
     * "prefix" and "plan" matches "explanation" — confirm that is acceptable.
     */
    private function inferType(string $heading, string $content, string $source): string
    {
        // Source-specific defaults
        if ($source === 'plans') {
            return 'plan';
        }
        if ($source === 'claude-md') {
            return 'convention';
        }

        $lower = strtolower($heading.' '.$content);

        $patterns = [
            'architecture' => ['architecture', 'stack', 'infrastructure', 'layer', 'service mesh'],
            'convention' => ['convention', 'standard', 'naming', 'pattern', 'rule', 'coding'],
            'decision' => ['decision', 'chose', 'strategy', 'approach', 'domain'],
            'bug' => ['bug', 'fix', 'broken', 'error', 'issue', 'lesson'],
            'plan' => ['plan', 'todo', 'roadmap', 'milestone', 'phase', 'task'],
            'research' => ['research', 'finding', 'discovery', 'analysis', 'rfc'],
        ];

        foreach ($patterns as $type => $keywords) {
            foreach ($keywords as $keyword) {
                if (str_contains($lower, $keyword)) {
                    return $type;
                }
            }
        }

        return 'observation';
    }

    /**
     * Build the tag list for a section: the source, the project (if any) and
     * the file name with "-" and "_" replaced by spaces (omitted for the
     * generic MEMORY / CLAUDE file names). $heading is currently unused.
     *
     * @return array<string>
     */
    private function buildTags(string $heading, string $filename, string $source, ?string $project): array
    {
        $tags = ["source:{$source}"];

        if ($project) {
            $tags[] = "project:{$project}";
        }

        if ($filename !== 'MEMORY' && $filename !== 'CLAUDE') {
            $tags[] = str_replace(['-', '_'], ' ', $filename);
        }

        return $tags;
    }

    /**
     * Confidence score stored with memories from the given source
     * (0.5 for unknown sources).
     */
    private function confidenceForSource(string $source): float
    {
        return match ($source) {
            'memory' => 0.8,
            'claude-md' => 0.9,
            'plans' => 0.6,
            'tasks' => 0.5,
            default => 0.5,
        };
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

    /**
     * Drop the Qdrant collection and delete every BrainMemory row.
     *
     * The database rows are only removed once Qdrant has accepted the delete
     * (or answered 404), so a failed request cannot leave the two stores out
     * of sync.
     *
     * NOTE(review): the delete is not scoped to --workspace; it removes all
     * rows. The service's qdrantUrl/collection are read through reflection —
     * presumably they are non-public on BrainService; confirm, and confirm
     * the runtime allows reading them without setAccessible().
     *
     * @throws \RuntimeException when Qdrant rejects the delete request
     */
    private function clearCollection(BrainService $brain): void
    {
        $reflection = new \ReflectionClass($brain);
        $qdrantUrl = $reflection->getProperty('qdrantUrl')->getValue($brain);
        $collection = $reflection->getProperty('collection')->getValue($brain);

        // Clear Qdrant collection.
        // NOTE(review): TLS verification is disabled for this request.
        $response = \Illuminate\Support\Facades\Http::withoutVerifying()
            ->timeout(10)
            ->delete("{$qdrantUrl}/collections/{$collection}");

        // A 404 is tolerated: there was simply nothing to delete.
        if ($response->failed() && $response->status() !== 404) {
            throw new \RuntimeException(sprintf(
                'Failed to clear Qdrant collection "%s" (HTTP %d); database rows were left untouched.',
                $collection,
                $response->status(),
            ));
        }

        // Delete the DB rows so they stay in sync with Qdrant.
        \Core\Mod\Agentic\Models\BrainMemory::query()->forceDelete();
    }

    /**
     * Expand a leading "~" ("~" alone or "~/...") to the current user's home
     * directory; any other path is returned unchanged.
     */
    private function expandHome(string $path): string
    {
        if ($path !== '~' && ! str_starts_with($path, '~/')) {
            return $path;
        }

        return $this->homeDirectory().substr($path, 1);
    }

    /**
     * Best-effort lookup of the current user's home directory: $HOME, then
     * the POSIX account database when that extension is loaded, then the
     * macOS-style /Users/<user> guess.
     */
    private function homeDirectory(): string
    {
        $home = getenv('HOME');
        if (is_string($home) && $home !== '') {
            return $home;
        }

        if (function_exists('posix_getpwuid') && function_exists('posix_geteuid')) {
            $account = posix_getpwuid(posix_geteuid());
            if (is_array($account) && ! empty($account['dir'])) {
                return $account['dir'];
            }
        }

        return '/Users/'.get_current_user();
    }
}
|