feat(brain): add docs source type for framework documentation ingestion
Discovers markdown files under core-php/docs/build/php/ and core-php/docs/packages/ for vectorisation into OpenBrain. Ingested entries are tagged as source:docs with 0.85 confidence and typed as documentation. Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
b32d339a53
commit
01826bc5e9
1 changed file with 38 additions and 3 deletions
|
|
@ -20,7 +20,7 @@ class BrainIngestCommand extends Command
|
|||
protected $signature = 'brain:ingest
|
||||
{--workspace= : Workspace ID to import into (required)}
|
||||
{--agent=virgil : Agent ID to attribute memories to}
|
||||
{--source=all : Source type: memory, plans, claude-md, tasks, all}
|
||||
{--source=all : Source type: memory, plans, claude-md, tasks, docs, all}
|
||||
{--code-path= : Root code directory (default: ~/Code)}
|
||||
{--dry-run : Preview what would be imported without storing}
|
||||
{--fresh : Clear the Qdrant collection before ingesting}';
|
||||
|
|
@ -44,7 +44,7 @@ class BrainIngestCommand extends Command
|
|||
$isDryRun = (bool) $this->option('dry-run');
|
||||
|
||||
$sources = $source === 'all'
|
||||
? ['memory', 'plans', 'claude-md', 'tasks']
|
||||
? ['memory', 'plans', 'claude-md', 'tasks', 'docs']
|
||||
: [strtolower($source)];
|
||||
|
||||
// Gather all files first
|
||||
|
|
@ -55,6 +55,7 @@ class BrainIngestCommand extends Command
|
|||
'plans' => $this->discoverPlanFiles($codePath),
|
||||
'claude-md' => $this->discoverClaudeMdFiles($codePath),
|
||||
'tasks' => $this->discoverTaskFiles(),
|
||||
'docs' => $this->discoverDocFiles($codePath),
|
||||
default => [],
|
||||
};
|
||||
$filesBySource[$src] = $files;
|
||||
|
|
@ -249,6 +250,36 @@ class BrainIngestCommand extends Command
|
|||
return $files;
|
||||
}
|
||||
|
||||
/**
 * Discover CorePHP framework documentation markdown files.
 *
 * Two fixed roots beneath $codePath are searched, in the order listed:
 * host-uk/core-php/docs/build/php and host-uk/core-php/docs/packages.
 * A root that is not a directory is skipped. Within each root the
 * matched files are sorted by name.
 *
 * @param string $codePath Root code directory the doc roots are resolved against.
 *
 * @return array<string> Resolved (real) paths of the *.md files matched by the Finder.
 */
private function discoverDocFiles(string $codePath): array
{
    $files = [];

    // CorePHP framework docs (build/php + packages)
    $docRoots = [
        $codePath.'/host-uk/core-php/docs/build/php',
        $codePath.'/host-uk/core-php/docs/packages',
    ];

    foreach ($docRoots as $root) {
        if (! is_dir($root)) {
            continue;
        }

        $finder = Finder::create()
            ->files()
            ->name('*.md')
            ->in($root)
            ->sortByName();

        foreach ($finder as $file) {
            $realPath = $file->getRealPath();

            // getRealPath() yields false when the path cannot be resolved
            // (e.g. the file vanished mid-scan); keep the result a list of strings.
            if ($realPath === false) {
                continue;
            }

            $files[] = $realPath;
        }
    }

    return $files;
}
|
||||
|
||||
/** @return array<string> */
|
||||
private function findMd(string $dir): array
|
||||
{
|
||||
|
|
@ -346,6 +377,9 @@ class BrainIngestCommand extends Command
|
|||
if ($source === 'claude-md') {
|
||||
return 'convention';
|
||||
}
|
||||
if ($source === 'docs') {
|
||||
return 'documentation';
|
||||
}
|
||||
|
||||
$lower = strtolower($heading.' '.$content);
|
||||
|
||||
|
|
@ -388,8 +422,9 @@ class BrainIngestCommand extends Command
|
|||
private function confidenceForSource(string $source): float
|
||||
{
|
||||
return match ($source) {
|
||||
'memory' => 0.8,
|
||||
'claude-md' => 0.9,
|
||||
'docs' => 0.85,
|
||||
'memory' => 0.8,
|
||||
'plans' => 0.6,
|
||||
'tasks' => 0.5,
|
||||
default => 0.5,
|
||||
|
|
|
|||
Reference in a new issue