From c5c50f310c5bfc84ce5e8cdca0ccffd80e2722b5 Mon Sep 17 00:00:00 2001
From: Snider
Date: Sun, 15 Mar 2026 07:42:38 +0000
Subject: [PATCH] feat(brain): improve recall quality and ingest deduplication

- Add source field to brain_memories (manual, ingest:claude-md, etc.)
- Pass Qdrant similarity scores through to API response
- Minimum 50-char content length filter during ingest
- Content hash deduplication prevents duplicate memories on re-ingest
- Update VALID_TYPES to include all 13 memory types
- Include score and source in toMcpContext response
- Drop the source index before the column when rolling the migration back

Co-Authored-By: Virgil
---
Review notes (this section is ignored by git am):
- This patch was recovered from a copy whose line breaks and indentation had
  been collapsed and whose new migration file had lost its head. Indentation
  was restored by convention, and the migration header was inferred from the
  hunk size and the visible down() method. Check the context lines against
  the tree before applying. The index lines still carry the original blob ids.
- The dedup hash is computed before the truncation step described by the
  embeddinggemma comment. If the stored content is the truncated text, an
  oversized section will never match on re-ingest. TODO confirm.
- whereRaw('MD5(content) = ?') relies on the brain database providing MD5().
  A stored content-hash column would likely be cheaper and portable.

 .../Console/Commands/BrainIngestCommand.php   | 27 ++++++++++++++++---
 ...01_000010_add_source_to_brain_memories.php | 27 +++++++++++++++++++
 src/php/Models/BrainMemory.php                | 11 +++++++++-
 src/php/Services/BrainService.php             |  6 +++--
 4 files changed, 64 insertions(+), 7 deletions(-)
 create mode 100644 src/php/Migrations/0001_01_01_000010_add_source_to_brain_memories.php

diff --git a/src/php/Console/Commands/BrainIngestCommand.php b/src/php/Console/Commands/BrainIngestCommand.php
index 17612fb..cf314a7 100644
--- a/src/php/Console/Commands/BrainIngestCommand.php
+++ b/src/php/Console/Commands/BrainIngestCommand.php
@@ -128,22 +128,41 @@ class BrainIngestCommand extends Command
         }
 
         foreach ($sections as $section) {
-            if (trim($section['content']) === '') {
+            $content = trim($section['content']);
+
+            // Skip sections that are too short to be useful
+            if (mb_strlen($content) < 50) {
                 $this->stats['skipped']++;
 
                 continue;
             }
 
-            $type = $this->inferType($section['heading'], $section['content'], $source);
+            $type = $this->inferType($section['heading'], $content, $source);
             $tags = $this->buildTags($section['heading'], $filename, $source, $project);
 
+            $text = $section['heading']."\n\n".$content;
+
+            // Content hash dedup — skip if identical content already exists
+            if (! $isDryRun) {
+                $contentHash = md5($text);
+                $exists = \Core\Mod\Agentic\Models\BrainMemory::where('workspace_id', $workspaceId)
+                    ->whereRaw('MD5(content) = ?', [$contentHash])
+                    ->exists();
+
+                if ($exists) {
+                    $this->stats['skipped']++;
+
+                    continue;
+                }
+            }
+
             if ($isDryRun) {
                 $this->line(sprintf(
                     ' %s :: %s (%s) — %d chars [%s]',
                     $filename,
                     $section['heading'],
                     $type,
-                    strlen($section['content']),
+                    mb_strlen($content),
                     implode(', ', $tags),
                 ));
                 $this->stats['imported']++;
@@ -152,7 +171,6 @@ class BrainIngestCommand extends Command
             }
 
             try {
-                $text = $section['heading']."\n\n".$section['content'];
 
                 // embeddinggemma has a 2048-token context (~4K chars).
                 // Truncate oversized sections to avoid Ollama 500 errors.
@@ -168,6 +186,7 @@ class BrainIngestCommand extends Command
                     'tags' => $tags,
                     'project' => $project,
                     'confidence' => $this->confidenceForSource($source),
+                    'source' => 'ingest:'.$source,
                 ]);
                 $this->stats['imported']++;
             } catch (\Throwable $e) {
diff --git a/src/php/Migrations/0001_01_01_000010_add_source_to_brain_memories.php b/src/php/Migrations/0001_01_01_000010_add_source_to_brain_memories.php
new file mode 100644
index 0000000..c1053f4
--- /dev/null
+++ b/src/php/Migrations/0001_01_01_000010_add_source_to_brain_memories.php
@@ -0,0 +1,27 @@
+<?php
+
+declare(strict_types=1);
+
+use Illuminate\Database\Migrations\Migration;
+use Illuminate\Database\Schema\Blueprint;
+use Illuminate\Support\Facades\Schema;
+
+return new class extends Migration
+{
+    public function up(): void
+    {
+        Schema::connection('brain')->table('brain_memories', function (Blueprint $table) {
+            $table->string('source', 100)->nullable()->after('confidence')
+                ->comment('Origin: manual, ingest:claude-md, ingest:plans, etc.');
+            $table->index('source');
+        });
+    }
+
+    public function down(): void
+    {
+        Schema::connection('brain')->table('brain_memories', function (Blueprint $table) {
+            $table->dropIndex(['source']);
+            $table->dropColumn('source');
+        });
+    }
+};
diff --git a/src/php/Models/BrainMemory.php b/src/php/Models/BrainMemory.php
index 1181a25..acf1a58 100644
--- a/src/php/Models/BrainMemory.php
+++ b/src/php/Models/BrainMemory.php
@@ -42,6 +42,7 @@ class BrainMemory extends Model
 
     /** Valid memory types. */
     public const VALID_TYPES = [
+        'fact',
         'decision',
         'observation',
         'convention',
@@ -49,6 +50,11 @@ class BrainMemory extends Model
         'plan',
         'bug',
         'architecture',
+        'documentation',
+        'service',
+        'pattern',
+        'context',
+        'procedure',
     ];
 
     protected $connection = 'brain';
@@ -65,6 +71,7 @@ class BrainMemory extends Model
         'confidence',
         'supersedes_id',
         'expires_at',
+        'source',
     ];
 
     protected $casts = [
@@ -171,7 +178,7 @@ class BrainMemory extends Model
     }
 
     /** Format the memory for MCP tool responses. */
-    public function toMcpContext(): array
+    public function toMcpContext(float $score = 0.0): array
     {
         return [
             'id' => $this->id,
@@ -181,6 +188,8 @@ class BrainMemory extends Model
             'tags' => $this->tags,
             'project' => $this->project,
             'confidence' => $this->confidence,
+            'score' => round($score, 4),
+            'source' => $this->source ?? 'manual',
             'supersedes_id' => $this->supersedes_id,
             'expires_at' => $this->expires_at?->toIso8601String(),
             'created_at' => $this->created_at?->toIso8601String(),
diff --git a/src/php/Services/BrainService.php b/src/php/Services/BrainService.php
index e4f0d7e..c533407 100644
--- a/src/php/Services/BrainService.php
+++ b/src/php/Services/BrainService.php
@@ -85,6 +85,7 @@ class BrainService
             'tags' => $memory->tags ?? [],
             'project' => $memory->project,
             'confidence' => $memory->confidence,
+            'source' => $memory->source ?? 'manual',
             'created_at' => $memory->created_at->toIso8601String(),
         ]);
         $payload['vector'] = $vector;
@@ -146,8 +147,9 @@ class BrainService
             ->values();
 
         return [
-            'memories' => $memories->map(fn (BrainMemory $m) => $m->toMcpContext())->all(),
-            'scores' => $scoreMap,
+            'memories' => $memories->map(fn (BrainMemory $m) => $m->toMcpContext(
+                (float) ($scoreMap[$m->id] ?? 0.0)
+            ))->all(),
         ];
     }
 