feat(brain): improve recall quality and ingest deduplication

- Add source field to brain_memories (manual, ingest:claude-md, etc.)
- Pass Qdrant similarity scores through to API response
- Minimum 50-char content length filter during ingest
- Content hash deduplication prevents duplicate memories on re-ingest
- Update VALID_TYPES to include all 13 memory types
- Include score and source in toMcpContext response

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-03-15 07:42:38 +00:00
parent 116fecb0d6
commit c5c50f310c
4 changed files with 63 additions and 7 deletions

View file

@ -128,22 +128,41 @@ class BrainIngestCommand extends Command
}
foreach ($sections as $section) {
if (trim($section['content']) === '') {
$content = trim($section['content']);
// Skip sections that are too short to be useful
if ($content === '' || strlen($content) < 50) {
$this->stats['skipped']++;
continue;
}
$type = $this->inferType($section['heading'], $section['content'], $source);
$type = $this->inferType($section['heading'], $content, $source);
$tags = $this->buildTags($section['heading'], $filename, $source, $project);
$text = $section['heading']."\n\n".$content;
// Content hash dedup — skip if identical content already exists
if (! $isDryRun) {
$contentHash = md5($text);
$exists = \Core\Mod\Agentic\Models\BrainMemory::where('workspace_id', $workspaceId)
->whereRaw('MD5(content) = ?', [$contentHash])
->exists();
if ($exists) {
$this->stats['skipped']++;
continue;
}
}
if ($isDryRun) {
$this->line(sprintf(
' %s :: %s (%s) — %d chars [%s]',
$filename,
$section['heading'],
$type,
strlen($section['content']),
strlen($content),
implode(', ', $tags),
));
$this->stats['imported']++;
@ -152,7 +171,6 @@ class BrainIngestCommand extends Command
}
try {
$text = $section['heading']."\n\n".$section['content'];
// embeddinggemma has a 2048-token context (~4K chars).
// Truncate oversized sections to avoid Ollama 500 errors.
@ -168,6 +186,7 @@ class BrainIngestCommand extends Command
'tags' => $tags,
'project' => $project,
'confidence' => $this->confidenceForSource($source),
'source' => 'ingest:'.$source,
]);
$this->stats['imported']++;
} catch (\Throwable $e) {

View file

@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
/**
 * Adds a `source` column to `brain_memories` on the `brain` connection.
 *
 * The column records where a memory originated (e.g. "manual",
 * "ingest:claude-md", "ingest:plans") and is indexed.
 */
return new class extends Migration
{
    /**
     * Add the nullable, indexed `source` column after `confidence`.
     */
    public function up(): void
    {
        Schema::connection('brain')->table('brain_memories', function (Blueprint $table) {
            $table->string('source', 100)->nullable()->after('confidence')
                ->comment('Origin: manual, ingest:claude-md, ingest:plans, etc.');
            $table->index('source');
        });
    }

    /**
     * Remove the `source` index and column again.
     */
    public function down(): void
    {
        Schema::connection('brain')->table('brain_memories', function (Blueprint $table) {
            // Drop the index created in up() before the column itself: some
            // drivers (notably SQLite) refuse to drop a column that is still
            // referenced by an index, which would make the rollback fail.
            // The array form resolves to the same conventional index name
            // that index('source') generated.
            $table->dropIndex(['source']);
            $table->dropColumn('source');
        });
    }
};

View file

@ -42,6 +42,7 @@ class BrainMemory extends Model
/** Valid memory types. */
public const VALID_TYPES = [
'fact',
'decision',
'observation',
'convention',
@ -49,6 +50,11 @@ class BrainMemory extends Model
'plan',
'bug',
'architecture',
'documentation',
'service',
'pattern',
'context',
'procedure',
];
protected $connection = 'brain';
@ -65,6 +71,7 @@ class BrainMemory extends Model
'confidence',
'supersedes_id',
'expires_at',
'source',
];
protected $casts = [
@ -171,7 +178,7 @@ class BrainMemory extends Model
}
/** Format the memory for MCP tool responses. */
public function toMcpContext(): array
public function toMcpContext(float $score = 0.0): array
{
return [
'id' => $this->id,
@ -181,6 +188,8 @@ class BrainMemory extends Model
'tags' => $this->tags,
'project' => $this->project,
'confidence' => $this->confidence,
'score' => round($score, 4),
'source' => $this->source ?? 'manual',
'supersedes_id' => $this->supersedes_id,
'expires_at' => $this->expires_at?->toIso8601String(),
'created_at' => $this->created_at?->toIso8601String(),

View file

@ -85,6 +85,7 @@ class BrainService
'tags' => $memory->tags ?? [],
'project' => $memory->project,
'confidence' => $memory->confidence,
'source' => $memory->source ?? 'manual',
'created_at' => $memory->created_at->toIso8601String(),
]);
$payload['vector'] = $vector;
@ -146,8 +147,9 @@ class BrainService
->values();
return [
'memories' => $memories->map(fn (BrainMemory $m) => $m->toMcpContext())->all(),
'scores' => $scoreMap,
'memories' => $memories->map(fn (BrainMemory $m) => $m->toMcpContext(
(float) ($scoreMap[$m->id] ?? 0.0)
))->all(),
];
}