feat(brain): improve recall quality and ingest deduplication
- Add source field to brain_memories (manual, ingest:claude-md, etc.)
- Pass Qdrant similarity scores through to API response
- Apply a minimum 50-char content length filter during ingest
- Content hash deduplication prevents duplicate memories on re-ingest
- Update VALID_TYPES to include all 13 memory types
- Include score and source in toMcpContext response

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
116fecb0d6
commit
c5c50f310c
4 changed files with 63 additions and 7 deletions
|
|
@ -128,22 +128,41 @@ class BrainIngestCommand extends Command
|
|||
}
|
||||
|
||||
foreach ($sections as $section) {
|
||||
if (trim($section['content']) === '') {
|
||||
$content = trim($section['content']);
|
||||
|
||||
// Skip sections that are too short to be useful
|
||||
if ($content === '' || strlen($content) < 50) {
|
||||
$this->stats['skipped']++;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
$type = $this->inferType($section['heading'], $section['content'], $source);
|
||||
$type = $this->inferType($section['heading'], $content, $source);
|
||||
$tags = $this->buildTags($section['heading'], $filename, $source, $project);
|
||||
|
||||
$text = $section['heading']."\n\n".$content;
|
||||
|
||||
// Content hash dedup — skip if identical content already exists
|
||||
if (! $isDryRun) {
|
||||
$contentHash = md5($text);
|
||||
$exists = \Core\Mod\Agentic\Models\BrainMemory::where('workspace_id', $workspaceId)
|
||||
->whereRaw('MD5(content) = ?', [$contentHash])
|
||||
->exists();
|
||||
|
||||
if ($exists) {
|
||||
$this->stats['skipped']++;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if ($isDryRun) {
|
||||
$this->line(sprintf(
|
||||
' %s :: %s (%s) — %d chars [%s]',
|
||||
$filename,
|
||||
$section['heading'],
|
||||
$type,
|
||||
strlen($section['content']),
|
||||
strlen($content),
|
||||
implode(', ', $tags),
|
||||
));
|
||||
$this->stats['imported']++;
|
||||
|
|
@ -152,7 +171,6 @@ class BrainIngestCommand extends Command
|
|||
}
|
||||
|
||||
try {
|
||||
$text = $section['heading']."\n\n".$section['content'];
|
||||
|
||||
// embeddinggemma has a 2048-token context (~4K chars).
|
||||
// Truncate oversized sections to avoid Ollama 500 errors.
|
||||
|
|
@ -168,6 +186,7 @@ class BrainIngestCommand extends Command
|
|||
'tags' => $tags,
|
||||
'project' => $project,
|
||||
'confidence' => $this->confidenceForSource($source),
|
||||
'source' => 'ingest:'.$source,
|
||||
]);
|
||||
$this->stats['imported']++;
|
||||
} catch (\Throwable $e) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,26 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Add the `source` column to `brain_memories` on the `brain` connection.
     *
     * The column is a nullable string (max 100 chars) placed after
     * `confidence`, and it is indexed.
     */
    public function up(): void
    {
        Schema::connection('brain')->table('brain_memories', function (Blueprint $table): void {
            $table->string('source', 100)->nullable()->after('confidence')
                ->comment('Origin: manual, ingest:claude-md, ingest:plans, etc.');
            $table->index('source');
        });
    }

    /**
     * Reverse up(): remove the `source` index, then the column itself.
     */
    public function down(): void
    {
        // Drop the index explicitly and in its own schema call: some drivers
        // (notably SQLite) refuse to drop a column that an index still covers,
        // and older SQLite support cannot combine both changes in one call.
        // The array form resolves to the same conventional index name that
        // `$table->index('source')` generated in up().
        Schema::connection('brain')->table('brain_memories', function (Blueprint $table): void {
            $table->dropIndex(['source']);
        });

        Schema::connection('brain')->table('brain_memories', function (Blueprint $table): void {
            $table->dropColumn('source');
        });
    }
};
|
||||
|
|
@ -42,6 +42,7 @@ class BrainMemory extends Model
|
|||
|
||||
/** Valid memory types. */
|
||||
public const VALID_TYPES = [
|
||||
'fact',
|
||||
'decision',
|
||||
'observation',
|
||||
'convention',
|
||||
|
|
@ -49,6 +50,11 @@ class BrainMemory extends Model
|
|||
'plan',
|
||||
'bug',
|
||||
'architecture',
|
||||
'documentation',
|
||||
'service',
|
||||
'pattern',
|
||||
'context',
|
||||
'procedure',
|
||||
];
|
||||
|
||||
protected $connection = 'brain';
|
||||
|
|
@ -65,6 +71,7 @@ class BrainMemory extends Model
|
|||
'confidence',
|
||||
'supersedes_id',
|
||||
'expires_at',
|
||||
'source',
|
||||
];
|
||||
|
||||
protected $casts = [
|
||||
|
|
@ -171,7 +178,7 @@ class BrainMemory extends Model
|
|||
}
|
||||
|
||||
/** Format the memory for MCP tool responses. */
|
||||
public function toMcpContext(): array
|
||||
public function toMcpContext(float $score = 0.0): array
|
||||
{
|
||||
return [
|
||||
'id' => $this->id,
|
||||
|
|
@ -181,6 +188,8 @@ class BrainMemory extends Model
|
|||
'tags' => $this->tags,
|
||||
'project' => $this->project,
|
||||
'confidence' => $this->confidence,
|
||||
'score' => round($score, 4),
|
||||
'source' => $this->source ?? 'manual',
|
||||
'supersedes_id' => $this->supersedes_id,
|
||||
'expires_at' => $this->expires_at?->toIso8601String(),
|
||||
'created_at' => $this->created_at?->toIso8601String(),
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ class BrainService
|
|||
'tags' => $memory->tags ?? [],
|
||||
'project' => $memory->project,
|
||||
'confidence' => $memory->confidence,
|
||||
'source' => $memory->source ?? 'manual',
|
||||
'created_at' => $memory->created_at->toIso8601String(),
|
||||
]);
|
||||
$payload['vector'] = $vector;
|
||||
|
|
@ -146,8 +147,9 @@ class BrainService
|
|||
->values();
|
||||
|
||||
return [
|
||||
'memories' => $memories->map(fn (BrainMemory $m) => $m->toMcpContext())->all(),
|
||||
'scores' => $scoreMap,
|
||||
'memories' => $memories->map(fn (BrainMemory $m) => $m->toMcpContext(
|
||||
(float) ($scoreMap[$m->id] ?? 0.0)
|
||||
))->all(),
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue