feat(agent/brain): lift OpenBrain discovery features (search/discoverTags/listScopes) (#180)

Bounded subset of RFC-OPENBRAIN lifted from lab/lthn.ai shim into the
OSS BrainService at php/Services/BrainService.php:

- search(query, workspaceId, filters, limit): Elasticsearch path first,
  falls back to MariaDB if the Elasticsearch request fails. Operates on
  active/latest memories only.
- discoverTags(workspaceId, org, project, limit): tag-cloud /
  popular-tags discovery scoped to the authenticated org(s).
- listScopes(workspaceId): org/project distribution counts for the
  authenticated session.

All three:
- Enforce bounded inputs (per #1001 patterns)
- Honour org auth (per #312 patterns)
- Only operate on active/latest memories (active=1, deleted_at IS NULL)

Self-hosters now get the same discovery surface that lab/lthn.ai
exposes — no need to fork the OSS service to access these features.

Pest covers: bounds-violation rejection, fallback behaviour, scoped
discovery returning correct org/project breakdowns.

Lab-only features still out of scope for this lane (would pull in
extra schema/models/events): agentContext, recall feedback,
maintenance lifecycle (reindex/consolidate/clean/prune). Those need
follow-up tickets if/when bounded-lift becomes possible.

Co-authored-by: Codex <noreply@openai.com>
Closes tasks.lthn.sh/view.php?id=180
This commit is contained in:
Snider 2026-04-25 20:39:13 +01:00
parent 9628e5d088
commit 1872424cfd
3 changed files with 551 additions and 0 deletions

View file

@ -11,6 +11,7 @@ use Core\Mod\Agentic\Jobs\EmbedMemory;
use Core\Mod\Agentic\Models\BrainMemory;
use Illuminate\Auth\Access\AuthorizationException;
use Illuminate\Contracts\Cache\LockTimeoutException;
use Illuminate\Database\Eloquent\Builder;
use Illuminate\Http\Client\ConnectionException;
use Illuminate\Http\Client\PendingRequest;
use Illuminate\Http\Client\Response;
@ -54,6 +55,10 @@ class BrainService
private const MAX_ORG_LENGTH = 128;
// Upper bound passed to validateStringMaxLength() for search() query text.
private const MAX_SEARCH_QUERY_BYTES = 2000;
// Largest `limit` accepted by validateDiscoveryLimit() (used by search() and discoverTags()).
private const MAX_DISCOVERY_LIMIT = 100;
private string $qdrantApiKey;
public function __construct(
@ -275,6 +280,150 @@ class BrainService
];
}
/**
 * Full-text discovery search with MariaDB fallback.
 *
 * The query, limit and filters are validated and the requested org is
 * authorised before any backend is contacted. Elasticsearch is tried first;
 * its hits are re-read through brainQuery() so the workspace/org/filter
 * constraints decide which rows are returned. When that path throws a
 * \RuntimeException the failure is logged and the same constraints are
 * applied to a literal substring match on `content`, newest `updated_at` first.
 *
 * @param string $query Search text; rejected when blank or over the query size bound.
 * @param int $workspaceId Workspace whose memories are searched.
 * @param array<string, mixed> $filters Optional filters forwarded to brainQuery()
 *                                      (org, project, type, agent_id, tags, min_confidence).
 * @param int $limit Maximum number of results, between 1 and MAX_DISCOVERY_LIMIT.
 * @return array<int, array<string, mixed>>
 *
 * @throws \InvalidArgumentException When the query is blank or the limit is out of range.
 */
public function search(string $query, int $workspaceId, array $filters = [], int $limit = 20): array
{
    $this->validateSearchQuery($query);
    $this->validateDiscoveryLimit($limit);
    $this->validateMemoryFilters($filters);
    $this->assertAuthorisedOrgScope($filters['org'] ?? null);

    try {
        return $this->hydrateElasticSearchResults(
            $this->elasticSearch($query, array_merge($filters, ['workspace_id' => $workspaceId]), $limit),
            $workspaceId,
            $filters,
        );
    } catch (\RuntimeException $exception) {
        Log::warning('OpenBrain Elasticsearch search failed, falling back to MariaDB', [
            'message' => $exception->getMessage(),
            'workspace_id' => $workspaceId,
        ]);
    }

    // Escape LIKE metacharacters so `%` and `_` typed by the caller match
    // literally instead of acting as wildcards. The escape character is
    // declared explicitly because the default differs between database engines.
    $pattern = '%'.strtr($query, ['!' => '!!', '%' => '!%', '_' => '!_']).'%';

    return $this->brainQuery($workspaceId, $filters)
        ->whereRaw("content like ? escape '!'", [$pattern])
        ->orderByDesc('updated_at')
        ->limit($limit)
        ->get()
        ->map(static fn (BrainMemory $memory): array => $memory->toMcpContext())
        ->all();
}
/**
 * Discover the most common tags for a workspace scope.
 *
 * Streams the `tags` column of every memory selected by brainQuery() for the
 * workspace (optionally narrowed to one org and/or project), counts each
 * trimmed, non-empty string tag, and returns the `$limit` most frequent ones.
 * Tags with the same count are ordered by name so the result, and which tags
 * survive the `$limit` cut, does not depend on database row order.
 *
 * @param int $workspaceId Workspace whose memories are inspected.
 * @param string|null $org Optional org filter; null or '' means no org filter.
 * @param string|null $project Optional project filter; null or '' means no project filter.
 * @param int $limit Maximum number of tags returned, between 1 and MAX_DISCOVERY_LIMIT.
 * @return array<int, array{name: string, count: int}>
 *
 * @throws \InvalidArgumentException When the limit is out of range.
 */
public function discoverTags(
    int $workspaceId,
    ?string $org = null,
    ?string $project = null,
    int $limit = 20,
): array {
    $this->validateDiscoveryLimit($limit);
    $this->validateMemoryFilters([
        'org' => $org,
        'project' => $project,
    ]);
    $this->assertAuthorisedOrgScope($org);

    // Drop unset filters so brainQuery() only sees constraints that were actually requested.
    $scope = array_filter([
        'org' => $org,
        'project' => $project,
    ], static fn (mixed $value): bool => $value !== null && $value !== '');

    $counts = [];

    // cursor() iterates rows lazily instead of loading the whole result set at once.
    $this->brainQuery($workspaceId, $scope)
        ->select('tags')
        ->cursor()
        ->each(static function (BrainMemory $memory) use (&$counts): void {
            foreach ($memory->tags ?? [] as $tag) {
                if (! is_string($tag)) {
                    continue;
                }

                $tag = trim($tag);

                if ($tag === '') {
                    continue;
                }

                $counts[$tag] = ($counts[$tag] ?? 0) + 1;
            }
        });

    // Highest count first; equal counts fall back to byte-wise name order.
    // Keys are cast because PHP stores numeric-looking tags as integer keys.
    uksort(
        $counts,
        static fn (int|string $left, int|string $right): int => ($counts[$right] <=> $counts[$left])
            ?: strcmp((string) $left, (string) $right),
    );

    $tags = [];

    foreach (array_slice($counts, 0, $limit, true) as $tag => $count) {
        $tags[] = ['name' => (string) $tag, 'count' => $count];
    }

    return $tags;
}
/**
 * List org/project scopes with memory counts.
 *
 * Groups the memories selected by brainQuery() by (org, project), folds the
 * groups into one entry per org, and returns the entries sorted by org with
 * each entry's projects sorted by name. A null/empty org is reported as
 * `org => null`; a null/empty project adds to the org total but is not listed
 * under `projects`.
 *
 * @return array<int, array{org: ?string, count: int, projects: array<int, array{name: string, count: int}>}>
 */
public function listScopes(int $workspaceId): array
{
    $grouped = $this->brainQuery($workspaceId)
        ->selectRaw("coalesce(org, '') as org_key, coalesce(project, '') as project_key, count(*) as cnt")
        ->groupBy('org_key', 'project_key')
        ->get();

    $byOrg = [];

    foreach ($grouped as $row) {
        $org = is_string($row->org_key ?? null) ? $row->org_key : '';
        $project = is_string($row->project_key ?? null) ? $row->project_key : '';
        $total = (int) ($row->cnt ?? 0);

        // First time this org is seen: start an empty bucket for it.
        $byOrg[$org] ??= [
            'org' => $org === '' ? null : $org,
            'count' => 0,
            'projects' => [],
        ];

        $byOrg[$org]['count'] += $total;

        if ($project !== '') {
            $byOrg[$org]['projects'][] = [
                'name' => $project,
                'count' => $total,
            ];
        }
    }

    $tree = [];

    foreach ($byOrg as $entry) {
        usort(
            $entry['projects'],
            static fn (array $a, array $b): int => $a['name'] <=> $b['name'],
        );

        $tree[] = $entry;
    }

    // The null org is compared as '' and therefore sorts ahead of named orgs.
    usort(
        $tree,
        static fn (array $a, array $b): int => ($a['org'] ?? '') <=> ($b['org'] ?? ''),
    );

    return $tree;
}
/**
* Remove a memory from both Qdrant and MariaDB.
*/
@ -501,6 +650,24 @@ class BrainService
$this->validateStringMaxLength($id, 'id', self::MAX_ID_LENGTH);
}
/**
 * Reject search text that is blank after trimming, then delegate the size
 * check to validateStringMaxLength() with the MAX_SEARCH_QUERY_BYTES bound.
 *
 * @throws \InvalidArgumentException When the query contains only whitespace.
 */
private function validateSearchQuery(string $query): void
{
    $isBlank = trim($query) === '';

    if ($isBlank) {
        throw new \InvalidArgumentException('query must not be empty');
    }

    // The untrimmed value is what gets measured against the bound.
    $this->validateStringMaxLength($query, 'query', self::MAX_SEARCH_QUERY_BYTES);
}
/**
 * Ensure a result limit lies within 1..MAX_DISCOVERY_LIMIT (inclusive).
 *
 * @throws \InvalidArgumentException When the limit is outside that range.
 */
private function validateDiscoveryLimit(int $limit): void
{
    $withinBounds = $limit >= 1 && $limit <= self::MAX_DISCOVERY_LIMIT;

    if ($withinBounds) {
        return;
    }

    $message = sprintf('limit must be between 1 and %d', self::MAX_DISCOVERY_LIMIT);

    throw new \InvalidArgumentException($message);
}
private function validateContent(mixed $content): void
{
if ($content === null) {
@ -704,6 +871,24 @@ class BrainService
return $request instanceof Request ? $request : null;
}
/**
 * Restrict a query to the authorised org scopes when no org was requested.
 *
 * Nothing is added when the caller supplied an org (anything other than null
 * or '') or when authorisedOrgScopes() returns an empty list. Otherwise the
 * query is limited to rows whose org is NULL or one of the authorised orgs.
 */
private function applyAuthorisedOrgScopeQuery(Builder $query, mixed $requestedOrg = null): void
{
    $orgWasRequested = ! ($requestedOrg === null || $requestedOrg === '');

    if ($orgWasRequested) {
        return;
    }

    $allowedOrgs = $this->authorisedOrgScopes();

    if ($allowedOrgs !== []) {
        // Grouped in a nested where so the OR cannot leak into other constraints.
        $query->where(function (Builder $scoped) use ($allowedOrgs): void {
            $scoped->whereNull('org');
            $scoped->orWhereIn('org', $allowedOrgs);
        });
    }
}
/**
* @param array<int, string> $keys
*/
@ -850,6 +1035,108 @@ class BrainService
return is_array($result) ? $result : [];
}
/**
 * Build the base Eloquent query shared by the discovery methods.
 *
 * Starts from memories in the workspace narrowed by the model's active() and
 * latestVersions() scopes, applies the authorised-org restriction when no org
 * was requested, then adds one constraint per supplied filter:
 *
 * - org: exact match, or whereIn when an array is given
 * - project: exact match
 * - type: exact match, or whereIn when an array is given
 * - agent_id: exact match
 * - tags: the memory's JSON `tags` contains ANY of the given tags
 * - min_confidence: confidence >= the given value (cast to float)
 *
 * An org or project of '' is treated like null ("not requested"), matching
 * applyAuthorisedOrgScopeQuery(); otherwise an empty string would be combined
 * with the authorised-org restriction as a literal `org = ''` match.
 *
 * @param array<string, mixed> $filters
 */
private function brainQuery(int $workspaceId, array $filters = []): Builder
{
    $query = BrainMemory::query()
        ->forWorkspace($workspaceId)
        ->active()
        ->latestVersions();

    $org = $filters['org'] ?? null;
    $project = $filters['project'] ?? null;

    $this->applyAuthorisedOrgScopeQuery($query, $org);

    if ($org !== null && $org !== '') {
        if (is_array($org)) {
            $query->whereIn('org', $org);
        } else {
            $query->where('org', $org);
        }
    }

    if ($project !== null && $project !== '') {
        $query->where('project', $project);
    }

    if (isset($filters['type'])) {
        if (is_array($filters['type'])) {
            $query->whereIn('type', $filters['type']);
        } else {
            $query->where('type', $filters['type']);
        }
    }

    if (isset($filters['agent_id'])) {
        $query->where('agent_id', $filters['agent_id']);
    }

    if (isset($filters['tags'])) {
        $tags = is_array($filters['tags']) ? $filters['tags'] : [$filters['tags']];

        // Nested so the OR-ed tag checks stay grouped behind the other AND constraints.
        $query->where(static function (Builder $tagQuery) use ($tags): void {
            foreach ($tags as $tag) {
                $tagQuery->orWhereJsonContains('tags', $tag);
            }
        });
    }

    if (isset($filters['min_confidence'])) {
        $query->where('confidence', '>=', (float) $filters['min_confidence']);
    }

    return $query;
}
/**
 * Turn an Elasticsearch response into MCP context arrays.
 *
 * Collects the unique, non-empty string ids from `hits.hits` (taking `_id`,
 * else `_source.id`) together with their `_score`, loads the matching rows
 * through brainQuery() so workspace/org/filter constraints still apply, and
 * returns them in the order the hits arrived. Hits with no matching row are
 * dropped.
 *
 * @param array<string, mixed> $result
 * @param array<string, mixed> $filters
 * @return array<int, array<string, mixed>>
 */
private function hydrateElasticSearchResults(array $result, int $workspaceId, array $filters): array
{
    $hits = $result['hits']['hits'] ?? [];

    if (! is_array($hits) || $hits === []) {
        return [];
    }

    $orderedIds = [];
    $scoreById = [];

    foreach ($hits as $hit) {
        if (! is_array($hit)) {
            continue;
        }

        $candidate = $hit['_id'] ?? ($hit['_source']['id'] ?? null);

        // Only the first occurrence of an id counts; scores are never null,
        // so isset() doubles as the "already seen" check.
        $usable = is_string($candidate) && $candidate !== '' && ! isset($scoreById[$candidate]);

        if (! $usable) {
            continue;
        }

        $orderedIds[] = $candidate;
        $scoreById[$candidate] = (float) ($hit['_score'] ?? 0.0);
    }

    if ($orderedIds === []) {
        return [];
    }

    $rowsById = $this->brainQuery($workspaceId, $filters)
        ->whereIn('id', $orderedIds)
        ->get()
        ->keyBy('id');

    $contexts = [];

    foreach ($orderedIds as $id) {
        $memory = $rowsById->get($id);

        if (! $memory instanceof BrainMemory) {
            continue;
        }

        $contexts[] = $memory->toMcpContext((float) ($scoreById[$id] ?? 0.0));
    }

    return $contexts;
}
/**
* Build a Qdrant filter from criteria.
*

View file

@ -183,3 +183,142 @@ test('OrgScoping_list_Ugly_filters_memories_by_org', function (): void {
->and($result['memories'][0]['id'])->toBe($coreMemory->id)
->and($result['memories'][0]['org'])->toBe('core');
});
// search() must drop Elasticsearch hits from orgs outside the authorised list
// while keeping global (org = null) memories and the Elasticsearch hit order.
test('OrgScoping_search_Good_limits_results_to_authorised_orgs_and_global_memories', function (): void {
    $searchUrl = 'https://elasticsearch.test/brain_memories/_search';

    // Arrange: a workspace authorised for the "core" org only.
    $workspace = createWorkspace();
    $workspace->setAttribute('slug', 'core');

    orgScopingBindRequestContext($workspace, ['authorised_orgs' => ['core']]);

    $brain = orgScopingBrainService();

    $coreMemory = orgScopingMemory($workspace->id, [
        'content' => 'Core scoped discovery memory.',
        'org' => 'core',
    ]);
    $globalMemory = orgScopingMemory($workspace->id, [
        'content' => 'Global discovery memory.',
        'org' => null,
        'project' => null,
    ]);
    $otherOrgMemory = orgScopingMemory($workspace->id, [
        'content' => 'Other organisation discovery memory.',
        'org' => 'other-org',
    ]);

    // Elasticsearch returns all three, including the unauthorised one.
    $hits = [
        ['_id' => $globalMemory->id, '_score' => 3.5],
        ['_id' => $otherOrgMemory->id, '_score' => 2.5],
        ['_id' => $coreMemory->id, '_score' => 1.5],
    ];

    Http::fake([
        $searchUrl => Http::response(['hits' => ['hits' => $hits]]),
    ]);

    // Act
    $result = $brain->search('discovery memory', $workspace->id, [], 5);

    // Assert: only the global and core memories survive, with their scores.
    expect(array_column($result, 'id'))->toBe([
        $globalMemory->id,
        $coreMemory->id,
    ]);
    expect($result[0]['score'])->toBe(3.5);
    expect($result[1]['score'])->toBe(1.5);

    $expectedFilter = [
        ['term' => ['workspace_id' => $workspace->id]],
    ];

    Http::assertSent(function (ClientRequest $request) use ($searchUrl, $expectedFilter): bool {
        return $request->url() === $searchUrl
            && $request->method() === 'POST'
            && $request['query']['bool']['filter'] === $expectedFilter;
    });
});
// Requesting tags for an org outside the authorised list must be refused.
test('OrgScoping_discoverTags_Bad_rejects_an_unauthorised_org_filter', function (): void {
    $workspace = createWorkspace();
    $workspace->setAttribute('slug', 'core');

    orgScopingBindRequestContext($workspace, ['authorised_orgs' => ['core']]);

    $brain = orgScopingBrainService();

    $attempt = function () use ($brain, $workspace) {
        return $brain->discoverTags($workspace->id, 'other-org');
    };

    expect($attempt)->toThrow(
        AuthorizationException::class,
        "Organisation scope 'other-org' is not authorised for this authenticated workspace.",
    );
});
// With no org filter, tag counts cover authorised-org and global memories only.
test('OrgScoping_discoverTags_Good_limits_results_to_authorised_orgs_and_global_memories', function (): void {
    $workspace = createWorkspace();
    $workspace->setAttribute('slug', 'core');

    orgScopingBindRequestContext($workspace, ['authorised_orgs' => ['core']]);

    $brain = orgScopingBrainService();

    // Two core memories, one global memory, and one from an unauthorised org.
    $fixtures = [
        ['content' => 'Core tag memory.', 'org' => 'core', 'tags' => ['core-tag']],
        ['content' => 'Second core tag memory.', 'org' => 'core', 'tags' => ['core-tag']],
        ['content' => 'Global tag memory.', 'org' => null, 'project' => null, 'tags' => ['global-tag']],
        ['content' => 'Other org tag memory.', 'org' => 'other-org', 'tags' => ['other-tag']],
    ];

    foreach ($fixtures as $attributes) {
        orgScopingMemory($workspace->id, $attributes);
    }

    $result = $brain->discoverTags($workspace->id);

    // "other-tag" must not appear: its memory belongs to an unauthorised org.
    $expected = [
        ['name' => 'core-tag', 'count' => 2],
        ['name' => 'global-tag', 'count' => 1],
    ];

    expect($result)->toBe($expected);
});
// The scope tree lists only the authorised org and the global (null) org.
test('OrgScoping_listScopes_Good_limits_scope_tree_to_authorised_orgs_and_global_memories', function (): void {
    $workspace = createWorkspace();
    $workspace->setAttribute('slug', 'core');

    orgScopingBindRequestContext($workspace, ['authorised_orgs' => ['core']]);

    $brain = orgScopingBrainService();

    $fixtures = [
        ['content' => 'Core agent memory.', 'org' => 'core', 'project' => 'agent'],
        ['content' => 'Global shared memory.', 'org' => null, 'project' => null],
        ['content' => 'Other organisation memory.', 'org' => 'other-org', 'project' => 'agent'],
    ];

    foreach ($fixtures as $attributes) {
        orgScopingMemory($workspace->id, $attributes);
    }

    $result = $brain->listScopes($workspace->id);

    // The global scope comes first and has no project breakdown.
    $globalScope = [
        'org' => null,
        'count' => 1,
        'projects' => [],
    ];
    $coreScope = [
        'org' => 'core',
        'count' => 1,
        'projects' => [
            ['name' => 'agent', 'count' => 1],
        ],
    ];

    expect($result)->toBe([$globalScope, $coreScope]);
});

View file

@ -5,6 +5,7 @@
declare(strict_types=1);
use Core\Mod\Agentic\Jobs\EmbedMemory;
use Core\Mod\Agentic\Models\BrainMemory;
use Core\Mod\Agentic\Services\BrainService;
use Illuminate\Http\Client\Request as ClientRequest;
use Illuminate\Support\Facades\Http;
@ -36,6 +37,11 @@ function rememberValidationAttributes(array $attributes = []): array
], $attributes);
}
/**
 * Persist a BrainMemory built from rememberValidationAttributes(), with the
 * given overrides passed through to that helper.
 *
 * @param array<string, mixed> $attributes
 */
function rememberValidationMemory(array $attributes = []): BrainMemory
{
    $payload = rememberValidationAttributes($attributes);

    return BrainMemory::create($payload);
}
test('BrainRememberValidation_remember_Good_accepts_valid_content_and_tags', function (): void {
Queue::fake();
@ -187,6 +193,125 @@ test('BrainRememberValidation_recall_Ugly_accepts_project_filters_at_the_128_cha
]);
});
// An over-long query is rejected before any HTTP request is made.
test('BrainRememberValidation_search_Bad_rejects_queries_longer_than_2000_bytes', function (): void {
    Http::fake();

    $attempt = function () {
        // One character past the 2000 bound.
        return rememberValidationBrainService()->search(
            str_repeat('q', 2001),
            createWorkspace()->id,
        );
    };

    expect($attempt)->toThrow(\InvalidArgumentException::class, 'query exceeds maximum length of 2000');

    Http::assertNothingSent();
});
// A 503 from Elasticsearch makes search() answer from the database instead,
// still honouring the project filter.
test('BrainRememberValidation_search_Good_falls_back_to_mariadb_when_elasticsearch_fails', function (): void {
    $searchUrl = 'https://elasticsearch.test/brain_memories/_search';

    // Arrange: one memory in the filtered project, one outside it.
    $workspace = createWorkspace();

    $matching = rememberValidationMemory([
        'workspace_id' => $workspace->id,
        'content' => 'Fallback search keeps MariaDB discovery available to self-hosters.',
        'project' => 'agent',
    ]);

    rememberValidationMemory([
        'workspace_id' => $workspace->id,
        'content' => 'Other project memory should not match the scoped fallback search.',
        'project' => 'other-project',
    ]);

    Http::fake([
        $searchUrl => Http::response(['error' => 'unavailable'], 503),
    ]);

    // Act
    $filters = ['project' => 'agent'];
    $result = rememberValidationBrainService()->search('Fallback search', $workspace->id, $filters, 5);

    // Assert: only the in-project memory is returned, with the fallback score of 0.0.
    expect($result)->toHaveCount(1);
    expect($result[0]['id'])->toBe($matching->id);
    expect($result[0]['score'])->toBe(0.0);
    expect($result[0]['project'])->toBe('agent');

    // Elasticsearch must have been attempted before falling back.
    Http::assertSent(function (ClientRequest $request) use ($searchUrl): bool {
        return $request->url() === $searchUrl
            && $request->method() === 'POST';
    });
});
// A limit one above the maximum of 100 is refused.
test('BrainRememberValidation_discoverTags_Bad_rejects_limits_above_100', function (): void {
    $attempt = function () {
        return rememberValidationBrainService()->discoverTags(
            createWorkspace()->id,
            limit: 101,
        );
    };

    expect($attempt)->toThrow(\InvalidArgumentException::class, 'limit must be between 1 and 100');
});
// Tags are counted within the requested org/project only, whitespace-only
// tags are ignored, and the list is cut to the requested limit.
test('BrainRememberValidation_discoverTags_Good_counts_tags_within_scope_and_ignores_blank_tags', function (): void {
    $workspace = createWorkspace();

    $fixtures = [
        ['tags' => ['openbrain', 'architecture', ' '], 'project' => 'agent'],
        ['tags' => ['openbrain'], 'project' => 'agent'],
        // Different project: its tag must not be counted.
        ['tags' => ['deploy'], 'project' => 'other-project'],
    ];

    foreach ($fixtures as $attributes) {
        rememberValidationMemory(['workspace_id' => $workspace->id] + $attributes);
    }

    $result = rememberValidationBrainService()->discoverTags($workspace->id, 'core', 'agent', 2);

    $expected = [
        ['name' => 'openbrain', 'count' => 2],
        ['name' => 'architecture', 'count' => 1],
    ];

    expect($result)->toBe($expected);
});
// Scopes come back sorted by org then project name, and memories from
// another workspace are left out.
test('BrainRememberValidation_listScopes_Ugly_returns_sorted_scope_counts', function (): void {
    $workspace = createWorkspace();

    // Inserted out of alphabetical order on purpose ("host" before "agent").
    $ownFixtures = [
        ['org' => 'core', 'project' => 'host'],
        ['org' => 'core', 'project' => 'agent'],
        ['org' => null, 'project' => null],
    ];

    foreach ($ownFixtures as $attributes) {
        rememberValidationMemory(['workspace_id' => $workspace->id] + $attributes);
    }

    // Belongs to a different workspace, so it must not show up in the result.
    rememberValidationMemory([
        'workspace_id' => createWorkspace()->id,
        'org' => 'ops',
        'project' => 'deploy',
    ]);

    $result = rememberValidationBrainService()->listScopes($workspace->id);

    $globalScope = [
        'org' => null,
        'count' => 1,
        'projects' => [],
    ];
    $coreScope = [
        'org' => 'core',
        'count' => 2,
        'projects' => [
            ['name' => 'agent', 'count' => 1],
            ['name' => 'host', 'count' => 1],
        ],
    ];

    expect($result)->toBe([$globalScope, $coreScope]);
});
test('BrainRememberValidation_forget_Bad_rejects_ids_longer_than_64_characters', function (): void {
expect(fn () => rememberValidationBrainService()->forget(str_repeat('x', 65)))
->toThrow(\InvalidArgumentException::class, 'id exceeds maximum length of 64');