php-agentic/Services/AgentDetection.php

442 lines
14 KiB
PHP
Raw Normal View History

2026-01-27 00:28:29 +00:00
<?php
declare(strict_types=1);
namespace Core\Mod\Agentic\Services;
2026-01-27 00:28:29 +00:00
use Core\Mod\Agentic\Models\AgentApiKey;
use Core\Mod\Agentic\Support\AgentIdentity;
2026-01-27 00:28:29 +00:00
use Illuminate\Http\Request;
/**
 * Service for detecting AI agents from HTTP requests.
 *
 * Identifies AI agent providers (Anthropic, OpenAI, Google, etc.) from:
 * - User-Agent string patterns
 * - MCP token headers
 * - Absence of typical browser indicators
 *
 * Part of the Trees for Agents system for rewarding AI agent referrals.
 */
class AgentDetection
{
    /**
     * User-Agent patterns for known AI providers.
     *
     * For each provider, `patterns` is the list of regexes that identify the
     * provider and `model_patterns` maps a model identifier to the regex that
     * recognises it. Providers and models are tried in declaration order and
     * the first match wins.
     *
     * @var array<string, array{patterns: list<string>, model_patterns: array<string, string>}>
     */
    protected const PROVIDER_PATTERNS = [
        'anthropic' => [
            'patterns' => [
                '/claude[\s\-_]?code/i',
                '/\banthropic\b/i',
                '/\banthropic[\s\-_]?api\b/i',
                '/\bclaude\b.*\bai\b/i',
                '/\bclaude\b.*\bassistant\b/i',
            ],
            'model_patterns' => [
                'claude-opus' => '/claude[\s\-_]?opus/i',
                'claude-sonnet' => '/claude[\s\-_]?sonnet/i',
                'claude-haiku' => '/claude[\s\-_]?haiku/i',
            ],
        ],
        'openai' => [
            'patterns' => [
                '/\bChatGPT\b/i',
                '/\bOpenAI\b/i',
                '/\bGPT[\s\-_]?4\b/i',
                '/\bGPT[\s\-_]?3\.?5\b/i',
                '/\bo1[\s\-_]?preview\b/i',
                '/\bo1[\s\-_]?mini\b/i',
            ],
            'model_patterns' => [
                'gpt-4' => '/\bGPT[\s\-_]?4/i',
                'gpt-3.5' => '/\bGPT[\s\-_]?3\.?5/i',
                'o1' => '/\bo1[\s\-_]?(preview|mini)?\b/i',
            ],
        ],
        'google' => [
            'patterns' => [
                '/\bGoogle[\s\-_]?AI\b/i',
                '/\bGemini\b/i',
                '/\bBard\b/i',
                '/\bPaLM\b/i',
            ],
            'model_patterns' => [
                'gemini-pro' => '/gemini[\s\-_]?(1\.5[\s\-_]?)?pro/i',
                'gemini-ultra' => '/gemini[\s\-_]?(1\.5[\s\-_]?)?ultra/i',
                'gemini-flash' => '/gemini[\s\-_]?(1\.5[\s\-_]?)?flash/i',
            ],
        ],
        'meta' => [
            'patterns' => [
                '/\bMeta[\s\-_]?AI\b/i',
                '/\bLLaMA\b/i',
                '/\bLlama[\s\-_]?[23]\b/i',
            ],
            'model_patterns' => [
                'llama-3' => '/llama[\s\-_]?3/i',
                'llama-2' => '/llama[\s\-_]?2/i',
            ],
        ],
        'mistral' => [
            'patterns' => [
                '/\bMistral\b/i',
                '/\bMixtral\b/i',
            ],
            'model_patterns' => [
                'mistral-large' => '/mistral[\s\-_]?large/i',
                'mistral-medium' => '/mistral[\s\-_]?medium/i',
                'mixtral' => '/mixtral/i',
            ],
        ],
    ];

    /**
     * Patterns that indicate a typical web browser.
     * If none of these are present, it might be programmatic access.
     *
     * @var list<string>
     */
    protected const BROWSER_INDICATORS = [
        '/\bMozilla\b/i',
        '/\bChrome\b/i',
        '/\bSafari\b/i',
        '/\bFirefox\b/i',
        '/\bEdge\b/i',
        '/\bOpera\b/i',
        '/\bMSIE\b/i',
        '/\bTrident\b/i',
    ];

    /**
     * Known bot patterns that are NOT AI agents.
     * These should return notAnAgent, not unknown.
     *
     * @var list<string>
     */
    protected const NON_AGENT_BOTS = [
        '/\bGooglebot\b/i',
        '/\bBingbot\b/i',
        '/\bYandexBot\b/i',
        '/\bDuckDuckBot\b/i',
        '/\bBaiduspider\b/i',
        '/\bfacebookexternalhit\b/i',
        '/\bTwitterbot\b/i',
        '/\bLinkedInBot\b/i',
        '/\bSlackbot\b/i',
        '/\bDiscordBot\b/i',
        '/\bTelegramBot\b/i',
        '/\bWhatsApp\//i',
        '/\bApplebot\b/i',
        '/\bSEMrushBot\b/i',
        '/\bAhrefsBot\b/i',
        '/\bcurl\b/i',
        '/\bwget\b/i',
        '/\bpython-requests\b/i',
        '/\bgo-http-client\b/i',
        '/\bPostman\b/i',
        '/\bInsomnia\b/i',
        '/\baxios\b/i',
        '/\bnode-fetch\b/i',
        '/\bUptimeRobot\b/i',
        '/\bPingdom\b/i',
        '/\bDatadog\b/i',
        '/\bNewRelic\b/i',
    ];

    /**
     * Provider names accepted by isValidProvider() and returned by
     * getValidProviders(). Kept in one place so the two cannot drift apart.
     *
     * @var list<string>
     */
    protected const VALID_PROVIDERS = [
        'anthropic',
        'openai',
        'google',
        'meta',
        'mistral',
        'local',
        'unknown',
    ];

    /**
     * The MCP token header name.
     */
    protected const MCP_TOKEN_HEADER = 'X-MCP-Token';

    /**
     * Identify an agent from an HTTP request.
     *
     * A non-empty MCP token header takes priority; otherwise the decision is
     * made from the User-Agent header.
     */
    public function identify(Request $request): AgentIdentity
    {
        $mcpToken = $request->header(self::MCP_TOKEN_HEADER);

        // Explicit check instead of truthiness: a token of "0" is still a token,
        // and only a string may be handed to identifyFromMcpToken() under strict types.
        if (is_string($mcpToken) && $mcpToken !== '') {
            return $this->identifyFromMcpToken($mcpToken);
        }

        return $this->identifyFromUserAgent($request->userAgent());
    }

    /**
     * Identify an agent from a User-Agent string.
     *
     * Checks run in this order, first hit wins:
     * 1. missing/blank User-Agent  => unknown agent
     * 2. known AI provider pattern => that provider, high confidence
     * 3. known non-AI bot/tool     => not an agent
     * 4. browser indicator present => not an agent
     * 5. anything else             => unknown agent
     */
    public function identifyFromUserAgent(?string $userAgent): AgentIdentity
    {
        if ($userAgent === null || trim($userAgent) === '') {
            // Empty User-Agent is suspicious but not definitive
            return AgentIdentity::unknownAgent();
        }

        // Known AI providers are checked first (highest confidence)
        foreach (self::PROVIDER_PATTERNS as $provider => $config) {
            if ($this->matchesAny($config['patterns'], $userAgent)) {
                $model = $this->detectModel($userAgent, $config['model_patterns']);

                return $this->createProviderIdentity($provider, $model, AgentIdentity::CONFIDENCE_HIGH);
            }
        }

        // Search engines, monitoring, HTTP tooling, etc.
        if ($this->matchesAny(self::NON_AGENT_BOTS, $userAgent)) {
            return AgentIdentity::notAnAgent();
        }

        if ($this->looksLikeBrowser($userAgent)) {
            return AgentIdentity::notAnAgent();
        }

        // No browser indicators and not a known bot — might be an unknown agent
        return AgentIdentity::unknownAgent();
    }

    /**
     * Identify an agent from an MCP token.
     *
     * Supports two token formats:
     * - Opaque: "ak_xxxx..." (registered AgentApiKey, looked up via AgentApiKey::findByKey)
     * - Structured: "provider:model:secret" (e.g., "anthropic:claude-opus:abc123")
     *
     * Surrounding whitespace is ignored. An empty model segment is reported as
     * a null model rather than an empty string.
     *
     * NOTE(review): for structured tokens only the provider segment is validated
     * here; the secret segment is never inspected, and "unknown" is accepted as a
     * provider name. Confirm that this self-declared identity is acceptable to callers.
     */
    public function identifyFromMcpToken(string $token): AgentIdentity
    {
        $token = trim($token);

        // AgentApiKey tokens start with the "ak_" prefix
        if (str_starts_with($token, 'ak_')) {
            return $this->identifyFromAgentApiKey($token);
        }

        // Limit of 3 keeps any further colons inside the secret segment
        $segments = explode(':', $token, 3);

        if (count($segments) >= 2) {
            $provider = strtolower($segments[0]);
            $model = $segments[1] !== '' ? $segments[1] : null;

            if ($this->isValidProvider($provider)) {
                return $this->createProviderIdentity($provider, $model, AgentIdentity::CONFIDENCE_HIGH);
            }
        }

        // Unrecognised token format — return unknown with medium confidence
        // (token present suggests agent, but we cannot identify provider)
        return new AgentIdentity('unknown', null, AgentIdentity::CONFIDENCE_MEDIUM);
    }

    /**
     * Identify an agent from a registered AgentApiKey token.
     *
     * Looks the token up with AgentApiKey::findByKey() and derives the
     * provider/model from the key's name.
     */
    protected function identifyFromAgentApiKey(string $token): AgentIdentity
    {
        $apiKey = AgentApiKey::findByKey($token);

        // Token not found, or found but not active
        if ($apiKey === null || ! $apiKey->isActive()) {
            return AgentIdentity::unknownAgent();
        }

        // Key names often follow a pattern such as "Claude Opus Agent" or "GPT-4 Integration".
        // Normalise to a string so a missing name cannot raise a TypeError under strict types.
        $keyName = (string) ($apiKey->name ?? '');

        $provider = $this->extractProviderFromKeyName($keyName);

        if ($provider !== null) {
            return $this->createProviderIdentity(
                $provider,
                $this->extractModelFromKeyName($keyName),
                AgentIdentity::CONFIDENCE_HIGH
            );
        }

        // Valid key but cannot determine provider — return unknown with high confidence
        // (we know it's a registered agent, just not which provider)
        return new AgentIdentity('unknown', null, AgentIdentity::CONFIDENCE_HIGH);
    }

    /**
     * Extract provider from an API key name.
     *
     * Attempts to identify provider from common naming patterns:
     * - "Claude Agent", "Anthropic Integration" => anthropic
     * - "GPT-4 Agent", "OpenAI Integration" => openai
     * - "Gemini Agent", "Google AI" => google
     *
     * Distinctive keywords (claude, gpt, gemini, llama, ...) match anywhere in the
     * name. Short, ambiguous ones (meta, bard, palm, o1) must stand alone as a
     * token so that names such as "Metadata sync" or "Palmer" are not misattributed.
     *
     * @return string|null Provider identifier, or null when no keyword matches.
     */
    protected function extractProviderFromKeyName(string $name): ?string
    {
        $needle = strtolower($name);

        // Providers are tried in this order; the first match wins.
        $providerPatterns = [
            'anthropic' => '/anthropic|claude/',
            'openai' => '/openai|gpt|(?<![a-z0-9])o1(?![a-z0-9])/',
            'google' => '/google|gemini|(?<![a-z])(?:bard|palm)(?![a-z])/',
            'meta' => '/llama|(?<![a-z])meta(?:[\s\-_]?ai)?(?![a-z])/',
            'mistral' => '/mistral|mixtral/',
        ];

        foreach ($providerPatterns as $provider => $pattern) {
            if (preg_match($pattern, $needle) === 1) {
                return $provider;
            }
        }

        return null;
    }

    /**
     * Extract model from an API key name.
     *
     * Attempts to identify specific model from naming patterns:
     * - "Claude Opus Agent" => claude-opus
     * - "GPT-4 Integration" => gpt-4
     *
     * Key-name keywords are tried first; if none match, the model patterns from
     * PROVIDER_PATTERNS are used as a fallback so that models known to the
     * User-Agent detection (e.g. gemini-ultra, llama-2) are recognised here too.
     *
     * @return string|null Model identifier, or null when nothing matches.
     */
    protected function extractModelFromKeyName(string $name): ?string
    {
        $needle = strtolower($name);

        // Keywords are tried in this order; the first match wins.
        $modelPatterns = [
            'claude-opus' => '/opus/',
            'claude-sonnet' => '/sonnet/',
            'claude-haiku' => '/haiku/',
            'gpt-4' => '/gpt-?4/',
            'gpt-3.5' => '/gpt-?3\.5|turbo/',
            // "o1" must be a standalone token; it may appear at the very end of the name
            'o1' => '/(?<![a-z0-9])o1(?![a-z0-9])/',
            'gemini-pro' => '/gemini[ \-]pro/',
            'gemini-flash' => '/gemini[ \-]flash/',
            'llama-3' => '/llama[ \-]?3/',
        ];

        foreach ($modelPatterns as $model => $pattern) {
            if (preg_match($pattern, $needle) === 1) {
                return $model;
            }
        }

        // Fall back to the shared, case-insensitive User-Agent model patterns
        foreach (self::PROVIDER_PATTERNS as $config) {
            $model = $this->detectModel($name, $config['model_patterns']);

            if ($model !== null) {
                return $model;
            }
        }

        return null;
    }

    /**
     * Check if the User-Agent looks like a normal web browser.
     *
     * @return bool True when any BROWSER_INDICATORS pattern matches.
     */
    protected function looksLikeBrowser(?string $userAgent): bool
    {
        if ($userAgent === null || $userAgent === '') {
            return false;
        }

        return $this->matchesAny(self::BROWSER_INDICATORS, $userAgent);
    }

    /**
     * Detect the model from User-Agent patterns.
     *
     * @param array<string, string> $modelPatterns Model identifier => regex.
     *
     * @return string|null Key of the first matching pattern, or null.
     */
    protected function detectModel(string $userAgent, array $modelPatterns): ?string
    {
        foreach ($modelPatterns as $model => $pattern) {
            if (preg_match($pattern, $userAgent) === 1) {
                return $model;
            }
        }

        return null;
    }

    /**
     * Create an identity for a known provider.
     *
     * Uses the dedicated AgentIdentity factory for each known provider and
     * falls back to the plain constructor for any other provider name.
     */
    protected function createProviderIdentity(string $provider, ?string $model, string $confidence): AgentIdentity
    {
        return match ($provider) {
            'anthropic' => AgentIdentity::anthropic($model, $confidence),
            'openai' => AgentIdentity::openai($model, $confidence),
            'google' => AgentIdentity::google($model, $confidence),
            'meta' => AgentIdentity::meta($model, $confidence),
            'mistral' => AgentIdentity::mistral($model, $confidence),
            'local' => AgentIdentity::local($model, $confidence),
            default => new AgentIdentity($provider, $model, $confidence),
        };
    }

    /**
     * Check if a provider name is valid.
     *
     * The comparison is strict and case-sensitive; callers are expected to
     * pass a lower-case name.
     */
    public function isValidProvider(string $provider): bool
    {
        return in_array($provider, self::VALID_PROVIDERS, true);
    }

    /**
     * Get the list of valid providers.
     *
     * @return string[]
     */
    public function getValidProviders(): array
    {
        return self::VALID_PROVIDERS;
    }

    /**
     * Check if a request appears to be from an AI agent.
     */
    public function isAgent(Request $request): bool
    {
        return $this->identify($request)->isAgent();
    }

    /**
     * Check if a User-Agent appears to be from an AI agent.
     */
    public function isAgentUserAgent(?string $userAgent): bool
    {
        return $this->identifyFromUserAgent($userAgent)->isAgent();
    }

    /**
     * Tell whether the subject matches at least one of the given regexes.
     *
     * @param iterable<string> $patterns
     */
    private function matchesAny(iterable $patterns, string $subject): bool
    {
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $subject) === 1) {
                return true;
            }
        }

        return false;
    }
}