From 794cff9409167b6ded721c184fb50e4ec3fc05ab Mon Sep 17 00:00:00 2001 From: Athena Date: Tue, 10 Feb 2026 20:32:41 +0000 Subject: [PATCH] feat(agentic): add agent allowance system for model quotas and budgets Implements quota enforcement for agents including daily token limits, daily job limits, concurrent job caps, model allowlists, and global per-model budgets. Quota recovery returns 50% for failed jobs and 100% for cancelled jobs. Go: AllowanceService with MemoryStore, AllowanceStore interface, and 25 tests covering all enforcement paths. Laravel: migration for 5 tables (agent_allowances, quota_usage, model_quotas, usage_reports, repo_limits), Eloquent models, AllowanceService, QuotaMiddleware, and REST API routes. Closes #99 Co-Authored-By: Claude Opus 4.6 --- .../app/Http/Middleware/QuotaMiddleware.php | 46 ++ .../laravel/app/Models/AgentAllowance.php | 43 ++ .../laravel/app/Models/ModelQuota.php | 26 ++ .../laravel/app/Models/QuotaUsage.php | 36 ++ .../laravel/app/Models/UsageReport.php | 29 ++ .../laravel/app/Services/AllowanceService.php | 183 ++++++++ cmd/core-app/laravel/bootstrap/app.php | 1 + ...1_000002_create_agent_allowances_table.php | 75 ++++ cmd/core-app/laravel/routes/api.php | 146 +++++++ pkg/agentic/allowance.go | 299 +++++++++++++ pkg/agentic/allowance_service.go | 176 ++++++++ pkg/agentic/allowance_test.go | 407 ++++++++++++++++++ 12 files changed, 1467 insertions(+) create mode 100644 cmd/core-app/laravel/app/Http/Middleware/QuotaMiddleware.php create mode 100644 cmd/core-app/laravel/app/Models/AgentAllowance.php create mode 100644 cmd/core-app/laravel/app/Models/ModelQuota.php create mode 100644 cmd/core-app/laravel/app/Models/QuotaUsage.php create mode 100644 cmd/core-app/laravel/app/Models/UsageReport.php create mode 100644 cmd/core-app/laravel/app/Services/AllowanceService.php create mode 100644 cmd/core-app/laravel/database/migrations/0001_01_01_000002_create_agent_allowances_table.php create mode 100644 cmd/core-app/laravel/routes/api.php create mode 100644 pkg/agentic/allowance.go create mode 100644 pkg/agentic/allowance_service.go create mode 100644 pkg/agentic/allowance_test.go diff --git a/cmd/core-app/laravel/app/Http/Middleware/QuotaMiddleware.php b/cmd/core-app/laravel/app/Http/Middleware/QuotaMiddleware.php new file mode 100644 index 00000000..7f5ac907 --- /dev/null +++ b/cmd/core-app/laravel/app/Http/Middleware/QuotaMiddleware.php @@ -0,0 +1,46 @@ +header('X-Agent-ID', $request->input('agent_id', '')); + $model = $request->input('model', ''); + + if ($agentId === '') { + return response()->json([ + 'error' => 'agent_id is required', + ], 400); + } + + $result = $this->allowanceService->check($agentId, $model); + + if (! $result['allowed']) { + return response()->json([ + 'error' => 'quota_exceeded', + 'status' => $result['status'], + 'reason' => $result['reason'], + 'remaining_tokens' => $result['remaining_tokens'], + 'remaining_jobs' => $result['remaining_jobs'], + ], 429); + } + + // Attach quota info to request for downstream use + $request->merge(['_quota' => $result]); + + return $next($request); + } +} diff --git a/cmd/core-app/laravel/app/Models/AgentAllowance.php b/cmd/core-app/laravel/app/Models/AgentAllowance.php new file mode 100644 index 00000000..85e8e66c --- /dev/null +++ b/cmd/core-app/laravel/app/Models/AgentAllowance.php @@ -0,0 +1,43 @@ + 'integer', + 'daily_job_limit' => 'integer', + 'concurrent_jobs' => 'integer', + 'max_job_duration_minutes' => 'integer', + 'model_allowlist' => 'array', + ]; + } + + public function usageRecords(): HasMany + { + return $this->hasMany(QuotaUsage::class, 'agent_id', 'agent_id'); + } + + public function todayUsage(): ?QuotaUsage + { + return $this->usageRecords() + ->where('period_date', now()->toDateString()) + ->first(); + } +} diff --git a/cmd/core-app/laravel/app/Models/ModelQuota.php b/cmd/core-app/laravel/app/Models/ModelQuota.php new file mode 100644 index 00000000..ffc18be0 --- /dev/null +++ b/cmd/core-app/laravel/app/Models/ModelQuota.php @@ -0,0 +1,26 @@ + 'integer', + 'hourly_rate_limit' => 'integer', + 'cost_ceiling' => 'integer', + ]; + } +} diff --git a/cmd/core-app/laravel/app/Models/QuotaUsage.php b/cmd/core-app/laravel/app/Models/QuotaUsage.php new file mode 100644 index 00000000..c21ea664 --- /dev/null +++ b/cmd/core-app/laravel/app/Models/QuotaUsage.php @@ -0,0 +1,36 @@ + 'integer', + 'jobs_started' => 'integer', + 'active_jobs' => 'integer', + 'period_date' => 'date', + ]; + } + + public function allowance(): BelongsTo + { + return $this->belongsTo(AgentAllowance::class, 'agent_id', 'agent_id'); + } +} diff --git a/cmd/core-app/laravel/app/Models/UsageReport.php b/cmd/core-app/laravel/app/Models/UsageReport.php new file mode 100644 index 00000000..1df865c6 --- /dev/null +++ b/cmd/core-app/laravel/app/Models/UsageReport.php @@ -0,0 +1,29 @@ + 'integer', + 'tokens_out' => 'integer', + 'reported_at' => 'datetime', + ]; + } +} diff --git a/cmd/core-app/laravel/app/Services/AllowanceService.php b/cmd/core-app/laravel/app/Services/AllowanceService.php new file mode 100644 index 00000000..84189921 --- /dev/null +++ b/cmd/core-app/laravel/app/Services/AllowanceService.php @@ -0,0 +1,183 @@ +first(); + + if (! $allowance) { + return [ + 'allowed' => false, + 'status' => 'exceeded', + 'remaining_tokens' => 0, + 'remaining_jobs' => 0, + 'reason' => 'no allowance configured for agent', + ]; + } + + $usage = QuotaUsage::firstOrCreate( + ['agent_id' => $agentId, 'period_date' => now()->toDateString()], + ['tokens_used' => 0, 'jobs_started' => 0, 'active_jobs' => 0], + ); + + $result = [ + 'allowed' => true, + 'status' => 'ok', + 'remaining_tokens' => -1, + 'remaining_jobs' => -1, + 'reason' => null, + ]; + + // Check model allowlist + if ($model !== '' && ! empty($allowance->model_allowlist)) { + if (! in_array($model, $allowance->model_allowlist, true)) { + return array_merge($result, [ + 'allowed' => false, + 'status' => 'exceeded', + 'reason' => "model not in allowlist: {$model}", + ]); + } + } + + // Check daily token limit + if ($allowance->daily_token_limit > 0) { + $remaining = $allowance->daily_token_limit - $usage->tokens_used; + $result['remaining_tokens'] = $remaining; + + if ($remaining <= 0) { + return array_merge($result, [ + 'allowed' => false, + 'status' => 'exceeded', + 'reason' => 'daily token limit exceeded', + ]); + } + + $ratio = $usage->tokens_used / $allowance->daily_token_limit; + if ($ratio >= 0.8) { + $result['status'] = 'warning'; + } + } + + // Check daily job limit + if ($allowance->daily_job_limit > 0) { + $remaining = $allowance->daily_job_limit - $usage->jobs_started; + $result['remaining_jobs'] = $remaining; + + if ($remaining <= 0) { + return array_merge($result, [ + 'allowed' => false, + 'status' => 'exceeded', + 'reason' => 'daily job limit exceeded', + ]); + } + } + + // Check concurrent jobs + if ($allowance->concurrent_jobs > 0 && $usage->active_jobs >= $allowance->concurrent_jobs) { + return array_merge($result, [ + 'allowed' => false, + 'status' => 'exceeded', + 'reason' => 'concurrent job limit reached', + ]); + } + + // Check global model quota + if ($model !== '') { + $modelQuota = ModelQuota::where('model', $model)->first(); + + if ($modelQuota && $modelQuota->daily_token_budget > 0) { + $modelUsage = UsageReport::where('model', $model) + ->whereDate('reported_at', now()->toDateString()) + ->sum(\DB::raw('tokens_in + tokens_out')); + + if ($modelUsage >= $modelQuota->daily_token_budget) { + return array_merge($result, [ + 'allowed' => false, + 'status' => 'exceeded', + 'reason' => "global model token budget exceeded for: {$model}", + ]); + } + } + } + + return $result; + } + + /** + * Record usage from an agent runner report. + */ + public function recordUsage(array $report): void + { + $agentId = $report['agent_id']; + $totalTokens = ($report['tokens_in'] ?? 0) + ($report['tokens_out'] ?? 0); + + $usage = QuotaUsage::firstOrCreate( + ['agent_id' => $agentId, 'period_date' => now()->toDateString()], + ['tokens_used' => 0, 'jobs_started' => 0, 'active_jobs' => 0], + ); + + // Persist the raw report + UsageReport::create([ + 'agent_id' => $report['agent_id'], + 'job_id' => $report['job_id'], + 'model' => $report['model'] ?? null, + 'tokens_in' => $report['tokens_in'] ?? 0, + 'tokens_out' => $report['tokens_out'] ?? 0, + 'event' => $report['event'], + 'reported_at' => $report['timestamp'] ?? now(), + ]); + + match ($report['event']) { + 'job_started' => $usage->increment('jobs_started') || $usage->increment('active_jobs'), + 'job_completed' => $this->handleCompleted($usage, $totalTokens), + 'job_failed' => $this->handleFailed($usage, $totalTokens), + 'job_cancelled' => $this->handleCancelled($usage, $totalTokens), + default => null, + }; + } + + /** + * Reset daily usage counters for an agent. + */ + public function resetAgent(string $agentId): void + { + QuotaUsage::updateOrCreate( + ['agent_id' => $agentId, 'period_date' => now()->toDateString()], + ['tokens_used' => 0, 'jobs_started' => 0, 'active_jobs' => 0], + ); + } + + private function handleCompleted(QuotaUsage $usage, int $totalTokens): void + { + $usage->increment('tokens_used', $totalTokens); + $usage->decrement('active_jobs'); + } + + private function handleFailed(QuotaUsage $usage, int $totalTokens): void + { + $returnAmount = intdiv($totalTokens, 2); + $usage->increment('tokens_used', $totalTokens - $returnAmount); + $usage->decrement('active_jobs'); + } + + private function handleCancelled(QuotaUsage $usage, int $totalTokens): void + { + $usage->decrement('active_jobs'); + // 100% returned — no token charge + } +} diff --git a/cmd/core-app/laravel/bootstrap/app.php b/cmd/core-app/laravel/bootstrap/app.php index ba8f1fff..6c37a976 100644 --- a/cmd/core-app/laravel/bootstrap/app.php +++ b/cmd/core-app/laravel/bootstrap/app.php @@ -9,6 +9,7 @@ use Illuminate\Foundation\Configuration\Middleware; return Application::configure(basePath: dirname(__DIR__)) ->withRouting( web: __DIR__.'/../routes/web.php', + api: __DIR__.'/../routes/api.php', commands: __DIR__.'/../routes/console.php', ) ->withMiddleware(function (Middleware $middleware) { diff --git a/cmd/core-app/laravel/database/migrations/0001_01_01_000002_create_agent_allowances_table.php b/cmd/core-app/laravel/database/migrations/0001_01_01_000002_create_agent_allowances_table.php new file mode 100644 index 00000000..9a6d62ae --- /dev/null +++ b/cmd/core-app/laravel/database/migrations/0001_01_01_000002_create_agent_allowances_table.php @@ -0,0 +1,75 @@ +id(); + $table->string('agent_id')->unique(); + $table->bigInteger('daily_token_limit')->default(0); + $table->integer('daily_job_limit')->default(0); + $table->integer('concurrent_jobs')->default(1); + $table->integer('max_job_duration_minutes')->default(0); + $table->json('model_allowlist')->nullable(); + $table->timestamps(); + }); + + Schema::create('quota_usage', function (Blueprint $table) { + $table->id(); + $table->string('agent_id')->index(); + $table->bigInteger('tokens_used')->default(0); + $table->integer('jobs_started')->default(0); + $table->integer('active_jobs')->default(0); + $table->date('period_date')->index(); + $table->timestamps(); + + $table->unique(['agent_id', 'period_date']); + }); + + Schema::create('model_quotas', function (Blueprint $table) { + $table->id(); + $table->string('model')->unique(); + $table->bigInteger('daily_token_budget')->default(0); + $table->integer('hourly_rate_limit')->default(0); + $table->bigInteger('cost_ceiling')->default(0); + $table->timestamps(); + }); + + Schema::create('usage_reports', function (Blueprint $table) { + $table->id(); + $table->string('agent_id')->index(); + $table->string('job_id')->index(); + $table->string('model')->nullable(); + $table->bigInteger('tokens_in')->default(0); + $table->bigInteger('tokens_out')->default(0); + $table->string('event'); + $table->timestamp('reported_at'); + $table->timestamps(); + }); + + Schema::create('repo_limits', function (Blueprint $table) { + $table->id(); + $table->string('repo')->unique(); + $table->integer('max_daily_prs')->default(0); + $table->integer('max_daily_issues')->default(0); + $table->integer('cooldown_after_failure_minutes')->default(0); + $table->timestamps(); + }); + } + + public function down(): void + { + Schema::dropIfExists('repo_limits'); + Schema::dropIfExists('usage_reports'); + Schema::dropIfExists('model_quotas'); + Schema::dropIfExists('quota_usage'); + Schema::dropIfExists('agent_allowances'); + } +}; diff --git a/cmd/core-app/laravel/routes/api.php b/cmd/core-app/laravel/routes/api.php new file mode 100644 index 00000000..557fdfeb --- /dev/null +++ b/cmd/core-app/laravel/routes/api.php @@ -0,0 +1,146 @@ + response()->json(['status' => 'ok'])); + +// Agent allowance CRUD +Route::prefix('allowances/agents')->group(function () { + Route::get('/', function () { + return AgentAllowance::all(); + }); + + Route::get('/{agentId}', function (string $agentId) { + $allowance = AgentAllowance::where('agent_id', $agentId)->first(); + + if (! $allowance) { + return response()->json(['error' => 'not found'], 404); + } + + return $allowance; + }); + + Route::post('/', function (Request $request) { + $validated = $request->validate([ + 'agent_id' => 'required|string|unique:agent_allowances,agent_id', + 'daily_token_limit' => 'integer|min:0', + 'daily_job_limit' => 'integer|min:0', + 'concurrent_jobs' => 'integer|min:0', + 'max_job_duration_minutes' => 'integer|min:0', + 'model_allowlist' => 'array', + 'model_allowlist.*' => 'string', + ]); + + return AgentAllowance::create($validated); + }); + + Route::put('/{agentId}', function (Request $request, string $agentId) { + $allowance = AgentAllowance::where('agent_id', $agentId)->first(); + + if (! $allowance) { + return response()->json(['error' => 'not found'], 404); + } + + $validated = $request->validate([ + 'daily_token_limit' => 'integer|min:0', + 'daily_job_limit' => 'integer|min:0', + 'concurrent_jobs' => 'integer|min:0', + 'max_job_duration_minutes' => 'integer|min:0', + 'model_allowlist' => 'array', + 'model_allowlist.*' => 'string', + ]); + + $allowance->update($validated); + + return $allowance; + }); + + Route::delete('/{agentId}', function (string $agentId) { + AgentAllowance::where('agent_id', $agentId)->delete(); + + return response()->json(['status' => 'deleted']); + }); +}); + +// Quota check endpoint +Route::get('/allowances/check/{agentId}', function (Request $request, string $agentId, AllowanceService $svc) { + $model = $request->query('model', ''); + + return response()->json($svc->check($agentId, $model)); +}); + +// Usage reporting endpoint +Route::post('/allowances/usage', function (Request $request, AllowanceService $svc) { + $validated = $request->validate([ + 'agent_id' => 'required|string', + 'job_id' => 'required|string', + 'model' => 'nullable|string', + 'tokens_in' => 'integer|min:0', + 'tokens_out' => 'integer|min:0', + 'event' => 'required|in:job_started,job_completed,job_failed,job_cancelled', + 'timestamp' => 'nullable|date', + ]); + + $svc->recordUsage($validated); + + return response()->json(['status' => 'recorded']); +}); + +// Daily reset endpoint +Route::post('/allowances/reset/{agentId}', function (string $agentId, AllowanceService $svc) { + $svc->resetAgent($agentId); + + return response()->json(['status' => 'reset']); +}); + +// Model quota management +Route::prefix('allowances/models')->group(function () { + Route::get('/', fn () => ModelQuota::all()); + + Route::post('/', function (Request $request) { + $validated = $request->validate([ + 'model' => 'required|string|unique:model_quotas,model', + 'daily_token_budget' => 'integer|min:0', + 'hourly_rate_limit' => 'integer|min:0', + 'cost_ceiling' => 'integer|min:0', + ]); + + return ModelQuota::create($validated); + }); + + Route::put('/{model}', function (Request $request, string $model) { + $quota = ModelQuota::where('model', $model)->first(); + + if (! $quota) { + return response()->json(['error' => 'not found'], 404); + } + + $validated = $request->validate([ + 'daily_token_budget' => 'integer|min:0', + 'hourly_rate_limit' => 'integer|min:0', + 'cost_ceiling' => 'integer|min:0', + ]); + + $quota->update($validated); + + return $quota; + }); +}); diff --git a/pkg/agentic/allowance.go b/pkg/agentic/allowance.go new file mode 100644 index 00000000..cef20474 --- /dev/null +++ b/pkg/agentic/allowance.go @@ -0,0 +1,299 @@ +package agentic + +import ( + "sync" + "time" +) + +// AllowanceStatus indicates the current state of an agent's quota. +type AllowanceStatus string + +const ( + // AllowanceOK indicates the agent has remaining quota. + AllowanceOK AllowanceStatus = "ok" + // AllowanceWarning indicates the agent is at 80%+ usage. + AllowanceWarning AllowanceStatus = "warning" + // AllowanceExceeded indicates the agent has exceeded its quota. + AllowanceExceeded AllowanceStatus = "exceeded" +) + +// AgentAllowance defines the quota limits for a single agent. +type AgentAllowance struct { + // AgentID is the unique identifier for the agent. + AgentID string `json:"agent_id" yaml:"agent_id"` + // DailyTokenLimit is the maximum tokens (in+out) per 24h. 0 means unlimited. + DailyTokenLimit int64 `json:"daily_token_limit" yaml:"daily_token_limit"` + // DailyJobLimit is the maximum jobs per 24h. 0 means unlimited. + DailyJobLimit int `json:"daily_job_limit" yaml:"daily_job_limit"` + // ConcurrentJobs is the maximum simultaneous jobs. 0 means unlimited. + ConcurrentJobs int `json:"concurrent_jobs" yaml:"concurrent_jobs"` + // MaxJobDuration is the maximum job duration before kill. 0 means unlimited. + MaxJobDuration time.Duration `json:"max_job_duration" yaml:"max_job_duration"` + // ModelAllowlist restricts which models this agent can use. Empty means all. + ModelAllowlist []string `json:"model_allowlist,omitempty" yaml:"model_allowlist"` +} + +// ModelQuota defines global per-model limits across all agents. +type ModelQuota struct { + // Model is the model identifier (e.g. "claude-sonnet-4-5-20250929"). + Model string `json:"model" yaml:"model"` + // DailyTokenBudget is the total tokens across all agents per 24h. + DailyTokenBudget int64 `json:"daily_token_budget" yaml:"daily_token_budget"` + // HourlyRateLimit is the max requests per hour. + HourlyRateLimit int `json:"hourly_rate_limit" yaml:"hourly_rate_limit"` + // CostCeiling stops all usage if cumulative cost exceeds this (in cents). + CostCeiling int64 `json:"cost_ceiling" yaml:"cost_ceiling"` +} + +// RepoLimit defines per-repository rate limits. +type RepoLimit struct { + // Repo is the repository identifier (e.g. "owner/repo"). + Repo string `json:"repo" yaml:"repo"` + // MaxDailyPRs is the maximum PRs per day. 0 means unlimited. + MaxDailyPRs int `json:"max_daily_prs" yaml:"max_daily_prs"` + // MaxDailyIssues is the maximum issues per day. 0 means unlimited. + MaxDailyIssues int `json:"max_daily_issues" yaml:"max_daily_issues"` + // CooldownAfterFailure is the wait time after a failure before retrying. + CooldownAfterFailure time.Duration `json:"cooldown_after_failure" yaml:"cooldown_after_failure"` +} + +// UsageRecord tracks an agent's current usage within a quota period. +type UsageRecord struct { + // AgentID is the agent this record belongs to. + AgentID string `json:"agent_id"` + // TokensUsed is the total tokens consumed in the current period. + TokensUsed int64 `json:"tokens_used"` + // JobsStarted is the total jobs started in the current period. + JobsStarted int `json:"jobs_started"` + // ActiveJobs is the number of currently running jobs. + ActiveJobs int `json:"active_jobs"` + // PeriodStart is when the current quota period began. + PeriodStart time.Time `json:"period_start"` +} + +// QuotaCheckResult is the outcome of a pre-dispatch allowance check. +type QuotaCheckResult struct { + // Allowed indicates whether the agent may proceed. + Allowed bool `json:"allowed"` + // Status is the current allowance state. + Status AllowanceStatus `json:"status"` + // Remaining is the number of tokens remaining in the period. + RemainingTokens int64 `json:"remaining_tokens"` + // RemainingJobs is the number of jobs remaining in the period. + RemainingJobs int `json:"remaining_jobs"` + // Reason explains why the check failed (if !Allowed). + Reason string `json:"reason,omitempty"` +} + +// QuotaEvent represents a change in quota usage, used for recovery. +type QuotaEvent string + +const ( + // QuotaEventJobStarted deducts quota when a job begins. + QuotaEventJobStarted QuotaEvent = "job_started" + // QuotaEventJobCompleted deducts nothing (already counted). + QuotaEventJobCompleted QuotaEvent = "job_completed" + // QuotaEventJobFailed returns 50% of token quota. + QuotaEventJobFailed QuotaEvent = "job_failed" + // QuotaEventJobCancelled returns 100% of token quota. + QuotaEventJobCancelled QuotaEvent = "job_cancelled" +) + +// UsageReport is emitted by the agent runner to report token consumption. +type UsageReport struct { + // AgentID is the agent that consumed tokens. + AgentID string `json:"agent_id"` + // JobID identifies the specific job. + JobID string `json:"job_id"` + // Model is the model used. + Model string `json:"model"` + // TokensIn is the number of input tokens consumed. + TokensIn int64 `json:"tokens_in"` + // TokensOut is the number of output tokens consumed. + TokensOut int64 `json:"tokens_out"` + // Event is the type of quota event. + Event QuotaEvent `json:"event"` + // Timestamp is when the usage occurred. + Timestamp time.Time `json:"timestamp"` +} + +// AllowanceStore is the interface for persisting and querying allowance data. +// Implementations may use Redis, SQLite, or any backing store. +type AllowanceStore interface { + // GetAllowance returns the quota limits for an agent. + GetAllowance(agentID string) (*AgentAllowance, error) + // SetAllowance persists quota limits for an agent. + SetAllowance(a *AgentAllowance) error + // GetUsage returns the current usage record for an agent. + GetUsage(agentID string) (*UsageRecord, error) + // IncrementUsage atomically adds to an agent's usage counters. + IncrementUsage(agentID string, tokens int64, jobs int) error + // DecrementActiveJobs reduces the active job count by 1. + DecrementActiveJobs(agentID string) error + // ReturnTokens adds tokens back to the agent's remaining quota. + ReturnTokens(agentID string, tokens int64) error + // ResetUsage clears usage counters for an agent (daily reset). + ResetUsage(agentID string) error + // GetModelQuota returns global limits for a model. + GetModelQuota(model string) (*ModelQuota, error) + // GetModelUsage returns current token usage for a model. + GetModelUsage(model string) (int64, error) + // IncrementModelUsage atomically adds to a model's usage counter. + IncrementModelUsage(model string, tokens int64) error +} + +// MemoryStore is an in-memory AllowanceStore for testing and single-node use. +type MemoryStore struct { + mu sync.RWMutex + allowances map[string]*AgentAllowance + usage map[string]*UsageRecord + modelQuotas map[string]*ModelQuota + modelUsage map[string]int64 +} + +// NewMemoryStore creates a new in-memory allowance store. +func NewMemoryStore() *MemoryStore { + return &MemoryStore{ + allowances: make(map[string]*AgentAllowance), + usage: make(map[string]*UsageRecord), + modelQuotas: make(map[string]*ModelQuota), + modelUsage: make(map[string]int64), + } +} + +// GetAllowance returns the quota limits for an agent. +func (m *MemoryStore) GetAllowance(agentID string) (*AgentAllowance, error) { + m.mu.RLock() + defer m.mu.RUnlock() + a, ok := m.allowances[agentID] + if !ok { + return nil, &APIError{Code: 404, Message: "allowance not found for agent: " + agentID} + } + cp := *a + return &cp, nil +} + +// SetAllowance persists quota limits for an agent. +func (m *MemoryStore) SetAllowance(a *AgentAllowance) error { + m.mu.Lock() + defer m.mu.Unlock() + cp := *a + m.allowances[a.AgentID] = &cp + return nil +} + +// GetUsage returns the current usage record for an agent. +func (m *MemoryStore) GetUsage(agentID string) (*UsageRecord, error) { + m.mu.RLock() + defer m.mu.RUnlock() + u, ok := m.usage[agentID] + if !ok { + return &UsageRecord{ + AgentID: agentID, + PeriodStart: startOfDay(time.Now().UTC()), + }, nil + } + cp := *u + return &cp, nil +} + +// IncrementUsage atomically adds to an agent's usage counters. +func (m *MemoryStore) IncrementUsage(agentID string, tokens int64, jobs int) error { + m.mu.Lock() + defer m.mu.Unlock() + u, ok := m.usage[agentID] + if !ok { + u = &UsageRecord{ + AgentID: agentID, + PeriodStart: startOfDay(time.Now().UTC()), + } + m.usage[agentID] = u + } + u.TokensUsed += tokens + u.JobsStarted += jobs + if jobs > 0 { + u.ActiveJobs += jobs + } + return nil +} + +// DecrementActiveJobs reduces the active job count by 1. +func (m *MemoryStore) DecrementActiveJobs(agentID string) error { + m.mu.Lock() + defer m.mu.Unlock() + u, ok := m.usage[agentID] + if !ok { + return nil + } + if u.ActiveJobs > 0 { + u.ActiveJobs-- + } + return nil +} + +// ReturnTokens adds tokens back to the agent's remaining quota. +func (m *MemoryStore) ReturnTokens(agentID string, tokens int64) error { + m.mu.Lock() + defer m.mu.Unlock() + u, ok := m.usage[agentID] + if !ok { + return nil + } + u.TokensUsed -= tokens + if u.TokensUsed < 0 { + u.TokensUsed = 0 + } + return nil +} + +// ResetUsage clears usage counters for an agent. +func (m *MemoryStore) ResetUsage(agentID string) error { + m.mu.Lock() + defer m.mu.Unlock() + m.usage[agentID] = &UsageRecord{ + AgentID: agentID, + PeriodStart: startOfDay(time.Now().UTC()), + } + return nil +} + +// GetModelQuota returns global limits for a model. +func (m *MemoryStore) GetModelQuota(model string) (*ModelQuota, error) { + m.mu.RLock() + defer m.mu.RUnlock() + q, ok := m.modelQuotas[model] + if !ok { + return nil, &APIError{Code: 404, Message: "model quota not found: " + model} + } + cp := *q + return &cp, nil +} + +// GetModelUsage returns current token usage for a model. +func (m *MemoryStore) GetModelUsage(model string) (int64, error) { + m.mu.RLock() + defer m.mu.RUnlock() + return m.modelUsage[model], nil +} + +// IncrementModelUsage atomically adds to a model's usage counter. +func (m *MemoryStore) IncrementModelUsage(model string, tokens int64) error { + m.mu.Lock() + defer m.mu.Unlock() + m.modelUsage[model] += tokens + return nil +} + +// SetModelQuota sets global limits for a model (used in testing). +func (m *MemoryStore) SetModelQuota(q *ModelQuota) { + m.mu.Lock() + defer m.mu.Unlock() + cp := *q + m.modelQuotas[q.Model] = &cp +} + +// startOfDay returns midnight UTC for the given time. +func startOfDay(t time.Time) time.Time { + y, mo, d := t.Date() + return time.Date(y, mo, d, 0, 0, 0, 0, time.UTC) +} diff --git a/pkg/agentic/allowance_service.go b/pkg/agentic/allowance_service.go new file mode 100644 index 00000000..8988f471 --- /dev/null +++ b/pkg/agentic/allowance_service.go @@ -0,0 +1,176 @@ +package agentic + +import ( + "slices" + + "github.com/host-uk/core/pkg/log" +) + +// AllowanceService enforces agent quota limits. It provides pre-dispatch checks, +// runtime usage recording, and quota recovery for failed/cancelled jobs. +type AllowanceService struct { + store AllowanceStore +} + +// NewAllowanceService creates a new AllowanceService with the given store. +func NewAllowanceService(store AllowanceStore) *AllowanceService { + return &AllowanceService{store: store} +} + +// Check performs a pre-dispatch allowance check for the given agent and model. +// It verifies daily token limits, daily job limits, concurrent job limits, and +// model allowlists. Returns a QuotaCheckResult indicating whether the agent may proceed. +func (s *AllowanceService) Check(agentID, model string) (*QuotaCheckResult, error) { + const op = "AllowanceService.Check" + + allowance, err := s.store.GetAllowance(agentID) + if err != nil { + return nil, log.E(op, "failed to get allowance", err) + } + + usage, err := s.store.GetUsage(agentID) + if err != nil { + return nil, log.E(op, "failed to get usage", err) + } + + result := &QuotaCheckResult{ + Allowed: true, + Status: AllowanceOK, + RemainingTokens: -1, // unlimited + RemainingJobs: -1, // unlimited + } + + // Check model allowlist + if len(allowance.ModelAllowlist) > 0 && model != "" { + if !slices.Contains(allowance.ModelAllowlist, model) { + result.Allowed = false + result.Status = AllowanceExceeded + result.Reason = "model not in allowlist: " + model + return result, nil + } + } + + // Check daily token limit + if allowance.DailyTokenLimit > 0 { + remaining := allowance.DailyTokenLimit - usage.TokensUsed + result.RemainingTokens = remaining + if remaining <= 0 { + result.Allowed = false + result.Status = AllowanceExceeded + result.Reason = "daily token limit exceeded" + return result, nil + } + ratio := float64(usage.TokensUsed) / float64(allowance.DailyTokenLimit) + if ratio >= 0.8 { + result.Status = AllowanceWarning + } + } + + // Check daily job limit + if allowance.DailyJobLimit > 0 { + remaining := allowance.DailyJobLimit - usage.JobsStarted + result.RemainingJobs = remaining + if remaining <= 0 { + result.Allowed = false + result.Status = AllowanceExceeded + result.Reason = "daily job limit exceeded" + return result, nil + } + } + + // Check concurrent jobs + if allowance.ConcurrentJobs > 0 && usage.ActiveJobs >= allowance.ConcurrentJobs { + result.Allowed = false + result.Status = AllowanceExceeded + result.Reason = "concurrent job limit reached" + return result, nil + } + + // Check global model quota + if model != "" { + modelQuota, err := s.store.GetModelQuota(model) + if err == nil && modelQuota.DailyTokenBudget > 0 { + modelUsage, err := s.store.GetModelUsage(model) + if err == nil && modelUsage >= modelQuota.DailyTokenBudget { + result.Allowed = false + result.Status = AllowanceExceeded + result.Reason = "global model token budget exceeded for: " + model + return result, nil + } + } + } + + return result, nil +} + +// RecordUsage processes a usage report, updating counters and handling quota recovery. +func (s *AllowanceService) RecordUsage(report UsageReport) error { + const op = "AllowanceService.RecordUsage" + + totalTokens := report.TokensIn + report.TokensOut + + switch report.Event { + case QuotaEventJobStarted: + if err := s.store.IncrementUsage(report.AgentID, 0, 1); err != nil { + return log.E(op, "failed to increment job count", err) + } + + case QuotaEventJobCompleted: + if err := s.store.IncrementUsage(report.AgentID, totalTokens, 0); err != nil { + return log.E(op, "failed to record token usage", err) + } + if err := s.store.DecrementActiveJobs(report.AgentID); err != nil { + return log.E(op, "failed to decrement active jobs", err) + } + // Record model-level usage + if report.Model != "" { + if err := s.store.IncrementModelUsage(report.Model, totalTokens); err != nil { + return log.E(op, "failed to record model usage", err) + } + } + + case QuotaEventJobFailed: + // Record partial usage, return 50% of tokens + if err := s.store.IncrementUsage(report.AgentID, totalTokens, 0); err != nil { + return log.E(op, "failed to record token usage", err) + } + if err := s.store.DecrementActiveJobs(report.AgentID); err != nil { + return log.E(op, "failed to decrement active jobs", err) + } + returnAmount := totalTokens / 2 + if returnAmount > 0 { + if err := s.store.ReturnTokens(report.AgentID, returnAmount); err != nil { + return log.E(op, "failed to return tokens", err) + } + } + // Still record model-level usage (net of return) + if report.Model != "" { + if err := s.store.IncrementModelUsage(report.Model, totalTokens-returnAmount); err != nil { + return log.E(op, "failed to record model usage", err) + } + } + + case QuotaEventJobCancelled: + // Return 100% of tokens + if err := s.store.DecrementActiveJobs(report.AgentID); err != nil { + return log.E(op, "failed to decrement active jobs", err) + } + if totalTokens > 0 { + if err := s.store.ReturnTokens(report.AgentID, totalTokens); err != nil { + return log.E(op, "failed to return tokens", err) + } + } + // No model-level usage for cancelled jobs + } + + return nil +} + +// ResetAgent clears daily usage counters for the given agent (midnight reset). +func (s *AllowanceService) ResetAgent(agentID string) error { + const op = "AllowanceService.ResetAgent" + if err := s.store.ResetUsage(agentID); err != nil { + return log.E(op, "failed to reset usage", err) + } + return nil +} diff --git a/pkg/agentic/allowance_test.go b/pkg/agentic/allowance_test.go new file mode 100644 index 00000000..3ddf3d68 --- /dev/null +++ b/pkg/agentic/allowance_test.go @@ -0,0 +1,407 @@ +package agentic + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- MemoryStore tests --- + +func TestMemoryStore_SetGetAllowance_Good(t *testing.T) { + store := NewMemoryStore() + a := &AgentAllowance{ + AgentID: "agent-1", + DailyTokenLimit: 100000, + DailyJobLimit: 10, + ConcurrentJobs: 2, + MaxJobDuration: 30 * time.Minute, + ModelAllowlist: []string{"claude-sonnet-4-5-20250929"}, + } + + err := store.SetAllowance(a) + require.NoError(t, err) + + got, err := store.GetAllowance("agent-1") + require.NoError(t, err) + assert.Equal(t, a.AgentID, got.AgentID) + assert.Equal(t, a.DailyTokenLimit, got.DailyTokenLimit) + assert.Equal(t, a.DailyJobLimit, got.DailyJobLimit) + assert.Equal(t, a.ConcurrentJobs, got.ConcurrentJobs) + assert.Equal(t, a.ModelAllowlist, got.ModelAllowlist) +} + +func TestMemoryStore_GetAllowance_Bad_NotFound(t *testing.T) { + store := NewMemoryStore() + _, err := store.GetAllowance("nonexistent") + require.Error(t, err) +} + +func TestMemoryStore_IncrementUsage_Good(t *testing.T) { + store := NewMemoryStore() + + err := store.IncrementUsage("agent-1", 5000, 1) + require.NoError(t, err) + + usage, err := store.GetUsage("agent-1") + require.NoError(t, err) + assert.Equal(t, int64(5000), usage.TokensUsed) + assert.Equal(t, 1, usage.JobsStarted) + assert.Equal(t, 1, usage.ActiveJobs) +} + +func TestMemoryStore_DecrementActiveJobs_Good(t *testing.T) { + store := NewMemoryStore() + + _ = store.IncrementUsage("agent-1", 0, 2) + _ = store.DecrementActiveJobs("agent-1") + + usage, _ := store.GetUsage("agent-1") + assert.Equal(t, 1, usage.ActiveJobs) +} + +func TestMemoryStore_DecrementActiveJobs_Good_FloorAtZero(t *testing.T) { + store := NewMemoryStore() + + _ = store.DecrementActiveJobs("agent-1") // no-op, no usage record + _ = store.IncrementUsage("agent-1", 0, 0) + _ = store.DecrementActiveJobs("agent-1") // should stay at 0 + + usage, _ := store.GetUsage("agent-1") + assert.Equal(t, 0, usage.ActiveJobs) +} + +func TestMemoryStore_ReturnTokens_Good(t *testing.T) { + store := NewMemoryStore() + + _ = store.IncrementUsage("agent-1", 10000, 0) + err := store.ReturnTokens("agent-1", 5000) + require.NoError(t, err) + + usage, _ := store.GetUsage("agent-1") + assert.Equal(t, int64(5000), usage.TokensUsed) +} + +func TestMemoryStore_ReturnTokens_Good_FloorAtZero(t *testing.T) { + store := NewMemoryStore() + + _ = store.IncrementUsage("agent-1", 1000, 0) + _ = store.ReturnTokens("agent-1", 5000) // more than used + + usage, _ := store.GetUsage("agent-1") + assert.Equal(t, int64(0), usage.TokensUsed) +} + +func TestMemoryStore_ResetUsage_Good(t *testing.T) { + store := NewMemoryStore() + + _ = store.IncrementUsage("agent-1", 50000, 5) + err := store.ResetUsage("agent-1") + require.NoError(t, err) + + usage, _ := store.GetUsage("agent-1") + assert.Equal(t, int64(0), usage.TokensUsed) + assert.Equal(t, 0, usage.JobsStarted) + assert.Equal(t, 0, usage.ActiveJobs) +} + +func TestMemoryStore_ModelUsage_Good(t *testing.T) { + store := NewMemoryStore() + + _ = store.IncrementModelUsage("claude-sonnet", 10000) + _ = store.IncrementModelUsage("claude-sonnet", 5000) + + usage, err := store.GetModelUsage("claude-sonnet") + require.NoError(t, err) + assert.Equal(t, int64(15000), usage) +} + +// --- AllowanceService.Check tests --- + +func TestAllowanceServiceCheck_Good(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + DailyTokenLimit: 100000, + DailyJobLimit: 10, + ConcurrentJobs: 2, + }) + + result, err := svc.Check("agent-1", "") + require.NoError(t, err) + assert.True(t, result.Allowed) + assert.Equal(t, AllowanceOK, result.Status) + assert.Equal(t, int64(100000), result.RemainingTokens) + assert.Equal(t, 10, result.RemainingJobs) +} + +func TestAllowanceServiceCheck_Good_Warning(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + DailyTokenLimit: 100000, + }) + _ = store.IncrementUsage("agent-1", 85000, 0) + + result, err := svc.Check("agent-1", "") + require.NoError(t, err) + assert.True(t, result.Allowed) + assert.Equal(t, AllowanceWarning, result.Status) + assert.Equal(t, int64(15000), result.RemainingTokens) +} + +func TestAllowanceServiceCheck_Bad_TokenLimitExceeded(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + DailyTokenLimit: 100000, + }) + _ = store.IncrementUsage("agent-1", 100001, 0) + + result, err := svc.Check("agent-1", "") + require.NoError(t, err) + assert.False(t, result.Allowed) + assert.Equal(t, AllowanceExceeded, result.Status) + assert.Contains(t, result.Reason, "daily token limit") +} + +func TestAllowanceServiceCheck_Bad_JobLimitExceeded(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + DailyJobLimit: 5, + }) + _ = store.IncrementUsage("agent-1", 0, 5) + + result, err := svc.Check("agent-1", "") + require.NoError(t, err) + assert.False(t, result.Allowed) + assert.Contains(t, result.Reason, "daily job limit") +} + +func TestAllowanceServiceCheck_Bad_ConcurrentLimitReached(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + ConcurrentJobs: 1, + }) + _ = store.IncrementUsage("agent-1", 0, 1) // 1 active job + + result, err := svc.Check("agent-1", "") + require.NoError(t, err) + assert.False(t, result.Allowed) + assert.Contains(t, result.Reason, "concurrent job limit") +} + +func TestAllowanceServiceCheck_Bad_ModelNotInAllowlist(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + ModelAllowlist: []string{"claude-sonnet-4-5-20250929"}, + }) + + result, err := svc.Check("agent-1", "claude-opus-4-6") + require.NoError(t, err) + assert.False(t, result.Allowed) + assert.Contains(t, result.Reason, "model not in allowlist") +} + +func TestAllowanceServiceCheck_Good_ModelInAllowlist(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + ModelAllowlist: []string{"claude-sonnet-4-5-20250929", "claude-haiku-4-5-20251001"}, + }) + + result, err := svc.Check("agent-1", "claude-sonnet-4-5-20250929") + require.NoError(t, err) + assert.True(t, result.Allowed) +} + +func TestAllowanceServiceCheck_Good_EmptyModelSkipsCheck(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + ModelAllowlist: []string{"claude-sonnet-4-5-20250929"}, + }) + + result, err := svc.Check("agent-1", "") + require.NoError(t, err) + assert.True(t, result.Allowed) +} + +func TestAllowanceServiceCheck_Bad_GlobalModelBudgetExceeded(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.SetAllowance(&AgentAllowance{ + AgentID: "agent-1", + }) + store.SetModelQuota(&ModelQuota{ + Model: "claude-opus-4-6", + DailyTokenBudget: 500000, + }) + _ = store.IncrementModelUsage("claude-opus-4-6", 500001) + + result, err := svc.Check("agent-1", "claude-opus-4-6") + require.NoError(t, err) + assert.False(t, result.Allowed) + assert.Contains(t, result.Reason, "global model token budget") +} + +func TestAllowanceServiceCheck_Bad_NoAllowance(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _, err := svc.Check("unknown-agent", "") + require.Error(t, err) +} + +// --- AllowanceService.RecordUsage tests --- + +func TestAllowanceServiceRecordUsage_Good_JobStarted(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + err := svc.RecordUsage(UsageReport{ + AgentID: "agent-1", + JobID: "job-1", + Event: QuotaEventJobStarted, + }) + require.NoError(t, err) + + usage, _ := store.GetUsage("agent-1") + assert.Equal(t, 1, usage.JobsStarted) + assert.Equal(t, 1, usage.ActiveJobs) + assert.Equal(t, int64(0), usage.TokensUsed) +} + +func TestAllowanceServiceRecordUsage_Good_JobCompleted(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + // Start a job first + _ = svc.RecordUsage(UsageReport{ + AgentID: "agent-1", + JobID: "job-1", + Event: QuotaEventJobStarted, + }) + + err := svc.RecordUsage(UsageReport{ + AgentID: "agent-1", + JobID: "job-1", + Model: "claude-sonnet", + TokensIn: 1000, + TokensOut: 500, + Event: QuotaEventJobCompleted, + }) + require.NoError(t, err) + + usage, _ := store.GetUsage("agent-1") + assert.Equal(t, int64(1500), usage.TokensUsed) + assert.Equal(t, 0, usage.ActiveJobs) + + modelUsage, _ := store.GetModelUsage("claude-sonnet") + assert.Equal(t, int64(1500), modelUsage) +} + +func TestAllowanceServiceRecordUsage_Good_JobFailed_ReturnsHalf(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = svc.RecordUsage(UsageReport{ + AgentID: "agent-1", + JobID: "job-1", + Event: QuotaEventJobStarted, + }) + + err := svc.RecordUsage(UsageReport{ + AgentID: "agent-1", + JobID: "job-1", + Model: "claude-sonnet", + TokensIn: 1000, + TokensOut: 1000, + Event: QuotaEventJobFailed, + }) + require.NoError(t, err) + + usage, _ := store.GetUsage("agent-1") + // 2000 tokens used, 1000 returned (50%) = 1000 net + assert.Equal(t, int64(1000), usage.TokensUsed) + assert.Equal(t, 0, usage.ActiveJobs) + + // Model sees net usage (2000 - 1000 = 1000) + modelUsage, _ := store.GetModelUsage("claude-sonnet") + assert.Equal(t, int64(1000), modelUsage) +} + +func TestAllowanceServiceRecordUsage_Good_JobCancelled_ReturnsAll(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.IncrementUsage("agent-1", 5000, 1) // simulate pre-existing usage + + err := svc.RecordUsage(UsageReport{ + AgentID: "agent-1", + JobID: "job-1", + TokensIn: 500, + TokensOut: 500, + Event: QuotaEventJobCancelled, + }) + require.NoError(t, err) + + usage, _ := store.GetUsage("agent-1") + // 5000 pre-existing - 1000 returned = 4000 + assert.Equal(t, int64(4000), usage.TokensUsed) + assert.Equal(t, 0, usage.ActiveJobs) +} + +// --- AllowanceService.ResetAgent tests --- + +func TestAllowanceServiceResetAgent_Good(t *testing.T) { + store := NewMemoryStore() + svc := NewAllowanceService(store) + + _ = store.IncrementUsage("agent-1", 50000, 5) + + err := svc.ResetAgent("agent-1") + require.NoError(t, err) + + usage, _ := store.GetUsage("agent-1") + assert.Equal(t, int64(0), usage.TokensUsed) + assert.Equal(t, 0, usage.JobsStarted) +} + +// --- startOfDay helper test --- + +func TestStartOfDay_Good(t *testing.T) { + input := time.Date(2026, 2, 10, 15, 30, 45, 0, time.UTC) + expected := time.Date(2026, 2, 10, 0, 0, 0, 0, time.UTC) + assert.Equal(t, expected, startOfDay(input)) +} + +// --- AllowanceStatus tests --- + +func TestAllowanceStatus_Good_Values(t *testing.T) { + assert.Equal(t, AllowanceStatus("ok"), AllowanceOK) + assert.Equal(t, AllowanceStatus("warning"), AllowanceWarning) + assert.Equal(t, AllowanceStatus("exceeded"), AllowanceExceeded) +}