feat/updates #1

Merged
Snider merged 51 commits from feat/updates into dev 2026-02-16 05:54:07 +00:00
12 changed files with 1467 additions and 0 deletions
Showing only changes of commit 7a474d0690 - Show all commits

View file

@ -0,0 +1,46 @@
<?php
declare(strict_types=1);
namespace App\Http\Middleware;
use App\Services\AllowanceService;
use Closure;
use Illuminate\Http\Request;
use Symfony\Component\HttpFoundation\Response;
class QuotaMiddleware
{
public function __construct(
private readonly AllowanceService $allowanceService,
) {}
public function handle(Request $request, Closure $next): Response
{
$agentId = $request->header('X-Agent-ID', $request->input('agent_id', ''));
$model = $request->input('model', '');
if ($agentId === '') {
return response()->json([
'error' => 'agent_id is required',
], 400);
}
$result = $this->allowanceService->check($agentId, $model);
if (! $result['allowed']) {
return response()->json([
'error' => 'quota_exceeded',
'status' => $result['status'],
'reason' => $result['reason'],
'remaining_tokens' => $result['remaining_tokens'],
'remaining_jobs' => $result['remaining_jobs'],
], 429);
}
// Attach quota info to request for downstream use
$request->merge(['_quota' => $result]);
return $next($request);
}
}

View file

@ -0,0 +1,43 @@
<?php
declare(strict_types=1);
namespace App\Models;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\HasMany;
class AgentAllowance extends Model
{
protected $fillable = [
'agent_id',
'daily_token_limit',
'daily_job_limit',
'concurrent_jobs',
'max_job_duration_minutes',
'model_allowlist',
];
protected function casts(): array
{
return [
'daily_token_limit' => 'integer',
'daily_job_limit' => 'integer',
'concurrent_jobs' => 'integer',
'max_job_duration_minutes' => 'integer',
'model_allowlist' => 'array',
];
}
public function usageRecords(): HasMany
{
return $this->hasMany(QuotaUsage::class, 'agent_id', 'agent_id');
}
public function todayUsage(): ?QuotaUsage
{
return $this->usageRecords()
->where('period_date', now()->toDateString())
->first();
}
}

View file

@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
namespace App\Models;
use Illuminate\Database\Eloquent\Model;
class ModelQuota extends Model
{
protected $fillable = [
'model',
'daily_token_budget',
'hourly_rate_limit',
'cost_ceiling',
];
protected function casts(): array
{
return [
'daily_token_budget' => 'integer',
'hourly_rate_limit' => 'integer',
'cost_ceiling' => 'integer',
];
}
}

View file

@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
namespace App\Models;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
class QuotaUsage extends Model
{
protected $table = 'quota_usage';
protected $fillable = [
'agent_id',
'tokens_used',
'jobs_started',
'active_jobs',
'period_date',
];
protected function casts(): array
{
return [
'tokens_used' => 'integer',
'jobs_started' => 'integer',
'active_jobs' => 'integer',
'period_date' => 'date',
];
}
public function allowance(): BelongsTo
{
return $this->belongsTo(AgentAllowance::class, 'agent_id', 'agent_id');
}
}

View file

@ -0,0 +1,29 @@
<?php
declare(strict_types=1);
namespace App\Models;
use Illuminate\Database\Eloquent\Model;
class UsageReport extends Model
{
protected $fillable = [
'agent_id',
'job_id',
'model',
'tokens_in',
'tokens_out',
'event',
'reported_at',
];
protected function casts(): array
{
return [
'tokens_in' => 'integer',
'tokens_out' => 'integer',
'reported_at' => 'datetime',
];
}
}

View file

@ -0,0 +1,183 @@
<?php
declare(strict_types=1);
namespace App\Services;
use App\Models\AgentAllowance;
use App\Models\ModelQuota;
use App\Models\QuotaUsage;
use App\Models\UsageReport;
class AllowanceService
{
/**
* Pre-dispatch check: verify agent has remaining allowance.
*
* @return array{allowed: bool, status: string, remaining_tokens: int, remaining_jobs: int, reason: ?string}
*/
public function check(string $agentId, string $model = ''): array
{
$allowance = AgentAllowance::where('agent_id', $agentId)->first();
if (! $allowance) {
return [
'allowed' => false,
'status' => 'exceeded',
'remaining_tokens' => 0,
'remaining_jobs' => 0,
'reason' => 'no allowance configured for agent',
];
}
$usage = QuotaUsage::firstOrCreate(
['agent_id' => $agentId, 'period_date' => now()->toDateString()],
['tokens_used' => 0, 'jobs_started' => 0, 'active_jobs' => 0],
);
$result = [
'allowed' => true,
'status' => 'ok',
'remaining_tokens' => -1,
'remaining_jobs' => -1,
'reason' => null,
];
// Check model allowlist
if ($model !== '' && ! empty($allowance->model_allowlist)) {
if (! in_array($model, $allowance->model_allowlist, true)) {
return array_merge($result, [
'allowed' => false,
'status' => 'exceeded',
'reason' => "model not in allowlist: {$model}",
]);
}
}
// Check daily token limit
if ($allowance->daily_token_limit > 0) {
$remaining = $allowance->daily_token_limit - $usage->tokens_used;
$result['remaining_tokens'] = $remaining;
if ($remaining <= 0) {
return array_merge($result, [
'allowed' => false,
'status' => 'exceeded',
'reason' => 'daily token limit exceeded',
]);
}
$ratio = $usage->tokens_used / $allowance->daily_token_limit;
if ($ratio >= 0.8) {
$result['status'] = 'warning';
}
}
// Check daily job limit
if ($allowance->daily_job_limit > 0) {
$remaining = $allowance->daily_job_limit - $usage->jobs_started;
$result['remaining_jobs'] = $remaining;
if ($remaining <= 0) {
return array_merge($result, [
'allowed' => false,
'status' => 'exceeded',
'reason' => 'daily job limit exceeded',
]);
}
}
// Check concurrent jobs
if ($allowance->concurrent_jobs > 0 && $usage->active_jobs >= $allowance->concurrent_jobs) {
return array_merge($result, [
'allowed' => false,
'status' => 'exceeded',
'reason' => 'concurrent job limit reached',
]);
}
// Check global model quota
if ($model !== '') {
$modelQuota = ModelQuota::where('model', $model)->first();
if ($modelQuota && $modelQuota->daily_token_budget > 0) {
$modelUsage = UsageReport::where('model', $model)
->whereDate('reported_at', now()->toDateString())
->sum(\DB::raw('tokens_in + tokens_out'));
if ($modelUsage >= $modelQuota->daily_token_budget) {
return array_merge($result, [
'allowed' => false,
'status' => 'exceeded',
'reason' => "global model token budget exceeded for: {$model}",
]);
}
}
}
return $result;
}
/**
* Record usage from an agent runner report.
*/
public function recordUsage(array $report): void
{
$agentId = $report['agent_id'];
$totalTokens = ($report['tokens_in'] ?? 0) + ($report['tokens_out'] ?? 0);
$usage = QuotaUsage::firstOrCreate(
['agent_id' => $agentId, 'period_date' => now()->toDateString()],
['tokens_used' => 0, 'jobs_started' => 0, 'active_jobs' => 0],
);
// Persist the raw report
UsageReport::create([
'agent_id' => $report['agent_id'],
'job_id' => $report['job_id'],
'model' => $report['model'] ?? null,
'tokens_in' => $report['tokens_in'] ?? 0,
'tokens_out' => $report['tokens_out'] ?? 0,
'event' => $report['event'],
'reported_at' => $report['timestamp'] ?? now(),
]);
match ($report['event']) {
'job_started' => $usage->increment('jobs_started') || $usage->increment('active_jobs'),
'job_completed' => $this->handleCompleted($usage, $totalTokens),
'job_failed' => $this->handleFailed($usage, $totalTokens),
'job_cancelled' => $this->handleCancelled($usage, $totalTokens),
default => null,
};
}
/**
* Reset daily usage counters for an agent.
*/
public function resetAgent(string $agentId): void
{
QuotaUsage::updateOrCreate(
['agent_id' => $agentId, 'period_date' => now()->toDateString()],
['tokens_used' => 0, 'jobs_started' => 0, 'active_jobs' => 0],
);
}
private function handleCompleted(QuotaUsage $usage, int $totalTokens): void
{
$usage->increment('tokens_used', $totalTokens);
$usage->decrement('active_jobs');
}
private function handleFailed(QuotaUsage $usage, int $totalTokens): void
{
$returnAmount = intdiv($totalTokens, 2);
$usage->increment('tokens_used', $totalTokens - $returnAmount);
$usage->decrement('active_jobs');
}
private function handleCancelled(QuotaUsage $usage, int $totalTokens): void
{
$usage->decrement('active_jobs');
// 100% returned — no token charge
}
}

View file

@ -9,6 +9,7 @@ use Illuminate\Foundation\Configuration\Middleware;
return Application::configure(basePath: dirname(__DIR__))
->withRouting(
web: __DIR__.'/../routes/web.php',
api: __DIR__.'/../routes/api.php',
commands: __DIR__.'/../routes/console.php',
)
->withMiddleware(function (Middleware $middleware) {

View file

@ -0,0 +1,75 @@
<?php
declare(strict_types=1);
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
public function up(): void
{
Schema::create('agent_allowances', function (Blueprint $table) {
$table->id();
$table->string('agent_id')->unique();
$table->bigInteger('daily_token_limit')->default(0);
$table->integer('daily_job_limit')->default(0);
$table->integer('concurrent_jobs')->default(1);
$table->integer('max_job_duration_minutes')->default(0);
$table->json('model_allowlist')->nullable();
$table->timestamps();
});
Schema::create('quota_usage', function (Blueprint $table) {
$table->id();
$table->string('agent_id')->index();
$table->bigInteger('tokens_used')->default(0);
$table->integer('jobs_started')->default(0);
$table->integer('active_jobs')->default(0);
$table->date('period_date')->index();
$table->timestamps();
$table->unique(['agent_id', 'period_date']);
});
Schema::create('model_quotas', function (Blueprint $table) {
$table->id();
$table->string('model')->unique();
$table->bigInteger('daily_token_budget')->default(0);
$table->integer('hourly_rate_limit')->default(0);
$table->bigInteger('cost_ceiling')->default(0);
$table->timestamps();
});
Schema::create('usage_reports', function (Blueprint $table) {
$table->id();
$table->string('agent_id')->index();
$table->string('job_id')->index();
$table->string('model')->nullable();
$table->bigInteger('tokens_in')->default(0);
$table->bigInteger('tokens_out')->default(0);
$table->string('event');
$table->timestamp('reported_at');
$table->timestamps();
});
Schema::create('repo_limits', function (Blueprint $table) {
$table->id();
$table->string('repo')->unique();
$table->integer('max_daily_prs')->default(0);
$table->integer('max_daily_issues')->default(0);
$table->integer('cooldown_after_failure_minutes')->default(0);
$table->timestamps();
});
}
public function down(): void
{
Schema::dropIfExists('repo_limits');
Schema::dropIfExists('usage_reports');
Schema::dropIfExists('model_quotas');
Schema::dropIfExists('quota_usage');
Schema::dropIfExists('agent_allowances');
}
};

View file

@ -0,0 +1,146 @@
<?php
declare(strict_types=1);
use App\Models\AgentAllowance;
use App\Models\ModelQuota;
use App\Models\RepoLimit;
use App\Services\AllowanceService;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\Route;
/*
|--------------------------------------------------------------------------
| Allowance API Routes
|--------------------------------------------------------------------------
|
| Endpoints for managing agent quotas, checking allowances, and recording
| usage. Protected endpoints use QuotaMiddleware for enforcement.
|
*/
// Health check for quota service
Route::get('/allowances/health', fn () => response()->json(['status' => 'ok']));
// Agent allowance CRUD
Route::prefix('allowances/agents')->group(function () {
Route::get('/', function () {
return AgentAllowance::all();
});
Route::get('/{agentId}', function (string $agentId) {
$allowance = AgentAllowance::where('agent_id', $agentId)->first();
if (! $allowance) {
return response()->json(['error' => 'not found'], 404);
}
return $allowance;
});
Route::post('/', function (Request $request) {
$validated = $request->validate([
'agent_id' => 'required|string|unique:agent_allowances,agent_id',
'daily_token_limit' => 'integer|min:0',
'daily_job_limit' => 'integer|min:0',
'concurrent_jobs' => 'integer|min:0',
'max_job_duration_minutes' => 'integer|min:0',
'model_allowlist' => 'array',
'model_allowlist.*' => 'string',
]);
return AgentAllowance::create($validated);
});
Route::put('/{agentId}', function (Request $request, string $agentId) {
$allowance = AgentAllowance::where('agent_id', $agentId)->first();
if (! $allowance) {
return response()->json(['error' => 'not found'], 404);
}
$validated = $request->validate([
'daily_token_limit' => 'integer|min:0',
'daily_job_limit' => 'integer|min:0',
'concurrent_jobs' => 'integer|min:0',
'max_job_duration_minutes' => 'integer|min:0',
'model_allowlist' => 'array',
'model_allowlist.*' => 'string',
]);
$allowance->update($validated);
return $allowance;
});
Route::delete('/{agentId}', function (string $agentId) {
AgentAllowance::where('agent_id', $agentId)->delete();
return response()->json(['status' => 'deleted']);
});
});
// Quota check endpoint
Route::get('/allowances/check/{agentId}', function (Request $request, string $agentId, AllowanceService $svc) {
$model = $request->query('model', '');
return response()->json($svc->check($agentId, $model));
});
// Usage reporting endpoint
Route::post('/allowances/usage', function (Request $request, AllowanceService $svc) {
$validated = $request->validate([
'agent_id' => 'required|string',
'job_id' => 'required|string',
'model' => 'nullable|string',
'tokens_in' => 'integer|min:0',
'tokens_out' => 'integer|min:0',
'event' => 'required|in:job_started,job_completed,job_failed,job_cancelled',
'timestamp' => 'nullable|date',
]);
$svc->recordUsage($validated);
return response()->json(['status' => 'recorded']);
});
// Daily reset endpoint
Route::post('/allowances/reset/{agentId}', function (string $agentId, AllowanceService $svc) {
$svc->resetAgent($agentId);
return response()->json(['status' => 'reset']);
});
// Model quota management
Route::prefix('allowances/models')->group(function () {
Route::get('/', fn () => ModelQuota::all());
Route::post('/', function (Request $request) {
$validated = $request->validate([
'model' => 'required|string|unique:model_quotas,model',
'daily_token_budget' => 'integer|min:0',
'hourly_rate_limit' => 'integer|min:0',
'cost_ceiling' => 'integer|min:0',
]);
return ModelQuota::create($validated);
});
Route::put('/{model}', function (Request $request, string $model) {
$quota = ModelQuota::where('model', $model)->first();
if (! $quota) {
return response()->json(['error' => 'not found'], 404);
}
$validated = $request->validate([
'daily_token_budget' => 'integer|min:0',
'hourly_rate_limit' => 'integer|min:0',
'cost_ceiling' => 'integer|min:0',
]);
$quota->update($validated);
return $quota;
});
});

299
pkg/agentic/allowance.go Normal file
View file

@ -0,0 +1,299 @@
package agentic
import (
"sync"
"time"
)
// AllowanceStatus indicates the current state of an agent's quota.
type AllowanceStatus string
const (
// AllowanceOK indicates the agent has remaining quota.
AllowanceOK AllowanceStatus = "ok"
// AllowanceWarning indicates the agent is at 80%+ usage.
AllowanceWarning AllowanceStatus = "warning"
// AllowanceExceeded indicates the agent has exceeded its quota.
AllowanceExceeded AllowanceStatus = "exceeded"
)
// AgentAllowance defines the quota limits for a single agent.
type AgentAllowance struct {
// AgentID is the unique identifier for the agent.
AgentID string `json:"agent_id" yaml:"agent_id"`
// DailyTokenLimit is the maximum tokens (in+out) per 24h. 0 means unlimited.
DailyTokenLimit int64 `json:"daily_token_limit" yaml:"daily_token_limit"`
// DailyJobLimit is the maximum jobs per 24h. 0 means unlimited.
DailyJobLimit int `json:"daily_job_limit" yaml:"daily_job_limit"`
// ConcurrentJobs is the maximum simultaneous jobs. 0 means unlimited.
ConcurrentJobs int `json:"concurrent_jobs" yaml:"concurrent_jobs"`
// MaxJobDuration is the maximum job duration before kill. 0 means unlimited.
MaxJobDuration time.Duration `json:"max_job_duration" yaml:"max_job_duration"`
// ModelAllowlist restricts which models this agent can use. Empty means all.
ModelAllowlist []string `json:"model_allowlist,omitempty" yaml:"model_allowlist"`
}
// ModelQuota defines global per-model limits across all agents.
type ModelQuota struct {
// Model is the model identifier (e.g. "claude-sonnet-4-5-20250929").
Model string `json:"model" yaml:"model"`
// DailyTokenBudget is the total tokens across all agents per 24h.
DailyTokenBudget int64 `json:"daily_token_budget" yaml:"daily_token_budget"`
// HourlyRateLimit is the max requests per hour.
HourlyRateLimit int `json:"hourly_rate_limit" yaml:"hourly_rate_limit"`
// CostCeiling stops all usage if cumulative cost exceeds this (in cents).
CostCeiling int64 `json:"cost_ceiling" yaml:"cost_ceiling"`
}
// RepoLimit defines per-repository rate limits.
type RepoLimit struct {
// Repo is the repository identifier (e.g. "owner/repo").
Repo string `json:"repo" yaml:"repo"`
// MaxDailyPRs is the maximum PRs per day. 0 means unlimited.
MaxDailyPRs int `json:"max_daily_prs" yaml:"max_daily_prs"`
// MaxDailyIssues is the maximum issues per day. 0 means unlimited.
MaxDailyIssues int `json:"max_daily_issues" yaml:"max_daily_issues"`
// CooldownAfterFailure is the wait time after a failure before retrying.
CooldownAfterFailure time.Duration `json:"cooldown_after_failure" yaml:"cooldown_after_failure"`
}
// UsageRecord tracks an agent's current usage within a quota period.
type UsageRecord struct {
// AgentID is the agent this record belongs to.
AgentID string `json:"agent_id"`
// TokensUsed is the total tokens consumed in the current period.
TokensUsed int64 `json:"tokens_used"`
// JobsStarted is the total jobs started in the current period.
JobsStarted int `json:"jobs_started"`
// ActiveJobs is the number of currently running jobs.
ActiveJobs int `json:"active_jobs"`
// PeriodStart is when the current quota period began.
PeriodStart time.Time `json:"period_start"`
}
// QuotaCheckResult is the outcome of a pre-dispatch allowance check.
type QuotaCheckResult struct {
// Allowed indicates whether the agent may proceed.
Allowed bool `json:"allowed"`
// Status is the current allowance state.
Status AllowanceStatus `json:"status"`
// Remaining is the number of tokens remaining in the period.
RemainingTokens int64 `json:"remaining_tokens"`
// RemainingJobs is the number of jobs remaining in the period.
RemainingJobs int `json:"remaining_jobs"`
// Reason explains why the check failed (if !Allowed).
Reason string `json:"reason,omitempty"`
}
// QuotaEvent represents a change in quota usage, used for recovery.
type QuotaEvent string
const (
// QuotaEventJobStarted deducts quota when a job begins.
QuotaEventJobStarted QuotaEvent = "job_started"
// QuotaEventJobCompleted deducts nothing (already counted).
QuotaEventJobCompleted QuotaEvent = "job_completed"
// QuotaEventJobFailed returns 50% of token quota.
QuotaEventJobFailed QuotaEvent = "job_failed"
// QuotaEventJobCancelled returns 100% of token quota.
QuotaEventJobCancelled QuotaEvent = "job_cancelled"
)
// UsageReport is emitted by the agent runner to report token consumption.
type UsageReport struct {
// AgentID is the agent that consumed tokens.
AgentID string `json:"agent_id"`
// JobID identifies the specific job.
JobID string `json:"job_id"`
// Model is the model used.
Model string `json:"model"`
// TokensIn is the number of input tokens consumed.
TokensIn int64 `json:"tokens_in"`
// TokensOut is the number of output tokens consumed.
TokensOut int64 `json:"tokens_out"`
// Event is the type of quota event.
Event QuotaEvent `json:"event"`
// Timestamp is when the usage occurred.
Timestamp time.Time `json:"timestamp"`
}
// AllowanceStore is the interface for persisting and querying allowance data.
// Implementations may use Redis, SQLite, or any backing store.
type AllowanceStore interface {
// GetAllowance returns the quota limits for an agent.
GetAllowance(agentID string) (*AgentAllowance, error)
// SetAllowance persists quota limits for an agent.
SetAllowance(a *AgentAllowance) error
// GetUsage returns the current usage record for an agent.
GetUsage(agentID string) (*UsageRecord, error)
// IncrementUsage atomically adds to an agent's usage counters.
IncrementUsage(agentID string, tokens int64, jobs int) error
// DecrementActiveJobs reduces the active job count by 1.
DecrementActiveJobs(agentID string) error
// ReturnTokens adds tokens back to the agent's remaining quota.
ReturnTokens(agentID string, tokens int64) error
// ResetUsage clears usage counters for an agent (daily reset).
ResetUsage(agentID string) error
// GetModelQuota returns global limits for a model.
GetModelQuota(model string) (*ModelQuota, error)
// GetModelUsage returns current token usage for a model.
GetModelUsage(model string) (int64, error)
// IncrementModelUsage atomically adds to a model's usage counter.
IncrementModelUsage(model string, tokens int64) error
}
// MemoryStore is an in-memory AllowanceStore for testing and single-node use.
type MemoryStore struct {
mu sync.RWMutex
allowances map[string]*AgentAllowance
usage map[string]*UsageRecord
modelQuotas map[string]*ModelQuota
modelUsage map[string]int64
}
// NewMemoryStore creates a new in-memory allowance store.
func NewMemoryStore() *MemoryStore {
return &MemoryStore{
allowances: make(map[string]*AgentAllowance),
usage: make(map[string]*UsageRecord),
modelQuotas: make(map[string]*ModelQuota),
modelUsage: make(map[string]int64),
}
}
// GetAllowance returns the quota limits for an agent.
func (m *MemoryStore) GetAllowance(agentID string) (*AgentAllowance, error) {
m.mu.RLock()
defer m.mu.RUnlock()
a, ok := m.allowances[agentID]
if !ok {
return nil, &APIError{Code: 404, Message: "allowance not found for agent: " + agentID}
}
cp := *a
return &cp, nil
}
// SetAllowance persists quota limits for an agent.
func (m *MemoryStore) SetAllowance(a *AgentAllowance) error {
m.mu.Lock()
defer m.mu.Unlock()
cp := *a
m.allowances[a.AgentID] = &cp
return nil
}
// GetUsage returns the current usage record for an agent.
func (m *MemoryStore) GetUsage(agentID string) (*UsageRecord, error) {
m.mu.RLock()
defer m.mu.RUnlock()
u, ok := m.usage[agentID]
if !ok {
return &UsageRecord{
AgentID: agentID,
PeriodStart: startOfDay(time.Now().UTC()),
}, nil
}
cp := *u
return &cp, nil
}
// IncrementUsage atomically adds to an agent's usage counters.
func (m *MemoryStore) IncrementUsage(agentID string, tokens int64, jobs int) error {
m.mu.Lock()
defer m.mu.Unlock()
u, ok := m.usage[agentID]
if !ok {
u = &UsageRecord{
AgentID: agentID,
PeriodStart: startOfDay(time.Now().UTC()),
}
m.usage[agentID] = u
}
u.TokensUsed += tokens
u.JobsStarted += jobs
if jobs > 0 {
u.ActiveJobs += jobs
}
return nil
}
// DecrementActiveJobs reduces the active job count by 1.
func (m *MemoryStore) DecrementActiveJobs(agentID string) error {
m.mu.Lock()
defer m.mu.Unlock()
u, ok := m.usage[agentID]
if !ok {
return nil
}
if u.ActiveJobs > 0 {
u.ActiveJobs--
}
return nil
}
// ReturnTokens adds tokens back to the agent's remaining quota.
func (m *MemoryStore) ReturnTokens(agentID string, tokens int64) error {
m.mu.Lock()
defer m.mu.Unlock()
u, ok := m.usage[agentID]
if !ok {
return nil
}
u.TokensUsed -= tokens
if u.TokensUsed < 0 {
u.TokensUsed = 0
}
return nil
}
// ResetUsage clears usage counters for an agent.
func (m *MemoryStore) ResetUsage(agentID string) error {
m.mu.Lock()
defer m.mu.Unlock()
m.usage[agentID] = &UsageRecord{
AgentID: agentID,
PeriodStart: startOfDay(time.Now().UTC()),
}
return nil
}
// GetModelQuota returns global limits for a model.
func (m *MemoryStore) GetModelQuota(model string) (*ModelQuota, error) {
m.mu.RLock()
defer m.mu.RUnlock()
q, ok := m.modelQuotas[model]
if !ok {
return nil, &APIError{Code: 404, Message: "model quota not found: " + model}
}
cp := *q
return &cp, nil
}
// GetModelUsage returns current token usage for a model.
func (m *MemoryStore) GetModelUsage(model string) (int64, error) {
m.mu.RLock()
defer m.mu.RUnlock()
return m.modelUsage[model], nil
}
// IncrementModelUsage atomically adds to a model's usage counter.
func (m *MemoryStore) IncrementModelUsage(model string, tokens int64) error {
m.mu.Lock()
defer m.mu.Unlock()
m.modelUsage[model] += tokens
return nil
}
// SetModelQuota sets global limits for a model (used in testing).
func (m *MemoryStore) SetModelQuota(q *ModelQuota) {
m.mu.Lock()
defer m.mu.Unlock()
cp := *q
m.modelQuotas[q.Model] = &cp
}
// startOfDay returns midnight UTC for the given time.
func startOfDay(t time.Time) time.Time {
y, mo, d := t.Date()
return time.Date(y, mo, d, 0, 0, 0, 0, time.UTC)
}

View file

@ -0,0 +1,176 @@
package agentic
import (
"slices"
"github.com/host-uk/core/pkg/log"
)
// AllowanceService enforces agent quota limits. It provides pre-dispatch checks,
// runtime usage recording, and quota recovery for failed/cancelled jobs.
type AllowanceService struct {
store AllowanceStore
}
// NewAllowanceService creates a new AllowanceService with the given store.
func NewAllowanceService(store AllowanceStore) *AllowanceService {
return &AllowanceService{store: store}
}
// Check performs a pre-dispatch allowance check for the given agent and model.
// It verifies daily token limits, daily job limits, concurrent job limits, and
// model allowlists. Returns a QuotaCheckResult indicating whether the agent may proceed.
func (s *AllowanceService) Check(agentID, model string) (*QuotaCheckResult, error) {
const op = "AllowanceService.Check"
allowance, err := s.store.GetAllowance(agentID)
if err != nil {
return nil, log.E(op, "failed to get allowance", err)
}
usage, err := s.store.GetUsage(agentID)
if err != nil {
return nil, log.E(op, "failed to get usage", err)
}
result := &QuotaCheckResult{
Allowed: true,
Status: AllowanceOK,
RemainingTokens: -1, // unlimited
RemainingJobs: -1, // unlimited
}
// Check model allowlist
if len(allowance.ModelAllowlist) > 0 && model != "" {
if !slices.Contains(allowance.ModelAllowlist, model) {
result.Allowed = false
result.Status = AllowanceExceeded
result.Reason = "model not in allowlist: " + model
return result, nil
}
}
// Check daily token limit
if allowance.DailyTokenLimit > 0 {
remaining := allowance.DailyTokenLimit - usage.TokensUsed
result.RemainingTokens = remaining
if remaining <= 0 {
result.Allowed = false
result.Status = AllowanceExceeded
result.Reason = "daily token limit exceeded"
return result, nil
}
ratio := float64(usage.TokensUsed) / float64(allowance.DailyTokenLimit)
if ratio >= 0.8 {
result.Status = AllowanceWarning
}
}
// Check daily job limit
if allowance.DailyJobLimit > 0 {
remaining := allowance.DailyJobLimit - usage.JobsStarted
result.RemainingJobs = remaining
if remaining <= 0 {
result.Allowed = false
result.Status = AllowanceExceeded
result.Reason = "daily job limit exceeded"
return result, nil
}
}
// Check concurrent jobs
if allowance.ConcurrentJobs > 0 && usage.ActiveJobs >= allowance.ConcurrentJobs {
result.Allowed = false
result.Status = AllowanceExceeded
result.Reason = "concurrent job limit reached"
return result, nil
}
// Check global model quota
if model != "" {
modelQuota, err := s.store.GetModelQuota(model)
if err == nil && modelQuota.DailyTokenBudget > 0 {
modelUsage, err := s.store.GetModelUsage(model)
if err == nil && modelUsage >= modelQuota.DailyTokenBudget {
result.Allowed = false
result.Status = AllowanceExceeded
result.Reason = "global model token budget exceeded for: " + model
return result, nil
}
}
}
return result, nil
}
// RecordUsage processes a usage report, updating counters and handling quota recovery.
func (s *AllowanceService) RecordUsage(report UsageReport) error {
const op = "AllowanceService.RecordUsage"
totalTokens := report.TokensIn + report.TokensOut
switch report.Event {
case QuotaEventJobStarted:
if err := s.store.IncrementUsage(report.AgentID, 0, 1); err != nil {
return log.E(op, "failed to increment job count", err)
}
case QuotaEventJobCompleted:
if err := s.store.IncrementUsage(report.AgentID, totalTokens, 0); err != nil {
return log.E(op, "failed to record token usage", err)
}
if err := s.store.DecrementActiveJobs(report.AgentID); err != nil {
return log.E(op, "failed to decrement active jobs", err)
}
// Record model-level usage
if report.Model != "" {
if err := s.store.IncrementModelUsage(report.Model, totalTokens); err != nil {
return log.E(op, "failed to record model usage", err)
}
}
case QuotaEventJobFailed:
// Record partial usage, return 50% of tokens
if err := s.store.IncrementUsage(report.AgentID, totalTokens, 0); err != nil {
return log.E(op, "failed to record token usage", err)
}
if err := s.store.DecrementActiveJobs(report.AgentID); err != nil {
return log.E(op, "failed to decrement active jobs", err)
}
returnAmount := totalTokens / 2
if returnAmount > 0 {
if err := s.store.ReturnTokens(report.AgentID, returnAmount); err != nil {
return log.E(op, "failed to return tokens", err)
}
}
// Still record model-level usage (net of return)
if report.Model != "" {
if err := s.store.IncrementModelUsage(report.Model, totalTokens-returnAmount); err != nil {
return log.E(op, "failed to record model usage", err)
}
}
case QuotaEventJobCancelled:
// Return 100% of tokens
if err := s.store.DecrementActiveJobs(report.AgentID); err != nil {
return log.E(op, "failed to decrement active jobs", err)
}
if totalTokens > 0 {
if err := s.store.ReturnTokens(report.AgentID, totalTokens); err != nil {
return log.E(op, "failed to return tokens", err)
}
}
// No model-level usage for cancelled jobs
}
return nil
}
// ResetAgent clears daily usage counters for the given agent (midnight reset).
func (s *AllowanceService) ResetAgent(agentID string) error {
const op = "AllowanceService.ResetAgent"
if err := s.store.ResetUsage(agentID); err != nil {
return log.E(op, "failed to reset usage", err)
}
return nil
}

View file

@ -0,0 +1,407 @@
package agentic
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// --- MemoryStore tests ---
func TestMemoryStore_SetGetAllowance_Good(t *testing.T) {
store := NewMemoryStore()
a := &AgentAllowance{
AgentID: "agent-1",
DailyTokenLimit: 100000,
DailyJobLimit: 10,
ConcurrentJobs: 2,
MaxJobDuration: 30 * time.Minute,
ModelAllowlist: []string{"claude-sonnet-4-5-20250929"},
}
err := store.SetAllowance(a)
require.NoError(t, err)
got, err := store.GetAllowance("agent-1")
require.NoError(t, err)
assert.Equal(t, a.AgentID, got.AgentID)
assert.Equal(t, a.DailyTokenLimit, got.DailyTokenLimit)
assert.Equal(t, a.DailyJobLimit, got.DailyJobLimit)
assert.Equal(t, a.ConcurrentJobs, got.ConcurrentJobs)
assert.Equal(t, a.ModelAllowlist, got.ModelAllowlist)
}
func TestMemoryStore_GetAllowance_Bad_NotFound(t *testing.T) {
store := NewMemoryStore()
_, err := store.GetAllowance("nonexistent")
require.Error(t, err)
}
func TestMemoryStore_IncrementUsage_Good(t *testing.T) {
store := NewMemoryStore()
err := store.IncrementUsage("agent-1", 5000, 1)
require.NoError(t, err)
usage, err := store.GetUsage("agent-1")
require.NoError(t, err)
assert.Equal(t, int64(5000), usage.TokensUsed)
assert.Equal(t, 1, usage.JobsStarted)
assert.Equal(t, 1, usage.ActiveJobs)
}
func TestMemoryStore_DecrementActiveJobs_Good(t *testing.T) {
store := NewMemoryStore()
_ = store.IncrementUsage("agent-1", 0, 2)
_ = store.DecrementActiveJobs("agent-1")
usage, _ := store.GetUsage("agent-1")
assert.Equal(t, 1, usage.ActiveJobs)
}
func TestMemoryStore_DecrementActiveJobs_Good_FloorAtZero(t *testing.T) {
store := NewMemoryStore()
_ = store.DecrementActiveJobs("agent-1") // no-op, no usage record
_ = store.IncrementUsage("agent-1", 0, 0)
_ = store.DecrementActiveJobs("agent-1") // should stay at 0
usage, _ := store.GetUsage("agent-1")
assert.Equal(t, 0, usage.ActiveJobs)
}
func TestMemoryStore_ReturnTokens_Good(t *testing.T) {
store := NewMemoryStore()
_ = store.IncrementUsage("agent-1", 10000, 0)
err := store.ReturnTokens("agent-1", 5000)
require.NoError(t, err)
usage, _ := store.GetUsage("agent-1")
assert.Equal(t, int64(5000), usage.TokensUsed)
}
func TestMemoryStore_ReturnTokens_Good_FloorAtZero(t *testing.T) {
store := NewMemoryStore()
_ = store.IncrementUsage("agent-1", 1000, 0)
_ = store.ReturnTokens("agent-1", 5000) // more than used
usage, _ := store.GetUsage("agent-1")
assert.Equal(t, int64(0), usage.TokensUsed)
}
func TestMemoryStore_ResetUsage_Good(t *testing.T) {
store := NewMemoryStore()
_ = store.IncrementUsage("agent-1", 50000, 5)
err := store.ResetUsage("agent-1")
require.NoError(t, err)
usage, _ := store.GetUsage("agent-1")
assert.Equal(t, int64(0), usage.TokensUsed)
assert.Equal(t, 0, usage.JobsStarted)
assert.Equal(t, 0, usage.ActiveJobs)
}
func TestMemoryStore_ModelUsage_Good(t *testing.T) {
store := NewMemoryStore()
_ = store.IncrementModelUsage("claude-sonnet", 10000)
_ = store.IncrementModelUsage("claude-sonnet", 5000)
usage, err := store.GetModelUsage("claude-sonnet")
require.NoError(t, err)
assert.Equal(t, int64(15000), usage)
}
// --- AllowanceService.Check tests ---
func TestAllowanceServiceCheck_Good(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
DailyTokenLimit: 100000,
DailyJobLimit: 10,
ConcurrentJobs: 2,
})
result, err := svc.Check("agent-1", "")
require.NoError(t, err)
assert.True(t, result.Allowed)
assert.Equal(t, AllowanceOK, result.Status)
assert.Equal(t, int64(100000), result.RemainingTokens)
assert.Equal(t, 10, result.RemainingJobs)
}
func TestAllowanceServiceCheck_Good_Warning(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
DailyTokenLimit: 100000,
})
_ = store.IncrementUsage("agent-1", 85000, 0)
result, err := svc.Check("agent-1", "")
require.NoError(t, err)
assert.True(t, result.Allowed)
assert.Equal(t, AllowanceWarning, result.Status)
assert.Equal(t, int64(15000), result.RemainingTokens)
}
func TestAllowanceServiceCheck_Bad_TokenLimitExceeded(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
DailyTokenLimit: 100000,
})
_ = store.IncrementUsage("agent-1", 100001, 0)
result, err := svc.Check("agent-1", "")
require.NoError(t, err)
assert.False(t, result.Allowed)
assert.Equal(t, AllowanceExceeded, result.Status)
assert.Contains(t, result.Reason, "daily token limit")
}
func TestAllowanceServiceCheck_Bad_JobLimitExceeded(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
DailyJobLimit: 5,
})
_ = store.IncrementUsage("agent-1", 0, 5)
result, err := svc.Check("agent-1", "")
require.NoError(t, err)
assert.False(t, result.Allowed)
assert.Contains(t, result.Reason, "daily job limit")
}
func TestAllowanceServiceCheck_Bad_ConcurrentLimitReached(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
ConcurrentJobs: 1,
})
_ = store.IncrementUsage("agent-1", 0, 1) // 1 active job
result, err := svc.Check("agent-1", "")
require.NoError(t, err)
assert.False(t, result.Allowed)
assert.Contains(t, result.Reason, "concurrent job limit")
}
func TestAllowanceServiceCheck_Bad_ModelNotInAllowlist(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
ModelAllowlist: []string{"claude-sonnet-4-5-20250929"},
})
result, err := svc.Check("agent-1", "claude-opus-4-6")
require.NoError(t, err)
assert.False(t, result.Allowed)
assert.Contains(t, result.Reason, "model not in allowlist")
}
func TestAllowanceServiceCheck_Good_ModelInAllowlist(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
ModelAllowlist: []string{"claude-sonnet-4-5-20250929", "claude-haiku-4-5-20251001"},
})
result, err := svc.Check("agent-1", "claude-sonnet-4-5-20250929")
require.NoError(t, err)
assert.True(t, result.Allowed)
}
func TestAllowanceServiceCheck_Good_EmptyModelSkipsCheck(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
ModelAllowlist: []string{"claude-sonnet-4-5-20250929"},
})
result, err := svc.Check("agent-1", "")
require.NoError(t, err)
assert.True(t, result.Allowed)
}
func TestAllowanceServiceCheck_Bad_GlobalModelBudgetExceeded(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.SetAllowance(&AgentAllowance{
AgentID: "agent-1",
})
store.SetModelQuota(&ModelQuota{
Model: "claude-opus-4-6",
DailyTokenBudget: 500000,
})
_ = store.IncrementModelUsage("claude-opus-4-6", 500001)
result, err := svc.Check("agent-1", "claude-opus-4-6")
require.NoError(t, err)
assert.False(t, result.Allowed)
assert.Contains(t, result.Reason, "global model token budget")
}
func TestAllowanceServiceCheck_Bad_NoAllowance(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_, err := svc.Check("unknown-agent", "")
require.Error(t, err)
}
// --- AllowanceService.RecordUsage tests ---
func TestAllowanceServiceRecordUsage_Good_JobStarted(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
err := svc.RecordUsage(UsageReport{
AgentID: "agent-1",
JobID: "job-1",
Event: QuotaEventJobStarted,
})
require.NoError(t, err)
usage, _ := store.GetUsage("agent-1")
assert.Equal(t, 1, usage.JobsStarted)
assert.Equal(t, 1, usage.ActiveJobs)
assert.Equal(t, int64(0), usage.TokensUsed)
}
func TestAllowanceServiceRecordUsage_Good_JobCompleted(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
// Start a job first
_ = svc.RecordUsage(UsageReport{
AgentID: "agent-1",
JobID: "job-1",
Event: QuotaEventJobStarted,
})
err := svc.RecordUsage(UsageReport{
AgentID: "agent-1",
JobID: "job-1",
Model: "claude-sonnet",
TokensIn: 1000,
TokensOut: 500,
Event: QuotaEventJobCompleted,
})
require.NoError(t, err)
usage, _ := store.GetUsage("agent-1")
assert.Equal(t, int64(1500), usage.TokensUsed)
assert.Equal(t, 0, usage.ActiveJobs)
modelUsage, _ := store.GetModelUsage("claude-sonnet")
assert.Equal(t, int64(1500), modelUsage)
}
func TestAllowanceServiceRecordUsage_Good_JobFailed_ReturnsHalf(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = svc.RecordUsage(UsageReport{
AgentID: "agent-1",
JobID: "job-1",
Event: QuotaEventJobStarted,
})
err := svc.RecordUsage(UsageReport{
AgentID: "agent-1",
JobID: "job-1",
Model: "claude-sonnet",
TokensIn: 1000,
TokensOut: 1000,
Event: QuotaEventJobFailed,
})
require.NoError(t, err)
usage, _ := store.GetUsage("agent-1")
// 2000 tokens used, 1000 returned (50%) = 1000 net
assert.Equal(t, int64(1000), usage.TokensUsed)
assert.Equal(t, 0, usage.ActiveJobs)
// Model sees net usage (2000 - 1000 = 1000)
modelUsage, _ := store.GetModelUsage("claude-sonnet")
assert.Equal(t, int64(1000), modelUsage)
}
func TestAllowanceServiceRecordUsage_Good_JobCancelled_ReturnsAll(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.IncrementUsage("agent-1", 5000, 1) // simulate pre-existing usage
err := svc.RecordUsage(UsageReport{
AgentID: "agent-1",
JobID: "job-1",
TokensIn: 500,
TokensOut: 500,
Event: QuotaEventJobCancelled,
})
require.NoError(t, err)
usage, _ := store.GetUsage("agent-1")
// 5000 pre-existing - 1000 returned = 4000
assert.Equal(t, int64(4000), usage.TokensUsed)
assert.Equal(t, 0, usage.ActiveJobs)
}
// --- AllowanceService.ResetAgent tests ---
func TestAllowanceServiceResetAgent_Good(t *testing.T) {
store := NewMemoryStore()
svc := NewAllowanceService(store)
_ = store.IncrementUsage("agent-1", 50000, 5)
err := svc.ResetAgent("agent-1")
require.NoError(t, err)
usage, _ := store.GetUsage("agent-1")
assert.Equal(t, int64(0), usage.TokensUsed)
assert.Equal(t, 0, usage.JobsStarted)
}
// --- startOfDay helper test ---
func TestStartOfDay_Good(t *testing.T) {
input := time.Date(2026, 2, 10, 15, 30, 45, 0, time.UTC)
expected := time.Date(2026, 2, 10, 0, 0, 0, 0, time.UTC)
assert.Equal(t, expected, startOfDay(input))
}
// --- AllowanceStatus tests ---
func TestAllowanceStatus_Good_Values(t *testing.T) {
assert.Equal(t, AllowanceStatus("ok"), AllowanceOK)
assert.Equal(t, AllowanceStatus("warning"), AllowanceWarning)
assert.Equal(t, AllowanceStatus("exceeded"), AllowanceExceeded)
}