package agentic

import (
	"sync"
	"time"
)

// AllowanceStatus indicates the current state of an agent's quota.
type AllowanceStatus string

const (
	// AllowanceOK indicates the agent has remaining quota.
	AllowanceOK AllowanceStatus = "ok"
	// AllowanceWarning indicates the agent is at 80%+ usage.
	AllowanceWarning AllowanceStatus = "warning"
	// AllowanceExceeded indicates the agent has exceeded its quota.
	AllowanceExceeded AllowanceStatus = "exceeded"
)

// AgentAllowance defines the quota limits for a single agent.
type AgentAllowance struct {
	// AgentID is the unique identifier for the agent.
	AgentID string `json:"agent_id" yaml:"agent_id"`
	// DailyTokenLimit is the maximum tokens (in+out) per 24h. 0 means unlimited.
	DailyTokenLimit int64 `json:"daily_token_limit" yaml:"daily_token_limit"`
	// DailyJobLimit is the maximum jobs per 24h. 0 means unlimited.
	DailyJobLimit int `json:"daily_job_limit" yaml:"daily_job_limit"`
	// ConcurrentJobs is the maximum simultaneous jobs. 0 means unlimited.
	ConcurrentJobs int `json:"concurrent_jobs" yaml:"concurrent_jobs"`
	// MaxJobDuration is the maximum job duration before kill. 0 means unlimited.
	MaxJobDuration time.Duration `json:"max_job_duration" yaml:"max_job_duration"`
	// ModelAllowlist restricts which models this agent can use. Empty means all.
	ModelAllowlist []string `json:"model_allowlist,omitempty" yaml:"model_allowlist"`
}

// ModelQuota defines global per-model limits across all agents.
type ModelQuota struct {
	// Model is the model identifier (e.g. "claude-sonnet-4-5-20250929").
	Model string `json:"model" yaml:"model"`
	// DailyTokenBudget is the total tokens across all agents per 24h.
	DailyTokenBudget int64 `json:"daily_token_budget" yaml:"daily_token_budget"`
	// HourlyRateLimit is the max requests per hour.
	HourlyRateLimit int `json:"hourly_rate_limit" yaml:"hourly_rate_limit"`
	// CostCeiling stops all usage if cumulative cost exceeds this (in cents).
	CostCeiling int64 `json:"cost_ceiling" yaml:"cost_ceiling"`
}

// RepoLimit defines per-repository rate limits.
type RepoLimit struct {
	// Repo is the repository identifier (e.g. "owner/repo").
	Repo string `json:"repo" yaml:"repo"`
	// MaxDailyPRs is the maximum PRs per day. 0 means unlimited.
	MaxDailyPRs int `json:"max_daily_prs" yaml:"max_daily_prs"`
	// MaxDailyIssues is the maximum issues per day. 0 means unlimited.
	MaxDailyIssues int `json:"max_daily_issues" yaml:"max_daily_issues"`
	// CooldownAfterFailure is the wait time after a failure before retrying.
	CooldownAfterFailure time.Duration `json:"cooldown_after_failure" yaml:"cooldown_after_failure"`
}

// UsageRecord tracks an agent's current usage within a quota period.
type UsageRecord struct {
	// AgentID is the agent this record belongs to.
	AgentID string `json:"agent_id"`
	// TokensUsed is the total tokens consumed in the current period.
	TokensUsed int64 `json:"tokens_used"`
	// JobsStarted is the total jobs started in the current period.
	JobsStarted int `json:"jobs_started"`
	// ActiveJobs is the number of currently running jobs.
	ActiveJobs int `json:"active_jobs"`
	// PeriodStart is when the current quota period began.
	PeriodStart time.Time `json:"period_start"`
}

// QuotaCheckResult is the outcome of a pre-dispatch allowance check.
type QuotaCheckResult struct {
	// Allowed indicates whether the agent may proceed.
	Allowed bool `json:"allowed"`
	// Status is the current allowance state.
	Status AllowanceStatus `json:"status"`
	// RemainingTokens is the number of tokens remaining in the period.
	RemainingTokens int64 `json:"remaining_tokens"`
	// RemainingJobs is the number of jobs remaining in the period.
	RemainingJobs int `json:"remaining_jobs"`
	// Reason explains why the check failed (if !Allowed).
	Reason string `json:"reason,omitempty"`
}

// QuotaEvent represents a change in quota usage, used for recovery.
type QuotaEvent string

const (
	// QuotaEventJobStarted deducts quota when a job begins.
	QuotaEventJobStarted QuotaEvent = "job_started"
	// QuotaEventJobCompleted deducts nothing (already counted).
	QuotaEventJobCompleted QuotaEvent = "job_completed"
	// QuotaEventJobFailed returns 50% of token quota.
	QuotaEventJobFailed QuotaEvent = "job_failed"
	// QuotaEventJobCancelled returns 100% of token quota.
	QuotaEventJobCancelled QuotaEvent = "job_cancelled"
)

// UsageReport is emitted by the agent runner to report token consumption.
type UsageReport struct {
	// AgentID is the agent that consumed tokens.
	AgentID string `json:"agent_id"`
	// JobID identifies the specific job.
	JobID string `json:"job_id"`
	// Model is the model used.
	Model string `json:"model"`
	// TokensIn is the number of input tokens consumed.
	TokensIn int64 `json:"tokens_in"`
	// TokensOut is the number of output tokens consumed.
	TokensOut int64 `json:"tokens_out"`
	// Event is the type of quota event.
	Event QuotaEvent `json:"event"`
	// Timestamp is when the usage occurred.
	Timestamp time.Time `json:"timestamp"`
}

// AllowanceStore is the interface for persisting and querying allowance data.
// Implementations may use Redis, SQLite, or any backing store.
type AllowanceStore interface {
	// GetAllowance returns the quota limits for an agent.
	GetAllowance(agentID string) (*AgentAllowance, error)
	// SetAllowance persists quota limits for an agent.
	SetAllowance(a *AgentAllowance) error
	// GetUsage returns the current usage record for an agent.
	GetUsage(agentID string) (*UsageRecord, error)
	// IncrementUsage atomically adds to an agent's usage counters.
	IncrementUsage(agentID string, tokens int64, jobs int) error
	// DecrementActiveJobs reduces the active job count by 1.
	DecrementActiveJobs(agentID string) error
	// ReturnTokens adds tokens back to the agent's remaining quota.
	ReturnTokens(agentID string, tokens int64) error
	// ResetUsage clears usage counters for an agent (daily reset).
	ResetUsage(agentID string) error
	// GetModelQuota returns global limits for a model.
	GetModelQuota(model string) (*ModelQuota, error)
	// GetModelUsage returns current token usage for a model.
	GetModelUsage(model string) (int64, error)
	// IncrementModelUsage atomically adds to a model's usage counter.
	IncrementModelUsage(model string, tokens int64) error
}

// MemoryStore is an in-memory AllowanceStore for testing and single-node use.
// All maps are guarded by mu; every method takes the lock before touching them.
type MemoryStore struct {
	mu          sync.RWMutex
	allowances  map[string]*AgentAllowance
	usage       map[string]*UsageRecord
	modelQuotas map[string]*ModelQuota
	modelUsage  map[string]int64
}

// NewMemoryStore creates a new in-memory allowance store with all maps
// initialised, so the returned store is immediately usable.
func NewMemoryStore() *MemoryStore {
	return &MemoryStore{
		allowances:  make(map[string]*AgentAllowance),
		usage:       make(map[string]*UsageRecord),
		modelQuotas: make(map[string]*ModelQuota),
		modelUsage:  make(map[string]int64),
	}
}

// cloneAllowance returns a deep copy of a. A plain struct copy would leave
// ModelAllowlist sharing its backing array with the source, letting callers
// mutate stored state without holding the store's lock. A nil allowlist stays
// nil in the copy.
func cloneAllowance(a *AgentAllowance) *AgentAllowance {
	cp := *a
	if a.ModelAllowlist != nil {
		cp.ModelAllowlist = make([]string, len(a.ModelAllowlist))
		copy(cp.ModelAllowlist, a.ModelAllowlist)
	}
	return &cp
}

// GetAllowance returns the quota limits for an agent.
//
// The result is a deep copy, so the caller may modify it (including its
// ModelAllowlist) without affecting the store. If no allowance has been set
// for agentID, it returns a nil allowance and an *APIError with Code 404.
func (m *MemoryStore) GetAllowance(agentID string) (*AgentAllowance, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	a, ok := m.allowances[agentID]
	if !ok {
		return nil, &APIError{Code: 404, Message: "allowance not found for agent: " + agentID}
	}
	return cloneAllowance(a), nil
}

// SetAllowance persists quota limits for an agent, keyed by a.AgentID and
// replacing any previous entry.
//
// The store keeps its own deep copy of a, so later changes the caller makes to
// a (or to its ModelAllowlist) are not visible through the store. It always
// returns nil.
func (m *MemoryStore) SetAllowance(a *AgentAllowance) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.allowances[a.AgentID] = cloneAllowance(a)
	return nil
}

// GetUsage returns the current usage record for an agent.
//
// The result is a copy of the stored record. If the agent has no record yet, a
// zeroed record whose PeriodStart is the start of the current UTC day is
// returned; that placeholder is not written to the store. The error is always
// nil.
func (m *MemoryStore) GetUsage(agentID string) (*UsageRecord, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	u, ok := m.usage[agentID]
	if !ok {
		return &UsageRecord{
			AgentID:     agentID,
			PeriodStart: startOfDay(time.Now().UTC()),
		}, nil
	}
	cp := *u
	return &cp, nil
}

// IncrementUsage atomically adds to an agent's usage counters.
func (m *MemoryStore) IncrementUsage(agentID string, tokens int64, jobs int) error { m.mu.Lock() defer m.mu.Unlock() u, ok := m.usage[agentID] if !ok { u = &UsageRecord{ AgentID: agentID, PeriodStart: startOfDay(time.Now().UTC()), } m.usage[agentID] = u } u.TokensUsed += tokens u.JobsStarted += jobs if jobs > 0 { u.ActiveJobs += jobs } return nil } // DecrementActiveJobs reduces the active job count by 1. func (m *MemoryStore) DecrementActiveJobs(agentID string) error { m.mu.Lock() defer m.mu.Unlock() u, ok := m.usage[agentID] if !ok { return nil } if u.ActiveJobs > 0 { u.ActiveJobs-- } return nil } // ReturnTokens adds tokens back to the agent's remaining quota. func (m *MemoryStore) ReturnTokens(agentID string, tokens int64) error { m.mu.Lock() defer m.mu.Unlock() u, ok := m.usage[agentID] if !ok { return nil } u.TokensUsed -= tokens if u.TokensUsed < 0 { u.TokensUsed = 0 } return nil } // ResetUsage clears usage counters for an agent. func (m *MemoryStore) ResetUsage(agentID string) error { m.mu.Lock() defer m.mu.Unlock() m.usage[agentID] = &UsageRecord{ AgentID: agentID, PeriodStart: startOfDay(time.Now().UTC()), } return nil } // GetModelQuota returns global limits for a model. func (m *MemoryStore) GetModelQuota(model string) (*ModelQuota, error) { m.mu.RLock() defer m.mu.RUnlock() q, ok := m.modelQuotas[model] if !ok { return nil, &APIError{Code: 404, Message: "model quota not found: " + model} } cp := *q return &cp, nil } // GetModelUsage returns current token usage for a model. func (m *MemoryStore) GetModelUsage(model string) (int64, error) { m.mu.RLock() defer m.mu.RUnlock() return m.modelUsage[model], nil } // IncrementModelUsage atomically adds to a model's usage counter. func (m *MemoryStore) IncrementModelUsage(model string, tokens int64) error { m.mu.Lock() defer m.mu.Unlock() m.modelUsage[model] += tokens return nil } // SetModelQuota sets global limits for a model (used in testing). 
func (m *MemoryStore) SetModelQuota(q *ModelQuota) { m.mu.Lock() defer m.mu.Unlock() cp := *q m.modelQuotas[q.Model] = &cp } // startOfDay returns midnight UTC for the given time. func startOfDay(t time.Time) time.Time { y, mo, d := t.Date() return time.Date(y, mo, d, 0, 0, 0, 0, time.UTC) }