// go/pkg/agentic/allowance.go

package agentic

import (
	"sync"
	"time"
)

// AllowanceStatus is the string-typed state of an agent's quota as reported
// by an allowance check.
type AllowanceStatus string

// Known AllowanceStatus values. The string forms are what gets serialized.
const (
	// AllowanceOK means the agent still has quota available.
	AllowanceOK = AllowanceStatus("ok")
	// AllowanceWarning means the agent is at 80%+ usage. The threshold itself
	// is applied by the checking code, which is not part of this file.
	AllowanceWarning = AllowanceStatus("warning")
	// AllowanceExceeded means the agent has gone past its quota.
	AllowanceExceeded = AllowanceStatus("exceeded")
)

// AgentAllowance defines the quota limits for a single agent.
//
// It is a plain configuration record, keyed by AgentID when stored. The
// "0 means unlimited" / "empty means all" conventions documented on the
// fields are interpreted by the code that checks quotas, not by this type.
type AgentAllowance struct {
	// AgentID is the unique identifier for the agent.
	AgentID string `json:"agent_id" yaml:"agent_id"`
	// DailyTokenLimit is the maximum tokens (in+out) per 24h. 0 means unlimited.
	DailyTokenLimit int64 `json:"daily_token_limit" yaml:"daily_token_limit"`
	// DailyJobLimit is the maximum jobs per 24h. 0 means unlimited.
	DailyJobLimit int `json:"daily_job_limit" yaml:"daily_job_limit"`
	// ConcurrentJobs is the maximum simultaneous jobs. 0 means unlimited.
	ConcurrentJobs int `json:"concurrent_jobs" yaml:"concurrent_jobs"`
	// MaxJobDuration is the maximum job duration before kill. 0 means unlimited.
	// Note that encoding/json encodes a time.Duration as an integer number of
	// nanoseconds.
	MaxJobDuration time.Duration `json:"max_job_duration" yaml:"max_job_duration"`
	// ModelAllowlist restricts which models this agent can use. Empty means all.
	// The JSON tag omits the field when empty; the YAML tag does not.
	ModelAllowlist []string `json:"model_allowlist,omitempty" yaml:"model_allowlist"`
}

// ModelQuota defines global per-model limits across all agents.
//
// It is keyed by Model when stored. This type only carries the limits;
// enforcement happens in code outside this file.
type ModelQuota struct {
	// Model is the model identifier (e.g. "claude-sonnet-4-5-20250929").
	Model string `json:"model" yaml:"model"`
	// DailyTokenBudget is the total tokens across all agents per 24h.
	DailyTokenBudget int64 `json:"daily_token_budget" yaml:"daily_token_budget"`
	// HourlyRateLimit is the max requests per hour.
	HourlyRateLimit int `json:"hourly_rate_limit" yaml:"hourly_rate_limit"`
	// CostCeiling stops all usage if cumulative cost exceeds this (in cents).
	CostCeiling int64 `json:"cost_ceiling" yaml:"cost_ceiling"`
}

// RepoLimit defines per-repository rate limits.
//
// Nothing in this file reads RepoLimit; it is a configuration record
// consumed elsewhere.
type RepoLimit struct {
	// Repo is the repository identifier (e.g. "owner/repo").
	Repo string `json:"repo" yaml:"repo"`
	// MaxDailyPRs is the maximum PRs per day. 0 means unlimited.
	MaxDailyPRs int `json:"max_daily_prs" yaml:"max_daily_prs"`
	// MaxDailyIssues is the maximum issues per day. 0 means unlimited.
	MaxDailyIssues int `json:"max_daily_issues" yaml:"max_daily_issues"`
	// CooldownAfterFailure is the wait time after a failure before retrying.
	// Note that encoding/json encodes a time.Duration as an integer number of
	// nanoseconds.
	CooldownAfterFailure time.Duration `json:"cooldown_after_failure" yaml:"cooldown_after_failure"`
}

// UsageRecord tracks an agent's current usage within a quota period.
//
// TokensUsed and JobsStarted accumulate over the period, while ActiveJobs is
// a live gauge that goes up when jobs start and down when they finish.
type UsageRecord struct {
	// AgentID is the agent this record belongs to.
	AgentID string `json:"agent_id"`
	// TokensUsed is the total tokens consumed in the current period.
	TokensUsed int64 `json:"tokens_used"`
	// JobsStarted is the total jobs started in the current period.
	JobsStarted int `json:"jobs_started"`
	// ActiveJobs is the number of currently running jobs.
	ActiveJobs int `json:"active_jobs"`
	// PeriodStart is when the current quota period began. MemoryStore sets it
	// to midnight UTC of the day the record was created or reset.
	PeriodStart time.Time `json:"period_start"`
}

// QuotaCheckResult is the outcome of a pre-dispatch allowance check.
//
// It is a plain result record; the check that fills it in lives outside this
// file.
type QuotaCheckResult struct {
	// Allowed indicates whether the agent may proceed.
	Allowed bool `json:"allowed"`
	// Status is the current allowance state.
	Status AllowanceStatus `json:"status"`
	// RemainingTokens is the number of tokens remaining in the period.
	RemainingTokens int64 `json:"remaining_tokens"`
	// RemainingJobs is the number of jobs remaining in the period.
	RemainingJobs int `json:"remaining_jobs"`
	// Reason explains why the check failed (if !Allowed). It is omitted from
	// JSON when empty.
	Reason string `json:"reason,omitempty"`
}

// QuotaEvent names a job lifecycle transition that affects quota accounting;
// it is used for quota recovery. The accounting itself is performed by code
// outside this file, so the effects described below are the documented
// intent rather than something these constants enforce.
type QuotaEvent string

// Known QuotaEvent values. The string forms are what gets serialized.
const (
	// QuotaEventJobStarted marks the start of a job, when quota is deducted.
	QuotaEventJobStarted = QuotaEvent("job_started")
	// QuotaEventJobCompleted marks a finished job; nothing further is
	// deducted because the usage was already counted.
	QuotaEventJobCompleted = QuotaEvent("job_completed")
	// QuotaEventJobFailed marks a failed job, for which 50% of the token
	// quota is returned.
	QuotaEventJobFailed = QuotaEvent("job_failed")
	// QuotaEventJobCancelled marks a cancelled job, for which 100% of the
	// token quota is returned.
	QuotaEventJobCancelled = QuotaEvent("job_cancelled")
)

// UsageReport is emitted by the agent runner to report token consumption.
//
// One report describes one quota event for one job. Nothing in this file
// consumes it; it is a message type processed elsewhere.
type UsageReport struct {
	// AgentID is the agent that consumed tokens.
	AgentID string `json:"agent_id"`
	// JobID identifies the specific job.
	JobID string `json:"job_id"`
	// Model is the model used.
	Model string `json:"model"`
	// TokensIn is the number of input tokens consumed.
	TokensIn int64 `json:"tokens_in"`
	// TokensOut is the number of output tokens consumed.
	TokensOut int64 `json:"tokens_out"`
	// Event is the type of quota event.
	Event QuotaEvent `json:"event"`
	// Timestamp is when the usage occurred.
	Timestamp time.Time `json:"timestamp"`
}

// AllowanceStore is the interface for persisting and querying allowance data.
// Implementations may use Redis, SQLite, or any backing store.
//
// The methods fall into three groups: per-agent limits (GetAllowance,
// SetAllowance), per-agent usage counters (GetUsage, IncrementUsage,
// DecrementActiveJobs, ReturnTokens, ResetUsage) and per-model limits and
// usage (GetModelQuota, GetModelUsage, IncrementModelUsage). MemoryStore in
// this file is the in-memory implementation.
type AllowanceStore interface {
	// GetAllowance returns the quota limits for an agent.
	GetAllowance(agentID string) (*AgentAllowance, error)
	// SetAllowance persists quota limits for an agent.
	SetAllowance(a *AgentAllowance) error
	// GetUsage returns the current usage record for an agent.
	GetUsage(agentID string) (*UsageRecord, error)
	// IncrementUsage atomically adds to an agent's usage counters.
	// tokens is added to the tokens used and jobs to the jobs started.
	IncrementUsage(agentID string, tokens int64, jobs int) error
	// DecrementActiveJobs reduces the active job count by 1.
	DecrementActiveJobs(agentID string) error
	// ReturnTokens adds tokens back to the agent's remaining quota,
	// i.e. it lowers the recorded usage.
	ReturnTokens(agentID string, tokens int64) error
	// ResetUsage clears usage counters for an agent (daily reset).
	ResetUsage(agentID string) error
	// GetModelQuota returns global limits for a model.
	GetModelQuota(model string) (*ModelQuota, error)
	// GetModelUsage returns current token usage for a model.
	GetModelUsage(model string) (int64, error)
	// IncrementModelUsage atomically adds to a model's usage counter.
	IncrementModelUsage(model string, tokens int64) error
}

// MemoryStore is an in-memory AllowanceStore for testing and single-node use.
//
// All four maps are guarded by mu: read-only methods take the read lock and
// mutating methods take the write lock. allowances and usage are keyed by
// agent ID; modelQuotas and modelUsage are keyed by model identifier. The
// maps must be initialized before use, so construct values with
// NewMemoryStore rather than using the zero value.
type MemoryStore struct {
	mu          sync.RWMutex
	allowances  map[string]*AgentAllowance
	usage       map[string]*UsageRecord
	modelQuotas map[string]*ModelQuota
	modelUsage  map[string]int64
}

// NewMemoryStore returns an empty in-memory allowance store whose internal
// maps are all allocated and ready for use.
func NewMemoryStore() *MemoryStore {
	store := new(MemoryStore)
	store.allowances = map[string]*AgentAllowance{}
	store.usage = map[string]*UsageRecord{}
	store.modelQuotas = map[string]*ModelQuota{}
	store.modelUsage = map[string]int64{}
	return store
}

// GetAllowance returns a copy of the quota limits stored for agentID.
//
// The result is fully detached from the store: the struct is copied and
// ModelAllowlist is given its own backing array, so callers may modify the
// returned value without affecting stored state. When no allowance has been
// set for the agent, it returns nil and an *APIError with Code 404.
func (m *MemoryStore) GetAllowance(agentID string) (*AgentAllowance, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()
	a, ok := m.allowances[agentID]
	if !ok {
		return nil, &APIError{Code: 404, Message: "allowance not found for agent: " + agentID}
	}
	cp := *a
	// A struct copy alone still shares the slice's backing array with the
	// stored allowance. Clone it (the [:0:0] form preserves nil-ness) so the
	// caller cannot mutate store state outside the lock.
	cp.ModelAllowlist = append(a.ModelAllowlist[:0:0], a.ModelAllowlist...)
	return &cp, nil
}

// SetAllowance stores a copy of a, keyed by a.AgentID, replacing any
// allowance previously stored for that agent.
//
// The stored value is detached from the argument: the struct is copied and
// ModelAllowlist is given its own backing array, so later changes the caller
// makes to a (or to its slice) do not leak into the store. a must be non-nil.
// The in-memory implementation always returns nil.
func (m *MemoryStore) SetAllowance(a *AgentAllowance) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	cp := *a
	// A struct copy alone would keep sharing the caller's backing array.
	// Clone it (the [:0:0] form preserves nil-ness) so the caller holds no
	// alias into state guarded by the lock.
	cp.ModelAllowlist = append(a.ModelAllowlist[:0:0], a.ModelAllowlist...)
	m.allowances[a.AgentID] = &cp
	return nil
}

// GetUsage returns a copy of the usage record for agentID.
//
// If the agent has no recorded usage, a zeroed record whose period starts at
// midnight UTC today is returned; that record is not stored. The error is
// always nil for the in-memory implementation.
func (m *MemoryStore) GetUsage(agentID string) (*UsageRecord, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	if rec, found := m.usage[agentID]; found {
		// Hand out a copy so callers never hold a pointer into the map.
		snapshot := *rec
		return &snapshot, nil
	}

	empty := UsageRecord{
		AgentID:     agentID,
		PeriodStart: startOfDay(time.Now().UTC()),
	}
	return &empty, nil
}

// IncrementUsage adds tokens and jobs to the agent's usage counters under the
// store's write lock, creating the usage record (with a period starting at
// midnight UTC today) if the agent has none yet.
//
// jobs is always added to JobsStarted; it is added to ActiveJobs only when
// positive. The error is always nil for the in-memory implementation.
func (m *MemoryStore) IncrementUsage(agentID string, tokens int64, jobs int) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	rec, found := m.usage[agentID]
	if !found {
		rec = &UsageRecord{AgentID: agentID, PeriodStart: startOfDay(time.Now().UTC())}
		m.usage[agentID] = rec
	}

	rec.TokensUsed += tokens
	rec.JobsStarted += jobs
	// Only newly started jobs count as active; a zero or negative delta
	// leaves the active gauge untouched.
	if jobs > 0 {
		rec.ActiveJobs += jobs
	}
	return nil
}

// DecrementActiveJobs lowers the agent's active job count by one.
//
// It is a no-op when the agent has no usage record or when the count is
// already zero, so the gauge never goes negative. The error is always nil
// for the in-memory implementation.
func (m *MemoryStore) DecrementActiveJobs(agentID string) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if rec, found := m.usage[agentID]; found && rec.ActiveJobs > 0 {
		rec.ActiveJobs--
	}
	return nil
}

// ReturnTokens gives tokens back to the agent by subtracting them from
// TokensUsed, clamping the result at zero.
//
// It is a no-op when the agent has no usage record. The error is always nil
// for the in-memory implementation.
func (m *MemoryStore) ReturnTokens(agentID string, tokens int64) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	rec, found := m.usage[agentID]
	if !found {
		return nil
	}

	used := rec.TokensUsed - tokens
	if used < 0 {
		// Never report negative usage, even if more is returned than was used.
		used = 0
	}
	rec.TokensUsed = used
	return nil
}

// ResetUsage replaces the agent's usage record with a fresh, zeroed one whose
// period starts at midnight UTC today. A record is created even if the agent
// had none before. The error is always nil for the in-memory implementation.
//
// NOTE(review): the replacement also zeroes ActiveJobs, so jobs still running
// at reset time stop counting as active. Confirm this is intended.
func (m *MemoryStore) ResetUsage(agentID string) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	fresh := new(UsageRecord)
	fresh.AgentID = agentID
	fresh.PeriodStart = startOfDay(time.Now().UTC())
	m.usage[agentID] = fresh
	return nil
}

// GetModelQuota returns a copy of the global limits configured for model.
//
// When no quota has been set for the model, it returns nil and an *APIError
// with Code 404.
func (m *MemoryStore) GetModelQuota(model string) (*ModelQuota, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	if quota, found := m.modelQuotas[model]; found {
		// Hand out a copy so callers never hold a pointer into the map.
		out := *quota
		return &out, nil
	}
	return nil, &APIError{Code: 404, Message: "model quota not found: " + model}
}

// GetModelUsage returns the tokens recorded against model so far, or 0 when
// nothing has been recorded for it. The error is always nil for the
// in-memory implementation.
func (m *MemoryStore) GetModelUsage(model string) (int64, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	used := m.modelUsage[model]
	return used, nil
}

// IncrementModelUsage adds tokens to the usage counter for model under the
// store's write lock. A model without a counter starts from zero. The error
// is always nil for the in-memory implementation.
func (m *MemoryStore) IncrementModelUsage(model string, tokens int64) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.modelUsage[model] = m.modelUsage[model] + tokens
	return nil
}

// SetModelQuota stores a copy of q, keyed by q.Model, replacing any quota
// previously set for that model (used in testing). It is not part of the
// AllowanceStore interface. q must be non-nil.
func (m *MemoryStore) SetModelQuota(q *ModelQuota) {
	m.mu.Lock()
	defer m.mu.Unlock()

	stored := new(ModelQuota)
	*stored = *q
	m.modelQuotas[stored.Model] = stored
}

// startOfDay returns 00:00:00 UTC of the UTC calendar day that contains t.
//
// t is converted to UTC before its date is extracted, so an instant expressed
// in another time zone maps to the UTC day it actually falls in rather than
// to its local calendar date relabelled as UTC.
func startOfDay(t time.Time) time.Time {
	y, mo, d := t.UTC().Date()
	return time.Date(y, mo, d, 0, 0, 0, 0, time.UTC)
}