feat(agentci): rate limiting and native Go dispatch runner
Adds pkg/ratelimit for Gemini API rate limiting with sliding window
(RPM/TPM/RPD), persistent state, and token counting. Replaces the
bash agent-runner.sh with a native Go implementation under
`core ai dispatch {run,watch,status}` for local queue processing.
Rate limiting:
- Per-model quotas (RPM, TPM, RPD) with 1-minute sliding window
- WaitForCapacity blocks until capacity available or context cancelled
- Persistent state in ~/.core/ratelimits.yaml
- Default quotas for Gemini 3 Pro/Flash, 2.5 Pro, 2.0 Flash/Lite
- CountTokens helper calls Google tokenizer API
- CLI: core ai ratelimits {show,reset,count,config,check}
Dispatch runner:
- core ai dispatch run — process single ticket from queue
- core ai dispatch watch — daemon mode with configurable interval
- core ai dispatch status — show queue/active/done counts
- Supports claude/codex/gemini runners with rate-limited Gemini
- File-based locking with stale PID detection
- Completion handler updates issue labels on success/failure
Closes #42
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 02:59:17 +00:00
|
|
|
package ai
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"context"
|
|
|
|
|
|
2026-02-16 14:24:37 +00:00
|
|
|
"forge.lthn.ai/core/go/pkg/log"
|
|
|
|
|
"forge.lthn.ai/core/go/pkg/ratelimit"
|
feat(agentci): rate limiting and native Go dispatch runner
Adds pkg/ratelimit for Gemini API rate limiting with sliding window
(RPM/TPM/RPD), persistent state, and token counting. Replaces the
bash agent-runner.sh with a native Go implementation under
`core ai dispatch {run,watch,status}` for local queue processing.
Rate limiting:
- Per-model quotas (RPM, TPM, RPD) with 1-minute sliding window
- WaitForCapacity blocks until capacity available or context cancelled
- Persistent state in ~/.core/ratelimits.yaml
- Default quotas for Gemini 3 Pro/Flash, 2.5 Pro, 2.0 Flash/Lite
- CountTokens helper calls Google tokenizer API
- CLI: core ai ratelimits {show,reset,count,config,check}
Dispatch runner:
- core ai dispatch run — process single ticket from queue
- core ai dispatch watch — daemon mode with configurable interval
- core ai dispatch status — show queue/active/done counts
- Supports claude/codex/gemini runners with rate-limited Gemini
- File-based locking with stale PID detection
- Completion handler updates issue labels on success/failure
Closes #42
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 02:59:17 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// executeWithRateLimit wraps an agent execution with rate limiting logic.
|
|
|
|
|
// It estimates token usage, waits for capacity, executes the runner, and records usage.
|
|
|
|
|
func executeWithRateLimit(ctx context.Context, model, prompt string, runner func() (bool, int, error)) (bool, int, error) {
|
|
|
|
|
rl, err := ratelimit.New()
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Warn("Failed to initialize rate limiter, proceeding without limits", "error", err)
|
|
|
|
|
return runner()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if err := rl.Load(); err != nil {
|
|
|
|
|
log.Warn("Failed to load rate limit state", "error", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Estimate tokens from prompt length (1 token ≈ 4 chars)
|
|
|
|
|
estTokens := len(prompt) / 4
|
|
|
|
|
if estTokens == 0 {
|
|
|
|
|
estTokens = 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
log.Info("Checking rate limits", "model", model, "est_tokens", estTokens)
|
|
|
|
|
|
|
|
|
|
if err := rl.WaitForCapacity(ctx, model, estTokens); err != nil {
|
|
|
|
|
return false, -1, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
success, exitCode, runErr := runner()
|
|
|
|
|
|
|
|
|
|
// Record usage with conservative output estimate (actual tokens unknown from shell runner).
|
|
|
|
|
outputEst := estTokens / 10
|
|
|
|
|
if outputEst < 50 {
|
|
|
|
|
outputEst = 50
|
|
|
|
|
}
|
|
|
|
|
rl.RecordUsage(model, estTokens, outputEst)
|
|
|
|
|
|
|
|
|
|
if err := rl.Persist(); err != nil {
|
|
|
|
|
log.Warn("Failed to persist rate limit state", "error", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return success, exitCode, runErr
|
|
|
|
|
}
|