feat(agentci): rate limiting and native Go dispatch runner
Adds pkg/ratelimit for Gemini API rate limiting with sliding window
(RPM/TPM/RPD), persistent state, and token counting. Replaces the
bash agent-runner.sh with a native Go implementation under
`core ai dispatch {run,watch,status}` for local queue processing.
Rate limiting:
- Per-model quotas (RPM, TPM, RPD) with 1-minute sliding window
- WaitForCapacity blocks until capacity available or context cancelled
- Persistent state in ~/.core/ratelimits.yaml
- Default quotas for Gemini 3 Pro/Flash, 2.5 Pro, 2.0 Flash/Lite
- CountTokens helper calls Google tokenizer API
- CLI: core ai ratelimits {show,reset,count,config,check}
Dispatch runner:
- core ai dispatch run — process single ticket from queue
- core ai dispatch watch — daemon mode with configurable interval
- core ai dispatch status — show queue/active/done counts
- Supports claude/codex/gemini runners with rate-limited Gemini
- File-based locking with stale PID detection
- Completion handler updates issue labels on success/failure
Closes #42
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 02:59:17 +00:00
|
|
|
package ai
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"fmt"
|
|
|
|
|
"os"
|
|
|
|
|
"strconv"
|
|
|
|
|
"text/tabwriter"
|
|
|
|
|
"time"
|
|
|
|
|
|
2026-02-16 14:22:18 +00:00
|
|
|
"forge.lthn.ai/core/go/pkg/cli"
|
|
|
|
|
"forge.lthn.ai/core/go/pkg/config"
|
|
|
|
|
"forge.lthn.ai/core/go/pkg/ratelimit"
|
feat(agentci): rate limiting and native Go dispatch runner
Adds pkg/ratelimit for Gemini API rate limiting with sliding window
(RPM/TPM/RPD), persistent state, and token counting. Replaces the
bash agent-runner.sh with a native Go implementation under
`core ai dispatch {run,watch,status}` for local queue processing.
Rate limiting:
- Per-model quotas (RPM, TPM, RPD) with 1-minute sliding window
- WaitForCapacity blocks until capacity available or context cancelled
- Persistent state in ~/.core/ratelimits.yaml
- Default quotas for Gemini 3 Pro/Flash, 2.5 Pro, 2.0 Flash/Lite
- CountTokens helper calls Google tokenizer API
- CLI: core ai ratelimits {show,reset,count,config,check}
Dispatch runner:
- core ai dispatch run — process single ticket from queue
- core ai dispatch watch — daemon mode with configurable interval
- core ai dispatch status — show queue/active/done counts
- Supports claude/codex/gemini runners with rate-limited Gemini
- File-based locking with stale PID detection
- Completion handler updates issue labels on success/failure
Closes #42
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 02:59:17 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// AddRateLimitCommands registers the 'ratelimits' subcommand group under 'ai'.
|
|
|
|
|
func AddRateLimitCommands(parent *cli.Command) {
|
|
|
|
|
rlCmd := &cli.Command{
|
|
|
|
|
Use: "ratelimits",
|
|
|
|
|
Short: "Manage Gemini API rate limits",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
rlCmd.AddCommand(rlShowCmd())
|
|
|
|
|
rlCmd.AddCommand(rlResetCmd())
|
|
|
|
|
rlCmd.AddCommand(rlCountCmd())
|
|
|
|
|
rlCmd.AddCommand(rlConfigCmd())
|
|
|
|
|
rlCmd.AddCommand(rlCheckCmd())
|
|
|
|
|
|
|
|
|
|
parent.AddCommand(rlCmd)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func rlShowCmd() *cli.Command {
|
|
|
|
|
return &cli.Command{
|
|
|
|
|
Use: "show",
|
|
|
|
|
Short: "Show current rate limit usage",
|
|
|
|
|
RunE: func(cmd *cli.Command, args []string) error {
|
|
|
|
|
rl, err := ratelimit.New()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if err := rl.Load(); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
stats := rl.AllStats()
|
|
|
|
|
|
|
|
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
|
|
|
|
|
fmt.Fprintln(w, "MODEL\tRPM\tTPM\tRPD\tSTATUS")
|
|
|
|
|
|
|
|
|
|
for model, s := range stats {
|
|
|
|
|
rpmStr := fmt.Sprintf("%d/%s", s.RPM, formatLimit(s.MaxRPM))
|
|
|
|
|
tpmStr := fmt.Sprintf("%d/%s", s.TPM, formatLimit(s.MaxTPM))
|
|
|
|
|
rpdStr := fmt.Sprintf("%d/%s", s.RPD, formatLimit(s.MaxRPD))
|
|
|
|
|
|
|
|
|
|
status := "OK"
|
|
|
|
|
if (s.MaxRPM > 0 && s.RPM >= s.MaxRPM) ||
|
|
|
|
|
(s.MaxTPM > 0 && s.TPM >= s.MaxTPM) ||
|
|
|
|
|
(s.MaxRPD > 0 && s.RPD >= s.MaxRPD) {
|
|
|
|
|
status = "LIMITED"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", model, rpmStr, tpmStr, rpdStr, status)
|
|
|
|
|
}
|
|
|
|
|
w.Flush()
|
|
|
|
|
return nil
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func rlResetCmd() *cli.Command {
|
|
|
|
|
return &cli.Command{
|
|
|
|
|
Use: "reset [model]",
|
|
|
|
|
Short: "Reset usage counters for a model (or all)",
|
|
|
|
|
RunE: func(cmd *cli.Command, args []string) error {
|
|
|
|
|
rl, err := ratelimit.New()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if err := rl.Load(); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
model := ""
|
|
|
|
|
if len(args) > 0 {
|
|
|
|
|
model = args[0]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
rl.Reset(model)
|
|
|
|
|
if err := rl.Persist(); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if model == "" {
|
|
|
|
|
fmt.Println("Reset stats for all models.")
|
|
|
|
|
} else {
|
|
|
|
|
fmt.Printf("Reset stats for model %q.\n", model)
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func rlCountCmd() *cli.Command {
|
|
|
|
|
return &cli.Command{
|
|
|
|
|
Use: "count <model> <text>",
|
|
|
|
|
Short: "Count tokens for text using Gemini API",
|
|
|
|
|
Args: cli.ExactArgs(2),
|
|
|
|
|
RunE: func(cmd *cli.Command, args []string) error {
|
|
|
|
|
model := args[0]
|
|
|
|
|
text := args[1]
|
|
|
|
|
|
|
|
|
|
cfg, err := config.New()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var apiKey string
|
|
|
|
|
if err := cfg.Get("agentci.gemini_api_key", &apiKey); err != nil || apiKey == "" {
|
|
|
|
|
apiKey = os.Getenv("GEMINI_API_KEY")
|
|
|
|
|
}
|
|
|
|
|
if apiKey == "" {
|
|
|
|
|
return fmt.Errorf("GEMINI_API_KEY not found in config or env")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
count, err := ratelimit.CountTokens(apiKey, model, text)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf("Model: %s\nTokens: %d\n", model, count)
|
|
|
|
|
return nil
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func rlConfigCmd() *cli.Command {
|
|
|
|
|
return &cli.Command{
|
|
|
|
|
Use: "config",
|
|
|
|
|
Short: "Show configured quotas",
|
|
|
|
|
RunE: func(cmd *cli.Command, args []string) error {
|
|
|
|
|
rl, err := ratelimit.New()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
|
|
|
|
|
fmt.Fprintln(w, "MODEL\tMAX RPM\tMAX TPM\tMAX RPD")
|
|
|
|
|
|
|
|
|
|
for model, q := range rl.Quotas {
|
|
|
|
|
fmt.Fprintf(w, "%s\t%s\t%s\t%s\n",
|
|
|
|
|
model,
|
|
|
|
|
formatLimit(q.MaxRPM),
|
|
|
|
|
formatLimit(q.MaxTPM),
|
|
|
|
|
formatLimit(q.MaxRPD))
|
|
|
|
|
}
|
|
|
|
|
w.Flush()
|
|
|
|
|
return nil
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func rlCheckCmd() *cli.Command {
|
|
|
|
|
return &cli.Command{
|
|
|
|
|
Use: "check <model> <estimated-tokens>",
|
|
|
|
|
Short: "Check rate limit capacity for a model",
|
|
|
|
|
Args: cli.ExactArgs(2),
|
|
|
|
|
RunE: func(cmd *cli.Command, args []string) error {
|
|
|
|
|
model := args[0]
|
|
|
|
|
tokens, err := strconv.Atoi(args[1])
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("invalid token count: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
rl, err := ratelimit.New()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if err := rl.Load(); err != nil {
|
|
|
|
|
fmt.Printf("Warning: could not load existing state: %v\n", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
stats := rl.Stats(model)
|
|
|
|
|
canSend := rl.CanSend(model, tokens)
|
|
|
|
|
|
|
|
|
|
status := "RATE LIMITED"
|
|
|
|
|
if canSend {
|
|
|
|
|
status = "OK"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fmt.Printf("Model: %s\n", model)
|
|
|
|
|
fmt.Printf("Request Cost: %d tokens\n", tokens)
|
|
|
|
|
fmt.Printf("Status: %s\n", status)
|
|
|
|
|
fmt.Printf("\nCurrent Usage (1m window):\n")
|
|
|
|
|
fmt.Printf(" RPM: %d / %s\n", stats.RPM, formatLimit(stats.MaxRPM))
|
|
|
|
|
fmt.Printf(" TPM: %d / %s\n", stats.TPM, formatLimit(stats.MaxTPM))
|
|
|
|
|
fmt.Printf(" RPD: %d / %s (reset: %s)\n", stats.RPD, formatLimit(stats.MaxRPD), stats.DayStart.Format(time.RFC3339))
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func formatLimit(limit int) string {
|
|
|
|
|
if limit == 0 {
|
|
|
|
|
return "∞"
|
|
|
|
|
}
|
|
|
|
|
if limit >= 1000000 {
|
|
|
|
|
return fmt.Sprintf("%dM", limit/1000000)
|
|
|
|
|
}
|
|
|
|
|
if limit >= 1000 {
|
|
|
|
|
return fmt.Sprintf("%dK", limit/1000)
|
|
|
|
|
}
|
|
|
|
|
return fmt.Sprintf("%d", limit)
|
|
|
|
|
}
|