cli/internal/cmd/ai/ratelimit_dispatch.go

package ai

import (
	"context"

	"github.com/host-uk/core/pkg/log"
	"github.com/host-uk/core/pkg/ratelimit"
)

// executeWithRateLimit runs runner under the rate limiter for the given model.
//
// The request size is approximated from the prompt length, the call waits in
// WaitForCapacity until the limiter admits that many tokens for model, and
// once runner has returned the estimated input and output usage is recorded
// and the limiter state is persisted.
//
// Failure modes:
//   - If the limiter cannot be constructed, a warning is logged and runner is
//     invoked directly, with no limiting and no usage recorded.
//   - If loading or persisting limiter state fails, a warning is logged and
//     execution continues.
//   - If WaitForCapacity returns an error, runner is never invoked and the
//     result is (false, -1, err).
//
// In every other case the three return values are exactly what runner returned.
func executeWithRateLimit(ctx context.Context, model, prompt string, runner func() (bool, int, error)) (bool, int, error) {
	limiter, initErr := ratelimit.New()
	if initErr != nil {
		log.Warn("Failed to initialize rate limiter, proceeding without limits", "error", initErr)
		return runner()
	}

	loadErr := limiter.Load()
	if loadErr != nil {
		log.Warn("Failed to load rate limit state", "error", loadErr)
	}

	// Heuristic input size: one token per four bytes of prompt (len counts
	// bytes), with a floor of one token so short prompts still count.
	inputTokens := 1
	if quarter := len(prompt) / 4; quarter != 0 {
		inputTokens = quarter
	}

	log.Info("Checking rate limits", "model", model, "est_tokens", inputTokens)

	waitErr := limiter.WaitForCapacity(ctx, model, inputTokens)
	if waitErr != nil {
		return false, -1, waitErr
	}

	ok, code, runErr := runner()

	// The runner does not report real token counts, so the output side is
	// estimated as a tenth of the input estimate, never below 50 tokens.
	// Usage is recorded whether or not the runner succeeded.
	outputTokens := 50
	if tenth := inputTokens / 10; tenth >= 50 {
		outputTokens = tenth
	}
	limiter.RecordUsage(model, inputTokens, outputTokens)

	if persistErr := limiter.Persist(); persistErr != nil {
		log.Warn("Failed to persist rate limit state", "error", persistErr)
	}

	return ok, code, runErr
}
feat(agentci): rate limiting and native Go dispatch runner Adds pkg/ratelimit for Gemini API rate limiting with sliding window (RPM/TPM/RPD), persistent state, and token counting. Replaces the bash agent-runner.sh with a native Go implementation under `core ai dispatch {run,watch,status}` for local queue processing. Rate limiting: - Per-model quotas (RPM, TPM, RPD) with 1-minute sliding window - WaitForCapacity blocks until capacity available or context cancelled - Persistent state in ~/.core/ratelimits.yaml - Default quotas for Gemini 3 Pro/Flash, 2.5 Pro, 2.0 Flash/Lite - CountTokens helper calls Google tokenizer API - CLI: core ai ratelimits {show,reset,count,config,check} Dispatch runner: - core ai dispatch run — process single ticket from queue - core ai dispatch watch — daemon mode with configurable interval - core ai dispatch status — show queue/active/done counts - Supports claude/codex/gemini runners with rate-limited Gemini - File-based locking with stale PID detection - Completion handler updates issue labels on success/failure Closes #42 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-10 02:59:17 +00:00			`package ai`

			`import (`
			`"context"`

			`"github.com/host-uk/core/pkg/log"`
			`"github.com/host-uk/core/pkg/ratelimit"`
			`)`

			`// executeWithRateLimit wraps an agent execution with rate limiting logic.`
			`// It estimates token usage, waits for capacity, executes the runner, and records usage.`
			`func executeWithRateLimit(ctx context.Context, model, prompt string, runner func() (bool, int, error)) (bool, int, error) {`
			`rl, err := ratelimit.New()`
			`if err != nil {`
			`log.Warn("Failed to initialize rate limiter, proceeding without limits", "error", err)`
			`return runner()`
			`}`

			`if err := rl.Load(); err != nil {`
			`log.Warn("Failed to load rate limit state", "error", err)`
			`}`

			`// Estimate tokens from prompt length (1 token ≈ 4 chars)`
			`estTokens := len(prompt) / 4`
			`if estTokens == 0 {`
			`estTokens = 1`
			`}`

			`log.Info("Checking rate limits", "model", model, "est_tokens", estTokens)`

			`if err := rl.WaitForCapacity(ctx, model, estTokens); err != nil {`
			`return false, -1, err`
			`}`

			`success, exitCode, runErr := runner()`

			`// Record usage with conservative output estimate (actual tokens unknown from shell runner).`
			`outputEst := estTokens / 10`
			`if outputEst < 50 {`
			`outputEst = 50`
			`}`
			`rl.RecordUsage(model, estTokens, outputEst)`

			`if err := rl.Persist(); err != nil {`
			`log.Warn("Failed to persist rate limit state", "error", err)`
			`}`

			`return success, exitCode, runErr`
			`}`