cli/internal/cmd/ai/cmd_ratelimits.go

package ai

import (
	"fmt"
	"os"
	"sort"
	"strconv"
	"text/tabwriter"
	"time"

	"forge.lthn.ai/core/go/pkg/cli"
	"forge.lthn.ai/core/go/pkg/config"
	"forge.lthn.ai/core/go/pkg/ratelimit"
)

// AddRateLimitCommands attaches the 'ratelimits' command group to parent.
//
// The group carries the show, reset, count, config and check subcommands,
// registered in that order.
func AddRateLimitCommands(parent *cli.Command) {
	group := &cli.Command{
		Use:   "ratelimits",
		Short: "Manage Gemini API rate limits",
	}

	// Each constructor is invoked immediately before its command is added,
	// so construction and registration stay interleaved in list order.
	builders := []func() *cli.Command{
		rlShowCmd,
		rlResetCmd,
		rlCountCmd,
		rlConfigCmd,
		rlCheckCmd,
	}
	for _, build := range builders {
		group.AddCommand(build())
	}

	parent.AddCommand(group)
}

// rlShowCmd builds the 'show' subcommand, which prints a table of current
// usage against the configured maximum for every model in rl.AllStats().
//
// Rows are written in ascending model-name order so repeated invocations
// produce the same output. A model is marked LIMITED when any counter whose
// maximum is positive has reached that maximum; a maximum of 0 is never
// treated as reached.
//
// The command returns any error from ratelimit.New, rl.Load, or from
// flushing the table to stdout.
func rlShowCmd() *cli.Command {
	return &cli.Command{
		Use:   "show",
		Short: "Show current rate limit usage",
		RunE: func(cmd *cli.Command, args []string) error {
			rl, err := ratelimit.New()
			if err != nil {
				return err
			}
			if err := rl.Load(); err != nil {
				return err
			}

			stats := rl.AllStats()

			// Go randomises map iteration order; sort the keys so the table
			// is stable between runs.
			models := make([]string, 0, len(stats))
			for model := range stats {
				models = append(models, model)
			}
			sort.Strings(models)

			w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
			fmt.Fprintln(w, "MODEL\tRPM\tTPM\tRPD\tSTATUS")

			for _, model := range models {
				s := stats[model]

				rpmStr := fmt.Sprintf("%d/%s", s.RPM, formatLimit(s.MaxRPM))
				tpmStr := fmt.Sprintf("%d/%s", s.TPM, formatLimit(s.MaxTPM))
				rpdStr := fmt.Sprintf("%d/%s", s.RPD, formatLimit(s.MaxRPD))

				status := "OK"
				if (s.MaxRPM > 0 && s.RPM >= s.MaxRPM) ||
					(s.MaxTPM > 0 && s.TPM >= s.MaxTPM) ||
					(s.MaxRPD > 0 && s.RPD >= s.MaxRPD) {
					status = "LIMITED"
				}

				fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", model, rpmStr, tpmStr, rpdStr, status)
			}

			// The tabwriter buffers everything until Flush, so this is where
			// a write failure on stdout becomes visible.
			return w.Flush()
		},
	}
}

// rlResetCmd builds the 'reset' subcommand, which clears usage counters and
// persists the result.
//
// With one argument the reset is requested for that model; with no arguments
// an empty model name is passed to rl.Reset and the command reports that all
// models were reset. More than one argument is rejected, because only the
// first would be acted on and the rest would be silently ignored.
//
// The command returns any error from ratelimit.New, rl.Load or rl.Persist.
func rlResetCmd() *cli.Command {
	return &cli.Command{
		Use:   "reset [model]",
		Short: "Reset usage counters for a model (or all)",
		RunE: func(cmd *cli.Command, args []string) error {
			// Validate before touching any state so a mistyped invocation
			// cannot partially apply.
			if len(args) > 1 {
				return fmt.Errorf("accepts at most 1 model, received %d", len(args))
			}

			rl, err := ratelimit.New()
			if err != nil {
				return err
			}
			if err := rl.Load(); err != nil {
				return err
			}

			model := ""
			if len(args) > 0 {
				model = args[0]
			}

			rl.Reset(model)
			if err := rl.Persist(); err != nil {
				return err
			}

			if model == "" {
				fmt.Println("Reset stats for all models.")
			} else {
				fmt.Printf("Reset stats for model %q.\n", model)
			}
			return nil
		},
	}
}

// rlCountCmd builds the 'count' subcommand, which takes a model name and a
// text argument and prints the token count returned by ratelimit.CountTokens.
//
// The API key is read from the "agentci.gemini_api_key" config entry. When
// that lookup fails or yields an empty value, the GEMINI_API_KEY environment
// variable is used instead. The command fails if neither supplies a key.
func rlCountCmd() *cli.Command {
	run := func(_ *cli.Command, argv []string) error {
		name, input := argv[0], argv[1]

		conf, err := config.New()
		if err != nil {
			return err
		}

		var key string
		lookupErr := conf.Get("agentci.gemini_api_key", &key)
		if lookupErr != nil || key == "" {
			// Config gave nothing usable; fall back to the environment.
			key = os.Getenv("GEMINI_API_KEY")
		}
		if key == "" {
			return fmt.Errorf("GEMINI_API_KEY not found in config or env")
		}

		total, err := ratelimit.CountTokens(key, name, input)
		if err != nil {
			return err
		}

		fmt.Printf("Model: %s\nTokens: %d\n", name, total)
		return nil
	}

	return &cli.Command{
		Use:   "count <model> <text>",
		Short: "Count tokens for text using Gemini API",
		Args:  cli.ExactArgs(2),
		RunE:  run,
	}
}

// rlConfigCmd builds the 'config' subcommand, which prints a table of the
// maximum RPM, TPM and RPD for every model in rl.Quotas.
//
// Rows are written in ascending model-name order so repeated invocations
// produce the same output. The command returns any error from ratelimit.New
// or from flushing the table to stdout.
func rlConfigCmd() *cli.Command {
	return &cli.Command{
		Use:   "config",
		Short: "Show configured quotas",
		RunE: func(cmd *cli.Command, args []string) error {
			rl, err := ratelimit.New()
			if err != nil {
				return err
			}

			// Go randomises map iteration order; sort the keys so the table
			// is stable between runs.
			models := make([]string, 0, len(rl.Quotas))
			for model := range rl.Quotas {
				models = append(models, model)
			}
			sort.Strings(models)

			w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
			fmt.Fprintln(w, "MODEL\tMAX RPM\tMAX TPM\tMAX RPD")

			for _, model := range models {
				q := rl.Quotas[model]
				fmt.Fprintf(w, "%s\t%s\t%s\t%s\n",
					model,
					formatLimit(q.MaxRPM),
					formatLimit(q.MaxTPM),
					formatLimit(q.MaxRPD))
			}

			// The tabwriter buffers everything until Flush, so this is where
			// a write failure on stdout becomes visible.
			return w.Flush()
		},
	}
}

// rlCheckCmd builds the 'check' subcommand, which reports whether a request
// of the given estimated token size can currently be sent for a model.
//
// The second argument must parse as a non-negative integer. The command
// prints the model, the request cost, an OK / RATE LIMITED status taken from
// rl.CanSend, and the model's current RPM, TPM and RPD usage from rl.Stats.
//
// A failure to load existing state is not fatal: it is reported as a warning
// on stderr and the check continues with whatever state the limiter holds.
// The command returns an error for an unparsable or negative token count, or
// when ratelimit.New fails.
func rlCheckCmd() *cli.Command {
	return &cli.Command{
		Use:   "check <model> <estimated-tokens>",
		Short: "Check rate limit capacity for a model",
		Args:  cli.ExactArgs(2),
		RunE: func(cmd *cli.Command, args []string) error {
			model := args[0]
			tokens, err := strconv.Atoi(args[1])
			if err != nil {
				return fmt.Errorf("invalid token count: %w", err)
			}
			// A negative estimate is meaningless as a request cost and must
			// not be passed on to the capacity check.
			if tokens < 0 {
				return fmt.Errorf("invalid token count: %d is negative", tokens)
			}

			rl, err := ratelimit.New()
			if err != nil {
				return err
			}
			if err := rl.Load(); err != nil {
				// Diagnostics go to stderr so stdout carries only the report.
				fmt.Fprintf(os.Stderr, "Warning: could not load existing state: %v\n", err)
			}

			stats := rl.Stats(model)
			canSend := rl.CanSend(model, tokens)

			status := "RATE LIMITED"
			if canSend {
				status = "OK"
			}

			fmt.Printf("Model:        %s\n", model)
			fmt.Printf("Request Cost: %d tokens\n", tokens)
			fmt.Printf("Status:       %s\n", status)
			fmt.Printf("\nCurrent Usage (1m window):\n")
			fmt.Printf("  RPM: %d / %s\n", stats.RPM, formatLimit(stats.MaxRPM))
			fmt.Printf("  TPM: %d / %s\n", stats.TPM, formatLimit(stats.MaxTPM))
			fmt.Printf("  RPD: %d / %s (reset: %s)\n", stats.RPD, formatLimit(stats.MaxRPD), stats.DayStart.Format(time.RFC3339))

			return nil
		},
	}
}

// formatLimit renders a quota limit for display.
//
// A limit of 0 is shown as "∞". Exact multiples of 1,000,000 are shortened to
// an "M" suffix and exact multiples of 1,000 to a "K" suffix. Any other value
// is printed in full, so the displayed limit never understates the real one:
// 1500 is shown as "1500" rather than being truncated to "1K".
func formatLimit(limit int) string {
	const (
		thousand = 1000
		million  = 1000000
	)

	switch {
	case limit == 0:
		return "∞"
	case limit >= million && limit%million == 0:
		return fmt.Sprintf("%dM", limit/million)
	case limit >= thousand && limit%thousand == 0:
		// Also covers values such as 1,500,000, shown losslessly as "1500K".
		return fmt.Sprintf("%dK", limit/thousand)
	default:
		return fmt.Sprintf("%d", limit)
	}
}
feat(agentci): rate limiting and native Go dispatch runner Adds pkg/ratelimit for Gemini API rate limiting with sliding window (RPM/TPM/RPD), persistent state, and token counting. Replaces the bash agent-runner.sh with a native Go implementation under `core ai dispatch {run,watch,status}` for local queue processing. Rate limiting: - Per-model quotas (RPM, TPM, RPD) with 1-minute sliding window - WaitForCapacity blocks until capacity available or context cancelled - Persistent state in ~/.core/ratelimits.yaml - Default quotas for Gemini 3 Pro/Flash, 2.5 Pro, 2.0 Flash/Lite - CountTokens helper calls Google tokenizer API - CLI: core ai ratelimits {show,reset,count,config,check} Dispatch runner: - core ai dispatch run — process single ticket from queue - core ai dispatch watch — daemon mode with configurable interval - core ai dispatch status — show queue/active/done counts - Supports claude/codex/gemini runners with rate-limited Gemini - File-based locking with stale PID detection - Completion handler updates issue labels on success/failure Closes #42 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-10 02:59:17 +00:00			`package ai`

			`import (`
			`"fmt"`
			`"os"`
			`"strconv"`
			`"text/tabwriter"`
			`"time"`

refactor: split CLI from monorepo, import core/go as library - Change module from forge.lthn.ai/core/go to forge.lthn.ai/core/cli - Remove pkg/ directory (now served from core/go) - Add require + replace for forge.lthn.ai/core/go => ../go - Update go.work to include ../go workspace module - Fix all internal/cmd/* imports: pkg/ refs → forge.lthn.ai/core/go/pkg/ - Rename internal/cmd/sdk package to sdkcmd (avoids conflict with pkg/sdk) - Remove SDK library files from internal/cmd/sdk/ (now in core/go/pkg/sdk/) - Remove duplicate RAG helper functions from internal/cmd/rag/ - Remove stale cmd/core-ide/ (now in core/ide repo) - Update IDE variant to remove core-ide import - Fix test assertion for new module name - Run go mod tidy to sync dependencies core/cli is now a pure CLI application importing core/go for packages. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-16 14:22:18 +00:00			`"forge.lthn.ai/core/go/pkg/cli"`
			`"forge.lthn.ai/core/go/pkg/config"`
			`"forge.lthn.ai/core/go/pkg/ratelimit"`
feat(agentci): rate limiting and native Go dispatch runner Adds pkg/ratelimit for Gemini API rate limiting with sliding window (RPM/TPM/RPD), persistent state, and token counting. Replaces the bash agent-runner.sh with a native Go implementation under `core ai dispatch {run,watch,status}` for local queue processing. Rate limiting: - Per-model quotas (RPM, TPM, RPD) with 1-minute sliding window - WaitForCapacity blocks until capacity available or context cancelled - Persistent state in ~/.core/ratelimits.yaml - Default quotas for Gemini 3 Pro/Flash, 2.5 Pro, 2.0 Flash/Lite - CountTokens helper calls Google tokenizer API - CLI: core ai ratelimits {show,reset,count,config,check} Dispatch runner: - core ai dispatch run — process single ticket from queue - core ai dispatch watch — daemon mode with configurable interval - core ai dispatch status — show queue/active/done counts - Supports claude/codex/gemini runners with rate-limited Gemini - File-based locking with stale PID detection - Completion handler updates issue labels on success/failure Closes #42 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-10 02:59:17 +00:00			`)`

			`// AddRateLimitCommands registers the 'ratelimits' subcommand group under 'ai'.`
			`func AddRateLimitCommands(parent *cli.Command) {`
			`rlCmd := &cli.Command{`
			`Use: "ratelimits",`
			`Short: "Manage Gemini API rate limits",`
			`}`

			`rlCmd.AddCommand(rlShowCmd())`
			`rlCmd.AddCommand(rlResetCmd())`
			`rlCmd.AddCommand(rlCountCmd())`
			`rlCmd.AddCommand(rlConfigCmd())`
			`rlCmd.AddCommand(rlCheckCmd())`

			`parent.AddCommand(rlCmd)`
			`}`

			`func rlShowCmd() *cli.Command {`
			`return &cli.Command{`
			`Use: "show",`
			`Short: "Show current rate limit usage",`
			`RunE: func(cmd *cli.Command, args []string) error {`
			`rl, err := ratelimit.New()`
			`if err != nil {`
			`return err`
			`}`
			`if err := rl.Load(); err != nil {`
			`return err`
			`}`

			`stats := rl.AllStats()`

			`w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)`
			`fmt.Fprintln(w, "MODEL\tRPM\tTPM\tRPD\tSTATUS")`

			`for model, s := range stats {`
			`rpmStr := fmt.Sprintf("%d/%s", s.RPM, formatLimit(s.MaxRPM))`
			`tpmStr := fmt.Sprintf("%d/%s", s.TPM, formatLimit(s.MaxTPM))`
			`rpdStr := fmt.Sprintf("%d/%s", s.RPD, formatLimit(s.MaxRPD))`

			`status := "OK"`
			`if (s.MaxRPM > 0 && s.RPM >= s.MaxRPM) \|\|`
			`(s.MaxTPM > 0 && s.TPM >= s.MaxTPM) \|\|`
			`(s.MaxRPD > 0 && s.RPD >= s.MaxRPD) {`
			`status = "LIMITED"`
			`}`

			`fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n", model, rpmStr, tpmStr, rpdStr, status)`
			`}`
			`w.Flush()`
			`return nil`
			`},`
			`}`
			`}`

			`func rlResetCmd() *cli.Command {`
			`return &cli.Command{`
			`Use: "reset [model]",`
			`Short: "Reset usage counters for a model (or all)",`
			`RunE: func(cmd *cli.Command, args []string) error {`
			`rl, err := ratelimit.New()`
			`if err != nil {`
			`return err`
			`}`
			`if err := rl.Load(); err != nil {`
			`return err`
			`}`

			`model := ""`
			`if len(args) > 0 {`
			`model = args[0]`
			`}`

			`rl.Reset(model)`
			`if err := rl.Persist(); err != nil {`
			`return err`
			`}`

			`if model == "" {`
			`fmt.Println("Reset stats for all models.")`
			`} else {`
			`fmt.Printf("Reset stats for model %q.\n", model)`
			`}`
			`return nil`
			`},`
			`}`
			`}`

			`func rlCountCmd() *cli.Command {`
			`return &cli.Command{`
			`Use: "count <model> <text>",`
			`Short: "Count tokens for text using Gemini API",`
			`Args: cli.ExactArgs(2),`
			`RunE: func(cmd *cli.Command, args []string) error {`
			`model := args[0]`
			`text := args[1]`

			`cfg, err := config.New()`
			`if err != nil {`
			`return err`
			`}`

			`var apiKey string`
			`if err := cfg.Get("agentci.gemini_api_key", &apiKey); err != nil \|\| apiKey == "" {`
			`apiKey = os.Getenv("GEMINI_API_KEY")`
			`}`
			`if apiKey == "" {`
			`return fmt.Errorf("GEMINI_API_KEY not found in config or env")`
			`}`

			`count, err := ratelimit.CountTokens(apiKey, model, text)`
			`if err != nil {`
			`return err`
			`}`

			`fmt.Printf("Model: %s\nTokens: %d\n", model, count)`
			`return nil`
			`},`
			`}`
			`}`

			`func rlConfigCmd() *cli.Command {`
			`return &cli.Command{`
			`Use: "config",`
			`Short: "Show configured quotas",`
			`RunE: func(cmd *cli.Command, args []string) error {`
			`rl, err := ratelimit.New()`
			`if err != nil {`
			`return err`
			`}`

			`w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)`
			`fmt.Fprintln(w, "MODEL\tMAX RPM\tMAX TPM\tMAX RPD")`

			`for model, q := range rl.Quotas {`
			`fmt.Fprintf(w, "%s\t%s\t%s\t%s\n",`
			`model,`
			`formatLimit(q.MaxRPM),`
			`formatLimit(q.MaxTPM),`
			`formatLimit(q.MaxRPD))`
			`}`
			`w.Flush()`
			`return nil`
			`},`
			`}`
			`}`

			`func rlCheckCmd() *cli.Command {`
			`return &cli.Command{`
			`Use: "check <model> <estimated-tokens>",`
			`Short: "Check rate limit capacity for a model",`
			`Args: cli.ExactArgs(2),`
			`RunE: func(cmd *cli.Command, args []string) error {`
			`model := args[0]`
			`tokens, err := strconv.Atoi(args[1])`
			`if err != nil {`
			`return fmt.Errorf("invalid token count: %w", err)`
			`}`

			`rl, err := ratelimit.New()`
			`if err != nil {`
			`return err`
			`}`
			`if err := rl.Load(); err != nil {`
			`fmt.Printf("Warning: could not load existing state: %v\n", err)`
			`}`

			`stats := rl.Stats(model)`
			`canSend := rl.CanSend(model, tokens)`

			`status := "RATE LIMITED"`
			`if canSend {`
			`status = "OK"`
			`}`

			`fmt.Printf("Model: %s\n", model)`
			`fmt.Printf("Request Cost: %d tokens\n", tokens)`
			`fmt.Printf("Status: %s\n", status)`
			`fmt.Printf("\nCurrent Usage (1m window):\n")`
			`fmt.Printf(" RPM: %d / %s\n", stats.RPM, formatLimit(stats.MaxRPM))`
			`fmt.Printf(" TPM: %d / %s\n", stats.TPM, formatLimit(stats.MaxTPM))`
			`fmt.Printf(" RPD: %d / %s (reset: %s)\n", stats.RPD, formatLimit(stats.MaxRPD), stats.DayStart.Format(time.RFC3339))`

			`return nil`
			`},`
			`}`
			`}`

			`func formatLimit(limit int) string {`
			`if limit == 0 {`
			`return "∞"`
			`}`
			`if limit >= 1000000 {`
			`return fmt.Sprintf("%dM", limit/1000000)`
			`}`
			`if limit >= 1000 {`
			`return fmt.Sprintf("%dK", limit/1000)`
			`}`
			`return fmt.Sprintf("%d", limit)`
			`}`