From 2c8b8d4d8926997b5199c1b6adf1294f7a59fd1e Mon Sep 17 00:00:00 2001
From: Virgil
Date: Thu, 19 Feb 2026 16:54:29 +0000
Subject: [PATCH] Add "Home"

---
 Home.md | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 Home.md

diff --git a/Home.md b/Home.md
new file mode 100644
index 0000000..6eb00ea
--- /dev/null
+++ b/Home.md
@@ -0,0 +1,96 @@
+# go-ratelimit
+
+`forge.lthn.ai/core/go-ratelimit` -- Token counting and rate limiting for LLM API calls.
+
+Manages per-model rate limits with sliding-window tracking for requests per minute (RPM), tokens per minute (TPM), and requests per day (RPD). Includes Google Generative AI token counting and YAML-based state persistence.
+
+## Installation
+
+```bash
+go get forge.lthn.ai/core/go-ratelimit@latest
+```
+
+**Dependencies**: `gopkg.in/yaml.v3`
+
+## Core Types
+
+```go
+// ModelQuota defines the rate limits for a specific model.
+type ModelQuota struct {
+	MaxRPM int // Requests per minute (0 = unlimited)
+	MaxTPM int // Tokens per minute (0 = unlimited)
+	MaxRPD int // Requests per day (0 = unlimited)
+}
+
+// RateLimiter manages rate limits across multiple models.
+type RateLimiter struct {
+	Quotas map[string]ModelQuota  // Model name -> quota
+	State  map[string]*UsageStats // Model name -> current usage
+}
+```
+
+## Quick Start
+
+```go
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+
+	"forge.lthn.ai/core/go-ratelimit"
+)
+
+func main() {
+	rl, err := ratelimit.New()
+	if err != nil {
+		log.Fatal(err)
+	}
+	_ = rl.Load() // Load persisted state
+
+	model := "gemini-2.5-pro"
+
+	// Check before sending
+	if rl.CanSend(model, 5000) {
+		// ... make API call ...
+		rl.RecordUsage(model, 3000, 2000)
+		_ = rl.Persist()
+	}
+
+	// Or block until capacity is available
+	ctx := context.Background()
+	if err := rl.WaitForCapacity(ctx, model, 5000); err != nil {
+		log.Fatal(err)
+	}
+
+	// View current stats
+	stats := rl.Stats(model)
+	fmt.Printf("RPM: %d/%d TPM: %d/%d RPD: %d/%d\n",
+		stats.RPM, stats.MaxRPM, stats.TPM, stats.MaxTPM, stats.RPD, stats.MaxRPD)
+}
+```
+
+## API Summary
+
+| Function / Method | Description |
+|-------------------|-------------|
+| `New()` | Create a limiter with default Gemini quotas |
+| `Load()` | Read persisted state from `~/.core/ratelimits.yaml` |
+| `Persist()` | Write current state to disk |
+| `CanSend(model, tokens)` | Check if a request would violate limits |
+| `RecordUsage(model, prompt, output)` | Record a completed API call |
+| `WaitForCapacity(ctx, model, tokens)` | Block until capacity is available |
+| `Stats(model)` | Get current usage snapshot for one model |
+| `AllStats()` | Get usage snapshots for all tracked models |
+| `Reset(model)` | Clear stats for one model (or all if empty) |
+| `CountTokens(apiKey, model, text)` | Count tokens via Google Generative AI API |
+
+## Pages
+
+- [[Model-Quotas]] -- Token counting and per-model quota configuration
+- [[Usage-Tracking]] -- Sliding window algorithm, persistence, and blocking waits
+
+## Licence
+
+EUPL-1.2