Add "Home"
parent
5cd0c7c8e4
commit
2c8b8d4d89
1 changed file with 96 additions and 0 deletions
96
Home.md
Normal file
96
Home.md
Normal file
# go-ratelimit

`forge.lthn.ai/core/go-ratelimit` -- Token counting and rate limiting for LLM API calls.

Manages per-model rate limits with sliding-window tracking for requests per minute (RPM), tokens per minute (TPM), and requests per day (RPD). Includes Google Generative AI token counting and YAML-based state persistence.

## Installation

```bash
go get forge.lthn.ai/core/go-ratelimit@latest
```

**Dependencies**: `gopkg.in/yaml.v3`

## Core Types

```go
// ModelQuota defines the rate limits for a specific model.
type ModelQuota struct {
	MaxRPM int // Requests per minute (0 = unlimited)
	MaxTPM int // Tokens per minute (0 = unlimited)
	MaxRPD int // Requests per day (0 = unlimited)
}

// RateLimiter manages rate limits across multiple models.
type RateLimiter struct {
	Quotas map[string]ModelQuota  // Model name -> quota
	State  map[string]*UsageStats // Model name -> current usage
}
```

## Quick Start

```go
package main

import (
	"context"
	"fmt"
	"log"

	"forge.lthn.ai/core/go-ratelimit"
)

func main() {
	rl, err := ratelimit.New()
	if err != nil {
		log.Fatal(err)
	}
	_ = rl.Load() // Load persisted state

	model := "gemini-2.5-pro"

	// Check before sending
	if rl.CanSend(model, 5000) {
		// ... make API call ...
		rl.RecordUsage(model, 3000, 2000)
		_ = rl.Persist()
	}

	// Or block until capacity is available
	ctx := context.Background()
	if err := rl.WaitForCapacity(ctx, model, 5000); err != nil {
		log.Fatal(err)
	}

	// View current stats
	stats := rl.Stats(model)
	fmt.Printf("RPM: %d/%d TPM: %d/%d RPD: %d/%d\n",
		stats.RPM, stats.MaxRPM, stats.TPM, stats.MaxTPM, stats.RPD, stats.MaxRPD)
}
```

## API Summary

| Function / Method | Description |
|-------------------|-------------|
| `New()` | Create a limiter with default Gemini quotas |
| `Load()` | Read persisted state from `~/.core/ratelimits.yaml` |
| `Persist()` | Write current state to disk |
| `CanSend(model, tokens)` | Check if a request would violate limits |
| `RecordUsage(model, prompt, output)` | Record a completed API call |
| `WaitForCapacity(ctx, model, tokens)` | Block until capacity is available |
| `Stats(model)` | Get current usage snapshot for one model |
| `AllStats()` | Get usage snapshots for all tracked models |
| `Reset(model)` | Clear stats for one model (or all if empty) |
| `CountTokens(apiKey, model, text)` | Count tokens via Google Generative AI API |

## Pages

- [[Model-Quotas]] -- Token counting and per-model quota configuration
- [[Usage-Tracking]] -- Sliding window algorithm, persistence, and blocking waits

## Licence

EUPL-1.2
||||
Loading…
Add table
Reference in a new issue