cli/pkg/collect/ratelimit.go
Snider 32a3613a3a feat: add collect, config, crypt, plugin packages and fix all lint issues
Add four new infrastructure packages with CLI commands:
- pkg/config: layered configuration (defaults → file → env → flags)
- pkg/crypt: crypto primitives (Argon2id, AES-GCM, ChaCha20, HMAC, checksums)
- pkg/plugin: plugin system with GitHub-based install/update/remove
- pkg/collect: collection subsystem (GitHub, BitcoinTalk, market, papers, excavate)

Fix all golangci-lint issues across the entire codebase (~100 errcheck,
staticcheck SA1012/SA1019/ST1005, unused, ineffassign fixes) so that
`core go qa` passes with 0 issues.

Closes #167, #168, #170, #250, #251, #252, #253, #254, #255, #256

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 11:32:41 +00:00

130 lines
3.2 KiB
Go

package collect
import (
"context"
"fmt"
"os/exec"
"strconv"
"strings"
"sync"
"time"
core "github.com/host-uk/core/pkg/framework/core"
)
// RateLimiter tracks per-source rate limiting to avoid overwhelming APIs.
type RateLimiter struct {
mu sync.Mutex
delays map[string]time.Duration
last map[string]time.Time
}
// Default rate limit delays per source.
var defaultDelays = map[string]time.Duration{
"github": 500 * time.Millisecond,
"bitcointalk": 2 * time.Second,
"coingecko": 1500 * time.Millisecond,
"iacr": 1 * time.Second,
"arxiv": 1 * time.Second,
}
// NewRateLimiter creates a limiter with default delays.
func NewRateLimiter() *RateLimiter {
delays := make(map[string]time.Duration, len(defaultDelays))
for k, v := range defaultDelays {
delays[k] = v
}
return &RateLimiter{
delays: delays,
last: make(map[string]time.Time),
}
}
// Wait blocks until the rate limit allows the next request for the given source.
// It respects context cancellation.
func (r *RateLimiter) Wait(ctx context.Context, source string) error {
r.mu.Lock()
delay, ok := r.delays[source]
if !ok {
delay = 500 * time.Millisecond
}
lastTime := r.last[source]
elapsed := time.Since(lastTime)
if elapsed >= delay {
// Enough time has passed — claim the slot immediately.
r.last[source] = time.Now()
r.mu.Unlock()
return nil
}
remaining := delay - elapsed
r.mu.Unlock()
// Wait outside the lock, then reclaim.
select {
case <-ctx.Done():
return core.E("collect.RateLimiter.Wait", "context cancelled", ctx.Err())
case <-time.After(remaining):
}
r.mu.Lock()
r.last[source] = time.Now()
r.mu.Unlock()
return nil
}
// SetDelay sets the delay for a source.
func (r *RateLimiter) SetDelay(source string, d time.Duration) {
r.mu.Lock()
defer r.mu.Unlock()
r.delays[source] = d
}
// GetDelay returns the delay configured for a source.
func (r *RateLimiter) GetDelay(source string) time.Duration {
r.mu.Lock()
defer r.mu.Unlock()
if d, ok := r.delays[source]; ok {
return d
}
return 500 * time.Millisecond
}
// CheckGitHubRateLimit checks GitHub API rate limit status via gh api.
// Returns used and limit counts. Auto-pauses at 75% usage by increasing
// the GitHub rate limit delay.
func (r *RateLimiter) CheckGitHubRateLimit() (used, limit int, err error) {
cmd := exec.Command("gh", "api", "rate_limit", "--jq", ".rate | \"\\(.used) \\(.limit)\"")
out, err := cmd.Output()
if err != nil {
return 0, 0, core.E("collect.RateLimiter.CheckGitHubRateLimit", "failed to check rate limit", err)
}
parts := strings.Fields(strings.TrimSpace(string(out)))
if len(parts) != 2 {
return 0, 0, core.E("collect.RateLimiter.CheckGitHubRateLimit",
fmt.Sprintf("unexpected output format: %q", string(out)), nil)
}
used, err = strconv.Atoi(parts[0])
if err != nil {
return 0, 0, core.E("collect.RateLimiter.CheckGitHubRateLimit", "failed to parse used count", err)
}
limit, err = strconv.Atoi(parts[1])
if err != nil {
return 0, 0, core.E("collect.RateLimiter.CheckGitHubRateLimit", "failed to parse limit count", err)
}
// Auto-pause at 75% usage
if limit > 0 {
usage := float64(used) / float64(limit)
if usage >= 0.75 {
r.SetDelay("github", 5*time.Second)
}
}
return used, limit, nil
}