Forgejo and Gitea SDK wrappers, multi-repo git utilities, AgentCI dispatch, distributed job orchestrator, and data collection pipelines. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
130 lines
3.2 KiB
Go
130 lines
3.2 KiB
Go
package collect
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os/exec"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
core "forge.lthn.ai/core/go/pkg/framework/core"
|
|
)
|
|
|
|
// RateLimiter spaces out requests on a per-source basis so that no single
// upstream API is hit faster than its configured delay allows.
//
// All fields are guarded by mu; use the methods rather than touching the
// maps directly.
type RateLimiter struct {
	// mu serialises access to both maps below.
	mu sync.Mutex

	// delays maps a source name to the minimum gap between two requests.
	delays map[string]time.Duration

	// last maps a source name to the time of its most recent request slot.
	last map[string]time.Time
}
|
|
|
|
// defaultDelays holds the built-in minimum gap between requests for each
// known source. NewRateLimiter copies these values into every new limiter.
var defaultDelays = map[string]time.Duration{
	"arxiv":       time.Second,
	"bitcointalk": 2 * time.Second,
	"coingecko":   1500 * time.Millisecond,
	"github":      500 * time.Millisecond,
	"iacr":        time.Second,
}
|
|
|
|
// NewRateLimiter creates a limiter with default delays.
|
|
func NewRateLimiter() *RateLimiter {
|
|
delays := make(map[string]time.Duration, len(defaultDelays))
|
|
for k, v := range defaultDelays {
|
|
delays[k] = v
|
|
}
|
|
return &RateLimiter{
|
|
delays: delays,
|
|
last: make(map[string]time.Time),
|
|
}
|
|
}
|
|
|
|
// Wait blocks until the rate limit allows the next request for the given source.
|
|
// It respects context cancellation.
|
|
func (r *RateLimiter) Wait(ctx context.Context, source string) error {
|
|
r.mu.Lock()
|
|
delay, ok := r.delays[source]
|
|
if !ok {
|
|
delay = 500 * time.Millisecond
|
|
}
|
|
lastTime := r.last[source]
|
|
|
|
elapsed := time.Since(lastTime)
|
|
if elapsed >= delay {
|
|
// Enough time has passed — claim the slot immediately.
|
|
r.last[source] = time.Now()
|
|
r.mu.Unlock()
|
|
return nil
|
|
}
|
|
|
|
remaining := delay - elapsed
|
|
r.mu.Unlock()
|
|
|
|
// Wait outside the lock, then reclaim.
|
|
select {
|
|
case <-ctx.Done():
|
|
return core.E("collect.RateLimiter.Wait", "context cancelled", ctx.Err())
|
|
case <-time.After(remaining):
|
|
}
|
|
|
|
r.mu.Lock()
|
|
r.last[source] = time.Now()
|
|
r.mu.Unlock()
|
|
|
|
return nil
|
|
}
|
|
|
|
// SetDelay sets the delay for a source.
|
|
func (r *RateLimiter) SetDelay(source string, d time.Duration) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
r.delays[source] = d
|
|
}
|
|
|
|
// GetDelay returns the delay configured for a source.
|
|
func (r *RateLimiter) GetDelay(source string) time.Duration {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
if d, ok := r.delays[source]; ok {
|
|
return d
|
|
}
|
|
return 500 * time.Millisecond
|
|
}
|
|
|
|
// CheckGitHubRateLimit checks GitHub API rate limit status via gh api.
|
|
// Returns used and limit counts. Auto-pauses at 75% usage by increasing
|
|
// the GitHub rate limit delay.
|
|
func (r *RateLimiter) CheckGitHubRateLimit() (used, limit int, err error) {
|
|
cmd := exec.Command("gh", "api", "rate_limit", "--jq", ".rate | \"\\(.used) \\(.limit)\"")
|
|
out, err := cmd.Output()
|
|
if err != nil {
|
|
return 0, 0, core.E("collect.RateLimiter.CheckGitHubRateLimit", "failed to check rate limit", err)
|
|
}
|
|
|
|
parts := strings.Fields(strings.TrimSpace(string(out)))
|
|
if len(parts) != 2 {
|
|
return 0, 0, core.E("collect.RateLimiter.CheckGitHubRateLimit",
|
|
fmt.Sprintf("unexpected output format: %q", string(out)), nil)
|
|
}
|
|
|
|
used, err = strconv.Atoi(parts[0])
|
|
if err != nil {
|
|
return 0, 0, core.E("collect.RateLimiter.CheckGitHubRateLimit", "failed to parse used count", err)
|
|
}
|
|
|
|
limit, err = strconv.Atoi(parts[1])
|
|
if err != nil {
|
|
return 0, 0, core.E("collect.RateLimiter.CheckGitHubRateLimit", "failed to parse limit count", err)
|
|
}
|
|
|
|
// Auto-pause at 75% usage
|
|
if limit > 0 {
|
|
usage := float64(used) / float64(limit)
|
|
if usage >= 0.75 {
|
|
r.SetDelay("github", 5*time.Second)
|
|
}
|
|
}
|
|
|
|
return used, limit, nil
|
|
}
|