This commit introduces a configurable rate-limiting system for all HTTP requests made by the application.

Key features include:
- A token bucket algorithm for rate limiting.
- Per-domain configuration via a YAML file (`--rate-config`).
- Wildcard domain matching (e.g., `*.archive.org`).
- Dynamic adjustments based on `429` responses and `Retry-After` headers.
- New CLI flags (`--rate-limit`, `--burst`) for on-the-fly configuration.

I began by creating a new `http` package to centralize the rate-limiting logic. I then integrated this package into the `website` and `github` collectors, ensuring that all outgoing HTTP requests are subject to the new rate-limiting rules.

Throughout the implementation, I added comprehensive unit and integration tests to validate the new functionality. This process also uncovered several pre-existing issues in the test suite, which I have now fixed. These fixes include:
- Correcting mock implementations for `http.Client` and `vcs.GitCloner`.
- Updating outdated function signatures in tests and examples.
- Resolving missing dependencies and syntax errors in test files.
- Stabilizing flaky tests.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
56 lines
1.2 KiB
Go
56 lines
1.2 KiB
Go
package http
|
|
|
|
import (
	"fmt"
	"os"
	"strings"

	"gopkg.in/yaml.v3"
)
|
|
|
|
// Config represents the rate limiting configuration.
|
|
type Config struct {
|
|
Defaults Rate `yaml:"defaults"`
|
|
Domains map[string]Rate `yaml:"domains"`
|
|
}
|
|
|
|
// Rate is a single rate limit setting as it appears in the YAML
// configuration.
type Rate struct {
	// RequestsPerSecond is read from the "requests_per_second" key.
	RequestsPerSecond float64 `yaml:"requests_per_second"`

	// Burst is read from the "burst" key.
	Burst int `yaml:"burst"`

	// Reason is a free-form note read from the "reason" key; it is left out
	// of marshalled YAML when empty.
	Reason string `yaml:"reason,omitempty"`
}
|
|
|
|
// ParseConfig parses a configuration file.
|
|
func ParseConfig(path string) (*Config, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var config Config
|
|
err = yaml.Unmarshal(data, &config)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &config, nil
|
|
}
|
|
|
|
// GetRate returns the rate limit for a given domain.
|
|
func (c *Config) GetRate(domain string) Rate {
|
|
// Check for an exact match first.
|
|
if rate, ok := c.Domains[domain]; ok {
|
|
return rate
|
|
}
|
|
|
|
// Check for a wildcard match.
|
|
parts := strings.Split(domain, ".")
|
|
for i := 1; i < len(parts); i++ {
|
|
wildcard := "*." + strings.Join(parts[i:], ".")
|
|
if rate, ok := c.Domains[wildcard]; ok {
|
|
return rate
|
|
}
|
|
}
|
|
|
|
// Return the default rate.
|
|
return c.Defaults
|
|
}
|