Borg/pkg/github/github.go
google-labs-jules[bot] 1b98ba1c3d feat: Configurable rate limiting per domain
This commit introduces a configurable rate-limiting system for all HTTP requests made by the application.

Key features include:
- A token bucket algorithm for rate limiting.
- Per-domain configuration via a YAML file (`--rate-config`).
- Wildcard domain matching (e.g., `*.archive.org`).
- Dynamic adjustments based on `429` responses and `Retry-After` headers.
- New CLI flags (`--rate-limit`, `--burst`) for on-the-fly configuration.

I began by creating a new `http` package to centralize the rate-limiting logic. I then integrated this package into the `website` and `github` collectors, ensuring that all outgoing HTTP requests are subject to the new rate-limiting rules.

Throughout the implementation, I added comprehensive unit and integration tests to validate the new functionality. This process also uncovered several pre-existing issues in the test suite, which I have now fixed. These fixes include:
- Correcting mock implementations for `http.Client` and `vcs.GitCloner`.
- Updating outdated function signatures in tests and examples.
- Resolving missing dependencies and syntax errors in test files.
- Stabilizing flaky tests.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:53:44 +00:00

127 lines
3.2 KiB
Go

package github
import (
"context"
"encoding/json"
"fmt"
"net/http"
"os"
"strings"
"golang.org/x/oauth2"
)
type Repo struct {
CloneURL string `json:"clone_url"`
}
// GithubClient is an interface for interacting with the Github API.
type GithubClient interface {
GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error)
}
// NewGithubClient creates a new GithubClient.
func NewGithubClient(client *http.Client) GithubClient {
return &githubClient{
client: client,
}
}
type githubClient struct {
client *http.Client
}
// NewAuthenticatedClient creates a new authenticated http client.
var NewAuthenticatedClient = func(ctx context.Context, baseClient *http.Client) *http.Client {
if baseClient == nil {
baseClient = http.DefaultClient
}
token := os.Getenv("GITHUB_TOKEN")
if token == "" {
return baseClient
}
ts := oauth2.StaticTokenSource(
&oauth2.Token{AccessToken: token},
)
ctx = context.WithValue(ctx, oauth2.HTTPClient, baseClient)
return oauth2.NewClient(ctx, ts)
}
func (g *githubClient) GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error) {
return g.getPublicReposWithAPIURL(ctx, "https://api.github.com", userOrOrg)
}
func (g *githubClient) getPublicReposWithAPIURL(ctx context.Context, apiURL, userOrOrg string) ([]string, error) {
client := NewAuthenticatedClient(ctx, g.client)
var allCloneURLs []string
url := fmt.Sprintf("%s/users/%s/repos", apiURL, userOrOrg)
isFirstRequest := true
for {
if err := ctx.Err(); err != nil {
return nil, err
}
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Borg-Data-Collector")
resp, err := client.Do(req)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
// If it's the first request for a user and it's a 404, we can try the org endpoint.
if isFirstRequest && strings.Contains(url, "/users/") && resp.StatusCode == http.StatusNotFound {
resp.Body.Close()
url = fmt.Sprintf("%s/orgs/%s/repos", apiURL, userOrOrg)
isFirstRequest = false // We are now trying the org endpoint.
continue // Re-run the loop with the org URL.
}
status := resp.Status
resp.Body.Close()
return nil, fmt.Errorf("failed to fetch repos: %s", status)
}
isFirstRequest = false // Subsequent requests are for pagination.
var repos []Repo
if err := json.NewDecoder(resp.Body).Decode(&repos); err != nil {
resp.Body.Close()
return nil, err
}
resp.Body.Close()
for _, repo := range repos {
allCloneURLs = append(allCloneURLs, repo.CloneURL)
}
linkHeader := resp.Header.Get("Link")
nextURL := g.findNextURL(linkHeader)
if nextURL == "" {
break
}
url = nextURL
}
return allCloneURLs, nil
}
func (g *githubClient) findNextURL(linkHeader string) string {
links := strings.Split(linkHeader, ",")
for _, link := range links {
parts := strings.Split(link, ";")
if len(parts) < 2 {
continue
}
if strings.TrimSpace(parts[1]) == `rel="next"` {
urlPart := strings.TrimSpace(parts[0])
if strings.HasPrefix(urlPart, "<") && strings.HasSuffix(urlPart, ">") {
return urlPart[1 : len(urlPart)-1]
}
}
}
return ""
}