Borg/pkg/github/github.go
google-labs-jules[bot] 6071dc74f1 feat: Implement bandwidth limiting for collect commands
This commit introduces a new bandwidth limiting feature to the `borg collect` command. The feature is implemented using a token bucket algorithm in a new `pkg/ratelimit` package. The rate limiter is integrated with the `http.Client` via a custom `http.RoundTripper`, and the feature is exposed to the user through a new `--bandwidth` flag on the `collect` command.

The bandwidth limiting feature has been applied to the `website` and `github` collectors, and unit and integration tests have been added to verify the functionality.

The following changes have been made:

- Created a new `pkg/ratelimit` package with a token bucket implementation.
- Integrated the rate limiter with `http.Client` using a custom `http.RoundTripper`.
- Added a `--bandwidth` flag to the `collect` command.
- Applied the bandwidth limit to the `website` and `github` collectors.
- Added unit tests for the rate limiter and bandwidth parsing logic.
- Added integration tests for the `collect website` and `collect github repo` commands.

The following issues were encountered and were being addressed when the session ended:

- Build errors in the `cmd` package, specifically in `cmd/all.go` and `cmd/all_test.go`.
- The need for a `MockGithubClient` in the `mocks` package.
- The `website` package needs to be refactored to reduce code duplication.
- The rate limiter's performance can be improved.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:54:01 +00:00

132 lines
3.2 KiB
Go

package github
import (
"context"
"encoding/json"
"fmt"
"net/http"
"os"
"strings"
"golang.org/x/oauth2"
)
// Repo is the subset of a repository object that this package decodes from
// the JSON returned by the repository-listing endpoints. Only the clone URL is
// kept; every other field in the response is ignored by the decoder.
type Repo struct {
// CloneURL is filled from the "clone_url" JSON field.
CloneURL string `json:"clone_url"`
}
// GithubClient is an interface for interacting with the Github API.
// NewGithubClient returns the implementation provided by this package.
type GithubClient interface {
// GetPublicRepos lists the repositories of userOrOrg. The implementation
// in this package returns one clone URL per repository, or a nil slice
// and an error when the listing could not be fetched.
GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error)
}
// NewGithubClient wraps client in this package's GithubClient implementation.
//
// client may be nil; in that case the HTTP client is obtained from
// NewAuthenticatedClient at the time repositories are requested.
func NewGithubClient(client *http.Client) GithubClient {
	impl := new(githubClient)
	impl.client = client
	return impl
}
// githubClient is the GithubClient implementation of this package; it issues
// its requests through an *http.Client.
type githubClient struct {
// client performs the HTTP requests. When it is nil,
// GetPublicReposWithAPIURL obtains one from NewAuthenticatedClient(ctx, nil).
client *http.Client
}
// NewAuthenticatedClient builds the *http.Client used to talk to the API.
//
// transport is the base RoundTripper; nil selects http.DefaultTransport.
// When the GITHUB_TOKEN environment variable is non-empty, the base transport
// is wrapped in an oauth2.Transport that uses the token as a static access
// token. Otherwise the client uses the base transport directly and requests
// are unauthenticated.
//
// ctx is accepted but not used by this implementation.
//
// NOTE(review): this is a package-level variable rather than a func,
// presumably so tests can substitute a stub — confirm with callers.
var NewAuthenticatedClient = func(ctx context.Context, transport http.RoundTripper) *http.Client {
	base := transport
	if base == nil {
		base = http.DefaultTransport
	}

	accessToken := os.Getenv("GITHUB_TOKEN")
	if accessToken != "" {
		authenticated := &oauth2.Transport{
			Base:   base,
			Source: oauth2.StaticTokenSource(&oauth2.Token{AccessToken: accessToken}),
		}
		return &http.Client{Transport: authenticated}
	}

	return &http.Client{Transport: base}
}
// GetPublicRepos lists the repositories of userOrOrg against the public API
// host, https://api.github.com. It delegates to GetPublicReposWithAPIURL and
// returns that call's result and error unchanged.
func (g *githubClient) GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error) {
	const publicAPIURL = "https://api.github.com"

	cloneURLs, err := g.GetPublicReposWithAPIURL(ctx, publicAPIURL, userOrOrg)
	return cloneURLs, err
}
// GetPublicReposWithAPIURL collects the clone URLs of every repository listed
// for userOrOrg by the API rooted at apiURL.
//
// The listing starts at "<apiURL>/users/<userOrOrg>/repos". If that very first
// request answers 404 Not Found, the name is retried once as an organisation
// at "<apiURL>/orgs/<userOrOrg>/repos". Further pages are fetched for as long
// as findNextURL extracts a next URL from the response's Link header.
//
// Requests are sent with g.client, or with NewAuthenticatedClient(ctx, nil)
// when g.client is nil, and carry the User-Agent "Borg-Data-Collector".
//
// It returns a nil slice and an error when ctx is already done, a request
// cannot be built or sent, a response has a status other than 200 (apart from
// the single 404 fallback above), or a response body does not decode as a JSON
// array of repositories.
func (g *githubClient) GetPublicReposWithAPIURL(ctx context.Context, apiURL, userOrOrg string) ([]string, error) {
	httpClient := g.client
	if httpClient == nil {
		httpClient = NewAuthenticatedClient(ctx, nil)
	}

	var cloneURLs []string
	pageURL := fmt.Sprintf("%s/users/%s/repos", apiURL, userOrOrg)

	// firstRequest is true only for the initial /users/ attempt; the post
	// statement clears it after every iteration, including the `continue`
	// that switches to the /orgs/ endpoint.
	for firstRequest := true; ; firstRequest = false {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, ctxErr
		}

		req, err := http.NewRequestWithContext(ctx, "GET", pageURL, nil)
		if err != nil {
			return nil, err
		}
		req.Header.Set("User-Agent", "Borg-Data-Collector")

		resp, err := httpClient.Do(req)
		if err != nil {
			return nil, err
		}

		switch {
		case resp.StatusCode == http.StatusOK:
			// Fall out of the switch and decode this page below.
		case firstRequest && resp.StatusCode == http.StatusNotFound && strings.Contains(pageURL, "/users/"):
			// Not a user: retry the same name as an organisation.
			resp.Body.Close()
			pageURL = fmt.Sprintf("%s/orgs/%s/repos", apiURL, userOrOrg)
			continue
		default:
			resp.Body.Close()
			return nil, fmt.Errorf("failed to fetch repos: %s", resp.Status)
		}

		var page []Repo
		decodeErr := json.NewDecoder(resp.Body).Decode(&page)
		resp.Body.Close()
		if decodeErr != nil {
			return nil, decodeErr
		}
		for i := range page {
			cloneURLs = append(cloneURLs, page[i].CloneURL)
		}

		// Headers stay readable after the body has been closed.
		next := g.findNextURL(resp.Header.Get("Link"))
		if next == "" {
			return cloneURLs, nil
		}
		pageURL = next
	}
}
// findNextURL extracts the target of the rel="next" entry from an HTTP Link
// header value and returns it without the surrounding angle brackets. It
// returns "" when the header is empty or carries no such entry.
//
// Every parameter of an entry is inspected, so the relation is found even when
// "rel" is not the first parameter after the target. The parameter name and
// the relation type are matched case-insensitively, the value may be quoted or
// a bare token, and a quoted value may list several space-separated relation
// types (for example rel="next last").
//
// Entries are split on "," and parameters on ";", so a target or quoted
// parameter value that itself contains one of those characters is not
// supported.
func (g *githubClient) findNextURL(linkHeader string) string {
	for _, link := range strings.Split(linkHeader, ",") {
		segments := strings.Split(link, ";")
		if len(segments) < 2 {
			// A bare target without parameters cannot carry rel="next".
			continue
		}

		target := strings.TrimSpace(segments[0])
		if !strings.HasPrefix(target, "<") || !strings.HasSuffix(target, ">") {
			continue
		}

		for _, param := range segments[1:] {
			if linkParamIsRelNext(param) {
				return target[1 : len(target)-1]
			}
		}
	}
	return ""
}

// linkParamIsRelNext reports whether param, a single "name=value" parameter of
// a Link header entry, is a rel parameter whose value includes the "next"
// relation type.
func linkParamIsRelNext(param string) bool {
	nameValue := strings.SplitN(param, "=", 2)
	if len(nameValue) != 2 {
		return false
	}
	if !strings.EqualFold(strings.TrimSpace(nameValue[0]), "rel") {
		return false
	}

	relTypes := strings.Trim(strings.TrimSpace(nameValue[1]), `"`)
	for _, relType := range strings.Fields(relTypes) {
		if strings.EqualFold(relType, "next") {
			return true
		}
	}
	return false
}