This commit introduces a new bandwidth limiting feature to the `borg collect` command. The feature is implemented using a token bucket algorithm in a new `pkg/ratelimit` package. The rate limiter is integrated with the `http.Client` via a custom `http.RoundTripper`, and the feature is exposed to the user through a new `--bandwidth` flag on the `collect` command. The bandwidth limiting feature has been applied to the `website` and `github` collectors, and unit and integration tests have been added to verify the functionality. The following changes have been made: - Created a new `pkg/ratelimit` package with a token bucket implementation. - Integrated the rate limiter with `http.Client` using a custom `http.RoundTripper`. - Added a `--bandwidth` flag to the `collect` command. - Applied the bandwidth limit to the `website` and `github` collectors. - Added unit tests for the rate limiter and bandwidth parsing logic. - Added integration tests for the `collect website` and `collect github repo` commands. The following issues were encountered and were being addressed when the session ended: - Build errors in the `cmd` package, specifically in `cmd/all.go` and `cmd/all_test.go`. - The need for a `MockGithubClient` in the `mocks` package. - The `website` package needs to be refactored to reduce code duplication. - The rate limiter's performance can be improved. Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
132 lines
3.2 KiB
Go
132 lines
3.2 KiB
Go
package github
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"strings"
|
|
|
|
"golang.org/x/oauth2"
|
|
)
|
|
|
|
// Repo is the subset of a GitHub repository object that this package decodes
// from repository-listing responses.
type Repo struct {
	// CloneURL is populated from the JSON field "clone_url".
	CloneURL string `json:"clone_url"`
}
|
|
|
|
// GithubClient is the abstraction the rest of the program uses to query
// GitHub, so that callers do not depend on a concrete HTTP implementation.
type GithubClient interface {
	// GetPublicRepos lists the repositories belonging to userOrOrg. The
	// implementation in this package returns one clone URL per repository.
	GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error)
}
|
|
|
|
// NewGithubClient creates a new GithubClient.
|
|
func NewGithubClient(client *http.Client) GithubClient {
|
|
return &githubClient{client: client}
|
|
}
|
|
|
|
// githubClient is the GithubClient implementation handed out by
// NewGithubClient.
type githubClient struct {
	// client performs the HTTP requests. When it is nil,
	// GetPublicReposWithAPIURL builds one with NewAuthenticatedClient.
	client *http.Client
}
|
|
|
|
// NewAuthenticatedClient creates a new authenticated http client.
|
|
var NewAuthenticatedClient = func(ctx context.Context, transport http.RoundTripper) *http.Client {
|
|
if transport == nil {
|
|
transport = http.DefaultTransport
|
|
}
|
|
token := os.Getenv("GITHUB_TOKEN")
|
|
if token == "" {
|
|
return &http.Client{Transport: transport}
|
|
}
|
|
ts := oauth2.StaticTokenSource(
|
|
&oauth2.Token{AccessToken: token},
|
|
)
|
|
return &http.Client{
|
|
Transport: &oauth2.Transport{
|
|
Base: transport,
|
|
Source: ts,
|
|
},
|
|
}
|
|
}
|
|
|
|
func (g *githubClient) GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error) {
|
|
return g.GetPublicReposWithAPIURL(ctx, "https://api.github.com", userOrOrg)
|
|
}
|
|
|
|
func (g *githubClient) GetPublicReposWithAPIURL(ctx context.Context, apiURL, userOrOrg string) ([]string, error) {
|
|
client := g.client
|
|
if client == nil {
|
|
client = NewAuthenticatedClient(ctx, nil)
|
|
}
|
|
var allCloneURLs []string
|
|
url := fmt.Sprintf("%s/users/%s/repos", apiURL, userOrOrg)
|
|
isFirstRequest := true
|
|
|
|
for {
|
|
if err := ctx.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "Borg-Data-Collector")
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
// If it's the first request for a user and it's a 404, we can try the org endpoint.
|
|
if isFirstRequest && strings.Contains(url, "/users/") && resp.StatusCode == http.StatusNotFound {
|
|
resp.Body.Close()
|
|
url = fmt.Sprintf("%s/orgs/%s/repos", apiURL, userOrOrg)
|
|
isFirstRequest = false // We are now trying the org endpoint.
|
|
continue // Re-run the loop with the org URL.
|
|
}
|
|
status := resp.Status
|
|
resp.Body.Close()
|
|
return nil, fmt.Errorf("failed to fetch repos: %s", status)
|
|
}
|
|
|
|
isFirstRequest = false // Subsequent requests are for pagination.
|
|
|
|
var repos []Repo
|
|
if err := json.NewDecoder(resp.Body).Decode(&repos); err != nil {
|
|
resp.Body.Close()
|
|
return nil, err
|
|
}
|
|
resp.Body.Close()
|
|
|
|
for _, repo := range repos {
|
|
allCloneURLs = append(allCloneURLs, repo.CloneURL)
|
|
}
|
|
|
|
linkHeader := resp.Header.Get("Link")
|
|
nextURL := g.findNextURL(linkHeader)
|
|
if nextURL == "" {
|
|
break
|
|
}
|
|
url = nextURL
|
|
}
|
|
|
|
return allCloneURLs, nil
|
|
}
|
|
|
|
func (g *githubClient) findNextURL(linkHeader string) string {
|
|
links := strings.Split(linkHeader, ",")
|
|
for _, link := range links {
|
|
parts := strings.Split(link, ";")
|
|
if len(parts) < 2 {
|
|
continue
|
|
}
|
|
|
|
if strings.TrimSpace(parts[1]) == `rel="next"` {
|
|
urlPart := strings.TrimSpace(parts[0])
|
|
if strings.HasPrefix(urlPart, "<") && strings.HasSuffix(urlPart, ">") {
|
|
return urlPart[1 : len(urlPart)-1]
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|