Borg/cmd/collect_github_repo_test.go
google-labs-jules[bot] 1b98ba1c3d feat: Configurable rate limiting per domain
This commit introduces a configurable rate-limiting system for all HTTP requests made by the application.

Key features include:
- A token bucket algorithm for rate limiting.
- Per-domain configuration via a YAML file (`--rate-config`).
- Wildcard domain matching (e.g., `*.archive.org`).
- Dynamic adjustments based on `429` responses and `Retry-After` headers.
- New CLI flags (`--rate-limit`, `--burst`) for on-the-fly configuration.

I began by creating a new `http` package to centralize the rate-limiting logic. I then integrated this package into the `website` and `github` collectors, ensuring that all outgoing HTTP requests are subject to the new rate-limiting rules.

Throughout the implementation, I added comprehensive unit and integration tests to validate the new functionality. This process also uncovered several pre-existing issues in the test suite, which I have now fixed. These fixes include:
- Correcting mock implementations for `http.Client` and `vcs.GitCloner`.
- Updating outdated function signatures in tests and examples.
- Resolving missing dependencies and syntax errors in test files.
- Stabilizing flaky tests.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:53:44 +00:00

91 lines
2.3 KiB
Go

package cmd
import (
"fmt"
"path/filepath"
"testing"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/mocks"
"github.com/Snider/Borg/pkg/vcs"
"github.com/spf13/cobra"
)
func TestCollectGithubRepoCmd_Good(t *testing.T) {
// Setup mock Git cloner
mockCloner := &mocks.MockGitCloner{
DN: datanode.New(),
Err: nil,
}
oldCloner := GitCloner
GitCloner = mockCloner
defer func() {
GitCloner = oldCloner
}()
rootCmd := NewRootCmd()
collectCmd := NewCollectCmd()
githubCmd := GetCollectGithubCmd()
repoCmd := NewCollectGithubRepoCmd()
githubCmd.AddCommand(repoCmd)
collectCmd.AddCommand(githubCmd)
rootCmd.AddCommand(collectCmd)
// Execute command
out := filepath.Join(t.TempDir(), "out")
_, err := executeCommand(rootCmd, "collect", "github", "repo", "https://github.com/testuser/repo1", "--output", out)
if err != nil {
t.Fatalf("collect github repo command failed: %v", err)
}
}
func TestCollectGithubRepoCmd_Bad(t *testing.T) {
// Setup mock Git cloner to return an error
mockCloner := &mocks.MockGitCloner{
DN: nil,
Err: fmt.Errorf("git clone error"),
}
oldCloner := GitCloner
GitCloner = mockCloner
defer func() {
GitCloner = oldCloner
}()
rootCmd := NewRootCmd()
collectCmd := NewCollectCmd()
githubCmd := GetCollectGithubCmd()
repoCmd := NewCollectGithubRepoCmd()
githubCmd.AddCommand(repoCmd)
collectCmd.AddCommand(githubCmd)
rootCmd.AddCommand(collectCmd)
// Execute command
out := filepath.Join(t.TempDir(), "out")
_, err := executeCommand(rootCmd, "collect", "github", "repo", "https://github.com/testuser/repo1", "--output", out)
if err == nil {
t.Fatal("expected an error, but got none")
}
}
func TestCollectGithubRepoCmd_Ugly(t *testing.T) {
t.Run("Invalid repo URL", func(t *testing.T) {
rootCmd := NewRootCmd()
collectCmd := NewCollectCmd()
githubCmd := GetCollectGithubCmd()
repoCmd := NewCollectGithubRepoCmd()
githubCmd.AddCommand(repoCmd)
collectCmd.AddCommand(githubCmd)
rootCmd.AddCommand(collectCmd)
repoCmd.RunE = func(cmd *cobra.Command, args []string) error {
cloner := vcs.NewGitClonerWithClient(nil)
_, err := cloner.CloneGitRepository(args[0], nil)
return err
}
_, err := executeCommand(rootCmd, "collect", "github", "repo", "not-a-github-url")
if err == nil {
t.Fatal("expected an error for invalid repo URL, but got none")
}
})
}