Borg/cmd/collect_website_test.go
google-labs-jules[bot] 6071dc74f1 feat: Implement bandwidth limiting for collect commands
This commit introduces a new bandwidth limiting feature to the `borg collect` command. The feature is implemented using a token bucket algorithm in a new `pkg/ratelimit` package. The rate limiter is integrated with the `http.Client` via a custom `http.RoundTripper`, and the feature is exposed to the user through a new `--bandwidth` flag on the `collect` command.

The bandwidth limiting feature has been applied to the `website` and `github` collectors, and unit and integration tests have been added to verify the functionality.

The following changes have been made:

- Created a new `pkg/ratelimit` package with a token bucket implementation.
- Integrated the rate limiter with `http.Client` using a custom `http.RoundTripper`.
- Added a `--bandwidth` flag to the `collect` command.
- Applied the bandwidth limit to the `website` and `github` collectors.
- Added unit tests for the rate limiter and bandwidth parsing logic.
- Added integration tests for the `collect website` and `collect github repo` commands.

The following issues were encountered and were being addressed when the session ended:

- Build errors in the `cmd` package, specifically in `cmd/all.go` and `cmd/all_test.go`.
- The need for a `MockGithubClient` in the `mocks` package.
- The `website` package needs to be refactored to reduce code duplication.
- The rate limiter's performance can be improved.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:54:01 +00:00

110 lines
3.4 KiB
Go

package cmd
import (
"fmt"
"net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
"time"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/website"
"github.com/schollz/progressbar/v3"
)
// TestCollectWebsiteCmd_Good checks that "collect website" completes without
// an error when the website downloader is replaced by a stub that returns a
// freshly created DataNode.
func TestCollectWebsiteCmd_Good(t *testing.T) {
	// Swap in a stub downloader and put the real one back once the test ends.
	realDownloader := website.DownloadAndPackageWebsiteWithClient
	t.Cleanup(func() { website.DownloadAndPackageWebsiteWithClient = realDownloader })
	website.DownloadAndPackageWebsiteWithClient = func(startURL string, maxDepth int, bar *progressbar.ProgressBar, client *http.Client) (*datanode.DataNode, error) {
		return datanode.New(), nil
	}

	root := NewRootCmd()
	root.AddCommand(GetCollectCmd())

	// Run the command, pointing --output at a per-test temporary directory.
	target := filepath.Join(t.TempDir(), "out")
	if _, err := executeCommand(root, "collect", "website", "https://example.com", "--output", target); err != nil {
		t.Fatalf("collect website command failed: %v", err)
	}
}
// TestCollectWebsiteCmd_Bad checks that "collect website" reports an error
// when the (stubbed) website downloader fails.
func TestCollectWebsiteCmd_Bad(t *testing.T) {
	// Swap in a downloader that always fails; restore the real one afterwards.
	realDownloader := website.DownloadAndPackageWebsiteWithClient
	t.Cleanup(func() { website.DownloadAndPackageWebsiteWithClient = realDownloader })
	website.DownloadAndPackageWebsiteWithClient = func(startURL string, maxDepth int, bar *progressbar.ProgressBar, client *http.Client) (*datanode.DataNode, error) {
		return nil, fmt.Errorf("website error")
	}

	root := NewRootCmd()
	root.AddCommand(GetCollectCmd())

	// Run the command, pointing --output at a per-test temporary directory.
	target := filepath.Join(t.TempDir(), "out")
	if _, err := executeCommand(root, "collect", "website", "https://example.com", "--output", target); err == nil {
		t.Fatal("expected an error, but got none")
	}
}
// TestCollectWebsiteCmd_Ugly covers malformed invocations of "collect
// website": a missing URL argument and an unparsable --bandwidth value. Each
// case must fail with an error containing a specific fragment.
func TestCollectWebsiteCmd_Ugly(t *testing.T) {
	// requireErrContaining aborts with noErrMsg when err is nil, and otherwise
	// reports a failure if err's text does not contain wantFragment.
	requireErrContaining := func(t *testing.T, err error, noErrMsg, wantFragment string) {
		t.Helper()
		if err == nil {
			t.Fatal(noErrMsg)
		}
		if msg := err.Error(); !strings.Contains(msg, wantFragment) {
			t.Errorf("unexpected error message: %v", err)
		}
	}

	t.Run("No arguments", func(t *testing.T) {
		root := NewRootCmd()
		root.AddCommand(GetCollectCmd())

		_, runErr := executeCommand(root, "collect", "website")
		requireErrContaining(t, runErr,
			"expected an error for no arguments, but got none",
			"accepts 1 arg(s), received 0")
	})

	t.Run("Invalid bandwidth", func(t *testing.T) {
		root := NewRootCmd()
		root.AddCommand(GetCollectCmd())

		_, runErr := executeCommand(root, "collect", "website", "https://example.com", "--bandwidth", "1Gbps")
		requireErrContaining(t, runErr,
			"expected an error for invalid bandwidth, but got none",
			"invalid bandwidth")
	})
}
// TestCollectWebsiteCmd_Bandwidth verifies that the --bandwidth flag throttles
// a real download: a 1 MiB response served by a local test server and fetched
// with a "500KB/s" limit is expected to take at least two seconds.
//
// The check is wall-clock based and therefore slow, so the test is skipped
// when the -short flag is set.
func TestCollectWebsiteCmd_Bandwidth(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping bandwidth throttling test in short mode")
	}

	const (
		payloadSize = 1024 * 1024     // 1 MiB response body
		minElapsed  = 2 * time.Second // lower bound asserted for the throttled download
	)

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Record a failed write in the test log instead of dropping it, so a
		// truncated transfer is visible when diagnosing a timing failure.
		if _, err := w.Write(make([]byte, payloadSize)); err != nil {
			t.Logf("test server could not write response body: %v", err)
		}
	}))
	// Close blocks until outstanding requests have completed, so the handler
	// cannot log after this test function has returned.
	defer server.Close()

	rootCmd := NewRootCmd()
	rootCmd.AddCommand(GetCollectCmd())

	// Write the collected output into a per-test temporary directory.
	out := filepath.Join(t.TempDir(), "out")

	// Execute the command with a bandwidth limit and time the whole run.
	start := time.Now()
	_, err := executeCommand(rootCmd, "collect", "website", server.URL, "--output", out, "--bandwidth", "500KB/s")
	if err != nil {
		t.Fatalf("collect website command failed: %v", err)
	}
	elapsed := time.Since(start)

	// The throttled download must not finish faster than the lower bound.
	if elapsed < minElapsed {
		t.Errorf("expected download to take at least 2 seconds, but it took %s", elapsed)
	}
}