Borg/cmd/all.go
google-labs-jules[bot] 3d7c7c4634 feat: Add configurable timeouts for HTTP requests
This commit introduces configurable timeouts for HTTP requests made by the `collect` commands.

Key changes:
- Created a new `pkg/httpclient` package with a `NewClient` function that returns an `http.Client` with configurable timeouts for total, connect, TLS, and header stages.
- Added `--timeout`, `--connect-timeout`, `--tls-timeout`, and `--header-timeout` persistent flags to the `collect` command, making them available to all its subcommands.
- Refactored the `pkg/website`, `pkg/pwa`, and `pkg/github` packages to accept and use a custom `http.Client`, allowing the timeout configurations to be injected.
- Updated the `collect website`, `collect pwa`, and `collect github repos` commands to create a configured HTTP client based on the new flags and pass it to the respective packages.
- Added unit tests for the `pkg/httpclient` package to verify correct timeout configuration.
- Fixed all test and build failures that resulted from the refactoring.
- Addressed an unrelated build failure by creating a placeholder file (`pkg/player/frontend/demo-track.smsg`).

This work addresses the initial requirement for configurable timeouts via command-line flags. Further work is needed to implement per-domain overrides from a configuration file and idle timeouts for large file downloads.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:58:11 +00:00

187 lines
5.1 KiB
Go

package cmd
import (
"fmt"
"io"
"io/fs"
"net/url"
"os"
"strings"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/github"
"github.com/Snider/Borg/pkg/httpclient"
"github.com/Snider/Borg/pkg/tim"
"github.com/Snider/Borg/pkg/trix"
"github.com/Snider/Borg/pkg/ui"
"github.com/Snider/Borg/pkg/vcs"
"github.com/spf13/cobra"
)
var allCmd = NewAllCmd()
func NewAllCmd() *cobra.Command {
allCmd := &cobra.Command{
Use: "all [url]",
Short: "Collect all resources from a URL",
Long: `Collect all resources from a URL, dispatching to the appropriate collector based on the URL type.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
url := args[0]
outputFile, _ := cmd.Flags().GetString("output")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
password, _ := cmd.Flags().GetString("password")
if format != "datanode" && format != "tim" && format != "trix" {
return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format)
}
owner, err := parseGithubOwner(url)
if err != nil {
return err
}
totalTimeout, _ := cmd.Flags().GetDuration("timeout")
connectTimeout, _ := cmd.Flags().GetDuration("connect-timeout")
tlsTimeout, _ := cmd.Flags().GetDuration("tls-timeout")
headerTimeout, _ := cmd.Flags().GetDuration("header-timeout")
httpClient := httpclient.NewClient(totalTimeout, connectTimeout, tlsTimeout, headerTimeout)
githubClient := github.NewGithubClient(httpClient)
repos, err := githubClient.GetPublicRepos(cmd.Context(), owner)
if err != nil {
return err
}
prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
prompter.Start()
defer prompter.Stop()
var progressWriter io.Writer
if prompter.IsInteractive() {
bar := ui.NewProgressBar(len(repos), "Cloning repositories")
progressWriter = ui.NewProgressWriter(bar)
}
cloner := vcs.NewGitCloner()
allDataNodes := datanode.New()
for _, repoURL := range repos {
dn, err := cloner.CloneGitRepository(repoURL, progressWriter)
if err != nil {
// Log the error and continue
fmt.Fprintln(cmd.ErrOrStderr(), "Error cloning repository:", err)
continue
}
// This is not an efficient way to merge datanodes, but it's the only way for now
// A better approach would be to add a Merge method to the DataNode
repoName := strings.TrimSuffix(repoURL, ".git")
parts := strings.Split(repoName, "/")
repoName = parts[len(parts)-1]
err = dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
if err != nil {
return err
}
if !de.IsDir() {
err := func() error {
file, err := dn.Open(path)
if err != nil {
return err
}
defer file.Close()
data, err := io.ReadAll(file)
if err != nil {
return err
}
allDataNodes.AddData(repoName+"/"+path, data)
return nil
}()
if err != nil {
return err
}
}
return nil
})
if err != nil {
fmt.Fprintln(cmd.ErrOrStderr(), "Error walking datanode:", err)
continue
}
}
var data []byte
if format == "tim" {
tim, err := tim.FromDataNode(allDataNodes)
if err != nil {
return fmt.Errorf("error creating tim: %w", err)
}
data, err = tim.ToTar()
if err != nil {
return fmt.Errorf("error serializing tim: %w", err)
}
} else if format == "trix" {
data, err = trix.ToTrix(allDataNodes, password)
if err != nil {
return fmt.Errorf("error serializing trix: %w", err)
}
} else {
data, err = allDataNodes.ToTar()
if err != nil {
return fmt.Errorf("error serializing DataNode: %w", err)
}
}
compressedData, err := compress.Compress(data, compression)
if err != nil {
return fmt.Errorf("error compressing data: %w", err)
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
return fmt.Errorf("error writing DataNode to file: %w", err)
}
fmt.Fprintln(cmd.OutOrStdout(), "All repositories saved to", outputFile)
return nil
},
}
allCmd.PersistentFlags().String("output", "all.dat", "Output file for the DataNode")
allCmd.PersistentFlags().String("format", "datanode", "Output format (datanode, tim, or trix)")
allCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
allCmd.PersistentFlags().String("password", "", "Password for encryption")
return allCmd
}
func GetAllCmd() *cobra.Command {
return allCmd
}
func init() {
RootCmd.AddCommand(GetAllCmd())
}
func parseGithubOwner(u string) (string, error) {
owner, _, err := github.ParseRepoFromURL(u)
if err == nil {
return owner, nil
}
parsedURL, err := url.Parse(u)
if err != nil {
return "", fmt.Errorf("invalid URL: %w", err)
}
path := strings.Trim(parsedURL.Path, "/")
if path == "" {
return "", fmt.Errorf("invalid owner URL: %s", u)
}
parts := strings.Split(path, "/")
if len(parts) != 1 || parts[0] == "" {
return "", fmt.Errorf("invalid owner URL: %s", u)
}
return parts[0], nil
}