Borg/pkg/httpclient/client.go
google-labs-jules[bot] e057ddfa7f feat: Add Proxy and Tor Support for Website Collector
This commit introduces support for routing website collection requests through HTTP/SOCKS5 proxies or Tor.

New flags for the `borg collect website` command:
- `--proxy <url>`: Route requests through a single HTTP or SOCKS5 proxy.
- `--proxy-list <file>`: Select a random proxy from a file for all requests.
- `--tor`: Route requests through a Tor SOCKS5 proxy (defaults to 127.0.0.1:9050).

Key changes:
- Created a new `pkg/httpclient` to centralize the creation of proxy-configured `http.Client` instances.
- Refactored `pkg/website` to use a `Downloader` interface, allowing for dependency injection of the HTTP client.
- Added validation to ensure the new proxy flags are mutually exclusive.
- Implemented support for SOCKS5 authentication via credentials in the proxy URL.
- Added comprehensive unit tests for the new functionality.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:52:32 +00:00

90 lines
1.8 KiB
Go

package httpclient
import (
	"bufio"
	"fmt"
	"math/rand"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"golang.org/x/net/proxy"
)
// init seeds the package-global math/rand source from the current wall-clock
// time (nanosecond resolution) so that the random proxy picked by NewClient
// is not tied to a fixed seed.
//
// NOTE(review): rand.Seed is deprecated as of Go 1.20, where the global
// source is seeded randomly by default; this call can be dropped (together
// with the "time" import) once the module targets Go 1.20 or newer.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
// NewClient creates a new http.Client with the specified proxy settings.
//
// Parameters:
//   - proxyURL: a single proxy URL; the empty string means no explicit proxy.
//   - proxyList: path to a file of proxy URLs (loaded with readProxyList).
//     When non-empty, one entry is chosen at random and replaces proxyURL.
//   - useTor: when true, proxyURL is replaced with socks5://127.0.0.1:9050.
//
// If several settings are supplied, proxyList takes precedence over useTor,
// which takes precedence over proxyURL.
//
// A URL with the "socks5" scheme is dialed through golang.org/x/net/proxy,
// using the username/password from the URL's userinfo when present. Any other
// scheme is handed to http.ProxyURL. When no proxy is configured at all, a
// zero-value http.Client is returned.
//
// An error is returned when the proxy list cannot be read, when it yields no
// usable entry (so a requested proxy never silently degrades to a direct
// connection), when the proxy URL cannot be parsed or has no host, or when
// the SOCKS5 dialer cannot be created.
func NewClient(proxyURL, proxyList string, useTor bool) (*http.Client, error) {
	if useTor {
		proxyURL = "socks5://127.0.0.1:9050"
	}

	if proxyList != "" {
		proxies, err := readProxyList(proxyList)
		if err != nil {
			return nil, err
		}
		// Fail closed: the caller asked for a proxy, so an empty list or an
		// empty entry must not fall through to an un-proxied client.
		if len(proxies) == 0 {
			return nil, fmt.Errorf("proxy list %q contains no proxies", proxyList)
		}
		proxyURL = proxies[rand.Intn(len(proxies))]
		if proxyURL == "" {
			return nil, fmt.Errorf("proxy list %q contains an empty entry", proxyList)
		}
	}

	if proxyURL == "" {
		return &http.Client{}, nil
	}

	proxyURLParsed, err := url.Parse(proxyURL)
	if err != nil {
		return nil, fmt.Errorf("error parsing proxy URL: %w", err)
	}
	// "host:port" without a scheme parses as scheme+opaque with no host and
	// would only fail later, at request time; reject it up front. Redacted
	// keeps any password out of the error message.
	if proxyURLParsed.Host == "" {
		return nil, fmt.Errorf("invalid proxy URL %q: missing host (expected scheme://host:port)", proxyURLParsed.Redacted())
	}

	if proxyURLParsed.Scheme != "socks5" {
		return &http.Client{
			Transport: &http.Transport{
				Proxy: http.ProxyURL(proxyURLParsed),
			},
		}, nil
	}

	var auth *proxy.Auth
	if user := proxyURLParsed.User; user != nil {
		// The second result only reports whether a password was set; an
		// unset password is deliberately sent as the empty string.
		password, _ := user.Password()
		auth = &proxy.Auth{
			User:     user.Username(),
			Password: password,
		}
	}

	dialer, err := proxy.SOCKS5("tcp", proxyURLParsed.Host, auth, proxy.Direct)
	if err != nil {
		return nil, fmt.Errorf("error creating SOCKS5 dialer: %w", err)
	}

	transport := &http.Transport{}
	if ctxDialer, ok := dialer.(proxy.ContextDialer); ok {
		// Prefer the context-aware dial so request cancellation and
		// deadlines also apply while connecting through the proxy.
		transport.DialContext = ctxDialer.DialContext
	} else {
		transport.Dial = dialer.Dial
	}
	return &http.Client{
		Transport: transport,
	}, nil
}
// readProxyList loads proxy URLs from the file at filename, one per line.
//
// Leading and trailing whitespace is trimmed from every line. Blank lines and
// lines beginning with '#' are skipped, so a trailing newline or a comment
// never produces an empty or bogus proxy entry. The entries are returned in
// file order; the result is nil when the file holds no entries.
//
// An error is returned if the file cannot be opened or if scanning it fails.
func readProxyList(filename string) ([]string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, fmt.Errorf("error opening proxy list file: %w", err)
	}
	defer file.Close()

	var proxies []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		entry := strings.TrimSpace(scanner.Text())
		if entry == "" || strings.HasPrefix(entry, "#") {
			continue
		}
		proxies = append(proxies, entry)
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading proxy list file: %w", err)
	}
	return proxies, nil
}