This commit is contained in:
Snider 2026-02-14 18:28:06 +00:00 committed by GitHub
commit 1599a81145
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 932 additions and 102 deletions

View file

@ -1,10 +1,10 @@
package cmd
import (
"fmt"
"io"
"io/fs"
"net/url"
"os"
"strings"
@ -18,98 +18,105 @@ import (
"github.com/spf13/cobra"
)
var allCmd = NewAllCmd()
var githubAllCmd = NewGithubAllCmd()
func NewAllCmd() *cobra.Command {
allCmd := &cobra.Command{
Use: "all [url]",
Short: "Collect all resources from a URL",
Long: `Collect all resources from a URL, dispatching to the appropriate collector based on the URL type.`,
func NewGithubAllCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "all <owner/repo>",
Short: "Collect all resources from a GitHub repository",
Long: `Collect all resources from a GitHub repository, including code, issues, and pull requests.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
url := args[0]
outputFile, _ := cmd.Flags().GetString("output")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
password, _ := cmd.Flags().GetString("password")
repoPath := args[0]
parts := strings.Split(repoPath, "/")
if len(parts) != 2 {
return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
}
owner, repo := parts[0], parts[1]
outputFile, err := cmd.Flags().GetString("output")
if err != nil {
return fmt.Errorf("error getting output flag: %w", err)
}
format, err := cmd.Flags().GetString("format")
if err != nil {
return fmt.Errorf("error getting format flag: %w", err)
}
compression, err := cmd.Flags().GetString("compression")
if err != nil {
return fmt.Errorf("error getting compression flag: %w", err)
}
password, err := cmd.Flags().GetString("password")
if err != nil {
return fmt.Errorf("error getting password flag: %w", err)
}
collectIssues, err := cmd.Flags().GetBool("issues")
if err != nil {
return fmt.Errorf("error getting issues flag: %w", err)
}
collectPRs, err := cmd.Flags().GetBool("prs")
if err != nil {
return fmt.Errorf("error getting prs flag: %w", err)
}
collectCode, err := cmd.Flags().GetBool("code")
if err != nil {
return fmt.Errorf("error getting code flag: %w", err)
}
if format != "datanode" && format != "tim" && format != "trix" {
return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format)
}
owner, err := parseGithubOwner(url)
if err != nil {
return err
}
repos, err := GithubClient.GetPublicRepos(cmd.Context(), owner)
if err != nil {
return err
}
allDataNodes := datanode.New()
prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
prompter.Start()
defer prompter.Stop()
var progressWriter io.Writer
if prompter.IsInteractive() {
bar := ui.NewProgressBar(len(repos), "Cloning repositories")
progressWriter = ui.NewProgressWriter(bar)
}
cloner := vcs.NewGitCloner()
allDataNodes := datanode.New()
for _, repoURL := range repos {
if collectCode {
var progressWriter io.Writer
if prompter.IsInteractive() {
bar := ui.NewProgressBar(-1, "Cloning repository")
progressWriter = ui.NewProgressWriter(bar)
}
cloner := vcs.NewGitCloner()
repoURL := fmt.Sprintf("https://github.com/%s/%s.git", owner, repo)
dn, err := cloner.CloneGitRepository(repoURL, progressWriter)
if err != nil {
// Log the error and continue
fmt.Fprintln(cmd.ErrOrStderr(), "Error cloning repository:", err)
continue
return fmt.Errorf("error cloning repository: %w", err)
}
// This is not an efficient way to merge datanodes, but it's the only way for now
// A better approach would be to add a Merge method to the DataNode
repoName := strings.TrimSuffix(repoURL, ".git")
parts := strings.Split(repoName, "/")
repoName = parts[len(parts)-1]
if mergeErr := mergeDataNodes(allDataNodes, dn, "code"); mergeErr != nil {
return fmt.Errorf("error merging code datanode: %w", mergeErr)
}
}
err = dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
if err != nil {
return err
}
if !de.IsDir() {
err := func() error {
file, err := dn.Open(path)
if err != nil {
return err
}
defer file.Close()
data, err := io.ReadAll(file)
if err != nil {
return err
}
allDataNodes.AddData(repoName+"/"+path, data)
return nil
}()
if err != nil {
return err
}
}
return nil
})
client := github.NewGithubClient()
if collectIssues {
dn, err := client.GetIssues(cmd.Context(), owner, repo)
if err != nil {
fmt.Fprintln(cmd.ErrOrStderr(), "Error walking datanode:", err)
continue
return fmt.Errorf("error getting issues: %w", err)
}
if mergeErr := mergeDataNodes(allDataNodes, dn, ""); mergeErr != nil {
return fmt.Errorf("error merging issues datanode: %w", mergeErr)
}
}
if collectPRs {
dn, err := client.GetPullRequests(cmd.Context(), owner, repo)
if err != nil {
return fmt.Errorf("error getting pull requests: %w", err)
}
if mergeErr := mergeDataNodes(allDataNodes, dn, ""); mergeErr != nil {
return fmt.Errorf("error merging pull requests datanode: %w", mergeErr)
}
}
var data []byte
if format == "tim" {
tim, err := tim.FromDataNode(allDataNodes)
t, err := tim.FromDataNode(allDataNodes)
if err != nil {
return fmt.Errorf("error creating tim: %w", err)
}
data, err = tim.ToTar()
data, err = t.ToTar()
if err != nil {
return fmt.Errorf("error serializing tim: %w", err)
}
@ -130,49 +137,67 @@ func NewAllCmd() *cobra.Command {
return fmt.Errorf("error compressing data: %w", err)
}
if outputFile == "" {
outputFile = fmt.Sprintf("%s-all.%s", repo, format)
if compression != "none" {
outputFile += "." + compression
}
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
return fmt.Errorf("error writing DataNode to file: %w", err)
}
fmt.Fprintln(cmd.OutOrStdout(), "All repositories saved to", outputFile)
fmt.Fprintln(cmd.OutOrStdout(), "All resources saved to", outputFile)
return nil
},
}
allCmd.PersistentFlags().String("output", "all.dat", "Output file for the DataNode")
allCmd.PersistentFlags().String("format", "datanode", "Output format (datanode, tim, or trix)")
allCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
allCmd.PersistentFlags().String("password", "", "Password for encryption")
return allCmd
cmd.Flags().String("output", "", "Output file for the DataNode")
cmd.Flags().String("format", "datanode", "Output format (datanode, tim, or trix)")
cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
cmd.Flags().String("password", "", "Password for encryption")
cmd.Flags().Bool("issues", true, "Collect issues")
cmd.Flags().Bool("prs", true, "Collect pull requests")
cmd.Flags().Bool("code", true, "Collect code")
return cmd
}
func GetAllCmd() *cobra.Command {
return allCmd
func GetGithubAllCmd() *cobra.Command {
return githubAllCmd
}
func init() {
RootCmd.AddCommand(GetAllCmd())
collectGithubCmd.AddCommand(GetGithubAllCmd())
}
func parseGithubOwner(u string) (string, error) {
owner, _, err := github.ParseRepoFromURL(u)
if err == nil {
return owner, nil
}
parsedURL, err := url.Parse(u)
if err != nil {
return "", fmt.Errorf("invalid URL: %w", err)
}
path := strings.Trim(parsedURL.Path, "/")
if path == "" {
return "", fmt.Errorf("invalid owner URL: %s", u)
}
parts := strings.Split(path, "/")
if len(parts) != 1 || parts[0] == "" {
return "", fmt.Errorf("invalid owner URL: %s", u)
}
return parts[0], nil
// mergeDataNodes copies every non-directory entry found while walking src
// from "." into dest. When prefix is non-empty each copied path is stored as
// prefix + "/" + path; otherwise the original path is kept. The first error
// returned by the walk, by opening an entry or by reading it aborts the merge
// and is returned unchanged.
func mergeDataNodes(dest *datanode.DataNode, src *datanode.DataNode, prefix string) error {
	// copyEntry reads one entry of src completely and stores it in dest.
	copyEntry := func(path string) error {
		entry, openErr := src.Open(path)
		if openErr != nil {
			return openErr
		}
		defer entry.Close()

		contents, readErr := io.ReadAll(entry)
		if readErr != nil {
			return readErr
		}

		target := path
		if prefix != "" {
			target = prefix + "/" + path
		}
		dest.AddData(target, contents)
		return nil
	}

	return src.Walk(".", func(path string, de fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if de.IsDir() {
			// Directories carry no data of their own; only their files are copied.
			return nil
		}
		return copyEntry(path)
	})
}

View file

@ -42,7 +42,7 @@ func TestAllCmd_Good(t *testing.T) {
}()
rootCmd := NewRootCmd()
rootCmd.AddCommand(GetAllCmd())
rootCmd.AddCommand(GetGithubAllCmd())
// Execute command
out := filepath.Join(t.TempDir(), "out")
@ -75,7 +75,7 @@ func TestAllCmd_Bad(t *testing.T) {
}()
rootCmd := NewRootCmd()
rootCmd.AddCommand(GetAllCmd())
rootCmd.AddCommand(GetGithubAllCmd())
// Execute command
out := filepath.Join(t.TempDir(), "out")
@ -104,7 +104,7 @@ func TestAllCmd_Ugly(t *testing.T) {
}()
rootCmd := NewRootCmd()
rootCmd.AddCommand(GetAllCmd())
rootCmd.AddCommand(GetGithubAllCmd())
// Execute command
out := filepath.Join(t.TempDir(), "out")

View file

@ -0,0 +1,84 @@
package cmd
import (
"fmt"
"os"
"strings"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/github"
"github.com/Snider/Borg/pkg/ui"
"github.com/spf13/cobra"
)
// NewCollectGithubIssuesCmd creates the cobra command that collects the issues
// of a single GitHub repository and writes them to a file.
//
// The command takes exactly one argument of the form <owner>/<repo> and
// understands three flags:
//
//	--output       file to write; when empty it defaults to "issues.<format>"
//	               with ".<compression>" appended unless compression is "none"
//	--format       output format; only "datanode" is accepted
//	--compression  "none", "gz" or "xz"
//
// RunE returns an error when the argument or a flag value is invalid, when the
// issues cannot be fetched, or when serializing, compressing or writing fails.
func NewCollectGithubIssuesCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "issues <owner/repo>",
		Short: "Collect issues from a GitHub repository",
		Long:  `Collect all issues from a GitHub repository and store them in a DataNode.`,
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			repoPath := args[0]
			parts := strings.Split(repoPath, "/")
			// Exactly two non-empty segments are required, so "owner/", "/repo"
			// and "a/b/c" are all rejected before any request is made.
			if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
				return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
			}
			owner, repo := parts[0], parts[1]

			outputFile, err := cmd.Flags().GetString("output")
			if err != nil {
				return fmt.Errorf("error getting output flag: %w", err)
			}
			format, err := cmd.Flags().GetString("format")
			if err != nil {
				return fmt.Errorf("error getting format flag: %w", err)
			}
			compression, err := cmd.Flags().GetString("compression")
			if err != nil {
				return fmt.Errorf("error getting compression flag: %w", err)
			}

			if format != "datanode" {
				return fmt.Errorf("invalid format: %s (must be 'datanode')", format)
			}
			if compression != "none" && compression != "gz" && compression != "xz" {
				return fmt.Errorf("invalid compression: %s (must be 'none', 'gz', or 'xz')", compression)
			}

			prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
			prompter.Start()
			defer prompter.Stop()

			client := github.NewGithubClient()
			dn, err := client.GetIssues(cmd.Context(), owner, repo)
			if err != nil {
				return fmt.Errorf("error getting issues: %w", err)
			}

			data, err := dn.ToTar()
			if err != nil {
				return fmt.Errorf("error serializing DataNode: %w", err)
			}

			compressedData, err := compress.Compress(data, compression)
			if err != nil {
				return fmt.Errorf("error compressing data: %w", err)
			}

			if outputFile == "" {
				outputFile = "issues." + format
				if compression != "none" {
					outputFile += "." + compression
				}
			}

			if err := os.WriteFile(outputFile, compressedData, 0644); err != nil {
				return fmt.Errorf("error writing DataNode to file: %w", err)
			}

			fmt.Fprintln(cmd.OutOrStdout(), "Issues saved to", outputFile)
			return nil
		},
	}
	cmd.Flags().String("output", "", "Output file for the DataNode")
	cmd.Flags().String("format", "datanode", "Output format (datanode)")
	cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
	return cmd
}
// init attaches a freshly built issues command to the command returned by
// GetCollectGithubCmd so it is available as one of its subcommands.
func init() {
GetCollectGithubCmd().AddCommand(NewCollectGithubIssuesCmd())
}

View file

@ -0,0 +1,53 @@
package cmd
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/Snider/Borg/pkg/github"
"github.com/stretchr/testify/assert"
)
// issuesRedirectTransport is a test-only http.RoundTripper that sends every
// request to a fixed host over plain HTTP. The issues command always builds
// its URLs against the default GitHub API host, so without this rewrite the
// test would talk to the real network instead of the local test server.
type issuesRedirectTransport struct {
	host string            // host:port of the local test server
	next http.RoundTripper // transport that performs the rewritten request
}

// RoundTrip rewrites scheme and host on a clone of req (a RoundTripper must
// not modify the request it is given) and forwards the clone to next.
func (rt issuesRedirectTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	redirected := req.Clone(req.Context())
	redirected.URL.Scheme = "http"
	redirected.URL.Host = rt.host
	redirected.Host = ""
	return rt.next.RoundTrip(redirected)
}

// TestCollectGithubIssuesCmd runs the issues command against a local fake of
// the GitHub API and checks that it succeeds and produces the output file.
func TestCollectGithubIssuesCmd(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/repos/owner/repo/issues":
			w.Header().Set("Content-Type", "application/json")
			issues := []github.Issue{
				{Number: 1, Title: "Issue 1", CommentsURL: "http://" + r.Host + "/repos/owner/repo/issues/1/comments"},
			}
			if err := json.NewEncoder(w).Encode(issues); err != nil {
				t.Errorf("encoding issues: %v", err)
			}
		case "/repos/owner/repo/issues/1/comments":
			w.Header().Set("Content-Type", "application/json")
			w.Write([]byte("[]"))
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	originalNewAuthenticatedClient := github.NewAuthenticatedClient
	github.NewAuthenticatedClient = func(ctx context.Context) *http.Client {
		return &http.Client{Transport: issuesRedirectTransport{
			host: server.Listener.Addr().String(),
			next: server.Client().Transport,
		}}
	}
	defer func() {
		github.NewAuthenticatedClient = originalNewAuthenticatedClient
	}()

	// Write into a per-test temporary directory so the working tree stays
	// clean and the file is removed automatically when the test ends.
	outputFile := t.TempDir() + string(os.PathSeparator) + "issues.dat"

	cmd := NewCollectGithubIssuesCmd()
	var out bytes.Buffer
	cmd.SetOut(&out)
	cmd.SetErr(&out)
	cmd.SetArgs([]string{"owner/repo", "--output", outputFile})
	err := cmd.Execute()
	assert.NoError(t, err)

	_, err = os.Stat(outputFile)
	assert.NoError(t, err)
}

84
cmd/collect_github_prs.go Normal file
View file

@ -0,0 +1,84 @@
package cmd
import (
"fmt"
"os"
"strings"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/github"
"github.com/Snider/Borg/pkg/ui"
"github.com/spf13/cobra"
)
// NewCollectGithubPrsCmd creates the cobra command that collects the pull
// requests of a single GitHub repository and writes them to a file.
//
// The command takes exactly one argument of the form <owner>/<repo> and
// understands three flags:
//
//	--output       file to write; when empty it defaults to "prs.<format>"
//	               with ".<compression>" appended unless compression is "none"
//	--format       output format; only "datanode" is accepted
//	--compression  "none", "gz" or "xz"
//
// RunE returns an error when the argument or a flag value is invalid, when the
// pull requests cannot be fetched, or when serializing, compressing or writing
// fails.
func NewCollectGithubPrsCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "prs <owner/repo>",
		Short: "Collect pull requests from a GitHub repository",
		Long:  `Collect all pull requests from a GitHub repository and store them in a DataNode.`,
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			repoPath := args[0]
			parts := strings.Split(repoPath, "/")
			// Exactly two non-empty segments are required, so "owner/", "/repo"
			// and "a/b/c" are all rejected before any request is made.
			if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
				return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
			}
			owner, repo := parts[0], parts[1]

			outputFile, err := cmd.Flags().GetString("output")
			if err != nil {
				return fmt.Errorf("error getting output flag: %w", err)
			}
			format, err := cmd.Flags().GetString("format")
			if err != nil {
				return fmt.Errorf("error getting format flag: %w", err)
			}
			compression, err := cmd.Flags().GetString("compression")
			if err != nil {
				return fmt.Errorf("error getting compression flag: %w", err)
			}

			if format != "datanode" {
				return fmt.Errorf("invalid format: %s (must be 'datanode')", format)
			}
			if compression != "none" && compression != "gz" && compression != "xz" {
				return fmt.Errorf("invalid compression: %s (must be 'none', 'gz', or 'xz')", compression)
			}

			prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
			prompter.Start()
			defer prompter.Stop()

			client := github.NewGithubClient()
			dn, err := client.GetPullRequests(cmd.Context(), owner, repo)
			if err != nil {
				return fmt.Errorf("error getting pull requests: %w", err)
			}

			data, err := dn.ToTar()
			if err != nil {
				return fmt.Errorf("error serializing DataNode: %w", err)
			}

			compressedData, err := compress.Compress(data, compression)
			if err != nil {
				return fmt.Errorf("error compressing data: %w", err)
			}

			if outputFile == "" {
				outputFile = "prs." + format
				if compression != "none" {
					outputFile += "." + compression
				}
			}

			if err := os.WriteFile(outputFile, compressedData, 0644); err != nil {
				return fmt.Errorf("error writing DataNode to file: %w", err)
			}

			fmt.Fprintln(cmd.OutOrStdout(), "Pull requests saved to", outputFile)
			return nil
		},
	}
	cmd.Flags().String("output", "", "Output file for the DataNode")
	cmd.Flags().String("format", "datanode", "Output format (datanode)")
	cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
	return cmd
}
// init attaches a freshly built prs command to the command returned by
// GetCollectGithubCmd so it is available as one of its subcommands.
func init() {
GetCollectGithubCmd().AddCommand(NewCollectGithubPrsCmd())
}

View file

@ -0,0 +1,64 @@
package cmd
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/Snider/Borg/pkg/github"
"github.com/stretchr/testify/assert"
)
// prsRedirectTransport is a test-only http.RoundTripper that sends every
// request to a fixed host over plain HTTP. The prs command always builds its
// URLs against the default GitHub API host, so without this rewrite the test
// would talk to the real network instead of the local test server.
type prsRedirectTransport struct {
	host string            // host:port of the local test server
	next http.RoundTripper // transport that performs the rewritten request
}

// RoundTrip rewrites scheme and host on a clone of req (a RoundTripper must
// not modify the request it is given) and forwards the clone to next.
func (rt prsRedirectTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	redirected := req.Clone(req.Context())
	redirected.URL.Scheme = "http"
	redirected.URL.Host = rt.host
	redirected.Host = ""
	return rt.next.RoundTrip(redirected)
}

// TestCollectGithubPrsCmd runs the prs command against a local fake of the
// GitHub API and checks that it succeeds and produces the output file.
func TestCollectGithubPrsCmd(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		base := "http://" + r.Host + "/repos/owner/repo/pulls"
		switch r.URL.Path {
		case "/repos/owner/repo/pulls":
			w.Header().Set("Content-Type", "application/json")
			pr := github.PullRequest{Number: 1, Title: "PR 1", DiffURL: base + "/1.diff"}
			pr.Links.ReviewComments.Href = base + "/1/comments"
			if err := json.NewEncoder(w).Encode([]github.PullRequest{pr}); err != nil {
				t.Errorf("encoding pull requests: %v", err)
			}
		case "/repos/owner/repo/pulls/1.diff":
			w.Write([]byte("diff --git a/file b/file"))
		case "/repos/owner/repo/pulls/1/comments":
			w.Header().Set("Content-Type", "application/json")
			w.Write([]byte("[]"))
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	originalNewAuthenticatedClient := github.NewAuthenticatedClient
	github.NewAuthenticatedClient = func(ctx context.Context) *http.Client {
		return &http.Client{Transport: prsRedirectTransport{
			host: server.Listener.Addr().String(),
			next: server.Client().Transport,
		}}
	}
	defer func() {
		github.NewAuthenticatedClient = originalNewAuthenticatedClient
	}()

	// Write into a per-test temporary directory so the working tree stays
	// clean and the file is removed automatically when the test ends.
	outputFile := t.TempDir() + string(os.PathSeparator) + "prs.dat"

	cmd := NewCollectGithubPrsCmd()
	var out bytes.Buffer
	cmd.SetOut(&out)
	cmd.SetErr(&out)
	cmd.SetArgs([]string{"owner/repo", "--output", outputFile})
	err := cmd.Execute()
	assert.NoError(t, err)

	_, err = os.Stat(outputFile)
	assert.NoError(t, err)
}

View file

@ -8,6 +8,7 @@ import (
"os"
"strings"
"github.com/Snider/Borg/pkg/datanode"
"golang.org/x/oauth2"
)
@ -18,6 +19,8 @@ type Repo struct {
// GithubClient is an interface for interacting with the Github API.
type GithubClient interface {
// GetPublicRepos returns a list of strings for the given user or
// organisation name (presumably repository clone URLs — confirm against
// the implementation, which is not visible here).
GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error)
// GetIssues collects the issues of owner/repo into a DataNode.
GetIssues(ctx context.Context, owner, repo string) (*datanode.DataNode, error)
// GetPullRequests collects the pull requests of owner/repo into a DataNode.
GetPullRequests(ctx context.Context, owner, repo string) (*datanode.DataNode, error)
}
// NewGithubClient creates a new GithubClient.
@ -25,7 +28,9 @@ func NewGithubClient() GithubClient {
return &githubClient{}
}
type githubClient struct{}
// githubClient is the GithubClient implementation returned by NewGithubClient.
type githubClient struct {
// apiURL, when non-empty, replaces the default "https://api.github.com"
// base URL used by GetIssues and GetPullRequests; the package tests set it
// to the address of a local test server.
apiURL string
}
// NewAuthenticatedClient creates a new authenticated http client.
var NewAuthenticatedClient = func(ctx context.Context) *http.Client {

156
pkg/github/issue.go Normal file
View file

@ -0,0 +1,156 @@
package github
import (
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
"github.com/Snider/Borg/pkg/datanode"
)
// Issue is the subset of a GitHub issue object that is decoded from the API
// response and rendered by GetIssues.
type Issue struct {
Number int `json:"number"`
Title string `json:"title"`
Body string `json:"body"`
State string `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
// User holds the login that GetIssues renders as the issue's author.
User struct {
Login string `json:"login"`
} `json:"user"`
// Labels lists the label names attached to the issue.
Labels []struct {
Name string `json:"name"`
} `json:"labels"`
// CommentsURL is the URL GetIssues requests to load the issue's comments.
CommentsURL string `json:"comments_url"`
}
// Comment is the subset of an issue comment object that is decoded from the
// API response: its text, creation time and the login of its author.
type Comment struct {
Body string `json:"body"`
CreatedAt time.Time `json:"created_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
}
// GetIssues downloads the issues of owner/repo and returns them in a new
// DataNode.
//
// Issues are requested with state=all so that closed issues are included as
// well as open ones. Pages are fetched one after another; the URL of the next
// page is whatever g.findNextURL extracts from the response's Link header, and
// paging stops when that is empty. For every issue the comments behind its
// CommentsURL are fetched too.
//
// The returned DataNode contains one Markdown file per issue at
// "issues/<number>.md" and a JSON dump of all decoded issues at
// "issues/INDEX.json".
//
// Any request, status, decode or marshal failure aborts the collection and is
// returned; no partial DataNode is returned in that case.
func (g *githubClient) GetIssues(ctx context.Context, owner, repo string) (*datanode.DataNode, error) {
	dn := datanode.New()
	client := NewAuthenticatedClient(ctx)
	apiURL := "https://api.github.com"
	if g.apiURL != "" {
		apiURL = g.apiURL
	}
	// The API lists only open issues unless a state is given.
	url := fmt.Sprintf("%s/repos/%s/%s/issues?state=all", apiURL, owner, repo)

	var allIssues []Issue
	for url != "" {
		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
		if err != nil {
			return nil, err
		}
		req.Header.Set("User-Agent", "Borg-Data-Collector")
		resp, err := client.Do(req)
		if err != nil {
			return nil, err
		}
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			return nil, fmt.Errorf("failed to fetch issues: %s", resp.Status)
		}

		var issues []Issue
		decodeErr := json.NewDecoder(resp.Body).Decode(&issues)
		// Close on every path; a deferred close inside the loop would keep all
		// page bodies open until the function returns.
		resp.Body.Close()
		if decodeErr != nil {
			return nil, decodeErr
		}
		allIssues = append(allIssues, issues...)

		url = g.findNextURL(resp.Header.Get("Link"))
	}

	for _, issue := range allIssues {
		var markdown strings.Builder
		fmt.Fprintf(&markdown, "# Issue %d: %s\n\n", issue.Number, issue.Title)
		fmt.Fprintf(&markdown, "**Author**: %s\n", issue.User.Login)
		fmt.Fprintf(&markdown, "**State**: %s\n", issue.State)
		fmt.Fprintf(&markdown, "**Created**: %s\n", issue.CreatedAt.Format(time.RFC1123))
		fmt.Fprintf(&markdown, "**Updated**: %s\n\n", issue.UpdatedAt.Format(time.RFC1123))

		if len(issue.Labels) > 0 {
			markdown.WriteString("**Labels**:\n")
			for _, label := range issue.Labels {
				fmt.Fprintf(&markdown, "- %s\n", label.Name)
			}
			markdown.WriteString("\n")
		}

		markdown.WriteString("## Body\n\n")
		markdown.WriteString(issue.Body)
		markdown.WriteString("\n\n")

		// Fetch comments
		comments, err := g.getComments(ctx, issue.CommentsURL)
		if err != nil {
			return nil, err
		}

		if len(comments) > 0 {
			markdown.WriteString("## Comments\n\n")
			for _, comment := range comments {
				fmt.Fprintf(&markdown, "**%s** commented on %s:\n\n", comment.User.Login, comment.CreatedAt.Format(time.RFC1123))
				markdown.WriteString(comment.Body)
				markdown.WriteString("\n\n---\n\n")
			}
		}

		filename := fmt.Sprintf("issues/%d.md", issue.Number)
		dn.AddData(filename, []byte(markdown.String()))
	}

	// Add an index file
	index, err := json.MarshalIndent(allIssues, "", " ")
	if err != nil {
		return nil, err
	}
	dn.AddData("issues/INDEX.json", index)

	return dn, nil
}
// getComments fetches all comments reachable from url and returns them in the
// order received. Pages are fetched one after another; the URL of the next
// page is whatever g.findNextURL extracts from the response's Link header, and
// paging stops when that is empty. An empty url yields no comments and no
// error.
//
// A request failure, a status other than 200 or a decode failure aborts the
// fetch and is returned.
func (g *githubClient) getComments(ctx context.Context, url string) ([]Comment, error) {
	client := NewAuthenticatedClient(ctx)
	var allComments []Comment

	for url != "" {
		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
		if err != nil {
			return nil, err
		}
		req.Header.Set("User-Agent", "Borg-Data-Collector")
		resp, err := client.Do(req)
		if err != nil {
			return nil, err
		}
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			return nil, fmt.Errorf("failed to fetch comments: %s", resp.Status)
		}

		var comments []Comment
		decodeErr := json.NewDecoder(resp.Body).Decode(&comments)
		// Close on every path; a deferred close inside the loop would keep all
		// page bodies open until the function returns.
		resp.Body.Close()
		if decodeErr != nil {
			return nil, decodeErr
		}
		allComments = append(allComments, comments...)

		url = g.findNextURL(resp.Header.Get("Link"))
	}

	return allComments, nil
}

67
pkg/github/issue_test.go Normal file
View file

@ -0,0 +1,67 @@
package github
import (
"context"
"encoding/json"
"io/fs"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
// TestGetIssues runs GetIssues against a local fake of the GitHub API that
// serves two issues (one with a comment, one without) and checks that the
// returned DataNode contains one Markdown file per issue plus the index.
func TestGetIssues(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		base := "http://" + r.Host + "/repos/owner/repo/issues"
		switch r.URL.Path {
		case "/repos/owner/repo/issues":
			w.Header().Set("Content-Type", "application/json")
			issues := []Issue{
				{Number: 1, Title: "Issue 1", CommentsURL: base + "/1/comments"},
				{Number: 2, Title: "Issue 2", CommentsURL: base + "/2/comments"},
			}
			if err := json.NewEncoder(w).Encode(issues); err != nil {
				t.Errorf("encoding issues: %v", err)
			}
		case "/repos/owner/repo/issues/1/comments":
			w.Header().Set("Content-Type", "application/json")
			comments := []Comment{
				{Body: "Comment 1"},
			}
			if err := json.NewEncoder(w).Encode(comments); err != nil {
				t.Errorf("encoding comments: %v", err)
			}
		case "/repos/owner/repo/issues/2/comments":
			w.Header().Set("Content-Type", "application/json")
			w.Write([]byte("[]"))
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	originalNewAuthenticatedClient := NewAuthenticatedClient
	NewAuthenticatedClient = func(ctx context.Context) *http.Client {
		return server.Client()
	}
	defer func() {
		NewAuthenticatedClient = originalNewAuthenticatedClient
	}()

	client := &githubClient{apiURL: server.URL}
	dn, err := client.GetIssues(context.Background(), "owner", "repo")
	assert.NoError(t, err)
	// Stop here on a nil result; walking it below would panic and hide the
	// real failure.
	if !assert.NotNil(t, dn) {
		return
	}

	expectedFiles := []string{
		"issues/1.md",
		"issues/2.md",
		"issues/INDEX.json",
	}

	var actualFiles []string
	walkErr := dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
		if err != nil {
			// de may be nil when err is set, so do not touch it.
			return err
		}
		if !de.IsDir() {
			actualFiles = append(actualFiles, path)
		}
		return nil
	})
	assert.NoError(t, walkErr)

	assert.ElementsMatch(t, expectedFiles, actualFiles)
}

201
pkg/github/pull_request.go Normal file
View file

@ -0,0 +1,201 @@
package github
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/Snider/Borg/pkg/datanode"
)
// PullRequest is the subset of a GitHub pull request object that is decoded
// from the API response and rendered by GetPullRequests.
type PullRequest struct {
Number int `json:"number"`
Title string `json:"title"`
Body string `json:"body"`
State string `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
// MergedAt is only rendered by GetPullRequests when it is not the zero time.
MergedAt time.Time `json:"merged_at"`
// User holds the login that GetPullRequests renders as the author.
User struct {
Login string `json:"login"`
} `json:"user"`
// Labels lists the label names attached to the pull request.
Labels []struct {
Name string `json:"name"`
} `json:"labels"`
// Links mirrors the "_links" object of the API response. GetPullRequests
// reads ReviewComments.Href to load review comments; Comments.Href is
// decoded but not used by the code in this file.
Links struct {
Comments struct {
Href string `json:"href"`
} `json:"comments"`
ReviewComments struct {
Href string `json:"href"`
} `json:"review_comments"`
} `json:"_links"`
// DiffURL is the URL GetPullRequests requests to download the diff.
DiffURL string `json:"diff_url"`
}
// ReviewComment is the subset of a pull request review comment object that is
// decoded from the API response: its text, the path it was made on, its
// creation time and the login of its author.
type ReviewComment struct {
Body string `json:"body"`
Path string `json:"path"`
CreatedAt time.Time `json:"created_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
}
// GetPullRequests downloads the pull requests of owner/repo, open and closed,
// and returns them in a new DataNode.
//
// Pages are fetched one after another; the URL of the next page is whatever
// g.findNextURL extracts from the response's Link header, and paging stops
// when that is empty. For every pull request the diff behind its DiffURL and
// the review comments behind its review_comments link are fetched too.
//
// The returned DataNode contains, per pull request, the raw diff at
// "pulls/<number>.diff" and a Markdown summary at "pulls/<number>.md", plus a
// JSON dump of all decoded pull requests at "pulls/INDEX.json".
//
// Any request, status, decode or marshal failure aborts the collection and is
// returned; no partial DataNode is returned in that case.
func (g *githubClient) GetPullRequests(ctx context.Context, owner, repo string) (*datanode.DataNode, error) {
	dn := datanode.New()
	client := NewAuthenticatedClient(ctx)
	apiURL := "https://api.github.com"
	if g.apiURL != "" {
		apiURL = g.apiURL
	}
	// Get both open and closed pull requests
	url := fmt.Sprintf("%s/repos/%s/%s/pulls?state=all", apiURL, owner, repo)

	var allPRs []PullRequest
	for url != "" {
		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
		if err != nil {
			return nil, err
		}
		req.Header.Set("User-Agent", "Borg-Data-Collector")
		resp, err := client.Do(req)
		if err != nil {
			return nil, err
		}
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			return nil, fmt.Errorf("failed to fetch pull requests: %s", resp.Status)
		}

		var prs []PullRequest
		decodeErr := json.NewDecoder(resp.Body).Decode(&prs)
		// Close on every path; a deferred close inside the loop would keep all
		// page bodies open until the function returns.
		resp.Body.Close()
		if decodeErr != nil {
			return nil, decodeErr
		}
		allPRs = append(allPRs, prs...)

		url = g.findNextURL(resp.Header.Get("Link"))
	}

	for _, pr := range allPRs {
		var markdown strings.Builder
		fmt.Fprintf(&markdown, "# PR %d: %s\n\n", pr.Number, pr.Title)
		fmt.Fprintf(&markdown, "**Author**: %s\n", pr.User.Login)
		fmt.Fprintf(&markdown, "**State**: %s\n", pr.State)
		fmt.Fprintf(&markdown, "**Created**: %s\n", pr.CreatedAt.Format(time.RFC1123))
		fmt.Fprintf(&markdown, "**Updated**: %s\n", pr.UpdatedAt.Format(time.RFC1123))
		if !pr.MergedAt.IsZero() {
			fmt.Fprintf(&markdown, "**Merged**: %s\n", pr.MergedAt.Format(time.RFC1123))
		}
		fmt.Fprintf(&markdown, "\n**[View Diff](%s)**\n\n", pr.DiffURL)

		if len(pr.Labels) > 0 {
			markdown.WriteString("**Labels**:\n")
			for _, label := range pr.Labels {
				fmt.Fprintf(&markdown, "- %s\n", label.Name)
			}
			markdown.WriteString("\n")
		}

		markdown.WriteString("## Body\n\n")
		markdown.WriteString(pr.Body)
		markdown.WriteString("\n\n")

		// Fetch diff
		diff, err := g.getDiff(ctx, pr.DiffURL)
		if err != nil {
			return nil, fmt.Errorf("failed to get diff for PR #%d: %w", pr.Number, err)
		}
		dn.AddData(fmt.Sprintf("pulls/%d.diff", pr.Number), diff)

		// Fetch review comments
		reviewComments, err := g.getReviewComments(ctx, pr.Links.ReviewComments.Href)
		if err != nil {
			return nil, err
		}

		if len(reviewComments) > 0 {
			markdown.WriteString("## Review Comments\n\n")
			for _, comment := range reviewComments {
				fmt.Fprintf(&markdown, "**%s** commented on `%s` at %s:\n\n", comment.User.Login, comment.Path, comment.CreatedAt.Format(time.RFC1123))
				markdown.WriteString(comment.Body)
				markdown.WriteString("\n\n---\n\n")
			}
		}

		filename := fmt.Sprintf("pulls/%d.md", pr.Number)
		dn.AddData(filename, []byte(markdown.String()))
	}

	// Add an index file
	index, err := json.MarshalIndent(allPRs, "", " ")
	if err != nil {
		return nil, err
	}
	dn.AddData("pulls/INDEX.json", index)

	return dn, nil
}
// getReviewComments fetches all review comments reachable from url and returns
// them in the order received. Pages are fetched one after another; the URL of
// the next page is whatever g.findNextURL extracts from the response's Link
// header, and paging stops when that is empty. An empty url yields no comments
// and no error.
//
// A request failure, a status other than 200 or a decode failure aborts the
// fetch and is returned.
func (g *githubClient) getReviewComments(ctx context.Context, url string) ([]ReviewComment, error) {
	client := NewAuthenticatedClient(ctx)
	var allComments []ReviewComment

	for url != "" {
		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
		if err != nil {
			return nil, err
		}
		req.Header.Set("User-Agent", "Borg-Data-Collector")
		resp, err := client.Do(req)
		if err != nil {
			return nil, err
		}
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			return nil, fmt.Errorf("failed to fetch review comments: %s", resp.Status)
		}

		var comments []ReviewComment
		decodeErr := json.NewDecoder(resp.Body).Decode(&comments)
		// Close on every path; a deferred close inside the loop would keep all
		// page bodies open until the function returns.
		resp.Body.Close()
		if decodeErr != nil {
			return nil, decodeErr
		}
		allComments = append(allComments, comments...)

		url = g.findNextURL(resp.Header.Get("Link"))
	}

	return allComments, nil
}
// getDiff performs a single GET on url, asking for the diff media type via the
// Accept header, and returns the complete response body. A status other than
// 200 is reported as an error; the response body is closed on every path once
// the request has succeeded.
func (g *githubClient) getDiff(ctx context.Context, url string) ([]byte, error) {
	httpClient := NewAuthenticatedClient(ctx)

	request, reqErr := http.NewRequestWithContext(ctx, "GET", url, nil)
	if reqErr != nil {
		return nil, reqErr
	}
	for header, value := range map[string]string{
		"User-Agent": "Borg-Data-Collector",
		"Accept":     "application/vnd.github.v3.diff",
	} {
		request.Header.Set(header, value)
	}

	response, doErr := httpClient.Do(request)
	if doErr != nil {
		return nil, doErr
	}
	defer response.Body.Close()

	if response.StatusCode == http.StatusOK {
		return io.ReadAll(response.Body)
	}
	return nil, fmt.Errorf("failed to fetch diff: %s", response.Status)
}

View file

@ -0,0 +1,91 @@
package github
import (
"context"
"encoding/json"
"io/fs"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
// TestGetPullRequests runs GetPullRequests against a local fake of the GitHub
// API that serves two pull requests with their diffs and review comments, and
// checks that the returned DataNode contains a Markdown file and a diff per
// pull request plus the index.
func TestGetPullRequests(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		base := "http://" + r.Host + "/repos/owner/repo/pulls"
		switch r.URL.Path {
		case "/repos/owner/repo/pulls":
			w.Header().Set("Content-Type", "application/json")
			first := PullRequest{Number: 1, Title: "PR 1", DiffURL: base + "/1.diff"}
			first.Links.ReviewComments.Href = base + "/1/comments"
			second := PullRequest{Number: 2, Title: "PR 2", DiffURL: base + "/2.diff"}
			second.Links.ReviewComments.Href = base + "/2/comments"
			if err := json.NewEncoder(w).Encode([]PullRequest{first, second}); err != nil {
				t.Errorf("encoding pull requests: %v", err)
			}
		case "/repos/owner/repo/pulls/1.diff":
			w.Write([]byte("diff --git a/file b/file"))
		case "/repos/owner/repo/pulls/1/comments":
			w.Header().Set("Content-Type", "application/json")
			comments := []ReviewComment{
				{Body: "Review Comment 1"},
			}
			if err := json.NewEncoder(w).Encode(comments); err != nil {
				t.Errorf("encoding review comments: %v", err)
			}
		case "/repos/owner/repo/pulls/2.diff":
			w.Write([]byte("diff --git a/file2 b/file2"))
		case "/repos/owner/repo/pulls/2/comments":
			w.Header().Set("Content-Type", "application/json")
			w.Write([]byte("[]"))
		default:
			http.NotFound(w, r)
		}
	}))
	defer server.Close()

	originalNewAuthenticatedClient := NewAuthenticatedClient
	NewAuthenticatedClient = func(ctx context.Context) *http.Client {
		return server.Client()
	}
	defer func() {
		NewAuthenticatedClient = originalNewAuthenticatedClient
	}()

	client := &githubClient{apiURL: server.URL}
	dn, err := client.GetPullRequests(context.Background(), "owner", "repo")
	assert.NoError(t, err)
	// Stop here on a nil result; walking it below would panic and hide the
	// real failure.
	if !assert.NotNil(t, dn) {
		return
	}

	expectedFiles := []string{
		"pulls/1.md",
		"pulls/1.diff",
		"pulls/2.md",
		"pulls/2.diff",
		"pulls/INDEX.json",
	}

	var actualFiles []string
	walkErr := dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
		if err != nil {
			// de may be nil when err is set, so do not touch it.
			return err
		}
		if !de.IsDir() {
			actualFiles = append(actualFiles, path)
		}
		return nil
	})
	assert.NoError(t, walkErr)

	assert.ElementsMatch(t, expectedFiles, actualFiles)
}