diff --git a/cmd/all.go b/cmd/all.go
index 84a06db..d9ca780 100644
--- a/cmd/all.go
+++ b/cmd/all.go
@@ -1,10 +1,10 @@
+
 package cmd
 
 import (
 	"fmt"
 	"io"
 	"io/fs"
-	"net/url"
 	"os"
 	"strings"
 
@@ -18,98 +18,105 @@ import (
 	"github.com/spf13/cobra"
 )
 
-var allCmd = NewAllCmd()
+var githubAllCmd = NewGithubAllCmd()
 
-func NewAllCmd() *cobra.Command {
-	allCmd := &cobra.Command{
-		Use:   "all [url]",
-		Short: "Collect all resources from a URL",
-		Long:  `Collect all resources from a URL, dispatching to the appropriate collector based on the URL type.`,
+func NewGithubAllCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "all <owner/repo>",
+		Short: "Collect all resources from a GitHub repository",
+		Long:  `Collect all resources from a GitHub repository, including code, issues, and pull requests.`,
 		Args:  cobra.ExactArgs(1),
 		RunE: func(cmd *cobra.Command, args []string) error {
-			url := args[0]
-			outputFile, _ := cmd.Flags().GetString("output")
-			format, _ := cmd.Flags().GetString("format")
-			compression, _ := cmd.Flags().GetString("compression")
-			password, _ := cmd.Flags().GetString("password")
+			repoPath := args[0]
+			parts := strings.Split(repoPath, "/")
+			if len(parts) != 2 {
+				return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
+			}
+			owner, repo := parts[0], parts[1]
+
+			outputFile, err := cmd.Flags().GetString("output")
+			if err != nil {
+				return fmt.Errorf("error getting output flag: %w", err)
+			}
+			format, err := cmd.Flags().GetString("format")
+			if err != nil {
+				return fmt.Errorf("error getting format flag: %w", err)
+			}
+			compression, err := cmd.Flags().GetString("compression")
+			if err != nil {
+				return fmt.Errorf("error getting compression flag: %w", err)
+			}
+			password, err := cmd.Flags().GetString("password")
+			if err != nil {
+				return fmt.Errorf("error getting password flag: %w", err)
+			}
+			collectIssues, err := cmd.Flags().GetBool("issues")
+			if err != nil {
+				return fmt.Errorf("error getting issues flag: %w", err)
+			}
+			collectPRs, err := cmd.Flags().GetBool("prs")
+			if err != nil {
+				return fmt.Errorf("error getting prs flag: %w", err)
+			}
+			collectCode, err := cmd.Flags().GetBool("code")
+			if err != nil {
+				return fmt.Errorf("error getting code flag: %w", err)
+			}
 
 			if format != "datanode" && format != "tim" && format != "trix" {
 				return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format)
 			}
 
-			owner, err := parseGithubOwner(url)
-			if err != nil {
-				return err
-			}
-
-			repos, err := GithubClient.GetPublicRepos(cmd.Context(), owner)
-			if err != nil {
-				return err
-			}
-
+			allDataNodes := datanode.New()
 			prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
 			prompter.Start()
 			defer prompter.Stop()
 
-			var progressWriter io.Writer
-			if prompter.IsInteractive() {
-				bar := ui.NewProgressBar(len(repos), "Cloning repositories")
-				progressWriter = ui.NewProgressWriter(bar)
-			}
-
-			cloner := vcs.NewGitCloner()
-			allDataNodes := datanode.New()
-
-			for _, repoURL := range repos {
+			if collectCode {
+				var progressWriter io.Writer
+				if prompter.IsInteractive() {
+					bar := ui.NewProgressBar(-1, "Cloning repository")
+					progressWriter = ui.NewProgressWriter(bar)
+				}
+				cloner := vcs.NewGitCloner()
+				repoURL := fmt.Sprintf("https://github.com/%s/%s.git", owner, repo)
 				dn, err := cloner.CloneGitRepository(repoURL, progressWriter)
 				if err != nil {
-					// Log the error and continue
-					fmt.Fprintln(cmd.ErrOrStderr(), "Error cloning repository:", err)
-					continue
+					return fmt.Errorf("error cloning repository: %w", err)
 				}
-				// This is not an efficient way to merge datanodes, but it's the only way for now
-				// A better approach would be to add a Merge method to the DataNode
-				repoName := strings.TrimSuffix(repoURL, ".git")
-				parts := strings.Split(repoName, "/")
-				repoName = parts[len(parts)-1]
+				if mergeErr := mergeDataNodes(allDataNodes, dn, "code"); mergeErr != nil {
+					return fmt.Errorf("error merging code datanode: %w", mergeErr)
+				}
+			}
 
-				err = dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
-					if err != nil {
-						return err
-					}
-					if !de.IsDir() {
-						err := func() error {
-							file, err := dn.Open(path)
-							if err != nil {
-								return err
-							}
-							defer file.Close()
-							data, err := io.ReadAll(file)
-							if err != nil {
-								return err
-							}
-							allDataNodes.AddData(repoName+"/"+path, data)
-							return nil
-						}()
-						if err != nil {
-							return err
-						}
-					}
-					return nil
-				})
+			client := github.NewGithubClient()
+			if collectIssues {
+				dn, err := client.GetIssues(cmd.Context(), owner, repo)
 				if err != nil {
-					fmt.Fprintln(cmd.ErrOrStderr(), "Error walking datanode:", err)
-					continue
+					return fmt.Errorf("error getting issues: %w", err)
+				}
+				if mergeErr := mergeDataNodes(allDataNodes, dn, ""); mergeErr != nil {
+					return fmt.Errorf("error merging issues datanode: %w", mergeErr)
+				}
+			}
+
+			if collectPRs {
+				dn, err := client.GetPullRequests(cmd.Context(), owner, repo)
+				if err != nil {
+					return fmt.Errorf("error getting pull requests: %w", err)
+				}
+				if mergeErr := mergeDataNodes(allDataNodes, dn, ""); mergeErr != nil {
+					return fmt.Errorf("error merging pull requests datanode: %w", mergeErr)
 				}
 			}
 
 			var data []byte
 			if format == "tim" {
-				tim, err := tim.FromDataNode(allDataNodes)
+				t, err := tim.FromDataNode(allDataNodes)
 				if err != nil {
 					return fmt.Errorf("error creating tim: %w", err)
 				}
-				data, err = tim.ToTar()
+				data, err = t.ToTar()
 				if err != nil {
 					return fmt.Errorf("error serializing tim: %w", err)
 				}
@@ -130,49 +137,67 @@ func NewAllCmd() *cobra.Command {
 				return fmt.Errorf("error compressing data: %w", err)
 			}
 
+			if outputFile == "" {
+				outputFile = fmt.Sprintf("%s-all.%s", repo, format)
+				if compression != "none" {
+					outputFile += "." + compression
+				}
+			}
+
 			err = os.WriteFile(outputFile, compressedData, 0644)
 			if err != nil {
 				return fmt.Errorf("error writing DataNode to file: %w", err)
 			}
 
-			fmt.Fprintln(cmd.OutOrStdout(), "All repositories saved to", outputFile)
-
+			fmt.Fprintln(cmd.OutOrStdout(), "All resources saved to", outputFile)
 			return nil
 		},
 	}
 
-	allCmd.PersistentFlags().String("output", "all.dat", "Output file for the DataNode")
-	allCmd.PersistentFlags().String("format", "datanode", "Output format (datanode, tim, or trix)")
-	allCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
-	allCmd.PersistentFlags().String("password", "", "Password for encryption")
-	return allCmd
+	cmd.Flags().String("output", "", "Output file for the DataNode")
+	cmd.Flags().String("format", "datanode", "Output format (datanode, tim, or trix)")
+	cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
+	cmd.Flags().String("password", "", "Password for encryption")
+	cmd.Flags().Bool("issues", true, "Collect issues")
+	cmd.Flags().Bool("prs", true, "Collect pull requests")
+	cmd.Flags().Bool("code", true, "Collect code")
+	return cmd
 }
 
-func GetAllCmd() *cobra.Command {
-	return allCmd
+func GetGithubAllCmd() *cobra.Command {
+	return githubAllCmd
 }
 
 func init() {
-	RootCmd.AddCommand(GetAllCmd())
+	collectGithubCmd.AddCommand(GetGithubAllCmd())
 }
 
-func parseGithubOwner(u string) (string, error) {
-	owner, _, err := github.ParseRepoFromURL(u)
-	if err == nil {
-		return owner, nil
-	}
-
-	parsedURL, err := url.Parse(u)
-	if err != nil {
-		return "", fmt.Errorf("invalid URL: %w", err)
-	}
-
-	path := strings.Trim(parsedURL.Path, "/")
-	if path == "" {
-		return "", fmt.Errorf("invalid owner URL: %s", u)
-	}
-	parts := strings.Split(path, "/")
-	if len(parts) != 1 || parts[0] == "" {
-		return "", fmt.Errorf("invalid owner URL: %s", u)
-	}
-	return parts[0], nil
+func mergeDataNodes(dest *datanode.DataNode, src *datanode.DataNode, prefix string) error {
+	return src.Walk(".", func(path string, de fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if !de.IsDir() {
+			err := func() error {
+				file, err := src.Open(path)
+				if err != nil {
+					return err
+				}
+				defer file.Close()
+				data, err := io.ReadAll(file)
+				if err != nil {
+					return err
+				}
+				destPath := path
+				if prefix != "" {
+					destPath = prefix + "/" + path
+				}
+				dest.AddData(destPath, data)
+				return nil
+			}()
+			if err != nil {
+				return err
+			}
+		}
+		return nil
+	})
 }
diff --git a/cmd/all_test.go b/cmd/all_test.go
index 66b4af1..a6d28e3 100644
--- a/cmd/all_test.go
+++ b/cmd/all_test.go
@@ -42,7 +42,7 @@ func TestAllCmd_Good(t *testing.T) {
 	}()
 
 	rootCmd := NewRootCmd()
-	rootCmd.AddCommand(GetAllCmd())
+	rootCmd.AddCommand(GetGithubAllCmd())
 
 	// Execute command
 	out := filepath.Join(t.TempDir(), "out")
@@ -75,7 +75,7 @@ func TestAllCmd_Bad(t *testing.T) {
 	}()
 
 	rootCmd := NewRootCmd()
-	rootCmd.AddCommand(GetAllCmd())
+	rootCmd.AddCommand(GetGithubAllCmd())
 
 	// Execute command
 	out := filepath.Join(t.TempDir(), "out")
@@ -104,7 +104,7 @@ func TestAllCmd_Ugly(t *testing.T) {
 	}()
 
 	rootCmd := NewRootCmd()
-	rootCmd.AddCommand(GetAllCmd())
+	rootCmd.AddCommand(GetGithubAllCmd())
 
 	// Execute command
 	out := filepath.Join(t.TempDir(), "out")
diff --git a/cmd/collect_github_issues.go b/cmd/collect_github_issues.go
new file mode 100644
index 0000000..8c975a4
--- /dev/null
+++ b/cmd/collect_github_issues.go
@@ -0,0 +1,84 @@
+package cmd
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/Snider/Borg/pkg/compress"
+	"github.com/Snider/Borg/pkg/github"
+	"github.com/Snider/Borg/pkg/ui"
+	"github.com/spf13/cobra"
+)
+
+// NewCollectGithubIssuesCmd creates a new cobra command for collecting github issues.
+func NewCollectGithubIssuesCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "issues <owner/repo>",
+		Short: "Collect issues from a GitHub repository",
+		Long:  `Collect all issues from a GitHub repository and store them in a DataNode.`,
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			repoPath := args[0]
+			parts := strings.Split(repoPath, "/")
+			if len(parts) != 2 {
+				return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
+			}
+			owner, repo := parts[0], parts[1]
+
+			outputFile, _ := cmd.Flags().GetString("output")
+			format, _ := cmd.Flags().GetString("format")
+			compression, _ := cmd.Flags().GetString("compression")
+
+			if format != "datanode" {
+				return fmt.Errorf("invalid format: %s (must be 'datanode')", format)
+			}
+			if compression != "none" && compression != "gz" && compression != "xz" {
+				return fmt.Errorf("invalid compression: %s (must be 'none', 'gz', or 'xz')", compression)
+			}
+
+			prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
+			prompter.Start()
+			defer prompter.Stop()
+
+			client := github.NewGithubClient()
+			dn, err := client.GetIssues(cmd.Context(), owner, repo)
+			if err != nil {
+				return fmt.Errorf("error getting issues: %w", err)
+			}
+
+			data, err := dn.ToTar()
+			if err != nil {
+				return fmt.Errorf("error serializing DataNode: %w", err)
+			}
+
+			compressedData, err := compress.Compress(data, compression)
+			if err != nil {
+				return fmt.Errorf("error compressing data: %w", err)
+			}
+
+			if outputFile == "" {
+				outputFile = "issues." + format
+				if compression != "none" {
+					outputFile += "." + compression
+				}
+			}
+
+			err = os.WriteFile(outputFile, compressedData, 0644)
+			if err != nil {
+				return fmt.Errorf("error writing DataNode to file: %w", err)
+			}
+
+			fmt.Fprintln(cmd.OutOrStdout(), "Issues saved to", outputFile)
+			return nil
+		},
+	}
+	cmd.Flags().String("output", "", "Output file for the DataNode")
+	cmd.Flags().String("format", "datanode", "Output format (datanode)")
+	cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
+	return cmd
+}
+
+func init() {
+	GetCollectGithubCmd().AddCommand(NewCollectGithubIssuesCmd())
+}
diff --git a/cmd/collect_github_issues_test.go b/cmd/collect_github_issues_test.go
new file mode 100644
index 0000000..6f39ab8
--- /dev/null
+++ b/cmd/collect_github_issues_test.go
@@ -0,0 +1,53 @@
+package cmd
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+
+	"github.com/Snider/Borg/pkg/github"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestCollectGithubIssuesCmd(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/repos/owner/repo/issues" {
+			w.Header().Set("Content-Type", "application/json")
+			issues := []github.Issue{
+				{Number: 1, Title: "Issue 1", CommentsURL: "http://" + r.Host + "/repos/owner/repo/issues/1/comments"},
+			}
+			json.NewEncoder(w).Encode(issues)
+		} else if r.URL.Path == "/repos/owner/repo/issues/1/comments" {
+			w.Header().Set("Content-Type", "application/json")
+			w.Write([]byte("[]"))
+		} else {
+			http.NotFound(w, r)
+		}
+	}))
+	defer server.Close()
+
+	originalNewAuthenticatedClient := github.NewAuthenticatedClient
+	github.NewAuthenticatedClient = func(ctx context.Context) *http.Client {
+		return server.Client()
+	}
+	defer func() {
+		github.NewAuthenticatedClient = originalNewAuthenticatedClient
+	}()
+
+	cmd := NewCollectGithubIssuesCmd()
+	var out bytes.Buffer
+	cmd.SetOut(&out)
+	cmd.SetErr(&out)
+	cmd.SetArgs([]string{"owner/repo", "--output", "issues.dat"})
+	err := cmd.Execute()
+
+	assert.NoError(t, err)
+
+	_, err = os.Stat("issues.dat")
+	assert.NoError(t, err)
+	os.Remove("issues.dat")
+}
diff --git a/cmd/collect_github_prs.go b/cmd/collect_github_prs.go
new file mode 100644
index 0000000..16f687c
--- /dev/null
+++ b/cmd/collect_github_prs.go
@@ -0,0 +1,84 @@
+package cmd
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/Snider/Borg/pkg/compress"
+	"github.com/Snider/Borg/pkg/github"
+	"github.com/Snider/Borg/pkg/ui"
+	"github.com/spf13/cobra"
+)
+
+// NewCollectGithubPrsCmd creates a new cobra command for collecting github pull requests.
+func NewCollectGithubPrsCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "prs <owner/repo>",
+		Short: "Collect pull requests from a GitHub repository",
+		Long:  `Collect all pull requests from a GitHub repository and store them in a DataNode.`,
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			repoPath := args[0]
+			parts := strings.Split(repoPath, "/")
+			if len(parts) != 2 {
+				return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
+			}
+			owner, repo := parts[0], parts[1]
+
+			outputFile, _ := cmd.Flags().GetString("output")
+			format, _ := cmd.Flags().GetString("format")
+			compression, _ := cmd.Flags().GetString("compression")
+
+			if format != "datanode" {
+				return fmt.Errorf("invalid format: %s (must be 'datanode')", format)
+			}
+			if compression != "none" && compression != "gz" && compression != "xz" {
+				return fmt.Errorf("invalid compression: %s (must be 'none', 'gz', or 'xz')", compression)
+			}
+
+			prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
+			prompter.Start()
+			defer prompter.Stop()
+
+			client := github.NewGithubClient()
+			dn, err := client.GetPullRequests(cmd.Context(), owner, repo)
+			if err != nil {
+				return fmt.Errorf("error getting pull requests: %w", err)
+			}
+
+			data, err := dn.ToTar()
+			if err != nil {
+				return fmt.Errorf("error serializing DataNode: %w", err)
+			}
+
+			compressedData, err := compress.Compress(data, compression)
+			if err != nil {
+				return fmt.Errorf("error compressing data: %w", err)
+			}
+
+			if outputFile == "" {
+				outputFile = "prs." + format
+				if compression != "none" {
+					outputFile += "." + compression
+				}
+			}
+
+			err = os.WriteFile(outputFile, compressedData, 0644)
+			if err != nil {
+				return fmt.Errorf("error writing DataNode to file: %w", err)
+			}
+
+			fmt.Fprintln(cmd.OutOrStdout(), "Pull requests saved to", outputFile)
+			return nil
+		},
+	}
+	cmd.Flags().String("output", "", "Output file for the DataNode")
+	cmd.Flags().String("format", "datanode", "Output format (datanode)")
+	cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
+	return cmd
+}
+
+func init() {
+	GetCollectGithubCmd().AddCommand(NewCollectGithubPrsCmd())
+}
diff --git a/cmd/collect_github_prs_test.go b/cmd/collect_github_prs_test.go
new file mode 100644
index 0000000..2558b5f
--- /dev/null
+++ b/cmd/collect_github_prs_test.go
@@ -0,0 +1,64 @@
+package cmd
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+
+	"github.com/Snider/Borg/pkg/github"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestCollectGithubPrsCmd(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/repos/owner/repo/pulls" {
+			w.Header().Set("Content-Type", "application/json")
+			prs := []github.PullRequest{
+				{
+					Number: 1, Title: "PR 1",
+					DiffURL: "http://" + r.Host + "/repos/owner/repo/pulls/1.diff",
+					Links: struct {
+						Comments       struct{ Href string `json:"href"` } `json:"comments"`
+						ReviewComments struct{ Href string `json:"href"` } `json:"review_comments"`
+					}{
+						ReviewComments: struct{ Href string `json:"href"` }{Href: "http://" + r.Host + "/repos/owner/repo/pulls/1/comments"},
+					},
+				},
+			}
+			json.NewEncoder(w).Encode(prs)
+		} else if r.URL.Path == "/repos/owner/repo/pulls/1.diff" {
+			w.Write([]byte("diff --git a/file b/file"))
+		} else if r.URL.Path == "/repos/owner/repo/pulls/1/comments" {
+			w.Header().Set("Content-Type", "application/json")
+			w.Write([]byte("[]"))
+		} else {
+			http.NotFound(w, r)
+		}
+	}))
+	defer server.Close()
+
+	originalNewAuthenticatedClient := github.NewAuthenticatedClient
+	github.NewAuthenticatedClient = func(ctx context.Context) *http.Client {
+		return server.Client()
+	}
+	defer func() {
+		github.NewAuthenticatedClient = originalNewAuthenticatedClient
+	}()
+
+	cmd := NewCollectGithubPrsCmd()
+	var out bytes.Buffer
+	cmd.SetOut(&out)
+	cmd.SetErr(&out)
+	cmd.SetArgs([]string{"owner/repo", "--output", "prs.dat"})
+	err := cmd.Execute()
+
+	assert.NoError(t, err)
+
+	_, err = os.Stat("prs.dat")
+	assert.NoError(t, err)
+	os.Remove("prs.dat")
+}
diff --git a/pkg/github/github.go b/pkg/github/github.go
index 2e2e832..5e2aeb7 100644
--- a/pkg/github/github.go
+++ b/pkg/github/github.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"strings"
 
+	"github.com/Snider/Borg/pkg/datanode"
 	"golang.org/x/oauth2"
 )
 
@@ -18,6 +19,8 @@ type Repo struct {
 // GithubClient is an interface for interacting with the Github API.
 type GithubClient interface {
 	GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error)
+	GetIssues(ctx context.Context, owner, repo string) (*datanode.DataNode, error)
+	GetPullRequests(ctx context.Context, owner, repo string) (*datanode.DataNode, error)
 }
 
 // NewGithubClient creates a new GithubClient.
@@ -25,7 +28,9 @@ func NewGithubClient() GithubClient {
 	return &githubClient{}
 }
 
-type githubClient struct{}
+type githubClient struct {
+	apiURL string
+}
 
 // NewAuthenticatedClient creates a new authenticated http client.
 var NewAuthenticatedClient = func(ctx context.Context) *http.Client {
diff --git a/pkg/github/issue.go b/pkg/github/issue.go
new file mode 100644
index 0000000..18569c6
--- /dev/null
+++ b/pkg/github/issue.go
@@ -0,0 +1,156 @@
+package github
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/Snider/Borg/pkg/datanode"
+)
+
+type Issue struct {
+	Number    int       `json:"number"`
+	Title     string    `json:"title"`
+	Body      string    `json:"body"`
+	State     string    `json:"state"`
+	CreatedAt time.Time `json:"created_at"`
+	UpdatedAt time.Time `json:"updated_at"`
+	User      struct {
+		Login string `json:"login"`
+	} `json:"user"`
+	Labels []struct {
+		Name string `json:"name"`
+	} `json:"labels"`
+	CommentsURL string `json:"comments_url"`
+}
+
+type Comment struct {
+	Body      string    `json:"body"`
+	CreatedAt time.Time `json:"created_at"`
+	User      struct {
+		Login string `json:"login"`
+	} `json:"user"`
+}
+
+func (g *githubClient) GetIssues(ctx context.Context, owner, repo string) (*datanode.DataNode, error) {
+	dn := datanode.New()
+	client := NewAuthenticatedClient(ctx)
+	apiURL := "https://api.github.com"
+	if g.apiURL != "" {
+		apiURL = g.apiURL
+	}
+	url := fmt.Sprintf("%s/repos/%s/%s/issues", apiURL, owner, repo)
+
+	var allIssues []Issue
+	for url != "" {
+		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+		if err != nil {
+			return nil, err
+		}
+		req.Header.Set("User-Agent", "Borg-Data-Collector")
+		resp, err := client.Do(req)
+		if err != nil {
+			return nil, err
+		}
+
+		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
+			return nil, fmt.Errorf("failed to fetch issues: %s", resp.Status)
+		}
+
+		var issues []Issue
+		if err := json.NewDecoder(resp.Body).Decode(&issues); err != nil {
+			return nil, err
+		}
+
+		allIssues = append(allIssues, issues...)
+
+		linkHeader := resp.Header.Get("Link")
+		url = g.findNextURL(linkHeader)
+	}
+
+	for _, issue := range allIssues {
+		var markdown strings.Builder
+		markdown.WriteString(fmt.Sprintf("# Issue %d: %s\n\n", issue.Number, issue.Title))
+		markdown.WriteString(fmt.Sprintf("**Author**: %s\n", issue.User.Login))
+		markdown.WriteString(fmt.Sprintf("**State**: %s\n", issue.State))
+		markdown.WriteString(fmt.Sprintf("**Created**: %s\n", issue.CreatedAt.Format(time.RFC1123)))
+		markdown.WriteString(fmt.Sprintf("**Updated**: %s\n\n", issue.UpdatedAt.Format(time.RFC1123)))
+
+		if len(issue.Labels) > 0 {
+			markdown.WriteString("**Labels**:\n")
+			for _, label := range issue.Labels {
+				markdown.WriteString(fmt.Sprintf("- %s\n", label.Name))
+			}
+			markdown.WriteString("\n")
+		}
+
+		markdown.WriteString("## Body\n\n")
+		markdown.WriteString(issue.Body)
+		markdown.WriteString("\n\n")
+
+		// Fetch comments
+		comments, err := g.getComments(ctx, issue.CommentsURL)
+		if err != nil {
+			return nil, err
+		}
+
+		if len(comments) > 0 {
+			markdown.WriteString("## Comments\n\n")
+			for _, comment := range comments {
+				markdown.WriteString(fmt.Sprintf("**%s** commented on %s:\n\n", comment.User.Login, comment.CreatedAt.Format(time.RFC1123)))
+				markdown.WriteString(comment.Body)
+				markdown.WriteString("\n\n---\n\n")
+			}
+		}
+
+		filename := fmt.Sprintf("issues/%d.md", issue.Number)
+		dn.AddData(filename, []byte(markdown.String()))
+	}
+
+	// Add an index file
+	index, err := json.MarshalIndent(allIssues, "", " ")
+	if err != nil {
+		return nil, err
+	}
+	dn.AddData("issues/INDEX.json", index)
+
+	return dn, nil
+}
+
+func (g *githubClient) getComments(ctx context.Context, url string) ([]Comment, error) {
+	client := NewAuthenticatedClient(ctx)
+	var allComments []Comment
+
+	for url != "" {
+		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+		if err != nil {
+			return nil, err
+		}
+		req.Header.Set("User-Agent", "Borg-Data-Collector")
+		resp, err := client.Do(req)
+		if err != nil {
+			return nil, err
+		}
+
+		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
+			return nil, fmt.Errorf("failed to fetch comments: %s", resp.Status)
+		}
+
+		var comments []Comment
+		if err := json.NewDecoder(resp.Body).Decode(&comments); err != nil {
+			return nil, err
+		}
+
+		allComments = append(allComments, comments...)
+
+		linkHeader := resp.Header.Get("Link")
+		url = g.findNextURL(linkHeader)
+	}
+
+	return allComments, nil
+}
diff --git a/pkg/github/issue_test.go b/pkg/github/issue_test.go
new file mode 100644
index 0000000..e3c2f2f
--- /dev/null
+++ b/pkg/github/issue_test.go
@@ -0,0 +1,67 @@
+package github
+
+import (
+	"context"
+	"encoding/json"
+	"io/fs"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGetIssues(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/repos/owner/repo/issues" {
+			w.Header().Set("Content-Type", "application/json")
+			issues := []Issue{
+				{Number: 1, Title: "Issue 1", CommentsURL: "http://" + r.Host + "/repos/owner/repo/issues/1/comments"},
+				{Number: 2, Title: "Issue 2", CommentsURL: "http://" + r.Host + "/repos/owner/repo/issues/2/comments"},
+			}
+			json.NewEncoder(w).Encode(issues)
+		} else if r.URL.Path == "/repos/owner/repo/issues/1/comments" {
+			w.Header().Set("Content-Type", "application/json")
+			comments := []Comment{
+				{Body: "Comment 1"},
+			}
+			json.NewEncoder(w).Encode(comments)
+		} else if r.URL.Path == "/repos/owner/repo/issues/2/comments" {
+			w.Header().Set("Content-Type", "application/json")
+			w.Write([]byte("[]"))
+		} else {
+			http.NotFound(w, r)
+		}
+	}))
+	defer server.Close()
+
+	originalNewAuthenticatedClient := NewAuthenticatedClient
+	NewAuthenticatedClient = func(ctx context.Context) *http.Client {
+		return server.Client()
+	}
+	defer func() {
+		NewAuthenticatedClient = originalNewAuthenticatedClient
+	}()
+
+	client := &githubClient{apiURL: server.URL}
+	dn, err := client.GetIssues(context.Background(), "owner", "repo")
+
+	assert.NoError(t, err)
+	assert.NotNil(t, dn)
+
+	expectedFiles := []string{
+		"issues/1.md",
+		"issues/2.md",
+		"issues/INDEX.json",
+	}
+
+	actualFiles := []string{}
+	dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
+		if !de.IsDir() {
+			actualFiles = append(actualFiles, path)
+		}
+		return nil
+	})
+
+	assert.ElementsMatch(t, expectedFiles, actualFiles)
+}
diff --git a/pkg/github/pull_request.go b/pkg/github/pull_request.go
new file mode 100644
index 0000000..5cef326
--- /dev/null
+++ b/pkg/github/pull_request.go
@@ -0,0 +1,201 @@
+package github
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/Snider/Borg/pkg/datanode"
+)
+
+type PullRequest struct {
+	Number    int       `json:"number"`
+	Title     string    `json:"title"`
+	Body      string    `json:"body"`
+	State     string    `json:"state"`
+	CreatedAt time.Time `json:"created_at"`
+	UpdatedAt time.Time `json:"updated_at"`
+	MergedAt  time.Time `json:"merged_at"`
+	User      struct {
+		Login string `json:"login"`
+	} `json:"user"`
+	Labels []struct {
+		Name string `json:"name"`
+	} `json:"labels"`
+	Links struct {
+		Comments struct {
+			Href string `json:"href"`
+		} `json:"comments"`
+		ReviewComments struct {
+			Href string `json:"href"`
+		} `json:"review_comments"`
+	} `json:"_links"`
+	DiffURL string `json:"diff_url"`
+}
+
+type ReviewComment struct {
+	Body      string    `json:"body"`
+	Path      string    `json:"path"`
+	CreatedAt time.Time `json:"created_at"`
+	User      struct {
+		Login string `json:"login"`
+	} `json:"user"`
+}
+
+func (g *githubClient) GetPullRequests(ctx context.Context, owner, repo string) (*datanode.DataNode, error) {
+	dn := datanode.New()
+	client := NewAuthenticatedClient(ctx)
+	apiURL := "https://api.github.com"
+	if g.apiURL != "" {
+		apiURL = g.apiURL
+	}
+	// Get both open and closed pull requests
+	url := fmt.Sprintf("%s/repos/%s/%s/pulls?state=all", apiURL, owner, repo)
+
+	var allPRs []PullRequest
+	for url != "" {
+		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+		if err != nil {
+			return nil, err
+		}
+		req.Header.Set("User-Agent", "Borg-Data-Collector")
+		resp, err := client.Do(req)
+		if err != nil {
+			return nil, err
+		}
+
+		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
+			return nil, fmt.Errorf("failed to fetch pull requests: %s", resp.Status)
+		}
+
+		var prs []PullRequest
+		if err := json.NewDecoder(resp.Body).Decode(&prs); err != nil {
+			return nil, err
+		}
+
+		allPRs = append(allPRs, prs...)
+
+		linkHeader := resp.Header.Get("Link")
+		url = g.findNextURL(linkHeader)
+	}
+
+	for _, pr := range allPRs {
+		var markdown strings.Builder
+		markdown.WriteString(fmt.Sprintf("# PR %d: %s\n\n", pr.Number, pr.Title))
+		markdown.WriteString(fmt.Sprintf("**Author**: %s\n", pr.User.Login))
+		markdown.WriteString(fmt.Sprintf("**State**: %s\n", pr.State))
+		markdown.WriteString(fmt.Sprintf("**Created**: %s\n", pr.CreatedAt.Format(time.RFC1123)))
+		markdown.WriteString(fmt.Sprintf("**Updated**: %s\n", pr.UpdatedAt.Format(time.RFC1123)))
+		if !pr.MergedAt.IsZero() {
+			markdown.WriteString(fmt.Sprintf("**Merged**: %s\n", pr.MergedAt.Format(time.RFC1123)))
+		}
+		markdown.WriteString(fmt.Sprintf("\n**[View Diff](%s)**\n\n", pr.DiffURL))
+
+		if len(pr.Labels) > 0 {
+			markdown.WriteString("**Labels**:\n")
+			for _, label := range pr.Labels {
+				markdown.WriteString(fmt.Sprintf("- %s\n", label.Name))
+			}
+			markdown.WriteString("\n")
+		}
+
+		markdown.WriteString("## Body\n\n")
+		markdown.WriteString(pr.Body)
+		markdown.WriteString("\n\n")
+
+		// Fetch diff
+		diff, err := g.getDiff(ctx, pr.DiffURL)
+		if err != nil {
+			return nil, fmt.Errorf("failed to get diff for PR #%d: %w", pr.Number, err)
+		}
+		dn.AddData(fmt.Sprintf("pulls/%d.diff", pr.Number), diff)
+
+		// Fetch review comments
+		reviewComments, err := g.getReviewComments(ctx, pr.Links.ReviewComments.Href)
+		if err != nil {
+			return nil, err
+		}
+
+		if len(reviewComments) > 0 {
+			markdown.WriteString("## Review Comments\n\n")
+			for _, comment := range reviewComments {
+				markdown.WriteString(fmt.Sprintf("**%s** commented on `%s` at %s:\n\n", comment.User.Login, comment.Path, comment.CreatedAt.Format(time.RFC1123)))
+				markdown.WriteString(comment.Body)
+				markdown.WriteString("\n\n---\n\n")
+			}
+		}
+
+		filename := fmt.Sprintf("pulls/%d.md", pr.Number)
+		dn.AddData(filename, []byte(markdown.String()))
+	}
+
+	// Add an index file
+	index, err := json.MarshalIndent(allPRs, "", " ")
+	if err != nil {
+		return nil, err
+	}
+	dn.AddData("pulls/INDEX.json", index)
+
+	return dn, nil
+}
+
+func (g *githubClient) getReviewComments(ctx context.Context, url string) ([]ReviewComment, error) {
+	client := NewAuthenticatedClient(ctx)
+	var allComments []ReviewComment
+
+	for url != "" {
+		req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+		if err != nil {
+			return nil, err
+		}
+		req.Header.Set("User-Agent", "Borg-Data-Collector")
+		resp, err := client.Do(req)
+		if err != nil {
+			return nil, err
+		}
+
+		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
+			return nil, fmt.Errorf("failed to fetch review comments: %s", resp.Status)
+		}
+
+		var comments []ReviewComment
+		if err := json.NewDecoder(resp.Body).Decode(&comments); err != nil {
+			return nil, err
+		}
+
+		allComments = append(allComments, comments...)
+
+		linkHeader := resp.Header.Get("Link")
+		url = g.findNextURL(linkHeader)
+	}
+
+	return allComments, nil
+}
+
+func (g *githubClient) getDiff(ctx context.Context, url string) ([]byte, error) {
+	client := NewAuthenticatedClient(ctx)
+	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("User-Agent", "Borg-Data-Collector")
+	req.Header.Set("Accept", "application/vnd.github.v3.diff")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("failed to fetch diff: %s", resp.Status)
+	}
+
+	return io.ReadAll(resp.Body)
+}
diff --git a/pkg/github/pull_request_test.go b/pkg/github/pull_request_test.go
new file mode 100644
index 0000000..806b547
--- /dev/null
+++ b/pkg/github/pull_request_test.go
@@ -0,0 +1,91 @@
+package github
+
+import (
+	"context"
+	"encoding/json"
+	"io/fs"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGetPullRequests(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/repos/owner/repo/pulls" {
+			w.Header().Set("Content-Type", "application/json")
+			prs := []PullRequest{
+				{
+					Number: 1, Title: "PR 1",
+					DiffURL: "http://" + r.Host + "/repos/owner/repo/pulls/1.diff",
+					Links: struct {
+						Comments       struct{ Href string `json:"href"` } `json:"comments"`
+						ReviewComments struct{ Href string `json:"href"` } `json:"review_comments"`
+					}{
+						ReviewComments: struct{ Href string `json:"href"` }{Href: "http://" + r.Host + "/repos/owner/repo/pulls/1/comments"},
+					},
+				},
+				{
+					Number: 2, Title: "PR 2",
+					DiffURL: "http://" + r.Host + "/repos/owner/repo/pulls/2.diff",
+					Links: struct {
+						Comments       struct{ Href string `json:"href"` } `json:"comments"`
+						ReviewComments struct{ Href string `json:"href"` } `json:"review_comments"`
+					}{
+						ReviewComments: struct{ Href string `json:"href"` }{Href: "http://" + r.Host + "/repos/owner/repo/pulls/2/comments"},
+					},
+				},
+			}
+			json.NewEncoder(w).Encode(prs)
+		} else if r.URL.Path == "/repos/owner/repo/pulls/1.diff" {
+			w.Write([]byte("diff --git a/file b/file"))
+		} else if r.URL.Path == "/repos/owner/repo/pulls/1/comments" {
+			w.Header().Set("Content-Type", "application/json")
+			comments := []ReviewComment{
+				{Body: "Review Comment 1"},
+			}
+			json.NewEncoder(w).Encode(comments)
+		} else if r.URL.Path == "/repos/owner/repo/pulls/2.diff" {
+			w.Write([]byte("diff --git a/file2 b/file2"))
+		} else if r.URL.Path == "/repos/owner/repo/pulls/2/comments" {
+			w.Header().Set("Content-Type", "application/json")
+			w.Write([]byte("[]"))
+		} else {
+			http.NotFound(w, r)
+		}
+	}))
+	defer server.Close()
+
+	originalNewAuthenticatedClient := NewAuthenticatedClient
+	NewAuthenticatedClient = func(ctx context.Context) *http.Client {
+		return server.Client()
+	}
+	defer func() {
+		NewAuthenticatedClient = originalNewAuthenticatedClient
+	}()
+
+	client := &githubClient{apiURL: server.URL}
+	dn, err := client.GetPullRequests(context.Background(), "owner", "repo")
+
+	assert.NoError(t, err)
+	assert.NotNil(t, dn)
+
+	expectedFiles := []string{
+		"pulls/1.md",
+		"pulls/1.diff",
+		"pulls/2.md",
+		"pulls/2.diff",
+		"pulls/INDEX.json",
+	}
+
+	actualFiles := []string{}
+	dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
+		if !de.IsDir() {
+			actualFiles = append(actualFiles, path)
+		}
+		return nil
+	})
+
+	assert.ElementsMatch(t, expectedFiles, actualFiles)
+}