diff --git a/collect/bitcointalk.go b/collect/bitcointalk.go index c8b3fec..9cc8478 100644 --- a/collect/bitcointalk.go +++ b/collect/bitcointalk.go @@ -3,6 +3,7 @@ package collect import ( "context" "fmt" + "iter" "net/http" "path/filepath" "strings" @@ -172,26 +173,38 @@ func (b *BitcoinTalkCollector) fetchPage(ctx context.Context, pageURL string) ([ // It looks for the common BitcoinTalk post structure using div.post elements. func extractPosts(doc *html.Node) []btPost { var posts []btPost - var walk func(*html.Node) + for p := range extractPostsIter(doc) { + posts = append(posts, p) + } + return posts +} - walk = func(n *html.Node) { - if n.Type == html.ElementNode && n.Data == "div" { - for _, attr := range n.Attr { - if attr.Key == "class" && strings.Contains(attr.Val, "post") { - post := parsePost(n) - if post.Content != "" { - posts = append(posts, post) +// extractPostsIter returns an iterator over post data extracted from a parsed HTML document. +func extractPostsIter(doc *html.Node) iter.Seq[btPost] { + return func(yield func(btPost) bool) { + var walk func(*html.Node) bool + walk = func(n *html.Node) bool { + if n.Type == html.ElementNode && n.Data == "div" { + for _, attr := range n.Attr { + if attr.Key == "class" && strings.Contains(attr.Val, "post") { + post := parsePost(n) + if post.Content != "" { + if !yield(post) { + return false + } + } } } } + for c := n.FirstChild; c != nil; c = c.NextSibling { + if !walk(c) { + return false + } + } + return true } - for c := n.FirstChild; c != nil; c = c.NextSibling { - walk(c) - } + walk(doc) } - - walk(doc) - return posts } // parsePost extracts author, date, and content from a post div. diff --git a/collect/excavate_test.go b/collect/excavate_test.go index 2643551..5709399 100644 --- a/collect/excavate_test.go +++ b/collect/excavate_test.go @@ -26,7 +26,7 @@ func (m *mockCollector) Collect(ctx context.Context, cfg *Config) (*Result, erro } result := &Result{Source: m.name, Items: m.items} - for i := 0; i < m.items; i++ { + for i := range m.items { result.Files = append(result.Files, fmt.Sprintf("/output/%s/%d.md", m.name, i)) } diff --git a/collect/papers.go b/collect/papers.go index 9c2a3fc..2ae06fc 100644 --- a/collect/papers.go +++ b/collect/papers.go @@ -4,6 +4,7 @@ import ( "context" "encoding/xml" "fmt" + "iter" "net/http" "net/url" "path/filepath" @@ -289,26 +290,38 @@ func arxivEntryToPaper(entry arxivEntry) paper { // extractIACRPapers extracts paper metadata from an IACR search results page. func extractIACRPapers(doc *html.Node) []paper { var papers []paper - var walk func(*html.Node) + for p := range extractIACRPapersIter(doc) { + papers = append(papers, p) + } + return papers +} - walk = func(n *html.Node) { - if n.Type == html.ElementNode && n.Data == "div" { - for _, attr := range n.Attr { - if attr.Key == "class" && strings.Contains(attr.Val, "paperentry") { - ppr := parseIACREntry(n) - if ppr.Title != "" { - papers = append(papers, ppr) +// extractIACRPapersIter returns an iterator over paper metadata extracted from an IACR search results page. +func extractIACRPapersIter(doc *html.Node) iter.Seq[paper] { + return func(yield func(paper) bool) { + var walk func(*html.Node) bool + walk = func(n *html.Node) bool { + if n.Type == html.ElementNode && n.Data == "div" { + for _, attr := range n.Attr { + if attr.Key == "class" && strings.Contains(attr.Val, "paperentry") { + ppr := parseIACREntry(n) + if ppr.Title != "" { + if !yield(ppr) { + return false + } + } } } } + for c := n.FirstChild; c != nil; c = c.NextSibling { + if !walk(c) { + return false + } + } + return true } - for c := n.FirstChild; c != nil; c = c.NextSibling { - walk(c) - } + walk(doc) } - - walk(doc) - return papers } // parseIACREntry extracts paper data from an IACR paper entry div. diff --git a/collect/process.go b/collect/process.go index b907bd9..a5147f8 100644 --- a/collect/process.go +++ b/collect/process.go @@ -4,8 +4,9 @@ import ( "context" "encoding/json" "fmt" + "maps" "path/filepath" - "sort" + "slices" "strings" core "forge.lthn.ai/core/go/pkg/framework/core" @@ -301,12 +302,7 @@ func jsonToMarkdown(content string) (string, error) { func jsonValueToMarkdown(b *strings.Builder, data any, depth int) { switch v := data.(type) { case map[string]any: - keys := make([]string, 0, len(v)) - for key := range v { - keys = append(keys, key) - } - sort.Strings(keys) - for _, key := range keys { + for _, key := range slices.Sorted(maps.Keys(v)) { val := v[key] indent := strings.Repeat(" ", depth) switch child := val.(type) { diff --git a/forge/issues.go b/forge/issues.go index 28a4c7e..6c0a1b5 100644 --- a/forge/issues.go +++ b/forge/issues.go @@ -1,6 +1,8 @@ package forge import ( + "iter" + forgejo "codeberg.org/mvdkleijn/forgejo-sdk/forgejo/v2" "forge.lthn.ai/core/go/pkg/log" @@ -123,6 +125,40 @@ func (c *Client) ListPullRequests(owner, repo string, state string) ([]*forgejo. return all, nil } +// ListPullRequestsIter returns an iterator over pull requests for the given repository. +func (c *Client) ListPullRequestsIter(owner, repo string, state string) iter.Seq2[*forgejo.PullRequest, error] { + st := forgejo.StateOpen + switch state { + case "closed": + st = forgejo.StateClosed + case "all": + st = forgejo.StateAll + } + + return func(yield func(*forgejo.PullRequest, error) bool) { + page := 1 + for { + prs, resp, err := c.api.ListRepoPullRequests(owner, repo, forgejo.ListPullRequestsOptions{ + ListOptions: forgejo.ListOptions{Page: page, PageSize: 50}, + State: st, + }) + if err != nil { + yield(nil, log.E("forge.ListPullRequests", "failed to list pull requests", err)) + return + } + for _, pr := range prs { + if !yield(pr, nil) { + return + } + } + if resp == nil || page >= resp.LastPage { + break + } + page++ + } + } +} + // GetPullRequest returns a single pull request by number. func (c *Client) GetPullRequest(owner, repo string, number int64) (*forgejo.PullRequest, error) { pr, _, err := c.api.GetPullRequest(owner, repo, number) diff --git a/forge/repos.go b/forge/repos.go index 504d5db..abe29d9 100644 --- a/forge/repos.go +++ b/forge/repos.go @@ -1,6 +1,8 @@ package forge import ( + "iter" + forgejo "codeberg.org/mvdkleijn/forgejo-sdk/forgejo/v2" "forge.lthn.ai/core/go/pkg/log" @@ -30,6 +32,31 @@ func (c *Client) ListOrgRepos(org string) ([]*forgejo.Repository, error) { return all, nil } +// ListOrgReposIter returns an iterator over repositories for the given organisation. +func (c *Client) ListOrgReposIter(org string) iter.Seq2[*forgejo.Repository, error] { + return func(yield func(*forgejo.Repository, error) bool) { + page := 1 + for { + repos, resp, err := c.api.ListOrgRepos(org, forgejo.ListOrgReposOptions{ + ListOptions: forgejo.ListOptions{Page: page, PageSize: 50}, + }) + if err != nil { + yield(nil, log.E("forge.ListOrgRepos", "failed to list org repos", err)) + return + } + for _, repo := range repos { + if !yield(repo, nil) { + return + } + } + if resp == nil || page >= resp.LastPage { + break + } + page++ + } + } +} + // ListUserRepos returns all repositories for the authenticated user. func (c *Client) ListUserRepos() ([]*forgejo.Repository, error) { var all []*forgejo.Repository @@ -54,6 +81,31 @@ func (c *Client) ListUserRepos() ([]*forgejo.Repository, error) { return all, nil } +// ListUserReposIter returns an iterator over repositories for the authenticated user. +func (c *Client) ListUserReposIter() iter.Seq2[*forgejo.Repository, error] { + return func(yield func(*forgejo.Repository, error) bool) { + page := 1 + for { + repos, resp, err := c.api.ListMyRepos(forgejo.ListReposOptions{ + ListOptions: forgejo.ListOptions{Page: page, PageSize: 50}, + }) + if err != nil { + yield(nil, log.E("forge.ListUserRepos", "failed to list user repos", err)) + return + } + for _, repo := range repos { + if !yield(repo, nil) { + return + } + } + if resp == nil || page >= resp.LastPage { + break + } + page++ + } + } +} + // GetRepo returns a single repository by owner and name. func (c *Client) GetRepo(owner, name string) (*forgejo.Repository, error) { repo, _, err := c.api.GetRepo(owner, name) diff --git a/git/git.go b/git/git.go index 67665cc..53ded5f 100644 --- a/git/git.go +++ b/git/git.go @@ -5,8 +5,10 @@ import ( "bytes" "context" "io" + "iter" "os" "os/exec" + "slices" "strconv" "strings" "sync" @@ -69,6 +71,18 @@ func Status(ctx context.Context, opts StatusOptions) []RepoStatus { return results } +// StatusIter returns an iterator over git status for multiple repositories. +func StatusIter(ctx context.Context, opts StatusOptions) iter.Seq[RepoStatus] { + return func(yield func(RepoStatus) bool) { + results := Status(ctx, opts) + for _, r := range results { + if !yield(r) { + return + } + } + } +} + // getStatus gets the git status for a single repository. func getStatus(ctx context.Context, path, name string) RepoStatus { status := RepoStatus{ @@ -197,30 +211,35 @@ type PushResult struct { // PushMultiple pushes multiple repositories sequentially. // Sequential because SSH passphrase prompts need user interaction. func PushMultiple(ctx context.Context, paths []string, names map[string]string) []PushResult { - results := make([]PushResult, len(paths)) + return slices.Collect(PushMultipleIter(ctx, paths, names)) +} - for i, path := range paths { - name := names[path] - if name == "" { - name = path +// PushMultipleIter returns an iterator that pushes repositories sequentially and yields results. +func PushMultipleIter(ctx context.Context, paths []string, names map[string]string) iter.Seq[PushResult] { + return func(yield func(PushResult) bool) { + for _, path := range paths { + name := names[path] + if name == "" { + name = path + } + + result := PushResult{ + Name: name, + Path: path, + } + + err := Push(ctx, path) + if err != nil { + result.Error = err + } else { + result.Success = true + } + + if !yield(result) { + return + } } - - result := PushResult{ - Name: name, - Path: path, - } - - err := Push(ctx, path) - if err != nil { - result.Error = err - } else { - result.Success = true - } - - results[i] = result } - - return results } // gitCommand runs a git command and returns stdout. diff --git a/git/service.go b/git/service.go index 892d6fc..d7591f5 100644 --- a/git/service.go +++ b/git/service.go @@ -2,6 +2,8 @@ package git import ( "context" + "iter" + "slices" "forge.lthn.ai/core/go/pkg/framework" ) @@ -103,6 +105,11 @@ func (s *Service) handleTask(c *framework.Core, t framework.Task) (any, bool, er // Status returns last status result. func (s *Service) Status() []RepoStatus { return s.lastStatus } +// StatusIter returns an iterator over last status result. +func (s *Service) StatusIter() iter.Seq[RepoStatus] { + return slices.Values(s.lastStatus) +} + // DirtyRepos returns repos with uncommitted changes. func (s *Service) DirtyRepos() []RepoStatus { var dirty []RepoStatus @@ -114,6 +121,19 @@ func (s *Service) DirtyRepos() []RepoStatus { return dirty } +// DirtyReposIter returns an iterator over repos with uncommitted changes. +func (s *Service) DirtyReposIter() iter.Seq[RepoStatus] { + return func(yield func(RepoStatus) bool) { + for _, st := range s.lastStatus { + if st.Error == nil && st.IsDirty() { + if !yield(st) { + return + } + } + } + } +} + // AheadRepos returns repos with unpushed commits. func (s *Service) AheadRepos() []RepoStatus { var ahead []RepoStatus @@ -124,3 +144,16 @@ func (s *Service) AheadRepos() []RepoStatus { } return ahead } + +// AheadReposIter returns an iterator over repos with unpushed commits. +func (s *Service) AheadReposIter() iter.Seq[RepoStatus] { + return func(yield func(RepoStatus) bool) { + for _, st := range s.lastStatus { + if st.Error == nil && st.HasUnpushed() { + if !yield(st) { + return + } + } + } + } +} diff --git a/gitea/issues.go b/gitea/issues.go index 3f0d788..e2943a0 100644 --- a/gitea/issues.go +++ b/gitea/issues.go @@ -1,6 +1,8 @@ package gitea import ( + "iter" + "code.gitea.io/sdk/gitea" "forge.lthn.ai/core/go/pkg/log" @@ -98,6 +100,40 @@ func (c *Client) ListPullRequests(owner, repo string, state string) ([]*gitea.Pu return all, nil } +// ListPullRequestsIter returns an iterator over pull requests for the given repository. +func (c *Client) ListPullRequestsIter(owner, repo string, state string) iter.Seq2[*gitea.PullRequest, error] { + st := gitea.StateOpen + switch state { + case "closed": + st = gitea.StateClosed + case "all": + st = gitea.StateAll + } + + return func(yield func(*gitea.PullRequest, error) bool) { + page := 1 + for { + prs, resp, err := c.api.ListRepoPullRequests(owner, repo, gitea.ListPullRequestsOptions{ + ListOptions: gitea.ListOptions{Page: page, PageSize: 50}, + State: st, + }) + if err != nil { + yield(nil, log.E("gitea.ListPullRequests", "failed to list pull requests", err)) + return + } + for _, pr := range prs { + if !yield(pr, nil) { + return + } + } + if resp == nil || page >= resp.LastPage { + break + } + page++ + } + } +} + // GetPullRequest returns a single pull request by number. func (c *Client) GetPullRequest(owner, repo string, number int64) (*gitea.PullRequest, error) { pr, _, err := c.api.GetPullRequest(owner, repo, number) diff --git a/gitea/repos.go b/gitea/repos.go index e7380c3..416ae92 100644 --- a/gitea/repos.go +++ b/gitea/repos.go @@ -1,6 +1,8 @@ package gitea import ( + "iter" + "code.gitea.io/sdk/gitea" "forge.lthn.ai/core/go/pkg/log" @@ -30,6 +32,31 @@ func (c *Client) ListOrgRepos(org string) ([]*gitea.Repository, error) { return all, nil } +// ListOrgReposIter returns an iterator over repositories for the given organisation. +func (c *Client) ListOrgReposIter(org string) iter.Seq2[*gitea.Repository, error] { + return func(yield func(*gitea.Repository, error) bool) { + page := 1 + for { + repos, resp, err := c.api.ListOrgRepos(org, gitea.ListOrgReposOptions{ + ListOptions: gitea.ListOptions{Page: page, PageSize: 50}, + }) + if err != nil { + yield(nil, log.E("gitea.ListOrgRepos", "failed to list org repos", err)) + return + } + for _, repo := range repos { + if !yield(repo, nil) { + return + } + } + if resp == nil || page >= resp.LastPage { + break + } + page++ + } + } +} + // ListUserRepos returns all repositories for the authenticated user. func (c *Client) ListUserRepos() ([]*gitea.Repository, error) { var all []*gitea.Repository @@ -54,6 +81,31 @@ func (c *Client) ListUserRepos() ([]*gitea.Repository, error) { return all, nil } +// ListUserReposIter returns an iterator over repositories for the authenticated user. +func (c *Client) ListUserReposIter() iter.Seq2[*gitea.Repository, error] { + return func(yield func(*gitea.Repository, error) bool) { + page := 1 + for { + repos, resp, err := c.api.ListMyRepos(gitea.ListReposOptions{ + ListOptions: gitea.ListOptions{Page: page, PageSize: 50}, + }) + if err != nil { + yield(nil, log.E("gitea.ListUserRepos", "failed to list user repos", err)) + return + } + for _, repo := range repos { + if !yield(repo, nil) { + return + } + } + if resp == nil || page >= resp.LastPage { + break + } + page++ + } + } +} + // GetRepo returns a single repository by owner and name. func (c *Client) GetRepo(owner, name string) (*gitea.Repository, error) { repo, _, err := c.api.GetRepo(owner, name) diff --git a/go.sum b/go.sum index b6bbe43..5300fb5 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,11 @@ code.gitea.io/sdk/gitea v0.23.2/go.mod h1:yyF5+GhljqvA30sRDreoyHILruNiy4ASufugzY codeberg.org/mvdkleijn/forgejo-sdk/forgejo/v2 v2.2.0 h1:HTCWpzyWQOHDWt3LzI6/d2jvUDsw/vgGRWm/8BTvcqI= codeberg.org/mvdkleijn/forgejo-sdk/forgejo/v2 v2.2.0/go.mod h1:ZglEEDj+qkxYUb+SQIeqGtFxQrbaMYqIOgahNKb7uxs= forge.lthn.ai/core/cli v0.0.1 h1:nqpc4Tv8a4H/ERei+/71DVQxkCFU8HPFJP4120qPXgk= +forge.lthn.ai/core/cli v0.0.1/go.mod h1:xa3Nqw3sUtYYJ1k+1jYul18tgs6sBevCUsGsHJI1hHA= forge.lthn.ai/core/go v0.0.1 h1:ubk4nmkA3treOUNgPS28wKd1jB6cUlEQUV7jDdGa3zM= +forge.lthn.ai/core/go v0.0.1/go.mod h1:59YsnuMaAGQUxIhX68oK2/HnhQJEPWL1iEZhDTrNCbY= forge.lthn.ai/core/go-crypt v0.0.1 h1:fmFc2SJ/VOXDRjkcYoLWfL7lI4HfPJeVS/Na6zHHcvw= +forge.lthn.ai/core/go-crypt v0.0.1/go.mod h1:/j/rUN2ZMV7x1B5BYxH3QdwkgZg0HNBw5XuyFZeyxBY= github.com/42wim/httpsig v1.2.3 h1:xb0YyWhkYj57SPtfSttIobJUPJZB9as1nsfo7KWVcEs= github.com/42wim/httpsig v1.2.3/go.mod h1:nZq9OlYKDrUBhptd77IHx4/sZZD+IxTBADvAPI9G/EM= github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw=