// cli/pkg/collect/collect.go
//
// 104 lines
// 2.8 KiB
// Go
// Raw Normal View History
//
// feat: infrastructure packages and lint cleanup (#281) * ci: consolidate duplicate workflows and merge CodeQL configs Remove 17 duplicate workflow files that were split copies of the combined originals. Each family (CI, CodeQL, Coverage, PR Build, Alpha Release) had the same job duplicated across separate push/pull_request/schedule/manual trigger files. Merge codeql.yml and codescan.yml into a single codeql.yml with a language matrix covering go, javascript-typescript, python, and actions — matching the previous default setup coverage. Remaining workflows (one per family): - ci.yml (push + PR + manual) - codeql.yml (push + PR + schedule, all languages) - coverage.yml (push + PR + manual) - alpha-release.yml (push + manual) - pr-build.yml (PR + manual) - release.yml (tag push) - agent-verify.yml, auto-label.yml, auto-project.yml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat: add collect, config, crypt, plugin packages and fix all lint issues Add four new infrastructure packages with CLI commands: - pkg/config: layered configuration (defaults → file → env → flags) - pkg/crypt: crypto primitives (Argon2id, AES-GCM, ChaCha20, HMAC, checksums) - pkg/plugin: plugin system with GitHub-based install/update/remove - pkg/collect: collection subsystem (GitHub, BitcoinTalk, market, papers, excavate) Fix all golangci-lint issues across the entire codebase (~100 errcheck, staticcheck SA1012/SA1019/ST1005, unused, ineffassign fixes) so that `core go qa` passes with 0 issues. Closes #167, #168, #170, #250, #251, #252, #253, #254, #255, #256 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
// 2026-02-04 11:34:43 +00:00

// Package collect provides a data collection subsystem for gathering information
// from multiple sources including GitHub, BitcoinTalk, CoinGecko, and academic
// paper repositories. It supports rate limiting, incremental state tracking,
// and event-driven progress reporting.
package collect
import (
"context"
"path/filepath"
"forge.lthn.ai/core/cli/pkg/io"
// feat: infrastructure packages and lint cleanup (#281) * ci: consolidate duplicate workflows and merge CodeQL configs Remove 17 duplicate workflow files that were split copies of the combined originals. Each family (CI, CodeQL, Coverage, PR Build, Alpha Release) had the same job duplicated across separate push/pull_request/schedule/manual trigger files. Merge codeql.yml and codescan.yml into a single codeql.yml with a language matrix covering go, javascript-typescript, python, and actions — matching the previous default setup coverage. Remaining workflows (one per family): - ci.yml (push + PR + manual) - codeql.yml (push + PR + schedule, all languages) - coverage.yml (push + PR + manual) - alpha-release.yml (push + manual) - pr-build.yml (PR + manual) - release.yml (tag push) - agent-verify.yml, auto-label.yml, auto-project.yml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat: add collect, config, crypt, plugin packages and fix all lint issues Add four new infrastructure packages with CLI commands: - pkg/config: layered configuration (defaults → file → env → flags) - pkg/crypt: crypto primitives (Argon2id, AES-GCM, ChaCha20, HMAC, checksums) - pkg/plugin: plugin system with GitHub-based install/update/remove - pkg/collect: collection subsystem (GitHub, BitcoinTalk, market, papers, excavate) Fix all golangci-lint issues across the entire codebase (~100 errcheck, staticcheck SA1012/SA1019/ST1005, unused, ineffassign fixes) so that `core go qa` passes with 0 issues. Closes #167, #168, #170, #250, #251, #252, #253, #254, #255, #256 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
// 2026-02-04 11:34:43 +00:00
)
// Collector is the interface all collection sources implement.
//
// An implementation is identified by Name and does its work in Collect,
// which receives the shared Config and reports the outcome as a Result.
type Collector interface {
// Name returns a human-readable name for this collector.
Name() string
// Collect gathers data from the source and writes it to the configured output.
//
// ctx is the caller-supplied context and cfg is the shared collector
// configuration. It returns a Result describing the run, or an error.
Collect(ctx context.Context, cfg *Config) (*Result, error)
}
// Config carries the settings that are shared by every collector.
type Config struct {
	// Output is the storage medium that collected data is written to.
	Output io.Medium

	// OutputDir is the base directory under which all collected data is placed.
	OutputDir string

	// Limiter applies rate limiting on a per-source basis.
	Limiter *RateLimiter

	// State records collection progress so that runs can be incremental.
	State *State

	// Dispatcher handles the event dispatch used for progress reporting.
	Dispatcher *Dispatcher

	Verbose bool // turn on detailed logging output
	DryRun  bool // simulate collection without writing any files
}
// Result summarises what a single collection run produced.
type Result struct {
	// Source names the collector that produced this result.
	Source string

	// Counters for the run.
	Items   int // items collected successfully
	Errors  int // errors encountered while collecting
	Skipped int // items skipped (e.g. already collected)

	// Files holds the path of every file that was written.
	Files []string
}
// NewConfig builds a Config for outputDir using default components.
//
// Output is always a freshly created MockMedium. That same medium is handed
// to NewState together with the path ".collect-state.json" joined onto
// outputDir. A new rate limiter and a new event dispatcher are attached as
// well. Verbose and DryRun are left at their zero value (false).
//
// Use NewConfigWithMedium to supply a different storage medium.
func NewConfig(outputDir string) *Config {
	medium := io.NewMockMedium()
	statePath := filepath.Join(outputDir, ".collect-state.json")

	cfg := new(Config)
	cfg.Output = medium
	cfg.OutputDir = outputDir
	cfg.Limiter = NewRateLimiter()
	cfg.State = NewState(medium, statePath)
	cfg.Dispatcher = NewDispatcher()
	return cfg
}
// NewConfigWithMedium builds a Config that writes to the storage medium m.
//
// m is stored as Output and is also passed to NewState along with the path
// ".collect-state.json" joined onto outputDir. A new rate limiter and a new
// event dispatcher are attached. Verbose and DryRun are left at their zero
// value (false).
func NewConfigWithMedium(m io.Medium, outputDir string) *Config {
	limiter := NewRateLimiter()
	state := NewState(m, filepath.Join(outputDir, ".collect-state.json"))
	dispatcher := NewDispatcher()

	return &Config{
		Output:     m,
		OutputDir:  outputDir,
		Limiter:    limiter,
		State:      state,
		Dispatcher: dispatcher,
	}
}
// MergeResults folds any number of results into one aggregate whose Source
// is set to source.
//
// Nil entries are ignored. Items, Errors and Skipped are summed, and Files
// are concatenated in argument order. A new Result is always returned, even
// when no results are supplied; the inputs themselves are not modified.
func MergeResults(source string, results ...*Result) *Result {
	total := new(Result)
	total.Source = source

	for i := range results {
		part := results[i]
		if part != nil {
			total.Items += part.Items
			total.Errors += part.Errors
			total.Skipped += part.Skipped
			total.Files = append(total.Files, part.Files...)
		}
	}
	return total
}