package collect

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"forge.lthn.ai/core/go/pkg/io"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// --- GitHub collector: context cancellation and orchestration ---

// TestGitHubCollector_Collect_Good_ContextCancelledInLoop checks that Collect
// reports an error, but still returns a non-nil result, when it is handed a
// context that has already been cancelled.
func TestGitHubCollector_Collect_Good_ContextCancelledInLoop(t *testing.T) {
	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.DryRun = false

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // Cancel immediately so the collector sees a dead context.

	g := &GitHubCollector{Org: "test-org", Repo: "test-repo"}
	result, err := g.Collect(ctx, cfg)

	// The context cancellation should be detected in the loop.
	assert.Error(t, err)
	assert.NotNil(t, result)
}

// TestGitHubCollector_Collect_Good_IssuesOnlyDryRunProgress checks that a
// dry run with IssuesOnly set collects no items and emits at least one
// progress event.
func TestGitHubCollector_Collect_Good_IssuesOnlyDryRunProgress(t *testing.T) {
	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.DryRun = true

	var progressCount int
	cfg.Dispatcher.On(EventProgress, func(e Event) { progressCount++ })

	g := &GitHubCollector{Org: "test-org", Repo: "test-repo", IssuesOnly: true}
	result, err := g.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 0, result.Items)
	assert.GreaterOrEqual(t, progressCount, 1)
}

// TestGitHubCollector_Collect_Good_PRsOnlyDryRunSkipsIssues checks that a dry
// run with PRsOnly set succeeds and collects no items.
func TestGitHubCollector_Collect_Good_PRsOnlyDryRunSkipsIssues(t *testing.T) {
	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.DryRun = true

	g := &GitHubCollector{Org: "test-org", Repo: "test-repo", PRsOnly: true}
	result, err := g.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 0, result.Items)
}

// TestGitHubCollector_Collect_Good_EmitsStartAndComplete checks that a dry-run
// Collect emits exactly one start event and exactly one complete event.
func TestGitHubCollector_Collect_Good_EmitsStartAndComplete(t *testing.T) {
	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.DryRun = true

	var starts, completes int
	cfg.Dispatcher.On(EventStart, func(e Event) { starts++ })
	cfg.Dispatcher.On(EventComplete, func(e Event) { completes++ })

	g := &GitHubCollector{Org: "test-org", Repo: "test-repo"}
	_, err := g.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 1, starts)
	assert.Equal(t, 1, completes)
}

// TestGitHubCollector_Collect_Good_NilDispatcherHandled checks that a dry-run
// Collect succeeds with zero items when the config has no dispatcher.
func TestGitHubCollector_Collect_Good_NilDispatcherHandled(t *testing.T) {
	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.DryRun = true
	cfg.Dispatcher = nil

	g := &GitHubCollector{Org: "test-org", Repo: "test-repo"}
	result, err := g.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 0, result.Items)
}

// TestFormatIssueMarkdown_Good_NoBodyNoURL checks that an issue with an empty
// URL and body still renders its title heading and omits the URL line.
func TestFormatIssueMarkdown_Good_NoBodyNoURL(t *testing.T) {
	issue := ghIssue{
		Number: 1,
		Title:  "No Body Issue",
		State:  "open",
		Author: ghAuthor{Login: "user"},
		URL:    "",
		Body:   "",
	}

	md := formatIssueMarkdown(issue)

	assert.Contains(t, md, "# No Body Issue")
	assert.NotContains(t, md, "**URL:**")
}

// --- Market collector: fetchJSON edge cases ---

// TestFetchJSON_Bad_NonJSONBody checks that fetchJSON fails when the server
// answers with a body that is not JSON.
func TestFetchJSON_Bad_NonJSONBody(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(`not json`))
	}))
	defer srv.Close()

	_, err := fetchJSON[coinData](context.Background(), srv.URL)
	assert.Error(t, err)
}

// TestFetchJSON_Bad_MalformedURL checks that fetchJSON fails for the URL
// "://bad-url".
func TestFetchJSON_Bad_MalformedURL(t *testing.T) {
	_, err := fetchJSON[coinData](context.Background(), "://bad-url")
	assert.Error(t, err)
}

// TestFetchJSON_Bad_ServerUnavailable checks that fetchJSON fails for
// http://127.0.0.1:1, where presumably nothing is listening.
func TestFetchJSON_Bad_ServerUnavailable(t *testing.T) {
	_, err := fetchJSON[coinData](context.Background(), "http://127.0.0.1:1")
	assert.Error(t, err)
}

// TestFetchJSON_Bad_Non200StatusCode checks that a 404 response makes
// fetchJSON fail with an "unexpected status code" message.
func TestFetchJSON_Bad_Non200StatusCode(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusNotFound)
	}))
	defer srv.Close()

	_, err := fetchJSON[coinData](context.Background(), srv.URL)

	// require stops the test here, so err.Error() below cannot hit a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "unexpected status code")
}

// TestMarketCollector_Collect_Bad_MissingCoinID checks that Collect rejects an
// empty CoinID with a "coin ID is required" error.
func TestMarketCollector_Collect_Bad_MissingCoinID(t *testing.T) {
	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")

	mc := &MarketCollector{CoinID: ""}
	_, err := mc.Collect(context.Background(), cfg)

	// require stops the test here, so err.Error() below cannot hit a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "coin ID is required")
}

// TestMarketCollector_Collect_Good_NoDispatcher checks that Collect yields two
// items against a stub server when both the limiter and dispatcher are nil.
func TestMarketCollector_Collect_Good_NoDispatcher(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		data := coinData{ID: "test", Symbol: "tst", Name: "Test", MarketData: marketData{CurrentPrice: map[string]float64{"usd": 1.0}}}
		_ = json.NewEncoder(w).Encode(data)
	}))
	defer srv.Close()

	// Point the collector at the stub server and restore the real URL afterwards.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = srv.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = nil
	cfg.Dispatcher = nil

	mc := &MarketCollector{CoinID: "test"}
	result, err := mc.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 2, result.Items)
}

// TestMarketCollector_Collect_Bad_CurrentFetchFails checks that a 500 response
// is recorded as one error and zero items without Collect itself failing.
func TestMarketCollector_Collect_Bad_CurrentFetchFails(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
	}))
	defer srv.Close()

	// Point the collector at the stub server and restore the real URL afterwards.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = srv.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = nil

	mc := &MarketCollector{CoinID: "fail-coin"}
	result, err := mc.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 0, result.Items)
	assert.Equal(t, 1, result.Errors)
}

// TestMarketCollector_CollectHistorical_Good_DefaultDays checks that a
// historical collection yields three items and that every request after the
// first carries days=365 in its query string.
func TestMarketCollector_CollectHistorical_Good_DefaultDays(t *testing.T) {
	callCount := 0
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		callCount++
		w.Header().Set("Content-Type", "application/json")

		// The first request is answered with current coin data.
		if callCount == 1 {
			data := coinData{ID: "test", Symbol: "tst", Name: "Test", MarketData: marketData{CurrentPrice: map[string]float64{"usd": 1.0}}}
			_ = json.NewEncoder(w).Encode(data)
			return
		}

		// Every later request is answered with historical data.
		assert.Contains(t, r.URL.RawQuery, "days=365")
		data := historicalData{Prices: [][]float64{{1705305600000, 1.0}}}
		_ = json.NewEncoder(w).Encode(data)
	}))
	defer srv.Close()

	// Point the collector at the stub server and restore the real URL afterwards.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = srv.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = nil

	mc := &MarketCollector{CoinID: "test", Historical: true}
	result, err := mc.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 3, result.Items)
}

// TestMarketCollector_CollectHistorical_Good_WithRateLimiter checks that a
// historical collection still yields three items when a rate limiter with a
// one-millisecond "coingecko" delay is configured.
func TestMarketCollector_CollectHistorical_Good_WithRateLimiter(t *testing.T) {
	callCount := 0
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		callCount++
		w.Header().Set("Content-Type", "application/json")

		// The first request is answered with current coin data.
		if callCount == 1 {
			data := coinData{ID: "test", Symbol: "tst", Name: "Test", MarketData: marketData{CurrentPrice: map[string]float64{"usd": 1.0}}}
			_ = json.NewEncoder(w).Encode(data)
			return
		}

		// Every later request is answered with historical data.
		data := historicalData{Prices: [][]float64{{1705305600000, 1.0}}}
		_ = json.NewEncoder(w).Encode(data)
	}))
	defer srv.Close()

	// Point the collector at the stub server and restore the real URL afterwards.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = srv.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = NewRateLimiter()
	cfg.Limiter.SetDelay("coingecko", time.Millisecond)

	mc := &MarketCollector{CoinID: "test", Historical: true}
	result, err := mc.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 3, result.Items)
}

// --- State: error paths ---

// TestState_Load_Bad_MalformedJSON checks that Load fails when the state file
// holds invalid JSON.
func TestState_Load_Bad_MalformedJSON(t *testing.T) {
	m := io.NewMockMedium()
	m.Files["/state.json"] = `{invalid json`

	s := NewState(m, "/state.json")
	err := s.Load()

	assert.Error(t, err)
}

// --- Process: additional coverage for uncovered branches ---

// TestHTMLToMarkdown_Good_PreCodeBlock checks that preformatted code is
// rendered with a Markdown code fence around its text.
func TestHTMLToMarkdown_Good_PreCodeBlock(t *testing.T) {
	input := `<pre><code>some code here</code></pre>`

	result, err := HTMLToMarkdown(input)

	require.NoError(t, err)
	assert.Contains(t, result, "```")
	assert.Contains(t, result, "some code here")
}

// TestHTMLToMarkdown_Good_StrongAndEmElements checks that <strong> and <em>
// are rendered as Markdown bold and italic markers.
func TestHTMLToMarkdown_Good_StrongAndEmElements(t *testing.T) {
	input := `<strong>bold</strong> and <em>italic</em>`

	result, err := HTMLToMarkdown(input)

	require.NoError(t, err)
	assert.Contains(t, result, "**bold**")
	assert.Contains(t, result, "*italic*")
}

// TestHTMLToMarkdown_Good_InlineCode checks that a standalone <code> element
// is rendered as a backtick-delimited inline code span.
func TestHTMLToMarkdown_Good_InlineCode(t *testing.T) {
	input := `<code>var x = 1</code>`

	result, err := HTMLToMarkdown(input)

	require.NoError(t, err)
	assert.Contains(t, result, "`var x = 1`")
}
// TestHTMLToMarkdown_Good_AnchorWithHref checks that an anchor element with an
// href attribute is rendered as a Markdown link of the form [text](target).
func TestHTMLToMarkdown_Good_AnchorWithHref(t *testing.T) {
	// The input must carry real anchor markup: plain text has no href from
	// which the expected link target could be derived.
	input := `<a href="https://example.com">Click here</a>`

	result, err := HTMLToMarkdown(input)

	require.NoError(t, err)
	assert.Contains(t, result, "[Click here](https://example.com)")
}
func TestHTMLToMarkdown_Good_ScriptTagRemoved(t *testing.T) {
input := `Safe text
` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "Safe text") assert.NotContains(t, result, "alert") } func TestHTMLToMarkdown_Good_H1H2H3Headers(t *testing.T) { input := `First paragraph
Second paragraph
` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "First paragraph") assert.Contains(t, result, "Second paragraph") } func TestJSONToMarkdown_Bad_Malformed(t *testing.T) { _, err := JSONToMarkdown(`{invalid}`) assert.Error(t, err) } func TestJSONToMarkdown_Good_FlatObject(t *testing.T) { input := `{"name": "Alice", "age": 30}` result, err := JSONToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "**name:** Alice") assert.Contains(t, result, "**age:** 30") } func TestJSONToMarkdown_Good_ScalarList(t *testing.T) { input := `["hello", "world"]` result, err := JSONToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "- hello") assert.Contains(t, result, "- world") } func TestJSONToMarkdown_Good_ObjectContainingArray(t *testing.T) { input := `{"items": [1, 2, 3]}` result, err := JSONToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "**items:**") } func TestProcessor_Process_Bad_MissingDir(t *testing.T) { m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") p := &Processor{Source: "test", Dir: ""} _, err := p.Process(context.Background(), cfg) assert.Error(t, err) assert.Contains(t, err.Error(), "directory is required") } func TestProcessor_Process_Good_DryRunEmitsProgress(t *testing.T) { m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.DryRun = true var progressCount int cfg.Dispatcher.On(EventProgress, func(e Event) { progressCount++ }) p := &Processor{Source: "test", Dir: "/input"} result, err := p.Process(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 0, result.Items) assert.Equal(t, 1, progressCount) } func TestProcessor_Process_Good_SkipsUnsupportedExtension(t *testing.T) { m := io.NewMockMedium() m.Dirs["/input"] = true m.Files["/input/data.csv"] = `a,b,c` cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &Processor{Source: "test", Dir: "/input"} result, err := p.Process(context.Background(), cfg) 
require.NoError(t, err) assert.Equal(t, 0, result.Items) assert.Equal(t, 1, result.Skipped) } func TestProcessor_Process_Good_MarkdownPassthroughTrimmed(t *testing.T) { m := io.NewMockMedium() m.Dirs["/input"] = true m.Files["/input/readme.md"] = `# Hello World ` cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &Processor{Source: "test", Dir: "/input"} result, err := p.Process(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 1, result.Items) content, readErr := m.Read("/output/processed/test/readme.md") require.NoError(t, readErr) assert.Equal(t, "# Hello World", content) } func TestProcessor_Process_Good_HTMExtensionHandled(t *testing.T) { m := io.NewMockMedium() m.Dirs["/input"] = true m.Files["/input/page.htm"] = `Text
` cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil cfg.Dispatcher = nil p := &Processor{Source: "test", Dir: "/input"} result, err := p.Process(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 1, result.Items) } // --- BitcoinTalk: additional edge cases --- func TestBitcoinTalkCollector_Name_Good_EmptyTopicAndURL(t *testing.T) { b := &BitcoinTalkCollector{} assert.Equal(t, "bitcointalk:", b.Name()) } func TestBitcoinTalkCollector_Collect_Good_NilDispatcherHandled(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html") _, _ = w.Write([]byte(sampleBTCTalkPage(2))) })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil cfg.Dispatcher = nil b := &BitcoinTalkCollector{TopicID: "12345"} result, err := b.Collect(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 2, result.Items) } func TestBitcoinTalkCollector_Collect_Good_DryRunEmitsProgress(t *testing.T) { m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.DryRun = true var progressEmitted bool cfg.Dispatcher.On(EventProgress, func(e Event) { progressEmitted = true }) b := &BitcoinTalkCollector{TopicID: "12345"} result, err := b.Collect(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 0, result.Items) assert.True(t, progressEmitted) } func TestParsePostsFromHTML_Good_PostWithNoInnerContent(t *testing.T) { htmlContent := `