go-scm/collect/coverage_phase2_test.go
Claude 8d1d7fce01
test: push coverage across collect/jobrunner/gitea
collect: 83.0% -> 90.0% via error medium injection (write/list/read/ensureDir
failures), rate limiter blocking tests, HTTP mock servers for papers/market/
bitcointalk write errors, processor context cancellation, state round-trip,
and GitHub collector gh-auth-failure paths.

gitea: 89.2% -> 94.0% via paginated org/user repo servers, PR meta with
comment counting and nil dates, GetCommentBodies with nil poster, ListPullRequests
state mapping, and NewFromConfig flag override tests.

jobrunner: 86.4% -> 94.4% via journal error paths (empty baseDir, readonly dir,
path sanitisation), poller with error-returning source/handler/report, journal
integration (JSONL file verification), multiple sources, immediate cancellation,
and cycle counter tests.

handlers: 83.8% -> 89.2% via dispatch mock servers (invalid repo, EnsureLabel
failure, GetIssue not found, AssignIssue failure, AddIssueLabels error, issue
with no special labels), completion label errors, EnableAutoMerge pending checks,
PublishDraft merged state, SendFixCommand merge conflict, DismissReviews stale
review, TickParent checkbox ticking, and dual-run mode. Remaining 10.8% is in
SSH-dependent dispatch code (secureTransfer/runRemote/ticketExists) that cannot
be tested without modifying production code.

Co-Authored-By: Charon <charon@lethean.io>
2026-02-20 03:06:43 +00:00

1284 lines
44 KiB
Go

package collect
import (
"context"
"encoding/json"
"fmt"
goio "io"
"io/fs"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"forge.lthn.ai/core/go/pkg/io"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// errorMedium embeds a MockMedium and lets a test force Write, EnsureDir, List
// or Read to fail by setting the matching error field.
type errorMedium struct {
	*io.MockMedium

	// Injected failures. A nil field leaves the corresponding operation
	// delegated to the embedded MockMedium.
	writeErr, ensureDirErr, listErr, readErr error
}
// Write returns the injected writeErr when one is set; otherwise it forwards
// the call to the embedded MockMedium.
func (e *errorMedium) Write(path, content string) error {
	if err := e.writeErr; err != nil {
		return err
	}
	return e.MockMedium.Write(path, content)
}
// EnsureDir returns the injected ensureDirErr when one is set; otherwise it
// forwards the call to the embedded MockMedium.
func (e *errorMedium) EnsureDir(path string) error {
	if err := e.ensureDirErr; err != nil {
		return err
	}
	return e.MockMedium.EnsureDir(path)
}
// List forwards to the embedded MockMedium unless listErr is set, in which
// case it returns no entries together with that error.
func (e *errorMedium) List(path string) ([]fs.DirEntry, error) {
	if e.listErr == nil {
		return e.MockMedium.List(path)
	}
	return nil, e.listErr
}
// Read forwards to the embedded MockMedium unless readErr is set, in which
// case it returns an empty string together with that error.
func (e *errorMedium) Read(path string) (string, error) {
	if e.readErr == nil {
		return e.MockMedium.Read(path)
	}
	return "", e.readErr
}
// The methods below never inject a failure: each one hands its arguments
// straight to the embedded MockMedium and returns whatever it returns.

// FileGet delegates to the embedded MockMedium.
func (e *errorMedium) FileGet(path string) (string, error) {
	return e.MockMedium.FileGet(path)
}

// FileSet delegates to the embedded MockMedium.
func (e *errorMedium) FileSet(path, content string) error {
	return e.MockMedium.FileSet(path, content)
}

// Delete delegates to the embedded MockMedium.
func (e *errorMedium) Delete(path string) error {
	return e.MockMedium.Delete(path)
}

// DeleteAll delegates to the embedded MockMedium.
func (e *errorMedium) DeleteAll(path string) error {
	return e.MockMedium.DeleteAll(path)
}

// Rename delegates to the embedded MockMedium.
func (e *errorMedium) Rename(old, new string) error {
	return e.MockMedium.Rename(old, new)
}

// Stat delegates to the embedded MockMedium.
func (e *errorMedium) Stat(path string) (fs.FileInfo, error) {
	return e.MockMedium.Stat(path)
}

// Open delegates to the embedded MockMedium.
func (e *errorMedium) Open(path string) (fs.File, error) {
	return e.MockMedium.Open(path)
}

// Create delegates to the embedded MockMedium.
func (e *errorMedium) Create(path string) (goio.WriteCloser, error) {
	return e.MockMedium.Create(path)
}

// Append delegates to the embedded MockMedium.
func (e *errorMedium) Append(path string) (goio.WriteCloser, error) {
	return e.MockMedium.Append(path)
}

// ReadStream delegates to the embedded MockMedium.
func (e *errorMedium) ReadStream(path string) (goio.ReadCloser, error) {
	return e.MockMedium.ReadStream(path)
}

// WriteStream delegates to the embedded MockMedium.
func (e *errorMedium) WriteStream(path string) (goio.WriteCloser, error) {
	return e.MockMedium.WriteStream(path)
}

// Exists delegates to the embedded MockMedium.
func (e *errorMedium) Exists(path string) bool {
	return e.MockMedium.Exists(path)
}

// IsDir delegates to the embedded MockMedium.
func (e *errorMedium) IsDir(path string) bool {
	return e.MockMedium.IsDir(path)
}

// IsFile delegates to the embedded MockMedium.
func (e *errorMedium) IsFile(path string) bool {
	return e.MockMedium.IsFile(path)
}
// --- errorLimiter: a RateLimiter that returns errors ---
// errorLimiterWaiter is an empty placeholder type.
// NOTE(review): no methods are declared on it and nothing in the visible part
// of this file references it, so it does not yet behave as the limiter the
// heading above describes — confirm whether it is still needed.
type errorLimiterWaiter struct{}
// TestProcessor_Process_Bad_ListError checks that Process returns an error
// containing "failed to list directory" when the medium's List call fails.
func TestProcessor_Process_Bad_ListError(t *testing.T) {
	em := &errorMedium{MockMedium: io.NewMockMedium(), listErr: fmt.Errorf("list denied")}
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}

	p := &Processor{Source: "test", Dir: "/input"}
	_, err := p.Process(context.Background(), cfg)

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to list directory")
}
// TestProcessor_Process_Bad_EnsureDirError checks that Process returns an error
// containing "failed to create output directory" when EnsureDir fails.
func TestProcessor_Process_Bad_EnsureDirError(t *testing.T) {
	em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
	// Seed the input directory so that List has an entry to return.
	em.MockMedium.Dirs["/input"] = true
	em.MockMedium.Files["/input/test.html"] = "<h1>Test</h1>"
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}

	p := &Processor{Source: "test", Dir: "/input"}
	_, err := p.Process(context.Background(), cfg)

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to create output directory")
}
// TestProcessor_Process_Bad_ContextCancelledDuringLoop runs Process over two
// input files with an already-cancelled context and expects an error
// containing "context cancelled".
func TestProcessor_Process_Bad_ContextCancelledDuringLoop(t *testing.T) {
	m := io.NewMockMedium()
	m.Dirs["/input"] = true
	m.Files["/input/a.html"] = "<h1>Test</h1>"
	m.Files["/input/b.html"] = "<h1>Test</h1>"
	cfg := NewConfigWithMedium(m, "/output")

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // Cancel immediately so ctx.Err() is non-nil

	p := &Processor{Source: "test", Dir: "/input"}
	_, err := p.Process(ctx, cfg)

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "context cancelled")
}
// TestProcessor_Process_Bad_ReadError makes every Read fail and expects Process
// to succeed while counting one error in the result.
func TestProcessor_Process_Bad_ReadError(t *testing.T) {
	em := &errorMedium{MockMedium: io.NewMockMedium(), readErr: fmt.Errorf("read denied")}
	em.MockMedium.Dirs["/input"] = true
	em.MockMedium.Files["/input/test.html"] = "<h1>Test</h1>"
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}

	p := &Processor{Source: "test", Dir: "/input"}
	result, err := p.Process(context.Background(), cfg)

	// Read errors increment Errors, not returned. require stops the test here
	// so that result is not inspected after a failed Process.
	require.NoError(t, err)
	assert.Equal(t, 1, result.Errors)
}
// TestProcessor_Process_Bad_InvalidJSONFile feeds Process a malformed .json
// file and expects one counted error plus an EventError on the dispatcher.
func TestProcessor_Process_Bad_InvalidJSONFile(t *testing.T) {
	m := io.NewMockMedium()
	m.Dirs["/input"] = true
	m.Files["/input/bad.json"] = "not valid json {"
	cfg := NewConfigWithMedium(m, "/output")

	var errorEmitted bool
	cfg.Dispatcher.On(EventError, func(e Event) { errorEmitted = true })

	p := &Processor{Source: "test", Dir: "/input"}
	result, err := p.Process(context.Background(), cfg)

	// require stops the test here so that result is not inspected after a
	// failed Process.
	require.NoError(t, err)
	assert.Equal(t, 1, result.Errors)
	assert.True(t, errorEmitted, "should emit error event for bad JSON")
}
// TestProcessor_Process_Bad_WriteError makes every Write fail and expects
// Process to succeed while counting one error in the result.
func TestProcessor_Process_Bad_WriteError(t *testing.T) {
	em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
	em.MockMedium.Dirs["/input"] = true
	em.MockMedium.Files["/input/page.html"] = "<h1>Title</h1>"
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}

	p := &Processor{Source: "test", Dir: "/input"}
	result, err := p.Process(context.Background(), cfg)

	// require stops the test here so that result is not inspected after a
	// failed Process.
	require.NoError(t, err)
	assert.Equal(t, 1, result.Errors)
}
// TestProcessor_Process_Good_EmitsItemAndComplete processes a single HTML file
// and expects one item in the result plus EventItem and EventComplete on the
// dispatcher.
func TestProcessor_Process_Good_EmitsItemAndComplete(t *testing.T) {
	m := io.NewMockMedium()
	m.Dirs["/input"] = true
	m.Files["/input/page.html"] = "<h1>Title</h1><p>Body</p>"
	cfg := NewConfigWithMedium(m, "/output")

	var itemEmitted, completeEmitted bool
	cfg.Dispatcher.On(EventItem, func(e Event) { itemEmitted = true })
	cfg.Dispatcher.On(EventComplete, func(e Event) { completeEmitted = true })

	p := &Processor{Source: "test", Dir: "/input"}
	result, err := p.Process(context.Background(), cfg)

	// require stops the test here so that result is not inspected after a
	// failed Process.
	require.NoError(t, err)
	assert.Equal(t, 1, result.Items)
	assert.True(t, itemEmitted)
	assert.True(t, completeEmitted)
}
// --- Papers: with rate limiter that fails ---
func TestPapersCollector_CollectIACR_Bad_LimiterError(t *testing.T) {
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = NewRateLimiter()
cfg.Limiter.SetDelay("iacr", 10*time.Minute) // Long delay
ctx, cancel := context.WithCancel(context.Background())
cancel() // Cancel context so limiter.Wait fails
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
_, err := p.Collect(ctx, cfg)
assert.Error(t, err)
}
func TestPapersCollector_CollectArXiv_Bad_LimiterError(t *testing.T) {
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = NewRateLimiter()
cfg.Limiter.SetDelay("arxiv", 10*time.Minute)
ctx, cancel := context.WithCancel(context.Background())
cancel()
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
_, err := p.Collect(ctx, cfg)
assert.Error(t, err)
}
// --- Papers: IACR with bad HTML response ---
func TestPapersCollector_CollectIACR_Bad_InvalidHTML(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
// Serve valid-ish HTML but with no papers - the parse succeeds but returns empty.
_, _ = w.Write([]byte("<html><body>no papers here</body></html>"))
}))
defer srv.Close()
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
old := httpClient
httpClient = &http.Client{Transport: transport}
defer func() { httpClient = old }()
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = nil
p := &PapersCollector{Source: PaperSourceIACR, Query: "nothing"}
result, err := p.Collect(context.Background(), cfg)
require.NoError(t, err)
assert.Equal(t, 0, result.Items)
}
// TestPapersCollector_CollectIACR_Bad_WriteError serves the IACR fixture while
// every medium write fails, and expects Collect to succeed with two counted
// errors.
func TestPapersCollector_CollectIACR_Bad_WriteError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(sampleIACRHTML))
	}))
	t.Cleanup(srv.Close)

	// Route the package-level client at the stub server for this test only.
	prevClient := httpClient
	httpClient = &http.Client{
		Transport: &rewriteTransport{base: srv.Client().Transport, target: srv.URL},
	}
	t.Cleanup(func() { httpClient = prevClient })

	failing := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
	conf := &Config{Output: failing, OutputDir: "/output", Dispatcher: NewDispatcher()}
	conf.Limiter = nil

	collector := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
	got, collectErr := collector.Collect(context.Background(), conf)
	require.NoError(t, collectErr)  // Write errors increment Errors, not returned
	assert.Equal(t, 2, got.Errors) // 2 papers both fail to write
}
// TestPapersCollector_CollectIACR_Bad_EnsureDirError serves the IACR fixture
// while EnsureDir fails, and expects Collect to return an error containing
// "failed to create output directory".
func TestPapersCollector_CollectIACR_Bad_EnsureDirError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(sampleIACRHTML))
	}))
	defer srv.Close()

	// Route the package-level client at the stub server for this test only.
	transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
	old := httpClient
	httpClient = &http.Client{Transport: transport}
	defer func() { httpClient = old }()

	em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
	cfg.Limiter = nil

	p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
	_, err := p.Collect(context.Background(), cfg)

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to create output directory")
}
// TestPapersCollector_CollectArXiv_Bad_WriteError serves the arXiv fixture
// while every medium write fails, and expects Collect to succeed with two
// counted errors.
func TestPapersCollector_CollectArXiv_Bad_WriteError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/xml")
		_, _ = w.Write([]byte(sampleArXivXML))
	}))
	t.Cleanup(srv.Close)

	// Route the package-level client at the stub server for this test only.
	prevClient := httpClient
	httpClient = &http.Client{
		Transport: &rewriteTransport{base: srv.Client().Transport, target: srv.URL},
	}
	t.Cleanup(func() { httpClient = prevClient })

	failing := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
	conf := &Config{Output: failing, OutputDir: "/output", Dispatcher: NewDispatcher()}
	conf.Limiter = nil

	collector := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
	got, collectErr := collector.Collect(context.Background(), conf)
	require.NoError(t, collectErr)
	assert.Equal(t, 2, got.Errors)
}
// TestPapersCollector_CollectArXiv_Bad_EnsureDirError serves the arXiv fixture
// while EnsureDir fails, and expects Collect to return an error containing
// "failed to create output directory".
func TestPapersCollector_CollectArXiv_Bad_EnsureDirError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/xml")
		_, _ = w.Write([]byte(sampleArXivXML))
	}))
	defer srv.Close()

	// Route the package-level client at the stub server for this test only.
	transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
	old := httpClient
	httpClient = &http.Client{Transport: transport}
	defer func() { httpClient = old }()

	em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
	cfg.Limiter = nil

	p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
	_, err := p.Collect(context.Background(), cfg)

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to create output directory")
}
// --- Papers: collectAll with dispatcher events ---
func TestPapersCollector_CollectAll_Good_WithDispatcher(t *testing.T) {
callCount := 0
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
callCount++
if callCount == 1 {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(sampleIACRHTML))
} else {
w.Header().Set("Content-Type", "application/xml")
_, _ = w.Write([]byte(sampleArXivXML))
}
}))
defer srv.Close()
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
old := httpClient
httpClient = &http.Client{Transport: transport}
defer func() { httpClient = old }()
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = nil
var completeEmitted bool
cfg.Dispatcher.On(EventComplete, func(e Event) { completeEmitted = true })
p := &PapersCollector{Source: PaperSourceAll, Query: "crypto"}
result, err := p.Collect(context.Background(), cfg)
require.NoError(t, err)
assert.Equal(t, 4, result.Items)
assert.True(t, completeEmitted)
}
// TestPapersCollector_CollectIACR_Good_EmitsItemEvents collects the IACR
// fixture and expects two items, each announced by one EventItem.
func TestPapersCollector_CollectIACR_Good_EmitsItemEvents(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(sampleIACRHTML))
	}))
	t.Cleanup(srv.Close)

	// Route the package-level client at the stub server for this test only.
	prevClient := httpClient
	httpClient = &http.Client{
		Transport: &rewriteTransport{base: srv.Client().Transport, target: srv.URL},
	}
	t.Cleanup(func() { httpClient = prevClient })

	conf := NewConfigWithMedium(io.NewMockMedium(), "/output")
	conf.Limiter = nil

	itemEvents := 0
	conf.Dispatcher.On(EventItem, func(Event) { itemEvents++ })

	collector := &PapersCollector{Source: PaperSourceIACR, Query: "zero knowledge"}
	got, collectErr := collector.Collect(context.Background(), conf)
	require.NoError(t, collectErr)
	assert.Equal(t, 2, got.Items)
	assert.Equal(t, 2, itemEvents)
}
// TestPapersCollector_CollectArXiv_Good_EmitsItemEvents collects the arXiv
// fixture and expects two items, each announced by one EventItem.
func TestPapersCollector_CollectArXiv_Good_EmitsItemEvents(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/xml")
		_, _ = w.Write([]byte(sampleArXivXML))
	}))
	t.Cleanup(srv.Close)

	// Route the package-level client at the stub server for this test only.
	prevClient := httpClient
	httpClient = &http.Client{
		Transport: &rewriteTransport{base: srv.Client().Transport, target: srv.URL},
	}
	t.Cleanup(func() { httpClient = prevClient })

	conf := NewConfigWithMedium(io.NewMockMedium(), "/output")
	conf.Limiter = nil

	itemEvents := 0
	conf.Dispatcher.On(EventItem, func(Event) { itemEvents++ })

	collector := &PapersCollector{Source: PaperSourceArXiv, Query: "ring signatures"}
	got, collectErr := collector.Collect(context.Background(), conf)
	require.NoError(t, collectErr)
	assert.Equal(t, 2, got.Items)
	assert.Equal(t, 2, itemEvents)
}
// TestMarketCollector_Collect_Bad_WriteError serves canned CoinGecko responses
// while every medium write fails. Collect must still succeed and report the
// failures through result.Errors.
func TestMarketCollector_Collect_Bad_WriteError(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if strings.Contains(r.URL.Path, "/market_chart") {
			_ = json.NewEncoder(w).Encode(historicalData{
				Prices: [][]float64{{1610000000000, 42000.0}},
			})
			return
		}
		_ = json.NewEncoder(w).Encode(coinData{
			ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
			MarketData: marketData{
				CurrentPrice: map[string]float64{"usd": 42000},
				MarketCap:    map[string]float64{"usd": 800000000000},
			},
		})
	}))
	defer server.Close()

	// Point the collector at the stub server for this test only.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = server.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
	cfg.Limiter = nil

	mc := &MarketCollector{CoinID: "bitcoin"}
	result, err := mc.Collect(context.Background(), cfg)

	// collectCurrent will fail on its first write.
	// NOTE(review): Historical is not set on mc, so it is unclear from here
	// whether collectHistorical also runs — confirm.
	require.NoError(t, err) // errors are counted not returned at top level
	// GreaterOrEqual reports the actual count on failure, unlike assert.True.
	assert.GreaterOrEqual(t, result.Errors, 1, "should have at least one error from write failure")
}
// TestMarketCollector_Collect_Bad_EnsureDirError makes EnsureDir fail and
// expects Collect to return an error containing
// "failed to create output directory".
func TestMarketCollector_Collect_Bad_EnsureDirError(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin"})
	}))
	defer server.Close()

	// Point the collector at the stub server for this test only.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = server.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
	cfg.Limiter = nil

	mc := &MarketCollector{CoinID: "bitcoin"}
	_, err := mc.Collect(context.Background(), cfg)

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to create output directory")
}
// TestMarketCollector_Collect_Bad_LimiterError runs a market collection with a
// rate limiter configured and an already-cancelled context. Collect must
// succeed and report at least one counted error.
func TestMarketCollector_Collect_Bad_LimiterError(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin"})
	}))
	defer server.Close()

	// Point the collector at the stub server for this test only.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = server.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = NewRateLimiter()
	cfg.Limiter.SetDelay("coingecko", 10*time.Minute)

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	mc := &MarketCollector{CoinID: "bitcoin"}
	result, err := mc.Collect(ctx, cfg)
	require.NoError(t, err) // error counted, not returned
	// GreaterOrEqual reports the actual count on failure, unlike assert.True.
	assert.GreaterOrEqual(t, result.Errors, 1)
}
// TestMarketCollector_Collect_Good_HistoricalCustomDate collects current and
// historical data with an explicit FromDate and expects at least two items in
// the result.
func TestMarketCollector_Collect_Good_HistoricalCustomDate(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if strings.Contains(r.URL.Path, "/market_chart") {
			_ = json.NewEncoder(w).Encode(historicalData{
				Prices: [][]float64{{1610000000000, 42000.0}},
			})
			return
		}
		_ = json.NewEncoder(w).Encode(coinData{
			ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
			MarketData: marketData{
				CurrentPrice: map[string]float64{"usd": 42000},
				MarketCap:    map[string]float64{"usd": 800000000000},
			},
		})
	}))
	defer server.Close()

	// Point the collector at the stub server for this test only.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = server.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = nil

	mc := &MarketCollector{CoinID: "bitcoin", FromDate: "2025-01-01", Historical: true}
	result, err := mc.Collect(context.Background(), cfg)
	require.NoError(t, err)
	// current.json + summary.md at minimum. GreaterOrEqual reports the actual
	// count on failure, unlike assert.True.
	assert.GreaterOrEqual(t, result.Items, 2)
}
// TestBitcoinTalkCollector_Collect_Bad_EnsureDirError makes EnsureDir fail and
// expects Collect to return an error containing
// "failed to create output directory".
func TestBitcoinTalkCollector_Collect_Bad_EnsureDirError(t *testing.T) {
	em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
	cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
	cfg.Limiter = nil

	b := &BitcoinTalkCollector{TopicID: "12345"}
	_, err := b.Collect(context.Background(), cfg)

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to create output directory")
}
// --- BitcoinTalk: limiter error ---
func TestBitcoinTalkCollector_Collect_Bad_LimiterError(t *testing.T) {
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = NewRateLimiter()
cfg.Limiter.SetDelay("bitcointalk", 10*time.Minute)
ctx, cancel := context.WithCancel(context.Background())
cancel() // Cancel context so limiter.Wait fails
b := &BitcoinTalkCollector{TopicID: "12345"}
_, err := b.Collect(ctx, cfg)
assert.Error(t, err)
}
// TestBitcoinTalkCollector_Collect_Bad_WriteErrorOnPosts serves a three-post
// page while every medium write fails, and expects Collect to succeed with
// three counted errors and no items.
func TestBitcoinTalkCollector_Collect_Bad_WriteErrorOnPosts(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(sampleBTCTalkPage(3)))
	}))
	t.Cleanup(srv.Close)

	// Route the package-level client at the stub server for this test only.
	prevClient := httpClient
	httpClient = &http.Client{
		Transport: &rewriteTransport{base: srv.Client().Transport, target: srv.URL},
	}
	t.Cleanup(func() { httpClient = prevClient })

	failing := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
	conf := &Config{Output: failing, OutputDir: "/output", Dispatcher: NewDispatcher()}
	conf.Limiter = nil

	collector := &BitcoinTalkCollector{TopicID: "12345"}
	got, collectErr := collector.Collect(context.Background(), conf)
	require.NoError(t, collectErr)  // write errors are counted
	assert.Equal(t, 3, got.Errors) // 3 posts all fail to write
	assert.Equal(t, 0, got.Items)
}
// TestBitcoinTalkCollector_FetchPage_Bad_NonOKStatus points fetchPage at a
// server that always answers 403 and expects an error containing
// "unexpected status code: 403".
func TestBitcoinTalkCollector_FetchPage_Bad_NonOKStatus(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusForbidden)
	}))
	defer srv.Close()

	b := &BitcoinTalkCollector{TopicID: "12345"}
	_, err := b.fetchPage(context.Background(), srv.URL)

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "unexpected status code: 403")
}
// TestBitcoinTalkCollector_FetchPage_Bad_RequestError redirects the HTTP client
// at an address expected to refuse connections and expects fetchPage to return
// an error containing "request failed".
func TestBitcoinTalkCollector_FetchPage_Bad_RequestError(t *testing.T) {
	old := httpClient
	httpClient = &http.Client{Transport: &rewriteTransport{target: "http://127.0.0.1:1"}} // Connection refused
	defer func() { httpClient = old }()

	b := &BitcoinTalkCollector{TopicID: "12345"}
	_, err := b.fetchPage(context.Background(), "https://bitcointalk.org/index.php?topic=12345.0")

	// require rather than assert: err.Error() below would panic on a nil error.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "request failed")
}
// TestBitcoinTalkCollector_FetchPage_Good_EmptyPage serves an HTML page with an
// empty body and expects fetchPage to return no posts and no error.
func TestBitcoinTalkCollector_FetchPage_Good_EmptyPage(t *testing.T) {
	const blankPage = "<html><body></body></html>"
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(blankPage))
	}))
	t.Cleanup(srv.Close)

	// Route the package-level client at the stub server for this test only.
	prevClient := httpClient
	stubTransport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
	httpClient = &http.Client{Transport: stubTransport}
	t.Cleanup(func() { httpClient = prevClient })

	collector := &BitcoinTalkCollector{TopicID: "12345"}
	got, fetchErr := collector.fetchPage(context.Background(), "https://bitcointalk.org/index.php?topic=12345.0")
	require.NoError(t, fetchErr)
	assert.Empty(t, got)
}
// --- BitcoinTalk: Collect with fetch error + dispatcher ---
func TestBitcoinTalkCollector_Collect_Bad_FetchErrorWithDispatcher(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
}))
defer srv.Close()
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
old := httpClient
httpClient = &http.Client{Transport: transport}
defer func() { httpClient = old }()
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = nil
var errorEmitted bool
cfg.Dispatcher.On(EventError, func(e Event) { errorEmitted = true })
b := &BitcoinTalkCollector{TopicID: "12345"}
result, err := b.Collect(context.Background(), cfg)
require.NoError(t, err)
assert.Equal(t, 1, result.Errors)
assert.True(t, errorEmitted)
}
// TestState_Save_Good_RoundTrip saves a state holding two entries, loads it
// into a second State backed by the same medium and path, and checks that both
// entries come back.
func TestState_Save_Good_RoundTrip(t *testing.T) {
	const statePath = "/data/state.json"
	store := io.NewMockMedium()

	written := NewState(store, statePath)
	written.Set("source1", &StateEntry{Source: "source1", Items: 42, LastID: "xyz"})
	written.Set("source2", &StateEntry{Source: "source2", Items: 7, Cursor: "page2"})
	require.NoError(t, written.Save())

	// Load into fresh state and verify
	reloaded := NewState(store, statePath)
	require.NoError(t, reloaded.Load())

	first, found := reloaded.Get("source1")
	require.True(t, found)
	assert.Equal(t, 42, first.Items)

	second, found := reloaded.Get("source2")
	require.True(t, found)
	assert.Equal(t, "page2", second.Cursor)
}
// TestGitHubCollector_Collect_Bad_GhNotAuthenticated collects a nonexistent
// repository and expects Collect to succeed while counting at least one error
// and emitting at least one EventError.
// NOTE(review): the expected failures rely on the gh CLI being unable to fetch
// this repository in the test environment — confirm this holds in CI.
func TestGitHubCollector_Collect_Bad_GhNotAuthenticated(t *testing.T) {
	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = nil

	var errorCount int
	cfg.Dispatcher.On(EventError, func(e Event) { errorCount++ })

	// With Repo set, Collect skips listOrgRepos and goes directly to collectIssues/collectPRs
	// which use exec.Command("gh", ...) and fail because gh isn't authenticated.
	g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo"}
	result, err := g.Collect(context.Background(), cfg)
	require.NoError(t, err)

	// Both collectIssues and collectPRs should fail, incrementing Errors.
	// GreaterOrEqual reports the actual counts on failure, unlike assert.True.
	assert.GreaterOrEqual(t, result.Errors, 1, "at least one error expected from unauthenticated gh")
	assert.GreaterOrEqual(t, errorCount, 1, "dispatcher should emit error events")
}
// --- GitHub: Collect IssuesOnly triggers only issues, not PRs ---
func TestGitHubCollector_Collect_Bad_IssuesOnlyGhFails(t *testing.T) {
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = nil
g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo", IssuesOnly: true}
result, err := g.Collect(context.Background(), cfg)
require.NoError(t, err)
assert.Equal(t, 1, result.Errors) // Only issues collected (and failed), PRs skipped
}
// --- GitHub: Collect PRsOnly triggers only PRs, not issues ---
func TestGitHubCollector_Collect_Bad_PRsOnlyGhFails(t *testing.T) {
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = nil
g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo", PRsOnly: true}
result, err := g.Collect(context.Background(), cfg)
require.NoError(t, err)
assert.Equal(t, 1, result.Errors) // Only PRs collected (and failed), issues skipped
}
// TestExtractText_Good_TextBeforeBR parses a post whose inner content mixes
// text nodes with br and div elements, and checks only that parsing succeeds.
func TestExtractText_Good_TextBeforeBR(t *testing.T) {
	page := fmt.Sprintf(`<html><body><div class="post"><div class="inner">%s</div></div></body></html>`,
		"First text<br>Second text<div>Third text</div>")

	// ParsePostsFromHTML uses extractText internally. Even if no posts match
	// the exact structure, the function path is exercised; the key is that
	// extractText encounters text + br/p/div siblings. The returned posts are
	// therefore deliberately not inspected.
	_, err := ParsePostsFromHTML(page)
	require.NoError(t, err)
}
// TestParsePostsFromHTML_Good_FullStructure parses a page holding one post with
// poster, date and content sections, and expects a single post whose Content
// is the inner text.
func TestParsePostsFromHTML_Good_FullStructure(t *testing.T) {
	const page = `<html><body>
<div class="post">
<div class="poster_info">TestAuthor</div>
<div class="headerandpost">
<div class="smalltext">January 01, 2009</div>
</div>
<div class="inner">This is the post content.</div>
</div>
</body></html>`

	parsed, parseErr := ParsePostsFromHTML(page)
	require.NoError(t, parseErr)
	require.Len(t, parsed, 1)
	assert.Equal(t, "This is the post content.", parsed[0].Content)
}
// TestHTMLToMarkdown_Good_NestedElements converts a link whose text is split
// between a nested span and a trailing text node, and expects one Markdown
// link with the combined text.
func TestHTMLToMarkdown_Good_NestedElements(t *testing.T) {
	// <a> with nested <span> triggers getChildrenText with non-text child nodes
	const src = `<p><a href="https://example.com"><span>Nested</span> Link</a></p>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	assert.Contains(t, md, "[Nested Link](https://example.com)")
}
// TestHTMLToMarkdown_Good_OL converts an ordered list and expects numbered
// Markdown items.
func TestHTMLToMarkdown_Good_OL(t *testing.T) {
	const src = `<ol><li>First</li><li>Second</li></ol>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	for _, want := range []string{"1. First", "2. Second"} {
		assert.Contains(t, md, want)
	}
}
// TestHTMLToMarkdown_Good_BlockquoteElement converts a blockquote and expects
// the text to be prefixed with "> ".
func TestHTMLToMarkdown_Good_BlockquoteElement(t *testing.T) {
	const src = `<blockquote>Quoted text</blockquote>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	assert.Contains(t, md, "> Quoted text")
}
// TestHTMLToMarkdown_Good_HR converts an hr between two paragraphs and expects
// "---" in the output.
func TestHTMLToMarkdown_Good_HR(t *testing.T) {
	const src = `<p>Before</p><hr><p>After</p>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	assert.Contains(t, md, "---")
}
// TestHTMLToMarkdown_Good_AllHeadingLevels converts h4, h5 and h6 headings and
// expects four, five and six leading '#' characters respectively.
func TestHTMLToMarkdown_Good_AllHeadingLevels(t *testing.T) {
	const src = `<h4>H4</h4><h5>H5</h5><h6>H6</h6>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	for _, want := range []string{"#### H4", "##### H5", "###### H6"} {
		assert.Contains(t, md, want)
	}
}
// TestHTMLToMarkdown_Good_LinkNoHref converts an anchor without an href and
// expects its text to appear without Markdown link brackets.
func TestHTMLToMarkdown_Good_LinkNoHref(t *testing.T) {
	const src = `<a>bare link text</a>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	assert.Contains(t, md, "bare link text")
	assert.NotContains(t, md, "[")
}
// TestHTMLToMarkdown_Good_UL converts an unordered list and expects "- "
// prefixed Markdown items.
func TestHTMLToMarkdown_Good_UL(t *testing.T) {
	const src = `<ul><li>Item A</li><li>Item B</li></ul>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	for _, want := range []string{"- Item A", "- Item B"} {
		assert.Contains(t, md, want)
	}
}
// TestHTMLToMarkdown_Good_BRTag converts a paragraph split by a br and expects
// the text on both sides of the break to survive.
func TestHTMLToMarkdown_Good_BRTag(t *testing.T) {
	const src = `<p>Line one<br>Line two</p>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	for _, want := range []string{"Line one", "Line two"} {
		assert.Contains(t, md, want)
	}
}
// TestHTMLToMarkdown_Good_StyleStripped converts a document with a style
// element in its head and expects the body text without any of the CSS.
func TestHTMLToMarkdown_Good_StyleStripped(t *testing.T) {
	const src = `<html><head><style>body{color:red}</style></head><body><p>Clean</p></body></html>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	assert.Contains(t, md, "Clean")
	assert.NotContains(t, md, "color")
}
// TestHTMLToMarkdown_Good_AlternateBoldItalic converts b and i elements and
// expects Markdown bold and italic markers.
func TestHTMLToMarkdown_Good_AlternateBoldItalic(t *testing.T) {
	const src = `<p><b>bold</b> and <i>italic</i></p>`

	md, convErr := HTMLToMarkdown(src)
	require.NoError(t, convErr)
	for _, want := range []string{"**bold**", "*italic*"} {
		assert.Contains(t, md, want)
	}
}
// TestMarketCollector_Collect_Bad_LimiterBlocksThenCancelled primes the
// "coingecko" limiter with one Wait, then collects with an already-cancelled
// context. Collect must succeed and report at least one counted error.
func TestMarketCollector_Collect_Bad_LimiterBlocksThenCancelled(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
			MarketData: marketData{CurrentPrice: map[string]float64{"usd": 42000}}})
	}))
	defer server.Close()

	// Point the collector at the stub server for this test only.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = server.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = NewRateLimiter()
	cfg.Limiter.SetDelay("coingecko", 5*time.Second) // Long delay

	// Make a first call to set lastTime, so the second call will actually block.
	// The test's premise depends on this call succeeding, so check it.
	require.NoError(t, cfg.Limiter.Wait(context.Background(), "coingecko"))

	// Now cancel context and call Collect - the second Wait will block and detect cancellation.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	mc := &MarketCollector{CoinID: "bitcoin"}
	result, err := mc.Collect(ctx, cfg)
	require.NoError(t, err) // Top-level errors are counted
	// GreaterOrEqual reports the actual count on failure, unlike assert.True.
	assert.GreaterOrEqual(t, result.Errors, 1)
}
// TestPapersCollector_CollectIACR_Bad_LimiterBlocks primes the "iacr" limiter
// with one Wait, then collects with an already-cancelled context and expects
// Collect to return an error.
func TestPapersCollector_CollectIACR_Bad_LimiterBlocks(t *testing.T) {
	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = NewRateLimiter()
	cfg.Limiter.SetDelay("iacr", 5*time.Second)

	// Set lastTime so the collector's own Wait has a delay to sit out. The
	// test's premise depends on this call succeeding, so check it.
	require.NoError(t, cfg.Limiter.Wait(context.Background(), "iacr"))

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
	_, err := p.Collect(ctx, cfg)
	assert.Error(t, err)
}
// --- Papers: arXiv with limiter that blocks ---
func TestPapersCollector_CollectArXiv_Bad_LimiterBlocks(t *testing.T) {
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = NewRateLimiter()
cfg.Limiter.SetDelay("arxiv", 5*time.Second)
_ = cfg.Limiter.Wait(context.Background(), "arxiv")
ctx, cancel := context.WithCancel(context.Background())
cancel()
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
_, err := p.Collect(ctx, cfg)
assert.Error(t, err)
}
// --- BitcoinTalk: limiter that blocks ---
func TestBitcoinTalkCollector_Collect_Bad_LimiterBlocks(t *testing.T) {
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = NewRateLimiter()
cfg.Limiter.SetDelay("bitcointalk", 5*time.Second)
_ = cfg.Limiter.Wait(context.Background(), "bitcointalk")
ctx, cancel := context.WithCancel(context.Background())
cancel()
b := &BitcoinTalkCollector{TopicID: "12345"}
_, err := b.Collect(ctx, cfg)
assert.Error(t, err)
}
// --- Market: collectCurrent summary.md write error (not first write) ---
// writeCountMedium wraps a MockMedium and makes Write fail once more than
// failAfterN Write calls have been made.
type writeCountMedium struct {
	*io.MockMedium

	// writeCount is the number of Write calls seen so far; failAfterN is how
	// many of those calls are allowed through to the embedded medium.
	writeCount, failAfterN int
}
// Write counts the call and forwards it to the embedded MockMedium while the
// running count is within failAfterN; every later call returns a "disk full"
// error that includes the call number and does not touch the embedded medium.
func (w *writeCountMedium) Write(path, content string) error {
	w.writeCount++
	if w.writeCount <= w.failAfterN {
		return w.MockMedium.Write(path, content)
	}
	return fmt.Errorf("write %d: disk full", w.writeCount)
}
// The methods below forward unchanged to the embedded MockMedium.

func (w *writeCountMedium) EnsureDir(path string) error {
	return w.MockMedium.EnsureDir(path)
}

func (w *writeCountMedium) Read(path string) (string, error) {
	return w.MockMedium.Read(path)
}

func (w *writeCountMedium) List(path string) ([]fs.DirEntry, error) {
	return w.MockMedium.List(path)
}

func (w *writeCountMedium) IsFile(path string) bool {
	return w.MockMedium.IsFile(path)
}

func (w *writeCountMedium) FileGet(path string) (string, error) {
	return w.MockMedium.FileGet(path)
}

func (w *writeCountMedium) FileSet(path, content string) error {
	return w.MockMedium.FileSet(path, content)
}

func (w *writeCountMedium) Delete(path string) error {
	return w.MockMedium.Delete(path)
}

func (w *writeCountMedium) DeleteAll(path string) error {
	return w.MockMedium.DeleteAll(path)
}

func (w *writeCountMedium) Rename(oldPath, newPath string) error {
	return w.MockMedium.Rename(oldPath, newPath)
}

func (w *writeCountMedium) Stat(path string) (fs.FileInfo, error) {
	return w.MockMedium.Stat(path)
}

func (w *writeCountMedium) Open(path string) (fs.File, error) {
	return w.MockMedium.Open(path)
}

func (w *writeCountMedium) Create(path string) (goio.WriteCloser, error) {
	return w.MockMedium.Create(path)
}

func (w *writeCountMedium) Append(path string) (goio.WriteCloser, error) {
	return w.MockMedium.Append(path)
}

func (w *writeCountMedium) ReadStream(path string) (goio.ReadCloser, error) {
	return w.MockMedium.ReadStream(path)
}

func (w *writeCountMedium) WriteStream(path string) (goio.WriteCloser, error) {
	return w.MockMedium.WriteStream(path)
}

func (w *writeCountMedium) Exists(path string) bool {
	return w.MockMedium.Exists(path)
}

func (w *writeCountMedium) IsDir(path string) bool {
	return w.MockMedium.IsDir(path)
}
// Test that the summary.md write error in collectCurrent is handled.
// TestMarketCollector_Collect_Bad_SummaryWriteError lets exactly one write
// through the output medium and fails every later one, then checks that
// Collect returns no top-level error and counts at least one error.
func TestMarketCollector_Collect_Bad_SummaryWriteError(t *testing.T) {
	stub := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		enc := json.NewEncoder(w)
		if strings.Contains(r.URL.Path, "/market_chart") {
			_ = enc.Encode(historicalData{
				Prices: [][]float64{{1610000000000, 42000.0}},
			})
			return
		}
		_ = enc.Encode(coinData{
			ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
			MarketData: marketData{
				CurrentPrice: map[string]float64{"usd": 42000},
				MarketCap:    map[string]float64{"usd": 800000000000},
			},
		})
	})
	srv := httptest.NewServer(stub)
	defer srv.Close()

	// Point the collector at the stub server for the duration of the test.
	savedURL := coinGeckoBaseURL
	coinGeckoBaseURL = srv.URL
	defer func() { coinGeckoBaseURL = savedURL }()

	// The 1st write (current.json) succeeds; the 2nd (summary.md) fails.
	medium := &writeCountMedium{MockMedium: io.NewMockMedium(), failAfterN: 1}
	cfg := &Config{
		Output:     medium,
		OutputDir:  "/output",
		Dispatcher: NewDispatcher(),
		Limiter:    nil,
	}

	collector := &MarketCollector{CoinID: "bitcoin"}
	result, err := collector.Collect(context.Background(), cfg)

	// The summary write failure is tallied in result.Errors, not returned.
	require.NoError(t, err)
	assert.True(t, result.Errors >= 1)
}
// --- Market: collectHistorical write error ---
// TestMarketCollector_Collect_Bad_HistoricalWriteError allows two writes
// through the output medium and fails the third, then checks that Collect
// with Historical enabled returns no top-level error and counts at least one
// error in the result.
func TestMarketCollector_Collect_Bad_HistoricalWriteError(t *testing.T) {
	// No request counter is kept: the previous one was only ever incremented
	// (from the server's handler goroutine, unsynchronised) and never read.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if strings.Contains(r.URL.Path, "/market_chart") {
			_ = json.NewEncoder(w).Encode(historicalData{
				Prices: [][]float64{{1610000000000, 42000.0}},
			})
			return
		}
		_ = json.NewEncoder(w).Encode(coinData{
			ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
			MarketData: marketData{
				CurrentPrice: map[string]float64{"usd": 42000},
				MarketCap:    map[string]float64{"usd": 800000000000},
			},
		})
	}))
	defer server.Close()

	// Point the collector at the stub server for the duration of the test.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = server.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	// Allow 2 writes (current.json + summary.md) but fail on 3rd (historical.json).
	wm := &writeCountMedium{MockMedium: io.NewMockMedium(), failAfterN: 2}
	cfg := &Config{Output: wm, OutputDir: "/output", Dispatcher: NewDispatcher()}
	cfg.Limiter = nil

	mc := &MarketCollector{CoinID: "bitcoin", Historical: true}
	result, err := mc.Collect(context.Background(), cfg)
	require.NoError(t, err)

	// Current succeeds (2 items) but historical write fails (3rd write).
	assert.True(t, result.Errors >= 1, "historical write should fail: items=%d, errors=%d", result.Items, result.Errors)
}
// --- State: Save write error ---
// TestState_Save_Bad_WriteError checks that Save reports a
// "failed to write state file" error when the backing medium rejects writes.
func TestState_Save_Bad_WriteError(t *testing.T) {
	em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
	s := NewState(em, "/state.json")
	s.Set("test", &StateEntry{Source: "test", Items: 1})

	err := s.Save()

	// require, not assert: err.Error() below would panic on a nil error
	// instead of failing the test cleanly.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to write state file")
}
// --- Excavator: collector run with state configured (no error injected) ---
// TestExcavator_Run_Bad_CollectorStateError runs a single mock collector with
// a State attached to the config and checks that the collector was invoked
// and that its three items are reported. No failure is injected here.
func TestExcavator_Run_Bad_CollectorStateError(t *testing.T) {
	medium := io.NewMockMedium()
	cfg := NewConfigWithMedium(medium, "/output")
	cfg.State = NewState(medium, "/state.json")

	collector := &mockCollector{name: "test", items: 3}
	excavator := &Excavator{Collectors: []Collector{collector}}

	result, err := excavator.Run(context.Background(), cfg)

	require.NoError(t, err)
	assert.True(t, collector.called)
	assert.Equal(t, 3, result.Items)
}
// --- BitcoinTalk: page returns zero posts (empty content) ---
func TestBitcoinTalkCollector_Collect_Good_ZeroPostsPage(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
// Valid HTML with no post divs at all
_, _ = w.Write([]byte("<html><body><p>No posts</p></body></html>"))
}))
defer srv.Close()
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
old := httpClient
httpClient = &http.Client{Transport: transport}
defer func() { httpClient = old }()
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = nil
b := &BitcoinTalkCollector{TopicID: "empty-topic"}
result, err := b.Collect(context.Background(), cfg)
require.NoError(t, err)
assert.Equal(t, 0, result.Items)
}
// --- Excavator: state save error after collection ---
// TestExcavator_Run_Bad_StateSaveError backs the State with a medium whose
// writes fail while collector output uses a working medium. Run is expected
// to succeed, report the collected item, and emit an EventError.
func TestExcavator_Run_Bad_StateSaveError(t *testing.T) {
	failing := &errorMedium{
		MockMedium: io.NewMockMedium(),
		writeErr:   fmt.Errorf("state write failed"),
	}
	cfg := &Config{
		Output:     io.NewMockMedium(), // regular medium for collector output
		OutputDir:  "/output",
		Dispatcher: NewDispatcher(),
		State:      NewState(failing, "/state.json"), // state writes will fail
	}

	sawError := false
	cfg.Dispatcher.On(EventError, func(Event) { sawError = true })

	collector := &mockCollector{name: "test", items: 1}
	excavator := &Excavator{Collectors: []Collector{collector}}

	result, err := excavator.Run(context.Background(), cfg)

	require.NoError(t, err)
	assert.True(t, collector.called)
	assert.Equal(t, 1, result.Items)
	assert.True(t, sawError, "should emit error event when state save fails")
}
// --- State: Load with read error ---
// TestState_Load_Bad_ReadError checks that Load reports a
// "failed to read state file" error when the state file exists but the
// backing medium fails to read it.
func TestState_Load_Bad_ReadError(t *testing.T) {
	em := &errorMedium{MockMedium: io.NewMockMedium(), readErr: fmt.Errorf("read denied")}
	em.MockMedium.Files["/state.json"] = "{}" // File exists but read will fail

	s := NewState(em, "/state.json")
	err := s.Load()

	// require, not assert: err.Error() below would panic on a nil error
	// instead of failing the test cleanly.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to read state file")
}
// --- Papers: PaperSourceAll emits complete ---
// TestPapersCollector_CollectAll_Good_ArxivFailsWithIACR serves the sample
// IACR page on the first request and HTTP 500 on every later one. With
// PaperSourceAll, Collect is expected to succeed with two items, one counted
// error, and an EventComplete emitted.
func TestPapersCollector_CollectAll_Good_ArxivFailsWithIACR(t *testing.T) {
	// The handler tells the sources apart by request order only; it relies on
	// the IACR request arriving first.
	requests := 0
	byOrder := func(w http.ResponseWriter, r *http.Request) {
		requests++
		if requests != 1 {
			// arXiv fails
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		// IACR succeeds
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(sampleIACRHTML))
	}
	srv := httptest.NewServer(http.HandlerFunc(byOrder))
	defer srv.Close()

	// Swap the package HTTP client for one routed through rewriteTransport
	// with the stub server as its target; restore the original on exit.
	savedClient := httpClient
	httpClient = &http.Client{
		Transport: &rewriteTransport{base: srv.Client().Transport, target: srv.URL},
	}
	defer func() { httpClient = savedClient }()

	cfg := NewConfigWithMedium(io.NewMockMedium(), "/output")
	cfg.Limiter = nil

	sawComplete := false
	cfg.Dispatcher.On(EventComplete, func(Event) { sawComplete = true })

	collector := &PapersCollector{Source: PaperSourceAll, Query: "test"}
	result, err := collector.Collect(context.Background(), cfg)

	require.NoError(t, err)
	assert.Equal(t, 2, result.Items)
	assert.Equal(t, 1, result.Errors) // arXiv failure
	assert.True(t, sawComplete)
}
// --- Papers: IACR with cancelled context (request creation fails) ---
func TestPapersCollector_CollectIACR_Bad_CancelledContextRequestFails(t *testing.T) {
// Don't set up any server - the request should fail because context is cancelled.
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = nil
ctx, cancel := context.WithCancel(context.Background())
cancel()
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
_, err := p.Collect(ctx, cfg)
assert.Error(t, err)
}
// --- Papers: arXiv with cancelled context ---
func TestPapersCollector_CollectArXiv_Bad_CancelledContextRequestFails(t *testing.T) {
m := io.NewMockMedium()
cfg := NewConfigWithMedium(m, "/output")
cfg.Limiter = nil
ctx, cancel := context.WithCancel(context.Background())
cancel()
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
_, err := p.Collect(ctx, cfg)
assert.Error(t, err)
}
// --- Market: collectHistorical limiter blocks ---
// TestMarketCollector_Collect_Bad_HistoricalLimiterBlocks cancels the context
// 50ms after Collect starts while the "coingecko" limiter key has a 5s delay
// and has been waited on once. Collect (with Historical enabled) must return
// no top-level error and account for at least one item or error.
func TestMarketCollector_Collect_Bad_HistoricalLimiterBlocks(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(coinData{
			ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
			MarketData: marketData{CurrentPrice: map[string]float64{"usd": 42000}},
		})
	}))
	defer server.Close()

	// Point the collector at the stub server for the duration of the test.
	oldURL := coinGeckoBaseURL
	coinGeckoBaseURL = server.URL
	defer func() { coinGeckoBaseURL = oldURL }()

	m := io.NewMockMedium()
	cfg := NewConfigWithMedium(m, "/output")
	cfg.Limiter = NewRateLimiter()
	cfg.Limiter.SetDelay("coingecko", 5*time.Second)
	// Prime the limiter so that Waits issued inside Collect are subject to
	// the delay; the outcome of this first call is irrelevant to the test.
	_ = cfg.Limiter.Wait(context.Background(), "coingecko")

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Cancel while Collect is in flight. A timer is used instead of a
	// sleeping goroutine so that it can be stopped on return and nothing
	// outlives the test.
	timer := time.AfterFunc(50*time.Millisecond, cancel)
	defer timer.Stop()

	mc := &MarketCollector{CoinID: "bitcoin", Historical: true}
	result, err := mc.Collect(ctx, cfg)
	require.NoError(t, err)

	// Which phase observes the cancellation is timing dependent: either
	// current succeeds and historical fails, or both fail.
	assert.True(t, result.Items+result.Errors >= 1)
}
// --- BitcoinTalk: fetchPage with invalid URL ---
// TestBitcoinTalkCollector_FetchPage_Bad_InvalidURL expects fetchPage to
// return an error for a URL whose host contains a control character.
func TestBitcoinTalkCollector_FetchPage_Bad_InvalidURL(t *testing.T) {
	// The DEL byte (0x7f) in the host is presumably rejected when the
	// request is built, before any network access.
	const badURL = "http://\x7f/invalid"

	collector := &BitcoinTalkCollector{TopicID: "12345"}
	_, err := collector.fetchPage(context.Background(), badURL)

	assert.Error(t, err)
}