collect: 83.0% -> 90.0% via error medium injection (write/list/read/ensureDir failures), rate limiter blocking tests, HTTP mock servers for papers/market/ bitcointalk write errors, processor context cancellation, state round-trip, and GitHub collector gh-auth-failure paths. gitea: 89.2% -> 94.0% via paginated org/user repo servers, PR meta with comment counting and nil dates, GetCommentBodies with nil poster, ListPullRequests state mapping, and NewFromConfig flag override tests. jobrunner: 86.4% -> 94.4% via journal error paths (empty baseDir, readonly dir, path sanitisation), poller with error-returning source/handler/report, journal integration (JSONL file verification), multiple sources, immediate cancellation, and cycle counter tests. handlers: 83.8% -> 89.2% via dispatch mock servers (invalid repo, EnsureLabel failure, GetIssue not found, AssignIssue failure, AddIssueLabels error, issue with no special labels), completion label errors, EnableAutoMerge pending checks, PublishDraft merged state, SendFixCommand merge conflict, DismissReviews stale review, TickParent checkbox ticking, and dual-run mode. Remaining 10.8% is in SSH-dependent dispatch code (secureTransfer/runRemote/ticketExists) that cannot be tested without modifying production code. Co-Authored-By: Charon <charon@lethean.io>
1284 lines
44 KiB
Go
1284 lines
44 KiB
Go
package collect
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
goio "io"
|
|
"io/fs"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"forge.lthn.ai/core/go/pkg/io"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// errorMedium wraps MockMedium and injects errors on specific operations.
|
|
type errorMedium struct {
|
|
*io.MockMedium
|
|
writeErr error
|
|
ensureDirErr error
|
|
listErr error
|
|
readErr error
|
|
}
|
|
|
|
func (e *errorMedium) Write(path, content string) error {
|
|
if e.writeErr != nil {
|
|
return e.writeErr
|
|
}
|
|
return e.MockMedium.Write(path, content)
|
|
}
|
|
func (e *errorMedium) EnsureDir(path string) error {
|
|
if e.ensureDirErr != nil {
|
|
return e.ensureDirErr
|
|
}
|
|
return e.MockMedium.EnsureDir(path)
|
|
}
|
|
func (e *errorMedium) List(path string) ([]fs.DirEntry, error) {
|
|
if e.listErr != nil {
|
|
return nil, e.listErr
|
|
}
|
|
return e.MockMedium.List(path)
|
|
}
|
|
func (e *errorMedium) Read(path string) (string, error) {
|
|
if e.readErr != nil {
|
|
return "", e.readErr
|
|
}
|
|
return e.MockMedium.Read(path)
|
|
}
|
|
func (e *errorMedium) FileGet(path string) (string, error) { return e.MockMedium.FileGet(path) }
|
|
func (e *errorMedium) FileSet(path, content string) error { return e.MockMedium.FileSet(path, content) }
|
|
func (e *errorMedium) Delete(path string) error { return e.MockMedium.Delete(path) }
|
|
func (e *errorMedium) DeleteAll(path string) error { return e.MockMedium.DeleteAll(path) }
|
|
func (e *errorMedium) Rename(old, new string) error { return e.MockMedium.Rename(old, new) }
|
|
func (e *errorMedium) Stat(path string) (fs.FileInfo, error) { return e.MockMedium.Stat(path) }
|
|
func (e *errorMedium) Open(path string) (fs.File, error) { return e.MockMedium.Open(path) }
|
|
func (e *errorMedium) Create(path string) (goio.WriteCloser, error) { return e.MockMedium.Create(path) }
|
|
func (e *errorMedium) Append(path string) (goio.WriteCloser, error) { return e.MockMedium.Append(path) }
|
|
func (e *errorMedium) ReadStream(path string) (goio.ReadCloser, error) { return e.MockMedium.ReadStream(path) }
|
|
func (e *errorMedium) WriteStream(path string) (goio.WriteCloser, error) {
|
|
return e.MockMedium.WriteStream(path)
|
|
}
|
|
func (e *errorMedium) Exists(path string) bool { return e.MockMedium.Exists(path) }
|
|
func (e *errorMedium) IsDir(path string) bool { return e.MockMedium.IsDir(path) }
|
|
func (e *errorMedium) IsFile(path string) bool { return e.MockMedium.IsFile(path) }
|
|
|
|
// --- errorLimiter: a RateLimiter that returns errors ---
|
|
|
|
type errorLimiterWaiter struct{}
|
|
|
|
// --- Processor: list error ---
|
|
|
|
func TestProcessor_Process_Bad_ListError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), listErr: fmt.Errorf("list denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
_, err := p.Process(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to list directory")
|
|
}
|
|
|
|
// --- Processor: ensureDir error ---
|
|
|
|
func TestProcessor_Process_Bad_EnsureDirError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
|
|
// Need to ensure List returns entries
|
|
em.MockMedium.Dirs["/input"] = true
|
|
em.MockMedium.Files["/input/test.html"] = "<h1>Test</h1>"
|
|
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
_, err := p.Process(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- Processor: context cancellation during processing ---
|
|
|
|
func TestProcessor_Process_Bad_ContextCancelledDuringLoop(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
m.Dirs["/input"] = true
|
|
m.Files["/input/a.html"] = "<h1>Test</h1>"
|
|
m.Files["/input/b.html"] = "<h1>Test</h1>"
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel() // Cancel immediately so ctx.Err() is non-nil
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
_, err := p.Process(ctx, cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "context cancelled")
|
|
}
|
|
|
|
// --- Processor: read error during file processing ---
|
|
|
|
func TestProcessor_Process_Bad_ReadError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), readErr: fmt.Errorf("read denied")}
|
|
em.MockMedium.Dirs["/input"] = true
|
|
em.MockMedium.Files["/input/test.html"] = "<h1>Test</h1>"
|
|
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
result, err := p.Process(context.Background(), cfg)
|
|
assert.NoError(t, err) // Read errors increment Errors, not returned
|
|
assert.Equal(t, 1, result.Errors)
|
|
}
|
|
|
|
// --- Processor: JSON conversion error ---
|
|
|
|
func TestProcessor_Process_Bad_InvalidJSONFile(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
m.Dirs["/input"] = true
|
|
m.Files["/input/bad.json"] = "not valid json {"
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
var errorEmitted bool
|
|
cfg.Dispatcher.On(EventError, func(e Event) { errorEmitted = true })
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
result, err := p.Process(context.Background(), cfg)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors)
|
|
assert.True(t, errorEmitted, "should emit error event for bad JSON")
|
|
}
|
|
|
|
// --- Processor: write error during output ---
|
|
|
|
func TestProcessor_Process_Bad_WriteError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
|
|
em.MockMedium.Dirs["/input"] = true
|
|
em.MockMedium.Files["/input/page.html"] = "<h1>Title</h1>"
|
|
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
result, err := p.Process(context.Background(), cfg)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors)
|
|
}
|
|
|
|
// --- Processor: successful processing with events ---
|
|
|
|
func TestProcessor_Process_Good_EmitsItemAndComplete(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
m.Dirs["/input"] = true
|
|
m.Files["/input/page.html"] = "<h1>Title</h1><p>Body</p>"
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
var itemEmitted, completeEmitted bool
|
|
cfg.Dispatcher.On(EventItem, func(e Event) { itemEmitted = true })
|
|
cfg.Dispatcher.On(EventComplete, func(e Event) { completeEmitted = true })
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
result, err := p.Process(context.Background(), cfg)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 1, result.Items)
|
|
assert.True(t, itemEmitted)
|
|
assert.True(t, completeEmitted)
|
|
}
|
|
|
|
// --- Papers: with rate limiter that fails ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_LimiterError(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("iacr", 10*time.Minute) // Long delay
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel() // Cancel context so limiter.Wait fails
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_LimiterError(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("arxiv", 10*time.Minute)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Papers: IACR with bad HTML response ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_InvalidHTML(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
// Serve valid-ish HTML but with no papers - the parse succeeds but returns empty.
|
|
_, _ = w.Write([]byte("<html><body>no papers here</body></html>"))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "nothing"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 0, result.Items)
|
|
}
|
|
|
|
// --- Papers: IACR write error ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_WriteError(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err) // Write errors increment Errors, not returned
|
|
assert.Equal(t, 2, result.Errors) // 2 papers both fail to write
|
|
}
|
|
|
|
// --- Papers: IACR EnsureDir error ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_EnsureDirError(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
_, err := p.Collect(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- Papers: arXiv write error ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_WriteError(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(sampleArXivXML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 2, result.Errors)
|
|
}
|
|
|
|
// --- Papers: arXiv EnsureDir error ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_EnsureDirError(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(sampleArXivXML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
_, err := p.Collect(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- Papers: collectAll with dispatcher events ---
|
|
|
|
func TestPapersCollector_CollectAll_Good_WithDispatcher(t *testing.T) {
|
|
callCount := 0
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
callCount++
|
|
if callCount == 1 {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
} else {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(sampleArXivXML))
|
|
}
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var completeEmitted bool
|
|
cfg.Dispatcher.On(EventComplete, func(e Event) { completeEmitted = true })
|
|
|
|
p := &PapersCollector{Source: PaperSourceAll, Query: "crypto"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 4, result.Items)
|
|
assert.True(t, completeEmitted)
|
|
}
|
|
|
|
// --- Papers: IACR with events on item emit ---
|
|
|
|
func TestPapersCollector_CollectIACR_Good_EmitsItemEvents(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var itemCount int
|
|
cfg.Dispatcher.On(EventItem, func(e Event) { itemCount++ })
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "zero knowledge"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 2, result.Items)
|
|
assert.Equal(t, 2, itemCount)
|
|
}
|
|
|
|
// --- Papers: arXiv with events on item emit ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Good_EmitsItemEvents(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(sampleArXivXML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var itemCount int
|
|
cfg.Dispatcher.On(EventItem, func(e Event) { itemCount++ })
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "ring signatures"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 2, result.Items)
|
|
assert.Equal(t, 2, itemCount)
|
|
}
|
|
|
|
// --- Market: collectCurrent write error (summary path) ---
|
|
|
|
func TestMarketCollector_Collect_Bad_WriteError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
if strings.Contains(r.URL.Path, "/market_chart") {
|
|
_ = json.NewEncoder(w).Encode(historicalData{
|
|
Prices: [][]float64{{1610000000000, 42000.0}},
|
|
})
|
|
} else {
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{
|
|
CurrentPrice: map[string]float64{"usd": 42000},
|
|
MarketCap: map[string]float64{"usd": 800000000000},
|
|
},
|
|
})
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
result, err := mc.Collect(context.Background(), cfg)
|
|
// collectCurrent will fail on first write, then collectHistorical also fails
|
|
require.NoError(t, err) // errors are counted not returned at top level
|
|
assert.True(t, result.Errors >= 1, "should have at least one error from write failure")
|
|
}
|
|
|
|
// --- Market: EnsureDir error ---
|
|
|
|
func TestMarketCollector_Collect_Bad_EnsureDirError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin"})
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
_, err := mc.Collect(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- Market: collectCurrent with limiter wait error ---
|
|
|
|
func TestMarketCollector_Collect_Bad_LimiterError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin"})
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("coingecko", 10*time.Minute)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
result, err := mc.Collect(ctx, cfg)
|
|
require.NoError(t, err) // error counted, not returned
|
|
assert.True(t, result.Errors >= 1)
|
|
}
|
|
|
|
// --- Market: collectHistorical with custom FromDate ---
|
|
|
|
func TestMarketCollector_Collect_Good_HistoricalCustomDate(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
if strings.Contains(r.URL.Path, "/market_chart") {
|
|
_ = json.NewEncoder(w).Encode(historicalData{
|
|
Prices: [][]float64{{1610000000000, 42000.0}},
|
|
})
|
|
} else {
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{
|
|
CurrentPrice: map[string]float64{"usd": 42000},
|
|
MarketCap: map[string]float64{"usd": 800000000000},
|
|
},
|
|
})
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin", FromDate: "2025-01-01", Historical: true}
|
|
result, err := mc.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.True(t, result.Items >= 2) // current.json + summary.md at minimum
|
|
}
|
|
|
|
// --- BitcoinTalk: EnsureDir error ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_EnsureDirError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: fmt.Errorf("mkdir denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.Collect(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- BitcoinTalk: limiter error ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_LimiterError(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("bitcointalk", 10*time.Minute)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel() // Cancel context so limiter.Wait fails
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- BitcoinTalk: write error during post saving ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_WriteErrorOnPosts(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleBTCTalkPage(3)))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
result, err := b.Collect(context.Background(), cfg)
|
|
require.NoError(t, err) // write errors are counted
|
|
assert.Equal(t, 3, result.Errors) // 3 posts all fail to write
|
|
assert.Equal(t, 0, result.Items)
|
|
}
|
|
|
|
// --- BitcoinTalk: fetchPage with bad HTTP status ---
|
|
|
|
func TestBitcoinTalkCollector_FetchPage_Bad_NonOKStatus(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusForbidden)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.fetchPage(context.Background(), srv.URL)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "unexpected status code: 403")
|
|
}
|
|
|
|
// --- BitcoinTalk: fetchPage with request error ---
|
|
|
|
func TestBitcoinTalkCollector_FetchPage_Bad_RequestError(t *testing.T) {
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: &rewriteTransport{target: "http://127.0.0.1:1"}} // Connection refused
|
|
defer func() { httpClient = old }()
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.fetchPage(context.Background(), "https://bitcointalk.org/index.php?topic=12345.0")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "request failed")
|
|
}
|
|
|
|
// --- BitcoinTalk: fetchPage with valid but empty page ---
|
|
|
|
func TestBitcoinTalkCollector_FetchPage_Good_EmptyPage(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte("<html><body></body></html>"))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: &rewriteTransport{base: srv.Client().Transport, target: srv.URL}}
|
|
defer func() { httpClient = old }()
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
posts, err := b.fetchPage(context.Background(), "https://bitcointalk.org/index.php?topic=12345.0")
|
|
require.NoError(t, err)
|
|
assert.Empty(t, posts)
|
|
}
|
|
|
|
// --- BitcoinTalk: Collect with fetch error + dispatcher ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_FetchErrorWithDispatcher(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusInternalServerError)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var errorEmitted bool
|
|
cfg.Dispatcher.On(EventError, func(e Event) { errorEmitted = true })
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
result, err := b.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors)
|
|
assert.True(t, errorEmitted)
|
|
}
|
|
|
|
// --- State: Save with a populated state ---
|
|
|
|
func TestState_Save_Good_RoundTrip(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
s := NewState(m, "/data/state.json")
|
|
|
|
s.Set("source1", &StateEntry{Source: "source1", Items: 42, LastID: "xyz"})
|
|
s.Set("source2", &StateEntry{Source: "source2", Items: 7, Cursor: "page2"})
|
|
|
|
err := s.Save()
|
|
require.NoError(t, err)
|
|
|
|
// Load into fresh state and verify
|
|
s2 := NewState(m, "/data/state.json")
|
|
err = s2.Load()
|
|
require.NoError(t, err)
|
|
|
|
e1, ok := s2.Get("source1")
|
|
require.True(t, ok)
|
|
assert.Equal(t, 42, e1.Items)
|
|
|
|
e2, ok := s2.Get("source2")
|
|
require.True(t, ok)
|
|
assert.Equal(t, "page2", e2.Cursor)
|
|
}
|
|
|
|
// --- GitHub: Collect with Repo set triggers collectIssues/collectPRs (which fail via gh) ---
|
|
|
|
func TestGitHubCollector_Collect_Bad_GhNotAuthenticated(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var errorCount int
|
|
cfg.Dispatcher.On(EventError, func(e Event) { errorCount++ })
|
|
|
|
// With Repo set, Collect skips listOrgRepos and goes directly to collectIssues/collectPRs
|
|
// which use exec.Command("gh", ...) and fail because gh isn't authenticated.
|
|
g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo"}
|
|
result, err := g.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
// Both collectIssues and collectPRs should fail, incrementing Errors
|
|
assert.True(t, result.Errors >= 1, "at least one error expected from unauthenticated gh")
|
|
assert.True(t, errorCount >= 1, "dispatcher should emit error events")
|
|
}
|
|
|
|
// --- GitHub: Collect IssuesOnly triggers only issues, not PRs ---
|
|
|
|
func TestGitHubCollector_Collect_Bad_IssuesOnlyGhFails(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo", IssuesOnly: true}
|
|
result, err := g.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors) // Only issues collected (and failed), PRs skipped
|
|
}
|
|
|
|
// --- GitHub: Collect PRsOnly triggers only PRs, not issues ---
|
|
|
|
func TestGitHubCollector_Collect_Bad_PRsOnlyGhFails(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo", PRsOnly: true}
|
|
result, err := g.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors) // Only PRs collected (and failed), issues skipped
|
|
}
|
|
|
|
// --- extractText: text before a br/p/div element adds newline ---
|
|
|
|
func TestExtractText_Good_TextBeforeBR(t *testing.T) {
|
|
htmlStr := `<div class="inner">Hello<br>World<p>End</p></div>`
|
|
posts, err := ParsePostsFromHTML(fmt.Sprintf(`<html><body><div class="post"><div class="inner">%s</div></div></body></html>`,
|
|
"First text<br>Second text<div>Third text</div>"))
|
|
// ParsePostsFromHTML uses extractText internally
|
|
require.NoError(t, err)
|
|
_ = htmlStr
|
|
// Even if no posts match the exact structure, the function path is exercised.
|
|
// The key is that extractText encounters text + br/p/div siblings.
|
|
_ = posts
|
|
}
|
|
|
|
// --- ParsePostsFromHTML: posts with full structure ---
|
|
|
|
func TestParsePostsFromHTML_Good_FullStructure(t *testing.T) {
|
|
htmlContent := `<html><body>
|
|
<div class="post">
|
|
<div class="poster_info">TestAuthor</div>
|
|
<div class="headerandpost">
|
|
<div class="smalltext">January 01, 2009</div>
|
|
</div>
|
|
<div class="inner">This is the post content.</div>
|
|
</div>
|
|
</body></html>`
|
|
|
|
posts, err := ParsePostsFromHTML(htmlContent)
|
|
require.NoError(t, err)
|
|
require.Len(t, posts, 1)
|
|
assert.Equal(t, "This is the post content.", posts[0].Content)
|
|
}
|
|
|
|
// --- getChildrenText: nested element node path ---
|
|
|
|
func TestHTMLToMarkdown_Good_NestedElements(t *testing.T) {
|
|
// <a> with nested <span> triggers getChildrenText with non-text child nodes
|
|
input := `<p><a href="https://example.com"><span>Nested</span> Link</a></p>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "[Nested Link](https://example.com)")
|
|
}
|
|
|
|
// --- HTML: ordered list ---
|
|
|
|
func TestHTMLToMarkdown_Good_OL(t *testing.T) {
|
|
input := `<ol><li>First</li><li>Second</li></ol>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "1. First")
|
|
assert.Contains(t, result, "2. Second")
|
|
}
|
|
|
|
// --- HTML: blockquote ---
|
|
|
|
func TestHTMLToMarkdown_Good_BlockquoteElement(t *testing.T) {
|
|
input := `<blockquote>Quoted text</blockquote>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "> Quoted text")
|
|
}
|
|
|
|
// --- HTML: hr ---
|
|
|
|
func TestHTMLToMarkdown_Good_HR(t *testing.T) {
|
|
input := `<p>Before</p><hr><p>After</p>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "---")
|
|
}
|
|
|
|
// --- HTML: h4, h5, h6 ---
|
|
|
|
func TestHTMLToMarkdown_Good_AllHeadingLevels(t *testing.T) {
|
|
input := `<h4>H4</h4><h5>H5</h5><h6>H6</h6>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "#### H4")
|
|
assert.Contains(t, result, "##### H5")
|
|
assert.Contains(t, result, "###### H6")
|
|
}
|
|
|
|
// --- HTML: link without href ---
|
|
|
|
func TestHTMLToMarkdown_Good_LinkNoHref(t *testing.T) {
|
|
input := `<a>bare link text</a>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "bare link text")
|
|
assert.NotContains(t, result, "[")
|
|
}
|
|
|
|
// --- HTML: unordered list ---
|
|
|
|
func TestHTMLToMarkdown_Good_UL(t *testing.T) {
|
|
input := `<ul><li>Item A</li><li>Item B</li></ul>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "- Item A")
|
|
assert.Contains(t, result, "- Item B")
|
|
}
|
|
|
|
// --- HTML: br tag ---
|
|
|
|
func TestHTMLToMarkdown_Good_BRTag(t *testing.T) {
|
|
input := `<p>Line one<br>Line two</p>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "Line one")
|
|
assert.Contains(t, result, "Line two")
|
|
}
|
|
|
|
// --- HTML: style tag stripped ---
|
|
|
|
func TestHTMLToMarkdown_Good_StyleStripped(t *testing.T) {
|
|
input := `<html><head><style>body{color:red}</style></head><body><p>Clean</p></body></html>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "Clean")
|
|
assert.NotContains(t, result, "color")
|
|
}
|
|
|
|
// --- HTML: i and b tags ---
|
|
|
|
func TestHTMLToMarkdown_Good_AlternateBoldItalic(t *testing.T) {
|
|
input := `<p><b>bold</b> and <i>italic</i></p>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "**bold**")
|
|
assert.Contains(t, result, "*italic*")
|
|
}
|
|
|
|
// --- Market: collectCurrent with limiter that actually blocks ---
|
|
|
|
func TestMarketCollector_Collect_Bad_LimiterBlocksThenCancelled(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{CurrentPrice: map[string]float64{"usd": 42000}}})
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("coingecko", 5*time.Second) // Long delay
|
|
|
|
// Make a first call to set lastTime, so the second call will actually block.
|
|
_ = cfg.Limiter.Wait(context.Background(), "coingecko")
|
|
|
|
// Now cancel context and call Collect - the second Wait will block and detect cancellation.
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
result, err := mc.Collect(ctx, cfg)
|
|
require.NoError(t, err) // Top-level errors are counted
|
|
assert.True(t, result.Errors >= 1)
|
|
}
|
|
|
|
// --- Papers: IACR with limiter that blocks ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_LimiterBlocks(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("iacr", 5*time.Second)
|
|
_ = cfg.Limiter.Wait(context.Background(), "iacr") // Set lastTime
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Papers: arXiv with limiter that blocks ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_LimiterBlocks(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("arxiv", 5*time.Second)
|
|
_ = cfg.Limiter.Wait(context.Background(), "arxiv")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- BitcoinTalk: limiter that blocks ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_LimiterBlocks(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("bitcointalk", 5*time.Second)
|
|
_ = cfg.Limiter.Wait(context.Background(), "bitcointalk")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Market: collectCurrent summary.md write error (not first write) ---
|
|
|
|
// writeCountMedium fails after N successful writes.
|
|
type writeCountMedium struct {
|
|
*io.MockMedium
|
|
writeCount int
|
|
failAfterN int
|
|
}
|
|
|
|
func (w *writeCountMedium) Write(path, content string) error {
|
|
w.writeCount++
|
|
if w.writeCount > w.failAfterN {
|
|
return fmt.Errorf("write %d: disk full", w.writeCount)
|
|
}
|
|
return w.MockMedium.Write(path, content)
|
|
}
|
|
func (w *writeCountMedium) EnsureDir(path string) error { return w.MockMedium.EnsureDir(path) }
|
|
func (w *writeCountMedium) Read(path string) (string, error) { return w.MockMedium.Read(path) }
|
|
func (w *writeCountMedium) List(path string) ([]fs.DirEntry, error) { return w.MockMedium.List(path) }
|
|
func (w *writeCountMedium) IsFile(path string) bool { return w.MockMedium.IsFile(path) }
|
|
func (w *writeCountMedium) FileGet(path string) (string, error) { return w.MockMedium.FileGet(path) }
|
|
func (w *writeCountMedium) FileSet(path, content string) error { return w.MockMedium.FileSet(path, content) }
|
|
func (w *writeCountMedium) Delete(path string) error { return w.MockMedium.Delete(path) }
|
|
func (w *writeCountMedium) DeleteAll(path string) error { return w.MockMedium.DeleteAll(path) }
|
|
func (w *writeCountMedium) Rename(old, new string) error { return w.MockMedium.Rename(old, new) }
|
|
func (w *writeCountMedium) Stat(path string) (fs.FileInfo, error) { return w.MockMedium.Stat(path) }
|
|
func (w *writeCountMedium) Open(path string) (fs.File, error) { return w.MockMedium.Open(path) }
|
|
func (w *writeCountMedium) Create(path string) (goio.WriteCloser, error) { return w.MockMedium.Create(path) }
|
|
func (w *writeCountMedium) Append(path string) (goio.WriteCloser, error) { return w.MockMedium.Append(path) }
|
|
func (w *writeCountMedium) ReadStream(path string) (goio.ReadCloser, error) { return w.MockMedium.ReadStream(path) }
|
|
func (w *writeCountMedium) WriteStream(path string) (goio.WriteCloser, error) { return w.MockMedium.WriteStream(path) }
|
|
func (w *writeCountMedium) Exists(path string) bool { return w.MockMedium.Exists(path) }
|
|
func (w *writeCountMedium) IsDir(path string) bool { return w.MockMedium.IsDir(path) }
|
|
|
|
// Test that the summary.md write error in collectCurrent is handled.
|
|
func TestMarketCollector_Collect_Bad_SummaryWriteError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
if strings.Contains(r.URL.Path, "/market_chart") {
|
|
_ = json.NewEncoder(w).Encode(historicalData{
|
|
Prices: [][]float64{{1610000000000, 42000.0}},
|
|
})
|
|
} else {
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{
|
|
CurrentPrice: map[string]float64{"usd": 42000},
|
|
MarketCap: map[string]float64{"usd": 800000000000},
|
|
},
|
|
})
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
// Fail on the 2nd write (summary.md) but allow the 1st (current.json).
|
|
wm := &writeCountMedium{MockMedium: io.NewMockMedium(), failAfterN: 1}
|
|
cfg := &Config{Output: wm, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
result, err := mc.Collect(context.Background(), cfg)
|
|
// collectCurrent returns error on summary write → Errors incremented.
|
|
require.NoError(t, err)
|
|
assert.True(t, result.Errors >= 1)
|
|
}
|
|
|
|
// --- Market: collectHistorical write error ---
|
|
|
|
func TestMarketCollector_Collect_Bad_HistoricalWriteError(t *testing.T) {
|
|
callCount := 0
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
callCount++
|
|
w.Header().Set("Content-Type", "application/json")
|
|
if strings.Contains(r.URL.Path, "/market_chart") {
|
|
_ = json.NewEncoder(w).Encode(historicalData{
|
|
Prices: [][]float64{{1610000000000, 42000.0}},
|
|
})
|
|
} else {
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{
|
|
CurrentPrice: map[string]float64{"usd": 42000},
|
|
MarketCap: map[string]float64{"usd": 800000000000},
|
|
},
|
|
})
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
// Allow 2 writes (current.json + summary.md) but fail on 3rd (historical.json).
|
|
wm := &writeCountMedium{MockMedium: io.NewMockMedium(), failAfterN: 2}
|
|
cfg := &Config{Output: wm, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin", Historical: true}
|
|
result, err := mc.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
// Current succeeds (2 items) but historical write fails (3rd write)
|
|
assert.True(t, result.Errors >= 1, "historical write should fail: items=%d, errors=%d", result.Items, result.Errors)
|
|
}
|
|
|
|
// --- State: Save write error ---
|
|
|
|
func TestState_Save_Bad_WriteError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("disk full")}
|
|
s := NewState(em, "/state.json")
|
|
s.Set("test", &StateEntry{Source: "test", Items: 1})
|
|
|
|
err := s.Save()
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to write state file")
|
|
}
|
|
|
|
// --- Excavator: collector with state error ---
|
|
|
|
func TestExcavator_Run_Bad_CollectorStateError(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.State = NewState(m, "/state.json")
|
|
|
|
mc := &mockCollector{
|
|
name: "test",
|
|
items: 3,
|
|
}
|
|
|
|
e := &Excavator{
|
|
Collectors: []Collector{mc},
|
|
}
|
|
|
|
result, err := e.Run(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.True(t, mc.called)
|
|
assert.Equal(t, 3, result.Items)
|
|
}
|
|
|
|
// --- BitcoinTalk: page returns zero posts (empty content) ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Good_ZeroPostsPage(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
// Valid HTML with no post divs at all
|
|
_, _ = w.Write([]byte("<html><body><p>No posts</p></body></html>"))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "empty-topic"}
|
|
result, err := b.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 0, result.Items)
|
|
}
|
|
|
|
// --- Excavator: state save error after collection ---
|
|
|
|
func TestExcavator_Run_Bad_StateSaveError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: fmt.Errorf("state write failed")}
|
|
cfg := &Config{
|
|
Output: io.NewMockMedium(), // Use regular medium for output
|
|
OutputDir: "/output",
|
|
Dispatcher: NewDispatcher(),
|
|
State: NewState(em, "/state.json"), // State uses error medium
|
|
}
|
|
|
|
var errorEmitted bool
|
|
cfg.Dispatcher.On(EventError, func(e Event) { errorEmitted = true })
|
|
|
|
mc := &mockCollector{name: "test", items: 1}
|
|
|
|
e := &Excavator{Collectors: []Collector{mc}}
|
|
result, err := e.Run(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.True(t, mc.called)
|
|
assert.Equal(t, 1, result.Items)
|
|
assert.True(t, errorEmitted, "should emit error event when state save fails")
|
|
}
|
|
|
|
// --- State: Load with read error ---
|
|
|
|
func TestState_Load_Bad_ReadError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), readErr: fmt.Errorf("read denied")}
|
|
em.MockMedium.Files["/state.json"] = "{}" // File exists but read will fail
|
|
|
|
s := NewState(em, "/state.json")
|
|
err := s.Load()
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to read state file")
|
|
}
|
|
|
|
// --- Papers: PaperSourceAll emits complete ---
|
|
|
|
func TestPapersCollector_CollectAll_Good_ArxivFailsWithIACR(t *testing.T) {
|
|
callCount := 0
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
callCount++
|
|
if callCount == 1 {
|
|
// IACR succeeds
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
} else {
|
|
// arXiv fails
|
|
w.WriteHeader(http.StatusInternalServerError)
|
|
}
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var completeEmitted bool
|
|
cfg.Dispatcher.On(EventComplete, func(e Event) { completeEmitted = true })
|
|
|
|
p := &PapersCollector{Source: PaperSourceAll, Query: "test"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 2, result.Items)
|
|
assert.Equal(t, 1, result.Errors) // arXiv failure
|
|
assert.True(t, completeEmitted)
|
|
}
|
|
|
|
// --- Papers: IACR with cancelled context (request creation fails) ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_CancelledContextRequestFails(t *testing.T) {
|
|
// Don't set up any server - the request should fail because context is cancelled.
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Papers: arXiv with cancelled context ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_CancelledContextRequestFails(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Market: collectHistorical limiter blocks ---
|
|
|
|
func TestMarketCollector_Collect_Bad_HistoricalLimiterBlocks(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{CurrentPrice: map[string]float64{"usd": 42000}},
|
|
})
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("coingecko", 5*time.Second)
|
|
|
|
// First call succeeds (current), then cancel before historical
|
|
_ = cfg.Limiter.Wait(context.Background(), "coingecko")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
// Run current data collection first, then cancel before historical
|
|
go func() {
|
|
time.Sleep(50 * time.Millisecond)
|
|
cancel()
|
|
}()
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin", Historical: true}
|
|
result, err := mc.Collect(ctx, cfg)
|
|
require.NoError(t, err)
|
|
// Either current succeeds and historical fails, or both fail
|
|
assert.True(t, result.Items+result.Errors >= 1)
|
|
}
|
|
|
|
// --- BitcoinTalk: fetchPage with invalid URL ---
|
|
|
|
func TestBitcoinTalkCollector_FetchPage_Bad_InvalidURL(t *testing.T) {
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
// Use a URL with control character that will fail NewRequestWithContext
|
|
_, err := b.fetchPage(context.Background(), "http://\x7f/invalid")
|
|
assert.Error(t, err)
|
|
}
|