1307 lines
44 KiB
Go
1307 lines
44 KiB
Go
// SPDX-License-Identifier: EUPL-1.2
|
|
|
|
package collect
|
|
|
|
import (
|
|
"context"
|
|
core "dappco.re/go/core"
|
|
fmt "dappco.re/go/core/scm/internal/ax/fmtx"
|
|
json "dappco.re/go/core/scm/internal/ax/jsonx"
|
|
strings "dappco.re/go/core/scm/internal/ax/stringsx"
|
|
goio "io"
|
|
"io/fs"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"time"
|
|
|
|
"dappco.re/go/core/io"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func testErr(msg string) error {
|
|
return core.E("collect.test", msg, nil)
|
|
}
|
|
|
|
func testErrf(format string, args ...any) error {
|
|
return core.E("collect.test", fmt.Sprintf(format, args...), nil)
|
|
}
|
|
|
|
// errorMedium wraps MockMedium and injects errors on specific operations.
|
|
type errorMedium struct {
|
|
*io.MockMedium
|
|
writeErr error
|
|
ensureDirErr error
|
|
listErr error
|
|
readErr error
|
|
}
|
|
|
|
func (e *errorMedium) Write(path, content string) error {
|
|
if e.writeErr != nil {
|
|
return e.writeErr
|
|
}
|
|
return e.MockMedium.Write(path, content)
|
|
}
|
|
func (e *errorMedium) EnsureDir(path string) error {
|
|
if e.ensureDirErr != nil {
|
|
return e.ensureDirErr
|
|
}
|
|
return e.MockMedium.EnsureDir(path)
|
|
}
|
|
func (e *errorMedium) List(path string) ([]fs.DirEntry, error) {
|
|
if e.listErr != nil {
|
|
return nil, e.listErr
|
|
}
|
|
return e.MockMedium.List(path)
|
|
}
|
|
func (e *errorMedium) Read(path string) (string, error) {
|
|
if e.readErr != nil {
|
|
return "", e.readErr
|
|
}
|
|
return e.MockMedium.Read(path)
|
|
}
|
|
func (e *errorMedium) FileGet(path string) (string, error) { return e.MockMedium.FileGet(path) }
|
|
func (e *errorMedium) FileSet(path, content string) error { return e.MockMedium.FileSet(path, content) }
|
|
func (e *errorMedium) Delete(path string) error { return e.MockMedium.Delete(path) }
|
|
func (e *errorMedium) DeleteAll(path string) error { return e.MockMedium.DeleteAll(path) }
|
|
func (e *errorMedium) Rename(old, new string) error { return e.MockMedium.Rename(old, new) }
|
|
func (e *errorMedium) Stat(path string) (fs.FileInfo, error) { return e.MockMedium.Stat(path) }
|
|
func (e *errorMedium) Open(path string) (fs.File, error) { return e.MockMedium.Open(path) }
|
|
func (e *errorMedium) Create(path string) (goio.WriteCloser, error) { return e.MockMedium.Create(path) }
|
|
func (e *errorMedium) Append(path string) (goio.WriteCloser, error) { return e.MockMedium.Append(path) }
|
|
func (e *errorMedium) ReadStream(path string) (goio.ReadCloser, error) {
|
|
return e.MockMedium.ReadStream(path)
|
|
}
|
|
func (e *errorMedium) WriteStream(path string) (goio.WriteCloser, error) {
|
|
return e.MockMedium.WriteStream(path)
|
|
}
|
|
func (e *errorMedium) Exists(path string) bool { return e.MockMedium.Exists(path) }
|
|
func (e *errorMedium) IsDir(path string) bool { return e.MockMedium.IsDir(path) }
|
|
func (e *errorMedium) IsFile(path string) bool { return e.MockMedium.IsFile(path) }
|
|
|
|
// --- errorLimiter: a RateLimiter that returns errors ---
|
|
|
|
type errorLimiterWaiter struct{}
|
|
|
|
// --- Processor: list error ---
|
|
|
|
func TestProcessor_Process_Bad_ListError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), listErr: testErr("list denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
_, err := p.Process(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to list directory")
|
|
}
|
|
|
|
// --- Processor: ensureDir error ---
|
|
|
|
func TestProcessor_Process_Bad_EnsureDirError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: testErr("mkdir denied")}
|
|
// Need to ensure List returns entries
|
|
em.MockMedium.Dirs["/input"] = true
|
|
em.MockMedium.Files["/input/test.html"] = "<h1>Test</h1>"
|
|
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
_, err := p.Process(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- Processor: context cancellation during processing ---
|
|
|
|
func TestProcessor_Process_Bad_ContextCancelledDuringLoop(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
m.Dirs["/input"] = true
|
|
m.Files["/input/a.html"] = "<h1>Test</h1>"
|
|
m.Files["/input/b.html"] = "<h1>Test</h1>"
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel() // Cancel immediately so ctx.Err() is non-nil
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
_, err := p.Process(ctx, cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "context cancelled")
|
|
}
|
|
|
|
// --- Processor: read error during file processing ---
|
|
|
|
func TestProcessor_Process_Bad_ReadError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), readErr: testErr("read denied")}
|
|
em.MockMedium.Dirs["/input"] = true
|
|
em.MockMedium.Files["/input/test.html"] = "<h1>Test</h1>"
|
|
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
result, err := p.Process(context.Background(), cfg)
|
|
assert.NoError(t, err) // Read errors increment Errors, not returned
|
|
assert.Equal(t, 1, result.Errors)
|
|
}
|
|
|
|
// --- Processor: JSON conversion error ---
|
|
|
|
func TestProcessor_Process_Bad_InvalidJSONFile(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
m.Dirs["/input"] = true
|
|
m.Files["/input/bad.json"] = "not valid json {"
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
var errorEmitted bool
|
|
cfg.Dispatcher.On(EventError, func(e Event) { errorEmitted = true })
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
result, err := p.Process(context.Background(), cfg)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors)
|
|
assert.True(t, errorEmitted, "should emit error event for bad JSON")
|
|
}
|
|
|
|
// --- Processor: write error during output ---
|
|
|
|
func TestProcessor_Process_Bad_WriteError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: testErr("disk full")}
|
|
em.MockMedium.Dirs["/input"] = true
|
|
em.MockMedium.Files["/input/page.html"] = "<h1>Title</h1>"
|
|
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
result, err := p.Process(context.Background(), cfg)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors)
|
|
}
|
|
|
|
// --- Processor: successful processing with events ---
|
|
|
|
func TestProcessor_Process_Good_EmitsItemAndComplete(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
m.Dirs["/input"] = true
|
|
m.Files["/input/page.html"] = "<h1>Title</h1><p>Body</p>"
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
var itemEmitted, completeEmitted bool
|
|
cfg.Dispatcher.On(EventItem, func(e Event) { itemEmitted = true })
|
|
cfg.Dispatcher.On(EventComplete, func(e Event) { completeEmitted = true })
|
|
|
|
p := &Processor{Source: "test", Dir: "/input"}
|
|
result, err := p.Process(context.Background(), cfg)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 1, result.Items)
|
|
assert.True(t, itemEmitted)
|
|
assert.True(t, completeEmitted)
|
|
}
|
|
|
|
// --- Papers: with rate limiter that fails ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_LimiterError(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("iacr", 10*time.Minute) // Long delay
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel() // Cancel context so limiter.Wait fails
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_LimiterError(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("arxiv", 10*time.Minute)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Papers: IACR with bad HTML response ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_InvalidHTML(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
// Serve valid-ish HTML but with no papers - the parse succeeds but returns empty.
|
|
_, _ = w.Write([]byte("<html><body>no papers here</body></html>"))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "nothing"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 0, result.Items)
|
|
}
|
|
|
|
// --- Papers: IACR write error ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_WriteError(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: testErr("disk full")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err) // Write errors increment Errors, not returned
|
|
assert.Equal(t, 2, result.Errors) // 2 papers both fail to write
|
|
}
|
|
|
|
// --- Papers: IACR EnsureDir error ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_EnsureDirError(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: testErr("mkdir denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
_, err := p.Collect(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- Papers: arXiv write error ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_WriteError(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(sampleArXivXML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: testErr("disk full")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 2, result.Errors)
|
|
}
|
|
|
|
// --- Papers: arXiv EnsureDir error ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_EnsureDirError(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(sampleArXivXML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: testErr("mkdir denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
_, err := p.Collect(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- Papers: collectAll with dispatcher events ---
|
|
|
|
func TestPapersCollector_CollectAll_Good_WithDispatcher(t *testing.T) {
|
|
callCount := 0
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
callCount++
|
|
if callCount == 1 {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
} else {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(sampleArXivXML))
|
|
}
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var completeEmitted bool
|
|
cfg.Dispatcher.On(EventComplete, func(e Event) { completeEmitted = true })
|
|
|
|
p := &PapersCollector{Source: PaperSourceAll, Query: "crypto"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 4, result.Items)
|
|
assert.True(t, completeEmitted)
|
|
}
|
|
|
|
// --- Papers: IACR with events on item emit ---
|
|
|
|
func TestPapersCollector_CollectIACR_Good_EmitsItemEvents(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var itemCount int
|
|
cfg.Dispatcher.On(EventItem, func(e Event) { itemCount++ })
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "zero knowledge"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 2, result.Items)
|
|
assert.Equal(t, 2, itemCount)
|
|
}
|
|
|
|
// --- Papers: arXiv with events on item emit ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Good_EmitsItemEvents(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(sampleArXivXML))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var itemCount int
|
|
cfg.Dispatcher.On(EventItem, func(e Event) { itemCount++ })
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "ring signatures"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 2, result.Items)
|
|
assert.Equal(t, 2, itemCount)
|
|
}
|
|
|
|
// --- Market: collectCurrent write error (summary path) ---
|
|
|
|
func TestMarketCollector_Collect_Bad_WriteError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
if strings.Contains(r.URL.Path, "/market_chart") {
|
|
_ = json.NewEncoder(w).Encode(historicalData{
|
|
Prices: [][]float64{{1610000000000, 42000.0}},
|
|
})
|
|
} else {
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{
|
|
CurrentPrice: map[string]float64{"usd": 42000},
|
|
MarketCap: map[string]float64{"usd": 800000000000},
|
|
},
|
|
})
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: testErr("disk full")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
result, err := mc.Collect(context.Background(), cfg)
|
|
// collectCurrent will fail on first write, then collectHistorical also fails
|
|
require.NoError(t, err) // errors are counted not returned at top level
|
|
assert.True(t, result.Errors >= 1, "should have at least one error from write failure")
|
|
}
|
|
|
|
// --- Market: EnsureDir error ---
|
|
|
|
func TestMarketCollector_Collect_Bad_EnsureDirError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin"})
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: testErr("mkdir denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
_, err := mc.Collect(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- Market: collectCurrent with limiter wait error ---
|
|
|
|
func TestMarketCollector_Collect_Bad_LimiterError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin"})
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("coingecko", 10*time.Minute)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
result, err := mc.Collect(ctx, cfg)
|
|
require.NoError(t, err) // error counted, not returned
|
|
assert.True(t, result.Errors >= 1)
|
|
}
|
|
|
|
// --- Market: collectHistorical with custom FromDate ---
|
|
|
|
func TestMarketCollector_Collect_Good_HistoricalCustomDate(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
if strings.Contains(r.URL.Path, "/market_chart") {
|
|
_ = json.NewEncoder(w).Encode(historicalData{
|
|
Prices: [][]float64{{1610000000000, 42000.0}},
|
|
})
|
|
} else {
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{
|
|
CurrentPrice: map[string]float64{"usd": 42000},
|
|
MarketCap: map[string]float64{"usd": 800000000000},
|
|
},
|
|
})
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin", FromDate: "2025-01-01", Historical: true}
|
|
result, err := mc.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.True(t, result.Items >= 2) // current.json + summary.md at minimum
|
|
}
|
|
|
|
// --- BitcoinTalk: EnsureDir error ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_EnsureDirError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), ensureDirErr: testErr("mkdir denied")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.Collect(context.Background(), cfg)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to create output directory")
|
|
}
|
|
|
|
// --- BitcoinTalk: limiter error ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_LimiterError(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("bitcointalk", 10*time.Minute)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel() // Cancel context so limiter.Wait fails
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- BitcoinTalk: write error during post saving ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_WriteErrorOnPosts(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleBTCTalkPage(3)))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: testErr("disk full")}
|
|
cfg := &Config{Output: em, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
result, err := b.Collect(context.Background(), cfg)
|
|
require.NoError(t, err) // write errors are counted
|
|
assert.Equal(t, 3, result.Errors) // 3 posts all fail to write
|
|
assert.Equal(t, 0, result.Items)
|
|
}
|
|
|
|
// --- BitcoinTalk: fetchPage with bad HTTP status ---
|
|
|
|
func TestBitcoinTalkCollector_FetchPage_Bad_NonOKStatus(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusForbidden)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.fetchPage(context.Background(), srv.URL)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "unexpected status code: 403")
|
|
}
|
|
|
|
// --- BitcoinTalk: fetchPage with request error ---
|
|
|
|
func TestBitcoinTalkCollector_FetchPage_Bad_RequestError(t *testing.T) {
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: &rewriteTransport{target: "http://127.0.0.1:1"}} // Connection refused
|
|
defer func() { httpClient = old }()
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.fetchPage(context.Background(), "https://bitcointalk.org/index.php?topic=12345.0")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "request failed")
|
|
}
|
|
|
|
// --- BitcoinTalk: fetchPage with valid but empty page ---
|
|
|
|
func TestBitcoinTalkCollector_FetchPage_Good_EmptyPage(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte("<html><body></body></html>"))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: &rewriteTransport{base: srv.Client().Transport, target: srv.URL}}
|
|
defer func() { httpClient = old }()
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
posts, err := b.fetchPage(context.Background(), "https://bitcointalk.org/index.php?topic=12345.0")
|
|
require.NoError(t, err)
|
|
assert.Empty(t, posts)
|
|
}
|
|
|
|
// --- BitcoinTalk: Collect with fetch error + dispatcher ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_FetchErrorWithDispatcher(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusInternalServerError)
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var errorEmitted bool
|
|
cfg.Dispatcher.On(EventError, func(e Event) { errorEmitted = true })
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
result, err := b.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors)
|
|
assert.True(t, errorEmitted)
|
|
}
|
|
|
|
// --- State: Save with a populated state ---
|
|
|
|
func TestState_Save_Good_RoundTrip(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
s := NewState(m, "/data/state.json")
|
|
|
|
s.Set("source1", &StateEntry{Source: "source1", Items: 42, LastID: "xyz"})
|
|
s.Set("source2", &StateEntry{Source: "source2", Items: 7, Cursor: "page2"})
|
|
|
|
err := s.Save()
|
|
require.NoError(t, err)
|
|
|
|
// Load into fresh state and verify
|
|
s2 := NewState(m, "/data/state.json")
|
|
err = s2.Load()
|
|
require.NoError(t, err)
|
|
|
|
e1, ok := s2.Get("source1")
|
|
require.True(t, ok)
|
|
assert.Equal(t, 42, e1.Items)
|
|
|
|
e2, ok := s2.Get("source2")
|
|
require.True(t, ok)
|
|
assert.Equal(t, "page2", e2.Cursor)
|
|
}
|
|
|
|
// --- GitHub: Collect with Repo set triggers collectIssues/collectPRs (which fail via gh) ---
|
|
|
|
func TestGitHubCollector_Collect_Bad_GhNotAuthenticated(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var errorCount int
|
|
cfg.Dispatcher.On(EventError, func(e Event) { errorCount++ })
|
|
|
|
// With Repo set, Collect skips listOrgRepos and goes directly to collectIssues/collectPRs
|
|
// which use exec.Command("gh", ...) and fail because gh isn't authenticated.
|
|
g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo"}
|
|
result, err := g.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
// Both collectIssues and collectPRs should fail, incrementing Errors
|
|
assert.True(t, result.Errors >= 1, "at least one error expected from unauthenticated gh")
|
|
assert.True(t, errorCount >= 1, "dispatcher should emit error events")
|
|
}
|
|
|
|
// --- GitHub: Collect IssuesOnly triggers only issues, not PRs ---
|
|
|
|
func TestGitHubCollector_Collect_Bad_IssuesOnlyGhFails(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo", IssuesOnly: true}
|
|
result, err := g.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors) // Only issues collected (and failed), PRs skipped
|
|
}
|
|
|
|
// --- GitHub: Collect PRsOnly triggers only PRs, not issues ---
|
|
|
|
func TestGitHubCollector_Collect_Bad_PRsOnlyGhFails(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
g := &GitHubCollector{Org: "nonexistent-test-org-999", Repo: "nonexistent-repo", PRsOnly: true}
|
|
result, err := g.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 1, result.Errors) // Only PRs collected (and failed), issues skipped
|
|
}
|
|
|
|
// --- extractText: text before a br/p/div element adds newline ---
|
|
|
|
func TestExtractText_Good_TextBeforeBR(t *testing.T) {
|
|
htmlStr := `<div class="inner">Hello<br>World<p>End</p></div>`
|
|
posts, err := ParsePostsFromHTML(fmt.Sprintf(`<html><body><div class="post"><div class="inner">%s</div></div></body></html>`,
|
|
"First text<br>Second text<div>Third text</div>"))
|
|
// ParsePostsFromHTML uses extractText internally
|
|
require.NoError(t, err)
|
|
_ = htmlStr
|
|
// Even if no posts match the exact structure, the function path is exercised.
|
|
// The key is that extractText encounters text + br/p/div siblings.
|
|
_ = posts
|
|
}
|
|
|
|
// --- ParsePostsFromHTML: posts with full structure ---
|
|
|
|
func TestParsePostsFromHTML_Good_FullStructure(t *testing.T) {
|
|
htmlContent := `<html><body>
|
|
<div class="post">
|
|
<div class="poster_info">TestAuthor</div>
|
|
<div class="headerandpost">
|
|
<div class="smalltext">January 01, 2009</div>
|
|
</div>
|
|
<div class="inner">This is the post content.</div>
|
|
</div>
|
|
</body></html>`
|
|
|
|
posts, err := ParsePostsFromHTML(htmlContent)
|
|
require.NoError(t, err)
|
|
require.Len(t, posts, 1)
|
|
assert.Equal(t, "This is the post content.", posts[0].Content)
|
|
}
|
|
|
|
// --- getChildrenText: nested element node path ---
|
|
|
|
func TestHTMLToMarkdown_Good_NestedElements(t *testing.T) {
|
|
// <a> with nested <span> triggers getChildrenText with non-text child nodes
|
|
input := `<p><a href="https://example.com"><span>Nested</span> Link</a></p>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "[Nested Link](https://example.com)")
|
|
}
|
|
|
|
// --- HTML: ordered list ---
|
|
|
|
func TestHTMLToMarkdown_Good_OL(t *testing.T) {
|
|
input := `<ol><li>First</li><li>Second</li></ol>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "1. First")
|
|
assert.Contains(t, result, "2. Second")
|
|
}
|
|
|
|
// --- HTML: blockquote ---
|
|
|
|
func TestHTMLToMarkdown_Good_BlockquoteElement(t *testing.T) {
|
|
input := `<blockquote>Quoted text</blockquote>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "> Quoted text")
|
|
}
|
|
|
|
// --- HTML: hr ---
|
|
|
|
func TestHTMLToMarkdown_Good_HR(t *testing.T) {
|
|
input := `<p>Before</p><hr><p>After</p>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "---")
|
|
}
|
|
|
|
// --- HTML: h4, h5, h6 ---
|
|
|
|
func TestHTMLToMarkdown_Good_AllHeadingLevels(t *testing.T) {
|
|
input := `<h4>H4</h4><h5>H5</h5><h6>H6</h6>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "#### H4")
|
|
assert.Contains(t, result, "##### H5")
|
|
assert.Contains(t, result, "###### H6")
|
|
}
|
|
|
|
// --- HTML: link without href ---
|
|
|
|
func TestHTMLToMarkdown_Good_LinkNoHref(t *testing.T) {
|
|
input := `<a>bare link text</a>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "bare link text")
|
|
assert.NotContains(t, result, "[")
|
|
}
|
|
|
|
// --- HTML: unordered list ---
|
|
|
|
func TestHTMLToMarkdown_Good_UL(t *testing.T) {
|
|
input := `<ul><li>Item A</li><li>Item B</li></ul>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "- Item A")
|
|
assert.Contains(t, result, "- Item B")
|
|
}
|
|
|
|
// --- HTML: br tag ---
|
|
|
|
func TestHTMLToMarkdown_Good_BRTag(t *testing.T) {
|
|
input := `<p>Line one<br>Line two</p>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "Line one")
|
|
assert.Contains(t, result, "Line two")
|
|
}
|
|
|
|
// --- HTML: style tag stripped ---
|
|
|
|
func TestHTMLToMarkdown_Good_StyleStripped(t *testing.T) {
|
|
input := `<html><head><style>body{color:red}</style></head><body><p>Clean</p></body></html>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "Clean")
|
|
assert.NotContains(t, result, "color")
|
|
}
|
|
|
|
// --- HTML: i and b tags ---
|
|
|
|
func TestHTMLToMarkdown_Good_AlternateBoldItalic(t *testing.T) {
|
|
input := `<p><b>bold</b> and <i>italic</i></p>`
|
|
result, err := HTMLToMarkdown(input)
|
|
require.NoError(t, err)
|
|
assert.Contains(t, result, "**bold**")
|
|
assert.Contains(t, result, "*italic*")
|
|
}
|
|
|
|
// --- Market: collectCurrent with limiter that actually blocks ---
|
|
|
|
func TestMarketCollector_Collect_Bad_LimiterBlocksThenCancelled(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(coinData{ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{CurrentPrice: map[string]float64{"usd": 42000}}})
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("coingecko", 5*time.Second) // Long delay
|
|
|
|
// Make a first call to set lastTime, so the second call will actually block.
|
|
_ = cfg.Limiter.Wait(context.Background(), "coingecko")
|
|
|
|
// Now cancel context and call Collect - the second Wait will block and detect cancellation.
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
result, err := mc.Collect(ctx, cfg)
|
|
require.NoError(t, err) // Top-level errors are counted
|
|
assert.True(t, result.Errors >= 1)
|
|
}
|
|
|
|
// --- Papers: IACR with limiter that blocks ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_LimiterBlocks(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("iacr", 5*time.Second)
|
|
_ = cfg.Limiter.Wait(context.Background(), "iacr") // Set lastTime
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Papers: arXiv with limiter that blocks ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_LimiterBlocks(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("arxiv", 5*time.Second)
|
|
_ = cfg.Limiter.Wait(context.Background(), "arxiv")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- BitcoinTalk: limiter that blocks ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Bad_LimiterBlocks(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("bitcointalk", 5*time.Second)
|
|
_ = cfg.Limiter.Wait(context.Background(), "bitcointalk")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
_, err := b.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Market: collectCurrent summary.md write error (not first write) ---
|
|
|
|
// writeCountMedium fails after N successful writes.
|
|
type writeCountMedium struct {
|
|
*io.MockMedium
|
|
writeCount int
|
|
failAfterN int
|
|
}
|
|
|
|
func (w *writeCountMedium) Write(path, content string) error {
|
|
w.writeCount++
|
|
if w.writeCount > w.failAfterN {
|
|
return testErrf("write %d: disk full", w.writeCount)
|
|
}
|
|
return w.MockMedium.Write(path, content)
|
|
}
|
|
func (w *writeCountMedium) EnsureDir(path string) error { return w.MockMedium.EnsureDir(path) }
|
|
func (w *writeCountMedium) Read(path string) (string, error) { return w.MockMedium.Read(path) }
|
|
func (w *writeCountMedium) List(path string) ([]fs.DirEntry, error) { return w.MockMedium.List(path) }
|
|
func (w *writeCountMedium) IsFile(path string) bool { return w.MockMedium.IsFile(path) }
|
|
func (w *writeCountMedium) FileGet(path string) (string, error) { return w.MockMedium.FileGet(path) }
|
|
func (w *writeCountMedium) FileSet(path, content string) error {
|
|
return w.MockMedium.FileSet(path, content)
|
|
}
|
|
func (w *writeCountMedium) Delete(path string) error { return w.MockMedium.Delete(path) }
|
|
func (w *writeCountMedium) DeleteAll(path string) error { return w.MockMedium.DeleteAll(path) }
|
|
func (w *writeCountMedium) Rename(old, new string) error { return w.MockMedium.Rename(old, new) }
|
|
func (w *writeCountMedium) Stat(path string) (fs.FileInfo, error) { return w.MockMedium.Stat(path) }
|
|
func (w *writeCountMedium) Open(path string) (fs.File, error) { return w.MockMedium.Open(path) }
|
|
func (w *writeCountMedium) Create(path string) (goio.WriteCloser, error) {
|
|
return w.MockMedium.Create(path)
|
|
}
|
|
func (w *writeCountMedium) Append(path string) (goio.WriteCloser, error) {
|
|
return w.MockMedium.Append(path)
|
|
}
|
|
func (w *writeCountMedium) ReadStream(path string) (goio.ReadCloser, error) {
|
|
return w.MockMedium.ReadStream(path)
|
|
}
|
|
func (w *writeCountMedium) WriteStream(path string) (goio.WriteCloser, error) {
|
|
return w.MockMedium.WriteStream(path)
|
|
}
|
|
func (w *writeCountMedium) Exists(path string) bool { return w.MockMedium.Exists(path) }
|
|
func (w *writeCountMedium) IsDir(path string) bool { return w.MockMedium.IsDir(path) }
|
|
|
|
// Test that the summary.md write error in collectCurrent is handled.
|
|
func TestMarketCollector_Collect_Bad_SummaryWriteError(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
if strings.Contains(r.URL.Path, "/market_chart") {
|
|
_ = json.NewEncoder(w).Encode(historicalData{
|
|
Prices: [][]float64{{1610000000000, 42000.0}},
|
|
})
|
|
} else {
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{
|
|
CurrentPrice: map[string]float64{"usd": 42000},
|
|
MarketCap: map[string]float64{"usd": 800000000000},
|
|
},
|
|
})
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
// Fail on the 2nd write (summary.md) but allow the 1st (current.json).
|
|
wm := &writeCountMedium{MockMedium: io.NewMockMedium(), failAfterN: 1}
|
|
cfg := &Config{Output: wm, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin"}
|
|
result, err := mc.Collect(context.Background(), cfg)
|
|
// collectCurrent returns error on summary write → Errors incremented.
|
|
require.NoError(t, err)
|
|
assert.True(t, result.Errors >= 1)
|
|
}
|
|
|
|
// --- Market: collectHistorical write error ---
|
|
|
|
func TestMarketCollector_Collect_Bad_HistoricalWriteError(t *testing.T) {
|
|
callCount := 0
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
callCount++
|
|
w.Header().Set("Content-Type", "application/json")
|
|
if strings.Contains(r.URL.Path, "/market_chart") {
|
|
_ = json.NewEncoder(w).Encode(historicalData{
|
|
Prices: [][]float64{{1610000000000, 42000.0}},
|
|
})
|
|
} else {
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{
|
|
CurrentPrice: map[string]float64{"usd": 42000},
|
|
MarketCap: map[string]float64{"usd": 800000000000},
|
|
},
|
|
})
|
|
}
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
// Allow 2 writes (current.json + summary.md) but fail on 3rd (historical.json).
|
|
wm := &writeCountMedium{MockMedium: io.NewMockMedium(), failAfterN: 2}
|
|
cfg := &Config{Output: wm, OutputDir: "/output", Dispatcher: NewDispatcher()}
|
|
cfg.Limiter = nil
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin", Historical: true}
|
|
result, err := mc.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
// Current succeeds (2 items) but historical write fails (3rd write)
|
|
assert.True(t, result.Errors >= 1, "historical write should fail: items=%d, errors=%d", result.Items, result.Errors)
|
|
}
|
|
|
|
// --- State: Save write error ---
|
|
|
|
func TestState_Save_Bad_WriteError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: testErr("disk full")}
|
|
s := NewState(em, "/state.json")
|
|
s.Set("test", &StateEntry{Source: "test", Items: 1})
|
|
|
|
err := s.Save()
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to write state file")
|
|
}
|
|
|
|
// --- Excavator: collector with state error ---
|
|
|
|
func TestExcavator_Run_Bad_CollectorStateError(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.State = NewState(m, "/state.json")
|
|
|
|
mc := &mockCollector{
|
|
name: "test",
|
|
items: 3,
|
|
}
|
|
|
|
e := &Excavator{
|
|
Collectors: []Collector{mc},
|
|
}
|
|
|
|
result, err := e.Run(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.True(t, mc.called)
|
|
assert.Equal(t, 3, result.Items)
|
|
}
|
|
|
|
// --- BitcoinTalk: page returns zero posts (empty content) ---
|
|
|
|
func TestBitcoinTalkCollector_Collect_Good_ZeroPostsPage(t *testing.T) {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
// Valid HTML with no post divs at all
|
|
_, _ = w.Write([]byte("<html><body><p>No posts</p></body></html>"))
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
b := &BitcoinTalkCollector{TopicID: "empty-topic"}
|
|
result, err := b.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 0, result.Items)
|
|
}
|
|
|
|
// --- Excavator: state save error after collection ---
|
|
|
|
func TestExcavator_Run_Bad_StateSaveError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), writeErr: testErr("state write failed")}
|
|
cfg := &Config{
|
|
Output: io.NewMockMedium(), // Use regular medium for output
|
|
OutputDir: "/output",
|
|
Dispatcher: NewDispatcher(),
|
|
State: NewState(em, "/state.json"), // State uses error medium
|
|
}
|
|
|
|
var errorEmitted bool
|
|
cfg.Dispatcher.On(EventError, func(e Event) { errorEmitted = true })
|
|
|
|
mc := &mockCollector{name: "test", items: 1}
|
|
|
|
e := &Excavator{Collectors: []Collector{mc}}
|
|
result, err := e.Run(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.True(t, mc.called)
|
|
assert.Equal(t, 1, result.Items)
|
|
assert.True(t, errorEmitted, "should emit error event when state save fails")
|
|
}
|
|
|
|
// --- State: Load with read error ---
|
|
|
|
func TestState_Load_Bad_ReadError(t *testing.T) {
|
|
em := &errorMedium{MockMedium: io.NewMockMedium(), readErr: testErr("read denied")}
|
|
em.MockMedium.Files["/state.json"] = "{}" // File exists but read will fail
|
|
|
|
s := NewState(em, "/state.json")
|
|
err := s.Load()
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to read state file")
|
|
}
|
|
|
|
// --- Papers: PaperSourceAll emits complete ---
|
|
|
|
func TestPapersCollector_CollectAll_Good_ArxivFailsWithIACR(t *testing.T) {
|
|
callCount := 0
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
callCount++
|
|
if callCount == 1 {
|
|
// IACR succeeds
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(sampleIACRHTML))
|
|
} else {
|
|
// arXiv fails
|
|
w.WriteHeader(http.StatusInternalServerError)
|
|
}
|
|
}))
|
|
defer srv.Close()
|
|
|
|
transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL}
|
|
old := httpClient
|
|
httpClient = &http.Client{Transport: transport}
|
|
defer func() { httpClient = old }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
var completeEmitted bool
|
|
cfg.Dispatcher.On(EventComplete, func(e Event) { completeEmitted = true })
|
|
|
|
p := &PapersCollector{Source: PaperSourceAll, Query: "test"}
|
|
result, err := p.Collect(context.Background(), cfg)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, 2, result.Items)
|
|
assert.Equal(t, 1, result.Errors) // arXiv failure
|
|
assert.True(t, completeEmitted)
|
|
}
|
|
|
|
// --- Papers: IACR with cancelled context (request creation fails) ---
|
|
|
|
func TestPapersCollector_CollectIACR_Bad_CancelledContextRequestFails(t *testing.T) {
|
|
// Don't set up any server - the request should fail because context is cancelled.
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceIACR, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Papers: arXiv with cancelled context ---
|
|
|
|
func TestPapersCollector_CollectArXiv_Bad_CancelledContextRequestFails(t *testing.T) {
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = nil
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"}
|
|
_, err := p.Collect(ctx, cfg)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
// --- Market: collectHistorical limiter blocks ---
|
|
|
|
func TestMarketCollector_Collect_Bad_HistoricalLimiterBlocks(t *testing.T) {
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(coinData{
|
|
ID: "bitcoin", Symbol: "btc", Name: "Bitcoin",
|
|
MarketData: marketData{CurrentPrice: map[string]float64{"usd": 42000}},
|
|
})
|
|
}))
|
|
defer server.Close()
|
|
|
|
oldURL := coinGeckoBaseURL
|
|
coinGeckoBaseURL = server.URL
|
|
defer func() { coinGeckoBaseURL = oldURL }()
|
|
|
|
m := io.NewMockMedium()
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
cfg.Limiter = NewRateLimiter()
|
|
cfg.Limiter.SetDelay("coingecko", 5*time.Second)
|
|
|
|
// First call succeeds (current), then cancel before historical
|
|
_ = cfg.Limiter.Wait(context.Background(), "coingecko")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
// Run current data collection first, then cancel before historical
|
|
go func() {
|
|
time.Sleep(50 * time.Millisecond)
|
|
cancel()
|
|
}()
|
|
|
|
mc := &MarketCollector{CoinID: "bitcoin", Historical: true}
|
|
result, err := mc.Collect(ctx, cfg)
|
|
require.NoError(t, err)
|
|
// Either current succeeds and historical fails, or both fail
|
|
assert.True(t, result.Items+result.Errors >= 1)
|
|
}
|
|
|
|
// --- BitcoinTalk: fetchPage with invalid URL ---
|
|
|
|
func TestBitcoinTalkCollector_FetchPage_Bad_InvalidURL(t *testing.T) {
|
|
b := &BitcoinTalkCollector{TopicID: "12345"}
|
|
// Use a URL with control character that will fail NewRequestWithContext
|
|
_, err := b.fetchPage(context.Background(), "http://\x7f/invalid")
|
|
assert.Error(t, err)
|
|
}
|