2026-03-30 00:54:20 +00:00
|
|
|
// SPDX-License-Identifier: EUPL-1.2
|
2026-03-30 00:19:43 +00:00
|
|
|
|
test(collect): push coverage from 57.3% to 83.0%
Add HTTP mock tests for BitcoinTalk (fetchPage, Collect with server),
papers (IACR HTML parsing, arXiv XML parsing, PaperSourceAll), market
(historical with FromDate, invalid date, server errors), process
(ordered lists, blockquotes, h4-h6, nested objects, cancelled context),
excavate (resume skips completed, progress events), and state (copy
safety, cursor round-trip, null JSON).
Uses httptest.Server with rewriteTransport to intercept external HTTP
calls without touching the production code.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 01:55:18 +00:00
|
|
|
package collect
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"context"
|
|
|
|
|
"testing"
|
|
|
|
|
"time"
|
|
|
|
|
|
2026-03-21 23:54:23 +00:00
|
|
|
"dappco.re/go/core/io"
|
test(collect): push coverage from 57.3% to 83.0%
Add HTTP mock tests for BitcoinTalk (fetchPage, Collect with server),
papers (IACR HTML parsing, arXiv XML parsing, PaperSourceAll), market
(historical with FromDate, invalid date, server errors), process
(ordered lists, blockquotes, h4-h6, nested objects, cancelled context),
excavate (resume skips completed, progress events), and state (copy
safety, cursor round-trip, null JSON).
Uses httptest.Server with rewriteTransport to intercept external HTTP
calls without touching the production code.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 01:55:18 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
)
|
|
|
|
|
|
2026-03-30 06:37:20 +00:00
|
|
|
func TestExcavator_Run_Good_ResumeSkipsCompleted_Good(t *testing.T) {
|
test(collect): push coverage from 57.3% to 83.0%
Add HTTP mock tests for BitcoinTalk (fetchPage, Collect with server),
papers (IACR HTML parsing, arXiv XML parsing, PaperSourceAll), market
(historical with FromDate, invalid date, server errors), process
(ordered lists, blockquotes, h4-h6, nested objects, cancelled context),
excavate (resume skips completed, progress events), and state (copy
safety, cursor round-trip, null JSON).
Uses httptest.Server with rewriteTransport to intercept external HTTP
calls without touching the production code.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 01:55:18 +00:00
|
|
|
m := io.NewMockMedium()
|
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
|
|
|
cfg.Limiter = nil
|
|
|
|
|
|
|
|
|
|
// Pre-populate state so source-a looks completed.
|
|
|
|
|
cfg.State.Set("source-a", &StateEntry{
|
|
|
|
|
Source: "source-a",
|
|
|
|
|
LastRun: time.Now().Add(-1 * time.Hour),
|
|
|
|
|
Items: 10,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
c1 := &mockCollector{name: "source-a", items: 10}
|
|
|
|
|
c2 := &mockCollector{name: "source-b", items: 5}
|
|
|
|
|
|
|
|
|
|
e := &Excavator{
|
|
|
|
|
Collectors: []Collector{c1, c2},
|
|
|
|
|
Resume: true,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result, err := e.Run(context.Background(), cfg)
|
|
|
|
|
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
assert.False(t, c1.called, "source-a should be skipped (already completed)")
|
|
|
|
|
assert.True(t, c2.called, "source-b should run")
|
|
|
|
|
assert.Equal(t, 5, result.Items)
|
|
|
|
|
assert.Equal(t, 1, result.Skipped)
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-30 06:37:20 +00:00
|
|
|
func TestExcavator_Run_Good_ResumeRunsIncomplete_Good(t *testing.T) {
|
test(collect): push coverage from 57.3% to 83.0%
Add HTTP mock tests for BitcoinTalk (fetchPage, Collect with server),
papers (IACR HTML parsing, arXiv XML parsing, PaperSourceAll), market
(historical with FromDate, invalid date, server errors), process
(ordered lists, blockquotes, h4-h6, nested objects, cancelled context),
excavate (resume skips completed, progress events), and state (copy
safety, cursor round-trip, null JSON).
Uses httptest.Server with rewriteTransport to intercept external HTTP
calls without touching the production code.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 01:55:18 +00:00
|
|
|
m := io.NewMockMedium()
|
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
|
|
|
cfg.Limiter = nil
|
|
|
|
|
|
|
|
|
|
// Pre-populate state with 0 items (incomplete).
|
|
|
|
|
cfg.State.Set("source-a", &StateEntry{
|
|
|
|
|
Source: "source-a",
|
|
|
|
|
LastRun: time.Now(),
|
|
|
|
|
Items: 0,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
c1 := &mockCollector{name: "source-a", items: 5}
|
|
|
|
|
|
|
|
|
|
e := &Excavator{
|
|
|
|
|
Collectors: []Collector{c1},
|
|
|
|
|
Resume: true,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result, err := e.Run(context.Background(), cfg)
|
|
|
|
|
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
assert.True(t, c1.called, "source-a should run (0 items in previous run)")
|
|
|
|
|
assert.Equal(t, 5, result.Items)
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-30 06:37:20 +00:00
|
|
|
func TestExcavator_Run_Good_NilState_Good(t *testing.T) {
|
test(collect): push coverage from 57.3% to 83.0%
Add HTTP mock tests for BitcoinTalk (fetchPage, Collect with server),
papers (IACR HTML parsing, arXiv XML parsing, PaperSourceAll), market
(historical with FromDate, invalid date, server errors), process
(ordered lists, blockquotes, h4-h6, nested objects, cancelled context),
excavate (resume skips completed, progress events), and state (copy
safety, cursor round-trip, null JSON).
Uses httptest.Server with rewriteTransport to intercept external HTTP
calls without touching the production code.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 01:55:18 +00:00
|
|
|
m := io.NewMockMedium()
|
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
|
|
|
cfg.State = nil
|
|
|
|
|
cfg.Limiter = nil
|
|
|
|
|
|
|
|
|
|
c1 := &mockCollector{name: "source-a", items: 3}
|
|
|
|
|
|
|
|
|
|
e := &Excavator{
|
|
|
|
|
Collectors: []Collector{c1},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result, err := e.Run(context.Background(), cfg)
|
|
|
|
|
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
assert.Equal(t, 3, result.Items)
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-30 06:37:20 +00:00
|
|
|
func TestExcavator_Run_Good_NilDispatcher_Good(t *testing.T) {
|
test(collect): push coverage from 57.3% to 83.0%
Add HTTP mock tests for BitcoinTalk (fetchPage, Collect with server),
papers (IACR HTML parsing, arXiv XML parsing, PaperSourceAll), market
(historical with FromDate, invalid date, server errors), process
(ordered lists, blockquotes, h4-h6, nested objects, cancelled context),
excavate (resume skips completed, progress events), and state (copy
safety, cursor round-trip, null JSON).
Uses httptest.Server with rewriteTransport to intercept external HTTP
calls without touching the production code.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 01:55:18 +00:00
|
|
|
m := io.NewMockMedium()
|
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
|
|
|
cfg.Dispatcher = nil
|
|
|
|
|
cfg.Limiter = nil
|
|
|
|
|
|
|
|
|
|
c1 := &mockCollector{name: "source-a", items: 2}
|
|
|
|
|
|
|
|
|
|
e := &Excavator{
|
|
|
|
|
Collectors: []Collector{c1},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
result, err := e.Run(context.Background(), cfg)
|
|
|
|
|
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
assert.Equal(t, 2, result.Items)
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-30 06:37:20 +00:00
|
|
|
func TestExcavator_Run_Good_ProgressEvents_Good(t *testing.T) {
|
test(collect): push coverage from 57.3% to 83.0%
Add HTTP mock tests for BitcoinTalk (fetchPage, Collect with server),
papers (IACR HTML parsing, arXiv XML parsing, PaperSourceAll), market
(historical with FromDate, invalid date, server errors), process
(ordered lists, blockquotes, h4-h6, nested objects, cancelled context),
excavate (resume skips completed, progress events), and state (copy
safety, cursor round-trip, null JSON).
Uses httptest.Server with rewriteTransport to intercept external HTTP
calls without touching the production code.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 01:55:18 +00:00
|
|
|
m := io.NewMockMedium()
|
|
|
|
|
cfg := NewConfigWithMedium(m, "/output")
|
|
|
|
|
cfg.Limiter = nil
|
|
|
|
|
|
|
|
|
|
var progressMsgs []string
|
|
|
|
|
cfg.Dispatcher.On(EventProgress, func(e Event) {
|
|
|
|
|
progressMsgs = append(progressMsgs, e.Message)
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
c1 := &mockCollector{name: "source-a", items: 1}
|
|
|
|
|
c2 := &mockCollector{name: "source-b", items: 1}
|
|
|
|
|
|
|
|
|
|
e := &Excavator{
|
|
|
|
|
Collectors: []Collector{c1, c2},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_, err := e.Run(context.Background(), cfg)
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
|
|
assert.Len(t, progressMsgs, 2)
|
|
|
|
|
assert.Contains(t, progressMsgs[0], "1/2")
|
|
|
|
|
assert.Contains(t, progressMsgs[1], "2/2")
|
|
|
|
|
}
|