diff --git a/collect/bitcointalk_http_test.go b/collect/bitcointalk_http_test.go new file mode 100644 index 0000000..f045ebe --- /dev/null +++ b/collect/bitcointalk_http_test.go @@ -0,0 +1,256 @@ +package collect + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "forge.lthn.ai/core/go/pkg/io" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// sampleBTCTalkPage returns HTML resembling a BitcoinTalk topic page with the +// given number of posts. If fewer than postsPerPage the caller can infer that +// it is the last page. +func sampleBTCTalkPage(count int) string { + page := `
` + for i := 0; i < count; i++ { + page += fmt.Sprintf(` +We present a novel construction for zero-knowledge proofs.
+A wise quote` + result, err := HTMLToMarkdown(input) + require.NoError(t, err) + assert.Contains(t, result, "> A wise quote") +} + +func TestHTMLToMarkdown_Good_HorizontalRule(t *testing.T) { + input := `
Before
After
` + result, err := HTMLToMarkdown(input) + require.NoError(t, err) + assert.Contains(t, result, "---") +} + +func TestHTMLToMarkdown_Good_LinkWithoutHref(t *testing.T) { + input := `bare link text` + result, err := HTMLToMarkdown(input) + require.NoError(t, err) + assert.Contains(t, result, "bare link text") + assert.NotContains(t, result, "[") +} + +func TestHTMLToMarkdown_Good_H4H5H6(t *testing.T) { + input := `Clean
` + result, err := HTMLToMarkdown(input) + require.NoError(t, err) + assert.Contains(t, result, "Clean") + assert.NotContains(t, result, "color") +} + +func TestHTMLToMarkdown_Good_LineBreak(t *testing.T) { + input := `Line one
Line two
Still valid enough
` + + cfg := NewConfigWithMedium(m, "/output") + cfg.Limiter = nil + + p := &Processor{Source: "test", Dir: "/input"} + result, err := p.Process(context.Background(), cfg) + + require.NoError(t, err) + assert.Equal(t, 1, result.Items) +} + +func TestProcessor_Process_Good_BadJSON(t *testing.T) { + m := io.NewMockMedium() + m.Dirs["/input"] = true + m.Files["/input/bad.json"] = `not valid json` + + cfg := NewConfigWithMedium(m, "/output") + cfg.Limiter = nil + + var errors int + cfg.Dispatcher.On(EventError, func(e Event) { errors++ }) + + p := &Processor{Source: "test", Dir: "/input"} + result, err := p.Process(context.Background(), cfg) + + require.NoError(t, err) + assert.Equal(t, 0, result.Items) + assert.Equal(t, 1, result.Errors) + assert.Equal(t, 1, errors) +} diff --git a/collect/state_extra_test.go b/collect/state_extra_test.go new file mode 100644 index 0000000..0bdce71 --- /dev/null +++ b/collect/state_extra_test.go @@ -0,0 +1,76 @@ +package collect + +import ( + "testing" + + "forge.lthn.ai/core/go/pkg/io" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestState_Get_Good_ReturnsCopy(t *testing.T) { + m := io.NewMockMedium() + s := NewState(m, "/state.json") + + s.Set("test", &StateEntry{Source: "test", Items: 5}) + + // Get returns a copy, so mutating it shouldn't affect internal state. + got, ok := s.Get("test") + require.True(t, ok) + got.Items = 999 + + again, ok := s.Get("test") + require.True(t, ok) + assert.Equal(t, 5, again.Items, "internal state should not be mutated") +} + +func TestState_Save_Good_WritesJSON(t *testing.T) { + m := io.NewMockMedium() + s := NewState(m, "/data/state.json") + + s.Set("src-a", &StateEntry{Source: "src-a", Items: 10, LastID: "abc"}) + + err := s.Save() + require.NoError(t, err) + + // Verify the raw JSON was written. + content, err := m.Read("/data/state.json") + require.NoError(t, err) + assert.Contains(t, content, `"src-a"`) + assert.Contains(t, content, `"abc"`) +} + +func TestState_Load_Good_NullJSON(t *testing.T) { + m := io.NewMockMedium() + m.Files["/state.json"] = "null" + + s := NewState(m, "/state.json") + err := s.Load() + require.NoError(t, err) + + // Null JSON should result in empty entries. + _, ok := s.Get("anything") + assert.False(t, ok) +} + +func TestState_SaveLoad_Good_WithCursor(t *testing.T) { + m := io.NewMockMedium() + s := NewState(m, "/state.json") + + s.Set("paginated", &StateEntry{ + Source: "paginated", + Items: 50, + Cursor: "page_token_abc123", + }) + + err := s.Save() + require.NoError(t, err) + + s2 := NewState(m, "/state.json") + err = s2.Load() + require.NoError(t, err) + + entry, ok := s2.Get("paginated") + require.True(t, ok) + assert.Equal(t, "page_token_abc123", entry.Cursor) +}