// SPDX-License-Identifier: EUPL-1.2 package collect import ( "context" "testing" "dappco.re/go/core/io" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestHTMLToMarkdown_Good_OrderedList_Good(t *testing.T) { input := `
  1. First
  2. Second
  3. Third
` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "1. First") assert.Contains(t, result, "2. Second") assert.Contains(t, result, "3. Third") } func TestHTMLToMarkdown_Good_UnorderedList_Good(t *testing.T) { input := `` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "- Alpha") assert.Contains(t, result, "- Beta") } func TestHTMLToMarkdown_Good_Blockquote_Good(t *testing.T) { input := `
A wise quote
` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "> A wise quote") } func TestHTMLToMarkdown_Good_HorizontalRule_Good(t *testing.T) { input := `

Before


After

` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "---") } func TestHTMLToMarkdown_Good_LinkWithoutHref_Good(t *testing.T) { input := `bare link text` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "bare link text") assert.NotContains(t, result, "[") } func TestHTMLToMarkdown_Good_H4H5H6_Good(t *testing.T) { input := `

H4

H5
H6
` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "#### H4") assert.Contains(t, result, "##### H5") assert.Contains(t, result, "###### H6") } func TestHTMLToMarkdown_Good_StripsStyle_Good(t *testing.T) { input := `

Clean

` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "Clean") assert.NotContains(t, result, "color") } func TestHTMLToMarkdown_Good_LineBreak_Good(t *testing.T) { input := `

Line one
Line two

` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "Line one") assert.Contains(t, result, "Line two") } func TestHTMLToMarkdown_Good_NestedBoldItalic_Good(t *testing.T) { input := `bold text and italic text` result, err := HTMLToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "**bold text**") assert.Contains(t, result, "*italic text*") } func TestJSONToMarkdown_Good_NestedObject_Good(t *testing.T) { input := `{"outer": {"inner_key": "inner_value"}}` result, err := JSONToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "**outer:**") assert.Contains(t, result, "**inner_key:** inner_value") } func TestJSONToMarkdown_Good_NestedArray_Good(t *testing.T) { input := `[["a", "b"], ["c"]]` result, err := JSONToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "# Data") assert.Contains(t, result, "a") assert.Contains(t, result, "b") } func TestJSONToMarkdown_Good_ScalarValue_Good(t *testing.T) { input := `42` result, err := JSONToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "42") } func TestJSONToMarkdown_Good_ArrayOfObjects_Good(t *testing.T) { input := `[{"name": "Alice"}, {"name": "Bob"}]` result, err := JSONToMarkdown(input) require.NoError(t, err) assert.Contains(t, result, "Item 1") assert.Contains(t, result, "Alice") assert.Contains(t, result, "Item 2") assert.Contains(t, result, "Bob") } func TestProcessor_Process_Good_CancelledContext_Good(t *testing.T) { m := io.NewMockMedium() m.Dirs["/input"] = true m.Files["/input/file.html"] = `

Test

` cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil ctx, cancel := context.WithCancel(context.Background()) cancel() p := &Processor{Source: "test", Dir: "/input"} _, err := p.Process(ctx, cfg) assert.Error(t, err) } func TestProcessor_Process_Good_EmitsEvents_Good(t *testing.T) { m := io.NewMockMedium() m.Dirs["/input"] = true m.Files["/input/a.html"] = `

Title

` m.Files["/input/b.json"] = `{"key": "value"}` cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil var starts, items, completes int cfg.Dispatcher.On(EventStart, func(e Event) { starts++ }) cfg.Dispatcher.On(EventItem, func(e Event) { items++ }) cfg.Dispatcher.On(EventComplete, func(e Event) { completes++ }) p := &Processor{Source: "test", Dir: "/input"} result, err := p.Process(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 2, result.Items) assert.Equal(t, 1, starts) assert.Equal(t, 2, items) assert.Equal(t, 1, completes) } func TestProcessor_Process_Good_BadHTML_Good(t *testing.T) { m := io.NewMockMedium() m.Dirs["/input"] = true // html.Parse is very tolerant, so even bad HTML will parse. But we test // that the pipeline handles it gracefully. m.Files["/input/bad.html"] = `

Still valid enough

` cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &Processor{Source: "test", Dir: "/input"} result, err := p.Process(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 1, result.Items) } func TestProcessor_Process_Good_BadJSON_Good(t *testing.T) { m := io.NewMockMedium() m.Dirs["/input"] = true m.Files["/input/bad.json"] = `not valid json` cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil var errors int cfg.Dispatcher.On(EventError, func(e Event) { errors++ }) p := &Processor{Source: "test", Dir: "/input"} result, err := p.Process(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 0, result.Items) assert.Equal(t, 1, result.Errors) assert.Equal(t, 1, errors) }