go-scm/collect/process_extra_test.go
Virgil dd59b177c6
Some checks failed
Security Scan / security (push) Failing after 10s
Test / test (push) Successful in 2m2s
chore(ax): normalise test naming and usage annotations
Co-Authored-By: Virgil <virgil@lethean.io>
2026-03-30 06:37:20 +00:00

195 lines
5.7 KiB
Go

// SPDX-License-Identifier: EUPL-1.2
package collect
import (
"context"
"testing"
"dappco.re/go/core/io"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestHTMLToMarkdown_Good_OrderedList_Good checks that a three-item <ol> is
// converted without error and that each item shows up with its 1-based
// numeric prefix.
func TestHTMLToMarkdown_Good_OrderedList_Good(t *testing.T) {
	md, err := HTMLToMarkdown(`<ol><li>First</li><li>Second</li><li>Third</li></ol>`)
	require.NoError(t, err)

	// Every numbered entry must be present in the rendered output.
	for _, want := range []string{"1. First", "2. Second", "3. Third"} {
		assert.Contains(t, md, want)
	}
}
// TestHTMLToMarkdown_Good_UnorderedList_Good checks that a two-item <ul> is
// converted without error and that each item is emitted with a "- " prefix.
func TestHTMLToMarkdown_Good_UnorderedList_Good(t *testing.T) {
	const src = `<ul><li>Alpha</li><li>Beta</li></ul>`

	md, err := HTMLToMarkdown(src)
	require.NoError(t, err)

	is := assert.New(t)
	is.Contains(md, "- Alpha")
	is.Contains(md, "- Beta")
}
// TestHTMLToMarkdown_Good_Blockquote_Good checks that <blockquote> content is
// converted without error and appears prefixed with "> ".
func TestHTMLToMarkdown_Good_Blockquote_Good(t *testing.T) {
	md, err := HTMLToMarkdown(`<blockquote>A wise quote</blockquote>`)
	require.NoError(t, err)

	assert.Contains(t, md, "> A wise quote")
}
// TestHTMLToMarkdown_Good_HorizontalRule_Good checks that an <hr/> placed
// between two paragraphs converts without error and yields "---" in the output.
func TestHTMLToMarkdown_Good_HorizontalRule_Good(t *testing.T) {
	const src = `<p>Before</p><hr/><p>After</p>`

	md, err := HTMLToMarkdown(src)
	require.NoError(t, err)

	assert.Contains(t, md, "---")
}
// TestHTMLToMarkdown_Good_LinkWithoutHref_Good checks that an <a> element with
// no href converts without error, keeps its text, and does not introduce any
// "[" character into the output.
func TestHTMLToMarkdown_Good_LinkWithoutHref_Good(t *testing.T) {
	md, err := HTMLToMarkdown(`<a>bare link text</a>`)
	require.NoError(t, err)

	// The anchor text survives...
	assert.Contains(t, md, "bare link text")
	// ...but no opening bracket is produced.
	assert.NotContains(t, md, "[")
}
// TestHTMLToMarkdown_Good_H4H5H6_Good checks that <h4>, <h5> and <h6> convert
// without error and appear with four, five and six leading '#' characters
// respectively.
func TestHTMLToMarkdown_Good_H4H5H6_Good(t *testing.T) {
	md, err := HTMLToMarkdown(`<h4>H4</h4><h5>H5</h5><h6>H6</h6>`)
	require.NoError(t, err)

	expected := []string{
		"#### H4",
		"##### H5",
		"###### H6",
	}
	for _, heading := range expected {
		assert.Contains(t, md, heading)
	}
}
// TestHTMLToMarkdown_Good_StripsStyle_Good checks that the contents of a
// <style> element are absent from the output while body text is preserved.
func TestHTMLToMarkdown_Good_StripsStyle_Good(t *testing.T) {
	const page = `<html><head><style>.foo{color:red}</style></head><body><p>Clean</p></body></html>`

	md, err := HTMLToMarkdown(page)
	require.NoError(t, err)

	// Body text is kept; the CSS rule text must not appear.
	assert.Contains(t, md, "Clean")
	assert.NotContains(t, md, "color")
}
// TestHTMLToMarkdown_Good_LineBreak_Good checks that text on both sides of a
// <br/> inside a paragraph is still present after conversion.
func TestHTMLToMarkdown_Good_LineBreak_Good(t *testing.T) {
	md, err := HTMLToMarkdown(`<p>Line one<br/>Line two</p>`)
	require.NoError(t, err)

	for _, line := range []string{"Line one", "Line two"} {
		assert.Contains(t, md, line)
	}
}
// TestHTMLToMarkdown_Good_NestedBoldItalic_Good checks that <b> text is wrapped
// in "**" and <i> text is wrapped in "*" after conversion.
func TestHTMLToMarkdown_Good_NestedBoldItalic_Good(t *testing.T) {
	const src = `<b>bold text</b> and <i>italic text</i>`

	md, err := HTMLToMarkdown(src)
	require.NoError(t, err)

	assert.Contains(t, md, "**bold text**")
	// NOTE(review): "*italic text*" is also a substring of "**italic text**",
	// so this check alone cannot tell italic output apart from bold output.
	assert.Contains(t, md, "*italic text*")
}
// TestJSONToMarkdown_Good_NestedObject_Good checks that an object nested inside
// another object converts without error, producing a bold label for the outer
// key and a bold "key: value" pair for the inner one.
func TestJSONToMarkdown_Good_NestedObject_Good(t *testing.T) {
	md, err := JSONToMarkdown(`{"outer": {"inner_key": "inner_value"}}`)
	require.NoError(t, err)

	is := assert.New(t)
	is.Contains(md, "**outer:**")
	is.Contains(md, "**inner_key:** inner_value")
}
// TestJSONToMarkdown_Good_NestedArray_Good checks that an array of arrays
// converts without error, that the output carries the "# Data" heading, and
// that the strings "a" and "b" occur somewhere in it.
func TestJSONToMarkdown_Good_NestedArray_Good(t *testing.T) {
	const doc = `[["a", "b"], ["c"]]`

	md, err := JSONToMarkdown(doc)
	require.NoError(t, err)

	assert.Contains(t, md, "# Data")
	// NOTE(review): the "a" check is already satisfied by the "# Data"
	// heading, so it cannot fail on its own; the "c" element is not asserted.
	assert.Contains(t, md, "a")
	assert.Contains(t, md, "b")
}
// TestJSONToMarkdown_Good_ScalarValue_Good checks that a bare JSON number is
// accepted as a document and that its digits appear in the output.
func TestJSONToMarkdown_Good_ScalarValue_Good(t *testing.T) {
	md, err := JSONToMarkdown(`42`)
	require.NoError(t, err)

	assert.Contains(t, md, "42")
}
// TestJSONToMarkdown_Good_ArrayOfObjects_Good checks that an array of two
// objects converts without error and that the output contains an "Item N"
// label and the name value for each element.
func TestJSONToMarkdown_Good_ArrayOfObjects_Good(t *testing.T) {
	md, err := JSONToMarkdown(`[{"name": "Alice"}, {"name": "Bob"}]`)
	require.NoError(t, err)

	// Label and value for the first element, then for the second.
	for _, want := range []string{"Item 1", "Alice", "Item 2", "Bob"} {
		assert.Contains(t, md, want)
	}
}
// TestProcessor_Process_Good_CancelledContext_Good checks that Process reports
// an error when it is handed a context that was cancelled before the call.
func TestProcessor_Process_Good_CancelledContext_Good(t *testing.T) {
	// Build a context that is already cancelled by the time Process runs.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	// Mock medium with one HTML file in the input directory.
	medium := io.NewMockMedium()
	medium.Dirs["/input"] = true
	medium.Files["/input/file.html"] = `<h1>Test</h1>`

	cfg := NewConfigWithMedium(medium, "/output")
	cfg.Limiter = nil

	proc := &Processor{Source: "test", Dir: "/input"}
	_, err := proc.Process(ctx, cfg)

	assert.Error(t, err)
}
// TestProcessor_Process_Good_EmitsEvents_Good checks that processing two input
// files (one HTML, one JSON) reports two items and fires exactly one start
// event, two item events and one complete event on the dispatcher.
func TestProcessor_Process_Good_EmitsEvents_Good(t *testing.T) {
	medium := io.NewMockMedium()
	medium.Dirs["/input"] = true
	medium.Files["/input/a.html"] = `<h1>Title</h1>`
	medium.Files["/input/b.json"] = `{"key": "value"}`

	cfg := NewConfigWithMedium(medium, "/output")
	cfg.Limiter = nil

	// Tally how many times each event kind is dispatched.
	var seen struct {
		start, item, complete int
	}
	cfg.Dispatcher.On(EventStart, func(_ Event) { seen.start++ })
	cfg.Dispatcher.On(EventItem, func(_ Event) { seen.item++ })
	cfg.Dispatcher.On(EventComplete, func(_ Event) { seen.complete++ })

	proc := &Processor{Source: "test", Dir: "/input"}
	res, err := proc.Process(context.Background(), cfg)
	require.NoError(t, err)

	assert.Equal(t, 2, res.Items)
	assert.Equal(t, 1, seen.start)
	assert.Equal(t, 2, seen.item)
	assert.Equal(t, 1, seen.complete)
}
// TestProcessor_Process_Good_BadHTML_Good checks that an HTML file with
// unclosed tags is still processed successfully and counted as one item.
func TestProcessor_Process_Good_BadHTML_Good(t *testing.T) {
	medium := io.NewMockMedium()
	medium.Dirs["/input"] = true

	// html.Parse is very tolerant, so even malformed HTML parses. The point
	// here is that the pipeline copes with it gracefully.
	medium.Files["/input/bad.html"] = `<html><body><p>Still valid enough</p>`

	cfg := NewConfigWithMedium(medium, "/output")
	cfg.Limiter = nil

	proc := &Processor{Source: "test", Dir: "/input"}
	res, err := proc.Process(context.Background(), cfg)
	require.NoError(t, err)

	assert.Equal(t, 1, res.Items)
}
// TestProcessor_Process_Good_BadJSON_Good checks that an unparseable JSON file
// does not make Process itself fail: the call returns no error, the result
// reports zero items and one error, and one EventError is dispatched.
func TestProcessor_Process_Good_BadJSON_Good(t *testing.T) {
	medium := io.NewMockMedium()
	medium.Dirs["/input"] = true
	medium.Files["/input/bad.json"] = `not valid json`

	cfg := NewConfigWithMedium(medium, "/output")
	cfg.Limiter = nil

	// Count error events emitted while processing.
	errorEvents := 0
	cfg.Dispatcher.On(EventError, func(_ Event) { errorEvents++ })

	proc := &Processor{Source: "test", Dir: "/input"}
	res, err := proc.Process(context.Background(), cfg)
	require.NoError(t, err)

	assert.Equal(t, 0, res.Items)
	assert.Equal(t, 1, res.Errors)
	assert.Equal(t, 1, errorEvents)
}