// SPDX-License-Identifier: EUPL-1.2 package collect import ( "context" strings "dappco.re/go/core/scm/internal/ax/stringsx" "net/http" "net/http/httptest" "testing" "dappco.re/go/core/io" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/net/html" ) const sampleIACRHTML = `
Zero-Knowledge Proofs Alice Bob 2025-01-15

We present a novel construction for zero-knowledge proofs.

Lattice Cryptography Charlie 2025-01-20

A survey of lattice-based cryptography.

` const sampleArXivXML = ` http://arxiv.org/abs/2501.12345v1 Ring Signatures Revisited We propose an efficient ring signature scheme. 2025-01-10T00:00:00Z Alice David http://arxiv.org/abs/2501.67890v1 Post-Quantum Signatures A new approach to post-quantum digital signatures. 2025-01-12T00:00:00Z Eve ` func TestPapersCollector_CollectIACR_Good(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html") _, _ = w.Write([]byte(sampleIACRHTML)) })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceIACR, Query: "zero knowledge"} result, err := p.Collect(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 2, result.Items) assert.Len(t, result.Files, 2) // Verify content was written. content, err := m.Read("/output/papers/iacr/2025-001.md") require.NoError(t, err) assert.Contains(t, content, "Zero-Knowledge Proofs") } func TestPapersCollector_CollectArXiv_Good(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/xml") _, _ = w.Write([]byte(sampleArXivXML)) })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceArXiv, Query: "ring signatures"} result, err := p.Collect(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 2, result.Items) assert.Len(t, result.Files, 2) // Verify one of the papers. content, err := m.Read("/output/papers/arxiv/2501.12345v1.md") require.NoError(t, err) assert.Contains(t, content, "Ring Signatures Revisited") assert.Contains(t, content, "Alice") } func TestPapersCollector_CollectArXiv_Good_WithCategory_Good(t *testing.T) { var capturedQuery string srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { capturedQuery = r.URL.RawQuery w.Header().Set("Content-Type", "application/xml") _, _ = w.Write([]byte(sampleArXivXML)) })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceArXiv, Query: "crypto", Category: "cs.CR"} _, err := p.Collect(context.Background(), cfg) require.NoError(t, err) assert.Contains(t, capturedQuery, "cat") } func TestPapersCollector_CollectAll_Good(t *testing.T) { callCount := 0 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { callCount++ if callCount == 1 { // First call is IACR w.Header().Set("Content-Type", "text/html") _, _ = w.Write([]byte(sampleIACRHTML)) } else { // Second call is arXiv w.Header().Set("Content-Type", "application/xml") _, _ = w.Write([]byte(sampleArXivXML)) } })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceAll, Query: "cryptography"} result, err := p.Collect(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 4, result.Items) // 2 IACR + 2 arXiv } func TestPapersCollector_CollectIACR_Bad_ServerError_Good(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusInternalServerError) })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceIACR, Query: "test"} _, err := p.Collect(context.Background(), cfg) assert.Error(t, err) } func TestPapersCollector_CollectArXiv_Bad_ServerError_Good(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusServiceUnavailable) })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"} _, err := p.Collect(context.Background(), cfg) assert.Error(t, err) } func TestPapersCollector_CollectArXiv_Bad_InvalidXML_Good(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/xml") _, _ = w.Write([]byte(`not xml at all`)) })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceArXiv, Query: "test"} _, err := p.Collect(context.Background(), cfg) assert.Error(t, err) } func TestPapersCollector_CollectAll_Bad_BothFail_Good(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusInternalServerError) })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceAll, Query: "test"} _, err := p.Collect(context.Background(), cfg) assert.Error(t, err) } func TestPapersCollector_CollectAll_Good_OneFails_Good(t *testing.T) { callCount := 0 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { callCount++ if callCount == 1 { // IACR fails w.WriteHeader(http.StatusInternalServerError) } else { // ArXiv succeeds w.Header().Set("Content-Type", "application/xml") _, _ = w.Write([]byte(sampleArXivXML)) } })) defer srv.Close() transport := &rewriteTransport{base: srv.Client().Transport, target: srv.URL} old := httpClient httpClient = &http.Client{Transport: transport} defer func() { httpClient = old }() m := io.NewMockMedium() cfg := NewConfigWithMedium(m, "/output") cfg.Limiter = nil p := &PapersCollector{Source: PaperSourceAll, Query: "test"} result, err := p.Collect(context.Background(), cfg) require.NoError(t, err) assert.Equal(t, 2, result.Items) assert.Equal(t, 1, result.Errors) // IACR failure counted } func TestExtractIACRPapers_Good(t *testing.T) { doc, err := html.Parse(strings.NewReader(sampleIACRHTML)) require.NoError(t, err) papers := extractIACRPapers(doc) assert.Len(t, papers, 2) assert.Equal(t, "Zero-Knowledge Proofs", papers[0].Title) assert.Contains(t, papers[0].Authors, "Alice") assert.Contains(t, papers[0].Authors, "Bob") assert.Equal(t, "2025-01-15", papers[0].Date) assert.Contains(t, papers[0].Abstract, "zero-knowledge proofs") assert.Equal(t, "iacr", papers[0].Source) assert.Equal(t, "Lattice Cryptography", papers[1].Title) } func TestExtractIACRPapers_Good_Empty_Good(t *testing.T) { doc, err := html.Parse(strings.NewReader(``)) require.NoError(t, err) papers := extractIACRPapers(doc) assert.Empty(t, papers) } func TestExtractIACRPapers_Good_NoTitle_Good(t *testing.T) { doc, err := html.Parse(strings.NewReader(`
`)) require.NoError(t, err) papers := extractIACRPapers(doc) // Entry with no title should be excluded by the Title check. assert.Empty(t, papers) }