go-ai/rag/chunk_test.go
Claude e84d6ad3c9
feat: extract AI/ML packages from core/go
LEM scoring pipeline, native MLX Metal bindings, Claude SDK wrapper,
RAG with Qdrant/Ollama, unified AI facade, and MCP protocol server.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 15:25:55 +00:00

120 lines
2.8 KiB
Go

package rag
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestChunkMarkdown_Good_SmallSection checks that a short document with a
// single heading and one sentence is returned as exactly one chunk whose
// text contains the sentence.
func TestChunkMarkdown_Good_SmallSection(t *testing.T) {
	text := `# Title
This is a small section that fits in one chunk.
`
	chunks := ChunkMarkdown(text, DefaultChunkConfig())

	// assert.Len records a failure but does not stop the test. Bail out when
	// the length is wrong so chunks[0] cannot panic on an empty result and
	// bury the real failure under an index-out-of-range trace.
	if !assert.Len(t, chunks, 1) {
		return
	}
	assert.Contains(t, chunks[0].Text, "small section")
}
// TestChunkMarkdown_Good_MultipleSections feeds ChunkMarkdown a document with
// a top-level title followed by two second-level headings and expects at
// least two chunks in the result.
func TestChunkMarkdown_Good_MultipleSections(t *testing.T) {
	const doc = `# Main Title
Introduction paragraph.
## Section One
Content for section one.
## Section Two
Content for section two.
`
	got := ChunkMarkdown(doc, DefaultChunkConfig())

	// Only a lower bound is asserted; the exact chunk count is not pinned.
	assert.GreaterOrEqual(t, len(got), 2)
}
// TestChunkMarkdown_Good_LargeSection builds one section whose body is larger
// than the configured chunk size and expects it to be split into several
// chunks, each non-empty and each labelled with the section heading.
func TestChunkMarkdown_Good_LargeSection(t *testing.T) {
	// Create a section larger than chunk size.
	body := repeatString("This is a test paragraph with some content. ", 50)
	doc := `## Large Section
` + body

	got := ChunkMarkdown(doc, ChunkConfig{Size: 200, Overlap: 20})
	assert.Greater(t, len(got), 1)

	// Every piece must carry text and keep the heading it came from.
	for i := range got {
		assert.NotEmpty(t, got[i].Text)
		assert.Equal(t, "Large Section", got[i].Section)
	}
}
// TestChunkMarkdown_Good_ExtractsTitle checks that the text of a "##" heading
// is reported, without the leading marker, in the Section field of the single
// resulting chunk.
func TestChunkMarkdown_Good_ExtractsTitle(t *testing.T) {
	text := `## My Section Title
Some content here.
`
	chunks := ChunkMarkdown(text, DefaultChunkConfig())

	// assert.Len records a failure but does not stop the test. Bail out when
	// the length is wrong so chunks[0] cannot panic on an empty result and
	// bury the real failure under an index-out-of-range trace.
	if !assert.Len(t, chunks, 1) {
		return
	}
	assert.Equal(t, "My Section Title", chunks[0].Section)
}
func TestCategory_Good_UIComponent(t *testing.T) {
tests := []struct {
path string
expected string
}{
{"docs/flux/button.md", "ui-component"},
{"ui/components/modal.md", "ui-component"},
{"brand/vi-personality.md", "brand"},
{"mascot/expressions.md", "brand"},
{"product-brief.md", "product-brief"},
{"tasks/2024-01-15-feature.md", "task"},
{"plans/architecture.md", "task"},
{"architecture/migration.md", "architecture"},
{"docs/api.md", "documentation"},
}
for _, tc := range tests {
t.Run(tc.path, func(t *testing.T) {
assert.Equal(t, tc.expected, Category(tc.path))
})
}
}
// TestChunkID_Good_Deterministic calls ChunkID twice with identical arguments
// and expects both calls to return equal values.
func TestChunkID_Good_Deterministic(t *testing.T) {
	first := ChunkID("test.md", 0, "hello world")
	second := ChunkID("test.md", 0, "hello world")

	assert.Equal(t, first, second)
}
// TestChunkID_Good_DifferentForDifferentInputs checks that changing either the
// first argument ("test.md" -> "other.md") or the second argument (0 -> 1)
// yields a value different from the baseline call. The text argument is held
// constant throughout.
func TestChunkID_Good_DifferentForDifferentInputs(t *testing.T) {
	base := ChunkID("test.md", 0, "hello world")
	bumped := ChunkID("test.md", 1, "hello world")
	renamed := ChunkID("other.md", 0, "hello world")

	// Each variant is compared against the baseline only; the two variants
	// are not compared with each other.
	assert.NotEqual(t, base, bumped)
	assert.NotEqual(t, base, renamed)
}
// TestShouldProcess_Good_MarkdownFiles checks ShouldProcess against sample
// file names: those ending in .md, .markdown and .txt must be accepted, while
// .go, .py and a name without an extension must be rejected.
func TestShouldProcess_Good_MarkdownFiles(t *testing.T) {
	accepted := []string{"doc.md", "doc.markdown", "doc.txt"}
	rejected := []string{"doc.go", "doc.py", "doc"}

	// The file name is passed as the failure message so a failing entry can
	// be identified even though all checks share one source line.
	for _, name := range accepted {
		assert.True(t, ShouldProcess(name), name)
	}
	for _, name := range rejected {
		assert.False(t, ShouldProcess(name), name)
	}
}
// repeatString returns s concatenated n times. It returns the empty string
// when n is zero or negative, or when s itself is empty.
//
// The result is assembled in a single buffer sized up front, so the cost is
// linear in the output length rather than quadratic as with repeated string
// concatenation.
func repeatString(s string, n int) string {
	if n <= 0 || s == "" {
		return ""
	}

	// One allocation of the final size; append never has to grow the buffer.
	buf := make([]byte, 0, len(s)*n)
	for i := 0; i < n; i++ {
		buf = append(buf, s...)
	}
	return string(buf)
}