diff --git a/pipeline.go b/pipeline.go index 3b2bddf..8f8500e 100644 --- a/pipeline.go +++ b/pipeline.go @@ -1,6 +1,10 @@ package html -import "strings" +import ( + "strings" + + "forge.lthn.ai/core/go-i18n/reversal" +) // StripTags removes HTML tags from rendered output, returning plain text. // Tag boundaries are replaced with a single space; result is trimmed. @@ -28,3 +32,16 @@ func StripTags(html string) string { } return strings.TrimSpace(result) } + +// Imprint renders a node tree to HTML, strips tags, tokenises the text, +// and returns a GrammarImprint — the full render-reverse pipeline. +func Imprint(node Node, ctx *Context) reversal.GrammarImprint { + if ctx == nil { + ctx = NewContext() + } + rendered := node.Render(ctx) + text := StripTags(rendered) + tok := reversal.NewTokeniser() + tokens := tok.Tokenise(text) + return reversal.NewImprint(tokens) +} diff --git a/pipeline_test.go b/pipeline_test.go index 87b808a..09d68c7 100644 --- a/pipeline_test.go +++ b/pipeline_test.go @@ -1,6 +1,10 @@ package html -import "testing" +import ( + "testing" + + i18n "forge.lthn.ai/core/go-i18n" +) func TestStripTags_Simple(t *testing.T) { got := StripTags(`
hello
`) @@ -47,3 +51,51 @@ func TestStripTags_Entities(t *testing.T) { t.Errorf("StripTags should preserve entities, got %q, want %q", got, want) } } + +func TestImprint_FromNode(t *testing.T) { + svc, _ := i18n.New() + i18n.SetDefault(svc) + ctx := NewContext() + + page := NewLayout("HCF"). + H(El("h1", Text("Building project"))). + C(El("p", Text("Files deleted successfully"))). + F(El("small", Text("Completed"))) + + imp := Imprint(page, ctx) + + if imp.TokenCount == 0 { + t.Error("Imprint should produce non-zero token count") + } + if imp.UniqueVerbs == 0 { + t.Error("Imprint should find verbs in rendered content") + } +} + +func TestImprint_SimilarPages(t *testing.T) { + svc, _ := i18n.New() + i18n.SetDefault(svc) + ctx := NewContext() + + page1 := NewLayout("HCF"). + H(El("h1", Text("Building project"))). + C(El("p", Text("Files deleted successfully"))) + + page2 := NewLayout("HCF"). + H(El("h1", Text("Building system"))). + C(El("p", Text("Files removed successfully"))) + + different := NewLayout("HCF"). + C(El("p", Raw("no grammar content here xyz abc"))) + + imp1 := Imprint(page1, ctx) + imp2 := Imprint(page2, ctx) + impDiff := Imprint(different, ctx) + + sim := imp1.Similar(imp2) + diffSim := imp1.Similar(impDiff) + + if sim <= diffSim { + t.Errorf("similar pages should score higher (%f) than different (%f)", sim, diffSim) + } +}