diff --git a/pipeline.go b/pipeline.go
index 3b2bddf..8f8500e 100644
--- a/pipeline.go
+++ b/pipeline.go
@@ -1,6 +1,10 @@
package html
-import "strings"
+import (
+ "strings"
+
+ "forge.lthn.ai/core/go-i18n/reversal"
+)
// StripTags removes HTML tags from rendered output, returning plain text.
// Tag boundaries are replaced with a single space; result is trimmed.
@@ -28,3 +32,16 @@ func StripTags(html string) string {
}
return strings.TrimSpace(result)
}
+
+// Imprint runs the full render-reverse pipeline over a node tree: the node
+// is rendered with ctx, the markup is removed by StripTags, the remaining
+// plain text is tokenised, and the tokens are turned into a GrammarImprint.
+// A nil ctx is replaced by a fresh one obtained from NewContext.
+func Imprint(node Node, ctx *Context) reversal.GrammarImprint {
+	if ctx == nil {
+		ctx = NewContext()
+	}
+
+	// Render first, then reduce the output to plain text for the tokeniser.
+	plain := StripTags(node.Render(ctx))
+
+	tokeniser := reversal.NewTokeniser()
+	return reversal.NewImprint(tokeniser.Tokenise(plain))
+}
diff --git a/pipeline_test.go b/pipeline_test.go
index 87b808a..09d68c7 100644
--- a/pipeline_test.go
+++ b/pipeline_test.go
@@ -1,6 +1,10 @@
package html
-import "testing"
+import (
+ "testing"
+
+ i18n "forge.lthn.ai/core/go-i18n"
+)
func TestStripTags_Simple(t *testing.T) {
got := StripTags(`
hello
`)
@@ -47,3 +51,51 @@ func TestStripTags_Entities(t *testing.T) {
t.Errorf("StripTags should preserve entities, got %q, want %q", got, want)
}
}
+
+// TestImprint_FromNode checks that Imprint produces a populated imprint for
+// a layout assembled with the H, C and F builders: both the token count and
+// the unique-verb count must be non-zero.
+func TestImprint_FromNode(t *testing.T) {
+	// Abort immediately if the i18n service cannot be created; installing a
+	// broken default would only surface later as a misleading assertion.
+	svc, err := i18n.New()
+	if err != nil {
+		t.Fatalf("i18n.New: %v", err)
+	}
+	i18n.SetDefault(svc)
+	ctx := NewContext()
+
+	page := NewLayout("HCF").
+		H(El("h1", Text("Building project"))).
+		C(El("p", Text("Files deleted successfully"))).
+		F(El("small", Text("Completed")))
+
+	imp := Imprint(page, ctx)
+
+	if imp.TokenCount == 0 {
+		t.Error("Imprint should produce non-zero token count")
+	}
+	if imp.UniqueVerbs == 0 {
+		t.Error("Imprint should find verbs in rendered content")
+	}
+}
+
+// TestImprint_SimilarPages checks the relative ordering of similarity
+// scores: two pages with closely related wording must score strictly higher
+// under Similar than the first page compared with a page of unrelated Raw
+// text.
+func TestImprint_SimilarPages(t *testing.T) {
+	// Abort immediately if the i18n service cannot be created; installing a
+	// broken default would only surface later as a misleading assertion.
+	svc, err := i18n.New()
+	if err != nil {
+		t.Fatalf("i18n.New: %v", err)
+	}
+	i18n.SetDefault(svc)
+	ctx := NewContext()
+
+	page1 := NewLayout("HCF").
+		H(El("h1", Text("Building project"))).
+		C(El("p", Text("Files deleted successfully")))
+
+	page2 := NewLayout("HCF").
+		H(El("h1", Text("Building system"))).
+		C(El("p", Text("Files removed successfully")))
+
+	different := NewLayout("HCF").
+		C(El("p", Raw("no grammar content here xyz abc")))
+
+	imp1 := Imprint(page1, ctx)
+	imp2 := Imprint(page2, ctx)
+	impDiff := Imprint(different, ctx)
+
+	sim := imp1.Similar(imp2)
+	diffSim := imp1.Similar(impDiff)
+
+	// Only the ordering is asserted, not absolute score values.
+	if sim <= diffSim {
+		t.Errorf("similar pages should score higher (%f) than different (%f)", sim, diffSim)
+	}
+}