feat: add Imprint() full render-reverse-imprint pipeline

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude 2026-02-17 00:11:36 +00:00
parent a06e9716c5
commit 76cef5a8d0
No known key found for this signature in database
GPG key ID: AF404715446AEB41
2 changed files with 71 additions and 2 deletions

View file

@ -1,6 +1,10 @@
package html package html
import "strings" import (
"strings"
"forge.lthn.ai/core/go-i18n/reversal"
)
// StripTags removes HTML tags from rendered output, returning plain text. // StripTags removes HTML tags from rendered output, returning plain text.
// Tag boundaries are replaced with a single space; result is trimmed. // Tag boundaries are replaced with a single space; result is trimmed.
@ -28,3 +32,16 @@ func StripTags(html string) string {
} }
return strings.TrimSpace(result) return strings.TrimSpace(result)
} }
// Imprint runs the full render-reverse pipeline over a node tree: the tree is
// rendered to HTML, the markup is removed with StripTags, the remaining text
// is tokenised, and the tokens are turned into a GrammarImprint.
//
// A nil ctx is replaced with a fresh context from NewContext.
func Imprint(node Node, ctx *Context) reversal.GrammarImprint {
	renderCtx := ctx
	if renderCtx == nil {
		renderCtx = NewContext()
	}

	// Render first, then reduce the markup to its plain-text content.
	plain := StripTags(node.Render(renderCtx))

	tokeniser := reversal.NewTokeniser()
	return reversal.NewImprint(tokeniser.Tokenise(plain))
}

View file

@ -1,6 +1,10 @@
package html package html
import "testing" import (
"testing"
i18n "forge.lthn.ai/core/go-i18n"
)
func TestStripTags_Simple(t *testing.T) { func TestStripTags_Simple(t *testing.T) {
got := StripTags(`<div>hello</div>`) got := StripTags(`<div>hello</div>`)
@ -47,3 +51,51 @@ func TestStripTags_Entities(t *testing.T) {
t.Errorf("StripTags should preserve entities, got %q, want %q", got, want) t.Errorf("StripTags should preserve entities, got %q, want %q", got, want)
} }
} }
// TestImprint_FromNode checks that Imprint, given a layout whose H, C and F
// slots contain English text, returns an imprint with a non-zero token count
// and at least one unique verb.
func TestImprint_FromNode(t *testing.T) {
	// Install a default i18n service before calling Imprint.
	// NOTE(review): presumably the reversal tokeniser reads the default
	// service — confirm against the go-i18n package.
	svc, err := i18n.New()
	if err != nil {
		// Abort here rather than continue with an unusable service and fail
		// later with a misleading assertion message.
		t.Fatalf("i18n.New: %v", err)
	}
	i18n.SetDefault(svc)

	ctx := NewContext()
	page := NewLayout("HCF").
		H(El("h1", Text("Building project"))).
		C(El("p", Text("Files deleted successfully"))).
		F(El("small", Text("Completed")))

	imp := Imprint(page, ctx)
	if imp.TokenCount == 0 {
		t.Error("Imprint should produce non-zero token count")
	}
	if imp.UniqueVerbs == 0 {
		t.Error("Imprint should find verbs in rendered content")
	}
}
// TestImprint_SimilarPages checks that two pages with closely related wording
// score a higher Similar value against each other than the first page scores
// against a page of unrelated raw text.
func TestImprint_SimilarPages(t *testing.T) {
	// Install a default i18n service before calling Imprint.
	// NOTE(review): presumably the reversal tokeniser reads the default
	// service — confirm against the go-i18n package.
	svc, err := i18n.New()
	if err != nil {
		// Abort here rather than continue with an unusable service and fail
		// later with a misleading assertion message.
		t.Fatalf("i18n.New: %v", err)
	}
	i18n.SetDefault(svc)

	ctx := NewContext()

	page1 := NewLayout("HCF").
		H(El("h1", Text("Building project"))).
		C(El("p", Text("Files deleted successfully")))
	page2 := NewLayout("HCF").
		H(El("h1", Text("Building system"))).
		C(El("p", Text("Files removed successfully")))
	different := NewLayout("HCF").
		C(El("p", Raw("no grammar content here xyz abc")))

	imp1 := Imprint(page1, ctx)
	imp2 := Imprint(page2, ctx)
	impDiff := Imprint(different, ctx)

	sim := imp1.Similar(imp2)
	diffSim := imp1.Similar(impDiff)
	if sim <= diffSim {
		t.Errorf("similar pages should score higher (%f) than different (%f)", sim, diffSim)
	}
}