feat: add Imprint() full render-reverse-imprint pipeline
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a06e9716c5
commit
76cef5a8d0
2 changed files with 71 additions and 2 deletions
19
pipeline.go
19
pipeline.go
|
|
@ -1,6 +1,10 @@
|
|||
package html
|
||||
|
||||
import "strings"
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"forge.lthn.ai/core/go-i18n/reversal"
|
||||
)
|
||||
|
||||
// StripTags removes HTML tags from rendered output, returning plain text.
|
||||
// Tag boundaries are replaced with a single space; result is trimmed.
|
||||
|
|
@ -28,3 +32,16 @@ func StripTags(html string) string {
|
|||
}
|
||||
return strings.TrimSpace(result)
|
||||
}
|
||||
|
||||
// Imprint renders a node tree to HTML, strips tags, tokenises the text,
|
||||
// and returns a GrammarImprint — the full render-reverse pipeline.
|
||||
func Imprint(node Node, ctx *Context) reversal.GrammarImprint {
|
||||
if ctx == nil {
|
||||
ctx = NewContext()
|
||||
}
|
||||
rendered := node.Render(ctx)
|
||||
text := StripTags(rendered)
|
||||
tok := reversal.NewTokeniser()
|
||||
tokens := tok.Tokenise(text)
|
||||
return reversal.NewImprint(tokens)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
package html
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"testing"
|
||||
|
||||
i18n "forge.lthn.ai/core/go-i18n"
|
||||
)
|
||||
|
||||
func TestStripTags_Simple(t *testing.T) {
|
||||
got := StripTags(`<div>hello</div>`)
|
||||
|
|
@ -47,3 +51,51 @@ func TestStripTags_Entities(t *testing.T) {
|
|||
t.Errorf("StripTags should preserve entities, got %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImprint_FromNode(t *testing.T) {
|
||||
svc, _ := i18n.New()
|
||||
i18n.SetDefault(svc)
|
||||
ctx := NewContext()
|
||||
|
||||
page := NewLayout("HCF").
|
||||
H(El("h1", Text("Building project"))).
|
||||
C(El("p", Text("Files deleted successfully"))).
|
||||
F(El("small", Text("Completed")))
|
||||
|
||||
imp := Imprint(page, ctx)
|
||||
|
||||
if imp.TokenCount == 0 {
|
||||
t.Error("Imprint should produce non-zero token count")
|
||||
}
|
||||
if imp.UniqueVerbs == 0 {
|
||||
t.Error("Imprint should find verbs in rendered content")
|
||||
}
|
||||
}
|
||||
|
||||
func TestImprint_SimilarPages(t *testing.T) {
|
||||
svc, _ := i18n.New()
|
||||
i18n.SetDefault(svc)
|
||||
ctx := NewContext()
|
||||
|
||||
page1 := NewLayout("HCF").
|
||||
H(El("h1", Text("Building project"))).
|
||||
C(El("p", Text("Files deleted successfully")))
|
||||
|
||||
page2 := NewLayout("HCF").
|
||||
H(El("h1", Text("Building system"))).
|
||||
C(El("p", Text("Files removed successfully")))
|
||||
|
||||
different := NewLayout("HCF").
|
||||
C(El("p", Raw("no grammar content here xyz abc")))
|
||||
|
||||
imp1 := Imprint(page1, ctx)
|
||||
imp2 := Imprint(page2, ctx)
|
||||
impDiff := Imprint(different, ctx)
|
||||
|
||||
sim := imp1.Similar(imp2)
|
||||
diffSim := imp1.Similar(impDiff)
|
||||
|
||||
if sim <= diffSim {
|
||||
t.Errorf("similar pages should score higher (%f) than different (%f)", sim, diffSim)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue