feat: add Imprint() full render-reverse-imprint pipeline
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a06e9716c5
commit
76cef5a8d0
2 changed files with 71 additions and 2 deletions
19
pipeline.go
19
pipeline.go
|
|
@ -1,6 +1,10 @@
|
||||||
package html
|
package html
|
||||||
|
|
||||||
import "strings"
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"forge.lthn.ai/core/go-i18n/reversal"
|
||||||
|
)
|
||||||
|
|
||||||
// StripTags removes HTML tags from rendered output, returning plain text.
|
// StripTags removes HTML tags from rendered output, returning plain text.
|
||||||
// Tag boundaries are replaced with a single space; result is trimmed.
|
// Tag boundaries are replaced with a single space; result is trimmed.
|
||||||
|
|
@ -28,3 +32,16 @@ func StripTags(html string) string {
|
||||||
}
|
}
|
||||||
return strings.TrimSpace(result)
|
return strings.TrimSpace(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Imprint renders a node tree to HTML, strips tags, tokenises the text,
|
||||||
|
// and returns a GrammarImprint — the full render-reverse pipeline.
|
||||||
|
func Imprint(node Node, ctx *Context) reversal.GrammarImprint {
|
||||||
|
if ctx == nil {
|
||||||
|
ctx = NewContext()
|
||||||
|
}
|
||||||
|
rendered := node.Render(ctx)
|
||||||
|
text := StripTags(rendered)
|
||||||
|
tok := reversal.NewTokeniser()
|
||||||
|
tokens := tok.Tokenise(text)
|
||||||
|
return reversal.NewImprint(tokens)
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,10 @@
|
||||||
package html
|
package html
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
i18n "forge.lthn.ai/core/go-i18n"
|
||||||
|
)
|
||||||
|
|
||||||
func TestStripTags_Simple(t *testing.T) {
|
func TestStripTags_Simple(t *testing.T) {
|
||||||
got := StripTags(`<div>hello</div>`)
|
got := StripTags(`<div>hello</div>`)
|
||||||
|
|
@ -47,3 +51,51 @@ func TestStripTags_Entities(t *testing.T) {
|
||||||
t.Errorf("StripTags should preserve entities, got %q, want %q", got, want)
|
t.Errorf("StripTags should preserve entities, got %q, want %q", got, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestImprint_FromNode(t *testing.T) {
|
||||||
|
svc, _ := i18n.New()
|
||||||
|
i18n.SetDefault(svc)
|
||||||
|
ctx := NewContext()
|
||||||
|
|
||||||
|
page := NewLayout("HCF").
|
||||||
|
H(El("h1", Text("Building project"))).
|
||||||
|
C(El("p", Text("Files deleted successfully"))).
|
||||||
|
F(El("small", Text("Completed")))
|
||||||
|
|
||||||
|
imp := Imprint(page, ctx)
|
||||||
|
|
||||||
|
if imp.TokenCount == 0 {
|
||||||
|
t.Error("Imprint should produce non-zero token count")
|
||||||
|
}
|
||||||
|
if imp.UniqueVerbs == 0 {
|
||||||
|
t.Error("Imprint should find verbs in rendered content")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestImprint_SimilarPages(t *testing.T) {
|
||||||
|
svc, _ := i18n.New()
|
||||||
|
i18n.SetDefault(svc)
|
||||||
|
ctx := NewContext()
|
||||||
|
|
||||||
|
page1 := NewLayout("HCF").
|
||||||
|
H(El("h1", Text("Building project"))).
|
||||||
|
C(El("p", Text("Files deleted successfully")))
|
||||||
|
|
||||||
|
page2 := NewLayout("HCF").
|
||||||
|
H(El("h1", Text("Building system"))).
|
||||||
|
C(El("p", Text("Files removed successfully")))
|
||||||
|
|
||||||
|
different := NewLayout("HCF").
|
||||||
|
C(El("p", Raw("no grammar content here xyz abc")))
|
||||||
|
|
||||||
|
imp1 := Imprint(page1, ctx)
|
||||||
|
imp2 := Imprint(page2, ctx)
|
||||||
|
impDiff := Imprint(different, ctx)
|
||||||
|
|
||||||
|
sim := imp1.Similar(imp2)
|
||||||
|
diffSim := imp1.Similar(impDiff)
|
||||||
|
|
||||||
|
if sim <= diffSim {
|
||||||
|
t.Errorf("similar pages should score higher (%f) than different (%f)", sim, diffSim)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue