go-html/pipeline.go
Snider dd68c6d0d7
Some checks are pending
Security Scan / security (push) Waiting to run
Test / test (push) Waiting to run
feat(html): canonicalize compare variant keys
2026-04-15 02:53:37 +01:00

154 lines
3.4 KiB
Go

//go:build !js
package html
import (
core "dappco.re/go/core"
"dappco.re/go/core/i18n/reversal"
"unicode/utf8"
)
// StripTags removes HTML tags from rendered output, returning plain text.
// Usage example: text := StripTags("<main>Hello <strong>world</strong></main>")
// Tag boundaries are collapsed into single spaces; result is trimmed.
// Does not handle script/style element content (go-html does not generate these).
func StripTags(html string) string {
b := core.NewBuilder()
prevSpace := true // starts true to trim leading space
for i := 0; i < len(html); {
r, size := utf8.DecodeRuneInString(html[i:])
if r == '<' {
next, nextSize := nextRune(html, i+size)
if nextSize > 0 && isTagStartRune(next) {
if end, ok := findTagCloser(html, i+size+nextSize); ok {
if !prevSpace {
b.WriteByte(' ')
prevSpace = true
}
i = end + 1
continue
}
}
}
switch r {
case ' ', '\t', '\n', '\r':
if !prevSpace {
b.WriteByte(' ')
prevSpace = true
}
default:
_, _ = b.WriteString(html[i : i+size])
prevSpace = false
}
i += size
}
return core.Trim(b.String())
}
func nextRune(s string, i int) (rune, int) {
if i >= len(s) {
return 0, 0
}
return utf8.DecodeRuneInString(s[i:])
}
func isTagStartRune(r rune) bool {
switch {
case r >= 'a' && r <= 'z':
return true
case r >= 'A' && r <= 'Z':
return true
case r == '/', r == '!', r == '?':
return true
default:
return false
}
}
func findTagCloser(s string, start int) (int, bool) {
inSingleQuote := false
inDoubleQuote := false
for i := start; i < len(s); {
r, size := utf8.DecodeRuneInString(s[i:])
switch r {
case '\'':
if !inDoubleQuote {
inSingleQuote = !inSingleQuote
}
case '"':
if !inSingleQuote {
inDoubleQuote = !inDoubleQuote
}
case '>':
if !inSingleQuote && !inDoubleQuote {
return i, true
}
}
i += size
}
return 0, false
}
// Imprint renders a node tree to HTML, strips tags, tokenises the text,
// and returns a GrammarImprint — the full render-reverse pipeline.
// Usage example: imp := Imprint(Text("welcome"), NewContext())
func Imprint(node Node, ctx *Context) reversal.GrammarImprint {
if ctx == nil {
ctx = NewContext()
}
rendered := ""
if node != nil {
rendered = node.Render(ctx)
}
text := StripTags(rendered)
tok := reversal.NewTokeniser()
tokens := tok.Tokenise(text)
return reversal.NewImprint(tokens)
}
// CompareVariants runs the imprint pipeline on each responsive variant independently
// and returns pairwise similarity scores. Key format: "name1:name2".
// Usage example: scores := CompareVariants(NewResponsive(), NewContext())
func CompareVariants(r *Responsive, ctx *Context) map[string]float64 {
if ctx == nil {
ctx = NewContext()
}
if r == nil {
return make(map[string]float64)
}
type named struct {
name string
imp reversal.GrammarImprint
}
var imprints []named
for _, v := range r.variants {
if v.layout == nil {
continue
}
imp := Imprint(v.layout, cloneContext(ctx))
imprints = append(imprints, named{name: v.name, imp: imp})
}
scores := make(map[string]float64)
for i := range len(imprints) {
for j := i + 1; j < len(imprints); j++ {
left := imprints[i].name
right := imprints[j].name
if right < left {
left, right = right, left
}
key := left + ":" + right
scores[key] = imprints[i].imp.Similar(imprints[j].imp)
}
}
return scores
}