go-html/pipeline.go
Claude 76cef5a8d0
feat: add Imprint() full render-reverse-imprint pipeline
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 00:11:36 +00:00

47 lines
1.1 KiB
Go

package html
import (
"strings"
"forge.lthn.ai/core/go-i18n/reversal"
)
// StripTags removes HTML tags from rendered output, returning plain text.
// Tag boundaries are replaced with a single space; result is trimmed.
func StripTags(html string) string {
var b strings.Builder
inTag := false
for _, r := range html {
if r == '<' {
inTag = true
b.WriteByte(' ')
continue
}
if r == '>' {
inTag = false
continue
}
if !inTag {
b.WriteRune(r)
}
}
// Collapse multiple spaces into one.
result := b.String()
for strings.Contains(result, " ") {
result = strings.ReplaceAll(result, " ", " ")
}
return strings.TrimSpace(result)
}
// Imprint renders a node tree to HTML, strips tags, tokenises the text,
// and returns a GrammarImprint — the full render-reverse pipeline.
func Imprint(node Node, ctx *Context) reversal.GrammarImprint {
if ctx == nil {
ctx = NewContext()
}
rendered := node.Render(ctx)
text := StripTags(rendered)
tok := reversal.NewTokeniser()
tokens := tok.Tokenise(text)
return reversal.NewImprint(tokens)
}