fix: preserve comparison operators in strip tags
Some checks are pending
Security Scan / security (push) Waiting to run
Test / test (push) Waiting to run

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Codex 2026-04-15 02:08:01 +01:00
parent 5fa28ca64b
commit b1ff334c85
2 changed files with 49 additions and 18 deletions

View file

@ -14,36 +14,59 @@ import (
// Does not handle script/style element content (go-html does not generate these).
func StripTags(html string) string {
b := core.NewBuilder()
// Decode to a rune slice so the index-based loop below can peek at runes[i+1].
runes := []rune(html)
inTag := false
prevSpace := true // starts true to trim leading space
// NOTE(review): this range loop and the index-based loop further down both
// appear in this view and the braces do not balance — this looks like
// interleaved old/new diff lines; confirm against the actual file.
for _, r := range html {
if r == '<' {
inTag = true
continue
}
if r == '>' {
inTag = false
if !prevSpace {
b.WriteByte(' ')
prevSpace = true
}
continue
}
if !inTag {
if r == ' ' || r == '\t' || r == '\n' {
for i := 0; i < len(runes); i++ {
r := runes[i]
if inTag {
if r == '>' {
inTag = false
// Closing a tag emits one separating space unless one was just written.
if !prevSpace {
b.WriteByte(' ')
prevSpace = true
}
} else {
b.WriteRune(r)
prevSpace = false
}
continue
}
switch r {
case '<':
// Enter tag mode only when the rune after '<' can begin a tag;
// otherwise the '<' is written out as ordinary text.
if i+1 < len(runes) && isTagStartRune(runes[i+1]) {
inTag = true
continue
}
b.WriteRune(r)
prevSpace = false
// A '>' reached here is outside any tag, so it is written out as text.
case '>':
b.WriteRune(r)
prevSpace = false
// Collapse a run of whitespace runes into a single space.
case ' ', '\t', '\n', '\r':
if !prevSpace {
b.WriteByte(' ')
prevSpace = true
}
default:
b.WriteRune(r)
prevSpace = false
}
}
return core.Trim(b.String())
}
// isTagStartRune reports whether r is a rune that may directly follow '<' at
// the start of markup: an ASCII letter, '/', '!' or '?' (as in `<p`, `</p`,
// `<!--`, `<?xml`). Every other rune, including digits and whitespace, yields
// false.
func isTagStartRune(r rune) bool {
	// Punctuation that opens closing tags, declarations/comments and
	// processing instructions.
	if r == '/' || r == '!' || r == '?' {
		return true
	}
	// Otherwise only ASCII letters qualify.
	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
}
// Imprint renders a node tree to HTML, strips tags, tokenises the text,
// and returns a GrammarImprint — the full render-reverse pipeline.
// Usage example: imp := Imprint(Text("welcome"), NewContext())

View file

@ -46,6 +46,14 @@ func TestStripTags_NoTags_Good(t *testing.T) {
}
}
// TestStripTags_PreservesComparisonOperators_Good checks that '<' and '>'
// used as comparison operators inside text are kept in the output while the
// enclosing <p> element is stripped.
func TestStripTags_PreservesComparisonOperators_Good(t *testing.T) {
	const want = "1 < 2 and 3 > 2"
	if got := StripTags(`<p>1 < 2 and 3 > 2</p>`); got != want {
		t.Errorf("StripTags(comparisons) = %q, want %q", got, want)
	}
}
func TestStripTags_Entities_Good(t *testing.T) {
got := StripTags(`&lt;script&gt;`)
want := "&lt;script&gt;"