/**
 * Wraps occurrences of search terms in an HTML tag and builds short snippets
 * around the first occurrence.
 *
 * Matching is case-insensitive and substring-based (no word boundaries): the
 * query is split on whitespace and every term of at least the configured
 * minimum length is highlighted wherever it occurs.
 *
 * ```php
 * $highlighter = new SearchHighlighter();
 *
 * // Single-word query
 * $highlighted = $highlighter->highlight('The quick brown fox', 'quick');
 * // Returns: 'The <mark class="search-highlight">quick</mark> brown fox'
 *
 * // Multi-word query
 * $highlighted = $highlighter->highlight('The quick brown fox', 'quick fox');
 * // Returns: 'The <mark class="search-highlight">quick</mark> brown <mark class="search-highlight">fox</mark>'
 *
 * // Extract snippet with context
 * $snippet = $highlighter->snippet($longText, 'search term', 50);
 * // Returns: '...text around <mark class="search-highlight">search</mark> <mark class="search-highlight">term</mark> with context...'
 * ```
 */
class SearchHighlighter
{
    /**
     * Default CSS class for highlight wrapper.
     */
    protected const DEFAULT_CLASS = 'search-highlight';

    /**
     * Default number of context characters for snippets.
     */
    protected const DEFAULT_CONTEXT_LENGTH = 50;

    /**
     * Minimum word length to highlight.
     */
    protected const MIN_WORD_LENGTH = 2;

    /**
     * The wrapper tag to use for highlighting.
     *
     * Inserted into the output verbatim, so it must be a trusted tag name.
     */
    protected string $tag = 'mark';

    /**
     * CSS class(es) to apply to the highlight wrapper (escaped on output).
     */
    protected string $class = self::DEFAULT_CLASS;

    /**
     * Whether to escape HTML in the input text.
     */
    protected bool $escapeHtml = true;

    /**
     * Minimum word length to highlight.
     */
    protected int $minWordLength = self::MIN_WORD_LENGTH;

    /**
     * Create a new highlighter instance.
     *
     * @param string|null $tag Wrapper tag name; null keeps the default ('mark')
     * @param string|null $class Wrapper CSS class(es); null keeps the default
     * @param bool $escapeHtml Whether input text is HTML-escaped on output
     */
    public function __construct(
        ?string $tag = null,
        ?string $class = null,
        bool $escapeHtml = true
    ) {
        $this->tag = $tag ?? $this->tag;
        $this->class = $class ?? $this->class;
        $this->escapeHtml = $escapeHtml;
    }

    /**
     * Set the wrapper tag for highlighting.
     *
     * @param string $tag HTML tag name (e.g., 'mark', 'span', 'strong')
     */
    public function tag(string $tag): static
    {
        $this->tag = $tag;

        return $this;
    }

    /**
     * Set the CSS class for the highlight wrapper.
     *
     * @param string $class CSS class name(s); an empty string omits the attribute
     */
    public function class(string $class): static
    {
        $this->class = $class;

        return $this;
    }

    /**
     * Set whether to escape HTML in the input text.
     *
     * When disabled, the text is emitted as-is and terms are matched anywhere
     * in it, including inside any markup it contains.
     *
     * @param bool $escape Whether to escape HTML entities
     */
    public function escapeHtml(bool $escape): static
    {
        $this->escapeHtml = $escape;

        return $this;
    }

    /**
     * Set the minimum word length to highlight.
     *
     * @param int $length Minimum characters for a word to be highlighted (values below 1 become 1)
     */
    public function minWordLength(int $length): static
    {
        $this->minWordLength = max(1, $length);

        return $this;
    }

    /**
     * Highlight search terms in text.
     *
     * Terms are located in the raw text; each matched and unmatched segment is
     * then escaped on its own (when escaping is enabled). Matching first and
     * escaping second means a term can never match inside an entity such as
     * "&amp;", and terms containing "<", ">", "&" or quotes still match.
     *
     * @param string $text The text to highlight within
     * @param string $query The search query (space-separated terms)
     * @return string The text with highlighted terms
     */
    public function highlight(string $text, string $query): string
    {
        // Strict comparisons: empty() would also treat the string '0' as empty.
        if ($text === '' || $query === '') {
            return $this->maybeEscape($text);
        }

        $words = $this->extractWords($query);

        if ($words === []) {
            return $this->maybeEscape($text);
        }

        $found = [];

        // Returns false on a PCRE failure (e.g. invalid UTF-8 with the /u flag).
        if (preg_match_all($this->buildPattern($words), $text, $found, PREG_OFFSET_CAPTURE) === false) {
            return $this->maybeEscape($text);
        }

        $output = '';
        $cursor = 0;

        // Offsets reported by PCRE are byte offsets, so the byte-wise
        // substr()/strlen() are the correct tools for slicing here.
        foreach ($found[0] as [$match, $offset]) {
            $output .= $this->maybeEscape(substr($text, $cursor, $offset - $cursor));
            $output .= $this->wrapMatch($this->maybeEscape($match));
            $cursor = $offset + strlen($match);
        }

        return $output.$this->maybeEscape(substr($text, $cursor));
    }

    /**
     * Highlight and extract a snippet around the first match.
     *
     * Falls back to a truncated copy of the text (twice the context length)
     * when the query has no usable terms or none of them occurs in the text.
     *
     * @param string $text The text to search within
     * @param string $query The search query
     * @param int $contextLength Number of characters before/after match (negative values are treated as 0)
     * @return string The snippet with highlighted terms
     */
    public function snippet(string $text, string $query, int $contextLength = self::DEFAULT_CONTEXT_LENGTH): string
    {
        if ($text === '' || $query === '') {
            return $this->maybeEscape($text);
        }

        $contextLength = max(0, $contextLength);
        $words = $this->extractWords($query);
        $firstMatchPos = $words === [] ? null : $this->findFirstMatch($text, $words);

        if ($firstMatchPos === null) {
            return $this->truncate($text, $contextLength * 2);
        }

        // Cut the raw text first; highlight() takes care of escaping.
        return $this->highlight(
            $this->extractContext($text, $firstMatchPos, $contextLength),
            $query
        );
    }

    /**
     * Highlight all matches and return structured data.
     *
     * Useful for programmatic access to match positions and counts. Positions
     * and lengths are character (not byte) offsets into the lower-cased copy of
     * the raw, unescaped text. Each term is searched independently, so
     * occurrences of different terms may overlap.
     *
     * @param string $text The text to search within
     * @param string $query The search query
     * @return array{text: string, matches: array, count: int}
     */
    public function highlightWithMeta(string $text, string $query): array
    {
        $words = ($text === '' || $query === '') ? [] : $this->extractWords($query);

        if ($words === []) {
            return [
                'text' => $this->maybeEscape($text),
                'matches' => [],
                'count' => 0,
            ];
        }

        $lowerText = mb_strtolower($text);
        $matches = [];

        foreach ($words as $word) {
            $length = mb_strlen($word);
            $offset = 0;

            while (($offset = mb_strpos($lowerText, $word, $offset)) !== false) {
                $matches[] = [
                    'word' => $word,
                    'position' => $offset,
                    'length' => $length,
                ];
                // Continue after this occurrence (same-term matches never overlap).
                $offset += $length;
            }
        }

        usort($matches, static fn (array $a, array $b): int => $a['position'] <=> $b['position']);

        return [
            'text' => $this->highlight($text, $query),
            'matches' => $matches,
            'count' => count($matches),
        ];
    }

    /**
     * Extract searchable words from a query string.
     *
     * The query is lower-cased, split on whitespace, stripped of terms shorter
     * than the minimum word length, and de-duplicated so a repeated term is
     * not searched (and counted) twice.
     *
     * @param string $query The search query
     * @return array Array of words to highlight
     */
    protected function extractWords(string $query): array
    {
        $query = trim($query);

        if ($query === '') {
            return [];
        }

        $words = preg_split('/\s+/', mb_strtolower($query)) ?: [];

        return array_values(array_unique(array_filter(
            $words,
            fn (string $word): bool => mb_strlen($word) >= $this->minWordLength
        )));
    }

    /**
     * Build a regex pattern for matching words.
     *
     * The pattern is a case-insensitive, UTF-8 alternation of the literal
     * words; it does not use word boundaries, so words match as substrings.
     *
     * @param array $words Words to match
     * @return string Regex pattern
     */
    protected function buildPattern(array $words): string
    {
        // Alternation takes the first branch that matches, so longer words go
        // first: otherwise "foo" would win over "foobar" at the same position.
        usort($words, static fn (string $a, string $b): int => mb_strlen($b) <=> mb_strlen($a));

        $escaped = array_map(static fn (string $word): string => preg_quote($word, '/'), $words);

        return '/('.implode('|', $escaped).')/iu';
    }

    /**
     * Wrap a match in the highlight tag.
     *
     * @param string $match The matched text, already prepared for output
     * @return string The wrapped text
     */
    protected function wrapMatch(string $match): string
    {
        $classAttr = $this->class !== ''
            ? ' class="'.htmlspecialchars($this->class, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8').'"'
            : '';

        return "<{$this->tag}{$classAttr}>{$match}</{$this->tag}>";
    }

    /**
     * Find the position of the first match in text.
     *
     * @param string $text Text to search
     * @param array $words Lower-cased words to find
     * @return int|null Character position of the earliest match or null
     */
    protected function findFirstMatch(string $text, array $words): ?int
    {
        $lowerText = mb_strtolower($text);
        $firstPos = null;

        foreach ($words as $word) {
            $pos = mb_strpos($lowerText, $word);

            if ($pos === false) {
                continue;
            }

            if ($firstPos === null || $pos < $firstPos) {
                $firstPos = $pos;
            }
        }

        return $firstPos;
    }

    /**
     * Extract context around a position in text.
     *
     * Both edges are nudged to a nearby space (within 10 characters) so the
     * snippet does not start or end in the middle of a word when avoidable.
     *
     * @param string $text The full text
     * @param int $position Center position (character offset of the match)
     * @param int $contextLength Characters before/after
     * @return string Extracted context with ellipsis if truncated
     */
    protected function extractContext(string $text, int $position, int $contextLength): string
    {
        $textLength = mb_strlen($text);
        $contextLength = max(0, $contextLength);
        $position = max(0, min($position, $textLength));

        $start = max(0, $position - $contextLength);
        $end = min($textLength, $position + $contextLength);

        if ($start > 0) {
            // Never look for a boundary beyond the match itself; otherwise a
            // small context length could move the start past the match.
            $searchLimit = min($start + 10, $position);
            $wordStart = mb_strrpos(mb_substr($text, 0, $searchLimit), ' ');

            if ($wordStart !== false && $wordStart >= $start - 10) {
                $start = $wordStart + 1;
            }
        }

        if ($end < $textLength) {
            $wordEnd = mb_strpos($text, ' ', $end);

            if ($wordEnd !== false && $wordEnd <= $end + 10) {
                $end = $wordEnd;
            }
        }

        $context = mb_substr($text, $start, $end - $start);

        $prefix = $start > 0 ? '...' : '';
        $suffix = $end < $textLength ? '...' : '';

        return $prefix.trim($context).$suffix;
    }

    /**
     * Truncate text to a maximum length.
     *
     * Text that already fits is returned whole; otherwise it is cut, pulled
     * back to the last space when that space lies in the final 20% of the cut,
     * and suffixed with "...". The result is escaped when escaping is enabled.
     *
     * @param string $text Text to truncate
     * @param int $length Maximum length
     * @return string Truncated text
     */
    protected function truncate(string $text, int $length): string
    {
        if (mb_strlen($text) <= $length) {
            return $this->maybeEscape($text);
        }

        $truncated = mb_substr($text, 0, $length);
        $lastSpace = mb_strrpos($truncated, ' ');

        if ($lastSpace !== false && $lastSpace > $length * 0.8) {
            $truncated = mb_substr($truncated, 0, $lastSpace);
        }

        return $this->maybeEscape(trim($truncated).'...');
    }

    /**
     * Create a new highlighter with default configuration.
     */
    public static function make(): static
    {
        return new static();
    }

    /**
     * Escape text for HTML output when escaping is enabled; otherwise return
     * it untouched.
     *
     * ENT_SUBSTITUTE replaces invalid byte sequences instead of making
     * htmlspecialchars() return an empty string.
     */
    private function maybeEscape(string $text): string
    {
        return $this->escapeHtml
            ? htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            : $text;
    }
}