$html, 'content_html_clean' => $this->cleanHtml($html), 'content_json' => $this->parseToJson($html), ]; } /** * Clean HTML by removing WordPress-specific cruft. * * - Remove inline styles * - Remove WordPress classes * - Remove empty elements * - Remove block comments * - Preserve semantic structure */ public function cleanHtml(string $html): string { if (empty($html)) { return ''; } // Remove WordPress block comments $html = preg_replace('//s', '', $html); // Remove empty comments $html = preg_replace('//s', '', $html); // Load into DOM $doc = $this->loadHtml($html); if (! $doc) { Log::warning('ContentProcessingService: Failed to parse HTML, falling back to strip_tags', [ 'html_length' => strlen($html), 'html_preview' => substr($html, 0, 200), ]); return strip_tags($html, '