fix(security): require HTMLPurifier for XSS sanitisation

The previous getSanitisedContent() method fell back to strip_tags() when
HTMLPurifier was unavailable. This fallback was insecure as strip_tags()
does not sanitise attributes, allowing XSS via onclick, onerror, and
javascript: URLs.

Changes:
- Created Services/HtmlSanitiser.php using HTMLPurifier as the sole sanitiser
- Added ezyang/htmlpurifier as a required dependency in composer.json
- Added boot-time validation that throws RuntimeException if missing
- Removed insecure strip_tags() fallback from ContentItem model
- Added 30+ unit tests covering XSS attack vectors

Closes SEC-002 from TODO.md

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Snider 2026-01-29 12:34:35 +00:00
parent 5c92e92a29
commit fa4893d064
8 changed files with 1631 additions and 10 deletions

View file

@ -12,6 +12,8 @@ use Illuminate\Cache\RateLimiting\Limit;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\RateLimiter;
use Illuminate\Support\ServiceProvider;
use Core\Mod\Content\Services\HtmlSanitiser;
use RuntimeException;
/**
* Content Module Boot
@ -38,12 +40,31 @@ class Boot extends ServiceProvider
public function register(): void
{
$this->mergeConfigFrom(__DIR__.'/config.php', 'content');
// Register HtmlSanitiser as a singleton for performance
$this->app->singleton(HtmlSanitiser::class);
}
public function boot(): void
{
$this->loadMigrationsFrom(__DIR__.'/Migrations');
$this->configureRateLimiting();
$this->validateSecurityDependencies();
}
/**
* Validate that security-critical dependencies are available.
*
* @throws RuntimeException If HTMLPurifier is not installed
*/
protected function validateSecurityDependencies(): void
{
if (! HtmlSanitiser::isAvailable()) {
throw new RuntimeException(
'core-content requires HTMLPurifier for secure HTML sanitisation. '.
'Install it with: composer require ezyang/htmlpurifier'
);
}
}
/**

View file

@ -16,6 +16,7 @@ use Illuminate\Database\Eloquent\Relations\HasMany;
use Illuminate\Database\Eloquent\SoftDeletes;
use Core\Mod\Content\Enums\ContentType;
use Core\Mod\Content\Observers\ContentItemObserver;
use Core\Mod\Content\Services\HtmlSanitiser;
#[ObservedBy([ContentItemObserver::class])]
class ContentItem extends Model
@ -330,6 +331,10 @@ class ContentItem extends Model
*
* Uses HTMLPurifier to remove XSS vectors while preserving
* safe HTML elements like paragraphs, headings, lists, etc.
*
* SECURITY: This method uses HTMLPurifier which is a required dependency.
* Never fall back to strip_tags() as it does not sanitise attributes
* (e.g., onclick, onerror) which can still execute JavaScript.
*/
public function getSanitisedContent(): string
{
@ -339,15 +344,7 @@ class ContentItem extends Model
return '';
}
// Use the StaticPageSanitiser if available
if (class_exists(\Mod\Bio\Services\StaticPageSanitiser::class)) {
return app(\Mod\Bio\Services\StaticPageSanitiser::class)->sanitiseHtml($content);
}
// Fallback: basic sanitisation using strip_tags with allowed tags
$allowedTags = '<p><br><strong><b><em><i><u><h1><h2><h3><h4><h5><h6><ul><ol><li><a><blockquote><pre><code><img><table><thead><tbody><tr><th><td><div><span><hr>';
return strip_tags($content, $allowedTags);
return app(HtmlSanitiser::class)->sanitise($content);
}
/**

151
Services/HtmlSanitiser.php Normal file
View file

@ -0,0 +1,151 @@
<?php
declare(strict_types=1);
namespace Core\Mod\Content\Services;
use HTMLPurifier;
use HTMLPurifier_Config;
use RuntimeException;
/**
* HTML sanitiser for content rendering.
*
* Uses HTMLPurifier to remove XSS vectors while preserving safe HTML elements.
* This is a security-critical service - all user-generated HTML content must
* be sanitised before rendering.
*
* @see https://htmlpurifier.org/
*/
class HtmlSanitiser
{
private HTMLPurifier $purifier;
/**
* Create a new HTML sanitiser instance.
*
* @throws RuntimeException If HTMLPurifier is not installed
*/
public function __construct()
{
if (! class_exists(HTMLPurifier::class)) {
throw new RuntimeException(
'HTMLPurifier is required for HTML sanitisation. '.
'Install it with: composer require ezyang/htmlpurifier'
);
}
$config = HTMLPurifier_Config::createDefault();
// Allow a safe set of HTML5 elements for content rendering
$config->set('HTML.Allowed', implode(',', [
// Structure
'div[id|class]',
'span[id|class]',
'section[id|class]',
'article[id|class]',
// Text
'h1[id|class]',
'h2[id|class]',
'h3[id|class]',
'h4[id|class]',
'h5[id|class]',
'h6[id|class]',
'p[id|class]',
'br',
'hr[id|class]',
'strong',
'em',
'b',
'i',
'u',
'small',
'mark',
'del',
'ins',
'sub',
'sup',
'code',
'pre[id|class]',
'blockquote[id|class]',
// Lists
'ul[id|class]',
'ol[id|class]',
'li[id|class]',
// Links and media
'a[href|id|class|target|rel]',
'img[src|alt|width|height|id|class]',
'figure[id|class]',
'figcaption[id|class]',
// Tables
'table[id|class]',
'thead[id|class]',
'tbody[id|class]',
'tr[id|class]',
'th[id|class|colspan|rowspan]',
'td[id|class|colspan|rowspan]',
]));
// Safe link targets
$config->set('Attr.AllowedFrameTargets', ['_blank', '_self']);
// Add rel="noopener" to external links for security
$config->set('HTML.Nofollow', true);
$config->set('HTML.TargetNoopener', true);
// Disable cache in development, enable via config in production
$cacheDir = config('content.purifier_cache_dir');
if ($cacheDir && is_dir($cacheDir) && is_writable($cacheDir)) {
$config->set('Cache.SerializerPath', $cacheDir);
} else {
$config->set('Cache.DefinitionImpl', null);
}
// Safe URI schemes only
$config->set('URI.AllowedSchemes', [
'http' => true,
'https' => true,
'mailto' => true,
'tel' => true,
]);
// Do not allow data: URIs (can contain XSS)
$config->set('URI.DisableExternalResources', false);
$config->set('URI.DisableResources', false);
$this->purifier = new HTMLPurifier($config);
}
/**
* Sanitise HTML content to prevent XSS attacks.
*
* This method removes dangerous HTML, JavaScript, and CSS while preserving
* safe formatting elements. Always use this before rendering user content.
*
* @param string $html The raw HTML content to sanitise
* @return string The sanitised HTML, safe for rendering
*/
public function sanitise(string $html): string
{
if (empty($html)) {
return '';
}
return $this->purifier->purify($html);
}
/**
* Check if HTMLPurifier is available.
*
* Use this method to verify the dependency is installed before attempting
* to create a sanitiser instance.
*/
public static function isAvailable(): bool
{
return class_exists(HTMLPurifier::class);
}
}

318
TODO.md Normal file
View file

@ -0,0 +1,318 @@
# TODO - core-content
Production quality improvements for the Content Module.
**Legend:**
- P1: Critical/Security - Must fix immediately
- P2: High priority - Fix soon
- P3: Medium priority - Important improvements
- P4: Low priority - Nice to have
- P5: Nice-to-have - When time permits
- P6+: Future/Backlog - Long-term improvements
---
## P1 - Critical/Security
### SEC-001: Add CSRF protection to webhook endpoints
- **Status:** Open
- **Description:** The webhook endpoint at `POST /api/content/webhooks/{endpoint}` accepts external requests but only validates via HMAC signature. If signature verification is skipped (when no secret is configured), the endpoint is vulnerable.
- **File:** `Controllers/Api/ContentWebhookController.php:205-210`
- **Fix:** Require signature verification always OR add explicit opt-in flag to disable it, with warning logs.
- **Acceptance:** Webhooks without secrets must be explicitly enabled per-endpoint.
### SEC-002: Sanitise HTML content before rendering
- **Status:** Fixed
- **Description:** `ContentItem::getSanitisedContent()` falls back to `strip_tags()` if HTMLPurifier is unavailable. This fallback is insufficient for XSS protection.
- **File:** `Models/ContentItem.php:333-351`
- **Fix:** Always require HTMLPurifier or a robust sanitiser. Add package dependency check in boot.
- **Acceptance:** Content rendering always goes through proper XSS sanitisation.
- **Resolution:** Created `Services/HtmlSanitiser.php` using HTMLPurifier as a required dependency. Added HTMLPurifier to composer.json require. Added boot-time validation that throws RuntimeException if dependency missing. Removed insecure strip_tags() fallback. Added comprehensive XSS prevention tests in `tests/Unit/HtmlSanitiserTest.php`.
### SEC-003: Validate workspace access in MCP handlers
- **Status:** Open
- **Description:** MCP handlers check entitlements but workspace resolution via `orWhere('id', $slug)` could expose content across workspaces if numeric IDs are guessed.
- **File:** `Mcp/Handlers/ContentCreateHandler.php:212-220`, `ContentSearchHandler.php:129-137`
- **Fix:** Add explicit workspace ownership/membership check before returning data.
- **Acceptance:** Users can only access content from workspaces they own or are members of.
### SEC-004: Rate limit preview URL generation
- **Status:** Open
- **Description:** Preview token generation has no rate limiting. An attacker could enumerate valid content IDs by watching response times.
- **File:** `Controllers/ContentPreviewController.php:26-49`
- **Fix:** Add rate limiting to preview generation endpoint.
- **Acceptance:** Preview generation limited to 30/minute per user.
### SEC-005: Validate content_type enum in webhook payloads
- **Status:** Open
- **Description:** Webhook processing accepts arbitrary `content_type` strings from external sources without validation.
- **File:** `Jobs/ProcessContentWebhook.php:288-289`
- **Fix:** Validate against `ContentType` enum before assigning to model.
- **Acceptance:** Invalid content types rejected with clear error message.
---
## P2 - High Priority
### DX-001: Add missing type hints to scope methods
- **Status:** Open
- **Description:** Scope methods like `scopeForWorkspace`, `scopePublished` etc. use `$query` without `Builder` type hint.
- **Files:** `Models/ContentItem.php:147-198`, `Models/ContentBrief.php:181-215`
- **Fix:** Add `\Illuminate\Database\Eloquent\Builder` type hints.
- **Acceptance:** All scope methods have proper return types.
### DX-002: Document search service API response format
- **Status:** Open
- **Description:** `ContentSearchService::formatForApi()` returns a specific structure but it's not documented.
- **File:** `Services/ContentSearchService.php:467-493`
- **Fix:** Add PHPDoc with return type schema or create a Resource class.
- **Acceptance:** API response format documented with example JSON.
### TEST-001: Add integration tests for AI generation pipeline
- **Status:** Open
- **Description:** `AIGatewayService` has no tests. The two-stage Gemini+Claude pipeline is critical but untested.
- **File:** `Services/AIGatewayService.php`
- **Fix:** Add tests with mocked API responses for `generateDraft`, `refineDraft`, `generateAndRefine`.
- **Acceptance:** 80%+ coverage on AIGatewayService with edge case tests.
### TEST-002: Add tests for webhook signature verification
- **Status:** Open
- **Description:** `ContentWebhookEndpoint::verifySignature()` handles multiple formats but isn't fully tested.
- **File:** `Models/ContentWebhookEndpoint.php:204-237`
- **Fix:** Add unit tests for each signature format and grace period behaviour.
- **Acceptance:** Tests cover: sha256= prefix, grace period rotation, empty signature handling.
### PERF-001: Add database index for content search
- **Status:** Open
- **Description:** LIKE-based search on `content_html` has no fulltext index, causing table scans.
- **File:** `Services/ContentSearchService.php:142-162`, `Migrations/0001_01_01_000001_create_content_tables.php`
- **Fix:** Add MySQL fulltext index on title, excerpt, content_markdown columns OR document Meilisearch as required for production.
- **Acceptance:** Search queries under 100ms for 10k+ content items.
### PERF-002: Optimise revision pruning for large datasets
- **Status:** Open
- **Description:** `ContentRevision::pruneAll()` loads all content_item_ids into memory before iterating.
- **File:** `Models/ContentRevision.php:595-609`
- **Fix:** Use `chunk()` or cursor to process in batches.
- **Acceptance:** Pruning handles 100k+ content items without memory issues.
### BUG-001: Fix content_briefs migration schema mismatch
- **Status:** Open
- **Description:** Migration defines `content_briefs` with different columns than model fillable (e.g., `user_id` vs model relationships).
- **File:** `Migrations/0001_01_01_000001_create_content_tables.php:215-238`, `Models/ContentBrief.php:49-75`
- **Fix:** Align migration with actual model usage or add a migration to fix schema.
- **Acceptance:** All ContentBrief columns are used and documented.
### BUG-002: Fix ai_usage migration column naming
- **Status:** Open
- **Description:** Migration creates `feature` column but model uses `purpose`. Creates confusion.
- **File:** `Migrations/0001_01_01_000001_create_content_tables.php:246`, `Models/AIUsage.php:46`
- **Fix:** Add migration to rename column OR update model to use `feature`.
- **Acceptance:** Column name matches model fillable property.
---
## P3 - Medium Priority
### CODE-001: Extract webhook processing logic into service
- **Status:** Open
- **Description:** `ProcessContentWebhook` job contains 500+ lines of business logic that should be in a service.
- **File:** `Jobs/ProcessContentWebhook.php`
- **Fix:** Create `ContentWebhookProcessingService` with methods for each event type.
- **Acceptance:** Job is under 100 lines, delegates to service.
### CODE-002: Create ContentBriefResource for API responses
- **Status:** Open
- **Description:** Controllers manually format brief responses. A Resource class would ensure consistency.
- **File:** `Controllers/Api/ContentBriefController.php` references `ContentBriefResource` which may not exist.
- **Fix:** Create or verify `Resources/ContentBriefResource.php` exists with proper formatting.
- **Acceptance:** All brief API responses use the Resource class.
### CODE-003: Consolidate workspace resolution logic
- **Status:** Open
- **Description:** Three different `resolveWorkspace()` methods exist with similar but not identical logic.
- **Files:** `Controllers/Api/ContentSearchController.php`, `Mcp/Handlers/*`, `Services/ContentRender.php`
- **Fix:** Create trait or shared helper in core-tenant.
- **Acceptance:** Single source of truth for workspace resolution.
### TEST-003: Add tests for revision diff algorithm
- **Status:** Open
- **Description:** `ContentRevision::getDiff()` and LCS algorithm are complex but only lightly tested.
- **File:** `Models/ContentRevision.php:233-509`
- **Fix:** Add unit tests for edge cases: empty content, identical content, very long content.
- **Acceptance:** Diff algorithm has 90%+ coverage with edge cases documented.
### TEST-004: Add webhook retry service tests
- **Status:** Open
- **Description:** `WebhookRetryService` has retry logic with exponential backoff but no tests.
- **File:** `Services/WebhookRetryService.php`
- **Fix:** Add tests for retry scheduling, backoff intervals, exhaustion handling.
- **Acceptance:** Full coverage of retry state transitions.
### FEAT-001: Add content scheduling command
- **Status:** Open
- **Description:** `PublishScheduledContent` command is registered but implementation needs verification.
- **File:** `Console/Commands/PublishScheduledContent.php`
- **Fix:** Verify command works, add scheduler entry documentation.
- **Acceptance:** Scheduled content publishes automatically at the correct time.
### FEAT-002: Add media upload validation
- **Status:** Open
- **Description:** `ContentMediaController` store method should validate file types, sizes, dimensions.
- **File:** `Controllers/Api/ContentMediaController.php`
- **Fix:** Add comprehensive validation rules for media uploads.
- **Acceptance:** Reject files over size limit, invalid types, malformed images.
### FEAT-003: Add bulk operations for content items
- **Status:** Open
- **Description:** No bulk delete, bulk status change, or bulk category assignment endpoints.
- **Files:** API routes, new controller methods needed
- **Fix:** Add bulk endpoints with proper authorisation and rate limiting.
- **Acceptance:** Can bulk-update up to 50 items per request.
---
## P4 - Low Priority
### DX-003: Add IDE helper annotations to models
- **Status:** Open
- **Description:** Models lack `@property` annotations for dynamic attributes like `status_color`.
- **Files:** All models in `Models/`
- **Fix:** Add comprehensive `@property` PHPDoc blocks for all magic attributes.
- **Acceptance:** IDE autocomplete works for all model properties.
### DX-004: Document configuration options
- **Status:** Open
- **Description:** `config.php` has comments but no comprehensive documentation of all options and their effects.
- **File:** `config.php`
- **Fix:** Add CLAUDE.md section or dedicated config docs explaining each option.
- **Acceptance:** Every config option documented with type, default, and example.
### CODE-004: Remove deprecated WordPress-specific code paths
- **Status:** Open
- **Description:** Multiple methods have WordPress-specific handling that may be unused.
- **Files:** `Models/ContentItem.php` (wp_id, wp_guid), various scopes
- **Fix:** Audit usage, add deprecation notices if still needed, or remove.
- **Acceptance:** Clear documentation of what is deprecated vs maintained.
### CODE-005: Standardise error response format
- **Status:** Open
- **Description:** Error responses vary: `['error' => ...]`, `['message' => ...]`, different status codes.
- **Files:** All controllers in `Controllers/Api/`
- **Fix:** Use consistent error format: `{error: string, code: string, message: string}`.
- **Acceptance:** All error responses follow documented schema.
### PERF-003: Add eager loading hints to API responses
- **Status:** Open
- **Description:** Some API responses trigger N+1 queries for related data.
- **Files:** `Controllers/Api/ContentBriefController.php:31-77`
- **Fix:** Add `->with(['workspace', 'contentItem'])` where appropriate.
- **Acceptance:** No N+1 queries in API responses (verified with debugbar).
### TEST-005: Add factory states for all content statuses
- **Status:** Open
- **Description:** Factory states exist but may not cover all status/type combinations.
- **Files:** `Database/Factories/*.php` (if they exist, or in test setup)
- **Fix:** Ensure factories have states for: draft, publish, future, private, pending, trash.
- **Acceptance:** Tests can easily create content in any status.
---
## P5 - Nice to Have
### FEAT-004: Add content versioning comparison UI support
- **Status:** Open
- **Description:** `ContentRevision::getDiff()` returns data but no documented UI integration.
- **File:** `Models/ContentRevision.php`
- **Fix:** Document how to integrate diff data with frontend diff viewer.
- **Acceptance:** Example Livewire component or documentation for diff display.
### FEAT-005: Add webhook event deduplication
- **Status:** Open
- **Description:** Same webhook could be received multiple times (network retry). No dedup.
- **File:** `Jobs/ProcessContentWebhook.php`
- **Fix:** Add deduplication based on payload hash + timestamp window.
- **Acceptance:** Duplicate webhooks within 5 minutes are skipped.
### FEAT-006: Add content analytics tracking
- **Status:** Open
- **Description:** No tracking of content views, engagement, or performance metrics.
- **Files:** New feature needed
- **Fix:** Integrate with core-analytics or add simple view tracking.
- **Acceptance:** Can see view counts and basic metrics per content item.
### CODE-006: Add event dispatching for content lifecycle
- **Status:** Open
- **Description:** Content creation/update/publish doesn't dispatch domain events for other modules.
- **Files:** `Models/ContentItem.php`, `Observers/ContentItemObserver.php`
- **Fix:** Dispatch events like `ContentPublished`, `ContentUpdated` etc.
- **Acceptance:** Other modules can listen for content events.
### DOCS-001: Add API documentation
- **Status:** Open
- **Description:** API endpoints lack OpenAPI/Swagger documentation.
- **Files:** `routes/api.php`
- **Fix:** Add Scribe or OpenAPI annotations for all endpoints.
- **Acceptance:** OpenAPI spec can be generated and used in API clients.
---
## P6 - Future/Backlog
### FEAT-007: Add content workflow/approval system
- **Status:** Backlog
- **Description:** No formal review/approval workflow for content before publishing.
- **Fix:** Add ContentWorkflow model with states and transitions.
### FEAT-008: Add content localisation/translation support
- **Status:** Backlog
- **Description:** No i18n support for multilingual content.
- **Fix:** Add locale field and translation linking to ContentItem.
### FEAT-009: Add content A/B testing
- **Status:** Backlog
- **Description:** No ability to test content variations.
- **Fix:** Add ContentVariant model for headline/content testing.
### PERF-004: Add content caching layer
- **Status:** Backlog
- **Description:** CDN purge exists but no server-side caching strategy documented.
- **Fix:** Document caching strategy, add Redis caching for hot content.
### CODE-007: Extract prompts to database-driven system
- **Status:** Backlog
- **Description:** AI prompts are hardcoded in `AIGatewayService`. Prompts table exists but unused for this.
- **File:** `Services/AIGatewayService.php:226-525`
- **Fix:** Load prompts from database, allow admin editing.
---
## Completed
### SEC-002: HTML sanitisation fallback vulnerability (2026-01-29)
- Created `Services/HtmlSanitiser.php` using HTMLPurifier
- Added `ezyang/htmlpurifier` as required dependency in composer.json
- Updated `ContentItem::getSanitisedContent()` to use the new service
- Added boot-time validation to throw exception if HTMLPurifier is missing
- Removed insecure `strip_tags()` fallback that allowed XSS via event handlers
- Added 30+ unit tests covering XSS attack vectors and safe HTML preservation
---
## Notes
### Dependencies
- Requires `core-php` for events and base infrastructure
- Requires `core-tenant` for workspace and user models
- Requires `ezyang/htmlpurifier` for XSS sanitisation (security-critical)
- Optional: `core-agentic` for AI services (GeminiService, ClaudeService)
- Optional: `core-mcp` for MCP tool registration
### Testing
Run tests with: `composer test` from package root.
Run single test: `./vendor/bin/pest --filter=ContentSearchServiceTest`
### Last Audit
- **Date:** 2026-01-29
- **By:** Claude Code (core-content audit)
- **Files Reviewed:** ~70 PHP files

View file

@ -5,7 +5,8 @@
"license": "EUPL-1.2",
"require": {
"php": "^8.2",
"host-uk/core": "dev-main"
"host-uk/core": "dev-main",
"ezyang/htmlpurifier": "^4.17"
},
"require-dev": {
"laravel/pint": "^1.18",

422
docs/architecture.md Normal file
View file

@ -0,0 +1,422 @@
---
title: Architecture
description: Technical architecture of the core-content package
updated: 2026-01-29
---
# Architecture
The `core-content` package provides headless CMS functionality for the Host UK platform. It handles content management, AI-powered generation, revision history, webhooks for external CMS integration, and search capabilities.
## Package Overview
**Namespace:** `Core\Mod\Content\`
**Entry Point:** `Boot.php` (Laravel Service Provider)
**Dependencies:**
- `core-php` (Foundation framework, events)
- `core-tenant` (Workspaces, users, entitlements)
- Optional: `core-agentic` (AI services for content generation)
- Optional: `core-mcp` (MCP tool handlers)
## Directory Structure
```
core-content/
├── Boot.php # Service provider with event listeners
├── config.php # Package configuration
├── Models/ # Eloquent models (10 models)
├── Services/ # Business logic services
├── Controllers/ # API and web controllers
│ └── Api/ # REST API controllers
├── Jobs/ # Queue jobs
├── Mcp/ # MCP tool handlers
│ └── Handlers/ # Individual MCP tools
├── Concerns/ # Traits
├── Console/ # Artisan commands
│ └── Commands/ # Command implementations
├── Enums/ # PHP enums
├── Migrations/ # Database migrations
├── Observers/ # Model observers
├── routes/ # Route definitions
├── View/ # Livewire components and Blade views
│ ├── Modal/ # Livewire components
│ └── Blade/ # Blade templates
├── tests/ # Test suite
└── docs/ # Documentation
```
## Core Concepts
### Content Items
The primary content model. Supports multiple content types and sources:
```php
// Content types (where content originates)
enum ContentType: string {
case NATIVE = 'native'; // Created in Host Hub editor
case HOSTUK = 'hostuk'; // Alias for native (backwards compat)
case SATELLITE = 'satellite'; // Per-service content
case WORDPRESS = 'wordpress'; // Legacy synced content
}
```
Content items belong to workspaces and have:
- Title, slug, excerpt, content (HTML/Markdown/JSON)
- Status (draft, publish, future, private, pending)
- Author and last editor tracking
- Revision history
- Taxonomy (categories, tags)
- SEO metadata
- Preview tokens for sharing unpublished content
- CDN cache invalidation tracking
### Content Briefs
Briefs drive AI-powered content generation. They define what content to create:
```php
// Brief content types (what to generate)
enum BriefContentType: string {
case HELP_ARTICLE = 'help_article'; // Documentation
case BLOG_POST = 'blog_post'; // Blog articles
case LANDING_PAGE = 'landing_page'; // Marketing pages
case SOCIAL_POST = 'social_post'; // Social media
}
```
Brief workflow: `pending` -> `queued` -> `generating` -> `review` -> `published`
### Revisions
Every content change creates an immutable revision snapshot. Revisions support:
- Change type tracking (edit, autosave, restore, publish)
- Word/character count tracking
- Side-by-side diff comparison with LCS algorithm
- Configurable retention policies (max count, max age)
## Service Layer
### AIGatewayService
Orchestrates two-stage AI content generation:
1. **Stage 1: Draft (Gemini)** - Fast, cost-effective initial generation
2. **Stage 2: Refine (Claude)** - Quality refinement and brand voice alignment
```php
$gateway = app(AIGatewayService::class);
// Two-stage pipeline
$result = $gateway->generateAndRefine($brief);
// Or individual stages
$draft = $gateway->generateDraft($brief);
$refined = $gateway->refineDraft($brief, $draftContent);
// Direct Claude generation (skip Gemini)
$content = $gateway->generateDirect($brief);
```
### ContentSearchService
Full-text search with multiple backend support:
```php
// Backends (configured via CONTENT_SEARCH_BACKEND)
const BACKEND_DATABASE = 'database'; // LIKE queries with relevance
const BACKEND_SCOUT_DATABASE = 'scout_database'; // Laravel Scout
const BACKEND_MEILISEARCH = 'meilisearch'; // Laravel Scout + Meilisearch
```
Features:
- Relevance scoring (title > slug > excerpt > content)
- Filters: type, status, category, tag, date range, content_type
- Autocomplete suggestions
- Re-indexing support for Scout backends
### WebhookRetryService
Handles failed webhook processing with exponential backoff:
```
Retry intervals: 1m, 5m, 15m, 1h, 4h
Max retries: 5 (configurable per webhook)
```
### ContentRender
Public-facing content renderer with caching:
- Homepage, blog listing, post, page rendering
- Cache TTL: 1 hour production, 1 minute development
- Cache key sanitisation for special characters
### CdnPurgeService
CDN cache invalidation via Bunny CDN:
- Triggered by ContentItemObserver on publish/update
- URL-based and tag-based purging
- Workspace-level cache clearing
## Event-Driven Architecture
The package uses the event-driven module loading pattern from `core-php`:
```php
class Boot extends ServiceProvider
{
public static array $listens = [
WebRoutesRegistering::class => 'onWebRoutes',
ApiRoutesRegistering::class => 'onApiRoutes',
ConsoleBooting::class => 'onConsole',
McpToolsRegistering::class => 'onMcpTools',
];
}
```
Handlers register:
- **Web Routes:** Public blog, help pages, content preview
- **API Routes:** REST API for briefs, media, search, generation
- **Console:** Artisan commands for scheduling, pruning
- **MCP Tools:** AI agent content management tools
## API Structure
### Authenticated Endpoints (Session or API Key)
```
# Content Briefs
GET /api/content/briefs # List briefs
POST /api/content/briefs # Create brief
GET /api/content/briefs/{id} # Get brief
PUT /api/content/briefs/{id} # Update brief
DELETE /api/content/briefs/{id} # Delete brief
POST /api/content/briefs/bulk # Bulk create
GET /api/content/briefs/next # Next ready for processing
# AI Generation (rate limited: 10/min)
POST /api/content/generate/draft # Generate draft (Gemini)
POST /api/content/generate/refine # Refine draft (Claude)
POST /api/content/generate/full # Full pipeline
POST /api/content/generate/social # Social posts from content
# Content Search (rate limited: 60/min)
GET /api/content/search # Full-text search
GET /api/content/search/suggest # Autocomplete
GET /api/content/search/info # Backend info
POST /api/content/search/reindex # Trigger re-index
# Revisions
GET /api/content/items/{id}/revisions # List revisions
GET /api/content/revisions/{id} # Get revision
POST /api/content/revisions/{id}/restore # Restore revision
GET /api/content/revisions/{id}/compare/{other} # Compare
# Preview
POST /api/content/items/{id}/preview/generate # Generate preview link
DELETE /api/content/items/{id}/preview/revoke # Revoke preview link
```
### Public Endpoints
```
# Webhooks (signature verified, no auth)
POST /api/content/webhooks/{endpoint} # Receive external webhooks
# Web Routes
GET /blog # Blog listing
GET /blog/{slug} # Blog post
GET /help # Help centre
GET /help/{slug} # Help article
GET /content/preview/{id} # Preview content
```
## Rate Limiting
Defined in `Boot::configureRateLimiting()`:
| Limiter | Authenticated | Unauthenticated |
|---------|---------------|-----------------|
| `content-generate` | 10/min per user/workspace | 2/min per IP |
| `content-briefs` | 30/min per user | 5/min per IP |
| `content-webhooks` | 60/min per endpoint | 30/min per IP |
| `content-search` | Configurable (default 60/min) | 20/min per IP |
## MCP Tools
Seven MCP tools for AI agent integration:
| Tool | Description |
|------|-------------|
| `content_list` | List content items with filters |
| `content_read` | Read content by ID or slug |
| `content_search` | Full-text search |
| `content_create` | Create new content |
| `content_update` | Update existing content |
| `content_delete` | Soft delete content |
| `content_taxonomies` | List categories and tags |
All tools:
- Require workspace resolution
- Check entitlements (`content.mcp_access`, `content.items`)
- Log actions to MCP session
- Return structured responses
## Data Flow
### Content Creation via MCP
```
Agent Request
ContentCreateHandler::handle()
resolveWorkspace() → Workspace model
checkEntitlement() → EntitlementService
ContentItem::create()
createRevision() → ContentRevision
recordUsage() → EntitlementService
Response with content ID
```
### Webhook Processing
```
External CMS
POST /api/content/webhooks/{endpoint}
ContentWebhookController::receive()
Verify signature → ContentWebhookEndpoint::verifySignature()
Check type allowed → ContentWebhookEndpoint::isTypeAllowed()
Create ContentWebhookLog
Dispatch ProcessContentWebhook job
Job::handle()
Process based on event type (wordpress.*, cms.*, generic.*)
Create/Update/Delete ContentItem
Mark log completed
```
### AI Generation Pipeline
```
ContentBrief
GenerateContentJob dispatched
Stage 1: AIGatewayService::generateDraft()
GeminiService::generate() → Draft content
Brief::markDraftComplete()
Stage 2: AIGatewayService::refineDraft()
ClaudeService::generate() → Refined content
Brief::markRefined()
AIUsage records created for each stage
```
## Configuration
Key settings in `config.php`:
```php
return [
'generation' => [
'default_timeout' => env('CONTENT_GENERATION_TIMEOUT', 300),
'timeouts' => [
'help_article' => 180,
'blog_post' => 240,
'landing_page' => 300,
'social_post' => 60,
],
'max_retries' => 3,
'backoff' => [30, 60, 120],
],
'revisions' => [
'max_per_item' => env('CONTENT_MAX_REVISIONS', 50),
'max_age_days' => 180,
'preserve_published' => true,
],
'cache' => [
'ttl' => env('CONTENT_CACHE_TTL', 3600),
'prefix' => 'content:render',
],
'search' => [
'backend' => env('CONTENT_SEARCH_BACKEND', 'database'),
'min_query_length' => 2,
'max_per_page' => 50,
'default_per_page' => 20,
'rate_limit' => 60,
],
];
```
## Database Schema
### Primary Tables
| Table | Purpose |
|-------|---------|
| `content_items` | Content storage (posts, pages) |
| `content_revisions` | Version history |
| `content_taxonomies` | Categories and tags |
| `content_item_taxonomy` | Pivot table |
| `content_media` | Media attachments |
| `content_authors` | Author profiles |
| `content_briefs` | AI generation briefs |
| `content_tasks` | Scheduled content tasks |
| `content_webhook_endpoints` | Webhook configurations |
| `content_webhook_logs` | Webhook processing logs |
| `ai_usage` | AI API usage tracking |
| `prompts` | AI prompt templates |
| `prompt_versions` | Prompt version history |
### Key Indexes
- `content_items`: Composite indexes on `(workspace_id, slug, type)`, `(workspace_id, status, type)`, `(workspace_id, status, content_type)`
- `content_revisions`: Index on `(content_item_id, revision_number)`
- `content_webhook_logs`: Index on `(workspace_id, status)`, `(status, created_at)`
## Extension Points
### Adding New Content Types
1. Add value to `ContentType` enum
2. Update `ContentType::isNative()` if applicable
3. Add any type-specific scopes to `ContentItem`
### Adding New AI Generation Types
1. Add value to `BriefContentType` enum
2. Add timeout to `config.php` generation.timeouts
3. Add prompt in `AIGatewayService::getDraftSystemPrompt()`
### Adding New Webhook Event Types
1. Add to `ContentWebhookEndpoint::ALLOWED_TYPES`
2. Add handler in `ProcessContentWebhook::processWordPress()` or `processCms()`
3. Add event type mapping in `ContentWebhookController::normaliseEventType()`
### Adding New MCP Tools
1. Create handler in `Mcp/Handlers/` implementing `McpToolHandler`
2. Define `schema()` with tool name, description, input schema
3. Implement `handle()` with workspace resolution and entitlement checks
4. Register in `Boot::onMcpTools()`

389
docs/security.md Normal file
View file

@ -0,0 +1,389 @@
---
title: Security
description: Security considerations and audit notes for core-content
updated: 2026-01-29
---
# Security
This document covers security considerations, known risks, and recommended mitigations for the `core-content` package.
## Authentication and Authorisation
### API Authentication
The content API supports two authentication methods:
1. **Session Authentication** (`auth` middleware)
- For browser-based access
- CSRF protection via Laravel's standard middleware
2. **API Key Authentication** (`api.auth` middleware)
- For programmatic access
- Keys prefixed with `hk_`
- Scope enforcement via `api.scope.enforce` middleware
### Webhook Authentication
Webhooks use HMAC signature verification instead of session/API key auth:
```php
// Signature verification in ContentWebhookEndpoint
public function verifySignature(string $payload, ?string $signature): bool
{
$expectedSignature = hash_hmac('sha256', $payload, $this->secret);
return hash_equals($expectedSignature, $signature);
}
```
**Supported signature headers:**
- `X-Signature`
- `X-Hub-Signature-256` (GitHub format)
- `X-WP-Webhook-Signature` (WordPress format)
- `X-Content-Signature`
- `Signature`
### MCP Tool Authentication
MCP tools authenticate via the MCP session context. Workspace access is verified through:
- Workspace resolution (by slug or ID)
- Entitlement checks (`content.mcp_access`, `content.items`)
## Known Security Considerations
### HIGH: HTML Sanitisation Fallback
**Location:** `Models/ContentItem.php:333-351`
**Issue:** The `getSanitisedContent()` method falls back to `strip_tags()` if HTMLPurifier is unavailable. This is insufficient for XSS protection.
```php
// Current fallback (insufficient)
$allowedTags = '<p><br><strong>...<a>...';
return strip_tags($content, $allowedTags);
```
**Risk:** XSS attacks via crafted HTML in content body.
**Mitigation:**
1. Ensure HTMLPurifier is installed in production
2. Add package check in boot to fail loudly if missing
3. Consider using `voku/anti-xss` as a lighter alternative
### HIGH: Webhook Signature Optional
**Location:** `Models/ContentWebhookEndpoint.php:205-210`
**Issue:** When no secret is configured, signature verification is skipped:
```php
if (empty($this->secret)) {
return true; // Accepts all requests
}
```
**Risk:** Unauthenticated webhook injection if endpoint has no secret.
**Mitigation:**
1. Require secrets for all production endpoints
2. Add explicit `allow_unsigned` flag if intentional
3. Log warning when unsigned webhooks are accepted
4. Rate limit unsigned endpoints more aggressively
### MEDIUM: Workspace Access in MCP Handlers
**Location:** `Mcp/Handlers/*.php`
**Issue:** Workspace resolution allows lookup by ID:
```php
return Workspace::where('slug', $slug)
->orWhere('id', $slug)
->first();
```
**Risk:** If an attacker knows a workspace ID, they could potentially access content without being a workspace member.
**Mitigation:**
1. Always verify workspace membership after resolution
2. Use entitlement checks (already present but verify coverage)
3. Consider removing ID-based lookup for MCP
### MEDIUM: Preview Token Enumeration
**Location:** `Controllers/ContentPreviewController.php`
**Issue:** No rate limiting on preview token generation endpoint. An attacker could probe for valid content IDs.
**Mitigation:**
1. Add rate limiting (30/min per user)
2. Use constant-time responses regardless of content existence
3. Consider using UUIDs instead of sequential IDs for preview URLs
### LOW: Webhook Payload Content Types
**Location:** `Jobs/ProcessContentWebhook.php:288-289`
**Issue:** Content type from external webhook is assigned directly:
```php
$contentItem->content_type = ContentType::NATIVE;
```
**Risk:** External systems could potentially inject invalid content types.
**Mitigation:**
1. Validate against `ContentType` enum
2. Default to a safe type if validation fails
3. Log invalid types for monitoring
## Input Validation
### API Request Validation
All API controllers use Laravel's validation:
```php
$validated = $request->validate([
'q' => 'required|string|min:2|max:500',
'type' => 'nullable|string|in:post,page',
'status' => 'nullable',
// ...
]);
```
**Validated inputs:**
- Search queries (min/max length, string type)
- Content types (enum validation)
- Pagination (min/max values)
- Date ranges (date format, logical order)
### MCP Input Validation
MCP handlers validate via JSON schema:
```php
'inputSchema' => [
'type' => 'object',
'properties' => [
'workspace' => ['type' => 'string'],
'title' => ['type' => 'string'],
'type' => ['type' => 'string', 'enum' => ['post', 'page']],
],
'required' => ['workspace', 'title'],
]
```
### Webhook Payload Validation
Webhook payloads undergo:
- JSON decode validation
- Event type normalisation
- Content ID extraction with fallbacks
**Note:** Payload content is stored in JSON column without full validation. Processing logic handles missing/invalid fields gracefully.
## Rate Limiting
### Configured Limiters
| Endpoint | Auth | Unauthenticated | Key |
|----------|------|-----------------|-----|
| AI Generation | 10/min | 2/min | `content-generate` |
| Brief Creation | 30/min | 5/min | `content-briefs` |
| Webhooks | 60/min | 30/min | `content-webhooks` |
| Search | 60/min | 20/min | `content-search` |
### Rate Limit Bypass Risks
1. **IP Spoofing:** Ensure `X-Forwarded-For` handling is configured correctly
2. **Workspace Switching:** Workspace-based limits should use user ID as fallback
3. **API Key Sharing:** Each key should have independent limits
## Data Protection
### Sensitive Data Handling
**Encrypted at rest:**
- `ContentWebhookEndpoint.secret` (cast to `encrypted`)
- `ContentWebhookEndpoint.previous_secret` (cast to `encrypted`)
**Hidden from serialisation:**
- Webhook secrets (via `$hidden` property)
### PII Considerations
Content may contain PII in:
- Article body content
- Author information
- Webhook payloads
**Recommendations:**
1. Implement content retention policies
2. Add GDPR data export/deletion support
3. Log access to PII-containing content
## Webhook Security
### Circuit Breaker
Endpoints automatically disable after 10 consecutive failures:
```php
const MAX_FAILURES = 10;
public function incrementFailureCount(): void
{
$this->increment('failure_count');
if ($this->failure_count >= self::MAX_FAILURES) {
$this->update(['is_enabled' => false]);
}
}
```
### Secret Rotation
Grace period support for secret rotation:
```php
public function isInGracePeriod(): bool
{
// Accepts both current and previous secret during grace
}
```
Default grace period: 24 hours
### Allowed Event Types
Endpoints can restrict which event types they accept:
```php
const ALLOWED_TYPES = [
'wordpress.post_created',
'wordpress.post_updated',
// ...
'generic.payload',
];
```
Wildcard support: `wordpress.*` matches all WordPress events.
## Content Security
### XSS Prevention
1. **Input:** Content stored as-is to preserve formatting
2. **Output:** `getSanitisedContent()` for public rendering
3. **Admin:** Trusted content displayed with proper escaping
**Blade template guidelines:**
- Use `{{ $title }}` for plain text (auto-escaped)
- Use `{!! $content !!}` only for sanitised HTML
- Comments document which fields need which treatment
### SQL Injection
All database queries use:
- Eloquent ORM (parameterised queries)
- Query builder with bindings
- No raw SQL with user input
### CSRF Protection
Web routes include CSRF middleware automatically. API routes exempt (use API key auth).
## Audit Logging
### Logged Events
- Webhook receipt and processing
- AI generation requests and results
- Content creation/update/deletion via MCP
- CDN cache purges
- Authentication failures
### Log Levels
| Event | Level |
|-------|-------|
| Webhook signature failure | WARNING |
| Circuit breaker triggered | WARNING |
| Processing failure | ERROR |
| Successful operations | INFO |
| Skipped operations | DEBUG |
## Recommendations
### Immediate (P1)
1. [ ] Require HTMLPurifier or equivalent in production
2. [ ] Make webhook signature verification mandatory
3. [ ] Add rate limiting to preview generation
4. [ ] Validate content_type from webhook payloads
### Short-term (P2)
1. [ ] Add comprehensive audit logging
2. [ ] Implement content access logging
3. [ ] Add IP allowlisting option for webhooks
4. [ ] Create security-focused test suite
### Long-term (P3+)
1. [ ] Implement content encryption at rest option
2. [ ] Add GDPR compliance features
3. [ ] Create security monitoring dashboard
4. [ ] Add anomaly detection for webhook patterns
## Security Testing
### Manual Testing Checklist
```
[ ] Verify webhook signature rejection with invalid signature
[ ] Test rate limiting enforcement
[ ] Confirm XSS payloads are sanitised
[ ] Verify workspace isolation in API responses
[ ] Test preview token expiration
[ ] Verify CSRF protection on web routes
[ ] Test SQL injection attempts in search
[ ] Verify file type validation on media uploads
```
### Automated Testing
```bash
# Run security-focused tests
./vendor/bin/pest --filter=Security
# Check for common vulnerabilities
./vendor/bin/pint --test # Code style (includes some security patterns)
```
## Incident Response
### Webhook Compromise
1. Disable affected endpoint
2. Rotate all secrets
3. Review webhook logs for suspicious patterns
4. Regenerate secrets for all endpoints
### Content Injection
1. Identify affected content items
2. Restore from revision history
3. Review webhook source
4. Add additional validation
### API Key Leak
1. Revoke compromised key
2. Review access logs
3. Generate new key with reduced scope
4. Monitor for unauthorised access
## Contact
Security issues should be reported to the security team. Do not create public issues for security vulnerabilities.

View file

@ -0,0 +1,322 @@
<?php
declare(strict_types=1);
namespace Core\Mod\Content\Tests\Unit;
use Core\Mod\Content\Services\HtmlSanitiser;
use Tests\TestCase;
/**
* Security tests for HTML sanitisation.
*
* These tests verify that XSS attack vectors are properly neutralised
* while preserving safe HTML formatting.
*/
class HtmlSanitiserTest extends TestCase
{
protected HtmlSanitiser $sanitiser;
protected function setUp(): void
{
parent::setUp();
$this->sanitiser = new HtmlSanitiser;
}
// -------------------------------------------------------------------------
// XSS Attack Prevention Tests
// -------------------------------------------------------------------------
public function test_removes_script_tags(): void
{
$malicious = '<p>Hello</p><script>alert("XSS")</script><p>World</p>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('<script>', $result);
$this->assertStringNotContainsString('alert', $result);
$this->assertStringContainsString('<p>Hello</p>', $result);
$this->assertStringContainsString('<p>World</p>', $result);
}
public function test_removes_onclick_attributes(): void
{
$malicious = '<a href="#" onclick="alert(\'XSS\')">Click me</a>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('onclick', $result);
$this->assertStringContainsString('Click me', $result);
}
public function test_removes_onerror_attributes(): void
{
$malicious = '<img src="x" onerror="alert(\'XSS\')">';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('onerror', $result);
$this->assertStringNotContainsString('alert', $result);
}
public function test_removes_onload_attributes(): void
{
$malicious = '<body onload="alert(\'XSS\')">';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('onload', $result);
$this->assertStringNotContainsString('alert', $result);
}
public function test_removes_javascript_protocol_in_href(): void
{
$malicious = '<a href="javascript:alert(\'XSS\')">Click me</a>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('javascript:', $result);
$this->assertStringContainsString('Click me', $result);
}
public function test_removes_javascript_protocol_in_src(): void
{
$malicious = '<img src="javascript:alert(\'XSS\')">';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('javascript:', $result);
}
public function test_removes_data_uri_xss(): void
{
$malicious = '<a href="data:text/html,<script>alert(\'XSS\')</script>">Click</a>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('data:text/html', $result);
$this->assertStringNotContainsString('<script>', $result);
}
public function test_removes_style_expression_xss(): void
{
$malicious = '<div style="background:url(javascript:alert(\'XSS\'))">Test</div>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('javascript:', $result);
$this->assertStringContainsString('Test', $result);
}
public function test_removes_svg_xss(): void
{
$malicious = '<svg onload="alert(\'XSS\')"><circle r="50"/></svg>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('<svg', $result);
$this->assertStringNotContainsString('onload', $result);
}
public function test_removes_iframe_by_default(): void
{
$malicious = '<iframe src="https://evil.com"></iframe>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('<iframe', $result);
}
public function test_removes_form_action_xss(): void
{
$malicious = '<form action="javascript:alert(\'XSS\')"><input type="submit"></form>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('javascript:', $result);
$this->assertStringNotContainsString('<form', $result);
}
public function test_removes_meta_refresh_xss(): void
{
$malicious = '<meta http-equiv="refresh" content="0;url=javascript:alert(\'XSS\')">';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('<meta', $result);
$this->assertStringNotContainsString('javascript:', $result);
}
public function test_removes_object_tag(): void
{
$malicious = '<object data="data:text/html,<script>alert(\'XSS\')</script>"></object>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('<object', $result);
$this->assertStringNotContainsString('<script>', $result);
}
public function test_removes_embed_tag(): void
{
$malicious = '<embed src="javascript:alert(\'XSS\')">';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('<embed', $result);
$this->assertStringNotContainsString('javascript:', $result);
}
public function test_removes_base_tag(): void
{
$malicious = '<base href="javascript:alert(\'XSS\')//"/>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('<base', $result);
}
// -------------------------------------------------------------------------
// Safe HTML Preservation Tests
// -------------------------------------------------------------------------
public function test_preserves_paragraphs(): void
{
$html = '<p>Hello World</p>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('<p>Hello World</p>', $result);
}
public function test_preserves_headings(): void
{
$html = '<h1>Title</h1><h2>Subtitle</h2><h3>Section</h3>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('<h1>Title</h1>', $result);
$this->assertStringContainsString('<h2>Subtitle</h2>', $result);
$this->assertStringContainsString('<h3>Section</h3>', $result);
}
public function test_preserves_formatting(): void
{
$html = '<p><strong>Bold</strong> and <em>italic</em> and <u>underline</u></p>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('<strong>Bold</strong>', $result);
$this->assertStringContainsString('<em>italic</em>', $result);
$this->assertStringContainsString('<u>underline</u>', $result);
}
public function test_preserves_lists(): void
{
$html = '<ul><li>Item 1</li><li>Item 2</li></ul>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('<ul>', $result);
$this->assertStringContainsString('<li>Item 1</li>', $result);
$this->assertStringContainsString('<li>Item 2</li>', $result);
}
public function test_preserves_safe_links(): void
{
$html = '<a href="https://example.com">Link</a>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('href="https://example.com"', $result);
$this->assertStringContainsString('Link</a>', $result);
}
public function test_preserves_mailto_links(): void
{
$html = '<a href="mailto:test@example.com">Email</a>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('mailto:test@example.com', $result);
}
public function test_preserves_tel_links(): void
{
$html = '<a href="tel:+1234567890">Call</a>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('tel:+1234567890', $result);
}
public function test_preserves_safe_images(): void
{
$html = '<img src="https://example.com/image.jpg" alt="Test image">';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('src="https://example.com/image.jpg"', $result);
$this->assertStringContainsString('alt="Test image"', $result);
}
public function test_preserves_tables(): void
{
$html = '<table><tr><th>Header</th></tr><tr><td>Data</td></tr></table>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('<table>', $result);
$this->assertStringContainsString('<th>Header</th>', $result);
$this->assertStringContainsString('<td>Data</td>', $result);
}
public function test_preserves_code_blocks(): void
{
$html = '<pre><code>function test() {}</code></pre>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('<pre>', $result);
$this->assertStringContainsString('<code>', $result);
$this->assertStringContainsString('function test() {}', $result);
}
public function test_preserves_blockquotes(): void
{
$html = '<blockquote>A famous quote</blockquote>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('<blockquote>A famous quote</blockquote>', $result);
}
public function test_preserves_id_and_class_attributes(): void
{
$html = '<div id="main" class="container"><p class="intro">Content</p></div>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('id="main"', $result);
$this->assertStringContainsString('class="container"', $result);
$this->assertStringContainsString('class="intro"', $result);
}
// -------------------------------------------------------------------------
// Edge Cases
// -------------------------------------------------------------------------
public function test_handles_empty_string(): void
{
$result = $this->sanitiser->sanitise('');
$this->assertSame('', $result);
}
public function test_handles_plain_text(): void
{
$text = 'Just plain text without any HTML';
$result = $this->sanitiser->sanitise($text);
$this->assertSame($text, $result);
}
public function test_handles_unicode_content(): void
{
$html = '<p>Caf?? au lait and ????????</p>';
$result = $this->sanitiser->sanitise($html);
$this->assertStringContainsString('Caf??', $result);
$this->assertStringContainsString('????????', $result);
}
public function test_handles_nested_xss_attempts(): void
{
$malicious = '<div><p onclick="alert(1)"><a href="javascript:void(0)" onmouseover="alert(2)">Text</a></p></div>';
$result = $this->sanitiser->sanitise($malicious);
$this->assertStringNotContainsString('onclick', $result);
$this->assertStringNotContainsString('onmouseover', $result);
$this->assertStringNotContainsString('javascript:', $result);
$this->assertStringContainsString('Text', $result);
}
public function test_is_available_returns_true(): void
{
// HTMLPurifier should be installed as a required dependency
$this->assertTrue(HtmlSanitiser::isAvailable());
}
}