lthn.io/app/Core/Input/Sanitiser.php
Claude 41a90cbff8
feat: lthn.io API serving live chain data
Fixed: basePath self→static binding, namespace detection, event wiring,
SQLite cache, file cache driver. All Mod Boot classes converted to
$listens pattern for lifecycle event discovery.

Working endpoints:
- /v1/explorer/info — live chain height, difficulty, aliases
- /v1/explorer/stats — formatted chain statistics
- /v1/names/directory — alias directory grouped by type
- /v1/names/available/{name} — name availability check
- /v1/names/lookup/{name} — name details

Co-Authored-By: Charon <charon@lethean.io>
2026-04-03 17:17:42 +01:00

908 lines
28 KiB
PHP

<?php
/*
* Core PHP Framework
*
* Licensed under the European Union Public Licence (EUPL) v1.2.
* See LICENSE file for details.
*/
declare(strict_types=1);
namespace Core\Input;
use Normalizer;
use Psr\Log\LoggerInterface;
/**
* Input sanitiser - makes data safe, not valid.
*
* One job: strip dangerous control characters.
* One C call per string value: filter_var.
*
* Laravel validates. We sanitise.
*
* Features:
* - Configurable filter rules per field via schema
* - Unicode NFC normalization for consistent string handling
* - Optional audit logging when content is modified
* - Rich text support with safe HTML tags whitelist
* - Configurable maximum input length enforcement
* - Transformation hooks for custom processing at different stages
*
* ## Transformation Hooks
*
* Register callbacks to transform values at specific stages of the
* sanitization pipeline:
*
* ```php
* $sanitiser = (new Sanitiser())
* ->beforeFilter(function (string $value, string $field): string {
* // Transform before any filtering
* return trim($value);
* })
* ->afterFilter(function (string $value, string $field): string {
* // Transform after all filtering is complete
* return $value;
* })
* ->transformField('username', function (string $value): string {
* // Field-specific transformation
* return strtolower($value);
* });
* ```
*
* Hook execution order:
* 1. Before hooks (global, then field-specific)
* 2. Standard filtering pipeline (normalize, strip, HTML, preset, filters, length)
* 3. After hooks (global, then field-specific)
*/
class Sanitiser
{
/**
 * Default tag whitelist for rich text fields, in strip_tags() format.
 *
 * Used when a field schema sets 'allow_html' => true and as the default
 * argument of allowHtml().
 *
 * NOTE(review): strip_tags() keeps the attributes of whitelisted tags, so
 * an <a> tag can still carry a javascript: href or an event-handler
 * attribute. Confirm that output is escaped or purified before rendering.
 */
public const SAFE_HTML_TAGS = '<p><br><strong><em><a><ul><ol><li><h1><h2><h3><h4><h5><h6><blockquote><code><pre>';
/**
 * Minimal tag whitelist for basic formatting (p, br, strong, em),
 * in strip_tags() format.
 */
public const BASIC_HTML_TAGS = '<p><br><strong><em>';
/**
 * Default maximum input length (0 = unlimited).
 */
public const DEFAULT_MAX_LENGTH = 0;
/**
 * Names of the built-in filter presets.
 *
 * Each name is a key into the static $presets table. Apply one through the
 * matching fluent method (email(), url(), phone(), ...) or by setting the
 * 'preset' key of a field's schema entry.
 */
public const PRESET_EMAIL = 'email';
public const PRESET_URL = 'url';
public const PRESET_PHONE = 'phone';
public const PRESET_ALPHA = 'alpha';
public const PRESET_ALPHANUMERIC = 'alphanumeric';
public const PRESET_NUMERIC = 'numeric';
public const PRESET_SLUG = 'slug';
/**
 * Schema for per-field filter rules.
 *
 * Format: ['field_name' => ['filters' => [...], 'options' => [...]]]
 *
 * The special key '*' holds rules applied to every field; a field's own
 * entry overrides it key by key. The 'preset' key is written by the preset
 * methods (email(), slug(), ...) and names an entry of the preset table.
 *
 * @var array<string, array{filters?: int[], options?: int[], skip_control_strip?: bool, skip_normalize?: bool, allow_html?: string|bool, max_length?: int, preset?: string}>
 */
protected array $schema = [];
/**
 * Optional PSR-3 logger that receives audit entries.
 */
protected ?LoggerInterface $logger = null;
/**
 * Whether audit logging is enabled.
 *
 * The constructor forces this to false when no logger is supplied.
 */
protected bool $auditEnabled = false;
/**
 * Whether to normalize Unicode to NFC form.
 */
protected bool $normalizeUnicode = true;
/**
 * Global maximum input length (0 = unlimited).
 *
 * A field-level 'max_length' schema value takes precedence over this.
 */
protected int $maxLength = 0;
/**
 * Global allowed HTML tags in strip_tags() format.
 *
 * An empty string means no global HTML filtering: markup is left untouched
 * unless the field's schema sets 'allow_html'.
 */
protected string $allowedHtmlTags = '';
/**
 * Global before-filter transformation hooks, run in registration order.
 *
 * Each hook is called with (value, field name) and its return value
 * replaces the value.
 *
 * @var array<callable(string, string): string>
 */
protected array $beforeHooks = [];
/**
 * Global after-filter transformation hooks, run in registration order.
 *
 * Each hook is called with (value, field name) and its return value
 * replaces the value.
 *
 * @var array<callable(string, string): string>
 */
protected array $afterHooks = [];
/**
 * Field-specific transformation hooks, keyed by field name and then by
 * stage ('before' or 'after'). These hooks receive only the value.
 *
 * @var array<string, array{before?: array<callable(string): string>, after?: array<callable(string): string>}>
 */
protected array $fieldHooks = [];
/**
 * Preset definitions for common input types.
 *
 * Each entry may define any of the following, which applyPreset() applies
 * in this order:
 * - 'pattern': regular expression whose matches are removed from the value
 * - 'filter': a filter_var() filter id
 * - 'transform': a callable applied to the resulting string
 *
 * @var array<string, array{pattern?: string, filter?: int, transform?: callable(string): string}>
 */
protected static array $presets = [
    self::PRESET_EMAIL => [
        'filter' => FILTER_SANITIZE_EMAIL,
        'transform' => 'strtolower',
    ],
    self::PRESET_URL => [
        'filter' => FILTER_SANITIZE_URL,
    ],
    self::PRESET_PHONE => [
        // Keep only digits, plus, hyphens, parentheses, and spaces
        'pattern' => '/[^\d\+\-\(\)\s]/',
    ],
    self::PRESET_ALPHA => [
        // Keep only letters (including Unicode)
        'pattern' => '/[^\p{L}]/u',
    ],
    self::PRESET_ALPHANUMERIC => [
        // Keep only letters and numbers (including Unicode)
        'pattern' => '/[^\p{L}\p{N}]/u',
    ],
    self::PRESET_NUMERIC => [
        // Keep only digits, decimal point, and minus sign
        'pattern' => '/[^\d\.\-]/',
    ],
    self::PRESET_SLUG => [
        // Convert to URL-safe slug format. The pattern runs before the
        // strtolower transform, so it must let upper-case ASCII letters
        // through; otherwise they would be deleted instead of lowercased.
        'pattern' => '/[^a-zA-Z0-9\-]/',
        'transform' => 'strtolower',
    ],
];
/**
 * Create a new Sanitiser instance.
 *
 * @param array<string, array{filters?: int[], options?: int[], skip_control_strip?: bool, skip_normalize?: bool, allow_html?: string|bool, max_length?: int}> $schema Per-field filter rules
 * @param LoggerInterface|null $logger Optional PSR-3 logger for audit logging
 * @param bool $auditEnabled Whether to enable audit logging (ignored when no logger is given)
 * @param bool $normalizeUnicode Whether to normalize Unicode to NFC form
 * @param int $maxLength Global maximum input length (0 = unlimited; negative values are treated as 0)
 * @param string $allowedHtmlTags Global allowed HTML tags in strip_tags() format (empty = no global HTML filtering)
 */
public function __construct(
    array $schema = [],
    ?LoggerInterface $logger = null,
    bool $auditEnabled = false,
    bool $normalizeUnicode = true,
    int $maxLength = self::DEFAULT_MAX_LENGTH,
    string $allowedHtmlTags = ''
) {
    $this->schema = $schema;
    $this->logger = $logger;
    // Auditing without a logger is meaningless, so it is switched off.
    $this->auditEnabled = $auditEnabled && $logger !== null;
    $this->normalizeUnicode = $normalizeUnicode;
    // Clamp exactly like maxLength() does so both entry points store the same state.
    $this->maxLength = max(0, $maxLength);
    $this->allowedHtmlTags = $allowedHtmlTags;
}
/**
 * Return a copy of this sanitiser that uses the given per-field schema.
 *
 * The supplied schema replaces the current one entirely. Expected shape:
 * [
 *     'field_name' => [
 *         'filters' => [FILTER_SANITIZE_EMAIL, ...], // Extra filter_var() filters
 *         'options' => [FILTER_FLAG_STRIP_HIGH, ...], // Extra flags, OR-ed together
 *         'skip_control_strip' => false, // Skip control character stripping
 *         'skip_normalize' => false, // Skip Unicode normalization
 *         'allow_html' => '<p><br><strong>', // Allowed HTML tags for this field
 *         'max_length' => 1000, // Max length for this field (overrides global)
 *     ],
 * ]
 *
 * @param array<string, array{filters?: int[], options?: int[], skip_control_strip?: bool, skip_normalize?: bool, allow_html?: string|bool, max_length?: int}> $schema
 */
public function withSchema(array $schema): static
{
    $copy = clone $this;
    $copy->schema = $schema;

    return $copy;
}
/**
 * Return a copy of this sanitiser with an audit logger attached.
 *
 * @param LoggerInterface $logger Logger that receives audit entries
 * @param bool $enabled Whether audit logging is switched on for the copy
 */
public function withLogger(LoggerInterface $logger, bool $enabled = true): static
{
    $copy = clone $this;
    $copy->auditEnabled = $enabled;
    $copy->logger = $logger;

    return $copy;
}
/**
 * Return a copy of this sanitiser with Unicode NFC normalization
 * switched on or off.
 *
 * @param bool $enabled True to normalize strings, false to skip that step
 */
public function withNormalization(bool $enabled): static
{
    $copy = clone $this;
    $copy->normalizeUnicode = $enabled;

    return $copy;
}
/**
 * Return a copy of this sanitiser with a global HTML tag whitelist.
 *
 * Tags outside the whitelist are removed with strip_tags(). Pass a tag
 * string such as '<p><br><strong>' or one of the predefined constants
 * (SAFE_HTML_TAGS, BASIC_HTML_TAGS). An empty string disables global HTML
 * filtering.
 *
 * @param string $allowedTags Allowed HTML tags in strip_tags format
 */
public function allowHtml(string $allowedTags = self::SAFE_HTML_TAGS): static
{
    $copy = clone $this;
    $copy->allowedHtmlTags = $allowedTags;

    return $copy;
}
/**
 * Return a copy of this sanitiser in rich text mode.
 *
 * The whitelist is SAFE_HTML_TAGS: p, br, strong, em, a, ul, ol, li,
 * h1-h6, blockquote, code and pre.
 */
public function richText(): static
{
    $whitelist = self::SAFE_HTML_TAGS;

    return $this->allowHtml($whitelist);
}
/**
 * Return a copy of this sanitiser in basic HTML mode.
 *
 * The whitelist is BASIC_HTML_TAGS: p, br, strong and em only.
 */
public function basicHtml(): static
{
    $whitelist = self::BASIC_HTML_TAGS;

    return $this->allowHtml($whitelist);
}
/**
 * Return a copy of this sanitiser with a global maximum input length.
 *
 * Longer inputs are truncated. Zero means unlimited, and negative values
 * are stored as zero.
 *
 * @param int $maxLength Maximum length in characters (0 = unlimited)
 */
public function maxLength(int $maxLength): static
{
    $copy = clone $this;
    $copy->maxLength = $maxLength > 0 ? $maxLength : 0;

    return $copy;
}
/**
 * Return a copy of this sanitiser with the email preset applied.
 *
 * The preset runs FILTER_SANITIZE_EMAIL and then lowercases the result.
 * It sanitises only; it does not validate the address.
 *
 * @param string ...$fields Field names to apply the preset to (empty = all fields)
 */
public function email(string ...$fields): static
{
    $preset = self::PRESET_EMAIL;

    return $this->applyPresetToFields($preset, $fields);
}
/**
 * Return a copy of this sanitiser with the URL preset applied.
 *
 * The preset runs FILTER_SANITIZE_URL. It sanitises only; it does not
 * validate the URL.
 *
 * @param string ...$fields Field names to apply the preset to (empty = all fields)
 */
public function url(string ...$fields): static
{
    $preset = self::PRESET_URL;

    return $this->applyPresetToFields($preset, $fields);
}
/**
 * Return a copy of this sanitiser with the phone preset applied.
 *
 * Everything except digits, plus signs, hyphens, parentheses and
 * whitespace is removed.
 *
 * @param string ...$fields Field names to apply the preset to (empty = all fields)
 */
public function phone(string ...$fields): static
{
    $preset = self::PRESET_PHONE;

    return $this->applyPresetToFields($preset, $fields);
}
/**
 * Return a copy of this sanitiser with the alpha preset applied.
 *
 * Every character that is not a letter (Unicode letters included) is
 * removed.
 *
 * @param string ...$fields Field names to apply the preset to (empty = all fields)
 */
public function alpha(string ...$fields): static
{
    $preset = self::PRESET_ALPHA;

    return $this->applyPresetToFields($preset, $fields);
}
/**
 * Return a copy of this sanitiser with the alphanumeric preset applied.
 *
 * Every character that is neither a letter nor a number (Unicode
 * included) is removed.
 *
 * @param string ...$fields Field names to apply the preset to (empty = all fields)
 */
public function alphanumeric(string ...$fields): static
{
    $preset = self::PRESET_ALPHANUMERIC;

    return $this->applyPresetToFields($preset, $fields);
}
/**
 * Return a copy of this sanitiser with the numeric preset applied.
 *
 * Everything except digits, decimal points and minus signs is removed.
 * The result is not checked for being a well-formed number.
 *
 * @param string ...$fields Field names to apply the preset to (empty = all fields)
 */
public function numeric(string ...$fields): static
{
    $preset = self::PRESET_NUMERIC;

    return $this->applyPresetToFields($preset, $fields);
}
/**
 * Return a copy of this sanitiser with the slug preset applied.
 *
 * The preset is meant for URL slugs and similar identifiers made of
 * lowercase letters, numbers and hyphens.
 *
 * @param string ...$fields Field names to apply the preset to (empty = all fields)
 */
public function slug(string ...$fields): static
{
    $preset = self::PRESET_SLUG;

    return $this->applyPresetToFields($preset, $fields);
}
/**
 * Register a custom preset, or replace an existing one of the same name.
 *
 * The preset table is static, so the registration is visible to every
 * Sanitiser instance.
 *
 * @param string $name Preset name
 * @param array{pattern?: string, filter?: int, transform?: callable(string): string} $definition
 */
public static function registerPreset(string $name, array $definition): void
{
    self::$presets[$name] = $definition;
}
/**
 * Return the full preset table: the built-in presets plus any added
 * through registerPreset(), keyed by preset name.
 *
 * @return array<string, array{pattern?: string, filter?: int, transform?: callable(string): string}>
 */
public static function getPresets(): array
{
    return self::$presets;
}
/**
 * Return a copy whose schema assigns the preset to the given fields.
 *
 * With no field names the preset is stored under the '*' schema key, which
 * applies to every field. Other keys already present in a schema entry are
 * kept; only 'preset' is overwritten.
 *
 * @param string $presetName Name of the preset to assign
 * @param array<string> $fields Field names (empty = all fields)
 */
protected function applyPresetToFields(string $presetName, array $fields): static
{
    $targets = $fields === [] ? ['*'] : $fields;
    $copy = clone $this;

    foreach ($targets as $schemaKey) {
        $existing = $copy->schema[$schemaKey] ?? [];
        $copy->schema[$schemaKey] = array_merge($existing, ['preset' => $presetName]);
    }

    return $copy;
}
// =========================================================================
// TRANSFORMATION HOOKS
// =========================================================================
/**
 * Return a copy with an extra global before-filter hook.
 *
 * The hook is called with the value and the field name before the
 * standard filtering steps, and its return value replaces the value.
 * Hooks run in the order they were registered.
 *
 * @param callable(string, string): string $callback
 */
public function beforeFilter(callable $callback): static
{
    $copy = clone $this;
    $copy->beforeHooks[] = $callback;

    return $copy;
}
/**
 * Return a copy with an extra global after-filter hook.
 *
 * The hook is called with the value and the field name after the
 * standard filtering steps, and its return value replaces the value.
 * Hooks run in the order they were registered.
 *
 * @param callable(string, string): string $callback
 */
public function afterFilter(callable $callback): static
{
    $copy = clone $this;
    $copy->afterHooks[] = $callback;

    return $copy;
}
/**
 * Return a copy with a transformation hook bound to a single field.
 *
 * The hook receives only the value. `$stage` selects when it runs:
 * - 'before': ahead of the standard filtering
 * - 'after': once the standard filtering is done (default)
 * Any other stage value is treated as 'after'.
 *
 * @param string $field Field name to transform
 * @param callable(string): string $callback
 * @param string $stage When to run: 'before' or 'after'
 */
public function transformField(string $field, callable $callback, string $stage = 'after'): static
{
    $slot = $stage === 'before' ? 'before' : 'after';

    $copy = clone $this;
    $copy->fieldHooks[$field] ??= ['before' => [], 'after' => []];
    $copy->fieldHooks[$field][$slot][] = $callback;

    return $copy;
}
/**
 * Return a copy with the same before-filter hook registered on each of
 * the named fields.
 *
 * When no field names are given the current instance is returned as is.
 *
 * @param callable(string): string $callback
 * @param string ...$fields Field names to apply the hook to
 */
public function beforeFilterFields(callable $callback, string ...$fields): static
{
    return array_reduce(
        $fields,
        static fn (self $sanitiser, string $field): self => $sanitiser->transformField($field, $callback, 'before'),
        $this
    );
}
/**
 * Return a copy with the same after-filter hook registered on each of
 * the named fields.
 *
 * When no field names are given the current instance is returned as is.
 *
 * @param callable(string): string $callback
 * @param string ...$fields Field names to apply the hook to
 */
public function afterFilterFields(callable $callback, string ...$fields): static
{
    return array_reduce(
        $fields,
        static fn (self $sanitiser, string $field): self => $sanitiser->transformField($field, $callback, 'after'),
        $this
    );
}
/**
 * Run every before hook over a value and return the result.
 *
 * Global hooks run first and receive (value, field name). The hooks
 * registered for this field then run and receive the value only.
 *
 * @param string $value The value to transform
 * @param string $fieldName The field name
 */
protected function applyBeforeHooks(string $value, string $fieldName): string
{
    foreach ($this->beforeHooks as $globalHook) {
        $value = $globalHook($value, $fieldName);
    }

    foreach ($this->fieldHooks[$fieldName]['before'] ?? [] as $fieldHook) {
        $value = $fieldHook($value);
    }

    return $value;
}
/**
 * Run every after hook over a value and return the result.
 *
 * Global hooks run first and receive (value, field name). The hooks
 * registered for this field then run and receive the value only.
 *
 * @param string $value The value to transform
 * @param string $fieldName The field name
 */
protected function applyAfterHooks(string $value, string $fieldName): string
{
    foreach ($this->afterHooks as $globalHook) {
        $value = $globalHook($value, $fieldName);
    }

    foreach ($this->fieldHooks[$fieldName]['after'] ?? [] as $fieldHook) {
        $value = $fieldHook($value);
    }

    return $value;
}
/**
 * Tell whether at least one transformation hook (global or
 * field-specific) has been registered.
 */
public function hasTransformationHooks(): bool
{
    $noneRegistered = empty($this->beforeHooks)
        && empty($this->afterHooks)
        && empty($this->fieldHooks);

    return ! $noneRegistered;
}
/**
 * Count the registered hooks.
 *
 * 'before' and 'after' count the global hooks. 'field' is the total of
 * all field-specific hooks across every field and both stages.
 *
 * @return array{before: int, after: int, field: int}
 */
public function getHookCounts(): array
{
    $perField = array_map(
        static fn (array $hooks): int => count($hooks['before'] ?? []) + count($hooks['after'] ?? []),
        $this->fieldHooks
    );

    return [
        'before' => count($this->beforeHooks),
        'after' => count($this->afterHooks),
        'field' => array_sum($perField),
    ];
}
/**
 * Sanitise every string value in the input array.
 *
 * By default ASCII control characters 0-31 are stripped; that range
 * includes tab, line feed and carriage return. UTF-8 high bytes are kept,
 * so international text survives. Nested arrays are processed
 * recursively, non-string values pass through unchanged, per-field schema
 * rules are applied, and strings are normalized to Unicode NFC when
 * enabled.
 *
 * @param array $input Raw input, possibly nested
 * @return array Input with the same keys and sanitised string values
 */
public function filter(array $input): array
{
    return empty($input) ? [] : $this->filterRecursive($input, '');
}
/**
 * Walk an array and sanitise its string values, descending into nested
 * arrays.
 *
 * Keys are preserved. Strings go through filterString() with the item's
 * own key as the field name, and values that are neither arrays nor
 * strings are copied unchanged.
 *
 * @param array $input Array to walk
 * @param string $path Dot-separated path of the parent array (for logging)
 */
protected function filterRecursive(array $input, string $path = ''): array
{
    $clean = [];

    foreach ($input as $key => $item) {
        $itemPath = $path !== '' ? $path.'.'.$key : (string) $key;

        $clean[$key] = match (true) {
            is_array($item) => $this->filterRecursive($item, $itemPath),
            is_string($item) => $this->filterString($item, $itemPath, (string) $key),
            default => $item,
        };
    }

    return $clean;
}
/**
 * Apply the full sanitisation pipeline to one string value.
 *
 * The pipeline executes in this order:
 * 1. Before hooks (global, then field-specific)
 * 2. Unicode NFC normalization
 * 3. Control character stripping
 * 4. HTML filtering
 * 5. Preset application
 * 6. Additional schema filters
 * 7. Max length enforcement
 * 8. After hooks (global, then field-specific)
 * 9. Audit logging (if enabled and value changed)
 *
 * Rules come from the '*' schema entry merged with the entry for
 * `$fieldName`; the field entry wins key by key.
 *
 * @param string $value Raw value
 * @param string $path Full dot-separated path, used only for logging
 * @param string $fieldName Immediate array key of the value, used for the
 *                          schema and hook lookup. For nested input this is
 *                          the innermost key, not the top-level one.
 */
protected function filterString(string $value, string $path, string $fieldName): string
{
    $original = $value;
    $effectiveSchema = array_merge($this->schema['*'] ?? [], $this->schema[$fieldName] ?? []);

    // Step 0: Apply before hooks
    $value = $this->applyBeforeHooks($value, $fieldName);

    // Step 1: Unicode NFC normalization (unless skipped)
    $skipNormalize = $effectiveSchema['skip_normalize'] ?? false;
    if ($this->normalizeUnicode && ! $skipNormalize && $this->isNormalizerAvailable()) {
        $normalized = Normalizer::normalize($value, Normalizer::FORM_C);
        // normalize() returns false on failure (e.g. invalid UTF-8); keep the input then.
        if ($normalized !== false) {
            $value = $normalized;
        }
    }

    // Step 2: Strip control characters (unless skipped)
    if (! ($effectiveSchema['skip_control_strip'] ?? false)) {
        $stripped = filter_var($value, FILTER_UNSAFE_RAW, FILTER_FLAG_STRIP_LOW);
        // filter_var() signals failure with false, which `??` would not catch.
        $value = is_string($stripped) ? $stripped : '';
    }

    // Step 3: Handle HTML tags (strip or allow based on configuration)
    $value = $this->filterHtml($value, $effectiveSchema);

    // Step 4: Apply preset if specified
    $value = $this->applyPreset($value, $effectiveSchema);

    // Step 5: Apply additional schema-defined filters.
    // The flags are the same for every filter, so they are combined once.
    $flags = 0;
    foreach ($effectiveSchema['options'] ?? [] as $flag) {
        $flags |= $flag;
    }
    foreach ($effectiveSchema['filters'] ?? [] as $filter) {
        $filtered = $flags > 0
            ? filter_var($value, $filter, $flags)
            : filter_var($value, $filter);
        // Validate filters can return int, float, bool or null. Only a string
        // may replace the value; anything else would break the typed pipeline.
        if (is_string($filtered)) {
            $value = $filtered;
        }
    }

    // Step 6: Enforce max length
    $value = $this->enforceMaxLength($value, $effectiveSchema);

    // Step 7: Apply after hooks
    $value = $this->applyAfterHooks($value, $fieldName);

    // Step 8: Audit logging if content was modified
    if ($this->auditEnabled && $this->logger !== null && $value !== $original) {
        $this->logSanitisation($path, $original, $value);
    }

    return $value;
}
/**
 * Apply the preset named in the schema to a value.
 *
 * The value is returned unchanged when the schema names no preset or an
 * unregistered one. Otherwise the preset's parts run in this order:
 * pattern removal, filter_var() filter, transform callable.
 *
 * @param string $value The value to transform
 * @param array $schema The effective schema containing preset configuration
 */
protected function applyPreset(string $value, array $schema): string
{
    $presetName = $schema['preset'] ?? null;
    if ($presetName === null || ! isset(self::$presets[$presetName])) {
        return $value;
    }

    $preset = self::$presets[$presetName];

    // Remove everything the pattern matches.
    if (isset($preset['pattern'])) {
        $stripped = preg_replace($preset['pattern'], '', $value);
        if ($stripped === null && preg_last_error() === PREG_BAD_UTF8_ERROR) {
            // A /u pattern rejects malformed UTF-8 outright, which would let the
            // raw value through unfiltered. Replace the invalid byte sequences
            // and run the pattern again.
            $stripped = preg_replace($preset['pattern'], '', mb_scrub($value, 'UTF-8'));
        }
        // Any other PCRE failure (e.g. a broken custom pattern) keeps the value.
        $value = $stripped ?? $value;
    }

    // Run the filter; only a string result may replace the value.
    if (isset($preset['filter'])) {
        $filtered = filter_var($value, $preset['filter']);
        if (is_string($filtered)) {
            $value = $filtered;
        }
    }

    // Run the transform. is_callable() already covers function-name strings.
    if (isset($preset['transform'])) {
        $transform = $preset['transform'];
        if (is_callable($transform)) {
            $value = $transform($value);
        }
    }

    return $value;
}
/**
 * Strip HTML tags from a value according to the configuration.
 *
 * The field-level 'allow_html' setting is consulted first:
 * - true: keep only SAFE_HTML_TAGS
 * - false: strip every tag
 * - non-empty string: keep only the tags listed in it
 * Any other value falls back to the global whitelist. When that is empty
 * too, the value is returned untouched (no HTML filtering by default).
 *
 * NOTE(review): strip_tags() leaves the attributes of allowed tags in
 * place; confirm the output is escaped or purified before rendering.
 *
 * @param string $value The value to filter
 * @param array $effectiveSchema Effective schema (merged global + field)
 */
protected function filterHtml(string $value, array $effectiveSchema): string
{
    $fieldSetting = $effectiveSchema['allow_html'] ?? null;

    if ($fieldSetting === true) {
        return strip_tags($value, self::SAFE_HTML_TAGS);
    }

    if ($fieldSetting === false) {
        return strip_tags($value);
    }

    if (is_string($fieldSetting) && $fieldSetting !== '') {
        return strip_tags($value, $fieldSetting);
    }

    // No usable field-level setting: use the global whitelist, if there is one.
    return $this->allowedHtmlTags === ''
        ? $value
        : strip_tags($value, $this->allowedHtmlTags);
}
/**
 * Truncate a value to the applicable maximum length.
 *
 * A field-level 'max_length' takes precedence over the global limit. A
 * limit of zero or less means unlimited. Length is measured and cut with
 * the mb_* functions, i.e. in characters rather than bytes.
 *
 * @param string $value The value to truncate
 * @param array $effectiveSchema Effective schema (merged global + field)
 */
protected function enforceMaxLength(string $value, array $effectiveSchema): string
{
    $limit = $effectiveSchema['max_length'] ?? $this->maxLength;

    if ($limit <= 0 || mb_strlen($value) <= $limit) {
        return $value;
    }

    return mb_substr($value, 0, $limit);
}
/**
 * Write an audit entry describing a value changed by sanitisation.
 *
 * Both values are shortened to 100 characters (plus '...') and have their
 * control characters made visible before being logged at info level. The
 * full lengths are logged alongside.
 *
 * @param string $path Field path
 * @param string $original Original value
 * @param string $sanitised Sanitised value
 */
protected function logSanitisation(string $path, string $original, string $sanitised): void
{
    $previewLimit = 100;

    // Shorten a value for the log, then render its control characters visibly.
    $preview = function (string $text) use ($previewLimit): string {
        $shortened = mb_strlen($text) > $previewLimit
            ? mb_substr($text, 0, $previewLimit).'...'
            : $text;

        return $this->makeControlCharsVisible($shortened);
    };

    $context = [
        'field' => $path,
        'original' => $preview($original),
        'sanitised' => $preview($sanitised),
        'original_length' => mb_strlen($original),
        'sanitised_length' => mb_strlen($sanitised),
    ];

    $this->logger->info('Input sanitised', $context);
}
/**
 * Replace ASCII control characters (0x00-0x1F) with the matching Unicode
 * Control Pictures (U+2400-U+241F) so they show up in log output.
 *
 * If the replacement fails, the value is returned unchanged.
 */
protected function makeControlCharsVisible(string $value): string
{
    $toControlPicture = static function (array $match) {
        return mb_chr(0x2400 + ord($match[0]));
    };

    $visible = preg_replace_callback('/[\x00-\x1F]/', $toControlPicture, $value);

    return $visible ?? $value;
}
/**
 * Tell whether the Normalizer class can be used, so that callers can skip
 * Unicode normalization when it is missing.
 */
protected function isNormalizerAvailable(): bool
{
    $normalizerClass = Normalizer::class;

    return class_exists($normalizerClass);
}
}