agent/php/Mcp/Services/DataRedactor.php
Snider 83df8ad71a fix(agent): address CodeRabbit + SonarCloud findings on PR #6
20+ CHANGES_REQUESTED dispositions across PHP MCP services, Go pkg/agentic,
hermes_runner_mcp Python server, plugin shell scripts.

Highlights:
- DatabaseSchema.php: identifier quoting
- AwardCredits.php: task row locking order
- CreditTransaction.php: fail-fast row decoding
- OpenApiGenerator.php: YAML parse handling + uri query params
- CaptureDispatchResultJob.php: AgentProfile namespace fix
- CreditsController.php: missing workspace_id fail-closed
- QueryAuditService.php: prose query false positives + unbounded aggregation
- McpHealthService.php: proc_close after timeout + env var resolution
- CreditLedger.php + FleetOverview.php: workspace agent + dispatch target validation
- McpAgentServerCommand.php: quota burn on failed tool calls
- McpMetricsService.php: N-day window consistency
- hermes_runner_mcp: API key off command line + invalid method+id + run_id encoding
- CircuitBreaker.php: extracted CircuitOpenException class with autoload-correct placement
- pkg/agentic + brain + flow: SonarCloud sendMessage/fetchLoopRepoRefs/commitWorkspace/Connect annotations
- shell scripts: removed [[ usage for portability

43 files modified, 1 new (CircuitOpenException.php).

Verification: gofmt -w + php -l + python3 -m py_compile + bash -n all clean.
Touched-package go test passes (pkg/lib/flow, pkg/lib).
Full go test ./... blocked by pre-existing dappco.re module graph drift, out of scope.

Parked for separate work:
- Mantis #1062: go.mod local replace removal (cross-repo architectural)
- Mantis #1063: Sonar residual line-length / duplication quality-gate cluster

Closes findings on https://github.com/dAppCore/agent/pull/6

Co-authored-by: Codex <noreply@openai.com>
2026-04-27 13:39:24 +01:00

260 lines
6.5 KiB
PHP

<?php
// SPDX-License-Identifier: EUPL-1.2
declare(strict_types=1);
namespace Core\Mcp\Services;
/**
 * Scrubs secrets and personal data out of arbitrary values.
 *
 * Two entry points are offered:
 *  - redact():    returns a structurally equivalent copy with sensitive values replaced.
 *  - summarize(): like redact(), but additionally caps arrays at 10 entries and
 *                 strings at 100 bytes.
 *
 * Array keys are matched by case-insensitive substring against SENSITIVE_KEYS
 * (value replaced entirely) and PII_KEYS (string value partially masked).
 * Free-text strings are additionally scanned for token-like patterns.
 * Objects are converted to arrays (see normaliseObject()) before processing, so
 * the result never contains the caller's original object instances.
 */
final class DataRedactor
{
    /** Replacement text for values that are removed entirely. */
    protected const REDACTED = '[REDACTED]';

    /** Placeholder emitted by redact() where the depth budget ran out. */
    protected const DEPTH_EXCEEDED = '[MAX_DEPTH_EXCEEDED]';

    /**
     * Key fragments whose values are always replaced with REDACTED.
     *
     * NOTE: matching is by substring (see isSensitiveKey()), so short fragments
     * also hit unrelated keys, e.g. "pin" matches "shipping" and "auth" matches
     * "author". This errs on the side of over-redaction.
     */
    protected const SENSITIVE_KEYS = [
        'password',
        'passwd',
        'secret',
        'token',
        'api_key',
        'apikey',
        'api-key',
        'auth',
        'authorization',
        'bearer',
        'credential',
        'credentials',
        'private_key',
        'privatekey',
        'access_token',
        'refresh_token',
        'session_token',
        'jwt',
        'ssn',
        'social_security',
        'credit_card',
        'creditcard',
        'card_number',
        'cvv',
        'cvc',
        'pin',
        'routing_number',
        'account_number',
        'bank_account',
    ];

    /** Key fragments whose string values are partially masked (see partialRedact()). */
    protected const PII_KEYS = [
        'email',
        'phone',
        'telephone',
        'mobile',
        'address',
        'street',
        'postcode',
        'zip',
        'zipcode',
        'date_of_birth',
        'dob',
        'birthdate',
        'national_insurance',
        'ni_number',
        'passport',
        'license',
        'licence',
    ];

    /**
     * Return a redacted copy of $data.
     *
     * @param mixed $data     Scalar, array or object to scrub.
     * @param int   $maxDepth Nesting budget; anything deeper becomes "[MAX_DEPTH_EXCEEDED]".
     *
     * @return mixed Redacted value; objects come back as arrays/scalars.
     */
    public function redact(mixed $data, int $maxDepth = 10): mixed
    {
        if ($maxDepth <= 0) {
            return self::DEPTH_EXCEEDED;
        }

        if (is_object($data)) {
            return $this->redactObject($data, $maxDepth - 1);
        }

        if (is_array($data)) {
            return $this->redactArray($data, $maxDepth - 1);
        }

        if (is_string($data)) {
            return $this->redactString($data);
        }

        return $data;
    }

    /**
     * Return a redacted and size-limited view of $data.
     *
     * Arrays keep their first 10 entries (keys preserved) and gain a
     * "_truncated" entry describing how many were dropped; strings longer than
     * 100 bytes are cut to 97 bytes plus "...".
     *
     * @param mixed $data     Value to summarise.
     * @param int   $maxDepth Nesting budget; anything deeper becomes "[...]".
     *
     * @return mixed Summarised value; objects come back as arrays/scalars.
     */
    public function summarize(mixed $data, int $maxDepth = 3): mixed
    {
        if ($maxDepth <= 0) {
            return '[...]';
        }

        if (is_object($data)) {
            return $this->summarizeObject($data, $maxDepth - 1);
        }

        if (is_array($data)) {
            $limit = 10;
            $count = count($data);
            $result = [];

            foreach (array_slice($data, 0, $limit, true) as $key => $value) {
                $lowerKey = strtolower((string) $key);

                if ($this->isSensitiveKey($lowerKey)) {
                    $result[$key] = self::REDACTED;
                    continue;
                }

                if ($this->isPiiKey($lowerKey) && is_string($value)) {
                    $result[$key] = $this->partialRedact($value);
                    continue;
                }

                $result[$key] = $this->summarize($value, $maxDepth - 1);
            }

            if ($count > $limit) {
                $result['_truncated'] = sprintf('... and %d more items', $count - $limit);
            }

            return $result;
        }

        if (is_string($data)) {
            $redacted = $this->redactString($data);

            return strlen($redacted) > 100
                ? substr($redacted, 0, 97).'...'
                : $redacted;
        }

        return $data;
    }

    /**
     * Redact an object by normalising it and scrubbing the result.
     *
     * @param object $data     Object to scrub.
     * @param int    $maxDepth Remaining nesting budget.
     *
     * @return mixed Redacted array/scalar, or "[MAX_DEPTH_EXCEEDED]".
     */
    protected function redactObject(object $data, int $maxDepth): mixed
    {
        $normalised = $this->normaliseObject($data);

        if (is_object($normalised)) {
            // jsonSerialize() may return another object, or the same one. Each
            // hop spends a depth level so a self-referencing serialiser
            // terminates instead of recursing without bound.
            return $maxDepth <= 0
                ? self::DEPTH_EXCEEDED
                : $this->redactObject($normalised, $maxDepth - 1);
        }

        if (is_array($normalised)) {
            return $this->redactArray($normalised, $maxDepth);
        }

        return is_string($normalised)
            ? $this->redactString($normalised)
            : $normalised;
    }

    /**
     * Summarise an object by normalising it and summarising the result.
     *
     * Delegating back to summarize() means an object returned by
     * jsonSerialize() passes through the depth check again, so chains of
     * serialisers are bounded.
     *
     * @param object $data     Object to summarise.
     * @param int    $maxDepth Remaining nesting budget.
     *
     * @return mixed Summarised array/scalar, or "[...]".
     */
    protected function summarizeObject(object $data, int $maxDepth): mixed
    {
        return $this->summarize($this->normaliseObject($data), $maxDepth);
    }

    /**
     * Redact every entry of an array, recursing into nested arrays and objects.
     *
     * @param array $data     Array to scrub; keys are preserved.
     * @param int   $maxDepth Remaining nesting budget for children of $data.
     *
     * @return array Redacted copy of $data.
     */
    protected function redactArray(array $data, int $maxDepth): array
    {
        $result = [];

        foreach ($data as $key => $value) {
            $lowerKey = strtolower((string) $key);

            if ($this->isSensitiveKey($lowerKey)) {
                $result[$key] = self::REDACTED;
                continue;
            }

            if ($this->isPiiKey($lowerKey) && is_string($value)) {
                $result[$key] = $this->partialRedact($value);
                continue;
            }

            if (is_array($value) || is_object($value)) {
                if ($maxDepth <= 0) {
                    $result[$key] = self::DEPTH_EXCEEDED;
                    continue;
                }

                // Objects nested inside arrays must be scrubbed too; passing
                // them through would leak whatever secrets they carry.
                $result[$key] = is_array($value)
                    ? $this->redactArray($value, $maxDepth - 1)
                    : $this->redactObject($value, $maxDepth - 1);
                continue;
            }

            $result[$key] = is_string($value)
                ? $this->redactString($value)
                : $value;
        }

        return $result;
    }

    /**
     * Whether $key contains any SENSITIVE_KEYS fragment.
     *
     * @param string $key Key to test; callers pass it already lower-cased.
     */
    protected function isSensitiveKey(string $key): bool
    {
        foreach (self::SENSITIVE_KEYS as $sensitiveKey) {
            if (str_contains($key, $sensitiveKey)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Whether $key contains any PII_KEYS fragment.
     *
     * @param string $key Key to test; callers pass it already lower-cased.
     */
    protected function isPiiKey(string $key): bool
    {
        foreach (self::PII_KEYS as $piiKey) {
            if (str_contains($key, $piiKey)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Replace secret-looking substrings inside free text.
     *
     * Patterns are applied in order; if a pattern fails (preg_replace() returns
     * null) the text as it stood before that pattern is kept.
     *
     * @param string $value Text to scan.
     *
     * @return string Text with matches replaced.
     */
    protected function redactString(string $value): string
    {
        $rules = [
            // HTTP Bearer credentials. The character set follows the RFC 6750
            // b64token grammar (including "~", "+", "/" and "=" padding) so the
            // tail of a base64 token is not left behind.
            '/Bearer\s+[A-Za-z0-9\-._~+\/]+=*/i' => 'Bearer '.self::REDACTED,
            // HTTP Basic credentials (base64).
            '/Basic\s+[A-Za-z0-9+\/=]+/i' => 'Basic '.self::REDACTED,
            // Prefixed API keys such as "sk_<16+ chars>"; an optional
            // "live_"/"test_" environment segment is accepted so keys shaped
            // like "sk_live_..." are caught as well.
            '/\b(sk|pk|key|api|token)_(?:(?:live|test)_)?[A-Za-z0-9]{16,}\b/i' => '$1_'.self::REDACTED,
            // Three-segment tokens whose first two segments start with "eyJ" (JWT shape).
            '/eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*/i' => self::REDACTED,
            // Two letters, six digits, one letter (UK National Insurance number shape).
            '/[A-Z]{2}\s?\d{2}\s?\d{2}\s?\d{2}\s?[A-Z]/i' => self::REDACTED,
            // Sixteen digits in groups of four (payment card number shape).
            '/\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/' => self::REDACTED,
        ];

        foreach ($rules as $pattern => $replacement) {
            $value = preg_replace($pattern, $replacement, $value) ?? $value;
        }

        return $value;
    }

    /**
     * Mask the middle of a string, keeping a few leading/trailing bytes.
     *
     * Lengths are measured in bytes:
     *  - up to 4:  fully replaced with REDACTED;
     *  - 5 to 8:   first 2 bytes + "***" + last byte;
     *  - longer:   min(3, floor(length / 4)) bytes kept at each end.
     *
     * @param string $value Value to mask.
     *
     * @return string Masked value.
     */
    protected function partialRedact(string $value): string
    {
        $length = strlen($value);

        if ($length <= 4) {
            return self::REDACTED;
        }

        if ($length <= 8) {
            return substr($value, 0, 2).'***'.substr($value, -1);
        }

        $visible = min(3, (int) floor($length / 4));

        return substr($value, 0, $visible).'***'.substr($value, -$visible);
    }

    /**
     * Convert an object into plain data for further processing.
     *
     * JsonSerializable objects supply their own representation (which may be
     * any type, including another object); every other object is reduced to
     * the properties get_object_vars() can see from this class.
     *
     * @param object $data Object to convert.
     *
     * @return mixed jsonSerialize() result, or an array of visible properties.
     */
    protected function normaliseObject(object $data): mixed
    {
        if ($data instanceof \JsonSerializable) {
            return $data->jsonSerialize();
        }

        return get_object_vars($data);
    }
}