From 468ee8a75c72d49cd1bfc879b7ad699b3126ccef Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Mon, 5 Jan 2026 09:00:45 -0800 Subject: [PATCH] [MCP] Sanitize MCP tool names to ensure they are compatible with the Responses APO (#8694) The Responses API requires that all tool names conform to '^[a-zA-Z0-9_-]+$'. This PR replaces all non-conforming characters with `_` to ensure that they can be used. Fixes #8174 --- codex-rs/core/src/mcp_connection_manager.rs | 72 ++++++++++++++++++--- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/codex-rs/core/src/mcp_connection_manager.rs b/codex-rs/core/src/mcp_connection_manager.rs index 6c0b48b1b..dcd1edf80 100644 --- a/codex-rs/core/src/mcp_connection_manager.rs +++ b/codex-rs/core/src/mcp_connection_manager.rs @@ -79,26 +79,60 @@ pub const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10); /// Default timeout for individual tool calls. const DEFAULT_TOOL_TIMEOUT: Duration = Duration::from_secs(60); +/// The Responses API requires tool names to match `^[a-zA-Z0-9_-]+$`. +/// MCP server/tool names are user-controlled, so sanitize the fully-qualified +/// name we expose to the model by replacing any disallowed character with `_`. +fn sanitize_responses_api_tool_name(name: &str) -> String { + let mut sanitized = String::with_capacity(name.len()); + for c in name.chars() { + if c.is_ascii_alphanumeric() || c == '_' || c == '-' { + sanitized.push(c); + } else { + sanitized.push('_'); + } + } + + if sanitized.is_empty() { + "_".to_string() + } else { + sanitized + } +} + +fn sha1_hex(s: &str) -> String { + let mut hasher = Sha1::new(); + hasher.update(s.as_bytes()); + let sha1 = hasher.finalize(); + format!("{sha1:x}") +} + fn qualify_tools(tools: I) -> HashMap where I: IntoIterator, { let mut used_names = HashSet::new(); + let mut seen_raw_names = HashSet::new(); let mut qualified_tools = HashMap::new(); for tool in tools { - let mut qualified_name = format!( + let qualified_name_raw = format!( "mcp{}{}{}{}", MCP_TOOL_NAME_DELIMITER, tool.server_name, MCP_TOOL_NAME_DELIMITER, tool.tool_name ); + if !seen_raw_names.insert(qualified_name_raw.clone()) { + warn!("skipping duplicated tool {}", qualified_name_raw); + continue; + } + + // Start from a "pretty" name (sanitized), then deterministically disambiguate on + // collisions by appending a hash of the *raw* (unsanitized) qualified name. This + // ensures tools like `foo.bar` and `foo_bar` don't collapse to the same key. + let mut qualified_name = sanitize_responses_api_tool_name(&qualified_name_raw); + + // Enforce length constraints early; use the raw name for the hash input so the + // output remains stable even when sanitization changes. if qualified_name.len() > MAX_TOOL_NAME_LENGTH { - let mut hasher = Sha1::new(); - hasher.update(qualified_name.as_bytes()); - let sha1 = hasher.finalize(); - let sha1_str = format!("{sha1:x}"); - - // Truncate to make room for the hash suffix + let sha1_str = sha1_hex(&qualified_name_raw); let prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len(); - qualified_name = format!("{}{}", &qualified_name[..prefix_len], sha1_str); } @@ -1035,6 +1069,28 @@ mod tests { ); } + #[test] + fn test_qualify_tools_sanitizes_invalid_characters() { + let tools = vec![create_test_tool("server.one", "tool.two")]; + + let qualified_tools = qualify_tools(tools); + + assert_eq!(qualified_tools.len(), 1); + let (qualified_name, tool) = qualified_tools.into_iter().next().expect("one tool"); + assert_eq!(qualified_name, "mcp__server_one__tool_two"); + + // The key is sanitized for OpenAI, but we keep original parts for the actual MCP call. + assert_eq!(tool.server_name, "server.one"); + assert_eq!(tool.tool_name, "tool.two"); + + assert!( + qualified_name + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-'), + "qualified name must be Responses API compatible: {qualified_name:?}" + ); + } + #[test] fn tool_filter_allows_by_default() { let filter = ToolFilter::default();