diff --git a/codex-rs/core/src/tools/handlers/collab.rs b/codex-rs/core/src/tools/handlers/collab.rs index 00190648f..b1666949f 100644 --- a/codex-rs/core/src/tools/handlers/collab.rs +++ b/codex-rs/core/src/tools/handlers/collab.rs @@ -28,6 +28,8 @@ use serde::Serialize; pub struct CollabHandler; +/// Minimum wait timeout to prevent tight polling loops from burning CPU. +pub(crate) const MIN_WAIT_TIMEOUT_MS: i64 = 10_000; pub(crate) const DEFAULT_WAIT_TIMEOUT_MS: i64 = 30_000; pub(crate) const MAX_WAIT_TIMEOUT_MS: i64 = 300_000; @@ -323,6 +325,8 @@ mod wait { .collect::, _>>()?; // Validate timeout. + // Very short timeouts encourage busy-polling loops in the orchestrator prompt and can + // cause high CPU usage even with a single active worker, so clamp to a minimum. let timeout_ms = args.timeout_ms.unwrap_or(DEFAULT_WAIT_TIMEOUT_MS); let timeout_ms = match timeout_ms { ms if ms <= 0 => { @@ -330,7 +334,7 @@ mod wait { "timeout_ms must be greater than zero".to_owned(), )); } - ms => ms.min(MAX_WAIT_TIMEOUT_MS), + ms => ms.clamp(MIN_WAIT_TIMEOUT_MS, MAX_WAIT_TIMEOUT_MS), }; session @@ -1012,7 +1016,7 @@ mod tests { "wait", function_payload(json!({ "ids": [agent_id.to_string()], - "timeout_ms": 10 + "timeout_ms": MIN_WAIT_TIMEOUT_MS })), ); let output = CollabHandler @@ -1043,6 +1047,37 @@ mod tests { .expect("shutdown should submit"); } + #[tokio::test] + async fn wait_clamps_short_timeouts_to_minimum() { + let (mut session, turn) = make_session_and_context().await; + let manager = thread_manager(); + session.services.agent_control = manager.agent_control(); + let config = turn.client.config().as_ref().clone(); + let thread = manager.start_thread(config).await.expect("start thread"); + let agent_id = thread.thread_id; + let invocation = invocation( + Arc::new(session), + Arc::new(turn), + "wait", + function_payload(json!({ + "ids": [agent_id.to_string()], + "timeout_ms": 10 + })), + ); + + let early = timeout(Duration::from_millis(50), CollabHandler.handle(invocation)).await; + assert!( + early.is_err(), + "wait should not return before the minimum timeout clamp" + ); + + let _ = thread + .thread + .submit(Op::Shutdown {}) + .await + .expect("shutdown should submit"); + } + #[tokio::test] async fn wait_returns_final_status_without_timeout() { let (mut session, turn) = make_session_and_context().await; diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index c06b9556e..33010e58e 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -8,6 +8,7 @@ use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool; use crate::tools::handlers::apply_patch::create_apply_patch_json_tool; use crate::tools::handlers::collab::DEFAULT_WAIT_TIMEOUT_MS; use crate::tools::handlers::collab::MAX_WAIT_TIMEOUT_MS; +use crate::tools::handlers::collab::MIN_WAIT_TIMEOUT_MS; use crate::tools::registry::ToolRegistryBuilder; use codex_protocol::config_types::WebSearchMode; use codex_protocol::dynamic_tools::DynamicToolSpec; @@ -517,7 +518,7 @@ fn create_wait_tool() -> ToolSpec { "timeout_ms".to_string(), JsonSchema::Number { description: Some(format!( - "Optional timeout in milliseconds. Defaults to {DEFAULT_WAIT_TIMEOUT_MS} and max {MAX_WAIT_TIMEOUT_MS}." + "Optional timeout in milliseconds. Defaults to {DEFAULT_WAIT_TIMEOUT_MS}, min {MIN_WAIT_TIMEOUT_MS}, and max {MAX_WAIT_TIMEOUT_MS}. Avoid tight polling loops; prefer longer waits (seconds to minutes)." )), }, ); diff --git a/codex-rs/core/templates/agents/orchestrator.md b/codex-rs/core/templates/agents/orchestrator.md index 09b6ea0b4..c303591d4 100644 --- a/codex-rs/core/templates/agents/orchestrator.md +++ b/codex-rs/core/templates/agents/orchestrator.md @@ -52,6 +52,7 @@ You are Codex Orchestrator, based on GPT-5. You are running as an orchestration * Workers must not revert, overwrite, or conflict with others’ work. * By default, workers must not spawn sub-agents unless explicitly allowed. * When multiple workers are active, you may pass multiple IDs to `wait` to react to the first completion and keep the workflow event-driven and use a long timeout (e.g. 5 minutes). +* Do not busy-poll `wait` with very short timeouts. Prefer waits measured in seconds (or minutes) so the system is idle while workers run. ## Collab tools