From 7d9ad3effdd5194f58fe01ebefa0307e824684a6 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Wed, 12 Nov 2025 08:35:34 -0800 Subject: [PATCH] Fix otel tests (#6541) Mount responses only once, remove unneeded retries and add a final assistant messages to complete the turn. --- codex-rs/core/tests/common/responses.rs | 6 - codex-rs/core/tests/suite/otel.rs | 184 +++++++++++++++++------- 2 files changed, 136 insertions(+), 54 deletions(-) diff --git a/codex-rs/core/tests/common/responses.rs b/codex-rs/core/tests/common/responses.rs index 1eaf9d0c9..8a6290360 100644 --- a/codex-rs/core/tests/common/responses.rs +++ b/codex-rs/core/tests/common/responses.rs @@ -446,12 +446,6 @@ pub async fn mount_sse_once(server: &MockServer, body: String) -> ResponseMock { response_mock } -pub async fn mount_sse(server: &MockServer, body: String) -> ResponseMock { - let (mock, response_mock) = base_mock(); - mock.respond_with(sse_response(body)).mount(server).await; - response_mock -} - pub async fn start_mock_server() -> MockServer { MockServer::builder() .body_print_limit(BodyPrintLimit::Limited(80_000)) diff --git a/codex-rs/core/tests/suite/otel.rs b/codex-rs/core/tests/suite/otel.rs index cb21451f4..8665d3a8e 100644 --- a/codex-rs/core/tests/suite/otel.rs +++ b/codex-rs/core/tests/suite/otel.rs @@ -9,7 +9,6 @@ use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_custom_tool_call; use core_test_support::responses::ev_function_call; -use core_test_support::responses::mount_sse; use core_test_support::responses::mount_sse_once; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; @@ -103,8 +102,6 @@ async fn process_sse_emits_failed_event_on_parse_error() { let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -144,8 +141,6 @@ async fn process_sse_records_failed_event_when_stream_closes_without_completed() let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -193,12 +188,18 @@ async fn process_sse_failed_event_records_response_error_message() { })]), ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -244,12 +245,18 @@ async fn process_sse_failed_event_logs_parse_error() { })]), ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -294,8 +301,6 @@ async fn process_sse_failed_event_logs_missing_error() { let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -337,11 +342,18 @@ async fn process_sse_failed_event_logs_response_completed_parse_error() { ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; + let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -430,7 +442,7 @@ async fn process_sse_emits_completed_telemetry() { async fn handle_response_item_records_tool_result_for_custom_tool_call() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_custom_tool_call( @@ -442,12 +454,18 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() { ]), ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -494,7 +512,7 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() { async fn handle_response_item_records_tool_result_for_function_call() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_function_call("function-call", "nonexistent", "{\"value\":1}"), @@ -503,11 +521,18 @@ async fn handle_response_item_records_tool_result_for_function_call() { ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; + let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -554,7 +579,7 @@ async fn handle_response_item_records_tool_result_for_function_call() { async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ serde_json::json!({ @@ -573,11 +598,18 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; + let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -618,7 +650,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() async fn handle_response_item_records_tool_result_for_local_shell_call() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_local_shell_call("shell-call", "completed", vec!["/bin/echo", "shell"]), @@ -627,11 +659,18 @@ async fn handle_response_item_records_tool_result_for_local_shell_call() { ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; + let TestCodex { codex, .. } = test_codex() .with_config(move |config| { config.features.disable(Feature::GhostCommit); - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -710,10 +749,23 @@ fn tool_decision_assertion<'a>( #[traced_test] async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ - ev_local_shell_call("auto_config_call", "completed", vec!["/bin/echo", "hello"]), + ev_local_shell_call( + "auto_config_call", + "completed", + vec!["/bin/echo", "local shell"], + ), + ev_completed("done"), + ]), + ) + .await; + + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), ev_completed("done"), ]), ) @@ -723,8 +775,6 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { .with_config(|config| { config.approval_policy = AskForApproval::OnRequest; config.sandbox_policy = SandboxPolicy::DangerFullAccess; - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -739,7 +789,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { .await .unwrap(); - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TokenCount(_))).await; + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; logs_assert(tool_decision_assertion( "auto_config_call", @@ -752,7 +802,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() { #[traced_test] async fn handle_container_exec_user_approved_records_tool_decision() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_local_shell_call("user_approved_call", "completed", vec!["/bin/date"]), @@ -761,11 +811,18 @@ async fn handle_container_exec_user_approved_records_tool_decision() { ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; + let TestCodex { codex, .. } = test_codex() .with_config(|config| { config.approval_policy = AskForApproval::UnlessTrusted; - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -804,7 +861,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() { async fn handle_container_exec_user_approved_for_session_records_tool_decision() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_local_shell_call("user_approved_session_call", "completed", vec!["/bin/date"]), @@ -812,12 +869,18 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision() ]), ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; let TestCodex { codex, .. } = test_codex() .with_config(|config| { config.approval_policy = AskForApproval::UnlessTrusted; - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -856,7 +919,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision() async fn handle_sandbox_error_user_approves_retry_records_tool_decision() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_local_shell_call("sandbox_retry_call", "completed", vec!["/bin/date"]), @@ -864,12 +927,18 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() { ]), ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; let TestCodex { codex, .. } = test_codex() .with_config(|config| { config.approval_policy = AskForApproval::UnlessTrusted; - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -908,7 +977,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() { async fn handle_container_exec_user_denies_records_tool_decision() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_local_shell_call("user_denied_call", "completed", vec!["/bin/date"]), @@ -917,11 +986,17 @@ async fn handle_container_exec_user_denies_records_tool_decision() { ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; let TestCodex { codex, .. } = test_codex() .with_config(|config| { config.approval_policy = AskForApproval::UnlessTrusted; - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -960,7 +1035,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() { async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_local_shell_call("sandbox_session_call", "completed", vec!["/bin/date"]), @@ -968,12 +1043,18 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() ]), ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; let TestCodex { codex, .. } = test_codex() .with_config(|config| { config.approval_policy = AskForApproval::UnlessTrusted; - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await @@ -1012,7 +1093,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() async fn handle_sandbox_error_user_denies_records_tool_decision() { let server = start_mock_server().await; - mount_sse( + mount_sse_once( &server, sse(vec![ ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/date"]), @@ -1021,11 +1102,18 @@ async fn handle_sandbox_error_user_denies_records_tool_decision() { ) .await; + mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "local shell done"), + ev_completed("done"), + ]), + ) + .await; + let TestCodex { codex, .. } = test_codex() .with_config(|config| { config.approval_policy = AskForApproval::UnlessTrusted; - config.model_provider.request_max_retries = Some(0); - config.model_provider.stream_max_retries = Some(0); }) .build(&server) .await