diff --git a/codex-rs/core/tests/common/responses.rs b/codex-rs/core/tests/common/responses.rs index b78f921bc..7a60cbe83 100644 --- a/codex-rs/core/tests/common/responses.rs +++ b/codex-rs/core/tests/common/responses.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use std::sync::Mutex; +use anyhow::Result; use serde_json::Value; use wiremock::BodyPrintLimit; use wiremock::Match; @@ -9,6 +10,7 @@ use wiremock::MockBuilder; use wiremock::MockServer; use wiremock::Respond; use wiremock::ResponseTemplate; +use wiremock::matchers::any; use wiremock::matchers::method; use wiremock::matchers::path_regex; @@ -38,6 +40,10 @@ impl ResponseMock { self.requests.lock().unwrap().clone() } + pub fn last_request(&self) -> Option { + self.requests.lock().unwrap().last().cloned() + } + /// Returns true if any captured request contains a `function_call` with the /// provided `call_id`. pub fn saw_function_call(&self, call_id: &str) -> bool { @@ -130,6 +136,42 @@ impl ResponsesRequest { .map(str::to_string) } + pub fn function_call_output_content_and_success( + &self, + call_id: &str, + ) -> Option<(Option, Option)> { + self.call_output_content_and_success(call_id, "function_call_output") + } + + pub fn custom_tool_call_output_content_and_success( + &self, + call_id: &str, + ) -> Option<(Option, Option)> { + self.call_output_content_and_success(call_id, "custom_tool_call_output") + } + + fn call_output_content_and_success( + &self, + call_id: &str, + call_type: &str, + ) -> Option<(Option, Option)> { + let output = self + .call_output(call_id, call_type) + .get("output") + .cloned() + .unwrap_or(Value::Null); + match output { + Value::String(text) => Some((Some(text), None)), + Value::Object(obj) => Some(( + obj.get("content") + .and_then(Value::as_str) + .map(str::to_string), + obj.get("success").and_then(Value::as_bool), + )), + _ => Some((None, None)), + } + } + pub fn header(&self, name: &str) -> Option { self.0 .headers @@ -487,6 +529,37 @@ pub async fn start_mock_server() -> MockServer { .await } +#[derive(Clone)] +pub struct FunctionCallResponseMocks { + pub function_call: ResponseMock, + pub completion: ResponseMock, +} + +pub async fn mount_function_call_agent_response( + server: &MockServer, + call_id: &str, + arguments: &str, + tool_name: &str, +) -> FunctionCallResponseMocks { + let first_response = sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, tool_name, arguments), + ev_completed("resp-1"), + ]); + let function_call = mount_sse_once_match(server, any(), first_response).await; + + let second_response = sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]); + let completion = mount_sse_once_match(server, any(), second_response).await; + + FunctionCallResponseMocks { + function_call, + completion, + } +} + /// Mounts a sequence of SSE response bodies and serves them in order for each /// POST to `/v1/responses`. Panics if more requests are received than bodies /// provided. Also asserts the exact number of expected calls. diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index 2741d8bed..e88c410ec 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -159,14 +159,28 @@ impl TestCodex { } pub async fn submit_turn(&self, prompt: &str) -> Result<()> { - self.submit_turn_with_policy(prompt, SandboxPolicy::DangerFullAccess) - .await + self.submit_turn_with_policies( + prompt, + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await } pub async fn submit_turn_with_policy( &self, prompt: &str, sandbox_policy: SandboxPolicy, + ) -> Result<()> { + self.submit_turn_with_policies(prompt, AskForApproval::Never, sandbox_policy) + .await + } + + pub async fn submit_turn_with_policies( + &self, + prompt: &str, + approval_policy: AskForApproval, + sandbox_policy: SandboxPolicy, ) -> Result<()> { let session_model = self.session_configured.model.clone(); self.codex @@ -176,7 +190,7 @@ impl TestCodex { }], final_output_json_schema: None, cwd: self.cwd.path().to_path_buf(), - approval_policy: AskForApproval::Never, + approval_policy, sandbox_policy, model: session_model, effort: None, diff --git a/codex-rs/core/tests/suite/grep_files.rs b/codex-rs/core/tests/suite/grep_files.rs index f8097558d..8b9f2a3b3 100644 --- a/codex-rs/core/tests/suite/grep_files.rs +++ b/codex-rs/core/tests/suite/grep_files.rs @@ -2,28 +2,14 @@ use anyhow::Result; use codex_core::model_family::find_family_for_model; -use codex_core::protocol::AskForApproval; -use codex_core::protocol::EventMsg; -use codex_core::protocol::Op; -use codex_core::protocol::SandboxPolicy; -use codex_protocol::config_types::ReasoningSummary; -use codex_protocol::user_input::UserInput; -use core_test_support::responses; -use core_test_support::responses::ev_assistant_message; -use core_test_support::responses::ev_completed; -use core_test_support::responses::ev_function_call; -use core_test_support::responses::ev_response_created; -use core_test_support::responses::sse; +use core_test_support::responses::mount_function_call_agent_response; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; -use core_test_support::wait_for_event; -use serde_json::Value; use std::collections::HashSet; use std::path::Path; use std::process::Command as StdCommand; -use wiremock::matchers::any; const MODEL_WITH_TOOL: &str = "test-gpt-5-codex"; @@ -69,18 +55,22 @@ async fn grep_files_tool_collects_matches() -> Result<()> { }) .to_string(); - mount_tool_sequence(&server, call_id, &arguments, "grep_files").await; - submit_turn(&test, "please find uses of needle").await?; + let mocks = + mount_function_call_agent_response(&server, call_id, &arguments, "grep_files").await; + test.submit_turn("please find uses of needle").await?; - let bodies = recorded_bodies(&server).await?; - let tool_output = find_tool_output(&bodies, call_id).expect("tool output present"); - let payload = tool_output.get("output").expect("output field present"); - let (content_opt, success_opt) = extract_content_and_success(payload); + let req = mocks.completion.single_request(); + let (content_opt, success_opt) = req + .function_call_output_content_and_success(call_id) + .expect("tool output present"); let content = content_opt.expect("content present"); let success = success_opt.unwrap_or(true); - assert!(success, "expected success for matches, got {payload:?}"); + assert!( + success, + "expected success for matches, got content={content}" + ); - let entries = collect_file_names(content); + let entries = collect_file_names(&content); assert_eq!(entries.len(), 2, "content: {content}"); assert!( entries.contains("alpha.rs"), @@ -118,16 +108,17 @@ async fn grep_files_tool_reports_empty_results() -> Result<()> { }) .to_string(); - mount_tool_sequence(&server, call_id, &arguments, "grep_files").await; - submit_turn(&test, "search again").await?; + let mocks = + mount_function_call_agent_response(&server, call_id, &arguments, "grep_files").await; + test.submit_turn("search again").await?; - let bodies = recorded_bodies(&server).await?; - let tool_output = find_tool_output(&bodies, call_id).expect("tool output present"); - let payload = tool_output.get("output").expect("output field present"); - let (content_opt, success_opt) = extract_content_and_success(payload); + let req = mocks.completion.single_request(); + let (content_opt, success_opt) = req + .function_call_output_content_and_success(call_id) + .expect("tool output present"); let content = content_opt.expect("content present"); if let Some(success) = success_opt { - assert!(!success, "expected success=false payload: {payload:?}"); + assert!(!success, "expected success=false content={content}"); } assert_eq!(content, "No matches found."); @@ -144,73 +135,6 @@ async fn build_test_codex(server: &wiremock::MockServer) -> Result { builder.build(server).await } -async fn submit_turn(test: &TestCodex, prompt: &str) -> Result<()> { - let session_model = test.session_configured.model.clone(); - - test.codex - .submit(Op::UserTurn { - items: vec![UserInput::Text { - text: prompt.into(), - }], - final_output_json_schema: None, - cwd: test.cwd.path().to_path_buf(), - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::DangerFullAccess, - model: session_model, - effort: None, - summary: ReasoningSummary::Auto, - }) - .await?; - - wait_for_event(&test.codex, |event| { - matches!(event, EventMsg::TaskComplete(_)) - }) - .await; - Ok(()) -} - -async fn mount_tool_sequence( - server: &wiremock::MockServer, - call_id: &str, - arguments: &str, - tool_name: &str, -) { - let first_response = sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, tool_name, arguments), - ev_completed("resp-1"), - ]); - responses::mount_sse_once_match(server, any(), first_response).await; - - let second_response = sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]); - responses::mount_sse_once_match(server, any(), second_response).await; -} - -#[allow(clippy::expect_used)] -async fn recorded_bodies(server: &wiremock::MockServer) -> Result> { - let requests = server.received_requests().await.expect("requests recorded"); - Ok(requests - .iter() - .map(|req| req.body_json::().expect("request json")) - .collect()) -} - -fn find_tool_output<'a>(requests: &'a [Value], call_id: &str) -> Option<&'a Value> { - requests.iter().find_map(|body| { - body.get("input") - .and_then(Value::as_array) - .and_then(|items| { - items.iter().find(|item| { - item.get("type").and_then(Value::as_str) == Some("function_call_output") - && item.get("call_id").and_then(Value::as_str) == Some(call_id) - }) - }) - }) -} - fn collect_file_names(content: &str) -> HashSet { content .lines() @@ -224,14 +148,3 @@ fn collect_file_names(content: &str) -> HashSet { }) .collect() } - -fn extract_content_and_success(value: &Value) -> (Option<&str>, Option) { - match value { - Value::String(text) => (Some(text.as_str()), None), - Value::Object(obj) => ( - obj.get("content").and_then(Value::as_str), - obj.get("success").and_then(Value::as_bool), - ), - _ => (None, None), - } -} diff --git a/codex-rs/core/tests/suite/list_dir.rs b/codex-rs/core/tests/suite/list_dir.rs index 2a04d3075..802080d84 100644 --- a/codex-rs/core/tests/suite/list_dir.rs +++ b/codex-rs/core/tests/suite/list_dir.rs @@ -1,25 +1,11 @@ #![cfg(not(target_os = "windows"))] -use codex_core::protocol::AskForApproval; -use codex_core::protocol::EventMsg; -use codex_core::protocol::Op; -use codex_core::protocol::SandboxPolicy; -use codex_protocol::config_types::ReasoningSummary; -use codex_protocol::user_input::UserInput; -use core_test_support::responses; -use core_test_support::responses::ev_assistant_message; -use core_test_support::responses::ev_completed; -use core_test_support::responses::ev_function_call; -use core_test_support::responses::ev_response_created; -use core_test_support::responses::sse; +use core_test_support::responses::mount_function_call_agent_response; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; -use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; -use core_test_support::wait_for_event; use pretty_assertions::assert_eq; -use serde_json::Value; -use wiremock::matchers::any; +use serde_json::json; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[ignore = "disabled until we enable list_dir tool"] @@ -27,99 +13,30 @@ async fn list_dir_tool_returns_entries() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let test = test_codex().build(&server).await?; - let TestCodex { - codex, - cwd, - session_configured, - .. - } = test_codex().build(&server).await?; - - let dir_path = cwd.path().join("sample_dir"); + let dir_path = test.cwd.path().join("sample_dir"); std::fs::create_dir(&dir_path)?; std::fs::write(dir_path.join("alpha.txt"), "first file")?; std::fs::create_dir(dir_path.join("nested"))?; let dir_path = dir_path.to_string_lossy().to_string(); let call_id = "list-dir-call"; - let arguments = serde_json::json!({ + let arguments = json!({ "dir_path": dir_path, "offset": 1, "limit": 2, }) .to_string(); - let first_response = sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "list_dir", &arguments), - ev_completed("resp-1"), - ]); - responses::mount_sse_once_match(&server, any(), first_response).await; - - let second_response = sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]); - responses::mount_sse_once_match(&server, any(), second_response).await; - - let session_model = session_configured.model.clone(); - - codex - .submit(Op::UserTurn { - items: vec![UserInput::Text { - text: "list directory contents".into(), - }], - final_output_json_schema: None, - cwd: cwd.path().to_path_buf(), - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::DangerFullAccess, - model: session_model, - effort: None, - summary: ReasoningSummary::Auto, - }) - .await?; - - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; - - let requests = server.received_requests().await.expect("recorded requests"); - let request_bodies = requests - .iter() - .map(|req| req.body_json::().unwrap()) - .collect::>(); - assert!( - !request_bodies.is_empty(), - "expected at least one request body" - ); - - let tool_output_item = request_bodies - .iter() - .find_map(|body| { - body.get("input") - .and_then(Value::as_array) - .and_then(|items| { - items.iter().find(|item| { - item.get("type").and_then(Value::as_str) == Some("function_call_output") - }) - }) - }) - .unwrap_or_else(|| { - panic!("function_call_output item not found in requests: {request_bodies:#?}") - }); - - assert_eq!( - tool_output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - - let output_text = tool_output_item - .get("output") - .and_then(|value| match value { - Value::String(text) => Some(text.as_str()), - Value::Object(obj) => obj.get("content").and_then(Value::as_str), - _ => None, - }) - .expect("output text present"); - assert_eq!(output_text, "E1: [file] alpha.txt\nE2: [dir] nested"); + let mocks = mount_function_call_agent_response(&server, call_id, &arguments, "list_dir").await; + test.submit_turn("list directory contents").await?; + let req = mocks.completion.single_request(); + let (content_opt, _) = req + .function_call_output_content_and_success(call_id) + .expect("function_call_output present"); + let output = content_opt.expect("output content present in tool output"); + assert_eq!(output, "E1: [file] alpha.txt\nE2: [dir] nested"); Ok(()) } @@ -130,15 +47,9 @@ async fn list_dir_tool_depth_one_omits_children() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let test = test_codex().build(&server).await?; - let TestCodex { - codex, - cwd, - session_configured, - .. - } = test_codex().build(&server).await?; - - let dir_path = cwd.path().join("depth_one"); + let dir_path = test.cwd.path().join("depth_one"); std::fs::create_dir(&dir_path)?; std::fs::write(dir_path.join("alpha.txt"), "alpha")?; std::fs::create_dir(dir_path.join("nested"))?; @@ -146,7 +57,7 @@ async fn list_dir_tool_depth_one_omits_children() -> anyhow::Result<()> { let dir_path = dir_path.to_string_lossy().to_string(); let call_id = "list-dir-depth1"; - let arguments = serde_json::json!({ + let arguments = json!({ "dir_path": dir_path, "offset": 1, "limit": 10, @@ -154,77 +65,15 @@ async fn list_dir_tool_depth_one_omits_children() -> anyhow::Result<()> { }) .to_string(); - let first_response = sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "list_dir", &arguments), - ev_completed("resp-1"), - ]); - responses::mount_sse_once_match(&server, any(), first_response).await; - - let second_response = sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]); - responses::mount_sse_once_match(&server, any(), second_response).await; - - let session_model = session_configured.model.clone(); - - codex - .submit(Op::UserTurn { - items: vec![UserInput::Text { - text: "list directory contents depth one".into(), - }], - final_output_json_schema: None, - cwd: cwd.path().to_path_buf(), - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::DangerFullAccess, - model: session_model, - effort: None, - summary: ReasoningSummary::Auto, - }) + let mocks = mount_function_call_agent_response(&server, call_id, &arguments, "list_dir").await; + test.submit_turn("list directory contents depth one") .await?; - - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; - - let requests = server.received_requests().await.expect("recorded requests"); - let request_bodies = requests - .iter() - .map(|req| req.body_json::().unwrap()) - .collect::>(); - assert!( - !request_bodies.is_empty(), - "expected at least one request body" - ); - - let tool_output_item = request_bodies - .iter() - .find_map(|body| { - body.get("input") - .and_then(Value::as_array) - .and_then(|items| { - items.iter().find(|item| { - item.get("type").and_then(Value::as_str) == Some("function_call_output") - }) - }) - }) - .unwrap_or_else(|| { - panic!("function_call_output item not found in requests: {request_bodies:#?}") - }); - - assert_eq!( - tool_output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - - let output_text = tool_output_item - .get("output") - .and_then(|value| match value { - Value::String(text) => Some(text.as_str()), - Value::Object(obj) => obj.get("content").and_then(Value::as_str), - _ => None, - }) - .expect("output text present"); - assert_eq!(output_text, "E1: [file] alpha.txt\nE2: [dir] nested"); + let req = mocks.completion.single_request(); + let (content_opt, _) = req + .function_call_output_content_and_success(call_id) + .expect("function_call_output present"); + let output = content_opt.expect("output content present in tool output"); + assert_eq!(output, "E1: [file] alpha.txt\nE2: [dir] nested"); Ok(()) } @@ -235,15 +84,9 @@ async fn list_dir_tool_depth_two_includes_children_only() -> anyhow::Result<()> skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let test = test_codex().build(&server).await?; - let TestCodex { - codex, - cwd, - session_configured, - .. - } = test_codex().build(&server).await?; - - let dir_path = cwd.path().join("depth_two"); + let dir_path = test.cwd.path().join("depth_two"); std::fs::create_dir(&dir_path)?; std::fs::write(dir_path.join("alpha.txt"), "alpha")?; let nested = dir_path.join("nested"); @@ -255,7 +98,7 @@ async fn list_dir_tool_depth_two_includes_children_only() -> anyhow::Result<()> let dir_path_string = dir_path.to_string_lossy().to_string(); let call_id = "list-dir-depth2"; - let arguments = serde_json::json!({ + let arguments = json!({ "dir_path": dir_path_string, "offset": 1, "limit": 10, @@ -263,81 +106,16 @@ async fn list_dir_tool_depth_two_includes_children_only() -> anyhow::Result<()> }) .to_string(); - let first_response = sse(vec![ - serde_json::json!({ - "type": "response.created", - "response": {"id": "resp-1"} - }), - ev_function_call(call_id, "list_dir", &arguments), - ev_completed("resp-1"), - ]); - responses::mount_sse_once_match(&server, any(), first_response).await; - - let second_response = sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]); - responses::mount_sse_once_match(&server, any(), second_response).await; - - let session_model = session_configured.model.clone(); - - codex - .submit(Op::UserTurn { - items: vec![UserInput::Text { - text: "list directory contents depth two".into(), - }], - final_output_json_schema: None, - cwd: cwd.path().to_path_buf(), - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::DangerFullAccess, - model: session_model, - effort: None, - summary: ReasoningSummary::Auto, - }) + let mocks = mount_function_call_agent_response(&server, call_id, &arguments, "list_dir").await; + test.submit_turn("list directory contents depth two") .await?; - - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; - - let requests = server.received_requests().await.expect("recorded requests"); - let request_bodies = requests - .iter() - .map(|req| req.body_json::().unwrap()) - .collect::>(); - assert!( - !request_bodies.is_empty(), - "expected at least one request body" - ); - - let tool_output_item = request_bodies - .iter() - .find_map(|body| { - body.get("input") - .and_then(Value::as_array) - .and_then(|items| { - items.iter().find(|item| { - item.get("type").and_then(Value::as_str) == Some("function_call_output") - }) - }) - }) - .unwrap_or_else(|| { - panic!("function_call_output item not found in requests: {request_bodies:#?}") - }); - + let req = mocks.completion.single_request(); + let (content_opt, _) = req + .function_call_output_content_and_success(call_id) + .expect("function_call_output present"); + let output = content_opt.expect("output content present in tool output"); assert_eq!( - tool_output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - - let output_text = tool_output_item - .get("output") - .and_then(|value| match value { - Value::String(text) => Some(text.as_str()), - Value::Object(obj) => obj.get("content").and_then(Value::as_str), - _ => None, - }) - .expect("output text present"); - assert_eq!( - output_text, + output, "E1: [file] alpha.txt\nE2: [dir] nested\nE3: [file] nested/beta.txt\nE4: [dir] nested/grand" ); @@ -350,15 +128,9 @@ async fn list_dir_tool_depth_three_includes_grandchildren() -> anyhow::Result<() skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let test = test_codex().build(&server).await?; - let TestCodex { - codex, - cwd, - session_configured, - .. - } = test_codex().build(&server).await?; - - let dir_path = cwd.path().join("depth_three"); + let dir_path = test.cwd.path().join("depth_three"); std::fs::create_dir(&dir_path)?; std::fs::write(dir_path.join("alpha.txt"), "alpha")?; let nested = dir_path.join("nested"); @@ -370,7 +142,7 @@ async fn list_dir_tool_depth_three_includes_grandchildren() -> anyhow::Result<() let dir_path_string = dir_path.to_string_lossy().to_string(); let call_id = "list-dir-depth3"; - let arguments = serde_json::json!({ + let arguments = json!({ "dir_path": dir_path_string, "offset": 1, "limit": 10, @@ -378,81 +150,16 @@ async fn list_dir_tool_depth_three_includes_grandchildren() -> anyhow::Result<() }) .to_string(); - let first_response = sse(vec![ - serde_json::json!({ - "type": "response.created", - "response": {"id": "resp-1"} - }), - ev_function_call(call_id, "list_dir", &arguments), - ev_completed("resp-1"), - ]); - responses::mount_sse_once_match(&server, any(), first_response).await; - - let second_response = sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]); - responses::mount_sse_once_match(&server, any(), second_response).await; - - let session_model = session_configured.model.clone(); - - codex - .submit(Op::UserTurn { - items: vec![UserInput::Text { - text: "list directory contents depth three".into(), - }], - final_output_json_schema: None, - cwd: cwd.path().to_path_buf(), - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::DangerFullAccess, - model: session_model, - effort: None, - summary: ReasoningSummary::Auto, - }) + let mocks = mount_function_call_agent_response(&server, call_id, &arguments, "list_dir").await; + test.submit_turn("list directory contents depth three") .await?; - - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; - - let requests = server.received_requests().await.expect("recorded requests"); - let request_bodies = requests - .iter() - .map(|req| req.body_json::().unwrap()) - .collect::>(); - assert!( - !request_bodies.is_empty(), - "expected at least one request body" - ); - - let tool_output_item = request_bodies - .iter() - .find_map(|body| { - body.get("input") - .and_then(Value::as_array) - .and_then(|items| { - items.iter().find(|item| { - item.get("type").and_then(Value::as_str) == Some("function_call_output") - }) - }) - }) - .unwrap_or_else(|| { - panic!("function_call_output item not found in requests: {request_bodies:#?}") - }); - + let req = mocks.completion.single_request(); + let (content_opt, _) = req + .function_call_output_content_and_success(call_id) + .expect("function_call_output present"); + let output = content_opt.expect("output content present in tool output"); assert_eq!( - tool_output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - - let output_text = tool_output_item - .get("output") - .and_then(|value| match value { - Value::String(text) => Some(text.as_str()), - Value::Object(obj) => obj.get("content").and_then(Value::as_str), - _ => None, - }) - .expect("output text present"); - assert_eq!( - output_text, + output, "E1: [file] alpha.txt\nE2: [dir] nested\nE3: [file] nested/beta.txt\nE4: [dir] nested/grand\nE5: [file] nested/grand/gamma.txt" ); diff --git a/codex-rs/core/tests/suite/read_file.rs b/codex-rs/core/tests/suite/read_file.rs index a74bd8b2a..57be21f9f 100644 --- a/codex-rs/core/tests/suite/read_file.rs +++ b/codex-rs/core/tests/suite/read_file.rs @@ -1,25 +1,11 @@ #![cfg(not(target_os = "windows"))] -use codex_core::protocol::AskForApproval; -use codex_core::protocol::EventMsg; -use codex_core::protocol::Op; -use codex_core::protocol::SandboxPolicy; -use codex_protocol::config_types::ReasoningSummary; -use codex_protocol::user_input::UserInput; -use core_test_support::responses; -use core_test_support::responses::ev_assistant_message; -use core_test_support::responses::ev_completed; -use core_test_support::responses::ev_function_call; -use core_test_support::responses::ev_response_created; -use core_test_support::responses::sse; +use core_test_support::responses::mount_function_call_agent_response; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; -use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; -use core_test_support::wait_for_event; use pretty_assertions::assert_eq; -use serde_json::Value; -use wiremock::matchers::any; +use serde_json::json; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[ignore = "disabled until we enable read_file tool"] @@ -27,72 +13,29 @@ async fn read_file_tool_returns_requested_lines() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let test = test_codex().build(&server).await?; - let TestCodex { - codex, - cwd, - session_configured, - .. - } = test_codex().build(&server).await?; - - let file_path = cwd.path().join("sample.txt"); + let file_path = test.cwd.path().join("sample.txt"); std::fs::write(&file_path, "first\nsecond\nthird\nfourth\n")?; let file_path = file_path.to_string_lossy().to_string(); let call_id = "read-file-call"; - let arguments = serde_json::json!({ + let arguments = json!({ "file_path": file_path, "offset": 2, "limit": 2, }) .to_string(); - let first_response = sse(vec![ - ev_response_created("resp-1"), - ev_function_call(call_id, "read_file", &arguments), - ev_completed("resp-1"), - ]); - responses::mount_sse_once_match(&server, any(), first_response).await; + let mocks = mount_function_call_agent_response(&server, call_id, &arguments, "read_file").await; - let second_response = sse(vec![ - ev_assistant_message("msg-1", "done"), - ev_completed("resp-2"), - ]); - let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await; + test.submit_turn("please inspect sample.txt").await?; - let session_model = session_configured.model.clone(); - - codex - .submit(Op::UserTurn { - items: vec![UserInput::Text { - text: "please inspect sample.txt".into(), - }], - final_output_json_schema: None, - cwd: cwd.path().to_path_buf(), - approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::DangerFullAccess, - model: session_model, - effort: None, - summary: ReasoningSummary::Auto, - }) - .await?; - - wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; - - let req = second_mock.single_request(); - let tool_output_item = req.function_call_output(call_id); - assert_eq!( - tool_output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - let output_text = tool_output_item - .get("output") - .and_then(|value| match value { - Value::String(text) => Some(text.as_str()), - Value::Object(obj) => obj.get("content").and_then(Value::as_str), - _ => None, - }) - .expect("output text present"); + let req = mocks.completion.single_request(); + let (output_text_opt, _) = req + .function_call_output_content_and_success(call_id) + .expect("output present"); + let output_text = output_text_opt.expect("output text present"); assert_eq!(output_text, "L2: second\nL3: third"); Ok(()) diff --git a/codex-rs/core/tests/suite/shell_serialization.rs b/codex-rs/core/tests/suite/shell_serialization.rs index d748fe6b4..237b2db37 100644 --- a/codex-rs/core/tests/suite/shell_serialization.rs +++ b/codex-rs/core/tests/suite/shell_serialization.rs @@ -3,12 +3,7 @@ use anyhow::Result; use codex_core::features::Feature; use codex_core::model_family::find_family_for_model; -use codex_core::protocol::AskForApproval; -use codex_core::protocol::EventMsg; -use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; -use codex_protocol::config_types::ReasoningSummary; -use codex_protocol::user_input::UserInput; use core_test_support::assert_regex_match; use core_test_support::responses::ev_apply_patch_function_call; use core_test_support::responses::ev_assistant_message; @@ -21,9 +16,7 @@ use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; -use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; -use core_test_support::wait_for_event; use pretty_assertions::assert_eq; use regex_lite::Regex; use serde_json::Value; @@ -42,69 +35,6 @@ const FIXTURE_JSON: &str = r#"{ } "#; -async fn submit_turn(test: &TestCodex, prompt: &str, sandbox_policy: SandboxPolicy) -> Result<()> { - let session_model = test.session_configured.model.clone(); - - test.codex - .submit(Op::UserTurn { - items: vec![UserInput::Text { - text: prompt.into(), - }], - final_output_json_schema: None, - cwd: test.cwd.path().to_path_buf(), - approval_policy: AskForApproval::Never, - sandbox_policy, - model: session_model, - effort: None, - summary: ReasoningSummary::Auto, - }) - .await?; - - wait_for_event(&test.codex, |event| { - matches!(event, EventMsg::TaskComplete(_)) - }) - .await; - - Ok(()) -} - -fn request_bodies(requests: &[wiremock::Request]) -> Result> { - requests - .iter() - .map(|req| Ok(serde_json::from_slice::(&req.body)?)) - .collect() -} - -fn find_function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> Option<&'a Value> { - for body in bodies { - if let Some(items) = body.get("input").and_then(Value::as_array) { - for item in items { - if item.get("type").and_then(Value::as_str) == Some("function_call_output") - && item.get("call_id").and_then(Value::as_str) == Some(call_id) - { - return Some(item); - } - } - } - } - None -} - -fn find_custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> Option<&'a Value> { - for body in bodies { - if let Some(items) = body.get("input").and_then(Value::as_array) { - for item in items { - if item.get("type").and_then(Value::as_str) == Some("custom_tool_call_output") - && item.get("call_id").and_then(Value::as_str) == Some(call_id) - { - return Some(item); - } - } - } - } - None -} - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> { skip_if_no_network!(Ok(())); @@ -133,21 +63,16 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "run the json shell command", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = find_function_call_output(&bodies, call_id).expect("shell output present"); + let req = mock.last_request().expect("shell output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -202,22 +127,18 @@ async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "run the structured shell command", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_function_call_output(&bodies, call_id).expect("structured output present"); + let req = mock + .last_request() + .expect("structured shell output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -269,21 +190,16 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<( ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "read the fixture JSON with sed", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = find_function_call_output(&bodies, call_id).expect("shell output present"); + let req = mock.last_request().expect("shell output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -345,22 +261,18 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "read the fixture JSON with structured output", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_function_call_output(&bodies, call_id).expect("structured output present"); + let req = mock + .last_request() + .expect("structured output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -420,22 +332,18 @@ async fn shell_output_for_freeform_tool_records_duration() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "run the structured shell command", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_function_call_output(&bodies, call_id).expect("structured output present"); + let req = mock + .last_request() + .expect("structured output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -490,22 +398,18 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "run the truncation shell command", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_function_call_output(&bodies, call_id).expect("truncated output present"); + let req = mock + .last_request() + .expect("truncated output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -570,22 +474,18 @@ async fn apply_patch_custom_tool_output_is_structured() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "apply the patch via custom tool", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present"); + let req = mock + .last_request() + .expect("apply_patch output request recorded"); + let output_item = req.custom_tool_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -630,22 +530,18 @@ async fn apply_patch_custom_tool_call_creates_file() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "apply the patch via custom tool to create a file", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present"); + let req = mock + .last_request() + .expect("apply_patch output request recorded"); + let output_item = req.custom_tool_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -699,22 +595,18 @@ async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "apply the patch via custom tool to update a file", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present"); + let req = mock + .last_request() + .expect("apply_patch output request recorded"); + let output_item = req.custom_tool_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -762,22 +654,18 @@ async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "attempt a failing apply_patch via custom tool", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present"); + let req = mock + .last_request() + .expect("apply_patch output request recorded"); + let output_item = req.custom_tool_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -817,22 +705,18 @@ async fn apply_patch_function_call_output_is_structured() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "apply the patch via function-call apply_patch", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_function_call_output(&bodies, call_id).expect("apply_patch function output present"); + let req = mock + .last_request() + .expect("apply_patch function output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -880,21 +764,16 @@ async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "run the failing shell command", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = find_function_call_output(&bodies, call_id).expect("shell output present"); + let req = mock.last_request().expect("shell output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) @@ -934,22 +813,18 @@ async fn local_shell_call_output_is_structured() -> Result<()> { ev_completed("resp-2"), ]), ]; - mount_sse_sequence(&server, responses).await; + let mock = mount_sse_sequence(&server, responses).await; - submit_turn( - &test, + test.submit_turn_with_policy( "run the local shell command", SandboxPolicy::DangerFullAccess, ) .await?; - let requests = server - .received_requests() - .await - .expect("recorded requests present"); - let bodies = request_bodies(&requests)?; - let output_item = - find_function_call_output(&bodies, call_id).expect("local shell output present"); + let req = mock + .last_request() + .expect("local shell output request recorded"); + let output_item = req.function_call_output(call_id); let output = output_item .get("output") .and_then(Value::as_str) diff --git a/codex-rs/core/tests/suite/tool_harness.rs b/codex-rs/core/tests/suite/tool_harness.rs index e9f9552c5..3288822ff 100644 --- a/codex-rs/core/tests/suite/tool_harness.rs +++ b/codex-rs/core/tests/suite/tool_harness.rs @@ -14,6 +14,7 @@ use codex_protocol::plan_tool::StepStatus; use codex_protocol::user_input::UserInput; use core_test_support::assert_regex_match; use core_test_support::responses; +use core_test_support::responses::ResponsesRequest; use core_test_support::responses::ev_apply_patch_function_call; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; @@ -30,12 +31,22 @@ use serde_json::Value; use serde_json::json; use wiremock::matchers::any; -fn extract_output_text(item: &Value) -> Option<&str> { - item.get("output").and_then(|value| match value { - Value::String(text) => Some(text.as_str()), - Value::Object(obj) => obj.get("content").and_then(Value::as_str), - _ => None, - }) +fn call_output(req: &ResponsesRequest, call_id: &str) -> (String, Option) { + let raw = req.function_call_output(call_id); + assert_eq!( + raw.get("call_id").and_then(Value::as_str), + Some(call_id), + "mismatched call_id in function_call_output" + ); + let (content_opt, success) = match req.function_call_output_content_and_success(call_id) { + Some(values) => values, + None => panic!("function_call_output present"), + }; + let content = match content_opt { + Some(c) => c, + None => panic!("function_call_output content present"), + }; + (content, success) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -90,9 +101,8 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()> wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; let req = second_mock.single_request(); - let output_item = req.function_call_output(call_id); - let output_text = extract_output_text(&output_item).expect("output text present"); - let exec_output: Value = serde_json::from_str(output_text)?; + let (output_text, _) = call_output(&req, call_id); + let exec_output: Value = serde_json::from_str(&output_text)?; assert_eq!(exec_output["metadata"]["exit_code"], 0); let stdout = exec_output["output"].as_str().expect("stdout field"); assert_regex_match(r"(?s)^tool harness\n?$", stdout); @@ -174,12 +184,7 @@ async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> { assert!(saw_plan_update, "expected PlanUpdate event"); let req = second_mock.single_request(); - let output_item = req.function_call_output(call_id); - assert_eq!( - output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - let output_text = extract_output_text(&output_item).expect("output text present"); + let (output_text, _success_flag) = call_output(&req, call_id); assert_eq!(output_text, "Plan updated"); Ok(()) @@ -252,22 +257,12 @@ async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> { ); let req = second_mock.single_request(); - let output_item = req.function_call_output(call_id); - assert_eq!( - output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - let output_text = extract_output_text(&output_item).expect("output text present"); + let (output_text, success_flag) = call_output(&req, call_id); assert!( output_text.contains("failed to parse function arguments"), "expected parse error message in output text, got {output_text:?}" ); - if let Some(success_flag) = output_item - .get("output") - .and_then(|value| value.as_object()) - .and_then(|obj| obj.get("success")) - .and_then(serde_json::Value::as_bool) - { + if let Some(success_flag) = success_flag { assert!( !success_flag, "expected tool output to mark success=false for malformed payload" @@ -357,12 +352,7 @@ async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<() assert!(patch_end_success); let req = second_mock.single_request(); - let output_item = req.function_call_output(call_id); - assert_eq!( - output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - let output_text = extract_output_text(&output_item).expect("output text present"); + let (output_text, _success_flag) = call_output(&req, call_id); let expected_pattern = format!( r"(?s)^Exit code: 0 @@ -372,7 +362,7 @@ Success. Updated the following files: A {file_name} ?$" ); - assert_regex_match(&expected_pattern, output_text); + assert_regex_match(&expected_pattern, &output_text); let updated_contents = fs::read_to_string(file_path)?; assert_eq!( @@ -437,12 +427,7 @@ async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> { wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; let req = second_mock.single_request(); - let output_item = req.function_call_output(call_id); - assert_eq!( - output_item.get("call_id").and_then(Value::as_str), - Some(call_id) - ); - let output_text = extract_output_text(&output_item).expect("output text present"); + let (output_text, success_flag) = call_output(&req, call_id); assert!( output_text.contains("apply_patch verification failed"), @@ -453,12 +438,7 @@ async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> { "expected parse diagnostics in output text, got {output_text:?}" ); - if let Some(success_flag) = output_item - .get("output") - .and_then(|value| value.as_object()) - .and_then(|obj| obj.get("success")) - .and_then(serde_json::Value::as_bool) - { + if let Some(success_flag) = success_flag { assert!( !success_flag, "expected tool output to mark success=false for parse failures" diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs index 2e92e6596..ca844db4c 100644 --- a/codex-rs/core/tests/suite/tools.rs +++ b/codex-rs/core/tests/suite/tools.rs @@ -10,11 +10,7 @@ use anyhow::Result; use codex_core::features::Feature; use codex_core::model_family::find_family_for_model; use codex_core::protocol::AskForApproval; -use codex_core::protocol::EventMsg; -use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; -use codex_protocol::config_types::ReasoningSummary; -use codex_protocol::user_input::UserInput; use core_test_support::assert_regex_match; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; @@ -26,44 +22,11 @@ use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; -use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; -use core_test_support::wait_for_event; use regex_lite::Regex; use serde_json::Value; use serde_json::json; -async fn submit_turn( - test: &TestCodex, - prompt: &str, - approval_policy: AskForApproval, - sandbox_policy: SandboxPolicy, -) -> Result<()> { - let session_model = test.session_configured.model.clone(); - - test.codex - .submit(Op::UserTurn { - items: vec![UserInput::Text { - text: prompt.into(), - }], - final_output_json_schema: None, - cwd: test.cwd.path().to_path_buf(), - approval_policy, - sandbox_policy, - model: session_model, - effort: None, - summary: ReasoningSummary::Auto, - }) - .await?; - - wait_for_event(&test.codex, |event| { - matches!(event, EventMsg::TaskComplete(_)) - }) - .await; - - Ok(()) -} - fn tool_names(body: &Value) -> Vec { body.get("tools") .and_then(Value::as_array) @@ -110,8 +73,7 @@ async fn custom_tool_unknown_returns_custom_output_error() -> Result<()> { ) .await; - submit_turn( - &test, + test.submit_turn_with_policies( "invoke custom tool", AskForApproval::Never, SandboxPolicy::DangerFullAccess, @@ -189,8 +151,7 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> { ) .await; - submit_turn( - &test, + test.submit_turn_with_policies( "run the shell command", AskForApproval::Never, SandboxPolicy::DangerFullAccess, @@ -202,24 +163,22 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> { "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}" ); - let blocked_item = second_mock + let blocked_output = second_mock .single_request() - .function_call_output(call_id_blocked); + .function_call_output_content_and_success(call_id_blocked) + .and_then(|(content, _)| content) + .expect("blocked output string"); assert_eq!( - blocked_item.get("output").and_then(Value::as_str), - Some(expected_message.as_str()), + blocked_output, expected_message, "unexpected rejection message" ); - let success_item = third_mock + let success_output = third_mock .single_request() - .function_call_output(call_id_success); - let output_json: Value = serde_json::from_str( - success_item - .get("output") - .and_then(Value::as_str) - .expect("success output string"), - )?; + .function_call_output_content_and_success(call_id_success) + .and_then(|(content, _)| content) + .expect("success output string"); + let output_json: Value = serde_json::from_str(&success_output)?; assert_eq!( output_json["metadata"]["exit_code"].as_i64(), Some(0), @@ -348,8 +307,7 @@ async fn collect_tools(use_unified_exec: bool) -> Result> { }); let test = builder.build(&server).await?; - submit_turn( - &test, + test.submit_turn_with_policies( "list tools", AskForApproval::Never, SandboxPolicy::DangerFullAccess, @@ -423,8 +381,7 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> { ) .await; - submit_turn( - &test, + test.submit_turn_with_policies( "run a long command", AskForApproval::Never, SandboxPolicy::DangerFullAccess, @@ -600,8 +557,7 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> { ) .await; - submit_turn( - &test, + test.submit_turn_with_policies( "spawn a missing binary", AskForApproval::Never, SandboxPolicy::DangerFullAccess, diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index ae9af966f..00931ebce 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -45,14 +45,6 @@ fn find_image_message(body: &Value) -> Option<&Value> { }) } -fn extract_output_text(item: &Value) -> Option<&str> { - item.get("output").and_then(|value| match value { - Value::String(text) => Some(text.as_str()), - Value::Object(obj) => obj.get("content").and_then(Value::as_str), - _ => None, - }) -} - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); @@ -207,10 +199,12 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> { assert_eq!(tool_event.call_id, call_id); assert_eq!(tool_event.path, abs_path); - let body = mock.single_request().body_json(); - let output_item = mock.single_request().function_call_output(call_id); - - let output_text = extract_output_text(&output_item).expect("output text present"); + let req = mock.single_request(); + let body = req.body_json(); + let output_text = req + .function_call_output_content_and_success(call_id) + .and_then(|(content, _)| content) + .expect("output text present"); assert_eq!(output_text, "attached local image path"); let image_message = @@ -299,9 +293,12 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> { wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; - let body_with_tool_output = mock.single_request().body_json(); - let output_item = mock.single_request().function_call_output(call_id); - let output_text = extract_output_text(&output_item).expect("output text present"); + let req = mock.single_request(); + let body_with_tool_output = req.body_json(); + let output_text = req + .function_call_output_content_and_success(call_id) + .and_then(|(content, _)| content) + .expect("output text present"); let expected_message = format!("image path `{}` is not a file", abs_path.display()); assert_eq!(output_text, expected_message); @@ -398,8 +395,11 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> "placeholder should mention path: {placeholder}" ); - let output_item = mock.single_request().function_call_output(call_id); - let output_text = extract_output_text(&output_item).expect("output text present"); + let output_text = mock + .single_request() + .function_call_output_content_and_success(call_id) + .and_then(|(content, _)| content) + .expect("output text present"); assert_eq!(output_text, "attached local image path"); Ok(()) @@ -456,9 +456,12 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> { wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; - let body_with_tool_output = mock.single_request().body_json(); - let output_item = mock.single_request().function_call_output(call_id); - let output_text = extract_output_text(&output_item).expect("output text present"); + let req = mock.single_request(); + let body_with_tool_output = req.body_json(); + let output_text = req + .function_call_output_content_and_success(call_id) + .and_then(|(content, _)| content) + .expect("output text present"); let expected_prefix = format!("unable to locate image at `{}`:", abs_path.display()); assert!( output_text.starts_with(&expected_prefix),