diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index 1a0a69035..539e9cb8e 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -47,6 +47,11 @@ jobs: steps: - uses: actions/checkout@v6 + - name: Set up Node.js for js_repl tests + uses: actions/setup-node@v6 + with: + node-version-file: codex-rs/node-version.txt + # Some integration tests rely on DotSlash being installed. # See https://github.com/openai/codex/pull/7617. - name: Install DotSlash @@ -156,6 +161,13 @@ jobs: --build_metadata=VISIBILITY=PUBLIC ) + if [[ "${RUNNER_OS:-}" != "Windows" ]]; then + # Bazel test sandboxes on macOS may resolve an older Homebrew `node` + # before the `actions/setup-node` runtime on PATH. + node_bin="$(which node)" + bazel_args+=("--test_env=CODEX_JS_REPL_NODE_PATH=${node_bin}") + fi + if [[ -n "${BUILDBUDDY_API_KEY:-}" ]]; then echo "BuildBuddy API key is available; using remote Bazel configuration." # Work around Bazel 9 remote repo contents cache / overlay materialization failures diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 8e8825e9f..3122a965a 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -499,6 +499,10 @@ jobs: steps: - uses: actions/checkout@v6 + - name: Set up Node.js for js_repl tests + uses: actions/setup-node@v6 + with: + node-version-file: codex-rs/node-version.txt - name: Install Linux build dependencies if: ${{ runner.os == 'Linux' }} shell: bash diff --git a/codex-rs/core/src/tools/js_repl/mod.rs b/codex-rs/core/src/tools/js_repl/mod.rs index 18695ab88..3ae9cff62 100644 --- a/codex-rs/core/src/tools/js_repl/mod.rs +++ b/codex-rs/core/src/tools/js_repl/mod.rs @@ -1636,6 +1636,12 @@ mod tests { } async fn can_run_js_repl_runtime_tests() -> bool { + // These white-box runtime tests rely on the unit-test harness and are + // only required on macOS. Linux uses the codex-linux-sandbox arg0 + // dispatch path, which is exercised in integration tests instead. + if !cfg!(target_os = "macos") { + return false; + } if std::env::var_os("CODEX_SANDBOX").is_some() { return false; } @@ -1669,47 +1675,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn js_repl_persists_top_level_bindings_and_supports_tla() -> anyhow::Result<()> { - if !can_run_js_repl_runtime_tests().await { - return Ok(()); - } - - let (session, turn) = make_session_and_context().await; - let session = Arc::new(session); - let turn = Arc::new(turn); - let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default())); - let manager = turn.js_repl.manager().await?; - - let first = manager - .execute( - Arc::clone(&session), - Arc::clone(&turn), - Arc::clone(&tracker), - JsReplArgs { - code: "let x = await Promise.resolve(41); console.log(x);".to_string(), - timeout_ms: Some(10_000), - }, - ) - .await?; - assert!(first.output.contains("41")); - - let second = manager - .execute( - Arc::clone(&session), - Arc::clone(&turn), - Arc::clone(&tracker), - JsReplArgs { - code: "console.log(x + 1);".to_string(), - timeout_ms: Some(10_000), - }, - ) - .await?; - - assert!(second.output.contains("42")); - Ok(()) - } - #[tokio::test] async fn js_repl_timeout_does_not_deadlock() -> anyhow::Result<()> { if !can_run_js_repl_runtime_tests().await { @@ -1965,108 +1930,9 @@ mod tests { Ok(()) } - #[tokio::test] - async fn js_repl_can_call_tools() -> anyhow::Result<()> { - if !can_run_js_repl_runtime_tests().await { - return Ok(()); - } - - let (session, mut turn) = make_session_and_context().await; - turn.approval_policy - .set(AskForApproval::Never) - .expect("test setup should allow updating approval policy"); - turn.sandbox_policy - .set(SandboxPolicy::DangerFullAccess) - .expect("test setup should allow updating sandbox policy"); - - let session = Arc::new(session); - let turn = Arc::new(turn); - let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default())); - let manager = turn.js_repl.manager().await?; - - let shell = manager - .execute( - Arc::clone(&session), - Arc::clone(&turn), - Arc::clone(&tracker), - JsReplArgs { - code: "const shellOut = await codex.tool(\"shell_command\", { command: \"printf js_repl_shell_ok\" }); console.log(JSON.stringify(shellOut));".to_string(), - timeout_ms: Some(15_000), - }, - ) - .await?; - assert!(shell.output.contains("js_repl_shell_ok")); - - let tool = manager - .execute( - Arc::clone(&session), - Arc::clone(&turn), - Arc::clone(&tracker), - JsReplArgs { - code: "const toolOut = await codex.tool(\"list_mcp_resources\", {}); console.log(toolOut.type);".to_string(), - timeout_ms: Some(15_000), - }, - ) - .await?; - assert!(tool.output.contains("function_call_output")); - Ok(()) - } - - #[tokio::test] - async fn js_repl_tool_call_rejects_recursive_js_repl_invocation() -> anyhow::Result<()> { - if !can_run_js_repl_runtime_tests().await { - return Ok(()); - } - - let (session, mut turn) = make_session_and_context().await; - turn.approval_policy - .set(AskForApproval::Never) - .expect("test setup should allow updating approval policy"); - turn.sandbox_policy - .set(SandboxPolicy::DangerFullAccess) - .expect("test setup should allow updating sandbox policy"); - - let session = Arc::new(session); - let turn = Arc::new(turn); - let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default())); - let manager = turn.js_repl.manager().await?; - - let result = manager - .execute( - session, - turn, - tracker, - JsReplArgs { - code: r#" -try { - await codex.tool("js_repl", "console.log('recursive')"); - console.log("unexpected-success"); -} catch (err) { - console.log(String(err)); -} -"# - .to_string(), - timeout_ms: Some(15_000), - }, - ) - .await?; - - assert!( - result.output.contains("js_repl cannot invoke itself"), - "expected recursion guard message, got output: {}", - result.output - ); - assert!( - !result.output.contains("unexpected-success"), - "recursive js_repl tool call unexpectedly succeeded: {}", - result.output - ); - Ok(()) - } - #[tokio::test] async fn js_repl_waits_for_unawaited_tool_calls_before_completion() -> anyhow::Result<()> { - if !can_run_js_repl_runtime_tests().await || cfg!(windows) { + if !can_run_js_repl_runtime_tests().await { return Ok(()); } @@ -2277,65 +2143,6 @@ console.log(out.type); Ok(()) } - - #[tokio::test] - async fn js_repl_does_not_expose_process_global() -> anyhow::Result<()> { - if !can_run_js_repl_runtime_tests().await { - return Ok(()); - } - - let (session, turn) = make_session_and_context().await; - let session = Arc::new(session); - let turn = Arc::new(turn); - let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default())); - let manager = turn.js_repl.manager().await?; - - let result = manager - .execute( - session, - turn, - tracker, - JsReplArgs { - code: "console.log(typeof process);".to_string(), - timeout_ms: Some(10_000), - }, - ) - .await?; - assert!(result.output.contains("undefined")); - Ok(()) - } - - #[tokio::test] - async fn js_repl_blocks_sensitive_builtin_imports() -> anyhow::Result<()> { - if !can_run_js_repl_runtime_tests().await { - return Ok(()); - } - - let (session, turn) = make_session_and_context().await; - let session = Arc::new(session); - let turn = Arc::new(turn); - let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default())); - let manager = turn.js_repl.manager().await?; - - let err = manager - .execute( - session, - turn, - tracker, - JsReplArgs { - code: "await import(\"node:process\");".to_string(), - timeout_ms: Some(10_000), - }, - ) - .await - .expect_err("node:process import should be blocked"); - assert!( - err.to_string() - .contains("Importing module \"node:process\" is not allowed in js_repl") - ); - Ok(()) - } - #[tokio::test] async fn js_repl_prefers_env_node_module_dirs_over_config() -> anyhow::Result<()> { if !can_run_js_repl_runtime_tests().await { diff --git a/codex-rs/core/tests/suite/js_repl.rs b/codex-rs/core/tests/suite/js_repl.rs new file mode 100644 index 000000000..364b07726 --- /dev/null +++ b/codex-rs/core/tests/suite/js_repl.rs @@ -0,0 +1,267 @@ +#![allow(clippy::expect_used, clippy::unwrap_used)] + +use anyhow::Result; +use codex_core::features::Feature; +use core_test_support::responses; +use core_test_support::responses::ResponseMock; +use core_test_support::responses::ResponsesRequest; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_custom_tool_call; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::sse; +use core_test_support::skip_if_no_network; +use core_test_support::test_codex::test_codex; +use which::which; +use wiremock::MockServer; + +fn custom_tool_output_text_and_success( + req: &ResponsesRequest, + call_id: &str, +) -> (String, Option) { + let (output, success) = req + .custom_tool_call_output_content_and_success(call_id) + .expect("custom tool output should be present"); + (output.unwrap_or_default(), success) +} + +async fn run_js_repl_turn( + server: &MockServer, + prompt: &str, + calls: &[(&str, &str)], +) -> Result { + let mut builder = test_codex().with_config(|config| { + config.features.enable(Feature::JsRepl); + }); + let test = builder.build(server).await?; + + let mut first_events = vec![ev_response_created("resp-1")]; + for (call_id, js_input) in calls { + first_events.push(ev_custom_tool_call(call_id, "js_repl", js_input)); + } + first_events.push(ev_completed("resp-1")); + responses::mount_sse_once(server, sse(first_events)).await; + + let second_mock = responses::mount_sse_once( + server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + test.submit_turn(prompt).await?; + Ok(second_mock) +} + +fn can_run_js_repl_runtime_tests() -> bool { + // In the base CI-stabilization commit, skip these integration tests only + // when the js_repl Node runtime is not available. A later stacked commit + // removes this automatic skip and requires them everywhere. + if let Some(path) = std::env::var_os("CODEX_JS_REPL_NODE_PATH") + && std::path::Path::new(&path).exists() + { + return true; + } + which("node").is_ok() +} +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn js_repl_persists_top_level_bindings_and_supports_tla() -> Result<()> { + skip_if_no_network!(Ok(())); + + if !can_run_js_repl_runtime_tests() { + return Ok(()); + } + + let server = responses::start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + config.features.enable(Feature::JsRepl); + }); + let test = builder.build(&server).await?; + + responses::mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_custom_tool_call( + "call-1", + "js_repl", + "let x = await Promise.resolve(41); console.log(x);", + ), + ev_completed("resp-1"), + ]), + ) + .await; + let second_mock = responses::mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-2"), + ev_custom_tool_call("call-2", "js_repl", "console.log(x + 1);"), + ev_completed("resp-2"), + ]), + ) + .await; + let third_mock = responses::mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-3"), + ]), + ) + .await; + + test.submit_turn("run js_repl twice").await?; + + let req2 = second_mock.single_request(); + let (first_output, first_success) = custom_tool_output_text_and_success(&req2, "call-1"); + assert_ne!( + first_success, + Some(false), + "first js_repl call failed unexpectedly: {first_output}" + ); + assert!(first_output.contains("41")); + + let req3 = third_mock.single_request(); + let (second_output, second_success) = custom_tool_output_text_and_success(&req3, "call-2"); + assert_ne!( + second_success, + Some(false), + "second js_repl call failed unexpectedly: {second_output}" + ); + assert!(second_output.contains("42")); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn js_repl_can_invoke_builtin_tools() -> Result<()> { + skip_if_no_network!(Ok(())); + + if !can_run_js_repl_runtime_tests() { + return Ok(()); + } + + let server = responses::start_mock_server().await; + let mock = run_js_repl_turn( + &server, + "use js_repl to call a tool", + &[( + "call-1", + "const toolOut = await codex.tool(\"list_mcp_resources\", {}); console.log(toolOut.type);", + )], + ) + .await?; + + let req = mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "js_repl call failed unexpectedly: {output}" + ); + assert!(output.contains("function_call_output")); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn js_repl_tool_call_rejects_recursive_js_repl_invocation() -> Result<()> { + skip_if_no_network!(Ok(())); + + if !can_run_js_repl_runtime_tests() { + return Ok(()); + } + + let server = responses::start_mock_server().await; + let mock = run_js_repl_turn( + &server, + "use js_repl recursively", + &[( + "call-1", + r#" +try { + await codex.tool("js_repl", "console.log('recursive')"); + console.log("unexpected-success"); +} catch (err) { + console.log(String(err)); +} +"#, + )], + ) + .await?; + + let req = mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "js_repl call failed unexpectedly: {output}" + ); + assert!( + output.contains("js_repl cannot invoke itself"), + "expected recursion guard message, got output: {output}" + ); + assert!( + !output.contains("unexpected-success"), + "recursive js_repl call unexpectedly succeeded: {output}" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn js_repl_does_not_expose_process_global() -> Result<()> { + skip_if_no_network!(Ok(())); + + if !can_run_js_repl_runtime_tests() { + return Ok(()); + } + + let server = responses::start_mock_server().await; + let mock = run_js_repl_turn( + &server, + "check process visibility", + &[("call-1", "console.log(typeof process);")], + ) + .await?; + + let req = mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "js_repl call failed unexpectedly: {output}" + ); + assert!(output.contains("undefined")); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn js_repl_blocks_sensitive_builtin_imports() -> Result<()> { + skip_if_no_network!(Ok(())); + + if !can_run_js_repl_runtime_tests() { + return Ok(()); + } + + let server = responses::start_mock_server().await; + let mock = run_js_repl_turn( + &server, + "import a blocked module", + &[("call-1", "await import(\"node:process\");")], + ) + .await?; + + let req = mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(true), + "blocked import unexpectedly succeeded: {output}" + ); + assert!(output.contains("Importing module \"node:process\" is not allowed in js_repl")); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index f3b64e1e7..1f20aeee3 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -78,6 +78,7 @@ mod grep_files; mod hierarchical_agents; mod image_rollout; mod items; +mod js_repl; mod json_result; mod list_dir; mod live_cli;