From c67120f4a08148de160dbbb044af4cbc2dcecd9e Mon Sep 17 00:00:00 2001 From: jif-oai Date: Thu, 5 Feb 2026 10:30:18 +0000 Subject: [PATCH] fix: flaky landlock (#10689) https://openai.slack.com/archives/C095U48JNL9/p1770243347893959 --- .../suite/codex_message_processor_flow.rs | 1 + .../app-server/tests/suite/v2/turn_start.rs | 21 ++++- codex-rs/exec/tests/suite/sandbox.rs | 82 ++++++++++++++++++- 3 files changed, 98 insertions(+), 6 deletions(-) diff --git a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs index a041b8b0b..2debbda65 100644 --- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs +++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs @@ -529,6 +529,7 @@ fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<() r#" model = "mock-model" approval_policy = "untrusted" +sandbox_mode = "danger-full-access" model_provider = "mock_provider" diff --git a/codex-rs/app-server/tests/suite/v2/turn_start.rs b/codex-rs/app-server/tests/suite/v2/turn_start.rs index 87ae4cfaa..395561f69 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs @@ -1716,11 +1716,12 @@ async fn command_execution_notifications_include_process_id() -> Result<()> { ]; let server = create_mock_responses_server_sequence(responses).await; let codex_home = TempDir::new()?; - create_config_toml( + create_config_toml_with_sandbox( codex_home.path(), &server.uri(), "never", &BTreeMap::from([(Feature::UnifiedExec, true)]), + "danger-full-access", )?; let mut mcp = McpProcess::new(codex_home.path()).await?; @@ -1847,6 +1848,22 @@ fn create_config_toml( server_uri: &str, approval_policy: &str, feature_flags: &BTreeMap, +) -> std::io::Result<()> { + create_config_toml_with_sandbox( + codex_home, + server_uri, + approval_policy, + feature_flags, + "read-only", + ) +} + +fn create_config_toml_with_sandbox( + codex_home: &Path, + server_uri: &str, + approval_policy: &str, + feature_flags: &BTreeMap, + sandbox_mode: &str, ) -> std::io::Result<()> { let mut features = BTreeMap::from([(Feature::RemoteModels, false)]); for (feature, enabled) in feature_flags { @@ -1871,7 +1888,7 @@ fn create_config_toml( r#" model = "mock-model" approval_policy = "{approval_policy}" -sandbox_mode = "read-only" +sandbox_mode = "{sandbox_mode}" model_provider = "mock_provider" diff --git a/codex-rs/exec/tests/suite/sandbox.rs b/codex-rs/exec/tests/suite/sandbox.rs index 2d0ad42ca..ab8d3868d 100644 --- a/codex-rs/exec/tests/suite/sandbox.rs +++ b/codex-rs/exec/tests/suite/sandbox.rs @@ -57,9 +57,69 @@ async fn spawn_command_under_sandbox( .await } +#[cfg(target_os = "linux")] +/// Determines whether Linux sandbox tests can run on this host. +/// +/// These tests require an enforceable filesystem sandbox. We run a tiny command +/// under the production Landlock path and skip when enforcement is unavailable +/// (for example on kernels or container profiles where Landlock is not +/// enforced). +async fn linux_sandbox_test_env() -> Option> { + let command_cwd = std::env::current_dir().ok()?; + let sandbox_cwd = command_cwd.clone(); + let policy = SandboxPolicy::ReadOnly; + + if can_apply_linux_sandbox_policy(&policy, &command_cwd, sandbox_cwd.as_path(), HashMap::new()) + .await + { + return Some(HashMap::new()); + } + + eprintln!("Skipping test: Landlock is not enforceable on this host."); + None +} + +#[cfg(target_os = "linux")] +/// Returns whether a minimal command can run successfully with the requested +/// Linux sandbox policy applied. +/// +/// This is used as a capability probe so sandbox behavior tests only run when +/// Landlock enforcement is actually active. +async fn can_apply_linux_sandbox_policy( + policy: &SandboxPolicy, + command_cwd: &Path, + sandbox_cwd: &Path, + env: HashMap, +) -> bool { + let spawn_result = spawn_command_under_sandbox( + vec!["/usr/bin/true".to_string()], + command_cwd.to_path_buf(), + policy, + sandbox_cwd, + StdioPolicy::RedirectForShellTool, + env, + ) + .await; + let Ok(mut child) = spawn_result else { + return false; + }; + child + .wait() + .await + .map(|status| status.success()) + .unwrap_or(false) +} + #[tokio::test] async fn python_multiprocessing_lock_works_under_sandbox() { core_test_support::skip_if_sandbox!(); + #[cfg(target_os = "linux")] + let sandbox_env = match linux_sandbox_test_env().await { + Some(env) => env, + None => return, + }; + #[cfg(not(target_os = "linux"))] + let sandbox_env = HashMap::new(); #[cfg(target_os = "macos")] let writable_roots = Vec::::new(); @@ -103,7 +163,7 @@ if __name__ == '__main__': &policy, sandbox_cwd.as_path(), StdioPolicy::Inherit, - HashMap::new(), + sandbox_env, ) .await .expect("should be able to spawn python under sandbox"); @@ -115,6 +175,13 @@ if __name__ == '__main__': #[tokio::test] async fn python_getpwuid_works_under_sandbox() { core_test_support::skip_if_sandbox!(); + #[cfg(target_os = "linux")] + let sandbox_env = match linux_sandbox_test_env().await { + Some(env) => env, + None => return, + }; + #[cfg(not(target_os = "linux"))] + let sandbox_env = HashMap::new(); if std::process::Command::new("python3") .arg("--version") @@ -139,7 +206,7 @@ async fn python_getpwuid_works_under_sandbox() { &policy, sandbox_cwd.as_path(), StdioPolicy::RedirectForShellTool, - HashMap::new(), + sandbox_env, ) .await .expect("should be able to spawn python under sandbox"); @@ -154,6 +221,13 @@ async fn python_getpwuid_works_under_sandbox() { #[tokio::test] async fn sandbox_distinguishes_command_and_policy_cwds() { core_test_support::skip_if_sandbox!(); + #[cfg(target_os = "linux")] + let sandbox_env = match linux_sandbox_test_env().await { + Some(env) => env, + None => return, + }; + #[cfg(not(target_os = "linux"))] + let sandbox_env = HashMap::new(); let temp = tempfile::tempdir().expect("should be able to create temp dir"); let sandbox_root = temp.path().join("sandbox"); let command_root = temp.path().join("command"); @@ -187,7 +261,7 @@ async fn sandbox_distinguishes_command_and_policy_cwds() { &policy, canonical_sandbox_root.as_path(), StdioPolicy::Inherit, - HashMap::new(), + sandbox_env.clone(), ) .await .expect("should spawn command writing to forbidden path"); @@ -218,7 +292,7 @@ async fn sandbox_distinguishes_command_and_policy_cwds() { &policy, canonical_sandbox_root.as_path(), StdioPolicy::Inherit, - HashMap::new(), + sandbox_env, ) .await .expect("should spawn command writing to sandbox root");