From 06704b1a0fff5bfaf500c8a3420bad3432754cc9 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Mon, 8 Dec 2025 16:00:24 -0800 Subject: [PATCH] fix: pre-main hardening logic must tolerate non-UTF-8 env vars (#7749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We received a bug report that Codex CLI crashes when an env var contains a non-ASCII character, or more specifically, cannot be decoded as UTF-8: ```shell $ RUST_BACKTRACE=full RÖDBURK=1 codex thread '' panicked at library/std/src/env.rs:162:57: called `Result::unwrap()` on an `Err` value: "RÃ\xB6DBURK" stack backtrace: 0: 0x101905c18 - __mh_execute_header 1: 0x1012bd76c - __mh_execute_header 2: 0x1019050e4 - __mh_execute_header 3: 0x101905ad8 - __mh_execute_header 4: 0x101905874 - __mh_execute_header 5: 0x101904f38 - __mh_execute_header 6: 0x1019347bc - __mh_execute_header 7: 0x10193472c - __mh_execute_header 8: 0x101937884 - __mh_execute_header 9: 0x101b3bcd0 - __mh_execute_header 10: 0x101b3c0bc - __mh_execute_header 11: 0x101927a20 - __mh_execute_header 12: 0x1005c58d8 - __mh_execute_header thread '' panicked at library/core/src/panicking.rs:225:5: panic in a function that cannot unwind stack backtrace: 0: 0x101905c18 - __mh_execute_header 1: 0x1012bd76c - __mh_execute_header 2: 0x1019050e4 - __mh_execute_header 3: 0x101905ad8 - __mh_execute_header 4: 0x101905874 - __mh_execute_header 5: 0x101904f38 - __mh_execute_header 6: 0x101934794 - __mh_execute_header 7: 0x10193472c - __mh_execute_header 8: 0x101937884 - __mh_execute_header 9: 0x101b3c144 - __mh_execute_header 10: 0x101b3c1a0 - __mh_execute_header 11: 0x101b3c158 - __mh_execute_header 12: 0x1005c5ef8 - __mh_execute_header thread caused non-unwinding panic. aborting. ``` I discovered I could reproduce this on a release build, but not a dev build, so between that and the unhelpful stack trace, my mind went to the pre-`main()` logic we run in prod builds. Sure enough, we were operating on `std::env::vars()` instead of `std::env::vars_os()`, which is why the non-UTF-8 environment variable was causing an issue. This PR updates the logic to use `std::env::vars_os()` and adds a unit test. And to be extra sure, I also verified the fix works with a local release build: ``` $ cargo build --bin codex --release $ RÖDBURK=1 ./target/release/codex --version codex-cli 0.0.0 ``` --- codex-rs/Cargo.lock | 1 + codex-rs/process-hardening/Cargo.toml | 3 + codex-rs/process-hardening/src/lib.rs | 98 +++++++++++++++++++-------- 3 files changed, 75 insertions(+), 27 deletions(-) diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 3e7f0fd8b..cfb466974 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1483,6 +1483,7 @@ name = "codex-process-hardening" version = "0.0.0" dependencies = [ "libc", + "pretty_assertions", ] [[package]] diff --git a/codex-rs/process-hardening/Cargo.toml b/codex-rs/process-hardening/Cargo.toml index 2a867572d..7cc88ed60 100644 --- a/codex-rs/process-hardening/Cargo.toml +++ b/codex-rs/process-hardening/Cargo.toml @@ -13,3 +13,6 @@ workspace = true [dependencies] libc = { workspace = true } + +[dev-dependencies] +pretty_assertions = { workspace = true } diff --git a/codex-rs/process-hardening/src/lib.rs b/codex-rs/process-hardening/src/lib.rs index 772647671..fb6145f17 100644 --- a/codex-rs/process-hardening/src/lib.rs +++ b/codex-rs/process-hardening/src/lib.rs @@ -1,3 +1,9 @@ +#[cfg(unix)] +use std::ffi::OsString; + +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; + /// This is designed to be called pre-main() (using `#[ctor::ctor]`) to perform /// various process hardening steps, such as /// - disabling core dumps @@ -51,15 +57,7 @@ pub(crate) fn pre_main_hardening_linux() { // Official Codex releases are MUSL-linked, which means that variables such // as LD_PRELOAD are ignored anyway, but just to be sure, clear them here. - let ld_keys: Vec = std::env::vars() - .filter_map(|(key, _)| { - if key.starts_with("LD_") { - Some(key) - } else { - None - } - }) - .collect(); + let ld_keys = env_keys_with_prefix(std::env::vars_os(), b"LD_"); for key in ld_keys { unsafe { @@ -73,15 +71,7 @@ pub(crate) fn pre_main_hardening_bsd() { // FreeBSD/OpenBSD: set RLIMIT_CORE to 0 and clear LD_* env vars set_core_file_size_limit_to_zero(); - let ld_keys: Vec = std::env::vars() - .filter_map(|(key, _)| { - if key.starts_with("LD_") { - Some(key) - } else { - None - } - }) - .collect(); + let ld_keys = env_keys_with_prefix(std::env::vars_os(), b"LD_"); for key in ld_keys { unsafe { std::env::remove_var(key); @@ -106,15 +96,7 @@ pub(crate) fn pre_main_hardening_macos() { // Remove all DYLD_ environment variables, which can be used to subvert // library loading. - let dyld_keys: Vec = std::env::vars() - .filter_map(|(key, _)| { - if key.starts_with("DYLD_") { - Some(key) - } else { - None - } - }) - .collect(); + let dyld_keys = env_keys_with_prefix(std::env::vars_os(), b"DYLD_"); for key in dyld_keys { unsafe { @@ -144,3 +126,65 @@ fn set_core_file_size_limit_to_zero() { pub(crate) fn pre_main_hardening_windows() { // TODO(mbolin): Perform the appropriate configuration for Windows. } + +#[cfg(unix)] +fn env_keys_with_prefix(vars: I, prefix: &[u8]) -> Vec +where + I: IntoIterator, +{ + vars.into_iter() + .filter_map(|(key, _)| { + key.as_os_str() + .as_bytes() + .starts_with(prefix) + .then_some(key) + }) + .collect() +} + +#[cfg(all(test, unix))] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + use std::os::unix::ffi::OsStringExt; + + #[test] + fn env_keys_with_prefix_handles_non_utf8_entries() { + // RÖDBURK + let non_utf8_key1 = OsStr::from_bytes(b"R\xD6DBURK").to_os_string(); + assert!(non_utf8_key1.clone().into_string().is_err()); + let non_utf8_key2 = OsString::from_vec(vec![b'L', b'D', b'_', 0xF0]); + assert!(non_utf8_key2.clone().into_string().is_err()); + + let non_utf8_value = OsString::from_vec(vec![0xF0, 0x9F, 0x92, 0xA9]); + + let keys = env_keys_with_prefix( + vec![ + (non_utf8_key1, non_utf8_value.clone()), + (non_utf8_key2.clone(), non_utf8_value), + ], + b"LD_", + ); + assert_eq!( + keys, + vec![non_utf8_key2], + "non-UTF-8 env entries with LD_ prefix should be retained" + ); + } + + #[test] + fn env_keys_with_prefix_filters_only_matching_keys() { + let ld_test_var = OsStr::from_bytes(b"LD_TEST"); + let vars = vec![ + (OsString::from("PATH"), OsString::from("/usr/bin")), + (ld_test_var.to_os_string(), OsString::from("1")), + (OsString::from("DYLD_FOO"), OsString::from("bar")), + ]; + + let keys = env_keys_with_prefix(vars, b"LD_"); + assert_eq!(keys.len(), 1); + assert_eq!(keys[0].as_os_str(), ld_test_var); + } +}