Add remote env CI matrix and integration test (#14869)
Setting `CODEX_TEST_REMOTE_ENV` makes `test_codex` start the executor "remotely" (inside a Docker container), turning any integration test into a remote test.
This commit is contained in:
parent
e5f4d1fef5
commit
ba85a58039
10 changed files with 514 additions and 23 deletions
23
.github/workflows/rust-ci.yml
vendored
23
.github/workflows/rust-ci.yml
vendored
|
|
@ -527,7 +527,7 @@ jobs:
|
||||||
key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1
|
key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1
|
||||||
|
|
||||||
tests:
|
tests:
|
||||||
name: Tests — ${{ matrix.runner }} - ${{ matrix.target }}
|
name: Tests — ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.remote_env == 'true' && ' (remote)' || '' }}
|
||||||
runs-on: ${{ matrix.runs_on || matrix.runner }}
|
runs-on: ${{ matrix.runs_on || matrix.runner }}
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
needs: changed
|
needs: changed
|
||||||
|
|
@ -553,6 +553,7 @@ jobs:
|
||||||
- runner: ubuntu-24.04
|
- runner: ubuntu-24.04
|
||||||
target: x86_64-unknown-linux-gnu
|
target: x86_64-unknown-linux-gnu
|
||||||
profile: dev
|
profile: dev
|
||||||
|
remote_env: "true"
|
||||||
runs_on:
|
runs_on:
|
||||||
group: codex-runners
|
group: codex-runners
|
||||||
labels: codex-linux-x64
|
labels: codex-linux-x64
|
||||||
|
|
@ -590,6 +591,7 @@ jobs:
|
||||||
sudo apt-get update -y
|
sudo apt-get update -y
|
||||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends pkg-config libcap-dev
|
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends pkg-config libcap-dev
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Some integration tests rely on DotSlash being installed.
|
# Some integration tests rely on DotSlash being installed.
|
||||||
# See https://github.com/openai/codex/pull/7617.
|
# See https://github.com/openai/codex/pull/7617.
|
||||||
- name: Install DotSlash
|
- name: Install DotSlash
|
||||||
|
|
@ -674,6 +676,15 @@ jobs:
|
||||||
sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0
|
sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
- name: Set up remote test env (Docker)
|
||||||
|
if: ${{ runner.os == 'Linux' && matrix.remote_env == 'true' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
export CODEX_TEST_REMOTE_ENV_CONTAINER_NAME=codex-remote-test-env
|
||||||
|
source "${GITHUB_WORKSPACE}/scripts/test-remote-env.sh"
|
||||||
|
echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}" >> "$GITHUB_ENV"
|
||||||
|
|
||||||
- name: tests
|
- name: tests
|
||||||
id: test
|
id: test
|
||||||
run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} --cargo-profile ci-test --timings
|
run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} --cargo-profile ci-test --timings
|
||||||
|
|
@ -726,6 +737,16 @@ jobs:
|
||||||
echo '```';
|
echo '```';
|
||||||
} >> "$GITHUB_STEP_SUMMARY"
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
|
||||||
|
- name: Tear down remote test env
|
||||||
|
if: ${{ always() && runner.os == 'Linux' && matrix.remote_env == 'true' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
set +e
|
||||||
|
if [[ "${{ steps.test.outcome }}" != "success" ]]; then
|
||||||
|
docker logs codex-remote-test-env || true
|
||||||
|
fi
|
||||||
|
docker rm -f codex-remote-test-env >/dev/null 2>&1 || true
|
||||||
|
|
||||||
- name: verify tests passed
|
- name: verify tests passed
|
||||||
if: steps.test.outcome == 'failure'
|
if: steps.test.outcome == 'failure'
|
||||||
run: |
|
run: |
|
||||||
|
|
|
||||||
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
|
|
@ -3121,6 +3121,7 @@ dependencies = [
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
"codex-arg0",
|
"codex-arg0",
|
||||||
"codex-core",
|
"codex-core",
|
||||||
|
"codex-exec-server",
|
||||||
"codex-features",
|
"codex-features",
|
||||||
"codex-protocol",
|
"codex-protocol",
|
||||||
"codex-utils-absolute-path",
|
"codex-utils-absolute-path",
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ assert_cmd = { workspace = true }
|
||||||
base64 = { workspace = true }
|
base64 = { workspace = true }
|
||||||
codex-arg0 = { workspace = true }
|
codex-arg0 = { workspace = true }
|
||||||
codex-core = { workspace = true }
|
codex-core = { workspace = true }
|
||||||
|
codex-exec-server = { workspace = true }
|
||||||
codex-features = { workspace = true }
|
codex-features = { workspace = true }
|
||||||
codex-protocol = { workspace = true }
|
codex-protocol = { workspace = true }
|
||||||
codex-utils-absolute-path = { workspace = true }
|
codex-utils-absolute-path = { workspace = true }
|
||||||
|
|
|
||||||
|
|
@ -289,6 +289,29 @@ pub fn sandbox_network_env_var() -> &'static str {
|
||||||
codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
|
codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Name of the environment variable that opts tests into the remote environment.
///
/// Its value is stored as [`RemoteEnvConfig::container_name`].
const REMOTE_ENV_ENV_VAR: &str = "CODEX_TEST_REMOTE_ENV";

/// Settings for running tests against a remote executor.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RemoteEnvConfig {
    /// Raw (untrimmed) value of `CODEX_TEST_REMOTE_ENV`.
    pub container_name: String,
}

/// Reads the remote test environment configuration from `CODEX_TEST_REMOTE_ENV`.
///
/// Returns `None`, after printing a note to stderr, when the variable is not
/// set. Otherwise returns the variable's value as the container name.
///
/// # Panics
///
/// Panics when the variable is set but is not valid Unicode, or when it is
/// empty or whitespace-only.
pub fn get_remote_test_env() -> Option<RemoteEnvConfig> {
    // A single lookup distinguishes "unset" from "set but unusable", so the
    // panic message describes the actual problem.
    let container_name = match std::env::var(REMOTE_ENV_ENV_VAR) {
        Ok(value) => value,
        Err(std::env::VarError::NotPresent) => {
            eprintln!("Skipping test because {REMOTE_ENV_ENV_VAR} is not set.");
            return None;
        }
        Err(std::env::VarError::NotUnicode(value)) => {
            panic!(
                "{} must be valid Unicode, got {:?}",
                REMOTE_ENV_ENV_VAR, value
            )
        }
    };
    assert!(
        !container_name.trim().is_empty(),
        "{} must not be empty",
        REMOTE_ENV_ENV_VAR
    );

    Some(RemoteEnvConfig { container_name })
}
|
||||||
|
|
||||||
pub fn format_with_current_shell(command: &str) -> Vec<String> {
|
pub fn format_with_current_shell(command: &str) -> Vec<String> {
|
||||||
codex_core::shell::default_user_shell().derive_exec_args(command, /*use_login_shell*/ true)
|
codex_core::shell::default_user_shell().derive_exec_args(command, /*use_login_shell*/ true)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,16 @@
|
||||||
use std::mem::swap;
|
use std::mem::swap;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::process::Command;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::sync::atomic::AtomicU64;
|
||||||
|
use std::sync::atomic::Ordering;
|
||||||
|
use std::time::Duration;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use anyhow::anyhow;
|
||||||
use codex_core::CodexAuth;
|
use codex_core::CodexAuth;
|
||||||
use codex_core::CodexThread;
|
use codex_core::CodexThread;
|
||||||
use codex_core::ModelProviderInfo;
|
use codex_core::ModelProviderInfo;
|
||||||
|
|
@ -14,6 +20,8 @@ use codex_core::config::Config;
|
||||||
use codex_core::models_manager::collaboration_mode_presets::CollaborationModesConfig;
|
use codex_core::models_manager::collaboration_mode_presets::CollaborationModesConfig;
|
||||||
use codex_core::shell::Shell;
|
use codex_core::shell::Shell;
|
||||||
use codex_core::shell::get_shell_by_model_provided_path;
|
use codex_core::shell::get_shell_by_model_provided_path;
|
||||||
|
use codex_exec_server::CreateDirectoryOptions;
|
||||||
|
use codex_exec_server::ExecutorFileSystem;
|
||||||
use codex_features::Feature;
|
use codex_features::Feature;
|
||||||
use codex_protocol::config_types::ServiceTier;
|
use codex_protocol::config_types::ServiceTier;
|
||||||
use codex_protocol::openai_models::ModelsResponse;
|
use codex_protocol::openai_models::ModelsResponse;
|
||||||
|
|
@ -24,10 +32,13 @@ use codex_protocol::protocol::SandboxPolicy;
|
||||||
use codex_protocol::protocol::SessionConfiguredEvent;
|
use codex_protocol::protocol::SessionConfiguredEvent;
|
||||||
use codex_protocol::protocol::SessionSource;
|
use codex_protocol::protocol::SessionSource;
|
||||||
use codex_protocol::user_input::UserInput;
|
use codex_protocol::user_input::UserInput;
|
||||||
|
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
use wiremock::MockServer;
|
use wiremock::MockServer;
|
||||||
|
|
||||||
|
use crate::RemoteEnvConfig;
|
||||||
|
use crate::get_remote_test_env;
|
||||||
use crate::load_default_config_for_test;
|
use crate::load_default_config_for_test;
|
||||||
use crate::responses::WebSocketTestServer;
|
use crate::responses::WebSocketTestServer;
|
||||||
use crate::responses::output_value_to_text;
|
use crate::responses::output_value_to_text;
|
||||||
|
|
@ -41,6 +52,254 @@ use wiremock::matchers::path_regex;
|
||||||
type ConfigMutator = dyn FnOnce(&mut Config) + Send;
|
type ConfigMutator = dyn FnOnce(&mut Config) + Send;
|
||||||
type PreBuildHook = dyn FnOnce(&Path) + Send + 'static;
|
type PreBuildHook = dyn FnOnce(&Path) + Send + 'static;
|
||||||
const TEST_MODEL_WITH_EXPERIMENTAL_TOOLS: &str = "test-gpt-5.1-codex";
|
const TEST_MODEL_WITH_EXPERIMENTAL_TOOLS: &str = "test-gpt-5.1-codex";
|
||||||
|
/// Upper bound on how long to wait for the remote exec-server to print its listen URL.
const REMOTE_EXEC_SERVER_START_TIMEOUT: Duration = Duration::from_secs(5);
/// Pause between successive polls for the remote exec-server listen URL.
const REMOTE_EXEC_SERVER_POLL_INTERVAL: Duration = Duration::from_millis(25);
/// Per-process counter used to build unique instance ids for remote resources.
static REMOTE_EXEC_SERVER_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct RemoteExecServerProcess {
|
||||||
|
container_name: String,
|
||||||
|
pid: u32,
|
||||||
|
remote_exec_server_path: String,
|
||||||
|
stdout_path: String,
|
||||||
|
cleanup_paths: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for RemoteExecServerProcess {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let cleanup_paths = self.cleanup_paths.join(" ");
|
||||||
|
let cleanup_paths_script = if cleanup_paths.is_empty() {
|
||||||
|
String::new()
|
||||||
|
} else {
|
||||||
|
format!("rm -rf {cleanup_paths}; ")
|
||||||
|
};
|
||||||
|
let script = format!(
|
||||||
|
"if kill -0 {pid} 2>/dev/null; then kill {pid}; fi; {cleanup_paths_script}rm -f {remote_exec_server_path} {stdout_path}",
|
||||||
|
pid = self.pid,
|
||||||
|
cleanup_paths_script = cleanup_paths_script,
|
||||||
|
remote_exec_server_path = self.remote_exec_server_path,
|
||||||
|
stdout_path = self.stdout_path
|
||||||
|
);
|
||||||
|
let _ = docker_command_capture_stdout(["exec", &self.container_name, "sh", "-lc", &script]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RemoteExecServerProcess {
|
||||||
|
fn register_cleanup_path(&mut self, path: &Path) {
|
||||||
|
self.cleanup_paths.push(path.display().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct TestEnv {
|
||||||
|
environment: codex_exec_server::Environment,
|
||||||
|
cwd: PathBuf,
|
||||||
|
_local_cwd_temp_dir: Option<TempDir>,
|
||||||
|
_remote_exec_server_process: Option<RemoteExecServerProcess>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TestEnv {
|
||||||
|
pub async fn local() -> Result<Self> {
|
||||||
|
let local_cwd_temp_dir = TempDir::new()?;
|
||||||
|
let cwd = local_cwd_temp_dir.path().to_path_buf();
|
||||||
|
let environment =
|
||||||
|
codex_exec_server::Environment::create(/*experimental_exec_server_url*/ None).await?;
|
||||||
|
Ok(Self {
|
||||||
|
environment,
|
||||||
|
cwd,
|
||||||
|
_local_cwd_temp_dir: Some(local_cwd_temp_dir),
|
||||||
|
_remote_exec_server_process: None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn environment(&self) -> &codex_exec_server::Environment {
|
||||||
|
&self.environment
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn experimental_exec_server_url(&self) -> Option<&str> {
|
||||||
|
self.environment.experimental_exec_server_url()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn test_env() -> Result<TestEnv> {
|
||||||
|
match get_remote_test_env() {
|
||||||
|
Some(remote_env) => {
|
||||||
|
let mut remote_process = start_remote_exec_server(&remote_env)?;
|
||||||
|
let remote_ip = remote_container_ip(&remote_env.container_name)?;
|
||||||
|
let websocket_url = rewrite_websocket_host(&remote_process.listen_url, &remote_ip)?;
|
||||||
|
let environment = codex_exec_server::Environment::create(Some(websocket_url)).await?;
|
||||||
|
let cwd = remote_aware_cwd_path();
|
||||||
|
environment
|
||||||
|
.get_filesystem()
|
||||||
|
.create_directory(
|
||||||
|
&absolute_path(&cwd)?,
|
||||||
|
CreateDirectoryOptions { recursive: true },
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
remote_process.process.register_cleanup_path(&cwd);
|
||||||
|
Ok(TestEnv {
|
||||||
|
environment,
|
||||||
|
cwd,
|
||||||
|
_local_cwd_temp_dir: None,
|
||||||
|
_remote_exec_server_process: Some(remote_process.process),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
None => TestEnv::local().await,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RemoteExecServerStart {
|
||||||
|
process: RemoteExecServerProcess,
|
||||||
|
listen_url: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_remote_exec_server(remote_env: &RemoteEnvConfig) -> Result<RemoteExecServerStart> {
|
||||||
|
let container_name = remote_env.container_name.as_str();
|
||||||
|
let instance_id = remote_exec_server_instance_id();
|
||||||
|
let remote_exec_server_path = format!("/tmp/codex-exec-server-{instance_id}");
|
||||||
|
let stdout_path = format!("/tmp/codex-exec-server-{instance_id}.stdout");
|
||||||
|
let local_binary = codex_utils_cargo_bin::cargo_bin("codex-exec-server")
|
||||||
|
.context("resolve codex-exec-server binary")?;
|
||||||
|
let local_binary = local_binary.to_string_lossy().to_string();
|
||||||
|
let remote_binary = format!("{container_name}:{remote_exec_server_path}");
|
||||||
|
|
||||||
|
docker_command_success(["cp", &local_binary, &remote_binary])?;
|
||||||
|
docker_command_success([
|
||||||
|
"exec",
|
||||||
|
container_name,
|
||||||
|
"chmod",
|
||||||
|
"+x",
|
||||||
|
&remote_exec_server_path,
|
||||||
|
])?;
|
||||||
|
|
||||||
|
let start_script = format!(
|
||||||
|
"rm -f {stdout_path}; \
|
||||||
|
nohup {remote_exec_server_path} --listen ws://0.0.0.0:0 > {stdout_path} 2>&1 & \
|
||||||
|
echo $!"
|
||||||
|
);
|
||||||
|
let pid_output =
|
||||||
|
docker_command_capture_stdout(["exec", container_name, "sh", "-lc", &start_script])?;
|
||||||
|
let pid = pid_output
|
||||||
|
.trim()
|
||||||
|
.parse::<u32>()
|
||||||
|
.with_context(|| format!("parse remote exec-server PID from {pid_output:?}"))?;
|
||||||
|
|
||||||
|
let listen_url = wait_for_remote_listen_url(container_name, &stdout_path)?;
|
||||||
|
|
||||||
|
Ok(RemoteExecServerStart {
|
||||||
|
process: RemoteExecServerProcess {
|
||||||
|
container_name: container_name.to_string(),
|
||||||
|
pid,
|
||||||
|
remote_exec_server_path,
|
||||||
|
stdout_path,
|
||||||
|
cleanup_paths: Vec::new(),
|
||||||
|
},
|
||||||
|
listen_url,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn remote_aware_cwd_path() -> PathBuf {
|
||||||
|
PathBuf::from(format!(
|
||||||
|
"/tmp/codex-core-test-cwd-{}",
|
||||||
|
remote_exec_server_instance_id()
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn wait_for_remote_listen_url(container_name: &str, stdout_path: &str) -> Result<String> {
|
||||||
|
let deadline = Instant::now() + REMOTE_EXEC_SERVER_START_TIMEOUT;
|
||||||
|
loop {
|
||||||
|
let line = docker_command_capture_stdout([
|
||||||
|
"exec",
|
||||||
|
container_name,
|
||||||
|
"sh",
|
||||||
|
"-lc",
|
||||||
|
&format!("head -n 1 {stdout_path} 2>/dev/null || true"),
|
||||||
|
])?;
|
||||||
|
let listen_url = line.trim();
|
||||||
|
if listen_url.starts_with("ws://") {
|
||||||
|
return Ok(listen_url.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
if Instant::now() >= deadline {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"timed out waiting for remote exec-server listen URL in container `{container_name}` after {REMOTE_EXEC_SERVER_START_TIMEOUT:?}"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
std::thread::sleep(REMOTE_EXEC_SERVER_POLL_INTERVAL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn remote_exec_server_instance_id() -> String {
|
||||||
|
let instance = REMOTE_EXEC_SERVER_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||||
|
format!("{}-{instance}", std::process::id())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn remote_container_ip(container_name: &str) -> Result<String> {
|
||||||
|
let ip = docker_command_capture_stdout([
|
||||||
|
"inspect",
|
||||||
|
"-f",
|
||||||
|
"{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
|
||||||
|
container_name,
|
||||||
|
])?;
|
||||||
|
let ip = ip.trim();
|
||||||
|
if ip.is_empty() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"container `{container_name}` has no IP address; cannot connect to remote exec-server"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(ip.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rewrite_websocket_host(listen_url: &str, host: &str) -> Result<String> {
|
||||||
|
let Some(address) = listen_url.strip_prefix("ws://") else {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
|
||||||
|
));
|
||||||
|
};
|
||||||
|
let Some((_, port)) = address.rsplit_once(':') else {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
|
||||||
|
));
|
||||||
|
};
|
||||||
|
Ok(format!("ws://{host}:{port}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn docker_command_success<const N: usize>(args: [&str; N]) -> Result<()> {
|
||||||
|
let output = Command::new("docker")
|
||||||
|
.args(args)
|
||||||
|
.output()
|
||||||
|
.with_context(|| format!("run docker {:?}", args))?;
|
||||||
|
if !output.status.success() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"docker {:?} failed: stdout={} stderr={}",
|
||||||
|
args,
|
||||||
|
String::from_utf8_lossy(&output.stdout).trim(),
|
||||||
|
String::from_utf8_lossy(&output.stderr).trim()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn docker_command_capture_stdout<const N: usize>(args: [&str; N]) -> Result<String> {
|
||||||
|
let output = Command::new("docker")
|
||||||
|
.args(args)
|
||||||
|
.output()
|
||||||
|
.with_context(|| format!("run docker {:?}", args))?;
|
||||||
|
if !output.status.success() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"docker {:?} failed: stdout={} stderr={}",
|
||||||
|
args,
|
||||||
|
String::from_utf8_lossy(&output.stdout).trim(),
|
||||||
|
String::from_utf8_lossy(&output.stderr).trim()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
String::from_utf8(output.stdout).context("docker stdout must be utf-8")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn absolute_path(path: &Path) -> Result<AbsolutePathBuf> {
|
||||||
|
AbsolutePathBuf::try_from(path.to_path_buf())
|
||||||
|
.map_err(|err| anyhow!("invalid absolute path {}: {err}", path.display()))
|
||||||
|
}
|
||||||
|
|
||||||
/// A collection of different ways the model can output an apply_patch call
|
/// A collection of different ways the model can output an apply_patch call
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||||
|
|
@ -124,6 +383,24 @@ impl TestCodexBuilder {
|
||||||
Box::pin(self.build_with_home(server, home, /*resume_from*/ None)).await
|
Box::pin(self.build_with_home(server, home, /*resume_from*/ None)).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn build_remote_aware(
|
||||||
|
&mut self,
|
||||||
|
server: &wiremock::MockServer,
|
||||||
|
) -> anyhow::Result<TestCodex> {
|
||||||
|
let test_env = test_env().await?;
|
||||||
|
let experimental_exec_server_url =
|
||||||
|
test_env.experimental_exec_server_url().map(str::to_owned);
|
||||||
|
let cwd = test_env.cwd.to_path_buf();
|
||||||
|
self.config_mutators.push(Box::new(move |config| {
|
||||||
|
config.experimental_exec_server_url = experimental_exec_server_url;
|
||||||
|
config.cwd = cwd;
|
||||||
|
}));
|
||||||
|
|
||||||
|
let mut test = self.build(server).await?;
|
||||||
|
test._test_env = test_env;
|
||||||
|
Ok(test)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn build_with_streaming_server(
|
pub async fn build_with_streaming_server(
|
||||||
&mut self,
|
&mut self,
|
||||||
server: &StreamingSseServer,
|
server: &StreamingSseServer,
|
||||||
|
|
@ -176,7 +453,8 @@ impl TestCodexBuilder {
|
||||||
) -> anyhow::Result<TestCodex> {
|
) -> anyhow::Result<TestCodex> {
|
||||||
let base_url = format!("{}/v1", server.uri());
|
let base_url = format!("{}/v1", server.uri());
|
||||||
let (config, cwd) = self.prepare_config(base_url, &home).await?;
|
let (config, cwd) = self.prepare_config(base_url, &home).await?;
|
||||||
Box::pin(self.build_from_config(config, cwd, home, resume_from)).await
|
Box::pin(self.build_from_config(config, cwd, home, resume_from, TestEnv::local().await?))
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn build_with_home_and_base_url(
|
async fn build_with_home_and_base_url(
|
||||||
|
|
@ -186,7 +464,8 @@ impl TestCodexBuilder {
|
||||||
resume_from: Option<PathBuf>,
|
resume_from: Option<PathBuf>,
|
||||||
) -> anyhow::Result<TestCodex> {
|
) -> anyhow::Result<TestCodex> {
|
||||||
let (config, cwd) = self.prepare_config(base_url, &home).await?;
|
let (config, cwd) = self.prepare_config(base_url, &home).await?;
|
||||||
Box::pin(self.build_from_config(config, cwd, home, resume_from)).await
|
Box::pin(self.build_from_config(config, cwd, home, resume_from, TestEnv::local().await?))
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn build_from_config(
|
async fn build_from_config(
|
||||||
|
|
@ -195,6 +474,7 @@ impl TestCodexBuilder {
|
||||||
cwd: Arc<TempDir>,
|
cwd: Arc<TempDir>,
|
||||||
home: Arc<TempDir>,
|
home: Arc<TempDir>,
|
||||||
resume_from: Option<PathBuf>,
|
resume_from: Option<PathBuf>,
|
||||||
|
test_env: TestEnv,
|
||||||
) -> anyhow::Result<TestCodex> {
|
) -> anyhow::Result<TestCodex> {
|
||||||
let auth = self.auth.clone();
|
let auth = self.auth.clone();
|
||||||
let thread_manager = if config.model_catalog.is_some() {
|
let thread_manager = if config.model_catalog.is_some() {
|
||||||
|
|
@ -258,6 +538,7 @@ impl TestCodexBuilder {
|
||||||
codex: new_conversation.thread,
|
codex: new_conversation.thread,
|
||||||
session_configured: new_conversation.session_configured,
|
session_configured: new_conversation.session_configured,
|
||||||
thread_manager,
|
thread_manager,
|
||||||
|
_test_env: test_env,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -354,6 +635,7 @@ pub struct TestCodex {
|
||||||
pub session_configured: SessionConfiguredEvent,
|
pub session_configured: SessionConfiguredEvent,
|
||||||
pub config: Config,
|
pub config: Config,
|
||||||
pub thread_manager: Arc<ThreadManager>,
|
pub thread_manager: Arc<ThreadManager>,
|
||||||
|
_test_env: TestEnv,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TestCodex {
|
impl TestCodex {
|
||||||
|
|
@ -369,6 +651,14 @@ impl TestCodex {
|
||||||
self.cwd_path().join(rel)
|
self.cwd_path().join(rel)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn executor_environment(&self) -> &TestEnv {
|
||||||
|
&self._test_env
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the filesystem handle of the executor environment held by this test instance.
pub fn fs(&self) -> Arc<dyn ExecutorFileSystem> {
    let environment = self._test_env.environment();
    environment.get_filesystem()
}
|
||||||
|
|
||||||
pub async fn submit_turn(&self, prompt: &str) -> Result<()> {
|
pub async fn submit_turn(&self, prompt: &str) -> Result<()> {
|
||||||
self.submit_turn_with_policies(
|
self.submit_turn_with_policies(
|
||||||
prompt,
|
prompt,
|
||||||
|
|
@ -431,7 +721,7 @@ impl TestCodex {
|
||||||
text_elements: Vec::new(),
|
text_elements: Vec::new(),
|
||||||
}],
|
}],
|
||||||
final_output_json_schema: None,
|
final_output_json_schema: None,
|
||||||
cwd: self.cwd.path().to_path_buf(),
|
cwd: self.config.cwd.clone(),
|
||||||
approval_policy,
|
approval_policy,
|
||||||
sandbox_policy,
|
sandbox_policy,
|
||||||
model: session_model,
|
model: session_model,
|
||||||
|
|
|
||||||
|
|
@ -2277,14 +2277,9 @@ async fn code_mode_can_call_hidden_dynamic_tools() -> Result<()> {
|
||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
let test = TestCodex {
|
let mut test = base_test;
|
||||||
home: base_test.home,
|
test.codex = new_thread.thread;
|
||||||
cwd: base_test.cwd,
|
test.session_configured = new_thread.session_configured;
|
||||||
codex: new_thread.thread,
|
|
||||||
session_configured: new_thread.session_configured,
|
|
||||||
config: base_test.config,
|
|
||||||
thread_manager: base_test.thread_manager,
|
|
||||||
};
|
|
||||||
|
|
||||||
let code = r#"
|
let code = r#"
|
||||||
import { ALL_TOOLS, hidden_dynamic_tool } from "tools.js";
|
import { ALL_TOOLS, hidden_dynamic_tool } from "tools.js";
|
||||||
|
|
|
||||||
|
|
@ -103,6 +103,7 @@ mod prompt_caching;
|
||||||
mod quota_exceeded;
|
mod quota_exceeded;
|
||||||
mod read_file;
|
mod read_file;
|
||||||
mod realtime_conversation;
|
mod realtime_conversation;
|
||||||
|
mod remote_env;
|
||||||
mod remote_models;
|
mod remote_models;
|
||||||
mod request_compression;
|
mod request_compression;
|
||||||
#[cfg(not(target_os = "windows"))]
|
#[cfg(not(target_os = "windows"))]
|
||||||
|
|
|
||||||
57
codex-rs/core/tests/suite/remote_env.rs
Normal file
57
codex-rs/core/tests/suite/remote_env.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use codex_exec_server::RemoveOptions;
|
||||||
|
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||||
|
use core_test_support::get_remote_test_env;
|
||||||
|
use core_test_support::test_codex::test_env;
|
||||||
|
use pretty_assertions::assert_eq;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::time::SystemTime;
|
||||||
|
use std::time::UNIX_EPOCH;
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn remote_test_env_can_connect_and_use_filesystem() -> Result<()> {
|
||||||
|
let Some(_remote_env) = get_remote_test_env() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
|
let test_env = test_env().await?;
|
||||||
|
let file_system = test_env.environment().get_filesystem();
|
||||||
|
|
||||||
|
let file_path = remote_test_file_path();
|
||||||
|
let file_path_abs = absolute_path(file_path.clone())?;
|
||||||
|
let payload = b"remote-test-env-ok".to_vec();
|
||||||
|
|
||||||
|
file_system
|
||||||
|
.write_file(&file_path_abs, payload.clone())
|
||||||
|
.await?;
|
||||||
|
let actual = file_system.read_file(&file_path_abs).await?;
|
||||||
|
assert_eq!(actual, payload);
|
||||||
|
|
||||||
|
file_system
|
||||||
|
.remove(
|
||||||
|
&file_path_abs,
|
||||||
|
RemoveOptions {
|
||||||
|
recursive: false,
|
||||||
|
force: true,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn absolute_path(path: PathBuf) -> Result<AbsolutePathBuf> {
|
||||||
|
AbsolutePathBuf::try_from(path.clone())
|
||||||
|
.map_err(|err| anyhow::anyhow!("invalid absolute path {}: {err}", path.display()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds a `/tmp/codex-remote-test-env-<pid>-<nanos>.txt` path.
///
/// `<nanos>` is the current time in nanoseconds since the Unix epoch, or `0`
/// when the system clock is before the epoch.
fn remote_test_file_path() -> PathBuf {
    let pid = std::process::id();
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_nanos())
        .unwrap_or(0);
    format!("/tmp/codex-remote-test-env-{pid}-{nanos}.txt").into()
}
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
use base64::Engine;
|
use base64::Engine;
|
||||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||||
use codex_core::CodexAuth;
|
use codex_core::CodexAuth;
|
||||||
|
use codex_exec_server::CreateDirectoryOptions;
|
||||||
use codex_features::Feature;
|
use codex_features::Feature;
|
||||||
use codex_protocol::config_types::ReasoningSummary;
|
use codex_protocol::config_types::ReasoningSummary;
|
||||||
use codex_protocol::openai_models::ConfigShellToolType;
|
use codex_protocol::openai_models::ConfigShellToolType;
|
||||||
|
|
@ -32,12 +33,16 @@ use core_test_support::test_codex::TestCodex;
|
||||||
use core_test_support::test_codex::test_codex;
|
use core_test_support::test_codex::test_codex;
|
||||||
use core_test_support::wait_for_event;
|
use core_test_support::wait_for_event;
|
||||||
use core_test_support::wait_for_event_with_timeout;
|
use core_test_support::wait_for_event_with_timeout;
|
||||||
|
use image::DynamicImage;
|
||||||
use image::GenericImageView;
|
use image::GenericImageView;
|
||||||
use image::ImageBuffer;
|
use image::ImageBuffer;
|
||||||
use image::Rgba;
|
use image::Rgba;
|
||||||
use image::load_from_memory;
|
use image::load_from_memory;
|
||||||
use pretty_assertions::assert_eq;
|
use pretty_assertions::assert_eq;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
use std::io::Cursor;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::path::PathBuf;
|
||||||
use tokio::time::Duration;
|
use tokio::time::Duration;
|
||||||
use wiremock::BodyPrintLimit;
|
use wiremock::BodyPrintLimit;
|
||||||
use wiremock::MockServer;
|
use wiremock::MockServer;
|
||||||
|
|
@ -73,6 +78,11 @@ fn find_image_message(body: &Value) -> Option<&Value> {
|
||||||
image_messages(body).into_iter().next()
|
image_messages(body).into_iter().next()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn absolute_path(path: &Path) -> anyhow::Result<codex_utils_absolute_path::AbsolutePathBuf> {
|
||||||
|
codex_utils_absolute_path::AbsolutePathBuf::try_from(path.to_path_buf())
|
||||||
|
.map_err(|err| anyhow::anyhow!("invalid absolute path {}: {err}", path.display()))
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
|
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
|
||||||
skip_if_no_network!(Ok(()));
|
skip_if_no_network!(Ok(()));
|
||||||
|
|
@ -171,23 +181,37 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||||
skip_if_no_network!(Ok(()));
|
skip_if_no_network!(Ok(()));
|
||||||
|
|
||||||
let server = start_mock_server().await;
|
let server = start_mock_server().await;
|
||||||
|
let mut builder = test_codex();
|
||||||
|
let test = builder.build_remote_aware(&server).await?;
|
||||||
let TestCodex {
|
let TestCodex {
|
||||||
codex,
|
codex,
|
||||||
cwd,
|
|
||||||
session_configured,
|
session_configured,
|
||||||
|
config,
|
||||||
..
|
..
|
||||||
} = test_codex().build(&server).await?;
|
} = &test;
|
||||||
|
let cwd = config.cwd.clone();
|
||||||
|
|
||||||
|
let rel_path = PathBuf::from("assets/example.png");
|
||||||
|
let abs_path = cwd.join(&rel_path);
|
||||||
|
let abs_path_absolute = absolute_path(&abs_path)?;
|
||||||
|
let assets_dir = cwd.join("assets");
|
||||||
|
|
||||||
|
let file_system = test.fs();
|
||||||
|
|
||||||
let rel_path = "assets/example.png";
|
|
||||||
let abs_path = cwd.path().join(rel_path);
|
|
||||||
if let Some(parent) = abs_path.parent() {
|
|
||||||
std::fs::create_dir_all(parent)?;
|
|
||||||
}
|
|
||||||
let original_width = 2304;
|
let original_width = 2304;
|
||||||
let original_height = 864;
|
let original_height = 864;
|
||||||
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([255u8, 0, 0, 255]));
|
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([255u8, 0, 0, 255]));
|
||||||
image.save(&abs_path)?;
|
let mut cursor = Cursor::new(Vec::new());
|
||||||
|
DynamicImage::ImageRgba8(image).write_to(&mut cursor, image::ImageFormat::Png)?;
|
||||||
|
file_system
|
||||||
|
.create_directory(
|
||||||
|
&absolute_path(&assets_dir)?,
|
||||||
|
CreateDirectoryOptions { recursive: true },
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
file_system
|
||||||
|
.write_file(&abs_path_absolute, cursor.into_inner())
|
||||||
|
.await?;
|
||||||
|
|
||||||
let call_id = "view-image-call";
|
let call_id = "view-image-call";
|
||||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||||
|
|
@ -214,7 +238,7 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||||
text_elements: Vec::new(),
|
text_elements: Vec::new(),
|
||||||
}],
|
}],
|
||||||
final_output_json_schema: None,
|
final_output_json_schema: None,
|
||||||
cwd: cwd.path().to_path_buf(),
|
cwd: cwd.clone(),
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||||
model: session_model,
|
model: session_model,
|
||||||
|
|
@ -228,7 +252,7 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||||
|
|
||||||
let mut tool_event = None;
|
let mut tool_event = None;
|
||||||
wait_for_event_with_timeout(
|
wait_for_event_with_timeout(
|
||||||
&codex,
|
codex,
|
||||||
|event| match event {
|
|event| match event {
|
||||||
EventMsg::ViewImageToolCall(_) => {
|
EventMsg::ViewImageToolCall(_) => {
|
||||||
tool_event = Some(event.clone());
|
tool_event = Some(event.clone());
|
||||||
|
|
|
||||||
78
scripts/test-remote-env.sh
Executable file
78
scripts/test-remote-env.sh
Executable file
|
|
@ -0,0 +1,78 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Remote-env setup script for codex-rs integration tests.
|
||||||
|
#
|
||||||
|
# Usage (source-only):
|
||||||
|
# source scripts/test-remote-env.sh
|
||||||
|
# cd codex-rs
|
||||||
|
# cargo test -p codex-core --test all remote_env_connects_creates_temp_dir_and_runs_sample_script
|
||||||
|
# codex_remote_env_cleanup
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||||
|
|
||||||
|
is_sourced() {
|
||||||
|
[[ "${BASH_SOURCE[0]}" != "$0" ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
setup_remote_env() {
|
||||||
|
local container_name
|
||||||
|
local codex_exec_server_binary_path
|
||||||
|
|
||||||
|
container_name="${CODEX_TEST_REMOTE_ENV_CONTAINER_NAME:-codex-remote-test-env-local-$(date +%s)-${RANDOM}}"
|
||||||
|
codex_exec_server_binary_path="${REPO_ROOT}/codex-rs/target/debug/codex-exec-server"
|
||||||
|
|
||||||
|
if ! command -v docker >/dev/null 2>&1; then
|
||||||
|
echo "docker is required (Colima or Docker Desktop)" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! docker info >/dev/null 2>&1; then
|
||||||
|
echo "docker daemon is not reachable; for Colima run: colima start" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! command -v cargo >/dev/null 2>&1; then
|
||||||
|
echo "cargo is required to build codex-exec-server" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
(
|
||||||
|
cd "${REPO_ROOT}/codex-rs"
|
||||||
|
cargo build -p codex-exec-server --bin codex-exec-server
|
||||||
|
)
|
||||||
|
|
||||||
|
if [[ ! -f "${codex_exec_server_binary_path}" ]]; then
|
||||||
|
echo "codex-exec-server binary not found at ${codex_exec_server_binary_path}" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
docker rm -f "${container_name}" >/dev/null 2>&1 || true
|
||||||
|
docker run -d --name "${container_name}" ubuntu:24.04 sleep infinity >/dev/null
|
||||||
|
|
||||||
|
export CODEX_TEST_REMOTE_ENV="${container_name}"
|
||||||
|
}
|
||||||
|
|
||||||
|
codex_remote_env_cleanup() {
|
||||||
|
if [[ -n "${CODEX_TEST_REMOTE_ENV:-}" ]]; then
|
||||||
|
docker rm -f "${CODEX_TEST_REMOTE_ENV}" >/dev/null 2>&1 || true
|
||||||
|
unset CODEX_TEST_REMOTE_ENV
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
if ! is_sourced; then
|
||||||
|
echo "source this script instead of executing it: source scripts/test-remote-env.sh" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
old_shell_options="$(set +o)"
|
||||||
|
set -euo pipefail
|
||||||
|
if setup_remote_env; then
|
||||||
|
status=0
|
||||||
|
echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}"
|
||||||
|
echo "Remote env ready. Run your command, then call: codex_remote_env_cleanup"
|
||||||
|
else
|
||||||
|
status=$?
|
||||||
|
fi
|
||||||
|
eval "${old_shell_options}"
|
||||||
|
return "${status}"
|
||||||
Loading…
Add table
Reference in a new issue