Add remote env CI matrix and integration test (#14869)
Setting `CODEX_TEST_REMOTE_ENV` makes `test_codex` start the executor "remotely" (inside a Docker container), turning any integration test into a remote test.
This commit is contained in:
parent
e5f4d1fef5
commit
ba85a58039
10 changed files with 514 additions and 23 deletions
23
.github/workflows/rust-ci.yml
vendored
23
.github/workflows/rust-ci.yml
vendored
|
|
@ -527,7 +527,7 @@ jobs:
|
|||
key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1
|
||||
|
||||
tests:
|
||||
name: Tests — ${{ matrix.runner }} - ${{ matrix.target }}
|
||||
name: Tests — ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.remote_env == 'true' && ' (remote)' || '' }}
|
||||
runs-on: ${{ matrix.runs_on || matrix.runner }}
|
||||
timeout-minutes: 30
|
||||
needs: changed
|
||||
|
|
@ -553,6 +553,7 @@ jobs:
|
|||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-gnu
|
||||
profile: dev
|
||||
remote_env: "true"
|
||||
runs_on:
|
||||
group: codex-runners
|
||||
labels: codex-linux-x64
|
||||
|
|
@ -590,6 +591,7 @@ jobs:
|
|||
sudo apt-get update -y
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends pkg-config libcap-dev
|
||||
fi
|
||||
|
||||
# Some integration tests rely on DotSlash being installed.
|
||||
# See https://github.com/openai/codex/pull/7617.
|
||||
- name: Install DotSlash
|
||||
|
|
@ -674,6 +676,15 @@ jobs:
|
|||
sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0
|
||||
fi
|
||||
|
||||
- name: Set up remote test env (Docker)
|
||||
if: ${{ runner.os == 'Linux' && matrix.remote_env == 'true' }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
export CODEX_TEST_REMOTE_ENV_CONTAINER_NAME=codex-remote-test-env
|
||||
source "${GITHUB_WORKSPACE}/scripts/test-remote-env.sh"
|
||||
echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}" >> "$GITHUB_ENV"
|
||||
|
||||
- name: tests
|
||||
id: test
|
||||
run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} --cargo-profile ci-test --timings
|
||||
|
|
@ -726,6 +737,16 @@ jobs:
|
|||
echo '```';
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Tear down remote test env
|
||||
if: ${{ always() && runner.os == 'Linux' && matrix.remote_env == 'true' }}
|
||||
shell: bash
|
||||
run: |
|
||||
set +e
|
||||
if [[ "${{ steps.test.outcome }}" != "success" ]]; then
|
||||
docker logs codex-remote-test-env || true
|
||||
fi
|
||||
docker rm -f codex-remote-test-env >/dev/null 2>&1 || true
|
||||
|
||||
- name: verify tests passed
|
||||
if: steps.test.outcome == 'failure'
|
||||
run: |
|
||||
|
|
|
|||
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
|
|
@ -3121,6 +3121,7 @@ dependencies = [
|
|||
"base64 0.22.1",
|
||||
"codex-arg0",
|
||||
"codex-core",
|
||||
"codex-exec-server",
|
||||
"codex-features",
|
||||
"codex-protocol",
|
||||
"codex-utils-absolute-path",
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ assert_cmd = { workspace = true }
|
|||
base64 = { workspace = true }
|
||||
codex-arg0 = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-exec-server = { workspace = true }
|
||||
codex-features = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
codex-utils-absolute-path = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -289,6 +289,29 @@ pub fn sandbox_network_env_var() -> &'static str {
|
|||
codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
|
||||
}
|
||||
|
||||
/// Environment variable that opts tests into the remote executor; its value
/// is used as the Docker container name.
const REMOTE_ENV_ENV_VAR: &str = "CODEX_TEST_REMOTE_ENV";

/// Configuration for running a test against a remote (containerized) executor.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RemoteEnvConfig {
    /// Container name taken verbatim from `CODEX_TEST_REMOTE_ENV`.
    pub container_name: String,
}

/// Reads the remote test environment configuration from the process environment.
///
/// Returns `None`, after printing a skip notice to stderr, when
/// `CODEX_TEST_REMOTE_ENV` is not set.
///
/// # Panics
///
/// Panics when the variable is set but is not valid Unicode, or when it is
/// empty or contains only whitespace.
pub fn get_remote_test_env() -> Option<RemoteEnvConfig> {
    // Look the variable up once so "unset" and "set but unreadable" are
    // distinguished by the same read instead of two racing lookups.
    let container_name = match std::env::var(REMOTE_ENV_ENV_VAR) {
        Ok(value) => value,
        Err(std::env::VarError::NotPresent) => {
            eprintln!("Skipping test because {REMOTE_ENV_ENV_VAR} is not set.");
            return None;
        }
        Err(std::env::VarError::NotUnicode(raw)) => {
            panic!("{REMOTE_ENV_ENV_VAR} must be valid Unicode, got {raw:?}")
        }
    };
    assert!(
        !container_name.trim().is_empty(),
        "{REMOTE_ENV_ENV_VAR} must not be empty"
    );

    Some(RemoteEnvConfig { container_name })
}
|
||||
|
||||
pub fn format_with_current_shell(command: &str) -> Vec<String> {
|
||||
codex_core::shell::default_user_shell().derive_exec_args(command, /*use_login_shell*/ true)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,16 @@
|
|||
use std::mem::swap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexThread;
|
||||
use codex_core::ModelProviderInfo;
|
||||
|
|
@ -14,6 +20,8 @@ use codex_core::config::Config;
|
|||
use codex_core::models_manager::collaboration_mode_presets::CollaborationModesConfig;
|
||||
use codex_core::shell::Shell;
|
||||
use codex_core::shell::get_shell_by_model_provided_path;
|
||||
use codex_exec_server::CreateDirectoryOptions;
|
||||
use codex_exec_server::ExecutorFileSystem;
|
||||
use codex_features::Feature;
|
||||
use codex_protocol::config_types::ServiceTier;
|
||||
use codex_protocol::openai_models::ModelsResponse;
|
||||
|
|
@ -24,10 +32,13 @@ use codex_protocol::protocol::SandboxPolicy;
|
|||
use codex_protocol::protocol::SessionConfiguredEvent;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use serde_json::Value;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
use crate::RemoteEnvConfig;
|
||||
use crate::get_remote_test_env;
|
||||
use crate::load_default_config_for_test;
|
||||
use crate::responses::WebSocketTestServer;
|
||||
use crate::responses::output_value_to_text;
|
||||
|
|
@ -41,6 +52,254 @@ use wiremock::matchers::path_regex;
|
|||
type ConfigMutator = dyn FnOnce(&mut Config) + Send;
|
||||
type PreBuildHook = dyn FnOnce(&Path) + Send + 'static;
|
||||
const TEST_MODEL_WITH_EXPERIMENTAL_TOOLS: &str = "test-gpt-5.1-codex";
|
||||
const REMOTE_EXEC_SERVER_START_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
const REMOTE_EXEC_SERVER_POLL_INTERVAL: Duration = Duration::from_millis(25);
|
||||
static REMOTE_EXEC_SERVER_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RemoteExecServerProcess {
|
||||
container_name: String,
|
||||
pid: u32,
|
||||
remote_exec_server_path: String,
|
||||
stdout_path: String,
|
||||
cleanup_paths: Vec<String>,
|
||||
}
|
||||
|
||||
impl Drop for RemoteExecServerProcess {
|
||||
fn drop(&mut self) {
|
||||
let cleanup_paths = self.cleanup_paths.join(" ");
|
||||
let cleanup_paths_script = if cleanup_paths.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!("rm -rf {cleanup_paths}; ")
|
||||
};
|
||||
let script = format!(
|
||||
"if kill -0 {pid} 2>/dev/null; then kill {pid}; fi; {cleanup_paths_script}rm -f {remote_exec_server_path} {stdout_path}",
|
||||
pid = self.pid,
|
||||
cleanup_paths_script = cleanup_paths_script,
|
||||
remote_exec_server_path = self.remote_exec_server_path,
|
||||
stdout_path = self.stdout_path
|
||||
);
|
||||
let _ = docker_command_capture_stdout(["exec", &self.container_name, "sh", "-lc", &script]);
|
||||
}
|
||||
}
|
||||
|
||||
impl RemoteExecServerProcess {
|
||||
fn register_cleanup_path(&mut self, path: &Path) {
|
||||
self.cleanup_paths.push(path.display().to_string());
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TestEnv {
|
||||
environment: codex_exec_server::Environment,
|
||||
cwd: PathBuf,
|
||||
_local_cwd_temp_dir: Option<TempDir>,
|
||||
_remote_exec_server_process: Option<RemoteExecServerProcess>,
|
||||
}
|
||||
|
||||
impl TestEnv {
|
||||
pub async fn local() -> Result<Self> {
|
||||
let local_cwd_temp_dir = TempDir::new()?;
|
||||
let cwd = local_cwd_temp_dir.path().to_path_buf();
|
||||
let environment =
|
||||
codex_exec_server::Environment::create(/*experimental_exec_server_url*/ None).await?;
|
||||
Ok(Self {
|
||||
environment,
|
||||
cwd,
|
||||
_local_cwd_temp_dir: Some(local_cwd_temp_dir),
|
||||
_remote_exec_server_process: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn environment(&self) -> &codex_exec_server::Environment {
|
||||
&self.environment
|
||||
}
|
||||
|
||||
pub fn experimental_exec_server_url(&self) -> Option<&str> {
|
||||
self.environment.experimental_exec_server_url()
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn test_env() -> Result<TestEnv> {
|
||||
match get_remote_test_env() {
|
||||
Some(remote_env) => {
|
||||
let mut remote_process = start_remote_exec_server(&remote_env)?;
|
||||
let remote_ip = remote_container_ip(&remote_env.container_name)?;
|
||||
let websocket_url = rewrite_websocket_host(&remote_process.listen_url, &remote_ip)?;
|
||||
let environment = codex_exec_server::Environment::create(Some(websocket_url)).await?;
|
||||
let cwd = remote_aware_cwd_path();
|
||||
environment
|
||||
.get_filesystem()
|
||||
.create_directory(
|
||||
&absolute_path(&cwd)?,
|
||||
CreateDirectoryOptions { recursive: true },
|
||||
)
|
||||
.await?;
|
||||
remote_process.process.register_cleanup_path(&cwd);
|
||||
Ok(TestEnv {
|
||||
environment,
|
||||
cwd,
|
||||
_local_cwd_temp_dir: None,
|
||||
_remote_exec_server_process: Some(remote_process.process),
|
||||
})
|
||||
}
|
||||
None => TestEnv::local().await,
|
||||
}
|
||||
}
|
||||
|
||||
struct RemoteExecServerStart {
|
||||
process: RemoteExecServerProcess,
|
||||
listen_url: String,
|
||||
}
|
||||
|
||||
fn start_remote_exec_server(remote_env: &RemoteEnvConfig) -> Result<RemoteExecServerStart> {
|
||||
let container_name = remote_env.container_name.as_str();
|
||||
let instance_id = remote_exec_server_instance_id();
|
||||
let remote_exec_server_path = format!("/tmp/codex-exec-server-{instance_id}");
|
||||
let stdout_path = format!("/tmp/codex-exec-server-{instance_id}.stdout");
|
||||
let local_binary = codex_utils_cargo_bin::cargo_bin("codex-exec-server")
|
||||
.context("resolve codex-exec-server binary")?;
|
||||
let local_binary = local_binary.to_string_lossy().to_string();
|
||||
let remote_binary = format!("{container_name}:{remote_exec_server_path}");
|
||||
|
||||
docker_command_success(["cp", &local_binary, &remote_binary])?;
|
||||
docker_command_success([
|
||||
"exec",
|
||||
container_name,
|
||||
"chmod",
|
||||
"+x",
|
||||
&remote_exec_server_path,
|
||||
])?;
|
||||
|
||||
let start_script = format!(
|
||||
"rm -f {stdout_path}; \
|
||||
nohup {remote_exec_server_path} --listen ws://0.0.0.0:0 > {stdout_path} 2>&1 & \
|
||||
echo $!"
|
||||
);
|
||||
let pid_output =
|
||||
docker_command_capture_stdout(["exec", container_name, "sh", "-lc", &start_script])?;
|
||||
let pid = pid_output
|
||||
.trim()
|
||||
.parse::<u32>()
|
||||
.with_context(|| format!("parse remote exec-server PID from {pid_output:?}"))?;
|
||||
|
||||
let listen_url = wait_for_remote_listen_url(container_name, &stdout_path)?;
|
||||
|
||||
Ok(RemoteExecServerStart {
|
||||
process: RemoteExecServerProcess {
|
||||
container_name: container_name.to_string(),
|
||||
pid,
|
||||
remote_exec_server_path,
|
||||
stdout_path,
|
||||
cleanup_paths: Vec::new(),
|
||||
},
|
||||
listen_url,
|
||||
})
|
||||
}
|
||||
|
||||
fn remote_aware_cwd_path() -> PathBuf {
|
||||
PathBuf::from(format!(
|
||||
"/tmp/codex-core-test-cwd-{}",
|
||||
remote_exec_server_instance_id()
|
||||
))
|
||||
}
|
||||
|
||||
fn wait_for_remote_listen_url(container_name: &str, stdout_path: &str) -> Result<String> {
|
||||
let deadline = Instant::now() + REMOTE_EXEC_SERVER_START_TIMEOUT;
|
||||
loop {
|
||||
let line = docker_command_capture_stdout([
|
||||
"exec",
|
||||
container_name,
|
||||
"sh",
|
||||
"-lc",
|
||||
&format!("head -n 1 {stdout_path} 2>/dev/null || true"),
|
||||
])?;
|
||||
let listen_url = line.trim();
|
||||
if listen_url.starts_with("ws://") {
|
||||
return Ok(listen_url.to_string());
|
||||
}
|
||||
|
||||
if Instant::now() >= deadline {
|
||||
return Err(anyhow!(
|
||||
"timed out waiting for remote exec-server listen URL in container `{container_name}` after {REMOTE_EXEC_SERVER_START_TIMEOUT:?}"
|
||||
));
|
||||
}
|
||||
std::thread::sleep(REMOTE_EXEC_SERVER_POLL_INTERVAL);
|
||||
}
|
||||
}
|
||||
|
||||
fn remote_exec_server_instance_id() -> String {
|
||||
let instance = REMOTE_EXEC_SERVER_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||
format!("{}-{instance}", std::process::id())
|
||||
}
|
||||
|
||||
fn remote_container_ip(container_name: &str) -> Result<String> {
|
||||
let ip = docker_command_capture_stdout([
|
||||
"inspect",
|
||||
"-f",
|
||||
"{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
|
||||
container_name,
|
||||
])?;
|
||||
let ip = ip.trim();
|
||||
if ip.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"container `{container_name}` has no IP address; cannot connect to remote exec-server"
|
||||
));
|
||||
}
|
||||
Ok(ip.to_string())
|
||||
}
|
||||
|
||||
fn rewrite_websocket_host(listen_url: &str, host: &str) -> Result<String> {
|
||||
let Some(address) = listen_url.strip_prefix("ws://") else {
|
||||
return Err(anyhow!(
|
||||
"unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
|
||||
));
|
||||
};
|
||||
let Some((_, port)) = address.rsplit_once(':') else {
|
||||
return Err(anyhow!(
|
||||
"unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
|
||||
));
|
||||
};
|
||||
Ok(format!("ws://{host}:{port}"))
|
||||
}
|
||||
|
||||
fn docker_command_success<const N: usize>(args: [&str; N]) -> Result<()> {
|
||||
let output = Command::new("docker")
|
||||
.args(args)
|
||||
.output()
|
||||
.with_context(|| format!("run docker {:?}", args))?;
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"docker {:?} failed: stdout={} stderr={}",
|
||||
args,
|
||||
String::from_utf8_lossy(&output.stdout).trim(),
|
||||
String::from_utf8_lossy(&output.stderr).trim()
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn docker_command_capture_stdout<const N: usize>(args: [&str; N]) -> Result<String> {
|
||||
let output = Command::new("docker")
|
||||
.args(args)
|
||||
.output()
|
||||
.with_context(|| format!("run docker {:?}", args))?;
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"docker {:?} failed: stdout={} stderr={}",
|
||||
args,
|
||||
String::from_utf8_lossy(&output.stdout).trim(),
|
||||
String::from_utf8_lossy(&output.stderr).trim()
|
||||
));
|
||||
}
|
||||
String::from_utf8(output.stdout).context("docker stdout must be utf-8")
|
||||
}
|
||||
|
||||
fn absolute_path(path: &Path) -> Result<AbsolutePathBuf> {
|
||||
AbsolutePathBuf::try_from(path.to_path_buf())
|
||||
.map_err(|err| anyhow!("invalid absolute path {}: {err}", path.display()))
|
||||
}
|
||||
|
||||
/// A collection of different ways the model can output an apply_patch call
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
|
|
@ -124,6 +383,24 @@ impl TestCodexBuilder {
|
|||
Box::pin(self.build_with_home(server, home, /*resume_from*/ None)).await
|
||||
}
|
||||
|
||||
pub async fn build_remote_aware(
|
||||
&mut self,
|
||||
server: &wiremock::MockServer,
|
||||
) -> anyhow::Result<TestCodex> {
|
||||
let test_env = test_env().await?;
|
||||
let experimental_exec_server_url =
|
||||
test_env.experimental_exec_server_url().map(str::to_owned);
|
||||
let cwd = test_env.cwd.to_path_buf();
|
||||
self.config_mutators.push(Box::new(move |config| {
|
||||
config.experimental_exec_server_url = experimental_exec_server_url;
|
||||
config.cwd = cwd;
|
||||
}));
|
||||
|
||||
let mut test = self.build(server).await?;
|
||||
test._test_env = test_env;
|
||||
Ok(test)
|
||||
}
|
||||
|
||||
pub async fn build_with_streaming_server(
|
||||
&mut self,
|
||||
server: &StreamingSseServer,
|
||||
|
|
@ -176,7 +453,8 @@ impl TestCodexBuilder {
|
|||
) -> anyhow::Result<TestCodex> {
|
||||
let base_url = format!("{}/v1", server.uri());
|
||||
let (config, cwd) = self.prepare_config(base_url, &home).await?;
|
||||
Box::pin(self.build_from_config(config, cwd, home, resume_from)).await
|
||||
Box::pin(self.build_from_config(config, cwd, home, resume_from, TestEnv::local().await?))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn build_with_home_and_base_url(
|
||||
|
|
@ -186,7 +464,8 @@ impl TestCodexBuilder {
|
|||
resume_from: Option<PathBuf>,
|
||||
) -> anyhow::Result<TestCodex> {
|
||||
let (config, cwd) = self.prepare_config(base_url, &home).await?;
|
||||
Box::pin(self.build_from_config(config, cwd, home, resume_from)).await
|
||||
Box::pin(self.build_from_config(config, cwd, home, resume_from, TestEnv::local().await?))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn build_from_config(
|
||||
|
|
@ -195,6 +474,7 @@ impl TestCodexBuilder {
|
|||
cwd: Arc<TempDir>,
|
||||
home: Arc<TempDir>,
|
||||
resume_from: Option<PathBuf>,
|
||||
test_env: TestEnv,
|
||||
) -> anyhow::Result<TestCodex> {
|
||||
let auth = self.auth.clone();
|
||||
let thread_manager = if config.model_catalog.is_some() {
|
||||
|
|
@ -258,6 +538,7 @@ impl TestCodexBuilder {
|
|||
codex: new_conversation.thread,
|
||||
session_configured: new_conversation.session_configured,
|
||||
thread_manager,
|
||||
_test_env: test_env,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -354,6 +635,7 @@ pub struct TestCodex {
|
|||
pub session_configured: SessionConfiguredEvent,
|
||||
pub config: Config,
|
||||
pub thread_manager: Arc<ThreadManager>,
|
||||
_test_env: TestEnv,
|
||||
}
|
||||
|
||||
impl TestCodex {
|
||||
|
|
@ -369,6 +651,14 @@ impl TestCodex {
|
|||
self.cwd_path().join(rel)
|
||||
}
|
||||
|
||||
/// Borrows the executor environment held by this test instance.
pub fn executor_environment(&self) -> &TestEnv {
|
||||
&self._test_env
|
||||
}
|
||||
|
||||
/// Returns the filesystem handle obtained from this test's executor environment.
pub fn fs(&self) -> Arc<dyn ExecutorFileSystem> {
|
||||
self._test_env.environment().get_filesystem()
|
||||
}
|
||||
|
||||
pub async fn submit_turn(&self, prompt: &str) -> Result<()> {
|
||||
self.submit_turn_with_policies(
|
||||
prompt,
|
||||
|
|
@ -431,7 +721,7 @@ impl TestCodex {
|
|||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: self.cwd.path().to_path_buf(),
|
||||
cwd: self.config.cwd.clone(),
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
model: session_model,
|
||||
|
|
|
|||
|
|
@ -2277,14 +2277,9 @@ async fn code_mode_can_call_hidden_dynamic_tools() -> Result<()> {
|
|||
false,
|
||||
)
|
||||
.await?;
|
||||
let test = TestCodex {
|
||||
home: base_test.home,
|
||||
cwd: base_test.cwd,
|
||||
codex: new_thread.thread,
|
||||
session_configured: new_thread.session_configured,
|
||||
config: base_test.config,
|
||||
thread_manager: base_test.thread_manager,
|
||||
};
|
||||
let mut test = base_test;
|
||||
test.codex = new_thread.thread;
|
||||
test.session_configured = new_thread.session_configured;
|
||||
|
||||
let code = r#"
|
||||
import { ALL_TOOLS, hidden_dynamic_tool } from "tools.js";
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ mod prompt_caching;
|
|||
mod quota_exceeded;
|
||||
mod read_file;
|
||||
mod realtime_conversation;
|
||||
mod remote_env;
|
||||
mod remote_models;
|
||||
mod request_compression;
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
|
|
|
|||
57
codex-rs/core/tests/suite/remote_env.rs
Normal file
57
codex-rs/core/tests/suite/remote_env.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
use anyhow::Result;
|
||||
use codex_exec_server::RemoveOptions;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use core_test_support::get_remote_test_env;
|
||||
use core_test_support::test_codex::test_env;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::PathBuf;
|
||||
use std::time::SystemTime;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_test_env_can_connect_and_use_filesystem() -> Result<()> {
|
||||
let Some(_remote_env) = get_remote_test_env() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let test_env = test_env().await?;
|
||||
let file_system = test_env.environment().get_filesystem();
|
||||
|
||||
let file_path = remote_test_file_path();
|
||||
let file_path_abs = absolute_path(file_path.clone())?;
|
||||
let payload = b"remote-test-env-ok".to_vec();
|
||||
|
||||
file_system
|
||||
.write_file(&file_path_abs, payload.clone())
|
||||
.await?;
|
||||
let actual = file_system.read_file(&file_path_abs).await?;
|
||||
assert_eq!(actual, payload);
|
||||
|
||||
file_system
|
||||
.remove(
|
||||
&file_path_abs,
|
||||
RemoveOptions {
|
||||
recursive: false,
|
||||
force: true,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn absolute_path(path: PathBuf) -> Result<AbsolutePathBuf> {
|
||||
AbsolutePathBuf::try_from(path.clone())
|
||||
.map_err(|err| anyhow::anyhow!("invalid absolute path {}: {err}", path.display()))
|
||||
}
|
||||
|
||||
/// Builds a `/tmp` file path made unique by the current process id and the
/// current time in nanoseconds since the Unix epoch.
///
/// The timestamp falls back to `0` if the system clock reads earlier than
/// the epoch.
fn remote_test_file_path() -> PathBuf {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_nanos());
    let pid = std::process::id();
    PathBuf::from(format!("/tmp/codex-remote-test-env-{pid}-{nanos}.txt"))
}
|
||||
|
|
@ -3,6 +3,7 @@
|
|||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_exec_server::CreateDirectoryOptions;
|
||||
use codex_features::Feature;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::openai_models::ConfigShellToolType;
|
||||
|
|
@ -32,12 +33,16 @@ use core_test_support::test_codex::TestCodex;
|
|||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_with_timeout;
|
||||
use image::DynamicImage;
|
||||
use image::GenericImageView;
|
||||
use image::ImageBuffer;
|
||||
use image::Rgba;
|
||||
use image::load_from_memory;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use std::io::Cursor;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use tokio::time::Duration;
|
||||
use wiremock::BodyPrintLimit;
|
||||
use wiremock::MockServer;
|
||||
|
|
@ -73,6 +78,11 @@ fn find_image_message(body: &Value) -> Option<&Value> {
|
|||
image_messages(body).into_iter().next()
|
||||
}
|
||||
|
||||
fn absolute_path(path: &Path) -> anyhow::Result<codex_utils_absolute_path::AbsolutePathBuf> {
|
||||
codex_utils_absolute_path::AbsolutePathBuf::try_from(path.to_path_buf())
|
||||
.map_err(|err| anyhow::anyhow!("invalid absolute path {}: {err}", path.display()))
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
|
@ -171,23 +181,37 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
|||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build_remote_aware(&server).await?;
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
config,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
} = &test;
|
||||
let cwd = config.cwd.clone();
|
||||
|
||||
let rel_path = PathBuf::from("assets/example.png");
|
||||
let abs_path = cwd.join(&rel_path);
|
||||
let abs_path_absolute = absolute_path(&abs_path)?;
|
||||
let assets_dir = cwd.join("assets");
|
||||
|
||||
let file_system = test.fs();
|
||||
|
||||
let rel_path = "assets/example.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let original_width = 2304;
|
||||
let original_height = 864;
|
||||
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([255u8, 0, 0, 255]));
|
||||
image.save(&abs_path)?;
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
DynamicImage::ImageRgba8(image).write_to(&mut cursor, image::ImageFormat::Png)?;
|
||||
file_system
|
||||
.create_directory(
|
||||
&absolute_path(&assets_dir)?,
|
||||
CreateDirectoryOptions { recursive: true },
|
||||
)
|
||||
.await?;
|
||||
file_system
|
||||
.write_file(&abs_path_absolute, cursor.into_inner())
|
||||
.await?;
|
||||
|
||||
let call_id = "view-image-call";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
|
|
@ -214,7 +238,7 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
|||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
cwd: cwd.clone(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
|
|
@ -228,7 +252,7 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
|||
|
||||
let mut tool_event = None;
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
codex,
|
||||
|event| match event {
|
||||
EventMsg::ViewImageToolCall(_) => {
|
||||
tool_event = Some(event.clone());
|
||||
|
|
|
|||
78
scripts/test-remote-env.sh
Executable file
78
scripts/test-remote-env.sh
Executable file
|
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Remote-env setup script for codex-rs integration tests.
|
||||
#
|
||||
# Usage (source-only):
|
||||
# source scripts/test-remote-env.sh
|
||||
# cd codex-rs
|
||||
# cargo test -p codex-core --test all remote_test_env_can_connect_and_use_filesystem
|
||||
# codex_remote_env_cleanup
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
|
||||
# Succeeds when this file is being sourced, i.e. the file that defines this
# function is not the script named by $0.
is_sourced() {
  [[ "$0" != "${BASH_SOURCE[0]}" ]]
}
|
||||
|
||||
# Builds codex-exec-server, starts a fresh ubuntu:24.04 container, and exports
# CODEX_TEST_REMOTE_ENV with the container's name.
#
# The container name comes from CODEX_TEST_REMOTE_ENV_CONTAINER_NAME when set;
# otherwise a timestamped, randomized name is generated.
#
# Returns non-zero (with a message on stderr) when docker or cargo is
# unavailable, the build fails, the binary is missing, or the container cannot
# be started.
setup_remote_env() {
  local container_name
  local codex_exec_server_binary_path

  container_name="${CODEX_TEST_REMOTE_ENV_CONTAINER_NAME:-codex-remote-test-env-local-$(date +%s)-${RANDOM}}"
  codex_exec_server_binary_path="${REPO_ROOT}/codex-rs/target/debug/codex-exec-server"

  if ! command -v docker >/dev/null 2>&1; then
    echo "docker is required (Colima or Docker Desktop)" >&2
    return 1
  fi

  if ! docker info >/dev/null 2>&1; then
    echo "docker daemon is not reachable; for Colima run: colima start" >&2
    return 1
  fi

  if ! command -v cargo >/dev/null 2>&1; then
    echo "cargo is required to build codex-exec-server" >&2
    return 1
  fi

  # This function runs as an `if` condition, where `set -e` is ignored, so
  # every step that may fail is checked explicitly. Otherwise a failed build
  # could silently fall through to a stale binary from an earlier build.
  if ! (cd "${REPO_ROOT}/codex-rs" && cargo build -p codex-exec-server --bin codex-exec-server); then
    echo "failed to build codex-exec-server" >&2
    return 1
  fi

  if [[ ! -f "${codex_exec_server_binary_path}" ]]; then
    echo "codex-exec-server binary not found at ${codex_exec_server_binary_path}" >&2
    return 1
  fi

  # Remove any leftover container with the same name; absence is not an error.
  docker rm -f "${container_name}" >/dev/null 2>&1 || true
  if ! docker run -d --name "${container_name}" ubuntu:24.04 sleep infinity >/dev/null; then
    echo "failed to start container ${container_name}" >&2
    return 1
  fi

  export CODEX_TEST_REMOTE_ENV="${container_name}"
}
|
||||
|
||||
# Removes the container named by CODEX_TEST_REMOTE_ENV (ignoring failures) and
# unsets the variable. Does nothing when the variable is unset or empty.
codex_remote_env_cleanup() {
  [[ -n "${CODEX_TEST_REMOTE_ENV:-}" ]] || return 0

  docker rm -f "${CODEX_TEST_REMOTE_ENV}" >/dev/null 2>&1 || true
  unset CODEX_TEST_REMOTE_ENV
}
|
||||
|
||||
# Refuse direct execution: the exported variable and the cleanup function
# must end up in the caller's shell, which only sourcing achieves.
if ! is_sourced; then
|
||||
echo "source this script instead of executing it: source scripts/test-remote-env.sh" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Save the caller's `set -o` options so strict mode can be undone below.
old_shell_options="$(set +o)"
|
||||
set -euo pipefail
|
||||
# Run the setup and record its exit status for the final `return`.
if setup_remote_env; then
|
||||
status=0
|
||||
echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}"
|
||||
echo "Remote env ready. Run your command, then call: codex_remote_env_cleanup"
|
||||
else
|
||||
status=$?
|
||||
fi
|
||||
# Restore the caller's options before handing control back.
eval "${old_shell_options}"
|
||||
return "${status}"
|
||||
Loading…
Add table
Reference in a new issue