Use use_model (#7121)

This commit is contained in:
pakrym-oai 2025-11-21 14:10:52 -08:00 committed by GitHub
parent 3bdcbc7292
commit e52cc38dfd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 68 additions and 149 deletions

View file

@ -6,9 +6,7 @@ use core_test_support::test_codex::ApplyPatchModelOutput;
use pretty_assertions::assert_eq;
use std::fs;
use codex_core::config::Config;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@ -24,23 +22,24 @@ use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodexBuilder;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use serde_json::json;
use test_case::test_case;
pub async fn apply_patch_harness() -> Result<TestCodexHarness> {
apply_patch_harness_with(|_| {}).await
apply_patch_harness_with(|builder| builder).await
}
async fn apply_patch_harness_with(
configure: impl FnOnce(&mut Config) + Send + 'static,
configure: impl FnOnce(TestCodexBuilder) -> TestCodexBuilder,
) -> Result<TestCodexHarness> {
TestCodexHarness::with_config(|config| {
let builder = configure(test_codex()).with_config(|config| {
config.include_apply_patch_tool = true;
configure(config);
})
.await
});
TestCodexHarness::with_builder(builder).await
}
pub async fn mount_apply_patch(
@ -86,11 +85,7 @@ async fn apply_patch_cli_multiple_operations_integration(
) -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
// Seed workspace state
let modify_path = harness.path("modify.txt");
@ -645,8 +640,10 @@ async fn apply_patch_cli_verification_failure_has_no_side_effects(
) -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.features.enable(Feature::ApplyPatchFreeform);
let harness = apply_patch_harness_with(|builder| {
builder.with_config(|config| {
config.features.enable(Feature::ApplyPatchFreeform);
})
})
.await?;
@ -670,11 +667,7 @@ async fn apply_patch_cli_verification_failure_has_no_side_effects(
async fn apply_patch_shell_command_heredoc_with_cd_updates_relative_workdir() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
// Prepare a file inside a subdir; update it via cd && apply_patch heredoc form.
let sub = harness.path("sub");
@ -713,11 +706,7 @@ async fn apply_patch_shell_command_heredoc_with_cd_updates_relative_workdir() ->
async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
let test = harness.test();
let codex = test.codex.clone();
let cwd = test.cwd.clone();

View file

@ -2,7 +2,6 @@
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::ApplyPatchApprovalRequestEvent;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
@ -1458,14 +1457,11 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let sandbox_policy = scenario.sandbox_policy.clone();
let features = scenario.features.clone();
let model_override = scenario.model_override;
let model = model_override.unwrap_or("gpt-5.1");
let mut builder = test_codex().with_config(move |config| {
let mut builder = test_codex().with_model(model).with_config(move |config| {
config.approval_policy = approval_policy;
config.sandbox_policy = sandbox_policy.clone();
let model = model_override.unwrap_or("gpt-5.1");
config.model = model.to_string();
config.model_family =
find_family_for_model(model).expect("model should map to a known family");
for feature in features {
config.features.enable(feature);
}

View file

@ -1,4 +1,3 @@
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@ -59,11 +58,9 @@ async fn codex_delegate_forwards_exec_approval_and_proceeds_on_approval() {
// Build a conversation configured to require approvals so the delegate
// routes ExecApprovalRequest via the parent.
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
config.approval_policy = AskForApproval::OnRequest;
config.sandbox_policy = SandboxPolicy::ReadOnly;
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
});
let test = builder.build(&server).await.expect("build test codex");
@ -136,13 +133,11 @@ async fn codex_delegate_forwards_patch_approval_and_proceeds_on_decision() {
let server = start_mock_server().await;
mount_sse_sequence(&server, vec![sse1, sse2]).await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
config.approval_policy = AskForApproval::OnRequest;
// Use a restricted sandbox so patch approval is required
config.sandbox_policy = SandboxPolicy::ReadOnly;
config.include_apply_patch_tool = true;
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
});
let test = builder.build(&server).await.expect("build test codex");

View file

@ -1,7 +1,6 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@ -27,7 +26,7 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> {
return Ok(());
}
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
let policy_path = config.codex_home.join("policy").join("policy.codexpolicy");
fs::create_dir_all(
policy_path
@ -40,9 +39,6 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> {
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
)
.expect("write policy file");
config.model = "gpt-5.1".to_string();
config.model_family =
find_family_for_model("gpt-5.1").expect("gpt-5.1 should have a model family");
});
let server = start_mock_server().await;
let test = builder.build(&server).await?;

View file

@ -1,7 +1,6 @@
#![cfg(not(target_os = "windows"))]
use anyhow::Result;
use codex_core::model_family::find_family_for_model;
use core_test_support::responses::mount_function_call_agent_response;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
@ -127,11 +126,7 @@ async fn grep_files_tool_reports_empty_results() -> Result<()> {
#[allow(clippy::expect_used)]
async fn build_test_codex(server: &wiremock::MockServer) -> Result<TestCodex> {
let mut builder = test_codex().with_config(|config| {
config.model = MODEL_WITH_TOOL.to_string();
config.model_family =
find_family_for_model(MODEL_WITH_TOOL).expect("model family for test model");
});
let mut builder = test_codex().with_model(MODEL_WITH_TOOL);
builder.build(server).await
}

View file

@ -123,10 +123,8 @@ async fn shell_output_stays_json_without_freeform_apply_patch(
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let mut builder = test_codex().with_model("gpt-5").with_config(move |config| {
config.features.disable(Feature::ApplyPatchFreeform);
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
configure_shell_command_model(output_type, config);
});
let test = builder.build(&server).await?;
@ -228,10 +226,8 @@ async fn shell_output_preserves_fixture_json_without_serialization(
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let mut builder = test_codex().with_model("gpt-5").with_config(move |config| {
config.features.disable(Feature::ApplyPatchFreeform);
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
configure_shell_command_model(output_type, config);
});
let test = builder.build(&server).await?;
@ -412,13 +408,12 @@ async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutp
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.tool_output_token_limit = Some(200);
configure_shell_command_model(output_type, config);
});
let mut builder = test_codex()
.with_model("gpt-5.1-codex")
.with_config(move |config| {
config.tool_output_token_limit = Some(200);
configure_shell_command_model(output_type, config);
});
let test = builder.build(&server).await?;
let call_id = "shell-truncated";
@ -714,13 +709,12 @@ async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutp
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.include_apply_patch_tool = true;
configure_shell_command_model(output_type, config);
});
let mut builder = test_codex()
.with_model("gpt-5.1-codex")
.with_config(move |config| {
config.include_apply_patch_tool = true;
configure_shell_command_model(output_type, config);
});
let test = builder.build(&server).await?;
let call_id = "shell-nonzero-exit";
@ -907,12 +901,11 @@ async fn local_shell_call_output_is_structured() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.include_apply_patch_tool = true;
});
let mut builder = test_codex()
.with_model("gpt-5.1-codex")
.with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let call_id = "local-shell-call";

View file

@ -4,7 +4,6 @@ use std::fs;
use assert_matches::assert_matches;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@ -54,10 +53,7 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()>
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
});
let mut builder = test_codex().with_model("gpt-5");
let TestCodex {
codex,
cwd,

View file

@ -4,7 +4,6 @@
use std::time::Duration;
use std::time::Instant;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@ -56,11 +55,7 @@ async fn run_turn_and_measure(test: &TestCodex, prompt: &str) -> anyhow::Result<
#[allow(clippy::expect_used)]
async fn build_codex_with_test_tool(server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
let mut builder = test_codex().with_config(|config| {
config.model = "test-gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5.1-codex").expect("test-gpt-5.1-codex model family");
});
let mut builder = test_codex().with_model("test-gpt-5.1-codex");
builder.build(server).await
}

View file

@ -8,7 +8,6 @@ use std::time::Instant;
use anyhow::Context;
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::SandboxPolicy;
use core_test_support::assert_regex_match;
@ -96,10 +95,7 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
});
let mut builder = test_codex().with_model("gpt-5");
let test = builder.build(&server).await?;
let command = ["/bin/echo", "shell ok"];
@ -196,11 +192,7 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex model family");
});
let mut builder = test_codex().with_model("gpt-5.1-codex");
let fixture = builder.build(&server).await?;
let call_id = "sandbox-denied-shell";
@ -350,10 +342,7 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
});
let mut builder = test_codex().with_model("gpt-5");
let test = builder.build(&server).await?;
let call_id = "shell-timeout";
@ -424,9 +413,7 @@ async fn shell_timeout_handles_background_grandchild_stdout() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
config.sandbox_policy = SandboxPolicy::DangerFullAccess;
});
let test = builder.build(&server).await?;

View file

@ -6,7 +6,6 @@ use anyhow::Result;
use codex_core::config::types::McpServerConfig;
use codex_core::config::types::McpServerTransportConfig;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@ -38,12 +37,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
// Use the test model that wires function tools like grep_files
config.model = "test-gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5.1-codex").expect("model family for test model");
});
let mut builder = test_codex().with_model("test-gpt-5.1-codex");
let test = builder.build(&server).await?;
// Construct a very long, non-existent path to force a RespondToModel error with a large message
@ -257,11 +251,7 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
let server = start_mock_server().await;
// Use a model that exposes the shell_command tool.
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let mut builder = test_codex().with_model("gpt-5.1-codex");
let fixture = builder.build(&server).await?;
let call_id = "shell-too-large";
@ -337,11 +327,7 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let mut builder = test_codex().with_model("gpt-5.1-codex");
let fixture = builder.build(&server).await?;
let call_id = "shell-single-truncation";
let command = if cfg!(windows) {
@ -588,12 +574,11 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string(); // token policy
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex");
config.tool_output_token_limit = Some(50); // small budget to force truncation
});
let mut builder = test_codex()
.with_model("gpt-5.1-codex")
.with_config(|config| {
config.tool_output_token_limit = Some(50); // small budget to force truncation
});
let fixture = builder.build(&server).await?;
let call_id = "shell-token-marker";
@ -642,9 +627,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1".to_string(); // byte policy
config.model_family = find_family_for_model("gpt-5.1").expect("model family for gpt-5.1");
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
config.tool_output_token_limit = Some(50); // ~200 byte cap
});
let fixture = builder.build(&server).await?;
@ -695,12 +678,11 @@ async fn shell_command_output_not_truncated_with_custom_limit() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex");
config.tool_output_token_limit = Some(50_000); // ample budget
});
let mut builder = test_codex()
.with_model("gpt-5.1-codex")
.with_config(|config| {
config.tool_output_token_limit = Some(50_000); // ample budget
});
let fixture = builder.build(&server).await?;
let call_id = "shell-no-trunc";

View file

@ -9,9 +9,7 @@ use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use codex_core::CodexConversation;
use codex_core::config::Config;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::UndoCompletedEvent;
@ -23,18 +21,17 @@ use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
#[allow(clippy::expect_used)]
async fn undo_harness() -> Result<TestCodexHarness> {
TestCodexHarness::with_config(|config: &mut Config| {
let builder = test_codex().with_model("gpt-5.1").with_config(|config| {
config.include_apply_patch_tool = true;
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
config.features.enable(Feature::GhostCommit);
})
.await
});
TestCodexHarness::with_builder(builder).await
}
fn git(path: &Path, args: &[&str]) -> Result<()> {

View file

@ -1,7 +1,6 @@
use anyhow::Context;
use codex_core::ConversationManager;
use codex_core::NewConversation;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::ExecCommandSource;
@ -275,12 +274,11 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.tool_output_token_limit = Some(100);
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let mut builder = test_codex()
.with_model("gpt-5.1-codex")
.with_config(|config| {
config.tool_output_token_limit = Some(100);
});
let fixture = builder.build(&server).await?;
let call_id = "user-shell-double-truncation";