core-agent-ide/codex-rs/exec/src/lib.rs
dedrisian-oai 90a0fd342f
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core

This PR introduces the Core implementation for Review mode:

- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:

```json
{
  "findings": [
    {
      "title": "<≤ 80 chars, imperative>",
      "body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
      "confidence_score": <float 0.0-1.0>,
      "priority": <int 0-3>,
      "code_location": {
        "absolute_file_path": "<file path>",
        "line_range": {"start": <int>, "end": <int>}
      }
    }
  ],
  "overall_correctness": "patch is correct" | "patch is incorrect",
  "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
  "overall_confidence_score": <float 0.0-1.0>
}
```

## Questions

### Why separate out its own message history?

We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.

We also want to make sure the review thread doesn't leak into the parent
thread.

### Why do this as a mode, vs. sub-agents?

1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 23:25:10 +00:00

281 lines
9.9 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

mod cli;
mod event_processor;
mod event_processor_with_human_output;
mod event_processor_with_json_output;
use std::io::IsTerminal;
use std::io::Read;
use std::path::PathBuf;
pub use cli::Cli;
use codex_core::AuthManager;
use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
use codex_core::ConversationManager;
use codex_core::NewConversation;
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::git_info::get_git_repo_root;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::protocol::TaskCompleteEvent;
use codex_ollama::DEFAULT_OSS_MODEL;
use codex_protocol::config_types::SandboxMode;
use event_processor_with_human_output::EventProcessorWithHumanOutput;
use event_processor_with_json_output::EventProcessorWithJsonOutput;
use tracing::debug;
use tracing::error;
use tracing::info;
use tracing_subscriber::EnvFilter;
use crate::event_processor::CodexStatus;
use crate::event_processor::EventProcessor;
pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
let Cli {
images,
model: model_cli_arg,
oss,
config_profile,
full_auto,
dangerously_bypass_approvals_and_sandbox,
cwd,
skip_git_repo_check,
color,
last_message_file,
json: json_mode,
sandbox_mode: sandbox_mode_cli_arg,
prompt,
config_overrides,
} = cli;
// Determine the prompt based on CLI arg and/or stdin.
let prompt = match prompt {
Some(p) if p != "-" => p,
// Either `-` was passed or no positional arg.
maybe_dash => {
// When no arg (None) **and** stdin is a TTY, bail out early unless the
// user explicitly forced reading via `-`.
let force_stdin = matches!(maybe_dash.as_deref(), Some("-"));
if std::io::stdin().is_terminal() && !force_stdin {
eprintln!(
"No prompt provided. Either specify one as an argument or pipe the prompt into stdin."
);
std::process::exit(1);
}
// Ensure the user knows we are waiting on stdin, as they may
// have gotten into this state by mistake. If so, and they are not
// writing to stdin, Codex will hang indefinitely, so this should
// help them debug in that case.
if !force_stdin {
eprintln!("Reading prompt from stdin...");
}
let mut buffer = String::new();
if let Err(e) = std::io::stdin().read_to_string(&mut buffer) {
eprintln!("Failed to read prompt from stdin: {e}");
std::process::exit(1);
} else if buffer.trim().is_empty() {
eprintln!("No prompt provided via stdin.");
std::process::exit(1);
}
buffer
}
};
let (stdout_with_ansi, stderr_with_ansi) = match color {
cli::Color::Always => (true, true),
cli::Color::Never => (false, false),
cli::Color::Auto => (
std::io::stdout().is_terminal(),
std::io::stderr().is_terminal(),
),
};
// TODO(mbolin): Take a more thoughtful approach to logging.
let default_level = "error";
let _ = tracing_subscriber::fmt()
// Fallback to the `default_level` log filter if the environment
// variable is not set _or_ contains an invalid value
.with_env_filter(
EnvFilter::try_from_default_env()
.or_else(|_| EnvFilter::try_new(default_level))
.unwrap_or_else(|_| EnvFilter::new(default_level)),
)
.with_ansi(stderr_with_ansi)
.with_writer(std::io::stderr)
.try_init();
let sandbox_mode = if full_auto {
Some(SandboxMode::WorkspaceWrite)
} else if dangerously_bypass_approvals_and_sandbox {
Some(SandboxMode::DangerFullAccess)
} else {
sandbox_mode_cli_arg.map(Into::<SandboxMode>::into)
};
// When using `--oss`, let the bootstrapper pick the model (defaulting to
// gpt-oss:20b) and ensure it is present locally. Also, force the builtin
// `oss` model provider.
let model = if let Some(model) = model_cli_arg {
Some(model)
} else if oss {
Some(DEFAULT_OSS_MODEL.to_owned())
} else {
None // No model specified, will use the default.
};
let model_provider = if oss {
Some(BUILT_IN_OSS_MODEL_PROVIDER_ID.to_string())
} else {
None // No specific model provider override.
};
// Load configuration and determine approval policy
let overrides = ConfigOverrides {
model,
review_model: None,
config_profile,
// This CLI is intended to be headless and has no affordances for asking
// the user for approval.
approval_policy: Some(AskForApproval::Never),
sandbox_mode,
cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
model_provider,
codex_linux_sandbox_exe,
base_instructions: None,
include_plan_tool: None,
include_apply_patch_tool: None,
include_view_image_tool: None,
show_raw_agent_reasoning: oss.then_some(true),
tools_web_search_request: None,
};
// Parse `-c` overrides.
let cli_kv_overrides = match config_overrides.parse_overrides() {
Ok(v) => v,
Err(e) => {
eprintln!("Error parsing -c overrides: {e}");
std::process::exit(1);
}
};
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides)?;
let mut event_processor: Box<dyn EventProcessor> = if json_mode {
Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone()))
} else {
Box::new(EventProcessorWithHumanOutput::create_with_ansi(
stdout_with_ansi,
&config,
last_message_file.clone(),
))
};
if oss {
codex_ollama::ensure_oss_ready(&config)
.await
.map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
}
// Print the effective configuration and prompt so users can see what Codex
// is using.
event_processor.print_config_summary(&config, &prompt);
if !skip_git_repo_check && get_git_repo_root(&config.cwd.to_path_buf()).is_none() {
eprintln!("Not inside a trusted directory and --skip-git-repo-check was not specified.");
std::process::exit(1);
}
let conversation_manager =
ConversationManager::new(AuthManager::shared(config.codex_home.clone()));
let NewConversation {
conversation_id: _,
conversation,
session_configured,
} = conversation_manager.new_conversation(config).await?;
info!("Codex initialized with event: {session_configured:?}");
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<Event>();
{
let conversation = conversation.clone();
tokio::spawn(async move {
loop {
tokio::select! {
_ = tokio::signal::ctrl_c() => {
tracing::debug!("Keyboard interrupt");
// Immediately notify Codex to abort any inflight task.
conversation.submit(Op::Interrupt).await.ok();
// Exit the inner loop and return to the main input prompt. The codex
// will emit a `TurnInterrupted` (Error) event which is drained later.
break;
}
res = conversation.next_event() => match res {
Ok(event) => {
debug!("Received event: {event:?}");
let is_shutdown_complete = matches!(event.msg, EventMsg::ShutdownComplete);
if let Err(e) = tx.send(event) {
error!("Error sending event: {e:?}");
break;
}
if is_shutdown_complete {
info!("Received shutdown event, exiting event loop.");
break;
}
},
Err(e) => {
error!("Error receiving event: {e:?}");
break;
}
}
}
}
});
}
// Send images first, if any.
if !images.is_empty() {
let items: Vec<InputItem> = images
.into_iter()
.map(|path| InputItem::LocalImage { path })
.collect();
let initial_images_event_id = conversation.submit(Op::UserInput { items }).await?;
info!("Sent images with event ID: {initial_images_event_id}");
while let Ok(event) = conversation.next_event().await {
if event.id == initial_images_event_id
&& matches!(
event.msg,
EventMsg::TaskComplete(TaskCompleteEvent {
last_agent_message: _,
})
)
{
break;
}
}
}
// Send the prompt.
let items: Vec<InputItem> = vec![InputItem::Text { text: prompt }];
let initial_prompt_task_id = conversation.submit(Op::UserInput { items }).await?;
info!("Sent prompt with event ID: {initial_prompt_task_id}");
// Run the loop until the task is complete.
while let Some(event) = rx.recv().await {
let shutdown: CodexStatus = event_processor.process_event(event);
match shutdown {
CodexStatus::Running => continue,
CodexStatus::InitiateShutdown => {
conversation.submit(Op::Shutdown).await?;
}
CodexStatus::Shutdown => {
break;
}
}
}
Ok(())
}