core-agent-ide/codex-rs/shell-command/src/bash.rs

use std::path::PathBuf;

use tree_sitter::Node;
use tree_sitter::Parser;
use tree_sitter::Tree;
use tree_sitter_bash::LANGUAGE as BASH;

use crate::shell_detect::ShellType;
use crate::shell_detect::detect_shell_type;

/// Parses `shell_lc_arg` with a fresh tree-sitter parser configured for the
/// bash grammar.
///
/// No previous tree is supplied, so the input is always parsed from scratch.
/// Returns whatever the parser yields. Callers in this file additionally
/// check `has_error()` on the root of the returned tree before trusting it.
pub fn try_parse_shell(shell_lc_arg: &str) -> Option<Tree> {
    let mut bash_parser = Parser::new();
    #[expect(clippy::expect_used)]
    bash_parser
        .set_language(&BASH.into())
        .expect("load bash grammar");
    bash_parser.parse(shell_lc_arg, None)
}

/// Parse a script which may contain multiple simple commands joined only by
/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
///
/// Every node of the parse tree is visited. A named node must be one of the
/// allow-listed kinds below; an anonymous token must be one of the allowed
/// operators, a quote character, or consist only of whitespace. Anything else
/// (parentheses, redirections, substitutions, control flow, etc.) rejects the
/// whole script.
///
/// Returns `Some(commands)` with one word list per `command` node, in source
/// order, if the tree has no parse errors, passes the allow-list walk, and
/// every command is a plain word-only command. Otherwise returns `None`.
pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
    // Named node kinds permitted in a "word only commands sequence".
    const ALLOWED_KINDS: &[&str] = &[
        // top level containers
        "program",
        "list",
        "pipeline",
        // commands & words
        "command",
        "command_name",
        "word",
        "string",
        "string_content",
        "raw_string",
        "number",
        "concatenation",
    ];
    // Anonymous tokens permitted besides whitespace: safe operators and quotes.
    const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];

    let root = tree.root_node();
    if root.has_error() {
        return None;
    }

    let mut cursor = root.walk();
    let mut pending = vec![root];
    let mut command_nodes = Vec::new();
    while let Some(node) = pending.pop() {
        let kind = node.kind();
        if node.is_named() {
            if !ALLOWED_KINDS.contains(&kind) {
                return None;
            }
            if kind == "command" {
                command_nodes.push(node);
            }
        } else if !ALLOWED_PUNCT_TOKENS.contains(&kind) && !kind.trim().is_empty() {
            // A single allow-list test is enough: any operator-like token such
            // as `&` or `;;` is neither allow-listed nor whitespace, so it is
            // rejected here together with parentheses, braces, redirects,
            // backticks and every other unexpected token.
            return None;
        }
        pending.extend(node.children(&mut cursor));
    }

    // The walk uses a stack (LIFO), so re-sort by position to restore source order.
    command_nodes.sort_by_key(Node::start_byte);

    // Collecting into `Option` stops at, and propagates, the first command
    // that is not a plain word-only command.
    command_nodes
        .into_iter()
        .map(|node| parse_plain_command_from_node(node, src))
        .collect()
}

/// Splits a `[shell, flag, script]` argv into its shell and script parts.
///
/// Returns `Some((shell, script))` only when `command` has exactly three
/// elements, the flag is `-lc` or `-c`, and `detect_shell_type` classifies the
/// shell path as zsh, bash, or sh. Every other input yields `None`.
pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
    let [shell, flag, script] = command else {
        return None;
    };
    // Check the flag first so shell detection only runs for plausible candidates.
    if !matches!(flag.as_str(), "-lc" | "-c") {
        return None;
    }
    match detect_shell_type(&PathBuf::from(shell)) {
        Some(ShellType::Zsh | ShellType::Bash | ShellType::Sh) => {
            Some((shell.as_str(), script.as_str()))
        }
        _ => None,
    }
}

/// Returns the plain commands contained in a `<shell> -lc "..."` (or `-c`)
/// invocation, for the shells accepted by `extract_bash_command`.
///
/// Yields `None` when `command` is not such an invocation, when the script
/// cannot be parsed, or when the script is anything other than word-only
/// commands joined by the safe operators.
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
    extract_bash_command(command).and_then(|(_shell, script)| {
        let tree = try_parse_shell(script)?;
        try_parse_word_only_commands_sequence(&tree, script)
    })
}

/// Returns the literal words of the only command in a here-doc (`<<`) script.
///
/// The script must parse without errors, contain at least one
/// `heredoc_redirect` node, and contain exactly one `command` node. The
/// result holds just that command's literal words; the here-doc itself and
/// any other tolerated attachments are not part of it. Any other input
/// yields `None`.
pub fn parse_shell_lc_single_command_prefix(command: &[String]) -> Option<Vec<String>> {
    let (_shell, script) = extract_bash_command(command)?;
    let tree = try_parse_shell(script)?;
    let root = tree.root_node();

    let is_clean_heredoc_script =
        !root.has_error() && has_named_descendant_kind(root, "heredoc_redirect");
    if !is_clean_heredoc_script {
        return None;
    }

    find_single_command_node(root).and_then(|cmd| parse_heredoc_command_words(cmd, script))
}

fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
    if cmd.kind() != "command" {
        return None;
    }
    let mut words = Vec::new();
    let mut cursor = cmd.walk();
    for child in cmd.named_children(&mut cursor) {
        match child.kind() {
            "command_name" => {
                let word_node = child.named_child(0)?;
                if word_node.kind() != "word" {
                    return None;
                }
                words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
            }
            "word" | "number" => {
                words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
            }
            "string" => {
                let parsed = parse_double_quoted_string(child, src)?;
                words.push(parsed);
            }
            "raw_string" => {
                let parsed = parse_raw_string(child, src)?;
                words.push(parsed);
            }
            "concatenation" => {
                // Handle concatenated arguments like -g"*.py"
                let mut concatenated = String::new();
                let mut concat_cursor = child.walk();
                for part in child.named_children(&mut concat_cursor) {
                    match part.kind() {
                        "word" | "number" => {
                            concatenated
                                .push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
                        }
                        "string" => {
                            let parsed = parse_double_quoted_string(part, src)?;
                            concatenated.push_str(&parsed);
                        }
                        "raw_string" => {
                            let parsed = parse_raw_string(part, src)?;
                            concatenated.push_str(&parsed);
                        }
                        _ => return None,
                    }
                }
                if concatenated.is_empty() {
                    return None;
                }
                words.push(concatenated);
            }
            _ => return None,
        }
    }
    Some(words)
}

/// Collects the literal words of a `command` node that carries a here-doc.
///
/// The command name and every `word`/`number` argument must be literal (no
/// named children). Variable assignments, comments, and the attachment kinds
/// accepted by `is_allowed_heredoc_attachment_kind` are skipped without
/// contributing a word. Any other named child rejects the command. Returns
/// `None` as well when `cmd` is not a `command` node or no word was found.
fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>> {
    if cmd.kind() != "command" {
        return None;
    }

    let source = src.as_bytes();
    let mut argv = Vec::new();
    let mut cursor = cmd.walk();
    for child in cmd.named_children(&mut cursor) {
        let kind = child.kind();
        let literal = if kind == "command_name" {
            child.named_child(0)?
        } else if matches!(kind, "word" | "number") {
            child
        } else if matches!(kind, "variable_assignment" | "comment")
            || is_allowed_heredoc_attachment_kind(kind)
        {
            // Constructs that are tolerated but do not add to the word list.
            // NOTE(review): `variable_assignment` prefixes are skipped rather
            // than rejected, so they never show up in the returned words;
            // confirm that callers are fine with that.
            continue;
        } else {
            return None;
        };

        // `is_literal_word_or_number` verifies both the node kind and the
        // absence of nested named nodes such as expansions.
        if !is_literal_word_or_number(literal) {
            return None;
        }
        argv.push(literal.utf8_text(source).ok()?.to_owned());
    }

    (!argv.is_empty()).then_some(argv)
}

/// Reports whether `node` is a `word` or `number` node with no named
/// children.
fn is_literal_word_or_number(node: Node<'_>) -> bool {
    let is_word_like = matches!(node.kind(), "word" | "number");
    is_word_like && node.named_child_count() == 0
}

/// Reports whether `kind` names one of the here-doc / redirect node kinds
/// that `parse_heredoc_command_words` skips over instead of rejecting.
fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
    const ATTACHMENT_KINDS: [&str; 6] = [
        "heredoc_body",
        "simple_heredoc_body",
        "heredoc_redirect",
        "herestring_redirect",
        "file_redirect",
        "redirected_statement",
    ];
    ATTACHMENT_KINDS.contains(&kind)
}

/// Searches `root` and all of its named descendants for `command` nodes.
///
/// Returns the node when exactly one exists, and `None` when there are zero
/// or more than one.
fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
    let mut found = None;
    let mut pending = vec![root];
    while let Some(current) = pending.pop() {
        // `replace` hands back the previously stored command, if any; a
        // second command means the script is not a single-command script.
        if current.kind() == "command" && found.replace(current).is_some() {
            return None;
        }

        let mut cursor = current.walk();
        pending.extend(current.named_children(&mut cursor));
    }
    found
}

/// Reports whether `node` itself, or any node reachable from it through
/// named children, has the given `kind`.
fn has_named_descendant_kind(node: Node<'_>, kind: &str) -> bool {
    let mut pending = vec![node];
    loop {
        let Some(candidate) = pending.pop() else {
            // Every reachable named node was inspected without a match.
            return false;
        };
        if candidate.kind() == kind {
            return true;
        }
        let mut cursor = candidate.walk();
        pending.extend(candidate.named_children(&mut cursor));
    }
}

/// Extracts the text between the quotes of a double-quoted `string` node.
///
/// Only strings whose named children are all `string_content` are accepted,
/// so anything containing an expansion or substitution yields `None`. The
/// inner text is returned exactly as written in `src`; this function does no
/// escape processing.
fn parse_double_quoted_string(node: Node, src: &str) -> Option<String> {
    if node.kind() != "string" {
        return None;
    }

    let mut cursor = node.walk();
    let literal_only = node
        .named_children(&mut cursor)
        .all(|part| part.kind() == "string_content");
    if !literal_only {
        return None;
    }

    let quoted = node.utf8_text(src.as_bytes()).ok()?;
    let inner = quoted.strip_prefix('"')?.strip_suffix('"')?;
    Some(inner.to_owned())
}

/// Extracts the text between the quotes of a single-quoted `raw_string`
/// node.
///
/// Returns `None` when `node` is not a `raw_string`, when its text is not
/// valid UTF-8, or when the text is not wrapped in single quotes.
fn parse_raw_string(node: Node, src: &str) -> Option<String> {
    if node.kind() != "raw_string" {
        return None;
    }

    let quoted = node.utf8_text(src.as_bytes()).ok()?;
    let inner = quoted.strip_prefix('\'')?.strip_suffix('\'')?;
    Some(inner.to_owned())
}

#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `src` and runs the word-only sequence check on the result.
    fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
        let tree = try_parse_shell(src)?;
        try_parse_word_only_commands_sequence(&tree, src)
    }

    /// Builds an owned word list from string literals.
    fn argv(words: &[&str]) -> Vec<String> {
        words.iter().map(|word| (*word).to_owned()).collect()
    }

    /// Builds the `[shell, "-lc", script]` invocation handed to the
    /// `parse_shell_lc_*` entry points.
    fn shell_lc(shell: &str, script: &str) -> Vec<String> {
        argv(&[shell, "-lc", script])
    }

    /// Asserts that the word-only parser rejects every given script.
    fn assert_seq_rejected(scripts: &[&str]) {
        for script in scripts {
            assert!(
                parse_seq(script).is_none(),
                "expected script to be rejected: {script}"
            );
        }
    }

    #[test]
    fn accepts_single_simple_command() {
        assert_eq!(parse_seq("ls -1"), Some(vec![argv(&["ls", "-1"])]));
    }

    #[test]
    fn accepts_multiple_commands_with_allowed_operators() {
        let expected = vec![
            argv(&["ls"]),
            argv(&["pwd"]),
            argv(&["echo", "hi there"]),
            argv(&["wc", "-l"]),
        ];
        assert_eq!(
            parse_seq("ls && pwd; echo 'hi there' | wc -l"),
            Some(expected)
        );
    }

    #[test]
    fn extracts_double_and_single_quoted_strings() {
        assert_eq!(
            parse_seq("echo \"hello world\""),
            Some(vec![argv(&["echo", "hello world"])])
        );
        assert_eq!(
            parse_seq("echo 'hi there'"),
            Some(vec![argv(&["echo", "hi there"])])
        );
    }

    #[test]
    fn accepts_double_quoted_strings_with_newlines() {
        assert_eq!(
            parse_seq("git commit -m \"line1\nline2\""),
            Some(vec![argv(&["git", "commit", "-m", "line1\nline2"])])
        );
    }

    #[test]
    fn accepts_mixed_quote_concatenation() {
        let expected = Some(vec![argv(&["echo", "/usr/local/bin"])]);
        assert_eq!(parse_seq(r#"echo "/usr"'/'"local"/bin"#), expected);
        assert_eq!(parse_seq(r#"echo '/usr'"/"'local'/bin"#), expected);
    }

    #[test]
    fn rejects_double_quoted_strings_with_expansions() {
        assert_seq_rejected(&[r#"echo "hi ${USER}""#, r#"echo "$HOME""#]);
    }

    #[test]
    fn accepts_numbers_as_words() {
        assert_eq!(
            parse_seq("echo 123 456"),
            Some(vec![argv(&["echo", "123", "456"])])
        );
    }

    #[test]
    fn rejects_parentheses_and_subshells() {
        assert_seq_rejected(&["(ls)", "ls || (pwd && echo hi)"]);
    }

    #[test]
    fn rejects_redirections_and_unsupported_operators() {
        assert_seq_rejected(&["ls > out.txt", "echo hi & echo bye"]);
    }

    #[test]
    fn rejects_command_and_process_substitutions_and_expansions() {
        assert_seq_rejected(&[
            "echo $(pwd)",
            "echo `pwd`",
            // Process substitution, which the test name promises to cover.
            "cat <(pwd)",
            "echo $HOME",
            "echo \"hi $USER\"",
        ]);
    }

    #[test]
    fn rejects_variable_assignment_prefix() {
        assert_seq_rejected(&["FOO=bar ls"]);
    }

    #[test]
    fn rejects_trailing_operator_parse_error() {
        assert_seq_rejected(&["ls &&"]);
    }

    #[test]
    fn rejects_empty_command_position_with_leading_operator() {
        assert_seq_rejected(&["&& ls"]);
    }

    #[test]
    fn rejects_empty_command_position_with_double_separator() {
        assert_seq_rejected(&["ls ;; pwd"]);
    }

    #[test]
    fn rejects_empty_command_position_with_empty_pipeline_segment() {
        assert_seq_rejected(&["ls | | wc"]);
    }

    #[test]
    fn parse_zsh_lc_plain_commands() {
        let command = shell_lc("zsh", "ls");
        assert_eq!(
            parse_shell_lc_plain_commands(&command),
            Some(vec![argv(&["ls"])])
        );
    }

    #[test]
    fn accepts_concatenated_flag_and_value() {
        // Test case: -g"*.py" (flag directly concatenated with quoted value)
        assert_eq!(
            parse_seq("rg -n \"foo\" -g\"*.py\""),
            Some(vec![argv(&["rg", "-n", "foo", "-g*.py"])])
        );
    }

    #[test]
    fn accepts_concatenated_flag_with_single_quotes() {
        assert_eq!(
            parse_seq("grep -n 'pattern' -g'*.txt'"),
            Some(vec![argv(&["grep", "-n", "pattern", "-g*.txt"])])
        );
    }

    #[test]
    fn rejects_concatenation_with_variable_substitution() {
        // Environment variables in concatenated strings should be rejected
        assert_seq_rejected(&["rg -g\"$VAR\" pattern", "rg -g\"${VAR}\" pattern"]);
    }

    #[test]
    fn rejects_concatenation_with_command_substitution() {
        // Command substitution in concatenated strings should be rejected
        assert_seq_rejected(&[
            "rg -g\"$(pwd)\" pattern",
            "rg -g\"$(echo '*.py')\" pattern",
        ]);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_supports_heredoc() {
        let quoted = shell_lc("zsh", "python3 <<'PY'\nprint('hello')\nPY");
        assert_eq!(
            parse_shell_lc_single_command_prefix(&quoted),
            Some(argv(&["python3"]))
        );

        let unquoted = shell_lc("zsh", "python3 << PY\nprint('hello')\nPY");
        assert_eq!(
            parse_shell_lc_single_command_prefix(&unquoted),
            Some(argv(&["python3"]))
        );
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_multi_command_scripts() {
        let command = shell_lc("bash", "python3 <<'PY'\nprint('hello')\nPY\necho done");
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_non_heredoc_redirects() {
        let command = shell_lc("bash", "echo hello > /tmp/out.txt");
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_accepts_heredoc_with_extra_redirect() {
        let command = shell_lc("bash", "python3 <<'PY' > /tmp/out.txt\nprint('hello')\nPY");
        assert_eq!(
            parse_shell_lc_single_command_prefix(&command),
            Some(argv(&["python3"]))
        );
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_file_redirect_with_chaining() {
        let command = shell_lc("bash", r#"echo hello > /tmp/out.txt && cat /tmp/out.txt"#);
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_herestring_with_chaining() {
        // A here-string (`<<<`) is not a here-doc, so the script is rejected
        // regardless of the chained command that follows it.
        let command = shell_lc("bash", r#"cat <<< "hello" && ls"#);
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_herestring_with_substitution() {
        let command = shell_lc("bash", r#"python3 <<< "$(rm -rf /)""#);
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_arithmetic_shift_non_heredoc_script() {
        let command = shell_lc("bash", "echo $((1<<2))");
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_heredoc_command_with_word_expansion() {
        let command = shell_lc("bash", "python3 $((1<<2)) <<'PY'\nprint('hello')\nPY");
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }
}
-												Revert "Revert "Overhaul shell detection and centralize command generation for unified exec"" (#6607)

Reverts openai/codex#6606
											
										
										
											2025-11-13 16:45:17 -08:00
+								use std::path::PathBuf;
-												chore: clippy on redundant closure (#4058)

Add redundant closure clippy rules and let Codex fix it by minimising
FQP
											
										
										
											2025-09-22 20:30:16 +01:00
+								use tree_sitter::Node;
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								use tree_sitter::Parser;
 								use tree_sitter::Tree;
 								use tree_sitter_bash::LANGUAGE as BASH;
-												# Split command parsing/safety out of `codex-core` into new `codex-command` (#11361)

`codex-core` had accumulated command parsing and command safety logic
(`bash`, `powershell`, `parse_command`, and `command_safety`) that is
logically cohesive but orthogonal to most core session/runtime logic.
Keeping this code in `codex-core` made the crate increasingly monolithic
and raised iteration cost for unrelated core changes.

This change extracts that surface into a dedicated crate,
`codex-command`, while preserving existing `codex_core::...` call sites
via re-exports.

## Why this refactor

During analysis, command parsing/safety stood out as a good first split
because it has:

- a clear domain boundary (shell parsing + safety classification)
- relatively self-contained dependencies (notably `tree-sitter` /
`tree-sitter-bash`)
- a meaningful standalone test surface (`134` tests moved with the
crate)
- many downstream uses that benefit from independent compilation and
caching

The practical problem was build latency from a large `codex-core`
compile/test graph. Clean-build timings before and after this split
showed measurable wins:

- `cargo check -p codex-core`: `57.08s` -> `53.54s` (~`6.2%` faster)
- `cargo test -p codex-core --no-run`: `2m39.9s` -> `2m20s` (~`12.4%`
faster)
- `codex-core lib` compile unit: `57.18s` -> `49.67s` (~`13.1%` faster)
- `codex-core lib(test)` compile unit: `60.87s` -> `53.21s` (~`12.6%`
faster)

This gives a concrete reduction in core build overhead without changing
behavior.

## What changed

### New crate

- Added `codex-rs/command` as workspace crate `codex-command`.
- Added:
  - `command/src/lib.rs`
  - `command/src/bash.rs`
  - `command/src/powershell.rs`
  - `command/src/parse_command.rs`
  - `command/src/command_safety/*`
  - `command/src/shell_detect.rs`
  - `command/BUILD.bazel`

### Code moved out of `codex-core`

- Moved modules from `core/src` into `command/src`:
  - `bash.rs`
  - `powershell.rs`
  - `parse_command.rs`
  - `command_safety/*`

### Dependency graph updates

- Added workspace member/dependency entries for `codex-command` in
`codex-rs/Cargo.toml`.
- Added `codex-command` dependency to `codex-rs/core/Cargo.toml`.
- Removed `tree-sitter` and `tree-sitter-bash` from `codex-core` direct
deps (now owned by `codex-command`).

### API compatibility for callers

To avoid immediate downstream churn, `codex-core` now re-exports the
moved modules/functions:

- `codex_command::bash`
- `codex_command::powershell`
- `codex_command::parse_command`
- `codex_command::is_safe_command`
- `codex_command::is_dangerous_command`

This keeps existing `codex_core::...` paths working while enabling
gradual migration to direct `codex-command` usage.

### Internal decoupling detail

- Added `command::shell_detect` so moved `bash`/`powershell` logic no
longer depends on core shell internals.
- Adjusted PowerShell helper visibility in `codex-command` for existing
core test usage (`UTF8` prefix helper + executable discovery functions).

## Validation

- `just fmt`
- `just fix -p codex-command -p codex-core`
- `cargo test -p codex-command` (`134` passed)
- `cargo test -p codex-core --no-run`
- `cargo test -p codex-core shell_command_handler`

## Notes / follow-up

This commit intentionally prioritizes boundary extraction and
compatibility. A follow-up can migrate downstream crates to depend
directly on `codex-command` (instead of through `codex-core` re-exports)
to realize additional incremental build wins.
											
										
										
											2026-02-10 14:43:16 -08:00
+								use crate::shell_detect::ShellType;
 								use crate::shell_detect::detect_shell_type;
-												Revert "Revert "Overhaul shell detection and centralize command generation for unified exec"" (#6607)

Reverts openai/codex#6606
											
										
										
											2025-11-13 16:45:17 -08:00
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
 								/// success or None if parsing failed.
-												Treat `zsh -lc` like `bash -lc` (#5411)

Without proper `zsh -lc` parsing, we lose some things like proper
command parsing, turn diff tracking, safe command checks, and other
things we expect from raw or `bash -lc` commands.
											
										
										
											2025-10-20 15:52:25 -07:00
+								pub fn try_parse_shell(shell_lc_arg: &str) -> Option<Tree> {
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								    let lang = BASH.into();
 								    let mut parser = Parser::new();
 								    #[expect(clippy::expect_used)]
 								    parser.set_language(&lang).expect("load bash grammar");
 								    let old_tree: Option<&Tree> = None;
-												Treat `zsh -lc` like `bash -lc` (#5411)

Without proper `zsh -lc` parsing, we lose some things like proper
command parsing, turn diff tracking, safe command checks, and other
things we expect from raw or `bash -lc` commands.
											
										
										
											2025-10-20 15:52:25 -07:00
+								    parser.parse(shell_lc_arg, old_tree)
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								}
 								/// Parse a script which may contain multiple simple commands joined only by
 								/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
 								///
 								/// Returns `Some(Vec<command_words>)` if every command is a plain word‑only
 								/// command and the parse tree does not contain disallowed constructs
 								/// (parentheses, redirections, substitutions, control flow, etc.). Otherwise
 								/// returns `None`.
 								pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
 								    if tree.root_node().has_error() {
 								        return None;
 								    }
 								    // List of allowed (named) node kinds for a "word only commands sequence".
 								    // If we encounter a named node that is not in this list we reject.
 								    const ALLOWED_KINDS: &[&str] = &[
 								        // top level containers
 								        "program",
 								        "list",
 								        "pipeline",
 								        // commands & words
 								        "command",
 								        "command_name",
 								        "word",
 								        "string",
 								        "string_content",
 								        "raw_string",
 								        "number",
-												Handle concatenation nodes in bash command parser for exec policy (#8395)

The bash command parser in exec_policy was failing to parse commands
with concatenated flag-value patterns like `-g"*.py"` (no space between
flag and quoted value). This caused policy rules like
`prefix_rule(pattern=["rg"])` to not match commands such as `rg -n "foo"
-g"*.py"`.

When tree-sitter-bash parses `-g"*.py"`, it creates a "concatenation"
node containing a word (`-g`) and a string (`"*.py"`). The parser
previously rejected any node type not in the ALLOWED_KINDS list, causing
the entire command parsing to fail and fall back to matching against the
wrapped `bash -lc` command instead of the inner command.

This change:
- Adds "concatenation" to ALLOWED_KINDS in
try_parse_word_only_commands_sequence
- Adds handling for concatenation nodes in parse_plain_command_from_node
that recursively extracts and joins word/string/raw_string children
- Adds test cases for concatenated flag patterns with double and single
quotes

Fixes #8394
											
										
										
											2025-12-22 22:59:27 +03:00
+								        "concatenation",
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								    ];
 								    // Allow only safe punctuation / operator tokens; anything else causes reject.
 								    const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
 								    let root = tree.root_node();
 								    let mut cursor = root.walk();
 								    let mut stack = vec![root];
 								    let mut command_nodes = Vec::new();
 								    while let Some(node) = stack.pop() {
 								        let kind = node.kind();
 								        if node.is_named() {
 								            if !ALLOWED_KINDS.contains(&kind) {
 								                return None;
 								            }
 								            if kind == "command" {
 								                command_nodes.push(node);
 								            }
 								        } else {
 								            // Reject any punctuation / operator tokens that are not explicitly allowed.
 								            if kind.chars().any(|c| "&;|".contains(c)) && !ALLOWED_PUNCT_TOKENS.contains(&kind) {
 								                return None;
 								            }
 								            if !(ALLOWED_PUNCT_TOKENS.contains(&kind) || kind.trim().is_empty()) {
 								                // If it's a quote token or operator it's allowed above; we also allow whitespace tokens.
 								                // Any other punctuation like parentheses, braces, redirects, backticks, etc are rejected.
 								                return None;
 								            }
 								        }
 								        for child in node.children(&mut cursor) {
 								            stack.push(child);
 								        }
 								    }
-												fix: update try_parse_word_only_commands_sequence() to return commands in order (#3881)

Incidentally, we had a test for this in
`accepts_multiple_commands_with_allowed_operators()`, but it was
verifying the bad behavior. Oops!
											
										
										
											2025-09-18 16:07:38 -07:00
+								    // Walk uses a stack (LIFO), so re-sort by position to restore source order.
-												chore: clippy on redundant closure (#4058)

Add the clippy rule for redundant closures and let Codex fix the resulting
warnings by minimising fully qualified paths (FQPs).
											
										
										
											2025-09-22 20:30:16 +01:00
+								    command_nodes.sort_by_key(Node::start_byte);
-												fix: update try_parse_word_only_commands_sequence() to return commands in order (#3881)

Incidentally, we had a test for this in
`accepts_multiple_commands_with_allowed_operators()`, but it was
verifying the bad behavior. Oops!
											
										
										
											2025-09-18 16:07:38 -07:00
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								    let mut commands = Vec::new();
 								    for node in command_nodes {
 								        if let Some(words) = parse_plain_command_from_node(node, src) {
 								            commands.push(words);
 								        } else {
 								            return None;
 								        }
 								    }
 								    Some(commands)
 								}
-												Colocate more of bash parsing (#6489)

Move a few callsites that were detecting `bash -lc` into a shared
helper.
											
										
										
											2025-11-10 18:38:36 -08:00
+								pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
-												Treat `zsh -lc` like `bash -lc` (#5411)

Without proper `zsh -lc` parsing, we lose behavior that raw and
`bash -lc` commands already get: command parsing, turn diff tracking,
safe command checks, and more.
											
										
										
											2025-10-20 15:52:25 -07:00
+								    let [shell, flag, script] = command else {
-												core: add potentially dangerous command check (#4211)

Certain shell commands are potentially dangerous, and we want to check
for them. Unless the user has explicitly approved a command, we will
*always* ask them for approval when one of these commands is
encountered, regardless of whether they are in a sandbox or what their
approval policy is.

The first (of probably many) such examples is `git reset --hard`. We
will be conservative and check for any `git reset`.
											
										
										
											2025-09-25 19:46:20 -07:00
+								        return None;
 								    };
-												Revert "Revert "Overhaul shell detection and centralize command generation for unified exec"" (#6607)

Reverts openai/codex#6606
											
										
										
											2025-11-13 16:45:17 -08:00
+								    if !matches!(flag.as_str(), "-lc" | "-c")
 								        || !matches!(
 								            detect_shell_type(&PathBuf::from(shell)),
-												Always fallback to real shell (#6953)

The fallback shell is either `cmd.exe` or `/bin/sh`.
											
										
										
											2025-11-20 10:58:46 -08:00
+								            Some(ShellType::Zsh) | Some(ShellType::Bash) | Some(ShellType::Sh)
-												Revert "Revert "Overhaul shell detection and centralize command generation for unified exec"" (#6607)

Reverts openai/codex#6606
											
										
										
											2025-11-13 16:45:17 -08:00
+								        )
 								    {
-												core: add potentially dangerous command check (#4211)

Certain shell commands are potentially dangerous, and we want to check
for them. Unless the user has explicitly approved a command, we will
*always* ask them for approval when one of these commands is
encountered, regardless of whether they are in a sandbox or what their
approval policy is.

The first (of probably many) such examples is `git reset --hard`. We
will be conservative and check for any `git reset`.
											
										
										
											2025-09-25 19:46:20 -07:00
+								        return None;
 								    }
-												Colocate more of bash parsing (#6489)

Move a few callsites that were detecting `bash -lc` into a shared
helper.
											
										
										
											2025-11-10 18:38:36 -08:00
+								    Some((shell, script))
 								}
 								/// Returns the sequence of plain commands within a `bash`, `zsh`, or `sh`
 								/// `-lc "..."` / `-c "..."` invocation when the script only contains
 								/// word-only commands joined by safe operators.
 								pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
 								    let (_, script) = extract_bash_command(command)?;
-												core: add potentially dangerous command check (#4211)

Certain shell commands are potentially dangerous, and we want to check
for them. Unless the user has explicitly approved a command, we will
*always* ask them for approval when one of these commands is
encountered, regardless of whether they are in a sandbox or what their
approval policy is.

The first (of probably many) such examples is `git reset --hard`. We
will be conservative and check for any `git reset`.
											
										
										
											2025-09-25 19:46:20 -07:00
-												Treat `zsh -lc` like `bash -lc` (#5411)

Without proper `zsh -lc` parsing, we lose behavior that raw and
`bash -lc` commands already get: command parsing, turn diff tracking,
safe command checks, and more.
											
										
										
											2025-10-20 15:52:25 -07:00
+								    let tree = try_parse_shell(script)?;
-												core: add potentially dangerous command check (#4211)

Certain shell commands are potentially dangerous, and we want to check
for them. Unless the user has explicitly approved a command, we will
*always* ask them for approval when one of these commands is
encountered, regardless of whether they are in a sandbox or what their
approval policy is.

The first (of probably many) such examples is `git reset --hard`. We
will be conservative and check for any `git reset`.
											
										
										
											2025-09-25 19:46:20 -07:00
+								    try_parse_word_only_commands_sequence(&tree, script)
 								}
-												fix(core): canonicalize wrapper approvals and support heredoc prefix … (#10941)

## Summary
- Reduced repeated approvals for equivalent wrapper commands and fixed
execpolicy matching for heredoc-style shell invocations, with minimal
behavior change and fail-closed defaults.

## Fixes
1. Canonicalized approval matching for wrappers so equivalent commands
map to the same approval intent.
2. Added heredoc-aware prefix extraction for execpolicy so commands like
`python3 <<'PY' ... PY` match rules such as `prefix_rule(["python3"],
...)`.
3. Kept fallback behavior conservative: if parsing is ambiguous,
existing prompt behavior is preserved.

## Edge Cases Covered
- Wrapper path/name differences: `/bin/bash` vs `bash`, `/bin/zsh` vs
`zsh`.
- Shell modes: `-c` and `-lc`.
- Heredoc forms: quoted delimiter (`<<'PY'`) and unquoted delimiter
(`<< PY`).
- Multi-command heredoc scripts are rejected by the fallback.
- Non-heredoc redirections (`>`, etc.) are not treated as heredoc prefix
matches.
- Complex scripts still fall back to prior behavior rather than
expanding permissions.

---------

Co-authored-by: Dylan Hurd <dylan.hurd@openai.com>
											
										
										
											2026-02-10 11:46:40 -08:00
+								/// Returns the parsed argv for a single shell command in a here-doc style
 								/// script (`<<`), as long as the script contains exactly one command node.
 								pub fn parse_shell_lc_single_command_prefix(command: &[String]) -> Option<Vec<String>> {
 								    let (_, script) = extract_bash_command(command)?;
 								    let tree = try_parse_shell(script)?;
 								    let root = tree.root_node();
 								    if root.has_error() {
 								        return None;
 								    }
-												fix(core) exec policy parsing 3 (#12485)

## Summary
Quick fix
											
										
										
											2026-02-21 22:26:13 -08:00
+								    if !has_named_descendant_kind(root, "heredoc_redirect") {
 								        return None;
 								    }
-												fix(core): canonicalize wrapper approvals and support heredoc prefix … (#10941)

## Summary
- Reduced repeated approvals for equivalent wrapper commands and fixed
execpolicy matching for heredoc-style shell invocations, with minimal
behavior change and fail-closed defaults.

## Fixes
1. Canonicalized approval matching for wrappers so equivalent commands
map to the same approval intent.
2. Added heredoc-aware prefix extraction for execpolicy so commands like
`python3 <<'PY' ... PY` match rules such as `prefix_rule(["python3"],
...)`.
3. Kept fallback behavior conservative: if parsing is ambiguous,
existing prompt behavior is preserved.

## Edge Cases Covered
- Wrapper path/name differences: `/bin/bash` vs `bash`, `/bin/zsh` vs
`zsh`.
- Shell modes: `-c` and `-lc`.
- Heredoc forms: quoted delimiter (`<<'PY'`) and unquoted delimiter
(`<< PY`).
- Multi-command heredoc scripts are rejected by the fallback.
- Non-heredoc redirections (`>`, etc.) are not treated as heredoc prefix
matches.
- Complex scripts still fall back to prior behavior rather than
expanding permissions.

---------

Co-authored-by: Dylan Hurd <dylan.hurd@openai.com>
											
										
										
											2026-02-10 11:46:40 -08:00
 								    let command_node = find_single_command_node(root)?;
 								    parse_heredoc_command_words(command_node, script)
 								}
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
 								    if cmd.kind() != "command" {
 								        return None;
 								    }
 								    let mut words = Vec::new();
 								    let mut cursor = cmd.walk();
 								    for child in cmd.named_children(&mut cursor) {
 								        match child.kind() {
 								            "command_name" => {
 								                let word_node = child.named_child(0)?;
 								                if word_node.kind() != "word" {
 								                    return None;
 								                }
 								                words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
 								            }
 								            "word" | "number" => {
 								                words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
 								            }
 								            "string" => {
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								                let parsed = parse_double_quoted_string(child, src)?;
 								                words.push(parsed);
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								            }
 								            "raw_string" => {
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								                let parsed = parse_raw_string(child, src)?;
 								                words.push(parsed);
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								            }
-												Handle concatenation nodes in bash command parser for exec policy (#8395)

The bash command parser in exec_policy was failing to parse commands
with concatenated flag-value patterns like `-g"*.py"` (no space between
flag and quoted value). This caused policy rules like
`prefix_rule(pattern=["rg"])` to not match commands such as `rg -n "foo"
-g"*.py"`.

When tree-sitter-bash parses `-g"*.py"`, it creates a "concatenation"
node containing a word (`-g`) and a string (`"*.py"`). The parser
previously rejected any node type not in the ALLOWED_KINDS list, causing
the entire command parsing to fail and fall back to matching against the
wrapped `bash -lc` command instead of the inner command.

This change:
- Adds "concatenation" to ALLOWED_KINDS in
try_parse_word_only_commands_sequence
- Adds handling for concatenation nodes in parse_plain_command_from_node
that recursively extracts and joins word/string/raw_string children
- Adds test cases for concatenated flag patterns with double and single
quotes

Fixes #8394
											
										
										
											2025-12-22 22:59:27 +03:00
+								            "concatenation" => {
 								                // Handle concatenated arguments like -g"*.py"
 								                let mut concatenated = String::new();
 								                let mut concat_cursor = child.walk();
 								                for part in child.named_children(&mut concat_cursor) {
 								                    match part.kind() {
 								                        "word" | "number" => {
 								                            concatenated
 								                                .push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
 								                        }
 								                        "string" => {
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								                            let parsed = parse_double_quoted_string(part, src)?;
 								                            concatenated.push_str(&parsed);
-												Handle concatenation nodes in bash command parser for exec policy (#8395)

The bash command parser in exec_policy was failing to parse commands
with concatenated flag-value patterns like `-g"*.py"` (no space between
flag and quoted value). This caused policy rules like
`prefix_rule(pattern=["rg"])` to not match commands such as `rg -n "foo"
-g"*.py"`.

When tree-sitter-bash parses `-g"*.py"`, it creates a "concatenation"
node containing a word (`-g`) and a string (`"*.py"`). The parser
previously rejected any node type not in the ALLOWED_KINDS list, causing
the entire command parsing to fail and fall back to matching against the
wrapped `bash -lc` command instead of the inner command.

This change:
- Adds "concatenation" to ALLOWED_KINDS in
try_parse_word_only_commands_sequence
- Adds handling for concatenation nodes in parse_plain_command_from_node
that recursively extracts and joins word/string/raw_string children
- Adds test cases for concatenated flag patterns with double and single
quotes

Fixes #8394
											
										
										
											2025-12-22 22:59:27 +03:00
+								                        }
 								                        "raw_string" => {
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								                            let parsed = parse_raw_string(part, src)?;
 								                            concatenated.push_str(&parsed);
-												Handle concatenation nodes in bash command parser for exec policy (#8395)

The bash command parser in exec_policy was failing to parse commands
with concatenated flag-value patterns like `-g"*.py"` (no space between
flag and quoted value). This caused policy rules like
`prefix_rule(pattern=["rg"])` to not match commands such as `rg -n "foo"
-g"*.py"`.

When tree-sitter-bash parses `-g"*.py"`, it creates a "concatenation"
node containing a word (`-g`) and a string (`"*.py"`). The parser
previously rejected any node type not in the ALLOWED_KINDS list, causing
the entire command parsing to fail and fall back to matching against the
wrapped `bash -lc` command instead of the inner command.

This change:
- Adds "concatenation" to ALLOWED_KINDS in
try_parse_word_only_commands_sequence
- Adds handling for concatenation nodes in parse_plain_command_from_node
that recursively extracts and joins word/string/raw_string children
- Adds test cases for concatenated flag patterns with double and single
quotes

Fixes #8394
											
										
										
											2025-12-22 22:59:27 +03:00
+								                        }
 								                        _ => return None,
 								                    }
 								                }
 								                if concatenated.is_empty() {
 								                    return None;
 								                }
 								                words.push(concatenated);
 								            }
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								            _ => return None,
 								        }
 								    }
 								    Some(words)
 								}
-												fix(core): canonicalize wrapper approvals and support heredoc prefix … (#10941)

## Summary
- Reduced repeated approvals for equivalent wrapper commands and fixed
execpolicy matching for heredoc-style shell invocations, with minimal
behavior change and fail-closed defaults.

## Fixes
1. Canonicalized approval matching for wrappers so equivalent commands
map to the same approval intent.
2. Added heredoc-aware prefix extraction for execpolicy so commands like
`python3 <<'PY' ... PY` match rules such as `prefix_rule(["python3"],
...)`.
3. Kept fallback behavior conservative: if parsing is ambiguous,
existing prompt behavior is preserved.

## Edge Cases Covered
- Wrapper path/name differences: `/bin/bash` vs `bash`, `/bin/zsh` vs
`zsh`.
- Shell modes: `-c` and `-lc`.
- Heredoc forms: quoted delimiter (`<<'PY'`) and unquoted delimiter (`<<
PY`).
- Multi-command heredoc scripts are rejected by the fallback.
- Non-heredoc redirections (`>`, etc.) are not treated as heredoc prefix
matches.
- Complex scripts still fall back to prior behavior rather than
expanding permissions.

---------

Co-authored-by: Dylan Hurd <dylan.hurd@openai.com>
											
										
										
											2026-02-10 11:46:40 -08:00
+								fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>> {
 								    if cmd.kind() != "command" {
 								        return None;
 								    }
 								    let mut words = Vec::new();
 								    let mut cursor = cmd.walk();
 								    for child in cmd.named_children(&mut cursor) {
 								        match child.kind() {
 								            "command_name" => {
 								                let word_node = child.named_child(0)?;
 								                if !matches!(word_node.kind(), "word" | "number")
 								                    || !is_literal_word_or_number(word_node)
 								                {
 								                    return None;
 								                }
 								                words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
 								            }
 								            "word" | "number" => {
 								                if !is_literal_word_or_number(child) {
 								                    return None;
 								                }
 								                words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
 								            }
 								            // Allow shell constructs that attach IO to a single command without
 								            // changing argv matching semantics for the executable prefix.
 								            "variable_assignment" | "comment" => {}
 								            kind if is_allowed_heredoc_attachment_kind(kind) => {}
 								            _ => return None,
 								        }
 								    }
 								    if words.is_empty() { None } else { Some(words) }
 								}
 								/// Reports whether `node` is a `word` or `number` node without any named
 								/// children.
 								///
 								/// Any other node kind yields `false`.
 								// NOTE(review): presumably named children inside a word/number indicate an
 								// embedded expansion — confirm against the tree-sitter-bash grammar.
 								fn is_literal_word_or_number(node: Node<'_>) -> bool {
 								    match node.kind() {
 								        // Literal means: not even a first named child exists.
 								        "word" | "number" => node.named_child(0).is_none(),
 								        _ => false,
 								    }
 								}
 								/// Returns `true` when `kind` is one of the node kinds that
 								/// `parse_heredoc_command_words` skips over instead of rejecting.
 								///
 								/// The comparison is an exact, case-sensitive string match.
 								fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
 								    const ATTACHMENT_KINDS: [&str; 6] = [
 								        "heredoc_body",
 								        "simple_heredoc_body",
 								        "heredoc_redirect",
 								        "herestring_redirect",
 								        "file_redirect",
 								        "redirected_statement",
 								    ];
 								    ATTACHMENT_KINDS.contains(&kind)
 								}
 								/// Searches `root` and all of its named descendants for `command` nodes.
 								///
 								/// Returns the node when exactly one `command` exists, and `None` when there
 								/// is none or more than one. `root` itself is included in the search.
 								fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
 								    let mut pending = vec![root];
 								    let mut found = None;
 								    while let Some(current) = pending.pop() {
 								        // `replace` hands back the previously stored command, if any; a
 								        // second hit means the script is not a single command.
 								        if current.kind() == "command" && found.replace(current).is_some() {
 								            return None;
 								        }
 								        let mut walker = current.walk();
 								        pending.extend(current.named_children(&mut walker));
 								    }
 								    found
 								}
-												fix(core) exec policy parsing 3 (#12485)

## Summary
Quick fix
											
										
										
											2026-02-21 22:26:13 -08:00
+								fn has_named_descendant_kind(node: Node<'_>, kind: &str) -> bool {
 								    let mut stack = vec![node];
 								    while let Some(current) = stack.pop() {
 								        if current.kind() == kind {
 								            return true;
 								        }
 								        let mut cursor = current.walk();
 								        for child in current.named_children(&mut cursor) {
 								            stack.push(child);
 								        }
 								    }
 								    false
 								}
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								fn parse_double_quoted_string(node: Node, src: &str) -> Option<String> {
 								    if node.kind() != "string" {
 								        return None;
 								    }
-												chore: use some raw strings to reduce quoting (#9745)

Small follow-ups for https://github.com/openai/codex/pull/9565. Mainly
`r#`, but also added some whitespace for early returns.
											
										
										
											2026-01-22 22:38:10 -08:00
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								    let mut cursor = node.walk();
 								    for part in node.named_children(&mut cursor) {
 								        if part.kind() != "string_content" {
 								            return None;
 								        }
 								    }
 								    let raw = node.utf8_text(src.as_bytes()).ok()?;
 								    let stripped = raw
 								        .strip_prefix('"')
 								        .and_then(|text| text.strip_suffix('"'))?;
 								    Some(stripped.to_string())
 								}
 								fn parse_raw_string(node: Node, src: &str) -> Option<String> {
 								    if node.kind() != "raw_string" {
 								        return None;
 								    }
-												chore: use some raw strings to reduce quoting (#9745)

Small follow-ups for https://github.com/openai/codex/pull/9565. Mainly
`r#`, but also added some whitespace for early returns.
											
										
										
											2026-01-22 22:38:10 -08:00
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								    let raw_string = node.utf8_text(src.as_bytes()).ok()?;
 								    let stripped = raw_string
 								        .strip_prefix('\'')
 								        .and_then(|s| s.strip_suffix('\''));
 								    stripped.map(str::to_owned)
 								}
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								#[cfg(test)]
 								mod tests {
 								    use super::*;
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								    use pretty_assertions::assert_eq;
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
 								    fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
-												Treat `zsh -lc` like `bash -lc` (#5411)

Without proper `zsh -lc` parsing, we lose some things like proper
command parsing, turn diff tracking, safe command checks, and other
things we expect from raw or `bash -lc` commands.
											
										
										
											2025-10-20 15:52:25 -07:00
+								        let tree = try_parse_shell(src)?;
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								        try_parse_word_only_commands_sequence(&tree, src)
 								    }
 								    #[test]
 								    fn accepts_single_simple_command() {
 								        let cmds = parse_seq("ls -1").unwrap();
 								        assert_eq!(cmds, vec![vec!["ls".to_string(), "-1".to_string()]]);
 								    }
 								    #[test]
 								    fn accepts_multiple_commands_with_allowed_operators() {
 								        let src = "ls && pwd; echo 'hi there' | wc -l";
 								        let cmds = parse_seq(src).unwrap();
 								        let expected: Vec<Vec<String>> = vec![
 								            vec!["ls".to_string()],
-												fix: update try_parse_word_only_commands_sequence() to return commands in order (#3881)

Incidentally, we had a test for this in
`accepts_multiple_commands_with_allowed_operators()`, but it was
verifying the bad behavior. Oops!
											
										
										
											2025-09-18 16:07:38 -07:00
+								            vec!["pwd".to_string()],
 								            vec!["echo".to_string(), "hi there".to_string()],
 								            vec!["wc".to_string(), "-l".to_string()],
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								        ];
 								        assert_eq!(cmds, expected);
 								    }
 								    #[test]
 								    fn extracts_double_and_single_quoted_strings() {
 								        let cmds = parse_seq("echo \"hello world\"").unwrap();
 								        assert_eq!(
 								            cmds,
 								            vec![vec!["echo".to_string(), "hello world".to_string()]]
 								        );
 								        let cmds2 = parse_seq("echo 'hi there'").unwrap();
 								        assert_eq!(
 								            cmds2,
 								            vec![vec!["echo".to_string(), "hi there".to_string()]]
 								        );
 								    }
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								    #[test]
 								    fn accepts_double_quoted_strings_with_newlines() {
 								        let cmds = parse_seq("git commit -m \"line1\nline2\"").unwrap();
 								        assert_eq!(
 								            cmds,
 								            vec![vec![
 								                "git".to_string(),
 								                "commit".to_string(),
 								                "-m".to_string(),
 								                "line1\nline2".to_string(),
 								            ]]
 								        );
 								    }
 								    #[test]
 								    fn accepts_mixed_quote_concatenation() {
 								        assert_eq!(
-												chore: use some raw strings to reduce quoting (#9745)

Small follow-ups for https://github.com/openai/codex/pull/9565. Mainly
`r#`, but also added some whitespace for early returns.
											
										
										
											2026-01-22 22:38:10 -08:00
+								            parse_seq(r#"echo "/usr"'/'"local"/bin"#).unwrap(),
 								            vec![vec!["echo".to_string(), "/usr/local/bin".to_string()]]
 								        );
 								        assert_eq!(
 								            parse_seq(r#"echo '/usr'"/"'local'/bin"#).unwrap(),
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								            vec![vec!["echo".to_string(), "/usr/local/bin".to_string()]]
 								        );
 								    }
 								    #[test]
 								    fn rejects_double_quoted_strings_with_expansions() {
-												chore: use some raw strings to reduce quoting (#9745)

Small follow-ups for https://github.com/openai/codex/pull/9565. Mainly
`r#`, but also added some whitespace for early returns.
											
										
										
											2026-01-22 22:38:10 -08:00
+								        assert!(parse_seq(r#"echo "hi ${USER}""#).is_none());
 								        assert!(parse_seq(r#"echo "$HOME""#).is_none());
-												Fix execpolicy parsing for multiline quoted args (#9565)

## What
Fix bash command parsing to accept double-quoted strings that contain
literal newlines so execpolicy can match allow rules.

## Why
Allow rules like [git, commit] should still match when commit messages
include a newline in a quoted argument; the parser currently rejects
these strings and falls back to the outer shell invocation.

## How
- Validate double-quoted strings by ensuring all named children are
string_content and then stripping the outer quotes from the raw node
text so embedded newlines are preserved.
- Reuse the helper for concatenated arguments.
- Ensure large SI suffix formatting uses the caller-provided locale
formatter for grouping.
- Add coverage for newline-containing quoted arguments.

Fixes #9541.

## Tests
- cargo test -p codex-core
- just fix -p codex-core
- cargo test -p codex-protocol
- just fix -p codex-protocol
- cargo test --all-features
											
										
										
											2026-01-23 01:16:53 -05:00
+								    }
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								    #[test]
 								    fn accepts_numbers_as_words() {
 								        // Purely numeric arguments should come back as ordinary words.
 								        let expected: Vec<Vec<String>> = vec![
 								            ["echo", "123", "456"]
 								                .iter()
 								                .map(|word| word.to_string())
 								                .collect(),
 								        ];
 								        let parsed = parse_seq("echo 123 456").unwrap();
 								        assert_eq!(parsed, expected);
 								    }
 								    // Parenthesized groups, alone or on one side of `||`, must not parse.
 								    #[test]
 								    fn rejects_parentheses_and_subshells() {
 								        assert_eq!(parse_seq("(ls)"), None);
 								        assert_eq!(parse_seq("ls || (pwd && echo hi)"), None);
 								    }
 								    // An output redirect (`>`) and the background operator (`&`) must both be rejected.
 								    #[test]
 								    fn rejects_redirections_and_unsupported_operators() {
 								        assert_eq!(parse_seq("ls > out.txt"), None);
 								        assert_eq!(parse_seq("echo hi & echo bye"), None);
 								    }
 								    // Command substitution, process substitution and variable expansion each
 								    // make a script non-"word-only", so every input below must yield `None`.
 								    #[test]
 								    fn rejects_command_and_process_substitutions_and_expansions() {
 								        // Command substitution, in both `$(...)` and backtick form.
 								        assert!(parse_seq("echo $(pwd)").is_none());
 								        assert!(parse_seq("echo `pwd`").is_none());
 								        // Process substitution: promised by the test name, so exercise it explicitly.
 								        assert!(parse_seq("echo <(pwd)").is_none());
 								        // Variable expansion, bare and inside a double-quoted string.
 								        assert!(parse_seq("echo $HOME").is_none());
 								        assert!(parse_seq("echo \"hi $USER\"").is_none());
 								    }
 								    // A leading `FOO=bar` assignment in front of the command must be rejected.
 								    #[test]
 								    fn rejects_variable_assignment_prefix() {
 								        let parsed = parse_seq("FOO=bar ls");
 								        assert_eq!(parsed, None);
 								    }
 								    // A dangling `&&` with nothing after it must be rejected.
 								    #[test]
 								    fn rejects_trailing_operator_parse_error() {
 								        let parsed = parse_seq("ls &&");
 								        assert_eq!(parsed, None);
 								    }
-												Treat `zsh -lc` like `bash -lc` (#5411)

Without proper `zsh -lc` parsing, we lose some things like proper
command parsing, turn diff tracking, safe command checks, and other
things we expect from raw or `bash -lc` commands.
											
										
										
											2025-10-20 15:52:25 -07:00
-												fix(exec-policy) No empty command lists (#11397)

## Summary
This should rarely, if ever, happen in practice. But regardless, we
should never provide an empty list of `commands` to ExecPolicy. This PR
is almost entirely adding tests around these cases.

## Testing
- [x] Adds a bunch of unit tests for this
											
										
										
											2026-02-10 19:22:23 -08:00
+								    #[test]
 								    fn rejects_empty_command_position_with_leading_operator() {
 								        // Nothing precedes the `&&`, so its left-hand command position is empty.
 								        let parsed = parse_seq("&& ls");
 								        assert_eq!(parsed, None);
 								    }
 								    // Two consecutive `;` separators leave an empty command position between them.
 								    #[test]
 								    fn rejects_empty_command_position_with_double_separator() {
 								        let parsed = parse_seq("ls ;; pwd");
 								        assert_eq!(parsed, None);
 								    }
 								    // Two consecutive `|` operators leave an empty pipeline segment between them.
 								    #[test]
 								    fn rejects_empty_command_position_with_empty_pipeline_segment() {
 								        let parsed = parse_seq("ls | | wc");
 								        assert_eq!(parsed, None);
 								    }
-												Treat `zsh -lc` like `bash -lc` (#5411)

Without proper `zsh -lc` parsing, we lose some things like proper
command parsing, turn diff tracking, safe command checks, and other
things we expect from raw or `bash -lc` commands.
											
										
										
											2025-10-20 15:52:25 -07:00
+								    #[test]
 								    fn parse_zsh_lc_plain_commands() {
 								        // A `zsh -lc <script>` invocation should yield the script's plain commands.
 								        let argv: Vec<String> = ["zsh", "-lc", "ls"]
 								            .iter()
 								            .map(|arg| arg.to_string())
 								            .collect();
 								        let expected = vec![vec!["ls".to_string()]];
 								        assert_eq!(parse_shell_lc_plain_commands(&argv).unwrap(), expected);
 								    }
-												Handle concatenation nodes in bash command parser for exec policy (#8395)

The bash command parser in exec_policy was failing to parse commands
with concatenated flag-value patterns like `-g"*.py"` (no space between
flag and quoted value). This caused policy rules like
`prefix_rule(pattern=["rg"])` to not match commands such as `rg -n "foo"
-g"*.py"`.

When tree-sitter-bash parses `-g"*.py"`, it creates a "concatenation"
node containing a word (`-g`) and a string (`"*.py"`). The parser
previously rejected any node type not in the ALLOWED_KINDS list, causing
the entire command parsing to fail and fall back to matching against the
wrapped `bash -lc` command instead of the inner command.

This change:
- Adds "concatenation" to ALLOWED_KINDS in
try_parse_word_only_commands_sequence
- Adds handling for concatenation nodes in parse_plain_command_from_node
that recursively extracts and joins word/string/raw_string children
- Adds test cases for concatenated flag patterns with double and single
quotes

Fixes #8394
											
										
										
											2025-12-22 22:59:27 +03:00
 								    // `-g"*.py"`: a flag glued directly onto a double-quoted value is joined
 								    // into the single word `-g*.py`.
 								    #[test]
 								    fn accepts_concatenated_flag_and_value() {
 								        let expected: Vec<Vec<String>> = vec![
 								            ["rg", "-n", "foo", "-g*.py"]
 								                .iter()
 								                .map(|word| word.to_string())
 								                .collect(),
 								        ];
 								        let parsed = parse_seq("rg -n \"foo\" -g\"*.py\"").unwrap();
 								        assert_eq!(parsed, expected);
 								    }
 								    // Single-quoted counterpart: `-g'*.txt'` is joined into the word `-g*.txt`.
 								    #[test]
 								    fn accepts_concatenated_flag_with_single_quotes() {
 								        let expected: Vec<Vec<String>> = vec![
 								            ["grep", "-n", "pattern", "-g*.txt"]
 								                .iter()
 								                .map(|word| word.to_string())
 								                .collect(),
 								        ];
 								        let parsed = parse_seq("grep -n 'pattern' -g'*.txt'").unwrap();
 								        assert_eq!(parsed, expected);
 								    }
 								    // A quoted value concatenated onto a flag must still be rejected when it
 								    // contains a variable expansion, in either `$VAR` or `${VAR}` form.
 								    #[test]
 								    fn rejects_concatenation_with_variable_substitution() {
 								        assert_eq!(parse_seq("rg -g\"$VAR\" pattern"), None);
 								        assert_eq!(parse_seq("rg -g\"${VAR}\" pattern"), None);
 								    }
 								    // A quoted value concatenated onto a flag must still be rejected when it
 								    // contains a `$(...)` command substitution.
 								    #[test]
 								    fn rejects_concatenation_with_command_substitution() {
 								        assert_eq!(parse_seq("rg -g\"$(pwd)\" pattern"), None);
 								        assert_eq!(parse_seq("rg -g\"$(echo '*.py')\" pattern"), None);
 								    }
-												fix(core): canonicalize wrapper approvals and support heredoc prefix … (#10941)

## Summary
- Reduced repeated approvals for equivalent wrapper commands and fixed
execpolicy matching for heredoc-style shell invocations, with minimal
behavior change and fail-closed defaults.

## Fixes
1. Canonicalized approval matching for wrappers so equivalent commands
map to the same approval intent.
2. Added heredoc-aware prefix extraction for execpolicy so commands like
`python3 <<'PY' ... PY` match rules such as `prefix_rule(["python3"],
...)`.
3. Kept fallback behavior conservative: if parsing is ambiguous,
existing prompt behavior is preserved.

## Edge Cases Covered
- Wrapper path/name differences: `/bin/bash` vs `bash`, `/bin/zsh` vs
`zsh`.
- Shell modes: `-c` and `-lc`.
- Heredoc forms: quoted delimiter (`<<'PY'`) and unquoted delimiter (`<<
PY`).
- Multi-command heredoc scripts are rejected by the fallback.
- Non-heredoc redirections (`>`, etc.) are not treated as heredoc prefix
matches.
- Complex scripts still fall back to prior behavior rather than
expanding permissions.

---------

Co-authored-by: Dylan Hurd <dylan.hurd@openai.com>
											
										
										
											2026-02-10 11:46:40 -08:00
 								    // A single command fed by a heredoc should yield just its command words,
 								    // whether the heredoc delimiter is quoted or unquoted.
 								    #[test]
 								    fn parse_shell_lc_single_command_prefix_supports_heredoc() {
 								        // Wrap a script in a `zsh -lc` argv and extract its prefix.
 								        let prefix_for = |script: &str| {
 								            let argv: Vec<String> = ["zsh", "-lc", script]
 								                .iter()
 								                .map(|arg| arg.to_string())
 								                .collect();
 								            parse_shell_lc_single_command_prefix(&argv)
 								        };
 								        let expected = Some(vec!["python3".to_string()]);
 								        // Quoted delimiter: <<'PY'
 								        assert_eq!(prefix_for("python3 <<'PY'\nprint('hello')\nPY"), expected);
 								        // Unquoted delimiter: << PY
 								        assert_eq!(prefix_for("python3 << PY\nprint('hello')\nPY"), expected);
 								    }
 								    // A heredoc command followed by a second command (`echo done`) has no
 								    // single-command prefix.
 								    #[test]
 								    fn parse_shell_lc_single_command_prefix_rejects_multi_command_scripts() {
 								        let script = "python3 <<'PY'\nprint('hello')\nPY\necho done";
 								        let argv: Vec<String> = ["bash", "-lc", script]
 								            .iter()
 								            .map(|arg| arg.to_string())
 								            .collect();
 								        let prefix = parse_shell_lc_single_command_prefix(&argv);
 								        assert_eq!(prefix, None);
 								    }
 								    #[test]
 								    fn parse_shell_lc_single_command_prefix_rejects_non_heredoc_redirects() {
 								        let command = vec![
 								            "bash".to_string(),
 								            "-lc".to_string(),
 								            "echo hello > /tmp/out.txt".to_string(),
 								        ];
-												fix(core) exec policy parsing 3 (#12485)

## Summary
Quick fix
											
										
										
											2026-02-21 22:26:13 -08:00
+								        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
-												fix(core): canonicalize wrapper approvals and support heredoc prefix … (#10941)

## Summary
- Reduced repeated approvals for equivalent wrapper commands and fixed
execpolicy matching for heredoc-style shell invocations, with minimal
behavior change and fail-closed defaults.

## Fixes
1. Canonicalized approval matching for wrappers so equivalent commands
map to the same approval intent.
2. Added heredoc-aware prefix extraction for execpolicy so commands like
`python3 <<'PY' ... PY` match rules such as `prefix_rule(["python3"],
...)`.
3. Kept fallback behavior conservative: if parsing is ambiguous,
existing prompt behavior is preserved.

## Edge Cases Covered
- Wrapper path/name differences: `/bin/bash` vs `bash`, `/bin/zsh` vs
`zsh`.
- Shell modes: `-c` and `-lc`.
- Heredoc forms: quoted delimiter (`<<'PY'`) and unquoted delimiter (`<<
PY`).
- Multi-command heredoc scripts are rejected by the fallback.
- Non-heredoc redirections (`>`, etc.) are not treated as heredoc prefix
matches.
- Complex scripts still fall back to prior behavior rather than
expanding permissions.

---------

Co-authored-by: Dylan Hurd <dylan.hurd@openai.com>
											
										
										
											2026-02-10 11:46:40 -08:00
+								    }
 								    // A heredoc command that also redirects its output with `>` still yields
 								    // its command words as the prefix.
 								    #[test]
 								    fn parse_shell_lc_single_command_prefix_accepts_heredoc_with_extra_redirect() {
 								        let script = "python3 <<'PY' > /tmp/out.txt\nprint('hello')\nPY";
 								        let argv: Vec<String> = ["bash", "-lc", script]
 								            .iter()
 								            .map(|arg| arg.to_string())
 								            .collect();
 								        let prefix = parse_shell_lc_single_command_prefix(&argv);
 								        assert_eq!(prefix, Some(vec!["python3".to_string()]));
 								    }
 								    #[test]
-												fix(core) exec_policy parsing fixes (#11951)

## Summary
Fixes a few things in our exec_policy handling of prefix_rules:
1. Correctly match redirects specifically for exec_policy parsing. i.e.
if you have `prefix_rule(["echo"], decision="allow")` then `echo hello >
output.txt` should match - this should fix #10321
2. If there already exists any rule that would match our prefix rule
(not just a prompt), then drop it, since it won't do anything.


## Testing
- [x] Updated unit tests, added approvals ScenarioSpecs
											
										
										
											2026-02-16 23:11:59 -08:00
+								    fn parse_shell_lc_single_command_prefix_rejects_herestring_with_chaining() {
 								        // The script redirects `echo` output to a file with `>` and then chains a
 								        // second command with `&&`; prefix extraction is expected to return `None`.
 								        // NOTE(review): the test name says "herestring", but the script contains
 								        // no `<<<` here-string — confirm whether the name or the input is stale.
 								        let command = vec![
 								            "bash".to_string(),
 								            "-lc".to_string(),
 								            r#"echo hello > /tmp/out.txt && cat /tmp/out.txt"#.to_string(),
 								        ];
 								        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
 								    }
 								    #[test]
-												fix(core): canonicalize wrapper approvals and support heredoc prefix … (#10941)

## Summary
- Reduced repeated approvals for equivalent wrapper commands and fixed
execpolicy matching for heredoc-style shell invocations, with minimal
behavior change and fail-closed defaults.

## Fixes
1. Canonicalized approval matching for wrappers so equivalent commands
map to the same approval intent.
2. Added heredoc-aware prefix extraction for execpolicy so commands like
`python3 <<'PY' ... PY` match rules such as `prefix_rule(["python3"],
...)`.
3. Kept fallback behavior conservative: if parsing is ambiguous,
existing prompt behavior is preserved.

## Edge Cases Covered
- Wrapper path/name differences: `/bin/bash` vs `bash`, `/bin/zsh` vs
`zsh`.
- Shell modes: `-c` and `-lc`.
- Heredoc forms: quoted delimiter (`<<'PY'`) and unquoted delimiter (`<<
PY`).
- Multi-command heredoc scripts are rejected by the fallback.
- Non-heredoc redirections (`>`, etc.) are not treated as heredoc prefix
matches.
- Complex scripts still fall back to prior behavior rather than
expanding permissions.

---------

Co-authored-by: Dylan Hurd <dylan.hurd@openai.com>
											
										
										
											2026-02-10 11:46:40 -08:00
+								    fn parse_shell_lc_single_command_prefix_rejects_herestring_with_substitution() {
 								        // The here-string (`<<<`) payload is a double-quoted `$(...)` command
 								        // substitution; prefix extraction is expected to return `None` for it.
 								        let command = vec![
 								            "bash".to_string(),
 								            "-lc".to_string(),
 								            r#"python3 <<< "$(rm -rf /)""#.to_string(),
 								        ];
 								        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
 								    }
 								    // The `<<` here appears inside `$((...))` as a shift operator, not as a
 								    // heredoc; the script must not produce a prefix.
 								    #[test]
 								    fn parse_shell_lc_single_command_prefix_rejects_arithmetic_shift_non_heredoc_script() {
 								        let script = "echo $((1<<2))";
 								        let argv: Vec<String> = ["bash", "-lc", script]
 								            .iter()
 								            .map(|arg| arg.to_string())
 								            .collect();
 								        let prefix = parse_shell_lc_single_command_prefix(&argv);
 								        assert_eq!(prefix, None);
 								    }
 								    // A genuine heredoc command whose argument list contains a `$((...))`
 								    // expansion must not produce a prefix.
 								    #[test]
 								    fn parse_shell_lc_single_command_prefix_rejects_heredoc_command_with_word_expansion() {
 								        let script = "python3 $((1<<2)) <<'PY'\nprint('hello')\nPY";
 								        let argv: Vec<String> = ["bash", "-lc", script]
 								            .iter()
 								            .map(|arg| arg.to_string())
 								            .collect();
 								        let prefix = parse_shell_lc_single_command_prefix(&argv);
 								        assert_eq!(prefix, None);
 								    }
-												feat: expand the set of commands that can be safely identified as "trusted" (#1668)

This PR updates `is_known_safe_command()` to account for "safe
operators" to expand the set of commands that can be run without
approval. This concept existed in the TypeScript CLI, and we are
[finally!] porting it to the Rust one:


https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-cli/src/approvals.ts#L531-L541

The idea is that if we have `EXPR1 SAFE_OP EXPR2` and `EXPR1` and
`EXPR2` are considered safe independently, then `EXPR1 SAFE_OP EXPR2`
should be considered safe. Currently, `SAFE_OP` includes `&&`, `||`,
`;`, and `|`.

In the TypeScript implementation, we relied on
https://www.npmjs.com/package/shell-quote to parse the string of Bash,
as it could provide a "lightweight" parse tree, parsing `'beep || boop >
/byte'` as:

```
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
```

Though in this PR, we introduce the use of
https://crates.io/crates/tree-sitter-bash for parsing (which
incidentally we were already using in
[`codex-apply-patch`](https://github.com/openai/codex/blob/c9e2def49487585cfe6f8bb7b2be442e8c0b5e1b/codex-rs/apply-patch/Cargo.toml#L18)),
which gives us a richer parse tree. (Incidentally, if you have never
played with tree-sitter, try the
[playground](https://tree-sitter.github.io/tree-sitter/7-playground.html)
and select **Bash** from the dropdown to see how it parses various
expressions.)

As a concrete example, prior to this change, our implementation of
`is_known_safe_command()` could verify things like:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
```

but not:

```
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
```

With this change, the version with `|| true` is also accepted.

Admittedly, this PR does not expand the safety check to support
subshells, so it would reject, e.g. `bash -lc 'ls || (pwd && echo hi)'`,
but that can be addressed in a subsequent PR.
											
										
										
											2025-07-24 14:13:30 -07:00
+								}