Handle concatenation nodes in bash command parser for exec policy (#8395)

The bash command parser in exec_policy was failing to parse commands with concatenated flag-value patterns like `-g"*.py"` (no space between flag and quoted value). This caused policy rules like `prefix_rule(pattern=["rg"])` to not match commands such as `rg -n "foo" -g"*.py"`.

When tree-sitter-bash parses `-g"*.py"`, it creates a "concatenation" node containing a word (`-g`) and a string (`"*.py"`). The parser previously rejected any node type not in the ALLOWED_KINDS list, causing the entire command parsing to fail and fall back to matching against the wrapped `bash -lc` command instead of the inner command.

This change:

- Adds "concatenation" to ALLOWED_KINDS in try_parse_word_only_commands_sequence
- Adds handling for concatenation nodes in parse_plain_command_from_node that recursively extracts and joins word/string/raw_string children
- Adds test cases for concatenated flag patterns with double and single quotes

Fixes #8394
2025-12-22 22:59:27 +03:00 · 2025-12-22 22:59:27 +03:00 · 0237459f71
commit 0237459f71
parent 314937fb11
1 changed files with 86 additions and 0 deletions
--- a/codex-rs/core/src/bash.rs
+++ b/codex-rs/core/src/bash.rs
@@ -46,6 +46,7 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
        "string_content",
        "raw_string",
        "number",
+        "concatenation",
    ];
    // Allow only safe punctuation / operator tokens; anything else causes reject.
    const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
@@ -158,6 +159,48 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
                    return None;
                }
            }
+            "concatenation" => {
+                // Handle concatenated arguments like -g"*.py"
+                let mut concatenated = String::new();
+                let mut concat_cursor = child.walk();
+                for part in child.named_children(&mut concat_cursor) {
+                    match part.kind() {
+                        "word" | "number" => {
+                            concatenated
+                                .push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
+                        }
+                        "string" => {
+                            if part.child_count() == 3
+                                && part.child(0)?.kind() == "\""
+                                && part.child(1)?.kind() == "string_content"
+                                && part.child(2)?.kind() == "\""
+                            {
+                                concatenated.push_str(
+                                    part.child(1)?
+                                        .utf8_text(src.as_bytes())
+                                        .ok()?
+                                        .to_owned()
+                                        .as_str(),
+                                );
+                            } else {
+                                return None;
+                            }
+                        }
+                        "raw_string" => {
+                            let raw_string = part.utf8_text(src.as_bytes()).ok()?;
+                            let stripped = raw_string
+                                .strip_prefix('\'')
+                                .and_then(|s| s.strip_suffix('\''))?;
+                            concatenated.push_str(stripped);
+                        }
+                        _ => return None,
+                    }
+                }
+                if concatenated.is_empty() {
+                    return None;
+                }
+                words.push(concatenated);
+            }
            _ => return None,
        }
    }
@@ -256,4 +299,47 @@ mod tests {
        let parsed = parse_shell_lc_plain_commands(&command).unwrap();
        assert_eq!(parsed, vec![vec!["ls".to_string()]]);
    }
+
+    #[test]
+    fn accepts_concatenated_flag_and_value() {
+        // Test case: -g"*.py" (flag directly concatenated with quoted value)
+        let cmds = parse_seq("rg -n \"foo\" -g\"*.py\"").unwrap();
+        assert_eq!(
+            cmds,
+            vec![vec![
+                "rg".to_string(),
+                "-n".to_string(),
+                "foo".to_string(),
+                "-g*.py".to_string(),
+            ]]
+        );
+    }
+
+    #[test]
+    fn accepts_concatenated_flag_with_single_quotes() {
+        let cmds = parse_seq("grep -n 'pattern' -g'*.txt'").unwrap();
+        assert_eq!(
+            cmds,
+            vec![vec![
+                "grep".to_string(),
+                "-n".to_string(),
+                "pattern".to_string(),
+                "-g*.txt".to_string(),
+            ]]
+        );
+    }
+
+    #[test]
+    fn rejects_concatenation_with_variable_substitution() {
+        // Environment variables in concatenated strings should be rejected
+        assert!(parse_seq("rg -g\"$VAR\" pattern").is_none());
+        assert!(parse_seq("rg -g\"${VAR}\" pattern").is_none());
+    }
+
+    #[test]
+    fn rejects_concatenation_with_command_substitution() {
+        // Command substitution in concatenated strings should be rejected
+        assert!(parse_seq("rg -g\"$(pwd)\" pattern").is_none());
+        assert!(parse_seq("rg -g\"$(echo '*.py')\" pattern").is_none());
+    }
 }