Handle concatenation nodes in bash command parser for exec policy (#8395)

The bash command parser in exec_policy was failing to parse commands
with concatenated flag-value patterns like `-g"*.py"` (no space between
flag and quoted value). This caused policy rules like
`prefix_rule(pattern=["rg"])` to not match commands such as `rg -n "foo"
-g"*.py"`.

When tree-sitter-bash parses `-g"*.py"`, it creates a "concatenation"
node containing a word (`-g`) and a string (`"*.py"`). The parser
previously rejected any node type not in the ALLOWED_KINDS list, causing
the entire command parsing to fail and fall back to matching against the
wrapped `bash -lc` command instead of the inner command.

This change:
- Adds "concatenation" to ALLOWED_KINDS in
try_parse_word_only_commands_sequence
- Adds handling for concatenation nodes in parse_plain_command_from_node
that recursively extracts and joins word/string/raw_string children
- Adds test cases for concatenated flag patterns with double and single
quotes

Fixes #8394
This commit is contained in:
Ivan Murashko 2025-12-22 22:59:27 +03:00 committed by GitHub
parent 314937fb11
commit 0237459f71
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -46,6 +46,7 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
"string_content",
"raw_string",
"number",
"concatenation",
];
// Allow only safe punctuation / operator tokens; anything else causes reject.
const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
@ -158,6 +159,48 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
return None;
}
}
"concatenation" => {
// Handle concatenated arguments like -g"*.py"
let mut concatenated = String::new();
let mut concat_cursor = child.walk();
for part in child.named_children(&mut concat_cursor) {
match part.kind() {
"word" | "number" => {
concatenated
.push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
}
"string" => {
if part.child_count() == 3
&& part.child(0)?.kind() == "\""
&& part.child(1)?.kind() == "string_content"
&& part.child(2)?.kind() == "\""
{
concatenated.push_str(
part.child(1)?
.utf8_text(src.as_bytes())
.ok()?
.to_owned()
.as_str(),
);
} else {
return None;
}
}
"raw_string" => {
let raw_string = part.utf8_text(src.as_bytes()).ok()?;
let stripped = raw_string
.strip_prefix('\'')
.and_then(|s| s.strip_suffix('\''))?;
concatenated.push_str(stripped);
}
_ => return None,
}
}
if concatenated.is_empty() {
return None;
}
words.push(concatenated);
}
_ => return None,
}
}
@ -256,4 +299,47 @@ mod tests {
let parsed = parse_shell_lc_plain_commands(&command).unwrap();
assert_eq!(parsed, vec![vec!["ls".to_string()]]);
}
#[test]
fn accepts_concatenated_flag_and_value() {
// Test case: -g"*.py" (flag directly concatenated with quoted value)
let cmds = parse_seq("rg -n \"foo\" -g\"*.py\"").unwrap();
assert_eq!(
cmds,
vec![vec![
"rg".to_string(),
"-n".to_string(),
"foo".to_string(),
"-g*.py".to_string(),
]]
);
}
#[test]
fn accepts_concatenated_flag_with_single_quotes() {
let cmds = parse_seq("grep -n 'pattern' -g'*.txt'").unwrap();
assert_eq!(
cmds,
vec![vec![
"grep".to_string(),
"-n".to_string(),
"pattern".to_string(),
"-g*.txt".to_string(),
]]
);
}
#[test]
fn rejects_concatenation_with_variable_substitution() {
// Environment variables in concatenated strings should be rejected
assert!(parse_seq("rg -g\"$VAR\" pattern").is_none());
assert!(parse_seq("rg -g\"${VAR}\" pattern").is_none());
}
#[test]
fn rejects_concatenation_with_command_substitution() {
// Command substitution in concatenated strings should be rejected
assert!(parse_seq("rg -g\"$(pwd)\" pattern").is_none());
assert!(parse_seq("rg -g\"$(echo '*.py')\" pattern").is_none());
}
}