fix: parse git apply paths correctly (#8824)
Fixes apply.rs path parsing so - quoted diff headers are tokenized and extracted correctly, - /dev/null headers are ignored before prefix stripping to avoid bogus dev/null paths, and - git apply output paths are unescaped from C-style quoting. **Why** This prevents potentially missed staging and misclassified paths when applying or reverting patches, which could lead to incorrect behavior for repos with spaces or escaped characters in filenames. **Impact** I checked and this is only used in the cloud tasks support and `codex apply <task_id>` flow.
This commit is contained in:
parent
8372d61be7
commit
a59052341d
1 changed files with 150 additions and 18 deletions
|
|
@ -192,28 +192,130 @@ fn render_command_for_log(cwd: &Path, git_cfg: &[String], args: &[String]) -> St
|
|||
|
||||
/// Collect every path referenced by the diff headers inside `diff --git` sections.
|
||||
pub fn extract_paths_from_patch(diff_text: &str) -> Vec<String> {
|
||||
static RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r"(?m)^diff --git a/(.*?) b/(.*)$")
|
||||
.unwrap_or_else(|e| panic!("invalid regex: {e}"))
|
||||
});
|
||||
let mut set = std::collections::BTreeSet::new();
|
||||
for caps in RE.captures_iter(diff_text) {
|
||||
if let Some(a) = caps.get(1).map(|m| m.as_str())
|
||||
&& a != "/dev/null"
|
||||
&& !a.trim().is_empty()
|
||||
{
|
||||
set.insert(a.to_string());
|
||||
for raw_line in diff_text.lines() {
|
||||
let line = raw_line.trim();
|
||||
let Some(rest) = line.strip_prefix("diff --git ") else {
|
||||
continue;
|
||||
};
|
||||
let Some((a, b)) = parse_diff_git_paths(rest) else {
|
||||
continue;
|
||||
};
|
||||
if let Some(a) = normalize_diff_path(&a, "a/") {
|
||||
set.insert(a);
|
||||
}
|
||||
if let Some(b) = caps.get(2).map(|m| m.as_str())
|
||||
&& b != "/dev/null"
|
||||
&& !b.trim().is_empty()
|
||||
{
|
||||
set.insert(b.to_string());
|
||||
if let Some(b) = normalize_diff_path(&b, "b/") {
|
||||
set.insert(b);
|
||||
}
|
||||
}
|
||||
set.into_iter().collect()
|
||||
}
|
||||
|
||||
fn parse_diff_git_paths(line: &str) -> Option<(String, String)> {
|
||||
let mut chars = line.chars().peekable();
|
||||
let first = read_diff_git_token(&mut chars)?;
|
||||
let second = read_diff_git_token(&mut chars)?;
|
||||
Some((first, second))
|
||||
}
|
||||
|
||||
fn read_diff_git_token(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Option<String> {
|
||||
while matches!(chars.peek(), Some(c) if c.is_whitespace()) {
|
||||
chars.next();
|
||||
}
|
||||
let quote = match chars.peek().copied() {
|
||||
Some('"') | Some('\'') => chars.next(),
|
||||
_ => None,
|
||||
};
|
||||
let mut out = String::new();
|
||||
while let Some(c) = chars.next() {
|
||||
if let Some(q) = quote {
|
||||
if c == q {
|
||||
break;
|
||||
}
|
||||
if c == '\\' {
|
||||
out.push('\\');
|
||||
if let Some(next) = chars.next() {
|
||||
out.push(next);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else if c.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
out.push(c);
|
||||
}
|
||||
if out.is_empty() && quote.is_none() {
|
||||
None
|
||||
} else {
|
||||
Some(match quote {
|
||||
Some(_) => unescape_c_string(&out),
|
||||
None => out,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize_diff_path(raw: &str, prefix: &str) -> Option<String> {
|
||||
let trimmed = raw.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if trimmed == "/dev/null" || trimmed == format!("{prefix}dev/null") {
|
||||
return None;
|
||||
}
|
||||
let trimmed = trimmed.strip_prefix(prefix).unwrap_or(trimmed);
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(trimmed.to_string())
|
||||
}
|
||||
|
||||
fn unescape_c_string(input: &str) -> String {
|
||||
let mut out = String::with_capacity(input.len());
|
||||
let mut chars = input.chars().peekable();
|
||||
while let Some(c) = chars.next() {
|
||||
if c != '\\' {
|
||||
out.push(c);
|
||||
continue;
|
||||
}
|
||||
let Some(next) = chars.next() else {
|
||||
out.push('\\');
|
||||
break;
|
||||
};
|
||||
match next {
|
||||
'n' => out.push('\n'),
|
||||
'r' => out.push('\r'),
|
||||
't' => out.push('\t'),
|
||||
'b' => out.push('\u{0008}'),
|
||||
'f' => out.push('\u{000C}'),
|
||||
'a' => out.push('\u{0007}'),
|
||||
'v' => out.push('\u{000B}'),
|
||||
'\\' => out.push('\\'),
|
||||
'"' => out.push('"'),
|
||||
'\'' => out.push('\''),
|
||||
'0'..='7' => {
|
||||
let mut value = next.to_digit(8).unwrap_or(0);
|
||||
for _ in 0..2 {
|
||||
match chars.peek() {
|
||||
Some('0'..='7') => {
|
||||
if let Some(digit) = chars.next() {
|
||||
value = value * 8 + digit.to_digit(8).unwrap_or(0);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
if let Some(ch) = std::char::from_u32(value) {
|
||||
out.push(ch);
|
||||
}
|
||||
}
|
||||
other => out.push(other),
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Stage only the files that actually exist on disk for the given diff.
|
||||
pub fn stage_paths(git_root: &Path, diff: &str) -> io::Result<()> {
|
||||
let paths = extract_paths_from_patch(diff);
|
||||
|
|
@ -266,12 +368,12 @@ pub fn parse_git_apply_output(
|
|||
let first = trimmed.chars().next().unwrap_or('\0');
|
||||
let last = trimmed.chars().last().unwrap_or('\0');
|
||||
let unquoted = if (first == '"' || first == '\'') && last == first && trimmed.len() >= 2 {
|
||||
&trimmed[1..trimmed.len() - 1]
|
||||
unescape_c_string(&trimmed[1..trimmed.len() - 1])
|
||||
} else {
|
||||
trimmed
|
||||
trimmed.to_string()
|
||||
};
|
||||
if !unquoted.is_empty() {
|
||||
set.insert(unquoted.to_string());
|
||||
set.insert(unquoted);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -531,6 +633,36 @@ mod tests {
|
|||
.replace("\r\n", "\n")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_paths_handles_quoted_headers() {
|
||||
let diff = "diff --git \"a/hello world.txt\" \"b/hello world.txt\"\nnew file mode 100644\n--- /dev/null\n+++ b/hello world.txt\n@@ -0,0 +1 @@\n+hi\n";
|
||||
let paths = extract_paths_from_patch(diff);
|
||||
assert_eq!(paths, vec!["hello world.txt".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_paths_ignores_dev_null_header() {
|
||||
let diff = "diff --git a/dev/null b/ok.txt\nnew file mode 100644\n--- /dev/null\n+++ b/ok.txt\n@@ -0,0 +1 @@\n+hi\n";
|
||||
let paths = extract_paths_from_patch(diff);
|
||||
assert_eq!(paths, vec!["ok.txt".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn extract_paths_unescapes_c_style_in_quoted_headers() {
|
||||
let diff = "diff --git \"a/hello\\tworld.txt\" \"b/hello\\tworld.txt\"\nnew file mode 100644\n--- /dev/null\n+++ b/hello\tworld.txt\n@@ -0,0 +1 @@\n+hi\n";
|
||||
let paths = extract_paths_from_patch(diff);
|
||||
assert_eq!(paths, vec!["hello\tworld.txt".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_output_unescapes_quoted_paths() {
|
||||
let stderr = "error: patch failed: \"hello\\tworld.txt\":1\n";
|
||||
let (applied, skipped, conflicted) = parse_git_apply_output("", stderr);
|
||||
assert_eq!(applied, Vec::<String>::new());
|
||||
assert_eq!(conflicted, Vec::<String>::new());
|
||||
assert_eq!(skipped, vec!["hello\tworld.txt".to_string()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn apply_add_success() {
|
||||
let _g = env_lock().lock().unwrap();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue