From 370ac26d8574bdd1925959783029b43b09a17247 Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Sun, 4 Aug 2024 20:53:34 +0200 Subject: [PATCH] Handle ambiguous diff header, '--- ' can also be present in a minus hunk `diff -u file1 file2` output starts with '--- file1', a '-- 12' line being removed results in '--- 12', which was interpreted as the start of a new diff. Now the number of removed lines announced in the header as e.g. '@@ -1,4 +1,3 @@' is taken into account for this specific diff input. --- src/delta.rs | 63 ++++++++++++ src/handlers/diff_header.rs | 187 +++++++++++++++++++++++++++++++++++- src/handlers/hunk.rs | 2 + src/handlers/hunk_header.rs | 10 ++ 4 files changed, 261 insertions(+), 1 deletion(-) diff --git a/src/delta.rs b/src/delta.rs index 9890b9b24..948766f2b 100644 --- a/src/delta.rs +++ b/src/delta.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; use std::collections::HashMap; +use std::convert::TryInto; use std::io::{self, BufRead, IsTerminal, Write}; use bytelines::ByteLines; @@ -77,6 +78,61 @@ pub enum Source { Unknown, } +// The output of `diff -u file1 file2` does not contain a header before the +// `--- old.lua` / `+++ new.lua` block, and the hunk can of course contain lines +// starting with '-- '. To avoid interpreting '--- lua comment' as a new header, +// count the minus lines in a hunk (provided by the '@@ -1,4 +1,3 @@' header). +// `diff` itself can not generate two diffs in this ambiguous format, so a second header +// could just be ignored. But concatenated diffs exist and are accepted by `patch`. +#[derive(Debug)] +pub struct AmbiguousDiffMinusCounter(isize); + +impl AmbiguousDiffMinusCounter { + // The internal isize representation avoids calling `if let Some(..)` on every line. For + // nearly all input the counter is not needed, in this case it is decremented but ignored. + // [min, COUNTER_RELEVANT_IF_GT] unambiguous diff + // (COUNTER_RELEVANT_IF_GT, 0] handle next '--- ' like a header, and set counter in next @@ block + // [1, max] counting minus lines in ambiguous header + const COUNTER_RELEVANT_IF_GREATER_THAN: isize = -4096; // -1 works too, but be defensive + const EXPECT_DIFF_3DASH_HEADER: isize = 0; + pub fn not_needed() -> Self { + Self(Self::COUNTER_RELEVANT_IF_GREATER_THAN) + } + pub fn count_from(lines: usize) -> Self { + Self( + lines + .try_into() + .unwrap_or(Self::COUNTER_RELEVANT_IF_GREATER_THAN), + ) + } + pub fn prepare_to_count() -> Self { + Self(Self::EXPECT_DIFF_3DASH_HEADER) + } + pub fn three_dashes_expected(&self) -> bool { + if self.0 > Self::COUNTER_RELEVANT_IF_GREATER_THAN { + self.0 <= Self::EXPECT_DIFF_3DASH_HEADER + } else { + true + } + } + #[allow(clippy::needless_bool)] + pub fn must_count(&mut self) -> bool { + let relevant = self.0 > Self::COUNTER_RELEVANT_IF_GREATER_THAN; + if relevant { + true + } else { + #[cfg(target_pointer_width = "32")] + { + self.0 = Self::COUNTER_RELEVANT_IF_GREATER_THAN; + } + false + } + } + pub fn count_line(&mut self) { + self.0 -= 1; + } +} + // Possible transitions, with actions on entry: // // @@ -111,6 +167,7 @@ pub struct StateMachine<'a> { pub current_file_pair: Option<(String, String)>, pub handled_diff_header_header_line_file_pair: Option<(String, String)>, pub blame_key_colors: HashMap, + pub minus_line_counter: AmbiguousDiffMinusCounter, } pub fn delta(lines: ByteLines, writer: &mut dyn Write, config: &Config) -> std::io::Result<()> @@ -138,6 +195,7 @@ impl<'a> StateMachine<'a> { painter: Painter::new(writer, config), config, blame_key_colors: HashMap::new(), + minus_line_counter: AmbiguousDiffMinusCounter::not_needed(), } } @@ -150,6 +208,11 @@ impl<'a> StateMachine<'a> { if self.source == Source::Unknown { self.source = detect_source(&self.line); + // Handle (rare) plain `diff -u file1 file2` header. Done here to avoid having + // to introduce and handle a Source::DiffUnifiedAmbiguous variant everywhere. + if self.line.starts_with("--- ") { + self.minus_line_counter = AmbiguousDiffMinusCounter::prepare_to_count(); + } } // Every method named handle_* must return std::io::Result. diff --git a/src/handlers/diff_header.rs b/src/handlers/diff_header.rs index 46146110d..fed515251 100644 --- a/src/handlers/diff_header.rs +++ b/src/handlers/diff_header.rs @@ -73,7 +73,7 @@ impl<'a> StateMachine<'a> { #[inline] fn test_diff_header_minus_line(&self) -> bool { (matches!(self.state, State::DiffHeader(_)) || self.source == Source::DiffUnified) - && (self.line.starts_with("--- ") + && ((self.line.starts_with("--- ") && self.minus_line_counter.three_dashes_expected()) || self.line.starts_with("rename from ") || self.line.starts_with("copy from ")) } @@ -678,4 +678,189 @@ index 0000000..323fae0 "###) }); } + + pub const DIFF_AMBIGUOUS_HEADER_3X_MINUS: &str = r#"--- a.lua ++++ b.lua +@@ -1,5 +1,4 @@ + #!/usr/bin/env lua + + print("Hello") +--- World? + print("..") +"#; + pub const DIFF_AMBIGUOUS_HEADER_3X_MINUS_LAST_LINE: &str = r#"--- c.lua ++++ d.lua +@@ -1,4 +1,3 @@ + #!/usr/bin/env lua + + print("Hello") +--- World? +"#; + + pub const DIFF_AMBIGUOUS_HEADER_MULTIPLE_HUNKS: &str = r#"--- e.lua 2024-08-04 20:50:27.257726606 +0200 ++++ f.lua 2024-08-04 20:50:35.345795405 +0200 +@@ -3,3 +3,2 @@ + print("Hello") +--- World? + print("") +@@ -7,2 +6,3 @@ + print("") ++print("World") + print("") +@@ -10,2 +10 @@ + print("") +--- End +"#; + + #[test] + fn test_diff_header_ambiguous_3x_minus() { + // check ansi output to ensure output is highlighted + let result = DeltaTest::with_args(&[]) + .explain_ansi() + .with_input(DIFF_AMBIGUOUS_HEADER_3X_MINUS); + + assert_snapshot!(result.output, @r###" + (normal) + (blue)a.lua ⟶ b.lua(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)1(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (203)#(231)!(203)/(231)usr(203)/(231)bin(203)/(231)env lua(normal) + + (81)print(231)((186)"Hello"(231))(normal) + (normal 52)-- World?(normal) + (81)print(231)((186)".."(231))(normal) + + "###); + } + + #[test] + fn test_diff_header_ambiguous_3x_minus_concatenated() { + let result = DeltaTest::with_args(&[]) + .explain_ansi() + .with_input(&format!( + "{}{}{}", + DIFF_AMBIGUOUS_HEADER_MULTIPLE_HUNKS, + DIFF_AMBIGUOUS_HEADER_3X_MINUS, + DIFF_AMBIGUOUS_HEADER_3X_MINUS_LAST_LINE + )); + + assert_snapshot!(result.output, @r###" + (normal) + (blue)e.lua ⟶ f.lua(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)3(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (81)print(231)((186)"Hello"(231))(normal) + (normal 52)-- World?(normal) + (81)print(231)((186)""(231))(normal) + + (blue)───(blue)┐(normal) + (blue)6(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (81)print(231)((186)""(231))(normal) + (81 22)print(231)((186)"World"(231))(normal) + (81)print(231)((186)""(231))(normal) + + (blue)────(blue)┐(normal) + (blue)10(normal): (blue)│(normal) + (blue)────(blue)┘(normal) + (81)print(231)((186)""(231))(normal) + (normal 52)-- End(normal) + + (blue)a.lua ⟶ b.lua(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)1(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (203)#(231)!(203)/(231)usr(203)/(231)bin(203)/(231)env lua(normal) + + (81)print(231)((186)"Hello"(231))(normal) + (normal 52)-- World?(normal) + (81)print(231)((186)".."(231))(normal) + + (blue)c.lua ⟶ d.lua(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)1(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (203)#(231)!(203)/(231)usr(203)/(231)bin(203)/(231)env lua(normal) + + (81)print(231)((186)"Hello"(231))(normal) + (normal 52)-- World?(normal) + "###); + } + + #[test] + fn test_diff_header_ambiguous_3x_minus_extra_and_concatenated() { + let result = DeltaTest::with_args(&[]) + .explain_ansi() + .with_input(&format!( + "extra 1\n\n{}\nextra 2\n{}\nextra 3\n{}", + DIFF_AMBIGUOUS_HEADER_MULTIPLE_HUNKS, + DIFF_AMBIGUOUS_HEADER_3X_MINUS, + DIFF_AMBIGUOUS_HEADER_3X_MINUS_LAST_LINE + )); + + assert_snapshot!(result.output, @r###" + (normal)extra 1 + + + (blue)e.lua ⟶ f.lua(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)3(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (81)print(231)((186)"Hello"(231))(normal) + (normal 52)-- World?(normal) + (81)print(231)((186)""(231))(normal) + + (blue)───(blue)┐(normal) + (blue)6(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (81)print(231)((186)""(231))(normal) + (81 22)print(231)((186)"World"(231))(normal) + (81)print(231)((186)""(231))(normal) + + (blue)────(blue)┐(normal) + (blue)10(normal): (blue)│(normal) + (blue)────(blue)┘(normal) + (81)print(231)((186)""(231))(normal) + (normal 52)-- End(normal) + + extra 2 + + (blue)a.lua ⟶ b.lua(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)1(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (203)#(231)!(203)/(231)usr(203)/(231)bin(203)/(231)env lua(normal) + + (81)print(231)((186)"Hello"(231))(normal) + (normal 52)-- World?(normal) + (81)print(231)((186)".."(231))(normal) + + extra 3 + + (blue)c.lua ⟶ d.lua(normal) + (blue)───────────────────────────────────────────(normal) + + (blue)───(blue)┐(normal) + (blue)1(normal): (blue)│(normal) + (blue)───(blue)┘(normal) + (203)#(231)!(203)/(231)usr(203)/(231)bin(203)/(231)env lua(normal) + + (81)print(231)((186)"Hello"(231))(normal) + (normal 52)-- World?(normal) + "###); + } } diff --git a/src/handlers/hunk.rs b/src/handlers/hunk.rs index 30e87ebef..d485386e7 100644 --- a/src/handlers/hunk.rs +++ b/src/handlers/hunk.rs @@ -89,6 +89,7 @@ impl<'a> StateMachine<'a> { let line = prepare(&self.line, n_parents, self.config); let state = HunkMinus(diff_type, raw_line); self.painter.minus_lines.push((line, state.clone())); + self.minus_line_counter.count_line(); state } Some(HunkPlus(diff_type, raw_line)) => { @@ -111,6 +112,7 @@ impl<'a> StateMachine<'a> { let line = prepare(&self.line, n_parents, self.config); let state = State::HunkZero(diff_type, raw_line); self.painter.paint_zero_line(&line, state.clone()); + self.minus_line_counter.count_line(); state } _ => { diff --git a/src/handlers/hunk_header.rs b/src/handlers/hunk_header.rs index a34035b51..fd53377c0 100644 --- a/src/handlers/hunk_header.rs +++ b/src/handlers/hunk_header.rs @@ -72,6 +72,16 @@ impl<'a> StateMachine<'a> { | HunkPlus(diff_type, _) => diff_type.clone(), _ => Unified, }; + + if self.minus_line_counter.must_count() { + if let &[(_, minus_lines), (_, _plus_lines), ..] = + parsed_hunk_header.line_numbers_and_hunk_lengths.as_slice() + { + self.minus_line_counter = + delta::AmbiguousDiffMinusCounter::count_from(minus_lines); + } + } + self.state = HunkHeader( diff_type, parsed_hunk_header,