diffy/diff/
mod.rs

1use crate::{
2    patch::{Hunk, HunkRange, Line, Patch},
3    range::{DiffRange, SliceLike},
4    utils::Classifier,
5};
6use std::{borrow::Cow, cmp, ops};
7
8mod cleanup;
9mod myers;
10
11#[cfg(test)]
12mod tests;
13
// TODO determine if this should be exposed in the public API
/// One span of a diff, borrowing its content from one of the two compared inputs.
#[allow(dead_code)]
#[derive(Debug, PartialEq, Eq)]
enum Diff<'a, T: ?Sized> {
    /// A span common to both inputs.
    Equal(&'a T),
    /// A span that is removed from the original input.
    Delete(&'a T),
    /// A span that is added by the modified input.
    Insert(&'a T),
}

// `Copy` and `Clone` are written by hand rather than derived: the derives would demand
// `T: Copy` / `T: Clone`, which unsized types such as `str` and `[u8]` cannot satisfy, even
// though every variant only stores a shared reference (which is always `Copy`).
impl<T> Copy for Diff<'_, T> where T: ?Sized {}

impl<T> Clone for Diff<'_, T>
where
    T: ?Sized,
{
    fn clone(&self) -> Self {
        *self
    }
}
30
31impl<'a, T> From<DiffRange<'a, 'a, T>> for Diff<'a, T>
32where
33    T: ?Sized + SliceLike,
34{
35    fn from(diff: DiffRange<'a, 'a, T>) -> Self {
36        match diff {
37            DiffRange::Equal(range, _) => Diff::Equal(range.as_slice()),
38            DiffRange::Delete(range) => Diff::Delete(range.as_slice()),
39            DiffRange::Insert(range) => Diff::Insert(range.as_slice()),
40        }
41    }
42}
43
/// A collection of options for modifying the way a diff is performed
///
/// The setters borrow the options mutably and hand back `&mut Self` so they can be chained.
/// `DiffOptions` is `Clone`, so the end of such a chain can be turned into an owned value:
/// `DiffOptions::new().set_context_len(5).clone()`.
#[derive(Debug, Clone)]
pub struct DiffOptions {
    /// Whether the compaction post-processing step is run on a computed diff
    compact: bool,
    /// Number of context lines used when producing a patch
    context_len: usize,
    /// Filename used in the patch for the original text
    original_filename: Option<Cow<'static, str>>,
    /// Filename used in the patch for the modified text
    modified_filename: Option<Cow<'static, str>>,
}
52
53impl DiffOptions {
54    /// Construct a new `DiffOptions` with default settings
55    ///
56    /// ## Defaults
57    /// * context_len = 3
58    pub fn new() -> Self {
59        Self {
60            compact: true,
61            context_len: 3,
62            original_filename: Some("original".into()),
63            modified_filename: Some("modified".into()),
64        }
65    }
66
67    /// Set the number of context lines that should be used when producing a patch
68    pub fn set_context_len(&mut self, context_len: usize) -> &mut Self {
69        self.context_len = context_len;
70        self
71    }
72
73    /// Enable/Disable diff compaction. Compaction is a post-processing step which attempts to
74    /// produce a prettier diff by reducing the number of edited blocks by shifting and merging
75    /// edit blocks.
76    // TODO determine if this should be exposed in the public API
77    #[allow(dead_code)]
78    fn set_compact(&mut self, compact: bool) -> &mut Self {
79        self.compact = compact;
80        self
81    }
82
83    /// Set the filename to be used in the patch for the original text
84    ///
85    /// If not set, the default value is "original".
86    pub fn set_original_filename<T>(&mut self, filename: T) -> &mut Self
87    where
88        T: Into<Cow<'static, str>>,
89    {
90        self.original_filename = Some(filename.into());
91        self
92    }
93
94    /// Set the filename to be used in the patch for the modified text
95    ///
96    /// If not set, the default value is "modified".
97    pub fn set_modified_filename<T>(&mut self, filename: T) -> &mut Self
98    where
99        T: Into<Cow<'static, str>>,
100    {
101        self.modified_filename = Some(filename.into());
102        self
103    }
104
105    // TODO determine if this should be exposed in the public API
106    #[allow(dead_code)]
107    fn diff<'a>(&self, original: &'a str, modified: &'a str) -> Vec<Diff<'a, str>> {
108        let solution = myers::diff(original.as_bytes(), modified.as_bytes());
109
110        let mut solution = solution
111            .into_iter()
112            .map(|diff_range| diff_range.to_str(original, modified))
113            .collect();
114
115        if self.compact {
116            cleanup::compact(&mut solution);
117        }
118
119        solution.into_iter().map(Diff::from).collect()
120    }
121
122    /// Produce a Patch between two texts based on the configured options
123    pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> {
124        let mut classifier = Classifier::default();
125        let (old_lines, old_ids) = classifier.classify_lines(original);
126        let (new_lines, new_ids) = classifier.classify_lines(modified);
127
128        let solution = self.diff_slice(&old_ids, &new_ids);
129
130        let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len);
131        Patch::new(
132            self.original_filename.clone(),
133            self.modified_filename.clone(),
134            hunks,
135        )
136    }
137
138    /// Create a patch between two potentially non-utf8 texts
139    pub fn create_patch_bytes<'a>(
140        &self,
141        original: &'a [u8],
142        modified: &'a [u8],
143    ) -> Patch<'a, [u8]> {
144        let mut classifier = Classifier::default();
145        let (old_lines, old_ids) = classifier.classify_lines(original);
146        let (new_lines, new_ids) = classifier.classify_lines(modified);
147
148        let solution = self.diff_slice(&old_ids, &new_ids);
149
150        let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len);
151
152        // helper function to convert a utf8 cow to a bytes cow
153        fn cow_str_to_bytes(cow: Cow<'static, str>) -> Cow<'static, [u8]> {
154            match cow {
155                Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
156                Cow::Owned(o) => Cow::Owned(o.into_bytes()),
157            }
158        }
159
160        Patch::new(
161            self.original_filename.clone().map(cow_str_to_bytes),
162            self.modified_filename.clone().map(cow_str_to_bytes),
163            hunks,
164        )
165    }
166
167    pub(crate) fn diff_slice<'a, T: PartialEq>(
168        &self,
169        old: &'a [T],
170        new: &'a [T],
171    ) -> Vec<DiffRange<'a, 'a, [T]>> {
172        let mut solution = myers::diff(old, new);
173
174        if self.compact {
175            cleanup::compact(&mut solution);
176        }
177
178        solution
179    }
180}
181
182impl Default for DiffOptions {
183    fn default() -> Self {
184        Self::new()
185    }
186}
187
188// TODO determine if this should be exposed in the public API
189#[allow(dead_code)]
190fn diff<'a>(original: &'a str, modified: &'a str) -> Vec<Diff<'a, str>> {
191    DiffOptions::default().diff(original, modified)
192}
193
194/// Create a patch between two texts.
195///
196/// ```
197/// # use diffy::create_patch;
198/// let original = "\
199/// I am afraid, however, that all I have known - that my story - will be forgotten.
200/// I am afraid for the world that is to come.
201/// Afraid that my plans will fail.
202/// Afraid of a doom worse than the Deepness.
203/// ";
204///
205/// let modified = "\
206/// I am afraid, however, that all I have known - that my story - will be forgotten.
207/// I am afraid for the world that is to come.
208/// Afraid that Alendi will fail.
209/// Afraid of a doom brought by the Deepness.
210/// ";
211///
212/// let expected = "\
213/// --- original
214/// +++ modified
215/// @@ -1,4 +1,4 @@
216///  I am afraid, however, that all I have known - that my story - will be forgotten.
217///  I am afraid for the world that is to come.
218/// -Afraid that my plans will fail.
219/// -Afraid of a doom worse than the Deepness.
220/// +Afraid that Alendi will fail.
221/// +Afraid of a doom brought by the Deepness.
222/// ";
223///
224/// let patch = create_patch(original, modified);
225/// assert_eq!(patch.to_string(), expected);
226/// ```
227pub fn create_patch<'a>(original: &'a str, modified: &'a str) -> Patch<'a, str> {
228    DiffOptions::default().create_patch(original, modified)
229}
230
231/// Create a patch between two potentially non-utf8 texts
232pub fn create_patch_bytes<'a>(original: &'a [u8], modified: &'a [u8]) -> Patch<'a, [u8]> {
233    DiffOptions::default().create_patch_bytes(original, modified)
234}
235
236fn to_hunks<'a, T: ?Sized>(
237    lines1: &[&'a T],
238    lines2: &[&'a T],
239    solution: &[DiffRange<[u64]>],
240    context_len: usize,
241) -> Vec<Hunk<'a, T>> {
242    let edit_script = build_edit_script(solution);
243
244    let mut hunks = Vec::new();
245
246    let mut idx = 0;
247    while let Some(mut script) = edit_script.get(idx) {
248        let start1 = script.old.start.saturating_sub(context_len);
249        let start2 = script.new.start.saturating_sub(context_len);
250
251        let (mut end1, mut end2) = calc_end(
252            context_len,
253            lines1.len(),
254            lines2.len(),
255            script.old.end,
256            script.new.end,
257        );
258
259        let mut lines = Vec::new();
260
261        // Pre-context
262        for line in lines2.get(start2..script.new.start).into_iter().flatten() {
263            lines.push(Line::Context(*line));
264        }
265
266        loop {
267            // Delete lines from text1
268            for line in lines1.get(script.old.clone()).into_iter().flatten() {
269                lines.push(Line::Delete(*line));
270            }
271
272            // Insert lines from text2
273            for line in lines2.get(script.new.clone()).into_iter().flatten() {
274                lines.push(Line::Insert(*line));
275            }
276
277            if let Some(s) = edit_script.get(idx + 1) {
278                // Check to see if we can merge the hunks
279                let start1_next =
280                    cmp::min(s.old.start, lines1.len() - 1).saturating_sub(context_len);
281                if start1_next < end1 {
282                    // Context lines between hunks
283                    for (_i1, i2) in (script.old.end..s.old.start).zip(script.new.end..s.new.start)
284                    {
285                        if let Some(line) = lines2.get(i2) {
286                            lines.push(Line::Context(*line));
287                        }
288                    }
289
290                    // Calc the new end
291                    let (e1, e2) = calc_end(
292                        context_len,
293                        lines1.len(),
294                        lines2.len(),
295                        s.old.end,
296                        s.new.end,
297                    );
298
299                    end1 = e1;
300                    end2 = e2;
301                    script = s;
302                    idx += 1;
303                    continue;
304                }
305            }
306
307            break;
308        }
309
310        // Post-context
311        for line in lines2.get(script.new.end..end2).into_iter().flatten() {
312            lines.push(Line::Context(*line));
313        }
314
315        let len1 = end1 - start1;
316        let old_range = HunkRange::new(if len1 > 0 { start1 + 1 } else { start1 }, len1);
317
318        let len2 = end2 - start2;
319        let new_range = HunkRange::new(if len2 > 0 { start2 + 1 } else { start2 }, len2);
320
321        hunks.push(Hunk::new(old_range, new_range, None, lines));
322        idx += 1;
323    }
324
325    hunks
326}
327
/// Compute where a hunk ends in both texts once trailing context is added.
///
/// The trailing context is `context_len` lines long, shortened to however many lines remain
/// after the edit in the shorter-remaining text. The same amount is added to both
/// `script1_end` and `script2_end`, and the pair `(end1, end2)` is returned.
fn calc_end(
    context_len: usize,
    text1_len: usize,
    text2_len: usize,
    script1_end: usize,
    script2_end: usize,
) -> (usize, usize) {
    // Lines left after the edit in each text; zero if the edit already reaches the end.
    let remaining1 = text1_len.saturating_sub(script1_end);
    let remaining2 = text2_len.saturating_sub(script2_end);

    let post_context_len = cmp::min(context_len, cmp::min(remaining1, remaining2));

    (
        script1_end + post_context_len,
        script2_end + post_context_len,
    )
}
348
/// A single edit: a range of indices in the old sequence paired with a range of indices in
/// the new sequence.
#[derive(Debug)]
struct EditRange {
    /// Index range in the old sequence
    old: ops::Range<usize>,
    /// Index range in the new sequence
    new: ops::Range<usize>,
}

impl EditRange {
    /// Pair up an `old` and a `new` index range.
    fn new(old: ops::Range<usize>, new: ops::Range<usize>) -> Self {
        EditRange { old, new }
    }
}
360
361fn build_edit_script<T>(solution: &[DiffRange<[T]>]) -> Vec<EditRange> {
362    let mut idx_a = 0;
363    let mut idx_b = 0;
364
365    let mut edit_script: Vec<EditRange> = Vec::new();
366    let mut script = None;
367
368    for diff in solution {
369        match diff {
370            DiffRange::Equal(range1, range2) => {
371                idx_a += range1.len();
372                idx_b += range2.len();
373                if let Some(script) = script.take() {
374                    edit_script.push(script);
375                }
376            }
377            DiffRange::Delete(range) => {
378                match &mut script {
379                    Some(s) => s.old.end += range.len(),
380                    None => {
381                        script = Some(EditRange::new(idx_a..idx_a + range.len(), idx_b..idx_b));
382                    }
383                }
384                idx_a += range.len();
385            }
386            DiffRange::Insert(range) => {
387                match &mut script {
388                    Some(s) => s.new.end += range.len(),
389                    None => {
390                        script = Some(EditRange::new(idx_a..idx_a, idx_b..idx_b + range.len()));
391                    }
392                }
393                idx_b += range.len();
394            }
395        }
396    }
397
398    if let Some(script) = script.take() {
399        edit_script.push(script);
400    }
401
402    edit_script
403}
404
#[cfg(test)]
mod test {
    use super::DiffOptions;

    /// Filenames configured through the setters must be the ones used in the `---` and `+++`
    /// lines of the rendered patch.
    #[test]
    fn set_original_and_modified_filenames() {
        let old_text = "\
I am afraid, however, that all I have known - that my story - will be forgotten.
I am afraid for the world that is to come.
Afraid that my plans will fail.
Afraid of a doom worse than the Deepness.
";
        let new_text = "\
I am afraid, however, that all I have known - that my story - will be forgotten.
I am afraid for the world that is to come.
Afraid that Alendi will fail.
Afraid of a doom brought by the Deepness.
";
        let expected = "\
--- the old version
+++ the better version
@@ -1,4 +1,4 @@
 I am afraid, however, that all I have known - that my story - will be forgotten.
 I am afraid for the world that is to come.
-Afraid that my plans will fail.
-Afraid of a doom worse than the Deepness.
+Afraid that Alendi will fail.
+Afraid of a doom brought by the Deepness.
";

        let mut options = DiffOptions::new();
        options
            .set_original_filename("the old version")
            .set_modified_filename("the better version");

        let patch = options.create_patch(old_text, new_text);

        assert_eq!(patch.to_string(), expected);
    }
}