Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions gix-blame/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ gix-fs = { path = "../gix-fs" }
gix-index = { path = "../gix-index" }
gix-odb = { path = "../gix-odb" }
gix-testtools = { path = "../tests/tools" }
pretty_assertions = "1.4.0"
47 changes: 11 additions & 36 deletions gix-blame/src/file/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ use crate::{BlameEntry, Error, Options, Outcome, Statistics};
///
/// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that
/// represent the worktree state.
/// We begin with a single *Unblamed Hunk* and a single suspect, usually the `HEAD` commit as the commit containing the
///
/// We begin with one or more *Unblamed Hunks* and a single suspect, usually the `HEAD` commit as the commit containing the
/// *Blamed File*, so that it contains the entire file, with the first commit being a candidate for the entire *Blamed File*.
/// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`.
/// If so, we have found a *Source File* and a *Suspect* commit, and have hunks that represent these changes.
Expand Down Expand Up @@ -197,15 +198,16 @@ pub fn file(
if let Some(range_in_suspect) = hunk.get_range(&suspect) {
let range_in_blamed_file = hunk.range_in_blamed_file.clone();

for (blamed_line_number, source_line_number) in range_in_blamed_file.zip(range_in_suspect.clone()) {
let source_token = source_lines_as_tokens[source_line_number as usize];
let blame_token = blamed_lines_as_tokens[blamed_line_number as usize];

let source_line = BString::new(source_interner[source_token].into());
let blamed_line = BString::new(blamed_interner[blame_token].into());
let source_lines = range_in_suspect
.clone()
.map(|i| BString::new(source_interner[source_lines_as_tokens[i as usize]].into()))
.collect::<Vec<_>>();
let blamed_lines = range_in_blamed_file
.clone()
.map(|i| BString::new(blamed_interner[blamed_lines_as_tokens[i as usize]].into()))
.collect::<Vec<_>>();

assert_eq!(source_line, blamed_line);
}
assert_eq!(source_lines, blamed_lines);
}
}
}
Expand Down Expand Up @@ -302,33 +304,6 @@ pub fn file(
unblamed_hunk.remove_blame(suspect);
true
});

// This block asserts that line ranges for each suspect never overlap. If they did overlap
// this would mean that the same line in a *Source File* would map to more than one line in
// the *Blamed File* and this is not possible.
#[cfg(debug_assertions)]
{
let ranges = hunks_to_blame.iter().fold(
std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
|mut acc, hunk| {
for (suspect, range) in hunk.suspects.clone() {
acc.entry(suspect).or_default().push(range);
}

acc
},
);

for (_, mut ranges) in ranges {
ranges.sort_by(|a, b| a.start.cmp(&b.start));

for window in ranges.windows(2) {
if let [a, b] = window {
assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
}
}
}
}
}

debug_assert_eq!(
Expand Down
60 changes: 34 additions & 26 deletions gix-blame/src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ use crate::types::{BlameEntry, Change, Either, LineRange, Offset, UnblamedHunk};

pub(super) mod function;

/// Compare a section from the *Blamed File* (`hunk`) with a change from a diff and see if there
/// is an intersection with `change`. Based on that intersection, we may generate a [`BlameEntry`] for `out`
/// and/or split the `hunk` into multiple.
/// Compare a section from a potential *Source File* (`hunk`) with a change from a diff and see if
/// there is an intersection with `change`. Based on that intersection, we may generate a
/// [`BlameEntry`] for `out` and/or split the `hunk` into multiple.
///
/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*.
/// This is the core of the blame implementation as it matches regions in *Blamed File* to
/// corresponding regions in one or more than one *Source File*.
fn process_change(
new_hunks_to_blame: &mut Vec<UnblamedHunk>,
offset: &mut Offset,
Expand Down Expand Up @@ -320,36 +321,43 @@ fn process_change(

/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other.
/// Once a match is found, it's pushed onto `out`.
///
/// `process_changes` assumes that ranges coming from the same *Source File* can and do
/// occasionally overlap. If it were a desirable property of the blame algorithm as a whole to
/// never have two different lines from a *Blamed File* mapped to the same line in a *Source File*,
/// this property would need to be enforced at a higher level than `process_changes` if which case
/// the nested loops could potentially be flattened into one.
fn process_changes(
hunks_to_blame: Vec<UnblamedHunk>,
changes: Vec<Change>,
suspect: ObjectId,
parent: ObjectId,
) -> Vec<UnblamedHunk> {
let mut hunks_iter = hunks_to_blame.into_iter();
let mut changes_iter = changes.into_iter();
let mut new_hunks_to_blame = Vec::new();

let mut hunk = hunks_iter.next();
let mut change = changes_iter.next();
for hunk in hunks_to_blame {
let mut hunk = Some(hunk);

let mut new_hunks_to_blame = Vec::new();
let mut offset_in_destination = Offset::Added(0);

loop {
(hunk, change) = process_change(
&mut new_hunks_to_blame,
&mut offset_in_destination,
suspect,
parent,
hunk,
change,
);

hunk = hunk.or_else(|| hunks_iter.next());
change = change.or_else(|| changes_iter.next());

if hunk.is_none() && change.is_none() {
break;
let mut offset_in_destination = Offset::Added(0);

let mut changes_iter = changes.iter();
let mut change: Option<Change> = changes_iter.next().cloned();

loop {
(hunk, change) = process_change(
&mut new_hunks_to_blame,
&mut offset_in_destination,
suspect,
parent,
hunk,
change,
);

change = change.or_else(|| changes_iter.next().cloned());

if hunk.is_none() {
break;
}
}
}
new_hunks_to_blame
Expand Down
36 changes: 36 additions & 0 deletions gix-blame/src/file/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,8 @@ mod process_change {
}

mod process_changes {
use pretty_assertions::assert_eq;

use crate::file::{
process_changes,
tests::{new_unblamed_hunk, one_sha, zero_sha},
Expand Down Expand Up @@ -1337,4 +1339,38 @@ mod process_changes {
]
);
}

#[test]
fn subsequent_hunks_overlapping_end_of_addition() {
let suspect = zero_sha();
let parent = one_sha();
let hunks_to_blame = vec![
new_unblamed_hunk(13..16, suspect, Offset::Added(0)),
new_unblamed_hunk(10..17, suspect, Offset::Added(0)),
];
let changes = vec![Change::AddedOrReplaced(10..14, 0)];
let new_hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent);

assert_eq!(
new_hunks_to_blame,
[
UnblamedHunk {
range_in_blamed_file: 13..14,
suspects: [(suspect, 13..14)].into()
},
UnblamedHunk {
range_in_blamed_file: 14..16,
suspects: [(parent, 10..12)].into(),
},
UnblamedHunk {
range_in_blamed_file: 10..14,
suspects: [(suspect, 10..14)].into(),
},
UnblamedHunk {
range_in_blamed_file: 14..17,
suspects: [(parent, 10..13)].into(),
},
]
);
}
}
6 changes: 3 additions & 3 deletions gix-blame/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
//!
//! ### Terminology
//!
//! * **Source File**
//! - The file as it exists in `HEAD`.
//! - the initial state with all lines that we need to associate with a *Source File*.
//! * **Blamed File**
//! - The file as it exists in `HEAD`.
//! - the initial state with all lines that we need to associate with a *Blamed File*.
//! * **Source File**
//! - A file at a version (i.e. commit) that introduces hunks into the final 'image'.
//! * **Suspects**
//! - The versions of the files that can contain hunks that we could use in the final 'image'
Expand Down
5 changes: 4 additions & 1 deletion gix-blame/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,10 @@ pub(crate) enum Either<T, U> {
}

/// A single change between two blobs, or an unchanged region.
#[derive(Debug, PartialEq)]
///
/// Line numbers refer to the file that is referred to as `after` or `NewOrDestination`, depending
/// on the context.
#[derive(Clone, Debug, PartialEq)]
pub enum Change {
/// A range of tokens that wasn't changed.
Unchanged(Range<u32>),
Expand Down
Loading