From c49f12d8940a3b0d8b4ca1fcb3c40119cd8167dc Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 2 Apr 2023 20:45:38 +0200 Subject: [PATCH 01/30] allow access to index timestamp --- gix-index/src/access/mod.rs | 6 ++++++ gix-index/src/entry/mod.rs | 24 ++++++++++++++++++++++++ gix-index/src/lib.rs | 1 - 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/gix-index/src/access/mod.rs b/gix-index/src/access/mod.rs index e8f2dc9f861..5d161d49dcd 100644 --- a/gix-index/src/access/mod.rs +++ b/gix-index/src/access/mod.rs @@ -1,6 +1,7 @@ use std::cmp::Ordering; use bstr::{BStr, ByteSlice, ByteVec}; +use filetime::FileTime; use crate::{entry, extension, Entry, PathStorage, State, Version}; @@ -15,6 +16,11 @@ impl State { self.version } + /// Returns time at which the state was created, indicating its freshness compared to other files on disk. + pub fn timestamp(&self) -> FileTime { + self.timestamp + } + /// Return the kind of hashes used in this instance. pub fn object_hash(&self) -> gix_hash::Kind { self.object_hash diff --git a/gix-index/src/entry/mod.rs b/gix-index/src/entry/mod.rs index 165df801e6d..b7a5ab3f27b 100644 --- a/gix-index/src/entry/mod.rs +++ b/gix-index/src/entry/mod.rs @@ -2,6 +2,9 @@ pub type Stage = u32; mod mode; +use std::cmp::Ordering; + +use filetime::FileTime; pub use mode::Mode; mod flags; @@ -20,6 +23,27 @@ pub struct Time { pub nsecs: u32, } +impl From for Time { + fn from(value: FileTime) -> Self { + Time { + secs: value.unix_seconds().try_into().expect("can't represent non-unix times"), + nsecs: value.nanoseconds(), + } + } +} + +impl PartialEq for Time { + fn eq(&self, other: &FileTime) -> bool { + *self == Time::from(*other) + } +} + +impl PartialOrd for Time { + fn partial_cmp(&self, other: &FileTime) -> Option { + self.partial_cmp(&Time::from(*other)) + } +} + /// An entry's filesystem stat information. 
#[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] diff --git a/gix-index/src/lib.rs b/gix-index/src/lib.rs index d8451c54566..fd763a15154 100644 --- a/gix-index/src/lib.rs +++ b/gix-index/src/lib.rs @@ -91,7 +91,6 @@ pub struct State { /// /// Note that on platforms that only have a precisions of a second for this time, we will treat all entries with the /// same timestamp as this as potentially changed, checking more thoroughly if a change actually happened. - #[allow(dead_code)] timestamp: FileTime, version: Version, entries: Vec, From 38e228c61be79e2c03457a69242b2abd523f755b Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 2 Apr 2023 20:46:28 +0200 Subject: [PATCH 02/30] add function to read blob from worktree --- gix-worktree/src/lib.rs | 2 + gix-worktree/src/read.rs | 79 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 gix-worktree/src/read.rs diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs index 9a67e0289ee..ef46df726da 100644 --- a/gix-worktree/src/lib.rs +++ b/gix-worktree/src/lib.rs @@ -13,3 +13,5 @@ pub mod fs; pub mod index; pub(crate) mod os; +/// +pub mod read; diff --git a/gix-worktree/src/read.rs b/gix-worktree/src/read.rs new file mode 100644 index 00000000000..d61fb7095c1 --- /dev/null +++ b/gix-worktree/src/read.rs @@ -0,0 +1,79 @@ +use crate::fs; +use gix_object::Blob; +use gix_path as path; +use std::borrow::Cow; +use std::fs::{read_link, File}; +use std::io; +use std::io::Read; +use std::path::Path; + +/// Error returned by [`Blob::read`] and related functions +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// + #[error("Could not convert symlink path to UTF8")] + IllformedUtf8, + /// + #[error("IO error while reading blob")] + Io(#[from] io::Error), +} + +// TODO: what to do about precompose unicode and ignore case_here? 
+ +/// Create a blob from a file/symlink +pub fn read_blob(path: &Path, capabilities: &fs::Capabilities) -> Result { + let mut buf = Vec::new(); + let res = read_blob_to_buf(path, &mut buf, capabilities)?; + match res { + Cow::Borrowed(_) => Ok(Blob { data: buf }), + Cow::Owned(data) => Ok(Blob { data }), + } +} + +/// Create a blob from a file/symlink +pub fn read_blob_with_meta(path: &Path, is_symlink: bool, capabilities: &fs::Capabilities) -> Result { + let mut buf = Vec::new(); + let res = read_blob_to_buf_with_meta(path, is_symlink, &mut buf, capabilities)?; + match res { + Cow::Borrowed(_) => Ok(Blob { data: buf }), + Cow::Owned(data) => Ok(Blob { data }), + } +} + +// TODO: there is no reason this should be a Cow +// std isn't great about allowing users to avoid allocations but we could +// simply write our own wrapper around libc::readlink which reuses the +// buffer. This would require unsafe code tough (obviously) + +/// Create blob data from a file/symlink +pub fn read_blob_to_buf<'a>( + path: &Path, + buf: &'a mut Vec, + capabilities: &fs::Capabilities, +) -> Result, Error> { + read_blob_to_buf_with_meta(path, path.symlink_metadata()?.is_symlink(), buf, capabilities) +} + +/// Create a blob from a file/symlink +pub fn read_blob_to_buf_with_meta<'a>( + path: &Path, + is_symlink: bool, + buf: &'a mut Vec, + capabilities: &fs::Capabilities, +) -> Result, Error> { + // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just + // normal files with their content equal to the linked path (so can be read normally) + // + if is_symlink && capabilities.symlink { + let symlink_path = path::try_into_bstr(read_link(path)?).map_err(|_| Error::IllformedUtf8)?; + match path::to_unix_separators_on_windows(symlink_path) { + Cow::Borrowed(path) => Ok(Cow::Borrowed(path.as_ref())), + Cow::Owned(path) => Ok(Cow::Owned(path.into())), + } + } else { + buf.clear(); + File::open(path)?.read_to_end(buf)?; + // TODO apply filters + 
Ok(buf.as_slice().into()) + } +} From efcbf0d1cb1c9d77eaf04fbcf6e86dc101c886d2 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 2 Apr 2023 20:47:24 +0200 Subject: [PATCH 03/30] accept paths in scripted_fixture_writable --- tests/tools/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs index 46734759d13..2e0bf6a469e 100644 --- a/tests/tools/src/lib.rs +++ b/tests/tools/src/lib.rs @@ -288,7 +288,7 @@ pub fn scripted_fixture_read_only_standalone(script_name: impl AsRef) -> R /// the tempdir is returned. It will be removed automatically, courtesy of [`tempfile::TempDir`]. /// /// Note that `script_name` is only executed once, so the data can be copied from its read-only location. -pub fn scripted_fixture_writable(script_name: &str) -> Result { +pub fn scripted_fixture_writable(script_name: impl AsRef) -> Result { scripted_fixture_writable_with_args(script_name, None::, Creation::CopyFromReadOnly) } @@ -300,7 +300,7 @@ pub fn scripted_fixture_writable_standalone(script_name: &str) -> Result, args: impl IntoIterator>, mode: Creation, ) -> Result { @@ -317,7 +317,7 @@ pub fn scripted_fixture_writable_with_args_standalone( } fn scripted_fixture_writable_with_args_inner( - script_name: &str, + script_name: impl AsRef, args: impl IntoIterator>, mode: Creation, root: DirectoryRoot, From 0a8e50f64a8a730fbbd14b465e08d96ebfcf697d Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Sun, 2 Apr 2023 20:49:05 +0200 Subject: [PATCH 04/30] feat: diff between worktree and index --- gix-worktree/src/diff.rs | 292 ++++++++++++++++++++++ gix-worktree/src/lib.rs | 7 + gix-worktree/src/untracked.rs | 1 + gix-worktree/tests/worktree/index/diff.rs | 75 ++++++ gix-worktree/tests/worktree/index/mod.rs | 1 + 5 files changed, 376 insertions(+) create mode 100644 gix-worktree/src/diff.rs create mode 100644 gix-worktree/src/untracked.rs create mode 100644 gix-worktree/tests/worktree/index/diff.rs diff --git 
a/gix-worktree/src/diff.rs b/gix-worktree/src/diff.rs new file mode 100644 index 00000000000..a7b734eb273 --- /dev/null +++ b/gix-worktree/src/diff.rs @@ -0,0 +1,292 @@ +use std::io::{self, ErrorKind}; +use std::path::{Path, PathBuf}; +use std::time::{Duration, SystemTimeError}; + +use bstr::BString; +use gix_features::hash; +use gix_hash::ObjectId; +use gix_index as index; +use gix_object::encode::loose_header; +use gix_path as path; + +use crate::fs; +use crate::read::{self, read_blob_to_buf_with_meta}; + +/// How the mode of an index entry has changed +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum ModeChange { + /// Shown as `typechange` in git status + /// For example if a normal file was replaced with a symlink. + /// Note: Except for submodules only files/symlinks are present in the + /// the index so anything turning into a directory is counted as a removal + TypeChange, + /// The executable bit of a file changed + ExecutableChange, +} + +/// How a worktree file changed compared to an index entry +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct FileModification { + /// How the mode has changed + pub mode_change: Option, + /// mtime/ctime changed. If this is false then we can assume + /// that the file is uncahged (with the exception of racy timestamps). + /// If this is true however the file might still be unchaged. We need + /// to read the file from disk and compare it to the object in + /// index. + pub stat_changed: bool, + /// The data of this entry has changed. This can be quickly + /// determined if the size of the stat data is mismatched. 
+ /// Otherwise a data change must be detected by reading the file + /// from disk and comparing it to the file stored in the index + /// (only needs to be done if `self.stat_changed` is true) + pub data_changed: bool, +} + +impl FileModification { + /// Computes the status of an entry by comparing its stat to `symlink_metadata()` + pub fn from_stat( + entry: &index::Entry, + fs_stat: &std::fs::Metadata, + capabilites: &fs::Capabilities, + ) -> Result { + #[cfg(unix)] + use std::os::unix::fs::MetadataExt; + + let mode_change = match entry.mode { + index::entry::Mode::FILE if !fs_stat.is_file() => Some(ModeChange::TypeChange), + #[cfg(unix)] + index::entry::Mode::FILE if capabilites.executable_bit && fs_stat.mode() & 0o111 != 0 => { + Some(ModeChange::ExecutableChange) + } + #[cfg(unix)] + index::entry::Mode::FILE_EXECUTABLE if capabilites.executable_bit && fs_stat.mode() & 0o111 == 0 => { + Some(ModeChange::ExecutableChange) + } + index::entry::Mode::SYMLINK if capabilites.symlink && !fs_stat.is_symlink() => Some(ModeChange::TypeChange), + index::entry::Mode::SYMLINK if !capabilites.symlink && !fs_stat.is_file() => Some(ModeChange::TypeChange), + index::entry::Mode::COMMIT if !fs_stat.is_dir() => Some(ModeChange::TypeChange), + _ => None, // TODO: log/errror invalid file type + }; + + let data_changed = entry.stat.size as u64 != fs_stat.len(); + + let ctime = fs_stat + .created() + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; + let mtime = fs_stat + .modified() + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; + + let stat = &entry.stat; + let stat_changed = stat.mtime.secs + != mtime + .as_secs() + .try_into() + .expect("by 2038 we found a solution for this") + || stat.mtime.nsecs != mtime.subsec_nanos() + || stat.ctime.secs + != ctime + .as_secs() + .try_into() + .expect("by 2038 we found a solution for this") + || stat.ctime.nsecs != ctime.subsec_nanos(); + + Ok(Self { + mode_change, + 
stat_changed, + data_changed, + }) + } + + /// Marks this entries stats as changed if there is a potential fs race condition + pub fn detect_racy_stat(&mut self, index: &index::State, index_entry: &index::Entry) { + self.stat_changed = self.stat_changed || index_entry.stat.mtime >= index.timestamp() + } + + /// returns true if this entry has any changes + /// usually `detect_racy_stat` should be called first to avoid race condition + pub fn changed(&self) -> bool { + self.mode_change.is_some() || self.stat_changed || self.data_changed + } + + /// Reads the worktree file from the disk and compares it to + /// the index entries oid to check if the actual data of the file is changed + /// and sets [`Entry::data_changed`] accordingly + pub fn compare_data( + &mut self, + worktree_path: &Path, + index_entry: &index::Entry, + buf: &mut Vec, + capabilities: &fs::Capabilities, + ) -> Result<(), read::Error> { + if self.mode_change.is_some() || !self.stat_changed || self.data_changed { + return Ok(()); + } + let data = read_blob_to_buf_with_meta( + worktree_path, + index_entry.mode.contains(index::entry::Mode::SYMLINK), + buf, + capabilities, + )?; + let header = loose_header(gix_object::Kind::Blob, data.len()); + let hash_changed = match index_entry.id { + ObjectId::Sha1(entry_hash) => { + let mut file_hash = hash::Sha1::default(); + file_hash.update(&header); + file_hash.update(&data); + let file_hash = file_hash.digest(); + entry_hash != file_hash + } + }; + self.data_changed = hash_changed; + Ok(()) + } +} + +#[allow(missing_docs)] +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Could not convert path to UTF8 {path}")] + IllformedUtf8 { path: BString }, + #[error("The clock was off when reading file related metadata after updating a file on disk")] + Time(#[from] std::time::SystemTimeError), + #[error("IO error while writing blob or reading file metadata or changing filetype")] + Io(#[from] io::Error), +} + +#[derive(Clone, Debug)] +/// A change between 
the index and the worktree computed by [`compate_to_index`] +pub struct Change<'a> { + /// The index entry that changed + pub index_entry: &'a index::Entry, + /// The on-disk worktree path corresponding to this entry + pub worktree_path: PathBuf, + /// How this index entry changed + pub kind: ChangeKind, + /// file metadata that can be reused (optimization) + pub fstat: Option, +} + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +/// +pub enum ChangeKind { + /// An index entry has no corresponding file in the worktree + Removed, + /// Ar new files that has been marked with git add but has not yet been + /// checked in yet. No diff is computed for these files because whatever is + /// on disk at commit time will be used + Added, + /// Called for files that may have changed in some form as indicated by `change`. + /// Note that this doesn't necessarily mean that the *content* of the file changed + /// see [`FileStatus`] for details + Modified { + /// How the file was modified exactly + modification: FileModification, + /// Whether this (changed) file also has an unresolved merge conflict + conflict: bool, + }, + /// There are unresolved merge conflicts for this file + /// but it has not changed on disk + Conflict, +} + +/// Computes the changes between the index and the worktree +pub fn compare_to_index<'a: 'b, 'b>( + index: &'a index::State, + // TODO: use worktree cache instead + worktree: &'b Path, + capabilities: &'b fs::Capabilities, +) -> impl Iterator, Error>> + 'b { + // TODO: parallel with rayon + index.entries().iter().filter_map(|index_entry| { + let conflict = match index_entry.stage() { + 0 => false, + 1 => true, + _ => return None, + }; + let git_path = index_entry.path(index); + if index_entry.flags.intersects( + index::entry::Flags::UPTODATE + | index::entry::Flags::SKIP_WORKTREE + | index::entry::Flags::ASSUME_VALID + | index::entry::Flags::FSMONITOR_VALID, + ) { + return None; + } + + let path = if let Ok(path) = 
path::try_from_bstr(git_path) { + path + } else { + return Some(Err(Error::IllformedUtf8 { + path: git_path.to_owned(), + })); + }; + + let worktree_path = worktree.join(path); + let metadata = match worktree_path.symlink_metadata() { + // TODO: check if any parent directory is a symlink + // we need to use fs::Cache for that + Ok(metadata) if metadata.is_dir() => { + // index entries are normally only for files/symlinks + // if a file turned into a directory it was removed + // the only exception here are submodules which are + // part of the index despite being directories + // + // TODO: submodules: + // if entry.mode.contains(Mode::COMMIT) && + // resolve_gitlink_ref(ce->name, "HEAD", &sub)) + return Some(Ok(Change { + kind: ChangeKind::Removed, + index_entry, + worktree_path, + fstat: Some(metadata), + })); + } + Ok(metdata) => metdata, + Err(err) if err.kind() == ErrorKind::NotFound => { + return Some(Ok(Change { + kind: ChangeKind::Removed, + index_entry, + worktree_path, + fstat: None, + })) + } + Err(err) => { + // TODO: strict mode? 
+ return Some(Err(err.into())); + } + }; + if index_entry.flags.contains(index::entry::Flags::INTENT_TO_ADD) { + return Some(Ok(Change { + kind: ChangeKind::Added, + index_entry, + worktree_path, + fstat: None, + })); + } + let mut change = match FileModification::from_stat(index_entry, &metadata, capabilities) { + Ok(change) => change, + Err(err) => return Some(Err(err.into())), + }; + change.detect_racy_stat(index, index_entry); + + let kind = if change.changed() { + ChangeKind::Modified { + modification: change, + conflict, + } + } else if conflict { + ChangeKind::Conflict + } else { + return None; + }; + + Some(Ok(Change { + kind, + index_entry, + worktree_path, + fstat: Some(metadata), + })) + }) +} diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs index ef46df726da..8bae7a724b8 100644 --- a/gix-worktree/src/lib.rs +++ b/gix-worktree/src/lib.rs @@ -13,5 +13,12 @@ pub mod fs; pub mod index; pub(crate) mod os; + +/// +pub mod diff; + +/// +pub mod untracked; + /// pub mod read; diff --git a/gix-worktree/src/untracked.rs b/gix-worktree/src/untracked.rs new file mode 100644 index 00000000000..6e77d7fa3ba --- /dev/null +++ b/gix-worktree/src/untracked.rs @@ -0,0 +1 @@ +// TODO: untracked file detection, needs fs::Cache diff --git a/gix-worktree/tests/worktree/index/diff.rs b/gix-worktree/tests/worktree/index/diff.rs new file mode 100644 index 00000000000..254365bf8a8 --- /dev/null +++ b/gix-worktree/tests/worktree/index/diff.rs @@ -0,0 +1,75 @@ +use std::fs::{self}; +use std::path::Path; + +use bstr::BString; +use gix_worktree as worktree; +use worktree::diff::{ChangeKind, FileModification}; + +fn compute_diff(name: &str, make_worktree_dirty: impl FnOnce(&Path)) -> Vec<(ChangeKind, BString)> { + let work_tree = + gix_testtools::scripted_fixture_writable(Path::new(name).with_extension("sh")).expect("script works"); + let git_dir = work_tree.path().join(".git"); + make_worktree_dirty(work_tree.path()); + let index = 
gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); + let capapilites = worktree::fs::Capabilities::probe(git_dir); + let mut buf = Vec::with_capacity(8 * 1024); + worktree::diff::compare_to_index(&index, work_tree.path(), &capapilites) + .filter_map(|change| { + let mut change = change.unwrap(); + if let ChangeKind::Modified { + ref mut modification, .. + } = &mut change.kind + { + modification + .compare_data(&change.worktree_path, change.index_entry, &mut buf, &capapilites) + .unwrap(); + if modification.mode_change.is_none() && !modification.data_changed { + return None; + } + } + Some((change.kind, change.index_entry.path(&index).to_owned())) + }) + .collect() +} + +#[test] +fn removed() { + let diff = compute_diff("make_mixed_without_submodules", |path| { + fs::remove_file(path.join("executable")).unwrap(); + fs::remove_file(path.join("dir/content")).unwrap(); + fs::remove_file(path.join("dir/sub-dir/symlink")).unwrap(); + }); + + assert_eq!( + diff, + vec![ + (ChangeKind::Removed, BString::new(b"dir/content".to_vec())), + (ChangeKind::Removed, BString::new(b"dir/sub-dir/symlink".to_vec())), + (ChangeKind::Removed, BString::new(b"executable".to_vec())), + ] + ) +} + +#[test] +fn changed() { + let diff = compute_diff("make_mixed_without_submodules", |path| { + fs::write(path.join("dir/content"), "hello_world").unwrap(); + // write same content to this file to simulate a touch command + fs::write(path.join("executable"), "content").unwrap(); + }); + + assert_eq!( + diff, + vec![( + ChangeKind::Modified { + modification: FileModification { + mode_change: None, + stat_changed: true, + data_changed: true + }, + conflict: false + }, + BString::new(b"dir/content".to_vec()) + ),] + ) +} diff --git a/gix-worktree/tests/worktree/index/mod.rs b/gix-worktree/tests/worktree/index/mod.rs index 24370dce48a..1f31dafe6a8 100644 --- a/gix-worktree/tests/worktree/index/mod.rs +++ b/gix-worktree/tests/worktree/index/mod.rs @@ -1 +1,2 @@ 
mod checkout; +mod diff; From 31ddda2ae47f68c9a487d12c1f7ffa1d14de13d1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 3 Apr 2023 13:17:33 +0200 Subject: [PATCH 05/30] refactor - improve docs - fix doc-links - some name changes to be more inline with `gix-*` conventions` --- gix-worktree/src/diff.rs | 156 ++++++++++++---------- gix-worktree/src/read.rs | 45 ++++--- gix-worktree/tests/worktree/index/diff.rs | 4 +- 3 files changed, 110 insertions(+), 95 deletions(-) diff --git a/gix-worktree/src/diff.rs b/gix-worktree/src/diff.rs index a7b734eb273..ca0b32d3fcb 100644 --- a/gix-worktree/src/diff.rs +++ b/gix-worktree/src/diff.rs @@ -10,45 +10,51 @@ use gix_object::encode::loose_header; use gix_path as path; use crate::fs; -use crate::read::{self, read_blob_to_buf_with_meta}; +use crate::read; -/// How the mode of an index entry has changed +/// How the mode of an index entry has changed. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] pub enum ModeChange { - /// Shown as `typechange` in git status - /// For example if a normal file was replaced with a symlink. - /// Note: Except for submodules only files/symlinks are present in the - /// the index so anything turning into a directory is counted as a removal + /// Shown as `typechange` in `git status`. + /// + /// For example, this happens if a normal file was replaced with a symlink. + /// **Note**: A directory turning into a file or vice-versa is not counted as `TypeChange`, + /// but as addition and removal respectively. TypeChange, - /// The executable bit of a file changed + /// The executable bit of a file changed. ExecutableChange, } -/// How a worktree file changed compared to an index entry +/// How a worktree file changed compared to an index entry. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct FileModification { - /// How the mode has changed + /// If not `None`, the file mode was changed. pub mode_change: Option, - /// mtime/ctime changed. 
If this is false then we can assume - /// that the file is uncahged (with the exception of racy timestamps). - /// If this is true however the file might still be unchaged. We need - /// to read the file from disk and compare it to the object in - /// index. + /// The `mtime` or `ctime` changed. + /// + /// If this is `false` then we can assume the file is unchanged + /// assuming that timestamps where not racy (see [`detect_racy_stat()`][Self::detect_racy_stat()]). + /// If this is `true`, the file might still be unchanged, and to be perfectly sure we would need + /// to read the file from disk and compare it to the object in index. pub stat_changed: bool, - /// The data of this entry has changed. This can be quickly - /// determined if the size of the stat data is mismatched. + /// The data of this entry has changed. + /// + /// This can be quickly determined if the size of the stat data is mismatched. /// Otherwise a data change must be detected by reading the file /// from disk and comparing it to the file stored in the index - /// (only needs to be done if `self.stat_changed` is true) + /// This only needs to be done if `stat_changed` is `true`. pub data_changed: bool, } +/// Instantiation impl FileModification { - /// Computes the status of an entry by comparing its stat to `symlink_metadata()` + /// Computes the status of an `entry` by comparing it with its `fs_stat` while respecting filesystem `capabilities`. + /// + /// It does so exclusively by looking at the filesystem stats. 
pub fn from_stat( entry: &index::Entry, fs_stat: &std::fs::Metadata, - capabilites: &fs::Capabilities, + capabilities: &fs::Capabilities, ) -> Result { #[cfg(unix)] use std::os::unix::fs::MetadataExt; @@ -56,17 +62,19 @@ impl FileModification { let mode_change = match entry.mode { index::entry::Mode::FILE if !fs_stat.is_file() => Some(ModeChange::TypeChange), #[cfg(unix)] - index::entry::Mode::FILE if capabilites.executable_bit && fs_stat.mode() & 0o111 != 0 => { + index::entry::Mode::FILE if capabilities.executable_bit && fs_stat.mode() & 0o111 != 0 => { Some(ModeChange::ExecutableChange) } #[cfg(unix)] - index::entry::Mode::FILE_EXECUTABLE if capabilites.executable_bit && fs_stat.mode() & 0o111 == 0 => { + index::entry::Mode::FILE_EXECUTABLE if capabilities.executable_bit && fs_stat.mode() & 0o111 == 0 => { Some(ModeChange::ExecutableChange) } - index::entry::Mode::SYMLINK if capabilites.symlink && !fs_stat.is_symlink() => Some(ModeChange::TypeChange), - index::entry::Mode::SYMLINK if !capabilites.symlink && !fs_stat.is_file() => Some(ModeChange::TypeChange), + index::entry::Mode::SYMLINK if capabilities.symlink && !fs_stat.is_symlink() => { + Some(ModeChange::TypeChange) + } + index::entry::Mode::SYMLINK if !capabilities.symlink && !fs_stat.is_file() => Some(ModeChange::TypeChange), index::entry::Mode::COMMIT if !fs_stat.is_dir() => Some(ModeChange::TypeChange), - _ => None, // TODO: log/errror invalid file type + _ => None, // TODO: log/error invalid file type }; let data_changed = entry.stat.size as u64 != fs_stat.len(); @@ -98,39 +106,45 @@ impl FileModification { data_changed, }) } +} - /// Marks this entries stats as changed if there is a potential fs race condition +/// Modification +impl FileModification { + /// Marks this entry's stats as changed if there is a potential filesystem race condition. 
pub fn detect_racy_stat(&mut self, index: &index::State, index_entry: &index::Entry) { self.stat_changed = self.stat_changed || index_entry.stat.mtime >= index.timestamp() } - /// returns true if this entry has any changes - /// usually `detect_racy_stat` should be called first to avoid race condition - pub fn changed(&self) -> bool { + /// Returns true if this instance has any changes. + /// + /// The [`detect_racy_stat()`][Self::detect_racy_stat()] method should be called first to account for race conditions. + pub fn is_changed(&self) -> bool { self.mode_change.is_some() || self.stat_changed || self.data_changed } - /// Reads the worktree file from the disk and compares it to - /// the index entries oid to check if the actual data of the file is changed - /// and sets [`Entry::data_changed`] accordingly + /// Read the worktree file denoted by `entry` from the disk rooted at `worktree_path` into `buf` and compare + /// it to the index entry's hash to check if the actual data of the file is changed to set [`Self::data_changed`] accordingly, + /// while respecting the filesystem's `capabilities`. + /// + /// Does no computation if we are already sure that the file has or hasn't changed. 
pub fn compare_data( &mut self, worktree_path: &Path, - index_entry: &index::Entry, + entry: &index::Entry, buf: &mut Vec, capabilities: &fs::Capabilities, ) -> Result<(), read::Error> { if self.mode_change.is_some() || !self.stat_changed || self.data_changed { return Ok(()); } - let data = read_blob_to_buf_with_meta( + let data = read::data_with_buf_and_meta( worktree_path, - index_entry.mode.contains(index::entry::Mode::SYMLINK), buf, + entry.mode.contains(index::entry::Mode::SYMLINK), capabilities, )?; let header = loose_header(gix_object::Kind::Blob, data.len()); - let hash_changed = match index_entry.id { + let hash_changed = match entry.id { ObjectId::Sha1(entry_hash) => { let mut file_hash = hash::Sha1::default(); file_hash.update(&header); @@ -144,8 +158,9 @@ impl FileModification { } } -#[allow(missing_docs)] +/// The error returned by [`compare_to_index()`]. #[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] pub enum Error { #[error("Could not convert path to UTF8 {path}")] IllformedUtf8 { path: BString }, @@ -155,58 +170,52 @@ pub enum Error { Io(#[from] io::Error), } +/// A change between the index and the worktree computed by [`compare_to_index`]. #[derive(Clone, Debug)] -/// A change between the index and the worktree computed by [`compate_to_index`] pub struct Change<'a> { - /// The index entry that changed - pub index_entry: &'a index::Entry, - /// The on-disk worktree path corresponding to this entry + /// The index entry that changed. + pub entry: &'a index::Entry, + /// The on-disk worktree path corresponding to this entry. pub worktree_path: PathBuf, - /// How this index entry changed + /// How this index entry changed. pub kind: ChangeKind, - /// file metadata that can be reused (optimization) + /// File metadata observed from disk that can be reused (optimization). pub fstat: Option, } +/// The nature of a the difference between the index and the worktree. 
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -/// pub enum ChangeKind { - /// An index entry has no corresponding file in the worktree + /// An index entry has no corresponding file in the worktree. Removed, - /// Ar new files that has been marked with git add but has not yet been + /// A new file that has been marked with git add but has not yet been /// checked in yet. No diff is computed for these files because whatever is - /// on disk at commit time will be used + /// on disk at commit time will be used. Added, /// Called for files that may have changed in some form as indicated by `change`. - /// Note that this doesn't necessarily mean that the *content* of the file changed - /// see [`FileStatus`] for details + /// + /// Note that this doesn't necessarily mean that the *content* of the file changed. Modified { /// How the file was modified exactly modification: FileModification, - /// Whether this (changed) file also has an unresolved merge conflict + /// Whether this changed file also has an unresolved merge conflict. conflict: bool, }, - /// There are unresolved merge conflicts for this file - /// but it has not changed on disk + /// There are unresolved merge conflicts for this file but it has not changed on disk. Conflict, } -/// Computes the changes between the index and the worktree +/// Computes the changes needed to turn the `index` into the `worktree` (as identified by its root), +/// while respecting the filesystem's `capabilities`. 
pub fn compare_to_index<'a: 'b, 'b>( index: &'a index::State, // TODO: use worktree cache instead worktree: &'b Path, capabilities: &'b fs::Capabilities, ) -> impl Iterator, Error>> + 'b { - // TODO: parallel with rayon - index.entries().iter().filter_map(|index_entry| { - let conflict = match index_entry.stage() { - 0 => false, - 1 => true, - _ => return None, - }; - let git_path = index_entry.path(index); - if index_entry.flags.intersects( + index.entries().iter().filter_map(|entry| { + let git_path = entry.path(index); + if entry.flags.intersects( index::entry::Flags::UPTODATE | index::entry::Flags::SKIP_WORKTREE | index::entry::Flags::ASSUME_VALID @@ -226,7 +235,7 @@ pub fn compare_to_index<'a: 'b, 'b>( let worktree_path = worktree.join(path); let metadata = match worktree_path.symlink_metadata() { // TODO: check if any parent directory is a symlink - // we need to use fs::Cache for that + // we need to use fs::Cache for that Ok(metadata) if metadata.is_dir() => { // index entries are normally only for files/symlinks // if a file turned into a directory it was removed @@ -238,16 +247,16 @@ pub fn compare_to_index<'a: 'b, 'b>( // resolve_gitlink_ref(ce->name, "HEAD", &sub)) return Some(Ok(Change { kind: ChangeKind::Removed, - index_entry, + entry, worktree_path, fstat: Some(metadata), })); } - Ok(metdata) => metdata, + Ok(metadata) => metadata, Err(err) if err.kind() == ErrorKind::NotFound => { return Some(Ok(Change { kind: ChangeKind::Removed, - index_entry, + entry, worktree_path, fstat: None, })) @@ -257,21 +266,26 @@ pub fn compare_to_index<'a: 'b, 'b>( return Some(Err(err.into())); } }; - if index_entry.flags.contains(index::entry::Flags::INTENT_TO_ADD) { + if entry.flags.contains(index::entry::Flags::INTENT_TO_ADD) { return Some(Ok(Change { kind: ChangeKind::Added, - index_entry, + entry, worktree_path, fstat: None, })); } - let mut change = match FileModification::from_stat(index_entry, &metadata, capabilities) { + let mut change = match 
FileModification::from_stat(entry, &metadata, capabilities) { Ok(change) => change, Err(err) => return Some(Err(err.into())), }; - change.detect_racy_stat(index, index_entry); + change.detect_racy_stat(index, entry); - let kind = if change.changed() { + let conflict = match entry.stage() { + 0 => false, + 1 => true, + _ => return None, + }; + let kind = if change.is_changed() { ChangeKind::Modified { modification: change, conflict, @@ -284,7 +298,7 @@ pub fn compare_to_index<'a: 'b, 'b>( Some(Ok(Change { kind, - index_entry, + entry, worktree_path, fstat: Some(metadata), })) diff --git a/gix-worktree/src/read.rs b/gix-worktree/src/read.rs index d61fb7095c1..f1ef7b32871 100644 --- a/gix-worktree/src/read.rs +++ b/gix-worktree/src/read.rs @@ -7,33 +7,35 @@ use std::io; use std::io::Read; use std::path::Path; -/// Error returned by [`Blob::read`] and related functions +// TODO: tests +// TODO: module level docs to explain why this would be needed (e.g. symlinks + filters) + +/// Error returned by [`blob()`] and related functions. #[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] pub enum Error { - /// #[error("Could not convert symlink path to UTF8")] IllformedUtf8, - /// #[error("IO error while reading blob")] Io(#[from] io::Error), } // TODO: what to do about precompose unicode and ignore case_here? -/// Create a blob from a file/symlink -pub fn read_blob(path: &Path, capabilities: &fs::Capabilities) -> Result { +/// Create a blob from a file or symlink. +pub fn blob(path: &Path, capabilities: &fs::Capabilities) -> Result { let mut buf = Vec::new(); - let res = read_blob_to_buf(path, &mut buf, capabilities)?; + let res = data_with_buf(path, &mut buf, capabilities)?; match res { Cow::Borrowed(_) => Ok(Blob { data: buf }), Cow::Owned(data) => Ok(Blob { data }), } } -/// Create a blob from a file/symlink -pub fn read_blob_with_meta(path: &Path, is_symlink: bool, capabilities: &fs::Capabilities) -> Result { +/// Create a blob from a file or symlink. 
+pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &fs::Capabilities) -> Result { let mut buf = Vec::new(); - let res = read_blob_to_buf_with_meta(path, is_symlink, &mut buf, capabilities)?; + let res = data_with_buf_and_meta(path, &mut buf, is_symlink, capabilities)?; match res { Cow::Borrowed(_) => Ok(Blob { data: buf }), Cow::Owned(data) => Ok(Blob { data }), @@ -41,24 +43,24 @@ pub fn read_blob_with_meta(path: &Path, is_symlink: bool, capabilities: &fs::Cap } // TODO: there is no reason this should be a Cow -// std isn't great about allowing users to avoid allocations but we could -// simply write our own wrapper around libc::readlink which reuses the -// buffer. This would require unsafe code tough (obviously) +// std isn't great about allowing users to avoid allocations but we could +// simply write our own wrapper around libc::readlink which reuses the +// buffer. This would require unsafe code tough (obviously) -/// Create blob data from a file/symlink -pub fn read_blob_to_buf<'a>( +/// Create blob data from a file or symlink. +pub fn data_with_buf<'a>( path: &Path, buf: &'a mut Vec, capabilities: &fs::Capabilities, ) -> Result, Error> { - read_blob_to_buf_with_meta(path, path.symlink_metadata()?.is_symlink(), buf, capabilities) + data_with_buf_and_meta(path, buf, path.symlink_metadata()?.is_symlink(), capabilities) } -/// Create a blob from a file/symlink -pub fn read_blob_to_buf_with_meta<'a>( +/// Create a blob from a file or symlink. 
+pub fn data_with_buf_and_meta<'a>( path: &Path, - is_symlink: bool, buf: &'a mut Vec, + is_symlink: bool, capabilities: &fs::Capabilities, ) -> Result, Error> { // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just @@ -66,10 +68,9 @@ pub fn read_blob_to_buf_with_meta<'a>( // if is_symlink && capabilities.symlink { let symlink_path = path::try_into_bstr(read_link(path)?).map_err(|_| Error::IllformedUtf8)?; - match path::to_unix_separators_on_windows(symlink_path) { - Cow::Borrowed(path) => Ok(Cow::Borrowed(path.as_ref())), - Cow::Owned(path) => Ok(Cow::Owned(path.into())), - } + Ok(Cow::Owned( + path::to_unix_separators_on_windows(symlink_path).into_owned().into(), + )) } else { buf.clear(); File::open(path)?.read_to_end(buf)?; diff --git a/gix-worktree/tests/worktree/index/diff.rs b/gix-worktree/tests/worktree/index/diff.rs index 254365bf8a8..9946fce2483 100644 --- a/gix-worktree/tests/worktree/index/diff.rs +++ b/gix-worktree/tests/worktree/index/diff.rs @@ -21,13 +21,13 @@ fn compute_diff(name: &str, make_worktree_dirty: impl FnOnce(&Path)) -> Vec<(Cha } = &mut change.kind { modification - .compare_data(&change.worktree_path, change.index_entry, &mut buf, &capapilites) + .compare_data(&change.worktree_path, change.entry, &mut buf, &capapilites) .unwrap(); if modification.mode_change.is_none() && !modification.data_changed { return None; } } - Some((change.kind, change.index_entry.path(&index).to_owned())) + Some((change.kind, change.entry.path(&index).to_owned())) }) .collect() } From 16eab8166ea03489c3ac5537365d69626b419bfe Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Tue, 4 Apr 2023 18:13:39 +0200 Subject: [PATCH 06/30] refactor gix_worktree::read module --- gix-worktree/src/read.rs | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/gix-worktree/src/read.rs b/gix-worktree/src/read.rs index f1ef7b32871..b6ae10faf97 100644 --- a/gix-worktree/src/read.rs +++ 
b/gix-worktree/src/read.rs @@ -1,29 +1,21 @@ +//! This module allows creating git blobs from worktree files. +//! For the most part a blob just contains the raw on-disk data. +//! However, symlinks need to be considered properly and attributes/config options need to be considered. + use crate::fs; use gix_object::Blob; use gix_path as path; use std::borrow::Cow; use std::fs::{read_link, File}; -use std::io; -use std::io::Read; +use std::io::{self, Read}; use std::path::Path; // TODO: tests -// TODO: module level docs to explain why this would be needed (e.g. symlinks + filters) - -/// Error returned by [`blob()`] and related functions. -#[derive(Debug, thiserror::Error)] -#[allow(missing_docs)] -pub enum Error { - #[error("Could not convert symlink path to UTF8")] - IllformedUtf8, - #[error("IO error while reading blob")] - Io(#[from] io::Error), -} -// TODO: what to do about precompose unicode and ignore case_here? +// TODO: what to do about precompose unicode and ignore_case for symlinks /// Create a blob from a file or symlink. -pub fn blob(path: &Path, capabilities: &fs::Capabilities) -> Result { +pub fn blob(path: &Path, capabilities: &fs::Capabilities) -> io::Result { let mut buf = Vec::new(); let res = data_with_buf(path, &mut buf, capabilities)?; match res { @@ -33,7 +25,7 @@ pub fn blob(path: &Path, capabilities: &fs::Capabilities) -> Result } /// Create a blob from a file or symlink. 
-pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &fs::Capabilities) -> Result { +pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &fs::Capabilities) -> io::Result { let mut buf = Vec::new(); let res = data_with_buf_and_meta(path, &mut buf, is_symlink, capabilities)?; match res { @@ -52,7 +44,7 @@ pub fn data_with_buf<'a>( path: &Path, buf: &'a mut Vec, capabilities: &fs::Capabilities, -) -> Result, Error> { +) -> io::Result> { data_with_buf_and_meta(path, buf, path.symlink_metadata()?.is_symlink(), capabilities) } @@ -62,15 +54,15 @@ pub fn data_with_buf_and_meta<'a>( buf: &'a mut Vec, is_symlink: bool, capabilities: &fs::Capabilities, -) -> Result, Error> { +) -> io::Result> { // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just // normal files with their content equal to the linked path (so can be read normally) // if is_symlink && capabilities.symlink { - let symlink_path = path::try_into_bstr(read_link(path)?).map_err(|_| Error::IllformedUtf8)?; - Ok(Cow::Owned( - path::to_unix_separators_on_windows(symlink_path).into_owned().into(), - )) + // conversion to bstr can never fail because symlinks are only used + // on unix (by git) so no reason to use the try version here + let symlink_path = path::into_bstr(read_link(path)?); + Ok(Cow::Owned(symlink_path.into_owned().into())) } else { buf.clear(); File::open(path)?.read_to_end(buf)?; From 1c43c75966994ada6b84cf0d69cd13fb7cebf7cd Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Tue, 4 Apr 2023 17:45:46 +0200 Subject: [PATCH 07/30] change index/worktree diff to a visitor based API --- gix-worktree/src/diff.rs | 306 ------------------ gix-worktree/src/index/mod.rs | 1 + gix-worktree/src/index/status.rs | 28 ++ gix-worktree/src/index/status/index.rs | 1 + gix-worktree/src/index/status/recorder.rs | 4 + .../src/index/status/recorder/index.rs | 1 + .../src/index/status/recorder/worktree.rs | 62 ++++ gix-worktree/src/index/status/visit.rs | 36 
+++ .../src/index/status/visit/worktree.rs | 58 ++++ gix-worktree/src/index/status/worktree.rs | 222 +++++++++++++ .../src/index/status/worktree/untracked.rs | 1 + gix-worktree/src/lib.rs | 6 - .../generated-archives/status_changed.tar.xz | 3 + .../generated-archives/status_removed.tar.xz | 3 + .../status_unchanged.tar.xz | 3 + gix-worktree/tests/fixtures/status_changed.sh | 24 ++ gix-worktree/tests/fixtures/status_removed.sh | 18 ++ .../tests/fixtures/status_unchanged.sh | 20 ++ gix-worktree/tests/worktree/index/diff.rs | 75 ----- gix-worktree/tests/worktree/index/mod.rs | 2 +- gix-worktree/tests/worktree/index/status.rs | 70 ++++ 21 files changed, 556 insertions(+), 388 deletions(-) delete mode 100644 gix-worktree/src/diff.rs create mode 100644 gix-worktree/src/index/status.rs create mode 100644 gix-worktree/src/index/status/index.rs create mode 100644 gix-worktree/src/index/status/recorder.rs create mode 100644 gix-worktree/src/index/status/recorder/index.rs create mode 100644 gix-worktree/src/index/status/recorder/worktree.rs create mode 100644 gix-worktree/src/index/status/visit.rs create mode 100644 gix-worktree/src/index/status/visit/worktree.rs create mode 100644 gix-worktree/src/index/status/worktree.rs create mode 100644 gix-worktree/src/index/status/worktree/untracked.rs create mode 100644 gix-worktree/tests/fixtures/generated-archives/status_changed.tar.xz create mode 100644 gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz create mode 100644 gix-worktree/tests/fixtures/generated-archives/status_unchanged.tar.xz create mode 100644 gix-worktree/tests/fixtures/status_changed.sh create mode 100644 gix-worktree/tests/fixtures/status_removed.sh create mode 100644 gix-worktree/tests/fixtures/status_unchanged.sh delete mode 100644 gix-worktree/tests/worktree/index/diff.rs create mode 100644 gix-worktree/tests/worktree/index/status.rs diff --git a/gix-worktree/src/diff.rs b/gix-worktree/src/diff.rs deleted file mode 100644 index 
ca0b32d3fcb..00000000000 --- a/gix-worktree/src/diff.rs +++ /dev/null @@ -1,306 +0,0 @@ -use std::io::{self, ErrorKind}; -use std::path::{Path, PathBuf}; -use std::time::{Duration, SystemTimeError}; - -use bstr::BString; -use gix_features::hash; -use gix_hash::ObjectId; -use gix_index as index; -use gix_object::encode::loose_header; -use gix_path as path; - -use crate::fs; -use crate::read; - -/// How the mode of an index entry has changed. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub enum ModeChange { - /// Shown as `typechange` in `git status`. - /// - /// For example, this happens if a normal file was replaced with a symlink. - /// **Note**: A directory turning into a file or vice-versa is not counted as `TypeChange`, - /// but as addition and removal respectively. - TypeChange, - /// The executable bit of a file changed. - ExecutableChange, -} - -/// How a worktree file changed compared to an index entry. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub struct FileModification { - /// If not `None`, the file mode was changed. - pub mode_change: Option, - /// The `mtime` or `ctime` changed. - /// - /// If this is `false` then we can assume the file is unchanged - /// assuming that timestamps where not racy (see [`detect_racy_stat()`][Self::detect_racy_stat()]). - /// If this is `true`, the file might still be unchanged, and to be perfectly sure we would need - /// to read the file from disk and compare it to the object in index. - pub stat_changed: bool, - /// The data of this entry has changed. - /// - /// This can be quickly determined if the size of the stat data is mismatched. - /// Otherwise a data change must be detected by reading the file - /// from disk and comparing it to the file stored in the index - /// This only needs to be done if `stat_changed` is `true`. 
- pub data_changed: bool, -} - -/// Instantiation -impl FileModification { - /// Computes the status of an `entry` by comparing it with its `fs_stat` while respecting filesystem `capabilities`. - /// - /// It does so exclusively by looking at the filesystem stats. - pub fn from_stat( - entry: &index::Entry, - fs_stat: &std::fs::Metadata, - capabilities: &fs::Capabilities, - ) -> Result { - #[cfg(unix)] - use std::os::unix::fs::MetadataExt; - - let mode_change = match entry.mode { - index::entry::Mode::FILE if !fs_stat.is_file() => Some(ModeChange::TypeChange), - #[cfg(unix)] - index::entry::Mode::FILE if capabilities.executable_bit && fs_stat.mode() & 0o111 != 0 => { - Some(ModeChange::ExecutableChange) - } - #[cfg(unix)] - index::entry::Mode::FILE_EXECUTABLE if capabilities.executable_bit && fs_stat.mode() & 0o111 == 0 => { - Some(ModeChange::ExecutableChange) - } - index::entry::Mode::SYMLINK if capabilities.symlink && !fs_stat.is_symlink() => { - Some(ModeChange::TypeChange) - } - index::entry::Mode::SYMLINK if !capabilities.symlink && !fs_stat.is_file() => Some(ModeChange::TypeChange), - index::entry::Mode::COMMIT if !fs_stat.is_dir() => Some(ModeChange::TypeChange), - _ => None, // TODO: log/error invalid file type - }; - - let data_changed = entry.stat.size as u64 != fs_stat.len(); - - let ctime = fs_stat - .created() - .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; - let mtime = fs_stat - .modified() - .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; - - let stat = &entry.stat; - let stat_changed = stat.mtime.secs - != mtime - .as_secs() - .try_into() - .expect("by 2038 we found a solution for this") - || stat.mtime.nsecs != mtime.subsec_nanos() - || stat.ctime.secs - != ctime - .as_secs() - .try_into() - .expect("by 2038 we found a solution for this") - || stat.ctime.nsecs != ctime.subsec_nanos(); - - Ok(Self { - mode_change, - stat_changed, - data_changed, - }) - } -} - -/// Modification -impl 
FileModification { - /// Marks this entry's stats as changed if there is a potential filesystem race condition. - pub fn detect_racy_stat(&mut self, index: &index::State, index_entry: &index::Entry) { - self.stat_changed = self.stat_changed || index_entry.stat.mtime >= index.timestamp() - } - - /// Returns true if this instance has any changes. - /// - /// The [`detect_racy_stat()`][Self::detect_racy_stat()] method should be called first to account for race conditions. - pub fn is_changed(&self) -> bool { - self.mode_change.is_some() || self.stat_changed || self.data_changed - } - - /// Read the worktree file denoted by `entry` from the disk rooted at `worktree_path` into `buf` and compare - /// it to the index entry's hash to check if the actual data of the file is changed to set [`Self::data_changed`] accordingly, - /// while respecting the filesystem's `capabilities`. - /// - /// Does no computation if we are already sure that the file has or hasn't changed. - pub fn compare_data( - &mut self, - worktree_path: &Path, - entry: &index::Entry, - buf: &mut Vec, - capabilities: &fs::Capabilities, - ) -> Result<(), read::Error> { - if self.mode_change.is_some() || !self.stat_changed || self.data_changed { - return Ok(()); - } - let data = read::data_with_buf_and_meta( - worktree_path, - buf, - entry.mode.contains(index::entry::Mode::SYMLINK), - capabilities, - )?; - let header = loose_header(gix_object::Kind::Blob, data.len()); - let hash_changed = match entry.id { - ObjectId::Sha1(entry_hash) => { - let mut file_hash = hash::Sha1::default(); - file_hash.update(&header); - file_hash.update(&data); - let file_hash = file_hash.digest(); - entry_hash != file_hash - } - }; - self.data_changed = hash_changed; - Ok(()) - } -} - -/// The error returned by [`compare_to_index()`]. 
-#[derive(Debug, thiserror::Error)] -#[allow(missing_docs)] -pub enum Error { - #[error("Could not convert path to UTF8 {path}")] - IllformedUtf8 { path: BString }, - #[error("The clock was off when reading file related metadata after updating a file on disk")] - Time(#[from] std::time::SystemTimeError), - #[error("IO error while writing blob or reading file metadata or changing filetype")] - Io(#[from] io::Error), -} - -/// A change between the index and the worktree computed by [`compare_to_index`]. -#[derive(Clone, Debug)] -pub struct Change<'a> { - /// The index entry that changed. - pub entry: &'a index::Entry, - /// The on-disk worktree path corresponding to this entry. - pub worktree_path: PathBuf, - /// How this index entry changed. - pub kind: ChangeKind, - /// File metadata observed from disk that can be reused (optimization). - pub fstat: Option, -} - -/// The nature of a the difference between the index and the worktree. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub enum ChangeKind { - /// An index entry has no corresponding file in the worktree. - Removed, - /// A new file that has been marked with git add but has not yet been - /// checked in yet. No diff is computed for these files because whatever is - /// on disk at commit time will be used. - Added, - /// Called for files that may have changed in some form as indicated by `change`. - /// - /// Note that this doesn't necessarily mean that the *content* of the file changed. - Modified { - /// How the file was modified exactly - modification: FileModification, - /// Whether this changed file also has an unresolved merge conflict. - conflict: bool, - }, - /// There are unresolved merge conflicts for this file but it has not changed on disk. - Conflict, -} - -/// Computes the changes needed to turn the `index` into the `worktree` (as identified by its root), -/// while respecting the filesystem's `capabilities`. 
-pub fn compare_to_index<'a: 'b, 'b>( - index: &'a index::State, - // TODO: use worktree cache instead - worktree: &'b Path, - capabilities: &'b fs::Capabilities, -) -> impl Iterator, Error>> + 'b { - index.entries().iter().filter_map(|entry| { - let git_path = entry.path(index); - if entry.flags.intersects( - index::entry::Flags::UPTODATE - | index::entry::Flags::SKIP_WORKTREE - | index::entry::Flags::ASSUME_VALID - | index::entry::Flags::FSMONITOR_VALID, - ) { - return None; - } - - let path = if let Ok(path) = path::try_from_bstr(git_path) { - path - } else { - return Some(Err(Error::IllformedUtf8 { - path: git_path.to_owned(), - })); - }; - - let worktree_path = worktree.join(path); - let metadata = match worktree_path.symlink_metadata() { - // TODO: check if any parent directory is a symlink - // we need to use fs::Cache for that - Ok(metadata) if metadata.is_dir() => { - // index entries are normally only for files/symlinks - // if a file turned into a directory it was removed - // the only exception here are submodules which are - // part of the index despite being directories - // - // TODO: submodules: - // if entry.mode.contains(Mode::COMMIT) && - // resolve_gitlink_ref(ce->name, "HEAD", &sub)) - return Some(Ok(Change { - kind: ChangeKind::Removed, - entry, - worktree_path, - fstat: Some(metadata), - })); - } - Ok(metadata) => metadata, - Err(err) if err.kind() == ErrorKind::NotFound => { - return Some(Ok(Change { - kind: ChangeKind::Removed, - entry, - worktree_path, - fstat: None, - })) - } - Err(err) => { - // TODO: strict mode? 
- return Some(Err(err.into())); - } - }; - if entry.flags.contains(index::entry::Flags::INTENT_TO_ADD) { - return Some(Ok(Change { - kind: ChangeKind::Added, - entry, - worktree_path, - fstat: None, - })); - } - let mut change = match FileModification::from_stat(entry, &metadata, capabilities) { - Ok(change) => change, - Err(err) => return Some(Err(err.into())), - }; - change.detect_racy_stat(index, entry); - - let conflict = match entry.stage() { - 0 => false, - 1 => true, - _ => return None, - }; - let kind = if change.is_changed() { - ChangeKind::Modified { - modification: change, - conflict, - } - } else if conflict { - ChangeKind::Conflict - } else { - return None; - }; - - Some(Ok(Change { - kind, - entry, - worktree_path, - fstat: Some(metadata), - })) - }) -} diff --git a/gix-worktree/src/index/mod.rs b/gix-worktree/src/index/mod.rs index 684d1cae9e4..0d25c9f5e7b 100644 --- a/gix-worktree/src/index/mod.rs +++ b/gix-worktree/src/index/mod.rs @@ -7,6 +7,7 @@ use crate::fs; pub mod checkout; pub(crate) mod entry; +pub mod status; /// Note that interruption still produce an `Ok(…)` value, so the caller should look at `should_interrupt` to communicate the outcome. /// `dir` is the directory into which to checkout the `index`. diff --git a/gix-worktree/src/index/status.rs b/gix-worktree/src/index/status.rs new file mode 100644 index 00000000000..fa7119d6049 --- /dev/null +++ b/gix-worktree/src/index/status.rs @@ -0,0 +1,28 @@ +//! 
+ +mod worktree; + +mod index; + +/// +pub mod visit; + +/// +pub mod recorder; + +/// +pub struct IndexStatus<'index> { + index: &'index gix_index::State, +} + +impl<'index> From<&'index gix_index::File> for IndexStatus<'index> { + fn from(file: &'index gix_index::File) -> Self { + Self { index: file } + } +} + +impl<'index> From<&'index gix_index::State> for IndexStatus<'index> { + fn from(index: &'index gix_index::State) -> Self { + Self { index } + } +} diff --git a/gix-worktree/src/index/status/index.rs b/gix-worktree/src/index/status/index.rs new file mode 100644 index 00000000000..1d5e7fe551b --- /dev/null +++ b/gix-worktree/src/index/status/index.rs @@ -0,0 +1 @@ +// TODO: index to index diff diff --git a/gix-worktree/src/index/status/recorder.rs b/gix-worktree/src/index/status/recorder.rs new file mode 100644 index 00000000000..e545ef360c2 --- /dev/null +++ b/gix-worktree/src/index/status/recorder.rs @@ -0,0 +1,4 @@ +/// +pub mod index; +/// +pub mod worktree; diff --git a/gix-worktree/src/index/status/recorder/index.rs b/gix-worktree/src/index/status/recorder/index.rs new file mode 100644 index 00000000000..1777fa38245 --- /dev/null +++ b/gix-worktree/src/index/status/recorder/index.rs @@ -0,0 +1 @@ +// TODO: index diff diff --git a/gix-worktree/src/index/status/recorder/worktree.rs b/gix-worktree/src/index/status/recorder/worktree.rs new file mode 100644 index 00000000000..475305937ad --- /dev/null +++ b/gix-worktree/src/index/status/recorder/worktree.rs @@ -0,0 +1,62 @@ +use bstr::BStr; + +use gix_index as index; + +use crate::fs; +use crate::index::status::visit; + +/// A [Visit][visit::Visit] implementation to record every observed change and keep track of the changed paths. 
+#[derive(Debug)] +pub struct Recorder<'a, 'index> { + index: &'index index::State, + buf: Vec, + capabilities: &'a fs::Capabilities, + /// + pub records: Vec<(&'index BStr, visit::worktree::Status, bool)>, +} + +impl<'a, 'index> Recorder<'a, 'index> { + /// + pub fn new(capabilities: &'a fs::Capabilities, index: &'index index::State) -> Self { + Recorder { + index, + buf: Vec::with_capacity(8 * 1024), + capabilities, + records: Vec::new(), + } + } +} + +impl<'a, 'index> visit::worktree::Visit<'index> for Recorder<'a, 'index> { + fn visit_entry( + &mut self, + entry: &'index index::Entry, + status: Result, + path: Result<&std::path::Path, &BStr>, + conflict: bool, + ) { + // we treat any errors as a data modification to be conservative + let status = if let Ok(path) = path { + if let Ok(mut status) = status { + let _ = status.compare_data(path, entry, &mut self.buf, self.capabilities); + status + } else { + visit::worktree::Status::Modified(visit::Modification { + mode_change: None, + stat_changed: true, + data_changed: true, + }) + } + } else { + visit::worktree::Status::Modified(visit::Modification { + mode_change: None, + stat_changed: true, + data_changed: true, + }) + }; + let path = entry.path(self.index); + if status != visit::worktree::Status::Unchanged { + self.records.push((path, status, conflict)) + } + } +} diff --git a/gix-worktree/src/index/status/visit.rs b/gix-worktree/src/index/status/visit.rs new file mode 100644 index 00000000000..a1d85ec21c5 --- /dev/null +++ b/gix-worktree/src/index/status/visit.rs @@ -0,0 +1,36 @@ +/// +pub mod worktree; + +/// How the mode of an index entry has changed. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum ModeChange { + /// Shown as `typechange` in `git status`. + /// + /// For example, this happens if a normal file was replaced with a symlink. + /// **Note**: A directory turning into a file or vice-versa is not counted as `TypeChange`, + /// but as addition and removal respectively. 
+ TypeChange, + /// The executable bit of a file changed. + ExecutableChange, +} + +/// How a worktree file changed compared to an index entry. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct Modification { + /// If not `None`, the file mode was changed. + pub mode_change: Option, + /// The `mtime` or `ctime` changed. + /// + /// If this is `false` then we can assume the file is unchanged + /// assuming that timestamps were not racy (see [`detect_racy_stat()`][Self::detect_racy_stat()]). + /// If this is `true`, the file might still be unchanged, and to be perfectly sure we would need + /// to read the file from disk and compare it to the object in index. + pub stat_changed: bool, + /// The data of this entry has changed. + /// + /// This can be quickly determined if the size of the stat data is mismatched. + /// Otherwise a data change must be detected by reading the file + /// from disk and comparing it to the file stored in the index. + /// This only needs to be done if `stat_changed` is `true`. + pub data_changed: bool, +} diff --git a/gix-worktree/src/index/status/visit/worktree.rs b/gix-worktree/src/index/status/visit/worktree.rs new file mode 100644 index 00000000000..6bbd304e4d9 --- /dev/null +++ b/gix-worktree/src/index/status/visit/worktree.rs @@ -0,0 +1,58 @@ +use std::io; +use std::path::Path; + +use bstr::BStr; +use gix_index as index; + +use super::Modification; + +/// The status of an index entry in a worktree +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum Status { + /// The file in the worktree is identical to the index entry + Unchanged, + /// An index entry has no corresponding file in the worktree. + Removed, + /// A worktree file has been modified in some form as indicated by `change`. + /// + /// Note that this doesn't necessarily mean that the *content* of the file changed. 
+ Modified(Modification), + /// An index entry that correspond to an untracked worktree file marked with `git add` + Added, +} + +/// The error returned by [`compare_to_index()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not convert path to UTF8")] + IllformedUtf8, + #[error("The clock was off when reading file related metadata after updating a file on disk")] + Time(#[from] std::time::SystemTimeError), + #[error("IO error while writing blob or reading file metadata or changing filetype")] + Io(#[from] io::Error), +} + +/// +pub trait Visit<'index> { + /// + fn visit_entry( + &mut self, + entry: &'index index::Entry, + status: Result, + path: Result<&Path, &BStr>, + conflict: bool, + ); +} + +/// +pub trait VisitPrallel<'index> { + /// + fn visit_entry( + &self, + entry: &'index index::Entry, + status: Result, + path: Result<&Path, &BStr>, + conflict: bool, + ); +} diff --git a/gix-worktree/src/index/status/worktree.rs b/gix-worktree/src/index/status/worktree.rs new file mode 100644 index 00000000000..2b580acfc3d --- /dev/null +++ b/gix-worktree/src/index/status/worktree.rs @@ -0,0 +1,222 @@ +use std::io; +use std::path::Path; +use std::time::{Duration, SystemTimeError}; + +use crate::index::status::visit::worktree::Visit; +use crate::index::status::visit::worktree::{Error, Status}; +use crate::index::status::visit::{ModeChange, Modification}; +use crate::index::status::IndexStatus; +use crate::{fs, read}; +use gix_features::hash; +use gix_hash::ObjectId; +use gix_index as index; +use gix_object::encode::loose_header; +use gix_path as path; + +mod untracked; + +/// Instantiation +impl Modification { + /// Computes the status of an `entry` by comparing it with its `fs_stat` while respecting filesystem `capabilities`. + /// + /// It does so exclusively by looking at the filesystem stats. 
+ pub fn from_fstat( + entry: &index::Entry, + fs_stat: &std::fs::Metadata, + capabilities: &fs::Capabilities, + ) -> Result { + #[cfg(unix)] + use std::os::unix::fs::MetadataExt; + + let mode_change = match entry.mode { + index::entry::Mode::FILE if !fs_stat.is_file() => Some(ModeChange::TypeChange), + #[cfg(unix)] + index::entry::Mode::FILE if capabilities.executable_bit && fs_stat.mode() & 0o111 != 0 => { + Some(ModeChange::ExecutableChange) + } + #[cfg(unix)] + index::entry::Mode::FILE_EXECUTABLE if capabilities.executable_bit && fs_stat.mode() & 0o111 == 0 => { + Some(ModeChange::ExecutableChange) + } + index::entry::Mode::SYMLINK if capabilities.symlink && !fs_stat.is_symlink() => { + Some(ModeChange::TypeChange) + } + index::entry::Mode::SYMLINK if !capabilities.symlink && !fs_stat.is_file() => Some(ModeChange::TypeChange), + index::entry::Mode::COMMIT if !fs_stat.is_dir() => Some(ModeChange::TypeChange), + _ => None, // TODO: log/error invalid file type + }; + + let data_changed = entry.stat.size as u64 != fs_stat.len(); + + let ctime = fs_stat + .created() + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; + let mtime = fs_stat + .modified() + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; + + let stat = &entry.stat; + let stat_changed = stat.mtime.secs + != mtime + .as_secs() + .try_into() + .expect("by 2038 we found a solution for this") + || stat.mtime.nsecs != mtime.subsec_nanos() + || stat.ctime.secs + != ctime + .as_secs() + .try_into() + .expect("by 2038 we found a solution for this") + || stat.ctime.nsecs != ctime.subsec_nanos(); + + Ok(Self { + mode_change, + stat_changed, + data_changed, + }) + } + + /// Marks this entry's stats as changed if there is a potential filesystem race condition. 
+ pub fn detect_racy_stat(&mut self, index: &index::State, index_entry: &index::Entry) { + self.stat_changed = self.stat_changed || index_entry.stat.mtime >= index.timestamp() + } + + /// Returns true if this instance has any changes. + /// + /// The [`detect_racy_stat()`][Self::detect_racy_stat()] method should be called first to account for race conditions. + pub fn is_changed(&self) -> bool { + self.mode_change.is_some() || self.stat_changed || self.data_changed + } + + /// Read the worktree file denoted by `entry` from the disk rooted at `worktree_path` into `buf` and compare + /// it to the index entry's hash to check if the actual data of the file is changed to set [`Self::data_changed`] accordingly, + /// while respecting the filesystem's `capabilities`. + /// + /// Does no computation if we are already sure that the file has or hasn't changed. + pub fn compare_data( + &mut self, + worktree_path: &Path, + entry: &index::Entry, + buf: &mut Vec, + capabilities: &fs::Capabilities, + ) -> io::Result<()> { + if self.mode_change == Some(ModeChange::TypeChange) || self.data_changed { + return Ok(()); + } + + let data = read::data_with_buf_and_meta( + worktree_path, + buf, + entry.mode.contains(index::entry::Mode::SYMLINK), + capabilities, + )?; + let header = loose_header(gix_object::Kind::Blob, data.len()); + let hash_changed = match entry.id { + ObjectId::Sha1(entry_hash) => { + let mut file_hash = hash::Sha1::default(); + file_hash.update(&header); + file_hash.update(&data); + let file_hash = file_hash.digest(); + entry_hash != file_hash + } + }; + self.data_changed = hash_changed; + Ok(()) + } +} + +impl<'index> IndexStatus<'index> { + /// Calculates the status of worktree + pub fn of_worktree( + self, + worktree: &Path, + visit: &mut impl Visit<'index>, + visit_added: bool, + fs_capabilities: &fs::Capabilities, + ) { + for entry in self.index.entries() { + let conflict = match entry.stage() { + 0 => false, + 1 => true, + _ => continue, + }; + if 
entry.flags.intersects( + index::entry::Flags::UPTODATE + | index::entry::Flags::SKIP_WORKTREE + | index::entry::Flags::ASSUME_VALID + | index::entry::Flags::FSMONITOR_VALID, + ) { + continue; + } + + let git_path = entry.path(self.index); + let path = path::try_from_bstr(git_path).map(|path| worktree.join(path)); + let status = match &path { + Ok(path) => self.of_file(entry, path, visit_added, fs_capabilities), + Err(_) => Err(Error::IllformedUtf8), + }; + + visit.visit_entry(entry, status, path.as_deref().map_err(|_| git_path), conflict); + } + } + + fn of_file( + &self, + entry: &'index index::Entry, + path: &Path, + visit_added: bool, + capabilities: &fs::Capabilities, + ) -> Result { + let metadata = match path.symlink_metadata() { + // TODO: check if any parent directory is a symlink + // we need to use fs::Cache for that + Ok(metadata) if metadata.is_dir() => { + // index entries are normally only for files/symlinks + // if a file turned into a directory it was removed + // the only exception here are submodules which are + // part of the index despite being directories + // + // TODO: submodules: + // if entry.mode.contains(Mode::COMMIT) && + // resolve_gitlink_ref(ce->name, "HEAD", &sub)) + return Ok(Status::Removed); + } + Ok(metadata) => metadata, + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Status::Removed), + Err(err) => { + return Err(err.into()); + } + }; + if visit_added && entry.flags.contains(index::entry::Flags::INTENT_TO_ADD) { + return Ok(Status::Added); + } + let mut modification = Modification::from_fstat(entry, &metadata, capabilities)?; + modification.detect_racy_stat(self.index, entry); + if modification.is_changed() { + Ok(Status::Modified(modification)) + } else { + Ok(Status::Unchanged) + } + } +} + +impl Status { + /// Checks if files with stat changes have changed content by reading their + /// contents from the disk. 
If the file content is unchanged and there + /// are no mode change the Status is changed to Unchanged + pub fn compare_data( + &mut self, + path: &Path, + entry: &index::Entry, + buf: &mut Vec, + capabilities: &fs::Capabilities, + ) -> io::Result<()> { + if let Status::Modified(status) = self { + status.compare_data(path, entry, buf, capabilities)?; + if !status.data_changed && status.mode_change.is_none() { + *self = Status::Unchanged + } + } + Ok(()) + } +} diff --git a/gix-worktree/src/index/status/worktree/untracked.rs b/gix-worktree/src/index/status/worktree/untracked.rs new file mode 100644 index 00000000000..f82719ed8a6 --- /dev/null +++ b/gix-worktree/src/index/status/worktree/untracked.rs @@ -0,0 +1 @@ +// TODO: untracked file detection diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs index 8bae7a724b8..aae38370830 100644 --- a/gix-worktree/src/lib.rs +++ b/gix-worktree/src/lib.rs @@ -14,11 +14,5 @@ pub mod index; pub(crate) mod os; -/// -pub mod diff; - -/// -pub mod untracked; - /// pub mod read; diff --git a/gix-worktree/tests/fixtures/generated-archives/status_changed.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_changed.tar.xz new file mode 100644 index 00000000000..f671df45b63 --- /dev/null +++ b/gix-worktree/tests/fixtures/generated-archives/status_changed.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd9faaa40b975d1b8df86e61dc162b71cc11f07345397657542588659955e9c9 +size 10616 diff --git a/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz new file mode 100644 index 00000000000..7b1462fc83e --- /dev/null +++ b/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5fe85a65e3689e9e60598130be60761dc4ea129e04d7d5501320f7ebad1eb2b +size 10520 diff --git 
a/gix-worktree/tests/fixtures/generated-archives/status_unchanged.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_unchanged.tar.xz new file mode 100644 index 00000000000..106700ea88c --- /dev/null +++ b/gix-worktree/tests/fixtures/generated-archives/status_unchanged.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deae2062b1f0348e7af84de891769306b31c2ba4a96e033385f82018fa601468 +size 10592 diff --git a/gix-worktree/tests/fixtures/status_changed.sh b/gix-worktree/tests/fixtures/status_changed.sh new file mode 100644 index 00000000000..772076cf051 --- /dev/null +++ b/gix-worktree/tests/fixtures/status_changed.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch empty +echo -n "content" > executable +chmod +x executable + +mkdir dir +echo -n "other content" > dir/content +echo -n "other content" > dir/content2 +mkdir dir/sub-dir +(cd dir/sub-dir && ln -sf ../content symlink) + +git add -A +git commit -m "Commit" + +chmod +x dir/content +chmod +x dir/content2 +echo "new content" > dir/content2 +rm empty +ln -sf dir/content empty +git reset \ No newline at end of file diff --git a/gix-worktree/tests/fixtures/status_removed.sh b/gix-worktree/tests/fixtures/status_removed.sh new file mode 100644 index 00000000000..30cdfb94993 --- /dev/null +++ b/gix-worktree/tests/fixtures/status_removed.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch empty +echo -n "content" > executable +chmod +x executable + +mkdir dir +echo -n "other content" > dir/content +mkdir dir/sub-dir +(cd dir/sub-dir && ln -sf ../content symlink) + +git add -A +git commit -m "Commit" +rm -rf ./empty ./executable ./dir/content ./dir/sub-dir/symlink +git reset \ No newline at end of file diff --git a/gix-worktree/tests/fixtures/status_unchanged.sh b/gix-worktree/tests/fixtures/status_unchanged.sh new file mode 100644 index 00000000000..67684549509 --- /dev/null +++ b/gix-worktree/tests/fixtures/status_unchanged.sh 
@@ -0,0 +1,20 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch empty +echo -n "content" > executable +chmod +x executable + +mkdir dir +echo -n "other content" > dir/content +mkdir dir/sub-dir +(cd dir/sub-dir && ln -sf ../content symlink) + +git add -A +git commit -m "Commit" + +touch ./empty ./executable ./dir/content ./dir/sub-dir/symlink + +git reset # ensure index timestamp is large enough to not mark everything racy \ No newline at end of file diff --git a/gix-worktree/tests/worktree/index/diff.rs b/gix-worktree/tests/worktree/index/diff.rs deleted file mode 100644 index 9946fce2483..00000000000 --- a/gix-worktree/tests/worktree/index/diff.rs +++ /dev/null @@ -1,75 +0,0 @@ -use std::fs::{self}; -use std::path::Path; - -use bstr::BString; -use gix_worktree as worktree; -use worktree::diff::{ChangeKind, FileModification}; - -fn compute_diff(name: &str, make_worktree_dirty: impl FnOnce(&Path)) -> Vec<(ChangeKind, BString)> { - let work_tree = - gix_testtools::scripted_fixture_writable(Path::new(name).with_extension("sh")).expect("script works"); - let git_dir = work_tree.path().join(".git"); - make_worktree_dirty(work_tree.path()); - let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); - let capapilites = worktree::fs::Capabilities::probe(git_dir); - let mut buf = Vec::with_capacity(8 * 1024); - worktree::diff::compare_to_index(&index, work_tree.path(), &capapilites) - .filter_map(|change| { - let mut change = change.unwrap(); - if let ChangeKind::Modified { - ref mut modification, .. 
- } = &mut change.kind - { - modification - .compare_data(&change.worktree_path, change.entry, &mut buf, &capapilites) - .unwrap(); - if modification.mode_change.is_none() && !modification.data_changed { - return None; - } - } - Some((change.kind, change.entry.path(&index).to_owned())) - }) - .collect() -} - -#[test] -fn removed() { - let diff = compute_diff("make_mixed_without_submodules", |path| { - fs::remove_file(path.join("executable")).unwrap(); - fs::remove_file(path.join("dir/content")).unwrap(); - fs::remove_file(path.join("dir/sub-dir/symlink")).unwrap(); - }); - - assert_eq!( - diff, - vec![ - (ChangeKind::Removed, BString::new(b"dir/content".to_vec())), - (ChangeKind::Removed, BString::new(b"dir/sub-dir/symlink".to_vec())), - (ChangeKind::Removed, BString::new(b"executable".to_vec())), - ] - ) -} - -#[test] -fn changed() { - let diff = compute_diff("make_mixed_without_submodules", |path| { - fs::write(path.join("dir/content"), "hello_world").unwrap(); - // write same content to this file to simulate a touch command - fs::write(path.join("executable"), "content").unwrap(); - }); - - assert_eq!( - diff, - vec![( - ChangeKind::Modified { - modification: FileModification { - mode_change: None, - stat_changed: true, - data_changed: true - }, - conflict: false - }, - BString::new(b"dir/content".to_vec()) - ),] - ) -} diff --git a/gix-worktree/tests/worktree/index/mod.rs b/gix-worktree/tests/worktree/index/mod.rs index 1f31dafe6a8..5de836e04cf 100644 --- a/gix-worktree/tests/worktree/index/mod.rs +++ b/gix-worktree/tests/worktree/index/mod.rs @@ -1,2 +1,2 @@ mod checkout; -mod diff; +mod status; diff --git a/gix-worktree/tests/worktree/index/status.rs b/gix-worktree/tests/worktree/index/status.rs new file mode 100644 index 00000000000..02d7c1fcfb3 --- /dev/null +++ b/gix-worktree/tests/worktree/index/status.rs @@ -0,0 +1,70 @@ +use bstr::BStr; +use gix_worktree::fs; +use gix_worktree::index::status::recorder::worktree::Recorder; +use 
gix_worktree::index::status::visit::{worktree, ModeChange, Modification}; +use gix_worktree::index::status::IndexStatus; + +use crate::fixture_path; + +fn fixture(name: &str, expected_status: &[(&BStr, worktree::Status, bool)]) { + let worktree = fixture_path(name); + let git_dir = worktree.join(".git"); + let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); + let capabilities = fs::Capabilities::probe(git_dir); + let mut recorder = Recorder::new(&capabilities, &index); + IndexStatus::from(&index).of_worktree(&worktree, &mut recorder, true, &capabilities); + assert_eq!(recorder.records, expected_status) +} + +#[test] +fn removed() { + fixture( + "status_removed", + &[ + (BStr::new(b"dir/content"), worktree::Status::Removed, false), + (BStr::new(b"dir/sub-dir/symlink"), worktree::Status::Removed, false), + (BStr::new(b"empty"), worktree::Status::Removed, false), + (BStr::new(b"executable"), worktree::Status::Removed, false), + ], + ); +} + +#[test] +fn unchanged() { + fixture("status_unchanged", &[]); +} +#[test] +fn modified() { + fixture( + "status_changed", + &[ + ( + BStr::new(b"dir/content"), + worktree::Status::Modified(Modification { + mode_change: Some(ModeChange::ExecutableChange), + stat_changed: false, + data_changed: false, + }), + false, + ), + ( + BStr::new(b"dir/content2"), + worktree::Status::Modified(Modification { + mode_change: Some(ModeChange::ExecutableChange), + stat_changed: true, + data_changed: true, + }), + false, + ), + ( + BStr::new(b"empty"), + worktree::Status::Modified(Modification { + mode_change: Some(ModeChange::TypeChange), + stat_changed: true, + data_changed: true, + }), + false, + ), + ], + ); +} From 870bdb2f3957e0f5690679e2aeb6752cd0b8d93e Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Wed, 12 Apr 2023 15:22:54 +0200 Subject: [PATCH 08/30] centralize index entry Stat creation/comparison --- gix-index/src/decode/entries.rs | 4 +- gix-index/src/decode/mod.rs | 4 +- 
gix-index/src/entry/mod.rs | 62 +-- gix-index/src/entry/stat.rs | 415 ++++++++++++++++++ gix-index/src/lib.rs | 2 +- gix-index/tests/index/entry.rs | 4 +- gix-index/tests/index/mod.rs | 2 +- gix-worktree/src/index/checkout.rs | 31 +- gix-worktree/src/index/entry.rs | 32 +- gix-worktree/src/index/status.rs | 3 +- gix-worktree/src/index/status/worktree.rs | 105 ++--- .../status_unchanged.tar.xz | 4 +- gix-worktree/tests/worktree/index/status.rs | 25 +- gix/src/config/cache/access.rs | 19 +- 14 files changed, 526 insertions(+), 186 deletions(-) create mode 100644 gix-index/src/entry/stat.rs diff --git a/gix-index/src/decode/entries.rs b/gix-index/src/decode/entries.rs index 74bc1cfc8e8..5de949fe130 100644 --- a/gix-index/src/decode/entries.rs +++ b/gix-index/src/decode/entries.rs @@ -142,11 +142,11 @@ fn load_one<'a>( Some(( Entry { stat: entry::Stat { - ctime: entry::Time { + ctime: entry::stat::Time { secs: ctime_secs, nsecs: ctime_nsecs, }, - mtime: entry::Time { + mtime: entry::stat::Time { secs: mtime_secs, nsecs: mtime_nsecs, }, diff --git a/gix-index/src/decode/mod.rs b/gix-index/src/decode/mod.rs index e84d8f71739..c94b0349570 100644 --- a/gix-index/src/decode/mod.rs +++ b/gix-index/src/decode/mod.rs @@ -302,11 +302,11 @@ pub(crate) fn stat(data: &[u8]) -> Option<(entry::Stat, &[u8])> { let (size, data) = read_u32(data)?; Some(( entry::Stat { - mtime: entry::Time { + mtime: entry::stat::Time { secs: ctime_secs, nsecs: ctime_nsecs, }, - ctime: entry::Time { + ctime: entry::stat::Time { secs: mtime_secs, nsecs: mtime_nsecs, }, diff --git a/gix-index/src/entry/mod.rs b/gix-index/src/entry/mod.rs index b7a5ab3f27b..5573f323207 100644 --- a/gix-index/src/entry/mod.rs +++ b/gix-index/src/entry/mod.rs @@ -2,56 +2,24 @@ pub type Stage = u32; mod mode; -use std::cmp::Ordering; - -use filetime::FileTime; pub use mode::Mode; mod flags; pub(crate) use flags::at_rest; pub use flags::Flags; +/// +pub mod stat; mod write; -/// The time component in a [`Stat`] struct. 
-#[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] -pub struct Time { - /// The amount of seconds elapsed since EPOCH - pub secs: u32, - /// The amount of nanoseconds elapsed in the current second, ranging from 0 to 999.999.999 . - pub nsecs: u32, -} - -impl From for Time { - fn from(value: FileTime) -> Self { - Time { - secs: value.unix_seconds().try_into().expect("can't represent non-unix times"), - nsecs: value.nanoseconds(), - } - } -} - -impl PartialEq for Time { - fn eq(&self, other: &FileTime) -> bool { - *self == Time::from(*other) - } -} - -impl PartialOrd for Time { - fn partial_cmp(&self, other: &FileTime) -> Option { - self.partial_cmp(&Time::from(*other)) - } -} - /// An entry's filesystem stat information. #[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Stat { /// Modification time - pub mtime: Time, + pub mtime: stat::Time, /// Creation time - pub ctime: Time, + pub ctime: stat::Time, /// Device number pub dev: u32, /// Inode number @@ -88,29 +56,11 @@ mod access { } mod _impls { - use std::{cmp::Ordering, ops::Add, time::SystemTime}; + use std::cmp::Ordering; use bstr::BStr; - use crate::{entry::Time, Entry, State}; - - impl From for Time { - fn from(s: SystemTime) -> Self { - let d = s - .duration_since(std::time::UNIX_EPOCH) - .expect("system time is not before unix epoch!"); - Time { - secs: d.as_secs() as u32, - nsecs: d.subsec_nanos(), - } - } - } - - impl From