Skip to content

Commit 1955153

Browse files
o2sh and Byron authored
Added File Churn Metric (#1071)
* add churn metric * add diff_count * revert * rename * add churn cli flags * fix integration test * add unit tests * try fix codeowners * fix codeowners * Optimize diff implementation * delay conversion to String for filepaths to the last moment. That way, only the paths that are displayed will be converted in an operation that isn't free. * change diff implementation to decode parents only once, instead of three times in the common case. * set up an object cache in the `Repository` for faster traversals and much faster diffs. * Don't fail on missing parent as we want to work in shallow repos, too * Increase performance by decoding the commit only once. Previously, each time we would query commit information, the commit would lazily be decoded until the point of interest under the hood. Now we decode everything once, which is faster than what happened before. Note that diffs are still causing the parents to be decoded even though we *could* pass them in, but it's not worth the complexity for just 100 commits (the default value for churn). * track changes on executable files * remove for_each method * use horizontal ellipsis * review * use MAIN_SEPARATOR when building path * revert * run expensive diffs in parallel and abort them once we run out of time. That way, we could even use higher amounts of diffs if we wanted to, or could warn if there was not enough time to reach the desired amount of diffs. * Always calculate at least one diff for 'churn'. That way, there is always some data to work with. This is important in case the repo is very small and the thread needs some time to start up and finish.
* improved readability + churn_pool_size CLI flag. The churn_pool_size allows the user to force onefetch to be deterministic in the number of commits used to create the churn summary * fix test * halt if the churn pool size is bigger than the total number of commits * improve readability * add unit test * refactor * simplify `should_break()` * update to latest `gix` version * Avoid excessive memory consumption by sending the commit-id instead of its buffer. The delta-processing happens by referring to a commit, and previously we could send the whole commit buffer (which is expensive) as the overall amount of buffers in flight would be bounded. Now that the bound was removed, it's necessary to limit the costs of the commit, and we do this by referring to it by id instead. That way, on the Linux kernel, we get these values for memory consumption: * bounded: 960MB * unbounded buffer: 2156MB * unbounded id: 1033MB --------- Co-authored-by: Sebastian Thiel <[email protected]>
1 parent d5f8621 commit 1955153

File tree

16 files changed

+995
-241
lines changed

16 files changed

+995
-241
lines changed

Cargo.lock

Lines changed: 513 additions & 114 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ clap_complete = "4.3.0"
2626
gix-features-for-configuration-only = { package = "gix-features", version = "0.29.0", features = [
2727
"zlib-ng",
2828
] }
29-
gix = { version = "0.44.1", default-features = false, features = [
29+
gix = { version = "0.45.1", default-features = false, features = [
3030
"max-performance-safe",
3131
] }
3232
git2 = { version = "0.17.1", default-features = false }
@@ -55,6 +55,7 @@ criterion = "0.4.0"
5555
gix-testtools = "0.12.0"
5656
insta = { version = "1.29.0", features = ["json", "redactions"] }
5757
pretty_assertions = "1.3.0"
58+
rstest = "0.17.0"
5859

5960
[[bench]]
6061
name = "repo"

src/cli.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,15 @@ pub struct InfoCliOptions {
6363
/// Maximum NUM of languages to be shown
6464
#[arg(long, default_value_t = 6usize, value_name = "NUM")]
6565
pub number_of_languages: usize,
66+
/// Maximum NUM of file churns to be shown
67+
#[arg(long, default_value_t = 3usize, value_name = "NUM")]
68+
pub number_of_file_churns: usize,
69+
/// Maximum NUM of commits from HEAD used to compute the churn summary
70+
///
71+
/// By default, the actual value is non-deterministic due to time-based computation
72+
/// and will be displayed under the info title "Churn (NUM)"
73+
#[arg(long, value_name = "NUM")]
74+
pub churn_pool_size: Option<usize>,
6675
/// Ignore all files & directories matching EXCLUDE
6776
#[arg(long, short, num_args = 1.., value_hint = ValueHint::AnyPath)]
6877
pub exclude: Vec<PathBuf>,
@@ -228,6 +237,8 @@ impl Default for InfoCliOptions {
228237
InfoCliOptions {
229238
number_of_authors: 3,
230239
number_of_languages: 6,
240+
number_of_file_churns: 3,
241+
churn_pool_size: Option::default(),
231242
exclude: Vec::default(),
232243
no_bots: Option::default(),
233244
no_merges: Default::default(),

src/info/author.rs

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{
22
cli::NumberSeparator,
3-
info::{format_number, utils::git::Commits, utils::info_field::InfoField},
3+
info::{format_number, utils::git::CommitMetrics, utils::info_field::InfoField},
44
};
55
use owo_colors::{DynColors, OwoColorize};
66
use serde::Serialize;
@@ -10,46 +10,42 @@ use std::fmt::Write;
1010
#[serde(rename_all = "camelCase")]
1111
pub struct Author {
1212
pub name: String,
13-
email: String,
13+
email: Option<String>,
1414
nbr_of_commits: usize,
1515
contribution: usize,
1616
#[serde(skip_serializing)]
17-
show_email: bool,
18-
#[serde(skip_serializing)]
1917
number_separator: NumberSeparator,
2018
}
2119

2220
impl Author {
2321
pub fn new(
24-
name: gix::bstr::BString,
25-
email: gix::bstr::BString,
22+
name: String,
23+
email: Option<String>,
2624
nbr_of_commits: usize,
2725
total_nbr_of_commits: usize,
28-
show_email: bool,
2926
number_separator: NumberSeparator,
3027
) -> Self {
3128
let contribution =
3229
(nbr_of_commits as f32 * 100. / total_nbr_of_commits as f32).round() as usize;
3330
Self {
34-
name: name.to_string(),
35-
email: email.to_string(),
31+
name,
32+
email,
3633
nbr_of_commits,
3734
contribution,
38-
show_email,
3935
number_separator,
4036
}
4137
}
4238
}
4339

4440
impl std::fmt::Display for Author {
4541
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
46-
if self.show_email {
42+
if let Some(email) = &self.email {
4743
write!(
4844
f,
4945
"{}% {} <{}> {}",
5046
self.contribution,
5147
self.name,
52-
self.email,
48+
email,
5349
format_number(&self.nbr_of_commits, self.number_separator)
5450
)
5551
} else {
@@ -72,8 +68,8 @@ pub struct AuthorsInfo {
7268
}
7369

7470
impl AuthorsInfo {
75-
pub fn new(info_color: DynColors, commits: &Commits) -> Self {
76-
let authors = commits.authors_to_display.clone();
71+
pub fn new(info_color: DynColors, commit_metrics: &CommitMetrics) -> Self {
72+
let authors = commit_metrics.authors_to_display.clone();
7773
Self {
7874
authors,
7975
info_color,
@@ -91,9 +87,9 @@ impl std::fmt::Display for AuthorsInfo {
9187
let author_str = author.color(self.info_color);
9288

9389
if i == 0 {
94-
let _ = write!(authors_info, "{author_str}");
90+
write!(authors_info, "{author_str}")?;
9591
} else {
96-
let _ = write!(authors_info, "\n{:<width$}{}", "", author_str, width = pad);
92+
write!(authors_info, "\n{:<width$}{}", "", author_str, width = pad)?;
9793
}
9894
}
9995

@@ -128,10 +124,9 @@ mod test {
128124
fn test_display_author() {
129125
let author = Author::new(
130126
"John Doe".into(),
131-
127+
Some("[email protected]".into()),
132128
1500,
133129
2000,
134-
true,
135130
NumberSeparator::Plain,
136131
);
137132

@@ -140,14 +135,7 @@ mod test {
140135

141136
#[test]
142137
fn test_display_author_with_no_email() {
143-
let author = Author::new(
144-
"John Doe".into(),
145-
146-
1500,
147-
2000,
148-
false,
149-
NumberSeparator::Plain,
150-
);
138+
let author = Author::new("John Doe".into(), None, 1500, 2000, NumberSeparator::Plain);
151139

152140
assert_eq!(author.to_string(), "75% John Doe 1500");
153141
}
@@ -156,10 +144,9 @@ mod test {
156144
fn test_authors_info_title_with_one_author() {
157145
let author = Author::new(
158146
"John Doe".into(),
159-
147+
Some("[email protected]".into()),
160148
1500,
161149
2000,
162-
true,
163150
NumberSeparator::Plain,
164151
);
165152

@@ -175,19 +162,17 @@ mod test {
175162
fn test_authors_info_title_with_two_authors() {
176163
let author = Author::new(
177164
"John Doe".into(),
178-
165+
Some("[email protected]".into()),
179166
1500,
180167
2000,
181-
true,
182168
NumberSeparator::Plain,
183169
);
184170

185171
let author_2 = Author::new(
186172
"Roberto Berto".into(),
187-
173+
None,
188174
240,
189175
300,
190-
false,
191176
NumberSeparator::Plain,
192177
);
193178

@@ -203,10 +188,9 @@ mod test {
203188
fn test_author_info_value_with_one_author() {
204189
let author = Author::new(
205190
"John Doe".into(),
206-
191+
Some("[email protected]".into()),
207192
1500,
208193
2000,
209-
true,
210194
NumberSeparator::Plain,
211195
);
212196

@@ -227,19 +211,17 @@ mod test {
227211
fn test_author_info_value_with_two_authors() {
228212
let author = Author::new(
229213
"John Doe".into(),
230-
214+
Some("[email protected]".into()),
231215
1500,
232216
2000,
233-
true,
234217
NumberSeparator::Plain,
235218
);
236219

237220
let author_2 = Author::new(
238221
"Roberto Berto".into(),
239-
222+
None,
240223
240,
241224
300,
242-
false,
243225
NumberSeparator::Plain,
244226
);
245227

src/info/churn.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
use super::utils::{git::CommitMetrics, info_field::InfoField};
2+
use crate::{cli::NumberSeparator, info::format_number};
3+
use owo_colors::{DynColors, OwoColorize};
4+
use serde::Serialize;
5+
use std::fmt::Write;
6+
7+
#[derive(Serialize, Clone)]
8+
#[serde(rename_all = "camelCase")]
9+
pub struct FileChurn {
10+
pub file_path: String,
11+
pub nbr_of_commits: usize,
12+
#[serde(skip_serializing)]
13+
number_separator: NumberSeparator,
14+
}
15+
16+
impl FileChurn {
17+
pub fn new(
18+
file_path: String,
19+
nbr_of_commits: usize,
20+
number_separator: NumberSeparator,
21+
) -> Self {
22+
Self {
23+
file_path,
24+
nbr_of_commits,
25+
number_separator,
26+
}
27+
}
28+
}
29+
30+
impl std::fmt::Display for FileChurn {
31+
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
32+
write!(
33+
f,
34+
"{} {}",
35+
shorten_file_path(&self.file_path, 2),
36+
format_number(&self.nbr_of_commits, self.number_separator)
37+
)
38+
}
39+
}
40+
41+
#[derive(Serialize)]
42+
pub struct ChurnInfo {
43+
pub file_churns: Vec<FileChurn>,
44+
pub churn_pool_size: usize,
45+
#[serde(skip_serializing)]
46+
pub info_color: DynColors,
47+
}
48+
impl ChurnInfo {
49+
pub fn new(info_color: DynColors, commit_metrics: &CommitMetrics) -> Self {
50+
let file_churns = commit_metrics.file_churns_to_display.clone();
51+
Self {
52+
file_churns,
53+
churn_pool_size: commit_metrics.churn_pool_size,
54+
info_color,
55+
}
56+
}
57+
}
58+
impl std::fmt::Display for ChurnInfo {
59+
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
60+
let mut churn_info = String::new();
61+
62+
let pad = self.title().len() + 2;
63+
64+
for (i, churn) in self.file_churns.iter().enumerate() {
65+
let churn_str = churn.color(self.info_color);
66+
67+
if i == 0 {
68+
write!(churn_info, "{churn_str}")?;
69+
} else {
70+
write!(churn_info, "\n{:<width$}{}", "", churn_str, width = pad)?;
71+
}
72+
}
73+
74+
write!(f, "{churn_info}")
75+
}
76+
}
77+
78+
#[typetag::serialize]
79+
impl InfoField for ChurnInfo {
80+
fn value(&self) -> String {
81+
self.to_string()
82+
}
83+
84+
fn title(&self) -> String {
85+
format!("Churn ({})", self.churn_pool_size)
86+
}
87+
88+
fn should_color(&self) -> bool {
89+
false
90+
}
91+
}
92+
93+
/// Shortens `file_path` to at most its last `depth` `/`-separated components.
///
/// When leading components are dropped, the result is prefixed with a horizontal
/// ellipsis (`\u{2026}/`) to signal the omission. The path is returned unchanged when
/// `depth` is `0` or when it has `depth` components or fewer.
fn shorten_file_path(file_path: &str, depth: usize) -> String {
    if depth == 0 {
        return file_path.to_string();
    }

    // The `depth`-th separator counted from the right marks where the kept suffix
    // begins; with fewer separators than that there is nothing to drop.
    match file_path.rmatch_indices('/').nth(depth - 1) {
        // The slice starts at the separator itself, so only the ellipsis is prepended.
        Some((cut, _)) => format!("\u{2026}{}", &file_path[cut..]),
        None => file_path.to_string(),
    }
}
108+
109+
#[cfg(test)]
mod tests {
    use super::*;

    /// A single entry displays as its shortened path followed by the commit count.
    #[test]
    fn test_display_file_churn() {
        let rendered =
            FileChurn::new("path/to/file.txt".into(), 50, NumberSeparator::Plain).to_string();

        assert_eq!(rendered, "\u{2026}/to/file.txt 50");
    }

    /// The field value contains every entry, each wrapped in the info color.
    #[test]
    fn test_churn_info_value_with_two_file_churns() {
        let red = DynColors::Rgb(255, 0, 0);
        let churn_info = ChurnInfo {
            file_churns: vec![
                FileChurn::new("path/to/file.txt".into(), 50, NumberSeparator::Plain),
                FileChurn::new("file_2.txt".into(), 30, NumberSeparator::Plain),
            ],
            churn_pool_size: 5,
            info_color: red,
        };
        let value = churn_info.value();

        for expected in ["\u{2026}/to/file.txt 50", "file_2.txt 30"] {
            assert!(value.contains(&expected.color(red).to_string()));
        }
    }

    /// Paths are only shortened when they have more components than `depth`.
    #[test]
    fn test_truncate_file_path() {
        let cases = [
            ("path/to/file.txt", 3, "path/to/file.txt"),
            ("another/file.txt", 2, "another/file.txt"),
            ("file.txt", 1, "file.txt"),
            ("path/to/file.txt", 2, "\u{2026}/to/file.txt"),
            ("another/file.txt", 1, "\u{2026}/file.txt"),
            ("file.txt", 0, "file.txt"),
        ];

        for (path, depth, expected) in cases {
            assert_eq!(shorten_file_path(path, depth), expected);
        }
    }
}

0 commit comments

Comments
 (0)