From cfb0f11a9f506292cd213872f759078d9302e0cc Mon Sep 17 00:00:00 2001 From: The 8472 Date: Mon, 2 Jan 2023 05:17:47 +0100 Subject: [PATCH 1/4] add benchmark --- library/core/benches/slice.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/library/core/benches/slice.rs b/library/core/benches/slice.rs index 9b86a0ca97c09..3bfb35e684ea1 100644 --- a/library/core/benches/slice.rs +++ b/library/core/benches/slice.rs @@ -1,3 +1,4 @@ +use core::ptr::NonNull; use test::black_box; use test::Bencher; @@ -162,3 +163,11 @@ fn fill_byte_sized(b: &mut Bencher) { black_box(slice.fill(black_box(NewType(42)))); }); } + +// Tests the ability of the compiler to recognize that only the last slice item is needed +// based on issue #106288 +#[bench] +fn fold_to_last(b: &mut Bencher) { + let slice: &[i32] = &[0; 1024]; + b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r)))); +} From d89e4581594a5d96b9ddaa454de3b25d6cc34ee8 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sun, 1 Jan 2023 05:22:54 +0100 Subject: [PATCH 2/4] optimize slice::Iter::fold --- library/core/src/slice/iter/macros.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs index 3462c0e020a3d..dd809ca459d20 100644 --- a/library/core/src/slice/iter/macros.rs +++ b/library/core/src/slice/iter/macros.rs @@ -191,6 +191,29 @@ macro_rules! iterator { self.next_back() } + #[inline] + fn fold(mut self, init: B, mut f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + // Handling the 0-len case explicitly and then using a do-while style loop + // helps the optimizer. See issue #106288 + if is_empty!(self) { + return init; + } + let mut acc = init; + // SAFETY: The 0-len case was handled above so one loop iteration is guaranteed. + unsafe { + loop { + acc = f(acc, next_unchecked!(self)); + if is_empty!(self) { + break; + } + } + } + acc + } + // We override the default implementation, which uses `try_fold`, // because this simple implementation generates less LLVM IR and is // faster to compile. From ba5b2f0b4b71662663a224f685b2b808d9c8f99a Mon Sep 17 00:00:00 2001 From: The 8472 Date: Mon, 12 Jun 2023 14:05:46 +0200 Subject: [PATCH 3/4] add codegen test for slice::Iter::fold --- tests/codegen/slice-iter-fold.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 tests/codegen/slice-iter-fold.rs diff --git a/tests/codegen/slice-iter-fold.rs b/tests/codegen/slice-iter-fold.rs new file mode 100644 index 0000000000000..9391c176130a4 --- /dev/null +++ b/tests/codegen/slice-iter-fold.rs @@ -0,0 +1,14 @@ +// ignore-debug: the debug assertions get in the way +// compile-flags: -O +// min-llvm-version: 16 +#![crate_type = "lib"] + +// CHECK-LABEL: @slice_fold_to_last +#[no_mangle] +pub fn slice_fold_to_last(slice: &[i32]) -> Option<&i32> { + // CHECK-NOT: loop + // CHECK-NOT: br + // CHECK-NOT: call + // CHECK: ret + slice.iter().fold(None, |_, i| Some(i)) +} From d90508f76130a8a9aaa68521ac1b0e35ea9a236e Mon Sep 17 00:00:00 2001 From: The 8472 Date: Mon, 12 Jun 2023 14:06:24 +0200 Subject: [PATCH 4/4] use indexed loop instead of ptr bumping this seems to produce less IR --- library/core/src/slice/iter/macros.rs | 30 ++++++++++++++++++--------- tests/codegen/vec-shrink-panik.rs | 8 ------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs index dd809ca459d20..96a145e22ed5d 100644 --- a/library/core/src/slice/iter/macros.rs +++ b/library/core/src/slice/iter/macros.rs @@ -192,23 +192,33 @@ macro_rules! iterator { } #[inline] - fn fold(mut self, init: B, mut f: F) -> B + fn fold(self, init: B, mut f: F) -> B where F: FnMut(B, Self::Item) -> B, { - // Handling the 0-len case explicitly and then using a do-while style loop - // helps the optimizer. See issue #106288 + // this implementation consists of the following optimizations compared to the + // default implementation: + // - do-while loop, as is llvm's preferred loop shape, + // see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops + // - bumps an index instead of a pointer since the latter case inhibits + // some optimizations, see #111603 + // - avoids Option wrapping/matching if is_empty!(self) { return init; } let mut acc = init; - // SAFETY: The 0-len case was handled above so one loop iteration is guaranteed. - unsafe { - loop { - acc = f(acc, next_unchecked!(self)); - if is_empty!(self) { - break; - } + let mut i = 0; + let len = len!(self); + loop { + // SAFETY: the loop iterates `i in 0..len`, which always is in bounds of + // the slice allocation + acc = f(acc, unsafe { & $( $mut_ )? *self.ptr.add(i).as_ptr() }); + // SAFETY: `i` can't overflow since it'll only reach usize::MAX if the + // slice had that length, in which case we'll break out of the loop + // after the increment + i = unsafe { i.unchecked_add(1) }; + if i == len { + break; } } acc diff --git a/tests/codegen/vec-shrink-panik.rs b/tests/codegen/vec-shrink-panik.rs index 606d68ff3ab38..14fef4e2cd55a 100644 --- a/tests/codegen/vec-shrink-panik.rs +++ b/tests/codegen/vec-shrink-panik.rs @@ -37,14 +37,6 @@ pub fn issue71861(vec: Vec) -> Box<[u32]> { // CHECK-LABEL: @issue75636 #[no_mangle] pub fn issue75636<'a>(iter: &[&'a str]) -> Box<[&'a str]> { - // CHECK-NOT: panic - - // Call to panic_cannot_unwind in case of double-panic is expected, - // on LLVM 16 and older, but other panics are not. - // old: filter - // old-NEXT: ; call core::panicking::panic_cannot_unwind - // old-NEXT: panic_cannot_unwind - // CHECK-NOT: panic iter.iter().copied().collect() }