From cfb0f11a9f506292cd213872f759078d9302e0cc Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Mon, 2 Jan 2023 05:17:47 +0100
Subject: [PATCH 1/4] add benchmark

---
 library/core/benches/slice.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/library/core/benches/slice.rs b/library/core/benches/slice.rs
index 9b86a0ca97c09..3bfb35e684ea1 100644
--- a/library/core/benches/slice.rs
+++ b/library/core/benches/slice.rs
@@ -1,3 +1,4 @@
+use core::ptr::NonNull;
 use test::black_box;
 use test::Bencher;
 
@@ -162,3 +163,11 @@ fn fill_byte_sized(b: &mut Bencher) {
         black_box(slice.fill(black_box(NewType(42))));
     });
 }
+
+// Tests the ability of the compiler to recognize that only the last slice item is needed
+// based on issue #106288
+#[bench]
+fn fold_to_last(b: &mut Bencher) {
+    let slice: &[i32] = &[0; 1024];
+    b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r))));
+}

From d89e4581594a5d96b9ddaa454de3b25d6cc34ee8 Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Sun, 1 Jan 2023 05:22:54 +0100
Subject: [PATCH 2/4] optimize slice::Iter::fold

---
 library/core/src/slice/iter/macros.rs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
index 3462c0e020a3d..dd809ca459d20 100644
--- a/library/core/src/slice/iter/macros.rs
+++ b/library/core/src/slice/iter/macros.rs
@@ -191,6 +191,29 @@ macro_rules! iterator {
                 self.next_back()
             }
 
+            #[inline]
+            fn fold<B, F>(mut self, init: B, mut f: F) -> B
+                where
+                    F: FnMut(B, Self::Item) -> B,
+            {
+                // Handling the 0-len case explicitly and then using a do-while style loop
+                // helps the optimizer. See issue #106288
+                if is_empty!(self) {
+                    return init;
+                }
+                let mut acc = init;
+                // SAFETY: The 0-len case was handled above so one loop iteration is guaranteed.
+                unsafe {
+                    loop {
+                        acc = f(acc, next_unchecked!(self));
+                        if is_empty!(self) {
+                            break;
+                        }
+                    }
+                }
+                acc
+            }
+
             // We override the default implementation, which uses `try_fold`,
             // because this simple implementation generates less LLVM IR and is
             // faster to compile.

From ba5b2f0b4b71662663a224f685b2b808d9c8f99a Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Mon, 12 Jun 2023 14:05:46 +0200
Subject: [PATCH 3/4] add codegen test for slice::Iter::fold

---
 tests/codegen/slice-iter-fold.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 tests/codegen/slice-iter-fold.rs

diff --git a/tests/codegen/slice-iter-fold.rs b/tests/codegen/slice-iter-fold.rs
new file mode 100644
index 0000000000000..9391c176130a4
--- /dev/null
+++ b/tests/codegen/slice-iter-fold.rs
@@ -0,0 +1,14 @@
+// ignore-debug: the debug assertions get in the way
+// compile-flags: -O
+// min-llvm-version: 16
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @slice_fold_to_last
+#[no_mangle]
+pub fn slice_fold_to_last(slice: &[i32]) -> Option<&i32> {
+    // CHECK-NOT: loop
+    // CHECK-NOT: br
+    // CHECK-NOT: call
+    // CHECK: ret
+    slice.iter().fold(None, |_, i| Some(i))
+}

From d90508f76130a8a9aaa68521ac1b0e35ea9a236e Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Mon, 12 Jun 2023 14:06:24 +0200
Subject: [PATCH 4/4] use indexed loop instead of ptr bumping

this seems to produce less IR
---
 library/core/src/slice/iter/macros.rs | 30 ++++++++++++++++++---------
 tests/codegen/vec-shrink-panik.rs     |  8 -------
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
index dd809ca459d20..96a145e22ed5d 100644
--- a/library/core/src/slice/iter/macros.rs
+++ b/library/core/src/slice/iter/macros.rs
@@ -192,23 +192,33 @@ macro_rules! iterator {
             }
 
             #[inline]
-            fn fold<B, F>(mut self, init: B, mut f: F) -> B
+            fn fold<B, F>(self, init: B, mut f: F) -> B
                 where
                     F: FnMut(B, Self::Item) -> B,
             {
-                // Handling the 0-len case explicitly and then using a do-while style loop
-                // helps the optimizer. See issue #106288
+                // this implementation consists of the following optimizations compared to the
+                // default implementation:
+                // - do-while loop, as is llvm's preferred loop shape,
+                //   see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops
+                // - bumps an index instead of a pointer since the latter case inhibits
+                //   some optimizations, see #111603
+                // - avoids Option wrapping/matching
                 if is_empty!(self) {
                     return init;
                 }
                 let mut acc = init;
-                // SAFETY: The 0-len case was handled above so one loop iteration is guaranteed.
-                unsafe {
-                    loop {
-                        acc = f(acc, next_unchecked!(self));
-                        if is_empty!(self) {
-                            break;
-                        }
+                let mut i = 0;
+                let len = len!(self);
+                loop {
+                    // SAFETY: the loop iterates `i in 0..len`, which always is in bounds of
+                    // the slice allocation
+                    acc = f(acc, unsafe { & $( $mut_ )? *self.ptr.add(i).as_ptr() });
+                    // SAFETY: `i` can't overflow since it'll only reach usize::MAX if the
+                    // slice had that length, in which case we'll break out of the loop
+                    // after the increment
+                    i = unsafe { i.unchecked_add(1) };
+                    if i == len {
+                        break;
                     }
                 }
                 acc
diff --git a/tests/codegen/vec-shrink-panik.rs b/tests/codegen/vec-shrink-panik.rs
index 606d68ff3ab38..14fef4e2cd55a 100644
--- a/tests/codegen/vec-shrink-panik.rs
+++ b/tests/codegen/vec-shrink-panik.rs
@@ -37,14 +37,6 @@ pub fn issue71861(vec: Vec<u32>) -> Box<[u32]> {
 // CHECK-LABEL: @issue75636
 #[no_mangle]
 pub fn issue75636<'a>(iter: &[&'a str]) -> Box<[&'a str]> {
-    // CHECK-NOT: panic
-
-    // Call to panic_cannot_unwind in case of double-panic is expected,
-    // on LLVM 16 and older, but other panics are not.
-    // old: filter
-    // old-NEXT: ; call core::panicking::panic_cannot_unwind
-    // old-NEXT: panic_cannot_unwind
-
     // CHECK-NOT: panic
     iter.iter().copied().collect()
 }