Skip to content

assume-nonnull operand bundles should optimize away more #151791

@scottmcm

Description

@scottmcm

Today in rust we emit certain conversions using assume(icmp ne %p, null): https://rust.godbolt.org/z/WTGj3Ks9q https://github.com/rust-lang/rust/blob/63f6845e570305a92eaf855897768617366164d6/tests/codegen-llvm/intrinsics/transmute.rs#L380-L388

define { ptr, i64 } @check_pair_to_dst_ref(i64 noundef %x.0, i64 noundef %x.1) unnamed_addr {
start:
  %_0.0 = getelementptr i8, ptr null, i64 %x.0
  %0 = icmp ne ptr %_0.0, null
  call void @llvm.assume(i1 %0)
  %1 = insertvalue { ptr, i64 } poison, ptr %_0.0, 0
  %2 = insertvalue { ptr, i64 } %1, i64 %x.1, 1
  ret { ptr, i64 } %2
}

But since I hear extra uses from such icmps can sometimes make optimization worse, I wanted to move to assume operand bundles instead. So I made that change, and now I get what I think is the correct output from it:

define { ptr, i64 } @check_pair_to_dst_ref(i64 noundef %x.0, i64 noundef %x.1) unnamed_addr #0 {
start:
  %_0.0 = getelementptr i8, ptr null, i64 %x.0
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %_0.0) ]
  %0 = insertvalue { ptr, i64 } poison, ptr %_0.0, 0
  %1 = insertvalue { ptr, i64 } %0, i64 %x.1, 1
  ret { ptr, i64 } %1
}

But that turned out to give bad consequences. For example, what used to be a quite-good https://rust.godbolt.org/z/vhY16Kavc

define void @long_integer_map(ptr dead_on_unwind noalias nocapture noundef writable writeonly sret([2048 x i8]) align 4 dereferenceable(2048) %_0, ptr noalias nocapture noundef readonly align 4 dereferenceable(2048) %x) unnamed_addr personality ptr @rust_eh_personality {
start:
  %array.i.i.i.i = alloca [2048 x i8], align 4
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %start ], [ %index.next, %vector.body ]
  %offset.idx = shl i64 %index, 2
  %next.gep = getelementptr i8, ptr %x, i64 %offset.idx
  %0 = getelementptr i8, ptr %next.gep, i64 16
  %wide.load = load <4 x i32>, ptr %next.gep, align 4
  %wide.load1 = load <4 x i32>, ptr %0, align 4
  %1 = mul <4 x i32> %wide.load, splat (i32 13)
  %2 = mul <4 x i32> %wide.load1, splat (i32 13)
  %3 = add <4 x i32> %1, splat (i32 7)
  %4 = add <4 x i32> %2, splat (i32 7)
  %5 = getelementptr inbounds nuw i32, ptr %array.i.i.i.i, i64 %index
  %6 = getelementptr inbounds nuw i8, ptr %5, i64 16
  store <4 x i32> %3, ptr %5, align 4
  store <4 x i32> %4, ptr %6, align 4
  %index.next = add nuw i64 %index, 8
  %7 = icmp eq i64 %index.next, 512
  br i1 %7, label %core::array::drain::drain_array_with::h75d8f8b0fda7bb41.exit, label %vector.body

core::array::drain::drain_array_with::h75d8f8b0fda7bb41.exit:
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(2048) %_0, ptr noundef nonnull align 4 dereferenceable(2048) %array.i.i.i.i, i64 2048, i1 false)
  ret void
}

Never removes any of the superfluous-after-inlining assumes, giving this obviously-silly IR:

; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite, inaccessiblemem: write) uwtable
define void @long_integer_map(ptr dead_on_unwind noalias nocapture noundef writable writeonly sret([2048 x i8]) align 4 dereferenceable(2048) %_0, ptr noalias nocapture noundef readonly align 4 dereferenceable(2048) %x) unnamed_addr #1 personality ptr @__CxxFrameHandler3 {
start:
  %array.i.i.i.i = alloca [2048 x i8], align 4
  %array1.i = alloca [2048 x i8], align 4
  call void @llvm.lifetime.start.p0(i64 2048, ptr nonnull %array1.i), !noalias !7
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(2048) %array1.i, ptr noundef nonnull readonly align 4 dereferenceable(2048) %x, i64 2048, i1 false), !noalias !11
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %array1.i) ]
  %0 = getelementptr inbounds nuw i8, ptr %array1.i, i64 2048
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %array1.i) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  %1 = getelementptr inbounds nuw i8, ptr %array1.i, i64 2048
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %start
  %index = phi i64 [ 0, %start ], [ %index.next, %vector.body ]
  %offset.idx = shl i64 %index, 2
  %2 = or disjoint i64 %offset.idx, 4
  %3 = or disjoint i64 %offset.idx, 8
  %4 = or disjoint i64 %offset.idx, 12
  %5 = or disjoint i64 %offset.idx, 16
  %6 = or disjoint i64 %offset.idx, 20
  %7 = or disjoint i64 %offset.idx, 24
  %8 = or disjoint i64 %offset.idx, 28
  %next.gep = getelementptr i8, ptr %array1.i, i64 %offset.idx
  %next.gep1 = getelementptr i8, ptr %array1.i, i64 %2
  %next.gep2 = getelementptr i8, ptr %array1.i, i64 %3
  %next.gep3 = getelementptr i8, ptr %array1.i, i64 %4
  %next.gep4 = getelementptr i8, ptr %array1.i, i64 %5
  %next.gep5 = getelementptr i8, ptr %array1.i, i64 %6
  %next.gep6 = getelementptr i8, ptr %array1.i, i64 %7
  %next.gep7 = getelementptr i8, ptr %array1.i, i64 %8
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep1) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep2) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep3) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep4) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep5) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep6) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep7) ]
  %9 = getelementptr inbounds nuw i8, ptr %next.gep, i64 4
  %10 = getelementptr inbounds nuw i8, ptr %next.gep1, i64 4
  %11 = getelementptr inbounds nuw i8, ptr %next.gep2, i64 4
  %12 = getelementptr inbounds nuw i8, ptr %next.gep3, i64 4
  %13 = getelementptr inbounds nuw i8, ptr %next.gep4, i64 4
  %14 = getelementptr inbounds nuw i8, ptr %next.gep5, i64 4
  %15 = getelementptr inbounds nuw i8, ptr %next.gep6, i64 4
  %16 = getelementptr inbounds nuw i8, ptr %next.gep7, i64 4
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %9) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %10) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %11) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %12) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %13) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %14) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %15) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %16) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep1) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep2) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep3) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep4) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep5) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep6) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %next.gep7) ]
  %17 = getelementptr i8, ptr %next.gep, i64 16
  %wide.load = load <4 x i32>, ptr %next.gep, align 4, !noalias !12
  %wide.load8 = load <4 x i32>, ptr %17, align 4, !noalias !12
  %18 = mul <4 x i32> %wide.load, splat (i32 13)
  %19 = mul <4 x i32> %wide.load8, splat (i32 13)
  %20 = add <4 x i32> %18, splat (i32 7)
  %21 = add <4 x i32> %19, splat (i32 7)
  %22 = getelementptr inbounds nuw i32, ptr %array.i.i.i.i, i64 %index
  %23 = getelementptr inbounds nuw i8, ptr %22, i64 16
  store <4 x i32> %20, ptr %22, align 4
  store <4 x i32> %21, ptr %23, align 4
  %index.next = add nuw i64 %index, 8
  %24 = icmp eq i64 %index.next, 512
  br i1 %24, label %_ZN4core5array5drain16drain_array_with17hdab83ed713860683E.exit, label %vector.body, !llvm.loop !27

_ZN4core5array5drain16drain_array_with17hdab83ed713860683E.exit: ; preds = %vector.body
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %1) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %1) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.lifetime.end.p0(i64 2048, ptr nonnull %array1.i), !noalias !7
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(2048) %_0, ptr noundef nonnull align 4 dereferenceable(2048) %array.i.i.i.i, i64 2048, i1 false)
  ret void
}

Trunk can clean that up a bit, but it's still full of unnecessary assumes: https://llvm.godbolt.org/z/b8oM1vxvT

At a minimum this at least ought to be treated idempotently, since there's no need for

  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]
  call void @llvm.assume(i1 true) [ "nonnull"(ptr %0) ]

repeated in a row like that. But it'd also be nice to optimize out all the ones that came from GEP nuw, for example.


(Or if this is the wrong way to do this, that'd be good to know and reflect in the langref too.)

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions