diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89f90ee2b7707..207bd9debba66 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -365,6 +365,8 @@ class AArch64TargetLowering : public TargetLowering {
     return true;
   }
 
+  bool preferZeroCompareBranch() const override { return true; }
+
   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 
   bool hasAndNotCompare(SDValue V) const override {
diff --git a/llvm/test/CodeGen/AArch64/branch-on-zero.ll b/llvm/test/CodeGen/AArch64/branch-on-zero.ll
new file mode 100644
index 0000000000000..07fc01a91600a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/branch-on-zero.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i32 @test_lshr(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
+; CHECK-SD-LABEL: test_lshr:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    lsr w8, w2, #2
+; CHECK-SD-NEXT:    cbz w8, .LBB0_2
+; CHECK-SD-NEXT:  .LBB0_1: // %while.body
+; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-SD-NEXT:    ldr w9, [x1], #4
+; CHECK-SD-NEXT:    subs w8, w8, #1
+; CHECK-SD-NEXT:    lsl w9, w9, #1
+; CHECK-SD-NEXT:    str w9, [x0], #4
+; CHECK-SD-NEXT:    b.ne .LBB0_1
+; CHECK-SD-NEXT:  .LBB0_2: // %while.end
+; CHECK-SD-NEXT:    mov w0, wzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_lshr:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    lsr w8, w2, #2
+; CHECK-GI-NEXT:    cbz w8, .LBB0_2
+; CHECK-GI-NEXT:  .LBB0_1: // %while.body
+; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-GI-NEXT:    ldr w9, [x1], #4
+; CHECK-GI-NEXT:    add x10, x0, #4
+; CHECK-GI-NEXT:    subs w8, w8, #1
+; CHECK-GI-NEXT:    lsl w9, w9, #1
+; CHECK-GI-NEXT:    str w9, [x0]
+; CHECK-GI-NEXT:    mov x0, x10
+; CHECK-GI-NEXT:    b.ne .LBB0_1
+; CHECK-GI-NEXT:  .LBB0_2: // %while.end
+; CHECK-GI-NEXT:    mov w0, wzr
+; CHECK-GI-NEXT:    ret
+entry:
+  %shr = lshr i32 %n, 2 ; initial value of the down-counting loop counter %c.07
+  %tobool.not4 = icmp eq i32 %shr, 0 ; baseline case: the guard already compares the shifted value with zero
+  br i1 %tobool.not4, label %while.end, label %while.body ; both selectors are expected to emit lsr followed by cbz
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.07 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
+  %x.addr.06 = phi ptr [ %incdec.ptr1, %while.body ], [ %x, %entry ]
+  %y.addr.05 = phi ptr [ %incdec.ptr, %while.body ], [ %y, %entry ]
+  %incdec.ptr = getelementptr inbounds i32, ptr %y.addr.05, i32 1
+  %0 = load i32, ptr %y.addr.05, align 4
+  %mul = shl nsw i32 %0, 1 ; each element read from %y is doubled before being stored to %x
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %x.addr.06, i32 1
+  store i32 %mul, ptr %x.addr.06, align 4
+  %dec = add nsw i32 %c.07, -1
+  %tobool.not = icmp eq i32 %dec, 0 ; loop exits once the counter reaches zero
+  br i1 %tobool.not, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret i32 0
+}
+
+define i32 @test_lshr2(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
+; CHECK-SD-LABEL: test_lshr2:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    lsr w8, w2, #2
+; CHECK-SD-NEXT:    cbz w8, .LBB1_2
+; CHECK-SD-NEXT:  .LBB1_1: // %while.body
+; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-SD-NEXT:    ldr w9, [x1], #4
+; CHECK-SD-NEXT:    subs w8, w8, #1
+; CHECK-SD-NEXT:    lsl w9, w9, #1
+; CHECK-SD-NEXT:    str w9, [x0], #4
+; CHECK-SD-NEXT:    b.ne .LBB1_1
+; CHECK-SD-NEXT:  .LBB1_2: // %while.end
+; CHECK-SD-NEXT:    mov w0, wzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_lshr2:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    lsr w8, w2, #2
+; CHECK-GI-NEXT:    cbz w8, .LBB1_2
+; CHECK-GI-NEXT:  .LBB1_1: // %while.body
+; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-GI-NEXT:    ldr w9, [x1], #4
+; CHECK-GI-NEXT:    add x10, x0, #4
+; CHECK-GI-NEXT:    subs w8, w8, #1
+; CHECK-GI-NEXT:    lsl w9, w9, #1
+; CHECK-GI-NEXT:    str w9, [x0]
+; CHECK-GI-NEXT:    mov x0, x10
+; CHECK-GI-NEXT:    b.ne .LBB1_1
+; CHECK-GI-NEXT:  .LBB1_2: // %while.end
+; CHECK-GI-NEXT:    mov w0, wzr
+; CHECK-GI-NEXT:    ret
+entry:
+  %tobool.not4 = icmp ult i32 %n, 4 ; same guard as test_lshr in compare form: n u< 4 holds exactly when (n >> 2) == 0
+  br i1 %tobool.not4, label %while.end, label %while.body.preheader ; expected output matches test_lshr: lsr #2 then cbz
+
+while.body.preheader:                             ; preds = %entry
+  %shr = lshr i32 %n, 2 ; shift sits after the branch in the IR; the expected assembly computes it once, before the cbz
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %c.07 = phi i32 [ %dec, %while.body ], [ %shr, %while.body.preheader ]
+  %x.addr.06 = phi ptr [ %incdec.ptr1, %while.body ], [ %x, %while.body.preheader ]
+  %y.addr.05 = phi ptr [ %incdec.ptr, %while.body ], [ %y, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i32, ptr %y.addr.05, i32 1
+  %0 = load i32, ptr %y.addr.05, align 4
+  %mul = shl nsw i32 %0, 1
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %x.addr.06, i32 1
+  store i32 %mul, ptr %x.addr.06, align 4
+  %dec = add nsw i32 %c.07, -1
+  %tobool.not = icmp eq i32 %dec, 0
+  br i1 %tobool.not, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret i32 0
+}
+
+define i32 @lshr(i32 %u) {
+; CHECK-LABEL: lshr:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    lsr w0, w0, #4
+; CHECK-NEXT:    mov w8, w19
+; CHECK-NEXT:    cbz w0, .LBB2_2
+; CHECK-NEXT:  // %bb.1: // %if.then
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    add w8, w19, w19, lsl #1
+; CHECK-NEXT:  .LBB2_2: // %if.end
+; CHECK-NEXT:    sub w9, w19, #7
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %cmp.not = icmp ult i32 %u, 16 ; non-loop variant: u u< 16 holds exactly when (u >> 4) == 0
+  br i1 %cmp.not, label %if.end, label %if.then ; expected output is lsr #4 then cbz, identical for both selectors
+
+if.then:                                          ; preds = %entry
+  %shr = lshr i32 %u, 4 ; shifted value is only used on this path; expected assembly places the lsr before the branch
+  tail call void @use(i32 noundef %shr) ; the shifted value is passed to the external function @use
+  %mul = mul i32 %u, 3
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %u.addr.0 = phi i32 [ %mul, %if.then ], [ %u, %entry ]
+  %sub = add i32 %u, -7
+  %cmp1 = icmp ugt i32 %u.addr.0, %sub ; %u is still needed after the branch, so it is kept in w19 across the call
+  %conv = zext i1 %cmp1 to i32
+  ret i32 %conv
+}
+
+declare void @use(i32)
+