[AArch64] Generate zeroing forms of certain SVE2.2 instructions (10/11) #116836
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: Momchil Velikov (momchil-velikov)

Changes

SVE2.2 introduces instructions with predicated forms that zero the inactive lanes. This patch adds support for emitting the zeroing forms of certain RBIT, REVB, REVH, REVW, and REVD instructions.

Patch is 62.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116836.diff

3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7dd6d49bf20227..f6ea15ef164a18 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4339,11 +4339,11 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm LASTP_XPP : sve_int_pcount_pred_tmp<0b010, "lastp">;
// SVE reverse within elements, zeroing predicate
- defm RBIT_ZPzZ : sve_int_perm_rev_rbit_z<"rbit">;
- defm REVB_ZPzZ : sve_int_perm_rev_revb_z<"revb">;
- defm REVH_ZPzZ : sve_int_perm_rev_revh_z<"revh">;
- def REVW_ZPzZ : sve_int_perm_rev_z<0b11, 0b0110, "revw", ZPR64>;
- def REVD_ZPzZ : sve_int_perm_rev_z<0b00, 0b1110, "revd", ZPR128>;
+ defm RBIT_ZPzZ : sve_int_perm_rev_rbit_z<"rbit", AArch64rbit_mt>;
+ defm REVB_ZPzZ : sve_int_perm_rev_revb_z<"revb", AArch64revb_mt>;
+ defm REVH_ZPzZ : sve_int_perm_rev_revh_z<"revh", AArch64revh_mt>;
+ defm REVW_ZPzZ : sve_int_perm_rev_revw_z<"revw", AArch64revw_mt>;
+ defm REVD_ZPzZ : sve_int_perm_rev_revd_z<"revd", AArch64revd_mt>;
} // End HasSME2p2orSVE2p2
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 0ef862fc1a27cf..597458283644e9 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7621,22 +7621,54 @@ class sve_int_perm_rev_z<bits<2> sz, bits<4> opc, string asm,
let hasSideEffects = 0;
}
-multiclass sve_int_perm_rev_rbit_z<string asm> {
+multiclass sve_int_perm_rev_rbit_z<string asm, SDPatternOperator op> {
def _B : sve_int_perm_rev_z<0b00, 0b0111, asm, ZPR8>;
def _H : sve_int_perm_rev_z<0b01, 0b0111, asm, ZPR16>;
def _S : sve_int_perm_rev_z<0b10, 0b0111, asm, ZPR32>;
def _D : sve_int_perm_rev_z<0b11, 0b0111, asm, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_perm_rev_revb_z<string asm> {
+multiclass sve_int_perm_rev_revb_z<string asm, SDPatternOperator op> {
def _H : sve_int_perm_rev_z<0b01, 0b0100, asm, ZPR16>;
def _S : sve_int_perm_rev_z<0b10, 0b0100, asm, ZPR32>;
def _D : sve_int_perm_rev_z<0b11, 0b0100, asm, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_perm_rev_revh_z<string asm> {
+multiclass sve_int_perm_rev_revh_z<string asm, SDPatternOperator op> {
def _S : sve_int_perm_rev_z<0b10, 0b0101, asm, ZPR32>;
def _D : sve_int_perm_rev_z<0b11, 0b0101, asm, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_int_perm_rev_revw_z<string asm, SDPatternOperator op> {
+ def _D : sve_int_perm_rev_z<0b11, 0b0110, asm, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_int_perm_rev_revd_z<string asm, SDPatternOperator op> {
+ def NAME : sve_int_perm_rev_z<0b00, 0b1110, asm, ZPR128>;
+
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME)>;
+
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME)>;
}
class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-rev.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-rev.ll
new file mode 100644
index 00000000000000..d7a51c8cf8062d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-rev.ll
@@ -0,0 +1,1502 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 16 x i8> @test_svrbit_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svrbit_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rbit z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svrbit_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svrbit_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rbit z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svrbit_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svrbit_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: rbit z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svrbit_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svrbit_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rbit z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svrbit_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svrbit_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rbit z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svrbit_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svrbit_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: rbit z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svrbit_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrbit_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rbit z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrbit_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrbit_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rbit z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrbit_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrbit_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: rbit z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svrbit_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svrbit_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rbit z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svrbit_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svrbit_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rbit z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svrbit_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svrbit_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: rbit z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrbit_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: rbit z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_svrevb_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svrevb_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revb z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svrevb_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svrevb_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svrevb_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svrevb_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: revb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svrevb_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrevb_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revb z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrevb_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrevb_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrevb_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrevb_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: revb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svrevb_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svrevb_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revb z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svrevb_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svrevb_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svrevb_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svrevb_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: revb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevb_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @test_svrevh_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrevh_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revh z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevh_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revh z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrevh_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrevh_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revh z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevh_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revh z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svrevh_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svrevh_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: revh z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevh_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revh z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svrevh_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svrevh_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revh z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevh_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revh z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svrevh_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svrevh_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: revh z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svrevh_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: revh z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm...
[truncated]
LGTM
SVE2.2 introduces instructions with predicated forms that zero the inactive lanes. This allows, in some cases, saving a movprfx or a mov instruction when emitting code for the _x or _z variants of intrinsics.

This patch adds support for emitting the zeroing forms of certain RBIT, REVB, REVH, REVW, and REVD instructions.
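For context, a minimal ACLE sketch of the kind of source that benefits (illustrative only, not part of the patch; the intrinsic names come from the Arm C Language Extensions for SVE). The _z variant passes a zero passthru to the underlying llvm.aarch64.sve.rbit intrinsic and the _x variant passes poison; with the patterns added here, both match the zeroing rbit ... p0/z form when targeting SVE2.2, as the new tests in zeroing-forms-rev.ll show.

// Illustrative sketch (assumed ACLE usage, not from this patch): with +sve2p2,
// the _z form below can select "rbit z0.b, p0/z, z0.b" directly, avoiding the
// "mov z0.b, #0" (or movprfx) that the merging form would otherwise need, and
// the _x form (poison passthru in IR) also maps to the zeroing instruction.
#include <arm_sve.h>

svuint8_t reverse_bits_z(svbool_t pg, svuint8_t x) {
  return svrbit_u8_z(pg, x);   // inactive lanes of the result are zero
}

svuint8_t reverse_bits_x(svbool_t pg, svuint8_t x) {
  return svrbit_u8_x(pg, x);   // inactive lanes are unspecified
}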