diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4e9a6942d6cd9..59a9dced6cf03 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6111,18 +6111,37 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
-
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
+  EVT VT = N->getValueType(0);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
   SDValue Mask = N->getMask();
   EVT MaskVT = Mask.getValueType();
   SDValue PassThru = GetWidenedVector(N->getPassThru());
   ISD::LoadExtType ExtType = N->getExtensionType();
   SDLoc dl(N);
 
+  EVT WideMaskVT =
+      EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
+                       WidenVT.getVectorElementCount());
+
+  if (ExtType == ISD::NON_EXTLOAD &&
+      TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WidenVT) &&
+      TLI.isTypeLegal(WideMaskVT)) {
+    Mask = DAG.getInsertSubvector(dl, DAG.getUNDEF(WideMaskVT), Mask, 0);
+    SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
+                                      VT.getVectorElementCount());
+    SDValue NewLoad =
+        DAG.getLoadVP(N->getAddressingMode(), ISD::NON_EXTLOAD, WidenVT, dl,
+                      N->getChain(), N->getBasePtr(), N->getOffset(), Mask, EVL,
+                      N->getMemoryVT(), N->getMemOperand());
+
+    // Modified the chain - switch anything that used the old chain to use
+    // the new one.
+    ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+
+    return NewLoad;
+  }
+
   // The mask should be widened as well
-  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
-                                    MaskVT.getVectorElementType(),
-                                    WidenVT.getVectorNumElements());
   Mask = ModifyToType(Mask, WideMaskVT, true);
 
   SDValue Res = DAG.getMaskedLoad(
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
index 636af9535f6fa..76ce41655ea1e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
@@ -325,10 +325,7 @@ define <128 x half> @masked_load_v128f16(ptr %a, <128 x i1> %mask) {
 define <7 x float> @masked_load_v7f32(ptr %a, <7 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v7f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 127
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, a1
-; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <7 x float> @llvm.masked.load.v7f32(ptr %a, i32 8, <7 x i1> %mask, <7 x float> undef)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index f8f8a0c22d212..545c89495e621 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -334,10 +334,7 @@ define <256 x i8> @masked_load_v256i8(ptr %a, <256 x i1> %mask) {
 define <7 x i8> @masked_load_v7i8(ptr %a, <7 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v7i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 127
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, a1
-; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    vsetivli zero, 7, e8, mf2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %load = call <7 x i8> @llvm.masked.load.v7i8(ptr %a, i32 8, <7 x i1> %mask, <7 x i8> undef)
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll
new file mode 100644
index 0000000000000..493d55f6eefe6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr %a, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i64> undef)
+  ret <vscale x 1 x i64> %load
+}
+declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
+
+define <vscale x 2 x i64> @masked_load_nxv2i64(ptr %a, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %load
+}
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+define <vscale x 4 x i64> @masked_load_nxv4i64(ptr %a, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr %a, i32 8, <vscale x 4 x i1> %mask, <vscale x 4 x i64> undef)
+  ret <vscale x 4 x i64> %load
+}
+declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
+
+define <vscale x 8 x i64> @masked_load_nxv8i64(ptr %a, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
+  ret <vscale x 8 x i64> %load
+}
+declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index f8c1d5e45bc28..d992669306fb1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -1,51 +1,66 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
+; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
 
 define <vscale x 1 x i8> @masked_load_nxv1i8(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_load_nxv1i8:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; V-NEXT:    vle8.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_load_nxv1i8:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; ZVE32-NEXT:    vle8.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> undef)
   ret <vscale x 1 x i8> %load
 }
 declare <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
 
 define <vscale x 1 x i16> @masked_load_nxv1i16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_load_nxv1i16:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; V-NEXT:    vle16.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_load_nxv1i16:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; ZVE32-NEXT:    vle16.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i16> @llvm.masked.load.nxv1i16(ptr %a, i32 2, <vscale x 1 x i1> %mask, <vscale x 1 x i16> undef)
  ret <vscale x 1 x i16> %load
 }
 declare <vscale x 1 x i16> @llvm.masked.load.nxv1i16(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)
 
 define <vscale x 1 x i32> @masked_load_nxv1i32(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_load_nxv1i32:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; V-NEXT:    vle32.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_load_nxv1i32:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; ZVE32-NEXT:    vle32.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr %a, i32 4, <vscale x 1 x i1> %mask, <vscale x 1 x i32> undef)
   ret <vscale x 1 x i32> %load
 }
 declare <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
 
-define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  %load = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr %a, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i64> undef)
-  ret <vscale x 1 x i64> %load
-}
-declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
-
 define <vscale x 2 x i8> @masked_load_nxv2i8(ptr %a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2i8:
 ; CHECK:       # %bb.0:
@@ -79,17 +94,6 @@ define <vscale x 2 x i32> @masked_load_nxv2i32(ptr %a, <vscale x 2 x i1> %mask)
 }
 declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
 
-define <vscale x 2 x i64> @masked_load_nxv2i64(ptr %a, <vscale x 2 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv2i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
-  ret <vscale x 2 x i64> %load
-}
-declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-
 define <vscale x 4 x i8> @masked_load_nxv4i8(ptr %a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4i8:
 ; CHECK:       # %bb.0:
@@ -123,17 +127,6 @@ define <vscale x 4 x i32> @masked_load_nxv4i32(ptr %a, <vscale x 4 x i1> %mask)
 }
 declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
 
-define <vscale x 4 x i64> @masked_load_nxv4i64(ptr %a, <vscale x 4 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv4i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  %load = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr %a, i32 8, <vscale x 4 x i1> %mask, <vscale x 4 x i64> undef)
-  ret <vscale x 4 x i64> %load
-}
-declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
-
 define <vscale x 8 x i8> @masked_load_nxv8i8(ptr %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8i8:
 ; CHECK:       # %bb.0:
@@ -167,17 +160,6 @@ define <vscale x 8 x i32> @masked_load_nxv8i32(ptr %a, <vscale x 8 x i1> %mask)
 }
 declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
 
-define <vscale x 8 x i64> @masked_load_nxv8i64(ptr %a, <vscale x 8 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv8i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
-  ret <vscale x 8 x i64> %load
-}
-declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
-
 define <vscale x 16 x i8> @masked_load_nxv16i8(ptr %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16i8:
 ; CHECK:       # %bb.0: