
[CodeGen][RISCV] Use vscale_range to handle more fixed<->scalable casts of i1 vectors. #138378
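For context, here is a minimal C sketch (not taken from the PR) of the kind of code this change affects. It assumes RVV fixed-length types are enabled with something like -march=rv64gcv_zvl64b -mrvv-vector-bits=zvl, so that Clang sees a single known vscale value. For bool types whose fixed-length form is narrower than 8 bits (e.g. vbool32_t at VLEN=64), the fixed representation is an i8 vector; converting it to or from the scalable i1 predicate previously forced a store/load round-trip through the stack, and with a known vscale it can instead be done with vector insert/extract plus a bitcast.

// Hypothetical example; the typedef and intrinsic call mirror the test file
// attr-riscv-rvv-vector-bits-less-8-call.c further down in this diff.
#include <riscv_vector.h>

typedef vbool32_t fixed_bool32_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 32)));

// The scalable result of the intrinsic has to be coerced back to the
// fixed-length return type; that coercion is what this patch improves.
fixed_bool32_t and_fixed(fixed_bool32_t op1, fixed_bool32_t op2) {
  return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
}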


Closed
wants to merge 1 commit
55 changes: 55 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
@@ -1378,6 +1378,35 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
Result = CGF.Builder.CreateBitCast(Result, Ty);
return Result;
}

// If we are casting a fixed i8 vector to a scalable i1 predicate
// vector, and we weren't able to handle it above, try using what we know
// about vscale to insert a fixed i1 vector into the scalable vector.
if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
FixedSrcTy->getElementType()->isIntegerTy(8)) {
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
CGF.getContext().getTargetInfo().getVScaleRange(CGF.getLangOpts(),
false);
if (VScaleRange && VScaleRange->first == VScaleRange->second &&
VScaleRange->first <= FixedSrcTy->getNumElements() * 8) {
llvm::Value *Load = CGF.Builder.CreateLoad(Src);
unsigned VScale = VScaleRange->first;
llvm::Type *WideFixedTy =
llvm::FixedVectorType::get(ScalableDstTy->getElementType(),
FixedSrcTy->getNumElements() * 8);
Load = CGF.Builder.CreateBitCast(Load, WideFixedTy);
llvm::Type *FixedTy = llvm::FixedVectorType::get(
ScalableDstTy->getElementType(),
ScalableDstTy->getElementCount().getKnownMinValue() * VScale);
// If the fixed i8 vector is larger than the i1 vector, we need to
// extract.
if (FixedTy != WideFixedTy)
Load = CGF.Builder.CreateExtractVector(FixedTy, Load, uint64_t(0));
return CGF.Builder.CreateInsertVector(
ScalableDstTy, llvm::PoisonValue::get(ScalableDstTy), Load,
uint64_t(0));
}
}
}
}

@@ -1485,6 +1514,32 @@ CoerceScalableToFixed(CodeGenFunction &CGF, llvm::FixedVectorType *ToTy,
V = CGF.Builder.CreateExtractVector(ToTy, V, uint64_t(0), "cast.fixed");
return {V, true};
}

// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, and we weren't able to handle it above, try using what we know
// about vscale to extract a fixed i1 vector from the scalable vector.
if (FromTy->getElementType()->isIntegerTy(1) &&
ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
CGF.getContext().getTargetInfo().getVScaleRange(CGF.getLangOpts(),
false);
if (VScaleRange && VScaleRange->first == VScaleRange->second &&
VScaleRange->first <= ToTy->getNumElements() * 8) {
unsigned VScale = VScaleRange->first;
llvm::Type *FixedTy = llvm::FixedVectorType::get(
FromTy->getElementType(),
FromTy->getElementCount().getKnownMinValue() * VScale);
V = CGF.Builder.CreateExtractVector(FixedTy, V, uint64_t(0));
llvm::Type *WideFixedTy = llvm::FixedVectorType::get(
FromTy->getElementType(), ToTy->getNumElements() * 8);
if (FixedTy != WideFixedTy)
V = CGF.Builder.CreateInsertVector(
WideFixedTy, llvm::PoisonValue::get(WideFixedTy), V, uint64_t(0));
V = CGF.Builder.CreateBitCast(V, ToTy);
return {V, true};
}
}

return {V, false};
}

58 changes: 58 additions & 0 deletions clang/lib/CodeGen/CGExprScalar.cpp
@@ -2493,6 +2493,35 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
Result = Builder.CreateBitCast(Result, DstTy);
return Result;
}

// If we are casting a fixed i8 vector to a scalable i1 predicate
// vector, and we weren't able to handle it above, try using what we
// know about vscale to insert a fixed i1 vector into the scalable
// vector.
if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
FixedSrcTy->getElementType()->isIntegerTy(8)) {
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
CGF.getContext().getTargetInfo().getVScaleRange(CGF.getLangOpts(),
false);
if (VScaleRange && VScaleRange->first == VScaleRange->second &&
VScaleRange->first <= FixedSrcTy->getNumElements() * 8) {
unsigned VScale = VScaleRange->first;
llvm::Type *WideFixedTy =
llvm::FixedVectorType::get(ScalableDstTy->getElementType(),
FixedSrcTy->getNumElements() * 8);
Src = Builder.CreateBitCast(Src, WideFixedTy);
llvm::Type *FixedTy = llvm::FixedVectorType::get(
ScalableDstTy->getElementType(),
ScalableDstTy->getElementCount().getKnownMinValue() * VScale);
// If the fixed i8 vector is larger than the i1 vector, we need to
// extract.
if (FixedTy != WideFixedTy)
Src = Builder.CreateExtractVector(FixedTy, Src, uint64_t(0));
return Builder.CreateInsertVector(
ScalableDstTy, llvm::PoisonValue::get(ScalableDstTy), Src,
uint64_t(0));
}
}
}
}

@@ -2514,6 +2543,35 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
if (ScalableSrcTy->getElementType() == FixedDstTy->getElementType())
return Builder.CreateExtractVector(DstTy, Src, uint64_t(0),
"cast.fixed");

// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, and we weren't able to handle it above, try using what we
// know about vscale to extract a fixed i1 vector from the scalable
// vector.
if (ScalableSrcTy->getElementType()->isIntegerTy(1) &&
FixedDstTy->getElementType()->isIntegerTy(8)) {
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
CGF.getContext().getTargetInfo().getVScaleRange(CGF.getLangOpts(),
false);
if (VScaleRange && VScaleRange->first == VScaleRange->second &&
VScaleRange->first <= FixedDstTy->getNumElements() * 8) {
unsigned VScale = VScaleRange->first;
llvm::Type *FixedTy = llvm::FixedVectorType::get(
ScalableSrcTy->getElementType(),
ScalableSrcTy->getElementCount().getKnownMinValue() * VScale);
Src = Builder.CreateExtractVector(FixedTy, Src, uint64_t(0));
llvm::Type *WideFixedTy =
llvm::FixedVectorType::get(ScalableSrcTy->getElementType(),
FixedDstTy->getNumElements() * 8);
// If the fixed i8 vector is larger than the i1 vector, we need to
// widen the i1 vector.
if (FixedTy != WideFixedTy)
Src = Builder.CreateInsertVector(
WideFixedTy, llvm::PoisonValue::get(WideFixedTy), Src,
uint64_t(0));
return Builder.CreateBitCast(Src, FixedDstTy);
}
}
}
}

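The CGExprScalar.cpp hunks above handle the same i1 conversions when they appear as casts in expressions rather than at call boundaries. A hypothetical illustration under the same fixed-VLEN assumptions as the sketch near the top (again, not taken from the PR's tests):

#include <riscv_vector.h>

typedef vbool32_t fixed_bool32_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 32)));

vbool32_t to_scalable(fixed_bool32_t m) {
  // Fixed i8 storage -> scalable i1 predicate: with a known vscale this can
  // be lowered as a bitcast plus extract/insert into the scalable vector.
  vbool32_t s = m;
  return s;
}

fixed_bool32_t to_fixed(vbool32_t s) {
  // Scalable i1 predicate -> fixed i8 storage: extract a fixed i1 vector,
  // widen it if needed, then bitcast to the i8 form.
  fixed_bool32_t m = s;
  return m;
}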
104 changes: 16 additions & 88 deletions clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c
@@ -15,24 +15,12 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_

// CHECK-64-LABEL: @call_bool32_ff(
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 2)
// CHECK-64-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[TMP1:%.*]], i64 2)
// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP2]]
//
// CHECK-128-LABEL: @call_bool32_ff(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 4)
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[TMP1:%.*]], i64 4)
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
//
fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
@@ -41,24 +29,12 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {

// CHECK-64-LABEL: @call_bool64_ff(
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 1)
// CHECK-64-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[TMP1:%.*]], i64 1)
// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP2]]
//
// CHECK-128-LABEL: @call_bool64_ff(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 2)
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[TMP1:%.*]], i64 2)
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
//
fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
@@ -71,51 +47,27 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {

// CHECK-64-LABEL: @call_bool32_fs(
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
// CHECK-64-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP2]]
// CHECK-64-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP1]]
//
// CHECK-128-LABEL: @call_bool32_fs(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
// CHECK-128-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP1]]
//
fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) {
return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
}

// CHECK-64-LABEL: @call_bool64_fs(
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
// CHECK-64-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP2]]
// CHECK-64-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP1]]
//
// CHECK-128-LABEL: @call_bool64_fs(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
// CHECK-128-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP1]]
//
fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64);
@@ -127,51 +79,27 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {

// CHECK-64-LABEL: @call_bool32_ss(
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
// CHECK-64-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP2]]
// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP0]]
//
// CHECK-128-LABEL: @call_bool32_ss(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP0]]
//
fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) {
return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
}

// CHECK-64-LABEL: @call_bool64_ss(
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
// CHECK-64-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP2]]
// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP0]]
//
// CHECK-128-LABEL: @call_bool64_ss(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP0]]
//
fixed_bool64_t call_bool64_ss(vbool64_t op1, vbool64_t op2) {
return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64);