Skip to content

[CIR] Upstream splat op for VectorType #139827

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -2277,6 +2277,38 @@ def VecTernaryOp : CIR_Op<"vec.ternary",
let hasFolder = 1;
}

//===----------------------------------------------------------------------===//
// VecSplatOp
//===----------------------------------------------------------------------===//

// Defines the `cir.vec.splat` operation: one scalar operand (`value`) and one
// vector result (`result`). The op is marked `Pure`, and the `TypesMatchWith`
// trait requires the type of `value` to equal the element type of `result`.
def VecSplatOp : CIR_Op<"vec.splat", [Pure,
TypesMatchWith<"type of 'value' matches element type of 'result'", "result",
"value", "cast<VectorType>($_self).getElementType()">]> {

let summary = "Convert a scalar into a vector";
let description = [{
The `cir.vec.splat` operation creates a vector value from a scalar value.
All elements of the vector have the same value, that of the given scalar.

It's a separate operation from `cir.vec.create` because more
efficient LLVM IR can be generated for it, and because some optimization and
analysis passes can benefit from knowing that all elements of the vector
have the same value.

```mlir
%value = cir.const #cir.int<3> : !s32i
%value_vec = cir.vec.splat %value : !s32i, !cir.vector<4 x !s32i>
```
}];

// The scalar to replicate, and the vector that receives it in every element.
let arguments = (ins CIR_VectorElementType:$value);
let results = (outs CIR_VectorType:$result);

// Textual form: `<value> : <scalar type>, <vector type>` followed by the
// optional attribute dictionary.
let assemblyFormat = [{
$value `:` type($value) `,` qualified(type($result)) attr-dict
}];
}

//===----------------------------------------------------------------------===//
// BaseClassAddrOp
//===----------------------------------------------------------------------===//
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1780,6 +1780,14 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) {
cgf.convertType(destTy));
}

case CK_VectorSplat: {
// Create a vector object and fill all elements with the same scalar value.
assert(destTy->isVectorType() && "CK_VectorSplat to non-vector type");
return builder.create<cir::VecSplatOp>(
cgf.getLoc(subExpr->getSourceRange()), cgf.convertType(destTy),
Visit(subExpr));
}

default:
cgf.getCIRGenModule().errorNYI(subExpr->getSourceRange(),
"CastExpr: ", ce->getCastKindName());
Expand Down
51 changes: 51 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1803,6 +1803,7 @@ void ConvertCIRToLLVMPass::runOnOperation() {
CIRToLLVMVecExtractOpLowering,
CIRToLLVMVecInsertOpLowering,
CIRToLLVMVecCmpOpLowering,
CIRToLLVMVecSplatOpLowering,
CIRToLLVMVecShuffleOpLowering,
CIRToLLVMVecShuffleDynamicOpLowering,
CIRToLLVMVecTernaryOpLowering
Expand Down Expand Up @@ -1956,6 +1957,56 @@ mlir::LogicalResult CIRToLLVMVecCmpOpLowering::matchAndRewrite(
return mlir::success();
}

/// Lower `cir.vec.splat` to the LLVM dialect.
///
/// Strategies, tried in order:
///   1. The scalar is an `llvm.mlir.constant` holding an integer or float
///      attribute: emit a single dense splat constant of the vector type.
///   2. The scalar is `llvm.mlir.poison`: the whole vector is poison.
///   3. Otherwise: insert the scalar into element 0 of a poison vector and
///      broadcast it with a `shufflevector` whose mask is all zeros. This is
///      better than one `insertelement` per vector element.
///
/// \param op       The `cir.vec.splat` operation being lowered.
/// \param adaptor  Provides the already type-converted scalar operand.
/// \param rewriter Rewriter used to create the replacement operations.
/// \returns `mlir::success()` on every path.
mlir::LogicalResult CIRToLLVMVecSplatOpLowering::matchAndRewrite(
    cir::VecSplatOp op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
  cir::VectorType vecTy = op.getType();
  mlir::Type llvmTy = typeConverter->convertType(vecTy);
  mlir::Location loc = op.getLoc();
  mlir::Value elementValue = adaptor.getValue();

  // The scalar may be a block argument (e.g. a function parameter), in which
  // case it has no defining operation. `Value::getDefiningOp<OpTy>()` yields
  // null for that case instead of tripping the null-pointer assertion inside
  // `isa<>` / `dyn_cast<>`.
  if (auto constValue =
          elementValue.getDefiningOp<mlir::LLVM::ConstantOp>()) {
    if (auto intAttr = dyn_cast<mlir::IntegerAttr>(constValue.getValue())) {
      mlir::DenseIntElementsAttr denseVec = mlir::DenseIntElementsAttr::get(
          mlir::cast<mlir::ShapedType>(llvmTy), intAttr.getValue());
      rewriter.replaceOpWithNewOp<mlir::LLVM::ConstantOp>(
          op, denseVec.getType(), denseVec);
      return mlir::success();
    }

    if (auto fpAttr = dyn_cast<mlir::FloatAttr>(constValue.getValue())) {
      mlir::DenseFPElementsAttr denseVec = mlir::DenseFPElementsAttr::get(
          mlir::cast<mlir::ShapedType>(llvmTy), fpAttr.getValue());
      rewriter.replaceOpWithNewOp<mlir::LLVM::ConstantOp>(
          op, denseVec.getType(), denseVec);
      return mlir::success();
    }
  }

  // Only the remaining paths need a poison vector, so create it here rather
  // than leaving a dead op behind when the constant path is taken.
  mlir::Value poison = rewriter.create<mlir::LLVM::PoisonOp>(loc, llvmTy);

  if (elementValue.getDefiningOp<mlir::LLVM::PoisonOp>()) {
    // If the splat value is poison, then we can just use a poison value for
    // the entire vector.
    rewriter.replaceOp(op, poison);
    return mlir::success();
  }

  // General case: place the scalar in element 0 of the poison vector, then
  // shuffle with an all-zero mask so every result element reads element 0.
  mlir::Value indexValue =
      rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), 0);
  mlir::Value oneElement = rewriter.create<mlir::LLVM::InsertElementOp>(
      loc, poison, elementValue, indexValue);
  SmallVector<int32_t> zeroValues(vecTy.getSize(), 0);
  rewriter.replaceOpWithNewOp<mlir::LLVM::ShuffleVectorOp>(op, oneElement,
                                                           poison, zeroValues);
  return mlir::success();
}

mlir::LogicalResult CIRToLLVMVecShuffleOpLowering::matchAndRewrite(
cir::VecShuffleOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,16 @@ class CIRToLLVMVecCmpOpLowering
mlir::ConversionPatternRewriter &) const override;
};

/// Conversion pattern matching `cir::VecSplatOp`. The rewrite itself is
/// defined out of line.
class CIRToLLVMVecSplatOpLowering
    : public mlir::OpConversionPattern<cir::VecSplatOp> {
public:
  // Reuse the constructors of the base conversion pattern.
  using mlir::OpConversionPattern<cir::VecSplatOp>::OpConversionPattern;

  mlir::LogicalResult
  matchAndRewrite(cir::VecSplatOp op, OpAdaptor adaptor,
                  mlir::ConversionPatternRewriter &rewriter) const override;
};

class CIRToLLVMVecShuffleOpLowering
: public mlir::OpConversionPattern<cir::VecShuffleOp> {
public:
Expand Down
64 changes: 64 additions & 0 deletions clang/test/CIR/CodeGen/vector-ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ void foo14() {
// OGCG: %[[TMP_B:.*]] = load <4 x float>, ptr %[[VEC_B]], align 16
// OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]]
// OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32>
// OGCG: store <4 x i32> %[[RES]], ptr {{.*}}, align 16

void foo15() {
vi4 a;
Expand Down Expand Up @@ -1092,6 +1093,69 @@ void foo17() {
// OGCG: %[[TMP:.*]] = load <2 x double>, ptr %[[VEC_A]], align 16
// OGCG: %[[RES:.*]]= fptoui <2 x double> %[[TMP]] to <2 x i16>

// Shifts a signed and an unsigned 4-element vector by a scalar amount. The
// checks that follow expect the scalar shift amount to be turned into a
// vector with cir.vec.splat before the shift is emitted.
void foo18() {
vi4 a = {1, 2, 3, 4};
vi4 shl = a << 3;

uvi4 b = {1u, 2u, 3u, 4u};
uvi4 shr = b >> 3u;
}

// CIR: %[[VEC_A:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a", init]
// CIR: %[[SHL_RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["shl", init]
// CIR: %[[VEC_B:.*]] = cir.alloca !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>>, ["b", init]
// CIR: %[[SHR_RES:.*]] = cir.alloca !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>>, ["shr", init]
// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
// CIR: %[[CONST_2:.*]] = cir.const #cir.int<2> : !s32i
// CIR: %[[CONST_3:.*]] = cir.const #cir.int<3> : !s32i
// CIR: %[[CONST_4:.*]] = cir.const #cir.int<4> : !s32i
// CIR: %[[VEC_A_VAL:.*]] = cir.vec.create(%[[CONST_1]], %[[CONST_2]], %[[CONST_3]], %[[CONST_4]] :
// CIR-SAME: !s32i, !s32i, !s32i, !s32i) : !cir.vector<4 x !s32i>
// CIR: cir.store{{.*}} %[[VEC_A_VAL]], %[[VEC_A]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[VEC_A]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[SH_AMOUNT:.*]] = cir.const #cir.int<3> : !s32i
// CIR: %[[SPLAT_VEC:.*]] = cir.vec.splat %[[SH_AMOUNT]] : !s32i, !cir.vector<4 x !s32i>
// CIR: %[[SHL:.*]] = cir.shift(left, %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[SPLAT_VEC]] : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
// CIR: cir.store{{.*}} %[[SHL]], %[[SHL_RES]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !u32i
// CIR: %[[CONST_2:.*]] = cir.const #cir.int<2> : !u32i
// CIR: %[[CONST_3:.*]] = cir.const #cir.int<3> : !u32i
// CIR: %[[CONST_4:.*]] = cir.const #cir.int<4> : !u32i
// CIR: %[[VEC_B_VAL:.*]] = cir.vec.create(%[[CONST_1]], %[[CONST_2]], %[[CONST_3]], %[[CONST_4]] :
// CIR-SAME: !u32i, !u32i, !u32i, !u32i) : !cir.vector<4 x !u32i>
// CIR: cir.store{{.*}} %[[VEC_B_VAL]], %[[VEC_B]] : !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>>
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[VEC_B]] : !cir.ptr<!cir.vector<4 x !u32i>>, !cir.vector<4 x !u32i>
// CIR: %[[SH_AMOUNT:.*]] = cir.const #cir.int<3> : !u32i
// CIR: %[[SPLAT_VEC:.*]] = cir.vec.splat %[[SH_AMOUNT]] : !u32i, !cir.vector<4 x !u32i>
// CIR: %[[SHR:.*]] = cir.shift(right, %[[TMP_B]] : !cir.vector<4 x !u32i>, %[[SPLAT_VEC]] : !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i>
// CIR: cir.store{{.*}} %[[SHR]], %[[SHR_RES]] : !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>>

// LLVM: %[[VEC_A:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[SHL_RES:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[VEC_B:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[SHR_RES:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %[[VEC_A]], align 16
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[VEC_A]], align 16
// LLVM: %[[SHL:.*]] = shl <4 x i32> %[[TMP_A]], splat (i32 3)
// LLVM: store <4 x i32> %[[SHL]], ptr %[[SHL_RES]], align 16
// LLVM: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %[[VEC_B]], align 16
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[VEC_B]], align 16
// LLVM: %[[SHR:.*]] = lshr <4 x i32> %[[TMP_B]], splat (i32 3)
// LLVM: store <4 x i32> %[[SHR]], ptr %[[SHR_RES]], align 16

// OGCG: %[[VEC_A:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[SHL_RES:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[VEC_B:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[SHR_RES:.*]] = alloca <4 x i32>, align 16
// OGCG: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %[[VEC_A]], align 16
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[VEC_A]], align 16
// OGCG: %[[SHL:.*]] = shl <4 x i32> %[[TMP_A]], splat (i32 3)
// OGCG: store <4 x i32> %[[SHL]], ptr %[[SHL_RES]], align 16
// OGCG: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %[[VEC_B]], align 16
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[VEC_B]], align 16
// OGCG: %[[SHR:.*]] = lshr <4 x i32> %[[TMP_B]], splat (i32 3)
// OGCG: store <4 x i32> %[[SHR]], ptr %[[SHR_RES]], align 16

void foo19() {
vi4 a;
vi4 b;
Expand Down
63 changes: 63 additions & 0 deletions clang/test/CIR/CodeGen/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,69 @@ void foo17() {
// OGCG: %[[TMP:.*]] = load <2 x double>, ptr %[[VEC_A]], align 16
// OGCG: %[[RES:.*]]= fptoui <2 x double> %[[TMP]] to <2 x i16>

// Shifts a signed and an unsigned 4-element vector by a scalar amount. The
// checks that follow expect the scalar shift amount to be turned into a
// vector with cir.vec.splat before the shift is emitted.
void foo18() {
vi4 a = {1, 2, 3, 4};
vi4 shl = a << 3;

uvi4 b = {1u, 2u, 3u, 4u};
uvi4 shr = b >> 3u;
}

// CIR: %[[VEC_A:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a", init]
// CIR: %[[SHL_RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["shl", init]
// CIR: %[[VEC_B:.*]] = cir.alloca !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>>, ["b", init]
// CIR: %[[SHR_RES:.*]] = cir.alloca !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>>, ["shr", init]
// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
// CIR: %[[CONST_2:.*]] = cir.const #cir.int<2> : !s32i
// CIR: %[[CONST_3:.*]] = cir.const #cir.int<3> : !s32i
// CIR: %[[CONST_4:.*]] = cir.const #cir.int<4> : !s32i
// CIR: %[[VEC_A_VAL:.*]] = cir.vec.create(%[[CONST_1]], %[[CONST_2]], %[[CONST_3]], %[[CONST_4]] :
// CIR-SAME: !s32i, !s32i, !s32i, !s32i) : !cir.vector<4 x !s32i>
// CIR: cir.store{{.*}} %[[VEC_A_VAL]], %[[VEC_A]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[VEC_A]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[SH_AMOUNT:.*]] = cir.const #cir.int<3> : !s32i
// CIR: %[[SPLAT_VEC:.*]] = cir.vec.splat %[[SH_AMOUNT]] : !s32i, !cir.vector<4 x !s32i>
// CIR: %[[SHL:.*]] = cir.shift(left, %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[SPLAT_VEC]] : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
// CIR: cir.store{{.*}} %[[SHL]], %[[SHL_RES]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !u32i
// CIR: %[[CONST_2:.*]] = cir.const #cir.int<2> : !u32i
// CIR: %[[CONST_3:.*]] = cir.const #cir.int<3> : !u32i
// CIR: %[[CONST_4:.*]] = cir.const #cir.int<4> : !u32i
// CIR: %[[VEC_B_VAL:.*]] = cir.vec.create(%[[CONST_1]], %[[CONST_2]], %[[CONST_3]], %[[CONST_4]] :
// CIR-SAME: !u32i, !u32i, !u32i, !u32i) : !cir.vector<4 x !u32i>
// CIR: cir.store{{.*}} %[[VEC_B_VAL]], %[[VEC_B]] : !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>>
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[VEC_B]] : !cir.ptr<!cir.vector<4 x !u32i>>, !cir.vector<4 x !u32i>
// CIR: %[[SH_AMOUNT:.*]] = cir.const #cir.int<3> : !u32i
// CIR: %[[SPLAT_VEC:.*]] = cir.vec.splat %[[SH_AMOUNT]] : !u32i, !cir.vector<4 x !u32i>
// CIR: %[[SHR:.*]] = cir.shift(right, %[[TMP_B]] : !cir.vector<4 x !u32i>, %[[SPLAT_VEC]] : !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i>
// CIR: cir.store{{.*}} %[[SHR]], %[[SHR_RES]] : !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>>

// LLVM: %[[VEC_A:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[SHL_RES:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[VEC_B:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[SHR_RES:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %[[VEC_A]], align 16
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[VEC_A]], align 16
// LLVM: %[[SHL:.*]] = shl <4 x i32> %[[TMP_A]], splat (i32 3)
// LLVM: store <4 x i32> %[[SHL]], ptr %[[SHL_RES]], align 16
// LLVM: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %[[VEC_B]], align 16
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[VEC_B]], align 16
// LLVM: %[[SHR:.*]] = lshr <4 x i32> %[[TMP_B]], splat (i32 3)
// LLVM: store <4 x i32> %[[SHR]], ptr %[[SHR_RES]], align 16

// OGCG: %[[VEC_A:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[SHL_RES:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[VEC_B:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[SHR_RES:.*]] = alloca <4 x i32>, align 16
// OGCG: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %[[VEC_A]], align 16
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[VEC_A]], align 16
// OGCG: %[[SHL:.*]] = shl <4 x i32> %[[TMP_A]], splat (i32 3)
// OGCG: store <4 x i32> %[[SHL]], ptr %[[SHL_RES]], align 16
// OGCG: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %[[VEC_B]], align 16
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[VEC_B]], align 16
// OGCG: %[[SHR:.*]] = lshr <4 x i32> %[[TMP_B]], splat (i32 3)
// OGCG: store <4 x i32> %[[SHR]], ptr %[[SHR_RES]], align 16

void foo19() {
vi4 a;
vi4 b;
Expand Down
33 changes: 33 additions & 0 deletions clang/test/CIR/IR/vector.cir
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,37 @@ cir.func @vector_shuffle_dynamic_test() {
// CHECK: cir.return
// CHECK: }

// Exercises cir.vec.splat: the constant 3 is splatted into a 4 x !s32i vector
// and used as the shift amount for a left shift of a loaded vector.
cir.func @vector_splat_test() {
%0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a", init]
%1 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["shl", init]
%2 = cir.const #cir.int<1> : !s32i
%3 = cir.const #cir.int<2> : !s32i
%4 = cir.const #cir.int<3> : !s32i
%5 = cir.const #cir.int<4> : !s32i
%6 = cir.vec.create(%2, %3, %4, %5 : !s32i, !s32i, !s32i, !s32i) : !cir.vector<4 x !s32i>
cir.store %6, %0 : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
%7 = cir.load %0 : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
%8 = cir.const #cir.int<3> : !s32i
%9 = cir.vec.splat %8 : !s32i, !cir.vector<4 x !s32i>
%10 = cir.shift(left, %7 : !cir.vector<4 x !s32i>, %9 : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
cir.store %10, %1 : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
cir.return
}

// CHECK: cir.func @vector_splat_test() {
// CHECK-NEXT: %[[VEC:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a", init]
// CHECK-NEXT: %[[SHL_RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["shl", init]
// CHECK-NEXT: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
// CHECK-NEXT: %[[CONST_2:.*]] = cir.const #cir.int<2> : !s32i
// CHECK-NEXT: %[[CONST_3:.*]] = cir.const #cir.int<3> : !s32i
// CHECK-NEXT: %[[CONST_4:.*]] = cir.const #cir.int<4> : !s32i
// CHECK-NEXT: %[[VEC_VAL:.*]] = cir.vec.create(%[[CONST_1]], %[[CONST_2]], %[[CONST_3]], %[[CONST_4]] : !s32i, !s32i, !s32i, !s32i) : !cir.vector<4 x !s32i>
// CHECK-NEXT: cir.store %[[VEC_VAL]], %[[VEC]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CHECK-NEXT: %[[TMP:.*]] = cir.load %[[VEC]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CHECK-NEXT: %[[SPLAT_VAL:.*]] = cir.const #cir.int<3> : !s32i
// CHECK-NEXT: %[[SPLAT_VEC:.*]] = cir.vec.splat %[[SPLAT_VAL]] : !s32i, !cir.vector<4 x !s32i>
// CHECK-NEXT: %[[SHL:.*]] = cir.shift(left, %[[TMP]] : !cir.vector<4 x !s32i>, %[[SPLAT_VEC]] : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
// CHECK-NEXT: cir.store %[[SHL]], %[[SHL_RES]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// CHECK-NEXT: cir.return

}