Skip to content

Commit 7bbe2fd

Browse files
Add wasm support for int32->f64 and f32->f64 simd ops (#5863)
* Add wasm support for int32->f64 and f32->f64 simd ops. At top-of-tree LLVM, the wasm backend never seems to emit the vector versions of these ops, so pattern-match to target them specifically.
1 parent e7eec5c commit 7bbe2fd

File tree

3 files changed

+43
-5
lines changed

3 files changed

+43
-5
lines changed

src/CodeGen_WebAssembly.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ const WasmIntrinsic intrinsic_defs[] = {
9494
{"llvm.wasm.extadd.pairwise.unsigned.v8i16", Int(16, 8), "pairwise_widening_add", {UInt(8, 16)}, Target::WasmSimd128},
9595
{"llvm.wasm.extadd.pairwise.unsigned.v4i32", Int(32, 4), "pairwise_widening_add", {UInt(16, 8)}, Target::WasmSimd128},
9696

97+
{"i32_to_double_s", Float(64, 4), "int_to_double", {Int(32, 4)}, Target::WasmSimd128},
98+
{"i32_to_double_u", Float(64, 4), "int_to_double", {UInt(32, 4)}, Target::WasmSimd128},
99+
{"float_to_double", Float(64, 4), "float_to_double", {Float(32, 4)}, Target::WasmSimd128},
100+
97101
// Basically like ARM's SQRDMULH
98102
{"llvm.wasm.q15mulr.sat.signed", Int(16, 8), "q15mulr_sat_s", {Int(16, 8), Int(16, 8)}, Target::WasmSimd128},
99103

@@ -147,6 +151,9 @@ void CodeGen_WebAssembly::visit(const Cast *op) {
147151
{"saturating_narrow", u8_sat(wild_i16x_), Target::WasmSimd128},
148152
{"saturating_narrow", i16_sat(wild_i32x_), Target::WasmSimd128},
149153
{"saturating_narrow", u16_sat(wild_i32x_), Target::WasmSimd128},
154+
{"int_to_double", f64(wild_i32x_), Target::WasmSimd128},
155+
{"int_to_double", f64(wild_u32x_), Target::WasmSimd128},
156+
{"float_to_double", f64(wild_f32x_), Target::WasmSimd128},
150157
};
151158
// clang-format on
152159

src/runtime/wasm_math.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,36 @@ define weak_odr <8 x i16> @saturating_narrow_i32x8_to_u16x8(<8 x i32> %x) nounwi
138138
%3 = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> %1, <4 x i32> %2)
139139
ret <8 x i16> %3
140140
}
141+
142+
; Integer to double-precision floating point
143+
144+
declare <2 x double> @llvm.wasm.convert.low.signed(<4 x i32>)
145+
declare <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32>)
146+
147+
; Signed i32x4 -> f64x4 conversion helper (registered as the "int_to_double"
; intrinsic for Int(32, 4) arguments in CodeGen_WebAssembly.cpp).
;
; @llvm.wasm.convert.low.signed takes a <4 x i32> but yields only a
; <2 x double> (presumably converting just the low two lanes -- confirm
; against the WebAssembly SIMD spec), so the conversion is done in two halves:
;   %1 = lanes 2 and 3 of %x moved into lane positions 0 and 1 (upper lanes undef)
;   %2 = convert.low of the original vector %x
;   %3 = convert.low of the shuffled vector %1
;   %4 = %2 followed by %3, concatenated into a single <4 x double>
define weak_odr <4 x double> @i32_to_double_s(<4 x i32> %x) nounwind alwaysinline {
148+
%1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
149+
%2 = tail call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x)
150+
%3 = tail call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %1)
151+
%4 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
152+
ret <4 x double> %4
153+
}
154+
155+
; Unsigned i32x4 -> f64x4 conversion helper (registered as the "int_to_double"
; intrinsic for UInt(32, 4) arguments in CodeGen_WebAssembly.cpp).
;
; @llvm.wasm.convert.low.unsigned takes a <4 x i32> but yields only a
; <2 x double> (presumably converting just the low two lanes -- confirm
; against the WebAssembly SIMD spec), so the conversion is done in two halves:
;   %1 = lanes 2 and 3 of %x moved into lane positions 0 and 1 (upper lanes undef)
;   %2 = convert.low of the original vector %x
;   %3 = convert.low of the shuffled vector %1
;   %4 = %2 followed by %3, concatenated into a single <4 x double>
define weak_odr <4 x double> @i32_to_double_u(<4 x i32> %x) nounwind alwaysinline {
156+
%1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
157+
%2 = tail call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %x)
158+
%3 = tail call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %1)
159+
%4 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
160+
ret <4 x double> %4
161+
}
162+
163+
; Single-precision to double-precision floating point
164+
165+
declare <2 x double> @llvm.wasm.promote.low(<4 x float>)
166+
167+
; f32x4 -> f64x4 promotion helper (registered as the "float_to_double"
; intrinsic for Float(32, 4) arguments in CodeGen_WebAssembly.cpp).
;
; @llvm.wasm.promote.low takes a <4 x float> but yields only a <2 x double>
; (presumably promoting just the low two lanes -- confirm against the
; WebAssembly SIMD spec), so the promotion is done in two halves:
;   %1 = lanes 2 and 3 of %x moved into lane positions 0 and 1 (upper lanes undef)
;   %2 = promote.low of the original vector %x
;   %3 = promote.low of the shuffled vector %1
;   %4 = %2 followed by %3, concatenated into a single <4 x double>
define weak_odr <4 x double> @float_to_double(<4 x float> %x) nounwind alwaysinline {
168+
%1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
169+
%2 = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> %x)
170+
%3 = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> %1)
171+
%4 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
172+
ret <4 x double> %4
173+
}

test/correctness/simd_op_check.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2137,9 +2137,8 @@ class SimdOpCheck : public SimdOpCheckTest {
21372137
check("f32x4.convert_i32x4_u", 8 * w, cast<float>(u32_1));
21382138

21392139
// Integer to double-precision floating point
2140-
// TODO(https://github.com/halide/Halide/issues/5130): NOT BEING GENERATED AT TRUNK
2141-
// check("f64x2.convert_low_i32x4_s", 4 * w, cast<double>(i32_1));
2142-
// check("f64x2.convert_low_i32x4_u", 4 * w, cast<double>(u32_1));
2140+
check("f64x2.convert_low_i32x4_s", 2 * w, cast<double>(i32_1));
2141+
check("f64x2.convert_low_i32x4_u", 2 * w, cast<double>(u32_1));
21432142

21442143
// Single-precision floating point to integer with saturation
21452144
check("i32x4.trunc_sat_f32x4_s", 4 * w, cast<int32_t>(f32_1));
@@ -2155,8 +2154,7 @@ class SimdOpCheck : public SimdOpCheckTest {
21552154
// check("f32x4.demote_f64x2_zero", 4 * w, ???);
21562155

21572156
// Single-precision floating point to double-precision
2158-
// TODO(https://github.com/halide/Halide/issues/5130): NOT BEING GENERATED AT TRUNK
2159-
// check("f64x2.promote_low_f32x4", 4 * w, ???);
2157+
check("f64x2.promote_low_f32x4", 2 * w, cast<double>(f32_1));
21602158

21612159
// Integer to integer narrowing
21622160
check("i8x16.narrow_i16x8_s", 16 * w, i8_sat(i16_1));

0 commit comments

Comments
 (0)