Skip to content

Commit c6d53e9

Browse files
committed
Update simd support
1 parent 7f45bfb commit c6d53e9

35 files changed

+2538
-2048
lines changed

src/binary-reader.cc

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -912,12 +912,12 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
912912
case Opcode::F32Load:
913913
case Opcode::F64Load:
914914
case Opcode::V128Load:
915-
case Opcode::I16X8Load8X8S:
916-
case Opcode::I16X8Load8X8U:
917-
case Opcode::I32X4Load16X4S:
918-
case Opcode::I32X4Load16X4U:
919-
case Opcode::I64X2Load32X2S:
920-
case Opcode::I64X2Load32X2U: {
915+
case Opcode::V128Load8X8S:
916+
case Opcode::V128Load8X8U:
917+
case Opcode::V128Load16X4S:
918+
case Opcode::V128Load16X4U:
919+
case Opcode::V128Load32X2S:
920+
case Opcode::V128Load32X2U: {
921921
Address alignment_log2;
922922
CHECK_RESULT(ReadAlignment(&alignment_log2, "load alignment"));
923923
Address offset;
@@ -1020,14 +1020,14 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
10201020
case Opcode::I16X8Mul:
10211021
case Opcode::I32X4Mul:
10221022
case Opcode::I64X2Mul:
1023-
case Opcode::I8X16AddSaturateS:
1024-
case Opcode::I8X16AddSaturateU:
1025-
case Opcode::I16X8AddSaturateS:
1026-
case Opcode::I16X8AddSaturateU:
1027-
case Opcode::I8X16SubSaturateS:
1028-
case Opcode::I8X16SubSaturateU:
1029-
case Opcode::I16X8SubSaturateS:
1030-
case Opcode::I16X8SubSaturateU:
1023+
case Opcode::I8X16AddSatS:
1024+
case Opcode::I8X16AddSatU:
1025+
case Opcode::I16X8AddSatS:
1026+
case Opcode::I16X8AddSatU:
1027+
case Opcode::I8X16SubSatS:
1028+
case Opcode::I8X16SubSatU:
1029+
case Opcode::I16X8SubSatS:
1030+
case Opcode::I16X8SubSatU:
10311031
case Opcode::I8X16MinS:
10321032
case Opcode::I16X8MinS:
10331033
case Opcode::I32X4MinS:
@@ -1056,9 +1056,13 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
10561056
case Opcode::V128Or:
10571057
case Opcode::V128Xor:
10581058
case Opcode::F32X4Min:
1059+
case Opcode::F32X4PMin:
10591060
case Opcode::F64X2Min:
1061+
case Opcode::F64X2PMin:
10601062
case Opcode::F32X4Max:
1063+
case Opcode::F32X4PMax:
10611064
case Opcode::F64X2Max:
1065+
case Opcode::F64X2PMax:
10621066
case Opcode::F32X4Add:
10631067
case Opcode::F64X2Add:
10641068
case Opcode::F32X4Sub:
@@ -1067,7 +1071,7 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
10671071
case Opcode::F64X2Div:
10681072
case Opcode::F32X4Mul:
10691073
case Opcode::F64X2Mul:
1070-
case Opcode::V8X16Swizzle:
1074+
case Opcode::I8X16Swizzle:
10711075
case Opcode::I8X16NarrowI16X8S:
10721076
case Opcode::I8X16NarrowI16X8U:
10731077
case Opcode::I16X8NarrowI32X4S:
@@ -1197,6 +1201,14 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
11971201
case Opcode::I8X16AllTrue:
11981202
case Opcode::I16X8AllTrue:
11991203
case Opcode::I32X4AllTrue:
1204+
case Opcode::F32X4Ceil:
1205+
case Opcode::F64X2Ceil:
1206+
case Opcode::F32X4Floor:
1207+
case Opcode::F64X2Floor:
1208+
case Opcode::F32X4Trunc:
1209+
case Opcode::F64X2Trunc:
1210+
case Opcode::F32X4Nearest:
1211+
case Opcode::F64X2Nearest:
12001212
case Opcode::F32X4Neg:
12011213
case Opcode::F64X2Neg:
12021214
case Opcode::F32X4Abs:
@@ -1244,18 +1256,18 @@ Result BinaryReader::ReadFunctionBody(Offset end_offset) {
12441256
break;
12451257
}
12461258

1247-
case Opcode::V8X16Shuffle: {
1259+
case Opcode::I8X16Shuffle: {
12481260
v128 value;
12491261
CHECK_RESULT(ReadV128(&value, "Lane idx [16]"));
12501262
CALLBACK(OnSimdShuffleOpExpr, opcode, value);
12511263
CALLBACK(OnOpcodeV128, value);
12521264
break;
12531265
}
12541266

1255-
case Opcode::V8X16LoadSplat:
1256-
case Opcode::V16X8LoadSplat:
1257-
case Opcode::V32X4LoadSplat:
1258-
case Opcode::V64X2LoadSplat: {
1267+
case Opcode::V128Load8Splat:
1268+
case Opcode::V128Load16Splat:
1269+
case Opcode::V128Load32Splat:
1270+
case Opcode::V128Load64Splat: {
12591271
Address alignment_log2;
12601272
CHECK_RESULT(ReadAlignment(&alignment_log2, "load alignment"));
12611273
Address offset;

src/interp/interp-math.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,11 @@ T WABT_VECTORCALL FloatMin(T lhs, T rhs) {
272272
}
273273
}
274274

275+
// Pseudo-minimum of two values: a thin wrapper that forwards directly to
// std::min, with no special-case handling of its own.
//
// std::min(lhs, rhs) yields rhs only when `rhs < lhs` compares true and
// yields lhs in every other case. For built-in floating-point T this means
// lhs is returned whenever the comparison is false -- including when either
// operand is NaN or when the operands compare equal (e.g. +0 and -0).
//
// In this commit it is dispatched for O::F32X4PMin and O::F64X2PMin via
// DoSimdBinop in interp.cc.
template <typename T>
276+
T WABT_VECTORCALL FloatPMin(T lhs, T rhs) {
277+
return std::min(lhs, rhs);
278+
}
279+
275280
template <typename T>
276281
T WABT_VECTORCALL FloatMax(T lhs, T rhs) {
277282
if (WABT_UNLIKELY(std::isnan(lhs) || std::isnan(rhs))) {
@@ -283,6 +288,11 @@ T WABT_VECTORCALL FloatMax(T lhs, T rhs) {
283288
}
284289
}
285290

291+
// Pseudo-maximum of two values: a thin wrapper that forwards directly to
// std::max. Unlike FloatMax, which starts with an explicit std::isnan check
// on both operands, this function performs no NaN test itself.
//
// std::max(lhs, rhs) yields rhs only when `lhs < rhs` compares true and
// yields lhs in every other case. For built-in floating-point T this means
// lhs is returned whenever the comparison is false -- including when either
// operand is NaN or when the operands compare equal (e.g. +0 and -0).
//
// In this commit it is dispatched for O::F32X4PMax and O::F64X2PMax via
// DoSimdBinop in interp.cc.
template <typename T>
292+
T WABT_VECTORCALL FloatPMax(T lhs, T rhs) {
293+
return std::max(lhs, rhs);
294+
}
295+
286296
// Generic fallback: reports every conversion from T to R as allowed.
// Type pairs that need an actual range check supply explicit
// specializations of this template.
template <typename R, typename T>
bool WABT_VECTORCALL CanConvert(T val) {
  return true;
}
287297
// f32 -> s32 range check: true only when val lies in the half-open interval
// [-2147483648.f, 2147483648.f). Assuming f32 is the built-in float, both
// comparisons are false for NaN, so NaN is reported as not convertible.
template <>
inline bool WABT_VECTORCALL CanConvert<s32, f32>(f32 val) {
  // Reject anything that is not at or above the lower bound.
  if (!(val >= -2147483648.f)) {
    return false;
  }
  // Upper bound is exclusive.
  return val < 2147483648.f;
}
288298
// f64 -> s32 range check: true only when val lies strictly between
// -2147483649. and 2147483648. (both bounds exclusive). Assuming f64 is the
// built-in double, both comparisons are false for NaN, so NaN is reported as
// not convertible.
template <>
inline bool WABT_VECTORCALL CanConvert<s32, f64>(f64 val) {
  // Reject anything that is not strictly above the lower bound.
  if (!(val > -2147483649.)) {
    return false;
  }
  // Upper bound is exclusive.
  return val < 2147483648.;
}

src/interp/interp.cc

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1503,11 +1503,11 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
15031503
case O::I8X16ShrS: return DoSimdShift(IntShr<s8>);
15041504
case O::I8X16ShrU: return DoSimdShift(IntShr<u8>);
15051505
case O::I8X16Add: return DoSimdBinop(Add<u8>);
1506-
case O::I8X16AddSaturateS: return DoSimdBinop(IntAddSat<s8>);
1507-
case O::I8X16AddSaturateU: return DoSimdBinop(IntAddSat<u8>);
1506+
case O::I8X16AddSatS: return DoSimdBinop(IntAddSat<s8>);
1507+
case O::I8X16AddSatU: return DoSimdBinop(IntAddSat<u8>);
15081508
case O::I8X16Sub: return DoSimdBinop(Sub<u8>);
1509-
case O::I8X16SubSaturateS: return DoSimdBinop(IntSubSat<s8>);
1510-
case O::I8X16SubSaturateU: return DoSimdBinop(IntSubSat<u8>);
1509+
case O::I8X16SubSatS: return DoSimdBinop(IntSubSat<s8>);
1510+
case O::I8X16SubSatU: return DoSimdBinop(IntSubSat<u8>);
15111511
case O::I8X16MinS: return DoSimdBinop(IntMin<s8>);
15121512
case O::I8X16MinU: return DoSimdBinop(IntMin<u8>);
15131513
case O::I8X16MaxS: return DoSimdBinop(IntMax<s8>);
@@ -1521,11 +1521,11 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
15211521
case O::I16X8ShrS: return DoSimdShift(IntShr<s16>);
15221522
case O::I16X8ShrU: return DoSimdShift(IntShr<u16>);
15231523
case O::I16X8Add: return DoSimdBinop(Add<u16>);
1524-
case O::I16X8AddSaturateS: return DoSimdBinop(IntAddSat<s16>);
1525-
case O::I16X8AddSaturateU: return DoSimdBinop(IntAddSat<u16>);
1524+
case O::I16X8AddSatS: return DoSimdBinop(IntAddSat<s16>);
1525+
case O::I16X8AddSatU: return DoSimdBinop(IntAddSat<u16>);
15261526
case O::I16X8Sub: return DoSimdBinop(Sub<u16>);
1527-
case O::I16X8SubSaturateS: return DoSimdBinop(IntSubSat<s16>);
1528-
case O::I16X8SubSaturateU: return DoSimdBinop(IntSubSat<u16>);
1527+
case O::I16X8SubSatS: return DoSimdBinop(IntSubSat<s16>);
1528+
case O::I16X8SubSatU: return DoSimdBinop(IntSubSat<u16>);
15291529
case O::I16X8Mul: return DoSimdBinop(Mul<u16>);
15301530
case O::I16X8MinS: return DoSimdBinop(IntMin<s16>);
15311531
case O::I16X8MinU: return DoSimdBinop(IntMin<u16>);
@@ -1555,6 +1555,16 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
15551555
case O::I64X2Sub: return DoSimdBinop(Sub<u64>);
15561556
case O::I64X2Mul: return DoSimdBinop(Mul<u64>);
15571557

1558+
case O::F32X4Ceil: return DoSimdUnop(FloatCeil<f32>);
1559+
case O::F32X4Floor: return DoSimdUnop(FloatFloor<f32>);
1560+
case O::F32X4Trunc: return DoSimdUnop(FloatTrunc<f32>);
1561+
case O::F32X4Nearest: return DoSimdUnop(FloatNearest<f32>);
1562+
1563+
case O::F64X2Ceil: return DoSimdUnop(FloatCeil<f64>);
1564+
case O::F64X2Floor: return DoSimdUnop(FloatFloor<f64>);
1565+
case O::F64X2Trunc: return DoSimdUnop(FloatTrunc<f64>);
1566+
case O::F64X2Nearest: return DoSimdUnop(FloatNearest<f64>);
1567+
15581568
case O::F32X4Abs: return DoSimdUnop(FloatAbs<f32>);
15591569
case O::F32X4Neg: return DoSimdUnop(FloatNeg<f32>);
15601570
case O::F32X4Sqrt: return DoSimdUnop(FloatSqrt<f32>);
@@ -1564,6 +1574,8 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
15641574
case O::F32X4Div: return DoSimdBinop(FloatDiv<f32>);
15651575
case O::F32X4Min: return DoSimdBinop(FloatMin<f32>);
15661576
case O::F32X4Max: return DoSimdBinop(FloatMax<f32>);
1577+
case O::F32X4PMin: return DoSimdBinop(FloatPMin<f32>);
1578+
case O::F32X4PMax: return DoSimdBinop(FloatPMax<f32>);
15671579

15681580
case O::F64X2Abs: return DoSimdUnop(FloatAbs<f64>);
15691581
case O::F64X2Neg: return DoSimdUnop(FloatNeg<f64>);
@@ -1574,19 +1586,21 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
15741586
case O::F64X2Div: return DoSimdBinop(FloatDiv<f64>);
15751587
case O::F64X2Min: return DoSimdBinop(FloatMin<f64>);
15761588
case O::F64X2Max: return DoSimdBinop(FloatMax<f64>);
1589+
case O::F64X2PMin: return DoSimdBinop(FloatPMin<f64>);
1590+
case O::F64X2PMax: return DoSimdBinop(FloatPMax<f64>);
15771591

15781592
case O::I32X4TruncSatF32X4S: return DoSimdUnop(IntTruncSat<s32, f32>);
15791593
case O::I32X4TruncSatF32X4U: return DoSimdUnop(IntTruncSat<u32, f32>);
15801594
case O::F32X4ConvertI32X4S: return DoSimdUnop(Convert<f32, s32>);
15811595
case O::F32X4ConvertI32X4U: return DoSimdUnop(Convert<f32, u32>);
15821596

1583-
case O::V8X16Swizzle: return DoSimdSwizzle();
1584-
case O::V8X16Shuffle: return DoSimdShuffle(instr);
1597+
case O::I8X16Swizzle: return DoSimdSwizzle();
1598+
case O::I8X16Shuffle: return DoSimdShuffle(instr);
15851599

1586-
case O::V8X16LoadSplat: return DoSimdLoadSplat<u8x16, u32>(instr, out_trap);
1587-
case O::V16X8LoadSplat: return DoSimdLoadSplat<u16x8, u32>(instr, out_trap);
1588-
case O::V32X4LoadSplat: return DoSimdLoadSplat<u32x4, u32>(instr, out_trap);
1589-
case O::V64X2LoadSplat: return DoSimdLoadSplat<u64x2, u64>(instr, out_trap);
1600+
case O::V128Load8Splat: return DoSimdLoadSplat<u8x16, u32>(instr, out_trap);
1601+
case O::V128Load16Splat: return DoSimdLoadSplat<u16x8, u32>(instr, out_trap);
1602+
case O::V128Load32Splat: return DoSimdLoadSplat<u32x4, u32>(instr, out_trap);
1603+
case O::V128Load64Splat: return DoSimdLoadSplat<u64x2, u64>(instr, out_trap);
15901604

15911605
case O::I8X16NarrowI16X8S: return DoSimdNarrow<s8x16, s16x8>();
15921606
case O::I8X16NarrowI16X8U: return DoSimdNarrow<u8x16, s16x8>();
@@ -1601,12 +1615,12 @@ RunResult Thread::StepInternal(Trap::Ptr* out_trap) {
16011615
case O::I32X4WidenLowI16X8U: return DoSimdWiden<u32x4, u16x8, true>();
16021616
case O::I32X4WidenHighI16X8U: return DoSimdWiden<u32x4, u16x8, false>();
16031617

1604-
case O::I16X8Load8X8S: return DoSimdLoadExtend<s16x8, s8x8>(instr, out_trap);
1605-
case O::I16X8Load8X8U: return DoSimdLoadExtend<u16x8, u8x8>(instr, out_trap);
1606-
case O::I32X4Load16X4S: return DoSimdLoadExtend<s32x4, s16x4>(instr, out_trap);
1607-
case O::I32X4Load16X4U: return DoSimdLoadExtend<u32x4, u16x4>(instr, out_trap);
1608-
case O::I64X2Load32X2S: return DoSimdLoadExtend<s64x2, s32x2>(instr, out_trap);
1609-
case O::I64X2Load32X2U: return DoSimdLoadExtend<u64x2, u32x2>(instr, out_trap);
1618+
case O::V128Load8X8S: return DoSimdLoadExtend<s16x8, s8x8>(instr, out_trap);
1619+
case O::V128Load8X8U: return DoSimdLoadExtend<u16x8, u8x8>(instr, out_trap);
1620+
case O::V128Load16X4S: return DoSimdLoadExtend<s32x4, s16x4>(instr, out_trap);
1621+
case O::V128Load16X4U: return DoSimdLoadExtend<u32x4, u16x4>(instr, out_trap);
1622+
case O::V128Load32X2S: return DoSimdLoadExtend<s64x2, s32x2>(instr, out_trap);
1623+
case O::V128Load32X2U: return DoSimdLoadExtend<u64x2, u32x2>(instr, out_trap);
16101624

16111625
case O::V128Andnot: return DoSimdBinop(IntAndNot<u64>);
16121626
case O::I8X16AvgrU: return DoSimdBinop(IntAvgr<u8>);

src/interp/istream.cc

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,15 @@ Instr Istream::Read(Offset* offset) const {
130130
case Opcode::F32Sqrt:
131131
case Opcode::F32Trunc:
132132
case Opcode::F32X4Abs:
133+
case Opcode::F32X4Ceil:
133134
case Opcode::F32X4ConvertI32X4S:
134135
case Opcode::F32X4ConvertI32X4U:
136+
case Opcode::F32X4Floor:
137+
case Opcode::F32X4Nearest:
135138
case Opcode::F32X4Neg:
136139
case Opcode::F32X4Splat:
137140
case Opcode::F32X4Sqrt:
141+
case Opcode::F32X4Trunc:
138142
case Opcode::F64Abs:
139143
case Opcode::F64Ceil:
140144
case Opcode::F64ConvertI32S:
@@ -149,9 +153,13 @@ Instr Istream::Read(Offset* offset) const {
149153
case Opcode::F64Sqrt:
150154
case Opcode::F64Trunc:
151155
case Opcode::F64X2Abs:
156+
case Opcode::F64X2Ceil:
157+
case Opcode::F64X2Floor:
158+
case Opcode::F64X2Nearest:
152159
case Opcode::F64X2Neg:
153160
case Opcode::F64X2Splat:
154161
case Opcode::F64X2Sqrt:
162+
case Opcode::F64X2Trunc:
155163
case Opcode::I16X8AllTrue:
156164
case Opcode::I16X8AnyTrue:
157165
case Opcode::I16X8Bitmask:
@@ -246,6 +254,8 @@ Instr Istream::Read(Offset* offset) const {
246254
case Opcode::F32X4Min:
247255
case Opcode::F32X4Mul:
248256
case Opcode::F32X4Ne:
257+
case Opcode::F32X4PMax:
258+
case Opcode::F32X4PMin:
249259
case Opcode::F32X4Sub:
250260
case Opcode::F64Add:
251261
case Opcode::F64Copysign:
@@ -271,10 +281,12 @@ Instr Istream::Read(Offset* offset) const {
271281
case Opcode::F64X2Min:
272282
case Opcode::F64X2Mul:
273283
case Opcode::F64X2Ne:
284+
case Opcode::F64X2PMax:
285+
case Opcode::F64X2PMin:
274286
case Opcode::F64X2Sub:
275287
case Opcode::I16X8Add:
276-
case Opcode::I16X8AddSaturateS:
277-
case Opcode::I16X8AddSaturateU:
288+
case Opcode::I16X8AddSatS:
289+
case Opcode::I16X8AddSatU:
278290
case Opcode::I16X8AvgrU:
279291
case Opcode::I16X8Eq:
280292
case Opcode::I16X8GeS:
@@ -297,8 +309,8 @@ Instr Istream::Read(Offset* offset) const {
297309
case Opcode::I16X8ShrS:
298310
case Opcode::I16X8ShrU:
299311
case Opcode::I16X8Sub:
300-
case Opcode::I16X8SubSaturateS:
301-
case Opcode::I16X8SubSaturateU:
312+
case Opcode::I16X8SubSatS:
313+
case Opcode::I16X8SubSatU:
302314
case Opcode::I32Add:
303315
case Opcode::I32And:
304316
case Opcode::I32DivS:
@@ -376,8 +388,8 @@ Instr Istream::Read(Offset* offset) const {
376388
case Opcode::I64X2Mul:
377389
case Opcode::I64Xor:
378390
case Opcode::I8X16Add:
379-
case Opcode::I8X16AddSaturateS:
380-
case Opcode::I8X16AddSaturateU:
391+
case Opcode::I8X16AddSatS:
392+
case Opcode::I8X16AddSatU:
381393
case Opcode::I8X16AvgrU:
382394
case Opcode::I8X16Eq:
383395
case Opcode::I8X16GeS:
@@ -399,14 +411,14 @@ Instr Istream::Read(Offset* offset) const {
399411
case Opcode::I8X16ShrS:
400412
case Opcode::I8X16ShrU:
401413
case Opcode::I8X16Sub:
402-
case Opcode::I8X16SubSaturateS:
403-
case Opcode::I8X16SubSaturateU:
414+
case Opcode::I8X16SubSatS:
415+
case Opcode::I8X16SubSatU:
404416
case Opcode::V128And:
405417
case Opcode::V128Andnot:
406418
case Opcode::V128BitSelect:
407419
case Opcode::V128Or:
408420
case Opcode::V128Xor:
409-
case Opcode::V8X16Swizzle:
421+
case Opcode::I8X16Swizzle:
410422
// 0 immediates, 2 operands
411423
instr.kind = InstrKind::Imm_0_Op_2;
412424
break;
@@ -493,9 +505,9 @@ Instr Istream::Read(Offset* offset) const {
493505

494506
case Opcode::F32Load:
495507
case Opcode::F64Load:
496-
case Opcode::I16X8Load8X8S:
497-
case Opcode::I16X8Load8X8U:
498-
case Opcode::V16X8LoadSplat:
508+
case Opcode::V128Load8X8S:
509+
case Opcode::V128Load8X8U:
510+
case Opcode::V128Load16Splat:
499511
case Opcode::I32AtomicLoad:
500512
case Opcode::I32AtomicLoad16U:
501513
case Opcode::I32AtomicLoad8U:
@@ -504,9 +516,9 @@ Instr Istream::Read(Offset* offset) const {
504516
case Opcode::I32Load16U:
505517
case Opcode::I32Load8S:
506518
case Opcode::I32Load8U:
507-
case Opcode::I32X4Load16X4S:
508-
case Opcode::I32X4Load16X4U:
509-
case Opcode::V32X4LoadSplat:
519+
case Opcode::V128Load16X4S:
520+
case Opcode::V128Load16X4U:
521+
case Opcode::V128Load32Splat:
510522
case Opcode::I64AtomicLoad:
511523
case Opcode::I64AtomicLoad16U:
512524
case Opcode::I64AtomicLoad32U:
@@ -518,10 +530,10 @@ Instr Istream::Read(Offset* offset) const {
518530
case Opcode::I64Load32U:
519531
case Opcode::I64Load8S:
520532
case Opcode::I64Load8U:
521-
case Opcode::I64X2Load32X2S:
522-
case Opcode::I64X2Load32X2U:
523-
case Opcode::V64X2LoadSplat:
524-
case Opcode::V8X16LoadSplat:
533+
case Opcode::V128Load32X2S:
534+
case Opcode::V128Load32X2U:
535+
case Opcode::V128Load64Splat:
536+
case Opcode::V128Load8Splat:
525537
case Opcode::V128Load:
526538
// Index + memory offset immediates, 1 operand.
527539
instr.kind = InstrKind::Imm_Index_Offset_Op_1;
@@ -673,7 +685,7 @@ Instr Istream::Read(Offset* offset) const {
673685
instr.imm_v128 = ReadAt<v128>(offset);
674686
break;
675687

676-
case Opcode::V8X16Shuffle:
688+
case Opcode::I8X16Shuffle:
677689
// v128 immediate, 2 operands.
678690
instr.kind = InstrKind::Imm_V128_Op_2;
679691
instr.imm_v128 = ReadAt<v128>(offset);

src/interp/istream.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ enum class InstrKind {
6767
Imm_I8_Op_1, // i32x4.extract_lane
6868
Imm_I8_Op_2, // i32x4.replace_lane
6969
Imm_V128_Op_0, // v128.const
70-
Imm_V128_Op_2, // v8x16.shuffle
70+
Imm_V128_Op_2, // i8x16.shuffle
7171
};
7272

7373
struct Instr {

0 commit comments

Comments
 (0)