From d6345dfc7ad889d2be52a0618e82cd9978aae62f Mon Sep 17 00:00:00 2001 From: Jan Palus Date: Fri, 20 Jun 2025 23:29:50 +0200 Subject: [PATCH] fix: correct NEON intrinsics so types check out No functional changes. Two intrinsics are replaced to match the types used; the previous and current versions compile to exactly the same assembly. One additional explicit cast is added, which is a no-op but is required by the type checker. Fixes a compilation failure of the C implementation with GCC targeting ARMv7 + NEON without -flax-vector-conversions. --- src/implementation/c/node/table-lookup.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/implementation/c/node/table-lookup.ts b/src/implementation/c/node/table-lookup.ts index b41b143..4504b23 100644 --- a/src/implementation/c/node/table-lookup.ts +++ b/src/implementation/c/node/table-lookup.ts @@ -229,7 +229,7 @@ export class TableLookup extends Node { if (start === end) { out.push(` single = vceqq_u8(input, ${v128(start)});`); } else { - out.push(` single = vandq_u16(`); + out.push(` single = vandq_u8(`); out.push(` vcgeq_u8(input, ${v128(start)}),`); out.push(` vcleq_u8(input, ${v128(end)})`); out.push(' );'); @@ -238,12 +238,12 @@ export class TableLookup extends Node { if (off === 0) { out.push(' mask = single;'); } else { - out.push(' mask = vorrq_u16(mask, single);'); + out.push(' mask = vorrq_u8(mask, single);'); } } // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon - out.push(' narrow = vshrn_n_u16(mask, 4);'); + out.push(' narrow = vshrn_n_u16(vreinterpretq_u16_u8(mask), 4);'); out.push(' match_mask = ~vget_lane_u64(vreinterpret_u64_u8(narrow), 0);'); out.push(' match_len = __builtin_ctzll(match_mask) >> 2;'); out.push(' if (match_len != 16) {');