@@ -1549,8 +1549,7 @@ SZ_PUBLIC sz_size_t sz_utf8_count_neon(sz_cptr_t text, sz_size_t length) {
15491549 uint8x16_t continuation_mask_vec = vdupq_n_u8 (0xC0 );
15501550 uint8x16_t continuation_pattern_vec = vdupq_n_u8 (0x80 );
15511551 sz_u8_t const * text_u8 = (sz_u8_t const * )text ;
1552- sz_size_t char_count = 0 ;
1553-
1552+ uint64x2_t char_count_vec = vdupq_n_u64 (0 );
15541553 while (length >= 16 ) {
15551554 text_vec .u8x16 = vld1q_u8 (text_u8 );
15561555 headers_vec .u8x16 = vandq_u8 (text_vec .u8x16 , continuation_mask_vec );
@@ -1560,25 +1559,29 @@ SZ_PUBLIC sz_size_t sz_utf8_count_neon(sz_cptr_t text, sz_size_t length) {
15601559 uint16x8_t sum16 = vpaddlq_u8 (start_flags );
15611560 uint32x4_t sum32 = vpaddlq_u16 (sum16 );
15621561 uint64x2_t sum64 = vpaddlq_u32 (sum32 );
1563- char_count += vgetq_lane_u64 ( sum64 , 0 ) + vgetq_lane_u64 ( sum64 , 1 );
1562+ char_count_vec = vaddq_u64 ( char_count_vec , sum64 );
15641563 text_u8 += 16 ;
15651564 length -= 16 ;
15661565 }
15671566
1567+ sz_size_t char_count = vgetq_lane_u64 (char_count_vec , 0 ) + vgetq_lane_u64 (char_count_vec , 1 );
15681568 if (length ) char_count += sz_utf8_count_serial ((sz_cptr_t )text_u8 , length );
15691569 return char_count ;
15701570}
15711571
15721572SZ_PUBLIC sz_cptr_t sz_utf8_find_nth_neon (sz_cptr_t text , sz_size_t length , sz_size_t n ) {
1573+ // TODO: Implement a NEON-accelerated version of sz_utf8_find_nth in absense of PDEP instruction.
15731574 return sz_utf8_find_nth_serial (text , length , n );
15741575}
15751576
15761577SZ_PUBLIC sz_cptr_t sz_utf8_unpack_chunk_neon ( //
15771578 sz_cptr_t text , sz_size_t length , //
15781579 sz_rune_t * runes , sz_size_t runes_capacity , //
15791580 sz_size_t * runes_unpacked ) {
1581+ // TODO: Implement a fast NEON version once we come up with an AVX-512 design.
15801582 return sz_utf8_unpack_chunk_serial (text , length , runes , runes_capacity , runes_unpacked );
15811583}
1584+
15821585#if defined(__clang__ )
15831586#pragma clang attribute pop
15841587#elif defined(__GNUC__ )
0 commit comments