Skip to content

Commit bda1321

Browse files
committed
Fix: Folding Greek final sigma in AVX-512
1 parent 39516b3 commit bda1321

File tree

1 file changed

+25
-20
lines changed

1 file changed

+25
-20
lines changed

include/stringzilla/utf8_case.h

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5620,25 +5620,17 @@ SZ_INTERNAL __m512i sz_utf8_case_insensitive_find_ice_greek_fold_zmm_(__m512i te
56205620

56215621
// 1. CE ranges (Uppercase -> Lowercase)
56225622
// -------------------------------------
5623-
5624-
// Group 1: CE 91-9F ('Α'-'Ο') -> CE B1-BF ('α'-'ο') (Add 0x20)
5623+
// Basic Greek Upper (Range 1): CE 91-9F ('Α'-'Ο') -> CE B1-BF ('α'-'ο') (Add 0x20)
56255624
__mmask64 is_basic1 = _mm512_mask_cmpge_epu8_mask(is_after_ce_mask, result_zmm, x_91_zmm);
56265625
is_basic1 &= _mm512_mask_cmple_epu8_mask(is_after_ce_mask, result_zmm, x_9f_zmm);
5627-
result_zmm = _mm512_mask_add_epi8(result_zmm, is_basic1, result_zmm, x_20_zmm);
56285626

5629-
// Group 2: CE A0-A9 ('Π'-'Ω') -> CF 80-89 ('π'-'ω') (Change lead CE->CF, subtract 0x20 from 2nd)
5627+
// Basic Greek Upper (Range 2): CE A0-A9 ('Π'-'Ω') -> CF 80-89 ('π'-'ω') (Change lead CE->CF, subtract 0x20 from
5628+
// 2nd)
56305629
__mmask64 is_basic2 = _mm512_mask_cmpge_epu8_mask(is_after_ce_mask, result_zmm, x_a0_zmm);
56315630
is_basic2 &= _mm512_mask_cmple_epu8_mask(is_after_ce_mask, result_zmm, x_a9_zmm);
5632-
result_zmm = _mm512_mask_mov_epi8(result_zmm, is_basic2 >> 1, x_cf_zmm); // Lead CE -> CF
5633-
result_zmm = _mm512_mask_add_epi8(result_zmm, is_basic2, result_zmm, x_e0_zmm); // 2nd -0x20 (using add E0)
56345631

5635-
// Group 3: CE 86-8F (Accented 'Ά'-'Ώ') -> Lowercase
5632+
// Accented Greek Upper: CE 86-8F ('Ά'-'Ώ') -> Lowercase
56365633
// Same-block: CE 86 -> CE AC (+0x26), CE 88-8A -> CE AD-AF (+0x25); cross-block: CE 8C -> CF 8C (lead CE->CF, 2nd same), CE 8E-8F -> CF 8D-8E (lead CE->CF, 2nd -1)
5637-
// - 'Ά' (86) -> 'ά' (AC) -- +0x26
5638-
// - 'Έ' (88), 'Ή' (89), 'Ί' (8A) -> 'έ' (AD), 'ή' (AE), 'ί' (AF) -- +0x25
5639-
// - 'Ό' (8C) -> 'ό' (CF 8C) -- Lead change!
5640-
// - 'Ύ' (8E), 'Ώ' (8F) -> 'ύ' (CF 8D), 'ώ' (CF 8E) -- Lead change + 2nd change!
5641-
56425634
__mmask64 is_accented = _mm512_mask_cmpge_epu8_mask(is_after_ce_mask, result_zmm, x_86_zmm);
56435635
is_accented &= _mm512_mask_cmple_epu8_mask(is_after_ce_mask, result_zmm, x_8f_zmm);
56445636

@@ -5652,7 +5644,25 @@ SZ_INTERNAL __m512i sz_utf8_case_insensitive_find_ice_greek_fold_zmm_(__m512i te
56525644

56535645
__mmask64 is_8e_8f = is_accented & _mm512_cmpge_epu8_mask(result_zmm, _mm512_set1_epi8((char)0x8E));
56545646

5655-
// Apply transformations using masked operations (branchless)
5647+
// Dialytika Greek Upper: CE AA-AB ('Ϊ', 'Ϋ') -> CF 8A-8B (Lead CE->CF, 2nd -0x20)
5648+
__mmask64 is_dialytika = _mm512_mask_cmpge_epu8_mask(is_after_ce_mask, result_zmm, x_aa_zmm);
5649+
is_dialytika &= _mm512_mask_cmple_epu8_mask(is_after_ce_mask, result_zmm, x_ab_zmm);
5650+
5651+
// 2. CF ranges (Final Sigma)
5652+
// --------------------------
5653+
// 'ς' (CF 82) -> 'σ' (CF 83)
5654+
__mmask64 is_final_sigma = _mm512_mask_cmpeq_epi8_mask(is_after_cf_mask, result_zmm, x_82_zmm);
5655+
5656+
// Apply transformations using masked operations
5657+
// ---------------------------------------------
5658+
// Apply Basic Greek Upper (Range 1)
5659+
result_zmm = _mm512_mask_add_epi8(result_zmm, is_basic1, result_zmm, x_20_zmm);
5660+
5661+
// Apply Basic Greek Upper (Range 2)
5662+
result_zmm = _mm512_mask_mov_epi8(result_zmm, is_basic2 >> 1, x_cf_zmm); // Lead CE -> CF
5663+
result_zmm = _mm512_mask_add_epi8(result_zmm, is_basic2, result_zmm, x_e0_zmm); // 2nd -0x20 (using add E0)
5664+
5665+
// Apply Accented Greek Upper
56565666
// 1. Additions for Same-Block Mappings (CE -> CE)
56575667
// 'Ά' -> +0x26
56585668
result_zmm = _mm512_mask_add_epi8(result_zmm, is_86, result_zmm, _mm512_set1_epi8(0x26));
@@ -5668,16 +5678,11 @@ SZ_INTERNAL __m512i sz_utf8_case_insensitive_find_ice_greek_fold_zmm_(__m512i te
56685678
// 'Ύ', 'Ώ' -> -1
56695679
result_zmm = _mm512_mask_sub_epi8(result_zmm, is_8e_8f, result_zmm, x_01_zmm);
56705680

5671-
// Group 4: CE AA-AB (Dialytika 'Ϊ', 'Ϋ') -> CF 8A-8B (Lead CE->CF, 2nd -0x20)
5672-
__mmask64 is_dialytika = _mm512_mask_cmpge_epu8_mask(is_after_ce_mask, result_zmm, x_aa_zmm);
5673-
is_dialytika &= _mm512_mask_cmple_epu8_mask(is_after_ce_mask, result_zmm, x_ab_zmm);
5681+
// Apply Dialytika Greek Upper
56745682
result_zmm = _mm512_mask_mov_epi8(result_zmm, is_dialytika >> 1, x_cf_zmm);
56755683
result_zmm = _mm512_mask_add_epi8(result_zmm, is_dialytika, result_zmm, x_e0_zmm); // -0x20
56765684

5677-
// 2. CF ranges (Final Sigma)
5678-
// --------------------------
5679-
// 'ς' (CF 82) -> 'σ' (CF 83)
5680-
__mmask64 is_final_sigma = _mm512_mask_cmpeq_epi8_mask(is_after_cf_mask, result_zmm, x_82_zmm);
5685+
// Apply Final Sigma
56815686
result_zmm = _mm512_mask_mov_epi8(result_zmm, is_final_sigma, x_83_zmm);
56825687

56835688
return result_zmm;

0 commit comments

Comments
 (0)