@@ -5620,25 +5620,17 @@ SZ_INTERNAL __m512i sz_utf8_case_insensitive_find_ice_greek_fold_zmm_(__m512i te
56205620
56215621 // 1. CE ranges (Uppercase -> Lowercase)
56225622 // -------------------------------------
5623-
5624- // Group 1: CE 91-9F ('Α'-'Ο') -> CE B1-BF ('α'-'ο') (Add 0x20)
5623+ // Basic Greek Upper (Range 1): CE 91-9F ('Α'-'Ο') -> CE B1-BF ('α'-'ο') (Add 0x20)
56255624 __mmask64 is_basic1 = _mm512_mask_cmpge_epu8_mask (is_after_ce_mask , result_zmm , x_91_zmm );
56265625 is_basic1 &= _mm512_mask_cmple_epu8_mask (is_after_ce_mask , result_zmm , x_9f_zmm );
5627- result_zmm = _mm512_mask_add_epi8 (result_zmm , is_basic1 , result_zmm , x_20_zmm );
56285626
5629- // Group 2: CE A0-A9 ('Π'-'Ω') -> CF 80-89 ('π'-'ω') (Change lead CE->CF, subtract 0x20 from 2nd)
5627+ // Basic Greek Upper (Range 2): CE A0-A9 ('Π'-'Ω') -> CF 80-89 ('π'-'ω')
5628+ // (Change lead CE->CF, subtract 0x20 from 2nd)
56305629 __mmask64 is_basic2 = _mm512_mask_cmpge_epu8_mask (is_after_ce_mask , result_zmm , x_a0_zmm );
56315630 is_basic2 &= _mm512_mask_cmple_epu8_mask (is_after_ce_mask , result_zmm , x_a9_zmm );
5632- result_zmm = _mm512_mask_mov_epi8 (result_zmm , is_basic2 >> 1 , x_cf_zmm ); // Lead CE -> CF
5633- result_zmm = _mm512_mask_add_epi8 (result_zmm , is_basic2 , result_zmm , x_e0_zmm ); // 2nd -0x20 (using add E0)
56345631
5635- // Group 3 : CE 86-8F (Accented 'Ά'-'Ώ') -> Lowercase
5632+ // Accented Greek Upper : CE 86-8F ('Ά'-'Ώ') -> Lowercase
56365633 // Offsets vary: 86 +0x26 -> CE AC; 88-8A +0x25 -> CE AD-AF; 8C -> CF 8C (lead only); 8E-8F -> CF 8D-8E (lead, 2nd -1)
5637- // - 'Ά' (86) -> 'ά' (AC) -- +0x26
5638- // - 'Έ' (88), 'Ή' (89), 'Ί' (8A) -> 'έ' (AD), 'ή' (AE), 'ί' (AF) -- +0x25
5639- // - 'Ό' (8C) -> 'ό' (CF 8C) -- Lead change!
5640- // - 'Ύ' (8E), 'Ώ' (8F) -> 'ύ' (CF 8D), 'ώ' (CF 8E) -- Lead change + 2nd change!
5641-
56425634 __mmask64 is_accented = _mm512_mask_cmpge_epu8_mask (is_after_ce_mask , result_zmm , x_86_zmm );
56435635 is_accented &= _mm512_mask_cmple_epu8_mask (is_after_ce_mask , result_zmm , x_8f_zmm );
56445636
@@ -5652,7 +5644,25 @@ SZ_INTERNAL __m512i sz_utf8_case_insensitive_find_ice_greek_fold_zmm_(__m512i te
56525644
56535645 __mmask64 is_8e_8f = is_accented & _mm512_cmpge_epu8_mask (result_zmm , _mm512_set1_epi8 ((char )0x8E ));
56545646
5655- // Apply transformations using masked operations (branchless)
5647+ // Dialytika Greek Upper: CE AA-AB ('Ϊ', 'Ϋ') -> CF 8A-8B (Lead CE->CF, 2nd -0x20)
5648+ __mmask64 is_dialytika = _mm512_mask_cmpge_epu8_mask (is_after_ce_mask , result_zmm , x_aa_zmm );
5649+ is_dialytika &= _mm512_mask_cmple_epu8_mask (is_after_ce_mask , result_zmm , x_ab_zmm );
5650+
5651+ // 2. CF ranges (Final Sigma)
5652+ // --------------------------
5653+ // 'ς' (CF 82) -> 'σ' (CF 83)
5654+ __mmask64 is_final_sigma = _mm512_mask_cmpeq_epi8_mask (is_after_cf_mask , result_zmm , x_82_zmm );
5655+
5656+ // Apply transformations using masked operations
5657+ // ---------------------------------------------
5658+ // Apply Basic Greek Upper (Range 1)
5659+ result_zmm = _mm512_mask_add_epi8 (result_zmm , is_basic1 , result_zmm , x_20_zmm );
5660+
5661+ // Apply Basic Greek Upper (Range 2)
5662+ result_zmm = _mm512_mask_mov_epi8 (result_zmm , is_basic2 >> 1 , x_cf_zmm ); // Lead CE -> CF
5663+ result_zmm = _mm512_mask_add_epi8 (result_zmm , is_basic2 , result_zmm , x_e0_zmm ); // 2nd -0x20 (using add E0)
5664+
5665+ // Apply Accented Greek Upper
56565666 // 1. Additions for Same-Block Mappings (CE -> CE)
56575667 // 'Ά' -> +0x26
56585668 result_zmm = _mm512_mask_add_epi8 (result_zmm , is_86 , result_zmm , _mm512_set1_epi8 (0x26 ));
@@ -5668,16 +5678,11 @@ SZ_INTERNAL __m512i sz_utf8_case_insensitive_find_ice_greek_fold_zmm_(__m512i te
56685678 // 'Ύ', 'Ώ' -> -1
56695679 result_zmm = _mm512_mask_sub_epi8 (result_zmm , is_8e_8f , result_zmm , x_01_zmm );
56705680
5671- // Group 4: CE AA-AB (Dialytika 'Ϊ', 'Ϋ') -> CF 8A-8B (Lead CE->CF, 2nd -0x20)
5672- __mmask64 is_dialytika = _mm512_mask_cmpge_epu8_mask (is_after_ce_mask , result_zmm , x_aa_zmm );
5673- is_dialytika &= _mm512_mask_cmple_epu8_mask (is_after_ce_mask , result_zmm , x_ab_zmm );
5681+ // Apply Dialytika Greek Upper
56745682 result_zmm = _mm512_mask_mov_epi8 (result_zmm , is_dialytika >> 1 , x_cf_zmm );
56755683 result_zmm = _mm512_mask_add_epi8 (result_zmm , is_dialytika , result_zmm , x_e0_zmm ); // -0x20
56765684
5677- // 2. CF ranges (Final Sigma)
5678- // --------------------------
5679- // 'ς' (CF 82) -> 'σ' (CF 83)
5680- __mmask64 is_final_sigma = _mm512_mask_cmpeq_epi8_mask (is_after_cf_mask , result_zmm , x_82_zmm );
5685+ // Apply Final Sigma
56815686 result_zmm = _mm512_mask_mov_epi8 (result_zmm , is_final_sigma , x_83_zmm );
56825687
56835688 return result_zmm ;
0 commit comments