@@ -1089,6 +1089,189 @@ SZ_INTERNAL sz_bool_t sz_utf8_is_fully_caseless_(sz_cptr_t str, sz_size_t len) {
10891089 return sz_true_k ;
10901090}
10911091
1092+ /**
1093+ * @brief Hash-free case-insensitive search for needles that fold to exactly 1 rune.
1094+ * Examples: 'a', 'A', 'б', 'Б' (but NOT 'ß' which folds to 'ss' = 2 runes).
1095+ *
1096+ * Single-pass algorithm: parses each source rune, folds it, checks if it produces
1097+ * exactly one rune matching the target. No iterator overhead, no verification needed.
1098+ *
1099+ * @param[in] target_folded The single folded rune to search for.
1100+ */
1101+ SZ_INTERNAL sz_cptr_t sz_utf8_case_insensitive_find_1folded_serial_ ( //
1102+ sz_cptr_t haystack , sz_size_t haystack_length , //
1103+ sz_rune_t needle_folded , sz_size_t * match_length ) {
1104+
1105+ sz_cptr_t const haystack_end = haystack + haystack_length ;
1106+
1107+ // Each haystack rune may fold in up to 3 runes
1108+ sz_rune_t haystack_rune ;
1109+ sz_rune_length_t haystack_rune_length ;
1110+
1111+ // If we simply initialize the runes for zero, the code will break
1112+ // when the needle itself is the NUL character
1113+ sz_rune_t haystack_folded_runes [3 ] = {~needle_folded };
1114+ while (haystack < haystack_end ) {
1115+ sz_rune_parse (haystack , & haystack_rune , & haystack_rune_length );
1116+ sz_unicode_fold_codepoint_ (haystack_rune , haystack_folded_runes );
1117+
1118+ // Perform branchless equality check via arithmetic
1119+ sz_u32_t has_match = //
1120+ (haystack_folded_runes [0 ] == needle_folded ) + //
1121+ (haystack_folded_runes [1 ] == needle_folded ) + //
1122+ (haystack_folded_runes [2 ] == needle_folded );
1123+
1124+ if (has_match ) {
1125+ * match_length = haystack_rune_length ;
1126+ return haystack ;
1127+ }
1128+
1129+ haystack += haystack_rune_length ;
1130+ }
1131+
1132+ * match_length = 0 ;
1133+ return SZ_NULL_CHAR ;
1134+ }
1135+
1136+ /**
1137+ * @brief Hash-free case-insensitive search for needles that fold to exactly 2 runes.
1138+ * Examples: 'ab', 'AB', 'ß' (folds to 'ss'), 'fi' (folds to 'fi').
1139+ *
1140+ * Single-pass sliding window over the folded rune stream. Handles expansions (ß→ss)
1141+ * by buffering folded runes from each source and tracking source boundaries.
1142+ */
1143+ SZ_INTERNAL sz_cptr_t sz_utf8_case_insensitive_find_2folded_serial_ ( //
1144+ sz_cptr_t haystack , sz_size_t haystack_length , //
1145+ sz_rune_t first_needle_folded , sz_rune_t second_needle_folded , sz_size_t * match_length ) {
1146+
1147+ sz_cptr_t const haystack_end = haystack + haystack_length ;
1148+
1149+ // Each haystack rune may fold in up to 3 runes, but we also keep an extra slot
1150+ // for the last folded rune from the previous iterato step
1151+ sz_rune_t haystack_rune ;
1152+ sz_rune_length_t haystack_rune_length , haystack_last_rune_length = sz_utf8_invalid_k ;
1153+
1154+ // If we simply initialize the runes for zero, the code will break
1155+ // when the needle itself is the NUL character
1156+ sz_rune_t haystack_folded_runes [4 ] = {~first_needle_folded };
1157+ while (haystack < haystack_end ) {
1158+ sz_rune_parse (haystack , & haystack_rune , & haystack_rune_length );
1159+
1160+ // Export into the last 3 rune entries of the 4-element array,
1161+ // keeping the first position with historical data untouched
1162+ sz_size_t folded_count = sz_unicode_fold_codepoint_ (haystack_rune , haystack_folded_runes + 1 );
1163+
1164+ // Perform branchless equality check via arithmetic
1165+ sz_u32_t has_match_f0 = first_needle_folded == haystack_folded_runes [0 ];
1166+ sz_u32_t has_match_f1 = first_needle_folded == haystack_folded_runes [1 ];
1167+ sz_u32_t has_match_f2 = first_needle_folded == haystack_folded_runes [2 ];
1168+ sz_u32_t has_match_s1 = second_needle_folded == haystack_folded_runes [1 ];
1169+ sz_u32_t has_match_s2 = second_needle_folded == haystack_folded_runes [2 ];
1170+ sz_u32_t has_match_s3 = second_needle_folded == haystack_folded_runes [3 ];
1171+
1172+ // Branchless match detection: each product is 0 or 1
1173+ sz_u32_t match_at_01 = has_match_f0 * has_match_s1 ;
1174+ sz_u32_t match_at_12 = has_match_f1 * has_match_s2 ;
1175+ sz_u32_t match_at_23 = has_match_f2 * has_match_s3 ;
1176+ sz_u32_t has_match = match_at_01 + match_at_12 + match_at_23 ;
1177+
1178+ if (has_match ) {
1179+ // Only `match_at_01` spans sources; others are within current source
1180+ sz_size_t back_offset = match_at_01 * (sz_size_t )haystack_last_rune_length ;
1181+ * match_length = (sz_size_t )haystack_rune_length + back_offset ;
1182+ return haystack - back_offset ;
1183+ }
1184+
1185+ haystack_folded_runes [0 ] = haystack_folded_runes [folded_count ];
1186+ haystack_last_rune_length = haystack_rune_length ;
1187+ haystack += haystack_rune_length ;
1188+ }
1189+
1190+ * match_length = 0 ;
1191+ return SZ_NULL_CHAR ;
1192+ }
1193+
1194+ /**
1195+ * @brief Hash-free case-insensitive search for needles that fold to exactly 3 runes.
1196+ * Examples: 'abc', 'ABC', 'aß' (folds to 'ass'), 'fia' (folds to 'fia').
1197+ *
1198+ * Single-pass sliding window of 3 folded runes over the haystack's folded stream.
1199+ * Handles expansions (ß→ss) by buffering and tracking source boundaries.
1200+ */
1201+ SZ_INTERNAL sz_cptr_t sz_utf8_case_insensitive_find_3folded_serial_ ( //
1202+ sz_cptr_t haystack , sz_size_t haystack_length , //
1203+ sz_rune_t first_needle_folded , sz_rune_t second_needle_folded , sz_rune_t third_needle_folded ,
1204+ sz_size_t * match_length ) {
1205+
1206+ sz_cptr_t const haystack_end = haystack + haystack_length ;
1207+
1208+ // Each haystack rune may fold in up to 3 runes, but we also keep an extra 2 slots
1209+ // for the last folded rune from the previous iteration step, and the one before that
1210+ sz_rune_t haystack_rune ;
1211+ sz_rune_length_t haystack_rune_length , haystack_last_rune_length = sz_utf8_invalid_k ,
1212+ haystack_preceding_rune_length = sz_utf8_invalid_k ;
1213+
1214+ // Initialize historical slots with sentinels that can never match their respective needle positions
1215+ // This prevents false matches on first iterations when history is not yet populated
1216+ sz_rune_t haystack_folded_runes [5 ] = {~first_needle_folded , ~second_needle_folded , 0 , 0 , 0 };
1217+ while (haystack < haystack_end ) {
1218+ sz_rune_parse (haystack , & haystack_rune , & haystack_rune_length );
1219+
1220+ // Export into the last 3 rune entries of the 5-element array,
1221+ // keeping the first two positions with historical data untouched
1222+ sz_size_t folded_count = sz_unicode_fold_codepoint_ (haystack_rune , haystack_folded_runes + 2 );
1223+
1224+ // Perform branchless equality check via arithmetic
1225+ sz_u32_t has_match_f0 = first_needle_folded == haystack_folded_runes [0 ];
1226+ sz_u32_t has_match_f1 = first_needle_folded == haystack_folded_runes [1 ];
1227+ sz_u32_t has_match_f2 = first_needle_folded == haystack_folded_runes [2 ];
1228+ sz_u32_t has_match_s1 = second_needle_folded == haystack_folded_runes [1 ];
1229+ sz_u32_t has_match_s2 = second_needle_folded == haystack_folded_runes [2 ];
1230+ sz_u32_t has_match_s3 = second_needle_folded == haystack_folded_runes [3 ];
1231+ sz_u32_t has_match_t2 = third_needle_folded == haystack_folded_runes [2 ];
1232+ sz_u32_t has_match_t3 = third_needle_folded == haystack_folded_runes [3 ];
1233+ sz_u32_t has_match_t4 = third_needle_folded == haystack_folded_runes [4 ];
1234+
1235+ // Branchless match detection: each product is 0 or 1
1236+ sz_u32_t match_at_012 = has_match_f0 * has_match_s1 * has_match_t2 ;
1237+ sz_u32_t match_at_123 = has_match_f1 * has_match_s2 * has_match_t3 ;
1238+ sz_u32_t match_at_234 = has_match_f2 * has_match_s3 * has_match_t4 ;
1239+ sz_u32_t has_match = match_at_012 + match_at_123 + match_at_234 ;
1240+
1241+ if (has_match ) {
1242+ // Compute back offset based on which position matched:
1243+ // - `match_at_012`: need preceding + last
1244+ // - `match_at_123`: need last
1245+ // - `match_at_234`: stay at current
1246+ sz_size_t back_for_last = (match_at_012 + match_at_123 ) * (sz_size_t )haystack_last_rune_length ;
1247+ sz_size_t back_for_preceding = match_at_012 * (sz_size_t )haystack_preceding_rune_length ;
1248+ sz_size_t back_offset = back_for_last + back_for_preceding ;
1249+ * match_length = (sz_size_t )haystack_rune_length + back_offset ;
1250+ return haystack - back_offset ;
1251+ }
1252+
1253+ // Historical context update here is a bit trickier than in previous spaces
1254+ if (folded_count >= 2 ) {
1255+ haystack_folded_runes [0 ] = haystack_folded_runes [folded_count ];
1256+ haystack_folded_runes [1 ] = haystack_folded_runes [folded_count + 1 ];
1257+ haystack_preceding_rune_length = sz_utf8_invalid_k ;
1258+ haystack_last_rune_length = haystack_rune_length ;
1259+ }
1260+ else {
1261+ sz_assert_ (folded_count == 1 );
1262+ haystack_folded_runes [0 ] = haystack_folded_runes [1 ];
1263+ haystack_folded_runes [1 ] = haystack_folded_runes [2 ];
1264+ haystack_preceding_rune_length = haystack_last_rune_length ;
1265+ haystack_last_rune_length = haystack_rune_length ;
1266+ }
1267+
1268+ haystack += haystack_rune_length ;
1269+ }
1270+
1271+ * match_length = 0 ;
1272+ return SZ_NULL_CHAR ;
1273+ }
1274+
10921275/**
10931276 * @brief Rabin-Karp style case-insensitive UTF-8 substring search using a ring buffer.
10941277 * Uses a rolling hash over casefolded runes with O(1) updates per position.
@@ -1113,6 +1296,35 @@ SZ_PUBLIC sz_cptr_t sz_utf8_case_insensitive_find_serial( //
11131296 return SZ_NULL_CHAR ;
11141297 }
11151298
1299+ // For short needles (up to 12 bytes which can fold to at most ~6 runes), try hash-free search.
1300+ // We fold the needle first and dispatch based on the folded rune count.
1301+ // This avoids ring buffer setup, hash multiplier computation, and rolling hash updates.
1302+ if (needle_length <= 12 ) {
1303+ sz_rune_t folded [4 ]; // 4th slot accessed before loop exit
1304+ sz_size_t folded_count = 0 ;
1305+ sz_utf8_folded_iter_t iter ;
1306+ sz_utf8_folded_iter_init_ (& iter , needle , needle_length );
1307+ sz_rune_t rune ;
1308+ while (folded_count < 4 && sz_utf8_folded_iter_next_ (& iter , & rune )) folded [folded_count ++ ] = rune ;
1309+
1310+ // Dispatch based on folded rune count
1311+ switch (folded_count ) {
1312+ case 1 :
1313+ return sz_utf8_case_insensitive_find_1folded_serial_ ( //
1314+ haystack , haystack_length , //
1315+ folded [0 ], match_length );
1316+ case 2 :
1317+ return sz_utf8_case_insensitive_find_2folded_serial_ ( //
1318+ haystack , haystack_length , //
1319+ folded [0 ], folded [1 ], match_length );
1320+ case 3 :
1321+ return sz_utf8_case_insensitive_find_3folded_serial_ ( //
1322+ haystack , haystack_length , //
1323+ folded [0 ], folded [1 ], folded [2 ], match_length );
1324+ default : break ; // 4+ folded runes: fall through to Rabin-Karp
1325+ }
1326+ }
1327+
11161328 sz_size_t const ring_capacity = 32 ;
11171329 sz_rune_t needle_runes [32 ];
11181330 sz_size_t needle_prefix_count = 0 , needle_total_count = 0 ;
@@ -4748,18 +4960,18 @@ SZ_PUBLIC sz_cptr_t sz_utf8_case_insensitive_find_ice( //
47484960 // 3. Fall back to serial if no path meets its threshold
47494961
47504962 // Priority 1: ASCII path (broadest haystack compatibility, handles all byte values)
4751- if (analysis .ascii .length >= 3 )
4963+ if (analysis .ascii .length >= 1 )
47524964 return sz_utf8_case_insensitive_find_ascii_ice_ (haystack , haystack_length , needle , needle_length ,
47534965 needle + analysis .ascii .start , analysis .ascii .length ,
47544966 matched_length );
47554967
47564968 // Priority 2: Latin1 path (includes Latin-1 Supplement: ß, accented letters, etc.)
4757- if (analysis .latin1 .length >= 4 )
4969+ if (analysis .latin1 .length >= 2 ) // Smallest non-ASCII Latin1 codepoint is 2 bytes
47584970 return sz_utf8_case_insensitive_find_latin1_ice_ (haystack , haystack_length , needle , needle_length ,
47594971 & analysis .latin1 , matched_length );
47604972
47614973 // Priority 3: Vietnamese path (includes Latin1 + Latin Extended Additional)
4762- if (analysis .vietnamese .length >= 6 )
4974+ if (analysis .vietnamese .length >= 3 ) // One Vietnamese codepoints are 3 bytes in size
47634975 return sz_utf8_case_insensitive_find_vietnamese_ice_ (haystack , haystack_length , needle , needle_length ,
47644976 & analysis .vietnamese , matched_length );
47654977
@@ -4773,13 +4985,13 @@ SZ_PUBLIC sz_cptr_t sz_utf8_case_insensitive_find_ice( //
47734985 if (analysis .armenian .length > best_script_len ) best_script_len = analysis .armenian .length ;
47744986
47754987 // Select among script-specific paths based on longest window
4776- if (analysis .cyrillic .length == best_script_len && analysis .cyrillic .length >= 4 )
4988+ if (analysis .cyrillic .length == best_script_len && analysis .cyrillic .length >= 2 )
47774989 return sz_utf8_case_insensitive_find_cyrillic_ice_ (haystack , haystack_length , needle , needle_length ,
47784990 & analysis .cyrillic , matched_length );
4779- if (analysis .greek .length == best_script_len && analysis .greek .length >= 4 )
4991+ if (analysis .greek .length == best_script_len && analysis .greek .length >= 2 )
47804992 return sz_utf8_case_insensitive_find_greek_ice_ (haystack , haystack_length , needle , needle_length ,
47814993 & analysis .greek , matched_length );
4782- if (analysis .armenian .length == best_script_len && analysis .armenian .length >= 4 )
4994+ if (analysis .armenian .length == best_script_len && analysis .armenian .length >= 2 )
47834995 return sz_utf8_case_insensitive_find_armenian_ice_ (haystack , haystack_length , needle , needle_length ,
47844996 & analysis .armenian , matched_length );
47854997
0 commit comments