Skip to content

Commit 15bcc43

Browse files
committed
Add: UTF-8 case-folding placeholders
1 parent 65b652f commit 15bcc43

File tree

3 files changed

+628
-891
lines changed

3 files changed

+628
-891
lines changed

c/stringzilla.c

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ typedef struct sz_implementations_t {
6363
sz_utf8_unpack_chunk_t utf8_unpack_chunk;
6464
sz_utf8_find_boundary_t utf8_find_newline;
6565
sz_utf8_find_boundary_t utf8_find_whitespace;
66+
sz_utf8_case_fold_t utf8_case_fold;
6667

6768
sz_sequence_argsort_t sequence_argsort;
6869
sz_sequence_intersect_t sequence_intersect;
@@ -110,6 +111,7 @@ static void sz_dispatch_table_update_implementation_(sz_capability_t caps) {
110111
impl->utf8_unpack_chunk = sz_utf8_unpack_chunk_serial;
111112
impl->utf8_find_newline = sz_utf8_find_newline_serial;
112113
impl->utf8_find_whitespace = sz_utf8_find_whitespace_serial;
114+
impl->utf8_case_fold = sz_utf8_case_fold_serial;
113115

114116
impl->sequence_argsort = sz_sequence_argsort_serial;
115117
impl->sequence_intersect = sz_sequence_intersect_serial;
@@ -165,6 +167,7 @@ static void sz_dispatch_table_update_implementation_(sz_capability_t caps) {
165167
impl->utf8_unpack_chunk = sz_utf8_unpack_chunk_haswell;
166168
impl->utf8_find_newline = sz_utf8_find_newline_haswell;
167169
impl->utf8_find_whitespace = sz_utf8_find_whitespace_haswell;
170+
impl->utf8_case_fold = sz_utf8_case_fold_haswell;
168171
}
169172
#endif
170173

@@ -204,6 +207,7 @@ static void sz_dispatch_table_update_implementation_(sz_capability_t caps) {
204207
impl->utf8_unpack_chunk = sz_utf8_unpack_chunk_ice;
205208
impl->utf8_find_newline = sz_utf8_find_newline_ice;
206209
impl->utf8_find_whitespace = sz_utf8_find_whitespace_ice;
210+
impl->utf8_case_fold = sz_utf8_case_fold_ice;
207211

208212
impl->lookup = sz_lookup_ice;
209213

@@ -245,6 +249,7 @@ static void sz_dispatch_table_update_implementation_(sz_capability_t caps) {
245249
impl->utf8_unpack_chunk = sz_utf8_unpack_chunk_neon;
246250
impl->utf8_find_newline = sz_utf8_find_newline_neon;
247251
impl->utf8_find_whitespace = sz_utf8_find_whitespace_neon;
252+
impl->utf8_case_fold = sz_utf8_case_fold_neon;
248253
}
249254
#endif
250255

@@ -485,6 +490,12 @@ SZ_DYNAMIC sz_cptr_t sz_utf8_find_whitespace(sz_cptr_t text, sz_size_t length, s
485490
return sz_dispatch_table.utf8_find_whitespace(text, length, matched_length);
486491
}
487492

493+
/** @brief Forwards every argument unchanged to the `utf8_case_fold` entry of `sz_dispatch_table` and returns its status. */
SZ_DYNAMIC sz_status_t sz_utf8_case_fold(sz_cptr_t source, sz_size_t source_length, sz_ptr_t destination,
494+
sz_size_t destination_capacity, sz_size_t *destination_length) {
495+
return sz_dispatch_table.utf8_case_fold(source, source_length, destination, destination_capacity,
496+
destination_length);
497+
}
498+
488499
// Provide overrides for the libc mem* functions
489500
#if SZ_OVERRIDE_LIBC && !defined(__CYGWIN__)
490501

include/stringzilla/types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,9 @@ typedef sz_cptr_t (*sz_utf8_find_nth_t)(sz_cptr_t, sz_size_t, sz_size_t);
780780
/** @brief Signature of `sz_utf8_unpack_chunk`. */
781781
typedef sz_cptr_t (*sz_utf8_unpack_chunk_t)(sz_cptr_t, sz_size_t, sz_rune_t *, sz_size_t, sz_size_t *);
782782

783+
/** @brief Signature of `sz_utf8_case_fold`. */
784+
typedef sz_status_t (*sz_utf8_case_fold_t)(sz_cptr_t, sz_size_t, sz_ptr_t, sz_size_t, sz_size_t *);
785+
783786
/** @brief Signature of `sz_fill_random`. */
784787
typedef void (*sz_fill_random_t)(sz_ptr_t, sz_size_t, sz_u64_t);
785788

0 commit comments

Comments
 (0)