Skip to content

Commit 88a22e2

Browse files
committed
perf: reduce Indic character searching
1 parent 4b8d1d1 commit 88a22e2

File tree

3 files changed

+19
-12
lines changed

3 files changed

+19
-12
lines changed

.changeset/grumpy-olives-warn.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"unicode-segmenter": patch
3+
---
4+
5+
grapheme: improve runtime perf by ~9% for most common use cases

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
256256

257257
| Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) |
258258
|------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|
259-
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 15,997 | 12,130 | 5,061 | 3,751 |
259+
| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 16,053 | 12,150 | 5,070 | 3,752 |
260260
| `graphemer` | 15.0.0 | ✖️ | 410,435 | 95,104 | 15,752 | 10,660 |
261261
| `grapheme-splitter` | 10.0.0 | ✖️ | 122,252 | 23,680 | 7,852 | 4,841 |
262262
| `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 603,285 | 369,560 | 72,218 | 49,416 |
@@ -272,9 +272,9 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
272272

273273
| Name | Bytecode size | Bytecode size (gzip)* |
274274
|------------------------------|--------------:|----------------------:|
275-
| `unicode-segmenter/grapheme` | 22,061 | 11,539 |
276-
| `graphemer` | 133,974 | 31,719 |
277-
| `grapheme-splitter` | 63,831 | 19,140 |
275+
| `unicode-segmenter/grapheme` | 22,110 | 11,566 |
276+
| `graphemer` | 133,978 | 31,713 |
277+
| `grapheme-splitter` | 63,835 | 19,137 |
278278

279279
* It would be compressed when included as an app asset.
280280

src/grapheme.js

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,15 @@ export function* graphemeSegments(input) {
104104
catBegin = catBefore;
105105
}
106106

107-
// Note: Lazily update `consonant` and `linker` state
108-
// which is an extra overhead only for Hindi text.
109-
if (!consonant && catBefore === 0) {
110-
consonant = isIndicConjunctCosonant(cp);
111-
} else if (catBefore === 3 /* Extend */) {
112-
// Note: \p{InCB=Linker} is a subset of \p{Extend}
113-
linker = isIndicConjunctLinker(cp);
107+
if (cp >= 2325) {
108+
// Note: Lazily update `consonant` and `linker` state
109+
// which is an extra overhead only for Hindi text.
110+
if (!consonant && catBefore === 0) {
111+
consonant = isIndicConjunctCosonant(cp);
112+
} else if (catBefore === 3 /* Extend */) {
113+
// Note: \p{InCB=Linker} is a subset of \p{Extend}
114+
linker = isIndicConjunctLinker(cp);
115+
}
114116
}
115117

116118
if (cursor < len) {
@@ -138,7 +140,7 @@ export function* graphemeSegments(input) {
138140
) {
139141
emoji = true;
140142

141-
} else if (catAfter === 0 /* Any */) {
143+
} else if (catAfter === 0 /* Any */ && cp >= 2325) {
142144
// Note: Put GB9c rule checking here to reduce.
143145
incb = consonant && linker && (consonant = isIndicConjunctCosonant(cp));
144146
// It cannot be both a linker and a consonant.

0 commit comments

Comments
 (0)