@@ -108,9 +108,12 @@ export function* graphemeSegments(input) {
108
108
catBegin = catBefore ;
109
109
}
110
110
111
+ // Note: Lazily update `consonant` and `linker` state
112
+ // which is an extra overhead only for Hindi text.
111
113
if ( ! consonant && catBefore === 0 ) {
112
114
consonant = isIndicConjunctCosonant ( cp ) ;
113
- } else if ( catBefore === 3 ) {
115
+ } else if ( catBefore === 3 /* Extend */ ) {
116
+ // Note: \p{InCB=Linker} is a subset of \p{Extend}
114
117
linker = isIndicConjunctLinker ( cp ) ;
115
118
}
116
119
@@ -130,7 +133,7 @@ export function* graphemeSegments(input) {
130
133
return ;
131
134
}
132
135
133
- if ( catBefore === 10 /* Regional_Indicator*/ ) {
136
+ if ( catBefore === 10 /* Regional_Indicator */ ) {
134
137
risCount += 1 ;
135
138
} else {
136
139
risCount = 0 ;
@@ -140,9 +143,11 @@ export function* graphemeSegments(input) {
140
143
) {
141
144
emoji = true ;
142
145
143
- // Put GB9c rule checking here to reduce.
146
+ // Note: Put GB9c rule checking here to reduce.
144
147
} else if ( catAfter === 0 /* Any */ ) {
145
- incb = consonant && linker && isIndicConjunctCosonant ( cp ) ;
148
+ incb = consonant && linker && ( consonant = isIndicConjunctCosonant ( cp ) ) ;
149
+ // It cannot be both a linker and a consonant.
150
+ linker = linker && ! consonant ;
146
151
}
147
152
}
148
153
@@ -160,8 +165,7 @@ export function* graphemeSegments(input) {
160
165
index = cursor ;
161
166
segment = '' ;
162
167
emoji = false ;
163
- consonant = false ;
164
- linker = false ;
168
+ incb = false ;
165
169
catBegin = catAfter ;
166
170
}
167
171
}
@@ -227,7 +231,14 @@ function isIndicConjunctCosonant(cp) {
227
231
* @return {boolean }
228
232
*/
229
233
function isIndicConjunctLinker ( cp ) {
230
- return ( cp === 0x094D || cp === 0x09CD || cp === 0x0ACD || cp === 0x0B4D || cp === 0x0C4D || cp === 0x0D4D ) ;
234
+ return (
235
+ cp === 2381 /* 0x094D */ ||
236
+ cp === 2509 /* 0x09CD */ ||
237
+ cp === 2765 /* 0x0ACD */ ||
238
+ cp === 2893 /* 0x0B4D */ ||
239
+ cp === 3149 /* 0x0C4D */ ||
240
+ cp === 3405 /* 0x0D4D */
241
+ ) ;
231
242
}
232
243
233
244
/**
@@ -246,12 +257,13 @@ function isBoundary(catBefore, catAfter, risCount, emoji, incb) {
246
257
return false ;
247
258
}
248
259
249
- if (
250
- // GB4
251
- ( catBefore === 1 || catBefore === 2 || catBefore === 6 ) ||
252
- // GB5
253
- ( catAfter === 1 || catAfter === 2 || catAfter === 6 )
254
- ) {
260
+ // GB4
261
+ if ( catBefore === 1 || catBefore === 2 || catBefore === 6 ) {
262
+ return true ;
263
+ }
264
+
265
+ // GB5
266
+ if ( catAfter === 1 || catAfter === 2 || catAfter === 6 ) {
255
267
return true ;
256
268
}
257
269
@@ -279,8 +291,13 @@ function isBoundary(catBefore, catAfter, risCount, emoji, incb) {
279
291
return false ;
280
292
}
281
293
282
- // GB9. GB9a
283
- if ( catAfter === 3 || catAfter === 11 || catAfter === 14 ) {
294
+ // GB9
295
+ if ( catAfter === 3 || catAfter === 14 ) {
296
+ return false ;
297
+ }
298
+
299
+ // GB9a
300
+ if ( catAfter === 11 ) {
284
301
return false ;
285
302
}
286
303
0 commit comments