@@ -24,9 +24,22 @@ interface Symbols {
24
24
25
25
// Matches a string (without line breaks) in which an opening parenthesis is
// followed, somewhere later, by a closing parenthesis.
// NOTE(review): presumably detects accounting-style negative formatting; the
// usage is not visible in this view, so confirm against the caller.
const CURRENCY_SIGN_REGEX = new RegExp ( '^.*\\(.*\\).*$' ) ;
26
26
// Unicode numbering-system identifiers: Latin digits, Arabic-Indic digits and
// Han decimal digits.
const NUMBERING_SYSTEMS = [ 'latn' , 'arab' , 'hanidec' ] ;
27
- // eslint-disable-next-line no-irregular-whitespace
28
- const GROUPING_SYMBOLS_REGEX = / [ , ٬ . ] / gu;
29
- const NUMERALS_REGEX = / [ 0 1 2 3 4 5 6 7 8 9 ] | [ ٠ ١ ٢ ٣ ٤ ٥ ٦ ٧ ٨ ٩ ] | [ 〇 一 二 三 四 五 六 七 八 九 ] / gu;
27
// Minus characters accepted on input: U+002D HYPHEN-MINUS and U+2212 MINUS SIGN.
+ const MINUS_SIGN_SYMBOLS = '\u002D\u2212' ;
28
// Global match over the minus characters above, so every occurrence is replaced
// when the value is normalized to the minus sign of the locale.
+ const MINUS_SIGN_REGEX = new RegExp ( `[${ MINUS_SIGN_SYMBOLS } ]` , 'g' ) ;
29
// Comma and period are ambiguous: they are part of both the grouping set and
// the decimal set defined below.
+ const AMBIGUOUS_SYMBOLS = ',.' ;
30
// U+066C ARABIC THOUSANDS SEPARATOR.
+ const ARABIC_THOUSANDS_SEPARATOR = '\u066C' ;
31
// U+066B ARABIC DECIMAL SEPARATOR.
+ const ARABIC_DECIMAL_SEPARATOR = '\u066B' ;
32
// U+200E LEFT-TO-RIGHT MARK and U+200F RIGHT-TO-LEFT MARK; these invisible
// direction marks are stripped from the sanitized value before numeric conversion.
+ const LRM_RLM_REGEX = / [ \u200E \u200F ] / g;
33
// Characters treated as group separators: the ambiguous pair, U+00A0 NO-BREAK
// SPACE, U+202F NARROW NO-BREAK SPACE, the Arabic thousands separator and
// U+2019 RIGHT SINGLE QUOTATION MARK.
+ const GROUPING_SYMBOLS = `${ AMBIGUOUS_SYMBOLS } \u00A0\u202F${ ARABIC_THOUSANDS_SEPARATOR } \u2019` ;
34
+ const GROUPING_SYMBOLS_REGEX = new RegExp ( `[${ GROUPING_SYMBOLS } ]` , 'g' ) ;
35
// Characters treated as decimal separators: the ambiguous pair plus the Arabic
// decimal separator.
+ const DECIMAL_SYMBOLS = `${ AMBIGUOUS_SYMBOLS } ${ ARABIC_DECIMAL_SEPARATOR } ` ;
36
+ const DECIMAL_SYMBOLS_REGEX = new RegExp ( `[${ DECIMAL_SYMBOLS } ]` , 'g' ) ;
37
// Digit alphabets. Each string is ordered so that the index of a character is
// its numeric value; the numeral conversion code relies on that ordering when
// it maps digits with indexOf and by position.
+ const NUMERALS_LATN = '0123456789' ;
38
+ const NUMERALS_ARAB = '٠١٢٣٤٥٦٧٨٩' ;
39
// NOTE(review): this regex and the Hanidec one below carry the global (g) flag
// and are shared module-level objects. RegExp.prototype.test on a global regex
// advances lastIndex after a successful match, so a .test() call that is not
// followed by a replace or match on the same regex leaves state behind for the
// next call. Confirm the call sites account for this.
+ const NUMERALS_ARAB_REGEX = new RegExp ( `[${ NUMERALS_ARAB } ]` , 'gu' ) ;
40
+ const NUMERALS_HANIDEC = '〇一二三四五六七八九' ;
41
+ const NUMERALS_HANIDEC_REGEX = new RegExp ( `[${ NUMERALS_HANIDEC } ]` , 'gu' ) ;
42
// Matches any single digit from the Latin, Arabic-Indic or Han decimal alphabets.
+ const NUMERALS_REGEX = new RegExp ( `[${ NUMERALS_LATN } ]|[${ NUMERALS_ARAB } ]|[${ NUMERALS_HANIDEC } ]` , 'gu' ) ;
30
43
31
44
/**
32
45
* A NumberParser can be used to perform locale-aware parsing of numbers from Unicode strings,
@@ -158,8 +171,12 @@ class NumberParserImpl {
158
171
}
159
172
}
160
173
174
+ // Remove LRM and RLM characters, which are used in some locales to control text direction.
175
+ fullySanitizedValue = fullySanitizedValue ?. replace ( LRM_RLM_REGEX , '' ) ;
176
+
161
177
let newValue = fullySanitizedValue ? + fullySanitizedValue : NaN ;
162
178
if ( isNaN ( newValue ) ) {
179
+ // console.log('Failed to parse number:', {value, fullySanitizedValue, locale: this.locale, options: this.options, symbols: this.symbols});
163
180
return NaN ;
164
181
}
165
182
@@ -186,24 +203,73 @@ class NumberParserImpl {
186
203
let sanitizedValue = value . trim ( ) ;
187
204
188
205
let numeralMatches = sanitizedValue . match ( NUMERALS_REGEX ) ;
189
- if ( numeralMatches ) {
190
- let beforeAbs = sanitizedValue . slice ( 0 , sanitizedValue . indexOf ( numeralMatches [ 0 ] ) ) ;
191
- let afterAbs = sanitizedValue . slice ( sanitizedValue . lastIndexOf ( numeralMatches [ numeralMatches . length - 1 ] ) + 1 ) ;
192
- let abs = sanitizedValue . slice ( sanitizedValue . indexOf ( numeralMatches [ 0 ] ) , sanitizedValue . lastIndexOf ( numeralMatches [ numeralMatches . length - 1 ] ) + 1 ) ;
206
+ if ( numeralMatches && this . options . numberingSystem !== 'arab' ) {
207
+ let firstNumeralMatch = numeralMatches [ 0 ] ;
208
+ let lastNumeralMatch = numeralMatches [ numeralMatches . length - 1 ] ;
209
+ let beforeAbs = sanitizedValue . slice ( 0 , sanitizedValue . indexOf ( firstNumeralMatch ) ) ;
210
+ let afterAbs = sanitizedValue . slice ( sanitizedValue . lastIndexOf ( lastNumeralMatch ) + 1 ) ;
211
+ let abs = sanitizedValue . slice ( sanitizedValue . indexOf ( firstNumeralMatch ) , sanitizedValue . lastIndexOf ( lastNumeralMatch ) + 1 ) ;
212
+
193
213
// Replace group and decimal symbols with the current locale's symbols
214
+ let decimalSymbolMatch = abs . match ( DECIMAL_SYMBOLS_REGEX ) ;
194
215
let groupSymbolMatch = abs . match ( GROUPING_SYMBOLS_REGEX ) ;
195
216
let integerPart : string ;
196
- let parsedIntegerPart : number ;
197
217
let decimalPart : string ;
198
- if ( groupSymbolMatch && groupSymbolMatch . length > 0 && abs . length - groupSymbolMatch . length > this . options . minimumIntegerDigits ) {
199
- integerPart = abs . slice ( 0 , abs . indexOf ( groupSymbolMatch [ groupSymbolMatch . length - 1 ] ) ) ;
200
- decimalPart = abs . slice ( abs . indexOf ( groupSymbolMatch [ groupSymbolMatch . length - 1 ] ) + 1 , abs . length ) ;
218
+ if ( decimalSymbolMatch ) {
219
+ let firstDecimalSymbol = decimalSymbolMatch [ 0 ] ;
220
+ let lastDecimalSymbol = decimalSymbolMatch [ decimalSymbolMatch . length - 1 ] ;
221
+ integerPart = abs . slice ( 0 , abs . lastIndexOf ( lastDecimalSymbol ) ) ;
222
+ decimalPart = abs . slice ( abs . lastIndexOf ( lastDecimalSymbol ) + 1 , abs . length ) ;
201
223
integerPart = integerPart . replace ( GROUPING_SYMBOLS_REGEX , '' ) ;
202
- parsedIntegerPart = parseInt ( integerPart , 10 ) ;
203
- if ( ! isNaN ( parsedIntegerPart ) ) {
204
- integerPart = parsedIntegerPart . toString ( ) ;
224
+ let isArabic = NUMERALS_ARAB_REGEX . test ( abs ) ;
225
+ let isHanidec = NUMERALS_HANIDEC_REGEX . test ( abs ) ;
226
+ if ( isArabic ) {
227
+ // Replace Arabic numerals with Latin numerals,
228
+ // then parse the integer part to remove leading zeros,
229
+ // and finally replace Latin numerals with Arabic numerals.
230
+ integerPart = (
231
+ parseInt (
232
+ integerPart
233
+ . replace ( NUMERALS_ARAB_REGEX , ( d ) => NUMERALS_ARAB . indexOf ( d ) . toString ( ) ) ,
234
+ 10
235
+ ) . toString ( )
236
+ . replace ( NUMERALS_REGEX , ( d ) => NUMERALS_ARAB . split ( '' ) [ parseInt ( d , 10 ) ] )
237
+ ) ;
238
+ } else if ( isHanidec ) {
239
+ // Replace Hanidec numerals with Latin numerals,
240
+ // then parse the integer part to remove leading zeros,
241
+ // and finally replace Latin numerals with Hanidec numerals.
242
+ integerPart = (
243
+ parseInt (
244
+ integerPart
245
+ . replace ( NUMERALS_HANIDEC_REGEX , ( d ) => NUMERALS_HANIDEC . indexOf ( d ) . toString ( ) ) ,
246
+ 10
247
+ ) . toString ( )
248
+ . replace ( NUMERALS_REGEX , ( d ) => NUMERALS_HANIDEC . split ( '' ) [ parseInt ( d , 10 ) ] )
249
+ ) ;
250
+ } else {
251
+ integerPart = parseInt ( integerPart , 10 ) . toString ( ) ;
205
252
}
206
- abs = `${ integerPart ?? '' } ${ integerPart === '0' || groupSymbolMatch ?. [ groupSymbolMatch . length - 1 ] !== groupSymbolMatch ?. [ 0 ] ? this . symbols . decimal : groupSymbolMatch [ groupSymbolMatch . length - 1 ] } ${ decimalPart ?? '' } ` ;
253
+ let decimalSymbol = decimalSymbolMatch . length > 1 && lastDecimalSymbol === firstDecimalSymbol ? '' : lastDecimalSymbol ;
254
+ if ( decimalSymbol !== '' ) {
255
+ if ( this . symbols . decimal &&
256
+ lastDecimalSymbol !== this . symbols . decimal &&
257
+ ! isArabic &&
258
+ ! isHanidec &&
259
+ (
260
+ integerPart . length > 3 ||
261
+ integerPart === '0' ||
262
+ ( firstDecimalSymbol === this . symbols . decimal && lastDecimalSymbol === this . symbols . group ) ||
263
+ ( decimalSymbolMatch . length === 1 && decimalPart . length > 3 )
264
+ )
265
+ ) {
266
+ decimalSymbol = this . symbols . decimal ;
267
+ }
268
+ }
269
+
270
+ abs = `${ integerPart ?? '' } ${ decimalSymbol } ${ decimalPart ?? '' } ` ;
271
+ } else if ( groupSymbolMatch ) {
272
+ abs = parseInt ( abs . replace ( GROUPING_SYMBOLS_REGEX , '' ) , 10 ) . toString ( ) ;
207
273
}
208
274
sanitizedValue = `${ beforeAbs } ${ abs } ${ afterAbs } ` ;
209
275
}
@@ -214,7 +280,7 @@ class NumberParserImpl {
214
280
// Replace the ASCII minus sign with the minus sign used in the current locale
215
281
// so that both are allowed in case the user's keyboard doesn't have the locale's minus sign.
216
282
if ( this . symbols . minusSign ) {
217
- sanitizedValue = sanitizedValue . replace ( '-' , this . symbols . minusSign ) ;
283
+ sanitizedValue = sanitizedValue . replace ( MINUS_SIGN_REGEX , this . symbols . minusSign ) ;
218
284
}
219
285
220
286
// In arab numeral system, their decimal character is 1643, but most keyboards don't type that
0 commit comments