Skip to content

Commit 3a44259

Browse files
author
Michael Jordan
committed
more NumberParser improvements
1 parent 884db80 commit 3a44259

File tree

2 files changed

+94
-23
lines changed

2 files changed

+94
-23
lines changed

packages/@internationalized/number/src/NumberParser.ts

Lines changed: 82 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,22 @@ interface Symbols {
2424

2525
const CURRENCY_SIGN_REGEX = new RegExp('^.*\\(.*\\).*$');
2626
const NUMBERING_SYSTEMS = ['latn', 'arab', 'hanidec'];
27-
// eslint-disable-next-line no-irregular-whitespace
28-
const GROUPING_SYMBOLS_REGEX = /[,٬ . ]/gu;
29-
const NUMERALS_REGEX = /[0123456789]|[٠١٢٣٤٥٦٧٨٩]|[]/gu;
27+
const MINUS_SIGN_SYMBOLS = '\u002D\u2212';
28+
const MINUS_SIGN_REGEX = new RegExp(`[${MINUS_SIGN_SYMBOLS}]`, 'g');
29+
const AMBIGUOUS_SYMBOLS = ',.';
30+
const ARABIC_THOUSANDS_SEPARATOR = '\u066C';
31+
const ARABIC_DECIMAL_SEPARATOR = '\u066B';
32+
const LRM_RLM_REGEX = /[\u200E\u200F]/g;
33+
const GROUPING_SYMBOLS = `${AMBIGUOUS_SYMBOLS} \u00A0\u202F${ARABIC_THOUSANDS_SEPARATOR}\u2019`;
34+
const GROUPING_SYMBOLS_REGEX = new RegExp(`[${GROUPING_SYMBOLS}]`, 'g');
35+
const DECIMAL_SYMBOLS = `${AMBIGUOUS_SYMBOLS}${ARABIC_DECIMAL_SEPARATOR}`;
36+
const DECIMAL_SYMBOLS_REGEX = new RegExp(`[${DECIMAL_SYMBOLS}]`, 'g');
37+
const NUMERALS_LATN = '0123456789';
38+
const NUMERALS_ARAB = '٠١٢٣٤٥٦٧٨٩';
39+
const NUMERALS_ARAB_REGEX = new RegExp(`[${NUMERALS_ARAB}]`, 'gu');
40+
const NUMERALS_HANIDEC = '〇一二三四五六七八九';
41+
const NUMERALS_HANIDEC_REGEX = new RegExp(`[${NUMERALS_HANIDEC}]`, 'gu');
42+
const NUMERALS_REGEX = new RegExp(`[${NUMERALS_LATN}]|[${NUMERALS_ARAB}]|[${NUMERALS_HANIDEC}]`, 'gu');
3043

3144
/**
3245
* A NumberParser can be used to perform locale-aware parsing of numbers from Unicode strings,
@@ -158,8 +171,12 @@ class NumberParserImpl {
158171
}
159172
}
160173

174+
// Remove LRM and RLM characters, which are used in some locales to control text direction.
175+
fullySanitizedValue = fullySanitizedValue?.replace(LRM_RLM_REGEX, '');
176+
161177
let newValue = fullySanitizedValue ? +fullySanitizedValue : NaN;
162178
if (isNaN(newValue)) {
179+
// console.log('Failed to parse number:', {value, fullySanitizedValue, locale: this.locale, options: this.options, symbols: this.symbols});
163180
return NaN;
164181
}
165182

@@ -186,24 +203,73 @@ class NumberParserImpl {
186203
let sanitizedValue = value.trim();
187204

188205
let numeralMatches = sanitizedValue.match(NUMERALS_REGEX);
189-
if (numeralMatches) {
190-
let beforeAbs = sanitizedValue.slice(0, sanitizedValue.indexOf(numeralMatches[0]));
191-
let afterAbs = sanitizedValue.slice(sanitizedValue.lastIndexOf(numeralMatches[numeralMatches.length - 1]) + 1);
192-
let abs = sanitizedValue.slice(sanitizedValue.indexOf(numeralMatches[0]), sanitizedValue.lastIndexOf(numeralMatches[numeralMatches.length - 1]) + 1);
206+
if (numeralMatches && this.options.numberingSystem !== 'arab') {
207+
let firstNumeralMatch = numeralMatches[0];
208+
let lastNumeralMatch = numeralMatches[numeralMatches.length - 1];
209+
let beforeAbs = sanitizedValue.slice(0, sanitizedValue.indexOf(firstNumeralMatch));
210+
let afterAbs = sanitizedValue.slice(sanitizedValue.lastIndexOf(lastNumeralMatch) + 1);
211+
let abs = sanitizedValue.slice(sanitizedValue.indexOf(firstNumeralMatch), sanitizedValue.lastIndexOf(lastNumeralMatch) + 1);
212+
193213
// Replace group and decimal symbols with the current locale's symbols
214+
let decimalSymbolMatch = abs.match(DECIMAL_SYMBOLS_REGEX);
194215
let groupSymbolMatch = abs.match(GROUPING_SYMBOLS_REGEX);
195216
let integerPart: string;
196-
let parsedIntegerPart: number;
197217
let decimalPart: string;
198-
if (groupSymbolMatch && groupSymbolMatch.length > 0 && abs.length - groupSymbolMatch.length > this.options.minimumIntegerDigits) {
199-
integerPart = abs.slice(0, abs.indexOf(groupSymbolMatch[groupSymbolMatch.length - 1]));
200-
decimalPart = abs.slice(abs.indexOf(groupSymbolMatch[groupSymbolMatch.length - 1]) + 1, abs.length);
218+
if (decimalSymbolMatch) {
219+
let firstDecimalSymbol = decimalSymbolMatch[0];
220+
let lastDecimalSymbol = decimalSymbolMatch[decimalSymbolMatch.length - 1];
221+
integerPart = abs.slice(0, abs.lastIndexOf(lastDecimalSymbol));
222+
decimalPart = abs.slice(abs.lastIndexOf(lastDecimalSymbol) + 1, abs.length);
201223
integerPart = integerPart.replace(GROUPING_SYMBOLS_REGEX, '');
202-
parsedIntegerPart = parseInt(integerPart, 10);
203-
if (!isNaN(parsedIntegerPart)) {
204-
integerPart = parsedIntegerPart.toString();
224+
let isArabic = NUMERALS_ARAB_REGEX.test(abs);
225+
let isHanidec = NUMERALS_HANIDEC_REGEX.test(abs);
226+
if (isArabic) {
227+
// Replace Arabic numerals with Latin numerals,
228+
// then parse the integer part to remove leading zeros,
229+
// and finally replace Latin numerals with Arabic numerals.
230+
integerPart = (
231+
parseInt(
232+
integerPart
233+
.replace(NUMERALS_ARAB_REGEX, (d) => NUMERALS_ARAB.indexOf(d).toString()),
234+
10
235+
).toString()
236+
.replace(NUMERALS_REGEX, (d) => NUMERALS_ARAB.split('')[parseInt(d, 10)])
237+
);
238+
} else if (isHanidec) {
239+
// Replace Hanidec numerals with Latin numerals,
240+
// then parse the integer part to remove leading zeros,
241+
// and finally replace Latin numerals with Hanidec numerals.
242+
integerPart = (
243+
parseInt(
244+
integerPart
245+
.replace(NUMERALS_HANIDEC_REGEX, (d) => NUMERALS_HANIDEC.indexOf(d).toString()),
246+
10
247+
).toString()
248+
.replace(NUMERALS_REGEX, (d) => NUMERALS_HANIDEC.split('')[parseInt(d, 10)])
249+
);
250+
} else {
251+
integerPart = parseInt(integerPart, 10).toString();
205252
}
206-
abs = `${integerPart ?? ''}${integerPart === '0' || groupSymbolMatch?.[groupSymbolMatch.length - 1] !== groupSymbolMatch?.[0] ? this.symbols.decimal : groupSymbolMatch[groupSymbolMatch.length - 1]}${decimalPart ?? ''}`;
253+
let decimalSymbol = decimalSymbolMatch.length > 1 && lastDecimalSymbol === firstDecimalSymbol ? '' : lastDecimalSymbol;
254+
if (decimalSymbol !== '') {
255+
if (this.symbols.decimal &&
256+
lastDecimalSymbol !== this.symbols.decimal &&
257+
!isArabic &&
258+
!isHanidec &&
259+
(
260+
integerPart.length > 3 ||
261+
integerPart === '0' ||
262+
(firstDecimalSymbol === this.symbols.decimal && lastDecimalSymbol === this.symbols.group) ||
263+
(decimalSymbolMatch.length === 1 && decimalPart.length > 3)
264+
)
265+
) {
266+
decimalSymbol = this.symbols.decimal;
267+
}
268+
}
269+
270+
abs = `${integerPart ?? ''}${decimalSymbol}${decimalPart ?? ''}`;
271+
} else if (groupSymbolMatch) {
272+
abs = parseInt(abs.replace(GROUPING_SYMBOLS_REGEX, ''), 10).toString();
207273
}
208274
sanitizedValue = `${beforeAbs}${abs}${afterAbs}`;
209275
}
@@ -214,7 +280,7 @@ class NumberParserImpl {
214280
// Replace the ASCII minus sign with the minus sign used in the current locale
215281
// so that both are allowed in case the user's keyboard doesn't have the locale's minus sign.
216282
if (this.symbols.minusSign) {
217-
sanitizedValue = sanitizedValue.replace('-', this.symbols.minusSign);
283+
sanitizedValue = sanitizedValue.replace(MINUS_SIGN_REGEX, this.symbols.minusSign);
218284
}
219285

220286
// In the Arabic numbering system, the decimal separator is U+066B (decimal code point 1643), but most keyboards can't type it

packages/@internationalized/number/test/NumberParser.test.js

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ describe('NumberParser', function () {
4545

4646
it('should support negative numbers with different minus signs', function () {
4747
expect(new NumberParser('en-US', {style: 'decimal'}).parse('-10')).toBe(-10);
48-
expect(new NumberParser('en-US', {style: 'decimal'}).parse('\u221210')).toBe(NaN);
48+
expect(new NumberParser('en-US', {style: 'decimal'}).parse('\u221210')).toBe(-10);
4949

5050
expect(new NumberParser('fi-FI', {style: 'decimal'}).parse('-10')).toBe(-10);
5151
expect(new NumberParser('fi-FI', {style: 'decimal'}).parse('\u221210')).toBe(-10);
@@ -232,13 +232,18 @@ describe('NumberParser', function () {
232232
function ({adjustedNumberForFractions, locale, opts}) {
233233
const formatter = new Intl.NumberFormat(locale, opts);
234234
const parser = new NumberParser(locale, opts);
235+
const altParser = new NumberParser('en-US', opts);
235236

236237
const formattedOnce = formatter.format(adjustedNumberForFractions);
237-
const roundTrip = formatter.format(parser.parse(formattedOnce));
238-
if (roundTrip !== formattedOnce) {
239-
console.warn({formattedOnce, roundTrip, adjustedNumberForFractions, locale, opts});
240-
}
238+
const parsed = parser.parse(formattedOnce);
239+
const altParsed = altParser.parse(formattedOnce);
240+
241+
const roundTrip = formatter.format(parsed);
241242
expect(roundTrip).toBe(formattedOnce);
243+
244+
if (parsed !== altParsed || roundTrip !== formattedOnce) {
245+
console.log({formattedOnce, roundTrip, [locale]: parsed, 'en-US': altParsed, adjustedNumberForFractions, opts});
246+
}
242247
}
243248
)
244249
);
@@ -323,8 +328,8 @@ describe('NumberParser', function () {
323328
it('should support negative numbers with different minus signs', function () {
324329
expect(new NumberParser('en-US', {style: 'decimal'}).isValidPartialNumber('-')).toBe(true);
325330
expect(new NumberParser('en-US', {style: 'decimal'}).isValidPartialNumber('-10')).toBe(true);
326-
expect(new NumberParser('en-US', {style: 'decimal'}).isValidPartialNumber('\u2212')).toBe(false);
327-
expect(new NumberParser('en-US', {style: 'decimal'}).isValidPartialNumber('\u221210')).toBe(false);
331+
expect(new NumberParser('en-US', {style: 'decimal'}).isValidPartialNumber('\u2212')).toBe(true);
332+
expect(new NumberParser('en-US', {style: 'decimal'}).isValidPartialNumber('\u221210')).toBe(true);
328333

329334
expect(new NumberParser('fi-FI', {style: 'decimal'}).isValidPartialNumber('-')).toBe(true);
330335
expect(new NumberParser('fi-FI', {style: 'decimal'}).isValidPartialNumber('-10')).toBe(true);

0 commit comments

Comments
 (0)