Skip to content
This repository was archived by the owner on Dec 17, 2018. It is now read-only.

Commit ad6c0c2

Browse files
committed
Make the parser conform to ICU MessageFormat
Apostrophes are now handled correctly, emulating ICU's default DOUBLE_OPTIONAL behavior. Octothorpe is handled correctly, and can be escaped using apostrophes, but only inside plural (depending on strictNumberSign). A single apostrophe only starts quoted literal text if it immediately precedes a curly brace ({}), or, if inside a plural, an octothorpe (#). The parser now supports the strictNumberSign option, since that determines whether a quoted octothorpe is parsed as `'#'` or just `#`. Since choice format isn't supported, the pipe symbol never causes an apostrophe to start quoted literal text. Fixes #2
1 parent 22a6045 commit ad6c0c2

File tree

3 files changed

+73
-8
lines changed

3 files changed

+73
-8
lines changed

README.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,36 @@ Outputs an AST defined by [parser.pegjs].
66
The generated parser function takes two parameters, first the string to be
77
parsed, and a second optional parameter `options`, an object.
88

9-
The options object contains arrays
9+
The `options` object contains arrays
1010
of keywords for `cardinal` and `ordinal` rules for the current locale – these
1111
are used to validate plural and selectordinal keys. If `options` or its fields
1212
are missing or set to false, the full set of valid [Unicode CLDR] keys is used:
1313
`'zero', 'one', 'two', 'few', 'many', 'other'`. To disable this check, pass in
1414
an empty array.
1515

16-
The `options` object also supports a setting that makes the parser
17-
follow the ICU MessageFormat spec more closely: `strictFunctionParams`.
16+
The `options` object also supports two settings that make the parser
17+
follow the ICU MessageFormat spec more closely: `strictNumberSign` and `strictFunctionParams`.
18+
19+
Inside a `plural` or `selectordinal` statement, a pound symbol (`#`) is replaced
20+
with the input number. By default, `#` is parsed as a special character
21+
in nested statements too, and can be escaped using apostrophes (`'#'`).
22+
23+
Setting `strictNumberSign` to true will only parse `#` as a special character
24+
directly inside a `plural` or `selectordinal` statement.
25+
Outside those, `#` and `'#'` are parsed as literal text.
1826

1927
By default, function parameters are split on commas and trimmed,
2028
so the parameters in `{x,fn, a, b }` are parsed as `['a','b']`.
2129
Setting `strictFunctionParams` to true will result in a params array
2230
with a single element: `[' a, b ']`.
2331

32+
The parser only supports the `DOUBLE_OPTIONAL` apostrophe mode.
33+
A single apostrophe only starts quoted literal text if immediately followed
34+
by a curly brace (`{}`) or a pound symbol (`#`) inside a
35+
`plural` or `selectordinal` statement, depending on the value of `strictNumberSign`.
36+
Otherwise, it is a literal apostrophe. A double apostrophe is always
37+
a literal apostrophe.
38+
2439
[ICU MessageFormat]: https://messageformat.github.io/guide/
2540
[messageformat.js]: https://messageformat.github.io/
2641
[parser.pegjs]: ./parser.pegjs

parser.pegjs

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1+
{
2+
var inPlural = false;
3+
}
4+
15
start = token*
26

37
token
48
= argument / select / plural / function
5-
/ '#' { return { type: 'octothorpe' }; }
9+
/ '#' & { return inPlural; } { return { type: 'octothorpe' }; }
610
/ str:char+ { return str.join(''); }
711

812
argument = '{' _ arg:id _ '}' {
@@ -12,15 +16,15 @@ argument = '{' _ arg:id _ '}' {
1216
};
1317
}
1418

15-
select = '{' _ arg:id _ ',' _ 'select' _ ',' _ cases:selectCase+ _ '}' {
19+
select = '{' _ arg:id _ ',' _ (m:'select' { if (options.strictNumberSign) { inPlural = false; } return m; }) _ ',' _ cases:selectCase+ _ '}' {
1620
return {
1721
type: 'select',
1822
arg: arg,
1923
cases: cases
2024
};
2125
}
2226

23-
plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:offset? cases:pluralCase+ _ '}' {
27+
plural = '{' _ arg:id _ ',' _ type:(m:('plural'/'selectordinal') { inPlural = true; return m; } ) _ ',' _ offset:offset? cases:pluralCase+ _ '}' {
2428
var ls = ((type === 'selectordinal') ? options.ordinal : options.cardinal)
2529
|| ['zero', 'one', 'two', 'few', 'many', 'other'];
2630
if (ls && ls.length) cases.forEach(function(c) {
@@ -29,6 +33,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
2933
' Valid ' + type + ' keys for this locale are `' + ls.join('`, `') +
3034
'`, and explicit keys like `=0`.');
3135
});
36+
inPlural = false;
3237
return {
3338
type: type,
3439
arg: arg,
@@ -37,7 +42,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
3742
};
3843
}
3944

40-
function = '{' _ arg:id _ ',' _ key:id _ params:functionParams '}' {
45+
function = '{' _ arg:id _ ',' _ key:(m:id { if (options.strictNumberSign) { inPlural = false; } return m; }) _ params:functionParams '}' {
4146
return {
4247
type: 'function',
4348
arg: arg,
@@ -80,12 +85,20 @@ quotedCurly
8085
= "'{"str:inapos*"'" { return '\u007B'+str.join(''); }
8186
/ "'}"str:inapos*"'" { return '\u007D'+str.join(''); }
8287

88+
quoted
89+
= quotedCurly
90+
/ quotedOcto:(("'#"str:inapos*"'" { return "#"+str.join(''); }) & { return inPlural; }) { return quotedOcto[0]; }
91+
/ "'"
92+
8393
quotedFunctionParams
8494
= quotedCurly
8595
/ "'"
8696

8797
char
88-
= [^{}#\\\0-\x08\x0e-\x1f\x7f]
98+
= doubleapos
99+
/ quoted
100+
/ octo:'#' & { return !inPlural; } { return octo; }
101+
/ [^{}#\\\0-\x08\x0e-\x1f\x7f]
89102
/ '\\\\' { return '\\'; }
90103
/ '\\#' { return '#'; }
91104
/ '\\{' { return '\u007B'; }

test.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,19 @@ describe("Replacement", function() {
8080
expect(parse('one {plural} ')[1].arg).to.eql('plural');
8181
});
8282

83+
it("should correctly handle apostrophes", function() {
84+
// This mirrors the default DOUBLE_OPTIONAL behavior of ICU.
85+
expect(parse("I see '{many}'")[0]).to.eql("I see {many}");
86+
expect(parse("I said '{''Wow!''}'")[0]).to.eql("I said {'Wow!'}");
87+
expect(parse("I don't know")[0]).to.eql("I don't know");
88+
expect(parse("I don''t know")[0]).to.eql("I don't know");
89+
expect(parse("A'a''a'A")[0]).to.eql("A'a'a'A");
90+
expect(parse("A'{a''a}'A")[0]).to.eql("A{a'a}A");
91+
92+
// # and | are not special here.
93+
expect(parse("A '#' A")[0]).to.eql("A '#' A");
94+
expect(parse("A '|' A")[0]).to.eql("A '|' A");
95+
});
8396
});
8497
describe("Simple arguments", function() {
8598

@@ -216,6 +229,30 @@ describe("Plurals", function() {
216229
).to.eql(4);
217230
});
218231

232+
it("should support quoting", function() {
233+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].type).to.eql('function');
234+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].arg).to.eql('x');
235+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].key).to.eql('date');
236+
// Octothorpe is not special here regardless of strict number sign
237+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].params[0]).to.eql("y-M-dd # '#'");
238+
239+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe');
240+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[1]).to.eql(" ' ");
241+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[2].type).to.eql('octothorpe');
242+
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe');
243+
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[1]).to.eql(" #");
244+
245+
expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[0]).to.eql('one');
246+
expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[1].type).to.eql('octothorpe');
247+
248+
// without strict number sign
249+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[0].type).to.eql('octothorpe')
250+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[1]).to.eql(' # one')
251+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[2].type).to.eql('octothorpe')
252+
// with strict number sign
253+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}", { strictNumberSign: true })[0].cases[0].tokens[2].cases[0].tokens[0]).to.eql('# \'#\' one#')
254+
});
255+
219256
});
220257
describe("Ordinals", function() {
221258

0 commit comments

Comments
 (0)