Skip to content
This repository was archived by the owner on Dec 17, 2018. It is now read-only.

Commit e533056

Browse files
committed
Merge pull request #3 from nkovacs/icu-compatibility
Make the parser conform to ICU MessageFormat
2 parents 524e8fb + ad6c0c2 commit e533056

File tree

3 files changed

+69
-5
lines changed

3 files changed

+69
-5
lines changed

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@ possible keys:
1919
follow the ICU MessageFormat spec more closely, and result in a params array
2020
with a single element: `[' a, b ']`.
2121

22+
- `strictNumberSign` – Inside a `plural` or `selectordinal` statement, a pound
23+
symbol (`#`) is replaced with the input number. By default, `#` is also parsed
24+
as a special character in nested statements, and can be escaped using
25+
apostrophes (`'#'`). Setting `strictNumberSign` to true will make the parser
26+
follow the ICU MessageFormat spec more closely, and only parse `#` as a
27+
special character directly inside a `plural` or `selectordinal` statement.
28+
Outside those, `#` and `'#'` will be parsed as literal text.
29+
30+
The parser only supports the default `DOUBLE_OPTIONAL` apostrophe mode. A
31+
single apostrophe only starts quoted literal text if followed by a curly brace
32+
(`{}`) or a pound symbol (`#`) inside a `plural` or `selectordinal` statement,
33+
depending on the value of `strictNumberSign`. Otherwise, it is a literal
34+
apostrophe. A double apostrophe is always a literal apostrophe.
35+
2236
[ICU MessageFormat]: https://messageformat.github.io/guide/
2337
[messageformat.js]: https://messageformat.github.io/
2438
[parser.pegjs]: ./parser.pegjs

parser.pegjs

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1+
{
2+
var inPlural = false;
3+
}
4+
15
start = token*
26

37
token
48
= argument / select / plural / function
5-
/ '#' { return { type: 'octothorpe' }; }
9+
/ '#' & { return inPlural; } { return { type: 'octothorpe' }; }
610
/ str:char+ { return str.join(''); }
711

812
argument = '{' _ arg:id _ '}' {
@@ -12,15 +16,15 @@ argument = '{' _ arg:id _ '}' {
1216
};
1317
}
1418

15-
select = '{' _ arg:id _ ',' _ 'select' _ ',' _ cases:selectCase+ _ '}' {
19+
select = '{' _ arg:id _ ',' _ (m:'select' { if (options.strictNumberSign) { inPlural = false; } return m; }) _ ',' _ cases:selectCase+ _ '}' {
1620
return {
1721
type: 'select',
1822
arg: arg,
1923
cases: cases
2024
};
2125
}
2226

23-
plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:offset? cases:pluralCase+ _ '}' {
27+
plural = '{' _ arg:id _ ',' _ type:(m:('plural'/'selectordinal') { inPlural = true; return m; } ) _ ',' _ offset:offset? cases:pluralCase+ _ '}' {
2428
var ls = ((type === 'selectordinal') ? options.ordinal : options.cardinal)
2529
|| ['zero', 'one', 'two', 'few', 'many', 'other'];
2630
if (ls && ls.length) cases.forEach(function(c) {
@@ -29,6 +33,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
2933
' Valid ' + type + ' keys for this locale are `' + ls.join('`, `') +
3034
'`, and explicit keys like `=0`.');
3135
});
36+
inPlural = false;
3237
return {
3338
type: type,
3439
arg: arg,
@@ -37,7 +42,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
3742
};
3843
}
3944

40-
function = '{' _ arg:id _ ',' _ key:id _ params:functionParams '}' {
45+
function = '{' _ arg:id _ ',' _ key:(m:id { if (options.strictNumberSign) { inPlural = false; } return m; }) _ params:functionParams '}' {
4146
return {
4247
type: 'function',
4348
arg: arg,
@@ -80,12 +85,20 @@ quotedCurly
8085
= "'{"str:inapos*"'" { return '\u007B'+str.join(''); }
8186
/ "'}"str:inapos*"'" { return '\u007D'+str.join(''); }
8287

88+
quoted
89+
= quotedCurly
90+
/ quotedOcto:(("'#"str:inapos*"'" { return "#"+str.join(''); }) & { return inPlural; }) { return quotedOcto[0]; }
91+
/ "'"
92+
8393
quotedFunctionParams
8494
= quotedCurly
8595
/ "'"
8696

8797
char
88-
= [^{}#\\\0-\x08\x0e-\x1f\x7f]
98+
= doubleapos
99+
/ quoted
100+
/ octo:'#' & { return !inPlural; } { return octo; }
101+
/ [^{}#\\\0-\x08\x0e-\x1f\x7f]
89102
/ '\\\\' { return '\\'; }
90103
/ '\\#' { return '#'; }
91104
/ '\\{' { return '\u007B'; }

test.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,19 @@ describe("Replacement", function() {
8080
expect(parse('one {plural} ')[1].arg).to.eql('plural');
8181
});
8282

83+
it("should correctly handle apostrophes", function() {
84+
// This mirrors the default DOUBLE_OPTIONAL behavior of ICU.
85+
expect(parse("I see '{many}'")[0]).to.eql("I see {many}");
86+
expect(parse("I said '{''Wow!''}'")[0]).to.eql("I said {'Wow!'}");
87+
expect(parse("I don't know")[0]).to.eql("I don't know");
88+
expect(parse("I don''t know")[0]).to.eql("I don't know");
89+
expect(parse("A'a''a'A")[0]).to.eql("A'a'a'A");
90+
expect(parse("A'{a''a}'A")[0]).to.eql("A{a'a}A");
91+
92+
// # and | are not special here.
93+
expect(parse("A '#' A")[0]).to.eql("A '#' A");
94+
expect(parse("A '|' A")[0]).to.eql("A '|' A");
95+
});
8396
});
8497
describe("Simple arguments", function() {
8598

@@ -216,6 +229,30 @@ describe("Plurals", function() {
216229
).to.eql(4);
217230
});
218231

232+
it("should support quoting", function() {
233+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].type).to.eql('function');
234+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].arg).to.eql('x');
235+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].key).to.eql('date');
236+
// Octothorpe is not special here regardless of strict number sign
237+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].params[0]).to.eql("y-M-dd # '#'");
238+
239+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe');
240+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[1]).to.eql(" ' ");
241+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[2].type).to.eql('octothorpe');
242+
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe');
243+
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[1]).to.eql(" #");
244+
245+
expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[0]).to.eql('one');
246+
expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[1].type).to.eql('octothorpe');
247+
248+
// without strict number sign
249+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[0].type).to.eql('octothorpe')
250+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[1]).to.eql(' # one')
251+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[2].type).to.eql('octothorpe')
252+
// with strict number sign
253+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}", { strictNumberSign: true })[0].cases[0].tokens[2].cases[0].tokens[0]).to.eql('# \'#\' one#')
254+
});
255+
219256
});
220257
describe("Ordinals", function() {
221258

0 commit comments

Comments
 (0)