Skip to content
This repository was archived by the owner on Dec 17, 2018. It is now read-only.

Commit b7f260d

Browse files
committed
Make the parser conform to ICU MessageFormat
Apostrophes are now handled correctly, emulating ICU's default DOUBLE_OPTIONAL behavior. The octothorpe (#) is now handled correctly as well: it can be escaped using apostrophes, but only inside a plural (where exactly that applies depends on strictNumberSign). A single apostrophe only starts quoted literal text if it immediately precedes a curly brace ({ or }), or, if inside a plural, an octothorpe (#). The parser now supports the strictNumberSign option, since that determines whether a quoted octothorpe is parsed as `'#'` or just `#`. Since choice format isn't supported, the pipe symbol never causes an apostrophe to start quoted literal text. Parameters to functions may contain whitespace and quoted special characters, but argStyle is still trimmed and split into multiple parameters. A new option, strictFunctionParams, activates ICU-compatible parsing, which parses everything from the second comma to the closing curly brace as a single "argStyleText" parameter. Fixes #1, fixes #2.
1 parent 4b42d43 commit b7f260d

File tree

2 files changed

+146
-7
lines changed

2 files changed

+146
-7
lines changed

parser.pegjs

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
start = token*
1+
// Grammar initializer: declares the parser-wide flag `inPlural`, which the
// select/plural/function rule actions set and clear and which the `#` handling
// in `token`, `quoted` and `char` reads.
{
2+
var inPlural = false;
3+
}
4+
5+
// Entry rule: a message is a sequence of zero or more tokens.
start = token*
26

37
// A token is an argument, select, plural or function block, a '#' octothorpe
// placeholder, or a run of literal characters joined into a single string.
token
48
= argument / select / plural / function
5-
/ '#' { return { type: 'octothorpe' }; }
9+
// Added form: '#' only yields an octothorpe token while inPlural is true.
/ '#' & { return inPlural; } { return { type: 'octothorpe' }; }
610
/ str:char+ { return str.join(''); }
711

812
argument = '{' _ arg:id _ '}' {
@@ -12,15 +16,15 @@ argument = '{' _ arg:id _ '}' {
1216
};
1317
}
1418

15-
select = '{' _ arg:id _ ',' _ 'select' _ ',' _ cases:selectCase+ _ '}' {
19+
// Select block: `{arg, select, key{...} ...}`; yields { type: 'select', arg, cases }.
// When options.strictNumberSign is set, matching the 'select' keyword clears
// inPlural, so '#' in the cases is no longer matched as an octothorpe token.
// NOTE(review): inPlural is not restored after the select block closes - confirm
// this is intended for text that follows a select nested inside a plural.
select = '{' _ arg:id _ ',' _ (m:'select' { if (options.strictNumberSign) { inPlural = false; } return m; }) _ ',' _ cases:selectCase+ _ '}' {
1620
return {
1721
type: 'select',
1822
arg: arg,
1923
cases: cases
2024
};
2125
}
2226

23-
plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:offset? cases:pluralCase+ _ '}' {
27+
plural = '{' _ arg:id _ ',' _ type:(m:('plural'/'selectordinal') { inPlural = true; return m; } ) _ ',' _ offset:offset? cases:pluralCase+ _ '}' {
2428
var ls = ((type === 'selectordinal') ? options.ordinal : options.cardinal)
2529
|| ['zero', 'one', 'two', 'few', 'many', 'other'];
2630
if (ls && ls.length) cases.forEach(function(c) {
@@ -29,6 +33,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
2933
' Valid ' + type + ' keys for this locale are `' + ls.join('`, `') +
3034
'`, and explicit keys like `=0`.');
3135
});
36+
inPlural = false;
3237
return {
3338
type: type,
3439
arg: arg,
@@ -37,7 +42,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
3742
};
3843
}
3944

40-
function = '{' _ arg:id _ ',' _ key:id _ params:functionParams* '}' {
45+
function = '{' _ arg:id _ ',' _ key:(m:id { if (options.strictNumberSign) { inPlural = false; } return m; }) _ params:functionParams '}' {
4146
return {
4247
type: 'function',
4348
arg: arg,
@@ -48,6 +53,10 @@ function = '{' _ arg:id _ ',' _ key:id _ params:functionParams* '}' {
4853

4954
// Identifier: first character is a digit, ASCII letter, '$' or '_'; the rest may
// be anything except space, tab, newline, CR, ',', '.', '+', '=', '{' or '}'.
// The `$(...)` operator returns the matched text as a string.
id = $([0-9a-zA-Z$_][^ \t\n\r,.+={}]*)
5055

56+
// Text of one function parameter in default (non-strict) mode: one or more
// paramcharsDefault pieces concatenated into a single string.
paramDefault = str:paramcharsDefault+ { return str.join(''); }
57+
58+
// Text of the function parameter in strict mode: one or more paramcharsStrict
// pieces concatenated into a single string.
paramStrict = str:paramcharsStrict+ { return str.join(''); }
59+
5160
// One case of a select block: an id key followed by its body; yields
// { key, tokens }. (`caseTokens` is defined outside the lines shown in this diff.)
selectCase = _ key:id _ tokens:caseTokens { return { key: key, tokens: tokens }; }
5261

5362
// One case of a plural/selectordinal block: a pluralKey followed by its body;
// yields { key, tokens }.
pluralCase = _ key:pluralKey _ tokens:caseTokens { return { key: key, tokens: tokens }; }
@@ -60,10 +69,36 @@ pluralKey
6069
= id
6170
/ '=' d:digits { return d; }
6271

63-
functionParams = _ ',' _ p:id _ { return p; }
72+
// Parameter list of a function block; yields an array of parameter strings.
// The alternative used depends on options.strictFunctionParams. Each predicate
// is evaluated only after its repetition has matched, so in strict mode the
// default alternative is parsed first, rejected, and then the strict one runs.
functionParams
73+
= p:functionParamsDefault* ! { return options.strictFunctionParams; } { return p; }
74+
/ p:functionParamsStrict* & { return options.strictFunctionParams; } { return p; }
75+
76+
// Strict parameter: a comma followed by paramStrict text, returned untrimmed.
// Commas are allowed inside the text because paramcharsStrict only stops at an
// apostrophe or '}'.
// `_` is presumably optional whitespace (defined outside the lines shown here).
functionParamsStrict = _ ',' p:paramStrict { return p; }
77+
78+
// Default parameter: a comma followed by paramDefault text, with leading and
// trailing spaces, tabs, newlines and carriage returns stripped from the result.
functionParamsDefault = _ ',' _ p:paramDefault _ { return p.replace(/^[ \t\n\r]*|[ \t\n\r]*$/g, ''); }
79+
80+
// Two consecutive apostrophes stand for one literal apostrophe.
doubleapos = "''" { return "'"; }
81+
82+
// Content inside a quoted section: either an escaped apostrophe pair or a run
// of one or more non-apostrophe characters.
inapos = doubleapos / str:[^']+ { return str.join(''); }
83+
84+
// Quoted literal text that starts with '{ or '} and runs to the closing
// apostrophe. Returns the brace plus the quoted content, without the surrounding
// apostrophes.
quotedCurly
85+
= "'{"str:inapos*"'" { return '\u007B'+str.join(''); }
86+
/ "'}"str:inapos*"'" { return '\u007D'+str.join(''); }
87+
88+
// Apostrophe handling in message text:
//  - a quoted curly-brace section;
//  - a quoted section starting with '# , accepted only while inPlural is true;
//  - otherwise the apostrophe is a literal character.
quoted
89+
= quotedCurly
90+
// The labelled sequence yields [text, predicateResult]; [0] selects the text.
/ quotedOcto:(("'#"str:inapos*"'" { return "#"+str.join(''); }) & { return inPlural; }) { return quotedOcto[0]; }
91+
/ "'"
92+
93+
// Apostrophe handling inside function parameters: a quoted curly-brace section,
// otherwise a literal apostrophe. There is no '#' alternative here, so a quoted
// octothorpe is kept verbatim, apostrophes included.
quotedFunctionParams
94+
= quotedCurly
95+
/ "'"
6496

6597
char
66-
= [^{}#\\\0-\x08\x0e-\x1f\x7f]
98+
= doubleapos
99+
/ quoted
100+
/ octo:'#' & { return !inPlural; } { return octo; }
101+
/ [^{}#\\\0-\x08\x0e-\x1f\x7f]
67102
/ '\\\\' { return '\\'; }
68103
/ '\\#' { return '#'; }
69104
/ '\\{' { return '\u007B'; }
@@ -72,6 +107,18 @@ char
72107
return String.fromCharCode(parseInt('0x' + h1 + h2 + h3 + h4));
73108
}
74109

110+
// Apostrophe-related pieces shared by both parameter modes: an escaped
// apostrophe pair, or a quoted curly section / lone apostrophe.
paramcharsCommon
111+
= doubleapos
112+
/ quotedFunctionParams
113+
114+
// One piece of a default-mode parameter: a common apostrophe piece, or a run of
// characters that stops at an apostrophe, a comma or '}'.
paramcharsDefault
115+
= paramcharsCommon
116+
/ str:[^',}]+ { return str.join(''); }
117+
118+
// One piece of a strict-mode parameter: a common apostrophe piece, or a run of
// characters that stops at an apostrophe or '}' (commas are part of the text).
paramcharsStrict
119+
= paramcharsCommon
120+
/ str:[^'}]+ { return str.join(''); }
121+
75122
// One or more decimal digits, returned as the matched string.
digits = $([0-9]+)
76123

77124
// A single hexadecimal digit, upper or lower case.
hexDigit = [0-9a-fA-F]

test.js

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,19 @@ describe("Replacement", function() {
8080
expect(parse('one {plural} ')[1].arg).to.eql('plural');
8181
});
8282

83+
// Apostrophe quoting outside any plural: '{...}' sections lose their quotes,
// '' collapses to a single apostrophe, and other apostrophes stay literal.
it("should correctly handle apostrophes", function() {
84+
// This mirrors the default DOUBLE_OPTIONAL behavior of ICU.
85+
expect(parse("I see '{many}'")[0]).to.eql("I see {many}");
86+
expect(parse("I said '{''Wow!''}'")[0]).to.eql("I said {'Wow!'}");
87+
expect(parse("I don't know")[0]).to.eql("I don't know");
88+
expect(parse("I don''t know")[0]).to.eql("I don't know");
89+
expect(parse("A'a''a'A")[0]).to.eql("A'a'a'A");
90+
expect(parse("A'{a''a}'A")[0]).to.eql("A{a'a}A");
91+
92+
// # and | are not special here.
93+
expect(parse("A '#' A")[0]).to.eql("A '#' A");
94+
expect(parse("A '|' A")[0]).to.eql("A '|' A");
95+
});
8396
});
8497
describe("Simple arguments", function() {
8598

@@ -216,6 +229,30 @@ describe("Plurals", function() {
216229
).to.eql(4);
217230
});
218231

232+
// Quoting and '#' handling inside plural cases: function parameters keep '#'
// and '#' quotes verbatim, bare '#' becomes an octothorpe token, a quoted '#'
// becomes literal text, and strictNumberSign changes how a nested select is parsed.
it("should support quoting", function() {
233+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].type).to.eql('function');
234+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].arg).to.eql('x');
235+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].key).to.eql('date');
236+
// Octothorpe is not special here regardless of strict number sign
237+
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].params[0]).to.eql("y-M-dd # '#'");
238+
239+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe');
240+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[1]).to.eql(" ' ");
241+
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[2].type).to.eql('octothorpe');
242+
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe');
243+
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[1]).to.eql(" #");
244+
245+
expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[0]).to.eql('one');
246+
expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[1].type).to.eql('octothorpe');
247+
248+
// without strict number sign
249+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[0].type).to.eql('octothorpe')
250+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[1]).to.eql(' # one')
251+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[2].type).to.eql('octothorpe')
252+
// with strict number sign
253+
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}", { strictNumberSign: true })[0].cases[0].tokens[2].cases[0].tokens[0]).to.eql('# \'#\' one#')
254+
});
255+
219256
});
220257
describe("Ordinals", function() {
221258

@@ -239,6 +276,61 @@ describe("Ordinals", function() {
239276
});
240277

241278
});
279+
describe("Functions", function() {
280+
// A function block with only an argument and a key parses to type 'function'
// with an empty params list.
it("should accept no parameters", function() {
281+
expect(parse('{var,date}')[0].type).to.eql('function');
282+
expect(parse('{var,date}')[0].key).to.eql('date');
283+
expect(parse('{var,date}')[0].params).to.be.empty;
284+
})
285+
286+
// Comma-separated parameters after the key are returned in order in `params`.
it("should accept parameters", function() {
287+
expect(parse('{var,date,long}')[0].type).to.eql('function');
288+
expect(parse('{var,date,long}')[0].key).to.eql('date');
289+
expect(parse('{var,date,long}')[0].params[0]).to.eql('long');
290+
expect(parse('{var,date,long,short}')[0].params[0]).to.eql('long');
291+
expect(parse('{var,date,long,short}')[0].params[1]).to.eql('short');
292+
})
293+
294+
// Parameters may contain inner whitespace. Surrounding whitespace is trimmed by
// default and preserved when strictFunctionParams is enabled.
it("should accept parameters with whitespace", function() {
295+
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].type).to.eql('function');
296+
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].key).to.eql('date');
297+
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].params[0]).to.eql('y-M-d HH:mm:ss zzzz');
298+
// This is not how ICU works. ICU does not trim whitespace,
299+
// but messageformat-parse must trim it to maintain backwards compatibility.
300+
expect(parse('{var,date, y-M-d HH:mm:ss zzzz }')[0].params[0]).to.eql('y-M-d HH:mm:ss zzzz');
301+
// This is how ICU works.
302+
expect(parse('{var,date, y-M-d HH:mm:ss zzzz }', { strictFunctionParams: true })[0].params[0]).to.eql(' y-M-d HH:mm:ss zzzz ');
303+
})
304+
305+
// Quoted braces/commas and doubled apostrophes inside parameters are unescaped,
// '#' and '#' quotes are kept verbatim, and an unquoted comma splits parameters
// unless strictFunctionParams is enabled.
it("should accept parameters with special characters", function() {
306+
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].type).to.eql('function');
307+
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].key).to.eql('date');
308+
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].params[0]).to.eql("y-M-d {,} ' HH:mm:ss zzzz");
309+
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz'}'}")[0].params[0]).to.eql("y-M-d {,} ' HH:mm:ss zzzz}");
310+
expect(parse("{var,date,y-M-d # HH:mm:ss zzzz}")[0].params[0]).to.eql("y-M-d # HH:mm:ss zzzz");
311+
expect(parse("{var,date,y-M-d '#' HH:mm:ss zzzz}")[0].params[0]).to.eql("y-M-d '#' HH:mm:ss zzzz");
312+
// This is not how ICU works.
313+
expect(parse("{var,date,y-M-d, HH:mm:ss zzzz}")[0].params[0]).to.eql("y-M-d");
314+
expect(parse("{var,date,y-M-d, HH:mm:ss zzzz}")[0].params[1]).to.eql("HH:mm:ss zzzz");
315+
// This is how ICU works, but this only allows a single argStyle parameter.
316+
expect(parse("{var,date,y-M-d, HH:mm:ss zzzz}", { strictFunctionParams: true })[0].params[0]).to.eql("y-M-d, HH:mm:ss zzzz");
317+
})
318+
319+
// Different whitespace around the argument, key and parameters must produce
// the same parse result (compared via JSON.stringify) in default mode.
it("should be gracious with whitespace", function() {
320+
var firstRes = JSON.stringify(parse('{var, date, long, short}'));
321+
expect(JSON.stringify(parse('{ var, date, long, short }'))).to.eql(firstRes);
322+
expect(JSON.stringify(parse('{var,date,long,short}'))).to.eql(firstRes);
323+
expect(JSON.stringify(parse('{\nvar, \ndate,\n long\n\n,\n short\n\n\n}'))).to.eql(firstRes);
324+
expect(JSON.stringify(parse('{\tvar\t,\t\t\r date\t\n, \tlong\n, short\t\n\n\n\n}'))).to.eql(firstRes);
325+
326+
// This is not how ICU works. ICU does not trim whitespace.
327+
firstRes = JSON.stringify(parse('{var, date, y-M-d HH:mm:ss zzzz}'));
328+
expect(JSON.stringify(parse('{ var, date, y-M-d HH:mm:ss zzzz }'))).to.eql(firstRes);
329+
expect(JSON.stringify(parse('{var,date,y-M-d HH:mm:ss zzzz}'))).to.eql(firstRes);
330+
expect(JSON.stringify(parse('{\nvar, \ndate,\n \n\n\n y-M-d HH:mm:ss zzzz\n\n\n}'))).to.eql(firstRes);
331+
expect(JSON.stringify(parse('{\tvar\t,\t\t\r date\t\n, \t\ny-M-d HH:mm:ss zzzz\t\n\n\n\n}'))).to.eql(firstRes);
332+
});
333+
});
242334
describe("Nested/Recursive blocks", function() {
243335

244336
it("should allow a select statement inside of a select statement", function() {

0 commit comments

Comments
 (0)