Skip to content
This repository was archived by the owner on Dec 17, 2018. It is now read-only.

Make the parser conform to ICU MessageFormat #3

Merged
merged 1 commit into from
Jul 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,36 @@ Outputs an AST defined by [parser.pegjs].
The generated parser function takes two parameters, first the string to be
parsed, and a second optional parameter `options`, an object.

The options object contains arrays
The `options` object contains arrays
of keywords for `cardinal` and `ordinal` rules for the current locale – these
are used to validate plural and selectordinal keys. If `options` or its fields
are missing or set to false, the full set of valid [Unicode CLDR] keys is used:
`'zero', 'one', 'two', 'few', 'many', 'other'`. To disable this check, pass in
an empty array.

The `options` object also supports a setting that makes the parser
follow the ICU MessageFormat spec more closely: `strictFunctionParams`.
The `options` object also supports two settings that make the parser
follow the ICU MessageFormat spec more closely: `strictNumberSign` and `strictFunctionParams`.

Inside a `plural` or `selectordinal` statement, a pound symbol (`#`) is replaced
with the input number. By default, `#` is parsed as a special character
in nested statements too, and can be escaped using apostrophes (`'#'`).

Setting `strictNumberSign` to true will only parse `#` as a special character
directly inside a `plural` or `selectordinal` statement.
Outside those, `#` and `'#'` are parsed as literal text.

By default, function parameters are split on commas and trimmed,
so the parameters in `{x,fn, a, b }` are parsed as `['a','b']`.
Setting `strictFunctionParams` to true will result in a params array
with a single element: `[' a, b ']`.

The parser only supports the `DOUBLE_OPTIONAL` apostrophe mode.
A single apostrophe only starts quoted literal text if it is immediately
followed by a curly brace (`{` or `}`), or by a pound symbol (`#`) inside a
`plural` or `selectordinal` statement (how far "inside" extends depends on the value of `strictNumberSign`).
Otherwise, it is a literal apostrophe. A double apostrophe is always
a literal apostrophe.

[ICU MessageFormat]: https://messageformat.github.io/guide/
[messageformat.js]: https://messageformat.github.io/
[parser.pegjs]: ./parser.pegjs
Expand Down
23 changes: 18 additions & 5 deletions parser.pegjs
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
{
// Grammar-wide flag consulted by the semantic predicates in `token`, `quoted`
// and `char` to decide whether `#` (and the `'#'` escape) is special.
// Written by rule actions: `plural` sets it to true once the
// `plural`/`selectordinal` keyword matches and back to false when the whole
// plural statement has been parsed; `select` and `function` set it to false
// on entry when `options.strictNumberSign` is truthy.
// NOTE(review): this is a single boolean, so the value in effect before a
// nested statement is not restored when that statement ends (e.g. `#` that
// follows a nested plural inside an outer plural case) - confirm this is the
// intended behaviour.
var inPlural = false;
}

// Entry rule: a message is a sequence of zero or more tokens.
start = token*

token
= argument / select / plural / function
/ '#' { return { type: 'octothorpe' }; }
/ '#' & { return inPlural; } { return { type: 'octothorpe' }; }
/ str:char+ { return str.join(''); }

argument = '{' _ arg:id _ '}' {
Expand All @@ -12,15 +16,15 @@ argument = '{' _ arg:id _ '}' {
};
}

select = '{' _ arg:id _ ',' _ 'select' _ ',' _ cases:selectCase+ _ '}' {
select = '{' _ arg:id _ ',' _ (m:'select' { if (options.strictNumberSign) { inPlural = false; } return m; }) _ ',' _ cases:selectCase+ _ '}' {
return {
type: 'select',
arg: arg,
cases: cases
};
}

plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:offset? cases:pluralCase+ _ '}' {
plural = '{' _ arg:id _ ',' _ type:(m:('plural'/'selectordinal') { inPlural = true; return m; } ) _ ',' _ offset:offset? cases:pluralCase+ _ '}' {
var ls = ((type === 'selectordinal') ? options.ordinal : options.cardinal)
|| ['zero', 'one', 'two', 'few', 'many', 'other'];
if (ls && ls.length) cases.forEach(function(c) {
Expand All @@ -29,6 +33,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
' Valid ' + type + ' keys for this locale are `' + ls.join('`, `') +
'`, and explicit keys like `=0`.');
});
inPlural = false;
return {
type: type,
arg: arg,
Expand All @@ -37,7 +42,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off
};
}

function = '{' _ arg:id _ ',' _ key:id _ params:functionParams '}' {
function = '{' _ arg:id _ ',' _ key:(m:id { if (options.strictNumberSign) { inPlural = false; } return m; }) _ params:functionParams '}' {
return {
type: 'function',
arg: arg,
Expand Down Expand Up @@ -80,12 +85,20 @@ quotedCurly
= "'{"str:inapos*"'" { return '\u007B'+str.join(''); }
/ "'}"str:inapos*"'" { return '\u007D'+str.join(''); }

// Text introduced by an apostrophe in message content. Alternatives, in order:
//   1. `quotedCurly` - an apostrophe-quoted run starting with `{` or `}`.
//   2. `'#...'` - matched only while `inPlural` is true (the predicate runs
//      after the quoted run has been consumed); yields `#` followed by the
//      joined `inapos` matches. The sequence produces an array, hence `[0]`.
//   3. A lone apostrophe, returned as literal text.
quoted
= quotedCurly
/ quotedOcto:(("'#"str:inapos*"'" { return "#"+str.join(''); }) & { return inPlural; }) { return quotedOcto[0]; }
/ "'"

// Variant of `quoted` without the `'#'` alternative: only `quotedCurly` runs
// are treated as quoted text, and any other apostrophe is matched as a
// literal apostrophe, independent of `inPlural`.
// NOTE(review): presumably referenced by the function-parameter rules, which
// are outside this view - confirm.
quotedFunctionParams
= quotedCurly
/ "'"

char
= [^{}#\\\0-\x08\x0e-\x1f\x7f]
= doubleapos
/ quoted
/ octo:'#' & { return !inPlural; } { return octo; }
/ [^{}#\\\0-\x08\x0e-\x1f\x7f]
/ '\\\\' { return '\\'; }
/ '\\#' { return '#'; }
/ '\\{' { return '\u007B'; }
Expand Down
37 changes: 37 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,19 @@ describe("Replacement", function() {
expect(parse('one {plural} ')[1].arg).to.eql('plural');
});

it("should correctly handle apostrophes", function() {
  // Each entry is [message, expected first token]; entries are checked in order.
  var expectations = [
    // This mirrors the default DOUBLE_OPTIONAL behavior of ICU.
    ["I see '{many}'", "I see {many}"],
    ["I said '{''Wow!''}'", "I said {'Wow!'}"],
    ["I don't know", "I don't know"],
    ["I don''t know", "I don't know"],
    ["A'a''a'A", "A'a'a'A"],
    ["A'{a''a}'A", "A{a'a}A"],

    // # and | are not special here.
    ["A '#' A", "A '#' A"],
    ["A '|' A", "A '|' A"]
  ];

  expectations.forEach(function(entry) {
    var message = entry[0];
    var firstToken = entry[1];
    expect(parse(message)[0]).to.eql(firstToken);
  });
});
});
describe("Simple arguments", function() {

Expand Down Expand Up @@ -216,6 +229,30 @@ describe("Plurals", function() {
).to.eql(4);
});

it("should support quoting", function() {
  // A function nested in a plural case keeps its parameter text verbatim.
  var fnToken = parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0];
  expect(fnToken.type).to.eql('function');
  expect(fnToken.arg).to.eql('x');
  expect(fnToken.key).to.eql('date');
  // Octothorpe is not special here regardless of strict number sign
  expect(fnToken.params[0]).to.eql("y-M-dd # '#'");

  // A doubled apostrophe between two octothorpes is a literal apostrophe.
  var doubledApos = parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens;
  expect(doubledApos[0].type).to.eql('octothorpe');
  expect(doubledApos[1]).to.eql(" ' ");
  expect(doubledApos[2].type).to.eql('octothorpe');

  // A quoted octothorpe directly inside a plural case is literal text.
  var quotedOcto = parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens;
  expect(quotedOcto[0].type).to.eql('octothorpe');
  expect(quotedOcto[1]).to.eql(" #");

  // An octothorpe right after plain text still gets its own token.
  var trailingOcto = parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens;
  expect(trailingOcto[0]).to.eql('one');
  expect(trailingOcto[1].type).to.eql('octothorpe');

  var nestedSelect = "{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}";

  // without strict number sign
  var looseTokens = parse(nestedSelect)[0].cases[0].tokens[2].cases[0].tokens;
  expect(looseTokens[0].type).to.eql('octothorpe');
  expect(looseTokens[1]).to.eql(' # one');
  expect(looseTokens[2].type).to.eql('octothorpe');

  // with strict number sign
  var strictTokens = parse(nestedSelect, { strictNumberSign: true })[0].cases[0].tokens[2].cases[0].tokens;
  expect(strictTokens[0]).to.eql('# \'#\' one#');
});

});
describe("Ordinals", function() {

Expand Down