diff --git a/README.md b/README.md index 2031647..37fe4ab 100644 --- a/README.md +++ b/README.md @@ -6,21 +6,36 @@ Outputs an AST defined by [parser.pegjs]. The generated parser function takes two parameters, first the string to be parsed, and a second optional parameter `options`, an object. -The options object contains arrays +The `options` object contains arrays of keywords for `cardinal` and `ordinal` rules for the current locale – these are used to validate plural and selectordinal keys. If `options` or its fields are missing or set to false, the full set of valid [Unicode CLDR] keys is used: `'zero', 'one', 'two', 'few', 'many', 'other'`. To disable this check, pass in an empty array. -The `options` object also supports a setting that makes the parser -follow the ICU MessageFormat spec more closely: `strictFunctionParams`. +The `options` object also supports two settings that make the parser +follow the ICU MessageFormat spec more closely: `strictNumberSign` and `strictFunctionParams`. + +Inside a `plural` or `selectordinal` statement, a pound symbol (`#`) is replaced +with the input number. By default, `#` is parsed as a special character +in nested statements too, and can be escaped using apostrophes (`'#'`). + +Setting `strictNumberSign` to true will only parse `#` as a special character +directly inside a `plural` or `selectordinal` statement. +Outside those, `#` and `'#'` are parsed as literal text. By default, function parameters are split on commas and trimmed, so the parameters in `{x,fn, a, b }` are parsed as `['a','b']`. Setting `strictFunctionParams` to true will result in a params array with a single element: `[' a, b ']`. +The parser only supports the `DOUBLE_OPTIONAL` apostrophe mode. +A single apostrophe only starts quoted literal text if followed +by a curly brace (`{}`) or a pound symbol (`#`) inside a +`plural` or `selectordinal` statement, depending on the value of `strictNumberSign`. +Otherwise, it is a literal apostrophe. 
A double apostrophe is always +a literal apostrophe. + [ICU MessageFormat]: https://messageformat.github.io/guide/ [messageformat.js]: https://messageformat.github.io/ [parser.pegjs]: ./parser.pegjs diff --git a/parser.pegjs b/parser.pegjs index 918bccb..30ac5ad 100644 --- a/parser.pegjs +++ b/parser.pegjs @@ -1,8 +1,12 @@ +{ + var inPlural = false; +} + start = token* token = argument / select / plural / function - / '#' { return { type: 'octothorpe' }; } + / '#' & { return inPlural; } { return { type: 'octothorpe' }; } / str:char+ { return str.join(''); } argument = '{' _ arg:id _ '}' { @@ -12,7 +16,7 @@ argument = '{' _ arg:id _ '}' { }; } -select = '{' _ arg:id _ ',' _ 'select' _ ',' _ cases:selectCase+ _ '}' { +select = '{' _ arg:id _ ',' _ (m:'select' { if (options.strictNumberSign) { inPlural = false; } return m; }) _ ',' _ cases:selectCase+ _ '}' { return { type: 'select', arg: arg, @@ -20,7 +24,7 @@ select = '{' _ arg:id _ ',' _ 'select' _ ',' _ cases:selectCase+ _ '}' { }; } -plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:offset? cases:pluralCase+ _ '}' { +plural = '{' _ arg:id _ ',' _ type:(m:('plural'/'selectordinal') { inPlural = true; return m; } ) _ ',' _ offset:offset? cases:pluralCase+ _ '}' { var ls = ((type === 'selectordinal') ? 
options.ordinal : options.cardinal) || ['zero', 'one', 'two', 'few', 'many', 'other']; if (ls && ls.length) cases.forEach(function(c) { @@ -29,6 +33,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off ' Valid ' + type + ' keys for this locale are `' + ls.join('`, `') + '`, and explicit keys like `=0`.'); }); + inPlural = false; return { type: type, arg: arg, @@ -37,7 +42,7 @@ plural = '{' _ arg:id _ ',' _ type:('plural'/'selectordinal') _ ',' _ offset:off }; } -function = '{' _ arg:id _ ',' _ key:id _ params:functionParams '}' { +function = '{' _ arg:id _ ',' _ key:(m:id { if (options.strictNumberSign) { inPlural = false; } return m; }) _ params:functionParams '}' { return { type: 'function', arg: arg, @@ -80,12 +85,20 @@ quotedCurly = "'{"str:inapos*"'" { return '\u007B'+str.join(''); } / "'}"str:inapos*"'" { return '\u007D'+str.join(''); } +quoted + = quotedCurly + / quotedOcto:(("'#"str:inapos*"'" { return "#"+str.join(''); }) & { return inPlural; }) { return quotedOcto[0]; } + / "'" + quotedFunctionParams = quotedCurly / "'" char - = [^{}#\\\0-\x08\x0e-\x1f\x7f] + = doubleapos + / quoted + / octo:'#' & { return !inPlural; } { return octo; } + / [^{}#\\\0-\x08\x0e-\x1f\x7f] / '\\\\' { return '\\'; } / '\\#' { return '#'; } / '\\{' { return '\u007B'; } diff --git a/test.js b/test.js index b2540f0..4478231 100644 --- a/test.js +++ b/test.js @@ -80,6 +80,19 @@ describe("Replacement", function() { expect(parse('one {plural} ')[1].arg).to.eql('plural'); }); + it("should correctly handle apostrophes", function() { + // This mirrors the default DOUBLE_OPTIONAL behavior of ICU. 
+ expect(parse("I see '{many}'")[0]).to.eql("I see {many}"); + expect(parse("I said '{''Wow!''}'")[0]).to.eql("I said {'Wow!'}"); + expect(parse("I don't know")[0]).to.eql("I don't know"); + expect(parse("I don''t know")[0]).to.eql("I don't know"); + expect(parse("A'a''a'A")[0]).to.eql("A'a'a'A"); + expect(parse("A'{a''a}'A")[0]).to.eql("A{a'a}A"); + + // # and | are not special here. + expect(parse("A '#' A")[0]).to.eql("A '#' A"); + expect(parse("A '|' A")[0]).to.eql("A '|' A"); + }); }); describe("Simple arguments", function() { @@ -216,6 +229,30 @@ describe("Plurals", function() { ).to.eql(4); }); + it("should support quoting", function() { + expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].type).to.eql('function'); + expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].arg).to.eql('x'); + expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].key).to.eql('date'); + // Octothorpe is not special here regardless of strict number sign + expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].params[0]).to.eql("y-M-dd # '#'"); + + expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe'); + expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[1]).to.eql(" ' "); + expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[2].type).to.eql('octothorpe'); + expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe'); + expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[1]).to.eql(" #"); + + expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[0]).to.eql('one'); + expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[1].type).to.eql('octothorpe'); + + // without strict number sign + expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} 
two{two}}")[0].cases[0].tokens[2].cases[0].tokens[0].type).to.eql('octothorpe') + expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[1]).to.eql(' # one') + expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[2].type).to.eql('octothorpe') + // with strict number sign + expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}", { strictNumberSign: true })[0].cases[0].tokens[2].cases[0].tokens[0]).to.eql('# \'#\' one#') + }); + }); describe("Ordinals", function() {