From fcd0229e1fcc5e7e7e46b76044e03a2654d307c4 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Thu, 17 Jul 2014 16:29:03 -0600 Subject: [PATCH 01/22] Add ABNF description of TOML. --- toml.abnf | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 toml.abnf diff --git a/toml.abnf b/toml.abnf new file mode 100644 index 00000000..82f2fbca --- /dev/null +++ b/toml.abnf @@ -0,0 +1,175 @@ +;; This is an attempt to define TOML in ABNF according to the grammar defined +;; in RFC 4234 (http://www.ietf.org/rfc/rfc4234.txt). + +;; TOML + +toml = expression *( newline expression ) +expression = ( + ws / + ws comment / + ws keyval ws [ comment ] / + ws table ws [ comment ] +) + +;; Newline + +newline = 1*( + %x0A / ; Line feed or New line + %x0D ; Carriage return +) + +;; Whitespace + +ws = *( + %x20 / ; Space + %x09 ; Horizontal tab +) + +;; Comment + +comment-start-symbol = %x23 ; # +non-eol = %x09 / %x20-10FFFF +comment = comment-start-symbol *non-eol + +;; Key-Value pairs + +keyval-sep = ws %x3D ws ; = +keyval = key keyval-sep val + +key = unquoted-key / quoted-key +unquoted-key = 1*ALPHA +quoted-key = quotation-mark 1*basic-char quotation-mark ; See Basic Strings + +val = integer / float / string / boolean / datetime / array / inline-table + +;; Table + +table = std-table / array-table + +;; Standard Table + +std-table-open = %x5B ws ; [ Left square bracket +std-table-close = ws %x5D ; ] Right square bracket +table-key-sep = ws %x2E ws ; . 
Period + +std-table = std-table-open key *( table-key-sep key) std-table-close + +;; Array Table + +array-table-open = %x5B.5B ws ; [[ Double left square bracket +array-table-close = %x5D.5D ws ; ]] Double right quare bracket + +array-table = array-table-open key *( table-key-sep key) array-table-close + +;; Integer + +integer = [ minus ] int +minus = %x2D ; - +int = zero / ( digit1-9 *DIGIT ) +zero = %x30 ; 0 +digit1-9 = %x31-39 ; 1-9 + +;; Float + +float = [ minus ] int [ frac ] [ exp ] +frac = decimal-point 1*DIGIT +decimal-point = %x2E ; . +exp = e [ minus / plus ] 1*DIGIT +e = %x65 / %x45 ; e E +plus = %x2B ; + + +;; String + +string = basic-string / ml-basic-string / literal-string / ml-literal-string + +;; Basic String + +basic-string = quotation-mark *basic-char quotation-mark + +quotation-mark = %x22 ; " + +basic-char = basic-unescaped / escaped +escaped = escape ( %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 4HEXDIG ) ; uXXXX U+XXXX + +basic-unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + +escape = %x5C ; \ + +;; Multiline Basic String + +ml-basic-string-delim = quotation-mark quotation-mark quotation-mark +ml-basic-string = ml-basic-string-delim ml-basic-body ml-basic-string-delim +ml-basic-body = *( ml-basic-char / ( escape newline )) + +ml-basic-char = ml-basic-unescaped / escaped +ml-basic-unescaped = %x20-5B / %x5D-10FFFF + +;; Literal String + +literal-string = apostraphe *literal-char apostraphe + +apostraphe = %x27 ; ' Apostraphe + +literal-char = %x09 / %x20-26 / %x28-10FFFF + +;; Multiline Literal String + +ml-literal-string-delim = apostraphe apostraphe apostraphe +ml-literal-string = ml-literal-string-delim ml-literal-body ml-literal-string-delim + +ml-literal-body = *( ml-literal-char / newline ) +ml-literal-char = %x09 / %x20-10FFFF + +;; 
Boolean + +boolean = true / false +true = %x74.72.75.65 ; true +false = %x66.61.6c.73.65 ; false + +;; Datetime + +datetime = ymd tee hms zee +ymd = 4DIGIT dash 2DIGIT dash 2DIGIT +hms = 2DIGIT colon 2DIGIT colon 2DIGIT +dash = %x2D ; - +colon = %x3A ; : +tee = %x54 ; T +zee = %x5A ; Z + +;; Array + +array-open = %x5B ws ; [ +array-close = ws %x5D ; ] + +array = array-open array-values array-close + +array-values = [ val [ array-sep ] [ ( comment newline) / newline ] / + val array-sep [ ( comment newline) / newline ] array-values ] + +array-sep = ws %x2C ws ; , Comma + +;; Inline Table + +inline-table-open = %x7B ws ; { +inline-table-close = ws %x7D ; } +inline-table-sep = ws %x2C ws ; , Comma + +inline-table = inline-table-open inline-table-keyvals inline-table-close + +inline-table-keyvals = [ inline-table-keyvals-non-empty ] +inline-table-keyvals-non-empty = key keyval-sep val / + key keyval-sep val inline-table-sep inline-table-keyvals-non-empty + +;; Built-in ABNF terms, reproduced here for clarity + +; ALPHA = %x41-5A / %x61-7A ; A-Z / a-z +; DIGIT = %x30-39 ; 0-9 +; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" From cb50c722fa0821306a3ab09cbec49f71ceffced8 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Thu, 17 Jul 2014 19:49:10 -0600 Subject: [PATCH 02/22] Properly specify unquoted-key. 
--- toml.abnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toml.abnf b/toml.abnf index 82f2fbca..9e589608 100644 --- a/toml.abnf +++ b/toml.abnf @@ -37,7 +37,7 @@ keyval-sep = ws %x3D ws ; = keyval = key keyval-sep val key = unquoted-key / quoted-key -unquoted-key = 1*ALPHA +unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ quoted-key = quotation-mark 1*basic-char quotation-mark ; See Basic Strings val = integer / float / string / boolean / datetime / array / inline-table From 7b439f6625eb6417954e4019aeed0cdd0a18ffcf Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Thu, 17 Jul 2014 22:17:23 -0600 Subject: [PATCH 03/22] Fix whitespace in array-table-close. --- toml.abnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toml.abnf b/toml.abnf index 9e589608..bb556d3e 100644 --- a/toml.abnf +++ b/toml.abnf @@ -57,7 +57,7 @@ std-table = std-table-open key *( table-key-sep key) std-table-close ;; Array Table array-table-open = %x5B.5B ws ; [[ Double left square bracket -array-table-close = %x5D.5D ws ; ]] Double right quare bracket +array-table-close = ws %x5D.5D ; ]] Double right quare bracket array-table = array-table-open key *( table-key-sep key) array-table-close From bd4fe79606da21a34824b4769185a90a723a6fa9 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Mon, 10 Nov 2014 16:59:09 +0100 Subject: [PATCH 04/22] Update ABNF with v0.3.0 compliant integer/float rules. 
--- toml.abnf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/toml.abnf b/toml.abnf index bb556d3e..dd7d83b6 100644 --- a/toml.abnf +++ b/toml.abnf @@ -63,20 +63,20 @@ array-table = array-table-open key *( table-key-sep key) array-table-close ;; Integer -integer = [ minus ] int +integer = [ minus / plus ] int minus = %x2D ; - +plus = %x2B ; + int = zero / ( digit1-9 *DIGIT ) zero = %x30 ; 0 digit1-9 = %x31-39 ; 1-9 ;; Float -float = [ minus ] int [ frac ] [ exp ] +float = integer ( frac / frac exp / exp ) frac = decimal-point 1*DIGIT decimal-point = %x2E ; . -exp = e [ minus / plus ] 1*DIGIT +exp = e integer e = %x65 / %x45 ; e E -plus = %x2B ; + ;; String From 4d15ea5adef72da1596982d2ea3913954cb3ea0e Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Mon, 10 Nov 2014 17:09:13 +0100 Subject: [PATCH 05/22] Update ABNF with RFC 3339 datetime spec to be v0.3.0 compliant. --- toml.abnf | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/toml.abnf b/toml.abnf index dd7d83b6..f6fd6909 100644 --- a/toml.abnf +++ b/toml.abnf @@ -40,7 +40,7 @@ key = unquoted-key / quoted-key unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ quoted-key = quotation-mark 1*basic-char quotation-mark ; See Basic Strings -val = integer / float / string / boolean / datetime / array / inline-table +val = integer / float / string / boolean / date-time / array / inline-table ;; Table @@ -134,15 +134,23 @@ boolean = true / false true = %x74.72.75.65 ; true false = %x66.61.6c.73.65 ; false -;; Datetime +;; Datetime (as defined in RFC 3339) -datetime = ymd tee hms zee -ymd = 4DIGIT dash 2DIGIT dash 2DIGIT -hms = 2DIGIT colon 2DIGIT colon 2DIGIT -dash = %x2D ; - -colon = %x3A ; : -tee = %x54 ; T -zee = %x5A ; Z +date-fullyear = 4DIGIT +date-month = 2DIGIT ; 01-12 +date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year +time-hour = 2DIGIT ; 00-23 +time-minute = 2DIGIT ; 00-59 +time-second = 
2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules +time-secfrac = "." 1*DIGIT +time-numoffset = ( "+" / "-" ) time-hour ":" time-minute +time-offset = "Z" / time-numoffset + +partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] +full-date = date-fullyear "-" date-month "-" date-mday +full-time = partial-time time-offset + +date-time = full-date "T" full-time ;; Array From 52dc64adc8e0b161b48384c8ed040475c2a91f5b Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Wed, 17 Dec 2014 13:28:41 -0800 Subject: [PATCH 06/22] Update for newline clarifications. --- toml.abnf | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/toml.abnf b/toml.abnf index f6fd6909..0b3647e7 100644 --- a/toml.abnf +++ b/toml.abnf @@ -13,11 +13,13 @@ expression = ( ;; Newline -newline = 1*( - %x0A / ; Line feed or New line - %x0D ; Carriage return +newline = ( + %x0A / ; LF + %x0D.0A ; CRLF ) +newlines = 1*( newline ) + ;; Whitespace ws = *( @@ -107,7 +109,7 @@ escape = %x5C ; \ ml-basic-string-delim = quotation-mark quotation-mark quotation-mark ml-basic-string = ml-basic-string-delim ml-basic-body ml-basic-string-delim -ml-basic-body = *( ml-basic-char / ( escape newline )) +ml-basic-body = *( ml-basic-char / newline / ( escape newline )) ml-basic-char = ml-basic-unescaped / escaped ml-basic-unescaped = %x20-5B / %x5D-10FFFF @@ -159,8 +161,8 @@ array-close = ws %x5D ; ] array = array-open array-values array-close -array-values = [ val [ array-sep ] [ ( comment newline) / newline ] / - val array-sep [ ( comment newline) / newline ] array-values ] +array-values = [ val [ array-sep ] [ ( comment newlines) / newlines ] / + val array-sep [ ( comment newlines) / newlines ] array-values ] array-sep = ws %x2C ws ; , Comma From 38089026e5e565b133b39ab9c19bfea8cb965047 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Wed, 7 Jan 2015 15:50:54 -0800 Subject: [PATCH 07/22] Add \UXXXXXXXX escape sequence. 
--- toml.abnf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/toml.abnf b/toml.abnf index 0b3647e7..71f6ccf3 100644 --- a/toml.abnf +++ b/toml.abnf @@ -99,7 +99,8 @@ escaped = escape ( %x22 / ; " quotation mark U+0022 %x6E / ; n line feed U+000A %x72 / ; r carriage return U+000D %x74 / ; t tab U+0009 - %x75 4HEXDIG ) ; uXXXX U+XXXX + %x75 4HEXDIG / ; uXXXX U+XXXX + %x55 8HEXDIG ) ; UXXXXXXXX U+XXXXXXXX basic-unescaped = %x20-21 / %x23-5B / %x5D-10FFFF From 0cee090045b22530c22d753488cfc0d6847ad93c Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Fri, 6 Feb 2015 17:32:00 -0800 Subject: [PATCH 08/22] Allow underscores in int/float ABNF. --- toml.abnf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/toml.abnf b/toml.abnf index 71f6ccf3..b54fe937 100644 --- a/toml.abnf +++ b/toml.abnf @@ -68,14 +68,15 @@ array-table = array-table-open key *( table-key-sep key) array-table-close integer = [ minus / plus ] int minus = %x2D ; - plus = %x2B ; + -int = zero / ( digit1-9 *DIGIT ) -zero = %x30 ; 0 digit1-9 = %x31-39 ; 1-9 +underscore = %x5F ; _ +int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) ;; Float float = integer ( frac / frac exp / exp ) -frac = decimal-point 1*DIGIT +zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) +frac = decimal-point zero-prefixable-int decimal-point = %x2E ; . exp = e integer e = %x65 / %x45 ; e E From 532a4668624f74303029d8cebdb463c68213c48b Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Tue, 10 Feb 2015 13:35:29 -0800 Subject: [PATCH 09/22] Small style fixes. 
--- toml.abnf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toml.abnf b/toml.abnf index b54fe937..772f1ae7 100644 --- a/toml.abnf +++ b/toml.abnf @@ -18,7 +18,7 @@ newline = ( %x0D.0A ; CRLF ) -newlines = 1*( newline ) +newlines = 1*newline ;; Whitespace @@ -136,7 +136,7 @@ ml-literal-char = %x09 / %x20-10FFFF boolean = true / false true = %x74.72.75.65 ; true -false = %x66.61.6c.73.65 ; false +false = %x66.61.6C.73.65 ; false ;; Datetime (as defined in RFC 3339) From bb0005754c949048fe9e2afb88a4c61e09666cc9 Mon Sep 17 00:00:00 2001 From: Joel Date: Thu, 31 Dec 2015 13:04:46 -0700 Subject: [PATCH 10/22] Added grouping to ambiguous alternatives RFC 4234 Section 3.5 advises the use of grouping notation rather than "bare" alternation when alternatives consist of multiple rules or literals, e.g. "( int float ) / ( bool char )" instead of "int float / bool char". I might've gotten the grouping wrong, which just serves to illustrate the importance of using grouping notation. 
--- toml.abnf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/toml.abnf b/toml.abnf index 772f1ae7..18d9ec4b 100644 --- a/toml.abnf +++ b/toml.abnf @@ -6,9 +6,9 @@ toml = expression *( newline expression ) expression = ( ws / - ws comment / - ws keyval ws [ comment ] / - ws table ws [ comment ] + ( ws comment ) / + ( ws keyval ws [ comment ] ) / + ( ws table ws [ comment ] ) ) ;; Newline @@ -163,8 +163,8 @@ array-close = ws %x5D ; ] array = array-open array-values array-close -array-values = [ val [ array-sep ] [ ( comment newlines) / newlines ] / - val array-sep [ ( comment newlines) / newlines ] array-values ] +array-values = ( [ val [ array-sep ] [ ( comment newlines) / newlines ] ) / + ( val array-sep [ ( comment newlines) / newlines ] array-values ] ) array-sep = ws %x2C ws ; , Comma @@ -177,8 +177,8 @@ inline-table-sep = ws %x2C ws ; , Comma inline-table = inline-table-open inline-table-keyvals inline-table-close inline-table-keyvals = [ inline-table-keyvals-non-empty ] -inline-table-keyvals-non-empty = key keyval-sep val / - key keyval-sep val inline-table-sep inline-table-keyvals-non-empty +inline-table-keyvals-non-empty = ( key keyval-sep val ) / + ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) ;; Built-in ABNF terms, reproduced here for clarity From 863f5ccef627c647adf00c5a65e5c96632dd1e14 Mon Sep 17 00:00:00 2001 From: Joel Date: Fri, 1 Jan 2016 19:41:53 -0700 Subject: [PATCH 11/22] Allow leading newlines inside arrays The README.md has this at the end of the first TOML example: ```toml hosts = [ "alpha", "omega" ] ``` And later says: >...arrays also ignore newlines.... But the ABNF doesn't allow for a newline to precede the first element. 
I wasn't sure if the grammar should allow more than one newline preceding the first element, but given that an arbitrary number of whitespaces are allowed and the readme says that newlines are ignored in general I figured allowing an arbitrary number of newlines best matches your intent. It also won't break anyone that happens to have multiple leading newlines in their arrays. --- toml.abnf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/toml.abnf b/toml.abnf index 18d9ec4b..f0c2af97 100644 --- a/toml.abnf +++ b/toml.abnf @@ -158,8 +158,10 @@ date-time = full-date "T" full-time ;; Array -array-open = %x5B ws ; [ -array-close = ws %x5D ; ] +ws-newline = *(ws / newline) + +array-open = %x5B ws-newline ; [ +array-close = ws-newline %x5D ; ] array = array-open array-values array-close From d6c49b4c0d4ef7d27fd1915ba4b043b479f892c7 Mon Sep 17 00:00:00 2001 From: Joel Date: Fri, 1 Jan 2016 19:51:48 -0700 Subject: [PATCH 12/22] Don't need extra newlines before closing brace The array-values production already allows for an arbitrary amount of newlines before the closing brace. It doesn't, however, allow any whitespace after the newlines that don't come just before the closing brace so: ```toml [5, 6 \t\t] ``` is valid, but ```toml [5, 6 \t\t ] ``` is invalid. Probably not a big deal. 
--- toml.abnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toml.abnf b/toml.abnf index f0c2af97..59b907bd 100644 --- a/toml.abnf +++ b/toml.abnf @@ -161,7 +161,7 @@ date-time = full-date "T" full-time ws-newline = *(ws / newline) array-open = %x5B ws-newline ; [ -array-close = ws-newline %x5D ; ] +array-close = ws %x5D ; ] array = array-open array-values array-close From 216f6421eaa0cf60abeb3196f169daa4cb40d73f Mon Sep 17 00:00:00 2001 From: Joel Date: Fri, 1 Jan 2016 20:30:47 -0700 Subject: [PATCH 13/22] Allow ws after newline and before element Back to the README example: ```toml hosts = [ "alpha", "omega" ] ``` The ABNF allows spaces after an element but before any newlines; however, the examples in the README have spaces before elements after a newline, which isn't allowed in the grammar. I've added a new production "ws-newlines" which is just like "ws-newline" except that it requires at least one newline, like the production it replaces ("newlines"). With "ws-newlines" it is legal to put whitespace before an element after a newline. 
--- toml.abnf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/toml.abnf b/toml.abnf index 59b907bd..1c38f5a1 100644 --- a/toml.abnf +++ b/toml.abnf @@ -158,15 +158,16 @@ date-time = full-date "T" full-time ;; Array -ws-newline = *(ws / newline) +ws-newline = *( ws / newline ) +ws-newlines = newline *( ws / newline ) array-open = %x5B ws-newline ; [ array-close = ws %x5D ; ] array = array-open array-values array-close -array-values = ( [ val [ array-sep ] [ ( comment newlines) / newlines ] ) / - ( val array-sep [ ( comment newlines) / newlines ] array-values ] ) +array-values = ( [ val [ array-sep ] [ ( comment ws-newlines) / ws-newlines ] ) / + ( val array-sep [ ( comment ws-newlines) / ws-newlines ] array-values ] ) array-sep = ws %x2C ws ; , Comma From 64c0a6a0190d8edf4779766dac812d8f9afc12dd Mon Sep 17 00:00:00 2001 From: Joel Date: Sat, 2 Jan 2016 01:42:06 -0700 Subject: [PATCH 14/22] Allow newline and whitespace after , before comment Going over your hard example I found I couldn't parse this: ```toml multi_line_array = [ "]", # ] Oh yes I did ] ``` It turns out that the grammar doesn't allow newlines and whitespaces after the array-sep. I've modified the production again to accommodate newlines and whitespace after the array separator. 
--- toml.abnf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toml.abnf b/toml.abnf index 1c38f5a1..3bfd4142 100644 --- a/toml.abnf +++ b/toml.abnf @@ -166,8 +166,8 @@ array-close = ws %x5D ; ] array = array-open array-values array-close -array-values = ( [ val [ array-sep ] [ ( comment ws-newlines) / ws-newlines ] ) / - ( val array-sep [ ( comment ws-newlines) / ws-newlines ] array-values ] ) +array-values = ( [ val [ array-sep ] [ ( ws-newline comment ws-newlines) / ws-newlines ] ) / + ( val array-sep [ ( ws-newline comment ws-newlines) / ws-newlines ] array-values ] ) array-sep = ws %x2C ws ; , Comma From 75f6ba3a8fbfc0623d18b67cc8b53abf7c6871e3 Mon Sep 17 00:00:00 2001 From: Joel Date: Sat, 30 Jan 2016 14:40:02 -0700 Subject: [PATCH 15/22] Adjusted the abnf to allow for whitespaces after the last element without a leading newline. --- toml.abnf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toml.abnf b/toml.abnf index 3bfd4142..5781d87d 100644 --- a/toml.abnf +++ b/toml.abnf @@ -162,11 +162,11 @@ ws-newline = *( ws / newline ) ws-newlines = newline *( ws / newline ) array-open = %x5B ws-newline ; [ -array-close = ws %x5D ; ] +array-close = ws-newline %x5D ; ] array = array-open array-values array-close -array-values = ( [ val [ array-sep ] [ ( ws-newline comment ws-newlines) / ws-newlines ] ) / +array-values = ( [ val [ array-sep ] [ ( ws-newline comment ws-newlines ) ] ) / ( val array-sep [ ( ws-newline comment ws-newlines) / ws-newlines ] array-values ] ) array-sep = ws %x2C ws ; , Comma From 92f20e5a3f16eda0e57c7d9f087288b318259d50 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Tue, 3 Jan 2017 16:23:53 -0800 Subject: [PATCH 16/22] Add ABNF work-in-progress warning. 
--- toml.abnf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/toml.abnf b/toml.abnf index 5781d87d..fcd47cd4 100644 --- a/toml.abnf +++ b/toml.abnf @@ -1,6 +1,9 @@ ;; This is an attempt to define TOML in ABNF according to the grammar defined ;; in RFC 4234 (http://www.ietf.org/rfc/rfc4234.txt). +;; WARNING: This document is a work-in-progress and should not be considered +;; authoritative until further notice. + ;; TOML toml = expression *( newline expression ) From eb703d21683ed5eef4ae2f66db7cd8e5ca8280ab Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Tue, 3 Jan 2017 17:05:00 -0800 Subject: [PATCH 17/22] Fix array grouping. --- toml.abnf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toml.abnf b/toml.abnf index fcd47cd4..4752ce86 100644 --- a/toml.abnf +++ b/toml.abnf @@ -169,8 +169,8 @@ array-close = ws-newline %x5D ; ] array = array-open array-values array-close -array-values = ( [ val [ array-sep ] [ ( ws-newline comment ws-newlines ) ] ) / - ( val array-sep [ ( ws-newline comment ws-newlines) / ws-newlines ] array-values ] ) +array-values = [ ( val [ array-sep ] [ ( ws-newline comment ws-newlines ) ] ) / + ( val array-sep [ ( ws-newline comment ws-newlines) / ws-newlines ] array-values ) ] array-sep = ws %x2C ws ; , Comma From a2c74ceeafd4ea00ee792678ee1c971ea397187b Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Tue, 3 Jan 2017 17:05:46 -0800 Subject: [PATCH 18/22] Add ABNF for Date and Time. 
--- toml.abnf | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/toml.abnf b/toml.abnf index 4752ce86..c3173328 100644 --- a/toml.abnf +++ b/toml.abnf @@ -141,7 +141,7 @@ boolean = true / false true = %x74.72.75.65 ; true false = %x66.61.6C.73.65 ; false -;; Datetime (as defined in RFC 3339) +;; Date and Time (as defined in RFC 3339) date-fullyear = 4DIGIT date-month = 2DIGIT ; 01-12 @@ -157,7 +157,23 @@ partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] full-date = date-fullyear "-" date-month "-" date-mday full-time = partial-time time-offset -date-time = full-date "T" full-time +date-time = offset-date-time / local-date-time / local-date / local-time + +;; Offset Date-Time + +offset-date-time = full-date "T" full-time + +;; Local Date-Time + +local-date-time = full-date "T" partial-time + +;; Local Date + +local-date = full-date + +;; Local Time + +local-time = partial-time ;; Array From 7c0db2cfae4439e2effdfb9effb087ce37d43d3a Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Tue, 3 Jan 2017 17:29:15 -0800 Subject: [PATCH 19/22] RFC 5234 is latest for ABNF. --- toml.abnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toml.abnf b/toml.abnf index c3173328..2ebdd6f5 100644 --- a/toml.abnf +++ b/toml.abnf @@ -1,5 +1,5 @@ ;; This is an attempt to define TOML in ABNF according to the grammar defined -;; in RFC 4234 (http://www.ietf.org/rfc/rfc4234.txt). +;; in RFC 5234 (http://www.ietf.org/rfc/rfc5234.txt). ;; WARNING: This document is a work-in-progress and should not be considered ;; authoritative until further notice. From f9d442956440a5a05a953ef5a070df083ba9aa47 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Wed, 4 Jan 2017 14:42:50 -0800 Subject: [PATCH 20/22] Make ABNF compatible with a real ABNF parser. 
--- toml.abnf | 68 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/toml.abnf b/toml.abnf index 2ebdd6f5..7f22d459 100644 --- a/toml.abnf +++ b/toml.abnf @@ -1,34 +1,38 @@ +;; WARNING: This document is a work-in-progress and should not be considered +;; authoritative until further notice. + ;; This is an attempt to define TOML in ABNF according to the grammar defined ;; in RFC 5234 (http://www.ietf.org/rfc/rfc5234.txt). -;; WARNING: This document is a work-in-progress and should not be considered -;; authoritative until further notice. +;; You can try out this grammar interactively via the online ABNF tool at +;; http://www.coasttocoastresearch.com/interactiveapg +;; Note that due to the limitations of ABNF parsers, in order for multi-line +;; strings to work in that tool, the following rules must be ammended to +;; disallow the use of unescaped double- or single-quotes: +;; ml-basic-unescaped = basic-unescaped +;; ml-literal-char = literal-char ;; TOML toml = expression *( newline expression ) -expression = ( - ws / - ( ws comment ) / - ( ws keyval ws [ comment ] ) / - ( ws table ws [ comment ] ) -) +expression = ( ( ws comment ) / + ( ws keyval ws [ comment ] ) / + ( ws table ws [ comment ] ) / + ws ) ;; Newline -newline = ( - %x0A / ; LF - %x0D.0A ; CRLF -) +newline = ( %x0A / ; LF + %x0D.0A ) ; CRLF newlines = 1*newline ;; Whitespace -ws = *( - %x20 / ; Space - %x09 ; Horizontal tab -) +wschar = ( %x20 / ; Space + %x09 ) ; Horizontal tab + +ws = *wschar ;; Comment @@ -45,7 +49,7 @@ key = unquoted-key / quoted-key unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ quoted-key = quotation-mark 1*basic-char quotation-mark ; See Basic Strings -val = integer / float / string / boolean / date-time / array / inline-table +val = string / boolean / array / inline-table / date-time / float / integer ;; Table @@ -77,7 +81,7 @@ int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) 
;; Float -float = integer ( frac / frac exp / exp ) +float = integer ( frac / ( frac exp ) / exp ) zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) frac = decimal-point zero-prefixable-int decimal-point = %x2E ; . @@ -86,7 +90,7 @@ e = %x65 / %x45 ; e E ;; String -string = basic-string / ml-basic-string / literal-string / ml-literal-string +string = ml-basic-string / basic-string / ml-literal-string / literal-string ;; Basic String @@ -112,7 +116,7 @@ escape = %x5C ; \ ;; Multiline Basic String -ml-basic-string-delim = quotation-mark quotation-mark quotation-mark +ml-basic-string-delim = 3quotation-mark ml-basic-string = ml-basic-string-delim ml-basic-body ml-basic-string-delim ml-basic-body = *( ml-basic-char / newline / ( escape newline )) @@ -121,15 +125,15 @@ ml-basic-unescaped = %x20-5B / %x5D-10FFFF ;; Literal String -literal-string = apostraphe *literal-char apostraphe +literal-string = apostrophe *literal-char apostrophe -apostraphe = %x27 ; ' Apostraphe +apostrophe = %x27 ; ' apostrophe literal-char = %x09 / %x20-26 / %x28-10FFFF ;; Multiline Literal String -ml-literal-string-delim = apostraphe apostraphe apostraphe +ml-literal-string-delim = 3apostrophe ml-literal-string = ml-literal-string-delim ml-literal-body ml-literal-string-delim ml-literal-body = *( ml-literal-char / newline ) @@ -177,16 +181,16 @@ local-time = partial-time ;; Array -ws-newline = *( ws / newline ) -ws-newlines = newline *( ws / newline ) +ws-newline = *( wschar / newline ) +ws-newlines = newline *( wschar / newline ) array-open = %x5B ws-newline ; [ array-close = ws-newline %x5D ; ] array = array-open array-values array-close -array-values = [ ( val [ array-sep ] [ ( ws-newline comment ws-newlines ) ] ) / - ( val array-sep [ ( ws-newline comment ws-newlines) / ws-newlines ] array-values ) ] +array-values = [ ( val array-sep [ ( ws-newline comment ws-newlines) / ws-newlines ] array-values ) / + ( val [ array-sep ] [ ( ws-newline comment ws-newlines ) ] ) ] array-sep = ws 
%x2C ws ; , Comma @@ -199,11 +203,11 @@ inline-table-sep = ws %x2C ws ; , Comma inline-table = inline-table-open inline-table-keyvals inline-table-close inline-table-keyvals = [ inline-table-keyvals-non-empty ] -inline-table-keyvals-non-empty = ( key keyval-sep val ) / - ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) +inline-table-keyvals-non-empty = ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) / + ( key keyval-sep val ) ;; Built-in ABNF terms, reproduced here for clarity -; ALPHA = %x41-5A / %x61-7A ; A-Z / a-z -; DIGIT = %x30-39 ; 0-9 -; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" +ALPHA = %x41-5A / %x61-7A ; A-Z / a-z +DIGIT = %x30-39 ; 0-9 +HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" From 514037d64f25c26a27a12036edfc362cd814d449 Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Wed, 4 Jan 2017 14:51:05 -0800 Subject: [PATCH 21/22] Reorder ABNF rules for better clarity. --- toml.abnf | 52 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/toml.abnf b/toml.abnf index 7f22d459..6c0e76fa 100644 --- a/toml.abnf +++ b/toml.abnf @@ -15,6 +15,7 @@ ;; TOML toml = expression *( newline expression ) + expression = ( ( ws comment ) / ( ws keyval ws [ comment ] ) / ( ws table ws [ comment ] ) / @@ -29,11 +30,11 @@ newlines = 1*newline ;; Whitespace +ws = *wschar + wschar = ( %x20 / ; Space %x09 ) ; Horizontal tab -ws = *wschar - ;; Comment comment-start-symbol = %x23 ; # @@ -42,13 +43,14 @@ comment = comment-start-symbol *non-eol ;; Key-Value pairs -keyval-sep = ws %x3D ws ; = keyval = key keyval-sep val key = unquoted-key / quoted-key unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ quoted-key = quotation-mark 1*basic-char quotation-mark ; See Basic Strings +keyval-sep = ws %x3D ws ; = + val = string / boolean / array / inline-table / date-time / float / integer ;; Table @@ -57,34 +59,38 @@ table = std-table / 
array-table ;; Standard Table +std-table = std-table-open key *( table-key-sep key) std-table-close + std-table-open = %x5B ws ; [ Left square bracket std-table-close = ws %x5D ; ] Right square bracket table-key-sep = ws %x2E ws ; . Period -std-table = std-table-open key *( table-key-sep key) std-table-close - ;; Array Table +array-table = array-table-open key *( table-key-sep key) array-table-close + array-table-open = %x5B.5B ws ; [[ Double left square bracket array-table-close = ws %x5D.5D ; ]] Double right quare bracket -array-table = array-table-open key *( table-key-sep key) array-table-close - ;; Integer integer = [ minus / plus ] int + minus = %x2D ; - plus = %x2B ; + + +int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) digit1-9 = %x31-39 ; 1-9 underscore = %x5F ; _ -int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) ;; Float float = integer ( frac / ( frac exp ) / exp ) -zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) + frac = decimal-point zero-prefixable-int decimal-point = %x2E ; . 
+zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) + exp = e integer e = %x65 / %x45 ; e E @@ -99,6 +105,7 @@ basic-string = quotation-mark *basic-char quotation-mark quotation-mark = %x22 ; " basic-char = basic-unescaped / escaped +basic-unescaped = %x20-21 / %x23-5B / %x5D-10FFFF escaped = escape ( %x22 / ; " quotation mark U+0022 %x5C / ; \ reverse solidus U+005C %x2F / ; / solidus U+002F @@ -110,16 +117,15 @@ escaped = escape ( %x22 / ; " quotation mark U+0022 %x75 4HEXDIG / ; uXXXX U+XXXX %x55 8HEXDIG ) ; UXXXXXXXX U+XXXXXXXX -basic-unescaped = %x20-21 / %x23-5B / %x5D-10FFFF - escape = %x5C ; \ ;; Multiline Basic String -ml-basic-string-delim = 3quotation-mark ml-basic-string = ml-basic-string-delim ml-basic-body ml-basic-string-delim -ml-basic-body = *( ml-basic-char / newline / ( escape newline )) +ml-basic-string-delim = 3quotation-mark + +ml-basic-body = *( ml-basic-char / newline / ( escape newline )) ml-basic-char = ml-basic-unescaped / escaped ml-basic-unescaped = %x20-5B / %x5D-10FFFF @@ -133,20 +139,24 @@ literal-char = %x09 / %x20-26 / %x28-10FFFF ;; Multiline Literal String -ml-literal-string-delim = 3apostrophe ml-literal-string = ml-literal-string-delim ml-literal-body ml-literal-string-delim +ml-literal-string-delim = 3apostrophe + ml-literal-body = *( ml-literal-char / newline ) ml-literal-char = %x09 / %x20-10FFFF ;; Boolean boolean = true / false + true = %x74.72.75.65 ; true false = %x66.61.6C.73.65 ; false ;; Date and Time (as defined in RFC 3339) +date-time = offset-date-time / local-date-time / local-date / local-time + date-fullyear = 4DIGIT date-month = 2DIGIT ; 01-12 date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year @@ -161,8 +171,6 @@ partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] full-date = date-fullyear "-" date-month "-" date-mday full-time = partial-time time-offset -date-time = offset-date-time / local-date-time / local-date / local-time - ;; Offset Date-Time offset-date-time = 
full-date "T" full-time @@ -181,27 +189,27 @@ local-time = partial-time ;; Array -ws-newline = *( wschar / newline ) -ws-newlines = newline *( wschar / newline ) +array = array-open array-values array-close array-open = %x5B ws-newline ; [ array-close = ws-newline %x5D ; ] -array = array-open array-values array-close - array-values = [ ( val array-sep [ ( ws-newline comment ws-newlines) / ws-newlines ] array-values ) / ( val [ array-sep ] [ ( ws-newline comment ws-newlines ) ] ) ] array-sep = ws %x2C ws ; , Comma +ws-newline = *( wschar / newline ) +ws-newlines = newline *( wschar / newline ) + ;; Inline Table +inline-table = inline-table-open inline-table-keyvals inline-table-close + inline-table-open = %x7B ws ; { inline-table-close = ws %x7D ; } inline-table-sep = ws %x2C ws ; , Comma -inline-table = inline-table-open inline-table-keyvals inline-table-close - inline-table-keyvals = [ inline-table-keyvals-non-empty ] inline-table-keyvals-non-empty = ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) / ( key keyval-sep val ) From ecb8274a198a32a5091ca49b001f939e5e46584c Mon Sep 17 00:00:00 2001 From: Tom Preston-Werner Date: Wed, 4 Jan 2017 14:52:50 -0800 Subject: [PATCH 22/22] Delete unused rule. --- toml.abnf | 2 -- 1 file changed, 2 deletions(-) diff --git a/toml.abnf b/toml.abnf index 6c0e76fa..54ef4e3f 100644 --- a/toml.abnf +++ b/toml.abnf @@ -26,8 +26,6 @@ expression = ( ( ws comment ) / newline = ( %x0A / ; LF %x0D.0A ) ; CRLF -newlines = 1*newline - ;; Whitespace ws = *wschar