5353# # + field lists
5454# # + option lists
5555# # + indented literal blocks
56+ # # + quoted literal blocks
57+ # # + line blocks
5658# # + simple tables
5759# # + directives (see official documentation in `RST directives list`_):
5860# # - ``image``, ``figure`` for including images and videos
121123# # * Markdown code blocks
122124# # * Markdown links
123125# # * Markdown headlines
126+ # # * Markdown block quotes
124127# # * using ``1`` as auto-enumerator in enumerated lists like RST ``#``
125128# # (auto-enumerator ``1`` can not be used with ``#`` in the same list)
126129# #
145148# # 2) Compatibility mode which is RST rules.
146149# #
147150# # .. Note:: in both modes the parser interprets text between single
148- # # backticks (code) identically:
151+ # # backticks (code) identically:
149152# # backslash does not escape; the only exception: ``\`` followed by `
150153# # does escape so that we can always input a single backtick ` in
151154# # inline code. However that makes it impossible to input code with
156159# # ``\`` -- GOOD
157160# # So single backticks can always be input: `\`` will turn to ` code
158161# #
162+ # # .. Attention::
163+ # # We don't support some obviously poor design choices of Markdown (or RST).
164+ # #
165+ # # - no support for the rule of 2 spaces causing a line break in Markdown
166+ # # (use RST "line blocks" syntax for making line breaks)
167+ # #
168+ # # - interpretation of Markdown block quotes is also slightly different,
169+ # # e.g. case
170+ # #
171+ # # ::
172+ # #
173+ # # >>> foo
174+ # # > bar
175+ # # >>baz
176+ # #
177+ # # is a single 3rd-level quote `foo bar baz` in original Markdown, while
178+ # # in Nim we naturally see it as 3rd-level quote `foo` + 1st level `bar` +
179+ # # 2nd level `baz`:
180+ # #
181+ # # >>> foo
182+ # # > bar
183+ # # >>baz
184+ # #
159185# # Limitations
160186# # -----------
161187# #
162188# # * no Unicode support in character width calculations
163189# # * body elements
164190# # - no roman numerals in enumerated lists
165- # # - no quoted literal blocks
166191# # - no doctest blocks
167192# # - no grid tables
168193# # - some directives are missing (check official `RST directives list`_):
@@ -472,6 +497,10 @@ type
472497 line: int # the last line of this style occurrence
473498 # (for error message)
474499 hasPeers: bool # has headings on the same level of hierarchy?
LiteralBlockKind = enum   # RST-style literal blocks after `::`
  lbNone,                 # `::` is not followed by a literal block
  lbIndentedLiteralBlock, # following text is indented deeper than `::` line
  lbQuotedLiteralBlock    # following text has the same indentation and
                          # starts with a quoting (punctuation) character
475504 LevelMap = seq [LevelInfo ] # Saves for each possible title adornment
476505 # style its level in the current document.
477506 SubstitutionKind = enum
@@ -1953,6 +1982,44 @@ proc parseLiteralBlock(p: var RstParser): PRstNode =
19531982 inc p.idx
19541983 result .add (n)
19551984
proc parseQuotedLiteralBlock(p: var RstParser): PRstNode =
  ## Parses an RST quoted literal block into an `rnLiteralBlock` node holding
  ## a single text leaf.
  ##
  ## The first character of the first text line is taken as the quote
  ## symbol. Lines are accumulated verbatim while each new line keeps the
  ## starting indentation and begins with a punctuation token starting with
  ## that symbol. Parsing stops at end of input, at a smaller indentation or
  ## at a blank line; any other line ends the block with a style warning.
  result = newRstNodeA(p, rnLiteralBlock)
  let leaf = newLeaf("")
  if currentTok(p).kind == tkIndent:
    let baseIndent = currInd(p)
    while currentTok(p).kind == tkIndent:  # skip blank lines
      inc p.idx
    let quoteChar = currentTok(p).symbol[0]
    while currentTok(p).kind != tkEof:
      if currentTok(p).kind != tkIndent:
        # ordinary token inside a quoted line: copy it as is
        leaf.text.add(currentTok(p).symbol)
        inc p.idx
        continue
      let lineIndent = currentTok(p).ival
      if lineIndent < baseIndent:
        break
      if lineIndent > baseIndent:
        rstMessage(p, mwRstStyle,
                   "unexpected indentation in quoted literal block")
        break
      if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteChar:
        # one more quoted line follows
        leaf.text.add("\n")
        inc p.idx
      else:
        # a blank line ends the block silently, anything else with a warning
        if nextTok(p).kind != tkIndent:
          rstMessage(p, mwRstStyle, "no newline after quoted literal block")
        break
  result.add(leaf)
2016+
proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode =
  ## Dispatches to the literal block parser selected by `kind`:
  ## `lbIndentedLiteralBlock` goes to `parseLiteralBlock`, every other value
  ## goes to `parseQuotedLiteralBlock`.
  result =
    case kind
    of lbIndentedLiteralBlock: parseLiteralBlock(p)
    of lbNone, lbQuotedLiteralBlock: parseQuotedLiteralBlock(p)
2022+
19562023proc getLevel (p: var RstParser , c: char , hasOverline: bool ): int =
19572024 # # Returns (preliminary) heading level corresponding to `c` and
19582025 # # `hasOverline`. If level does not exist, add it first.
@@ -2023,6 +2090,33 @@ proc isLineBlock(p: RstParser): bool =
20232090 p.tok[j].col > currentTok (p).col or
20242091 p.tok[j].symbol == " \n "
20252092
proc isMarkdownBlockQuote(p: RstParser): bool =
  ## Tells whether the current token begins with the `>` character.
  ## The token's symbol is indexed directly, so it must not be empty.
  currentTok(p).symbol[0] == '>'
2095+
proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind =
  ## Checks whether the tokens that follow are an Indented Literal Block or
  ## a Quoted Literal Block (which is not quite the same as a Markdown quote
  ## block); yields `lbNone` when they are neither.
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
  const validQuotingCharacters = {'!'..'/', ':'..'@', '['..'`', '{'..'~'}
  result = lbNone
  if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent:
    let
      ownIndent = currInd(p)
      followingIndent = nextTok(p).ival
    if ownIndent < followingIndent:
      result = lbIndentedLiteralBlock
    elif ownIndent == followingIndent:
      # skip the run of indent tokens, then inspect the first token after it
      var first = p.idx + 1
      while p.tok[first].kind == tkIndent: inc first
      if p.tok[first].kind in {tkPunct, tkAdornment} and
          p.tok[first].symbol[0] in validQuotingCharacters:
        result = lbQuotedLiteralBlock
2119+
20262120proc predNL (p: RstParser ): bool =
20272121 result = true
20282122 if p.idx > 0 :
@@ -2078,6 +2172,8 @@ proc whichSection(p: RstParser): RstNodeKind =
20782172 elif match (p, p.idx + 1 , " a" ): result = rnTable
20792173 elif currentTok (p).symbol == " |" and isLineBlock (p):
20802174 result = rnLineBlock
2175+ elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote (p):
2176+ result = rnMarkdownBlockQuote
20812177 elif match (p, p.idx + 1 , " i" ) and isAdornmentHeadline (p, p.idx):
20822178 result = rnOverline
20832179 else :
@@ -2090,6 +2186,8 @@ proc whichSection(p: RstParser): RstNodeKind =
20902186 result = rnMarkdownTable
20912187 elif currentTok (p).symbol == " |" and isLineBlock (p):
20922188 result = rnLineBlock
2189+ elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote (p):
2190+ result = rnMarkdownBlockQuote
20932191 elif match (p, tokenAfterNewline (p), " aI" ) and
20942192 isAdornmentHeadline (p, tokenAfterNewline (p)):
20952193 result = rnHeadline
@@ -2143,6 +2241,102 @@ proc parseLineBlock(p: var RstParser): PRstNode =
21432241 else :
21442242 break
21452243
# Forward declaration (the body is defined elsewhere in this file):
# `parseMarkdownQuoteSegment` below needs to call `parseDoc`.
proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}
2245+
proc getQuoteSymbol(p: RstParser, idx: int):
                   tuple[sym: string, depth: int, tokens: int] =
  ## Collects the Markdown quote marker that starts at token `idx`.
  ##
  ## The token at `idx` is always taken; after it every pair "whitespace
  ## token + punctuation token starting with `>`" is appended as well.
  ## Returns:
  ## - `sym`: the concatenated text of all consumed tokens (with whitespace)
  ## - `depth`: summed length of the marker tokens, whitespace excluded
  ## - `tokens`: number of tokens consumed
  let head = p.tok[idx].symbol
  result = (sym: head, depth: head.len, tokens: 1)
  var i = idx + 1
  while p.tok[i].kind == tkWhite and i + 1 < p.tok.len and
      p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol[0] == '>':
    let marker = p.tok[i+1].symbol
    result.sym.add p.tok[i].symbol
    result.sym.add marker
    inc result.depth, marker.len
    inc result.tokens, 2
    inc i, 2
2260+
proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int):
                              PRstNode =
  ## Parses one *segment* of a Markdown block quote and returns the result
  ## of running `parseDoc` on its content.
  ##
  ## We define *segment* as a group of lines that start with exactly the
  ## same quote symbol `curSym`. Following lines that don't contain any `>`
  ## (*lazy* continuation) are considered a continuation of the current
  ## segment unless their indentation is smaller than `col`.
  ##
  ## The tokens of the segment are moved from `p` into a fresh parser `q`
  ## with the leading quote symbols dropped; `p` is left at the token that
  ## terminated the segment.
  var q: RstParser  # to delete `>` at a start of line and then parse normally
  initParser(q, p.s)
  q.col = p.col
  q.line = p.line
  var minCol = int.high  # minimum column number in the segment
  while true:  # move tokens of segment from `p` to `q` skipping `curSym`
    case currentTok(p).kind
    of tkEof:
      break
    of tkIndent:
      if nextTok(p).kind in {tkIndent, tkEof}:
        break  # blank line or end of input terminates the segment
      elif nextTok(p).symbol[0] == '>':
        let (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
        if quoteSym != curSym:  # next segment started
          break
        # the segment continues
        let iTok = tokenAfterNewline(p, p.idx+1)
        if p.tok[iTok].kind notin {tkEof, tkIndent} and
            p.tok[iTok].symbol[0] != '>':
          rstMessage(p, mwRstStyle,
              "two or more quoted lines are followed by unquoted line " &
              $(curLine(p) + 1))
          break
        q.tok.add currentTok(p)
        var ival = currentTok(p).ival + quoteSym.len
        inc p.idx, (1 + quoteTokens)  # skip newline and > > >
        if currentTok(p).kind == tkWhite:
          ival += currentTok(p).symbol.len
          inc p.idx
        # fix up previous `tkIndent`s to ival (as if >>> were not there)
        var k = q.tok.len - 1
        while k >= 0 and q.tok[k].kind == tkIndent:
          q.tok[k].ival = ival
          dec k
      elif currentTok(p).ival < col:
        break
      else:  # the segment continues, a case like:
             # > beginning
             # continuation
        q.tok.add currentTok(p)
        inc p.idx
    else:
      if currentTok(p).col < minCol: minCol = currentTok(p).col
      q.tok.add currentTok(p)
      inc p.idx
  q.indentStack = @[minCol]
  # if initial indentation `minCol` is > 0 then final newlines
  # should be omitted so that parseDoc could advance to the end of tokens.
  # The `j >= 0` guard keeps an empty or indent-only segment from
  # indexing `q.tok[-1]`:
  var j = q.tok.len - 1
  while j >= 0 and q.tok[j].kind == tkIndent: dec j
  q.tok.setLen(j+1)
  q.tok.add Token(kind: tkEof, line: currentTok(p).line)
  result = parseDoc(q)
2321+
proc parseMarkdownBlockQuote(p: var RstParser): PRstNode =
  ## Parses a Markdown block quote into an `rnMarkdownBlockQuote` node.
  ##
  ## Each segment becomes one `rnMarkdownBlockQuoteItem` child that carries
  ## the quotation depth of its marker and the content returned by
  ## `parseMarkdownQuoteSegment`. Another segment is started when the next
  ## line is at the column of the first marker and starts with `>`.
  var (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx)
  let col = currentTok(p).col
  result = newRstNodeA(p, rnMarkdownBlockQuote)
  inc p.idx, quoteTokens  # skip first >
  while true:
    let item = newRstNode(rnMarkdownBlockQuoteItem)
    item.quotationDepth = quotationDepth
    if currentTok(p).kind == tkWhite:
      inc p.idx
    item.add parseMarkdownQuoteSegment(p, curSym, col)
    result.add(item)
    let anotherSegment =
      currentTok(p).kind == tkIndent and currentTok(p).ival == col and
      nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>'
    if not anotherSegment:
      break
    (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
    inc p.idx, (1 + quoteTokens)  # skip newline and > > >
2339+
21462340proc parseParagraph (p: var RstParser , result: PRstNode ) =
21472341 while true :
21482342 case currentTok (p).kind
@@ -2158,16 +2352,17 @@ proc parseParagraph(p: var RstParser, result: PRstNode) =
21582352 result .add newLeaf (" " )
21592353 of rnLineBlock:
21602354 result .addIfNotNil (parseLineBlock (p))
2355+ of rnMarkdownBlockQuote:
2356+ result .addIfNotNil (parseMarkdownBlockQuote (p))
21612357 else : break
21622358 else :
21632359 break
21642360 of tkPunct:
2165- if currentTok (p).symbol == " ::" and
2166- nextTok (p).kind == tkIndent and
2167- currInd (p) < nextTok (p).ival:
2361+ if (let literalBlockKind = whichRstLiteralBlock (p);
2362+ literalBlockKind != lbNone):
21682363 result .add newLeaf (" :" )
21692364 inc p.idx # skip '::'
2170- result .add (parseLiteralBlock (p ))
2365+ result .add (parseRstLiteralBlock (p, literalBlockKind ))
21712366 break
21722367 else :
21732368 parseInline (p, result )
@@ -2257,8 +2452,6 @@ proc getColumns(p: var RstParser, cols: var IntSeq) =
22572452 # last column has no limit:
22582453 cols[L - 1 ] = 32000
22592454
2260- proc parseDoc (p: var RstParser ): PRstNode {.gcsafe .}
2261-
22622455proc parseSimpleTable (p: var RstParser ): PRstNode =
22632456 var
22642457 cols: IntSeq
@@ -2585,6 +2778,7 @@ proc parseSection(p: var RstParser, result: PRstNode) =
25852778 a = parseLiteralBlock (p)
25862779 of rnBulletList: a = parseBulletList (p)
25872780 of rnLineBlock: a = parseLineBlock (p)
2781+ of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote (p)
25882782 of rnDirective: a = parseDotDot (p)
25892783 of rnEnumList: a = parseEnumList (p)
25902784 of rnLeaf: rstMessage (p, meNewSectionExpected, " (syntax error)" )
0 commit comments