Skip to content

Commit 040d23e

Browse files
authored
implement RST & Markdown quote blocks (#19147)
* implement RST & Markdown quote blocks * compile with nim 1.0 * Fix indentation
1 parent 7772ca3 commit 040d23e

File tree

7 files changed

+617
-12
lines changed

7 files changed

+617
-12
lines changed

config/nimdoc.tex.cfg

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,17 @@ doc.file = """
139139
\usepackage[most]{tcolorbox} % boxes around admonitions, code blocks, doc.item
140140

141141
\newtcolorbox{rstadmonition}[1][]{blanker, breakable,
142-
left=3mm, right=3mm, top=1mm, bottom=1mm,
142+
left=3mm, right=0mm, top=1mm, bottom=1mm,
143143
before upper=\indent, parbox=false, #1}
144144

145+
\newtcolorbox{rstquote}[1][]{blanker, breakable,
146+
left=3mm, right=3mm, top=1mm, bottom=1mm,
147+
parbox=false,
148+
borderline west={0.3em}{0pt}{lightgray},
149+
borderline north={0.05em}{0pt}{lightgray},
150+
borderline east={0.05em}{0pt}{lightgray},
151+
borderline south={0.05em}{0pt}{lightgray}}
152+
145153
\definecolor{rstframecolor}{rgb}{0.85, 0.8, 0.6}
146154

147155
\newtcolorbox{rstprebox}[1][]{blanker, breakable,

doc/nimdoc.css

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,11 @@ blockquote {
567567
border-left: 5px solid #bbc;
568568
}
569569

570+
blockquote.markdown-quote {
571+
font-size: 0.9rem; /* use rem to avoid recursion */
572+
font-style: normal;
573+
}
574+
570575
.pre, span.tok {
571576
font-family: "Source Code Pro", Monaco, Menlo, Consolas, "Courier New", monospace;
572577
font-weight: 500;

lib/packages/docutils/rst.nim

Lines changed: 202 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
## + field lists
5454
## + option lists
5555
## + indented literal blocks
56+
## + quoted literal blocks
57+
## + line blocks
5658
## + simple tables
5759
## + directives (see official documentation in `RST directives list`_):
5860
## - ``image``, ``figure`` for including images and videos
@@ -121,6 +123,7 @@
121123
## * Markdown code blocks
122124
## * Markdown links
123125
## * Markdown headlines
126+
## * Markdown block quotes
124127
## * using ``1`` as auto-enumerator in enumerated lists like RST ``#``
125128
## (auto-enumerator ``1`` can not be used with ``#`` in the same list)
126129
##
@@ -145,7 +148,7 @@
145148
## 2) Compatibility mode which is RST rules.
146149
##
147150
## .. Note:: in both modes the parser interprets text between single
148-
## backticks (code) identically:
151+
## backticks (code) identically:
149152
## backslash does not escape; the only exception: ``\`` followed by `
150153
## does escape so that we can always input a single backtick ` in
151154
## inline code. However that makes impossible to input code with
@@ -156,13 +159,35 @@
156159
## ``\`` -- GOOD
157160
## So single backticks can always be input: `\`` will turn to ` code
158161
##
162+
## .. Attention::
163+
## We don't support some obviously poor design choices of Markdown (or RST).
164+
##
165+
## - no support for the rule of 2 spaces causing a line break in Markdown
166+
## (use RST "line blocks" syntax for making line breaks)
167+
##
168+
## - interpretation of Markdown block quotes is also slightly different,
169+
## e.g. case
170+
##
171+
## ::
172+
##
173+
## >>> foo
174+
## > bar
175+
## >>baz
176+
##
177+
## is a single 3rd-level quote `foo bar baz` in original Markdown, while
178+
## in Nim we naturally see it as 3rd-level quote `foo` + 1st level `bar` +
179+
## 2nd level `baz`:
180+
##
181+
## >>> foo
182+
## > bar
183+
## >>baz
184+
##
159185
## Limitations
160186
## -----------
161187
##
162188
## * no Unicode support in character width calculations
163189
## * body elements
164190
## - no roman numerals in enumerated lists
165-
## - no quoted literal blocks
166191
## - no doctest blocks
167192
## - no grid tables
168193
## - some directives are missing (check official `RST directives list`_):
@@ -472,6 +497,10 @@ type
472497
line: int # the last line of this style occurrence
473498
# (for error message)
474499
hasPeers: bool # has headings on the same level of hierarchy?
500+
LiteralBlockKind = enum # RST-style literal blocks after `::`
501+
lbNone,
502+
lbIndentedLiteralBlock,
503+
lbQuotedLiteralBlock
475504
LevelMap = seq[LevelInfo] # Saves for each possible title adornment
476505
# style its level in the current document.
477506
SubstitutionKind = enum
@@ -1953,6 +1982,44 @@ proc parseLiteralBlock(p: var RstParser): PRstNode =
19531982
inc p.idx
19541983
result.add(n)
19551984

1985+
proc parseQuotedLiteralBlock(p: var RstParser): PRstNode =
## Parses an RST *quoted literal block* and returns it as an `rnLiteralBlock`
## node that holds one leaf with the collected text.
##
## The first character of the first non-indent token is taken as the quoting
## character. Token symbols are appended to the leaf verbatim; a line break
## is appended whenever a new line at the same indentation starts with a
## `tkPunct` token beginning with that same quoting character.
##
## Parsing stops at `tkEof`, at a line indented less than the block, at a
## blank line, or (with an `mwRstStyle` message) at an unquoted or
## unexpectedly indented line.
# NOTE(review): leading indentation is not visible in this view, so the exact
# nesting of the statements below is assumed from the keyword order.
1986+
result = newRstNodeA(p, rnLiteralBlock)
1987+
var n = newLeaf("")
1988+
if currentTok(p).kind == tkIndent:
1989+
var indent = currInd(p)
1990+
while currentTok(p).kind == tkIndent: inc p.idx # skip blank lines
1991+
# the quoting character is whatever the first non-blank line starts with
var quoteSym = currentTok(p).symbol[0]
1992+
while true:
1993+
case currentTok(p).kind
1994+
of tkEof:
1995+
break
1996+
of tkIndent:
1997+
# a line indented less than the block terminates it
if currentTok(p).ival < indent:
1998+
break
1999+
elif currentTok(p).ival == indent:
2000+
# same indentation: the block continues only if the new line starts with
# the same quoting character
if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteSym:
2001+
n.text.add("\n")
2002+
inc p.idx
2003+
# two `tkIndent` tokens in a row, i.e. a blank line: normal end of block
elif nextTok(p).kind == tkIndent:
2004+
break
2005+
else:
2006+
rstMessage(p, mwRstStyle, "no newline after quoted literal block")
2007+
break
2008+
else:
2009+
rstMessage(p, mwRstStyle,
2010+
"unexpected indentation in quoted literal block")
2011+
break
2012+
else:
2013+
# any other token is part of the literal text and is copied as-is
n.text.add(currentTok(p).symbol)
2014+
inc p.idx
2015+
result.add(n)
2016+
2017+
proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode =
## Dispatches to the parser that matches `kind`: `parseLiteralBlock` for
## `lbIndentedLiteralBlock`, `parseQuotedLiteralBlock` for any other value.
# NOTE(review): `lbNone` also falls into the quoted branch; the caller visible
# in this diff only calls this proc when the kind is not `lbNone`.
2018+
if kind == lbIndentedLiteralBlock:
2019+
result = parseLiteralBlock(p)
2020+
else:
2021+
result = parseQuotedLiteralBlock(p)
2022+
19562023
proc getLevel(p: var RstParser, c: char, hasOverline: bool): int =
19572024
## Returns (preliminary) heading level corresponding to `c` and
19582025
## `hasOverline`. If level does not exist, add it first.
@@ -2023,6 +2090,33 @@ proc isLineBlock(p: RstParser): bool =
20232090
p.tok[j].col > currentTok(p).col or
20242091
p.tok[j].symbol == "\n"
20252092

2093+
proc isMarkdownBlockQuote(p: RstParser): bool =
## Returns true when the symbol of the current token starts with `>`.
# NOTE(review): indexes `symbol[0]` without a length check - assumes the
# current token never has an empty symbol here; confirm against callers.
2094+
result = currentTok(p).symbol[0] == '>'
2095+
2096+
proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind =
2097+
## Checks that the following tokens are either Indented Literal Block or
2098+
## Quoted Literal Block (which is not quite the same as Markdown quote block).
2099+
## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
##
## Requires the current token to be `::` followed by a `tkIndent` token,
## otherwise the result is `lbNone`. The indentation of the next line is then
## compared with the current one: deeper means `lbIndentedLiteralBlock`,
## shallower means `lbNone`, equal means `lbQuotedLiteralBlock` provided that
## the first token after the blank lines is punctuation/adornment starting
## with one of the valid quoting characters.
2100+
if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent:
2101+
# NOTE(review): this first comparison is a separate `if`, not part of the
# `if`/`elif` chain that follows; the conditions are mutually exclusive.
if currInd(p) > nextTok(p).ival:
2102+
result = lbNone
2103+
if currInd(p) < nextTok(p).ival:
2104+
result = lbIndentedLiteralBlock
2105+
elif currInd(p) == nextTok(p).ival:
2106+
var i = p.idx + 1
2107+
# skip over the `tkIndent` tokens to reach the first token of the next text line
# NOTE(review): no bound check on `i`; presumably the token list always ends
# with a non-indent `tkEof` token - confirm.
while p.tok[i].kind == tkIndent: inc i
2108+
const validQuotingCharacters = {
2109+
'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
2110+
'.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^',
2111+
'_', '`', '{', '|', '}', '~'}
2112+
if p.tok[i].kind in {tkPunct, tkAdornment} and
2113+
p.tok[i].symbol[0] in validQuotingCharacters:
2114+
result = lbQuotedLiteralBlock
2115+
else:
2116+
result = lbNone
2117+
else:
2118+
result = lbNone
2119+
20262120
proc predNL(p: RstParser): bool =
20272121
result = true
20282122
if p.idx > 0:
@@ -2078,6 +2172,8 @@ proc whichSection(p: RstParser): RstNodeKind =
20782172
elif match(p, p.idx + 1, " a"): result = rnTable
20792173
elif currentTok(p).symbol == "|" and isLineBlock(p):
20802174
result = rnLineBlock
2175+
elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
2176+
result = rnMarkdownBlockQuote
20812177
elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx):
20822178
result = rnOverline
20832179
else:
@@ -2090,6 +2186,8 @@ proc whichSection(p: RstParser): RstNodeKind =
20902186
result = rnMarkdownTable
20912187
elif currentTok(p).symbol == "|" and isLineBlock(p):
20922188
result = rnLineBlock
2189+
elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
2190+
result = rnMarkdownBlockQuote
20932191
elif match(p, tokenAfterNewline(p), "aI") and
20942192
isAdornmentHeadline(p, tokenAfterNewline(p)):
20952193
result = rnHeadline
@@ -2143,6 +2241,102 @@ proc parseLineBlock(p: var RstParser): PRstNode =
21432241
else:
21442242
break
21452243

2244+
proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}
2245+
2246+
proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] =
## Collects the quote prefix that starts at token index `idx`.
##
## Returns a tuple of:
## - `sym`: the concatenated text of the prefix, including the whitespace
##   tokens between `>` groups;
## - `depth`: the summed symbol length of the first token and of every
##   following `>` group (whitespace is not counted);
## - `tokens`: the number of tokens the prefix occupies.
# NOTE(review): the token at `idx` is consumed unconditionally; callers are
# expected to have checked that it starts with `>`.
2247+
result = ("", 0, 0)
2248+
var i = idx
2249+
result.sym &= p.tok[i].symbol
2250+
result.depth += p.tok[i].symbol.len
2251+
inc result.tokens
2252+
inc i
2253+
# keep extending the prefix while a whitespace token is directly followed by
# another punctuation token that starts with `>`
while p.tok[i].kind == tkWhite and i+1 < p.tok.len and
2254+
p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol[0] == '>':
2255+
result.sym &= p.tok[i].symbol
2256+
result.sym &= p.tok[i+1].symbol
2257+
result.depth += p.tok[i+1].symbol.len
2258+
inc result.tokens, 2
2259+
inc i, 2
2260+
2261+
proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int):
2262+
PRstNode =
2263+
## We define *segment* as a group of lines that starts with exactly the
2264+
## same quote symbol. If the following lines don't contain any `>` (*lazy*
2265+
## continuation) they are considered as continuation of the current segment.
##
## The tokens of the segment are moved from `p` into a fresh parser with the
## `curSym` prefixes dropped, and that parser is run through `parseDoc`; its
## result is returned. `col` is the column of the quote start: an unquoted
## line indented less than `col` ends the segment.
2266+
var q: RstParser # to delete `>` at a start of line and then parse normally
2267+
initParser(q, p.s)
2268+
q.col = p.col
2269+
q.line = p.line
2270+
var minCol = int.high # minimum column number in the segment
2271+
while true: # move tokens of segment from `p` to `q` skipping `curSym`
2272+
case currentTok(p).kind
2273+
of tkEof:
2274+
break
2275+
of tkIndent:
2276+
# a blank line or the end of input terminates the segment
if nextTok(p).kind in {tkIndent, tkEof}:
2277+
break
2278+
else:
2279+
if nextTok(p).symbol[0] == '>':
2280+
var (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
2281+
if quoteSym == curSym: # the segment continues
2282+
var iTok = tokenAfterNewline(p, p.idx+1)
2283+
# the line after this quoted line is neither blank nor quoted: report it
# and end the segment
if p.tok[iTok].kind notin {tkEof, tkIndent} and
2284+
p.tok[iTok].symbol[0] != '>':
2285+
rstMessage(p, mwRstStyle,
2286+
"two or more quoted lines are followed by unquoted line " &
2287+
$(curLine(p) + 1))
2288+
break
2289+
q.tok.add currentTok(p)
2290+
# indentation as it would be with the quote prefix removed
var ival = currentTok(p).ival + quoteSym.len
2291+
inc p.idx, (1 + quoteTokens) # skip newline and > > >
2292+
if currentTok(p).kind == tkWhite:
2293+
ival += currentTok(p).symbol.len
2294+
inc p.idx
2295+
# fix up previous `tkIndent`s to ival (as if >>> were not there)
2296+
var j = q.tok.len - 1
2297+
while j >= 0 and q.tok[j].kind == tkIndent:
2298+
q.tok[j].ival = ival
2299+
dec j
2300+
else: # next segment started
2301+
break
2302+
elif currentTok(p).ival < col:
2303+
break
2304+
else: # the segment continues, a case like:
2305+
# > beginning
2306+
# continuation
2307+
q.tok.add currentTok(p)
2308+
inc p.idx
2309+
else:
2310+
# ordinary token: track the leftmost column seen and copy it to `q`
if currentTok(p).col < minCol: minCol = currentTok(p).col
2311+
q.tok.add currentTok(p)
2312+
inc p.idx
2313+
q.indentStack = @[minCol]
2314+
# if initial indentation `minCol` is > 0 then final newlines
2315+
# should be omitted so that parseDoc could advance to the end of tokens:
2316+
var j = q.tok.len - 1
2317+
# NOTE(review): unlike the fix-up loop above, this loop has no `j >= 0`
# guard; it relies on `q.tok` holding at least one non-indent token - confirm.
while q.tok[j].kind == tkIndent: dec j
2318+
q.tok.setLen (j+1)
2319+
q.tok.add Token(kind: tkEof, line: currentTok(p).line)
2320+
result = parseDoc(q)
2321+
2322+
proc parseMarkdownBlockQuote(p: var RstParser): PRstNode =
## Parses a Markdown block quote starting at the current token and returns
## an `rnMarkdownBlockQuote` node.
##
## Each run of lines that shares the same quote prefix becomes one
## `rnMarkdownBlockQuoteItem` child whose `quotationDepth` is the depth
## reported by `getQuoteSymbol` and whose content comes from
## `parseMarkdownQuoteSegment`. A new item is started while the next line
## begins at the same column with a `>` prefix.
2323+
var (curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx)
2324+
let col = currentTok(p).col
2325+
result = newRstNodeA(p, rnMarkdownBlockQuote)
2326+
inc p.idx, quoteTokens # skip first >
2327+
while true:
2328+
var item = newRstNode(rnMarkdownBlockQuoteItem)
2329+
item.quotationDepth = quotationDepth
2330+
# drop the whitespace token that separates the prefix from the text
if currentTok(p).kind == tkWhite: inc p.idx
2331+
item.add parseMarkdownQuoteSegment(p, curSym, col)
2332+
result.add(item)
2333+
# another quoted line at the same column follows: read its prefix and loop
# again to parse it as the next item
if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
2334+
nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>':
2335+
(curSym, quotationDepth, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
2336+
inc p.idx, (1 + quoteTokens) # skip newline and > > >
2337+
else:
2338+
break
2339+
21462340
proc parseParagraph(p: var RstParser, result: PRstNode) =
21472341
while true:
21482342
case currentTok(p).kind
@@ -2158,16 +2352,17 @@ proc parseParagraph(p: var RstParser, result: PRstNode) =
21582352
result.add newLeaf(" ")
21592353
of rnLineBlock:
21602354
result.addIfNotNil(parseLineBlock(p))
2355+
of rnMarkdownBlockQuote:
2356+
result.addIfNotNil(parseMarkdownBlockQuote(p))
21612357
else: break
21622358
else:
21632359
break
21642360
of tkPunct:
2165-
if currentTok(p).symbol == "::" and
2166-
nextTok(p).kind == tkIndent and
2167-
currInd(p) < nextTok(p).ival:
2361+
if (let literalBlockKind = whichRstLiteralBlock(p);
2362+
literalBlockKind != lbNone):
21682363
result.add newLeaf(":")
21692364
inc p.idx # skip '::'
2170-
result.add(parseLiteralBlock(p))
2365+
result.add(parseRstLiteralBlock(p, literalBlockKind))
21712366
break
21722367
else:
21732368
parseInline(p, result)
@@ -2257,8 +2452,6 @@ proc getColumns(p: var RstParser, cols: var IntSeq) =
22572452
# last column has no limit:
22582453
cols[L - 1] = 32000
22592454

2260-
proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}
2261-
22622455
proc parseSimpleTable(p: var RstParser): PRstNode =
22632456
var
22642457
cols: IntSeq
@@ -2585,6 +2778,7 @@ proc parseSection(p: var RstParser, result: PRstNode) =
25852778
a = parseLiteralBlock(p)
25862779
of rnBulletList: a = parseBulletList(p)
25872780
of rnLineBlock: a = parseLineBlock(p)
2781+
of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p)
25882782
of rnDirective: a = parseDotDot(p)
25892783
of rnEnumList: a = parseEnumList(p)
25902784
of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)")

lib/packages/docutils/rstast.nim

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ type
3232
rnFieldName, # consisting of a field name ...
3333
rnFieldBody, # ... and a field body
3434
rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString,
35-
rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock,
35+
rnOptionArgument, rnDescription, rnLiteralBlock,
36+
rnMarkdownBlockQuote, # a quote starting from punctuation like >>>
37+
rnMarkdownBlockQuoteItem, # a quotation block, quote lines starting with
38+
# the same number of chars
3639
rnLineBlock, # the | thingie
3740
rnLineBlockItem, # a son of rnLineBlock - one line inside it.
3841
# When `RstNode` lineIndent="\n" the line's empty
@@ -101,6 +104,8 @@ type
101104
of rnFootnote, rnCitation, rnOptionListItem:
102105
order*: int ## footnote order (for auto-symbol footnotes and
103106
## auto-numbered ones without a label)
107+
of rnMarkdownBlockQuoteItem:
108+
quotationDepth*: int ## number of characters in line prefix
104109
of rnRef, rnSubstitutionReferences,
105110
rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef:
106111
info*: TLineInfo ## To have line/column info for warnings at
@@ -409,6 +414,8 @@ proc treeRepr*(node: PRstNode, indent=0): string =
409414
result.add " level=" & $node.level
410415
of rnFootnote, rnCitation, rnOptionListItem:
411416
result.add (if node.order == 0: "" else: " order=" & $node.order)
417+
of rnMarkdownBlockQuoteItem:
418+
result.add " quotationDepth=" & $node.quotationDepth
412419
else:
413420
discard
414421
result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'")

0 commit comments

Comments
 (0)