From 5f0f9d16758fc03aa0e9c512ab415dd87e0a3e4e Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 3 Dec 2021 08:41:10 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20NEW:=20Add=20field=20list=20plugin?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the restructured text syntax: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#field-lists --- docs/index.md | 6 + mdit_py_plugins/field_list/__init__.py | 208 +++++++++++++ tests/fixtures/field_list.md | 252 ++++++++++++++++ tests/test_field_list.py | 32 ++ tests/test_field_list/test_plugin_parse.yml | 310 ++++++++++++++++++++ 5 files changed, 808 insertions(+) create mode 100644 mdit_py_plugins/field_list/__init__.py create mode 100644 tests/fixtures/field_list.md create mode 100644 tests/test_field_list.py create mode 100644 tests/test_field_list/test_plugin_parse.yml diff --git a/docs/index.md b/docs/index.md index 34011f6..c4f2217 100644 --- a/docs/index.md +++ b/docs/index.md @@ -61,6 +61,12 @@ html_string = md.render("some *Markdown*") .. autofunction:: mdit_py_plugins.tasklists.tasklists_plugin ``` +## Field Lists + +```{eval-rst} +.. autofunction:: mdit_py_plugins.field_list.fieldlist_plugin +``` + ## Heading Anchors ```{eval-rst} diff --git a/mdit_py_plugins/field_list/__init__.py b/mdit_py_plugins/field_list/__init__.py new file mode 100644 index 0000000..9e21fb5 --- /dev/null +++ b/mdit_py_plugins/field_list/__init__.py @@ -0,0 +1,208 @@ +"""Field list plugin""" +from contextlib import contextmanager +from typing import Tuple + +from markdown_it import MarkdownIt +from markdown_it.rules_block import StateBlock + + +def fieldlist_plugin(md: MarkdownIt): + """Field lists are mappings from field names to field bodies, based on the + `reStructuredText syntax + <https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#field-lists>`_. + + .. 
code-block:: md + + :name *markup*: + :name1: body content + :name2: paragraph 1 + + paragraph 2 + :name3: + paragraph 1 + + paragraph 2 + + A field name may consist of any characters except colons (":"). + Inline markup is parsed in field names. + + The field name is followed by whitespace and the field body. + The field body may be empty or contain multiple body elements. + The field body is aligned either by the start of the body on the first line or, + if no body content is on the first line, by 2 spaces. + """ + md.block.ruler.before( + "paragraph", + "fieldlist", + _fieldlist_rule, + {"alt": ["paragraph", "reference", "blockquote"]}, + ) + + +def parseNameMarker(state: StateBlock, startLine: int) -> Tuple[int, str]: + """Parse field name: `:name:` + + :returns: position after name marker, name text + """ + start = state.bMarks[startLine] + state.tShift[startLine] + pos = start + maximum = state.eMarks[startLine] + + # marker should have at least 3 chars (colon + character + colon) + if pos + 2 >= maximum: + return -1, "" + + # first character should be ':' + if state.src[pos] != ":": + return -1, "" + + # scan name length + name_length = 1 + found_close = False + for ch in state.src[pos + 1 :]: + if ch == "\n": + break + if ch == ":": + # TODO backslash escapes + found_close = True + break + name_length += 1 + + if not found_close: + return -1, "" + + # get name + name_text = state.src[pos + 1 : pos + name_length] + + # name should contain at least one character + if not name_text.strip(): + return -1, "" + + return pos + name_length + 1, name_text + + +@contextmanager +def set_parent_type(state: StateBlock, name: str): + """Temporarily set parent type to `name`""" + oldParentType = state.parentType + state.parentType = name + yield + state.parentType = oldParentType + + +def _fieldlist_rule(state: StateBlock, startLine: int, endLine: int, silent: bool): + # adapted from markdown_it/rules_block/list.py::list_block + + # if it's indented more than 3 spaces, it 
should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + posAfterName, name_text = parseNameMarker(state, startLine) + if posAfterName < 0: + return False + + # For validation mode we can terminate immediately + if silent: + return True + + # start field list + token = state.push("field_list_open", "dl", 1) + token.attrSet("class", "field-list") + token.map = listLines = [startLine, 0] + + # iterate list items + nextLine = startLine + + with set_parent_type(state, "fieldlist"): + + while nextLine < endLine: + + # create name tokens + token = state.push("fieldlist_name_open", "dt", 1) + token.map = [startLine, startLine] + token = state.push("inline", "", 0) + token.map = [startLine, startLine] + token.content = name_text + token.children = [] + token = state.push("fieldlist_name_close", "dt", -1) + + # set indent positions + pos = posAfterName + maximum = state.eMarks[nextLine] + offset = ( + state.sCount[nextLine] + + posAfterName + - (state.bMarks[startLine] + state.tShift[startLine]) + ) + + # find indent to start of body on first line + while pos < maximum: + ch = state.srcCharCode[pos] + + if ch == 0x09: # \t + offset += 4 - (offset + state.bsCount[nextLine]) % 4 + elif ch == 0x20: # \s + offset += 1 + else: + break + + pos += 1 + + contentStart = pos + + # set indent for body text + if contentStart >= maximum: + # no body on first line, so use constant indentation + # TODO adapt to indentation of subsequent lines? 
+ indent = 2 + else: + indent = offset + + # Run subparser on the field body + token = state.push("fieldlist_body_open", "dd", 1) + token.map = itemLines = [startLine, 0] + + # change current state, then restore it after parser subcall + oldTShift = state.tShift[startLine] + oldSCount = state.sCount[startLine] + oldBlkIndent = state.blkIndent + + state.tShift[startLine] = contentStart - state.bMarks[startLine] + state.sCount[startLine] = offset + state.blkIndent = indent + + state.md.block.tokenize(state, startLine, endLine) + + state.blkIndent = oldBlkIndent + state.tShift[startLine] = oldTShift + state.sCount[startLine] = oldSCount + + token = state.push("fieldlist_body_close", "dd", -1) + + nextLine = startLine = state.line + itemLines[1] = nextLine + + if nextLine >= endLine: + break + + contentStart = state.bMarks[startLine] + + # Try to check if list is terminated or continued. + if state.sCount[nextLine] < state.blkIndent: + break + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + break + + # get next field item + posAfterName, name_text = parseNameMarker(state, startLine) + if posAfterName < 0: + break + + # Finalize list + token = state.push("field_list_close", "dl", -1) + listLines[1] = nextLine + state.line = nextLine + + return True diff --git a/tests/fixtures/field_list.md b/tests/fixtures/field_list.md new file mode 100644 index 0000000..bf31ad9 --- /dev/null +++ b/tests/fixtures/field_list.md @@ -0,0 +1,252 @@ +Body alignment: +. +:no body: + +:single line: content +:paragraph: content + running onto new line +:body inline: paragraph 1 + + paragraph 2 + + paragraph 3 + +:body on 2nd line: + paragraph 1 + + paragraph 2 + +:body on 3rd line: + + paragraph 1 + + paragraph 2 +. +
+
no body
+
+
single line
+
+

content

+
+
paragraph
+
+

content +running onto new line

+
+
body inline
+
+

paragraph 1

+

paragraph 2

+

paragraph 3

+
+
body on 2nd line
+
+

paragraph 1

+

paragraph 2

+
+
body on 3rd line
+
+

paragraph 1

+

paragraph 2

+
+
+. + +Empty name: +. +:: +. +

::

+. + +Whitespace only name: +. +: : +. +

: :

+. + +Name markup: +. +:inline *markup*: +. +
+
inline markup
+
+
+. + +Content paragraph markup: +. +:name: body *markup* +. +
+
name
+
+

body markup

+
+
+. + +Body list: +. +:name: + - item 1 + - item 2 +:name: - item 1 + - item 2 +. +
+
name
+
+
    +
  • item 1
  • +
  • item 2
  • +
+
+
name
+
+
    +
  • item 1
  • +
  • item 2
  • +
+
+
+. + +Body code block +. +:name: + code +:name: body + + code +. +
+
name
+
+
code
+
+
+
name
+
+

body

+
code
+
+
+
+. + +Body blockquote: +. +:name: + > item 1 + > item 2 +:name: > item 1 + > item 2 +. +
+
name
+
+
+

item 1 +item 2

+
+
+
name
+
+
+

item 1 +item 2

+
+
+
+. + +Body fence: +. +:name: + ```python + code + ``` +. +
+
name
+
+
code
+
+
+
+. + +Following blocks: +. +:name: body +- item 1 +:name: body +> block quote +:name: body +```python +code +``` +. +
+
name
+
+

body

+
+
+ +
+
name
+
+

body

+
+
+
+

block quote

+
+
+
name
+
+

body

+
+
+
code
+
+. + +In list: +. +- :name: body +- item 2 +. + +. + +In blockquote: +. +> :name: body +. +
+
+
name
+
+

body

+
+
+
+. diff --git a/tests/test_field_list.py b/tests/test_field_list.py new file mode 100644 index 0000000..568a52a --- /dev/null +++ b/tests/test_field_list.py @@ -0,0 +1,32 @@ +from pathlib import Path +from textwrap import dedent + +import pytest +from markdown_it import MarkdownIt +from markdown_it.utils import read_fixture_file + +from mdit_py_plugins.field_list import fieldlist_plugin + +FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures", "field_list.md") + + +def test_plugin_parse(data_regression): + md = MarkdownIt().use(fieldlist_plugin) + tokens = md.parse( + dedent( + """\ + :abc: Content + :def: Content + """ + ) + ) + data_regression.check([t.as_dict() for t in tokens]) + + +@pytest.mark.parametrize("line,title,input,expected", read_fixture_file(FIXTURE_PATH)) +def test_all(line, title, input, expected): + md = MarkdownIt("commonmark").use(fieldlist_plugin) + md.options["xhtmlOut"] = False + text = md.render(input) + print(text) + assert text.rstrip() == expected.rstrip() diff --git a/tests/test_field_list/test_plugin_parse.yml b/tests/test_field_list/test_plugin_parse.yml new file mode 100644 index 0000000..c188c03 --- /dev/null +++ b/tests/test_field_list/test_plugin_parse.yml @@ -0,0 +1,310 @@ +- attrs: + - - class + - field-list + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: + - 0 + - 2 + markup: '' + meta: {} + nesting: 1 + tag: dl + type: field_list_open +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 1 + map: + - 0 + - 0 + markup: '' + meta: {} + nesting: 1 + tag: dt + type: fieldlist_name_open +- attrs: null + block: true + children: + - attrs: null + block: false + children: null + content: abc + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: 0 + tag: '' + type: text + content: abc + hidden: false + info: '' + level: 2 + map: + - 0 + - 0 + markup: '' + meta: {} + nesting: 0 + tag: '' + type: inline +- attrs: null + 
block: true + children: null + content: '' + hidden: false + info: '' + level: 1 + map: null + markup: '' + meta: {} + nesting: -1 + tag: dt + type: fieldlist_name_close +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 1 + map: + - 0 + - 1 + markup: '' + meta: {} + nesting: 1 + tag: dd + type: fieldlist_body_open +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 2 + map: + - 0 + - 1 + markup: '' + meta: {} + nesting: 1 + tag: p + type: paragraph_open +- attrs: null + block: true + children: + - attrs: null + block: false + children: null + content: Content + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: 0 + tag: '' + type: text + content: Content + hidden: false + info: '' + level: 3 + map: + - 0 + - 1 + markup: '' + meta: {} + nesting: 0 + tag: '' + type: inline +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 2 + map: null + markup: '' + meta: {} + nesting: -1 + tag: p + type: paragraph_close +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 1 + map: null + markup: '' + meta: {} + nesting: -1 + tag: dd + type: fieldlist_body_close +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 1 + map: + - 1 + - 1 + markup: '' + meta: {} + nesting: 1 + tag: dt + type: fieldlist_name_open +- attrs: null + block: true + children: + - attrs: null + block: false + children: null + content: def + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: 0 + tag: '' + type: text + content: def + hidden: false + info: '' + level: 2 + map: + - 1 + - 1 + markup: '' + meta: {} + nesting: 0 + tag: '' + type: inline +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 1 + map: null + markup: '' + meta: {} + nesting: -1 + tag: dt + type: fieldlist_name_close 
+- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 1 + map: + - 1 + - 2 + markup: '' + meta: {} + nesting: 1 + tag: dd + type: fieldlist_body_open +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 2 + map: + - 1 + - 2 + markup: '' + meta: {} + nesting: 1 + tag: p + type: paragraph_open +- attrs: null + block: true + children: + - attrs: null + block: false + children: null + content: Content + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: 0 + tag: '' + type: text + content: Content + hidden: false + info: '' + level: 3 + map: + - 1 + - 2 + markup: '' + meta: {} + nesting: 0 + tag: '' + type: inline +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 2 + map: null + markup: '' + meta: {} + nesting: -1 + tag: p + type: paragraph_close +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 1 + map: null + markup: '' + meta: {} + nesting: -1 + tag: dd + type: fieldlist_body_close +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: -1 + tag: dl + type: field_list_close