From 2cfa7873fd1bd9978cad12559d6e3fd41ccafa40 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 27 Sep 2022 08:56:23 +0200 Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20NEW:=20Add=20`attrs=5Fplugin`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add attributes to self-closing inline elements: `![alt](https://image.com){#id .a b=c}` --- docs/index.md | 6 + mdit_py_plugins/attrs/__init__.py | 1 + mdit_py_plugins/attrs/index.py | 50 ++++++ mdit_py_plugins/attrs/parse.py | 265 ++++++++++++++++++++++++++++++ tests/fixtures/attrs.md | 46 ++++++ tests/test_attrs.py | 18 ++ 6 files changed, 386 insertions(+) create mode 100644 mdit_py_plugins/attrs/__init__.py create mode 100644 mdit_py_plugins/attrs/index.py create mode 100644 mdit_py_plugins/attrs/parse.py create mode 100644 tests/fixtures/attrs.md create mode 100644 tests/test_attrs.py diff --git a/docs/index.md b/docs/index.md index c4f2217..7e5f992 100644 --- a/docs/index.md +++ b/docs/index.md @@ -85,6 +85,12 @@ html_string = md.render("some *Markdown*") .. autofunction:: mdit_py_plugins.container.container_plugin ``` +## Attributes + +```{eval-rst} +.. 
autofunction:: mdit_py_plugins.attrs.attrs_plugin +``` + ## Math ```{eval-rst} diff --git a/mdit_py_plugins/attrs/__init__.py b/mdit_py_plugins/attrs/__init__.py new file mode 100644 index 0000000..9359cf8 --- /dev/null +++ b/mdit_py_plugins/attrs/__init__.py @@ -0,0 +1 @@ +from .index import attrs_plugin # noqa: F401 diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py new file mode 100644 index 0000000..bc3feda --- /dev/null +++ b/mdit_py_plugins/attrs/index.py @@ -0,0 +1,50 @@ +from markdown_it import MarkdownIt +from markdown_it.rules_inline import StateInline + +from .parse import ParseError, parse + + +def attrs_plugin(md: MarkdownIt, *, after=("image", "code_inline")): + """Parse inline attributes that immediately follow certain inline elements:: + + ![alt](https://image.com){#id .a b=c} + + Inside the curly braces, the following syntax is possible: + + - `.foo` specifies foo as a class. + Multiple classes may be given in this way; they will be combined. + - `#foo` specifies foo as an identifier. + An element may have only one identifier; + if multiple identifiers are given, the last one is used. + - `key="value"` or `key=value` specifies a key-value attribute. + Quotes are not needed when the value consists entirely of + ASCII alphanumeric characters or `_` or `:` or `-`. + Backslash escapes may be used inside quoted values. + - `%` begins a comment, which ends with the next `%` or the end of the attribute (`}`). + + **Note:** This plugin is currently limited to "self-closing" elements, + such as images and code spans. It does not work with links or emphasis. + + :param md: The MarkdownIt instance to modify. + :param after: The names of inline elements after which attributes may be specified. 
+ """ + + def attr_rule(state: StateInline, silent: bool): + if state.pending or not state.tokens: + return False + token = state.tokens[-1] + if token.type not in after: + return False + try: + new_pos, attrs = parse(state.src[state.pos :]) + except ParseError: + return False + state.pos += new_pos + 1 + if not silent: + if "class" in attrs and "class" in token.attrs: + attrs["class"] = f"{token.attrs['class']} {attrs['class']}" + token.attrs.update(attrs) + + return True + + md.inline.ruler.push("attr", attr_rule) diff --git a/mdit_py_plugins/attrs/parse.py b/mdit_py_plugins/attrs/parse.py new file mode 100644 index 0000000..5bf7f84 --- /dev/null +++ b/mdit_py_plugins/attrs/parse.py @@ -0,0 +1,265 @@ +"""Parser for attributes:: + + attributes { id = "foo", class = "bar baz", + key1 = "val1", key2 = "val2" } + +Adapted from: +https://github.com/jgm/djot/blob/fae7364b86bfce69bc6d5b5eede1f5196d845fd6/djot/attributes.lua#L1 + +syntax: + +attributes <- '{' whitespace* attribute (whitespace attribute)* whitespace* '}' +attribute <- identifier | class | keyval +identifier <- '#' name +class <- '.' 
name +name <- (nonspace, nonpunctuation other than ':', '_', '-')+ +keyval <- key '=' val +key <- (ASCII_ALPHANUM | ':' | '_' | '-')+ +val <- bareval | quotedval +bareval <- (ASCII_ALPHANUM | ':' | '_' | '-')+ +quotedval <- '"' ([^"] | '\"') '"' +""" +from __future__ import annotations + +import re +from enum import Enum +from typing import Callable + + +class State(Enum): + START = 0 + SCANNING = 1 + SCANNING_ID = 2 + SCANNING_CLASS = 3 + SCANNING_KEY = 4 + SCANNING_VALUE = 5 + SCANNING_BARE_VALUE = 6 + SCANNING_QUOTED_VALUE = 7 + SCANNING_COMMENT = 8 + SCANNING_ESCAPED = 9 + DONE = 10 + + +REGEX_SPACE = re.compile(r"\s") +REGEX_SPACE_PUNCTUATION = re.compile(r"[\s!\"#$%&'()*+,./;<=>?@[\]^`{|}~]") +REGEX_KEY_CHARACTERS = re.compile(r"[a-zA-Z\d_:-]") + + +class TokenState: + def __init__(self): + self._tokens = [] + self.start: int = 0 + + def set_start(self, start: int) -> None: + self.start = start + + def append(self, start: int, end: int, ttype: str): + self._tokens.append((start, end, ttype)) + + def compile(self, string: str) -> dict[str, str]: + """compile the tokens into a dictionary""" + attributes = {} + classes = [] + idx = 0 + while idx < len(self._tokens): + start, end, ttype = self._tokens[idx] + if ttype == "id": + attributes["id"] = string[start:end] + elif ttype == "class": + classes.append(string[start:end]) + elif ttype == "key": + key = string[start:end] + if idx + 1 < len(self._tokens): + start, end, ttype = self._tokens[idx + 1] + if ttype == "value": + if key == "class": + classes.append(string[start:end]) + else: + attributes[key] = string[start:end] + idx += 1 + idx += 1 + if classes: + attributes["class"] = " ".join(classes) + return attributes + + def __str__(self) -> str: + return str(self._tokens) + + def __repr__(self) -> str: + return repr(self._tokens) + + +class ParseError(Exception): + def __init__(self, msg: str, pos: int) -> None: + self.pos = pos + super().__init__(msg + f" at position {pos}") + + +def parse(string: str) -> 
tuple[int, dict[str, str]]: + """Parse attributes from start of string. + + :returns: (length of parsed string, dict of attributes) + """ + pos = 0 + state: State = State.START + tokens = TokenState() + while pos < len(string): + state = HANDLERS[state](string[pos], pos, tokens) + if state == State.DONE: + return pos, tokens.compile(string) + pos = pos + 1 + + return pos, tokens.compile(string) + + +def handle_start(char: str, pos: int, tokens: TokenState) -> State: + + if char == "{": + return State.SCANNING + raise ParseError("Attributes must start with '{'", pos) + + +def handle_scanning(char: str, pos: int, tokens: TokenState) -> State: + + if char == " " or char == "\t" or char == "\n" or char == "\r": + return State.SCANNING + if char == "}": + return State.DONE + if char == "#": + tokens.set_start(pos) + return State.SCANNING_ID + if char == "%": + tokens.set_start(pos) + return State.SCANNING_COMMENT + if char == ".": + tokens.set_start(pos) + return State.SCANNING_CLASS + if REGEX_KEY_CHARACTERS.fullmatch(char): + tokens.set_start(pos) + return State.SCANNING_KEY + + raise ParseError(f"Unexpected character whilst scanning: {char}", pos) + + +def handle_scanning_comment(char: str, pos: int, tokens: TokenState) -> State: + + if char == "%": + return State.SCANNING + + return State.SCANNING_COMMENT + + +def handle_scanning_id(char: str, pos: int, tokens: TokenState) -> State: + + if not REGEX_SPACE_PUNCTUATION.fullmatch(char): + return State.SCANNING_ID + + if char == "}": + if (pos - 1) > tokens.start: + tokens.append(tokens.start + 1, pos, "id") + return State.DONE + + if REGEX_SPACE.fullmatch(char): + if (pos - 1) > tokens.start: + tokens.append(tokens.start + 1, pos, "id") + return State.SCANNING + + raise ParseError(f"Unexpected character whilst scanning id: {char}", pos) + + +def handle_scanning_class(char: str, pos: int, tokens: TokenState) -> State: + + if not REGEX_SPACE_PUNCTUATION.fullmatch(char): + return State.SCANNING_CLASS + + if char == "}": + 
if (pos - 1) > tokens.start: + tokens.append(tokens.start + 1, pos, "class") + return State.DONE + + if REGEX_SPACE.fullmatch(char): + if (pos - 1) > tokens.start: + tokens.append(tokens.start + 1, pos, "class") + return State.SCANNING + + raise ParseError(f"Unexpected character whilst scanning class: {char}", pos) + + +def handle_scanning_key(char: str, pos: int, tokens: TokenState) -> State: + + if char == "=": + tokens.append(tokens.start, pos, "key") + return State.SCANNING_VALUE + + if REGEX_KEY_CHARACTERS.fullmatch(char): + return State.SCANNING_KEY + + raise ParseError(f"Unexpected character whilst scanning key: {char}", pos) + + +def handle_scanning_value(char: str, pos: int, tokens: TokenState) -> State: + + if char == '"': + tokens.set_start(pos) + return State.SCANNING_QUOTED_VALUE + + if REGEX_KEY_CHARACTERS.fullmatch(char): + tokens.set_start(pos) + return State.SCANNING_BARE_VALUE + + raise ParseError(f"Unexpected character whilst scanning value: {char}", pos) + + +def handle_scanning_bare_value(char: str, pos: int, tokens: TokenState) -> State: + + if REGEX_KEY_CHARACTERS.fullmatch(char): + return State.SCANNING_BARE_VALUE + + if char == "}": + tokens.append(tokens.start, pos, "value") + return State.DONE + + if REGEX_SPACE.fullmatch(char): + tokens.append(tokens.start, pos, "value") + return State.SCANNING + + raise ParseError(f"Unexpected character whilst scanning bare value: {char}", pos) + + +def handle_scanning_escaped(char: str, pos: int, tokens: TokenState) -> State: + return State.SCANNING_QUOTED_VALUE + + +def handle_scanning_quoted_value(char: str, pos: int, tokens: TokenState) -> State: + + if char == '"': + tokens.append(tokens.start + 1, pos, "value") + return State.SCANNING + + if char == "\\": + return State.SCANNING_ESCAPED + + if char == "{" or char == "}": + raise ParseError( + f"Unexpected character whilst scanning quoted value: {char}", pos + ) + + if char == "\n": + tokens.append(tokens.start + 1, pos, "value") + return 
State.SCANNING_QUOTED_VALUE + + return State.SCANNING_QUOTED_VALUE + + +HANDLERS: dict[State, Callable[[str, int, TokenState], State]] = { + State.START: handle_start, + State.SCANNING: handle_scanning, + State.SCANNING_COMMENT: handle_scanning_comment, + State.SCANNING_ID: handle_scanning_id, + State.SCANNING_CLASS: handle_scanning_class, + State.SCANNING_KEY: handle_scanning_key, + State.SCANNING_VALUE: handle_scanning_value, + State.SCANNING_BARE_VALUE: handle_scanning_bare_value, + State.SCANNING_QUOTED_VALUE: handle_scanning_quoted_value, + State.SCANNING_ESCAPED: handle_scanning_escaped, +} diff --git a/tests/fixtures/attrs.md b/tests/fixtures/attrs.md new file mode 100644 index 0000000..5910f00 --- /dev/null +++ b/tests/fixtures/attrs.md @@ -0,0 +1,46 @@ +simple image +. +![a](b){#id .a b=c} +. +

<p><img src="b" alt="a" id="id" b="c" class="a"></p>

+. + +simple inline code +. +`a`{#id .a b=c} +. +

<p><code id="id" b="c" class="a">a</code></p>

+. + +ignore if space +. +![a](b) {#id key="*"} +. +

<p><img src="b" alt="a"> {#id key=&quot;*&quot;}</p>

+. + +ignore if text +. +![a](b)b{#id key="*"} +. +

<p><img src="b" alt="a">b{#id key=&quot;*&quot;}</p>

+. + +multi-line +. +![a](b){ + #id .a + b=c + } +more +. +

<p><img src="b" alt="a" id="id" b="c" class="a"> +more</p>

+. + +combined +. +![a](b){#a .a}{.b class=x other=h}{#x class="x g" other=a} +. +

<p><img src="b" alt="a" id="x" class="a b x x g" other="a"></p>

+. diff --git a/tests/test_attrs.py b/tests/test_attrs.py new file mode 100644 index 0000000..f58fe69 --- /dev/null +++ b/tests/test_attrs.py @@ -0,0 +1,18 @@ +from pathlib import Path + +import pytest +from markdown_it import MarkdownIt +from markdown_it.utils import read_fixture_file + +from mdit_py_plugins.attrs import attrs_plugin + +FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures", "attrs.md") + + +@pytest.mark.parametrize("line,title,input,expected", read_fixture_file(FIXTURE_PATH)) +def test_fixture(line, title, input, expected): + md = MarkdownIt("commonmark").use(attrs_plugin) + md.options["xhtmlOut"] = False + text = md.render(input) + print(text) + assert text.rstrip() == expected.rstrip() From d564b6313da8ec81438004b7273ebf8d4611aafa Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 27 Sep 2022 09:18:41 +0200 Subject: [PATCH 2/3] update --- mdit_py_plugins/attrs/parse.py | 2 +- tests/test_attrs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mdit_py_plugins/attrs/parse.py b/mdit_py_plugins/attrs/parse.py index 5bf7f84..4a30353 100644 --- a/mdit_py_plugins/attrs/parse.py +++ b/mdit_py_plugins/attrs/parse.py @@ -21,8 +21,8 @@ class <- '.' 
name """ from __future__ import annotations -import re from enum import Enum +import re from typing import Callable diff --git a/tests/test_attrs.py b/tests/test_attrs.py index f58fe69..729162c 100644 --- a/tests/test_attrs.py +++ b/tests/test_attrs.py @@ -1,8 +1,8 @@ from pathlib import Path -import pytest from markdown_it import MarkdownIt from markdown_it.utils import read_fixture_file +import pytest from mdit_py_plugins.attrs import attrs_plugin From 8dbb8010ffa7caab2a021cd5f79ef97aa0573223 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 27 Sep 2022 09:20:17 +0200 Subject: [PATCH 3/3] Update index.md --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 7e5f992..8b38d31 100644 --- a/docs/index.md +++ b/docs/index.md @@ -85,7 +85,7 @@ html_string = md.render("some *Markdown*") .. autofunction:: mdit_py_plugins.container.container_plugin ``` -## Attributes +## Inline Attributes ```{eval-rst} .. autofunction:: mdit_py_plugins.attrs.attrs_plugin