Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ NEW: Add attrs_plugin #50

Merged
merged 4 commits into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ html_string = md.render("some *Markdown*")
.. autofunction:: mdit_py_plugins.container.container_plugin
```

## Inline Attributes

```{eval-rst}
.. autofunction:: mdit_py_plugins.attrs.attrs_plugin
```

## Math

```{eval-rst}
Expand Down
1 change: 1 addition & 0 deletions mdit_py_plugins/attrs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .index import attrs_plugin # noqa: F401
50 changes: 50 additions & 0 deletions mdit_py_plugins/attrs/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from markdown_it import MarkdownIt
from markdown_it.rules_inline import StateInline

from .parse import ParseError, parse


def attrs_plugin(md: MarkdownIt, *, after=("image", "code_inline")):
    """Enable inline attributes written directly after certain inline elements::

        ![alt](https://image.com){#id .a b=c}

    The following forms are recognised between the curly braces:

    - `.foo` adds foo as a class.
      Several classes may be given this way; they are combined.
    - `#foo` sets foo as the identifier.
      An element has a single identifier;
      when more than one is given, the last one wins.
    - `key="value"` or `key=value` sets a key-value attribute.
      The quotes may be dropped when the value is made up only of
      ASCII alphanumeric characters or `_` or `:` or `-`.
      Backslash escapes may be used inside quoted values.
    - `%` begins a comment, which ends with the next `%` or the end of the attribute (`}`).

    **Note:** This plugin is currently limited to "self-closing" elements,
    such as images and code spans. It does not work with links or emphasis.

    :param md: The MarkdownIt instance to modify.
    :param after: The names of inline elements after which attributes may be specified.
    """

    def attr_rule(state: StateInline, silent: bool):
        # Attributes only count when they touch the previous token:
        # there must be a token, and no pending text since it was emitted.
        if not state.tokens or state.pending:
            return False

        target = state.tokens[-1]
        if target.type not in after:
            return False

        try:
            stop, parsed = parse(state.src[state.pos :])
        except ParseError:
            # Not a valid attribute block; let other rules handle the text.
            return False

        # ``stop`` is the offset parse() halted at (the closing brace of a
        # complete block), so move one further to continue after it.
        state.pos += stop + 1

        if not silent:
            # Keep classes the token already carries and add the new ones after.
            if "class" in target.attrs and "class" in parsed:
                parsed["class"] = f"{target.attrs['class']} {parsed['class']}"
            target.attrs.update(parsed)

        return True

    md.inline.ruler.push("attr", attr_rule)
265 changes: 265 additions & 0 deletions mdit_py_plugins/attrs/parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
"""Parser for attributes::

attributes { id = "foo", class = "bar baz",
key1 = "val1", key2 = "val2" }

Adapted from:
https://github.com/jgm/djot/blob/fae7364b86bfce69bc6d5b5eede1f5196d845fd6/djot/attributes.lua#L1

syntax:

attributes <- '{' whitespace* attribute (whitespace attribute)* whitespace* '}'
attribute <- identifier | class | keyval
identifier <- '#' name
class <- '.' name
name <- (nonspace, nonpunctuation other than ':', '_', '-')+
keyval <- key '=' val
key <- (ASCII_ALPHANUM | ':' | '_' | '-')+
val <- bareval | quotedval
bareval <- (ASCII_ALPHANUM | ':' | '_' | '-')+
quotedval <- '"' ([^"] | '\"')* '"'
"""
from __future__ import annotations

from enum import Enum
import re
from typing import Callable


class State(Enum):
    """States of the character-by-character attribute parser."""

    # Before the opening "{" has been seen.
    START = 0
    # Between attributes, deciding what the next character introduces.
    SCANNING = 1

    # Inside one particular kind of attribute.
    SCANNING_ID = 2
    SCANNING_CLASS = 3
    SCANNING_KEY = 4
    SCANNING_VALUE = 5
    SCANNING_BARE_VALUE = 6
    SCANNING_QUOTED_VALUE = 7
    SCANNING_COMMENT = 8
    SCANNING_ESCAPED = 9

    # The closing "}" has been reached.
    DONE = 10


# A single whitespace character.
REGEX_SPACE = re.compile(r"\s")
# Whitespace or punctuation that cannot be part of an id/class name.
# ":", "_" and "-" are not in the set, so they remain legal inside names.
# NOTE(review): backslash is not listed either, so it is accepted inside
# names -- confirm this is intended.
REGEX_SPACE_PUNCTUATION = re.compile(r"[\s!\"#$%&'()*+,./;<=>?@[\]^`{|}~]")
# A single character allowed in keys and bare (unquoted) values:
# ASCII letters, digits (``\d``), "_", ":" or "-".
REGEX_KEY_CHARACTERS = re.compile(r"[a-zA-Z\d_:-]")


class TokenState:
    """Collect the ``(start, end, type)`` spans found while scanning attributes."""

    def __init__(self):
        # Recorded spans, in the order they were appended.
        self._tokens = []
        # Position most recently stored with set_start().
        self.start: int = 0

    def set_start(self, start: int) -> None:
        """Remember ``start`` as the position where the current token began."""
        self.start = start

    def append(self, start: int, end: int, ttype: str):
        """Record the span ``string[start:end]`` with token type ``ttype``."""
        self._tokens.append((start, end, ttype))

    def compile(self, string: str) -> dict[str, str]:
        """Build the attribute dictionary from the recorded spans.

        ``id`` spans overwrite each other, so the last one is kept. ``class``
        spans and ``class=...`` key/value pairs are gathered and joined with
        single spaces. A ``key`` span only contributes when the span right
        after it is a ``value``; ``value`` spans without a key directly
        before them are ignored.

        :param string: the text the recorded positions refer to
        :returns: mapping of attribute name to attribute value
        """
        result = {}
        class_names = []
        awaiting_value = None  # text of a key whose value has not been seen yet
        for begin, stop, kind in self._tokens:
            text = string[begin:stop]
            # A key is only paired with the token immediately following it.
            key, awaiting_value = awaiting_value, None
            if kind == "value":
                if key is None:
                    continue
                if key == "class":
                    class_names.append(text)
                else:
                    result[key] = text
            elif kind == "id":
                result["id"] = text
            elif kind == "class":
                class_names.append(text)
            elif kind == "key":
                awaiting_value = text
        if class_names:
            result["class"] = " ".join(class_names)
        return result

    def __str__(self) -> str:
        return f"{self._tokens!s}"

    def __repr__(self) -> str:
        return f"{self._tokens!r}"


class ParseError(Exception):
    """Raised when text cannot be parsed as an attribute block.

    The offending position is kept on ``pos`` and appended to the message.
    """

    def __init__(self, msg: str, pos: int) -> None:
        message = msg + f" at position {pos}"
        super().__init__(message)
        self.pos = pos


def parse(string: str) -> tuple[int, dict[str, str]]:
    """Parse an attribute block from the start of ``string``.

    Each character is passed to the handler registered in ``HANDLERS`` for
    the current state, starting from ``State.START``. Parsing stops as soon
    as a handler reports ``State.DONE``; text after that point is not read.

    :param string: text expected to begin with ``{``
    :returns: (index of the closing ``}``, dict of attributes)
    :raises ParseError: if a handler rejects a character, or if the input
        ends before the closing ``}`` is reached
    """
    state: State = State.START
    tokens = TokenState()
    for pos, char in enumerate(string):
        state = HANDLERS[state](char, pos, tokens)
        if state == State.DONE:
            return pos, tokens.compile(string)

    # Input ran out before DONE: the block is unterminated (or the string is
    # empty). Reporting success here would make the caller consume text that
    # is not a complete attribute block, so treat it as a parse failure.
    raise ParseError("Attributes must end with '}'", len(string))


def handle_start(char: str, pos: int, tokens: TokenState) -> State:
    """Handle the very first character, which has to be the opening brace.

    :raises ParseError: if ``char`` is anything other than ``{``
    """
    if char != "{":
        raise ParseError("Attributes must start with '{'", pos)
    return State.SCANNING


def handle_scanning(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character between attributes and decide what it introduces.

    Spaces, tabs and line breaks are skipped and ``}`` finishes the block.
    ``#``, ``%``, ``.`` or a key character starts an id, comment, class or
    key respectively, and ``pos`` is stored as the start of that token.

    :raises ParseError: if ``char`` cannot start any attribute
    """
    if char in (" ", "\t", "\n", "\r"):
        return State.SCANNING
    if char == "}":
        return State.DONE

    if char == "#":
        opened = State.SCANNING_ID
    elif char == "%":
        opened = State.SCANNING_COMMENT
    elif char == ".":
        opened = State.SCANNING_CLASS
    elif REGEX_KEY_CHARACTERS.fullmatch(char):
        opened = State.SCANNING_KEY
    else:
        raise ParseError(f"Unexpected character whilst scanning: {char}", pos)

    tokens.set_start(pos)
    return opened


def handle_scanning_comment(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a ``%`` comment.

    A comment ends with the next ``%``, after which scanning for further
    attributes resumes, or with ``}``, which also ends the attribute block.
    Every other character is skipped; no token is recorded for a comment.
    """
    if char == "%":
        return State.SCANNING

    # The closing brace terminates the comment and the whole block; without
    # this, a comment that is not closed by "%" would never reach DONE.
    if char == "}":
        return State.DONE

    return State.SCANNING_COMMENT


def handle_scanning_id(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a ``#identifier``.

    Name characters keep the scan going. ``}`` or whitespace ends the
    identifier, which is recorded as an ``id`` token unless it is empty.

    :raises ParseError: on any other punctuation character
    """
    if not REGEX_SPACE_PUNCTUATION.fullmatch(char):
        return State.SCANNING_ID

    closes_block = char == "}"
    if not closes_block and not REGEX_SPACE.fullmatch(char):
        raise ParseError(f"Unexpected character whilst scanning id: {char}", pos)

    # tokens.start is the "#" itself; skip it and ignore an empty name.
    if pos - tokens.start > 1:
        tokens.append(tokens.start + 1, pos, "id")
    return State.DONE if closes_block else State.SCANNING


def handle_scanning_class(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a ``.class`` name.

    Name characters keep the scan going. ``}`` or whitespace ends the
    name, which is recorded as a ``class`` token unless it is empty.

    :raises ParseError: on any other punctuation character
    """
    if not REGEX_SPACE_PUNCTUATION.fullmatch(char):
        return State.SCANNING_CLASS

    closes_block = char == "}"
    if not closes_block and not REGEX_SPACE.fullmatch(char):
        raise ParseError(f"Unexpected character whilst scanning class: {char}", pos)

    # tokens.start is the "." itself; skip it and ignore an empty name.
    if pos - tokens.start > 1:
        tokens.append(tokens.start + 1, pos, "class")
    return State.DONE if closes_block else State.SCANNING


def handle_scanning_key(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a key.

    ``=`` ends the key, which is recorded as a ``key`` token, and a value is
    expected next. Further key characters extend the key.

    :raises ParseError: if ``char`` is neither ``=`` nor a key character
    """
    if char == "=":
        tokens.append(tokens.start, pos, "key")
        return State.SCANNING_VALUE

    if not REGEX_KEY_CHARACTERS.fullmatch(char):
        raise ParseError(f"Unexpected character whilst scanning key: {char}", pos)

    return State.SCANNING_KEY


def handle_scanning_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle the first character after ``=`` and pick the kind of value.

    A double quote opens a quoted value and a key character opens a bare
    value; in both cases ``pos`` is stored as the start of the value.

    :raises ParseError: if ``char`` can start neither kind of value
    """
    if char == '"':
        value_state = State.SCANNING_QUOTED_VALUE
    elif REGEX_KEY_CHARACTERS.fullmatch(char):
        value_state = State.SCANNING_BARE_VALUE
    else:
        raise ParseError(f"Unexpected character whilst scanning value: {char}", pos)

    tokens.set_start(pos)
    return value_state


def handle_scanning_bare_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside an unquoted value.

    Key characters extend the value. ``}`` or whitespace ends it, and the
    span from the stored start up to ``pos`` is recorded as a ``value`` token.

    :raises ParseError: on any other character
    """
    if REGEX_KEY_CHARACTERS.fullmatch(char):
        return State.SCANNING_BARE_VALUE

    if char == "}":
        following = State.DONE
    elif REGEX_SPACE.fullmatch(char):
        following = State.SCANNING
    else:
        raise ParseError(
            f"Unexpected character whilst scanning bare value: {char}", pos
        )

    tokens.append(tokens.start, pos, "value")
    return following


def handle_scanning_escaped(char: str, pos: int, tokens: TokenState) -> State:
    """Skip the character that follows a backslash in a quoted value.

    Whatever ``char`` is, it is accepted without inspection and scanning of
    the quoted value resumes.
    """
    resumed = State.SCANNING_QUOTED_VALUE
    return resumed


def handle_scanning_quoted_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a double-quoted value.

    The closing ``"`` records the text between the quotes as one ``value``
    token and returns to scanning for further attributes. A backslash makes
    the next character be skipped unexamined. Any other character, including
    a line break, is part of the value.

    :raises ParseError: if an unescaped ``{`` or ``}`` appears in the value
    """
    if char == '"':
        # tokens.start is the opening quote, so the value begins just after it.
        tokens.append(tokens.start + 1, pos, "value")
        return State.SCANNING

    if char == "\\":
        return State.SCANNING_ESCAPED

    if char == "{" or char == "}":
        raise ParseError(
            f"Unexpected character whilst scanning quoted value: {char}", pos
        )

    # No token is emitted at a line break. The start position is not reset
    # there, so a token emitted now would be a prefix of the one emitted at
    # the closing quote, and TokenState.compile pairs a key with the first
    # value token after it -- cutting the value off at the first line.
    return State.SCANNING_QUOTED_VALUE


# Dispatch table used by parse(): maps the current state to the handler that
# consumes the next character and returns the state to continue in.
# State.DONE has no entry because parse() returns as soon as it is reached.
HANDLERS: dict[State, Callable[[str, int, TokenState], State]] = {
    State.START: handle_start,
    State.SCANNING: handle_scanning,
    State.SCANNING_COMMENT: handle_scanning_comment,
    State.SCANNING_ID: handle_scanning_id,
    State.SCANNING_CLASS: handle_scanning_class,
    State.SCANNING_KEY: handle_scanning_key,
    State.SCANNING_VALUE: handle_scanning_value,
    State.SCANNING_BARE_VALUE: handle_scanning_bare_value,
    State.SCANNING_QUOTED_VALUE: handle_scanning_quoted_value,
    State.SCANNING_ESCAPED: handle_scanning_escaped,
}
46 changes: 46 additions & 0 deletions tests/fixtures/attrs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
simple image
.
![a](b){#id .a b=c}
.
<p><img src="b" alt="a" id="id" b="c" class="a"></p>
.

simple inline code
.
`a`{#id .a b=c}
.
<p><code id="id" b="c" class="a">a</code></p>
.

ignore if space
.
![a](b) {#id key="*"}
.
<p><img src="b" alt="a"> {#id key=&quot;*&quot;}</p>
.

ignore if text
.
![a](b)b{#id key="*"}
.
<p><img src="b" alt="a">b{#id key=&quot;*&quot;}</p>
.

multi-line
.
![a](b){
#id .a
b=c
}
more
.
<p><img src="b" alt="a" id="id" b="c" class="a">
more</p>
.

combined
.
![a](b){#a .a}{.b class=x other=h}{#x class="x g" other=a}
.
<p><img src="b" alt="a" id="x" class="a b x x g" other="a"></p>
.
18 changes: 18 additions & 0 deletions tests/test_attrs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from pathlib import Path

from markdown_it import MarkdownIt
from markdown_it.utils import read_fixture_file
import pytest

from mdit_py_plugins.attrs import attrs_plugin

# Fixture cases live next to this test module, under fixtures/attrs.md.
FIXTURE_PATH = Path(__file__).parent / "fixtures" / "attrs.md"


@pytest.mark.parametrize("line,title,input,expected", read_fixture_file(FIXTURE_PATH))
def test_fixture(line, title, input, expected):
    """Render one fixture case with the attrs plugin and compare the HTML."""
    parser = MarkdownIt("commonmark").use(attrs_plugin)
    parser.options["xhtmlOut"] = False
    rendered = parser.render(input)
    # Printed so the rendered HTML shows up in pytest's captured output.
    print(rendered)
    # Trailing whitespace is not significant for the comparison.
    assert rendered.rstrip() == expected.rstrip()