Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfixes and refactoring #35

Merged
merged 9 commits into from
Apr 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[flake8]
max-line-length = 88
# the default ignores minus E704
ignore = E121,E123,E126,E226,E24,W503,W504
ignore = E121,E123,E126,E226,E203,E24,W503,W504

47 changes: 29 additions & 18 deletions snakefmt/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
PathLike = Union[Path, str]
rule_like_formatted = {"rule", "checkpoint"}

# Matches one entire triple-quoted string (double- or single-quote style).
# DOTALL lets the match span newlines; the non-greedy `.*?` keeps adjacent
# triple-quoted literals from being merged into a single match.
triple_quote_matcher = re.compile(r"(\"{3}.*?\"{3})|('{3}.*?'{3})", re.DOTALL)


class Formatter(Parser):
def __init__(
Expand Down Expand Up @@ -110,20 +112,30 @@ def run_black_format_str(self, string: str, target_indent: int) -> str:
f"Got error:\n```\n{str(e)}\n```\n" f"while formatting code with black."
) from None

indented = textwrap.indent(fmted, TAB * target_indent)
# Only indent non-triple-quoted string portions
pos = 0
used_indent = TAB * target_indent
indented = ""
for match in re.finditer(triple_quote_matcher, fmted):
indented += textwrap.indent(fmted[pos : match.start()], used_indent)
match_slice = fmted[match.start() : match.end()]
indented += f"{used_indent}{match_slice}"
pos = match.end()
indented += textwrap.indent(fmted[pos:], used_indent)

return indented

def format_param(
self,
parameter: Parameter,
used_indent: str,
target_indent: str,
inline_formatting: bool,
single_param: bool = False,
) -> str:
if inline_formatting:
used_indent = ""
comments = "\n{i}".format(i=used_indent).join(parameter.comments)
val = parameter.value
target_indent = 0
comments = f"\n{TAB * target_indent}".join(parameter.comments)
val = str(parameter)

try:
ast_parse(f"param({val})")
Expand All @@ -133,28 +145,27 @@ def format_param(
if inline_formatting:
val = val.replace("\n", "")
try:
val = self.run_black_format_str(val, 0)
val = self.run_black_format_str(val, target_indent)
if parameter.has_a_key(): # Remove space either side of '='
match_equal = re.match("(.*?) = (.*)", val, re.DOTALL)
val = f"{match_equal.group(1)}={match_equal.group(2)}"

except InvalidPython:
if "**" in val:
val = val.replace("** ", "**")
pass
val = val.strip("\n")
val = re.sub("\n +", "\n", val)
val = val.replace("\n", f"\n{used_indent}")

val = val.strip("\n")
if single_param:
result = f"{val}{comments}\n"
else:
result = f"{val},{comments}\n"
if parameter.has_key(): # noqa: W601
result = f"{parameter.key}={result}"
result = f"{used_indent}{result}"
return result

def format_params(self, parameters: ParameterSyntax, in_rule: bool) -> str:
used_indent = TAB * (parameters.target_indent - 1)
target_indent = parameters.target_indent
used_indent = TAB * (target_indent - 1)
result = f"{used_indent}{parameters.keyword_name}:{parameters.comment}"
used_indent += TAB

p_class = parameters.__class__
single_param = issubclass(p_class, SingleParam)
Expand All @@ -168,10 +179,10 @@ def format_params(self, parameters: ParameterSyntax, in_rule: bool) -> str:
else:
result += "\n"

for elem in parameters.positional_params:
result += self.format_param(elem, used_indent, inline_fmting, single_param)
for elem in parameters.keyword_params:
result += self.format_param(elem, used_indent, inline_fmting, single_param)
for elem in parameters.all_params:
result += self.format_param(
elem, target_indent, inline_fmting, single_param
)
return result

def add_newlines(self, cur_indent: int, keyword_name: str = ""):
Expand Down
4 changes: 3 additions & 1 deletion snakefmt/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __init__(self, snakefile: TokenIterator):
)
self.context_stack = [self.grammar]
self.snakefile = snakefile
from_python = False

status = self.context.get_next_queriable(self.snakefile)
self.buffer = status.buffer
Expand All @@ -70,6 +71,7 @@ def __init__(self, snakefile: TokenIterator):
self.flush_buffer(from_python)
status = self.process_keyword(status, from_python)
else:
from_python = False
if not self.context.accepts_python_code and not keyword[0] == "#":
raise SyntaxError(
f"L{status.token.start[0]}: Unrecognised keyword '{keyword}' "
Expand All @@ -80,7 +82,7 @@ def __init__(self, snakefile: TokenIterator):
status = self.context.get_next_queriable(self.snakefile)
self.buffer += status.buffer
self.context.cur_indent = status.indent
self.flush_buffer()
self.flush_buffer(from_python)

@property
def vocab(self) -> Vocabulary:
Expand Down
66 changes: 47 additions & 19 deletions snakefmt/parser/syntax.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import tokenize
from typing import NamedTuple
from typing import NamedTuple, Optional

from snakefmt.types import Token, TokenIterator, Parameter
from snakefmt.exceptions import (
Expand All @@ -13,7 +13,7 @@
)

possibly_named_keywords = {"rule", "checkpoint", "subworkflow"}
possibly_duplicated_keywords = {"include", "ruleorder", "localrules"}
possibly_duplicated_keywords = {"include", "ruleorder", "localrules", "configfile"}

"""
Token parsing
Expand Down Expand Up @@ -44,20 +44,34 @@ def is_comma_sign(token: Token):
return token.type == tokenize.OP and token.string == ","


def is_spaceable(token: Token):
    """Tell whether a space may be inserted around this token.

    Names, string literals and numbers qualify; all other token kinds
    (operators, newlines, comments, ...) do not.
    """
    spaceable_types = (tokenize.NAME, tokenize.STRING, tokenize.NUMBER)
    return token.type in spaceable_types


def not_empty(token: Token):
    """Tell whether the token carries any non-whitespace text."""
    content = token.string
    return bool(content) and not content.isspace()


"""
Token spacing: for when cannot run black
"""
# Fallback spacing table, used when the code cannot be run through black:
# a space is emitted between two consecutive tokens when the first token's
# type maps to a set that contains the second token's type.
spacing_triggers = {
    tokenize.NAME: {tokenize.NAME, tokenize.STRING, tokenize.NUMBER, tokenize.OP},
    tokenize.STRING: {tokenize.NAME, tokenize.OP},
    tokenize.NUMBER: {tokenize.NAME, tokenize.OP},
    tokenize.OP: {tokenize.NAME, tokenize.STRING, tokenize.NUMBER, tokenize.OP},
}


def operator_skip_spacing(prev_token: Token, token: Token) -> bool:
    """Decide whether to suppress the space normally inserted between tokens.

    Only pairs involving at least one operator token are candidates for
    suppression: no space is wanted right after an opening bracket, nor
    right before a closing bracket, a subscript '[' or a ':'.
    """
    involves_operator = (
        prev_token.type == tokenize.OP or token.type == tokenize.OP
    )
    if not involves_operator:
        return False
    return (
        prev_token.string in BRACKETS_OPEN
        or token.string in BRACKETS_CLOSE
        or token.string in {"[", ":"}
    )


class Vocabulary:
"""
Responsible for recognising keywords
Expand Down Expand Up @@ -179,24 +193,28 @@ def check_empty(self):
def effective_indent(self) -> int:
return max(0, self.cur_indent - self.target_indent)

def get_next_queriable(self, snakefile) -> Syntax.Status:
def get_next_queriable(self, snakefile: TokenIterator) -> Syntax.Status:
buffer = ""
newline, used_name = False, True
newline = False
pythonable = False
prev_token: Optional[Token] = Token(tokenize.NAME)
while True:
token = next(snakefile)
if token.type == tokenize.INDENT:
self.cur_indent += 1
prev_token = None
continue
elif token.type == tokenize.DEDENT:
if self.cur_indent > 0:
self.cur_indent -= 1
prev_token = None
continue
elif token.type == tokenize.ENDMARKER:
return self.Status(token, self.cur_indent, buffer, True, pythonable)
elif token.type == tokenize.NEWLINE or token.type == tokenize.NL:
self.queriable, newline = True, True
buffer += "\n"
prev_token = None
continue

if newline: # Records relative tabbing, used for python code formatting
Expand All @@ -205,9 +223,12 @@ def get_next_queriable(self, snakefile) -> Syntax.Status:
if token.type == tokenize.NAME and self.queriable:
self.queriable = False
return self.Status(token, self.cur_indent, buffer, False, pythonable)
if used_name and is_spaceable(token) and not newline:
buffer += " "
used_name = token.type == tokenize.NAME

if prev_token is not None and prev_token.type in spacing_triggers:
if not operator_skip_spacing(prev_token, token):
if token.type in spacing_triggers[prev_token.type]:
buffer += " "
prev_token = token
if newline:
newline = False
if not pythonable and token.type != tokenize.COMMENT:
Expand Down Expand Up @@ -235,6 +256,7 @@ def __init__(
self.incident_vocab = incident_vocab
self._brackets = list()
self.found_newline, self.in_lambda = False, False
self.latest_pushed_param = None

self.parse_params(snakefile)

Expand Down Expand Up @@ -288,7 +310,11 @@ def process_token(self, cur_param: Parameter) -> Parameter:
if cur_param.has_value():
cur_param.add_elem(self.token)
elif token_type == tokenize.COMMENT:
cur_param.comments.append(" " + self.token.string)
if str(cur_param) == "":
target = self.latest_pushed_param.comments
else:
target = cur_param.comments
target.append(" " + self.token.string)
elif is_equal_sign(self.token) and not self.in_brackets:
cur_param.to_key_val_mode(self.token)
elif is_comma_sign(self.token) and not self.in_brackets and not self.in_lambda:
Expand Down Expand Up @@ -318,10 +344,12 @@ def flush_param(self, parameter: Parameter, skip_empty: bool = False) -> None:
if not parameter.has_value() and skip_empty:
return

if parameter.has_key(): # noqa: W601
if parameter.has_a_key():
self.keyword_params.append(parameter)
self.latest_pushed_param = self.keyword_params[-1]
else:
self.positional_params.append(parameter)
self.latest_pushed_param = self.positional_params[-1]

def num_params(self):
    """Total number of parameters collected so far (keyword plus positional)."""
    groups = (self.keyword_params, self.positional_params)
    return sum(len(group) for group in groups)
Expand Down
20 changes: 16 additions & 4 deletions snakefmt/types.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import tokenize
from typing import Iterator
from collections import namedtuple
from typing import Iterator, NamedTuple, Tuple

from snakefmt.exceptions import InvalidParameterSyntax

Token = namedtuple

class Token(NamedTuple):
    """Lightweight token record.

    Field order (type, string, start, end) matches the leading fields of
    ``tokenize.TokenInfo``, so tokens from ``tokenize`` and hand-built
    instances can be handled uniformly. Defaults allow constructing a
    token from its type alone.
    """

    type: int  # tokenize token-type constant (e.g. tokenize.NAME)
    string: str = ""  # token text; empty by default
    start: Tuple[int, int] = (-1, -1)  # (row, col) start; (-1, -1) = unspecified
    end: Tuple[int, int] = (-1, -1)  # (row, col) end; (-1, -1) = unspecified


TokenIterator = Iterator[Token]


Expand All @@ -20,7 +26,13 @@ def __init__(self, line_nb: str):
self.comments = list()
self.len = 0

def has_key(self) -> bool:
def __repr__(self):
    """Render the parameter as source text: ``key=value`` when keyed, else the bare value."""
    rendered = self.value
    if self.has_a_key():
        rendered = f"{self.key}={rendered}"
    return rendered

def has_a_key(self) -> bool:
    """Report whether this parameter was supplied in ``key=value`` form."""
    return self.key != ""

def has_value(self) -> bool:
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pytest
from click.testing import CliRunner


@pytest.fixture
def cli_runner() -> CliRunner:
    """Provide a fresh Click ``CliRunner`` per test for invoking the CLI in isolation."""
    return CliRunner()


# Load pytest's own test-support plugin (provides the `testdir` fixture and
# friends for tests that need to run pytest against temporary files).
pytest_plugins = "pytester"
Loading