Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfixes and refactoring #35

Merged
merged 9 commits into from
Apr 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[flake8]
max-line-length = 88
# the default ignores minus E704
ignore = E121,E123,E126,E226,E24,W503,W504
ignore = E121,E123,E126,E226,E203,E24,W503,W504

47 changes: 29 additions & 18 deletions snakefmt/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
PathLike = Union[Path, str]
rule_like_formatted = {"rule", "checkpoint"}

# Matches one entire triple-quoted string (double- or single-quote style).
# DOTALL lets the match span newlines; the non-greedy `.*?` keeps adjacent
# triple-quoted literals from being merged into a single match.
triple_quote_matcher = re.compile(r"(\"{3}.*?\"{3})|('{3}.*?'{3})", re.DOTALL)


class Formatter(Parser):
def __init__(
Expand Down Expand Up @@ -110,20 +112,30 @@ def run_black_format_str(self, string: str, target_indent: int) -> str:
f"Got error:\n```\n{str(e)}\n```\n" f"while formatting code with black."
) from None

indented = textwrap.indent(fmted, TAB * target_indent)
# Only indent non-triple-quoted string portions
pos = 0
used_indent = TAB * target_indent
indented = ""
for match in re.finditer(triple_quote_matcher, fmted):
indented += textwrap.indent(fmted[pos : match.start()], used_indent)
match_slice = fmted[match.start() : match.end()]
indented += f"{used_indent}{match_slice}"
pos = match.end()
indented += textwrap.indent(fmted[pos:], used_indent)

return indented

def format_param(
self,
parameter: Parameter,
used_indent: str,
target_indent: str,
inline_formatting: bool,
single_param: bool = False,
) -> str:
if inline_formatting:
used_indent = ""
comments = "\n{i}".format(i=used_indent).join(parameter.comments)
val = parameter.value
target_indent = 0
comments = f"\n{TAB * target_indent}".join(parameter.comments)
val = str(parameter)

try:
ast_parse(f"param({val})")
Expand All @@ -133,28 +145,27 @@ def format_param(
if inline_formatting:
val = val.replace("\n", "")
try:
val = self.run_black_format_str(val, 0)
val = self.run_black_format_str(val, target_indent)
if parameter.has_a_key(): # Remove space either side of '='
match_equal = re.match("(.*?) = (.*)", val, re.DOTALL)
val = f"{match_equal.group(1)}={match_equal.group(2)}"

except InvalidPython:
if "**" in val:
val = val.replace("** ", "**")
pass
val = val.strip("\n")
val = re.sub("\n +", "\n", val)
val = val.replace("\n", f"\n{used_indent}")

val = val.strip("\n")
if single_param:
result = f"{val}{comments}\n"
else:
result = f"{val},{comments}\n"
if parameter.has_key(): # noqa: W601
result = f"{parameter.key}={result}"
result = f"{used_indent}{result}"
return result

def format_params(self, parameters: ParameterSyntax, in_rule: bool) -> str:
used_indent = TAB * (parameters.target_indent - 1)
target_indent = parameters.target_indent
used_indent = TAB * (target_indent - 1)
result = f"{used_indent}{parameters.keyword_name}:{parameters.comment}"
used_indent += TAB

p_class = parameters.__class__
single_param = issubclass(p_class, SingleParam)
Expand All @@ -168,10 +179,10 @@ def format_params(self, parameters: ParameterSyntax, in_rule: bool) -> str:
else:
result += "\n"

for elem in parameters.positional_params:
result += self.format_param(elem, used_indent, inline_fmting, single_param)
for elem in parameters.keyword_params:
result += self.format_param(elem, used_indent, inline_fmting, single_param)
for elem in parameters.all_params:
result += self.format_param(
elem, target_indent, inline_fmting, single_param
)
return result

def add_newlines(self, cur_indent: int, keyword_name: str = ""):
Expand Down
4 changes: 3 additions & 1 deletion snakefmt/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __init__(self, snakefile: TokenIterator):
)
self.context_stack = [self.grammar]
self.snakefile = snakefile
from_python = False

status = self.context.get_next_queriable(self.snakefile)
self.buffer = status.buffer
Expand All @@ -70,6 +71,7 @@ def __init__(self, snakefile: TokenIterator):
self.flush_buffer(from_python)
status = self.process_keyword(status, from_python)
else:
from_python = False
if not self.context.accepts_python_code and not keyword[0] == "#":
raise SyntaxError(
f"L{status.token.start[0]}: Unrecognised keyword '{keyword}' "
Expand All @@ -80,7 +82,7 @@ def __init__(self, snakefile: TokenIterator):
status = self.context.get_next_queriable(self.snakefile)
self.buffer += status.buffer
self.context.cur_indent = status.indent
self.flush_buffer()
self.flush_buffer(from_python)

@property
def vocab(self) -> Vocabulary:
Expand Down
66 changes: 47 additions & 19 deletions snakefmt/parser/syntax.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import tokenize
from typing import NamedTuple
from typing import NamedTuple, Optional

from snakefmt.types import Token, TokenIterator, Parameter
from snakefmt.exceptions import (
Expand All @@ -13,7 +13,7 @@
)

possibly_named_keywords = {"rule", "checkpoint", "subworkflow"}
possibly_duplicated_keywords = {"include", "ruleorder", "localrules"}
possibly_duplicated_keywords = {"include", "ruleorder", "localrules", "configfile"}

"""
Token parsing
Expand Down Expand Up @@ -44,20 +44,34 @@ def is_comma_sign(token: Token):
return token.type == tokenize.OP and token.string == ","


def is_spaceable(token: Token):
    """Tell whether a space may be inserted around this token.

    Names, string literals and numbers qualify; all other token kinds
    (operators, newlines, comments, ...) do not.
    """
    spaceable_types = (tokenize.NAME, tokenize.STRING, tokenize.NUMBER)
    return token.type in spaceable_types


def not_empty(token: Token):
    """Tell whether the token carries any non-whitespace text."""
    content = token.string
    return bool(content) and not content.isspace()


"""
Token spacing: for when cannot run black
"""
# Fallback spacing table, used when the code cannot be run through black:
# a space is emitted between two consecutive tokens when the first token's
# type maps to a set that contains the second token's type.
spacing_triggers = {
    tokenize.NAME: {tokenize.NAME, tokenize.STRING, tokenize.NUMBER, tokenize.OP},
    tokenize.STRING: {tokenize.NAME, tokenize.OP},
    tokenize.NUMBER: {tokenize.NAME, tokenize.OP},
    tokenize.OP: {tokenize.NAME, tokenize.STRING, tokenize.NUMBER, tokenize.OP},
}


def operator_skip_spacing(prev_token: Token, token: Token) -> bool:
    """Decide whether to suppress the space normally inserted between tokens.

    Only pairs involving at least one operator token are candidates for
    suppression: no space is wanted right after an opening bracket, nor
    right before a closing bracket, a subscript '[' or a ':'.
    """
    involves_operator = (
        prev_token.type == tokenize.OP or token.type == tokenize.OP
    )
    if not involves_operator:
        return False
    return (
        prev_token.string in BRACKETS_OPEN
        or token.string in BRACKETS_CLOSE
        or token.string in {"[", ":"}
    )


class Vocabulary:
"""
Responsible for recognising keywords
Expand Down Expand Up @@ -179,24 +193,28 @@ def check_empty(self):
def effective_indent(self) -> int:
return max(0, self.cur_indent - self.target_indent)

def get_next_queriable(self, snakefile) -> Syntax.Status:
def get_next_queriable(self, snakefile: TokenIterator) -> Syntax.Status:
buffer = ""
newline, used_name = False, True
newline = False
pythonable = False
prev_token: Optional[Token] = Token(tokenize.NAME)
while True:
token = next(snakefile)
if token.type == tokenize.INDENT:
self.cur_indent += 1
prev_token = None
continue
elif token.type == tokenize.DEDENT:
if self.cur_indent > 0:
self.cur_indent -= 1
prev_token = None
continue
elif token.type == tokenize.ENDMARKER:
return self.Status(token, self.cur_indent, buffer, True, pythonable)
elif token.type == tokenize.NEWLINE or token.type == tokenize.NL:
self.queriable, newline = True, True
buffer += "\n"
prev_token = None
continue

if newline: # Records relative tabbing, used for python code formatting
Expand All @@ -205,9 +223,12 @@ def get_next_queriable(self, snakefile) -> Syntax.Status:
if token.type == tokenize.NAME and self.queriable:
self.queriable = False
return self.Status(token, self.cur_indent, buffer, False, pythonable)
if used_name and is_spaceable(token) and not newline:
buffer += " "
used_name = token.type == tokenize.NAME

if prev_token is not None and prev_token.type in spacing_triggers:
if not operator_skip_spacing(prev_token, token):
if token.type in spacing_triggers[prev_token.type]:
buffer += " "
prev_token = token
if newline:
newline = False
if not pythonable and token.type != tokenize.COMMENT:
Expand Down Expand Up @@ -235,6 +256,7 @@ def __init__(
self.incident_vocab = incident_vocab
self._brackets = list()
self.found_newline, self.in_lambda = False, False
self.latest_pushed_param = None

self.parse_params(snakefile)

Expand Down Expand Up @@ -288,7 +310,11 @@ def process_token(self, cur_param: Parameter) -> Parameter:
if cur_param.has_value():
cur_param.add_elem(self.token)
elif token_type == tokenize.COMMENT:
cur_param.comments.append(" " + self.token.string)
if str(cur_param) == "":
target = self.latest_pushed_param.comments
else:
target = cur_param.comments
target.append(" " + self.token.string)
elif is_equal_sign(self.token) and not self.in_brackets:
cur_param.to_key_val_mode(self.token)
elif is_comma_sign(self.token) and not self.in_brackets and not self.in_lambda:
Expand Down Expand Up @@ -318,10 +344,12 @@ def flush_param(self, parameter: Parameter, skip_empty: bool = False) -> None:
if not parameter.has_value() and skip_empty:
return

if parameter.has_key(): # noqa: W601
if parameter.has_a_key():
self.keyword_params.append(parameter)
self.latest_pushed_param = self.keyword_params[-1]
else:
self.positional_params.append(parameter)
self.latest_pushed_param = self.positional_params[-1]

def num_params(self):
    """Total number of parameters collected so far (keyword plus positional)."""
    groups = (self.keyword_params, self.positional_params)
    return sum(len(group) for group in groups)
Expand Down
20 changes: 16 additions & 4 deletions snakefmt/types.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import tokenize
from typing import Iterator
from collections import namedtuple
from typing import Iterator, NamedTuple, Tuple

from snakefmt.exceptions import InvalidParameterSyntax

Token = namedtuple

class Token(NamedTuple):
    """Lightweight token record.

    Field order (type, string, start, end) matches the leading fields of
    ``tokenize.TokenInfo``, so tokens from ``tokenize`` and hand-built
    instances can be handled uniformly. Defaults allow constructing a
    token from its type alone.
    """

    type: int  # tokenize token-type constant (e.g. tokenize.NAME)
    string: str = ""  # token text; empty by default
    start: Tuple[int, int] = (-1, -1)  # (row, col) start; (-1, -1) = unspecified
    end: Tuple[int, int] = (-1, -1)  # (row, col) end; (-1, -1) = unspecified


TokenIterator = Iterator[Token]


Expand All @@ -20,7 +26,13 @@ def __init__(self, line_nb: str):
self.comments = list()
self.len = 0

def has_key(self) -> bool:
def __repr__(self):
    """Render the parameter as source text: ``key=value`` when keyed, else the bare value."""
    rendered = self.value
    if self.has_a_key():
        rendered = f"{self.key}={rendered}"
    return rendered

def has_a_key(self) -> bool:
    """Report whether this parameter was supplied in ``key=value`` form."""
    return self.key != ""

def has_value(self) -> bool:
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pytest
from click.testing import CliRunner


@pytest.fixture
def cli_runner() -> CliRunner:
    """Provide a fresh Click ``CliRunner`` per test for invoking the CLI in isolation."""
    return CliRunner()


# Load pytest's own test-support plugin (provides the `testdir` fixture and
# friends for tests that need to run pytest against temporary files).
pytest_plugins = "pytester"
Loading