From 5f44d426d69607c96444c8be56cc4ac170b1e6a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hrn=C4=8Diar?= Date: Wed, 3 Nov 2021 12:36:11 +0100 Subject: [PATCH 1/5] Remove old pyparsing code --- packaging/markers.py | 89 --------------------------------------- packaging/requirements.py | 74 +++----------------------------- setup.py | 1 - 3 files changed, 5 insertions(+), 159 deletions(-) diff --git a/packaging/markers.py b/packaging/markers.py index 814c1213..7145943e 100644 --- a/packaging/markers.py +++ b/packaging/markers.py @@ -8,18 +8,6 @@ import sys from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pyparsing import ( # noqa: N817 - Forward, - Group, - Literal as L, - ParseException, - ParseResults, - QuotedString, - ZeroOrMore, - stringEnd, - stringStart, -) - from .specifiers import InvalidSpecifier, Specifier from .utils import canonicalize_name @@ -82,83 +70,6 @@ def serialize(self) -> str: return str(self) -VARIABLE = ( - L("implementation_version") - | L("platform_python_implementation") - | L("implementation_name") - | L("python_full_version") - | L("platform_release") - | L("platform_version") - | L("platform_machine") - | L("platform_system") - | L("python_version") - | L("sys_platform") - | L("os_name") - | L("os.name") # PEP-345 - | L("sys.platform") # PEP-345 - | L("platform.version") # PEP-345 - | L("platform.machine") # PEP-345 - | L("platform.python_implementation") # PEP-345 - | L("python_implementation") # undocumented setuptools legacy - | L("extra") # PEP-508 -) -ALIASES = { - "os.name": "os_name", - "sys.platform": "sys_platform", - "platform.version": "platform_version", - "platform.machine": "platform_machine", - "platform.python_implementation": "platform_python_implementation", - "python_implementation": "platform_python_implementation", -} -VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) - -VERSION_CMP = ( - L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | 
L(">") | L("<") -) - -MARKER_OP = VERSION_CMP | L("not in") | L("in") -MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) - -MARKER_VALUE = QuotedString("'") | QuotedString('"') -MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) - -BOOLOP = L("and") | L("or") - -MARKER_VAR = VARIABLE | MARKER_VALUE - -MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) -MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) - -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() - -MARKER_EXPR = Forward() -MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) -MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) - -MARKER = stringStart + MARKER_EXPR + stringEnd - - -def _coerce_parse_result(results: Any) -> Any: - """ - Flatten the parse results into a list of results. - - Also normalize extra values. - """ - if isinstance(results, ParseResults): - return [_coerce_parse_result(i) for i in results] - elif isinstance(results, tuple): - lhs, op, rhs = results - if isinstance(lhs, Variable) and lhs.value == "extra": - normalized_extra = canonicalize_name(rhs.value) - rhs = Value(normalized_extra) - elif isinstance(rhs, Variable) and rhs.value == "extra": - normalized_extra = canonicalize_name(lhs.value) - lhs = Value(normalized_extra) - results = lhs, op, rhs - return results - - def _format_marker( marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True ) -> str: diff --git a/packaging/requirements.py b/packaging/requirements.py index 79a044fd..52fe60f7 100644 --- a/packaging/requirements.py +++ b/packaging/requirements.py @@ -5,23 +5,10 @@ import re import string import urllib.parse -from typing import Any, List, Optional as TOptional, Set +from typing import Any, List, Optional, Set -from pyparsing import ( # noqa - Combine, - Literal as L, - Optional, - ParseException, - Regex, - Word, - ZeroOrMore, - originalTextFor, - stringEnd, - stringStart, -) - -from .markers import MARKER_EXPR as _MARKER_EXPR, Marker -from .specifiers 
import Specifier, SpecifierSet +from .markers import Marker +from .specifiers import LegacySpecifier, Specifier, SpecifierSet class InvalidRequirement(ValueError): @@ -30,57 +17,6 @@ class InvalidRequirement(ValueError): """ -ALPHANUM = Word(string.ascii_letters + string.digits) - -LBRACKET = L("[").suppress() -RBRACKET = L("]").suppress() -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() -COMMA = L(",").suppress() -SEMICOLON = L(";").suppress() -AT = L("@").suppress() - -PUNCTUATION = Word("-_.") -IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) -IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) - -NAME = IDENTIFIER("name") -EXTRA = IDENTIFIER - -URI = Regex(r"[^ ]+")("url") -URL = AT + URI - -EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) -EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") - -VERSION_ONE = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) -VERSION_MANY = Combine( - VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False -)("_raw_spec") -_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) -_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") - -VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") -VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) - -MARKER_EXPR = originalTextFor(_MARKER_EXPR())("marker") -MARKER_EXPR.setParseAction( - lambda s, l, t: Marker(s[t._original_start : t._original_end]) -) -MARKER_SEPARATOR = SEMICOLON -MARKER = MARKER_SEPARATOR + MARKER_EXPR - -VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) -URL_AND_MARKER = URL + Optional(MARKER) - -NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) - -REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd -# pyparsing isn't thread safe during initialization, so we do it eagerly, see -# issue #104 -REQUIREMENT.parseString("x[]") - - class Requirement: """Parse a requirement. 
@@ -112,12 +48,12 @@ def __init__(self, requirement_string: str) -> None: not parsed_url.scheme and not parsed_url.netloc ): raise InvalidRequirement(f"Invalid URL: {req.url}") - self.url: TOptional[str] = req.url + self.url: Optional[str] = req.url else: self.url = None self.extras: Set[str] = set(req.extras.asList() if req.extras else []) self.specifier: SpecifierSet = SpecifierSet(req.specifier) - self.marker: TOptional[Marker] = req.marker if req.marker else None + self.marker: Optional[Marker] = req.marker if req.marker else None def __str__(self) -> str: parts: List[str] = [self.name] diff --git a/setup.py b/setup.py index 82ef248e..d2cb3eea 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,6 @@ author=about["__author__"], author_email=about["__email__"], python_requires=">=3.7", - install_requires=["pyparsing>=2.0.2,!=3.0.5"], # 2.0.2 + needed to avoid issue #91 classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", From b1861b0da35f8fd8ab8e651c26279bd9d895f07d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hrn=C4=8Diar?= Date: Wed, 3 Nov 2021 08:42:21 +0100 Subject: [PATCH 2/5] Add sources for new parser and tokenizer --- packaging/_parser.py | 228 ++++++++++++++++++++++++++++++++++++++++ packaging/_tokenizer.py | 166 +++++++++++++++++++++++++++++ 2 files changed, 394 insertions(+) create mode 100644 packaging/_parser.py create mode 100644 packaging/_tokenizer.py diff --git a/packaging/_parser.py b/packaging/_parser.py new file mode 100644 index 00000000..77b308a6 --- /dev/null +++ b/packaging/_parser.py @@ -0,0 +1,228 @@ +# The docstring for each parse function contains the grammar for the rule. +# The grammar uses a simple EBNF-inspired syntax: +# +# - Uppercase names are tokens +# - Lowercase names are rules (parsed with a parse_* function) +# - Parentheses are used for grouping +# - A | means either-or +# - A * means 0 or more +# - A + means 1 or more +# - A ? 
means 0 or 1 + +from ast import literal_eval +from typing import Any, List, NamedTuple, Tuple, Union + +from ._tokenizer import Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return str(self.value) + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definitions +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class Requirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: str + + +def parse_named_requirement(requirement: str) -> Requirement: + """ + named_requirement: + IDENTIFIER extras (URL_SPEC | specifier) (SEMICOLON marker_expr)?
END + """ + tokens = Tokenizer(requirement) + tokens.expect("IDENTIFIER", error_message="Expression must begin with package name") + name = tokens.read("IDENTIFIER").text + extras = parse_extras(tokens) + specifier = "" + url = "" + if tokens.match("URL_SPEC"): + url = tokens.read().text[1:].strip() + elif not tokens.match("END"): + specifier = parse_specifier(tokens) + if tokens.try_read("SEMICOLON"): + marker = "" + while not tokens.match("END"): + # we don't validate markers here, it's done later as part of + # packaging/requirements.py + marker += tokens.read().text + else: + marker = "" + tokens.expect( + "END", + error_message="Expected semicolon (followed by markers) or end of string", + ) + return Requirement(name, url, extras, specifier, marker) + + +def parse_extras(tokens: Tokenizer) -> List[str]: + """ + extras: LBRACKET (IDENTIFIER (COMMA IDENTIFIER)*)? RBRACKET + """ + extras = [] + if tokens.try_read("LBRACKET"): + while tokens.match("IDENTIFIER"): + extras.append(tokens.read("IDENTIFIER").text) + if not tokens.match("RBRACKET"): + tokens.read("COMMA", error_message="Missing comma after extra") + if not tokens.match("COMMA") and tokens.match("RBRACKET"): + break + tokens.read("RBRACKET", error_message="Closing square bracket is missing") + return extras + + +def parse_specifier(tokens: Tokenizer) -> str: + """ + specifier: + LPAREN version_many? 
RPAREN | version_many + """ + lparen = False + if tokens.try_read("LPAREN"): + lparen = True + parsed_specifiers = parse_version_many(tokens) + if lparen and not tokens.try_read("RPAREN"): + tokens.raise_syntax_error(message="Closing right parenthesis is missing") + return parsed_specifiers + + +def parse_version_many(tokens: Tokenizer) -> str: + """ + version_many: OP VERSION (COMMA OP VERSION)* + """ + parsed_specifiers = "" + while tokens.match("OP"): + parsed_specifiers += tokens.read("OP").text + if tokens.match("VERSION"): + parsed_specifiers += tokens.read("VERSION").text + else: + tokens.raise_syntax_error(message="Missing version") + if not tokens.match("COMMA"): + break + tokens.expect("COMMA", error_message="Missing comma after version") + parsed_specifiers += tokens.read("COMMA").text + return parsed_specifiers + + +def parse_marker_expr(tokens: Tokenizer) -> MarkerList: + """ + marker_expr: marker_atom (BOOLOP marker_atom)* + """ + expression = [parse_marker_atom(tokens)] + while tokens.match("BOOLOP"): + tok = tokens.read("BOOLOP") + expr_right = parse_marker_atom(tokens) + expression.extend((tok.text, expr_right)) + return expression + + +def parse_marker_atom(tokens: Tokenizer) -> MarkerAtom: + """ + marker_atom: LPAREN marker_expr RPAREN | marker_item + """ + if tokens.try_read("LPAREN"): + marker = parse_marker_expr(tokens) + tokens.read("RPAREN", error_message="Closing right parenthesis is missing") + return marker + else: + return parse_marker_item(tokens) + + +def parse_marker_item(tokens: Tokenizer) -> MarkerItem: + """ + marker_item: marker_var marker_op marker_var + """ + marker_var_left = parse_marker_var(tokens) + marker_op = parse_marker_op(tokens) + marker_var_right = parse_marker_var(tokens) + return (marker_var_left, marker_op, marker_var_right) + + +def parse_marker_var(tokens: Tokenizer) -> MarkerVar: + """ + marker_var: env_var | python_str + """ + if tokens.match("VARIABLE"): + return parse_env_var(tokens) + else: + return
parse_python_str(tokens) + + +def parse_env_var(tokens: Tokenizer) -> Variable: + """ + env_var: VARIABLE + """ + env_var = tokens.read("VARIABLE").text.replace(".", "_") + if ( + env_var == "platform_python_implementation" + or env_var == "python_implementation" + ): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def parse_python_str(tokens: Tokenizer) -> Value: + """ + python_str: QUOTED_STRING + """ + token = tokens.read( + "QUOTED_STRING", + error_message="String with single or double quote at the beginning is expected", + ).text + python_str = literal_eval(token) + return Value(str(python_str)) + + +def parse_marker_op(tokens: Tokenizer) -> Op: + """ + marker_op: IN | NOT IN | OP + """ + if tokens.try_read("IN"): + return Op("in") + elif tokens.try_read("NOT"): + tokens.read("IN", error_message="NOT token must be follewed by IN token") + return Op("not in") + elif tokens.match("OP"): + return Op(tokens.read().text) + else: + return tokens.raise_syntax_error( + message='Couldn\'t parse marker operator. Expecting one of \ + "<=, <, !=, ==, >=, >, ~=, ===, not, not in"' + ) diff --git a/packaging/_tokenizer.py b/packaging/_tokenizer.py new file mode 100644 index 00000000..1eddba2d --- /dev/null +++ b/packaging/_tokenizer.py @@ -0,0 +1,166 @@ +import re +from typing import Dict, Generator, NoReturn, Optional + +from .specifiers import Specifier + + +class Token: + def __init__(self, name: str, text: str, position: int) -> None: + self.name = name + self.text = text + self.position = position + + def matches(self, name: str = "") -> bool: + if name and self.name != name: + return False + return True + + +class ParseException(Exception): + """ + Parsing failed. 
+ """ + + def __init__(self, message: str, position: int) -> None: + super().__init__(message) + self.position = position + + +DEFAULT_RULES = { + "LPAREN": r"\s*\(", + "RPAREN": r"\s*\)", + "LBRACKET": r"\s*\[", + "RBRACKET": r"\s*\]", + "SEMICOLON": r"\s*;", + "COMMA": r"\s*,", + "QUOTED_STRING": re.compile( + r""" + \s* + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"\s*(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\s*(or|and)", + "IN": r"\s*in", + "NOT": r"\s*not", + "VARIABLE": re.compile( + r""" + \s* + ( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + ) + """, + re.VERBOSE, + ), + "VERSION": re.compile(Specifier._version_regex_str, re.VERBOSE | re.IGNORECASE), + "URL_SPEC": r"\s*@ *[^ ]+", + "IDENTIFIER": r"\s*[a-zA-Z0-9._-]+", +} + + +class Tokenizer: + """Stream of tokens for a LL(1) parser. + + Provides methods to examine the next token to be read, and to read it + (advance to the next token). + """ + + next_token: Optional[Token] + + def __init__( + self, source: str, rules: Dict[Optional[str], object] = DEFAULT_RULES + ) -> None: + self.source = source + self.rules = {name: re.compile(pattern) for name, pattern in rules.items()} + self.next_token = None + self.generator = self._tokenize() + self.position = 0 + + def peek(self) -> Token: + """ + Return the next token to be read. + """ + if not self.next_token: + self.next_token = next(self.generator) + return self.next_token + + def match(self, *name: str) -> bool: + """ + Return True if the next token matches the given arguments. + """ + token = self.peek() + return token.matches(*name) + + def expect(self, *name: str, error_message: str) -> Token: + """ + Raise SyntaxError if the next token doesn't match given arguments. 
+ """ + token = self.peek() + if not token.matches(*name): + raise self.raise_syntax_error(message=error_message) + return token + + def read(self, *name: str, error_message: str = "") -> Token: + """Return the next token and advance to the next token. + + Raise SyntaxError if the token doesn't match. + """ + result = self.expect(*name, error_message=error_message) + self.next_token = None + return result + + def try_read(self, *name: str) -> Optional[Token]: + """read() if the next token matches the given arguments. + + Do nothing if it does not match. + """ + if self.match(*name): + return self.read() + return None + + def raise_syntax_error(self, *, message: str) -> NoReturn: + """ + Raise SyntaxError at the given position in the marker. + """ + at = f"at position {self.position}:" + marker = " " * self.position + "^" + raise ParseException( + f"{message}\n{at}\n {self.source}\n {marker}", + self.position, + ) + + def _make_token(self, name: str, text: str) -> Token: + """ + Make a token with the current position. + """ + return Token(name, text, self.position) + + def _tokenize(self) -> Generator[Token, Token, None]: + """ + The main generator of tokens. + """ + while self.position < len(self.source): + for name, expression in self.rules.items(): + match = expression.match(self.source, self.position) + if match: + token_text = match[0] + + yield self._make_token(name, token_text.strip()) + self.position += len(token_text) + break + else: + raise self.raise_syntax_error(message="Unrecognized token") + yield self._make_token("END", "") From 6ceecf60e4037c62ca2bf3264cb9a6cba14d9ae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hrn=C4=8Diar?= Date: Mon, 25 Oct 2021 11:37:46 +0200 Subject: [PATCH 3/5] Split Specifier regex into operator and version parts Tokenizer uses _version_regex_str to detect 'VERSION' token. 
--- packaging/_parser.py | 2 +- packaging/specifiers.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/packaging/_parser.py b/packaging/_parser.py index 77b308a6..ec02e72c 100644 --- a/packaging/_parser.py +++ b/packaging/_parser.py @@ -20,7 +20,7 @@ def __init__(self, value: str) -> None: self.value = value def __str__(self) -> str: - return str(self.value) + return self.value def __repr__(self) -> str: return f"<{self.__class__.__name__}('{self}')>" diff --git a/packaging/specifiers.py b/packaging/specifiers.py index dab49eef..2aa6dd48 100644 --- a/packaging/specifiers.py +++ b/packaging/specifiers.py @@ -80,8 +80,10 @@ def filter( class Specifier(BaseSpecifier): - _regex_str = r""" + _operator_regex_str = r""" (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" (?P (?: # The identity operators allow for an escape hatch that will @@ -173,7 +175,10 @@ class Specifier(BaseSpecifier): ) """ - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$", + re.VERBOSE | re.IGNORECASE, + ) _operators = { "~=": "compatible", From 32a3f32df9755788d710b3441894ebf879555b89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hrn=C4=8Diar?= Date: Mon, 25 Oct 2021 11:40:37 +0200 Subject: [PATCH 4/5] Changes to integrate new parser into packaging + test adjustments --- packaging/_tokenizer.py | 8 ++--- packaging/markers.py | 41 ++++----------------- packaging/requirements.py | 28 +++++++++------ tests/test_markers.py | 7 ++-- tests/test_requirements.py | 73 +++++++++++++++++++++++++++++++------- 5 files changed, 91 insertions(+), 66 deletions(-) diff --git a/packaging/_tokenizer.py b/packaging/_tokenizer.py index 1eddba2d..ecae9e34 100644 --- a/packaging/_tokenizer.py +++ b/packaging/_tokenizer.py @@ -16,7 +16,7 @@ def matches(self, name: str = "") -> bool: return True -class ParseException(Exception): +class 
ParseExceptionError(Exception): """ Parsing failed. """ @@ -80,9 +80,7 @@ class Tokenizer: next_token: Optional[Token] - def __init__( - self, source: str, rules: Dict[Optional[str], object] = DEFAULT_RULES - ) -> None: + def __init__(self, source: str, rules: Dict[str, object] = DEFAULT_RULES) -> None: self.source = source self.rules = {name: re.compile(pattern) for name, pattern in rules.items()} self.next_token = None @@ -137,7 +135,7 @@ def raise_syntax_error(self, *, message: str) -> NoReturn: """ at = f"at position {self.position}:" marker = " " * self.position + "^" - raise ParseException( + raise ParseExceptionError( f"{message}\n{at}\n {self.source}\n {marker}", self.position, ) diff --git a/packaging/markers.py b/packaging/markers.py index 7145943e..2bb7de36 100644 --- a/packaging/markers.py +++ b/packaging/markers.py @@ -8,6 +8,8 @@ import sys from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from ._parser import MarkerAtom, MarkerList, Op, Variable, parse_marker_expr +from ._tokenizer import ParseExceptionError, Tokenizer from .specifiers import InvalidSpecifier, Specifier from .utils import canonicalize_name @@ -41,37 +43,8 @@ class UndefinedEnvironmentName(ValueError): """ -class Node: - def __init__(self, value: Any) -> None: - self.value = value - - def __str__(self) -> str: - return str(self.value) - - def __repr__(self) -> str: - return f"<{self.__class__.__name__}('{self}')>" - - def serialize(self) -> str: - raise NotImplementedError - - -class Variable(Node): - def serialize(self) -> str: - return str(self) - - -class Value(Node): - def serialize(self) -> str: - return f'"{self}"' - - -class Op(Node): - def serialize(self) -> str: - return str(self) - - def _format_marker( - marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True ) -> str: assert isinstance(marker, (list, tuple, str)) @@ -138,7 +111,7 @@ def _normalize(*values: 
str, key: str) -> Tuple[str, ...]: return values -def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: +def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool: groups: List[List[bool]] = [[]] for marker in markers: @@ -197,7 +170,7 @@ def default_environment() -> Dict[str, str]: class Marker: def __init__(self, marker: str) -> None: try: - self._markers = _coerce_parse_result(MARKER.parseString(marker)) + self._markers = parse_marker_expr(Tokenizer(marker)) # The attribute `_markers` can be described in terms of a recursive type: # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] # @@ -214,10 +187,10 @@ def __init__(self, marker: str) -> None: # (, , ) # ] # ] - except ParseException as e: + except ParseExceptionError as e: raise InvalidMarker( f"Invalid marker: {marker!r}, parse error at " - f"{marker[e.loc : e.loc + 8]!r}" + f"{marker[e.position : e.position + 8]!r}" ) def __str__(self) -> str: diff --git a/packaging/requirements.py b/packaging/requirements.py index 52fe60f7..971fb8fb 100644 --- a/packaging/requirements.py +++ b/packaging/requirements.py @@ -2,13 +2,18 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. 
-import re -import string import urllib.parse +from collections import namedtuple from typing import Any, List, Optional, Set -from .markers import Marker -from .specifiers import LegacySpecifier, Specifier, SpecifierSet +from ._parser import parse_named_requirement +from ._tokenizer import ParseExceptionError +from .markers import InvalidMarker, Marker +from .specifiers import SpecifierSet + +_RequirementTuple = namedtuple( + "_RequirementTuple", ["name", "url", "extras", "specifier", "marker"] +) class InvalidRequirement(ValueError): @@ -32,11 +37,9 @@ class Requirement: def __init__(self, requirement_string: str) -> None: try: - req = REQUIREMENT.parseString(requirement_string) - except ParseException as e: - raise InvalidRequirement( - f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}' - ) + req = _RequirementTuple(*parse_named_requirement(requirement_string)) + except ParseExceptionError as e: + raise InvalidRequirement(str(e)) self.name: str = req.name if req.url: @@ -51,9 +54,12 @@ def __init__(self, requirement_string: str) -> None: self.url: Optional[str] = req.url else: self.url = None - self.extras: Set[str] = set(req.extras.asList() if req.extras else []) + self.extras: Set[str] = set(req.extras if req.extras else []) self.specifier: SpecifierSet = SpecifierSet(req.specifier) - self.marker: Optional[Marker] = req.marker if req.marker else None + try: + self.marker: Optional[Marker] = Marker(req.marker) if req.marker else None + except InvalidMarker as e: + raise InvalidRequirement(str(e)) def __str__(self) -> str: parts: List[str] = [self.name] diff --git a/tests/test_markers.py b/tests/test_markers.py index e943e492..41bda762 100644 --- a/tests/test_markers.py +++ b/tests/test_markers.py @@ -10,10 +10,10 @@ import pytest +from packaging._parser import Node from packaging.markers import ( InvalidMarker, Marker, - Node, UndefinedComparison, default_environment, format_full_version, @@ -61,11 +61,11 @@ class TestNode: def 
test_accepts_value(self, value): assert Node(value).value == value - @pytest.mark.parametrize("value", ["one", "two", None, 3, 5, []]) + @pytest.mark.parametrize("value", ["one", "two"]) def test_str(self, value): assert str(Node(value)) == str(value) - @pytest.mark.parametrize("value", ["one", "two", None, 3, 5, []]) + @pytest.mark.parametrize("value", ["one", "two"]) def test_repr(self, value): assert repr(Node(value)) == f"" @@ -165,6 +165,7 @@ def test_parses_valid(self, marker_string): "python_version", "(python_version)", "python_version >= 1.0 and (python_version)", + '(python_version == "2.7" and os_name == "linux"', ], ) def test_parses_invalid(self, marker_string): diff --git a/tests/test_requirements.py b/tests/test_requirements.py index 5e4059df..3360b5ed 100644 --- a/tests/test_requirements.py +++ b/tests/test_requirements.py @@ -5,7 +5,7 @@ import pytest from packaging.markers import Marker -from packaging.requirements import URL, URL_AND_MARKER, InvalidRequirement, Requirement +from packaging.requirements import InvalidRequirement, Requirement from packaging.specifiers import SpecifierSet @@ -60,17 +60,34 @@ def test_name_with_version(self): self._assert_requirement(req, "name", specifier=">=3") def test_with_legacy_version(self): - with pytest.raises(InvalidRequirement): + with pytest.raises(InvalidRequirement) as e: Requirement("name==1.0.org1") + assert "Expected semicolon (followed by markers) or end of string" in str(e) def test_with_legacy_version_and_marker(self): - with pytest.raises(InvalidRequirement): + with pytest.raises(InvalidRequirement) as e: Requirement("name>=1.x.y;python_version=='2.6'") + assert "Expected semicolon (followed by markers) or end of string" in str(e) + + def test_missing_name(self): + with pytest.raises(InvalidRequirement) as e: + Requirement("@ http://example.com") + assert "Expression must begin with package name" in str(e) + + def test_name_with_missing_version(self): + with pytest.raises(InvalidRequirement) as e: 
+ Requirement("name>=") + assert "Missing version" in str(e) def test_version_with_parens_and_whitespace(self): req = Requirement("name (==4)") self._assert_requirement(req, "name", specifier="==4") + def test_version_with_missing_closing_paren(self): + with pytest.raises(InvalidRequirement) as e: + Requirement("name(==4") + assert "Closing right parenthesis is missing" in str(e) + def test_name_with_multiple_versions(self): req = Requirement("name>=3,<2") self._assert_requirement(req, "name", specifier="<2,>=3") @@ -79,6 +96,22 @@ def test_name_with_multiple_versions_and_whitespace(self): req = Requirement("name >=2, <3") self._assert_requirement(req, "name", specifier="<3,>=2") + def test_name_with_multiple_versions_in_parenthesis(self): + req = Requirement("name (>=2,<3)") + self._assert_requirement(req, "name", specifier="<3,>=2") + + def test_name_with_no_extras_no_versions_in_parenthesis(self): + req = Requirement("name []()") + self._assert_requirement(req, "name", specifier="", extras=[]) + + def test_name_with_extra_and_multiple_versions_in_parenthesis(self): + req = Requirement("name [foo, bar](>=2,<3)") + self._assert_requirement(req, "name", specifier="<3,>=2", extras=["foo", "bar"]) + + def test_name_with_no_versions_in_parenthesis(self): + req = Requirement("name ()") + self._assert_requirement(req, "name", specifier="") + def test_extras(self): req = Requirement("foobar [quux,bar]") self._assert_requirement(req, "foobar", extras=["bar", "quux"]) @@ -87,16 +120,27 @@ def test_empty_extras(self): req = Requirement("foo[]") self._assert_requirement(req, "foo") + def test_unclosed_extras(self): + with pytest.raises(InvalidRequirement) as e: + Requirement("foo[") + assert "Closing square bracket is missing" in str(e) + + def test_extras_without_comma(self): + with pytest.raises(InvalidRequirement) as e: + Requirement("foobar[quux bar]") + assert "Missing comma after extra" in str(e) + def test_url(self): - url_section = "@ http://example.com" - parsed = 
URL.parseString(url_section) - assert parsed.url == "http://example.com" + url_section = "test @ http://example.com" + req = Requirement(url_section) + self._assert_requirement(req, "test", "http://example.com", extras=[]) def test_url_and_marker(self): - instring = "@ http://example.com ; os_name=='a'" - parsed = URL_AND_MARKER.parseString(instring) - assert parsed.url == "http://example.com" - assert str(parsed.marker) == 'os_name == "a"' + instring = "test @ http://example.com ; os_name=='a'" + req = Requirement(instring) + self._assert_requirement( + req, "test", "http://example.com", extras=[], marker='os_name == "a"' + ) def test_invalid_url(self): with pytest.raises(InvalidRequirement) as e: @@ -147,6 +191,11 @@ def test_invalid_marker(self): with pytest.raises(InvalidRequirement): Requirement("name; foobar=='x'") + def test_marker_with_missing_semicolon(self): + with pytest.raises(InvalidRequirement) as e: + Requirement('name[bar]>=3 python_version == "2.7"') + assert "Expected semicolon (followed by markers) or end of string" in str(e) + def test_types(self): req = Requirement("foobar[quux]<2,>=3; os_name=='a'") assert isinstance(req.name, str) @@ -190,9 +239,7 @@ def test_sys_platform_linux_in(self): def test_parseexception_error_msg(self): with pytest.raises(InvalidRequirement) as e: Requirement("toto 42") - assert "Expected stringEnd" in str(e.value) or ( - "Expected string_end" in str(e.value) # pyparsing>=3.0.0 - ) + assert "Expected semicolon (followed by markers) or end of string" in str(e) EQUAL_DEPENDENCIES = [ ("packaging>20.1", "packaging>20.1"), From c30f323abdcf06ed26554ccec548a9a1f8610ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hrn=C4=8Diar?= Date: Mon, 27 Jun 2022 08:49:24 +0200 Subject: [PATCH 5/5] Normalize extra values in markers --- packaging/markers.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/packaging/markers.py b/packaging/markers.py index 2bb7de36..ddb0ac17 100644 
--- a/packaging/markers.py +++ b/packaging/markers.py @@ -8,7 +8,7 @@ import sys from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from ._parser import MarkerAtom, MarkerList, Op, Variable, parse_marker_expr +from ._parser import MarkerAtom, MarkerList, Op, Value, Variable, parse_marker_expr from ._tokenizer import ParseExceptionError, Tokenizer from .specifiers import InvalidSpecifier, Specifier from .utils import canonicalize_name @@ -43,6 +43,22 @@ class UndefinedEnvironmentName(ValueError): """ +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results + + def _format_marker( marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True ) -> str: @@ -170,7 +186,9 @@ def default_environment() -> Dict[str, str]: class Marker: def __init__(self, marker: str) -> None: try: - self._markers = parse_marker_expr(Tokenizer(marker)) + self._markers = _normalize_extra_values( + parse_marker_expr(Tokenizer(marker)) + ) # The attribute `_markers` can be described in terms of a recursive type: # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] #