1
1
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
2
2
# Licensed under the Apache License, Version 2.0 (see LICENSE).
3
3
4
- import logging
4
+ import ast as ast3
5
5
import re
6
+ import sys
6
7
from dataclasses import dataclass
7
- from typing import Optional , Set , Union
8
+ from typing import Optional , Set , Tuple
8
9
9
- import libcst as cst
10
10
from typed_ast import ast27
11
- from typing_extensions import Protocol
12
11
13
12
from pants .util .memo import memoized_property
14
13
from pants .util .ordered_set import FrozenOrderedSet
15
14
from pants .util .strutil import ensure_text
16
15
17
- logger = logging .getLogger (__name__ )
16
+
17
class ImportParseError(ValueError):
    """Error type for import-parsing failures.

    Derives from `ValueError`, so handlers written for `ValueError` also catch it.

    NOTE(review): nothing in the visible part of this module raises it; confirm the intended
    raise sites before relying on it.
    """
18
19
19
20
20
21
@dataclass (frozen = True )
@@ -33,56 +34,41 @@ class ParsedPythonImports:
33
34
def all_imports(self) -> FrozenOrderedSet[str]:
    """Return the explicit and inferred imports merged into one sorted, de-duplicated set."""
    # Sorting the combined list before building the ordered set fixes the iteration order.
    return FrozenOrderedSet(sorted([*self.explicit_imports, *self.inferred_imports]))
35
36
36
- @classmethod
37
- def empty (cls ) -> "ParsedPythonImports" :
38
- return cls (FrozenOrderedSet (), FrozenOrderedSet ())
39
-
40
-
41
- class VisitorInterface (Protocol ):
42
- explicit_imports : Set [str ]
43
- inferred_imports : Set [str ]
44
37
45
-
46
- def parse_file (* , filename : str , content : str , module_name : str ) -> Optional [VisitorInterface ]:
47
- """Parse the file for python imports, and return a visitor with the imports it found."""
48
- # Parse all python 3 code with libCST. We parse assuming python 3 goes first, because we assume
49
- # most user code will be python 3.
50
- # TODO(#10921): identify the appropriate interpreter version beforehand!
38
def parse_file(*, filename: str, content: str) -> Optional[Tuple]:
    """Parse `content` and return the tree together with the visitor class able to walk it.

    The stdlib Python 3 parser is tried first. If it rejects the source, the typed-ast Python 2.7
    parser is tried as a fallback.

    `filename` is forwarded to the parsers as their `filename=` argument; `content` is the source
    text to parse.

    Returns a `(tree, visitor_class)` pair, or None when neither parser accepts the source.
    """
    # NB: The Python 3 ast is generally backwards-compatible with earlier versions. The only
    # breaking change is `async` `await` becoming reserved keywords in Python 3.7 (deprecated
    # in 3.6). If the std-lib fails to parse, we could use typed-ast to try parsing with a
    # target version of Python 3.5, but we don't because Python 3.5 is almost EOL and has very
    # low usage.
    # We will also fail to parse Python 3.8 syntax if Pants is run with Python 3.6 or 3.7.
    # There is no known workaround for this, beyond users changing their `./pants` script to
    # always use >= 3.8.
    try:
        tree = ast3.parse(content, filename=filename)
    except (SyntaxError, ValueError):
        # `compile()` raises ValueError (rather than SyntaxError) for source containing null
        # bytes; treat it like any other unparseable source and fall through to Python 2.7.
        pass
    else:
        # Python 3.8 deprecated the `Str` node in favor of `Constant`, so the visitor must match
        # the interpreter that produced the tree.
        visitor_cls = _Py3AstVisitor if sys.version_info[:2] < (3, 8) else _Py38AstVisitor
        return tree, visitor_cls

    try:
        py27_tree = ast27.parse(content, filename=filename)
    except (SyntaxError, ValueError):
        # ValueError is caught defensively, mirroring the Python 3 branch above.
        return None
    return py27_tree, _Py27AstVisitor
74
57
75
58
76
59
def find_python_imports(*, filename: str, content: str, module_name: str) -> ParsedPythonImports:
    """Collect the imports found in one Python source file.

    `filename` and `content` are handed to `parse_file`. `module_name` is the dotted name of the
    module being parsed; the visitor uses it to turn relative imports into absolute ones.

    Returns a `ParsedPythonImports` whose explicit and inferred import sets are sorted. Both sets
    are empty when the file cannot be parsed.
    """
    parse_result = parse_file(filename=filename, content=content)
    # If there were syntax errors, gracefully early return. This is more user friendly than
    # propagating the exception. Dependency inference simply won't be used for that file, and
    # it'll be up to the tool actually being run (e.g. Pytest or Flake8) to error.
    if parse_result is None:
        return ParsedPythonImports(
            explicit_imports=FrozenOrderedSet(),
            inferred_imports=FrozenOrderedSet(),
        )

    tree, ast_visitor_cls = parse_result
    ast_visitor = ast_visitor_cls(module_name)
    ast_visitor.visit(tree)
    return ParsedPythonImports(
        explicit_imports=FrozenOrderedSet(sorted(ast_visitor.explicit_imports)),
        inferred_imports=FrozenOrderedSet(sorted(ast_visitor.inferred_imports)),
    )
87
73
88
74
@@ -91,77 +77,42 @@ def find_python_imports(*, filename: str, content: str, module_name: str) -> Par
91
77
_INFERRED_IMPORT_REGEX = re .compile (r"^([a-z_][a-z_\d]*\.){2,}[a-zA-Z_]\w*$" )
92
78
93
79
94
- class _Py27AstVisitor ( ast27 . NodeVisitor ) :
80
class _BaseAstVisitor:
    """Parser-agnostic import collection shared by the concrete AST visitors.

    This class defines no `visit()` itself; the concrete visitors in this module combine it with
    a `NodeVisitor` class, which dispatches to the `visit_*` methods below.
    """

    def __init__(self, module_name: str) -> None:
        # Dotted name of the module being parsed, split up so that relative imports can be
        # resolved against it.
        self._module_parts = module_name.split(".")
        self.explicit_imports: Set[str] = set()
        self.inferred_imports: Set[str] = set()

    def maybe_add_inferred_import(self, s: str) -> None:
        """Record `s` as an inferred import if it matches `_INFERRED_IMPORT_REGEX`."""
        if _INFERRED_IMPORT_REGEX.match(s):
            self.inferred_imports.add(s)

    def visit_Import(self, node) -> None:
        """Record every module named by an `import a, b.c` statement."""
        for alias in node.names:
            self.explicit_imports.add(alias.name)

    def visit_ImportFrom(self, node) -> None:
        """Record `<module>.<name>` for every name in a `from <module> import <name>` statement.

        Relative imports are resolved against the module name given to the constructor.
        """
        # `level` is declared optional in the AST grammar; treat a missing value as an absolute
        # import rather than failing on `-None`.
        level = node.level or 0
        # Each leading dot strips one trailing component of the current module's name. For an
        # absolute import (level 0) nothing of the current module is kept.
        prefix = self._module_parts[:-level] if level else []
        rel_module = node.module
        abs_module = ".".join(prefix + ([] if rel_module is None else [rel_module]))
        for alias in node.names:
            self.explicit_imports.add(f"{abs_module}.{alias.name}")
120
101
121
- def visit_Str (self , node : ast27 .Str ) -> None :
122
- val = ensure_text (node .s )
123
- self ._maybe_add_inferred_import (val )
124
-
125
102
126
- class _CSTVisitor (cst .CSTVisitor ):
127
- def __init__ (self , module_name : str ) -> None :
128
- self ._module_parts = module_name .split ("." )
129
- self .explicit_imports : Set [str ] = set ()
130
- self .inferred_imports : Set [str ] = set ()
103
class _Py27AstVisitor(ast27.NodeVisitor, _BaseAstVisitor):
    """Visitor for trees produced by the typed-ast Python 2.7 parser."""

    def visit_Str(self, node) -> None:
        # The literal is passed through `ensure_text` before matching.
        # NOTE(review): presumably because Python 2.7 string literals may arrive as bytes.
        val = ensure_text(node.s)
        self.maybe_add_inferred_import(val)
131
107
132
- @classmethod
133
- def visit_tree (cls , tree : cst .Module , module_name : str ) -> "_CSTVisitor" :
134
- visitor = cls (module_name )
135
- tree .visit (visitor )
136
- return visitor
137
108
138
- def _maybe_add_inferred_import (self , s : Union [str , bytes ]) -> None :
139
- if isinstance (s , bytes ):
140
- return
141
- if _INFERRED_IMPORT_REGEX .match (s ):
142
- self .inferred_imports .add (s )
109
class _Py3AstVisitor(ast3.NodeVisitor, _BaseAstVisitor):
    """Visitor for stdlib `ast` trees in which string literals are `Str` nodes.

    `parse_file` selects this class when running on an interpreter older than Python 3.8.
    """

    def visit_Str(self, node) -> None:
        # Every string literal is a candidate inferred import.
        self.maybe_add_inferred_import(node.s)
143
112
144
- def _flatten_attribute_or_name (self , node : cst .BaseExpression ) -> str :
145
- if isinstance (node , cst .Name ):
146
- return node .value
147
- if not isinstance (node , cst .Attribute ):
148
- raise TypeError (f"Unrecognized cst.BaseExpression subclass: { node } " )
149
- inner = self ._flatten_attribute_or_name (node .value )
150
- return f"{ inner } .{ node .attr .value } "
151
113
152
- def visit_Import (self , node : cst .Import ) -> None :
153
- for alias in node .names :
154
- self .explicit_imports .add (self ._flatten_attribute_or_name (alias .name ))
155
-
156
- def visit_ImportFrom (self , node : cst .ImportFrom ) -> None :
157
- rel_module = [] if node .module is None else [self ._flatten_attribute_or_name (node .module )]
158
- relative_level = len (node .relative )
159
- abs_module = "." .join (self ._module_parts [0 :- relative_level ] + rel_module )
160
- if isinstance (node .names , cst .ImportStar ):
161
- self .explicit_imports .add (f"{ abs_module } .*" )
162
- else :
163
- for alias in node .names :
164
- self .explicit_imports .add (f"{ abs_module } .{ alias .name .value } " )
165
-
166
- def visit_SimpleString (self , node : cst .SimpleString ) -> None :
167
- self ._maybe_add_inferred_import (node .evaluated_value )
114
class _Py38AstVisitor(ast3.NodeVisitor, _BaseAstVisitor):
    """Visitor for stdlib `ast` trees in which string literals are `Constant` nodes.

    `parse_file` selects this class when running on Python 3.8 or newer.
    """

    # Python 3.8 deprecated the Str node in favor of Constant.
    def visit_Constant(self, node) -> None:
        # `Constant` also covers numbers, bytes, None, etc.; only str values can name a module.
        if isinstance(node.value, str):
            self.maybe_add_inferred_import(node.value)
0 commit comments