"""Parse Python code and perform AST validation."""
import ast
import sys
from typing import Final, Iterable, Iterator, List, Set, Tuple

from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature
from black.nodes import syms
from blib2to3 import pygram
from blib2to3.pgen2 import driver
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.parse import ParseError
from blib2to3.pgen2.tokenize import TokenError
from blib2to3.pytree import Leaf, Node
# Appended to the parse error message when the source only parses with one of
# the Python 2 era grammars.
PY2_HINT: Final = "Python 2 support was removed in version 22.0."
class InvalidInput(ValueError):
    """Raised when input source code fails all parse attempts."""
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
    """Return the grammars to try when parsing, in the order to attempt them.

    Args:
        target_versions: The Python versions the source is expected to target.
            An empty set means "unknown", in which case every grammar is
            returned.

    Returns:
        A list of grammars selected from the feature flags of
        ``target_versions``.
    """
    if not target_versions:
        # No target_version specified, so try all grammars.
        return [
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
            pygram.python_grammar_no_print_statement_no_exec_statement,
            pygram.python_grammar_soft_keywords,
        ]

    grammars: List[Grammar] = []
    # If we have to parse both, try to parse async as a keyword first
    if not supports_feature(
        target_versions, Feature.ASYNC_IDENTIFIERS
    ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
        grammars.append(
            pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
        )
    if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
        grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
    if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions):
        grammars.append(pygram.python_grammar_soft_keywords)

    # At least one of the above branches must have been taken, because every Python
    # version has exactly one of the two 'ASYNC_*' flags
    return grammars
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Every grammar returned by ``get_grammars`` is tried in order and the first
    successful parse is returned.

    Args:
        src_txt: The source code to parse. A trailing newline is added if
            missing.
        target_versions: Versions used to select which grammars to try.

    Returns:
        The parsed tree; a bare ``Leaf`` result is wrapped in a ``file_input``
        ``Node``.

    Raises:
        InvalidInput: If no grammar can parse the source. The error recorded
            for the latest grammar version is raised; ``PY2_HINT`` is appended
            to its message when the source matches one of the Python 2
            grammars.
    """
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    # Maps grammar.version -> the InvalidInput describing why it failed.
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Only reached when no grammar parsed the source (the loop never hit `break`).
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]

        if matches_grammar(src_txt, pygram.python_grammar) or matches_grammar(
            src_txt, pygram.python_grammar_no_print_statement
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY2_HINT}"
            raise InvalidInput(msg) from None

        raise exc from None

    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    """Return True if ``src_txt`` parses under ``grammar``, False otherwise.

    ``ParseError``, ``TokenError`` and ``IndentationError`` raised while parsing
    are treated as "does not match"; any other exception propagates.
    """
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True
def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    code = str(node)
    return code
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> ast.AST:
    """Parse ``src`` with the standard ``ast`` module for one feature version.

    Args:
        src: The source code to parse.
        version: ``(major, minor)`` tuple passed as ``feature_version``.
        type_comments: Whether type comments are parsed into the tree.

    Returns:
        The tree returned by ``ast.parse``.

    Raises:
        SyntaxError: If ``src`` is not valid for the requested version.
    """
    filename = "<unknown>"
    return ast.parse(
        src, filename, feature_version=version, type_comments=type_comments
    )
def parse_ast(src: str) -> ast.AST:
    """Parse ``src`` into an AST, trying feature versions from newest to oldest.

    Every version is first tried with type comments enabled, then again with
    type comments disabled.

    Raises:
        SyntaxError: If no attempt succeeds. The message is that of the first
            failure seen while parsing with type comments.
    """
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]
    # Both passes walk the versions in the same newest-first order; sort once.
    newest_first = sorted(versions, reverse=True)

    first_error = ""
    for version in newest_first:
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            if not first_error:
                first_error = str(e)

    # Try to parse without type comments
    for version in newest_first:
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            # Deliberately ignored: the error reported is the one recorded above.
            pass

    raise SyntaxError(first_error)
def _normalize(lineend: str, value: str) -> str:
    """Strip whitespace from every line of ``value`` and from the whole result.

    Args:
        lineend: Separator used to re-join the stripped lines.
        value: The text to normalize.

    Returns:
        The stripped lines joined with ``lineend``, with leading and trailing
        whitespace of the joined string removed.
    """
    # To normalize, we strip any leading and trailing space from
    # each line...
    stripped: List[str] = [i.strip() for i in value.splitlines()]
    normalized = lineend.join(stripped)
    # ...and remove any blank lines at the beginning and end of
    # the whole string
    return normalized.strip()
def stringify_ast(node: ast.AST, depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content.

    Args:
        node: The AST node to render.
        depth: Current nesting level; controls the indentation of the output.

    Yields:
        One line per node opening, field name, leaf value and node closing.
    """
    if (
        isinstance(node, ast.Constant)
        and isinstance(node.value, str)
        and node.kind == "u"
    ):
        # It's a quirk of history that we strip the u prefix over here. We used to
        # rewrite the AST nodes for Python version compatibility and we never copied
        # over the kind
        node.kind = None

    yield f"{' ' * depth}{node.__class__.__name__}("

    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        if isinstance(node, ast.TypeIgnore):
            break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            # The field is declared but not set on this node; skip it.
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, ast.Delete)
                    and isinstance(item, ast.Tuple)
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, ast.AST):
                    yield from stringify_ast(item, depth + 2)

        elif isinstance(value, ast.AST):
            yield from stringify_ast(value, depth + 2)

        else:
            normalized: object
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                # Constant strings may be indented across newlines, if they are
                # docstrings; fold spaces after newlines when comparing. Similarly,
                # trailing and leading space may be removed.
                normalized = _normalize("\n", value)
            elif field == "type_comment" and isinstance(value, str):
                # Trailing whitespace in type comments is removed.
                normalized = value.rstrip()
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"