]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | # Copyright 2015 Google Inc. All Rights Reserved. |
2 | # | |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | """PyTreeUnwrapper - produces a list of logical lines from a pytree. | |
15 | ||
16 | [for a description of what a logical line is, see logical_line.py] | |
17 | ||
18 | This is a pytree visitor that goes over a parse tree and produces a list of | |
19 | LogicalLine containers from it, each with its own depth and containing all the | |
20 | tokens that could fit on the line if there were no maximal line-length | |
21 | limitations. | |
22 | ||
23 | Note: a precondition to running this visitor and obtaining correct results is | |
24 | for the tree to have its comments spliced in as nodes. Prefixes are ignored. | |
25 | ||
26 | For most uses, the convenience function UnwrapPyTree should be sufficient. | |
27 | """ | |
28 | ||
29 | # The word "token" is overloaded within this module, so for clarity rename | |
30 | # the imported pgen2.token module. | |
31 | from yapf_third_party._ylib2to3 import pytree | |
32 | from yapf_third_party._ylib2to3.pgen2 import token as grammar_token | |
33 | ||
34 | from yapf.pytree import pytree_utils | |
35 | from yapf.pytree import pytree_visitor | |
36 | from yapf.pytree import split_penalty | |
37 | from yapf.yapflib import format_token | |
38 | from yapf.yapflib import logical_line | |
39 | from yapf.yapflib import object_state | |
40 | from yapf.yapflib import style | |
41 | from yapf.yapflib import subtypes | |
42 | ||
43 | _OPENING_BRACKETS = frozenset({'(', '[', '{'}) | |
44 | _CLOSING_BRACKETS = frozenset({')', ']', '}'}) | |
45 | ||
46 | ||
47 | def UnwrapPyTree(tree): | |
48 | """Create and return a list of logical lines from the given pytree. | |
49 | ||
50 | Arguments: | |
51 | tree: the top-level pytree node to unwrap.. | |
52 | ||
53 | Returns: | |
54 | A list of LogicalLine objects. | |
55 | """ | |
56 | unwrapper = PyTreeUnwrapper() | |
57 | unwrapper.Visit(tree) | |
58 | llines = unwrapper.GetLogicalLines() | |
59 | llines.sort(key=lambda x: x.lineno) | |
60 | return llines | |
61 | ||
62 | ||
63 | # Grammar tokens considered as whitespace for the purpose of unwrapping. | |
64 | _WHITESPACE_TOKENS = frozenset([ | |
65 | grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT, | |
66 | grammar_token.ENDMARKER | |
67 | ]) | |
68 | ||
69 | ||
70 | class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): | |
71 | """PyTreeUnwrapper - see file-level docstring for detailed description. | |
72 | ||
73 | Note: since this implements PyTreeVisitor and node names in lib2to3 are | |
74 | underscore_separated, the visiting methods of this class are named as | |
75 | Visit_node_name. invalid-name pragmas are added to each such method to silence | |
76 | a style warning. This is forced on us by the usage of lib2to3, and re-munging | |
77 | method names to make them different from actual node names sounded like a | |
78 | confusing and brittle affair that wasn't worth it for this small & controlled | |
79 | deviation from the style guide. | |
80 | ||
81 | To understand the connection between visitor methods in this class, some | |
82 | familiarity with the Python grammar is required. | |
83 | """ | |
84 | ||
85 | def __init__(self): | |
86 | # A list of all logical lines finished visiting so far. | |
87 | self._logical_lines = [] | |
88 | ||
89 | # Builds up a "current" logical line while visiting pytree nodes. Some nodes | |
90 | # will finish a line and start a new one. | |
91 | self._cur_logical_line = logical_line.LogicalLine(0) | |
92 | ||
93 | # Current indentation depth. | |
94 | self._cur_depth = 0 | |
95 | ||
96 | def GetLogicalLines(self): | |
97 | """Fetch the result of the tree walk. | |
98 | ||
99 | Note: only call this after visiting the whole tree. | |
100 | ||
101 | Returns: | |
102 | A list of LogicalLine objects. | |
103 | """ | |
104 | # Make sure the last line that was being populated is flushed. | |
105 | self._StartNewLine() | |
106 | return self._logical_lines | |
107 | ||
108 | def _StartNewLine(self): | |
109 | """Finish current line and start a new one. | |
110 | ||
111 | Place the currently accumulated line into the _logical_lines list and | |
112 | start a new one. | |
113 | """ | |
114 | if self._cur_logical_line.tokens: | |
115 | self._logical_lines.append(self._cur_logical_line) | |
116 | _MatchBrackets(self._cur_logical_line) | |
117 | _IdentifyParameterLists(self._cur_logical_line) | |
118 | _AdjustSplitPenalty(self._cur_logical_line) | |
119 | self._cur_logical_line = logical_line.LogicalLine(self._cur_depth) | |
120 | ||
121 | _STMT_TYPES = frozenset({ | |
122 | 'if_stmt', | |
123 | 'while_stmt', | |
124 | 'for_stmt', | |
125 | 'try_stmt', | |
126 | 'expect_clause', | |
127 | 'with_stmt', | |
128 | 'match_stmt', | |
129 | 'case_block', | |
130 | 'funcdef', | |
131 | 'classdef', | |
132 | }) | |
133 | ||
134 | # pylint: disable=invalid-name,missing-docstring | |
135 | def Visit_simple_stmt(self, node): | |
136 | # A 'simple_stmt' conveniently represents a non-compound Python statement, | |
137 | # i.e. a statement that does not contain other statements. | |
138 | ||
139 | # When compound nodes have a single statement as their suite, the parser | |
140 | # can leave it in the tree directly without creating a suite. But we have | |
141 | # to increase depth in these cases as well. However, don't increase the | |
142 | # depth of we have a simple_stmt that's a comment node. This represents a | |
143 | # standalone comment and in the case of it coming directly after the | |
144 | # funcdef, it is a "top" comment for the whole function. | |
145 | # TODO(eliben): add more relevant compound statements here. | |
146 | single_stmt_suite = ( | |
147 | node.parent and pytree_utils.NodeName(node.parent) in self._STMT_TYPES) | |
148 | is_comment_stmt = pytree_utils.IsCommentStatement(node) | |
149 | is_inside_match = node.parent and pytree_utils.NodeName( | |
150 | node.parent) == 'match_stmt' | |
151 | if (single_stmt_suite and not is_comment_stmt) or is_inside_match: | |
152 | self._cur_depth += 1 | |
153 | self._StartNewLine() | |
154 | self.DefaultNodeVisit(node) | |
155 | if (single_stmt_suite and not is_comment_stmt) or is_inside_match: | |
156 | self._cur_depth -= 1 | |
157 | ||
158 | def _VisitCompoundStatement(self, node, substatement_names): | |
159 | """Helper for visiting compound statements. | |
160 | ||
161 | Python compound statements serve as containers for other statements. Thus, | |
162 | when we encounter a new compound statement, we start a new logical line. | |
163 | ||
164 | Arguments: | |
165 | node: the node to visit. | |
166 | substatement_names: set of node names. A compound statement will be | |
167 | recognized as a NAME node with a name in this set. | |
168 | """ | |
169 | for child in node.children: | |
170 | # A pytree is structured in such a way that a single 'if_stmt' node will | |
171 | # contain all the 'if', 'elif' and 'else' nodes as children (similar | |
172 | # structure applies to 'while' statements, 'try' blocks, etc). Therefore, | |
173 | # we visit all children here and create a new line before the requested | |
174 | # set of nodes. | |
175 | if (child.type == grammar_token.NAME and | |
176 | child.value in substatement_names): | |
177 | self._StartNewLine() | |
178 | self.Visit(child) | |
179 | ||
180 | _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'}) | |
181 | ||
182 | def Visit_if_stmt(self, node): # pylint: disable=invalid-name | |
183 | self._VisitCompoundStatement(node, self._IF_STMT_ELEMS) | |
184 | ||
185 | _WHILE_STMT_ELEMS = frozenset({'while', 'else'}) | |
186 | ||
187 | def Visit_while_stmt(self, node): # pylint: disable=invalid-name | |
188 | self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS) | |
189 | ||
190 | _FOR_STMT_ELEMS = frozenset({'for', 'else'}) | |
191 | ||
192 | def Visit_for_stmt(self, node): # pylint: disable=invalid-name | |
193 | self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS) | |
194 | ||
195 | _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'}) | |
196 | ||
197 | def Visit_try_stmt(self, node): # pylint: disable=invalid-name | |
198 | self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS) | |
199 | ||
200 | _EXCEPT_STMT_ELEMS = frozenset({'except'}) | |
201 | ||
202 | def Visit_except_clause(self, node): # pylint: disable=invalid-name | |
203 | self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS) | |
204 | ||
205 | _FUNC_DEF_ELEMS = frozenset({'def'}) | |
206 | ||
207 | def Visit_funcdef(self, node): # pylint: disable=invalid-name | |
208 | self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS) | |
209 | ||
210 | def Visit_async_funcdef(self, node): # pylint: disable=invalid-name | |
211 | self._StartNewLine() | |
212 | index = 0 | |
213 | for child in node.children: | |
214 | index += 1 | |
215 | self.Visit(child) | |
216 | if child.type == grammar_token.ASYNC: | |
217 | break | |
218 | for child in node.children[index].children: | |
219 | self.Visit(child) | |
220 | ||
221 | _CLASS_DEF_ELEMS = frozenset({'class'}) | |
222 | ||
223 | def Visit_classdef(self, node): # pylint: disable=invalid-name | |
224 | self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS) | |
225 | ||
226 | def Visit_async_stmt(self, node): # pylint: disable=invalid-name | |
227 | self._StartNewLine() | |
228 | index = 0 | |
229 | for child in node.children: | |
230 | index += 1 | |
231 | self.Visit(child) | |
232 | if child.type == grammar_token.ASYNC: | |
233 | break | |
234 | for child in node.children[index].children: | |
235 | if child.type == grammar_token.NAME and child.value == 'else': | |
236 | self._StartNewLine() | |
237 | self.Visit(child) | |
238 | ||
239 | def Visit_decorator(self, node): # pylint: disable=invalid-name | |
240 | for child in node.children: | |
241 | self.Visit(child) | |
242 | if child.type == grammar_token.COMMENT and child == node.children[0]: | |
243 | self._StartNewLine() | |
244 | ||
245 | def Visit_decorators(self, node): # pylint: disable=invalid-name | |
246 | for child in node.children: | |
247 | self._StartNewLine() | |
248 | self.Visit(child) | |
249 | ||
250 | def Visit_decorated(self, node): # pylint: disable=invalid-name | |
251 | for child in node.children: | |
252 | self._StartNewLine() | |
253 | self.Visit(child) | |
254 | ||
255 | _WITH_STMT_ELEMS = frozenset({'with'}) | |
256 | ||
257 | def Visit_with_stmt(self, node): # pylint: disable=invalid-name | |
258 | self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS) | |
259 | ||
260 | _MATCH_STMT_ELEMS = frozenset({'match', 'case'}) | |
261 | ||
262 | def Visit_match_stmt(self, node): # pylint: disable=invalid-name | |
263 | self._VisitCompoundStatement(node, self._MATCH_STMT_ELEMS) | |
264 | ||
265 | # case_block refers to the grammar element name in Grammar.txt | |
266 | _CASE_BLOCK_ELEMS = frozenset({'case'}) | |
267 | ||
268 | def Visit_case_block(self, node): | |
269 | self._cur_depth += 1 | |
270 | self._StartNewLine() | |
271 | self._VisitCompoundStatement(node, self._CASE_BLOCK_ELEMS) | |
272 | self._cur_depth -= 1 | |
273 | ||
274 | def Visit_suite(self, node): # pylint: disable=invalid-name | |
275 | # A 'suite' starts a new indentation level in Python. | |
276 | self._cur_depth += 1 | |
277 | self._StartNewLine() | |
278 | self.DefaultNodeVisit(node) | |
279 | self._cur_depth -= 1 | |
280 | ||
281 | def Visit_listmaker(self, node): # pylint: disable=invalid-name | |
282 | _DetermineMustSplitAnnotation(node) | |
283 | self.DefaultNodeVisit(node) | |
284 | ||
285 | def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name | |
286 | _DetermineMustSplitAnnotation(node) | |
287 | self.DefaultNodeVisit(node) | |
288 | ||
289 | def Visit_import_as_names(self, node): # pylint: disable=invalid-name | |
290 | if node.prev_sibling.value == '(': | |
291 | _DetermineMustSplitAnnotation(node) | |
292 | self.DefaultNodeVisit(node) | |
293 | ||
294 | def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name | |
295 | _DetermineMustSplitAnnotation(node) | |
296 | self.DefaultNodeVisit(node) | |
297 | ||
298 | def Visit_arglist(self, node): # pylint: disable=invalid-name | |
299 | _DetermineMustSplitAnnotation(node) | |
300 | self.DefaultNodeVisit(node) | |
301 | ||
302 | def Visit_typedargslist(self, node): # pylint: disable=invalid-name | |
303 | _DetermineMustSplitAnnotation(node) | |
304 | self.DefaultNodeVisit(node) | |
305 | ||
306 | def Visit_subscriptlist(self, node): # pylint: disable=invalid-name | |
307 | _DetermineMustSplitAnnotation(node) | |
308 | self.DefaultNodeVisit(node) | |
309 | ||
310 | def DefaultLeafVisit(self, leaf): | |
311 | """Default visitor for tree leaves. | |
312 | ||
313 | A tree leaf is always just gets appended to the current logical line. | |
314 | ||
315 | Arguments: | |
316 | leaf: the leaf to visit. | |
317 | """ | |
318 | if leaf.type in _WHITESPACE_TOKENS: | |
319 | self._StartNewLine() | |
320 | elif leaf.type != grammar_token.COMMENT or leaf.value.strip(): | |
321 | # Add non-whitespace tokens and comments that aren't empty. | |
322 | self._cur_logical_line.AppendToken( | |
323 | format_token.FormatToken(leaf, pytree_utils.NodeName(leaf))) | |
324 | ||
325 | ||
326 | _BRACKET_MATCH = {')': '(', '}': '{', ']': '['} | |
327 | ||
328 | ||
329 | def _MatchBrackets(line): | |
330 | """Visit the node and match the brackets. | |
331 | ||
332 | For every open bracket ('[', '{', or '('), find the associated closing bracket | |
333 | and "match" them up. I.e., save in the token a pointer to its associated open | |
334 | or close bracket. | |
335 | ||
336 | Arguments: | |
337 | line: (LogicalLine) A logical line. | |
338 | """ | |
339 | bracket_stack = [] | |
340 | for token in line.tokens: | |
341 | if token.value in _OPENING_BRACKETS: | |
342 | bracket_stack.append(token) | |
343 | elif token.value in _CLOSING_BRACKETS: | |
344 | bracket_stack[-1].matching_bracket = token | |
345 | token.matching_bracket = bracket_stack[-1] | |
346 | bracket_stack.pop() | |
347 | ||
348 | for bracket in bracket_stack: | |
349 | if id(pytree_utils.GetOpeningBracket(token.node)) == id(bracket.node): | |
350 | bracket.container_elements.append(token) | |
351 | token.container_opening = bracket | |
352 | ||
353 | ||
354 | def _IdentifyParameterLists(line): | |
355 | """Visit the node to create a state for parameter lists. | |
356 | ||
357 | For instance, a parameter is considered an "object" with its first and last | |
358 | token uniquely identifying the object. | |
359 | ||
360 | Arguments: | |
361 | line: (LogicalLine) A logical line. | |
362 | """ | |
363 | func_stack = [] | |
364 | param_stack = [] | |
365 | for tok in line.tokens: | |
366 | # Identify parameter list objects. | |
367 | if subtypes.FUNC_DEF in tok.subtypes: | |
368 | assert tok.next_token.value == '(' | |
369 | func_stack.append(tok.next_token) | |
370 | continue | |
371 | ||
372 | if func_stack and tok.value == ')': | |
373 | if tok == func_stack[-1].matching_bracket: | |
374 | func_stack.pop() | |
375 | continue | |
376 | ||
377 | # Identify parameter objects. | |
378 | if subtypes.PARAMETER_START in tok.subtypes: | |
379 | param_stack.append(tok) | |
380 | ||
381 | # Not "elif", a parameter could be a single token. | |
382 | if param_stack and subtypes.PARAMETER_STOP in tok.subtypes: | |
383 | start = param_stack.pop() | |
384 | func_stack[-1].parameters.append(object_state.Parameter(start, tok)) | |
385 | ||
386 | ||
387 | def _AdjustSplitPenalty(line): | |
388 | """Visit the node and adjust the split penalties if needed. | |
389 | ||
390 | A token shouldn't be split if it's not within a bracket pair. Mark any token | |
391 | that's not within a bracket pair as "unbreakable". | |
392 | ||
393 | Arguments: | |
394 | line: (LogicalLine) An logical line. | |
395 | """ | |
396 | bracket_level = 0 | |
397 | for index, token in enumerate(line.tokens): | |
398 | if index and not bracket_level: | |
399 | pytree_utils.SetNodeAnnotation(token.node, | |
400 | pytree_utils.Annotation.SPLIT_PENALTY, | |
401 | split_penalty.UNBREAKABLE) | |
402 | if token.value in _OPENING_BRACKETS: | |
403 | bracket_level += 1 | |
404 | elif token.value in _CLOSING_BRACKETS: | |
405 | bracket_level -= 1 | |
406 | ||
407 | ||
408 | def _DetermineMustSplitAnnotation(node): | |
409 | """Enforce a split in the list if the list ends with a comma.""" | |
410 | if style.Get('DISABLE_ENDING_COMMA_HEURISTIC'): | |
411 | return | |
412 | if not _ContainsComments(node): | |
413 | token = next(node.parent.leaves()) | |
414 | if token.value == '(': | |
415 | if sum(1 for ch in node.children if ch.type == grammar_token.COMMA) < 2: | |
416 | return | |
417 | if (not isinstance(node.children[-1], pytree.Leaf) or | |
418 | node.children[-1].value != ','): | |
419 | return | |
420 | num_children = len(node.children) | |
421 | index = 0 | |
422 | _SetMustSplitOnFirstLeaf(node.children[0]) | |
423 | while index < num_children - 1: | |
424 | child = node.children[index] | |
425 | if isinstance(child, pytree.Leaf) and child.value == ',': | |
426 | next_child = node.children[index + 1] | |
427 | if next_child.type == grammar_token.COMMENT: | |
428 | index += 1 | |
429 | if index >= num_children - 1: | |
430 | break | |
431 | _SetMustSplitOnFirstLeaf(node.children[index + 1]) | |
432 | index += 1 | |
433 | ||
434 | ||
435 | def _ContainsComments(node): | |
436 | """Return True if the list has a comment in it.""" | |
437 | if isinstance(node, pytree.Leaf): | |
438 | return node.type == grammar_token.COMMENT | |
439 | for child in node.children: | |
440 | if _ContainsComments(child): | |
441 | return True | |
442 | return False | |
443 | ||
444 | ||
445 | def _SetMustSplitOnFirstLeaf(node): | |
446 | """Set the "must split" annotation on the first leaf node.""" | |
447 | pytree_utils.SetNodeAnnotation( | |
448 | pytree_utils.FirstLeafNode(node), pytree_utils.Annotation.MUST_SPLIT, | |
449 | True) |