]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | # Copyright 2015 Google Inc. All Rights Reserved. |
2 | # | |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | """Comment splicer for lib2to3 trees. | |
15 | ||
16 | The lib2to3 syntax tree produced by the parser holds comments and whitespace in | |
17 | prefix attributes of nodes, rather than nodes themselves. This module provides | |
18 | functionality to splice comments out of prefixes and into nodes of their own, | |
19 | making them easier to process. | |
20 | ||
21 | SpliceComments(): the main function exported by this module. | |
22 | """ | |
23 | ||
24 | from yapf_third_party._ylib2to3 import pygram | |
25 | from yapf_third_party._ylib2to3 import pytree | |
26 | from yapf_third_party._ylib2to3.pgen2 import token | |
27 | ||
28 | from yapf.pytree import pytree_utils | |
29 | ||
30 | ||
def SpliceComments(tree):
  """Given a pytree, splice comments into nodes of their own right.

  Extract comments from the prefixes where they are housed after parsing.
  The prefixes that previously housed the comments become empty.

  Three cases are distinguished by the type of the leaf carrying the prefix:

  * NEWLINE leaf: the comment trails the code on that line and is inserted
    (as COMMENT leaves) after the previously visited leaf.
  * DEDENT leaf: the prefix is split into groups of decreasing indentation and
    each group is inserted as a standalone comment next to an ancestor chosen
    by _FindAncestorAtIndent.
  * Any other leaf: if the leaf is the first non-NEWLINE leaf of its
    statement, the comment is inserted as a standalone comment before it;
    otherwise it is inserted as COMMENT leaves inside the expression.

  Args:
    tree: a pytree.Node - the tree to work on. The tree is modified by this
      function.
  """
  # The previous leaf node encountered in the traversal.
  # This is a one-element list so that the nested function can rebind it
  # without 'nonlocal' (which Python 2.x doesn't have).
  prev_leaf = [None]
  _AnnotateIndents(tree)

  def _VisitNodeRec(node):
    """Recursively visit each node to splice comments into the AST."""
    # This loop may insert into node.children, so we'll iterate over a copy.
    for child in node.children[:]:
      if isinstance(child, pytree.Node):
        # Nodes don't have prefixes.
        _VisitNodeRec(child)
        continue

      if child.prefix.lstrip().startswith('#'):
        # We have a comment prefix in this child, so splicing is needed.
        comment_prefix = child.prefix
        # child.lineno is the line of the leaf itself; the prefix starts as
        # many lines earlier as it contains newlines.
        comment_lineno = child.lineno - comment_prefix.count('\n')
        comment_column = child.column

        # Mopping up the prefix is important because we may go over this same
        # child in the next iteration...
        child.prefix = ''

        if child.type == token.NEWLINE:
          # If the prefix was on a NEWLINE leaf, it's part of the line so it
          # will be inserted after the previously encountered leaf.
          # We can't just insert it before the NEWLINE node, because as a
          # result of the way pytrees are organized, this node can be under
          # an inappropriate parent.
          #
          # The column starts out as the NEWLINE leaf's column; step back over
          # the comment text to get the column where the comment begins.
          comment_column -= len(comment_prefix.lstrip())
          pytree_utils.InsertNodesAfter(
              _CreateCommentsFromPrefix(
                  comment_prefix,
                  comment_lineno,
                  comment_column,
                  standalone=False), prev_leaf[0])
        elif child.type == token.DEDENT:
          # Comment prefixes on DEDENT nodes also deserve special treatment,
          # because their final placement depends on their prefix.
          # We'll look for an ancestor of this child with a matching
          # indentation, and insert the comment before it if the ancestor is
          # on a DEDENT node and after it otherwise.
          #
          # lib2to3 places comments that should be separated into the same
          # DEDENT node. For example, "comment 1" and "comment 2" will be
          # combined.
          #
          #   def _():
          #     for x in y:
          #       pass
          #       # comment 1
          #
          #     # comment 2
          #     pass
          #
          # In this case, we need to split them up ourselves.

          # Split into groups of comments at decreasing levels of indentation.
          # Each group is a (column, indent string, lines) tuple; a new group
          # starts whenever a comment is indented less than the current group.
          comment_groups = []
          group_column = None
          for cmt in comment_prefix.split('\n'):
            col = cmt.find('#')
            if col < 0:
              if group_column is None:
                # Skip empty lines at the top of the first comment group
                comment_lineno += 1
                continue
              # Otherwise the non-comment line stays with the current group so
              # that line numbers keep adding up.
            elif group_column is None or col < group_column:
              group_column = col
              comment_groups.append((group_column, cmt[:group_column], []))
            comment_groups[-1][-1].append(cmt)

          # Insert a node for each group
          for group_column, group_indent, group_lines in comment_groups:
            ancestor_at_indent = _FindAncestorAtIndent(child, group_indent)
            if ancestor_at_indent.type == token.DEDENT:
              insert_nodes = pytree_utils.InsertNodesBefore
            else:
              insert_nodes = pytree_utils.InsertNodesAfter
            insert_nodes(
                _CreateCommentsFromPrefix(
                    '\n'.join(group_lines) + '\n',
                    comment_lineno,
                    group_column,
                    standalone=True), ancestor_at_indent)
            comment_lineno += len(group_lines)
        else:
          # Otherwise there are two cases.
          #
          # 1. The comment is on its own line
          # 2. The comment is part of an expression.
          #
          # Unfortunately, it's fairly difficult to distinguish between the
          # two in lib2to3 trees. The algorithm here is to determine whether
          # child is the first leaf in the statement it belongs to. If it is,
          # then the comment (which is a prefix) belongs on a separate line.
          # If it is not, it means the comment is buried deep in the statement
          # and is part of some expression.
          stmt_parent = _FindStmtParent(child)

          # Only the first non-NEWLINE leaf of the statement is examined; both
          # remaining branches end the loop.
          for leaf_in_parent in stmt_parent.leaves():
            if leaf_in_parent.type == token.NEWLINE:
              continue
            elif leaf_in_parent is child:
              # This comment stands on its own line, and it has to be inserted
              # into the appropriate parent. We'll have to find a suitable
              # parent to insert into. See comments above
              # _STANDALONE_LINE_NODES for more details.
              node_with_line_parent = _FindNodeWithStandaloneLineParent(child)

              if pytree_utils.NodeName(
                  node_with_line_parent.parent) in {'funcdef', 'classdef'}:
                # comment_end is the line of the comment's last line. If at
                # least one line separates it from the node, insert the
                # comment before the enclosing funcdef/classdef node instead
                # of inside it.
                comment_end = (
                    comment_lineno + comment_prefix.rstrip('\n').count('\n'))
                if comment_end < node_with_line_parent.lineno - 1:
                  node_with_line_parent = node_with_line_parent.parent

              pytree_utils.InsertNodesBefore(
                  _CreateCommentsFromPrefix(
                      comment_prefix, comment_lineno, 0, standalone=True),
                  node_with_line_parent)
              break
            else:
              if comment_lineno == prev_leaf[0].lineno:
                # The prefix starts on the same line as the previous leaf, so
                # its first line trails that leaf: emit it as its own COMMENT
                # leaf right after the previous leaf.
                comment_lines = comment_prefix.splitlines()
                value = comment_lines[0].lstrip()
                if value.rstrip('\n'):
                  # Column = end of the previous leaf plus the whitespace that
                  # precedes the comment text.
                  comment_column = prev_leaf[0].column
                  comment_column += len(prev_leaf[0].value)
                  comment_column += (
                      len(comment_lines[0]) - len(comment_lines[0].lstrip()))
                  comment_leaf = pytree.Leaf(
                      type=token.COMMENT,
                      value=value.rstrip('\n'),
                      context=('', (comment_lineno, comment_column)))
                  pytree_utils.InsertNodesAfter([comment_leaf], prev_leaf[0])
                  # The remaining lines of the prefix are handled below.
                  comment_prefix = '\n'.join(comment_lines[1:])
                  comment_lineno += 1

              # The column is the leading whitespace width of the last
              # non-blank line of the (remaining) prefix.
              rindex = (0 if '\n' not in comment_prefix.rstrip() else
                        comment_prefix.rstrip().rindex('\n') + 1)
              comment_column = (
                  len(comment_prefix[rindex:]) -
                  len(comment_prefix[rindex:].lstrip()))
              comments = _CreateCommentsFromPrefix(
                  comment_prefix,
                  comment_lineno,
                  comment_column,
                  standalone=False)
              pytree_utils.InsertNodesBefore(comments, child)
              break

      prev_leaf[0] = child

  _VisitNodeRec(tree)
204 | ||
205 | ||
206 | def _CreateCommentsFromPrefix(comment_prefix, | |
207 | comment_lineno, | |
208 | comment_column, | |
209 | standalone=False): | |
210 | """Create pytree nodes to represent the given comment prefix. | |
211 | ||
212 | Args: | |
213 | comment_prefix: (unicode) the text of the comment from the node's prefix. | |
214 | comment_lineno: (int) the line number for the start of the comment. | |
215 | comment_column: (int) the column for the start of the comment. | |
216 | standalone: (bool) determines if the comment is standalone or not. | |
217 | ||
218 | Returns: | |
219 | The simple_stmt nodes if this is a standalone comment, otherwise a list of | |
220 | new COMMENT leafs. The prefix may consist of multiple comment blocks, | |
221 | separated by blank lines. Each block gets its own leaf. | |
222 | """ | |
223 | # The comment is stored in the prefix attribute, with no lineno of its | |
224 | # own. So we only know at which line it ends. To find out at which line it | |
225 | # starts, look at how many newlines the comment itself contains. | |
226 | comments = [] | |
227 | ||
228 | lines = comment_prefix.split('\n') | |
229 | index = 0 | |
230 | while index < len(lines): | |
231 | comment_block = [] | |
232 | while index < len(lines) and lines[index].lstrip().startswith('#'): | |
233 | comment_block.append(lines[index].strip()) | |
234 | index += 1 | |
235 | ||
236 | if comment_block: | |
237 | new_lineno = comment_lineno + index - 1 | |
238 | comment_block[0] = comment_block[0].strip() | |
239 | comment_block[-1] = comment_block[-1].strip() | |
240 | comment_leaf = pytree.Leaf( | |
241 | type=token.COMMENT, | |
242 | value='\n'.join(comment_block), | |
243 | context=('', (new_lineno, comment_column))) | |
244 | comment_node = comment_leaf if not standalone else pytree.Node( | |
245 | pygram.python_symbols.simple_stmt, [comment_leaf]) | |
246 | comments.append(comment_node) | |
247 | ||
248 | while index < len(lines) and not lines[index].lstrip(): | |
249 | index += 1 | |
250 | ||
251 | return comments | |
252 | ||
253 | ||
254 | # "Standalone line nodes" are tree nodes that have to start a new line in Python | |
255 | # code (and cannot follow a ';' or ':'). Other nodes, like 'expr_stmt', serve as | |
256 | # parents of other nodes but can come later in a line. This is a list of | |
257 | # standalone line nodes in the grammar. It is meant to be exhaustive | |
258 | # *eventually*, and we'll modify it with time as we discover more corner cases | |
259 | # in the parse tree. | |
260 | # | |
261 | # When splicing a standalone comment (i.e. a comment that appears on its own | |
262 | # line, not on the same line with other code), it's important to insert it into | |
263 | # an appropriate parent of the node it's attached to. An appropriate parent | |
264 | # is the first "standalone line node" in the parent chain of a node. | |
265 | _STANDALONE_LINE_NODES = frozenset([ | |
266 | 'suite', 'if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt', | |
267 | 'funcdef', 'classdef', 'decorated', 'file_input' | |
268 | ]) | |
269 | ||
270 | ||
def _FindNodeWithStandaloneLineParent(node):
  """Find a node whose parent is a 'standalone line' node.

  Walks up the parent chain, starting at node itself, until it reaches a node
  whose parent's name is listed in _STANDALONE_LINE_NODES.

  Arguments:
    node: node to start from

  Returns:
    Suitable node that's either the node itself or one of its ancestors.
  """
  candidate = node
  # The walk is expected to stop at the latest just below the tree root,
  # because 'file_input' (the root node of any pytree) is a standalone line
  # node.
  while pytree_utils.NodeName(candidate.parent) not in _STANDALONE_LINE_NODES:
    candidate = candidate.parent
  return candidate
288 | ||
289 | ||
290 | # "Statement nodes" are standalone statements. They don't have to start a new | |
291 | # line. | |
_STATEMENT_NODES = _STANDALONE_LINE_NODES.union(('simple_stmt',))
293 | ||
294 | ||
def _FindStmtParent(node):
  """Find the nearest statement node at or above node.

  A statement node is one whose name is listed in _STATEMENT_NODES.

  Arguments:
    node: node to start from

  Returns:
    Nearest parent (or node itself, if suitable).
  """
  candidate = node
  # Climb the parent chain until a statement node is reached.
  while pytree_utils.NodeName(candidate) not in _STATEMENT_NODES:
    candidate = candidate.parent
  return candidate
308 | ||
309 | ||
def _FindAncestorAtIndent(node, indent):
  """Find an ancestor of node with the given indentation.

  Arguments:
    node: node to start from. This must not be the tree root.
    indent: indentation string for the ancestor we're looking for.
      See _AnnotateIndents for more details.

  Returns:
    An ancestor node with suitable indentation. If no suitable ancestor is
    found, the closest ancestor to the tree root is returned.
  """
  candidate = node
  # Stop climbing once the candidate's parent is the tree root: there's nowhere
  # else to go.
  while candidate.parent.parent is not None:
    parent_indent = pytree_utils.GetNodeAnnotation(
        candidate.parent, pytree_utils.Annotation.CHILD_INDENT)
    # The candidate is suitable when its parent carries an indent annotation
    # that is a prefix of the requested indent. A prefix match (rather than an
    # exact one) is used because comments may be improperly indented (i.e. by
    # three spaces, where surrounding statements have either zero or two or
    # four), and we don't want to propagate them all the way to the root.
    if parent_indent is not None and indent.startswith(parent_indent):
      break
    # Keep looking up the tree.
    candidate = candidate.parent
  return candidate
339 | ||
340 | ||
def _AnnotateIndents(tree):
  """Annotate the tree with child_indent annotations.

  A child_indent annotation on a node specifies the indentation (as a string)
  of its children. It is taken from the value of the node's INDENT child. The
  whole tree is processed recursively.

  Arguments:
    tree: root of a pytree. The pytree is modified to add annotations to nodes.

  Raises:
    RuntimeError: if a node already has a child_indent annotation that differs
      from the value of one of its INDENT children.
  """
  indent_key = pytree_utils.Annotation.CHILD_INDENT

  # The root of the tree gets a zero indent.
  if tree.parent is None:
    pytree_utils.SetNodeAnnotation(tree, indent_key, '')

  for subtree in tree.children:
    if subtree.type == token.INDENT:
      known_indent = pytree_utils.GetNodeAnnotation(tree, indent_key)
      if known_indent is not None and known_indent != subtree.value:
        raise RuntimeError('inconsistent indentation for child',
                           (tree, subtree))
      pytree_utils.SetNodeAnnotation(tree, indent_key, subtree.value)
    # Descend into every child, INDENT or not.
    _AnnotateIndents(subtree)