]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | # Copyright 2015 Google Inc. All Rights Reserved. |
2 | # | |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | """pytree-related utilities. | |
15 | ||
16 | This module collects various utilities related to the parse trees produced by | |
17 | the lib2to3 library. | |
18 | ||
19 | NodeName(): produces a string name for pytree nodes. | |
20 | ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse | |
21 | a given string with code to a pytree. | |
22 | InsertNodeBefore(): insert a node before another in a pytree. | |
23 | InsertNodeAfter(): insert a node after another in a pytree. | |
24 | {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes. | |
25 | """ | |
26 | ||
27 | import ast | |
28 | import os | |
29 | ||
30 | from yapf_third_party._ylib2to3 import pygram | |
31 | from yapf_third_party._ylib2to3 import pytree | |
32 | from yapf_third_party._ylib2to3.pgen2 import driver | |
33 | from yapf_third_party._ylib2to3.pgen2 import parse | |
34 | from yapf_third_party._ylib2to3.pgen2 import token | |
35 | ||
36 | # TODO(eliben): We may want to get rid of this filtering at some point once we | |
37 | # have a better understanding of what information we need from the tree. Then, | |
38 | # these tokens may be filtered out from the tree before the tree gets to the | |
39 | # unwrapper. | |
40 | NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER']) | |
41 | ||
42 | ||
43 | class Annotation(object): | |
44 | """Annotation names associated with pytrees.""" | |
45 | CHILD_INDENT = 'child_indent' | |
46 | NEWLINES = 'newlines' | |
47 | MUST_SPLIT = 'must_split' | |
48 | SPLIT_PENALTY = 'split_penalty' | |
49 | SUBTYPE = 'subtype' | |
50 | ||
51 | ||
52 | def NodeName(node): | |
53 | """Produce a string name for a given node. | |
54 | ||
55 | For a Leaf this is the token name, and for a Node this is the type. | |
56 | ||
57 | Arguments: | |
58 | node: a tree node | |
59 | ||
60 | Returns: | |
61 | Name as a string. | |
62 | """ | |
63 | # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols. | |
64 | if node.type < 256: | |
65 | return token.tok_name[node.type] | |
66 | else: | |
67 | return pygram.python_grammar.number2symbol[node.type] | |
68 | ||
69 | ||
70 | def FirstLeafNode(node): | |
71 | if isinstance(node, pytree.Leaf): | |
72 | return node | |
73 | return FirstLeafNode(node.children[0]) | |
74 | ||
75 | ||
76 | def LastLeafNode(node): | |
77 | if isinstance(node, pytree.Leaf): | |
78 | return node | |
79 | return LastLeafNode(node.children[-1]) | |
80 | ||
81 | ||
82 | # lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for | |
83 | # parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a | |
84 | # context where a keyword is disallowed). | |
85 | # It forgets to do the same for 'exec' though. Luckily, Python is amenable to | |
86 | # monkey-patching. | |
87 | # Note that pygram.python_grammar_no_print_and_exec_statement with "_and_exec" | |
88 | # will require Python >=3.8. | |
89 | _PYTHON_GRAMMAR = pygram.python_grammar_no_print_statement.copy() | |
90 | del _PYTHON_GRAMMAR.keywords['exec'] | |
91 | ||
92 | ||
93 | def ParseCodeToTree(code): | |
94 | """Parse the given code to a lib2to3 pytree. | |
95 | ||
96 | Arguments: | |
97 | code: a string with the code to parse. | |
98 | ||
99 | Raises: | |
100 | SyntaxError if the code is invalid syntax. | |
101 | parse.ParseError if some other parsing failure. | |
102 | ||
103 | Returns: | |
104 | The root node of the parsed tree. | |
105 | """ | |
106 | # This function is tiny, but the incantation for invoking the parser correctly | |
107 | # is sufficiently magical to be worth abstracting away. | |
108 | if not code.endswith(os.linesep): | |
109 | code += os.linesep | |
110 | ||
111 | try: | |
112 | parser_driver = driver.Driver(_PYTHON_GRAMMAR, convert=pytree.convert) | |
113 | tree = parser_driver.parse_string(code, debug=False) | |
114 | except parse.ParseError: | |
115 | # Raise a syntax error if the code is invalid python syntax. | |
116 | ast.parse(code) | |
117 | raise | |
118 | return _WrapEndMarker(tree) | |
119 | ||
120 | ||
121 | def _WrapEndMarker(tree): | |
122 | """Wrap a single ENDMARKER token in a "file_input" node. | |
123 | ||
124 | Arguments: | |
125 | tree: (pytree.Node) The root node of the parsed tree. | |
126 | ||
127 | Returns: | |
128 | The root node of the parsed tree. If the tree is a single ENDMARKER node, | |
129 | then that node is wrapped in a "file_input" node. That will ensure we don't | |
130 | skip comments attached to that node. | |
131 | """ | |
132 | if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER: | |
133 | return pytree.Node(pygram.python_symbols.file_input, [tree]) | |
134 | return tree | |
135 | ||
136 | ||
137 | def InsertNodesBefore(new_nodes, target): | |
138 | """Insert new_nodes before the given target location in the tree. | |
139 | ||
140 | Arguments: | |
141 | new_nodes: a sequence of new nodes to insert (the nodes should not be in the | |
142 | tree). | |
143 | target: the target node before which the new node node will be inserted. | |
144 | ||
145 | Raises: | |
146 | RuntimeError: if the tree is corrupted, or the insertion would corrupt it. | |
147 | """ | |
148 | for node in new_nodes: | |
149 | _InsertNodeAt(node, target, after=False) | |
150 | ||
151 | ||
152 | def InsertNodesAfter(new_nodes, target): | |
153 | """Insert new_nodes after the given target location in the tree. | |
154 | ||
155 | Arguments: | |
156 | new_nodes: a sequence of new nodes to insert (the nodes should not be in the | |
157 | tree). | |
158 | target: the target node after which the new node node will be inserted. | |
159 | ||
160 | Raises: | |
161 | RuntimeError: if the tree is corrupted, or the insertion would corrupt it. | |
162 | """ | |
163 | for node in reversed(new_nodes): | |
164 | _InsertNodeAt(node, target, after=True) | |
165 | ||
166 | ||
167 | def _InsertNodeAt(new_node, target, after=False): | |
168 | """Underlying implementation for node insertion. | |
169 | ||
170 | Arguments: | |
171 | new_node: a new node to insert (this node should not be in the tree). | |
172 | target: the target node. | |
173 | after: if True, new_node is inserted after target. Otherwise, it's inserted | |
174 | before target. | |
175 | ||
176 | Returns: | |
177 | nothing | |
178 | ||
179 | Raises: | |
180 | RuntimeError: if the tree is corrupted, or the insertion would corrupt it. | |
181 | """ | |
182 | ||
183 | # Protect against attempts to insert nodes which already belong to some tree. | |
184 | if new_node.parent is not None: | |
185 | raise RuntimeError('inserting node which already has a parent', | |
186 | (new_node, new_node.parent)) | |
187 | ||
188 | # The code here is based on pytree.Base.next_sibling | |
189 | parent_of_target = target.parent | |
190 | if parent_of_target is None: | |
191 | raise RuntimeError('expected target node to have a parent', (target,)) | |
192 | ||
193 | for i, child in enumerate(parent_of_target.children): | |
194 | if child is target: | |
195 | insertion_index = i + 1 if after else i | |
196 | parent_of_target.insert_child(insertion_index, new_node) | |
197 | return | |
198 | ||
199 | raise RuntimeError('unable to find insertion point for target node', | |
200 | (target,)) | |
201 | ||
202 | ||
203 | # The following constant and functions implement a simple custom annotation | |
204 | # mechanism for pytree nodes. We attach new attributes to nodes. Each attribute | |
205 | # is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be | |
206 | # managed through GetNodeAnnotation and SetNodeAnnotation. | |
207 | _NODE_ANNOTATION_PREFIX = '_yapf_annotation_' | |
208 | ||
209 | ||
210 | def CopyYapfAnnotations(src, dst): | |
211 | """Copy all YAPF annotations from the source node to the destination node. | |
212 | ||
213 | Arguments: | |
214 | src: the source node. | |
215 | dst: the destination node. | |
216 | """ | |
217 | for annotation in dir(src): | |
218 | if annotation.startswith(_NODE_ANNOTATION_PREFIX): | |
219 | setattr(dst, annotation, getattr(src, annotation, None)) | |
220 | ||
221 | ||
222 | def GetNodeAnnotation(node, annotation, default=None): | |
223 | """Get annotation value from a node. | |
224 | ||
225 | Arguments: | |
226 | node: the node. | |
227 | annotation: annotation name - a string. | |
228 | default: the default value to return if there's no annotation. | |
229 | ||
230 | Returns: | |
231 | Value of the annotation in the given node. If the node doesn't have this | |
232 | particular annotation name yet, returns default. | |
233 | """ | |
234 | return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default) | |
235 | ||
236 | ||
237 | def SetNodeAnnotation(node, annotation, value): | |
238 | """Set annotation value on a node. | |
239 | ||
240 | Arguments: | |
241 | node: the node. | |
242 | annotation: annotation name - a string. | |
243 | value: annotation value to set. | |
244 | """ | |
245 | setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value) | |
246 | ||
247 | ||
248 | def AppendNodeAnnotation(node, annotation, value): | |
249 | """Appends an annotation value to a list of annotations on the node. | |
250 | ||
251 | Arguments: | |
252 | node: the node. | |
253 | annotation: annotation name - a string. | |
254 | value: annotation value to set. | |
255 | """ | |
256 | attr = GetNodeAnnotation(node, annotation, set()) | |
257 | attr.add(value) | |
258 | SetNodeAnnotation(node, annotation, attr) | |
259 | ||
260 | ||
261 | def RemoveSubtypeAnnotation(node, value): | |
262 | """Removes an annotation value from the subtype annotations on the node. | |
263 | ||
264 | Arguments: | |
265 | node: the node. | |
266 | value: annotation value to remove. | |
267 | """ | |
268 | attr = GetNodeAnnotation(node, Annotation.SUBTYPE) | |
269 | if attr and value in attr: | |
270 | attr.remove(value) | |
271 | SetNodeAnnotation(node, Annotation.SUBTYPE, attr) | |
272 | ||
273 | ||
274 | def GetOpeningBracket(node): | |
275 | """Get opening bracket value from a node. | |
276 | ||
277 | Arguments: | |
278 | node: the node. | |
279 | ||
280 | Returns: | |
281 | The opening bracket node or None if it couldn't find one. | |
282 | """ | |
283 | return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None) | |
284 | ||
285 | ||
286 | def SetOpeningBracket(node, bracket): | |
287 | """Set opening bracket value for a node. | |
288 | ||
289 | Arguments: | |
290 | node: the node. | |
291 | bracket: opening bracket to set. | |
292 | """ | |
293 | setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket) | |
294 | ||
295 | ||
296 | def DumpNodeToString(node): | |
297 | """Dump a string representation of the given node. For debugging. | |
298 | ||
299 | Arguments: | |
300 | node: the node. | |
301 | ||
302 | Returns: | |
303 | The string representation. | |
304 | """ | |
305 | if isinstance(node, pytree.Leaf): | |
306 | fmt = ('{name}({value}) [lineno={lineno}, column={column}, ' | |
307 | 'prefix={prefix}, penalty={penalty}]') | |
308 | return fmt.format( | |
309 | name=NodeName(node), | |
310 | value=_PytreeNodeRepr(node), | |
311 | lineno=node.lineno, | |
312 | column=node.column, | |
313 | prefix=repr(node.prefix), | |
314 | penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None)) | |
315 | else: | |
316 | fmt = '{node} [{len} children] [child_indent="{indent}"]' | |
317 | return fmt.format( | |
318 | node=NodeName(node), | |
319 | len=len(node.children), | |
320 | indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT)) | |
321 | ||
322 | ||
323 | def _PytreeNodeRepr(node): | |
324 | """Like pytree.Node.__repr__, but names instead of numbers for tokens.""" | |
325 | if isinstance(node, pytree.Node): | |
326 | return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), | |
327 | [_PytreeNodeRepr(c) for c in node.children]) | |
328 | if isinstance(node, pytree.Leaf): | |
329 | return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value) | |
330 | ||
331 | ||
332 | def IsCommentStatement(node): | |
333 | return (NodeName(node) == 'simple_stmt' and | |
334 | node.children[0].type == token.COMMENT) |