1 # Copyright 2015 Google Inc. All Rights Reserved.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 """pytree-related utilities.
16 This module collects various utilities related to the parse trees produced by
19 NodeName(): produces a string name for pytree nodes.
20 ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
21 a given string with code to a pytree.
22 InsertNodeBefore(): insert a node before another in a pytree.
23 InsertNodeAfter(): insert a node after another in a pytree.
24 {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
30 from yapf_third_party
._ylib
2to
3 import pygram
31 from yapf_third_party
._ylib
2to
3 import pytree
32 from yapf_third_party
._ylib
2to
3.pgen2
import driver
33 from yapf_third_party
._ylib
2to
3.pgen2
import parse
34 from yapf_third_party
._ylib
2to
3.pgen2
import token
36 # TODO(eliben): We may want to get rid of this filtering at some point once we
37 # have a better understanding of what information we need from the tree. Then,
38 # these tokens may be filtered out from the tree before the tree gets to the
40 NONSEMANTIC_TOKENS
= frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
43 class Annotation(object):
44 """Annotation names associated with pytrees."""
45 CHILD_INDENT
= 'child_indent'
47 MUST_SPLIT
= 'must_split'
48 SPLIT_PENALTY
= 'split_penalty'
53 """Produce a string name for a given node.
55 For a Leaf this is the token name, and for a Node this is the type.
63 # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
65 return token
.tok_name
[node
.type]
67 return pygram
.python_grammar
.number2symbol
[node
.type]
70 def FirstLeafNode(node
):
71 if isinstance(node
, pytree
.Leaf
):
73 return FirstLeafNode(node
.children
[0])
76 def LastLeafNode(node
):
77 if isinstance(node
, pytree
.Leaf
):
79 return LastLeafNode(node
.children
[-1])
82 # lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
83 # parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
84 # context where a keyword is disallowed).
85 # It forgets to do the same for 'exec' though. Luckily, Python is amenable to
87 # Note that pygram.python_grammar_no_print_and_exec_statement with "_and_exec"
88 # will require Python >=3.8.
89 _PYTHON_GRAMMAR
= pygram
.python_grammar_no_print_statement
.copy()
90 del _PYTHON_GRAMMAR
.keywords
['exec']
93 def ParseCodeToTree(code
):
94 """Parse the given code to a lib2to3 pytree.
97 code: a string with the code to parse.
100 SyntaxError if the code is invalid syntax.
101 parse.ParseError if some other parsing failure.
104 The root node of the parsed tree.
106 # This function is tiny, but the incantation for invoking the parser correctly
107 # is sufficiently magical to be worth abstracting away.
108 if not code
.endswith(os
.linesep
):
112 parser_driver
= driver
.Driver(_PYTHON_GRAMMAR
, convert
=pytree
.convert
)
113 tree
= parser_driver
.parse_string(code
, debug
=False)
114 except parse
.ParseError
:
115 # Raise a syntax error if the code is invalid python syntax.
118 return _WrapEndMarker(tree
)
121 def _WrapEndMarker(tree
):
122 """Wrap a single ENDMARKER token in a "file_input" node.
125 tree: (pytree.Node) The root node of the parsed tree.
128 The root node of the parsed tree. If the tree is a single ENDMARKER node,
129 then that node is wrapped in a "file_input" node. That will ensure we don't
130 skip comments attached to that node.
132 if isinstance(tree
, pytree
.Leaf
) and tree
.type == token
.ENDMARKER
:
133 return pytree
.Node(pygram
.python_symbols
.file_input
, [tree
])
137 def InsertNodesBefore(new_nodes
, target
):
138 """Insert new_nodes before the given target location in the tree.
141 new_nodes: a sequence of new nodes to insert (the nodes should not be in the
143 target: the target node before which the new node node will be inserted.
146 RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
148 for node
in new_nodes
:
149 _InsertNodeAt(node
, target
, after
=False)
152 def InsertNodesAfter(new_nodes
, target
):
153 """Insert new_nodes after the given target location in the tree.
156 new_nodes: a sequence of new nodes to insert (the nodes should not be in the
158 target: the target node after which the new node node will be inserted.
161 RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
163 for node
in reversed(new_nodes
):
164 _InsertNodeAt(node
, target
, after
=True)
167 def _InsertNodeAt(new_node
, target
, after
=False):
168 """Underlying implementation for node insertion.
171 new_node: a new node to insert (this node should not be in the tree).
172 target: the target node.
173 after: if True, new_node is inserted after target. Otherwise, it's inserted
180 RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
183 # Protect against attempts to insert nodes which already belong to some tree.
184 if new_node
.parent
is not None:
185 raise RuntimeError('inserting node which already has a parent',
186 (new_node
, new_node
.parent
))
188 # The code here is based on pytree.Base.next_sibling
189 parent_of_target
= target
.parent
190 if parent_of_target
is None:
191 raise RuntimeError('expected target node to have a parent', (target
,))
193 for i
, child
in enumerate(parent_of_target
.children
):
195 insertion_index
= i
+ 1 if after
else i
196 parent_of_target
.insert_child(insertion_index
, new_node
)
199 raise RuntimeError('unable to find insertion point for target node',
203 # The following constant and functions implement a simple custom annotation
204 # mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
205 # is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
206 # managed through GetNodeAnnotation and SetNodeAnnotation.
207 _NODE_ANNOTATION_PREFIX
= '_yapf_annotation_'
210 def CopyYapfAnnotations(src
, dst
):
211 """Copy all YAPF annotations from the source node to the destination node.
214 src: the source node.
215 dst: the destination node.
217 for annotation
in dir(src
):
218 if annotation
.startswith(_NODE_ANNOTATION_PREFIX
):
219 setattr(dst
, annotation
, getattr(src
, annotation
, None))
222 def GetNodeAnnotation(node
, annotation
, default
=None):
223 """Get annotation value from a node.
227 annotation: annotation name - a string.
228 default: the default value to return if there's no annotation.
231 Value of the annotation in the given node. If the node doesn't have this
232 particular annotation name yet, returns default.
234 return getattr(node
, _NODE_ANNOTATION_PREFIX
+ annotation
, default
)
237 def SetNodeAnnotation(node
, annotation
, value
):
238 """Set annotation value on a node.
242 annotation: annotation name - a string.
243 value: annotation value to set.
245 setattr(node
, _NODE_ANNOTATION_PREFIX
+ annotation
, value
)
248 def AppendNodeAnnotation(node
, annotation
, value
):
249 """Appends an annotation value to a list of annotations on the node.
253 annotation: annotation name - a string.
254 value: annotation value to set.
256 attr
= GetNodeAnnotation(node
, annotation
, set())
258 SetNodeAnnotation(node
, annotation
, attr
)
261 def RemoveSubtypeAnnotation(node
, value
):
262 """Removes an annotation value from the subtype annotations on the node.
266 value: annotation value to remove.
268 attr
= GetNodeAnnotation(node
, Annotation
.SUBTYPE
)
269 if attr
and value
in attr
:
271 SetNodeAnnotation(node
, Annotation
.SUBTYPE
, attr
)
274 def GetOpeningBracket(node
):
275 """Get opening bracket value from a node.
281 The opening bracket node or None if it couldn't find one.
283 return getattr(node
, _NODE_ANNOTATION_PREFIX
+ 'container_bracket', None)
286 def SetOpeningBracket(node
, bracket
):
287 """Set opening bracket value for a node.
291 bracket: opening bracket to set.
293 setattr(node
, _NODE_ANNOTATION_PREFIX
+ 'container_bracket', bracket
)
296 def DumpNodeToString(node
):
297 """Dump a string representation of the given node. For debugging.
303 The string representation.
305 if isinstance(node
, pytree
.Leaf
):
306 fmt
= ('{name}({value}) [lineno={lineno}, column={column}, '
307 'prefix={prefix}, penalty={penalty}]')
310 value
=_PytreeNodeRepr(node
),
313 prefix
=repr(node
.prefix
),
314 penalty
=GetNodeAnnotation(node
, Annotation
.SPLIT_PENALTY
, None))
316 fmt
= '{node} [{len} children] [child_indent="{indent}"]'
319 len=len(node
.children
),
320 indent
=GetNodeAnnotation(node
, Annotation
.CHILD_INDENT
))
323 def _PytreeNodeRepr(node
):
324 """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
325 if isinstance(node
, pytree
.Node
):
326 return '%s(%s, %r)' % (node
.__class
__.__name
__, NodeName(node
),
327 [_PytreeNodeRepr(c
) for c
in node
.children
])
328 if isinstance(node
, pytree
.Leaf
):
329 return '%s(%s, %r)' % (node
.__class
__.__name
__, NodeName(node
), node
.value
)
332 def IsCommentStatement(node
):
333 return (NodeName(node
) == 'simple_stmt' and
334 node
.children
[0].type == token
.COMMENT
)