]>
Commit | Line | Data |
---|---|---|
1 | from abc import abstractmethod, abstractproperty | |
2 | from typing import List, Optional, Tuple, Union | |
3 | ||
4 | from parso.utils import split_lines | |
5 | ||
6 | ||
7 | def search_ancestor(node: 'NodeOrLeaf', *node_types: str) -> 'Optional[BaseNode]': | |
8 | """ | |
9 | Recursively looks at the parents of a node and returns the first found node | |
10 | that matches ``node_types``. Returns ``None`` if no matching node is found. | |
11 | ||
12 | This function is deprecated, use :meth:`NodeOrLeaf.search_ancestor` instead. | |
13 | ||
14 | :param node: The ancestors of this node will be checked. | |
15 | :param node_types: type names that are searched for. | |
16 | """ | |
17 | n = node.parent | |
18 | while n is not None: | |
19 | if n.type in node_types: | |
20 | return n | |
21 | n = n.parent | |
22 | return None | |
23 | ||
24 | ||
25 | class NodeOrLeaf: | |
26 | """ | |
27 | The base class for nodes and leaves. | |
28 | """ | |
29 | __slots__ = ('parent',) | |
30 | type: str | |
31 | ''' | |
32 | The type is a string that typically matches the types of the grammar file. | |
33 | ''' | |
34 | parent: 'Optional[BaseNode]' | |
35 | ''' | |
36 | The parent :class:`BaseNode` of this node or leaf. | |
37 | None if this is the root node. | |
38 | ''' | |
39 | ||
40 | def get_root_node(self): | |
41 | """ | |
42 | Returns the root node of a parser tree. The returned node doesn't have | |
43 | a parent node like all the other nodes/leaves. | |
44 | """ | |
45 | scope = self | |
46 | while scope.parent is not None: | |
47 | scope = scope.parent | |
48 | return scope | |
49 | ||
50 | def get_next_sibling(self): | |
51 | """ | |
52 | Returns the node immediately following this node in this parent's | |
53 | children list. If this node does not have a next sibling, it is None | |
54 | """ | |
55 | parent = self.parent | |
56 | if parent is None: | |
57 | return None | |
58 | ||
59 | # Can't use index(); we need to test by identity | |
60 | for i, child in enumerate(parent.children): | |
61 | if child is self: | |
62 | try: | |
63 | return self.parent.children[i + 1] | |
64 | except IndexError: | |
65 | return None | |
66 | ||
67 | def get_previous_sibling(self): | |
68 | """ | |
69 | Returns the node immediately preceding this node in this parent's | |
70 | children list. If this node does not have a previous sibling, it is | |
71 | None. | |
72 | """ | |
73 | parent = self.parent | |
74 | if parent is None: | |
75 | return None | |
76 | ||
77 | # Can't use index(); we need to test by identity | |
78 | for i, child in enumerate(parent.children): | |
79 | if child is self: | |
80 | if i == 0: | |
81 | return None | |
82 | return self.parent.children[i - 1] | |
83 | ||
84 | def get_previous_leaf(self): | |
85 | """ | |
86 | Returns the previous leaf in the parser tree. | |
87 | Returns `None` if this is the first element in the parser tree. | |
88 | """ | |
89 | if self.parent is None: | |
90 | return None | |
91 | ||
92 | node = self | |
93 | while True: | |
94 | c = node.parent.children | |
95 | i = c.index(node) | |
96 | if i == 0: | |
97 | node = node.parent | |
98 | if node.parent is None: | |
99 | return None | |
100 | else: | |
101 | node = c[i - 1] | |
102 | break | |
103 | ||
104 | while True: | |
105 | try: | |
106 | node = node.children[-1] | |
107 | except AttributeError: # A Leaf doesn't have children. | |
108 | return node | |
109 | ||
110 | def get_next_leaf(self): | |
111 | """ | |
112 | Returns the next leaf in the parser tree. | |
113 | Returns None if this is the last element in the parser tree. | |
114 | """ | |
115 | if self.parent is None: | |
116 | return None | |
117 | ||
118 | node = self | |
119 | while True: | |
120 | c = node.parent.children | |
121 | i = c.index(node) | |
122 | if i == len(c) - 1: | |
123 | node = node.parent | |
124 | if node.parent is None: | |
125 | return None | |
126 | else: | |
127 | node = c[i + 1] | |
128 | break | |
129 | ||
130 | while True: | |
131 | try: | |
132 | node = node.children[0] | |
133 | except AttributeError: # A Leaf doesn't have children. | |
134 | return node | |
135 | ||
136 | @abstractproperty | |
137 | def start_pos(self) -> Tuple[int, int]: | |
138 | """ | |
139 | Returns the starting position of the prefix as a tuple, e.g. `(3, 4)`. | |
140 | ||
141 | :return tuple of int: (line, column) | |
142 | """ | |
143 | ||
144 | @abstractproperty | |
145 | def end_pos(self) -> Tuple[int, int]: | |
146 | """ | |
147 | Returns the end position of the prefix as a tuple, e.g. `(3, 4)`. | |
148 | ||
149 | :return tuple of int: (line, column) | |
150 | """ | |
151 | ||
152 | @abstractmethod | |
153 | def get_start_pos_of_prefix(self): | |
154 | """ | |
155 | Returns the start_pos of the prefix. This means basically it returns | |
156 | the end_pos of the last prefix. The `get_start_pos_of_prefix()` of the | |
157 | prefix `+` in `2 + 1` would be `(1, 1)`, while the start_pos is | |
158 | `(1, 2)`. | |
159 | ||
160 | :return tuple of int: (line, column) | |
161 | """ | |
162 | ||
163 | @abstractmethod | |
164 | def get_first_leaf(self): | |
165 | """ | |
166 | Returns the first leaf of a node or itself if this is a leaf. | |
167 | """ | |
168 | ||
169 | @abstractmethod | |
170 | def get_last_leaf(self): | |
171 | """ | |
172 | Returns the last leaf of a node or itself if this is a leaf. | |
173 | """ | |
174 | ||
175 | @abstractmethod | |
176 | def get_code(self, include_prefix=True): | |
177 | """ | |
178 | Returns the code that was the input for the parser for this node. | |
179 | ||
180 | :param include_prefix: Removes the prefix (whitespace and comments) of | |
181 | e.g. a statement. | |
182 | """ | |
183 | ||
184 | def search_ancestor(self, *node_types: str) -> 'Optional[BaseNode]': | |
185 | """ | |
186 | Recursively looks at the parents of this node or leaf and returns the | |
187 | first found node that matches ``node_types``. Returns ``None`` if no | |
188 | matching node is found. | |
189 | ||
190 | :param node_types: type names that are searched for. | |
191 | """ | |
192 | node = self.parent | |
193 | while node is not None: | |
194 | if node.type in node_types: | |
195 | return node | |
196 | node = node.parent | |
197 | return None | |
198 | ||
199 | def dump(self, *, indent: Optional[Union[int, str]] = 4) -> str: | |
200 | """ | |
201 | Returns a formatted dump of the parser tree rooted at this node or leaf. This is | |
202 | mainly useful for debugging purposes. | |
203 | ||
204 | The ``indent`` parameter is interpreted in a similar way as :py:func:`ast.dump`. | |
205 | If ``indent`` is a non-negative integer or string, then the tree will be | |
206 | pretty-printed with that indent level. An indent level of 0, negative, or ``""`` | |
207 | will only insert newlines. ``None`` selects the single line representation. | |
208 | Using a positive integer indent indents that many spaces per level. If | |
209 | ``indent`` is a string (such as ``"\\t"``), that string is used to indent each | |
210 | level. | |
211 | ||
212 | :param indent: Indentation style as described above. The default indentation is | |
213 | 4 spaces, which yields a pretty-printed dump. | |
214 | ||
215 | >>> import parso | |
216 | >>> print(parso.parse("lambda x, y: x + y").dump()) | |
217 | Module([ | |
218 | Lambda([ | |
219 | Keyword('lambda', (1, 0)), | |
220 | Param([ | |
221 | Name('x', (1, 7), prefix=' '), | |
222 | Operator(',', (1, 8)), | |
223 | ]), | |
224 | Param([ | |
225 | Name('y', (1, 10), prefix=' '), | |
226 | ]), | |
227 | Operator(':', (1, 11)), | |
228 | PythonNode('arith_expr', [ | |
229 | Name('x', (1, 13), prefix=' '), | |
230 | Operator('+', (1, 15), prefix=' '), | |
231 | Name('y', (1, 17), prefix=' '), | |
232 | ]), | |
233 | ]), | |
234 | EndMarker('', (1, 18)), | |
235 | ]) | |
236 | """ | |
237 | if indent is None: | |
238 | newline = False | |
239 | indent_string = '' | |
240 | elif isinstance(indent, int): | |
241 | newline = True | |
242 | indent_string = ' ' * indent | |
243 | elif isinstance(indent, str): | |
244 | newline = True | |
245 | indent_string = indent | |
246 | else: | |
247 | raise TypeError(f"expect 'indent' to be int, str or None, got {indent!r}") | |
248 | ||
249 | def _format_dump(node: NodeOrLeaf, indent: str = '', top_level: bool = True) -> str: | |
250 | result = '' | |
251 | node_type = type(node).__name__ | |
252 | if isinstance(node, Leaf): | |
253 | result += f'{indent}{node_type}(' | |
254 | if isinstance(node, ErrorLeaf): | |
255 | result += f'{node.token_type!r}, ' | |
256 | elif isinstance(node, TypedLeaf): | |
257 | result += f'{node.type!r}, ' | |
258 | result += f'{node.value!r}, {node.start_pos!r}' | |
259 | if node.prefix: | |
260 | result += f', prefix={node.prefix!r}' | |
261 | result += ')' | |
262 | elif isinstance(node, BaseNode): | |
263 | result += f'{indent}{node_type}(' | |
264 | if isinstance(node, Node): | |
265 | result += f'{node.type!r}, ' | |
266 | result += '[' | |
267 | if newline: | |
268 | result += '\n' | |
269 | for child in node.children: | |
270 | result += _format_dump(child, indent=indent + indent_string, top_level=False) | |
271 | result += f'{indent}])' | |
272 | else: # pragma: no cover | |
273 | # We shouldn't ever reach here, unless: | |
274 | # - `NodeOrLeaf` is incorrectly subclassed else where | |
275 | # - or a node's children list contains invalid nodes or leafs | |
276 | # Both are unexpected internal errors. | |
277 | raise TypeError(f'unsupported node encountered: {node!r}') | |
278 | if not top_level: | |
279 | if newline: | |
280 | result += ',\n' | |
281 | else: | |
282 | result += ', ' | |
283 | return result | |
284 | ||
285 | return _format_dump(self) | |
286 | ||
287 | ||
288 | class Leaf(NodeOrLeaf): | |
289 | ''' | |
290 | Leafs are basically tokens with a better API. Leafs exactly know where they | |
291 | were defined and what text preceeds them. | |
292 | ''' | |
293 | __slots__ = ('value', 'line', 'column', 'prefix') | |
294 | prefix: str | |
295 | ||
296 | def __init__(self, value: str, start_pos: Tuple[int, int], prefix: str = '') -> None: | |
297 | self.value = value | |
298 | ''' | |
299 | :py:func:`str` The value of the current token. | |
300 | ''' | |
301 | self.start_pos = start_pos | |
302 | self.prefix = prefix | |
303 | ''' | |
304 | :py:func:`str` Typically a mixture of whitespace and comments. Stuff | |
305 | that is syntactically irrelevant for the syntax tree. | |
306 | ''' | |
307 | self.parent: Optional[BaseNode] = None | |
308 | ''' | |
309 | The parent :class:`BaseNode` of this leaf. | |
310 | ''' | |
311 | ||
312 | @property | |
313 | def start_pos(self) -> Tuple[int, int]: | |
314 | return self.line, self.column | |
315 | ||
316 | @start_pos.setter | |
317 | def start_pos(self, value: Tuple[int, int]) -> None: | |
318 | self.line = value[0] | |
319 | self.column = value[1] | |
320 | ||
321 | def get_start_pos_of_prefix(self): | |
322 | previous_leaf = self.get_previous_leaf() | |
323 | if previous_leaf is None: | |
324 | lines = split_lines(self.prefix) | |
325 | # + 1 is needed because split_lines always returns at least ['']. | |
326 | return self.line - len(lines) + 1, 0 # It's the first leaf. | |
327 | return previous_leaf.end_pos | |
328 | ||
329 | def get_first_leaf(self): | |
330 | return self | |
331 | ||
332 | def get_last_leaf(self): | |
333 | return self | |
334 | ||
335 | def get_code(self, include_prefix=True): | |
336 | if include_prefix: | |
337 | return self.prefix + self.value | |
338 | else: | |
339 | return self.value | |
340 | ||
341 | @property | |
342 | def end_pos(self) -> Tuple[int, int]: | |
343 | lines = split_lines(self.value) | |
344 | end_pos_line = self.line + len(lines) - 1 | |
345 | # Check for multiline token | |
346 | if self.line == end_pos_line: | |
347 | end_pos_column = self.column + len(lines[-1]) | |
348 | else: | |
349 | end_pos_column = len(lines[-1]) | |
350 | return end_pos_line, end_pos_column | |
351 | ||
352 | def __repr__(self): | |
353 | value = self.value | |
354 | if not value: | |
355 | value = self.type | |
356 | return "<%s: %s>" % (type(self).__name__, value) | |
357 | ||
358 | ||
359 | class TypedLeaf(Leaf): | |
360 | __slots__ = ('type',) | |
361 | ||
362 | def __init__(self, type, value, start_pos, prefix=''): | |
363 | super().__init__(value, start_pos, prefix) | |
364 | self.type = type | |
365 | ||
366 | ||
367 | class BaseNode(NodeOrLeaf): | |
368 | """ | |
369 | The super class for all nodes. | |
370 | A node has children, a type and possibly a parent node. | |
371 | """ | |
372 | __slots__ = ('children',) | |
373 | ||
374 | def __init__(self, children: List[NodeOrLeaf]) -> None: | |
375 | self.children = children | |
376 | """ | |
377 | A list of :class:`NodeOrLeaf` child nodes. | |
378 | """ | |
379 | self.parent: Optional[BaseNode] = None | |
380 | ''' | |
381 | The parent :class:`BaseNode` of this node. | |
382 | None if this is the root node. | |
383 | ''' | |
384 | for child in children: | |
385 | child.parent = self | |
386 | ||
387 | @property | |
388 | def start_pos(self) -> Tuple[int, int]: | |
389 | return self.children[0].start_pos | |
390 | ||
391 | def get_start_pos_of_prefix(self): | |
392 | return self.children[0].get_start_pos_of_prefix() | |
393 | ||
394 | @property | |
395 | def end_pos(self) -> Tuple[int, int]: | |
396 | return self.children[-1].end_pos | |
397 | ||
398 | def _get_code_for_children(self, children, include_prefix): | |
399 | if include_prefix: | |
400 | return "".join(c.get_code() for c in children) | |
401 | else: | |
402 | first = children[0].get_code(include_prefix=False) | |
403 | return first + "".join(c.get_code() for c in children[1:]) | |
404 | ||
405 | def get_code(self, include_prefix=True): | |
406 | return self._get_code_for_children(self.children, include_prefix) | |
407 | ||
408 | def get_leaf_for_position(self, position, include_prefixes=False): | |
409 | """ | |
410 | Get the :py:class:`parso.tree.Leaf` at ``position`` | |
411 | ||
412 | :param tuple position: A position tuple, row, column. Rows start from 1 | |
413 | :param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls | |
414 | on whitespace or comments before a leaf | |
415 | :return: :py:class:`parso.tree.Leaf` at ``position``, or ``None`` | |
416 | """ | |
417 | def binary_search(lower, upper): | |
418 | if lower == upper: | |
419 | element = self.children[lower] | |
420 | if not include_prefixes and position < element.start_pos: | |
421 | # We're on a prefix. | |
422 | return None | |
423 | # In case we have prefixes, a leaf always matches | |
424 | try: | |
425 | return element.get_leaf_for_position(position, include_prefixes) | |
426 | except AttributeError: | |
427 | return element | |
428 | ||
429 | index = int((lower + upper) / 2) | |
430 | element = self.children[index] | |
431 | if position <= element.end_pos: | |
432 | return binary_search(lower, index) | |
433 | else: | |
434 | return binary_search(index + 1, upper) | |
435 | ||
436 | if not ((1, 0) <= position <= self.children[-1].end_pos): | |
437 | raise ValueError('Please provide a position that exists within this node.') | |
438 | return binary_search(0, len(self.children) - 1) | |
439 | ||
440 | def get_first_leaf(self): | |
441 | return self.children[0].get_first_leaf() | |
442 | ||
443 | def get_last_leaf(self): | |
444 | return self.children[-1].get_last_leaf() | |
445 | ||
446 | def __repr__(self): | |
447 | code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip() | |
448 | return "<%s: %s@%s,%s>" % \ | |
449 | (type(self).__name__, code, self.start_pos[0], self.start_pos[1]) | |
450 | ||
451 | ||
452 | class Node(BaseNode): | |
453 | """Concrete implementation for interior nodes.""" | |
454 | __slots__ = ('type',) | |
455 | ||
456 | def __init__(self, type, children): | |
457 | super().__init__(children) | |
458 | self.type = type | |
459 | ||
460 | def __repr__(self): | |
461 | return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children) | |
462 | ||
463 | ||
464 | class ErrorNode(BaseNode): | |
465 | """ | |
466 | A node that contains valid nodes/leaves that we're follow by a token that | |
467 | was invalid. This basically means that the leaf after this node is where | |
468 | Python would mark a syntax error. | |
469 | """ | |
470 | __slots__ = () | |
471 | type = 'error_node' | |
472 | ||
473 | ||
474 | class ErrorLeaf(Leaf): | |
475 | """ | |
476 | A leaf that is either completely invalid in a language (like `$` in Python) | |
477 | or is invalid at that position. Like the star in `1 +* 1`. | |
478 | """ | |
479 | __slots__ = ('token_type',) | |
480 | type = 'error_leaf' | |
481 | ||
482 | def __init__(self, token_type, value, start_pos, prefix=''): | |
483 | super().__init__(value, start_pos, prefix) | |
484 | self.token_type = token_type | |
485 | ||
486 | def __repr__(self): | |
487 | return "<%s: %s:%s, %s>" % \ | |
488 | (type(self).__name__, self.token_type, repr(self.value), self.start_pos) |