]> crepu.dev Git - config.git/blob - djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/parso/python/diff.py
Actualizado el Readme
[config.git] / djavu-asus / elpy / rpc-venv / lib / python3.11 / site-packages / parso / python / diff.py
1 """
2 The diff parser is trying to be a faster version of the normal parser by trying
3 to reuse the nodes of a previous pass over the same file. This is also called
4 incremental parsing in parser literature. The difference is mostly that with
5 incremental parsing you get a range that needs to be reparsed. Here we
6 calculate that range ourselves by using difflib. After that it's essentially
7 incremental parsing.
8
9 The biggest issue of this approach is that we reuse nodes in a mutable way. The
10 initial design and idea is quite problematic for this parser, but it is also
11 pretty fast. Measurements showed that just copying nodes in Python is simply
12 quite a bit slower (especially for big files >3 kLOC). Therefore we did not
13 want to get rid of the mutable nodes, since this is usually not an issue.
14
15 This is by far the hardest software I ever wrote, exactly because the initial
16 design is crappy. When you have to account for a lot of mutable state, it
17 creates a ton of issues that you would otherwise not have. This file took
18 probably 3-6 months to write, which is insane for a parser.
19
20 There is a fuzzer in ``test/fuzz_diff_parser.py`` that helps test this whole
21 make changes here. If you run the fuzzer like::
22
23 test/fuzz_diff_parser.py random -n 100000
24
25 you can be pretty sure that everything is still fine. I sometimes run the
26 fuzzer up to 24h to make sure everything is still ok.
27 """
28 import re
29 import difflib
30 from collections import namedtuple
31 import logging
32
33 from parso.utils import split_lines
34 from parso.python.parser import Parser
35 from parso.python.tree import EndMarker
36 from parso.python.tokenize import PythonToken, BOM_UTF8_STRING
37 from parso.python.token import PythonTokenTypes
38
39 LOG = logging.getLogger(__name__)
40 DEBUG_DIFF_PARSER = False
41
42 _INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
43
44 NEWLINE = PythonTokenTypes.NEWLINE
45 DEDENT = PythonTokenTypes.DEDENT
46 NAME = PythonTokenTypes.NAME
47 ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
48 ENDMARKER = PythonTokenTypes.ENDMARKER
49
50
51 def _is_indentation_error_leaf(node):
52 return node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS
53
54
55 def _get_previous_leaf_if_indentation(leaf):
56 while leaf and _is_indentation_error_leaf(leaf):
57 leaf = leaf.get_previous_leaf()
58 return leaf
59
60
61 def _get_next_leaf_if_indentation(leaf):
62 while leaf and _is_indentation_error_leaf(leaf):
63 leaf = leaf.get_next_leaf()
64 return leaf
65
66
67 def _get_suite_indentation(tree_node):
68 return _get_indentation(tree_node.children[1])
69
70
71 def _get_indentation(tree_node):
72 return tree_node.start_pos[1]
73
74
75 def _assert_valid_graph(node):
76 """
77 Checks if the parent/children relationship is correct.
78
79 This is a check that only runs during debugging/testing.
80 """
81 try:
82 children = node.children
83 except AttributeError:
84 # Ignore INDENT is necessary, because indent/dedent tokens don't
85 # contain value/prefix and are just around, because of the tokenizer.
86 if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
87 assert not node.value
88 assert not node.prefix
89 return
90
91 # Calculate the content between two start positions.
92 previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
93 if previous_leaf is None:
94 content = node.prefix
95 previous_start_pos = 1, 0
96 else:
97 assert previous_leaf.end_pos <= node.start_pos, \
98 (previous_leaf, node)
99
100 content = previous_leaf.value + node.prefix
101 previous_start_pos = previous_leaf.start_pos
102
103 if '\n' in content or '\r' in content:
104 splitted = split_lines(content)
105 line = previous_start_pos[0] + len(splitted) - 1
106 actual = line, len(splitted[-1])
107 else:
108 actual = previous_start_pos[0], previous_start_pos[1] + len(content)
109 if content.startswith(BOM_UTF8_STRING) \
110 and node.get_start_pos_of_prefix() == (1, 0):
111 # Remove the byte order mark
112 actual = actual[0], actual[1] - 1
113
114 assert node.start_pos == actual, (node.start_pos, actual)
115 else:
116 for child in children:
117 assert child.parent == node, (node, child)
118 _assert_valid_graph(child)
119
120
121 def _assert_nodes_are_equal(node1, node2):
122 try:
123 children1 = node1.children
124 except AttributeError:
125 assert not hasattr(node2, 'children'), (node1, node2)
126 assert node1.value == node2.value, (node1, node2)
127 assert node1.type == node2.type, (node1, node2)
128 assert node1.prefix == node2.prefix, (node1, node2)
129 assert node1.start_pos == node2.start_pos, (node1, node2)
130 return
131 else:
132 try:
133 children2 = node2.children
134 except AttributeError:
135 assert False, (node1, node2)
136 for n1, n2 in zip(children1, children2):
137 _assert_nodes_are_equal(n1, n2)
138 assert len(children1) == len(children2), '\n' + repr(children1) + '\n' + repr(children2)
139
140
141 def _get_debug_error_message(module, old_lines, new_lines):
142 current_lines = split_lines(module.get_code(), keepends=True)
143 current_diff = difflib.unified_diff(new_lines, current_lines)
144 old_new_diff = difflib.unified_diff(old_lines, new_lines)
145 import parso
146 return (
147 "There's an issue with the diff parser. Please "
148 "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
149 % (parso.__version__, ''.join(old_new_diff), ''.join(current_diff))
150 )
151
152
153 def _get_last_line(node_or_leaf):
154 last_leaf = node_or_leaf.get_last_leaf()
155 if _ends_with_newline(last_leaf):
156 return last_leaf.start_pos[0]
157 else:
158 n = last_leaf.get_next_leaf()
159 if n.type == 'endmarker' and '\n' in n.prefix:
160 # This is a very special case and has to do with error recovery in
161 # Parso. The problem is basically that there's no newline leaf at
162 # the end sometimes (it's required in the grammar, but not needed
163 # actually before endmarker, CPython just adds a newline to make
164 # source code pass the parser, to account for that Parso error
165 # recovery allows small_stmt instead of simple_stmt).
166 return last_leaf.end_pos[0] + 1
167 return last_leaf.end_pos[0]
168
169
170 def _skip_dedent_error_leaves(leaf):
171 while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT':
172 leaf = leaf.get_previous_leaf()
173 return leaf
174
175
176 def _ends_with_newline(leaf, suffix=''):
177 leaf = _skip_dedent_error_leaves(leaf)
178
179 if leaf.type == 'error_leaf':
180 typ = leaf.token_type.lower()
181 else:
182 typ = leaf.type
183
184 return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
185
186
187 def _flows_finished(pgen_grammar, stack):
188 """
189 if, while, for and try might not be finished, because another part might
190 still be parsed.
191 """
192 for stack_node in stack:
193 if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
194 return False
195 return True
196
197
198 def _func_or_class_has_suite(node):
199 if node.type == 'decorated':
200 node = node.children[-1]
201 if node.type in ('async_funcdef', 'async_stmt'):
202 node = node.children[-1]
203 return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'
204
205
206 def _suite_or_file_input_is_valid(pgen_grammar, stack):
207 if not _flows_finished(pgen_grammar, stack):
208 return False
209
210 for stack_node in reversed(stack):
211 if stack_node.nonterminal == 'decorator':
212 # A decorator is only valid with the upcoming function.
213 return False
214
215 if stack_node.nonterminal == 'suite':
216 # If only newline is in the suite, the suite is not valid, yet.
217 return len(stack_node.nodes) > 1
218 # Not reaching a suite means that we're dealing with file_input levels
219 # where there's no need for a valid statement in it. It can also be empty.
220 return True
221
222
223 def _is_flow_node(node):
224 if node.type == 'async_stmt':
225 node = node.children[1]
226 try:
227 value = node.children[0].value
228 except AttributeError:
229 return False
230 return value in ('if', 'for', 'while', 'try', 'with')
231
232
233 class _PositionUpdatingFinished(Exception):
234 pass
235
236
237 def _update_positions(nodes, line_offset, last_leaf):
238 for node in nodes:
239 try:
240 children = node.children
241 except AttributeError:
242 # Is a leaf
243 node.line += line_offset
244 if node is last_leaf:
245 raise _PositionUpdatingFinished
246 else:
247 _update_positions(children, line_offset, last_leaf)
248
249
250 class DiffParser:
251 """
252 An advanced form of parsing a file faster. Unfortunately comes with huge
253 side effects. It changes the given module.
254 """
255 def __init__(self, pgen_grammar, tokenizer, module):
256 self._pgen_grammar = pgen_grammar
257 self._tokenizer = tokenizer
258 self._module = module
259
260 def _reset(self):
261 self._copy_count = 0
262 self._parser_count = 0
263
264 self._nodes_tree = _NodesTree(self._module)
265
266 def update(self, old_lines, new_lines):
267 '''
268 The algorithm works as follows:
269
270 Equal:
271 - Assure that the start is a newline, otherwise parse until we get
272 one.
273 - Copy from parsed_until_line + 1 to max(i2 + 1)
274 - Make sure that the indentation is correct (e.g. add DEDENT)
275 - Add old and change positions
276 Insert:
277 - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
278 much more.
279
280 Returns the new module node.
281 '''
282 LOG.debug('diff parser start')
283 # Reset the used names cache so they get regenerated.
284 self._module._used_names = None
285
286 self._parser_lines_new = new_lines
287
288 self._reset()
289
290 line_length = len(new_lines)
291 sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
292 opcodes = sm.get_opcodes()
293 LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))
294
295 for operation, i1, i2, j1, j2 in opcodes:
296 LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
297 operation, i1 + 1, i2, j1 + 1, j2)
298
299 if j2 == line_length and new_lines[-1] == '':
300 # The empty part after the last newline is not relevant.
301 j2 -= 1
302
303 if operation == 'equal':
304 line_offset = j1 - i1
305 self._copy_from_old_parser(line_offset, i1 + 1, i2, j2)
306 elif operation == 'replace':
307 self._parse(until_line=j2)
308 elif operation == 'insert':
309 self._parse(until_line=j2)
310 else:
311 assert operation == 'delete'
312
313 # With this action all change will finally be applied and we have a
314 # changed module.
315 self._nodes_tree.close()
316
317 if DEBUG_DIFF_PARSER:
318 # If there is reasonable suspicion that the diff parser is not
319 # behaving well, this should be enabled.
320 try:
321 code = ''.join(new_lines)
322 assert self._module.get_code() == code
323 _assert_valid_graph(self._module)
324 without_diff_parser_module = Parser(
325 self._pgen_grammar,
326 error_recovery=True
327 ).parse(self._tokenizer(new_lines))
328 _assert_nodes_are_equal(self._module, without_diff_parser_module)
329 except AssertionError:
330 print(_get_debug_error_message(self._module, old_lines, new_lines))
331 raise
332
333 last_pos = self._module.end_pos[0]
334 if last_pos != line_length:
335 raise Exception(
336 ('(%s != %s) ' % (last_pos, line_length))
337 + _get_debug_error_message(self._module, old_lines, new_lines)
338 )
339 LOG.debug('diff parser end')
340 return self._module
341
342 def _enabled_debugging(self, old_lines, lines_new):
343 if self._module.get_code() != ''.join(lines_new):
344 LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
345
346 def _copy_from_old_parser(self, line_offset, start_line_old, until_line_old, until_line_new):
347 last_until_line = -1
348 while until_line_new > self._nodes_tree.parsed_until_line:
349 parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
350 line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
351 if line_stmt is None:
352 # Parse 1 line at least. We don't need more, because we just
353 # want to get into a state where the old parser has statements
354 # again that can be copied (e.g. not lines within parentheses).
355 self._parse(self._nodes_tree.parsed_until_line + 1)
356 else:
357 p_children = line_stmt.parent.children
358 index = p_children.index(line_stmt)
359
360 if start_line_old == 1 \
361 and p_children[0].get_first_leaf().prefix.startswith(BOM_UTF8_STRING):
362 # If there's a BOM in the beginning, just reparse. It's too
363 # complicated to account for it otherwise.
364 copied_nodes = []
365 else:
366 from_ = self._nodes_tree.parsed_until_line + 1
367 copied_nodes = self._nodes_tree.copy_nodes(
368 p_children[index:],
369 until_line_old,
370 line_offset
371 )
372 # Match all the nodes that are in the wanted range.
373 if copied_nodes:
374 self._copy_count += 1
375
376 to = self._nodes_tree.parsed_until_line
377
378 LOG.debug('copy old[%s:%s] new[%s:%s]',
379 copied_nodes[0].start_pos[0],
380 copied_nodes[-1].end_pos[0] - 1, from_, to)
381 else:
382 # We have copied as much as possible (but definitely not too
383 # much). Therefore we just parse a bit more.
384 self._parse(self._nodes_tree.parsed_until_line + 1)
385 # Since there are potential bugs that might loop here endlessly, we
386 # just stop here.
387 assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
388 last_until_line = self._nodes_tree.parsed_until_line
389
390 def _get_old_line_stmt(self, old_line):
391 leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
392
393 if _ends_with_newline(leaf):
394 leaf = leaf.get_next_leaf()
395 if leaf.get_start_pos_of_prefix()[0] == old_line:
396 node = leaf
397 while node.parent.type not in ('file_input', 'suite'):
398 node = node.parent
399
400 # Make sure that if only the `else:` line of an if statement is
401 # copied that not the whole thing is going to be copied.
402 if node.start_pos[0] >= old_line:
403 return node
404 # Must be on the same line. Otherwise we need to parse that bit.
405 return None
406
407 def _parse(self, until_line):
408 """
409 Parses at least until the given line, but might just parse more until a
410 valid state is reached.
411 """
412 last_until_line = 0
413 while until_line > self._nodes_tree.parsed_until_line:
414 node = self._try_parse_part(until_line)
415 nodes = node.children
416
417 self._nodes_tree.add_parsed_nodes(nodes, self._keyword_token_indents)
418 if self._replace_tos_indent is not None:
419 self._nodes_tree.indents[-1] = self._replace_tos_indent
420
421 LOG.debug(
422 'parse_part from %s to %s (to %s in part parser)',
423 nodes[0].get_start_pos_of_prefix()[0],
424 self._nodes_tree.parsed_until_line,
425 node.end_pos[0] - 1
426 )
427 # Since the tokenizer sometimes has bugs, we cannot be sure that
428 # this loop terminates. Therefore assert that there's always a
429 # change.
430 assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
431 last_until_line = self._nodes_tree.parsed_until_line
432
433 def _try_parse_part(self, until_line):
434 """
435 Sets up a normal parser that uses a spezialized tokenizer to only parse
436 until a certain position (or a bit longer if the statement hasn't
437 ended.
438 """
439 self._parser_count += 1
440 # TODO speed up, shouldn't copy the whole list all the time.
441 # memoryview?
442 parsed_until_line = self._nodes_tree.parsed_until_line
443 lines_after = self._parser_lines_new[parsed_until_line:]
444 tokens = self._diff_tokenize(
445 lines_after,
446 until_line,
447 line_offset=parsed_until_line
448 )
449 self._active_parser = Parser(
450 self._pgen_grammar,
451 error_recovery=True
452 )
453 return self._active_parser.parse(tokens=tokens)
454
455 def _diff_tokenize(self, lines, until_line, line_offset=0):
456 was_newline = False
457 indents = self._nodes_tree.indents
458 initial_indentation_count = len(indents)
459
460 tokens = self._tokenizer(
461 lines,
462 start_pos=(line_offset + 1, 0),
463 indents=indents,
464 is_first_token=line_offset == 0,
465 )
466 stack = self._active_parser.stack
467 self._replace_tos_indent = None
468 self._keyword_token_indents = {}
469 # print('start', line_offset + 1, indents)
470 for token in tokens:
471 # print(token, indents)
472 typ = token.type
473 if typ == DEDENT:
474 if len(indents) < initial_indentation_count:
475 # We are done here, only thing that can come now is an
476 # endmarker or another dedented code block.
477 while True:
478 typ, string, start_pos, prefix = token = next(tokens)
479 if typ in (DEDENT, ERROR_DEDENT):
480 if typ == ERROR_DEDENT:
481 # We want to force an error dedent in the next
482 # parser/pass. To make this possible we just
483 # increase the location by one.
484 self._replace_tos_indent = start_pos[1] + 1
485 pass
486 else:
487 break
488
489 if '\n' in prefix or '\r' in prefix:
490 prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
491 else:
492 assert start_pos[1] >= len(prefix), repr(prefix)
493 if start_pos[1] - len(prefix) == 0:
494 prefix = ''
495 yield PythonToken(
496 ENDMARKER, '',
497 start_pos,
498 prefix
499 )
500 break
501 elif typ == NEWLINE and token.start_pos[0] >= until_line:
502 was_newline = True
503 elif was_newline:
504 was_newline = False
505 if len(indents) == initial_indentation_count:
506 # Check if the parser is actually in a valid suite state.
507 if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
508 yield PythonToken(ENDMARKER, '', token.start_pos, '')
509 break
510
511 if typ == NAME and token.string in ('class', 'def'):
512 self._keyword_token_indents[token.start_pos] = list(indents)
513
514 yield token
515
516
517 class _NodesTreeNode:
518 _ChildrenGroup = namedtuple(
519 '_ChildrenGroup',
520 'prefix children line_offset last_line_offset_leaf')
521
522 def __init__(self, tree_node, parent=None, indentation=0):
523 self.tree_node = tree_node
524 self._children_groups = []
525 self.parent = parent
526 self._node_children = []
527 self.indentation = indentation
528
529 def finish(self):
530 children = []
531 for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
532 first_leaf = _get_next_leaf_if_indentation(
533 children_part[0].get_first_leaf()
534 )
535
536 first_leaf.prefix = prefix + first_leaf.prefix
537 if line_offset != 0:
538 try:
539 _update_positions(
540 children_part, line_offset, last_line_offset_leaf)
541 except _PositionUpdatingFinished:
542 pass
543 children += children_part
544 self.tree_node.children = children
545 # Reset the parents
546 for node in children:
547 node.parent = self.tree_node
548
549 for node_child in self._node_children:
550 node_child.finish()
551
552 def add_child_node(self, child_node):
553 self._node_children.append(child_node)
554
555 def add_tree_nodes(self, prefix, children, line_offset=0,
556 last_line_offset_leaf=None):
557 if last_line_offset_leaf is None:
558 last_line_offset_leaf = children[-1].get_last_leaf()
559 group = self._ChildrenGroup(
560 prefix, children, line_offset, last_line_offset_leaf
561 )
562 self._children_groups.append(group)
563
564 def get_last_line(self, suffix):
565 line = 0
566 if self._children_groups:
567 children_group = self._children_groups[-1]
568 last_leaf = _get_previous_leaf_if_indentation(
569 children_group.last_line_offset_leaf
570 )
571
572 line = last_leaf.end_pos[0] + children_group.line_offset
573
574 # Newlines end on the next line, which means that they would cover
575 # the next line. That line is not fully parsed at this point.
576 if _ends_with_newline(last_leaf, suffix):
577 line -= 1
578 line += len(split_lines(suffix)) - 1
579
580 if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
581 # This is the end of a file (that doesn't end with a newline).
582 line += 1
583
584 if self._node_children:
585 return max(line, self._node_children[-1].get_last_line(suffix))
586 return line
587
588 def __repr__(self):
589 return '<%s: %s>' % (self.__class__.__name__, self.tree_node)
590
591
592 class _NodesTree:
593 def __init__(self, module):
594 self._base_node = _NodesTreeNode(module)
595 self._working_stack = [self._base_node]
596 self._module = module
597 self._prefix_remainder = ''
598 self.prefix = ''
599 self.indents = [0]
600
601 @property
602 def parsed_until_line(self):
603 return self._working_stack[-1].get_last_line(self.prefix)
604
605 def _update_insertion_node(self, indentation):
606 for node in reversed(list(self._working_stack)):
607 if node.indentation < indentation or node is self._working_stack[0]:
608 return node
609 self._working_stack.pop()
610
611 def add_parsed_nodes(self, tree_nodes, keyword_token_indents):
612 old_prefix = self.prefix
613 tree_nodes = self._remove_endmarker(tree_nodes)
614 if not tree_nodes:
615 self.prefix = old_prefix + self.prefix
616 return
617
618 assert tree_nodes[0].type != 'newline'
619
620 node = self._update_insertion_node(tree_nodes[0].start_pos[1])
621 assert node.tree_node.type in ('suite', 'file_input')
622 node.add_tree_nodes(old_prefix, tree_nodes)
623 # tos = Top of stack
624 self._update_parsed_node_tos(tree_nodes[-1], keyword_token_indents)
625
626 def _update_parsed_node_tos(self, tree_node, keyword_token_indents):
627 if tree_node.type == 'suite':
628 def_leaf = tree_node.parent.children[0]
629 new_tos = _NodesTreeNode(
630 tree_node,
631 indentation=keyword_token_indents[def_leaf.start_pos][-1],
632 )
633 new_tos.add_tree_nodes('', list(tree_node.children))
634
635 self._working_stack[-1].add_child_node(new_tos)
636 self._working_stack.append(new_tos)
637
638 self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
639 elif _func_or_class_has_suite(tree_node):
640 self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
641
642 def _remove_endmarker(self, tree_nodes):
643 """
644 Helps cleaning up the tree nodes that get inserted.
645 """
646 last_leaf = tree_nodes[-1].get_last_leaf()
647 is_endmarker = last_leaf.type == 'endmarker'
648 self._prefix_remainder = ''
649 if is_endmarker:
650 prefix = last_leaf.prefix
651 separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
652 if separation > -1:
653 # Remove the whitespace part of the prefix after a newline.
654 # That is not relevant if parentheses were opened. Always parse
655 # until the end of a line.
656 last_leaf.prefix, self._prefix_remainder = \
657 last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
658
659 self.prefix = ''
660
661 if is_endmarker:
662 self.prefix = last_leaf.prefix
663
664 tree_nodes = tree_nodes[:-1]
665 return tree_nodes
666
667 def _get_matching_indent_nodes(self, tree_nodes, is_new_suite):
668 # There might be a random dedent where we have to stop copying.
669 # Invalid indents are ok, because the parser handled that
670 # properly before. An invalid dedent can happen, because a few
671 # lines above there was an invalid indent.
672 node_iterator = iter(tree_nodes)
673 if is_new_suite:
674 yield next(node_iterator)
675
676 first_node = next(node_iterator)
677 indent = _get_indentation(first_node)
678 if not is_new_suite and indent not in self.indents:
679 return
680 yield first_node
681
682 for n in node_iterator:
683 if _get_indentation(n) != indent:
684 return
685 yield n
686
687 def copy_nodes(self, tree_nodes, until_line, line_offset):
688 """
689 Copies tree nodes from the old parser tree.
690
691 Returns the number of tree nodes that were copied.
692 """
693 if tree_nodes[0].type in ('error_leaf', 'error_node'):
694 # Avoid copying errors in the beginning. Can lead to a lot of
695 # issues.
696 return []
697
698 indentation = _get_indentation(tree_nodes[0])
699 old_working_stack = list(self._working_stack)
700 old_prefix = self.prefix
701 old_indents = self.indents
702 self.indents = [i for i in self.indents if i <= indentation]
703
704 self._update_insertion_node(indentation)
705
706 new_nodes, self._working_stack, self.prefix, added_indents = self._copy_nodes(
707 list(self._working_stack),
708 tree_nodes,
709 until_line,
710 line_offset,
711 self.prefix,
712 )
713 if new_nodes:
714 self.indents += added_indents
715 else:
716 self._working_stack = old_working_stack
717 self.prefix = old_prefix
718 self.indents = old_indents
719 return new_nodes
720
721 def _copy_nodes(self, working_stack, nodes, until_line, line_offset,
722 prefix='', is_nested=False):
723 new_nodes = []
724 added_indents = []
725
726 nodes = list(self._get_matching_indent_nodes(
727 nodes,
728 is_new_suite=is_nested,
729 ))
730
731 new_prefix = ''
732 for node in nodes:
733 if node.start_pos[0] > until_line:
734 break
735
736 if node.type == 'endmarker':
737 break
738
739 if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
740 break
741 # TODO this check might take a bit of time for large files. We
742 # might want to change this to do more intelligent guessing or
743 # binary search.
744 if _get_last_line(node) > until_line:
745 # We can split up functions and classes later.
746 if _func_or_class_has_suite(node):
747 new_nodes.append(node)
748 break
749 try:
750 c = node.children
751 except AttributeError:
752 pass
753 else:
754 # This case basically appears with error recovery of one line
755 # suites like `def foo(): bar.-`. In this case we might not
756 # include a newline in the statement and we need to take care
757 # of that.
758 n = node
759 if n.type == 'decorated':
760 n = n.children[-1]
761 if n.type in ('async_funcdef', 'async_stmt'):
762 n = n.children[-1]
763 if n.type in ('classdef', 'funcdef'):
764 suite_node = n.children[-1]
765 else:
766 suite_node = c[-1]
767
768 if suite_node.type in ('error_leaf', 'error_node'):
769 break
770
771 new_nodes.append(node)
772
773 # Pop error nodes at the end from the list
774 if new_nodes:
775 while new_nodes:
776 last_node = new_nodes[-1]
777 if (last_node.type in ('error_leaf', 'error_node')
778 or _is_flow_node(new_nodes[-1])):
779 # Error leafs/nodes don't have a defined start/end. Error
780 # nodes might not end with a newline (e.g. if there's an
781 # open `(`). Therefore ignore all of them unless they are
782 # succeeded with valid parser state.
783 # If we copy flows at the end, they might be continued
784 # after the copy limit (in the new parser).
785 # In this while loop we try to remove until we find a newline.
786 new_prefix = ''
787 new_nodes.pop()
788 while new_nodes:
789 last_node = new_nodes[-1]
790 if last_node.get_last_leaf().type == 'newline':
791 break
792 new_nodes.pop()
793 continue
794 if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node':
795 # The problem here is that Parso error recovery sometimes
796 # influences nodes before this node.
797 # Since the new last node is an error node this will get
798 # cleaned up in the next while iteration.
799 new_nodes.pop()
800 continue
801 break
802
803 if not new_nodes:
804 return [], working_stack, prefix, added_indents
805
806 tos = working_stack[-1]
807 last_node = new_nodes[-1]
808 had_valid_suite_last = False
809 # Pop incomplete suites from the list
810 if _func_or_class_has_suite(last_node):
811 suite = last_node
812 while suite.type != 'suite':
813 suite = suite.children[-1]
814
815 indent = _get_suite_indentation(suite)
816 added_indents.append(indent)
817
818 suite_tos = _NodesTreeNode(suite, indentation=_get_indentation(last_node))
819 # Don't need to pass line_offset here, it's already done by the
820 # parent.
821 suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes(
822 working_stack + [suite_tos], suite.children, until_line, line_offset,
823 is_nested=True,
824 )
825 added_indents += ai
826 if len(suite_nodes) < 2:
827 # A suite only with newline is not valid.
828 new_nodes.pop()
829 new_prefix = ''
830 else:
831 assert new_nodes
832 tos.add_child_node(suite_tos)
833 working_stack = new_working_stack
834 had_valid_suite_last = True
835
836 if new_nodes:
837 if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
838 p = new_nodes[-1].get_next_leaf().prefix
839 # We are not allowed to remove the newline at the end of the
840 # line, otherwise it's going to be missing. This happens e.g.
841 # if a bracket is around before that moves newlines to
842 # prefixes.
843 new_prefix = split_lines(p, keepends=True)[0]
844
845 if had_valid_suite_last:
846 last = new_nodes[-1]
847 if last.type == 'decorated':
848 last = last.children[-1]
849 if last.type in ('async_funcdef', 'async_stmt'):
850 last = last.children[-1]
851 last_line_offset_leaf = last.children[-2].get_last_leaf()
852 assert last_line_offset_leaf == ':'
853 else:
854 last_line_offset_leaf = new_nodes[-1].get_last_leaf()
855 tos.add_tree_nodes(
856 prefix, new_nodes, line_offset, last_line_offset_leaf,
857 )
858 prefix = new_prefix
859 self._prefix_remainder = ''
860
861 return new_nodes, working_stack, prefix, added_indents
862
863 def close(self):
864 self._base_node.finish()
865
866 # Add an endmarker.
867 try:
868 last_leaf = self._module.get_last_leaf()
869 except IndexError:
870 end_pos = [1, 0]
871 else:
872 last_leaf = _skip_dedent_error_leaves(last_leaf)
873 end_pos = list(last_leaf.end_pos)
874 lines = split_lines(self.prefix)
875 assert len(lines) > 0
876 if len(lines) == 1:
877 if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]:
878 end_pos[1] -= 1
879 end_pos[1] += len(lines[0])
880 else:
881 end_pos[0] += len(lines) - 1
882 end_pos[1] = len(lines[-1])
883
884 endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
885 endmarker.parent = self._module
886 self._module.children.append(endmarker)