djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/parso/python/diff.py

   1 """
   2 The diff parser is trying to be a faster version of the normal parser by trying
   3 to reuse the nodes of a previous pass over the same file. This is also called
   4 incremental parsing in parser literature. The difference is mostly that with
   5 incremental parsing you get a range that needs to be reparsed. Here we
   6 calculate that range ourselves by using difflib. After that it's essentially
   7 incremental parsing.
   8
   9 The biggest issue of this approach is that we reuse nodes in a mutable way. The
  10 intial design and idea is quite problematic for this parser, but it is also
  11 pretty fast. Measurements showed that just copying nodes in Python is simply
  12 quite a bit slower (especially for big files >3 kLOC). Therefore we did not
  13 want to get rid of the mutable nodes, since this is usually not an issue.
  14
  15 This is by far the hardest software I ever wrote, exactly because the initial
  16 design is crappy. When you have to account for a lot of mutable state, it
  17 creates a ton of issues that you would otherwise not have. This file took
  18 probably 3-6 months to write, which is insane for a parser.
  19
  20 There is a fuzzer in that helps test this whole thing. Please use it if you
  21 make changes here. If you run the fuzzer like::
  22
  23     test/fuzz_diff_parser.py random -n 100000
  24
  25 you can be pretty sure that everything is still fine. I sometimes run the
  26 fuzzer up to 24h to make sure everything is still ok.
  27 """
  28 import re
  29 import difflib
  30 from collections import namedtuple
  31 import logging
  32
  33 from parso.utils import split_lines
  34 from parso.python.parser import Parser
  35 from parso.python.tree import EndMarker
  36 from parso.python.tokenize import PythonToken, BOM_UTF8_STRING
  37 from parso.python.token import PythonTokenTypes
  38
  39 LOG = logging.getLogger(__name__)
  40 DEBUG_DIFF_PARSER = False
  41
  42 _INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
  43
  44 NEWLINE = PythonTokenTypes.NEWLINE
  45 DEDENT = PythonTokenTypes.DEDENT
  46 NAME = PythonTokenTypes.NAME
  47 ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
  48 ENDMARKER = PythonTokenTypes.ENDMARKER
  49
  50
  51 def _is_indentation_error_leaf(node):
  52     return node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS
  53
  54
  55 def _get_previous_leaf_if_indentation(leaf):
  56     while leaf and _is_indentation_error_leaf(leaf):
  57         leaf = leaf.get_previous_leaf()
  58     return leaf
  59
  60
  61 def _get_next_leaf_if_indentation(leaf):
  62     while leaf and _is_indentation_error_leaf(leaf):
  63         leaf = leaf.get_next_leaf()
  64     return leaf
  65
  66
  67 def _get_suite_indentation(tree_node):
  68     return _get_indentation(tree_node.children[1])
  69
  70
  71 def _get_indentation(tree_node):
  72     return tree_node.start_pos[1]
  73
  74
  75 def _assert_valid_graph(node):
  76     """
  77     Checks if the parent/children relationship is correct.
  78
  79     This is a check that only runs during debugging/testing.
  80     """
  81     try:
  82         children = node.children
  83     except AttributeError:
  84         # Ignore INDENT is necessary, because indent/dedent tokens don't
  85         # contain value/prefix and are just around, because of the tokenizer.
  86         if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
  87             assert not node.value
  88             assert not node.prefix
  89             return
  90
  91         # Calculate the content between two start positions.
  92         previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
  93         if previous_leaf is None:
  94             content = node.prefix
  95             previous_start_pos = 1, 0
  96         else:
  97             assert previous_leaf.end_pos <= node.start_pos, \
  98                 (previous_leaf, node)
  99
 100             content = previous_leaf.value + node.prefix
 101             previous_start_pos = previous_leaf.start_pos
 102
 103         if '\n' in content or '\r' in content:
 104             splitted = split_lines(content)
 105             line = previous_start_pos[0] + len(splitted) - 1
 106             actual = line, len(splitted[-1])
 107         else:
 108             actual = previous_start_pos[0], previous_start_pos[1] + len(content)
 109             if content.startswith(BOM_UTF8_STRING) \
 110                     and node.get_start_pos_of_prefix() == (1, 0):
 111                 # Remove the byte order mark
 112                 actual = actual[0], actual[1] - 1
 113
 114         assert node.start_pos == actual, (node.start_pos, actual)
 115     else:
 116         for child in children:
 117             assert child.parent == node, (node, child)
 118             _assert_valid_graph(child)
 119
 120
 121 def _assert_nodes_are_equal(node1, node2):
 122     try:
 123         children1 = node1.children
 124     except AttributeError:
 125         assert not hasattr(node2, 'children'), (node1, node2)
 126         assert node1.value == node2.value, (node1, node2)
 127         assert node1.type == node2.type, (node1, node2)
 128         assert node1.prefix == node2.prefix, (node1, node2)
 129         assert node1.start_pos == node2.start_pos, (node1, node2)
 130         return
 131     else:
 132         try:
 133             children2 = node2.children
 134         except AttributeError:
 135             assert False, (node1, node2)
 136     for n1, n2 in zip(children1, children2):
 137         _assert_nodes_are_equal(n1, n2)
 138     assert len(children1) == len(children2), '\n' + repr(children1) + '\n' + repr(children2)
 139
 140
 141 def _get_debug_error_message(module, old_lines, new_lines):
 142     current_lines = split_lines(module.get_code(), keepends=True)
 143     current_diff = difflib.unified_diff(new_lines, current_lines)
 144     old_new_diff = difflib.unified_diff(old_lines, new_lines)
 145     import parso
 146     return (
 147         "There's an issue with the diff parser. Please "
 148         "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
 149         % (parso.__version__, ''.join(old_new_diff), ''.join(current_diff))
 150     )
 151
 152
 153 def _get_last_line(node_or_leaf):
 154     last_leaf = node_or_leaf.get_last_leaf()
 155     if _ends_with_newline(last_leaf):
 156         return last_leaf.start_pos[0]
 157     else:
 158         n = last_leaf.get_next_leaf()
 159         if n.type == 'endmarker' and '\n' in n.prefix:
 160             # This is a very special case and has to do with error recovery in
 161             # Parso. The problem is basically that there's no newline leaf at
 162             # the end sometimes (it's required in the grammar, but not needed
 163             # actually before endmarker, CPython just adds a newline to make
 164             # source code pass the parser, to account for that Parso error
 165             # recovery allows small_stmt instead of simple_stmt).
 166             return last_leaf.end_pos[0] + 1
 167         return last_leaf.end_pos[0]
 168
 169
 170 def _skip_dedent_error_leaves(leaf):
 171     while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT':
 172         leaf = leaf.get_previous_leaf()
 173     return leaf
 174
 175
 176 def _ends_with_newline(leaf, suffix=''):
 177     leaf = _skip_dedent_error_leaves(leaf)
 178
 179     if leaf.type == 'error_leaf':
 180         typ = leaf.token_type.lower()
 181     else:
 182         typ = leaf.type
 183
 184     return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
 185
 186
 187 def _flows_finished(pgen_grammar, stack):
 188     """
 189     if, while, for and try might not be finished, because another part might
 190     still be parsed.
 191     """
 192     for stack_node in stack:
 193         if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
 194             return False
 195     return True
 196
 197
 198 def _func_or_class_has_suite(node):
 199     if node.type == 'decorated':
 200         node = node.children[-1]
 201     if node.type in ('async_funcdef', 'async_stmt'):
 202         node = node.children[-1]
 203     return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'
 204
 205
 206 def _suite_or_file_input_is_valid(pgen_grammar, stack):
 207     if not _flows_finished(pgen_grammar, stack):
 208         return False
 209
 210     for stack_node in reversed(stack):
 211         if stack_node.nonterminal == 'decorator':
 212             # A decorator is only valid with the upcoming function.
 213             return False
 214
 215         if stack_node.nonterminal == 'suite':
 216             # If only newline is in the suite, the suite is not valid, yet.
 217             return len(stack_node.nodes) > 1
 218     # Not reaching a suite means that we're dealing with file_input levels
 219     # where there's no need for a valid statement in it. It can also be empty.
 220     return True
 221
 222
 223 def _is_flow_node(node):
 224     if node.type == 'async_stmt':
 225         node = node.children[1]
 226     try:
 227         value = node.children[0].value
 228     except AttributeError:
 229         return False
 230     return value in ('if', 'for', 'while', 'try', 'with')
 231
 232
 233 class _PositionUpdatingFinished(Exception):
 234     pass
 235
 236
 237 def _update_positions(nodes, line_offset, last_leaf):
 238     for node in nodes:
 239         try:
 240             children = node.children
 241         except AttributeError:
 242             # Is a leaf
 243             node.line += line_offset
 244             if node is last_leaf:
 245                 raise _PositionUpdatingFinished
 246         else:
 247             _update_positions(children, line_offset, last_leaf)
 248
 249
 250 class DiffParser:
 251     """
 252     An advanced form of parsing a file faster. Unfortunately comes with huge
 253     side effects. It changes the given module.
 254     """
 255     def __init__(self, pgen_grammar, tokenizer, module):
 256         self._pgen_grammar = pgen_grammar
 257         self._tokenizer = tokenizer
 258         self._module = module
 259
 260     def _reset(self):
 261         self._copy_count = 0
 262         self._parser_count = 0
 263
 264         self._nodes_tree = _NodesTree(self._module)
 265
 266     def update(self, old_lines, new_lines):
 267         '''
 268         The algorithm works as follows:
 269
 270         Equal:
 271             - Assure that the start is a newline, otherwise parse until we get
 272               one.
 273             - Copy from parsed_until_line + 1 to max(i2 + 1)
 274             - Make sure that the indentation is correct (e.g. add DEDENT)
 275             - Add old and change positions
 276         Insert:
 277             - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
 278               much more.
 279
 280         Returns the new module node.
 281         '''
 282         LOG.debug('diff parser start')
 283         # Reset the used names cache so they get regenerated.
 284         self._module._used_names = None
 285
 286         self._parser_lines_new = new_lines
 287
 288         self._reset()
 289
 290         line_length = len(new_lines)
 291         sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
 292         opcodes = sm.get_opcodes()
 293         LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))
 294
 295         for operation, i1, i2, j1, j2 in opcodes:
 296             LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
 297                       operation, i1 + 1, i2, j1 + 1, j2)
 298
 299             if j2 == line_length and new_lines[-1] == '':
 300                 # The empty part after the last newline is not relevant.
 301                 j2 -= 1
 302
 303             if operation == 'equal':
 304                 line_offset = j1 - i1
 305                 self._copy_from_old_parser(line_offset, i1 + 1, i2, j2)
 306             elif operation == 'replace':
 307                 self._parse(until_line=j2)
 308             elif operation == 'insert':
 309                 self._parse(until_line=j2)
 310             else:
 311                 assert operation == 'delete'
 312
 313         # With this action all change will finally be applied and we have a
 314         # changed module.
 315         self._nodes_tree.close()
 316
 317         if DEBUG_DIFF_PARSER:
 318             # If there is reasonable suspicion that the diff parser is not
 319             # behaving well, this should be enabled.
 320             try:
 321                 code = ''.join(new_lines)
 322                 assert self._module.get_code() == code
 323                 _assert_valid_graph(self._module)
 324                 without_diff_parser_module = Parser(
 325                     self._pgen_grammar,
 326                     error_recovery=True
 327                 ).parse(self._tokenizer(new_lines))
 328                 _assert_nodes_are_equal(self._module, without_diff_parser_module)
 329             except AssertionError:
 330                 print(_get_debug_error_message(self._module, old_lines, new_lines))
 331                 raise
 332
 333         last_pos = self._module.end_pos[0]
 334         if last_pos != line_length:
 335             raise Exception(
 336                 ('(%s != %s) ' % (last_pos, line_length))
 337                 + _get_debug_error_message(self._module, old_lines, new_lines)
 338             )
 339         LOG.debug('diff parser end')
 340         return self._module
 341
 342     def _enabled_debugging(self, old_lines, lines_new):
 343         if self._module.get_code() != ''.join(lines_new):
 344             LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
 345
 346     def _copy_from_old_parser(self, line_offset, start_line_old, until_line_old, until_line_new):
 347         last_until_line = -1
 348         while until_line_new > self._nodes_tree.parsed_until_line:
 349             parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
 350             line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
 351             if line_stmt is None:
 352                 # Parse 1 line at least. We don't need more, because we just
 353                 # want to get into a state where the old parser has statements
 354                 # again that can be copied (e.g. not lines within parentheses).
 355                 self._parse(self._nodes_tree.parsed_until_line + 1)
 356             else:
 357                 p_children = line_stmt.parent.children
 358                 index = p_children.index(line_stmt)
 359
 360                 if start_line_old == 1 \
 361                         and p_children[0].get_first_leaf().prefix.startswith(BOM_UTF8_STRING):
 362                     # If there's a BOM in the beginning, just reparse. It's too
 363                     # complicated to account for it otherwise.
 364                     copied_nodes = []
 365                 else:
 366                     from_ = self._nodes_tree.parsed_until_line + 1
 367                     copied_nodes = self._nodes_tree.copy_nodes(
 368                         p_children[index:],
 369                         until_line_old,
 370                         line_offset
 371                     )
 372                 # Match all the nodes that are in the wanted range.
 373                 if copied_nodes:
 374                     self._copy_count += 1
 375
 376                     to = self._nodes_tree.parsed_until_line
 377
 378                     LOG.debug('copy old[%s:%s] new[%s:%s]',
 379                               copied_nodes[0].start_pos[0],
 380                               copied_nodes[-1].end_pos[0] - 1, from_, to)
 381                 else:
 382                     # We have copied as much as possible (but definitely not too
 383                     # much). Therefore we just parse a bit more.
 384                     self._parse(self._nodes_tree.parsed_until_line + 1)
 385             # Since there are potential bugs that might loop here endlessly, we
 386             # just stop here.
 387             assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
 388             last_until_line = self._nodes_tree.parsed_until_line
 389
 390     def _get_old_line_stmt(self, old_line):
 391         leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
 392
 393         if _ends_with_newline(leaf):
 394             leaf = leaf.get_next_leaf()
 395         if leaf.get_start_pos_of_prefix()[0] == old_line:
 396             node = leaf
 397             while node.parent.type not in ('file_input', 'suite'):
 398                 node = node.parent
 399
 400             # Make sure that if only the `else:` line of an if statement is
 401             # copied that not the whole thing is going to be copied.
 402             if node.start_pos[0] >= old_line:
 403                 return node
 404         # Must be on the same line. Otherwise we need to parse that bit.
 405         return None
 406
 407     def _parse(self, until_line):
 408         """
 409         Parses at least until the given line, but might just parse more until a
 410         valid state is reached.
 411         """
 412         last_until_line = 0
 413         while until_line > self._nodes_tree.parsed_until_line:
 414             node = self._try_parse_part(until_line)
 415             nodes = node.children
 416
 417             self._nodes_tree.add_parsed_nodes(nodes, self._keyword_token_indents)
 418             if self._replace_tos_indent is not None:
 419                 self._nodes_tree.indents[-1] = self._replace_tos_indent
 420
 421             LOG.debug(
 422                 'parse_part from %s to %s (to %s in part parser)',
 423                 nodes[0].get_start_pos_of_prefix()[0],
 424                 self._nodes_tree.parsed_until_line,
 425                 node.end_pos[0] - 1
 426             )
 427             # Since the tokenizer sometimes has bugs, we cannot be sure that
 428             # this loop terminates. Therefore assert that there's always a
 429             # change.
 430             assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
 431             last_until_line = self._nodes_tree.parsed_until_line
 432
 433     def _try_parse_part(self, until_line):
 434         """
 435         Sets up a normal parser that uses a spezialized tokenizer to only parse
 436         until a certain position (or a bit longer if the statement hasn't
 437         ended.
 438         """
 439         self._parser_count += 1
 440         # TODO speed up, shouldn't copy the whole list all the time.
 441         # memoryview?
 442         parsed_until_line = self._nodes_tree.parsed_until_line
 443         lines_after = self._parser_lines_new[parsed_until_line:]
 444         tokens = self._diff_tokenize(
 445             lines_after,
 446             until_line,
 447             line_offset=parsed_until_line
 448         )
 449         self._active_parser = Parser(
 450             self._pgen_grammar,
 451             error_recovery=True
 452         )
 453         return self._active_parser.parse(tokens=tokens)
 454
 455     def _diff_tokenize(self, lines, until_line, line_offset=0):
 456         was_newline = False
 457         indents = self._nodes_tree.indents
 458         initial_indentation_count = len(indents)
 459
 460         tokens = self._tokenizer(
 461             lines,
 462             start_pos=(line_offset + 1, 0),
 463             indents=indents,
 464             is_first_token=line_offset == 0,
 465         )
 466         stack = self._active_parser.stack
 467         self._replace_tos_indent = None
 468         self._keyword_token_indents = {}
 469         # print('start', line_offset + 1, indents)
 470         for token in tokens:
 471             # print(token, indents)
 472             typ = token.type
 473             if typ == DEDENT:
 474                 if len(indents) < initial_indentation_count:
 475                     # We are done here, only thing that can come now is an
 476                     # endmarker or another dedented code block.
 477                     while True:
 478                         typ, string, start_pos, prefix = token = next(tokens)
 479                         if typ in (DEDENT, ERROR_DEDENT):
 480                             if typ == ERROR_DEDENT:
 481                                 # We want to force an error dedent in the next
 482                                 # parser/pass. To make this possible we just
 483                                 # increase the location by one.
 484                                 self._replace_tos_indent = start_pos[1] + 1
 485                                 pass
 486                         else:
 487                             break
 488
 489                     if '\n' in prefix or '\r' in prefix:
 490                         prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
 491                     else:
 492                         assert start_pos[1] >= len(prefix), repr(prefix)
 493                         if start_pos[1] - len(prefix) == 0:
 494                             prefix = ''
 495                     yield PythonToken(
 496                         ENDMARKER, '',
 497                         start_pos,
 498                         prefix
 499                     )
 500                     break
 501             elif typ == NEWLINE and token.start_pos[0] >= until_line:
 502                 was_newline = True
 503             elif was_newline:
 504                 was_newline = False
 505                 if len(indents) == initial_indentation_count:
 506                     # Check if the parser is actually in a valid suite state.
 507                     if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
 508                         yield PythonToken(ENDMARKER, '', token.start_pos, '')
 509                         break
 510
 511             if typ == NAME and token.string in ('class', 'def'):
 512                 self._keyword_token_indents[token.start_pos] = list(indents)
 513
 514             yield token
 515
 516
 517 class _NodesTreeNode:
 518     _ChildrenGroup = namedtuple(
 519         '_ChildrenGroup',
 520         'prefix children line_offset last_line_offset_leaf')
 521
 522     def __init__(self, tree_node, parent=None, indentation=0):
 523         self.tree_node = tree_node
 524         self._children_groups = []
 525         self.parent = parent
 526         self._node_children = []
 527         self.indentation = indentation
 528
 529     def finish(self):
 530         children = []
 531         for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
 532             first_leaf = _get_next_leaf_if_indentation(
 533                 children_part[0].get_first_leaf()
 534             )
 535
 536             first_leaf.prefix = prefix + first_leaf.prefix
 537             if line_offset != 0:
 538                 try:
 539                     _update_positions(
 540                         children_part, line_offset, last_line_offset_leaf)
 541                 except _PositionUpdatingFinished:
 542                     pass
 543             children += children_part
 544         self.tree_node.children = children
 545         # Reset the parents
 546         for node in children:
 547             node.parent = self.tree_node
 548
 549         for node_child in self._node_children:
 550             node_child.finish()
 551
 552     def add_child_node(self, child_node):
 553         self._node_children.append(child_node)
 554
 555     def add_tree_nodes(self, prefix, children, line_offset=0,
 556                        last_line_offset_leaf=None):
 557         if last_line_offset_leaf is None:
 558             last_line_offset_leaf = children[-1].get_last_leaf()
 559         group = self._ChildrenGroup(
 560             prefix, children, line_offset, last_line_offset_leaf
 561         )
 562         self._children_groups.append(group)
 563
 564     def get_last_line(self, suffix):
 565         line = 0
 566         if self._children_groups:
 567             children_group = self._children_groups[-1]
 568             last_leaf = _get_previous_leaf_if_indentation(
 569                 children_group.last_line_offset_leaf
 570             )
 571
 572             line = last_leaf.end_pos[0] + children_group.line_offset
 573
 574             # Newlines end on the next line, which means that they would cover
 575             # the next line. That line is not fully parsed at this point.
 576             if _ends_with_newline(last_leaf, suffix):
 577                 line -= 1
 578         line += len(split_lines(suffix)) - 1
 579
 580         if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
 581             # This is the end of a file (that doesn't end with a newline).
 582             line += 1
 583
 584         if self._node_children:
 585             return max(line, self._node_children[-1].get_last_line(suffix))
 586         return line
 587
 588     def __repr__(self):
 589         return '<%s: %s>' % (self.__class__.__name__, self.tree_node)
 590
 591
 592 class _NodesTree:
 593     def __init__(self, module):
 594         self._base_node = _NodesTreeNode(module)
 595         self._working_stack = [self._base_node]
 596         self._module = module
 597         self._prefix_remainder = ''
 598         self.prefix = ''
 599         self.indents = [0]
 600
 601     @property
 602     def parsed_until_line(self):
 603         return self._working_stack[-1].get_last_line(self.prefix)
 604
 605     def _update_insertion_node(self, indentation):
 606         for node in reversed(list(self._working_stack)):
 607             if node.indentation < indentation or node is self._working_stack[0]:
 608                 return node
 609             self._working_stack.pop()
 610
 611     def add_parsed_nodes(self, tree_nodes, keyword_token_indents):
 612         old_prefix = self.prefix
 613         tree_nodes = self._remove_endmarker(tree_nodes)
 614         if not tree_nodes:
 615             self.prefix = old_prefix + self.prefix
 616             return
 617
 618         assert tree_nodes[0].type != 'newline'
 619
 620         node = self._update_insertion_node(tree_nodes[0].start_pos[1])
 621         assert node.tree_node.type in ('suite', 'file_input')
 622         node.add_tree_nodes(old_prefix, tree_nodes)
 623         # tos = Top of stack
 624         self._update_parsed_node_tos(tree_nodes[-1], keyword_token_indents)
 625
 626     def _update_parsed_node_tos(self, tree_node, keyword_token_indents):
 627         if tree_node.type == 'suite':
 628             def_leaf = tree_node.parent.children[0]
 629             new_tos = _NodesTreeNode(
 630                 tree_node,
 631                 indentation=keyword_token_indents[def_leaf.start_pos][-1],
 632             )
 633             new_tos.add_tree_nodes('', list(tree_node.children))
 634
 635             self._working_stack[-1].add_child_node(new_tos)
 636             self._working_stack.append(new_tos)
 637
 638             self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
 639         elif _func_or_class_has_suite(tree_node):
 640             self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
 641
 642     def _remove_endmarker(self, tree_nodes):
 643         """
 644         Helps cleaning up the tree nodes that get inserted.
 645         """
 646         last_leaf = tree_nodes[-1].get_last_leaf()
 647         is_endmarker = last_leaf.type == 'endmarker'
 648         self._prefix_remainder = ''
 649         if is_endmarker:
 650             prefix = last_leaf.prefix
 651             separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
 652             if separation > -1:
 653                 # Remove the whitespace part of the prefix after a newline.
 654                 # That is not relevant if parentheses were opened. Always parse
 655                 # until the end of a line.
 656                 last_leaf.prefix, self._prefix_remainder = \
 657                     last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
 658
 659         self.prefix = ''
 660
 661         if is_endmarker:
 662             self.prefix = last_leaf.prefix
 663
 664             tree_nodes = tree_nodes[:-1]
 665         return tree_nodes
 666
 667     def _get_matching_indent_nodes(self, tree_nodes, is_new_suite):
 668         # There might be a random dedent where we have to stop copying.
 669         # Invalid indents are ok, because the parser handled that
 670         # properly before. An invalid dedent can happen, because a few
 671         # lines above there was an invalid indent.
 672         node_iterator = iter(tree_nodes)
 673         if is_new_suite:
 674             yield next(node_iterator)
 675
 676         first_node = next(node_iterator)
 677         indent = _get_indentation(first_node)
 678         if not is_new_suite and indent not in self.indents:
 679             return
 680         yield first_node
 681
 682         for n in node_iterator:
 683             if _get_indentation(n) != indent:
 684                 return
 685             yield n
 686
 687     def copy_nodes(self, tree_nodes, until_line, line_offset):
 688         """
 689         Copies tree nodes from the old parser tree.
 690
 691         Returns the number of tree nodes that were copied.
 692         """
 693         if tree_nodes[0].type in ('error_leaf', 'error_node'):
 694             # Avoid copying errors in the beginning. Can lead to a lot of
 695             # issues.
 696             return []
 697
 698         indentation = _get_indentation(tree_nodes[0])
 699         old_working_stack = list(self._working_stack)
 700         old_prefix = self.prefix
 701         old_indents = self.indents
 702         self.indents = [i for i in self.indents if i <= indentation]
 703
 704         self._update_insertion_node(indentation)
 705
 706         new_nodes, self._working_stack, self.prefix, added_indents = self._copy_nodes(
 707             list(self._working_stack),
 708             tree_nodes,
 709             until_line,
 710             line_offset,
 711             self.prefix,
 712         )
 713         if new_nodes:
 714             self.indents += added_indents
 715         else:
 716             self._working_stack = old_working_stack
 717             self.prefix = old_prefix
 718             self.indents = old_indents
 719         return new_nodes
 720
 721     def _copy_nodes(self, working_stack, nodes, until_line, line_offset,
 722                     prefix='', is_nested=False):
 723         new_nodes = []
 724         added_indents = []
 725
 726         nodes = list(self._get_matching_indent_nodes(
 727             nodes,
 728             is_new_suite=is_nested,
 729         ))
 730
 731         new_prefix = ''
 732         for node in nodes:
 733             if node.start_pos[0] > until_line:
 734                 break
 735
 736             if node.type == 'endmarker':
 737                 break
 738
 739             if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
 740                 break
 741             # TODO this check might take a bit of time for large files. We
 742             # might want to change this to do more intelligent guessing or
 743             # binary search.
 744             if _get_last_line(node) > until_line:
 745                 # We can split up functions and classes later.
 746                 if _func_or_class_has_suite(node):
 747                     new_nodes.append(node)
 748                 break
 749             try:
 750                 c = node.children
 751             except AttributeError:
 752                 pass
 753             else:
 754                 # This case basically appears with error recovery of one line
 755                 # suites like `def foo(): bar.-`. In this case we might not
 756                 # include a newline in the statement and we need to take care
 757                 # of that.
 758                 n = node
 759                 if n.type == 'decorated':
 760                     n = n.children[-1]
 761                 if n.type in ('async_funcdef', 'async_stmt'):
 762                     n = n.children[-1]
 763                 if n.type in ('classdef', 'funcdef'):
 764                     suite_node = n.children[-1]
 765                 else:
 766                     suite_node = c[-1]
 767
 768                 if suite_node.type in ('error_leaf', 'error_node'):
 769                     break
 770
 771             new_nodes.append(node)
 772
 773         # Pop error nodes at the end from the list
 774         if new_nodes:
 775             while new_nodes:
 776                 last_node = new_nodes[-1]
 777                 if (last_node.type in ('error_leaf', 'error_node')
 778                         or _is_flow_node(new_nodes[-1])):
 779                     # Error leafs/nodes don't have a defined start/end. Error
 780                     # nodes might not end with a newline (e.g. if there's an
 781                     # open `(`). Therefore ignore all of them unless they are
 782                     # succeeded with valid parser state.
 783                     # If we copy flows at the end, they might be continued
 784                     # after the copy limit (in the new parser).
 785                     # In this while loop we try to remove until we find a newline.
 786                     new_prefix = ''
 787                     new_nodes.pop()
 788                     while new_nodes:
 789                         last_node = new_nodes[-1]
 790                         if last_node.get_last_leaf().type == 'newline':
 791                             break
 792                         new_nodes.pop()
 793                     continue
 794                 if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node':
 795                     # The problem here is that Parso error recovery sometimes
 796                     # influences nodes before this node.
 797                     # Since the new last node is an error node this will get
 798                     # cleaned up in the next while iteration.
 799                     new_nodes.pop()
 800                     continue
 801                 break
 802
 803         if not new_nodes:
 804             return [], working_stack, prefix, added_indents
 805
 806         tos = working_stack[-1]
 807         last_node = new_nodes[-1]
 808         had_valid_suite_last = False
 809         # Pop incomplete suites from the list
 810         if _func_or_class_has_suite(last_node):
 811             suite = last_node
 812             while suite.type != 'suite':
 813                 suite = suite.children[-1]
 814
 815             indent = _get_suite_indentation(suite)
 816             added_indents.append(indent)
 817
 818             suite_tos = _NodesTreeNode(suite, indentation=_get_indentation(last_node))
 819             # Don't need to pass line_offset here, it's already done by the
 820             # parent.
 821             suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes(
 822                 working_stack + [suite_tos], suite.children, until_line, line_offset,
 823                 is_nested=True,
 824             )
 825             added_indents += ai
 826             if len(suite_nodes) < 2:
 827                 # A suite only with newline is not valid.
 828                 new_nodes.pop()
 829                 new_prefix = ''
 830             else:
 831                 assert new_nodes
 832                 tos.add_child_node(suite_tos)
 833                 working_stack = new_working_stack
 834                 had_valid_suite_last = True
 835
 836         if new_nodes:
 837             if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
 838                 p = new_nodes[-1].get_next_leaf().prefix
 839                 # We are not allowed to remove the newline at the end of the
 840                 # line, otherwise it's going to be missing. This happens e.g.
 841                 # if a bracket is around before that moves newlines to
 842                 # prefixes.
 843                 new_prefix = split_lines(p, keepends=True)[0]
 844
 845             if had_valid_suite_last:
 846                 last = new_nodes[-1]
 847                 if last.type == 'decorated':
 848                     last = last.children[-1]
 849                 if last.type in ('async_funcdef', 'async_stmt'):
 850                     last = last.children[-1]
 851                 last_line_offset_leaf = last.children[-2].get_last_leaf()
 852                 assert last_line_offset_leaf == ':'
 853             else:
 854                 last_line_offset_leaf = new_nodes[-1].get_last_leaf()
 855             tos.add_tree_nodes(
 856                 prefix, new_nodes, line_offset, last_line_offset_leaf,
 857             )
 858             prefix = new_prefix
 859             self._prefix_remainder = ''
 860
 861         return new_nodes, working_stack, prefix, added_indents
 862
 863     def close(self):
 864         self._base_node.finish()
 865
 866         # Add an endmarker.
 867         try:
 868             last_leaf = self._module.get_last_leaf()
 869         except IndexError:
 870             end_pos = [1, 0]
 871         else:
 872             last_leaf = _skip_dedent_error_leaves(last_leaf)
 873             end_pos = list(last_leaf.end_pos)
 874         lines = split_lines(self.prefix)
 875         assert len(lines) > 0
 876         if len(lines) == 1:
 877             if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]:
 878                 end_pos[1] -= 1
 879             end_pos[1] += len(lines[0])
 880         else:
 881             end_pos[0] += len(lines) - 1
 882             end_pos[1] = len(lines[-1])
 883
 884         endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
 885         endmarker.parent = self._module
 886         self._module.children.append(endmarker)