"""
The diff parser is trying to be a faster version of the normal parser by trying
to reuse the nodes of a previous pass over the same file. This is also called
incremental parsing in parser literature. The difference is mostly that with
incremental parsing you get a range that needs to be reparsed. Here we
calculate that range ourselves by using difflib. After that it's essentially
incremental parsing.

The biggest issue of this approach is that we reuse nodes in a mutable way. The
initial design and idea is quite problematic for this parser, but it is also
pretty fast. Measurements showed that just copying nodes in Python is simply
quite a bit slower (especially for big files >3 kLOC). Therefore we did not
want to get rid of the mutable nodes, since this is usually not an issue.

This is by far the hardest software I ever wrote, exactly because the initial
design is crappy. When you have to account for a lot of mutable state, it
creates a ton of issues that you would otherwise not have. This file took
probably 3-6 months to write, which is insane for a parser.

There is a fuzzer that helps test this whole thing. Please use it if you
make changes here. If you run the fuzzer like::

    test/fuzz_diff_parser.py random -n 100000

you can be pretty sure that everything is still fine. I sometimes run the
fuzzer up to 24h to make sure everything is still ok.
"""
30 from collections
import namedtuple
33 from parso
.utils
import split_lines
34 from parso
.python
.parser
import Parser
35 from parso
.python
.tree
import EndMarker
36 from parso
.python
.tokenize
import PythonToken
, BOM_UTF8_STRING
37 from parso
.python
.token
import PythonTokenTypes
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# When set to True, `DiffParser.update` cross-checks its result against a
# normal (non-diff) parse of the new lines and re-raises on mismatch.
DEBUG_DIFF_PARSER = False

# `token_type` values of error leaves that only represent indentation changes.
_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'

# Short aliases for the token types that are compared against in this module.
NEWLINE = PythonTokenTypes.NEWLINE
DEDENT = PythonTokenTypes.DEDENT
NAME = PythonTokenTypes.NAME
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
51 def _is_indentation_error_leaf(node
):
52 return node
.type == 'error_leaf' and node
.token_type
in _INDENTATION_TOKENS
55 def _get_previous_leaf_if_indentation(leaf
):
56 while leaf
and _is_indentation_error_leaf(leaf
):
57 leaf
= leaf
.get_previous_leaf()
61 def _get_next_leaf_if_indentation(leaf
):
62 while leaf
and _is_indentation_error_leaf(leaf
):
63 leaf
= leaf
.get_next_leaf()
67 def _get_suite_indentation(tree_node
):
68 return _get_indentation(tree_node
.children
[1])
71 def _get_indentation(tree_node
):
72 return tree_node
.start_pos
[1]
def _assert_valid_graph(node):
    """
    Checks if the parent/children relationship is correct.

    This is a check that only runs during debugging/testing. For nodes it
    verifies the ``parent`` of every child recursively; for leaves it verifies
    that ``start_pos`` matches the position computed from the previous leaf
    and the prefix.
    """
    try:
        children = node.children
    except AttributeError:
        pass
    else:
        # A node: every child must point back to it.
        for child in children:
            assert child.parent == node, (node, child)
            _assert_valid_graph(child)
        return

    # From here on ``node`` is a leaf.
    # Ignore INDENT is necessary, because indent/dedent tokens don't
    # contain value/prefix and are just around, because of the tokenizer.
    if _is_indentation_error_leaf(node):
        assert not node.value
        assert not node.prefix
        return

    # Calculate the content between two start positions.
    previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
    if previous_leaf is None:
        content = node.prefix
        previous_start_pos = 1, 0
    else:
        assert previous_leaf.end_pos <= node.start_pos, (previous_leaf, node)

        content = previous_leaf.value + node.prefix
        previous_start_pos = previous_leaf.start_pos

    if '\n' in content or '\r' in content:
        content_lines = split_lines(content)
        expected = (
            previous_start_pos[0] + len(content_lines) - 1,
            len(content_lines[-1]),
        )
    else:
        expected = previous_start_pos[0], previous_start_pos[1] + len(content)
        is_bom_at_start = (
            content.startswith(BOM_UTF8_STRING)
            and node.get_start_pos_of_prefix() == (1, 0)
        )
        if is_bom_at_start:
            # Remove the byte order mark
            expected = expected[0], expected[1] - 1

    assert node.start_pos == expected, (node.start_pos, expected)
121 def _assert_nodes_are_equal(node1
, node2
):
123 children1
= node1
.children
124 except AttributeError:
125 assert not hasattr(node2
, 'children'), (node1
, node2
)
126 assert node1
.value
== node2
.value
, (node1
, node2
)
127 assert node1
.type == node2
.type, (node1
, node2
)
128 assert node1
.prefix
== node2
.prefix
, (node1
, node2
)
129 assert node1
.start_pos
== node2
.start_pos
, (node1
, node2
)
133 children2
= node2
.children
134 except AttributeError:
135 assert False, (node1
, node2
)
136 for n1
, n2
in zip(children1
, children2
):
137 _assert_nodes_are_equal(n1
, n2
)
138 assert len(children1
) == len(children2
), '\n' + repr(children1
) + '\n' + repr(children2
)
def _get_debug_error_message(module, old_lines, new_lines):
    """
    Build the bug-report text for a diff parser failure.

    The message contains the parso version, a unified diff from ``old_lines``
    to ``new_lines`` and a unified diff from ``new_lines`` to the code that
    ``module`` currently produces.
    """
    module_lines = split_lines(module.get_code(), keepends=True)
    actual_diff = difflib.unified_diff(new_lines, module_lines)
    requested_diff = difflib.unified_diff(old_lines, new_lines)
    import parso
    template = (
        "There's an issue with the diff parser. Please "
        "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
    )
    return template % (parso.__version__, ''.join(requested_diff), ''.join(actual_diff))
def _get_last_line(node_or_leaf):
    """
    Return the last line that ``node_or_leaf`` occupies.

    If the last leaf ends with a newline, its start line is returned.
    Otherwise the end line of the last leaf is returned, plus one when the
    following leaf is an endmarker with a newline in its prefix.
    """
    last_leaf = node_or_leaf.get_last_leaf()
    if _ends_with_newline(last_leaf):
        return last_leaf.start_pos[0]

    end_line = last_leaf.end_pos[0]
    following = last_leaf.get_next_leaf()
    if following.type == 'endmarker' and '\n' in following.prefix:
        # This is a very special case and has to do with error recovery in
        # Parso. The problem is basically that there's no newline leaf at
        # the end sometimes (it's required in the grammar, but not needed
        # actually before endmarker, CPython just adds a newline to make
        # source code pass the parser, to account for that Parso error
        # recovery allows small_stmt instead of simple_stmt).
        return end_line + 1
    return end_line
170 def _skip_dedent_error_leaves(leaf
):
171 while leaf
is not None and leaf
.type == 'error_leaf' and leaf
.token_type
== 'DEDENT':
172 leaf
= leaf
.get_previous_leaf()
def _ends_with_newline(leaf, suffix=''):
    """
    Return whether ``leaf`` (after skipping DEDENT error leaves) is a newline,
    or ``suffix`` ends with a line break character.

    For error leaves the lowercased ``token_type`` is compared instead of
    ``type``.
    """
    leaf = _skip_dedent_error_leaves(leaf)
    typ = leaf.token_type.lower() if leaf.type == 'error_leaf' else leaf.type
    return typ == 'newline' or suffix.endswith(('\n', '\r'))
187 def _flows_finished(pgen_grammar
, stack
):
189 if, while, for and try might not be finished, because another part might
192 for stack_node
in stack
:
193 if stack_node
.nonterminal
in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
198 def _func_or_class_has_suite(node
):
199 if node
.type == 'decorated':
200 node
= node
.children
[-1]
201 if node
.type in ('async_funcdef', 'async_stmt'):
202 node
= node
.children
[-1]
203 return node
.type in ('classdef', 'funcdef') and node
.children
[-1].type == 'suite'
def _suite_or_file_input_is_valid(pgen_grammar, stack):
    """
    Return whether the parser ``stack`` is in a state where a part may end.

    That is the case if no flow statement is unfinished and the innermost
    relevant stack node is neither a decorator nor a suite that holds at most
    one node.
    """
    if not _flows_finished(pgen_grammar, stack):
        return False

    for stack_node in reversed(stack):
        nonterminal = stack_node.nonterminal
        if nonterminal == 'decorator':
            # A decorator is only valid with the upcoming function.
            return False
        if nonterminal == 'suite':
            # If only newline is in the suite, the suite is not valid, yet.
            return len(stack_node.nodes) > 1

    # Not reaching a suite means that we're dealing with file_input levels
    # where there's no need for a valid statement in it. It can also be empty.
    return True
223 def _is_flow_node(node
):
224 if node
.type == 'async_stmt':
225 node
= node
.children
[1]
227 value
= node
.children
[0].value
228 except AttributeError:
230 return value
in ('if', 'for', 'while', 'try', 'with')
233 class _PositionUpdatingFinished(Exception):
237 def _update_positions(nodes
, line_offset
, last_leaf
):
240 children
= node
.children
241 except AttributeError:
243 node
.line
+= line_offset
244 if node
is last_leaf
:
245 raise _PositionUpdatingFinished
247 _update_positions(children
, line_offset
, last_leaf
)
class DiffParser:
    """
    An advanced form of parsing a file faster. Unfortunately comes with huge
    side effects. It changes the given module.
    """
    def __init__(self, pgen_grammar, tokenizer, module):
        """
        Store the grammar, the tokenizer callable and the module that is
        going to be updated in place by `update`.
        """
        self._pgen_grammar = pgen_grammar
        self._tokenizer = tokenizer
        self._module = module

    def _reset(self):
        """Reset the counters and start a fresh nodes tree for the module."""
        self._copy_count = 0
        self._parser_count = 0

        self._nodes_tree = _NodesTree(self._module)

    def update(self, old_lines, new_lines):
        '''
        The algorithm works as follows:

        Equal:
            - Assure that the start is a newline, otherwise parse until we get
              one.
            - Copy from parsed_until_line + 1 to max(i2 + 1)
            - Make sure that the indentation is correct (e.g. add DEDENT)
            - Add old and change positions
        Insert:
            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
              much more.

        Returns the new module node.

        Raises an Exception if the last line of the resulting module does not
        match the number of new lines.
        '''
        LOG.debug('diff parser start')
        # Reset the used names cache so they get regenerated.
        self._module._used_names = None

        self._parser_lines_new = new_lines

        self._reset()

        line_length = len(new_lines)
        sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
        opcodes = sm.get_opcodes()
        # Pass the values as arguments so that the message is only formatted
        # if debug logging is actually enabled.
        LOG.debug('line_lengths old: %s; new: %s', len(old_lines), line_length)

        for operation, i1, i2, j1, j2 in opcodes:
            LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
                      operation, i1 + 1, i2, j1 + 1, j2)

            if j2 == line_length and new_lines[-1] == '':
                # The empty part after the last newline is not relevant.
                j2 -= 1

            if operation == 'equal':
                line_offset = j1 - i1
                self._copy_from_old_parser(line_offset, i1 + 1, i2, j2)
            elif operation == 'replace':
                self._parse(until_line=j2)
            elif operation == 'insert':
                self._parse(until_line=j2)
            else:
                # Deleted lines need no work, they are simply not copied.
                assert operation == 'delete'

        # With this action all change will finally be applied and we have a
        # changed module.
        self._nodes_tree.close()

        if DEBUG_DIFF_PARSER:
            # If there is reasonable suspicion that the diff parser is not
            # behaving well, this should be enabled.
            try:
                code = ''.join(new_lines)
                assert self._module.get_code() == code
                _assert_valid_graph(self._module)
                without_diff_parser_module = Parser(
                    self._pgen_grammar,
                    error_recovery=True
                ).parse(self._tokenizer(new_lines))
                _assert_nodes_are_equal(self._module, without_diff_parser_module)
            except AssertionError:
                print(_get_debug_error_message(self._module, old_lines, new_lines))
                raise

        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            raise Exception(
                ('(%s != %s) ' % (last_pos, line_length))
                + _get_debug_error_message(self._module, old_lines, new_lines)
            )
        LOG.debug('diff parser end')
        return self._module

    def _enabled_debugging(self, old_lines, lines_new):
        """Log a warning if the module's code differs from ``lines_new``."""
        if self._module.get_code() != ''.join(lines_new):
            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))

    def _copy_from_old_parser(self, line_offset, start_line_old, until_line_old, until_line_new):
        """
        Handle an 'equal' block: copy statements of the old tree until
        ``until_line_new`` is reached, parsing a bit whenever nothing can be
        copied.
        """
        last_until_line = -1
        while until_line_new > self._nodes_tree.parsed_until_line:
            parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
            if line_stmt is None:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has statements
                # again that can be copied (e.g. not lines within parentheses).
                self._parse(self._nodes_tree.parsed_until_line + 1)
            else:
                p_children = line_stmt.parent.children
                index = p_children.index(line_stmt)

                if start_line_old == 1 \
                        and p_children[0].get_first_leaf().prefix.startswith(BOM_UTF8_STRING):
                    # If there's a BOM in the beginning, just reparse. It's too
                    # complicated to account for it otherwise.
                    copied_nodes = []
                else:
                    from_ = self._nodes_tree.parsed_until_line + 1
                    copied_nodes = self._nodes_tree.copy_nodes(
                        p_children[index:],
                        until_line_old,
                        line_offset
                    )
                # Match all the nodes that are in the wanted range.
                if copied_nodes:
                    self._copy_count += 1

                    to = self._nodes_tree.parsed_until_line

                    LOG.debug('copy old[%s:%s] new[%s:%s]',
                              copied_nodes[0].start_pos[0],
                              copied_nodes[-1].end_pos[0] - 1, from_, to)
                else:
                    # We have copied as much as possible (but definitely not too
                    # much). Therefore we just parse a bit more.
                    self._parse(self._nodes_tree.parsed_until_line + 1)
            # Since there are potential bugs that might loop here endlessly, we
            # just stop here.
            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
            last_until_line = self._nodes_tree.parsed_until_line

    def _get_old_line_stmt(self, old_line):
        """
        Return the statement of the old tree that starts on ``old_line``
        (a direct child of a file_input or suite), or None if there is none.
        """
        leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)

        if _ends_with_newline(leaf):
            leaf = leaf.get_next_leaf()
        if leaf.get_start_pos_of_prefix()[0] == old_line:
            node = leaf
            while node.parent.type not in ('file_input', 'suite'):
                node = node.parent

            # Make sure that if only the `else:` line of an if statement is
            # copied that not the whole thing is going to be copied.
            if node.start_pos[0] >= old_line:
                return node
        # Must be on the same line. Otherwise we need to parse that bit.
        return None

    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until a
        valid state is reached.
        """
        last_until_line = 0
        while until_line > self._nodes_tree.parsed_until_line:
            node = self._try_parse_part(until_line)
            nodes = node.children

            self._nodes_tree.add_parsed_nodes(nodes, self._keyword_token_indents)
            if self._replace_tos_indent is not None:
                self._nodes_tree.indents[-1] = self._replace_tos_indent

            LOG.debug(
                'parse_part from %s to %s (to %s in part parser)',
                nodes[0].get_start_pos_of_prefix()[0],
                self._nodes_tree.parsed_until_line,
                node.end_pos[0] - 1
            )
            # Since the tokenizer sometimes has bugs, we cannot be sure that
            # this loop terminates. Therefore assert that there's always a
            # change.
            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
            last_until_line = self._nodes_tree.parsed_until_line

    def _try_parse_part(self, until_line):
        """
        Sets up a normal parser that uses a specialized tokenizer to only parse
        until a certain position (or a bit longer if the statement hasn't
        ended.
        """
        self._parser_count += 1
        # TODO speed up, shouldn't copy the whole list all the time.
        # memoryview?
        parsed_until_line = self._nodes_tree.parsed_until_line
        lines_after = self._parser_lines_new[parsed_until_line:]
        tokens = self._diff_tokenize(
            lines_after,
            until_line,
            line_offset=parsed_until_line
        )
        self._active_parser = Parser(
            self._pgen_grammar,
            error_recovery=True
        )
        return self._active_parser.parse(tokens=tokens)

    def _diff_tokenize(self, lines, until_line, line_offset=0):
        """
        Yield the tokens of ``lines`` and end the stream early with a
        synthetic ENDMARKER.

        The stream ends either after a dedent below the initial indentation
        count, or after a newline at/after ``until_line`` when the active
        parser's stack is in a valid suite/file_input state. Positions of
        ``class``/``def`` keywords are recorded together with a copy of the
        indents in ``self._keyword_token_indents``.
        """
        was_newline = False
        indents = self._nodes_tree.indents
        initial_indentation_count = len(indents)

        tokens = self._tokenizer(
            lines,
            start_pos=(line_offset + 1, 0),
            indents=indents,
            is_first_token=line_offset == 0,
        )
        stack = self._active_parser.stack
        self._replace_tos_indent = None
        self._keyword_token_indents = {}
        # print('start', line_offset + 1, indents)
        for token in tokens:
            # print(token, indents)
            typ = token.type
            if typ == DEDENT:
                if len(indents) < initial_indentation_count:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    while True:
                        typ, string, start_pos, prefix = token = next(tokens)
                        if typ in (DEDENT, ERROR_DEDENT):
                            if typ == ERROR_DEDENT:
                                # We want to force an error dedent in the next
                                # parser/pass. To make this possible we just
                                # increase the location by one.
                                self._replace_tos_indent = start_pos[1] + 1
                                pass
                        else:
                            break

                    if '\n' in prefix or '\r' in prefix:
                        # Keep the prefix only up to its last line break.
                        prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                    else:
                        assert start_pos[1] >= len(prefix), repr(prefix)
                        if start_pos[1] - len(prefix) == 0:
                            prefix = ''
                    yield PythonToken(
                        ENDMARKER, '',
                        start_pos,
                        prefix
                    )
                    break
            elif typ == NEWLINE and token.start_pos[0] >= until_line:
                was_newline = True
            elif was_newline:
                was_newline = False
                if len(indents) == initial_indentation_count:
                    # Check if the parser is actually in a valid suite state.
                    if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                        yield PythonToken(ENDMARKER, '', token.start_pos, '')
                        break

            if typ == NAME and token.string in ('class', 'def'):
                self._keyword_token_indents[token.start_pos] = list(indents)

            yield token
517 class _NodesTreeNode
:
518 _ChildrenGroup
= namedtuple(
520 'prefix children line_offset last_line_offset_leaf')
522 def __init__(self
, tree_node
, parent
=None, indentation
=0):
523 self
.tree_node
= tree_node
524 self
._children
_groups
= []
526 self
._node
_children
= []
527 self
.indentation
= indentation
531 for prefix
, children_part
, line_offset
, last_line_offset_leaf
in self
._children
_groups
:
532 first_leaf
= _get_next_leaf_if_indentation(
533 children_part
[0].get_first_leaf()
536 first_leaf
.prefix
= prefix
+ first_leaf
.prefix
540 children_part
, line_offset
, last_line_offset_leaf
)
541 except _PositionUpdatingFinished
:
543 children
+= children_part
544 self
.tree_node
.children
= children
546 for node
in children
:
547 node
.parent
= self
.tree_node
549 for node_child
in self
._node
_children
:
552 def add_child_node(self
, child_node
):
553 self
._node
_children
.append(child_node
)
555 def add_tree_nodes(self
, prefix
, children
, line_offset
=0,
556 last_line_offset_leaf
=None):
557 if last_line_offset_leaf
is None:
558 last_line_offset_leaf
= children
[-1].get_last_leaf()
559 group
= self
._ChildrenGroup
(
560 prefix
, children
, line_offset
, last_line_offset_leaf
562 self
._children
_groups
.append(group
)
564 def get_last_line(self
, suffix
):
566 if self
._children
_groups
:
567 children_group
= self
._children
_groups
[-1]
568 last_leaf
= _get_previous_leaf_if_indentation(
569 children_group
.last_line_offset_leaf
572 line
= last_leaf
.end_pos
[0] + children_group
.line_offset
574 # Newlines end on the next line, which means that they would cover
575 # the next line. That line is not fully parsed at this point.
576 if _ends_with_newline(last_leaf
, suffix
):
578 line
+= len(split_lines(suffix
)) - 1
580 if suffix
and not suffix
.endswith('\n') and not suffix
.endswith('\r'):
581 # This is the end of a file (that doesn't end with a newline).
584 if self
._node
_children
:
585 return max(line
, self
._node
_children
[-1].get_last_line(suffix
))
589 return '<%s: %s>' % (self
.__class
__.__name
__, self
.tree_node
)
class _NodesTree:
    """
    Builds up the new content of the module while the diff parser runs.

    Keeps a working stack of `_NodesTreeNode` objects (module at the bottom,
    nested suites above it), the pending ``prefix`` and the list of active
    ``indents``. Changes are applied to the module in `close`.
    """
    def __init__(self, module):
        self._base_node = _NodesTreeNode(module)
        self._working_stack = [self._base_node]
        self._module = module
        self._prefix_remainder = ''
        self.prefix = ''
        self.indents = [0]

    @property
    def parsed_until_line(self):
        """The last line covered so far, as reported by the top of the stack."""
        return self._working_stack[-1].get_last_line(self.prefix)

    def _update_insertion_node(self, indentation):
        """
        Pop the working stack until a node with a smaller indentation than
        ``indentation`` (or the base node) is on top, and return that node.
        """
        # Iterate over a copy, because the stack is popped while looping.
        for node in reversed(list(self._working_stack)):
            if node.indentation < indentation or node is self._working_stack[0]:
                return node
            self._working_stack.pop()

    def add_parsed_nodes(self, tree_nodes, keyword_token_indents):
        """
        Add freshly parsed ``tree_nodes`` (an endmarker at the end is removed
        first) at the insertion node that matches their indentation.
        """
        old_prefix = self.prefix
        tree_nodes = self._remove_endmarker(tree_nodes)
        if not tree_nodes:
            # Only an endmarker was parsed; keep accumulating the prefix.
            self.prefix = old_prefix + self.prefix
            return

        assert tree_nodes[0].type != 'newline'

        node = self._update_insertion_node(tree_nodes[0].start_pos[1])
        assert node.tree_node.type in ('suite', 'file_input')
        node.add_tree_nodes(old_prefix, tree_nodes)
        # tos = Top of stack
        self._update_parsed_node_tos(tree_nodes[-1], keyword_token_indents)

    def _update_parsed_node_tos(self, tree_node, keyword_token_indents):
        """
        Push a new stack entry for every suite found by following the last
        child of ``tree_node`` through function/class definitions.
        """
        if tree_node.type == 'suite':
            def_leaf = tree_node.parent.children[0]
            new_tos = _NodesTreeNode(
                tree_node,
                # The last indent recorded when the def/class keyword was
                # tokenized.
                indentation=keyword_token_indents[def_leaf.start_pos][-1],
            )
            new_tos.add_tree_nodes('', list(tree_node.children))

            self._working_stack[-1].add_child_node(new_tos)
            self._working_stack.append(new_tos)

            self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
        elif _func_or_class_has_suite(tree_node):
            self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)

    def _remove_endmarker(self, tree_nodes):
        """
        Helps cleaning up the tree nodes that get inserted.

        If the last leaf is an endmarker, its prefix becomes ``self.prefix``
        (the part after the last line break is kept separately in
        ``self._prefix_remainder``) and the last node is dropped from the
        returned list.
        """
        last_leaf = tree_nodes[-1].get_last_leaf()
        is_endmarker = last_leaf.type == 'endmarker'
        self._prefix_remainder = ''
        if is_endmarker:
            prefix = last_leaf.prefix
            separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
            if separation > -1:
                # Remove the whitespace part of the prefix after a newline.
                # That is not relevant if parentheses were opened. Always parse
                # until the end of a line.
                last_leaf.prefix, self._prefix_remainder = \
                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]

        self.prefix = ''

        if is_endmarker:
            self.prefix = last_leaf.prefix

            tree_nodes = tree_nodes[:-1]
        return tree_nodes

    def _get_matching_indent_nodes(self, tree_nodes, is_new_suite):
        """
        Yield the leading nodes of ``tree_nodes`` that share one indentation.

        For a new suite the first node is yielded unconditionally and the
        indentation is taken from the node after it.
        """
        # There might be a random dedent where we have to stop copying.
        # Invalid indents are ok, because the parser handled that
        # properly before. An invalid dedent can happen, because a few
        # lines above there was an invalid indent.
        node_iterator = iter(tree_nodes)
        if is_new_suite:
            yield next(node_iterator)

        first_node = next(node_iterator)
        indent = _get_indentation(first_node)
        if not is_new_suite and indent not in self.indents:
            return
        yield first_node

        for n in node_iterator:
            if _get_indentation(n) != indent:
                return
            yield n

    def copy_nodes(self, tree_nodes, until_line, line_offset):
        """
        Copies tree nodes from the old parser tree.

        Returns the list of tree nodes that were copied. The list is empty if
        nothing could be copied; in that case the working stack, prefix and
        indents are restored to their previous values.
        """
        if tree_nodes[0].type in ('error_leaf', 'error_node'):
            # Avoid copying errors in the beginning. Can lead to a lot of
            # issues.
            return []

        indentation = _get_indentation(tree_nodes[0])
        # Remember the current state so that it can be restored below.
        old_working_stack = list(self._working_stack)
        old_prefix = self.prefix
        old_indents = self.indents
        self.indents = [i for i in self.indents if i <= indentation]

        self._update_insertion_node(indentation)

        new_nodes, self._working_stack, self.prefix, added_indents = self._copy_nodes(
            list(self._working_stack),
            tree_nodes,
            until_line,
            line_offset,
            self.prefix,
        )
        if new_nodes:
            self.indents += added_indents
        else:
            self._working_stack = old_working_stack
            self.prefix = old_prefix
            self.indents = old_indents
        return new_nodes

    def _copy_nodes(self, working_stack, nodes, until_line, line_offset,
                    prefix='', is_nested=False):
        """
        Select the nodes of ``nodes`` that can be copied up to ``until_line``
        and queue them on the top of ``working_stack``.

        Returns a tuple ``(new_nodes, working_stack, prefix, added_indents)``.
        Calls itself for the suite of a trailing function/class definition.
        """
        new_nodes = []
        added_indents = []

        nodes = list(self._get_matching_indent_nodes(
            nodes,
            is_new_suite=is_nested,
        ))

        new_prefix = ''
        for node in nodes:
            if node.start_pos[0] > until_line:
                break

            if node.type == 'endmarker':
                break

            if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
                break
            # TODO this check might take a bit of time for large files. We
            # might want to change this to do more intelligent guessing or
            # binary search.
            if _get_last_line(node) > until_line:
                # We can split up functions and classes later.
                if _func_or_class_has_suite(node):
                    new_nodes.append(node)
                break
            try:
                c = node.children
            except AttributeError:
                # A leaf has no children to inspect.
                pass
            else:
                # This case basically appears with error recovery of one line
                # suites like `def foo(): bar.-`. In this case we might not
                # include a newline in the statement and we need to take care
                # of that.
                n = node
                if n.type == 'decorated':
                    n = n.children[-1]
                if n.type in ('async_funcdef', 'async_stmt'):
                    n = n.children[-1]
                if n.type in ('classdef', 'funcdef'):
                    suite_node = n.children[-1]
                else:
                    suite_node = c[-1]

                if suite_node.type in ('error_leaf', 'error_node'):
                    break

            new_nodes.append(node)

        # Pop error nodes at the end from the list
        if new_nodes:
            while new_nodes:
                last_node = new_nodes[-1]
                if (last_node.type in ('error_leaf', 'error_node')
                        or _is_flow_node(new_nodes[-1])):
                    # Error leafs/nodes don't have a defined start/end. Error
                    # nodes might not end with a newline (e.g. if there's an
                    # open `(`). Therefore ignore all of them unless they are
                    # succeeded with valid parser state.
                    # If we copy flows at the end, they might be continued
                    # after the copy limit (in the new parser).
                    # In this while loop we try to remove until we find a newline.
                    new_prefix = ''
                    new_nodes.pop()
                    while new_nodes:
                        last_node = new_nodes[-1]
                        if last_node.get_last_leaf().type == 'newline':
                            break
                        new_nodes.pop()
                    continue
                if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node':
                    # The problem here is that Parso error recovery sometimes
                    # influences nodes before this node.
                    # Since the new last node is an error node this will get
                    # cleaned up in the next while iteration.
                    new_nodes.pop()
                    continue
                break

        if not new_nodes:
            return [], working_stack, prefix, added_indents

        tos = working_stack[-1]
        last_node = new_nodes[-1]
        had_valid_suite_last = False
        # Pop incomplete suites from the list
        if _func_or_class_has_suite(last_node):
            suite = last_node
            while suite.type != 'suite':
                suite = suite.children[-1]

            indent = _get_suite_indentation(suite)
            added_indents.append(indent)

            suite_tos = _NodesTreeNode(suite, indentation=_get_indentation(last_node))
            # Don't need to pass line_offset here, it's already done by the
            # parent.
            suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes(
                working_stack + [suite_tos], suite.children, until_line, line_offset,
                is_nested=True,
            )
            added_indents += ai
            if len(suite_nodes) < 2:
                # A suite only with newline is not valid.
                new_nodes.pop()
                new_prefix = ''
            else:
                assert new_nodes
                tos.add_child_node(suite_tos)
                working_stack = new_working_stack
                had_valid_suite_last = True

        if new_nodes:
            if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
                p = new_nodes[-1].get_next_leaf().prefix
                # We are not allowed to remove the newline at the end of the
                # line, otherwise it's going to be missing. This happens e.g.
                # if a bracket is around before that moves newlines to
                # prefixes.
                new_prefix = split_lines(p, keepends=True)[0]

            if had_valid_suite_last:
                last = new_nodes[-1]
                if last.type == 'decorated':
                    last = last.children[-1]
                if last.type in ('async_funcdef', 'async_stmt'):
                    last = last.children[-1]
                # The leaf right before the suite, expected to be the colon.
                last_line_offset_leaf = last.children[-2].get_last_leaf()
                assert last_line_offset_leaf == ':'
            else:
                last_line_offset_leaf = new_nodes[-1].get_last_leaf()
            tos.add_tree_nodes(
                prefix, new_nodes, line_offset, last_line_offset_leaf,
            )
            prefix = new_prefix
            self._prefix_remainder = ''

        return new_nodes, working_stack, prefix, added_indents

    def close(self):
        """
        Apply all queued changes to the module and append a new endmarker
        that carries the remaining prefix.
        """
        self._base_node.finish()

        # Add an endmarker.
        try:
            last_leaf = self._module.get_last_leaf()
        except IndexError:
            # NOTE(review): presumably raised for a module without children
            # - confirm against `get_last_leaf`.
            end_pos = [1, 0]
        else:
            last_leaf = _skip_dedent_error_leaves(last_leaf)
            end_pos = list(last_leaf.end_pos)
        lines = split_lines(self.prefix)
        assert len(lines) > 0
        if len(lines) == 1:
            if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]:
                # The byte order mark does not count as a column.
                end_pos[1] -= 1
            end_pos[1] += len(lines[0])
        else:
            end_pos[0] += len(lines) - 1
            end_pos[1] = len(lines[-1])

        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
        endmarker.parent = self._module
        self._module.children.append(endmarker)