crepu.dev Git - config.git/blame_incremental - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/parso/python/diff.py

... / ...

Commit	Line	Data
	1	"""
	2	The diff parser is trying to be a faster version of the normal parser by trying
	3	to reuse the nodes of a previous pass over the same file. This is also called
	4	incremental parsing in parser literature. The difference is mostly that with
	5	incremental parsing you get a range that needs to be reparsed. Here we
	6	calculate that range ourselves by using difflib. After that it's essentially
	7	incremental parsing.
	8
	9	The biggest issue of this approach is that we reuse nodes in a mutable way. The
	10	intial design and idea is quite problematic for this parser, but it is also
	11	pretty fast. Measurements showed that just copying nodes in Python is simply
	12	quite a bit slower (especially for big files >3 kLOC). Therefore we did not
	13	want to get rid of the mutable nodes, since this is usually not an issue.
	14
	15	This is by far the hardest software I ever wrote, exactly because the initial
	16	design is crappy. When you have to account for a lot of mutable state, it
	17	creates a ton of issues that you would otherwise not have. This file took
	18	probably 3-6 months to write, which is insane for a parser.
	19
	20	There is a fuzzer in that helps test this whole thing. Please use it if you
	21	make changes here. If you run the fuzzer like::
	22
	23	test/fuzz_diff_parser.py random -n 100000
	24
	25	you can be pretty sure that everything is still fine. I sometimes run the
	26	fuzzer up to 24h to make sure everything is still ok.
	27	"""
	28	import re
	29	import difflib
	30	from collections import namedtuple
	31	import logging
	32
	33	from parso.utils import split_lines
	34	from parso.python.parser import Parser
	35	from parso.python.tree import EndMarker
	36	from parso.python.tokenize import PythonToken, BOM_UTF8_STRING
	37	from parso.python.token import PythonTokenTypes
	38
	39	LOG = logging.getLogger(__name__)
	40	DEBUG_DIFF_PARSER = False
	41
	42	_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
	43
	44	NEWLINE = PythonTokenTypes.NEWLINE
	45	DEDENT = PythonTokenTypes.DEDENT
	46	NAME = PythonTokenTypes.NAME
	47	ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
	48	ENDMARKER = PythonTokenTypes.ENDMARKER
	49
	50
	51	def _is_indentation_error_leaf(node):
	52	return node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS
	53
	54
	55	def _get_previous_leaf_if_indentation(leaf):
	56	while leaf and _is_indentation_error_leaf(leaf):
	57	leaf = leaf.get_previous_leaf()
	58	return leaf
	59
	60
	61	def _get_next_leaf_if_indentation(leaf):
	62	while leaf and _is_indentation_error_leaf(leaf):
	63	leaf = leaf.get_next_leaf()
	64	return leaf
	65
	66
	67	def _get_suite_indentation(tree_node):
	68	return _get_indentation(tree_node.children[1])
	69
	70
	71	def _get_indentation(tree_node):
	72	return tree_node.start_pos[1]
	73
	74
	75	def _assert_valid_graph(node):
	76	"""
	77	Checks if the parent/children relationship is correct.
	78
	79	This is a check that only runs during debugging/testing.
	80	"""
	81	try:
	82	children = node.children
	83	except AttributeError:
	84	# Ignore INDENT is necessary, because indent/dedent tokens don't
	85	# contain value/prefix and are just around, because of the tokenizer.
	86	if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
	87	assert not node.value
	88	assert not node.prefix
	89	return
	90
	91	# Calculate the content between two start positions.
	92	previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
	93	if previous_leaf is None:
	94	content = node.prefix
	95	previous_start_pos = 1, 0
	96	else:
	97	assert previous_leaf.end_pos <= node.start_pos, \
	98	(previous_leaf, node)
	99
	100	content = previous_leaf.value + node.prefix
	101	previous_start_pos = previous_leaf.start_pos
	102
	103	if '\n' in content or '\r' in content:
	104	splitted = split_lines(content)
	105	line = previous_start_pos[0] + len(splitted) - 1
	106	actual = line, len(splitted[-1])
	107	else:
	108	actual = previous_start_pos[0], previous_start_pos[1] + len(content)
	109	if content.startswith(BOM_UTF8_STRING) \
	110	and node.get_start_pos_of_prefix() == (1, 0):
	111	# Remove the byte order mark
	112	actual = actual[0], actual[1] - 1
	113
	114	assert node.start_pos == actual, (node.start_pos, actual)
	115	else:
	116	for child in children:
	117	assert child.parent == node, (node, child)
	118	_assert_valid_graph(child)
	119
	120
	121	def _assert_nodes_are_equal(node1, node2):
	122	try:
	123	children1 = node1.children
	124	except AttributeError:
	125	assert not hasattr(node2, 'children'), (node1, node2)
	126	assert node1.value == node2.value, (node1, node2)
	127	assert node1.type == node2.type, (node1, node2)
	128	assert node1.prefix == node2.prefix, (node1, node2)
	129	assert node1.start_pos == node2.start_pos, (node1, node2)
	130	return
	131	else:
	132	try:
	133	children2 = node2.children
	134	except AttributeError:
	135	assert False, (node1, node2)
	136	for n1, n2 in zip(children1, children2):
	137	_assert_nodes_are_equal(n1, n2)
	138	assert len(children1) == len(children2), '\n' + repr(children1) + '\n' + repr(children2)
	139
	140
	141	def _get_debug_error_message(module, old_lines, new_lines):
	142	current_lines = split_lines(module.get_code(), keepends=True)
	143	current_diff = difflib.unified_diff(new_lines, current_lines)
	144	old_new_diff = difflib.unified_diff(old_lines, new_lines)
	145	import parso
	146	return (
	147	"There's an issue with the diff parser. Please "
	148	"report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
	149	% (parso.__version__, ''.join(old_new_diff), ''.join(current_diff))
	150	)
	151
	152
	153	def _get_last_line(node_or_leaf):
	154	last_leaf = node_or_leaf.get_last_leaf()
	155	if _ends_with_newline(last_leaf):
	156	return last_leaf.start_pos[0]
	157	else:
	158	n = last_leaf.get_next_leaf()
	159	if n.type == 'endmarker' and '\n' in n.prefix:
	160	# This is a very special case and has to do with error recovery in
	161	# Parso. The problem is basically that there's no newline leaf at
	162	# the end sometimes (it's required in the grammar, but not needed
	163	# actually before endmarker, CPython just adds a newline to make
	164	# source code pass the parser, to account for that Parso error
	165	# recovery allows small_stmt instead of simple_stmt).
	166	return last_leaf.end_pos[0] + 1
	167	return last_leaf.end_pos[0]
	168
	169
	170	def _skip_dedent_error_leaves(leaf):
	171	while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT':
	172	leaf = leaf.get_previous_leaf()
	173	return leaf
	174
	175
	176	def _ends_with_newline(leaf, suffix=''):
	177	leaf = _skip_dedent_error_leaves(leaf)
	178
	179	if leaf.type == 'error_leaf':
	180	typ = leaf.token_type.lower()
	181	else:
	182	typ = leaf.type
	183
	184	return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
	185
	186
	187	def _flows_finished(pgen_grammar, stack):
	188	"""
	189	if, while, for and try might not be finished, because another part might
	190	still be parsed.
	191	"""
	192	for stack_node in stack:
	193	if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
	194	return False
	195	return True
	196
	197
	198	def _func_or_class_has_suite(node):
	199	if node.type == 'decorated':
	200	node = node.children[-1]
	201	if node.type in ('async_funcdef', 'async_stmt'):
	202	node = node.children[-1]
	203	return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'
	204
	205
	206	def _suite_or_file_input_is_valid(pgen_grammar, stack):
	207	if not _flows_finished(pgen_grammar, stack):
	208	return False
	209
	210	for stack_node in reversed(stack):
	211	if stack_node.nonterminal == 'decorator':
	212	# A decorator is only valid with the upcoming function.
	213	return False
	214
	215	if stack_node.nonterminal == 'suite':
	216	# If only newline is in the suite, the suite is not valid, yet.
	217	return len(stack_node.nodes) > 1
	218	# Not reaching a suite means that we're dealing with file_input levels
	219	# where there's no need for a valid statement in it. It can also be empty.
	220	return True
	221
	222
	223	def _is_flow_node(node):
	224	if node.type == 'async_stmt':
	225	node = node.children[1]
	226	try:
	227	value = node.children[0].value
	228	except AttributeError:
	229	return False
	230	return value in ('if', 'for', 'while', 'try', 'with')
	231
	232
	233	class _PositionUpdatingFinished(Exception):
	234	pass
	235
	236
	237	def _update_positions(nodes, line_offset, last_leaf):
	238	for node in nodes:
	239	try:
	240	children = node.children
	241	except AttributeError:
	242	# Is a leaf
	243	node.line += line_offset
	244	if node is last_leaf:
	245	raise _PositionUpdatingFinished
	246	else:
	247	_update_positions(children, line_offset, last_leaf)
	248
	249
	250	class DiffParser:
	251	"""
	252	An advanced form of parsing a file faster. Unfortunately comes with huge
	253	side effects. It changes the given module.
	254	"""
	255	def __init__(self, pgen_grammar, tokenizer, module):
	256	self._pgen_grammar = pgen_grammar
	257	self._tokenizer = tokenizer
	258	self._module = module
	259
	260	def _reset(self):
	261	self._copy_count = 0
	262	self._parser_count = 0
	263
	264	self._nodes_tree = _NodesTree(self._module)
	265
	266	def update(self, old_lines, new_lines):
	267	'''
	268	The algorithm works as follows:
	269
	270	Equal:
	271	- Assure that the start is a newline, otherwise parse until we get
	272	one.
	273	- Copy from parsed_until_line + 1 to max(i2 + 1)
	274	- Make sure that the indentation is correct (e.g. add DEDENT)
	275	- Add old and change positions
	276	Insert:
	277	- Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
	278	much more.
	279
	280	Returns the new module node.
	281	'''
	282	LOG.debug('diff parser start')
	283	# Reset the used names cache so they get regenerated.
	284	self._module._used_names = None
	285
	286	self._parser_lines_new = new_lines
	287
	288	self._reset()
	289
	290	line_length = len(new_lines)
	291	sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
	292	opcodes = sm.get_opcodes()
	293	LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))
	294
	295	for operation, i1, i2, j1, j2 in opcodes:
	296	LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
	297	operation, i1 + 1, i2, j1 + 1, j2)
	298
	299	if j2 == line_length and new_lines[-1] == '':
	300	# The empty part after the last newline is not relevant.
	301	j2 -= 1
	302
	303	if operation == 'equal':
	304	line_offset = j1 - i1
	305	self._copy_from_old_parser(line_offset, i1 + 1, i2, j2)
	306	elif operation == 'replace':
	307	self._parse(until_line=j2)
	308	elif operation == 'insert':
	309	self._parse(until_line=j2)
	310	else:
	311	assert operation == 'delete'
	312
	313	# With this action all change will finally be applied and we have a
	314	# changed module.
	315	self._nodes_tree.close()
	316
	317	if DEBUG_DIFF_PARSER:
	318	# If there is reasonable suspicion that the diff parser is not
	319	# behaving well, this should be enabled.
	320	try:
	321	code = ''.join(new_lines)
	322	assert self._module.get_code() == code
	323	_assert_valid_graph(self._module)
	324	without_diff_parser_module = Parser(
	325	self._pgen_grammar,
	326	error_recovery=True
	327	).parse(self._tokenizer(new_lines))
	328	_assert_nodes_are_equal(self._module, without_diff_parser_module)
	329	except AssertionError:
	330	print(_get_debug_error_message(self._module, old_lines, new_lines))
	331	raise
	332
	333	last_pos = self._module.end_pos[0]
	334	if last_pos != line_length:
	335	raise Exception(
	336	('(%s != %s) ' % (last_pos, line_length))
	337	+ _get_debug_error_message(self._module, old_lines, new_lines)
	338	)
	339	LOG.debug('diff parser end')
	340	return self._module
	341
	342	def _enabled_debugging(self, old_lines, lines_new):
	343	if self._module.get_code() != ''.join(lines_new):
	344	LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
	345
	346	def _copy_from_old_parser(self, line_offset, start_line_old, until_line_old, until_line_new):
	347	last_until_line = -1
	348	while until_line_new > self._nodes_tree.parsed_until_line:
	349	parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
	350	line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
	351	if line_stmt is None:
	352	# Parse 1 line at least. We don't need more, because we just
	353	# want to get into a state where the old parser has statements
	354	# again that can be copied (e.g. not lines within parentheses).
	355	self._parse(self._nodes_tree.parsed_until_line + 1)
	356	else:
	357	p_children = line_stmt.parent.children
	358	index = p_children.index(line_stmt)
	359
	360	if start_line_old == 1 \
	361	and p_children[0].get_first_leaf().prefix.startswith(BOM_UTF8_STRING):
	362	# If there's a BOM in the beginning, just reparse. It's too
	363	# complicated to account for it otherwise.
	364	copied_nodes = []
	365	else:
	366	from_ = self._nodes_tree.parsed_until_line + 1
	367	copied_nodes = self._nodes_tree.copy_nodes(
	368	p_children[index:],
	369	until_line_old,
	370	line_offset
	371	)
	372	# Match all the nodes that are in the wanted range.
	373	if copied_nodes:
	374	self._copy_count += 1
	375
	376	to = self._nodes_tree.parsed_until_line
	377
	378	LOG.debug('copy old[%s:%s] new[%s:%s]',
	379	copied_nodes[0].start_pos[0],
	380	copied_nodes[-1].end_pos[0] - 1, from_, to)
	381	else:
	382	# We have copied as much as possible (but definitely not too
	383	# much). Therefore we just parse a bit more.
	384	self._parse(self._nodes_tree.parsed_until_line + 1)
	385	# Since there are potential bugs that might loop here endlessly, we
	386	# just stop here.
	387	assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
	388	last_until_line = self._nodes_tree.parsed_until_line
	389
	390	def _get_old_line_stmt(self, old_line):
	391	leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
	392
	393	if _ends_with_newline(leaf):
	394	leaf = leaf.get_next_leaf()
	395	if leaf.get_start_pos_of_prefix()[0] == old_line:
	396	node = leaf
	397	while node.parent.type not in ('file_input', 'suite'):
	398	node = node.parent
	399
	400	# Make sure that if only the `else:` line of an if statement is
	401	# copied that not the whole thing is going to be copied.
	402	if node.start_pos[0] >= old_line:
	403	return node
	404	# Must be on the same line. Otherwise we need to parse that bit.
	405	return None
	406
	407	def _parse(self, until_line):
	408	"""
	409	Parses at least until the given line, but might just parse more until a
	410	valid state is reached.
	411	"""
	412	last_until_line = 0
	413	while until_line > self._nodes_tree.parsed_until_line:
	414	node = self._try_parse_part(until_line)
	415	nodes = node.children
	416
	417	self._nodes_tree.add_parsed_nodes(nodes, self._keyword_token_indents)
	418	if self._replace_tos_indent is not None:
	419	self._nodes_tree.indents[-1] = self._replace_tos_indent
	420
	421	LOG.debug(
	422	'parse_part from %s to %s (to %s in part parser)',
	423	nodes[0].get_start_pos_of_prefix()[0],
	424	self._nodes_tree.parsed_until_line,
	425	node.end_pos[0] - 1
	426	)
	427	# Since the tokenizer sometimes has bugs, we cannot be sure that
	428	# this loop terminates. Therefore assert that there's always a
	429	# change.
	430	assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
	431	last_until_line = self._nodes_tree.parsed_until_line
	432
	433	def _try_parse_part(self, until_line):
	434	"""
	435	Sets up a normal parser that uses a spezialized tokenizer to only parse
	436	until a certain position (or a bit longer if the statement hasn't
	437	ended.
	438	"""
	439	self._parser_count += 1
	440	# TODO speed up, shouldn't copy the whole list all the time.
	441	# memoryview?
	442	parsed_until_line = self._nodes_tree.parsed_until_line
	443	lines_after = self._parser_lines_new[parsed_until_line:]
	444	tokens = self._diff_tokenize(
	445	lines_after,
	446	until_line,
	447	line_offset=parsed_until_line
	448	)
	449	self._active_parser = Parser(
	450	self._pgen_grammar,
	451	error_recovery=True
	452	)
	453	return self._active_parser.parse(tokens=tokens)
	454
	455	def _diff_tokenize(self, lines, until_line, line_offset=0):
	456	was_newline = False
	457	indents = self._nodes_tree.indents
	458	initial_indentation_count = len(indents)
	459
	460	tokens = self._tokenizer(
	461	lines,
	462	start_pos=(line_offset + 1, 0),
	463	indents=indents,
	464	is_first_token=line_offset == 0,
	465	)
	466	stack = self._active_parser.stack
	467	self._replace_tos_indent = None
	468	self._keyword_token_indents = {}
	469	# print('start', line_offset + 1, indents)
	470	for token in tokens:
	471	# print(token, indents)
	472	typ = token.type
	473	if typ == DEDENT:
	474	if len(indents) < initial_indentation_count:
	475	# We are done here, only thing that can come now is an
	476	# endmarker or another dedented code block.
	477	while True:
	478	typ, string, start_pos, prefix = token = next(tokens)
	479	if typ in (DEDENT, ERROR_DEDENT):
	480	if typ == ERROR_DEDENT:
	481	# We want to force an error dedent in the next
	482	# parser/pass. To make this possible we just
	483	# increase the location by one.
	484	self._replace_tos_indent = start_pos[1] + 1
	485	pass
	486	else:
	487	break
	488
	489	if '\n' in prefix or '\r' in prefix:
	490	prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
	491	else:
	492	assert start_pos[1] >= len(prefix), repr(prefix)
	493	if start_pos[1] - len(prefix) == 0:
	494	prefix = ''
	495	yield PythonToken(
	496	ENDMARKER, '',
	497	start_pos,
	498	prefix
	499	)
	500	break
	501	elif typ == NEWLINE and token.start_pos[0] >= until_line:
	502	was_newline = True
	503	elif was_newline:
	504	was_newline = False
	505	if len(indents) == initial_indentation_count:
	506	# Check if the parser is actually in a valid suite state.
	507	if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
	508	yield PythonToken(ENDMARKER, '', token.start_pos, '')
	509	break
	510
	511	if typ == NAME and token.string in ('class', 'def'):
	512	self._keyword_token_indents[token.start_pos] = list(indents)
	513
	514	yield token
	515
	516
	517	class _NodesTreeNode:
	518	_ChildrenGroup = namedtuple(
	519	'_ChildrenGroup',
	520	'prefix children line_offset last_line_offset_leaf')
	521
	522	def __init__(self, tree_node, parent=None, indentation=0):
	523	self.tree_node = tree_node
	524	self._children_groups = []
	525	self.parent = parent
	526	self._node_children = []
	527	self.indentation = indentation
	528
	529	def finish(self):
	530	children = []
	531	for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
	532	first_leaf = _get_next_leaf_if_indentation(
	533	children_part[0].get_first_leaf()
	534	)
	535
	536	first_leaf.prefix = prefix + first_leaf.prefix
	537	if line_offset != 0:
	538	try:
	539	_update_positions(
	540	children_part, line_offset, last_line_offset_leaf)
	541	except _PositionUpdatingFinished:
	542	pass
	543	children += children_part
	544	self.tree_node.children = children
	545	# Reset the parents
	546	for node in children:
	547	node.parent = self.tree_node
	548
	549	for node_child in self._node_children:
	550	node_child.finish()
	551
	552	def add_child_node(self, child_node):
	553	self._node_children.append(child_node)
	554
	555	def add_tree_nodes(self, prefix, children, line_offset=0,
	556	last_line_offset_leaf=None):
	557	if last_line_offset_leaf is None:
	558	last_line_offset_leaf = children[-1].get_last_leaf()
	559	group = self._ChildrenGroup(
	560	prefix, children, line_offset, last_line_offset_leaf
	561	)
	562	self._children_groups.append(group)
	563
	564	def get_last_line(self, suffix):
	565	line = 0
	566	if self._children_groups:
	567	children_group = self._children_groups[-1]
	568	last_leaf = _get_previous_leaf_if_indentation(
	569	children_group.last_line_offset_leaf
	570	)
	571
	572	line = last_leaf.end_pos[0] + children_group.line_offset
	573
	574	# Newlines end on the next line, which means that they would cover
	575	# the next line. That line is not fully parsed at this point.
	576	if _ends_with_newline(last_leaf, suffix):
	577	line -= 1
	578	line += len(split_lines(suffix)) - 1
	579
	580	if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
	581	# This is the end of a file (that doesn't end with a newline).
	582	line += 1
	583
	584	if self._node_children:
	585	return max(line, self._node_children[-1].get_last_line(suffix))
	586	return line
	587
	588	def __repr__(self):
	589	return '<%s: %s>' % (self.__class__.__name__, self.tree_node)
	590
	591
	592	class _NodesTree:
	593	def __init__(self, module):
	594	self._base_node = _NodesTreeNode(module)
	595	self._working_stack = [self._base_node]
	596	self._module = module
	597	self._prefix_remainder = ''
	598	self.prefix = ''
	599	self.indents = [0]
	600
	601	@property
	602	def parsed_until_line(self):
	603	return self._working_stack[-1].get_last_line(self.prefix)
	604
	605	def _update_insertion_node(self, indentation):
	606	for node in reversed(list(self._working_stack)):
	607	if node.indentation < indentation or node is self._working_stack[0]:
	608	return node
	609	self._working_stack.pop()
	610
	611	def add_parsed_nodes(self, tree_nodes, keyword_token_indents):
	612	old_prefix = self.prefix
	613	tree_nodes = self._remove_endmarker(tree_nodes)
	614	if not tree_nodes:
	615	self.prefix = old_prefix + self.prefix
	616	return
	617
	618	assert tree_nodes[0].type != 'newline'
	619
	620	node = self._update_insertion_node(tree_nodes[0].start_pos[1])
	621	assert node.tree_node.type in ('suite', 'file_input')
	622	node.add_tree_nodes(old_prefix, tree_nodes)
	623	# tos = Top of stack
	624	self._update_parsed_node_tos(tree_nodes[-1], keyword_token_indents)
	625
	626	def _update_parsed_node_tos(self, tree_node, keyword_token_indents):
	627	if tree_node.type == 'suite':
	628	def_leaf = tree_node.parent.children[0]
	629	new_tos = _NodesTreeNode(
	630	tree_node,
	631	indentation=keyword_token_indents[def_leaf.start_pos][-1],
	632	)
	633	new_tos.add_tree_nodes('', list(tree_node.children))
	634
	635	self._working_stack[-1].add_child_node(new_tos)
	636	self._working_stack.append(new_tos)
	637
	638	self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
	639	elif _func_or_class_has_suite(tree_node):
	640	self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents)
	641
	642	def _remove_endmarker(self, tree_nodes):
	643	"""
	644	Helps cleaning up the tree nodes that get inserted.
	645	"""
	646	last_leaf = tree_nodes[-1].get_last_leaf()
	647	is_endmarker = last_leaf.type == 'endmarker'
	648	self._prefix_remainder = ''
	649	if is_endmarker:
	650	prefix = last_leaf.prefix
	651	separation = max(prefix.rfind('\n'), prefix.rfind('\r'))
	652	if separation > -1:
	653	# Remove the whitespace part of the prefix after a newline.
	654	# That is not relevant if parentheses were opened. Always parse
	655	# until the end of a line.
	656	last_leaf.prefix, self._prefix_remainder = \
	657	last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
	658
	659	self.prefix = ''
	660
	661	if is_endmarker:
	662	self.prefix = last_leaf.prefix
	663
	664	tree_nodes = tree_nodes[:-1]
	665	return tree_nodes
	666
	667	def _get_matching_indent_nodes(self, tree_nodes, is_new_suite):
	668	# There might be a random dedent where we have to stop copying.
	669	# Invalid indents are ok, because the parser handled that
	670	# properly before. An invalid dedent can happen, because a few
	671	# lines above there was an invalid indent.
	672	node_iterator = iter(tree_nodes)
	673	if is_new_suite:
	674	yield next(node_iterator)
	675
	676	first_node = next(node_iterator)
	677	indent = _get_indentation(first_node)
	678	if not is_new_suite and indent not in self.indents:
	679	return
	680	yield first_node
	681
	682	for n in node_iterator:
	683	if _get_indentation(n) != indent:
	684	return
	685	yield n
	686
	687	def copy_nodes(self, tree_nodes, until_line, line_offset):
	688	"""
	689	Copies tree nodes from the old parser tree.
	690
	691	Returns the number of tree nodes that were copied.
	692	"""
	693	if tree_nodes[0].type in ('error_leaf', 'error_node'):
	694	# Avoid copying errors in the beginning. Can lead to a lot of
	695	# issues.
	696	return []
	697
	698	indentation = _get_indentation(tree_nodes[0])
	699	old_working_stack = list(self._working_stack)
	700	old_prefix = self.prefix
	701	old_indents = self.indents
	702	self.indents = [i for i in self.indents if i <= indentation]
	703
	704	self._update_insertion_node(indentation)
	705
	706	new_nodes, self._working_stack, self.prefix, added_indents = self._copy_nodes(
	707	list(self._working_stack),
	708	tree_nodes,
	709	until_line,
	710	line_offset,
	711	self.prefix,
	712	)
	713	if new_nodes:
	714	self.indents += added_indents
	715	else:
	716	self._working_stack = old_working_stack
	717	self.prefix = old_prefix
	718	self.indents = old_indents
	719	return new_nodes
	720
	721	def _copy_nodes(self, working_stack, nodes, until_line, line_offset,
	722	prefix='', is_nested=False):
	723	new_nodes = []
	724	added_indents = []
	725
	726	nodes = list(self._get_matching_indent_nodes(
	727	nodes,
	728	is_new_suite=is_nested,
	729	))
	730
	731	new_prefix = ''
	732	for node in nodes:
	733	if node.start_pos[0] > until_line:
	734	break
	735
	736	if node.type == 'endmarker':
	737	break
	738
	739	if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
	740	break
	741	# TODO this check might take a bit of time for large files. We
	742	# might want to change this to do more intelligent guessing or
	743	# binary search.
	744	if _get_last_line(node) > until_line:
	745	# We can split up functions and classes later.
	746	if _func_or_class_has_suite(node):
	747	new_nodes.append(node)
	748	break
	749	try:
	750	c = node.children
	751	except AttributeError:
	752	pass
	753	else:
	754	# This case basically appears with error recovery of one line
	755	# suites like `def foo(): bar.-`. In this case we might not
	756	# include a newline in the statement and we need to take care
	757	# of that.
	758	n = node
	759	if n.type == 'decorated':
	760	n = n.children[-1]
	761	if n.type in ('async_funcdef', 'async_stmt'):
	762	n = n.children[-1]
	763	if n.type in ('classdef', 'funcdef'):
	764	suite_node = n.children[-1]
	765	else:
	766	suite_node = c[-1]
	767
	768	if suite_node.type in ('error_leaf', 'error_node'):
	769	break
	770
	771	new_nodes.append(node)
	772
	773	# Pop error nodes at the end from the list
	774	if new_nodes:
	775	while new_nodes:
	776	last_node = new_nodes[-1]
	777	if (last_node.type in ('error_leaf', 'error_node')
	778	or _is_flow_node(new_nodes[-1])):
	779	# Error leafs/nodes don't have a defined start/end. Error
	780	# nodes might not end with a newline (e.g. if there's an
	781	# open `(`). Therefore ignore all of them unless they are
	782	# succeeded with valid parser state.
	783	# If we copy flows at the end, they might be continued
	784	# after the copy limit (in the new parser).
	785	# In this while loop we try to remove until we find a newline.
	786	new_prefix = ''
	787	new_nodes.pop()
	788	while new_nodes:
	789	last_node = new_nodes[-1]
	790	if last_node.get_last_leaf().type == 'newline':
	791	break
	792	new_nodes.pop()
	793	continue
	794	if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node':
	795	# The problem here is that Parso error recovery sometimes
	796	# influences nodes before this node.
	797	# Since the new last node is an error node this will get
	798	# cleaned up in the next while iteration.
	799	new_nodes.pop()
	800	continue
	801	break
	802
	803	if not new_nodes:
	804	return [], working_stack, prefix, added_indents
	805
	806	tos = working_stack[-1]
	807	last_node = new_nodes[-1]
	808	had_valid_suite_last = False
	809	# Pop incomplete suites from the list
	810	if _func_or_class_has_suite(last_node):
	811	suite = last_node
	812	while suite.type != 'suite':
	813	suite = suite.children[-1]
	814
	815	indent = _get_suite_indentation(suite)
	816	added_indents.append(indent)
	817
	818	suite_tos = _NodesTreeNode(suite, indentation=_get_indentation(last_node))
	819	# Don't need to pass line_offset here, it's already done by the
	820	# parent.
	821	suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes(
	822	working_stack + [suite_tos], suite.children, until_line, line_offset,
	823	is_nested=True,
	824	)
	825	added_indents += ai
	826	if len(suite_nodes) < 2:
	827	# A suite only with newline is not valid.
	828	new_nodes.pop()
	829	new_prefix = ''
	830	else:
	831	assert new_nodes
	832	tos.add_child_node(suite_tos)
	833	working_stack = new_working_stack
	834	had_valid_suite_last = True
	835
	836	if new_nodes:
	837	if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
	838	p = new_nodes[-1].get_next_leaf().prefix
	839	# We are not allowed to remove the newline at the end of the
	840	# line, otherwise it's going to be missing. This happens e.g.
	841	# if a bracket is around before that moves newlines to
	842	# prefixes.
	843	new_prefix = split_lines(p, keepends=True)[0]
	844
	845	if had_valid_suite_last:
	846	last = new_nodes[-1]
	847	if last.type == 'decorated':
	848	last = last.children[-1]
	849	if last.type in ('async_funcdef', 'async_stmt'):
	850	last = last.children[-1]
	851	last_line_offset_leaf = last.children[-2].get_last_leaf()
	852	assert last_line_offset_leaf == ':'
	853	else:
	854	last_line_offset_leaf = new_nodes[-1].get_last_leaf()
	855	tos.add_tree_nodes(
	856	prefix, new_nodes, line_offset, last_line_offset_leaf,
	857	)
	858	prefix = new_prefix
	859	self._prefix_remainder = ''
	860
	861	return new_nodes, working_stack, prefix, added_indents
	862
	863	def close(self):
	864	self._base_node.finish()
	865
	866	# Add an endmarker.
	867	try:
	868	last_leaf = self._module.get_last_leaf()
	869	except IndexError:
	870	end_pos = [1, 0]
	871	else:
	872	last_leaf = _skip_dedent_error_leaves(last_leaf)
	873	end_pos = list(last_leaf.end_pos)
	874	lines = split_lines(self.prefix)
	875	assert len(lines) > 0
	876	if len(lines) == 1:
	877	if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]:
	878	end_pos[1] -= 1
	879	end_pos[1] += len(lines[0])
	880	else:
	881	end_pos[0] += len(lines) - 1
	882	end_pos[1] = len(lines[-1])
	883
	884	endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
	885	endmarker.parent = self._module
	886	self._module.children.append(endmarker)