]> crepu.dev Git - config.git/blame_incremental - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/blib2to3/pgen2/parse.py
Reorganización de directorios
[config.git] / djavu-asus / emacs / elpy / rpc-venv / lib / python3.11 / site-packages / blib2to3 / pgen2 / parse.py
... / ...
CommitLineData
1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Parser engine for the grammar tables generated by pgen.
5
6The grammar table must be loaded first.
7
8See Parser/parser.c in the Python distribution for additional info on
9how this parsing engine works.
10
11"""
12from contextlib import contextmanager
13from typing import (
14 TYPE_CHECKING,
15 Any,
16 Callable,
17 Dict,
18 Iterator,
19 List,
20 Optional,
21 Set,
22 Tuple,
23 Union,
24 cast,
25)
26
27from blib2to3.pgen2.grammar import Grammar
28from blib2to3.pytree import NL, Context, Leaf, Node, RawNode, convert
29
30# Local imports
31from . import grammar, token, tokenize
32
33if TYPE_CHECKING:
34 from blib2to3.pgen2.driver import TokenProxy
35
36
# Type aliases for the pgen-generated parse tables.
Results = Dict[str, NL]  # name -> node mapping (not referenced in this module)
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]  # raw-node -> tree-node converter
DFA = List[List[Tuple[int, int]]]  # per-state lists of (label, new_state) arcs
DFAS = Tuple[DFA, Dict[int, int]]  # (dfa, first_set) pair for one nonterminal
41
42
def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    """Default convert callback: build a pytree Node from a raw node tuple.

    A raw node is a (type, value, context, children) tuple; children must
    not be None here (i.e. the node is a symbol, not a token).
    """
    node_type, _, node_context, node_children = node
    assert node_children is not None
    return Node(type=node_type, children=node_children, context=node_context)
46
47
# A placeholder node, used when the parser is backtracking: while
# Parser.is_backtracking is set, shift()/push() store this shared immutable
# tuple on the stack instead of building real nodes, so nothing is mutated.
DUMMY_NODE = (-1, None, None, None)
50
51
def stack_copy(
    stack: List[Tuple[DFAS, int, RawNode]]
) -> List[Tuple[DFAS, int, RawNode]]:
    """Return a copy of *stack* whose node slots are all DUMMY_NODE.

    Used to snapshot the parser stack without sharing (or duplicating)
    any real tree nodes.
    """
    snapshot: List[Tuple[DFAS, int, RawNode]] = []
    for frame_dfa, frame_state, _ in stack:
        snapshot.append((frame_dfa, frame_state, DUMMY_NODE))
    return snapshot
57
58
class Recorder:
    """Advance several candidate parses in parallel.

    Used by Parser.addtoken() when a soft keyword makes more than one label
    viable for the same token: one nodeless snapshot of the parser stack is
    kept per candidate ilabel, look-ahead tokens are replayed against every
    snapshot, and candidates that raise ParseError are marked dead.
    """

    def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
        self.parser = parser
        self._ilabels = ilabels
        self.context = context  # does not really matter; forwarded for error reporting

        self._dead_ilabels: Set[int] = set()
        # Keep the real stack aside; each candidate works on a nodeless copy.
        self._start_point = self.parser.stack
        self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}

    @property
    def ilabels(self) -> Set[int]:
        # Alive candidates.  Dead labels always come from self._ilabels, so the
        # symmetric difference equals set(self._ilabels) - self._dead_ilabels.
        return self._dead_ilabels.symmetric_difference(self._ilabels)

    @contextmanager
    def switch_to(self, ilabel: int) -> Iterator[None]:
        """Temporarily install the snapshot stack for *ilabel* on the parser.

        A ParseError raised inside the block kills that candidate; the
        original stack is always restored on exit.
        """
        with self.backtrack():
            self.parser.stack = self._points[ilabel]
            try:
                yield
            except ParseError:
                self._dead_ilabels.add(ilabel)
            finally:
                self.parser.stack = self._start_point

    @contextmanager
    def backtrack(self) -> Iterator[None]:
        """
        Use the node-level invariant ones for basic parsing operations (push/pop/shift).
        These still will operate on the stack; but they won't create any new nodes, or
        modify the contents of any other existing nodes.

        This saves us a ton of time when we are backtracking, since we
        want to restore to the initial state as quick as possible, which
        can only be done by having as little mutatations as possible.
        """
        # Save and restore the previous flag so nested backtrack() calls nest safely.
        is_backtracking = self.parser.is_backtracking
        try:
            self.parser.is_backtracking = True
            yield
        finally:
            self.parser.is_backtracking = is_backtracking

    def add_token(self, tok_type: int, tok_val: str, raw: bool = False) -> None:
        """Feed one token to every alive candidate.

        With raw=True the token goes through Parser._addtoken using the
        candidate's own ilabel; otherwise Parser.addtoken re-classifies it.
        """
        func: Callable[..., Any]
        if raw:
            func = self.parser._addtoken
        else:
            func = self.parser.addtoken

        for ilabel in self.ilabels:
            with self.switch_to(ilabel):
                args = [tok_type, tok_val, self.context]
                if raw:
                    # _addtoken takes the ilabel as its first argument.
                    args.insert(0, ilabel)
                func(*args)

    def determine_route(
        self, value: Optional[str] = None, force: bool = False
    ) -> Optional[int]:
        """Return the winning ilabel, or None while the parse is still ambiguous.

        Raises ParseError if every candidate is dead.  With force=True the
        first alive candidate wins even if others remain.
        """
        alive_ilabels = self.ilabels
        if len(alive_ilabels) == 0:
            # NOTE(review): set iteration order is arbitrary, so the label
            # reported here is just some dead candidate — "most successful"
            # is not guaranteed; it is only used in the error message.
            *_, most_successful_ilabel = self._dead_ilabels
            raise ParseError("bad input", most_successful_ilabel, value, self.context)

        ilabel, *rest = alive_ilabels
        if force or not rest:
            return ilabel
        else:
            return None
129
130
class ParseError(Exception):
    """Exception to signal the parser is stuck.

    Carries the offending token's type, value, and context alongside a
    short reason string, all of which are also baked into the message.
    """

    def __init__(
        self, msg: str, type: Optional[int], value: Optional[str], context: Context
    ) -> None:
        super().__init__(
            f"{msg}: type={type!r}, value={value!r}, context={context!r}"
        )
        # Keep the raw pieces available for programmatic inspection.
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context
144
145
class Parser:
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter]) # create instance
    p.setup([start]) # prepare for parsing
    <for each input token>:
        if p.addtoken(...): # parse a token; may raise ParseError
            break
    root = p.rootnode # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, addtoken() raises
    the ParseError exception. There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        **post-note: the convert argument is ignored since for Black's
        usage, convert will always be blib2to3.pytree.convert. Allowing
        this to be dynamic hurts mypyc's ability to use early binding.
        These docs are left for historical and informational value.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        # See note in docstring above. TL;DR this is ignored.
        self.convert = convert or lam_sub
        # While True, shift/push/pop update stack states only and store
        # DUMMY_NODEs instead of building tree nodes (see Recorder.backtrack).
        self.is_backtracking = False

    def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode: RawNode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
        self.rootnode: Optional[NL] = None
        self.used_names: Set[str] = set()
        self.proxy = proxy

    def addtoken(self, type: int, value: str, context: Context) -> bool:
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabels = self.classify(type, value, context)
        assert len(ilabels) >= 1

        # If we have only one state to advance, we'll directly
        # take it as is.
        if len(ilabels) == 1:
            [ilabel] = ilabels
            return self._addtoken(ilabel, type, value, context)

        # If there are multiple states which we can advance (only
        # happen under soft-keywords), then we will try all of them
        # in parallel and as soon as one state can reach further than
        # the rest, we'll choose that one. This is a pretty hacky
        # and hopefully temporary algorithm.
        #
        # For a more detailed explanation, check out this post:
        # https://tree.science/what-the-backtracking.html

        with self.proxy.release() as proxy:
            counter, force = 0, False
            recorder = Recorder(self, ilabels, context)
            recorder.add_token(type, value, raw=True)

            next_token_value = value
            # Keep consuming look-ahead tokens until only one candidate
            # survives, or the proxy runs out (then the route is forced).
            while recorder.determine_route(next_token_value) is None:
                if not proxy.can_advance(counter):
                    force = True
                    break

                next_token_type, next_token_value, *_ = proxy.eat(counter)
                # Comments and non-logical newlines cannot disambiguate;
                # skip them without feeding the recorder.
                if next_token_type in (tokenize.COMMENT, tokenize.NL):
                    counter += 1
                    continue

                if next_token_type == tokenize.OP:
                    # Operators are classified through the grammar's opmap.
                    next_token_type = grammar.opmap[next_token_value]

                recorder.add_token(next_token_type, next_token_value)
                counter += 1

            ilabel = cast(int, recorder.determine_route(next_token_value, force=force))
            assert ilabel is not None

        return self._addtoken(ilabel, type, value, context)

    def _addtoken(self, ilabel: int, type: int, value: str, context: Context) -> bool:
        """Advance the DFA stack with a pre-classified token.

        Returns True iff this token completes the parse.  Raises
        ParseError ("too much input" / "bad input") when no transition
        matches.
        """
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t = self.grammar.labels[i][0]
                if t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, itsdfa, newstate, context)
                        break  # To continue the outer while loop

                elif ilabel == i:
                    # Look it up in the list of labels
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False

            else:
                # for-else: no arc matched this label in the current state.
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input", type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type: int, value: str, context: Context) -> List[int]:
        """Turn a token into a label. (Internal)

        Depending on whether the value is a soft-keyword or not,
        this function may return multiple labels to choose from."""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            if value in self.grammar.keywords:
                return [self.grammar.keywords[value]]
            elif value in self.grammar.soft_keywords:
                assert type in self.grammar.tokens
                # Soft keywords are ambiguous: both the keyword label and the
                # plain NAME label are viable candidates.
                return [
                    self.grammar.soft_keywords[value],
                    self.grammar.tokens[type],
                ]

        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return [ilabel]

    def shift(self, type: int, value: str, newstate: int, context: Context) -> None:
        """Shift a token. (Internal)"""
        if self.is_backtracking:
            # Backtracking: advance the state, but build no tree node.
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
        else:
            dfa, state, node = self.stack[-1]
            rawnode: RawNode = (type, value, context, None)
            # NB: module-level convert (blib2to3.pytree.convert), not self.convert;
            # see the post-note in __init__'s docstring.
            newnode = convert(self.grammar, rawnode)
            assert node[-1] is not None
            node[-1].append(newnode)
            self.stack[-1] = (dfa, newstate, node)

    def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
        """Push a nonterminal. (Internal)"""
        if self.is_backtracking:
            # Backtracking: advance state and push a frame with placeholder nodes.
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
            self.stack.append((newdfa, 0, DUMMY_NODE))
        else:
            dfa, state, node = self.stack[-1]
            newnode: RawNode = (type, None, context, [])
            self.stack[-1] = (dfa, newstate, node)
            self.stack.append((newdfa, 0, newnode))

    def pop(self) -> None:
        """Pop a nonterminal. (Internal)"""
        if self.is_backtracking:
            self.stack.pop()
        else:
            popdfa, popstate, popnode = self.stack.pop()
            # Convert the finished nonterminal and attach it to its parent,
            # or make it the root when the stack becomes empty.
            newnode = convert(self.grammar, popnode)
            if self.stack:
                dfa, state, node = self.stack[-1]
                assert node[-1] is not None
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names