]> crepu.dev Git - config.git/blame_incremental - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/blib2to3/pgen2/parse.py
Reorganización de directorios
[config.git] / djavu-asus / emacs / elpy / rpc-venv / lib / python3.11 / site-packages / blib2to3 / pgen2 / parse.py
... / ...
CommitLineData
1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""Parser engine for the grammar tables generated by pgen.
5
6The grammar table must be loaded first.
7
8See Parser/parser.c in the Python distribution for additional info on
9how this parsing engine works.
10
11"""
12from contextlib import contextmanager
13from typing import (
14 TYPE_CHECKING,
15 Any,
16 Callable,
17 Dict,
18 Iterator,
19 List,
20 Optional,
21 Set,
22 Tuple,
23 Union,
24 cast,
25)
26
27from blib2to3.pgen2.grammar import Grammar
28from blib2to3.pytree import NL, Context, Leaf, Node, RawNode, convert
29
30# Local imports
31from . import grammar, token, tokenize
32
33if TYPE_CHECKING:
34 from blib2to3.pgen2.driver import TokenProxy
35
36
# Type aliases for the pgen-generated parse tables.
Results = Dict[str, NL]  # name -> node mapping (not referenced in this module)
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]  # raw-node -> tree-node converter
DFA = List[List[Tuple[int, int]]]  # per-state lists of (label, new_state) arcs
DFAS = Tuple[DFA, Dict[int, int]]  # (dfa, first_set) pair for one nonterminal
41
42
def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    """Default convert callback: build a pytree Node from a raw node tuple.

    A raw node is a (type, value, context, children) tuple; children must
    not be None here (i.e. the node is a symbol, not a token).
    """
    node_type, _, node_context, node_children = node
    assert node_children is not None
    return Node(type=node_type, children=node_children, context=node_context)
46
47
# A placeholder node, used when the parser is backtracking: while
# Parser.is_backtracking is set, shift()/push() store this shared immutable
# tuple on the stack instead of building real nodes, so nothing is mutated.
DUMMY_NODE = (-1, None, None, None)
50
51
def stack_copy(
    stack: List[Tuple[DFAS, int, RawNode]]
) -> List[Tuple[DFAS, int, RawNode]]:
    """Return a copy of *stack* whose node slots are all DUMMY_NODE.

    Used to snapshot the parser stack without sharing (or duplicating)
    any real tree nodes.
    """
    snapshot: List[Tuple[DFAS, int, RawNode]] = []
    for frame_dfa, frame_state, _ in stack:
        snapshot.append((frame_dfa, frame_state, DUMMY_NODE))
    return snapshot
57
58
class Recorder:
    """Advance several candidate parses in parallel.

    Used by Parser.addtoken() when a soft keyword makes more than one label
    viable for the same token: one nodeless snapshot of the parser stack is
    kept per candidate ilabel, look-ahead tokens are replayed against every
    snapshot, and candidates that raise ParseError are marked dead.
    """

    def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
        self.parser = parser
        self._ilabels = ilabels
        self.context = context  # does not really matter; forwarded for error reporting

        self._dead_ilabels: Set[int] = set()
        # Keep the real stack aside; each candidate works on a nodeless copy.
        self._start_point = self.parser.stack
        self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}

    @property
    def ilabels(self) -> Set[int]:
        # Alive candidates.  Dead labels always come from self._ilabels, so the
        # symmetric difference equals set(self._ilabels) - self._dead_ilabels.
        return self._dead_ilabels.symmetric_difference(self._ilabels)

    @contextmanager
    def switch_to(self, ilabel: int) -> Iterator[None]:
        """Temporarily install the snapshot stack for *ilabel* on the parser.

        A ParseError raised inside the block kills that candidate; the
        original stack is always restored on exit.
        """
        with self.backtrack():
            self.parser.stack = self._points[ilabel]
            try:
                yield
            except ParseError:
                self._dead_ilabels.add(ilabel)
            finally:
                self.parser.stack = self._start_point

    @contextmanager
    def backtrack(self) -> Iterator[None]:
        """
        Use the node-level invariant ones for basic parsing operations (push/pop/shift).
        These still will operate on the stack; but they won't create any new nodes, or
        modify the contents of any other existing nodes.

        This saves us a ton of time when we are backtracking, since we
        want to restore to the initial state as quick as possible, which
        can only be done by having as little mutatations as possible.
        """
        # Save and restore the previous flag so nested backtrack() calls nest safely.
        is_backtracking = self.parser.is_backtracking
        try:
            self.parser.is_backtracking = True
            yield
        finally:
            self.parser.is_backtracking = is_backtracking

    def add_token(self, tok_type: int, tok_val: str, raw: bool = False) -> None:
        """Feed one token to every alive candidate.

        With raw=True the token goes through Parser._addtoken using the
        candidate's own ilabel; otherwise Parser.addtoken re-classifies it.
        """
        func: Callable[..., Any]
        if raw:
            func = self.parser._addtoken
        else:
            func = self.parser.addtoken

        for ilabel in self.ilabels:
            with self.switch_to(ilabel):
                args = [tok_type, tok_val, self.context]
                if raw:
                    # _addtoken takes the ilabel as its first argument.
                    args.insert(0, ilabel)
                func(*args)

    def determine_route(
        self, value: Optional[str] = None, force: bool = False
    ) -> Optional[int]:
        """Return the winning ilabel, or None while the parse is still ambiguous.

        Raises ParseError if every candidate is dead.  With force=True the
        first alive candidate wins even if others remain.
        """
        alive_ilabels = self.ilabels
        if len(alive_ilabels) == 0:
            # NOTE(review): set iteration order is arbitrary, so the label
            # reported here is just some dead candidate — "most successful"
            # is not guaranteed; it is only used in the error message.
            *_, most_successful_ilabel = self._dead_ilabels
            raise ParseError("bad input", most_successful_ilabel, value, self.context)

        ilabel, *rest = alive_ilabels
        if force or not rest:
            return ilabel
        else:
            return None
129
130
class ParseError(Exception):
    """Exception to signal the parser is stuck.

    Carries the offending token's type, value, and context alongside a
    short reason string, all of which are also baked into the message.
    """

    def __init__(
        self, msg: str, type: Optional[int], value: Optional[str], context: Context
    ) -> None:
        super().__init__(
            f"{msg}: type={type!r}, value={value!r}, context={context!r}"
        )
        # Keep the raw pieces available for programmatic inspection.
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context
144
145
class Parser:
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter]) # create instance
    p.setup([start]) # prepare for parsing
    <for each input token>:
        if p.addtoken(...): # parse a token; may raise ParseError
            break
    root = p.rootnode # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, addtoken() raises
    the ParseError exception. There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        **post-note: the convert argument is ignored since for Black's
        usage, convert will always be blib2to3.pytree.convert. Allowing
        this to be dynamic hurts mypyc's ability to use early binding.
        These docs are left for historical and informational value.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        # See note in docstring above. TL;DR this is ignored.
        self.convert = convert or lam_sub
        # While True, shift/push/pop update stack states only and store
        # DUMMY_NODEs instead of building tree nodes (see Recorder.backtrack).
        self.is_backtracking = False

    def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode: RawNode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
        self.rootnode: Optional[NL] = None
        self.used_names: Set[str] = set()
        self.proxy = proxy

    def addtoken(self, type: int, value: str, context: Context) -> bool:
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabels = self.classify(type, value, context)
        assert len(ilabels) >= 1

        # If we have only one state to advance, we'll directly
        # take it as is.
        if len(ilabels) == 1:
            [ilabel] = ilabels
            return self._addtoken(ilabel, type, value, context)

        # If there are multiple states which we can advance (only
        # happen under soft-keywords), then we will try all of them
        # in parallel and as soon as one state can reach further than
        # the rest, we'll choose that one. This is a pretty hacky
        # and hopefully temporary algorithm.
        #
        # For a more detailed explanation, check out this post:
        # https://tree.science/what-the-backtracking.html

        with self.proxy.release() as proxy:
            counter, force = 0, False
            recorder = Recorder(self, ilabels, context)
            recorder.add_token(type, value, raw=True)

            next_token_value = value
            # Keep consuming look-ahead tokens until only one candidate
            # survives, or the proxy runs out (then the route is forced).
            while recorder.determine_route(next_token_value) is None:
                if not proxy.can_advance(counter):
                    force = True
                    break

                next_token_type, next_token_value, *_ = proxy.eat(counter)
                # Comments and non-logical newlines cannot disambiguate;
                # skip them without feeding the recorder.
                if next_token_type in (tokenize.COMMENT, tokenize.NL):
                    counter += 1
                    continue

                if next_token_type == tokenize.OP:
                    # Operators are classified through the grammar's opmap.
                    next_token_type = grammar.opmap[next_token_value]

                recorder.add_token(next_token_type, next_token_value)
                counter += 1

            ilabel = cast(int, recorder.determine_route(next_token_value, force=force))
            assert ilabel is not None

        return self._addtoken(ilabel, type, value, context)

    def _addtoken(self, ilabel: int, type: int, value: str, context: Context) -> bool:
        """Advance the DFA stack with a pre-classified token.

        Returns True iff this token completes the parse.  Raises
        ParseError ("too much input" / "bad input") when no transition
        matches.
        """
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t = self.grammar.labels[i][0]
                if t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, itsdfa, newstate, context)
                        break  # To continue the outer while loop

                elif ilabel == i:
                    # Look it up in the list of labels
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False

            else:
                # for-else: no arc matched this label in the current state.
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input", type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type: int, value: str, context: Context) -> List[int]:
        """Turn a token into a label. (Internal)

        Depending on whether the value is a soft-keyword or not,
        this function may return multiple labels to choose from."""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            if value in self.grammar.keywords:
                return [self.grammar.keywords[value]]
            elif value in self.grammar.soft_keywords:
                assert type in self.grammar.tokens
                # Soft keywords are ambiguous: both the keyword label and the
                # plain NAME label are viable candidates.
                return [
                    self.grammar.soft_keywords[value],
                    self.grammar.tokens[type],
                ]

        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return [ilabel]

    def shift(self, type: int, value: str, newstate: int, context: Context) -> None:
        """Shift a token. (Internal)"""
        if self.is_backtracking:
            # Backtracking: advance the state, but build no tree node.
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
        else:
            dfa, state, node = self.stack[-1]
            rawnode: RawNode = (type, value, context, None)
            # NB: module-level convert (blib2to3.pytree.convert), not self.convert;
            # see the post-note in __init__'s docstring.
            newnode = convert(self.grammar, rawnode)
            assert node[-1] is not None
            node[-1].append(newnode)
            self.stack[-1] = (dfa, newstate, node)

    def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
        """Push a nonterminal. (Internal)"""
        if self.is_backtracking:
            # Backtracking: advance state and push a frame with placeholder nodes.
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
            self.stack.append((newdfa, 0, DUMMY_NODE))
        else:
            dfa, state, node = self.stack[-1]
            newnode: RawNode = (type, None, context, [])
            self.stack[-1] = (dfa, newstate, node)
            self.stack.append((newdfa, 0, newnode))

    def pop(self) -> None:
        """Pop a nonterminal. (Internal)"""
        if self.is_backtracking:
            self.stack.pop()
        else:
            popdfa, popstate, popnode = self.stack.pop()
            # Convert the finished nonterminal and attach it to its parent,
            # or make it the root when the stack becomes empty.
            newnode = convert(self.grammar, popnode)
            if self.stack:
                dfa, state, node = self.stack[-1]
                assert node[-1] is not None
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names