]> crepu.dev Git - config.git/blob - djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/parso/pgen2/generator.py
Actualizado el Readme
[config.git] / djavu-asus / elpy / rpc-venv / lib / python3.11 / site-packages / parso / pgen2 / generator.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
3
4 # Modifications:
5 # Copyright David Halter and Contributors
6 # Modifications are dual-licensed: MIT and PSF.
7
8 """
9 This module defines the data structures used to represent a grammar.
10
11 Specifying grammars in pgen is possible with this grammar::
12
13 grammar: (NEWLINE | rule)* ENDMARKER
14 rule: NAME ':' rhs NEWLINE
15 rhs: items ('|' items)*
16 items: item+
17 item: '[' rhs ']' | atom ['+' | '*']
18 atom: '(' rhs ')' | NAME | STRING
19
20 This grammar is self-referencing.
21
22 This parser generator (pgen2) was created by Guido Rossum and used for lib2to3.
23 Most of the code has been refactored to make it more Pythonic. Since this was a
24 "copy" of the CPython Parser parser "pgen", there was some work needed to make
25 it more readable. It should also be slightly faster than the original pgen2,
26 because we made some optimizations.
27 """
28
29 from ast import literal_eval
30 from typing import TypeVar, Generic, Mapping, Sequence, Set, Union
31
32 from parso.pgen2.grammar_parser import GrammarParser, NFAState
33
34 _TokenTypeT = TypeVar("_TokenTypeT")
35
36
class Grammar(Generic[_TokenTypeT]):
    """
    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine reads
    the instance attributes directly.

    The only parts that really matter for parsing are the DFAs and the
    transitions between them.
    """

    def __init__(self,
                 start_nonterminal: str,
                 rule_to_dfas: Mapping[str, Sequence['DFAState[_TokenTypeT]']],
                 reserved_syntax_strings: Mapping[str, 'ReservedString']):
        # Attribute names are part of the public surface the parser uses.
        self.start_nonterminal = start_nonterminal
        self.nonterminal_to_dfas = rule_to_dfas
        self.reserved_syntax_strings = reserved_syntax_strings
54
55
class DFAPlan:
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.

    ``next_dfa`` is the state the current stack node moves to; ``dfa_pushes``
    lists the DFA states of new stack nodes that must be pushed first.
    """
    def __init__(self, next_dfa: 'DFAState',
                 dfa_pushes: Union[Sequence['DFAState'], None] = None):
        self.next_dfa = next_dfa
        # Fix: the original default was a mutable ``[]`` shared by every
        # instance created without pushes; use a None sentinel and build a
        # fresh list per instance instead.
        self.dfa_pushes: Sequence['DFAState'] = [] if dfa_pushes is None else dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
67
68
class DFAState(Generic[_TokenTypeT]):
    """
    The central vertex type of the generated parser graph.  DFAState
    objects are the vertices of an ordered graph, while arcs and
    transitions are the edges.

    Arcs are the initial edges, where most DFAStates are not connected,
    and transitions are then calculated to connect the DFA state machines
    that have different nonterminals.
    """
    def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        self.from_rule = from_rule
        self.nfa_set = nfa_set
        # Maps terminal/nonterminal labels to successor DFAStates.
        self.arcs: Mapping[str, DFAState] = {}
        # Filled in an intermediary step; same layout as ``arcs`` but
        # holding only nonterminal labels, no terminals.
        self.nonterminal_arcs: Mapping[str, DFAState] = {}
        # Together with ``is_final`` this is all the parser ever reads;
        # everything else exists only to build the parser.
        self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {}
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_

    def unifystate(self, old, new):
        # Redirect every arc that currently targets ``old`` to ``new``.
        for label in self.arcs:
            if self.arcs[label] is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- deliberately ignores the nfa_set attribute.
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final:
            return False
        # Comparing self.arcs == other.arcs directly would recurse into
        # this method through graph cycles, so compare arc targets by
        # identity instead.
        if len(self.arcs) != len(other.arcs):
            return False
        return all(other.arcs.get(label) is next_
                   for label, next_ in self.arcs.items())

    def __repr__(self):
        return '<%s: %s is_final=%s>' % (
            self.__class__.__name__, self.from_rule, self.is_final
        )
125
126
class ReservedString:
    """
    Represents a keyword or operator that appears literally in the grammar
    as a string (e.g. ``"if"``) rather than as a token type (e.g. NUMBER).
    """

    def __init__(self, value: str):
        # The exact string as it appears in source code.
        self.value = value

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, self.value)
139
140
def _simplify_dfas(dfas):
    """
    This is not theoretically optimal, but works well enough.
    Algorithm: repeatedly look for two states that have the same
    set of arcs (same labels pointing to the same nodes) and
    unify them, until things stop changing.

    dfas is a list of DFAState instances
    """
    changes = True
    while changes:
        changes = False
        for i, state_i in enumerate(dfas):
            # Only scan forward: pairs (i, j) with j <= i were already
            # compared in earlier iterations.
            for j in range(i + 1, len(dfas)):
                state_j = dfas[j]
                if state_i == state_j:
                    # Drop the duplicate and redirect every arc that
                    # pointed at it to the surviving, equal state.
                    del dfas[j]
                    for state in dfas:
                        state.unifystate(state_j, state_i)
                    # break restarts the inner scan: indices past j have
                    # shifted after the del, and the merge may have made
                    # further states equal (hence the outer while loop).
                    changes = True
                    break
162
163
def _make_dfas(start, finish):
    """
    Uses the powerset construction algorithm to create DFA states from sets of
    NFA states.

    Also does state reduction if some states are not needed.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA. Then do some
    # state reduction.
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)

    def addclosure(nfa_state, base_nfa_set):
        """Add ``nfa_state`` and its epsilon closure to ``base_nfa_set``."""
        assert isinstance(nfa_state, NFAState)
        if nfa_state in base_nfa_set:
            return
        base_nfa_set.add(nfa_state)
        for nfa_arc in nfa_state.arcs:
            if nfa_arc.nonterminal_or_string is None:
                # A label of None marks an epsilon (empty) transition.
                addclosure(nfa_arc.next, base_nfa_set)

    base_nfa_set = set()
    addclosure(start, base_nfa_set)
    states = [DFAState(start.from_rule, base_nfa_set, finish)]
    for state in states:  # NB states grows while we're iterating
        arcs = {}
        # Find state transitions and store them in arcs.
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is not None:
                    nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
                    addclosure(nfa_arc.next, nfa_set)

        # Now create the dfa's with no None's in arcs anymore. All Nones have
        # been eliminated and state transitions (arcs) are properly defined, we
        # just need to create the dfa's.
        for nonterminal_or_string, nfa_set in arcs.items():
            for nested_state in states:
                if nested_state.nfa_set == nfa_set:
                    # The DFA state already exists for this rule.
                    break
            else:
                # for/else: no existing state had this NFA set, so a new
                # DFA state is appended (and will be visited by the outer
                # loop later).
                nested_state = DFAState(start.from_rule, nfa_set, finish)
                states.append(nested_state)

            state.add_arc(nested_state, nonterminal_or_string)
    return states  # List of DFAState instances; first one is start
212
213
214 def _dump_nfa(start, finish):
215 print("Dump of NFA for", start.from_rule)
216 todo = [start]
217 for i, state in enumerate(todo):
218 print(" State", i, state is finish and "(final)" or "")
219 for arc in state.arcs:
220 label, next_ = arc.nonterminal_or_string, arc.next
221 if next_ in todo:
222 j = todo.index(next_)
223 else:
224 j = len(todo)
225 todo.append(next_)
226 if label is None:
227 print(" -> %d" % j)
228 else:
229 print(" %s -> %d" % (label, j))
230
231
232 def _dump_dfas(dfas):
233 print("Dump of DFA for", dfas[0].from_rule)
234 for i, state in enumerate(dfas):
235 print(" State", i, state.is_final and "(final)" or "")
236 for nonterminal, next_ in state.arcs.items():
237 print(" %s -> %d" % (nonterminal, dfas.index(next_)))
238
239
def generate_grammar(bnf_grammar: str, token_namespace) -> Grammar:
    """
    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    for nfa_start, nfa_end in GrammarParser(bnf_grammar).parse():
        # For debugging, _dump_nfa(nfa_start, nfa_end) / _dump_dfas(dfas)
        # can be called here.
        dfas = _make_dfas(nfa_start, nfa_end)
        _simplify_dfas(dfas)
        rule_to_dfas[nfa_start.from_rule] = dfas
        if start_nonterminal is None:
            # The first rule in the grammar file is the start symbol.
            start_nonterminal = nfa_start.from_rule

    reserved_strings: Mapping[str, ReservedString] = {}
    for dfas in rule_to_dfas.values():
        for dfa_state in dfas:
            for label, next_dfa in dfa_state.arcs.items():
                if label in rule_to_dfas:
                    # The label names another rule: a nonterminal arc.
                    dfa_state.nonterminal_arcs[label] = next_dfa
                else:
                    # The label is a terminal: a token type or a reserved
                    # string such as a keyword or operator.
                    transition = _make_transition(
                        token_namespace,
                        reserved_strings,
                        label,
                    )
                    dfa_state.transitions[transition] = DFAPlan(next_dfa)

    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)  # type: ignore
280
281
282 def _make_transition(token_namespace, reserved_syntax_strings, label):
283 """
284 Creates a reserved string ("if", "for", "*", ...) or returns the token type
285 (NUMBER, STRING, ...) for a given grammar terminal.
286 """
287 if label[0].isalpha():
288 # A named token (e.g. NAME, NUMBER, STRING)
289 return getattr(token_namespace, label)
290 else:
291 # Either a keyword or an operator
292 assert label[0] in ('"', "'"), label
293 assert not label.startswith('"""') and not label.startswith("'''")
294 value = literal_eval(label)
295 try:
296 return reserved_syntax_strings[value]
297 except KeyError:
298 r = reserved_syntax_strings[value] = ReservedString(value)
299 return r
300
301
def _calculate_tree_traversal(nonterminal_to_dfas):
    """
    By this point we know how dfas can move around within a stack node, but we
    don't know how we can add a new stack node (nonterminal transitions).
    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}

    # Sorted for deterministic iteration order (and deterministic errors).
    nonterminals = list(nonterminal_to_dfas.keys())
    nonterminals.sort()
    for nonterminal in nonterminals:
        if nonterminal not in first_plans:
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

    # Now that we have calculated the first terminals, we are sure that
    # there is no left recursion.

    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
            transitions = dfa_state.transitions
            # For every nonterminal arc, merge that nonterminal's first
            # plans into this state's transitions, so the parser knows
            # which stack nodes to push when it sees a given token.
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
                    if transition in transitions:
                        # The same token would start two different rules:
                        # the grammar is ambiguous (not LL(1)).
                        prev_plan = transitions[transition]
                        # Make sure these are sorted so that error messages are
                        # at least deterministic
                        choices = sorted([
                            (
                                prev_plan.dfa_pushes[0].from_rule
                                if prev_plan.dfa_pushes
                                else prev_plan.next_dfa.from_rule
                            ),
                            (
                                pushes[0].from_rule
                                if pushes else next_dfa.from_rule
                            ),
                        ])
                        raise ValueError(
                            "Rule %s is ambiguous; given a %s token, we "
                            "can't determine if we should evaluate %s or %s."
                            % (
                                (
                                    dfa_state.from_rule,
                                    transition,
                                ) + tuple(choices)
                            )
                        )
                    transitions[transition] = DFAPlan(next_dfa, pushes)
350
351
352 def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
353 """
354 Calculates the first plan in the first_plans dictionary for every given
355 nonterminal. This is going to be used to know when to create stack nodes.
356 """
357 dfas = nonterminal_to_dfas[nonterminal]
358 new_first_plans = {}
359 first_plans[nonterminal] = None # dummy to detect left recursion
360 # We only need to check the first dfa. All the following ones are not
361 # interesting to find first terminals.
362 state = dfas[0]
363 for transition, next_ in state.transitions.items():
364 # It's a string. We have finally found a possible first token.
365 new_first_plans[transition] = [next_.next_dfa]
366
367 for nonterminal2, next_ in state.nonterminal_arcs.items():
368 # It's a nonterminal and we have either a left recursion issue
369 # in the grammar or we have to recurse.
370 try:
371 first_plans2 = first_plans[nonterminal2]
372 except KeyError:
373 first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2)
374 else:
375 if first_plans2 is None:
376 raise ValueError("left recursion for rule %r" % nonterminal)
377
378 for t, pushes in first_plans2.items():
379 new_first_plans[t] = [next_] + pushes
380
381 first_plans[nonterminal] = new_first_plans
382 return new_first_plans