# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.

"""
This module defines the data structures used to represent a grammar.

Specifying grammars in pgen is possible with this grammar::

    grammar: (NEWLINE | rule)* ENDMARKER
    rule: NAME ':' rhs NEWLINE
    rhs: items ('|' items)*
    items: item+
    item: '[' rhs ']' | atom ['+' | '*']
    atom: '(' rhs ')' | NAME | STRING

This grammar is self-referencing.

This parser generator (pgen2) was created by Guido Rossum and used for lib2to3.
Most of the code has been refactored to make it more Pythonic. Since this was a
"copy" of the CPython Parser parser "pgen", there was some work needed to make
it more readable. It should also be slightly faster than the original pgen2,
because we made some optimizations.
"""
from ast import literal_eval
from typing import TypeVar, Generic, Mapping, Sequence, Set, Union

from parso.pgen2.grammar_parser import GrammarParser, NFAState
_TokenTypeT = TypeVar("_TokenTypeT")


class Grammar(Generic[_TokenTypeT]):
    """
    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.

    The only important part in this parsers are dfas and transitions between
    dfas.
    """

    def __init__(self,
                 start_nonterminal: str,
                 rule_to_dfas: Mapping[str, Sequence['DFAState[_TokenTypeT]']],
                 reserved_syntax_strings: Mapping[str, 'ReservedString']):
        # Map from rule (nonterminal) name to the list of DFA states that
        # implement that rule.
        self.nonterminal_to_dfas = rule_to_dfas
        # Map from keyword/operator string (e.g. "if", "*") to its
        # ReservedString wrapper.
        self.reserved_syntax_strings = reserved_syntax_strings
        # Name of the rule parsing starts from.
        self.start_nonterminal = start_nonterminal
class DFAPlan:
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.
    """
    # NOTE: the default for ``dfa_pushes`` used to be a mutable ``[]``.
    # An immutable empty tuple avoids the shared-mutable-default pitfall and
    # still satisfies Sequence for the read-only uses visible in this module
    # (truthiness tests, indexing, list concatenation on copies).
    def __init__(self, next_dfa: 'DFAState',
                 dfa_pushes: Sequence['DFAState'] = ()):
        # The DFA state the parser moves to after this plan is applied.
        self.next_dfa = next_dfa
        # DFA states that must be pushed onto the stack before moving on.
        self.dfa_pushes = dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa,
                               self.dfa_pushes)
class DFAState(Generic[_TokenTypeT]):
    """
    The DFAState object is the core class for pretty much anything. DFAState
    are the vertices of an ordered graph while arcs and transitions are the
    edges.

    Arcs are the initial edges, where most DFAStates are not connected and
    transitions are then calculated to connect the DFA state machines that have
    different nonterminals.
    """
    def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        # Name of the grammar rule this DFA state belongs to.
        self.from_rule = from_rule
        # The set of NFA states this DFA state was built from (powerset
        # construction).
        self.nfa_set = nfa_set
        # map from terminals/nonterminals to DFAState
        self.arcs: Mapping[str, DFAState] = {}
        # In an intermediary step we set these nonterminal arcs (which has the
        # same structure as arcs). These don't contain terminals anymore.
        self.nonterminal_arcs: Mapping[str, DFAState] = {}

        # Transitions are basically the only thing that the parser is using
        # with is_final. Everything else is purely here to create a parser.
        self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {}
        # This state is accepting iff the NFA's final state is in its set.
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
        """Add an outgoing arc ``label`` pointing at DFA state ``next_``."""
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_

    def unifystate(self, old, new):
        """Rewrite every arc that points at ``old`` to point at ``new``."""
        for label, next_ in self.arcs.items():
            if next_ is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfa_set instance variable
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next_ in self.arcs.items():
            if next_ is not other.arcs.get(label):
                return False
        return True

    def __repr__(self):
        return '<%s: %s is_final=%s>' % (
            self.__class__.__name__, self.from_rule, self.is_final
        )
class ReservedString:
    """
    Most grammars will have certain keywords and operators that are mentioned
    in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
    This class basically is the former.
    """

    def __init__(self, value: str):
        # The literal string as it appears in the grammar, e.g. "if" or "*".
        self.value = value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.value)
141 def _simplify_dfas(dfas
):
143 This is not theoretically optimal, but works well enough.
144 Algorithm: repeatedly look for two states that have the same
145 set of arcs (same labels pointing to the same nodes) and
146 unify them, until things stop changing.
148 dfas is a list of DFAState instances
153 for i
, state_i
in enumerate(dfas
):
154 for j
in range(i
+ 1, len(dfas
)):
156 if state_i
== state_j
:
159 state
.unifystate(state_j
, state_i
)
def _make_dfas(start, finish):
    """
    Uses the powerset construction algorithm to create DFA states from sets of
    NFA states.

    Also does state reduction if some states are not needed.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA.  Then do some
    # state reduction.
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)

    def addclosure(nfa_state, base_nfa_set):
        # Add nfa_state plus everything reachable from it via unlabeled
        # (epsilon) arcs to base_nfa_set.
        assert isinstance(nfa_state, NFAState)
        if nfa_state in base_nfa_set:
            return
        base_nfa_set.add(nfa_state)
        for nfa_arc in nfa_state.arcs:
            if nfa_arc.nonterminal_or_string is None:
                addclosure(nfa_arc.next, base_nfa_set)

    base_nfa_set = set()
    addclosure(start, base_nfa_set)
    states = [DFAState(start.from_rule, base_nfa_set, finish)]
    for state in states:  # NB states grows while we're iterating
        arcs = {}
        # Find state transitions and store them in arcs.
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is not None:
                    nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
                    addclosure(nfa_arc.next, nfa_set)

        # Now create the dfa's with no None's in arcs anymore. All Nones have
        # been eliminated and state transitions (arcs) are properly defined, we
        # just need to create the dfa's.
        for nonterminal_or_string, nfa_set in arcs.items():
            for nested_state in states:
                if nested_state.nfa_set == nfa_set:
                    # The DFA state already exists for this rule.
                    break
            else:
                nested_state = DFAState(start.from_rule, nfa_set, finish)
                states.append(nested_state)

            state.add_arc(nested_state, nonterminal_or_string)
    return states  # List of DFAState instances; first one is start
def _dump_nfa(start, finish):
    """Debug helper: print the NFA graph reachable from ``start``."""
    print("Dump of NFA for", start.from_rule)
    todo = [start]
    for i, state in enumerate(todo):  # todo grows while we iterate
        print("  State", i, state is finish and "(final)" or "")
        for arc in state.arcs:
            label, next_ = arc.nonterminal_or_string, arc.next
            if next_ in todo:
                j = todo.index(next_)
            else:
                j = len(todo)
                todo.append(next_)
            if label is None:
                # Epsilon (unlabeled) transition.
                print("    -> %d" % j)
            else:
                print("    %s -> %d" % (label, j))
def _dump_dfas(dfas):
    """Debug helper: print every DFA state and its outgoing arcs."""
    print("Dump of DFA for", dfas[0].from_rule)
    for index, dfa_state in enumerate(dfas):
        print("  State", index, dfa_state.is_final and "(final)" or "")
        for nonterminal, next_ in dfa_state.arcs.items():
            print("    %s -> %d" % (nonterminal, dfas.index(next_)))
def generate_grammar(bnf_grammar: str, token_namespace) -> Grammar:
    """
    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    # Build (and reduce) one DFA list per grammar rule.
    for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
        # _dump_nfa(nfa_a, nfa_z)
        dfas = _make_dfas(nfa_a, nfa_z)
        # _dump_dfas(dfas)
        _simplify_dfas(dfas)
        rule_to_dfas[nfa_a.from_rule] = dfas
        # print(nfa_a.from_rule, oldlen, newlen)

        # The first rule in the grammar file is the start rule.
        if start_nonterminal is None:
            start_nonterminal = nfa_a.from_rule

    # Split each state's arcs into nonterminal arcs (pointing at other rules)
    # and terminal transitions (token types / reserved strings).
    reserved_strings: Mapping[str, ReservedString] = {}
    for nonterminal, dfas in rule_to_dfas.items():
        for dfa_state in dfas:
            for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
                if terminal_or_nonterminal in rule_to_dfas:
                    dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
                else:
                    transition = _make_transition(
                        token_namespace,
                        reserved_strings,
                        terminal_or_nonterminal
                    )
                    dfa_state.transitions[transition] = DFAPlan(next_dfa)

    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)  # type: ignore
282 def _make_transition(token_namespace
, reserved_syntax_strings
, label
):
284 Creates a reserved string ("if", "for", "*", ...) or returns the token type
285 (NUMBER, STRING, ...) for a given grammar terminal.
287 if label
[0].isalpha():
288 # A named token (e.g. NAME, NUMBER, STRING)
289 return getattr(token_namespace
, label
)
291 # Either a keyword or an operator
292 assert label
[0] in ('"', "'"), label
293 assert not label
.startswith('"""') and not label
.startswith("'''")
294 value
= literal_eval(label
)
296 return reserved_syntax_strings
[value
]
298 r
= reserved_syntax_strings
[value
] = ReservedString(value
)
def _calculate_tree_traversal(nonterminal_to_dfas):
    """
    By this point we know how dfas can move around within a stack node, but we
    don't know how we can add a new stack node (nonterminal transitions).

    Raises ValueError if the grammar is ambiguous (two plans for the same
    token in the same state) or left-recursive.
    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}

    # Sorted for deterministic iteration order (and error messages).
    nonterminals = list(nonterminal_to_dfas.keys())
    nonterminals.sort()
    for nonterminal in nonterminals:
        if nonterminal not in first_plans:
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

    # Now that we have calculated the first terminals, we are sure that
    # there is no left recursion.

    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
            transitions = dfa_state.transitions
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
                    if transition in transitions:
                        prev_plan = transitions[transition]
                        # Make sure these are sorted so that error messages are
                        # at least deterministic
                        choices = sorted([
                            (
                                prev_plan.dfa_pushes[0].from_rule
                                if prev_plan.dfa_pushes
                                else prev_plan.next_dfa.from_rule
                            ),
                            (
                                pushes[0].from_rule
                                if pushes
                                else next_dfa.from_rule
                            ),
                        ])
                        raise ValueError(
                            "Rule %s is ambiguous; given a %s token, we "
                            "can't determine if we should evaluate %s or %s."
                            % (
                                (
                                    dfa_state.from_rule,
                                    transition,
                                ) + tuple(choices)
                            )
                        )
                    transitions[transition] = DFAPlan(next_dfa, pushes)
352 def _calculate_first_plans(nonterminal_to_dfas
, first_plans
, nonterminal
):
354 Calculates the first plan in the first_plans dictionary for every given
355 nonterminal. This is going to be used to know when to create stack nodes.
357 dfas
= nonterminal_to_dfas
[nonterminal
]
359 first_plans
[nonterminal
] = None # dummy to detect left recursion
360 # We only need to check the first dfa. All the following ones are not
361 # interesting to find first terminals.
363 for transition
, next_
in state
.transitions
.items():
364 # It's a string. We have finally found a possible first token.
365 new_first_plans
[transition
] = [next_
.next_dfa
]
367 for nonterminal2
, next_
in state
.nonterminal_arcs
.items():
368 # It's a nonterminal and we have either a left recursion issue
369 # in the grammar or we have to recurse.
371 first_plans2
= first_plans
[nonterminal2
]
373 first_plans2
= _calculate_first_plans(nonterminal_to_dfas
, first_plans
, nonterminal2
)
375 if first_plans2
is None:
376 raise ValueError("left recursion for rule %r" % nonterminal
)
378 for t
, pushes
in first_plans2
.items():
379 new_first_plans
[t
] = [next_
] + pushes
381 first_plans
[nonterminal
] = new_first_plans
382 return new_first_plans