# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.

"""
This module defines the data structures used to represent a grammar.

Specifying grammars in pgen is possible with this grammar::

    grammar: (NEWLINE | rule)* ENDMARKER
    rule: NAME ':' rhs NEWLINE
    rhs: items ('|' items)*
    items: item+
    item: '[' rhs ']' | atom ['+' | '*']
    atom: '(' rhs ')' | NAME | STRING

This grammar is self-referencing.

This parser generator (pgen2) was created by Guido Rossum and used for lib2to3.
Most of the code has been refactored to make it more Pythonic. Since this was a
"copy" of the CPython Parser parser "pgen", there was some work needed to make
it more readable. It should also be slightly faster than the original pgen2,
because we made some optimizations.
"""
from ast import literal_eval
from typing import TypeVar, Generic, Mapping, Sequence, Set, Union

from parso.pgen2.grammar_parser import GrammarParser, NFAState
_TokenTypeT = TypeVar("_TokenTypeT")


class Grammar(Generic[_TokenTypeT]):
    """
    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.

    The only important part in this parsers are dfas and transitions between
    dfas.
    """

    def __init__(self,
                 start_nonterminal: str,
                 rule_to_dfas: Mapping[str, Sequence['DFAState[_TokenTypeT]']],
                 reserved_syntax_strings: Mapping[str, 'ReservedString']):
        # Map from rule (nonterminal) name to the list of DFA states that
        # implement that rule.
        self.nonterminal_to_dfas = rule_to_dfas
        # Map from keyword/operator string (e.g. "if", "*") to its
        # ReservedString wrapper.
        self.reserved_syntax_strings = reserved_syntax_strings
        # Name of the rule parsing starts from.
        self.start_nonterminal = start_nonterminal
class DFAPlan:
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.
    """
    # NOTE: the default for ``dfa_pushes`` used to be a mutable ``[]``.
    # An immutable empty tuple avoids the shared-mutable-default pitfall and
    # still satisfies Sequence for the read-only uses visible in this module
    # (truthiness tests, indexing, list concatenation on copies).
    def __init__(self, next_dfa: 'DFAState',
                 dfa_pushes: Sequence['DFAState'] = ()):
        # The DFA state the parser moves to after this plan is applied.
        self.next_dfa = next_dfa
        # DFA states that must be pushed onto the stack before moving on.
        self.dfa_pushes = dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa,
                               self.dfa_pushes)
class DFAState(Generic[_TokenTypeT]):
    """
    The DFAState object is the core class for pretty much anything. DFAState
    are the vertices of an ordered graph while arcs and transitions are the
    edges.

    Arcs are the initial edges, where most DFAStates are not connected and
    transitions are then calculated to connect the DFA state machines that have
    different nonterminals.
    """
    def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        # Name of the grammar rule this DFA state belongs to.
        self.from_rule = from_rule
        # The set of NFA states this DFA state was built from (powerset
        # construction).
        self.nfa_set = nfa_set
        # map from terminals/nonterminals to DFAState
        self.arcs: Mapping[str, DFAState] = {}
        # In an intermediary step we set these nonterminal arcs (which has the
        # same structure as arcs). These don't contain terminals anymore.
        self.nonterminal_arcs: Mapping[str, DFAState] = {}

        # Transitions are basically the only thing that the parser is using
        # with is_final. Everything else is purely here to create a parser.
        self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {}
        # This state is accepting iff the NFA's final state is in its set.
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
        """Add an outgoing arc ``label`` pointing at DFA state ``next_``."""
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_

    def unifystate(self, old, new):
        """Rewrite every arc that points at ``old`` to point at ``new``."""
        for label, next_ in self.arcs.items():
            if next_ is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfa_set instance variable
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next_ in self.arcs.items():
            if next_ is not other.arcs.get(label):
                return False
        return True

    def __repr__(self):
        return '<%s: %s is_final=%s>' % (
            self.__class__.__name__, self.from_rule, self.is_final
        )
class ReservedString:
    """
    Most grammars will have certain keywords and operators that are mentioned
    in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
    This class basically is the former.
    """

    def __init__(self, value: str):
        # The literal string as it appears in the grammar, e.g. "if" or "*".
        self.value = value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.value)
141 def _simplify_dfas(dfas
):
143 This is not theoretically optimal, but works well enough.
144 Algorithm: repeatedly look for two states that have the same
145 set of arcs (same labels pointing to the same nodes) and
146 unify them, until things stop changing.
148 dfas is a list of DFAState instances
153 for i
, state_i
in enumerate(dfas
):
154 for j
in range(i
+ 1, len(dfas
)):
156 if state_i
== state_j
:
159 state
.unifystate(state_j
, state_i
)
def _make_dfas(start, finish):
    """
    Uses the powerset construction algorithm to create DFA states from sets of
    NFA states.

    Also does state reduction if some states are not needed.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA.  Then do some
    # state reduction.
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)

    def addclosure(nfa_state, base_nfa_set):
        # Add nfa_state plus everything reachable from it via unlabeled
        # (epsilon) arcs to base_nfa_set.
        assert isinstance(nfa_state, NFAState)
        if nfa_state in base_nfa_set:
            return
        base_nfa_set.add(nfa_state)
        for nfa_arc in nfa_state.arcs:
            if nfa_arc.nonterminal_or_string is None:
                addclosure(nfa_arc.next, base_nfa_set)

    base_nfa_set = set()
    addclosure(start, base_nfa_set)
    states = [DFAState(start.from_rule, base_nfa_set, finish)]
    for state in states:  # NB states grows while we're iterating
        arcs = {}
        # Find state transitions and store them in arcs.
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is not None:
                    nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
                    addclosure(nfa_arc.next, nfa_set)

        # Now create the dfa's with no None's in arcs anymore. All Nones have
        # been eliminated and state transitions (arcs) are properly defined, we
        # just need to create the dfa's.
        for nonterminal_or_string, nfa_set in arcs.items():
            for nested_state in states:
                if nested_state.nfa_set == nfa_set:
                    # The DFA state already exists for this rule.
                    break
            else:
                nested_state = DFAState(start.from_rule, nfa_set, finish)
                states.append(nested_state)

            state.add_arc(nested_state, nonterminal_or_string)
    return states  # List of DFAState instances; first one is start
def _dump_nfa(start, finish):
    """Debug helper: print the NFA graph reachable from ``start``."""
    print("Dump of NFA for", start.from_rule)
    todo = [start]
    for i, state in enumerate(todo):  # todo grows while we iterate
        print("  State", i, state is finish and "(final)" or "")
        for arc in state.arcs:
            label, next_ = arc.nonterminal_or_string, arc.next
            if next_ in todo:
                j = todo.index(next_)
            else:
                j = len(todo)
                todo.append(next_)
            if label is None:
                # Epsilon (unlabeled) transition.
                print("    -> %d" % j)
            else:
                print("    %s -> %d" % (label, j))
def _dump_dfas(dfas):
    """Debug helper: print every DFA state and its outgoing arcs."""
    print("Dump of DFA for", dfas[0].from_rule)
    for index, dfa_state in enumerate(dfas):
        print("  State", index, dfa_state.is_final and "(final)" or "")
        for nonterminal, next_ in dfa_state.arcs.items():
            print("    %s -> %d" % (nonterminal, dfas.index(next_)))
def generate_grammar(bnf_grammar: str, token_namespace) -> Grammar:
    """
    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    # Build (and reduce) one DFA list per grammar rule.
    for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
        # _dump_nfa(nfa_a, nfa_z)
        dfas = _make_dfas(nfa_a, nfa_z)
        # _dump_dfas(dfas)
        _simplify_dfas(dfas)
        rule_to_dfas[nfa_a.from_rule] = dfas
        # print(nfa_a.from_rule, oldlen, newlen)

        # The first rule in the grammar file is the start rule.
        if start_nonterminal is None:
            start_nonterminal = nfa_a.from_rule

    # Split each state's arcs into nonterminal arcs (pointing at other rules)
    # and terminal transitions (token types / reserved strings).
    reserved_strings: Mapping[str, ReservedString] = {}
    for nonterminal, dfas in rule_to_dfas.items():
        for dfa_state in dfas:
            for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
                if terminal_or_nonterminal in rule_to_dfas:
                    dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
                else:
                    transition = _make_transition(
                        token_namespace,
                        reserved_strings,
                        terminal_or_nonterminal
                    )
                    dfa_state.transitions[transition] = DFAPlan(next_dfa)

    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)  # type: ignore
282 def _make_transition(token_namespace
, reserved_syntax_strings
, label
):
284 Creates a reserved string ("if", "for", "*", ...) or returns the token type
285 (NUMBER, STRING, ...) for a given grammar terminal.
287 if label
[0].isalpha():
288 # A named token (e.g. NAME, NUMBER, STRING)
289 return getattr(token_namespace
, label
)
291 # Either a keyword or an operator
292 assert label
[0] in ('"', "'"), label
293 assert not label
.startswith('"""') and not label
.startswith("'''")
294 value
= literal_eval(label
)
296 return reserved_syntax_strings
[value
]
298 r
= reserved_syntax_strings
[value
] = ReservedString(value
)
def _calculate_tree_traversal(nonterminal_to_dfas):
    """
    By this point we know how dfas can move around within a stack node, but we
    don't know how we can add a new stack node (nonterminal transitions).

    Raises ValueError if the grammar is ambiguous (two plans for the same
    token in the same state) or left-recursive.
    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}

    # Sorted for deterministic iteration order (and error messages).
    nonterminals = list(nonterminal_to_dfas.keys())
    nonterminals.sort()
    for nonterminal in nonterminals:
        if nonterminal not in first_plans:
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

    # Now that we have calculated the first terminals, we are sure that
    # there is no left recursion.

    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
            transitions = dfa_state.transitions
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
                    if transition in transitions:
                        prev_plan = transitions[transition]
                        # Make sure these are sorted so that error messages are
                        # at least deterministic
                        choices = sorted([
                            (
                                prev_plan.dfa_pushes[0].from_rule
                                if prev_plan.dfa_pushes
                                else prev_plan.next_dfa.from_rule
                            ),
                            (
                                pushes[0].from_rule
                                if pushes
                                else next_dfa.from_rule
                            ),
                        ])
                        raise ValueError(
                            "Rule %s is ambiguous; given a %s token, we "
                            "can't determine if we should evaluate %s or %s."
                            % (
                                (
                                    dfa_state.from_rule,
                                    transition,
                                ) + tuple(choices)
                            )
                        )
                    transitions[transition] = DFAPlan(next_dfa, pushes)
352 def _calculate_first_plans(nonterminal_to_dfas
, first_plans
, nonterminal
):
354 Calculates the first plan in the first_plans dictionary for every given
355 nonterminal. This is going to be used to know when to create stack nodes.
357 dfas
= nonterminal_to_dfas
[nonterminal
]
359 first_plans
[nonterminal
] = None # dummy to detect left recursion
360 # We only need to check the first dfa. All the following ones are not
361 # interesting to find first terminals.
363 for transition
, next_
in state
.transitions
.items():
364 # It's a string. We have finally found a possible first token.
365 new_first_plans
[transition
] = [next_
.next_dfa
]
367 for nonterminal2
, next_
in state
.nonterminal_arcs
.items():
368 # It's a nonterminal and we have either a left recursion issue
369 # in the grammar or we have to recurse.
371 first_plans2
= first_plans
[nonterminal2
]
373 first_plans2
= _calculate_first_plans(nonterminal_to_dfas
, first_plans
, nonterminal2
)
375 if first_plans2
is None:
376 raise ValueError("left recursion for rule %r" % nonterminal
)
378 for t
, pushes
in first_plans2
.items():
379 new_first_plans
[t
] = [next_
] + pushes
381 first_plans
[nonterminal
] = new_first_plans
382 return new_first_plans