]>
crepu.dev Git - config.git/blob - djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/blib2to3/pgen2/pgen.py
1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
18 from blib2to3
.pgen2
import grammar
, token
, tokenize
19 from blib2to3
.pgen2
.tokenize
import GoodTokenInfo
21 Path
= Union
[str, "os.PathLike[str]"]
24 class PgenGrammar(grammar
.Grammar
):
28 class ParserGenerator
:
31 generator
: Iterator
[GoodTokenInfo
]
32 first
: Dict
[str, Optional
[Dict
[str, int]]]
34 def __init__(self
, filename
: Path
, stream
: Optional
[IO
[str]] = None) -> None:
37 stream
= open(filename
, encoding
="utf-8")
38 close_stream
= stream
.close
39 self
.filename
= filename
41 self
.generator
= tokenize
.generate_tokens(stream
.readline
)
42 self
.gettoken() # Initialize lookahead
43 self
.dfas
, self
.startsymbol
= self
.parse()
44 if close_stream
is not None:
46 self
.first
= {} # map from symbol name to set of tokens
49 def make_grammar(self
) -> PgenGrammar
:
51 names
= list(self
.dfas
.keys())
53 names
.remove(self
.startsymbol
)
54 names
.insert(0, self
.startsymbol
)
56 i
= 256 + len(c
.symbol2number
)
57 c
.symbol2number
[name
] = i
58 c
.number2symbol
[i
] = name
64 for label
, next
in sorted(state
.arcs
.items()):
65 arcs
.append((self
.make_label(c
, label
), dfa
.index(next
)))
67 arcs
.append((0, dfa
.index(state
)))
69 c
.states
.append(states
)
70 c
.dfas
[c
.symbol2number
[name
]] = (states
, self
.make_first(c
, name
))
71 c
.start
= c
.symbol2number
[self
.startsymbol
]
74 def make_first(self
, c
: PgenGrammar
, name
: str) -> Dict
[int, int]:
75 rawfirst
= self
.first
[name
]
76 assert rawfirst
is not None
78 for label
in sorted(rawfirst
):
79 ilabel
= self
.make_label(c
, label
)
80 ##assert ilabel not in first # XXX failed on <> ... !=
84 def make_label(self
, c
: PgenGrammar
, label
: str) -> int:
85 # XXX Maybe this should be a method on a subclass of converter?
86 ilabel
= len(c
.labels
)
87 if label
[0].isalpha():
88 # Either a symbol name or a named token
89 if label
in c
.symbol2number
:
90 # A symbol name (a non-terminal)
91 if label
in c
.symbol2label
:
92 return c
.symbol2label
[label
]
94 c
.labels
.append((c
.symbol2number
[label
], None))
95 c
.symbol2label
[label
] = ilabel
98 # A named token (NAME, NUMBER, STRING)
99 itoken
= getattr(token
, label
, None)
100 assert isinstance(itoken
, int), label
101 assert itoken
in token
.tok_name
, label
102 if itoken
in c
.tokens
:
103 return c
.tokens
[itoken
]
105 c
.labels
.append((itoken
, None))
106 c
.tokens
[itoken
] = ilabel
109 # Either a keyword or an operator
110 assert label
[0] in ('"', "'"), label
112 if value
[0].isalpha():
114 keywords
= c
.soft_keywords
116 keywords
= c
.keywords
119 if value
in keywords
:
120 return keywords
[value
]
122 c
.labels
.append((token
.NAME
, value
))
123 keywords
[value
] = ilabel
126 # An operator (any non-numeric token)
127 itoken
= grammar
.opmap
[value
] # Fails if unknown token
128 if itoken
in c
.tokens
:
129 return c
.tokens
[itoken
]
131 c
.labels
.append((itoken
, None))
132 c
.tokens
[itoken
] = ilabel
135 def addfirstsets(self
) -> None:
136 names
= list(self
.dfas
.keys())
139 if name
not in self
.first
:
141 # print name, self.first[name].keys()
143 def calcfirst(self
, name
: str) -> None:
144 dfa
= self
.dfas
[name
]
145 self
.first
[name
] = None # dummy to detect left recursion
147 totalset
: Dict
[str, int] = {}
149 for label
in state
.arcs
:
150 if label
in self
.dfas
:
151 if label
in self
.first
:
152 fset
= self
.first
[label
]
154 raise ValueError("recursion for rule %r" % name
)
156 self
.calcfirst(label
)
157 fset
= self
.first
[label
]
158 assert fset
is not None
159 totalset
.update(fset
)
160 overlapcheck
[label
] = fset
163 overlapcheck
[label
] = {label
: 1}
164 inverse
: Dict
[str, str] = {}
165 for label
, itsfirst
in overlapcheck
.items():
166 for symbol
in itsfirst
:
167 if symbol
in inverse
:
169 "rule %s is ambiguous; %s is in the first sets of %s as well"
170 " as %s" % (name
, symbol
, label
, inverse
[symbol
])
172 inverse
[symbol
] = label
173 self
.first
[name
] = totalset
175 def parse(self
) -> Tuple
[Dict
[str, List
["DFAState"]], str]:
177 startsymbol
: Optional
[str] = None
178 # MSTART: (NEWLINE | RULE)* ENDMARKER
179 while self
.type != token
.ENDMARKER
:
180 while self
.type == token
.NEWLINE
:
182 # RULE: NAME ':' RHS NEWLINE
183 name
= self
.expect(token
.NAME
)
184 self
.expect(token
.OP
, ":")
185 a
, z
= self
.parse_rhs()
186 self
.expect(token
.NEWLINE
)
187 # self.dump_nfa(name, a, z)
188 dfa
= self
.make_dfa(a
, z
)
189 # self.dump_dfa(name, dfa)
191 self
.simplify_dfa(dfa
)
194 # print name, oldlen, newlen
195 if startsymbol
is None:
197 assert startsymbol
is not None
198 return dfas
, startsymbol
200 def make_dfa(self
, start
: "NFAState", finish
: "NFAState") -> List
["DFAState"]:
201 # To turn an NFA into a DFA, we define the states of the DFA
202 # to correspond to *sets* of states of the NFA. Then do some
203 # state reduction. Let's represent sets as dicts with 1 for
205 assert isinstance(start
, NFAState
)
206 assert isinstance(finish
, NFAState
)
208 def closure(state
: NFAState
) -> Dict
[NFAState
, int]:
209 base
: Dict
[NFAState
, int] = {}
210 addclosure(state
, base
)
213 def addclosure(state
: NFAState
, base
: Dict
[NFAState
, int]) -> None:
214 assert isinstance(state
, NFAState
)
218 for label
, next
in state
.arcs
:
220 addclosure(next
, base
)
222 states
= [DFAState(closure(start
), finish
)]
223 for state
in states
: # NB states grows while we're iterating
224 arcs
: Dict
[str, Dict
[NFAState
, int]] = {}
225 for nfastate
in state
.nfaset
:
226 for label
, next
in nfastate
.arcs
:
227 if label
is not None:
228 addclosure(next
, arcs
.setdefault(label
, {}))
229 for label
, nfaset
in sorted(arcs
.items()):
231 if st
.nfaset
== nfaset
:
234 st
= DFAState(nfaset
, finish
)
236 state
.addarc(st
, label
)
237 return states
# List of DFAState instances; first one is start
239 def dump_nfa(self
, name
: str, start
: "NFAState", finish
: "NFAState") -> None:
240 print("Dump of NFA for", name
)
242 for i
, state
in enumerate(todo
):
243 print(" State", i
, state
is finish
and "(final)" or "")
244 for label
, next
in state
.arcs
:
253 print(" %s -> %d" % (label
, j
))
def dump_dfa(self, name: str, dfa: Sequence["DFAState"]) -> None:
    """Print a debugging dump of *dfa*, the DFA computed for rule *name*.

    Emits one line per state (flagging the final state) and one indented
    line per outgoing arc, showing the arc label and the index of the
    target state within *dfa*.
    """
    print("Dump of DFA for", name)
    for i, state in enumerate(dfa):
        print("  State", i, state.isfinal and "(final)" or "")
        # Sort arcs by label so the dump is deterministic.
        for label, next in sorted(state.arcs.items()):
            print("    %s -> %d" % (label, dfa.index(next)))
262 def simplify_dfa(self
, dfa
: List
["DFAState"]) -> None:
263 # This is not theoretically optimal, but works well enough.
264 # Algorithm: repeatedly look for two states that have the same
265 # set of arcs (same labels pointing to the same nodes) and
266 # unify them, until things stop changing.
268 # dfa is a list of DFAState instances
272 for i
, state_i
in enumerate(dfa
):
273 for j
in range(i
+ 1, len(dfa
)):
275 if state_i
== state_j
:
276 # print " unify", i, j
279 state
.unifystate(state_j
, state_i
)
283 def parse_rhs(self
) -> Tuple
["NFAState", "NFAState"]:
284 # RHS: ALT ('|' ALT)*
285 a
, z
= self
.parse_alt()
286 if self
.value
!= "|":
293 while self
.value
== "|":
295 a
, z
= self
.parse_alt()
300 def parse_alt(self
) -> Tuple
["NFAState", "NFAState"]:
302 a
, b
= self
.parse_item()
303 while self
.value
in ("(", "[") or self
.type in (token
.NAME
, token
.STRING
):
304 c
, d
= self
.parse_item()
309 def parse_item(self
) -> Tuple
["NFAState", "NFAState"]:
310 # ITEM: '[' RHS ']' | ATOM ['+' | '*']
311 if self
.value
== "[":
313 a
, z
= self
.parse_rhs()
314 self
.expect(token
.OP
, "]")
318 a
, z
= self
.parse_atom()
320 if value
not in ("+", "*"):
329 def parse_atom(self
) -> Tuple
["NFAState", "NFAState"]:
330 # ATOM: '(' RHS ')' | NAME | STRING
331 if self
.value
== "(":
333 a
, z
= self
.parse_rhs()
334 self
.expect(token
.OP
, ")")
336 elif self
.type in (token
.NAME
, token
.STRING
):
339 a
.addarc(z
, self
.value
)
344 "expected (...) or NAME or STRING, got %s/%s", self
.type, self
.value
348 def expect(self
, type: int, value
: Optional
[Any
] = None) -> str:
349 if self
.type != type or (value
is not None and self
.value
!= value
):
351 "expected %s/%s, got %s/%s", type, value
, self
.type, self
.value
def gettoken(self) -> None:
    """Advance the parser's one-token lookahead.

    Pulls tokens from self.generator, skipping COMMENT and NL tokens,
    and stores the fields of the next significant token on self as
    self.type, self.value, self.begin, self.end and self.line.
    """
    tup = next(self.generator)
    while tup[0] in (tokenize.COMMENT, tokenize.NL):
        tup = next(self.generator)
    self.type, self.value, self.begin, self.end, self.line = tup
364 def raise_error(self
, msg
: str, *args
: Any
) -> NoReturn
:
369 msg
= " ".join([msg
] + list(map(str, args
)))
370 raise SyntaxError(msg
, (self
.filename
, self
.end
[0], self
.end
[1], self
.line
))
class NFAState(object):
    """A state of a nondeterministic finite automaton (NFA)."""

    # Outgoing arcs as (label, target) pairs; a None label is an
    # epsilon (unlabeled) transition.
    arcs: List[Tuple[Optional[str], "NFAState"]]

    def __init__(self) -> None:
        self.arcs = []  # list of (label, NFAState) pairs

    def addarc(self, next: "NFAState", label: Optional[str] = None) -> None:
        """Add an arc from this state to *next*, labeled *label*."""
        assert label is None or isinstance(label, str)
        assert isinstance(next, NFAState)
        self.arcs.append((label, next))


class DFAState(object):
    """A state of a deterministic finite automaton (DFA).

    Each DFA state corresponds to a *set* of NFA states, kept in
    ``nfaset`` (a dict used as an ordered set).
    """

    # The set of NFA states this DFA state was built from.
    nfaset: Dict[NFAState, Any]
    # True when nfaset contains the NFA's final state.
    isfinal: bool
    # Outgoing arcs, keyed by label.
    arcs: Dict[str, "DFAState"]

    def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None:
        assert isinstance(nfaset, dict)
        assert isinstance(next(iter(nfaset)), NFAState)
        assert isinstance(final, NFAState)
        self.nfaset = nfaset
        self.isfinal = final in nfaset
        self.arcs = {}  # map from label to DFAState

    def addarc(self, next: "DFAState", label: str) -> None:
        """Add an arc labeled *label* to *next*; each label is unique."""
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next

    def unifystate(self, old: "DFAState", new: "DFAState") -> None:
        """Redirect every arc that points at *old* to point at *new*."""
        for label, next in self.arcs.items():
            if next is old:
                self.arcs[label] = new

    def __eq__(self, other: Any) -> bool:
        # Equality test -- ignore the nfaset instance variable
        assert isinstance(other, DFAState)
        if self.isfinal != other.isfinal:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next in self.arcs.items():
            if next is not other.arcs.get(label):
                return False
        return True

    __hash__: Any = None  # For Py3 compatibility.
def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar:
    """Parse the grammar file at *filename* and return a PgenGrammar.

    Convenience entry point: builds a ParserGenerator over the file and
    returns the grammar it computes.
    """
    p = ParserGenerator(filename)
    return p.make_grammar()