crepu.dev Git - config.git/blob - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/blib2to3/pgen2/parse.py

2 # Licensed to PSF under a Contributor Agreement.

4 """Parser engine for the grammar tables generated by pgen.

6 The grammar table must be loaded first.

8 See Parser/parser.c in the Python distribution for additional info on

9 how this parsing engine works.

11 """

12 from contextlib import contextmanager

13 from typing import (

14 TYPE_CHECKING,

15 Any,

16 Callable,

17 Dict,

18 Iterator,

19 List,

20 Optional,

21 Set,

22 Tuple,

23 Union,

24 cast,

25 )

27 from blib2to3.pgen2.grammar import Grammar

 from blib2to3.pytree import NL, Context, Leaf, Node, RawNode, convert

30 # Local imports

 from . import grammar, token, tokenize

33 if TYPE_CHECKING:

34 from blib2to3.pgen2.driver import TokenProxy

 Results = Dict[str, NL]

 Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]

 DFA = List[List[Tuple[int, int]]]

 DFAS = Tuple[DFA, Dict[int, int]]

 def lam_sub(grammar: Grammar, node: RawNode) -> NL:

     assert node[3] is not None

     return Node(type=node[0], children=node[3], context=node[2])

48 # A placeholder node, used when parser is backtracking.

 DUMMY_NODE = (-1, None, None, None)

52 def stack_copy(

     stack: List[Tuple[DFAS, int, RawNode]]

 ) -> List[Tuple[DFAS, int, RawNode]]:

55 """Nodeless stack copy."""

     return [(dfa, label, DUMMY_NODE) for dfa, label, _ in stack]

59 class Recorder:

     def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:

61 self.parser = parser

62 self._ilabels = ilabels

63 self.context = context # not really matter

         self._dead_ilabels: Set[int] = set()

         self._start_point = self.parser.stack

         self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}

69 @property

     def ilabels(self) -> Set[int]:

         return self._dead_ilabels.symmetric_difference(self._ilabels)

73 @contextmanager

     def switch_to(self, ilabel: int) -> Iterator[None]:

75 with self.backtrack():

             self.parser.stack = self._points[ilabel]

77 try:

78 yield

79 except ParseError:

                 self._dead_ilabels.add(ilabel)

81 finally:

                 self.parser.stack = self._start_point

84 @contextmanager

     def backtrack(self) -> Iterator[None]:

86 """

87 Use the node-level invariant ones for basic parsing operations (push/pop/shift).

88 These still will operate on the stack; but they won't create any new nodes, or

89 modify the contents of any other existing nodes.

91 This saves us a ton of time when we are backtracking, since we

92 want to restore to the initial state as quick as possible, which

93 can only be done by having as little mutatations as possible.

94 """

95 is_backtracking = self.parser.is_backtracking

96 try:

97 self.parser.is_backtracking = True

98 yield

99 finally:

100 self.parser.is_backtracking = is_backtracking

101

     def add_token(self, tok_type: int, tok_val: str, raw: bool = False) -> None:

103 func: Callable[..., Any]

104 if raw:

105 func = self.parser._addtoken

106 else:

107 func = self.parser.addtoken

108

109 for ilabel in self.ilabels:

110 with self.switch_to(ilabel):

                 args = [tok_type, tok_val, self.context]

112 if raw:

                     args.insert(0, ilabel)

114 func(*args)

115

116 def determine_route(

         self, value: Optional[str] = None, force: bool = False

118 ) -> Optional[int]:

119 alive_ilabels = self.ilabels

         if len(alive_ilabels) == 0:

             *_, most_successful_ilabel = self._dead_ilabels

             raise ParseError("bad input", most_successful_ilabel, value, self.context)

123

124 ilabel, *rest = alive_ilabels

125 if force or not rest:

126 return ilabel

127 else:

128 return None

129

130

 class ParseError(Exception):

132 """Exception to signal the parser is stuck."""

133

134 def __init__(

         self, msg: str, type: Optional[int], value: Optional[str], context: Context

136 ) -> None:

137 Exception.__init__(

138 self, f"{msg}: type={type!r}, value={value!r}, context={context!r}"

139 )

140 self.msg = msg

141 self.type = type

142 self.value = value

143 self.context = context

144

145

146 class Parser:

147 """Parser engine.

148

149 The proper usage sequence is:

150

151 p = Parser(grammar, [converter]) # create instance

152 p.setup([start]) # prepare for parsing

153 <for each input token>:

154 if p.addtoken(...): # parse a token; may raise ParseError

155 break

156 root = p.rootnode # root of abstract syntax tree

157

158 A Parser instance may be reused by calling setup() repeatedly.

159

160 A Parser instance contains state pertaining to the current token

161 sequence, and should not be used concurrently by different threads

162 to parse separate token sequences.

163

164 See driver.py for how to get input tokens by tokenizing a file or

165 string.

166

167 Parsing is complete when addtoken() returns True; the root of the

168 abstract syntax tree can then be retrieved from the rootnode

169 instance variable. When a syntax error occurs, addtoken() raises

170 the ParseError exception. There is no error recovery; the parser

171 cannot be used after a syntax error was reported (but it can be

172 reinitialized by calling setup()).

173

174 """

175

     def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:

177 """Constructor.

178

179 The grammar argument is a grammar.Grammar instance; see the

180 grammar module for more information.

181

182 The parser is not ready yet for parsing; you must call the

183 setup() method to get it started.

184

185 The optional convert argument is a function mapping concrete

186 syntax tree nodes to abstract syntax tree nodes. If not

187 given, no conversion is done and the syntax tree produced is

188 the concrete syntax tree. If given, it must be a function of

189 two arguments, the first being the grammar (a grammar.Grammar

190 instance), and the second being the concrete syntax tree node

191 to be converted. The syntax tree is converted from the bottom

192 up.

193

194 **post-note: the convert argument is ignored since for Black's

195 usage, convert will always be blib2to3.pytree.convert. Allowing

196 this to be dynamic hurts mypyc's ability to use early binding.

197 These docs are left for historical and informational value.

198

199 A concrete syntax tree node is a (type, value, context, nodes)

200 tuple, where type is the node type (a token or symbol number),

201 value is None for symbols and a string for tokens, context is

202 None or an opaque value used for error reporting (typically a

203 (lineno, offset) pair), and nodes is a list of children for

204 symbols, and None for tokens.

205

206 An abstract syntax tree node may be anything; this is entirely

207 up to the converter function.

208

209 """

210 self.grammar = grammar

211 # See note in docstring above. TL;DR this is ignored.

212 self.convert = convert or lam_sub

213 self.is_backtracking = False

214

     def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:

216 """Prepare for parsing.

217

218 This *must* be called before starting to parse.

219

220 The optional argument is an alternative start symbol; it

221 defaults to the grammar's start symbol.

222

223 You can use a Parser instance to parse any number of programs;

224 each time you call setup() the parser is reset to an initial

225 state determined by the (implicit or explicit) start symbol.

226

227 """

228 if start is None:

229 start = self.grammar.start

230 # Each stack entry is a tuple: (dfa, state, node).

231 # A node is a tuple: (type, value, context, children),

232 # where children is a list of nodes or None, and context may be None.

         newnode: RawNode = (start, None, None, [])

         stackentry = (self.grammar.dfas[start], 0, newnode)

         self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]

         self.rootnode: Optional[NL] = None

         self.used_names: Set[str] = set()

238 self.proxy = proxy

239

     def addtoken(self, type: int, value: str, context: Context) -> bool:

241 """Add a token; return True iff this is the end of the program."""

242 # Map from token to label

         ilabels = self.classify(type, value, context)

         assert len(ilabels) >= 1

245

246 # If we have only one state to advance, we'll directly

247 # take it as is.

         if len(ilabels) == 1:

249 [ilabel] = ilabels

             return self._addtoken(ilabel, type, value, context)

251

252 # If there are multiple states which we can advance (only

253 # happen under soft-keywords), then we will try all of them

254 # in parallel and as soon as one state can reach further than

255 # the rest, we'll choose that one. This is a pretty hacky

256 # and hopefully temporary algorithm.

257 #

258 # For a more detailed explanation, check out this post:

259 # https://tree.science/what-the-backtracking.html

260

         with self.proxy.release() as proxy:

             counter, force = 0, False

             recorder = Recorder(self, ilabels, context)

             recorder.add_token(type, value, raw=True)

265

266 next_token_value = value

             while recorder.determine_route(next_token_value) is None:

                 if not proxy.can_advance(counter):

269 force = True

270 break

271

                 next_token_type, next_token_value, *_ = proxy.eat(counter)

                 if next_token_type in (tokenize.COMMENT, tokenize.NL):

274 counter += 1

275 continue

276

277 if next_token_type == tokenize.OP:

278 next_token_type = grammar.opmap[next_token_value]

279

                 recorder.add_token(next_token_type, next_token_value)

281 counter += 1

282

             ilabel = cast(int, recorder.determine_route(next_token_value, force=force))

284 assert ilabel is not None

285

         return self._addtoken(ilabel, type, value, context)

287

     def _addtoken(self, ilabel: int, type: int, value: str, context: Context) -> bool:

289 # Loop until the token is shifted; may raise exceptions

290 while True:

             dfa, state, node = self.stack[-1]

292 states, first = dfa

293 arcs = states[state]

294 # Look for a state with this label

295 for i, newstate in arcs:

                 t = self.grammar.labels[i][0]

297 if t >= 256:

298 # See if it's a symbol and if we're in its first set

                     itsdfa = self.grammar.dfas[t]

300 itsstates, itsfirst = itsdfa

301 if ilabel in itsfirst:

302 # Push a symbol

                         self.push(t, itsdfa, newstate, context)

304 break # To continue the outer while loop

305

306 elif ilabel == i:

307 # Look it up in the list of labels

308 # Shift a token; we're done with it

                     self.shift(type, value, newstate, context)

310 # Pop while we are in an accept-only state

311 state = newstate

                     while states[state] == [(0, state)]:

313 self.pop()

314 if not self.stack:

315 # Done parsing!

316 return True

                         dfa, state, node = self.stack[-1]

318 states, first = dfa

319 # Done with this token

320 return False

321

322 else:

                 if (0, state) in arcs:

324 # An accepting state, pop it and try something else

325 self.pop()

326 if not self.stack:

327 # Done parsing, but another token is input

                         raise ParseError("too much input", type, value, context)

329 else:

330 # No success finding a transition

                     raise ParseError("bad input", type, value, context)

332

     def classify(self, type: int, value: str, context: Context) -> List[int]:

334 """Turn a token into a label. (Internal)

335

336 Depending on whether the value is a soft-keyword or not,

337 this function may return multiple labels to choose from."""

         if type == token.NAME:

339 # Keep a listing of all used names

             self.used_names.add(value)

341 # Check for reserved words

             if value in self.grammar.keywords:

                 return [self.grammar.keywords[value]]

             elif value in self.grammar.soft_keywords:

                 assert type in self.grammar.tokens

346 return [

347 self.grammar.soft_keywords[value],

                     self.grammar.tokens[type],

349 ]

350

         ilabel = self.grammar.tokens.get(type)

352 if ilabel is None:

             raise ParseError("bad token", type, value, context)

354 return [ilabel]

355

     def shift(self, type: int, value: str, newstate: int, context: Context) -> None:

357 """Shift a token. (Internal)"""

358 if self.is_backtracking:

             dfa, state, _ = self.stack[-1]

             self.stack[-1] = (dfa, newstate, DUMMY_NODE)

361 else:

             dfa, state, node = self.stack[-1]

             rawnode: RawNode = (type, value, context, None)

             newnode = convert(self.grammar, rawnode)

             assert node[-1] is not None

             node[-1].append(newnode)

             self.stack[-1] = (dfa, newstate, node)

368

     def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:

370 """Push a nonterminal. (Internal)"""

371 if self.is_backtracking:

             dfa, state, _ = self.stack[-1]

             self.stack[-1] = (dfa, newstate, DUMMY_NODE)

             self.stack.append((newdfa, 0, DUMMY_NODE))

375 else:

             dfa, state, node = self.stack[-1]

             newnode: RawNode = (type, None, context, [])

             self.stack[-1] = (dfa, newstate, node)

             self.stack.append((newdfa, 0, newnode))

380

     def pop(self) -> None:

382 """Pop a nonterminal. (Internal)"""

383 if self.is_backtracking:

384 self.stack.pop()

385 else:

             popdfa, popstate, popnode = self.stack.pop()

             newnode = convert(self.grammar, popnode)

388 if self.stack:

                 dfa, state, node = self.stack[-1]

                 assert node[-1] is not None

                 node[-1].append(newnode)

392 else:

393 self.rootnode = newnode

394 self.rootnode.used_names = self.used_names