# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

# Python imports
import os
import pickle
import tempfile
from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union

# Local imports
from . import token

_P = TypeVar("_P", bound="Grammar")
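
# Shapes of the grammar tables, described in detail in the Grammar
# docstring below: a Label pairs a token or symbol number with an
# optional keyword string; a DFA is a list of states, each a list of
# (label, next-state) arcs; and DFAS is a (DFA, first-set dict) pair.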
Label = Tuple[int, Optional[str]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
Path = Union[str, "os.PathLike[str]"]


class Grammar:
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly. The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses. The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass). The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers. Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is an (i, j) pair where i is a label and j is
                     a state number. The DFA number is the index into
                     this list. (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords. The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    soft_keywords -- a dict mapping soft keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict mapping symbol names to label numbers.

    """

    def __init__(self) -> None:
        self.symbol2number: Dict[str, int] = {}
        self.number2symbol: Dict[int, str] = {}
        self.states: List[DFA] = []
        self.dfas: Dict[int, DFAS] = {}
        self.labels: List[Label] = [(0, "EMPTY")]
        self.keywords: Dict[str, int] = {}
        self.soft_keywords: Dict[str, int] = {}
        self.tokens: Dict[int, int] = {}
        self.symbol2label: Dict[str, int] = {}
        self.version: Tuple[int, int] = (0, 0)
        self.start = 256
        # Python 3.7+ parses async as a keyword, not an identifier
        self.async_keywords = False

    def dump(self, filename: Path) -> None:
        """Dump the grammar tables to a pickle file."""

        # mypyc generates objects that don't have a __dict__, but they
        # do have __getstate__ methods that will return an equivalent
        # dictionary
        if hasattr(self, "__dict__"):
            d = self.__dict__
        else:
            d = self.__getstate__()  # type: ignore

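        # Write to a sibling temporary file and atomically replace the
        # target, so a concurrent reader never sees a partial pickle.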
        with tempfile.NamedTemporaryFile(
            dir=os.path.dirname(filename), delete=False
        ) as f:
            pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
        os.replace(f.name, filename)
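
    # Round-trip sketch (hypothetical filename; the tables are normally
    # built once by a pgen subclass, then cached for fast reloading):
    #   g.dump("Grammar.pickle")
    #   fresh = Grammar()
    #   fresh.load("Grammar.pickle")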

    def _update(self, attrs: Dict[str, Any]) -> None:
        for k, v in attrs.items():
            setattr(self, k, v)

    def load(self, filename: Path) -> None:
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self._update(d)

    def loads(self, pkl: bytes) -> None:
        """Load the grammar tables from a pickle bytes object."""
        self._update(pickle.loads(pkl))

    def copy(self: _P) -> _P:
        """
        Copy the grammar.
        """
        new = self.__class__()
        for dict_attr in (
            "symbol2number",
            "number2symbol",
            "dfas",
            "keywords",
            "soft_keywords",
            "tokens",
            "symbol2label",
        ):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
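        # The list attributes are copied shallowly; the nested state and
        # arc lists are assumed to be treated as read-only downstream.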
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        new.version = self.version
        new.async_keywords = self.async_keywords
        return new

    def report(self) -> None:
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint

        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)


# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
:= COLONEQUAL
"""

opmap = {}
for line in opmap_raw.splitlines():
    if line:
        op, name = line.split()
        opmap[op] = getattr(token, name)
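
# Example: opmap["+="] is token.PLUSEQUAL, letting a driver replace the
# generic OP code reported by the tokenize module with the specific
# token code the parser's DFAs expect.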