import contextlib
import re
from dataclasses import dataclass
from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union

from .specifiers import Specifier


@dataclass
class Token:
    name: str
    text: str
    position: int


class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: Tuple[int, int],
    ) -> None:
        self.span = span
        self.message = message
        self.source = source

        super().__init__()

    def __str__(self) -> str:
        marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
        return "\n    ".join([self.message, self.source, marker])

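# Added illustration (not in the original source): how __str__ renders the
# error. Given ParserSyntaxError("Expected a closing bracket", source="x[1",
# span=(1, 3)), str(error) yields the message, the source, and a marker line
# whose tildes underline the span and whose caret points just past it:
#
#     Expected a closing bracket
#         x[1
#          ~~^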

DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extra
            )\b
        """,
        re.VERBOSE,
    ),
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": r"\@",
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    "WS": r"[ \t]+",
    "END": r"$",
}
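# Rough sketch (added, not part of the module): for the marker fragment
# 'os_name == "nt"', a parser driving these rules would match VARIABLE
# ("os_name"), WS, OP ("=="), WS, QUOTED_STRING ('"nt"'), then END. Note
# that QUOTED_STRING has no escape handling: a quote of the same kind as
# the opening one always terminates the string.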


class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next
    token matches.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: "Dict[str, Union[str, re.Pattern[str]]]",
    ) -> None:
        self.source = source
        self.rules: Dict[str, re.Pattern[str]] = {
            name: re.compile(pattern) for name, pattern in rules.items()
        }
        self.next_token: Optional[Token] = None
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        assert (
            self.next_token is None
        ), f"Cannot check for {name!r}, already have {self.next_token!r}"
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

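    # Added illustration of the check/read contract: a successful non-peek
    # check caches the matched token, which must be consumed before the next
    # check, e.g.
    #
    #     if tokenizer.check("COMMA"):
    #         tokenizer.read()  # required before checking anything else
    #     if tokenizer.check("END", peek=True):
    #         ...  # peek caches nothing, so another check is fine
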
    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        The token is read and returned.
        """
        if not self.check(name):
            raise self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> Token:
        """Consume the next token and return it."""
        token = self.next_token
        assert token is not None

        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: Optional[int] = None,
        span_end: Optional[int] = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position."""
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

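    # Added note: span defaults to the zero-width range (position, position),
    # so a bare raise_syntax_error(...) points the caret at the current
    # cursor, while span_start widens the marker back to an earlier opening
    # token, as enclosing_tokens does below.
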
    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
        """Match a pair of enclosing tokens around the wrapped block.

        If the opening token is present, the closing token must follow once
        the block has run; otherwise a syntax error is raised spanning back
        to the opening token.
        """
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()
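

# Minimal usage sketch (added; hypothetical, not part of the module). The
# real parser in this package layers grammar-specific functions over these
# primitives, but driven by hand the tokenizer works like this:
#
#     tokenizer = Tokenizer("[security]", rules=DEFAULT_RULES)
#     with tokenizer.enclosing_tokens(
#         "LEFT_BRACKET", "RIGHT_BRACKET", around="extras"
#     ):
#         name = tokenizer.expect("IDENTIFIER", expected="extra name").text
#     tokenizer.expect("END", expected="end of input")
#     assert name == "security"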