1 | """Handwritten parser of dependency specifiers. |
2 | ||
3 | The docstring for each __parse_* function contains ENBF-inspired grammar representing | |
4 | the implementation. | |
5 | """ | |
6 | ||
7 | import ast | |
8 | from typing import Any, List, NamedTuple, Optional, Tuple, Union | |
9 | ||
10 | from ._tokenizer import DEFAULT_RULES, Tokenizer | |


class Node:
    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}('{self}')>"

    def serialize(self) -> str:
        raise NotImplementedError


class Variable(Node):
    def serialize(self) -> str:
        return str(self)


class Value(Node):
    def serialize(self) -> str:
        return f'"{self}"'


class Op(Node):
    def serialize(self) -> str:
        return str(self)


MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definition
# https://github.com/python/mypy/issues/731
MarkerAtom = Any
MarkerList = List[Any]
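
# Illustration of the intended shapes (not used at runtime): a marker such as
#     python_version > "3.6" and (os_name == "posix" or os_name == "nt")
# parses into a MarkerList that is flat per nesting level, with parenthesized
# groups becoming nested lists:
#     [
#         (Variable("python_version"), Op(">"), Value("3.6")),
#         "and",
#         [
#             (Variable("os_name"), Op("=="), Value("posix")),
#             "or",
#             (Variable("os_name"), Op("=="), Value("nt")),
#         ],
#     ]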


class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: List[str]
    specifier: str
    marker: Optional[MarkerList]


# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
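
# For example (illustrative, derived from the grammar below):
#     parse_requirement("name[extra1,extra2]>=1.0; python_version < '3.8'")
# returns a ParsedRequirement with name="name", url="", extras=["extra1", "extra2"],
# specifier=">=1.0", and marker=[(Variable("python_version"), Op("<"), Value("3.8"))].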


def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    name = name_token.text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)


def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """

    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)
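
# Illustrative inputs for the two branches above (not executed here):
#     "pip @ https://github.com/pypa/pip/archive/22.0.2.zip"  # URL branch
#     "pip >=22.0, <23.0 ; python_version >= '3.7'"           # specifier branch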


def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """

    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return marker


def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return extras


def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    extras: List[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras
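
# For example (illustrative): the bracketed source "[security, tests]" yields
# ["security", "tests"], while "[security tests]" raises a syntax error because
# two extra names appear without a separating comma.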


def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return parsed_specifiers


def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers
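
# For example (illustrative): ">=1.0 , <2.0" is returned as ">=1.0,<2.0" --
# SPECIFIER and COMMA token texts are concatenated while whitespace is dropped.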


# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))


def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    retval = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return retval


def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression
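
# Note (descriptive): the result is a flat list at each nesting level, e.g.
#     [atom, "and", atom, "or", atom]
# "and"/"or" precedence is not encoded here; parenthesized groups become
# nested lists via _parse_marker_atom below.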


def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """

    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume("WS")
    return marker


def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (marker_var_left, marker_op, marker_var_right)


def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    elif tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message="Expected a marker variable or quoted string"
        )


def process_env_var(env_var: str) -> Variable:
    if (
        env_var == "platform_python_implementation"
        or env_var == "python_implementation"
    ):
        return Variable("platform_python_implementation")
    else:
        return Variable(env_var)


def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))
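
# For example (illustrative): process_python_str("'posix'") evaluates the quoted
# literal with ast.literal_eval and returns Value("posix").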


def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    elif tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    elif tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            "Expected marker operator, one of "
            "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
        )
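

if __name__ == "__main__":
    # Minimal smoke test, a sketch rather than part of the public API. Because of
    # the relative import above, run it as a module: `python -m packaging._parser`
    # (assuming this file lives at packaging/_parser.py on the import path).
    req = parse_requirement("requests[security] >=2.8.1 ; python_version < '3.12'")
    print(req)
    print(parse_marker("os_name == 'posix' and python_version >= '3.8'"))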