]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | """ |
2 | Generating lines of code. | |
3 | """ | |
4 | import sys | |
5 | from dataclasses import replace | |
6 | from enum import Enum, auto | |
7 | from functools import partial, wraps | |
8 | from typing import Collection, Iterator, List, Optional, Set, Union, cast | |
9 | ||
10 | from black.brackets import ( | |
11 | COMMA_PRIORITY, | |
12 | DOT_PRIORITY, | |
13 | get_leaves_inside_matching_brackets, | |
14 | max_delimiter_priority_in_atom, | |
15 | ) | |
16 | from black.comments import FMT_OFF, generate_comments, list_comments | |
17 | from black.lines import ( | |
18 | Line, | |
19 | RHSResult, | |
20 | append_leaves, | |
21 | can_be_split, | |
22 | can_omit_invisible_parens, | |
23 | is_line_short_enough, | |
24 | line_to_string, | |
25 | ) | |
26 | from black.mode import Feature, Mode, Preview | |
27 | from black.nodes import ( | |
28 | ASSIGNMENTS, | |
29 | BRACKETS, | |
30 | CLOSING_BRACKETS, | |
31 | OPENING_BRACKETS, | |
32 | RARROW, | |
33 | STANDALONE_COMMENT, | |
34 | STATEMENT, | |
35 | WHITESPACE, | |
36 | Visitor, | |
37 | ensure_visible, | |
38 | is_arith_like, | |
39 | is_async_stmt_or_funcdef, | |
40 | is_atom_with_invisible_parens, | |
41 | is_docstring, | |
42 | is_empty_tuple, | |
43 | is_lpar_token, | |
44 | is_multiline_string, | |
45 | is_name_token, | |
46 | is_one_sequence_between, | |
47 | is_one_tuple, | |
48 | is_rpar_token, | |
49 | is_stub_body, | |
50 | is_stub_suite, | |
51 | is_tuple_containing_walrus, | |
52 | is_type_ignore_comment_string, | |
53 | is_vararg, | |
54 | is_walrus_assignment, | |
55 | is_yield, | |
56 | syms, | |
57 | wrap_in_parentheses, | |
58 | ) | |
59 | from black.numerics import normalize_numeric_literal | |
60 | from black.strings import ( | |
61 | fix_docstring, | |
62 | get_string_prefix, | |
63 | normalize_string_prefix, | |
64 | normalize_string_quotes, | |
65 | normalize_unicode_escape_sequences, | |
66 | ) | |
67 | from black.trans import ( | |
68 | CannotTransform, | |
69 | StringMerger, | |
70 | StringParenStripper, | |
71 | StringParenWrapper, | |
72 | StringSplitter, | |
73 | Transformer, | |
74 | hug_power_op, | |
75 | ) | |
76 | from blib2to3.pgen2 import token | |
77 | from blib2to3.pytree import Leaf, Node | |
78 | ||
79 | # types | |
80 | LeafID = int | |
81 | LN = Union[Leaf, Node] | |
82 | ||
83 | ||
class CannotSplit(CannotTransform):
    """Raised when no readable split fitting the allotted line length exists."""
86 | ||
87 | ||
88 | # This isn't a dataclass because @dataclass + Generic breaks mypyc. | |
89 | # See also https://github.com/mypyc/mypyc/issues/827. | |
class LineGenerator(Visitor[Line]):
    """Generates reformatted Line objects.  Empty lines are not emitted.

    Leaves are accumulated on `self.current_line`; the `line()` helper hands
    the accumulated line to the caller and starts a fresh one.

    Note: destroys the tree it's visiting by mutating prefixes of its leaves
    in ways that will no longer stringify to valid Python code on the tree.
    """

    def __init__(self, mode: Mode, features: Collection[Feature]) -> None:
        """Store `mode` and `features`, then run `__post_init__()`.

        `__post_init__()` creates the first `current_line` and registers the
        per-statement `visit_*` aliases.
        """
        self.mode = mode
        self.features = features
        # Declared here for type checkers only; assigned in __post_init__().
        self.current_line: Line
        self.__post_init__()

    def line(self, indent: int = 0) -> Iterator[Line]:
        """Generate a line.

        If the line is empty, only emit if it makes sense.
        If the line is too long, split it first and then generate.

        If any lines were generated, set up a new current_line.

        `indent` is added to the depth of the line that follows (or to the
        depth of the current line when that line is still empty).
        """
        if not self.current_line:
            self.current_line.depth += indent
            return  # Line is empty, don't emit. Creating a new one unnecessary.

        if (
            Preview.improved_async_statements_handling in self.mode
            and len(self.current_line.leaves) == 1
            and is_async_stmt_or_funcdef(self.current_line.leaves[0])
        ):
            # Special case for async def/for/with statements. `visit_async_stmt`
            # adds an `ASYNC` leaf then visits the child def/for/with statement
            # nodes. Line yields from those nodes shouldn't treat the former
            # `ASYNC` leaf as a complete line.
            return

        complete_line = self.current_line
        # The replacement line must exist before yielding, because the consumer
        # may resume this generator only after more leaves have been visited.
        self.current_line = Line(mode=self.mode, depth=complete_line.depth + indent)
        yield complete_line

    def visit_default(self, node: LN) -> Iterator[Line]:
        """Default `visit_*()` implementation. Recurses to children of `node`.

        For a leaf, the comments produced by `generate_comments(node)` are
        placed first, then the leaf itself is normalized and appended to the
        current line unless its type is in `WHITESPACE`.
        """
        if isinstance(node, Leaf):
            any_open_brackets = self.current_line.bracket_tracker.any_open_brackets()
            for comment in generate_comments(node):
                if any_open_brackets:
                    # any comment within brackets is subject to splitting
                    self.current_line.append(comment)
                elif comment.type == token.COMMENT:
                    # regular trailing comment
                    self.current_line.append(comment)
                    yield from self.line()

                else:
                    # regular standalone comment
                    yield from self.line()

                    self.current_line.append(comment)
                    yield from self.line()

            normalize_prefix(node, inside_brackets=any_open_brackets)
            if self.mode.string_normalization and node.type == token.STRING:
                node.value = normalize_string_prefix(node.value)
                node.value = normalize_string_quotes(node.value)
            if node.type == token.NUMBER:
                normalize_numeric_literal(node)
            if node.type not in WHITESPACE:
                self.current_line.append(node)
        yield from super().visit_default(node)

    def visit_test(self, node: Node) -> Iterator[Line]:
        """Visit an `x if y else z` test"""

        if Preview.parenthesize_conditional_expressions in self.mode:
            already_parenthesized = (
                node.prev_sibling and node.prev_sibling.type == token.LPAR
            )

            if not already_parenthesized:
                # Empty-valued parens are "invisible": they are children of the
                # node but carry no text of their own.
                lpar = Leaf(token.LPAR, "")
                rpar = Leaf(token.RPAR, "")
                node.insert_child(0, lpar)
                node.append_child(rpar)

        yield from self.visit_default(node)

    def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
        """Increase indentation level, maybe yield a line."""
        # In blib2to3 INDENT never holds comments.
        yield from self.line(+1)
        yield from self.visit_default(node)

    def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
        """Decrease indentation level, maybe yield a line."""
        # The current line might still wait for trailing comments.  At DEDENT time
        # there won't be any (they would be prefixes on the preceding NEWLINE).
        # Emit the line then.
        yield from self.line()

        # While DEDENT has no value, its prefix may contain standalone comments
        # that belong to the current indentation level.  Get 'em.
        yield from self.visit_default(node)

        # Finally, emit the dedent.
        yield from self.line(-1)

    def visit_stmt(
        self, node: Node, keywords: Set[str], parens: Set[str]
    ) -> Iterator[Line]:
        """Visit a statement.

        This implementation is shared for `if`, `while`, `for`, `try`, `except`,
        `def`, `with`, `class`, `assert`, and assignments.

        The relevant Python language `keywords` for a given statement will be
        NAME leaves within it. This method puts those on a separate line.

        `parens` holds a set of string leaf values immediately after which
        invisible parens should be put.
        """
        normalize_invisible_parens(
            node, parens_after=parens, mode=self.mode, features=self.features
        )
        for child in node.children:
            if is_name_token(child) and child.value in keywords:
                # Flush whatever is pending so the keyword starts its own line.
                yield from self.line()

            yield from self.visit(child)

    def visit_typeparams(self, node: Node) -> Iterator[Line]:
        """Visit a type-parameter list, then clear the prefix of its first child."""
        yield from self.visit_default(node)
        node.children[0].prefix = ""

    def visit_typevartuple(self, node: Node) -> Iterator[Line]:
        """Visit a typevartuple node, then clear the prefix of its second child."""
        yield from self.visit_default(node)
        node.children[1].prefix = ""

    def visit_paramspec(self, node: Node) -> Iterator[Line]:
        """Visit a paramspec node, then clear the prefix of its second child."""
        yield from self.visit_default(node)
        node.children[1].prefix = ""

    def visit_dictsetmaker(self, node: Node) -> Iterator[Line]:
        """Visit the contents of a dict/set display.

        With `Preview.wrap_long_dict_values_in_parens` enabled, every child that
        directly follows a COLON is prepared for wrapping in invisible
        parentheses before the node is visited.
        """
        if Preview.wrap_long_dict_values_in_parens in self.mode:
            for i, child in enumerate(node.children):
                if i == 0:
                    # The first child has no predecessor to compare against.
                    continue
                if node.children[i - 1].type == token.COLON:
                    if child.type == syms.atom and child.children[0].type == token.LPAR:
                        # Already parenthesized by the author.
                        # NOTE(review): presumably a truthy result here means the
                        # existing parens could not be made invisible - confirm
                        # against maybe_make_parens_invisible_in_atom().
                        if maybe_make_parens_invisible_in_atom(
                            child,
                            parent=node,
                            remove_brackets_around_comma=False,
                        ):
                            wrap_in_parentheses(node, child, visible=False)
                    else:
                        wrap_in_parentheses(node, child, visible=False)
        yield from self.visit_default(node)

    def visit_funcdef(self, node: Node) -> Iterator[Line]:
        """Visit function definition."""
        yield from self.line()

        # Remove redundant brackets around return type annotation.
        is_return_annotation = False
        for child in node.children:
            if child.type == token.RARROW:
                # The child right after `->` is the return annotation.
                is_return_annotation = True
            elif is_return_annotation:
                if child.type == syms.atom and child.children[0].type == token.LPAR:
                    if maybe_make_parens_invisible_in_atom(
                        child,
                        parent=node,
                        remove_brackets_around_comma=False,
                    ):
                        wrap_in_parentheses(node, child, visible=False)
                else:
                    wrap_in_parentheses(node, child, visible=False)
                # Only the single child following `->` is treated this way.
                is_return_annotation = False

        for child in node.children:
            yield from self.visit(child)

    def visit_match_case(self, node: Node) -> Iterator[Line]:
        """Visit either a match or case statement."""
        normalize_invisible_parens(
            node, parens_after=set(), mode=self.mode, features=self.features
        )

        yield from self.line()
        for child in node.children:
            yield from self.visit(child)

    def visit_suite(self, node: Node) -> Iterator[Line]:
        """Visit a suite."""
        if (
            self.mode.is_pyi or Preview.dummy_implementations in self.mode
        ) and is_stub_suite(node):
            # For a stub suite only the third child is visited; the children
            # before it are skipped.
            yield from self.visit(node.children[2])
        else:
            yield from self.visit_default(node)

    def visit_simple_stmt(self, node: Node) -> Iterator[Line]:
        """Visit a statement without nested statements."""
        prev_type: Optional[int] = None
        for child in node.children:
            # Only the first child, or a child right after a semicolon, is
            # considered for invisible parentheses.
            if (prev_type is None or prev_type == token.SEMI) and is_arith_like(child):
                wrap_in_parentheses(node, child, visible=False)
            prev_type = child.type

        # A simple statement whose parent is a compound statement stands in for
        # that statement's body.
        is_suite_like = node.parent and node.parent.type in STATEMENT
        if is_suite_like:
            if (
                self.mode.is_pyi or Preview.dummy_implementations in self.mode
            ) and is_stub_body(node):
                yield from self.visit_default(node)
            else:
                yield from self.line(+1)
                yield from self.visit_default(node)
                yield from self.line(-1)

        else:
            if (
                not (self.mode.is_pyi or Preview.dummy_implementations in self.mode)
                or not node.parent
                or not is_stub_suite(node.parent)
            ):
                yield from self.line()
            yield from self.visit_default(node)

    def visit_async_stmt(self, node: Node) -> Iterator[Line]:
        """Visit `async def`, `async for`, `async with`."""
        yield from self.line()

        # A shared iterator lets `next(children)` below pick up right after the
        # child on which the loop stopped.
        children = iter(node.children)
        for child in children:
            yield from self.visit(child)

            if child.type == token.ASYNC or child.type == STANDALONE_COMMENT:
                # STANDALONE_COMMENT happens when `# fmt: skip` is applied on the async
                # line.
                break

        internal_stmt = next(children)
        if Preview.improved_async_statements_handling in self.mode:
            yield from self.visit(internal_stmt)
        else:
            for child in internal_stmt.children:
                yield from self.visit(child)

    def visit_decorators(self, node: Node) -> Iterator[Line]:
        """Visit decorators."""
        for child in node.children:
            yield from self.line()
            yield from self.visit(child)

    def visit_power(self, node: Node) -> Iterator[Line]:
        """Visit a power node.

        A NUMBER leaf directly followed by an attribute trailer is wrapped in
        parentheses first (hex, binary, octal and complex literals excepted),
        then `remove_await_parens()` is applied to the node.
        """
        for idx, leaf in enumerate(node.children[:-1]):
            next_leaf = node.children[idx + 1]

            if not isinstance(leaf, Leaf):
                continue

            value = leaf.value.lower()
            if (
                leaf.type == token.NUMBER
                and next_leaf.type == syms.trailer
                # Ensure that we are in an attribute trailer
                and next_leaf.children[0].type == token.DOT
                # It shouldn't wrap hexadecimal, binary and octal literals
                and not value.startswith(("0x", "0b", "0o"))
                # It shouldn't wrap complex literals
                and "j" not in value
            ):
                wrap_in_parentheses(node, leaf)

        remove_await_parens(node)

        yield from self.visit_default(node)

    def visit_SEMI(self, leaf: Leaf) -> Iterator[Line]:
        """Remove a semicolon and put the other statement on a separate line."""
        # The semicolon leaf itself is never appended to a line.
        yield from self.line()

    def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]:
        """End of file. Process outstanding comments and end with a newline."""
        yield from self.visit_default(leaf)
        yield from self.line()

    def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a standalone comment, flushing the pending line first unless
        brackets are open on the current line.
        """
        if not self.current_line.bracket_tracker.any_open_brackets():
            yield from self.line()
        yield from self.visit_default(leaf)

    def visit_factor(self, node: Node) -> Iterator[Line]:
        """Force parentheses between a unary op and a binary power:

        -2 ** 8 -> -(2 ** 8)
        """
        _operator, operand = node.children
        if (
            operand.type == syms.power
            and len(operand.children) == 3
            and operand.children[1].type == token.DOUBLESTAR
        ):
            lpar = Leaf(token.LPAR, "(")
            rpar = Leaf(token.RPAR, ")")
            # Detach the operand and re-insert it, wrapped in a visible atom,
            # at the index `remove()` reported (0 if it reported nothing).
            index = operand.remove() or 0
            node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
        yield from self.visit_default(node)

    def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
        """Visit a string leaf, reformatting it first when it is a docstring.

        For a docstring the prefix and quotes are split off, the body is
        re-indented (multiline) or stripped (single line) and padded where a
        quote or backslash would otherwise touch the closing quotes, and the
        pieces are reassembled into `leaf.value`.
        """
        if Preview.hex_codes_in_unicode_sequences in self.mode:
            normalize_unicode_escape_sequences(leaf)

        if is_docstring(leaf) and "\\\n" not in leaf.value:
            # We're ignoring docstrings with backslash newline escapes because changing
            # indentation of those changes the AST representation of the code.
            if self.mode.string_normalization:
                docstring = normalize_string_prefix(leaf.value)
                # visit_default() does handle string normalization for us, but
                # since this method acts differently depending on quote style (ex.
                # see padding logic below), there's a possibility for unstable
                # formatting as visit_default() is called *after*. To avoid a
                # situation where this function formats a docstring differently on
                # the second pass, normalize it early.
                docstring = normalize_string_quotes(docstring)
            else:
                docstring = leaf.value
            prefix = get_string_prefix(docstring)
            docstring = docstring[len(prefix) :]  # Remove the prefix
            quote_char = docstring[0]
            # A natural way to remove the outer quotes is to do:
            #   docstring = docstring.strip(quote_char)
            # but that breaks on """""x""" (which is '""x').
            # So we actually need to remove the first character and the next two
            # characters but only if they are the same as the first.
            quote_len = 1 if docstring[1] != quote_char else 3
            docstring = docstring[quote_len:-quote_len]
            # Remembered so a docstring that was empty to begin with stays empty.
            docstring_started_empty = not docstring
            # Four spaces per depth level of the current line.
            indent = " " * 4 * self.current_line.depth

            if is_multiline_string(leaf):
                docstring = fix_docstring(docstring, indent)
            else:
                docstring = docstring.strip()

            has_trailing_backslash = False
            if docstring:
                # Add some padding if the docstring starts / ends with a quote mark.
                if docstring[0] == quote_char:
                    docstring = " " + docstring
                if docstring[-1] == quote_char:
                    docstring += " "
                if docstring[-1] == "\\":
                    backslash_count = len(docstring) - len(docstring.rstrip("\\"))
                    if backslash_count % 2:
                        # Odd number of tailing backslashes, add some padding to
                        # avoid escaping the closing string quote.
                        docstring += " "
                        has_trailing_backslash = True
            elif not docstring_started_empty:
                # Content that reduced to nothing is kept as a single space.
                docstring = " "

            # We could enforce triple quotes at this point.
            quote = quote_char * quote_len

            # It's invalid to put closing single-character quotes on a new line.
            # NOTE(review): `self.mode` is assigned unconditionally in __init__, so
            # this operand presumably never changes the outcome - confirm.
            if self.mode and quote_len == 3:
                # We need to find the length of the last line of the docstring
                # to find if we can add the closing quotes to the line without
                # exceeding the maximum line length.
                # If docstring is one line, we don't put the closing quotes on a
                # separate line because it looks ugly (#3320).
                lines = docstring.splitlines()
                last_line_length = len(lines[-1]) if docstring else 0

                # If adding closing quotes would cause the last line to exceed
                # the maximum line length then put a line break before the
                # closing quotes
                if (
                    len(lines) > 1
                    and last_line_length + quote_len > self.mode.line_length
                    and len(indent) + quote_len <= self.mode.line_length
                    and not has_trailing_backslash
                ):
                    leaf.value = prefix + quote + docstring + "\n" + indent + quote
                else:
                    leaf.value = prefix + quote + docstring + quote
            else:
                leaf.value = prefix + quote + docstring + quote

        yield from self.visit_default(leaf)

    def __post_init__(self) -> None:
        """You are in a twisty little maze of passages."""
        self.current_line = Line(mode=self.mode)

        # Most statement visitors are `visit_stmt` with `keywords` and `parens`
        # pre-bound; `v` and the empty set below are just short aliases.
        v = self.visit_stmt
        Ć: Set[str] = set()
        self.visit_assert_stmt = partial(v, keywords={"assert"}, parens={"assert", ","})
        self.visit_if_stmt = partial(
            v, keywords={"if", "else", "elif"}, parens={"if", "elif"}
        )
        self.visit_while_stmt = partial(v, keywords={"while", "else"}, parens={"while"})
        self.visit_for_stmt = partial(v, keywords={"for", "else"}, parens={"for", "in"})
        self.visit_try_stmt = partial(
            v, keywords={"try", "except", "else", "finally"}, parens=Ć
        )
        self.visit_except_clause = partial(v, keywords={"except"}, parens={"except"})
        self.visit_with_stmt = partial(v, keywords={"with"}, parens={"with"})
        self.visit_classdef = partial(v, keywords={"class"}, parens=Ć)
        self.visit_expr_stmt = partial(v, keywords=Ć, parens=ASSIGNMENTS)
        self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
        self.visit_import_from = partial(v, keywords=Ć, parens={"import"})
        self.visit_del_stmt = partial(v, keywords=Ć, parens={"del"})
        self.visit_async_funcdef = self.visit_async_stmt
        self.visit_decorated = self.visit_decorators

        # PEP 634
        self.visit_match_stmt = self.visit_match_case
        self.visit_case_block = self.visit_match_case
511 | ||
512 | ||
def transform_line(
    line: Line, mode: Mode, features: Collection[Feature] = ()
) -> Iterator[Line]:
    """Yield the line(s) that `line` becomes after transformation.

    Candidate transformers are tried in order; the output of the first one
    that does not raise `CannotTransform` is yielded.  When none applies, the
    original `line` is yielded unchanged.  Results should fit in the allotted
    `line_length` but might not be able to.

    `features` are syntactical features that may be used in the output.
    """
    if line.is_comment:
        yield line
        return

    line_str = line_to_string(line)

    max_length = mode.line_length
    normalize_strings = mode.string_normalization
    string_merge = StringMerger(max_length, normalize_strings)
    string_paren_strip = StringParenStripper(max_length, normalize_strings)
    string_split = StringSplitter(max_length, normalize_strings)
    string_paren_wrap = StringParenWrapper(max_length, normalize_strings)

    leave_unsplit = (
        not (
            line.contains_uncollapsable_type_comments()
            or line.should_split_rhs
            or line.magic_trailing_comma
        )
        and (
            is_line_short_enough(line, mode=mode, line_str=line_str)
            or line.contains_unsplittable_type_ignore()
        )
        and not (line.inside_brackets and line.contains_standalone_comments())
    )

    transformers: List[Transformer]
    if leave_unsplit:
        # Lines shouldn't be split here, so at most basic string preprocessing
        # is applied.
        transformers = (
            [string_merge, string_paren_strip]
            if Preview.string_processing in mode
            else []
        )
    elif line.is_def:
        transformers = [left_hand_split]
    else:

        def _rhs(
            self: object, line: Line, features: Collection[Feature], mode: Mode
        ) -> Iterator[Line]:
            """Wraps calls to `right_hand_split`.

            Each attempt omits more right-hand trailers (bracket pairs with
            content), gluing them together so that the split lands on another
            bracket pair instead.
            """
            for omit in generate_trailers_to_omit(line, mode.line_length):
                candidate = list(right_hand_split(line, mode, features, omit=omit))
                # Note: only the first line of the *current* transformation can be
                # checked against the line length here.  That is conclusive only
                # for simple cases; everything else needs further transforms via
                # `transform_line()`, and this check can't know if they succeed.
                if is_line_short_enough(candidate[0], mode=mode):
                    yield from candidate
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
            # reported as not fitting a single line, as well as lines that contain
            # trailing commas (those have to be exploded).
            yield from right_hand_split(line, mode, features=features)

        # HACK: nested functions (like _rhs) compiled by mypyc don't retain their
        # __name__ attribute which is needed in `run_transformer` further down.
        # Unfortunately a nested class breaks mypyc too. So a class must be created
        # via type ... https://github.com/mypyc/mypyc/issues/884
        rhs = type("rhs", (), {"__call__": _rhs})()

        if Preview.string_processing in mode:
            transformers = [string_merge, string_paren_strip, string_split]
            if line.inside_brackets:
                transformers += [delimiter_split, standalone_comment_split]
            transformers += [string_paren_wrap, rhs]
        elif line.inside_brackets:
            transformers = [delimiter_split, standalone_comment_split, rhs]
        else:
            transformers = [rhs]
    # It's always safe to attempt hugging of power operations and pretty much every
    # line could match.
    transformers.append(hug_power_op)

    for transform in transformers:
        # The transformer's full output is collected before anything is yielded,
        # so a failing transformer can be skipped in favour of the next one, or
        # of the original line.
        try:
            result = run_transformer(line, transform, mode, features, line_str=line_str)
        except CannotTransform:
            continue
        yield from result
        return

    yield line
628 | ||
629 | ||
class _BracketSplitComponent(Enum):
    """Names the part of a bracket split that a line is being built for."""

    # Explicit values matching what `auto()` generates (1, 2, 3 in order).
    head = 1
    body = 2
    tail = 3
634 | ||
635 | ||
def left_hand_split(
    line: Line, _features: Collection[Feature], mode: Mode
) -> Iterator[Line]:
    """Split `line` on its first matching bracket pair into head, body and tail.

    Leaves up to and including the first opening bracket form the head, the
    leaves inside that pair form the body, and the closing bracket plus
    whatever follows form the tail.  Non-empty parts are yielded in that order.

    Note: this usually looks weird, only use this for function definitions.
    Prefer RHS otherwise.  This is why this function is not symmetrical with
    :func:`right_hand_split` which also handles optional parentheses.

    Raises `CannotSplit` when the line has no opening bracket.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    bucket = head_leaves
    matching_bracket: Optional[Leaf] = None
    for leaf in line.leaves:
        closes_body = (
            bucket is body_leaves
            and leaf.type in CLOSING_BRACKETS
            and leaf.opening_bracket is matching_bracket
            and isinstance(matching_bracket, Leaf)
        )
        if closes_body:
            ensure_visible(leaf)
            ensure_visible(matching_bracket)
            # An empty body sends the closing bracket back to the head.
            bucket = tail_leaves if body_leaves else head_leaves
        bucket.append(leaf)
        if bucket is head_leaves and leaf.type in OPENING_BRACKETS:
            matching_bracket = leaf
            bucket = body_leaves
    if not matching_bracket:
        raise CannotSplit("No brackets found")

    head, body, tail = [
        bracket_split_build_line(leaves, line, matching_bracket, component=part)
        for leaves, part in (
            (head_leaves, _BracketSplitComponent.head),
            (body_leaves, _BracketSplitComponent.body),
            (tail_leaves, _BracketSplitComponent.tail),
        )
    ]
    bracket_split_succeeded_or_raise(head, body, tail)
    yield from (piece for piece in (head, body, tail) if piece)
681 | ||
682 | ||
def right_hand_split(
    line: Line,
    mode: Mode,
    features: Collection[Feature] = (),
    omit: Collection[LeafID] = (),
) -> Iterator[Line]:
    """Split `line` into several lines on its last matching bracket pair.

    When that pair turns out to be optional parentheses, a split without them
    is attempted as well.  `omit` holds the ids of closing brackets that must
    not be used for this split.

    Note: running this function modifies `bracket_depth` on the leaves of `line`.
    """
    first_attempt = _first_right_hand_split(line, omit=omit)
    resulting_lines = _maybe_split_omitting_optional_parens(
        first_attempt, line, mode, features=features, omit=omit
    )
    yield from resulting_lines
701 | ||
702 | ||
def _first_right_hand_split(
    line: Line,
    omit: Collection[LeafID] = (),
) -> RHSResult:
    """Split `line` into head, body and tail around its last bracket pair.

    The leaves are walked right to left: everything after the last closing
    bracket not listed in `omit` (and that bracket itself) goes to the tail,
    the leaves inside the pair to the body, and the opening bracket plus all
    that precedes it to the head.

    Note: this function should not have side effects.  It's relied upon by
    _maybe_split_omitting_optional_parens to get an opinion whether to prefer
    splitting on the right side of an assignment statement.

    Raises `CannotSplit` when no usable bracket pair is found.
    """
    head_leaves: List[Leaf] = []
    body_leaves: List[Leaf] = []
    tail_leaves: List[Leaf] = []
    bucket = tail_leaves
    opening_bracket: Optional[Leaf] = None
    closing_bracket: Optional[Leaf] = None
    for leaf in reversed(line.leaves):
        if bucket is body_leaves and leaf is opening_bracket:
            # An empty body keeps the opening bracket in the tail.
            bucket = head_leaves if body_leaves else tail_leaves
        bucket.append(leaf)
        if (
            bucket is tail_leaves
            and leaf.type in CLOSING_BRACKETS
            and id(leaf) not in omit
        ):
            opening_bracket = leaf.opening_bracket
            closing_bracket = leaf
            bucket = body_leaves
    if not (opening_bracket and closing_bracket and head_leaves):
        # If there is no opening or closing_bracket that means the split failed and
        # all content is in the tail.  Otherwise, if `head_leaves` are empty, it means
        # the matching `opening_bracket` wasn't available on `line` anymore.
        raise CannotSplit("No brackets found")

    # The leaves were gathered back to front; restore source order.
    for gathered in (tail_leaves, body_leaves, head_leaves):
        gathered.reverse()

    head, body, tail = [
        bracket_split_build_line(leaves, line, opening_bracket, component=part)
        for leaves, part in (
            (head_leaves, _BracketSplitComponent.head),
            (body_leaves, _BracketSplitComponent.body),
            (tail_leaves, _BracketSplitComponent.tail),
        )
    ]
    bracket_split_succeeded_or_raise(head, body, tail)
    return RHSResult(head, body, tail, opening_bracket, closing_bracket)
749 | ||
750 | ||
def _maybe_split_omitting_optional_parens(
    rhs: RHSResult,
    line: Line,
    mode: Mode,
    features: Collection[Feature] = (),
    omit: Collection[LeafID] = (),
) -> Iterator[Line]:
    """Yield the lines of the split `rhs`, or of a split that avoids its parens.

    When the bracket pair `rhs` was split on is a pair of invisible (empty
    valued) parentheses and the conditions below hold, `line` is split again
    with that pair added to `omit`, and this function recurses on the new
    result.  Otherwise, or when that retry raises `CannotSplit` without the
    error being re-raised, both brackets of `rhs` are made visible and its
    non-empty head, body and tail are yielded in that order.

    Raises `CannotSplit` when the retry fails and either the body can neither
    be split nor fits on a line, or the head or tail contains multiline
    strings.
    """
    if (
        Feature.FORCE_OPTIONAL_PARENTHESES not in features
        # the opening bracket is an optional paren
        and rhs.opening_bracket.type == token.LPAR
        and not rhs.opening_bracket.value
        # the closing bracket is an optional paren
        and rhs.closing_bracket.type == token.RPAR
        and not rhs.closing_bracket.value
        # it's not an import (optional parens are the only thing we can split on
        # in this case; attempting a split without them is a waste of time)
        and not line.is_import
        # there are no standalone comments in the body
        and not rhs.body.contains_standalone_comments(0)
        # and we can actually remove the parens
        and can_omit_invisible_parens(rhs, mode.line_length)
    ):
        # Exclude this closing bracket from the next attempt, keeping every
        # id that was already excluded.
        omit = {id(rhs.closing_bracket), *omit}
        try:
            # The RHSResult Omitting Optional Parens.
            rhs_oop = _first_right_hand_split(line, omit=omit)
            # The parenthesized block below lists the conditions under which
            # the current split (`rhs`) is kept; when any of them fails, the
            # split omitting the optional parens is used instead.
            if not (
                Preview.prefer_splitting_right_hand_side_of_assignments in line.mode
                # the split is right after `=`
                and len(rhs.head.leaves) >= 2
                and rhs.head.leaves[-2].type == token.EQUAL
                # the left side of assignment contains brackets
                and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
                # the left side of assignment is short enough (the -1 is for the ending
                # optional paren)
                and is_line_short_enough(
                    rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
                )
                # the left side of assignment won't explode further because of magic
                # trailing comma
                and rhs.head.magic_trailing_comma is None
                # the split by omitting optional parens isn't preferred by some other
                # reason
                and not _prefer_split_rhs_oop(rhs_oop, mode)
            ):
                yield from _maybe_split_omitting_optional_parens(
                    rhs_oop, line, mode, features=features, omit=omit
                )
                return

        except CannotSplit as e:
            # The attempt without the optional parens failed.  Fall through to
            # the split on `rhs` below unless that one cannot work either.
            if not (
                can_be_split(rhs.body) or is_line_short_enough(rhs.body, mode=mode)
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
                ) from e

            elif (
                rhs.head.contains_multiline_strings()
                or rhs.tail.contains_multiline_strings()
            ):
                raise CannotSplit(
                    "The current optional pair of parentheses is bound to fail to"
                    " satisfy the splitting algorithm because the head or the tail"
                    " contains multiline strings which by definition never fit one"
                    " line."
                ) from e

    # The split on `rhs` is used, so its brackets must be made visible.
    ensure_visible(rhs.opening_bracket)
    ensure_visible(rhs.closing_bracket)
    for result in (rhs.head, rhs.body, rhs.tail):
        if result:
            yield result
826 | ||
827 | ||
828 | def _prefer_split_rhs_oop(rhs_oop: RHSResult, mode: Mode) -> bool: | |
829 | """ | |
830 | Returns whether we should prefer the result from a split omitting optional parens. | |
831 | """ | |
832 | has_closing_bracket_after_assign = False | |
833 | for leaf in reversed(rhs_oop.head.leaves): | |
834 | if leaf.type == token.EQUAL: | |
835 | break | |
836 | if leaf.type in CLOSING_BRACKETS: | |
837 | has_closing_bracket_after_assign = True | |
838 | break | |
839 | return ( | |
840 | # contains matching brackets after the `=` (done by checking there is a | |
841 | # closing bracket) | |
842 | has_closing_bracket_after_assign | |
843 | or ( | |
844 | # the split is actually from inside the optional parens (done by checking | |
845 | # the first line still contains the `=`) | |
846 | any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves) | |
847 | # the first line is short enough | |
848 | and is_line_short_enough(rhs_oop.head, mode=mode) | |
849 | ) | |
850 | # contains unsplittable type ignore | |
851 | or rhs_oop.head.contains_unsplittable_type_ignore() | |
852 | or rhs_oop.body.contains_unsplittable_type_ignore() | |
853 | or rhs_oop.tail.contains_unsplittable_type_ignore() | |
854 | ) | |
855 | ||
856 | ||
def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
    """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.

    Return silently otherwise.

    Such a split works on one pair of brackets: everything up to and including
    the opening bracket stays on one line (`head`), what sits between the
    brackets goes on its own line (`body`), and the closing bracket together
    with whatever follows it forms a third line (`tail`).

    The split counts as failed when `body` is empty and the stripped `tail` is
    shorter than three characters: either nothing was moved at all (empty tail,
    all content stayed in `head`), or the tail is so short that splitting is not
    worth it.
    """
    tail_width = len(str(tail).strip())
    if body:
        return

    if tail_width == 0:
        raise CannotSplit("Splitting brackets produced the same line")

    if tail_width < 3:
        raise CannotSplit(
            f"Splitting brackets on an empty body to save {tail_width} characters is"
            " not worth it"
        )
881 | ||
882 | ||
def bracket_split_build_line(
    leaves: List[Leaf],
    original: Line,
    opening_bracket: Leaf,
    *,
    component: _BracketSplitComponent,
) -> Line:
    """Build one piece of a bracket split as a new line.

    The new line gets the given `leaves` together with the comments `original`
    holds for each of them.

    For the head component, the leaves reported by
    `get_leaves_inside_matching_brackets()` are appended with bracket tracking
    enabled so trailing commas are respected.

    For the body component, the line is one indent level deeper and marked as
    inside brackets; its first leaf has its prefix normalized and, when
    expected, a trailing comma is inserted into `leaves` (the list is modified
    in place).
    """
    is_body = component is _BracketSplitComponent.body
    built = Line(mode=original.mode, depth=original.depth)
    if is_body:
        built.inside_brackets = True
        built.depth += 1

    if is_body and leaves:
        first = leaves[0]
        # Since body is a new indent level, remove spurious leading whitespace.
        normalize_prefix(first, inside_brackets=True)
        first_parent = first.parent
        # Ensure a trailing comma for imports and standalone function arguments, but
        # be careful not to add one after any comments or within type annotations.
        lone_def_argument = (
            original.is_def
            and opening_bracket.value == "("
            and all(leaf.type != token.COMMA for leaf in leaves)
            # In particular, don't add one within a parenthesized return
            # annotation.  The marker for a return annotation (RARROW) may sit
            # next to the parent node or next to the parent's parent, depending
            # on how complex the annotation is, so both are checked.  This is
            # not perfect: some cases are missed, but those are contexts where
            # a comma is actually fine.
            and not any(
                ancestor.prev_sibling.type == RARROW
                for ancestor in (
                    first_parent,
                    getattr(first_parent, "parent", None),
                )
                if isinstance(ancestor, Node)
                and isinstance(ancestor.prev_sibling, Leaf)
            )
            # One missed case where the comma must not be added: PEP 604 unions.
            and not (
                first_parent
                and first_parent.next_sibling
                and first_parent.next_sibling.type == token.VBAR
            )
        )

        if original.is_import or lone_def_argument:
            # Find the last leaf that is not a standalone comment and put a
            # comma right after it unless it already is one.
            for position in reversed(range(len(leaves))):
                candidate = leaves[position]
                if candidate.type == STANDALONE_COMMENT:
                    continue

                if candidate.type != token.COMMA:
                    leaves.insert(position + 1, Leaf(token.COMMA, ","))
                break

    tracked_ids: Set[LeafID] = (
        get_leaves_inside_matching_brackets(leaves)
        if component is _BracketSplitComponent.head
        else set()
    )
    # Populate the line
    for leaf in leaves:
        built.append(leaf, preformatted=True, track_bracket=id(leaf) in tracked_ids)
        for trailing_comment in original.comments_after(leaf):
            built.append(trailing_comment, preformatted=True)

    if is_body and should_split_line(built, opening_bracket):
        built.should_split_rhs = True
    return built
962 | ||
963 | ||
def dont_increase_indentation(split_func: Transformer) -> Transformer:
    """Decorate `split_func` so each line it yields has a normalized first prefix.

    The first leaf of every line produced by `split_func` is passed through
    `normalize_prefix(..., inside_brackets=True)` before the line is handed on.
    """

    @wraps(split_func)
    def normalizing_wrapper(
        line: Line, features: Collection[Feature], mode: Mode
    ) -> Iterator[Line]:
        produced = split_func(line, features, mode)
        for piece in produced:
            first_leaf = piece.leaves[0]
            normalize_prefix(first_leaf, inside_brackets=True)
            yield piece

    return normalizing_wrapper
979 | ||
980 | ||
def _get_last_non_comment_leaf(line: Line) -> Optional[int]:
    """Return the index of the last leaf of `line` that is not a standalone comment.

    Returns None when `line` has no leaves or consists solely of standalone
    comments.
    """
    # Scan from the end down to and including index 0.  The stop value of
    # `range` is exclusive, so it has to be -1 for the first leaf to be checked.
    for leaf_idx in range(len(line.leaves) - 1, -1, -1):
        if line.leaves[leaf_idx].type != STANDALONE_COMMENT:
            return leaf_idx
    return None
986 | ||
987 | ||
988 | def _safe_add_trailing_comma(safe: bool, delimiter_priority: int, line: Line) -> Line: | |
989 | if ( | |
990 | safe | |
991 | and delimiter_priority == COMMA_PRIORITY | |
992 | and line.leaves[-1].type != token.COMMA | |
993 | and line.leaves[-1].type != STANDALONE_COMMENT | |
994 | ): | |
995 | new_comma = Leaf(token.COMMA, ",") | |
996 | line.append(new_comma) | |
997 | return line | |
998 | ||
999 | ||
@dont_increase_indentation
def delimiter_split(
    line: Line, features: Collection[Feature], mode: Mode
) -> Iterator[Line]:
    """Split according to delimiters of the highest priority.

    If the appropriate Features are given, the split will add trailing commas
    also in function signatures and calls that contain `*` and `**`.

    Raises `CannotSplit` when `line` is empty, when it has no delimiters (the
    last leaf is not taken into account), or when the highest priority is
    `DOT_PRIORITY` and there is only one such delimiter.
    """
    try:
        last_leaf = line.leaves[-1]
    except IndexError:
        raise CannotSplit("Line empty") from None

    bt = line.bracket_tracker
    try:
        # The last leaf is excluded when looking for the highest priority.
        delimiter_priority = bt.max_delimiter_priority(exclude={id(last_leaf)})
    except ValueError:
        raise CannotSplit("No delimiters found") from None

    if delimiter_priority == DOT_PRIORITY:
        if bt.delimiter_count_with_priority(delimiter_priority) == 1:
            raise CannotSplit("Splitting a single attribute from its owner looks wrong")

    # The output line currently being filled; replaced by a fresh one after
    # every yield.
    current_line = Line(
        mode=line.mode, depth=line.depth, inside_brackets=line.inside_brackets
    )
    # Smallest bracket depth seen so far among the leaves of `line`.
    lowest_depth = sys.maxsize
    # Turns false once a vararg is seen for which `features` lacks the matching
    # trailing-comma feature.
    trailing_comma_safe = True

    def append_to_line(leaf: Leaf) -> Iterator[Line]:
        """Append `leaf` to current line or to new line if appending impossible."""
        nonlocal current_line
        try:
            current_line.append_safe(leaf, preformatted=True)
        except ValueError:
            # `append_safe()` refused the leaf: emit what we have and start a
            # new line with it.
            yield current_line

            current_line = Line(
                mode=line.mode, depth=line.depth, inside_brackets=line.inside_brackets
            )
            current_line.append(leaf)

    last_non_comment_leaf = _get_last_non_comment_leaf(line)
    for leaf_idx, leaf in enumerate(line.leaves):
        yield from append_to_line(leaf)

        for comment_after in line.comments_after(leaf):
            yield from append_to_line(comment_after)

        lowest_depth = min(lowest_depth, leaf.bracket_depth)
        # Only varargs at the lowest depth seen so far influence whether a
        # trailing comma is safe.
        if leaf.bracket_depth == lowest_depth:
            if is_vararg(leaf, within={syms.typedargslist}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_DEF in features
                )
            elif is_vararg(leaf, within={syms.arglist, syms.argument}):
                trailing_comma_safe = (
                    trailing_comma_safe and Feature.TRAILING_COMMA_IN_CALL in features
                )

        # When the line ends with a standalone comment, the trailing comma has
        # to go right after the last non-comment leaf instead of at the very end.
        if (
            Preview.add_trailing_comma_consistently in mode
            and last_leaf.type == STANDALONE_COMMENT
            and leaf_idx == last_non_comment_leaf
        ):
            current_line = _safe_add_trailing_comma(
                trailing_comma_safe, delimiter_priority, current_line
            )

        # A delimiter of the chosen priority ends the current output line.
        leaf_priority = bt.delimiters.get(id(leaf))
        if leaf_priority == delimiter_priority:
            yield current_line

            current_line = Line(
                mode=line.mode, depth=line.depth, inside_brackets=line.inside_brackets
            )
    # Emit whatever is left after the last delimiter, with a trailing comma
    # when that is allowed.
    if current_line:
        current_line = _safe_add_trailing_comma(
            trailing_comma_safe, delimiter_priority, current_line
        )
        yield current_line
1082 | ||
1083 | ||
@dont_increase_indentation
def standalone_comment_split(
    line: Line, features: Collection[Feature], mode: Mode
) -> Iterator[Line]:
    """Split standalone comments from the rest of the line.

    Every leaf, followed by the comments `line` holds for it, is appended to an
    output line.  Whenever `append_safe()` rejects an item with `ValueError`,
    the output line built so far is yielded and the rejected item starts a new
    one.  The last non-empty output line is yielded at the end.

    Raises `CannotSplit` if `line` does not contain standalone comments.
    """
    if not line.contains_standalone_comments(0):
        raise CannotSplit("Line does not have any standalone comments")

    def blank_line() -> Line:
        """Return an empty line with the mode, depth and bracket state of `line`."""
        return Line(
            mode=line.mode, depth=line.depth, inside_brackets=line.inside_brackets
        )

    pending = blank_line()
    for leaf in line.leaves:
        for item in (leaf, *line.comments_after(leaf)):
            try:
                pending.append_safe(item, preformatted=True)
            except ValueError:
                yield pending

                pending = blank_line()
                pending.append(item)

    if pending:
        yield pending
1117 | ||
1118 | ||
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce the prefix of `leaf` to newlines only, or to nothing at all.

    Outside of brackets, the newlines found in the prefix are kept: those after
    the last `#` when the prefix contains one (minus one), all of them
    otherwise.  Inside brackets, or when the part of the prefix before the first
    `#` contains a backslash, the prefix is emptied.

    Note: don't use backslashes for formatting or you'll lose your voting rights.
    """
    kept = ""
    if not inside_brackets:
        segments = leaf.prefix.split("#")
        if "\\" not in segments[0]:
            newline_total = segments[-1].count("\n")
            if len(segments) > 1:
                newline_total -= 1
            kept = "\n" * newline_total

    leaf.prefix = kept
1135 | ||
1136 | ||
def normalize_invisible_parens(
    node: Node, parens_after: Set[str], *, mode: Mode, features: Collection[Feature]
) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.

    `mode` and `features` are passed on to `_maybe_wrap_cms_in_parens()` and to
    the recursive calls made for annotated assignments.  `node` is modified in
    place; nothing is done when its prefix contains a `# fmt: off` comment.
    """
    for pc in list_comments(node.prefix, is_endmarker=False):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return

    # The multiple context managers grammar has a different pattern, thus this is
    # separate from the for-loop below. This possibly wraps them in invisible parens,
    # and later will be removed in remove_with_parens when needed.
    if node.type == syms.with_stmt:
        _maybe_wrap_cms_in_parens(node, mode, features)

    # True when the child handled in the current iteration is a candidate for
    # (invisible) parentheses; recomputed at the end of every iteration.
    check_lpar = False
    # Iterate over a copy of the children since the loop body may modify them.
    for index, child in enumerate(list(node.children)):
        # Fixes a bug where invisible parens are not properly stripped from
        # assignment statements that contain type annotations.
        if isinstance(child, Node) and child.type == syms.annassign:
            normalize_invisible_parens(
                child, parens_after=parens_after, mode=mode, features=features
            )

        # Add parentheses around long tuple unpacking in assignments.
        if (
            index == 0
            and isinstance(child, Node)
            and child.type == syms.testlist_star_expr
        ):
            check_lpar = True

        if check_lpar:
            if (
                child.type == syms.atom
                and node.type == syms.for_stmt
                and isinstance(child.prev_sibling, Leaf)
                and child.prev_sibling.type == token.NAME
                and child.prev_sibling.value == "for"
            ):
                # The atom directly after the `for` keyword: brackets around
                # commas may be removed here.
                if maybe_make_parens_invisible_in_atom(
                    child,
                    parent=node,
                    remove_brackets_around_comma=True,
                ):
                    wrap_in_parentheses(node, child, visible=False)
            elif isinstance(child, Node) and node.type == syms.with_stmt:
                remove_with_parens(child, node)
            elif child.type == syms.atom:
                if maybe_make_parens_invisible_in_atom(
                    child,
                    parent=node,
                ):
                    wrap_in_parentheses(node, child, visible=False)
            elif is_one_tuple(child):
                wrap_in_parentheses(node, child, visible=True)
            elif node.type == syms.import_from:
                _normalize_import_from(node, child, index)
                break
            elif (
                index == 1
                and child.type == token.STAR
                and node.type == syms.except_clause
            ):
                # In except* (PEP 654), the star is actually part of
                # the keyword. So we need to skip the insertion of
                # invisible parentheses to work more precisely.
                continue

            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                wrap_in_parentheses(node, child, visible=False)

        comma_check = child.type == token.COMMA

        # The next child is a candidate when this one is a leaf whose value is
        # in `parens_after`, or a comma.
        check_lpar = isinstance(child, Leaf) and (
            child.value in parens_after or comma_check
        )
1221 | ||
1222 | ||
def _normalize_import_from(parent: Node, child: LN, index: int) -> None:
    """Give an "import from" statement invisible parentheses around its names.

    Such nodes store their parentheses directly as children of the statement.
    If `child` is an opening paren, it and the last child of `parent` get an
    empty value.  Otherwise, unless `child` is a star, empty-valued parens are
    inserted at `index` and appended at the end of `parent`.
    """
    if is_lpar_token(child):
        closing = parent.children[-1]
        assert is_rpar_token(closing)
        # make parentheses invisible
        child.value = ""
        closing.value = ""
        return

    if child.type == token.STAR:
        return

    # insert invisible parentheses
    parent.insert_child(index, Leaf(token.LPAR, ""))
    parent.append_child(Leaf(token.RPAR, ""))
1235 | ||
1236 | ||
def remove_await_parens(node: Node) -> None:
    """Hide redundant parentheses around the operand of an `await`.

    Nothing happens unless the first child of `node` is an AWAIT token and the
    second child is an atom starting with an opening paren.  The parens are
    made visible again when the bracketed content is a node other than a power
    node, or a power node that itself starts with AWAIT; in the latter case the
    function recurses into that nested await.
    """
    if node.children[0].type != token.AWAIT or len(node.children) <= 1:
        return

    operand = node.children[1]
    if operand.type != syms.atom or operand.children[0].type != token.LPAR:
        return

    if maybe_make_parens_invisible_in_atom(
        operand,
        parent=node,
        remove_brackets_around_comma=True,
    ):
        wrap_in_parentheses(node, operand, visible=False)

    # Since await is an expression we shouldn't remove
    # brackets in cases where this would change
    # the AST due to operator precedence.
    # Therefore we only aim to remove brackets around
    # power nodes that aren't also await expressions themselves.
    # https://peps.python.org/pep-0492/#updated-operator-precedence-table
    # N.B. We've still removed any redundant nested brackets though :)
    #
    # The second child is looked up again on purpose: the calls above may have
    # changed the tree.
    bracketed = node.children[1]
    opening_bracket = cast(Leaf, bracketed.children[0])
    closing_bracket = cast(Leaf, bracketed.children[-1])
    bracket_contents = bracketed.children[1]
    if not isinstance(bracket_contents, Node):
        return

    is_power = bracket_contents.type == syms.power
    is_nested_await = is_power and bracket_contents.children[0].type == token.AWAIT
    if not is_power or is_nested_await:
        ensure_visible(opening_bracket)
        ensure_visible(closing_bracket)
    if is_nested_await:
        # If we are in a nested await then recurse down.
        remove_await_parens(bracket_contents)
1272 | ||
1273 | ||
def _maybe_wrap_cms_in_parens(
    node: Node, mode: Mode, features: Collection[Feature]
) -> None:
    """When enabled and safe, wrap the multiple context managers in invisible parens.

    It is only safe when `features` contain Feature.PARENTHESIZED_CONTEXT_MANAGERS;
    it is enabled by Preview.wrap_multiple_context_managers_in_parens in `mode`.
    `node` is modified in place.
    """
    if Feature.PARENTHESIZED_CONTEXT_MANAGERS not in features:
        return
    if Preview.wrap_multiple_context_managers_in_parens not in mode:
        return

    children = node.children
    # If the second child is an atom, it's already wrapped in parens.
    if len(children) <= 2 or children[1].type == syms.atom:
        return

    colon_index: Optional[int] = next(
        (
            position
            for position in range(2, len(children))
            if children[position].type == token.COLON
        ),
        None,
    )
    if colon_index is None:
        return

    # Everything between the first child and the colon gets wrapped.
    context_managers = children[1:colon_index]
    for manager in context_managers:
        manager.remove()

    # After wrapping, the with_stmt will look like this:
    #   with_stmt
    #     NAME 'with'
    #     atom
    #       LPAR ''
    #       testlist_gexp
    #         ... <-- context_managers
    #       /testlist_gexp
    #       RPAR ''
    #     /atom
    #     COLON ':'
    wrapped = Node(
        syms.atom,
        [
            Leaf(token.LPAR, ""),
            Node(syms.testlist_gexp, context_managers),
            Leaf(token.RPAR, ""),
        ],
    )
    node.insert_child(1, wrapped)
1315 | ||
1316 | ||
def remove_with_parens(node: Node, parent: Node) -> None:
    """Recursively hide optional parens in `with` statements."""
    # Doing this in a single pass is not straightforward because the different
    # ways of bracketing a `with` statement produce quite different parse trees:
    #
    # with (open("file")) as f:                       # this is an asexpr_test
    #     ...
    #
    # with (open("file") as f):                       # this is an atom containing an
    #     ...                                         # asexpr_test
    #
    # with (open("file")) as f, (open("file")) as f:  # this is asexpr_test, COMMA,
    #     ...                                         # asexpr_test
    #
    # with (open("file") as f, open("file") as f):    # an atom containing a
    #     ...                                         # testlist_gexp which then
    #                                                 # contains multiple asexpr_test(s)
    kind = node.type

    if kind == syms.atom:
        if maybe_make_parens_invisible_in_atom(
            node,
            parent=parent,
            remove_brackets_around_comma=True,
        ):
            wrap_in_parentheses(parent, node, visible=False)
        # Looked up only now: the call above may have replaced this child.
        inner = node.children[1]
        if isinstance(inner, Node):
            remove_with_parens(inner, node)
        return

    if kind == syms.testlist_gexp:
        for child in node.children:
            if isinstance(child, Node):
                remove_with_parens(child, node)
        return

    if kind != syms.asexpr_test:
        return

    # An asexpr_test containing a walrus operator is left alone.
    if any(leaf.type == token.COLONEQUAL for leaf in node.leaves()):
        return

    target = node.children[0]
    if maybe_make_parens_invisible_in_atom(
        target,
        parent=node,
        remove_brackets_around_comma=True,
    ):
        wrap_in_parentheses(node, target, visible=False)
1357 | ||
1358 | ||
def maybe_make_parens_invisible_in_atom(
    node: LN,
    parent: LN,
    remove_brackets_around_comma: bool = False,
) -> bool:
    """If it's safe, make the parens in the atom `node` invisible, recursively.
    Additionally, remove repeated, adjacent invisible parens from the atom `node`
    as they are redundant.

    Returns whether the node should itself be wrapped in invisible parentheses.
    """
    if node.type != syms.atom:
        return False
    if is_empty_tuple(node) or is_one_tuple(node):
        return False
    if is_yield(node) and parent.type != syms.expr_stmt:
        return False
    # This check tries to prevent removing non-optional brackets around a
    # tuple.  It can be a bit overzealous though, so callers handling `for` and
    # `with` statements have an option to skip it.
    if (
        not remove_brackets_around_comma
        and max_delimiter_priority_in_atom(node) >= COMMA_PRIORITY
    ):
        return False
    if is_tuple_containing_walrus(node):
        return False

    if is_walrus_assignment(node) and parent.type in (
        syms.annassign,
        syms.expr_stmt,
        syms.assert_stmt,
        syms.return_stmt,
        syms.except_clause,
        syms.funcdef,
        syms.with_stmt,
        # these ones aren't useful to end users, but they do please fuzzers
        syms.for_stmt,
        syms.del_stmt,
    ):
        return False

    first = node.children[0]
    last = node.children[-1]
    if not (is_lpar_token(first) and is_rpar_token(last)):
        return True

    middle = node.children[1]
    # Make the parentheses invisible, unless the prefix of `middle` carries a
    # type comment with an ignore annotation, in which case they are kept.
    if not is_type_ignore_comment_string(middle.prefix.strip()):
        first.value = ""
        last.value = ""

    maybe_make_parens_invisible_in_atom(
        middle,
        parent=parent,
        remove_brackets_around_comma=remove_brackets_around_comma,
    )

    if is_atom_with_invisible_parens(middle):
        # Strip the invisible parens from `middle` by replacing
        # it with the child in-between the invisible parens
        middle.replace(middle.children[1])

    return False
1428 | ||
1429 | ||
def should_split_line(line: Line, opening_bracket: Leaf) -> bool:
    """Should `line` be immediately split with `delimiter_split()` after RHS?"""
    parent = opening_bracket.parent
    # The value test is a substring test against "[{(", so an empty value
    # passes it as well.
    if not parent or opening_bracket.value not in "[{(":
        return False

    # What is checked here is whether the body is delimited by commas and has
    # more than one of them: the trailing comma is left out, so if the highest
    # delimiter priority is still that of commas, there are more.
    try:
        final_leaf = line.leaves[-1]
        ends_with_comma = final_leaf.type == token.COMMA
        ignored = {id(final_leaf)} if ends_with_comma else set()
        top_priority = line.bracket_tracker.max_delimiter_priority(exclude=ignored)
    except (IndexError, ValueError):
        return False

    if top_priority != COMMA_PRIORITY:
        return False

    if line.mode.magic_trailing_comma and ends_with_comma:
        return True

    # always explode imports
    return parent.type in {syms.atom, syms.import_from}
1455 | ||
1456 | ||
def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[LeafID]]:
    """Generate sets of closing bracket IDs that should be omitted in a RHS.

    Brackets can be omitted if the entire trailer up to and including
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too). First
    set is empty, unless the line should explode, in which case bracket pairs until
    the one that needs to explode are omitted.

    Note that the very same set object is yielded every time and keeps being
    updated between yields.
    """

    omit: Set[LeafID] = set()
    if not line.magic_trailing_comma:
        yield omit

    # Running length of the trailer walked so far, starting with the
    # indentation (4 columns per depth level).
    length = 4 * line.depth
    # Opening bracket matching `closing_bracket`; set while the walk is between
    # the two, reset to None once the opening bracket itself is reached.
    opening_bracket: Optional[Leaf] = None
    # The most recent closing bracket that started a bracket pair.
    closing_bracket: Optional[Leaf] = None
    # Closing brackets seen since the last yield that are only added to `omit`
    # together with the next `closing_bracket`.
    inner_brackets: Set[LeafID] = set()
    # Walk the leaves from the end of the line towards its start.
    for index, leaf, leaf_length in line.enumerate_with_length(reversed=True):
        length += leaf_length
        if length > line_length:
            break

        # A leaf reported as longer than its own value and prefix is treated as
        # carrying an inline comment.
        has_inline_comment = leaf_length > len(leaf.value) + len(leaf.prefix)
        if leaf.type == STANDALONE_COMMENT or has_inline_comment:
            break

        if opening_bracket:
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
                prev = line.leaves[index - 1] if index > 0 else None
                if (
                    prev
                    and prev.type == token.COMMA
                    and leaf.opening_bracket is not None
                    and not is_one_sequence_between(
                        leaf.opening_bracket, leaf, line.leaves
                    )
                ):
                    # Never omit bracket pairs with trailing commas.
                    # We need to explode on those.
                    break

                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
            prev = line.leaves[index - 1] if index > 0 else None
            if prev and prev.type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (i.e. only add them to the `omit` set if another
                # pair of brackets was good enough).
                inner_brackets.add(id(leaf))
                continue

            if closing_bracket:
                # The previously found pair fits: omit it together with the
                # inner brackets collected along the way.
                omit.add(id(closing_bracket))
                omit.update(inner_brackets)
                inner_brackets.clear()
                yield omit

            if (
                prev
                and prev.type == token.COMMA
                and leaf.opening_bracket is not None
                and not is_one_sequence_between(leaf.opening_bracket, leaf, line.leaves)
            ):
                # Never omit bracket pairs with trailing commas.
                # We need to explode on those.
                break

            # Only brackets with a non-empty value start a new pair to track.
            if leaf.value:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
1531 | ||
1532 | ||
def run_transformer(
    line: Line,
    transform: Transformer,
    mode: Mode,
    features: Collection[Feature],
    *,
    line_str: str = "",
) -> List[Line]:
    """Apply `transform` to `line` and return the fully transformed lines.

    Every line produced by `transform` is passed on to `transform_line()` and
    the results are collected.  `line_str` is the string form of `line`; it is
    computed with `line_to_string()` when not given.

    Unless one of the early-return conditions below holds, the transform is run
    a second time on a copy of `line` with `Feature.FORCE_OPTIONAL_PARENTHESES`
    added to the features, and that second result is returned instead when all
    of its lines are short enough.

    Raises `CannotTransform` if a produced line, with surrounding newlines
    stripped, equals `line_str`.
    """
    if not line_str:
        line_str = line_to_string(line)
    result: List[Line] = []
    for transformed_line in transform(line, features, mode):
        if str(transformed_line).strip("\n") == line_str:
            raise CannotTransform("Line transformer returned an unchanged result")

        result.extend(transform_line(transformed_line, mode=mode, features=features))

    features_set = set(features)
    # Return the first result as is when a second attempt is not applicable:
    # optional parentheses are already forced, the transform is not the one
    # whose class is named "rhs", the line has no invisible brackets or some of
    # them have a value, the line contains multiline strings, or the first
    # resulting line has special type comments or is already short enough.
    if (
        Feature.FORCE_OPTIONAL_PARENTHESES in features_set
        or transform.__class__.__name__ != "rhs"
        or not line.bracket_tracker.invisible
        or any(bracket.value for bracket in line.bracket_tracker.invisible)
        or line.contains_multiline_strings()
        or result[0].contains_uncollapsable_type_comments()
        or result[0].contains_unsplittable_type_ignore()
        or is_line_short_enough(result[0], mode=mode)
        # If any leaves have no parents (which _can_ occur since
        # `transform(line)` potentially destroys the line's underlying node
        # structure), then we can't proceed. Doing so would cause the below
        # call to `append_leaves()` to fail.
        or any(leaf.parent is None for leaf in line.leaves)
    ):
        return result

    # Second attempt: same transform on a copy of the line, this time with
    # optional parentheses forced.
    line_copy = line.clone()
    append_leaves(line_copy, line, line.leaves)
    features_fop = features_set | {Feature.FORCE_OPTIONAL_PARENTHESES}
    second_opinion = run_transformer(
        line_copy, transform, mode, features_fop, line_str=line_str
    )
    # Keep the second result only if every one of its lines fits.
    if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion):
        result = second_opinion
    return result