2 blib2to3 Node/Leaf transformation-related utility functions.
6 from typing
import Final
, Generic
, Iterator
, List
, Optional
, Set
, Tuple
, TypeVar
, Union
8 if sys
.version_info
>= (3, 10):
9 from typing
import TypeGuard
11 from typing_extensions
import TypeGuard
13 from mypy_extensions
import mypyc_attr
15 from black
.cache
import CACHE_DIR
16 from black
.mode
import Mode
, Preview
17 from black
.strings
import has_triple_quotes
18 from blib2to3
import pygram
19 from blib2to3
.pgen2
import token
20 from blib2to3
.pytree
import NL
, Leaf
, Node
, type_repr
22 pygram
.initialize(CACHE_DIR
)
23 syms
: Final
= pygram
.python_symbols
28 LN
= Union
[Leaf
, Node
]
33 WHITESPACE
: Final
= {token
.DEDENT
, token
.INDENT
, token
.NEWLINE
}
46 STANDALONE_COMMENT
: Final
= 153
47 token
.tok_name
[STANDALONE_COMMENT
] = "STANDALONE_COMMENT"
48 LOGIC_OPERATORS
: Final
= {"and", "or"}
49 COMPARATORS
: Final
= {
57 MATH_OPERATORS
: Final
= {
73 STARS
: Final
= {token
.STAR
, token
.DOUBLESTAR
}
74 VARARGS_SPECIALS
: Final
= STARS |
{token
.SLASH
}
75 VARARGS_PARENTS
: Final
= {
77 syms
.argument
, # double star in arglist
78 syms
.trailer
, # single argument to call
80 syms
.varargslist
, # lambdas
82 UNPACKING_PARENTS
: Final
= {
83 syms
.atom
, # single element of a list or set literal
87 syms
.testlist_star_expr
,
91 TEST_DESCENDANTS
: Final
= {
108 TYPED_NAMES
: Final
= {syms
.tname
, syms
.tname_star
}
109 ASSIGNMENTS
: Final
= {
126 IMPLICIT_TUPLE
: Final
= {syms
.testlist
, syms
.testlist_star_expr
, syms
.exprlist
}
128 token
.LPAR
: token
.RPAR
,
129 token
.LSQB
: token
.RSQB
,
130 token
.LBRACE
: token
.RBRACE
,
132 OPENING_BRACKETS
: Final
= set(BRACKET
.keys())
133 CLOSING_BRACKETS
: Final
= set(BRACKET
.values())
134 BRACKETS
: Final
= OPENING_BRACKETS | CLOSING_BRACKETS
135 ALWAYS_NO_SPACE
: Final
= CLOSING_BRACKETS |
{token
.COMMA
, STANDALONE_COMMENT
}
140 @mypyc_attr(allow_interpreted_subclasses
=True)
141 class Visitor(Generic
[T
]):
142 """Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
144 def visit(self
, node
: LN
) -> Iterator
[T
]:
145 """Main method to visit `node` and its children.
147 It tries to find a `visit_*()` method for the given `node.type`, like
148 `visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
149 If no dedicated `visit_*()` method is found, chooses `visit_default()`
152 Then yields objects of type `T` from the selected visitor.
155 name
= token
.tok_name
[node
.type]
157 name
= str(type_repr(node
.type))
158 # We explicitly branch on whether a visitor exists (instead of
159 # using self.visit_default as the default arg to getattr) in order
160 # to save needing to create a bound method object and so mypyc can
161 # generate a native call to visit_default.
162 visitf
= getattr(self
, f
"visit_{name}", None)
164 yield from visitf(node
)
166 yield from self
.visit_default(node
)
168 def visit_default(self
, node
: LN
) -> Iterator
[T
]:
169 """Default `visit_*()` implementation. Recurses to children of `node`."""
170 if isinstance(node
, Node
):
171 for child
in node
.children
:
172 yield from self
.visit(child
)
175 def whitespace(leaf
: Leaf
, *, complex_subscript
: bool, mode
: Mode
) -> str: # noqa: C901
176 """Return whitespace prefix if needed for the given `leaf`.
178 `complex_subscript` signals whether the given leaf is part of a subscription
179 which has non-trivial arguments, like arithmetic expressions or function calls.
182 SPACE
: Final
[str] = " "
183 DOUBLESPACE
: Final
[str] = " "
187 if t
in ALWAYS_NO_SPACE
:
190 if t
== token
.COMMENT
:
193 assert p
is not None, f
"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
194 if t
== token
.COLON
and p
.type not in {
201 prev
= leaf
.prev_sibling
203 prevp
= preceding_leaf(p
)
204 if not prevp
or prevp
.type in OPENING_BRACKETS
:
208 if prevp
.type == token
.COLON
:
211 elif prevp
.type != token
.COMMA
and not complex_subscript
:
216 if prevp
.type == token
.EQUAL
:
218 if prevp
.parent
.type in {
226 elif prevp
.parent
.type == syms
.typedargslist
:
227 # A bit hacky: if the equal sign has whitespace, it means we
228 # previously found it's a typed argument. So, we're using
233 prevp
.type == token
.STAR
234 and parent_type(prevp
) == syms
.star_expr
235 and parent_type(prevp
.parent
) == syms
.subscriptlist
237 # No space between typevar tuples.
240 elif prevp
.type in VARARGS_SPECIALS
:
241 if is_vararg(prevp
, within
=VARARGS_PARENTS | UNPACKING_PARENTS
):
244 elif prevp
.type == token
.COLON
:
245 if prevp
.parent
and prevp
.parent
.type in {syms
.subscript
, syms
.sliceop
}:
246 return SPACE
if complex_subscript
else NO
250 and prevp
.parent
.type == syms
.factor
251 and prevp
.type in MATH_OPERATORS
255 elif prevp
.type == token
.AT
and p
.parent
and p
.parent
.type == syms
.decorator
:
256 # no space in decorators
259 elif prev
.type in OPENING_BRACKETS
:
262 if p
.type in {syms
.parameters
, syms
.arglist
}:
263 # untyped function signatures or calls
264 if not prev
or prev
.type != token
.COMMA
:
267 elif p
.type == syms
.varargslist
:
269 if prev
and prev
.type != token
.COMMA
:
272 elif p
.type == syms
.typedargslist
:
273 # typed function signatures
278 if prev
.type not in TYPED_NAMES
:
281 elif prev
.type == token
.EQUAL
:
282 # A bit hacky: if the equal sign has whitespace, it means we
283 # previously found it's a typed argument. So, we're using that, too.
286 elif prev
.type != token
.COMMA
:
289 elif p
.type in TYPED_NAMES
:
292 prevp
= preceding_leaf(p
)
293 if not prevp
or prevp
.type != token
.COMMA
:
296 elif p
.type == syms
.trailer
:
297 # attributes and calls
298 if t
== token
.LPAR
or t
== token
.RPAR
:
302 if t
== token
.DOT
or t
== token
.LSQB
:
305 elif prev
.type != token
.COMMA
:
308 elif p
.type == syms
.argument
:
314 prevp
= preceding_leaf(p
)
315 if not prevp
or prevp
.type == token
.LPAR
:
318 elif prev
.type in {token
.EQUAL
} | VARARGS_SPECIALS
:
321 elif p
.type == syms
.decorator
:
325 elif p
.type == syms
.dotted_name
:
329 prevp
= preceding_leaf(p
)
330 if not prevp
or prevp
.type == token
.AT
or prevp
.type == token
.DOT
:
333 elif p
.type == syms
.classdef
:
337 if prev
and prev
.type == token
.LPAR
:
340 elif p
.type in {syms
.subscript
, syms
.sliceop
}:
343 assert p
.parent
is not None, "subscripts are always parented"
344 if p
.parent
.type == syms
.subscriptlist
:
349 elif Preview
.walrus_subscript
in mode
and (
350 t
== token
.COLONEQUAL
or prev
.type == token
.COLONEQUAL
354 elif not complex_subscript
:
357 elif p
.type == syms
.atom
:
358 if prev
and t
== token
.DOT
:
359 # dots, but not the first one.
362 elif p
.type == syms
.dictsetmaker
:
364 if prev
and prev
.type == token
.DOUBLESTAR
:
367 elif p
.type in {syms
.factor
, syms
.star_expr
}:
370 prevp
= preceding_leaf(p
)
371 if not prevp
or prevp
.type in OPENING_BRACKETS
:
374 prevp_parent
= prevp
.parent
375 assert prevp_parent
is not None
376 if prevp
.type == token
.COLON
and prevp_parent
.type in {
382 elif prevp
.type == token
.EQUAL
and prevp_parent
.type == syms
.argument
:
385 elif t
in {token
.NAME
, token
.NUMBER
, token
.STRING
}:
388 elif p
.type == syms
.import_from
:
390 if prev
and prev
.type == token
.DOT
:
393 elif t
== token
.NAME
:
397 if prev
and prev
.type == token
.DOT
:
400 elif p
.type == syms
.sliceop
:
403 elif p
.type == syms
.except_clause
:
410 def preceding_leaf(node
: Optional
[LN
]) -> Optional
[Leaf
]:
411 """Return the first leaf that precedes `node`, if any."""
413 res
= node
.prev_sibling
415 if isinstance(res
, Leaf
):
419 return list(res
.leaves())[-1]
428 def prev_siblings_are(node
: Optional
[LN
], tokens
: List
[Optional
[NodeType
]]) -> bool:
429 """Return if the `node` and its previous siblings match types against the provided
430 list of tokens; the provided `node`has its type matched against the last element in
431 the list. `None` can be used as the first element to declare that the start of the
432 list is anchored at the start of its parent's children."""
435 if tokens
[-1] is None:
439 if node
.type != tokens
[-1]:
441 return prev_siblings_are(node
.prev_sibling
, tokens
[:-1])
444 def parent_type(node
: Optional
[LN
]) -> Optional
[NodeType
]:
447 @node.parent.type, if @node is not None and has a parent.
451 if node
is None or node
.parent
is None:
454 return node
.parent
.type
457 def child_towards(ancestor
: Node
, descendant
: LN
) -> Optional
[LN
]:
458 """Return the child of `ancestor` that contains `descendant`."""
459 node
: Optional
[LN
] = descendant
460 while node
and node
.parent
!= ancestor
:
465 def replace_child(old_child
: LN
, new_child
: LN
) -> None:
468 * If @old_child.parent is set, replace @old_child with @new_child in
469 @old_child's underlying Node structure.
471 * Otherwise, this function does nothing.
473 parent
= old_child
.parent
477 child_idx
= old_child
.remove()
478 if child_idx
is not None:
479 parent
.insert_child(child_idx
, new_child
)
482 def container_of(leaf
: Leaf
) -> LN
:
483 """Return `leaf` or one of its ancestors that is the topmost container of it.
485 By "container" we mean a node where `leaf` is the very first child.
487 same_prefix
= leaf
.prefix
490 parent
= container
.parent
494 if parent
.children
[0].prefix
!= same_prefix
:
497 if parent
.type == syms
.file_input
:
500 if parent
.prev_sibling
is not None and parent
.prev_sibling
.type in BRACKETS
:
507 def first_leaf_of(node
: LN
) -> Optional
[Leaf
]:
508 """Returns the first leaf of the node tree."""
509 if isinstance(node
, Leaf
):
512 return first_leaf_of(node
.children
[0])
517 def is_arith_like(node
: LN
) -> bool:
518 """Whether node is an arithmetic or a binary arithmetic expression"""
519 return node
.type in {
527 def is_docstring(leaf
: Leaf
) -> bool:
528 if prev_siblings_are(
529 leaf
.parent
, [None, token
.NEWLINE
, token
.INDENT
, syms
.simple_stmt
]
533 # Multiline docstring on the same line as the `def`.
534 if prev_siblings_are(leaf
.parent
, [syms
.parameters
, token
.COLON
, syms
.simple_stmt
]):
535 # `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
536 # grammar. We're safe to return True without further checks.
def is_empty_tuple(node: LN) -> bool:
    """Return True if `node` holds an empty tuple.

    `node` qualifies when it is an atom with exactly two children: a left
    parenthesis followed by a right parenthesis.
    """
    return (
        node.type == syms.atom
        and len(node.children) == 2
        and node.children[0].type == token.LPAR
        and node.children[1].type == token.RPAR
    )
552 def is_one_tuple(node
: LN
) -> bool:
553 """Return True if `node` holds a tuple with one element, with or without parens."""
554 if node
.type == syms
.atom
:
555 gexp
= unwrap_singleton_parenthesis(node
)
556 if gexp
is None or gexp
.type != syms
.testlist_gexp
:
559 return len(gexp
.children
) == 2 and gexp
.children
[1].type == token
.COMMA
562 node
.type in IMPLICIT_TUPLE
563 and len(node
.children
) == 2
564 and node
.children
[1].type == token
.COMMA
568 def is_tuple_containing_walrus(node
: LN
) -> bool:
569 """Return True if `node` holds a tuple that contains a walrus operator."""
570 if node
.type != syms
.atom
:
572 gexp
= unwrap_singleton_parenthesis(node
)
573 if gexp
is None or gexp
.type != syms
.testlist_gexp
:
576 return any(child
.type == syms
.namedexpr_test
for child
in gexp
.children
)
579 def is_one_sequence_between(
583 brackets
: Tuple
[int, int] = (token
.LPAR
, token
.RPAR
),
585 """Return True if content between `opening` and `closing` is a one-sequence."""
586 if (opening
.type, closing
.type) != brackets
:
589 depth
= closing
.bracket_depth
+ 1
590 for _opening_index
, leaf
in enumerate(leaves
):
595 raise LookupError("Opening paren not found in `leaves`")
599 for leaf
in leaves
[_opening_index
:]:
603 bracket_depth
= leaf
.bracket_depth
604 if bracket_depth
== depth
and leaf
.type == token
.COMMA
:
606 if leaf
.parent
and leaf
.parent
.type in {
def is_walrus_assignment(node: LN) -> bool:
    """Return True iff `node` is of the shape ( test := test ).

    The parentheses are stripped with `unwrap_singleton_parenthesis()`; when
    that yields nothing, the result is False.
    """
    inner = unwrap_singleton_parenthesis(node)
    return inner is not None and inner.type == syms.namedexpr_test
622 def is_simple_decorator_trailer(node
: LN
, last
: bool = False) -> bool:
623 """Return True iff `node` is a trailer valid in a simple decorator"""
624 return node
.type == syms
.trailer
and (
626 len(node
.children
) == 2
627 and node
.children
[0].type == token
.DOT
628 and node
.children
[1].type == token
.NAME
630 # last trailer can be an argument-less parentheses pair
633 and len(node
.children
) == 2
634 and node
.children
[0].type == token
.LPAR
635 and node
.children
[1].type == token
.RPAR
637 # last trailer can be arguments
640 and len(node
.children
) == 3
641 and node
.children
[0].type == token
.LPAR
642 # and node.children[1].type == syms.argument
643 and node
.children
[2].type == token
.RPAR
648 def is_simple_decorator_expression(node
: LN
) -> bool:
649 """Return True iff `node` could be a 'dotted name' decorator
651 This function takes the node of the 'namedexpr_test' of the new decorator
652 grammar and test if it would be valid under the old decorator grammar.
654 The old grammar was: decorator: @ dotted_name [arguments] NEWLINE
655 The new grammar is : decorator: @ namedexpr_test NEWLINE
657 if node
.type == token
.NAME
:
659 if node
.type == syms
.power
:
662 node
.children
[0].type == token
.NAME
663 and all(map(is_simple_decorator_trailer
, node
.children
[1:-1]))
665 len(node
.children
) < 2
666 or is_simple_decorator_trailer(node
.children
[-1], last
=True)
672 def is_yield(node
: LN
) -> bool:
673 """Return True if `node` holds a `yield` or `yield from` expression."""
674 if node
.type == syms
.yield_expr
:
677 if is_name_token(node
) and node
.value
== "yield":
680 if node
.type != syms
.atom
:
683 if len(node
.children
) != 3:
686 lpar
, expr
, rpar
= node
.children
687 if lpar
.type == token
.LPAR
and rpar
.type == token
.RPAR
:
688 return is_yield(expr
)
693 def is_vararg(leaf
: Leaf
, within
: Set
[NodeType
]) -> bool:
694 """Return True if `leaf` is a star or double star in a vararg or kwarg.
696 If `within` includes VARARGS_PARENTS, this applies to function signatures.
697 If `within` includes UNPACKING_PARENTS, it applies to right hand-side
698 extended iterable unpacking (PEP 3132) and additional unpacking
699 generalizations (PEP 448).
701 if leaf
.type not in VARARGS_SPECIALS
or not leaf
.parent
:
705 if p
.type == syms
.star_expr
:
706 # Star expressions are also used as assignment targets in extended
707 # iterable unpacking (PEP 3132). See what its parent is instead.
713 return p
.type in within
def is_multiline_string(leaf: Leaf) -> bool:
    """Return True if `leaf` is a multiline string that actually spans many lines.

    Triple quotes alone are not enough: the value must also contain at least
    one newline character.
    """
    return has_triple_quotes(leaf.value) and "\n" in leaf.value
721 def is_stub_suite(node
: Node
) -> bool:
722 """Return True if `node` is a suite with a stub body."""
724 # If there is a comment, we want to keep it.
725 if node
.prefix
.strip():
729 len(node
.children
) != 4
730 or node
.children
[0].type != token
.NEWLINE
731 or node
.children
[1].type != token
.INDENT
732 or node
.children
[3].type != token
.DEDENT
736 if node
.children
[3].prefix
.strip():
739 return is_stub_body(node
.children
[2])
742 def is_stub_body(node
: LN
) -> bool:
743 """Return True if `node` is a simple statement containing an ellipsis."""
744 if not isinstance(node
, Node
) or node
.type != syms
.simple_stmt
:
747 if len(node
.children
) != 2:
750 child
= node
.children
[0]
752 not child
.prefix
.strip()
753 and child
.type == syms
.atom
754 and len(child
.children
) == 3
755 and all(leaf
== Leaf(token
.DOT
, ".") for leaf
in child
.children
)
759 def is_atom_with_invisible_parens(node
: LN
) -> bool:
760 """Given a `LN`, determines whether it's an atom `node` with invisible
761 parens. Useful in dedupe-ing and normalizing parens.
763 if isinstance(node
, Leaf
) or node
.type != syms
.atom
:
766 first
, last
= node
.children
[0], node
.children
[-1]
768 isinstance(first
, Leaf
)
769 and first
.type == token
.LPAR
770 and first
.value
== ""
771 and isinstance(last
, Leaf
)
772 and last
.type == token
.RPAR
def is_empty_par(leaf: Leaf) -> bool:
    """Return True if `leaf` is a left or right parenthesis with an empty value."""
    return is_empty_lpar(leaf) or is_empty_rpar(leaf)
def is_empty_lpar(leaf: Leaf) -> bool:
    """Return True if `leaf` is a left parenthesis whose value is the empty string."""
    return leaf.type == token.LPAR and leaf.value == ""
def is_empty_rpar(leaf: Leaf) -> bool:
    """Return True if `leaf` is a right parenthesis whose value is the empty string."""
    return leaf.type == token.RPAR and leaf.value == ""
789 def is_import(leaf
: Leaf
) -> bool:
790 """Return True if the given leaf starts an import statement."""
797 (v
== "import" and p
and p
.type == syms
.import_name
)
798 or (v
== "from" and p
and p
.type == syms
.import_from
)
803 def is_with_or_async_with_stmt(leaf
: Leaf
) -> bool:
804 """Return True if the given leaf starts a with or async with statement."""
806 leaf
.type == token
.NAME
807 and leaf
.value
== "with"
809 and leaf
.parent
.type == syms
.with_stmt
811 leaf
.type == token
.ASYNC
812 and leaf
.next_sibling
813 and leaf
.next_sibling
.type == syms
.with_stmt
817 def is_async_stmt_or_funcdef(leaf
: Leaf
) -> bool:
818 """Return True if the given leaf starts an async def/for/with statement.
820 Note that `async def` can be either an `async_stmt` or `async_funcdef`,
821 the latter is used when it has decorators.
824 leaf
.type == token
.ASYNC
826 and leaf
.parent
.type in {syms
.async_stmt
, syms
.async_funcdef
}
830 def is_type_comment(leaf
: Leaf
) -> bool:
831 """Return True if the given leaf is a type comment. This function should only
832 be used for general type comments (excluding ignore annotations, which should
833 use `is_type_ignore_comment`). Note that general type comments are no longer
834 used in modern version of Python, this function may be deprecated in the future."""
837 return t
in {token
.COMMENT
, STANDALONE_COMMENT
} and v
.startswith("# type:")
840 def is_type_ignore_comment(leaf
: Leaf
) -> bool:
841 """Return True if the given leaf is a type comment with ignore annotation."""
844 return t
in {token
.COMMENT
, STANDALONE_COMMENT
} and is_type_ignore_comment_string(v
)
def is_type_ignore_comment_string(value: str) -> bool:
    """Return True if the given string is a type comment with an ignore annotation.

    The check is a plain prefix match: `value` must begin with exactly
    "# type: ignore". Anything may follow the prefix, but leading whitespace
    before the "#" makes the match fail.
    """
    return value.startswith("# type: ignore")
853 def wrap_in_parentheses(parent
: Node
, child
: LN
, *, visible
: bool = True) -> None:
854 """Wrap `child` in parentheses.
856 This replaces `child` with an atom holding the parentheses and the old
857 child. That requires moving the prefix.
859 If `visible` is False, the leaves will be valueless (and thus invisible).
861 lpar
= Leaf(token
.LPAR
, "(" if visible
else "")
862 rpar
= Leaf(token
.RPAR
, ")" if visible
else "")
863 prefix
= child
.prefix
865 index
= child
.remove() or 0
866 new_child
= Node(syms
.atom
, [lpar
, child
, rpar
])
867 new_child
.prefix
= prefix
868 parent
.insert_child(index
, new_child
)
871 def unwrap_singleton_parenthesis(node
: LN
) -> Optional
[LN
]:
872 """Returns `wrapped` if `node` is of the shape ( wrapped ).
874 Parenthesis can be optional. Returns None otherwise"""
875 if len(node
.children
) != 3:
878 lpar
, wrapped
, rpar
= node
.children
879 if not (lpar
.type == token
.LPAR
and rpar
.type == token
.RPAR
):
885 def ensure_visible(leaf
: Leaf
) -> None:
886 """Make sure parentheses are visible.
888 They could be invisible as part of some statements (see
889 :func:`normalize_invisible_parens` and :func:`visit_import_from`).
891 if leaf
.type == token
.LPAR
:
893 elif leaf
.type == token
.RPAR
:
def is_name_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has type `token.NAME`.

    The `TypeGuard` return annotation lets type checkers treat `nl` as a
    `Leaf` after a successful check.
    """
    return nl.type == token.NAME
def is_lpar_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has type `token.LPAR`.

    The `TypeGuard` return annotation lets type checkers treat `nl` as a
    `Leaf` after a successful check.
    """
    return nl.type == token.LPAR
def is_rpar_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has type `token.RPAR`.

    The `TypeGuard` return annotation lets type checkers treat `nl` as a
    `Leaf` after a successful check.
    """
    return nl.type == token.RPAR
def is_string_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has type `token.STRING`.

    The `TypeGuard` return annotation lets type checkers treat `nl` as a
    `Leaf` after a successful check.
    """
    return nl.type == token.STRING
def is_number_token(nl: NL) -> TypeGuard[Leaf]:
    """Return True if `nl` has type `token.NUMBER`.

    The `TypeGuard` return annotation lets type checkers treat `nl` as a
    `Leaf` after a successful check.
    """
    return nl.type == token.NUMBER
917 def is_part_of_annotation(leaf
: Leaf
) -> bool:
918 """Returns whether this leaf is part of type annotations."""
919 ancestor
= leaf
.parent
920 while ancestor
is not None:
921 if ancestor
.prev_sibling
and ancestor
.prev_sibling
.type == token
.RARROW
:
923 if ancestor
.parent
and ancestor
.parent
.type == syms
.tname
:
925 ancestor
= ancestor
.parent