1 # Copyright 2015 Google Inc. All Rights Reserved.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 """LogicalLine primitive for formatting.
16 A logical line is the containing data structure produced by the parser. It
17 collects all nodes (stored in FormatToken objects) that could appear on a single
18 line if there were no line length restrictions. It's then used by the parser to
19 perform the wrapping required to comply with the style guide.
22 from yapf_third_party
._ylib
2to
3.fixer_util
import syms
as python_symbols
24 from yapf
.pytree
import pytree_utils
25 from yapf
.pytree
import split_penalty
26 from yapf
.yapflib
import format_token
27 from yapf
.yapflib
import style
28 from yapf
.yapflib
import subtypes
31 class LogicalLine(object):
32 """Represents a single logical line in the output.
35 depth: indentation depth of this line. This is just a numeric value used to
36 distinguish lines that are more deeply nested than others. It is not the
37 actual amount of spaces, which is style-dependent.
40 def __init__(self
, depth
, tokens
=None):
43 Creates a new logical line with the given depth and an initial list of tokens.
44 Constructs the doubly-linked lists for format tokens using their built-in
45 next_token and previous_token attributes.
48 depth: indentation depth of this line
49 tokens: initial list of tokens
52 self
._tokens
= tokens
or []
56 # Set up a doubly linked list.
57 for index
, tok
in enumerate(self
._tokens
[1:]):
58 # Note, 'index' is the index to the previous token.
59 tok
.previous_token
= self
._tokens
[index
]
60 self
._tokens
[index
].next_token
= tok
62 def CalculateFormattingInformation(self
):
63 """Calculate the split penalty and total length for the tokens."""
64 # Say that the first token in the line should have a space before it. This
65 # means only that if this logical line is joined with a predecessor line,
66 # then there will be a space between them.
67 self
.first
.spaces_required_before
= 1
68 self
.first
.total_length
= len(self
.first
.value
)
70 prev_token
= self
.first
71 prev_length
= self
.first
.total_length
72 for token
in self
._tokens
[1:]:
73 if (token
.spaces_required_before
== 0 and
74 _SpaceRequiredBetween(prev_token
, token
, self
.disable
)):
75 token
.spaces_required_before
= 1
77 tok_len
= len(token
.value
) if not token
.is_pseudo
else 0
79 spaces_required_before
= token
.spaces_required_before
80 if isinstance(spaces_required_before
, list):
81 assert token
.is_comment
, token
83 # If here, we are looking at a comment token that appears on a line
84 # with other tokens (but because it is a comment, it is always the last
85 # token). Rather than specifying the actual number of spaces here,
86 # hard code a value of 0 and then set it later. This logic only works
87 # because this comment token is guaranteed to be the last token in the
89 spaces_required_before
= 0
91 token
.total_length
= prev_length
+ tok_len
+ spaces_required_before
93 # The split penalty has to be computed before {must|can}_break_before,
94 # because these may use it for their decision.
95 token
.split_penalty
+= _SplitPenalty(prev_token
, token
)
96 token
.must_break_before
= _MustBreakBefore(prev_token
, token
)
97 token
.can_break_before
= (
98 token
.must_break_before
or _CanBreakBefore(prev_token
, token
))
100 prev_length
= token
.total_length
104 """Split the line at semicolons."""
105 if not self
.has_semicolon
or self
.disable
:
109 lline
= LogicalLine(self
.depth
)
110 for tok
in self
._tokens
:
113 lline
= LogicalLine(self
.depth
)
115 lline
.AppendToken(tok
)
121 lline
.first
.previous_token
= None
122 lline
.last
.next_token
= None
126 ############################################################################
127 # Token Access and Manipulation Methods #
128 ############################################################################
130 def AppendToken(self
, token
):
131 """Append a new FormatToken to the tokens contained in this line."""
133 token
.previous_token
= self
.last
134 self
.last
.next_token
= token
135 self
._tokens
.append(token
)
139 """Returns the first non-whitespace token."""
140 return self
._tokens
[0]
144 """Returns the last non-whitespace token."""
145 return self
._tokens
[-1]
147 ############################################################################
148 # Token -> String Methods #
149 ############################################################################
def AsCode(self, indent_per_depth=2):
  """Render this line as a plain string of code.

  The result is the indentation for this line's depth followed by the value
  of every token, with a single space between consecutive tokens.

  TODO(eliben): for now this is rudimentary for debugging - once we add
  formatting capabilities, this method will have other uses (not all tokens
  have spaces around them, for example).

  Arguments:
    indent_per_depth: how many spaces to indent per depth level.

  Returns:
    A string representing the line as code.
  """
  leading_whitespace = ' ' * indent_per_depth * self.depth
  token_values = [tok.value for tok in self._tokens]
  return leading_whitespace + ' '.join(token_values)
170 def __str__(self
): # pragma: no cover
def __repr__(self):  # pragma: no cover
  """Return a debug string listing the depth and each token's name/value."""
  rendered_tokens = []
  for tok in self._tokens:
    # Each token is shown as NAME(repr-of-value).
    rendered_tokens.append('{0}({1!r})'.format(tok.name, tok.value))
  return 'LogicalLine(depth={0}, tokens=[{1}])'.format(
      self.depth, ','.join(rendered_tokens))
179 ############################################################################
181 ############################################################################
185 """Access the tokens contained within this line.
187 The caller must not modify the tokens list returned by this method.
190 List of tokens in this line.
196 """Return the line number of this logical line.
199 The line number of the first token in this logical line.
201 return self
.first
.lineno
205 """The start of the logical line.
208 A tuple of the starting line number and column.
210 return (self
.first
.lineno
, self
.first
.column
)
214 """The end of the logical line.
217 A tuple of the ending line number and column.
219 return (self
.last
.lineno
, self
.last
.column
+ len(self
.last
.value
))
def is_comment(self):
  """Report whether the first token of this line is a comment token."""
  # NOTE(review): the decorator line, if any, is not visible in this view.
  leading_token = self.first
  return leading_token.is_comment
def has_semicolon(self):
  """Report whether any token in this line has the value ';'."""
  # NOTE(review): read as `self.has_semicolon` (no call) elsewhere in this
  # file, so presumably wrapped by a decorator that is not visible here.
  for tok in self._tokens:
    if tok.value == ';':
      return True
  return False
def _IsIdNumberStringToken(tok):
  """Return a truthy value if tok is a keyword, name, number, or string."""
  # The flags are checked in the same order as before and evaluation still
  # stops at the first truthy one.
  is_identifier_like = tok.is_keyword or tok.is_name
  return is_identifier_like or tok.is_number or tok.is_string
def _IsUnaryOperator(tok):
  """Return True if tok's subtypes include subtypes.UNARY_OPERATOR."""
  unary_marker = subtypes.UNARY_OPERATOR
  return unary_marker in tok.subtypes
238 def _HasPrecedence(tok
):
239 """Whether a binary operation has precedence within its context."""
242 # We let ancestor be the statement surrounding the operation that tok is the
244 ancestor
= node
.parent
.parent
246 while ancestor
is not None:
247 # Search through the ancestor nodes in the parse tree for operators with
249 predecessor_type
= pytree_utils
.NodeName(ancestor
)
250 if predecessor_type
in ['arith_expr', 'term']:
251 # An ancestor "arith_expr" or "term" means we have found an operator
252 # with lower precedence than our tok.
254 if predecessor_type
!= 'atom':
255 # We understand the context to look for precedence within as an
256 # arbitrary nesting of "arith_expr", "term", and "atom" nodes. If we
257 # leave this context we have not found a lower precedence operator.
259 # Under normal usage we expect a complete parse tree to be available and
260 # we will return before we get an AttributeError from the root.
261 ancestor
= ancestor
.parent
264 def _PriorityIndicatingNoSpace(tok
):
265 """Whether to remove spaces around an operator due to precedence."""
266 if not tok
.is_arithmetic_op
or not tok
.is_simple_expr
:
267 # Limit space removal to highest priority arithmetic operators
269 return _HasPrecedence(tok
)
def _IsSubscriptColonAndValuePair(token1, token2):
  """Return a truthy value if token1 is a number/name and token2 a subscript colon.

  token2 is only inspected when token1 is a number or a name.
  """
  token1_is_value = token1.is_number or token1.is_name
  return token1_is_value and token2.is_subscript_colon
276 def _SpaceRequiredBetween(left
, right
, is_line_disabled
):
277 """Return True if a space is required between the left and right token."""
280 if (left
.is_pseudo
and _IsIdNumberStringToken(right
) and
281 left
.previous_token
and _IsIdNumberStringToken(left
.previous_token
)):
282 # Space between keyword... tokens and pseudo parens.
284 if left
.is_pseudo
or right
.is_pseudo
:
285 # There should be a space after the ':' in a dictionary.
286 if left
.OpensScope():
288 # The closing pseudo-paren shouldn't affect spacing.
290 if left
.is_continuation
or right
.is_continuation
:
291 # The continuation node's value has all of the spaces it needs.
293 if right
.name
in pytree_utils
.NONSEMANTIC_TOKENS
:
294 # No space before a non-semantic token.
296 if _IsIdNumberStringToken(left
) and _IsIdNumberStringToken(right
):
297 # Spaces between keyword, string, number, and identifier tokens.
299 if lval
== ',' and rval
== ':':
300 # We do want a space between a comma and colon.
302 if style
.Get('SPACE_INSIDE_BRACKETS'):
303 # Supersede the "no space before a colon or comma" check.
304 if left
.OpensScope() and rval
== ':':
306 if right
.ClosesScope() and lval
== ':':
308 if (style
.Get('SPACES_AROUND_SUBSCRIPT_COLON') and
309 (_IsSubscriptColonAndValuePair(left
, right
) or
310 _IsSubscriptColonAndValuePair(right
, left
))):
311 # Supersede the "never want a space before a colon or comma" check.
314 # Otherwise, we never want a space before a colon or comma.
316 if lval
== ',' and rval
in ']})':
317 # Add a space between ending ',' and closing bracket if requested.
318 return style
.Get('SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET')
320 # We want a space after a comma.
322 if lval
== 'from' and rval
== '.':
323 # Space before the '.' in an import statement.
325 if lval
== '.' and rval
== 'import':
326 # Space after the '.' in an import statement.
328 if (lval
== '=' and rval
in {'.', ',,,'} and
329 subtypes
.DEFAULT_OR_NAMED_ASSIGN
not in left
.subtypes
):
330 # Space between equal and '.' as in "X = ...".
332 if lval
== ':' and rval
in {'.', '...'}:
333 # Space between : and ...
335 if ((right
.is_keyword
or right
.is_name
) and
336 (left
.is_keyword
or left
.is_name
)):
337 # Don't merge two keywords/identifiers.
339 if (subtypes
.SUBSCRIPT_COLON
in left
.subtypes
or
340 subtypes
.SUBSCRIPT_COLON
in right
.subtypes
):
341 # A subscript shouldn't have spaces separating its colons.
343 if (subtypes
.TYPED_NAME
in left
.subtypes
or
344 subtypes
.TYPED_NAME
in right
.subtypes
):
345 # A typed argument should have a space after the colon.
349 subtypes
.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST
in right
.subtypes
):
350 # If there is a type hint, then we don't want to add a space between the
351 # equal sign and the hint.
353 if rval
not in '[)]}.' and not right
.is_binary_op
:
354 # A string followed by something other than a subscript, closing bracket,
355 # dot, or a binary op should have a space after it.
357 if right
.ClosesScope():
358 # A string followed by closing brackets should have a space after it
359 # depending on SPACE_INSIDE_BRACKETS. A string followed by opening
360 # brackets, however, should not.
361 return style
.Get('SPACE_INSIDE_BRACKETS')
362 if subtypes
.SUBSCRIPT_BRACKET
in right
.subtypes
:
363 # It's legal to do this in Python: 'hello'[a]
365 if left
.is_binary_op
and lval
!= '**' and _IsUnaryOperator(right
):
366 # Space between the binary operator and the unary operator.
368 if left
.is_keyword
and _IsUnaryOperator(right
):
369 # Handle things like "not -3 < x".
371 if _IsUnaryOperator(left
) and _IsUnaryOperator(right
):
372 # No space between two unary operators.
374 if left
.is_binary_op
or right
.is_binary_op
:
375 if lval
== '**' or rval
== '**':
376 # Space around the "power" operator.
377 return style
.Get('SPACES_AROUND_POWER_OPERATOR')
378 # Enforce spaces around binary operators except the blocked ones.
379 block_list
= style
.Get('NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS')
380 if lval
in block_list
or rval
in block_list
:
382 if style
.Get('ARITHMETIC_PRECEDENCE_INDICATION'):
383 if _PriorityIndicatingNoSpace(left
) or _PriorityIndicatingNoSpace(right
):
389 if (_IsUnaryOperator(left
) and lval
!= 'not' and
390 (right
.is_name
or right
.is_number
or rval
== '(')):
391 # The previous token was a unary op. No space is desired between it and
394 if (subtypes
.DEFAULT_OR_NAMED_ASSIGN
in left
.subtypes
and
395 subtypes
.TYPED_NAME
not in right
.subtypes
):
396 # A named argument or default parameter shouldn't have spaces around it.
397 return style
.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN')
398 if (subtypes
.DEFAULT_OR_NAMED_ASSIGN
in right
.subtypes
and
399 subtypes
.TYPED_NAME
not in left
.subtypes
):
400 # A named argument or default parameter shouldn't have spaces around it.
401 return style
.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN')
402 if (subtypes
.VARARGS_LIST
in left
.subtypes
or
403 subtypes
.VARARGS_LIST
in right
.subtypes
):
405 if (subtypes
.VARARGS_STAR
in left
.subtypes
or
406 subtypes
.KWARGS_STAR_STAR
in left
.subtypes
):
407 # Don't add a space after a vararg's star or a keyword's star-star.
409 if lval
== '@' and subtypes
.DECORATOR
in left
.subtypes
:
410 # Decorators shouldn't be separated from the 'at' sign.
412 if left
.is_keyword
and rval
== '.':
413 # Add space between keywords and dots.
414 return lval
not in {'None', 'print'}
415 if lval
== '.' and right
.is_keyword
:
416 # Add space between keywords and dots.
417 return rval
not in {'None', 'print'}
418 if lval
== '.' or rval
== '.':
419 # Don't place spaces between dots.
421 if ((lval
== '(' and rval
== ')') or (lval
== '[' and rval
== ']') or
422 (lval
== '{' and rval
== '}')):
423 # Empty objects shouldn't be separated by spaces.
425 if not is_line_disabled
and (left
.OpensScope() or right
.ClosesScope()):
426 if (style
.GetOrDefault('SPACES_AROUND_DICT_DELIMITERS', False) and (
427 (lval
== '{' and _IsDictListTupleDelimiterTok(left
, is_opening
=True)) or
429 _IsDictListTupleDelimiterTok(right
, is_opening
=False)))):
431 if (style
.GetOrDefault('SPACES_AROUND_LIST_DELIMITERS', False) and (
432 (lval
== '[' and _IsDictListTupleDelimiterTok(left
, is_opening
=True)) or
434 _IsDictListTupleDelimiterTok(right
, is_opening
=False)))):
436 if (style
.GetOrDefault('SPACES_AROUND_TUPLE_DELIMITERS', False) and (
437 (lval
== '(' and _IsDictListTupleDelimiterTok(left
, is_opening
=True)) or
439 _IsDictListTupleDelimiterTok(right
, is_opening
=False)))):
441 if left
.OpensScope() and right
.OpensScope():
442 # Nested objects' opening brackets shouldn't be separated, unless enabled
443 # by SPACE_INSIDE_BRACKETS.
444 return style
.Get('SPACE_INSIDE_BRACKETS')
445 if left
.ClosesScope() and right
.ClosesScope():
446 # Nested objects' closing brackets shouldn't be separated, unless enabled
447 # by SPACE_INSIDE_BRACKETS.
448 return style
.Get('SPACE_INSIDE_BRACKETS')
449 if left
.ClosesScope() and rval
in '([':
450 # A call, set, dictionary, or subscript that has a call or subscript after
451 # it shouldn't have a space between them.
453 if left
.OpensScope() and _IsIdNumberStringToken(right
):
454 # Don't separate the opening bracket from the first item, unless enabled
455 # by SPACE_INSIDE_BRACKETS.
456 return style
.Get('SPACE_INSIDE_BRACKETS')
457 if left
.is_name
and rval
in '([':
458 # Don't separate a call or array access from the name.
460 if right
.ClosesScope():
461 # Don't separate the closing bracket from the last item, unless enabled
462 # by SPACE_INSIDE_BRACKETS.
463 # FIXME(morbo): This might be too permissive.
464 return style
.Get('SPACE_INSIDE_BRACKETS')
465 if lval
== 'print' and rval
== '(':
466 # Special support for the 'print' function.
468 if left
.OpensScope() and _IsUnaryOperator(right
):
469 # Don't separate a unary operator from the opening bracket, unless enabled
470 # by SPACE_INSIDE_BRACKETS.
471 return style
.Get('SPACE_INSIDE_BRACKETS')
472 if (left
.OpensScope() and (subtypes
.VARARGS_STAR
in right
.subtypes
or
473 subtypes
.KWARGS_STAR_STAR
in right
.subtypes
)):
474 # Don't separate a '*' or '**' from the opening bracket, unless enabled
475 # by SPACE_INSIDE_BRACKETS.
476 return style
.Get('SPACE_INSIDE_BRACKETS')
478 # Avoid spaces before a semicolon. (Why is there a semicolon?!)
480 if lval
== '(' and rval
== 'await':
481 # Special support for the 'await' keyword. Don't separate the 'await'
482 # keyword from an opening paren, unless enabled by SPACE_INSIDE_BRACKETS.
483 return style
.Get('SPACE_INSIDE_BRACKETS')
487 def _MustBreakBefore(prev_token
, cur_token
):
488 """Return True if a line break is required before the current token."""
489 if prev_token
.is_comment
or (prev_token
.previous_token
and
490 prev_token
.is_pseudo
and
491 prev_token
.previous_token
.is_comment
):
492 # Must break if the previous token was a comment.
494 if (cur_token
.is_string
and prev_token
.is_string
and
495 IsSurroundedByBrackets(cur_token
)):
496 # We want consecutive strings to be on separate lines. This is a
497 # reasonable assumption, because otherwise they should have written them
498 # all on the same line, or with a '+'.
500 return cur_token
.must_break_before
503 def _CanBreakBefore(prev_token
, cur_token
):
504 """Return True if a line break may occur before the current token."""
505 pval
= prev_token
.value
506 cval
= cur_token
.value
507 if pval
== 'yield' and cval
== 'from':
508 # Don't break before a yield argument.
510 if pval
in {'async', 'await'} and cval
in {'def', 'with', 'for'}:
511 # Don't break after the 'async' or 'await' keywords.
513 if cur_token
.split_penalty
>= split_penalty
.UNBREAKABLE
:
516 # Don't break right after the beginning of a decorator.
519 # Don't break before the start of a block of code.
522 # Don't break before a comma.
524 if prev_token
.is_name
and cval
== '(':
525 # Don't break in the middle of a function definition or call.
527 if prev_token
.is_name
and cval
== '[':
528 # Don't break in the middle of an array dereference.
530 if cur_token
.is_comment
and prev_token
.lineno
== cur_token
.lineno
:
531 # Don't break a comment at the end of the line.
533 if subtypes
.UNARY_OPERATOR
in prev_token
.subtypes
:
534 # Don't break after a unary token.
536 if not style
.Get('ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS'):
537 if (subtypes
.DEFAULT_OR_NAMED_ASSIGN
in cur_token
.subtypes
or
538 subtypes
.DEFAULT_OR_NAMED_ASSIGN
in prev_token
.subtypes
):
543 def IsSurroundedByBrackets(tok
):
544 """Return True if the token is surrounded by brackets."""
548 previous_token
= tok
.previous_token
549 while previous_token
:
550 if previous_token
.value
== ')':
552 elif previous_token
.value
== '}':
554 elif previous_token
.value
== ']':
555 sq_bracket_count
-= 1
557 if previous_token
.value
== '(':
559 return previous_token
561 elif previous_token
.value
== '{':
563 return previous_token
565 elif previous_token
.value
== '[':
566 if sq_bracket_count
== 0:
567 return previous_token
568 sq_bracket_count
+= 1
570 previous_token
= previous_token
.previous_token
574 def _IsDictListTupleDelimiterTok(tok
, is_opening
):
577 if tok
.matching_bracket
is None:
582 close_tok
= tok
.matching_bracket
584 open_tok
= tok
.matching_bracket
587 # There must be something in between the tokens
588 if open_tok
.next_token
== close_tok
:
591 assert open_tok
.next_token
.node
592 assert open_tok
.next_token
.node
.parent
594 return open_tok
.next_token
.node
.parent
.type in [
595 python_symbols
.dictsetmaker
,
596 python_symbols
.listmaker
,
597 python_symbols
.testlist_gexp
,
# Operator token values grouped by category. The logical and bitwise sets are
# consulted by _SplitPenalty when deciding where a split is preferred.
_LOGICAL_OPERATORS = frozenset(('and', 'or'))
_BITWISE_OPERATORS = frozenset(('&', '|', '^'))
_ARITHMETIC_OPERATORS = frozenset(('+', '-', '*', '/', '%', '//', '@'))
606 def _SplitPenalty(prev_token
, cur_token
):
607 """Return the penalty for breaking the line before the current token."""
608 pval
= prev_token
.value
609 cval
= cur_token
.value
611 return split_penalty
.UNBREAKABLE
613 if cur_token
.node_split_penalty
> 0:
614 return cur_token
.node_split_penalty
616 if style
.Get('SPLIT_BEFORE_LOGICAL_OPERATOR'):
617 # Prefer to split before 'and' and 'or'.
618 if pval
in _LOGICAL_OPERATORS
:
619 return style
.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')
620 if cval
in _LOGICAL_OPERATORS
:
623 # Prefer to split after 'and' and 'or'.
624 if pval
in _LOGICAL_OPERATORS
:
626 if cval
in _LOGICAL_OPERATORS
:
627 return style
.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')
629 if style
.Get('SPLIT_BEFORE_BITWISE_OPERATOR'):
630 # Prefer to split before '&', '|', and '^'.
631 if pval
in _BITWISE_OPERATORS
:
632 return style
.Get('SPLIT_PENALTY_BITWISE_OPERATOR')
633 if cval
in _BITWISE_OPERATORS
:
636 # Prefer to split after '&', '|', and '^'.
637 if pval
in _BITWISE_OPERATORS
:
639 if cval
in _BITWISE_OPERATORS
:
640 return style
.Get('SPLIT_PENALTY_BITWISE_OPERATOR')
642 if (subtypes
.COMP_FOR
in cur_token
.subtypes
or
643 subtypes
.COMP_IF
in cur_token
.subtypes
):
644 # We don't mind breaking before the 'for' or 'if' of a list comprehension.
646 if subtypes
.UNARY_OPERATOR
in prev_token
.subtypes
:
647 # Try not to break after a unary operator.
648 return style
.Get('SPLIT_PENALTY_AFTER_UNARY_OPERATOR')
650 # Breaking after a comma is fine, if need be.
652 if pval
== '**' or cval
== '**':
653 return split_penalty
.STRONGLY_CONNECTED
654 if (subtypes
.VARARGS_STAR
in prev_token
.subtypes
or
655 subtypes
.KWARGS_STAR_STAR
in prev_token
.subtypes
):
656 # Don't split after a varargs * or kwargs **.
657 return split_penalty
.UNBREAKABLE
658 if prev_token
.OpensScope() and cval
!= '(':
660 return style
.Get('SPLIT_PENALTY_AFTER_OPENING_BRACKET')
662 # Don't split before a colon.
663 return split_penalty
.UNBREAKABLE
665 # Don't split before an assignment.
666 return split_penalty
.UNBREAKABLE
667 if (subtypes
.DEFAULT_OR_NAMED_ASSIGN
in prev_token
.subtypes
or
668 subtypes
.DEFAULT_OR_NAMED_ASSIGN
in cur_token
.subtypes
):
669 # Don't break before or after a default or named assignment.
670 return split_penalty
.UNBREAKABLE
672 # We would rather not split before an equality operator.
673 return split_penalty
.STRONGLY_CONNECTED
674 if cur_token
.ClosesScope():
675 # Give a slight penalty for splitting before the closing scope.