]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | # Copyright 2015 Google Inc. All Rights Reserved. |
2 | # | |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | """Decide what the format for the code should be. | |
15 | ||
16 | The `logical_line.LogicalLine`s are now ready to be formatted. LogicalLines that | |
17 | can be merged together are. The best formatting is returned as a string. | |
18 | ||
19 | Reformat(): the main function exported by this module. | |
20 | """ | |
21 | ||
22 | import collections | |
23 | import heapq | |
24 | import re | |
25 | ||
26 | from yapf_third_party._ylib2to3 import pytree | |
27 | from yapf_third_party._ylib2to3.pgen2 import token | |
28 | ||
29 | from yapf.pytree import pytree_utils | |
30 | from yapf.yapflib import format_decision_state | |
31 | from yapf.yapflib import format_token | |
32 | from yapf.yapflib import line_joiner | |
33 | from yapf.yapflib import style | |
34 | ||
35 | ||
def Reformat(llines, lines=None):
  """Format every logical line and return the resulting source text.

  Arguments:
    llines: (list of logical_line.LogicalLine) The logical lines to format.
    lines: (set of int) Line numbers that may be modified, or None when there
      is no line range restriction.

  Returns:
    A string holding the reformatted code.
  """
  emitted = []
  previous = None  # Most recently emitted logical line.
  indent_width = style.Get('INDENT_WIDTH')

  for lline in _SingleOrMergedLines(llines):
    _FormatFirstToken(lline.first, lline.depth, previous, emitted)

    indent_amt = indent_width * lline.depth
    state = format_decision_state.FormatDecisionState(lline, indent_amt)
    state.MoveStateToNextToken()

    if not lline.disable:
      # Strip trailing whitespace from a leading comment or, failing that,
      # from a closing comment.
      if lline.first.is_comment:
        edge_comment = lline.first
      elif lline.last.is_comment:
        edge_comment = lline.last
      else:
        edge_comment = None
      if edge_comment is not None:
        edge_comment.value = edge_comment.value.rstrip()

      if previous and previous.disable:
        # Crossing from a disabled region into an enabled one: preserve the
        # vertical gap between the two.
        _RetainRequiredVerticalSpacingBetweenTokens(lline.first, previous.last,
                                                    lines)

      if any(tok.is_comment for tok in lline.tokens):
        _RetainVerticalSpacingBeforeComments(lline)

    if lline.disable or _LineHasContinuationMarkers(lline):
      # Leave both horizontal and vertical spacing as written.
      _RetainHorizontalSpacing(lline)
      _RetainRequiredVerticalSpacing(lline, previous, lines)
      _EmitLineUnformatted(state)

    elif (_LineContainsPylintDisableLineTooLong(lline) or
          _LineContainsI18n(lline)):
      # Vertical spacing stays put; only horizontal spacing gets fixed.
      _RetainRequiredVerticalSpacing(lline, previous, lines)
      _EmitLineUnformatted(state)

    elif _CanPlaceOnSingleLine(lline) and not any(
        tok.must_break_before for tok in lline.tokens):
      # Everything fits on one physical line, so no token starts a new line.
      while state.next_token:
        state.AddTokenToState(newline=False, dry_run=False)

    elif not _AnalyzeSolutionSpace(state):
      # Failsafe: the search found no layout, so start over from a fresh
      # state and emit the line the way it was written.
      state = format_decision_state.FormatDecisionState(lline, indent_amt)
      state.MoveStateToNextToken()
      _RetainHorizontalSpacing(lline)
      _RetainRequiredVerticalSpacing(lline, previous, None)
      _EmitLineUnformatted(state)

    emitted.append(lline)
    previous = lline

  _AlignTrailingComments(emitted)
  return _FormatFinalLines(emitted)
103 | ||
104 | ||
105 | def _RetainHorizontalSpacing(line): | |
106 | """Retain all horizontal spacing between tokens.""" | |
107 | for tok in line.tokens: | |
108 | tok.RetainHorizontalSpacing(line.first.column, line.depth) | |
109 | ||
110 | ||
def _RetainRequiredVerticalSpacing(cur_line, prev_line, lines):
  """Keep the vertical spacing in front of each token of cur_line.

  Arguments:
    cur_line: (logical_line.LogicalLine) The line being emitted.
    prev_line: (logical_line.LogicalLine) The line emitted before it, or None.
    lines: (set of int) Line numbers that may be modified, or None.
  """
  # The first token is measured against the end of the previous line, if any.
  predecessor = prev_line.last if prev_line is not None else None

  if cur_line.disable:
    # A disabled line must not be reformatted at all, so treat the editable
    # range as empty.
    lines = set()

  for current in cur_line.tokens:
    _RetainRequiredVerticalSpacingBetweenTokens(current, predecessor, lines)
    predecessor = current
125 | ||
126 | ||
127 | def _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines): | |
128 | """Retain vertical spacing between two tokens if not in editable range.""" | |
129 | if prev_tok is None: | |
130 | return | |
131 | ||
132 | if prev_tok.is_string: | |
133 | prev_lineno = prev_tok.lineno + prev_tok.value.count('\n') | |
134 | elif prev_tok.is_pseudo: | |
135 | if not prev_tok.previous_token.is_multiline_string: | |
136 | prev_lineno = prev_tok.previous_token.lineno | |
137 | else: | |
138 | prev_lineno = prev_tok.lineno | |
139 | else: | |
140 | prev_lineno = prev_tok.lineno | |
141 | ||
142 | if cur_tok.is_comment: | |
143 | cur_lineno = cur_tok.lineno - cur_tok.value.count('\n') | |
144 | else: | |
145 | cur_lineno = cur_tok.lineno | |
146 | ||
147 | if not prev_tok.is_comment and prev_tok.value.endswith('\\'): | |
148 | prev_lineno += prev_tok.value.count('\n') | |
149 | ||
150 | required_newlines = cur_lineno - prev_lineno | |
151 | if cur_tok.is_comment and not prev_tok.is_comment: | |
152 | # Don't adjust between a comment and non-comment. | |
153 | pass | |
154 | elif lines and lines.intersection(range(prev_lineno, cur_lineno + 1)): | |
155 | desired_newlines = cur_tok.whitespace_prefix.count('\n') | |
156 | whitespace_lines = range(prev_lineno + 1, cur_lineno) | |
157 | deletable_lines = len(lines.intersection(whitespace_lines)) | |
158 | required_newlines = max(required_newlines - deletable_lines, | |
159 | desired_newlines) | |
160 | ||
161 | cur_tok.AdjustNewlinesBefore(required_newlines) | |
162 | ||
163 | ||
def _RetainVerticalSpacingBeforeComments(line):
  """Keep a blank line in front of comments that originally had one.

  Arguments:
    line: (logical_line.LogicalLine) The line whose comment tokens are
      checked against the token that precedes each of them.
  """
  preceding = None
  for tok in line.tokens:
    if preceding and tok.is_comment:
      # The comment's lineno is moved back by the newlines in its value
      # before being compared with the preceding token's line.
      comment_start = tok.lineno - tok.value.count('\n')
      if comment_start - preceding.lineno > 1:
        tok.AdjustNewlinesBefore(ONE_BLANK_LINE)

    preceding = tok
173 | ||
174 | ||
175 | def _EmitLineUnformatted(state): | |
176 | """Emit the line without formatting. | |
177 | ||
178 | The line contains code that if reformatted would break a non-syntactic | |
179 | convention. E.g., i18n comments and function calls are tightly bound by | |
180 | convention. Instead, we calculate when / if a newline should occur and honor | |
181 | that. But otherwise the code emitted will be the same as the original code. | |
182 | ||
183 | Arguments: | |
184 | state: (format_decision_state.FormatDecisionState) The format decision | |
185 | state. | |
186 | """ | |
187 | while state.next_token: | |
188 | previous_token = state.next_token.previous_token | |
189 | previous_lineno = previous_token.lineno | |
190 | ||
191 | if previous_token.is_multiline_string or previous_token.is_string: | |
192 | previous_lineno += previous_token.value.count('\n') | |
193 | ||
194 | if previous_token.is_continuation: | |
195 | newline = False | |
196 | else: | |
197 | newline = state.next_token.lineno > previous_lineno | |
198 | ||
199 | state.AddTokenToState(newline=newline, dry_run=False) | |
200 | ||
201 | ||
def _LineContainsI18n(line):
  """Tell whether the line holds an i18n comment or an i18n function call.

  I18n comments and the pseudo-function calls they describe belong together;
  separating them breaks i18n.

  Arguments:
    line: (logical_line.LogicalLine) The line currently being formatted.

  Returns:
    True if the line contains i18n comments or function calls. False otherwise.
  """
  comment_pattern = style.Get('I18N_COMMENT')
  if comment_pattern and any(
      tok.is_comment and re.match(comment_pattern, tok.value)
      for tok in line.tokens):
    # A comment matching the configured i18n pattern was found.
    return True

  i18n_calls = style.Get('I18N_FUNCTION_CALL')
  if i18n_calls:
    # Look for a configured function name immediately followed by '('.
    for callee, follower in zip(line.tokens, line.tokens[1:]):
      if follower.value == '(' and callee.value in i18n_calls:
        return True

  return False
227 | ||
228 | ||
229 | def _LineContainsPylintDisableLineTooLong(line): | |
230 | """Return true if there is a "pylint: disable=line-too-long" comment.""" | |
231 | return re.search(r'\bpylint:\s+disable=line-too-long\b', line.last.value) | |
232 | ||
233 | ||
234 | def _LineHasContinuationMarkers(line): | |
235 | """Return true if the line has continuation markers in it.""" | |
236 | return any(tok.is_continuation for tok in line.tokens) | |
237 | ||
238 | ||
def _CanPlaceOnSingleLine(line):
  """Decide whether the whole logical line may be emitted on one line.

  Arguments:
    line: (logical_line.LogicalLine) The line currently being formatted.

  Returns:
    True if the line can or should be added to a single line. False otherwise.
  """
  token_types = [tok.type for tok in line.tokens]

  if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'):
    # A comma directly before a closing parenthesis forces a split.
    for before, after in zip(token_types, token_types[1:]):
      if after == token.RPAR and before == token.COMMA:
        return False

  if style.Get('FORCE_MULTILINE_DICT') and token.LBRACE in token_types:
    return False

  indent_amt = style.Get('INDENT_WIDTH') * line.depth

  # A closing pylint/pytype/copybara comment is left out of the length check,
  # so measure up to the token in front of it.
  last = line.last
  skipped_at_end = 1
  if (last.is_pylint_comment or last.is_pytype_comment or
      last.is_copybara_comment):
    last = last.previous_token
    skipped_at_end = 2

  if last is None:
    return True

  if last.total_length + indent_amt > style.Get('COLUMN_LIMIT'):
    return False

  # Any comment among the remaining leading tokens rules out a single line.
  return not any(tok.is_comment for tok in line.tokens[:-skipped_at_end])
267 | ||
268 | ||
def _AlignTrailingComments(final_lines):
  """Align trailing comments to the same column.

  Scans final_lines for a comment token whose value starts with '#' and whose
  spaces_required_before is a list of candidate columns. Starting at that
  line, consecutive lines up to the next blank line (or the end of the list)
  are treated as one block: the longest pre-comment text in the block is
  measured, an alignment column is chosen, and the value of every comment
  token in the block is rewritten with leading padding. Disabled lines inside
  a block are counted but left untouched.

  Arguments:
    final_lines: (list of logical_line.LogicalLine) The formatted lines. The
      comment tokens they contain are modified in place.
  """
  final_lines_index = 0
  while final_lines_index < len(final_lines):
    line = final_lines[final_lines_index]
    assert line.tokens

    # Set once a block starting at this line has been aligned; the index has
    # then already been advanced past the whole block.
    processed_content = False

    for tok in line.tokens:
      if (tok.is_comment and isinstance(tok.spaces_required_before, list) and
          tok.value.startswith('#')):
        # All trailing comments and comments that appear on a line by themselves
        # in this block should be indented at the same level. The block is
        # terminated by an empty line or EOF. Enumerate through each line in
        # the block and calculate the max line length. Once complete, use the
        # first col value greater than that value and create the necessary
        # whitespace for each line accordingly.
        all_pc_line_lengths = []  # All pre-comment line lengths
        max_line_length = 0

        while True:
          # EOF
          if final_lines_index + len(all_pc_line_lengths) == len(final_lines):
            break

          this_line = final_lines[final_lines_index + len(all_pc_line_lengths)]

          # Blank line - note that content is preformatted so we don't need to
          # worry about spaces/tabs; a blank line will always be '\n\n'.
          # The very first line of the block is exempt from this check.
          assert this_line.tokens
          if (all_pc_line_lengths and
              this_line.tokens[0].formatted_whitespace_prefix.startswith('\n\n')
             ):
            break

          if this_line.disable:
            # Keep a placeholder so list positions still map onto final_lines;
            # the empty entry makes the update pass below skip this line.
            all_pc_line_lengths.append([])
            continue

          # Calculate the length of each line in this logical line.
          line_content = ''
          pc_line_lengths = []

          for line_tok in this_line.tokens:
            whitespace_prefix = line_tok.formatted_whitespace_prefix

            newline_index = whitespace_prefix.rfind('\n')
            if newline_index != -1:
              # This token starts a new physical line: record the length of
              # the one just finished and start measuring afresh.
              max_line_length = max(max_line_length, len(line_content))
              line_content = ''

              # Only the text after the last newline counts towards the
              # new physical line.
              whitespace_prefix = whitespace_prefix[newline_index + 1:]

            if line_tok.is_comment:
              # Length of the text that precedes this comment on its line.
              pc_line_lengths.append(len(line_content))
            else:
              line_content += '{}{}'.format(whitespace_prefix, line_tok.value)

          if pc_line_lengths:
            max_line_length = max(max_line_length, max(pc_line_lengths))

          all_pc_line_lengths.append(pc_line_lengths)

        # Calculate the aligned column value
        max_line_length += 2

        # Pick the first candidate column beyond the longest line, falling
        # back to the longest line length itself when no candidate is larger.
        aligned_col = None
        for potential_col in tok.spaces_required_before:
          if potential_col > max_line_length:
            aligned_col = potential_col
            break

        if aligned_col is None:
          aligned_col = max_line_length

        # Update the comment token values based on the aligned values
        for all_pc_line_lengths_index, pc_line_lengths in enumerate(
            all_pc_line_lengths):
          if not pc_line_lengths:
            # Disabled line or line without comments: nothing to rewrite.
            continue

          this_line = final_lines[final_lines_index + all_pc_line_lengths_index]

          pc_line_length_index = 0
          for line_tok in this_line.tokens:
            if line_tok.is_comment:
              assert pc_line_length_index < len(pc_line_lengths)
              assert pc_line_lengths[pc_line_length_index] < aligned_col

              # Note that there may be newlines embedded in the comments, so
              # we need to apply a whitespace prefix to each line.
              whitespace = ' ' * (
                  aligned_col - pc_line_lengths[pc_line_length_index] - 1)
              pc_line_length_index += 1

              line_content = []

              for comment_line_index, comment_line in enumerate(
                  line_tok.value.split('\n')):
                line_content.append('{}{}'.format(whitespace,
                                                  comment_line.strip()))

                if comment_line_index == 0:
                  # Later lines of the comment have no code in front of
                  # them, so they are padded from the start of the line.
                  whitespace = ' ' * (aligned_col - 1)

              line_content = '\n'.join(line_content)

              # Account for initial whitespace already slated for the
              # beginning of the line.
              existing_whitespace_prefix = \
                line_tok.formatted_whitespace_prefix.lstrip('\n')

              if line_content.startswith(existing_whitespace_prefix):
                line_content = line_content[len(existing_whitespace_prefix):]

              line_tok.value = line_content

          assert pc_line_length_index == len(pc_line_lengths)

        # Skip past every line of the block that was just handled.
        final_lines_index += len(all_pc_line_lengths)

        processed_content = True
        break

    if not processed_content:
      final_lines_index += 1
396 | ||
397 | ||
398 | def _FormatFinalLines(final_lines): | |
399 | """Compose the final output from the finalized lines.""" | |
400 | formatted_code = [] | |
401 | for line in final_lines: | |
402 | formatted_line = [] | |
403 | for tok in line.tokens: | |
404 | if not tok.is_pseudo: | |
405 | formatted_line.append(tok.formatted_whitespace_prefix) | |
406 | formatted_line.append(tok.value) | |
407 | elif (not tok.next_token.whitespace_prefix.startswith('\n') and | |
408 | not tok.next_token.whitespace_prefix.startswith(' ')): | |
409 | if (tok.previous_token.value == ':' or | |
410 | tok.next_token.value not in ',}])'): | |
411 | formatted_line.append(' ') | |
412 | ||
413 | formatted_code.append(''.join(formatted_line)) | |
414 | ||
415 | return ''.join(formatted_code) + '\n' | |
416 | ||
417 | ||
418 | class _StateNode(object): | |
419 | """An edge in the solution space from 'previous.state' to 'state'. | |
420 | ||
421 | Attributes: | |
422 | state: (format_decision_state.FormatDecisionState) The format decision state | |
423 | for this node. | |
424 | newline: If True, then on the edge from 'previous.state' to 'state' a | |
425 | newline is inserted. | |
426 | previous: (_StateNode) The previous state node in the graph. | |
427 | """ | |
428 | ||
429 | # TODO(morbo): Add a '__cmp__' method. | |
430 | ||
431 | def __init__(self, state, newline, previous): | |
432 | self.state = state.Clone() | |
433 | self.newline = newline | |
434 | self.previous = previous | |
435 | ||
436 | def __repr__(self): # pragma: no cover | |
437 | return 'StateNode(state=[\n{0}\n], newline={1})'.format( | |
438 | self.state, self.newline) | |
439 | ||
440 | ||
441 | # A tuple of (penalty, count) that is used to prioritize the BFS. In case of | |
442 | # equal penalties, we prefer states that were inserted first. During state | |
443 | # generation, we make sure that we insert states first that break the line as | |
444 | # late as possible. | |
445 | _OrderedPenalty = collections.namedtuple('OrderedPenalty', ['penalty', 'count']) | |
446 | ||
447 | # An item in the prioritized BFS search queue. The 'StateNode's 'state' has | |
448 | # the given '_OrderedPenalty'. | |
449 | _QueueItem = collections.namedtuple('QueueItem', | |
450 | ['ordered_penalty', 'state_node']) | |
451 | ||
452 | ||
def _AnalyzeSolutionSpace(initial_state):
  """Search the solution space for the lowest-penalty layout of the line.

  The search is a variant of Dijkstra's algorithm over the graph of line
  states. It looks for the cheapest path from 'initial_state' to a state in
  which every token has been placed, then replays that path onto
  'initial_state'.

  Arguments:
    initial_state: (format_decision_state.FormatDecisionState) The initial state
      to start the search from.

  Returns:
    True if a formatting solution was found. False otherwise.
  """
  seen = set()
  p_queue = []

  # Seed the queue with the starting state at zero penalty.
  count = 0
  start = _StateNode(initial_state, False, None)
  heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(0, count), start))
  count += 1

  while p_queue:
    # Peek at the cheapest entry without removing it yet.
    head = p_queue[0]
    node = head.state_node
    if not node.state.next_token:
      # Every token is placed: this is the cheapest complete layout.
      _ReconstructPath(initial_state, heapq.heappop(p_queue).state_node)
      return True

    penalty = head.ordered_penalty.penalty
    heapq.heappop(p_queue)

    if count > 10000:
      node.state.ignore_stack_for_comparison = True

    # Add first and compare sizes afterwards: state hashing is expensive and
    # this hashes the state once instead of twice in the common case.
    size_before = len(seen)
    seen.add(node.state)
    if len(seen) == size_before:
      # The state was already visited.
      continue

    # FIXME(morbo): Add a 'decision' element?

    # Expand the node: first without a line break, then with one.
    for with_newline in (False, True):
      count = _AddNextStateToQueue(penalty, node, with_newline, count, p_queue)

  # The queue ran dry without reaching a complete layout.
  return False
507 | ||
508 | ||
def _AddNextStateToQueue(penalty, previous_node, newline, count, p_queue):
  """Queue the state that follows 'previous_node', if the move is allowed.

  'previous_node' has been reached with a penalty of 'penalty'. The next
  token is added with a line break when 'newline' is True and without one
  otherwise.

  Arguments:
    penalty: (int) The penalty associated with the path up to this point.
    previous_node: (_StateNode) The last _StateNode inserted into the priority
      queue.
    newline: (bool) Add a newline if True.
    count: (int) The running insertion counter used to order queue entries.
    p_queue: (heapq) The priority queue representing the solution space.

  Returns:
    The updated counter: unchanged if nothing was queued, otherwise one more.
  """
  prior_state = previous_node.state
  must_split = prior_state.MustSplit()

  if newline:
    if not prior_state.CanSplit(must_split):
      # A break is not possible here.
      return count
  elif must_split:
    # A break is mandatory here, so the unbroken variant is not a candidate.
    return count

  successor = _StateNode(prior_state, newline, previous_node)
  penalty += successor.state.AddTokenToState(
      newline=newline, dry_run=True, must_split=must_split)
  entry = _QueueItem(_OrderedPenalty(penalty, count), successor)
  heapq.heappush(p_queue, entry)
  return count + 1
539 | ||
540 | ||
541 | def _ReconstructPath(initial_state, current): | |
542 | """Reconstruct the path through the queue with lowest penalty. | |
543 | ||
544 | Arguments: | |
545 | initial_state: (format_decision_state.FormatDecisionState) The initial state | |
546 | to start the search from. | |
547 | current: (_StateNode) The node in the decision graph that is the end point | |
548 | of the path with the least penalty. | |
549 | """ | |
550 | path = collections.deque() | |
551 | ||
552 | while current.previous: | |
553 | path.appendleft(current) | |
554 | current = current.previous | |
555 | ||
556 | for node in path: | |
557 | initial_state.AddTokenToState(newline=node.newline, dry_run=False) | |
558 | ||
559 | ||
# Stack of indentation depths of the class/def/decorator lines currently being
# tracked. _FormatFirstToken() pops entries deeper than the line it is handling
# and pushes a depth when it meets a more deeply nested class or def. This is
# module-level state and is not reset in this file.
NESTED_DEPTH = []
561 | ||
562 | ||
def _FormatFirstToken(first_token, indent_depth, prev_line, final_lines):
  """Give the first token of a logical line its newlines and indentation.

  Also keeps the module-level NESTED_DEPTH stack up to date so that the
  first class or function nested inside another one can be recognised.

  Arguments:
    first_token: (format_token.FormatToken) The first token in the logical line.
    indent_depth: (int) The line's indentation depth.
    prev_line: (logical_line.LogicalLine) The logical line previous to this
      line, or None for the first line.
    final_lines: (list of logical_line.LogicalLine) The logical lines that have
      already been processed.
  """
  global NESTED_DEPTH

  # Drop every tracked scope that is deeper than the current line.
  while NESTED_DEPTH and NESTED_DEPTH[-1] > indent_depth:
    NESTED_DEPTH.pop()

  first_nested = False
  if _IsClassOrDef(first_token):
    if NESTED_DEPTH:
      if NESTED_DEPTH[-1] < indent_depth:
        # Deeper than the innermost tracked scope: a newly nested definition.
        first_nested = True
        NESTED_DEPTH.append(indent_depth)
    else:
      NESTED_DEPTH = [indent_depth]

  newlines_before = _CalculateNumberOfNewlines(first_token, indent_depth,
                                               prev_line, final_lines,
                                               first_nested)
  first_token.AddWhitespacePrefix(newlines_before, indent_level=indent_depth)
593 | ||
594 | ||
# Newline counts to place before a token. Each value is the number of newline
# characters, which is one more than the number of blank lines it produces.
NO_BLANK_LINES = 1
ONE_BLANK_LINE = 2
TWO_BLANK_LINES = 3
598 | ||
599 | ||
600 | def _IsClassOrDef(tok): | |
601 | if tok.value in {'class', 'def', '@'}: | |
602 | return True | |
603 | return (tok.next_token and tok.value == 'async' and | |
604 | tok.next_token.value == 'def') | |
605 | ||
606 | ||
def _CalculateNumberOfNewlines(first_token, indent_depth, prev_line,
                               final_lines, first_nested):
  """Calculate the number of newlines we need to add.

  The rules are tried in order and the first one that applies decides the
  result: first line of the file, docstrings, top-level names after imports,
  lines following a docstring, class/def lines, and finally the spacing found
  in the original source. Besides returning a count, some branches reset
  first_token.newlines to None or adjust the newlines of tokens on lines that
  were already processed.

  Arguments:
    first_token: (format_token.FormatToken) The first token in the logical
      line.
    indent_depth: (int) The line's indentation depth.
    prev_line: (logical_line.LogicalLine) The logical line previous to this
      line, or None if this is the first line.
    final_lines: (list of logical_line.LogicalLine) The logical lines that have
      already been processed.
    first_nested: (boolean) Whether this is the first nested class or function.

  Returns:
    The number of newlines needed before the first token.
  """
  # TODO(morbo): Special handling for imports.
  # TODO(morbo): Create a knob that can tune these.
  if prev_line is None:
    # The first line in the file. Don't add blank lines.
    # FIXME(morbo): Is this correct?
    if first_token.newlines is not None:
      first_token.newlines = None
    return 0

  if first_token.is_docstring:
    if (prev_line.first.value == 'class' and
        style.Get('BLANK_LINE_BEFORE_CLASS_DOCSTRING')):
      # Enforce a blank line before a class's docstring.
      return ONE_BLANK_LINE
    elif (prev_line.first.value.startswith('#') and
          style.Get('BLANK_LINE_BEFORE_MODULE_DOCSTRING')):
      # Enforce a blank line before a module's docstring.
      return ONE_BLANK_LINE
    # The docstring shouldn't have a newline before it.
    return NO_BLANK_LINES

  if first_token.is_name and not indent_depth:
    if prev_line.first.value in {'from', 'import'}:
      # Support custom number of blank lines between top-level imports and
      # variable definitions.
      return 1 + style.Get(
          'BLANK_LINES_BETWEEN_TOP_LEVEL_IMPORTS_AND_VARIABLES')

  prev_last_token = prev_line.last
  if prev_last_token.is_docstring:
    if (not indent_depth and first_token.value in {'class', 'def', 'async'}):
      # Separate a class or function from the module-level docstring with
      # appropriate number of blank lines.
      return 1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION')
    if (first_nested and
        not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF') and
        _IsClassOrDef(first_token)):
      first_token.newlines = None
      return NO_BLANK_LINES
    # Otherwise keep whatever separation the source had after the docstring,
    # capped at one blank line.
    if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token,
                                       prev_last_token):
      return NO_BLANK_LINES
    else:
      return ONE_BLANK_LINE

  if _IsClassOrDef(first_token):
    # TODO(morbo): This can go once the blank line calculator is more
    # sophisticated.
    if not indent_depth:
      # This is a top-level class or function.
      # A comment whose whitespace prefix holds no newline shares its line
      # with the preceding code.
      is_inline_comment = prev_last_token.whitespace_prefix.count('\n') == 0
      if (not prev_line.disable and prev_last_token.is_comment and
          not is_inline_comment):
        # This token follows a non-inline comment.
        if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token,
                                           prev_last_token):
          # Assume that the comment is "attached" to the current line.
          # Therefore, we want two blank lines before the comment.
          # Walk back to the first line of the run of comment lines that
          # ends at the previous line.
          index = len(final_lines) - 1
          while index > 0:
            if not final_lines[index - 1].is_comment:
              break
            index -= 1
          # NOTE(review): if the loop stops with index == 0, index - 1 is -1
          # and this inspects the last processed line rather than a line
          # before the comment run -- confirm this is intended.
          if final_lines[index - 1].first.value == '@':
            # The comment run directly follows a decorator line: no blank
            # line before the first comment of the run.
            final_lines[index].first.AdjustNewlinesBefore(NO_BLANK_LINES)
          else:
            prev_last_token.AdjustNewlinesBefore(
                1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION'))
          if first_token.newlines is not None:
            first_token.newlines = None
          return NO_BLANK_LINES
    elif _IsClassOrDef(prev_line.first):
      if first_nested and not style.Get(
          'BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'):
        first_token.newlines = None
        return NO_BLANK_LINES

  # Calculate how many newlines were between the original lines. We want to
  # retain that formatting if it doesn't violate one of the style guide rules.
  if first_token.is_comment:
    first_token_lineno = first_token.lineno - first_token.value.count('\n')
  else:
    first_token_lineno = first_token.lineno

  prev_last_token_lineno = prev_last_token.lineno
  if prev_last_token.is_multiline_string:
    prev_last_token_lineno += prev_last_token.value.count('\n')

  # Any gap of more than one line collapses to a single blank line.
  if first_token_lineno - prev_last_token_lineno > 1:
    return ONE_BLANK_LINE

  return NO_BLANK_LINES
716 | ||
717 | ||
def _SingleOrMergedLines(lines):
  """Yield the logical lines to format, merging lines where appropriate.

  Arguments:
    lines: (list of logical_line.LogicalLine) Lines we want to format.

  Yields:
    Either a single line, if the current line cannot be merged with the
    succeeding line, or the next two lines merged into one line. A disabled
    line absorbs every following line that has the same line number.
  """
  position = 0
  last_was_merged = False
  while position < len(lines):
    current = lines[position]
    if current.disable:
      position += 1
      # Fold in all following lines that share this line's line number.
      while position < len(lines):
        follower = lines[position]
        if follower.lineno != current.lineno:
          break
        if current.last.value != ':':
          # Join the two with a synthetic ';' token.
          column = current.last.column + 2
          leaf = pytree.Leaf(
              type=token.SEMI,
              value=';',
              context=('', (current.lineno, column)))
          current.AppendToken(
              format_token.FormatToken(leaf, pytree_utils.NodeName(leaf)))
        for tok in follower.tokens:
          current.AppendToken(tok)
        position += 1
      yield current
    elif line_joiner.CanMergeMultipleLines(lines[position:], last_was_merged):
      # TODO(morbo): This splice is potentially very slow. Come up with a more
      # performance-friendly way of determining if two lines can be merged.
      follower = lines[position + 1]
      for tok in follower.tokens:
        current.AppendToken(tok)
      if len(follower.tokens) == 1 and follower.first.is_multiline_string:
        # This may be a multiline shebang. Keep its formatting so the shell
        # script's syntax is not disturbed.
        current.disable = True
      yield current
      position += 2
      last_was_merged = True
    else:
      yield current
      position += 1
      last_was_merged = False
764 | ||
765 | ||
766 | def _NoBlankLinesBeforeCurrentToken(text, cur_token, prev_token): | |
767 | """Determine if there are no blank lines before the current token. | |
768 | ||
769 | The previous token is a docstring or comment. The prev_token_lineno is the | |
770 | start of the text of that token. Counting the number of newlines in its text | |
771 | gives us the extent and thus where the line number of the end of the | |
772 | docstring or comment. After that, we just compare it to the current token's | |
773 | line number to see if there are blank lines between them. | |
774 | ||
775 | Arguments: | |
776 | text: (unicode) The text of the docstring or comment before the current | |
777 | token. | |
778 | cur_token: (format_token.FormatToken) The current token in the logical line. | |
779 | prev_token: (format_token.FormatToken) The previous token in the logical | |
780 | line. | |
781 | ||
782 | Returns: | |
783 | True if there is no blank line before the current token. | |
784 | """ | |
785 | cur_token_lineno = cur_token.lineno | |
786 | if cur_token.is_comment: | |
787 | cur_token_lineno -= cur_token.value.count('\n') | |
788 | num_newlines = text.count('\n') if not prev_token.is_comment else 0 | |
789 | return prev_token.lineno + num_newlines == cur_token_lineno - 1 |