#!/usr/bin/env python

# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Automatically formats Python code to conform to the PEP 8 style guide.

Fixes that only need be done once can be added by adding a function of the form
"fix_<code>(source)" to this module. They should return the fixed source code.
These fixes are picked up by apply_global_fixes().

Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
class documentation for more information.

"""
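
# A minimal sketch of the "fix_<code>(source)" convention described in the
# docstring above. The code "w000" and the transformation are hypothetical,
# and the function is left commented out so that apply_global_fixes() does
# not actually pick it up:
#
#     def fix_w000(source):
#         """Replace form feeds with newlines."""
#         return source.replace('\f', '\n')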

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import codecs
import collections
import copy
import difflib
import fnmatch
import inspect
import io
import itertools
import keyword
import locale
import os
import re
import signal
import sys
import textwrap
import token
import tokenize
import warnings
import ast
from configparser import ConfigParser as SafeConfigParser, Error

import pycodestyle
from pycodestyle import STARTSWITH_INDENT_STATEMENT_REGEX


__version__ = '2.0.4'


CR = '\r'
LF = '\n'
CRLF = '\r\n'


PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')

EXIT_CODE_OK = 0
EXIT_CODE_ERROR = 1
EXIT_CODE_EXISTS_DIFF = 2
EXIT_CODE_ARGPARSE_ERROR = 99

# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])


DEFAULT_IGNORE = 'E226,E24,W50,W690'  # TODO: use pycodestyle.DEFAULT_IGNORE
DEFAULT_INDENT_SIZE = 4
# These fixes conflict with each other; if the `--ignore` setting causes
# both to be enabled, disable both of them.
CONFLICTING_CODES = ('W503', 'W504')

# W602 is handled separately due to the need to avoid "with_traceback".
CODE_TO_2TO3 = {
    'E231': ['ws_comma'],
    'E721': ['idioms'],
    'W690': ['apply',
             'except',
             'exitfunc',
             'numliterals',
             'operator',
             'paren',
             'reduce',
             'renames',
             'standarderror',
             'sys_exc',
             'throw',
             'tuple_params',
             'xreadlines']}


if sys.platform == 'win32':  # pragma: no cover
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'),
                                  'pycodestyle')
# fallback, use .pep8
if not os.path.exists(DEFAULT_CONFIG):  # pragma: no cover
    if sys.platform == 'win32':
        DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
    else:
        DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')


MAX_PYTHON_FILE_DETECTION_BYTES = 1024


def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
    """Return opened file with a specific encoding."""
    if not encoding:
        encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)

    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings

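# Usage sketch for the helper above (the filename is hypothetical): open a
# source file with its detected encoding while preserving line endings:
#
#     with open_with_encoding('example.py') as input_file:
#         source = input_file.read()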

def detect_encoding(filename, limit_byte_check=-1):
    """Return file encoding."""
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        with open_with_encoding(filename, encoding=encoding) as test_file:
            test_file.read(limit_byte_check)

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'


def readlines_from_file(filename):
    """Return contents of file."""
    with open_with_encoding(filename) as input_file:
        return input_file.readlines()


def extended_blank_lines(logical_line,
                         blank_lines,
                         blank_before,
                         indent_level,
                         previous_logical):
    """Check for missing blank lines after class declaration."""
    if previous_logical.startswith('def '):
        if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
            yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
    elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
        # Missing blank line between class docstring and method declaration.
        if (
            indent_level and
            not blank_lines and
            not blank_before and
            logical_line.startswith(('def ')) and
            '(self' in logical_line
        ):
            yield (0, 'E301 expected 1 blank line, found 0')


pycodestyle.register_check(extended_blank_lines)

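# Example (assumed) input that the check above flags with E301: a method
# defined right after the class docstring with no separating blank line:
#
#     class Foo(object):
#         """Docstring."""
#         def method(self):    # E301 expected 1 blank line, found 0
#             pass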

def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa):
    """Override pycodestyle's function to provide indentation information."""
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented. Assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line. In turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (
        (DEFAULT_INDENT_SIZE,)
        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
                                     2 * DEFAULT_INDENT_SIZE)
    )

    # Remember how many brackets were opened on each line.
    parens = [0] * nrows

    # Relative indents of physical lines.
    rel_indent = [0] * nrows

    # For each depth, collect a list of opening rows.
    open_rows = [[0]]
    # For each depth, memorize the hanging indentation.
    hangs = [None]

    # Visual indents.
    indent_chances = {}
    last_indent = tokens[0][2]
    indent = [last_indent[1]]

    last_token_multiline = None
    line = None
    last_line = ''
    last_line_begins_with_multiline = False
    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))
            last_line_begins_with_multiline = last_token_multiline

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start

            # Record the initial indent.
            rel_indent[row] = pycodestyle.expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])

            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for visual indent.
                if start[1] != indent[depth]:
                    yield (start, 'E124 {}'.format(indent[depth]))
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening bracket's line
                if hang_closing:
                    yield (start, 'E133 {}'.format(indent[depth]))
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # Visual indent is broken.
                    yield (start, 'E128 {}'.format(indent[depth]))
            elif (hanging_indent or
                  (indent_next and
                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
                # Hanging indent is verified.
                if close_bracket and not hang_closing:
                    yield (start, 'E123 {}'.format(indent_level +
                                                   rel_indent[open_row]))
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # Ignore token lined up with matching one from a previous line.
                pass
            else:
                one_indented = (indent_level + rel_indent[open_row] +
                                DEFAULT_INDENT_SIZE)
                # Indent is broken.
                if hang <= 0:
                    error = ('E122', one_indented)
                elif indent[depth]:
                    error = ('E127', indent[depth])
                elif not close_bracket and hangs[depth]:
                    error = ('E131', one_indented)
                elif hang > DEFAULT_INDENT_SIZE:
                    error = ('E126', one_indented)
                else:
                    hangs[depth] = hang
                    error = ('E121', one_indented)

                yield (start, '{} {}'.format(*error))

        # Look for visual indenting.
        if (
            parens[row] and
            token_type not in (tokenize.NL, tokenize.COMMENT) and
            not indent[depth]
        ):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # Special case for the "if" statement because len("if (") is equal to
        # 4.
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if (
                start[1] not in indent_chances and
                # This is for purposes of speeding up E121 (GitHub #90).
                not last_line.rstrip().endswith(',')
            ):
                # Allow to line up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

        last_line = line

    if (
        indent_next and
        not last_line_begins_with_multiline and
        pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
    ):
        pos = (start[0], indent[0] + 4)
        desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
        if visual_indent:
            yield (pos, 'E129 {}'.format(desired_indent))
        else:
            yield (pos, 'E125 {}'.format(desired_indent))


del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
pycodestyle.register_check(continued_indentation)


class FixPEP8(object):

    """Fix invalid code.

    Fixer methods are prefixed "fix_". The _fix_source() method looks for these
    automatically.

    The fixer method can take either one or two arguments (in addition to
    self). The first argument is "result", which is the error information from
    pycodestyle. The second argument, "logical", is required only for
    logical-line fixes.

    The fixer method can return the list of modified lines or None. An empty
    list would mean that no changes were made. None would mean that only the
    line reported in the pycodestyle error was modified. Note that the modified
    line numbers that are returned are indexed at 1. This typically would
    correspond with the line number reported in the pycodestyle error
    information.

    [fixed method list]
        - e111,e114,e115,e116
        - e121,e122,e123,e124,e125,e126,e127,e128,e129
        - e201,e202,e203
        - e211
        - e221,e222,e223,e224,e225
        - e231
        - e251,e252
        - e261,e262
        - e271,e272,e273,e274,e275
        - e301,e302,e303,e304,e305,e306
        - e401,e402
        - e502
        - e701,e702,e703,e704
        - e711,e712,e713,e714
        - e722
        - e731
        - w291
        - w503,504

    """

    def __init__(self, filename,
                 options,
                 contents=None,
                 long_line_ignore_cache=None):
        self.filename = filename
        if contents is None:
            self.source = readlines_from_file(filename)
        else:
            sio = io.StringIO(contents)
            self.source = sio.readlines()
        self.options = options
        self.indent_word = _get_indentword(''.join(self.source))

        # Collect import lines, keeping only the first occurrence of each
        # import statement.
        self.imports = {}
        for i, line in enumerate(self.source):
            if (line.find("import ") == 0 or line.find("from ") == 0) and \
                    line not in self.imports:
                self.imports[line] = i

        self.long_line_ignore_cache = (
            set() if long_line_ignore_cache is None
            else long_line_ignore_cache)

        # Many fixers are the same even though pycodestyle categorizes them
        # differently.
        self.fix_e115 = self.fix_e112
        self.fix_e121 = self._fix_reindent
        self.fix_e122 = self._fix_reindent
        self.fix_e123 = self._fix_reindent
        self.fix_e124 = self._fix_reindent
        self.fix_e126 = self._fix_reindent
        self.fix_e127 = self._fix_reindent
        self.fix_e128 = self._fix_reindent
        self.fix_e129 = self._fix_reindent
        self.fix_e133 = self.fix_e131
        self.fix_e202 = self.fix_e201
        self.fix_e203 = self.fix_e201
        self.fix_e211 = self.fix_e201
        self.fix_e221 = self.fix_e271
        self.fix_e222 = self.fix_e271
        self.fix_e223 = self.fix_e271
        self.fix_e226 = self.fix_e225
        self.fix_e227 = self.fix_e225
        self.fix_e228 = self.fix_e225
        self.fix_e241 = self.fix_e271
        self.fix_e242 = self.fix_e224
        self.fix_e252 = self.fix_e225
        self.fix_e261 = self.fix_e262
        self.fix_e272 = self.fix_e271
        self.fix_e273 = self.fix_e271
        self.fix_e274 = self.fix_e271
        self.fix_e275 = self.fix_e271
        self.fix_e306 = self.fix_e301
        self.fix_e501 = (
            self.fix_long_line_logically if
            options and (options.aggressive >= 2 or options.experimental) else
            self.fix_long_line_physically)
        self.fix_e703 = self.fix_e702
        self.fix_w292 = self.fix_w291
        self.fix_w293 = self.fix_w291

    def _fix_source(self, results):
        try:
            (logical_start, logical_end) = _find_logical(self.source)
            logical_support = True
        except (SyntaxError, tokenize.TokenError):  # pragma: no cover
            logical_support = False

        completed_lines = set()
        for result in sorted(results, key=_priority_key):
            if result['line'] in completed_lines:
                continue

            fixed_methodname = 'fix_' + result['id'].lower()
            if hasattr(self, fixed_methodname):
                fix = getattr(self, fixed_methodname)

                line_index = result['line'] - 1
                original_line = self.source[line_index]

                is_logical_fix = len(_get_parameters(fix)) > 2
                if is_logical_fix:
                    logical = None
                    if logical_support:
                        logical = _get_logical(self.source,
                                               result,
                                               logical_start,
                                               logical_end)
                        if logical and set(range(
                                logical[0][0] + 1,
                                logical[1][0] + 1)).intersection(
                                    completed_lines):
                            continue

                    modified_lines = fix(result, logical)
                else:
                    modified_lines = fix(result)

                if modified_lines is None:
                    # Force logical fixes to report what they modified.
                    assert not is_logical_fix

                    if self.source[line_index] == original_line:
                        modified_lines = []

                if modified_lines:
                    completed_lines.update(modified_lines)
                elif modified_lines == []:  # Empty list means no fix
                    if self.options.verbose >= 2:
                        print(
                            '---> Not fixing {error} on line {line}'.format(
                                error=result['id'], line=result['line']),
                            file=sys.stderr)
                else:  # We assume one-line fix when None.
                    completed_lines.add(result['line'])
            else:
                if self.options.verbose >= 3:
                    print(
                        "---> '{}' is not defined.".format(fixed_methodname),
                        file=sys.stderr)

                    info = result['info'].strip()
                    print('---> {}:{}:{}:{}'.format(self.filename,
                                                    result['line'],
                                                    result['column'],
                                                    info),
                          file=sys.stderr)

    def fix(self):
        """Return a version of the source code with PEP 8 violations fixed."""
        pep8_options = {
            'ignore': self.options.ignore,
            'select': self.options.select,
            'max_line_length': self.options.max_line_length,
            'hang_closing': self.options.hang_closing,
        }
        results = _execute_pep8(pep8_options, self.source)

        if self.options.verbose:
            progress = {}
            for r in results:
                if r['id'] not in progress:
                    progress[r['id']] = set()
                progress[r['id']].add(r['line'])
            print('---> {n} issue(s) to fix {progress}'.format(
                n=len(results), progress=progress), file=sys.stderr)

        if self.options.line_range:
            start, end = self.options.line_range
            results = [r for r in results
                       if start <= r['line'] <= end]

        self._fix_source(filter_results(source=''.join(self.source),
                                        results=results,
                                        aggressive=self.options.aggressive))

        if self.options.line_range:
            # If number of lines has changed then change line_range.
            count = sum(sline.count('\n')
                        for sline in self.source[start - 1:end])
            self.options.line_range[1] = start + count - 1

        return ''.join(self.source)

    def _fix_reindent(self, result):
        """Fix a badly indented line.

        This is done by adding or removing from its initial indent only.

        """
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()

    def fix_e112(self, result):
        """Fix under-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        if not target.lstrip().startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = self.indent_word + target

    def fix_e113(self, result):
        """Fix unexpected indentation."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        indent = _get_indentation(target)
        stripped = target.lstrip()
        self.source[line_index] = indent[1:] + stripped

    def fix_e116(self, result):
        """Fix over-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        stripped = target.lstrip()

        if not stripped.startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = indent[1:] + stripped

    def fix_e117(self, result):
        """Fix over-indented."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        if indent == '\t':
            return []

        stripped = target.lstrip()

        self.source[line_index] = indent[1:] + stripped

    def fix_e125(self, result):
        """Fix indentation indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        spaces_to_add = num_indent_spaces - len(_get_indentation(target))
        indent = len(_get_indentation(target))
        modified_lines = []

        while len(_get_indentation(self.source[line_index])) >= indent:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
            modified_lines.append(1 + line_index)  # Line indexed at 1.
            line_index -= 1

        return modified_lines

    def fix_e131(self, result):
        """Fix indentation indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent_length = len(_get_indentation(target))
        spaces_to_add = num_indent_spaces - indent_length
        if num_indent_spaces == 0 and indent_length == 0:
            spaces_to_add = 4

        if spaces_to_add >= 0:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
        else:
            offset = abs(spaces_to_add)
            self.source[line_index] = self.source[line_index][offset:]

    def fix_e201(self, result):
        """Remove extraneous whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement='')

        self.source[line_index] = fixed

    def fix_e224(self, result):
        """Remove extraneous whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + target[offset:].replace('\t', ' ')
        self.source[result['line'] - 1] = fixed

    def fix_e225(self, result):
        """Fix missing whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + ' ' + target[offset:]

        # Only proceed if non-whitespace characters match.
        # And make sure we don't break the indentation.
        if (
            fixed.replace(' ', '') == target.replace(' ', '') and
            _get_indentation(fixed) == _get_indentation(target)
        ):
            self.source[result['line'] - 1] = fixed
            error_code = result.get('id', 0)
            try:
                ts = generate_tokens(fixed)
            except (SyntaxError, tokenize.TokenError):
                return
            if not check_syntax(fixed.lstrip()):
                return
            try:
                _missing_whitespace = (
                    pycodestyle.missing_whitespace_around_operator
                )
            except AttributeError:
                # pycodestyle >= 2.11.0
                _missing_whitespace = pycodestyle.missing_whitespace
            errors = list(_missing_whitespace(fixed, ts))
            for e in reversed(errors):
                if error_code != e[1].split()[0]:
                    continue
                offset = e[0][1]
                fixed = fixed[:offset] + ' ' + fixed[offset:]
            self.source[result['line'] - 1] = fixed
        else:
            return []

    def fix_e231(self, result):
        """Add missing whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column']
        fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
        self.source[line_index] = fixed

    def fix_e251(self, result):
        """Remove whitespace around parameter '=' sign."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        # This is necessary since pycodestyle sometimes reports columns that
        # go past the end of the physical line. This happens in cases like:
        # foo(bar\n=None)
        c = min(result['column'] - 1,
                len(target) - 1)

        if target[c].strip():
            fixed = target
        else:
            fixed = target[:c].rstrip() + target[c:].lstrip()

        # There could be an escaped newline
        #
        #     def foo(a=\
        #             1)
        if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
            self.source[line_index] = fixed.rstrip('\n\r \t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]  # Line indexed at 1

        self.source[result['line'] - 1] = fixed

    def fix_e262(self, result):
        """Fix spacing after inline comment hash."""
        target = self.source[result['line'] - 1]
        offset = result['column']

        code = target[:offset].rstrip(' \t#')
        comment = target[offset:].lstrip(' \t#')

        fixed = code + ('  # ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e265(self, result):
        """Fix spacing after block comment hash."""
        target = self.source[result['line'] - 1]

        indent = _get_indentation(target)
        line = target.lstrip(' \t')
        pos = next((index for index, c in enumerate(line) if c != '#'))
        hashes = line[:pos]
        comment = line[pos:].lstrip(' \t')

        # Ignore special comments, even in the middle of the file.
        if comment.startswith('!'):
            return

        fixed = indent + hashes + (' ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

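    # Example of the rewrite performed by fix_e265 above (the input lines
    # are hypothetical):
    #
    #     #comment    -->   # comment
    #     #!special   -->   #!special (left unchanged)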
    def fix_e266(self, result):
        """Fix too many block comment hashes."""
        target = self.source[result['line'] - 1]

        # Leave stylistic outlined blocks alone.
        if target.strip().endswith('#'):
            return

        indentation = _get_indentation(target)
        fixed = indentation + '# ' + target.lstrip('# \t')

        self.source[result['line'] - 1] = fixed

    def fix_e271(self, result):
        """Fix extraneous whitespace around keywords."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement=' ')

        if fixed == target:
            return []
        else:
            self.source[line_index] = fixed

    def fix_e301(self, result):
        """Add missing blank line."""
        cr = '\n'
        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]

    def fix_e302(self, result):
        """Add missing 2 blank lines."""
        add_linenum = 2 - int(result['info'].split()[-1])
        offset = 1
        if self.source[result['line'] - 2].strip() == "\\":
            offset = 2
        cr = '\n' * add_linenum
        self.source[result['line'] - offset] = (
            cr + self.source[result['line'] - offset]
        )

    def fix_e303(self, result):
        """Remove extra blank lines."""
        delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
        delete_linenum = max(1, delete_linenum)

        # We need to count because pycodestyle reports an offset line number
        # if there are comments.
        cnt = 0
        line = result['line'] - 2
        modified_lines = []
        while cnt < delete_linenum and line >= 0:
            if not self.source[line].strip():
                self.source[line] = ''
                modified_lines.append(1 + line)  # Line indexed at 1
                cnt += 1
            line -= 1

        return modified_lines

    def fix_e304(self, result):
        """Remove blank line following function decorator."""
        line = result['line'] - 2
        if not self.source[line].strip():
            self.source[line] = ''

    def fix_e305(self, result):
        """Add missing 2 blank lines after end of function or class."""
        add_delete_linenum = 2 - int(result['info'].split()[-1])
        cnt = 0
        offset = result['line'] - 2
        modified_lines = []
        if add_delete_linenum < 0:
            # delete cr
            add_delete_linenum = abs(add_delete_linenum)
            while cnt < add_delete_linenum and offset >= 0:
                if not self.source[offset].strip():
                    self.source[offset] = ''
                    modified_lines.append(1 + offset)  # Line indexed at 1
                    cnt += 1
                offset -= 1
        else:
            # add cr
            cr = '\n'
            # check comment line
            while True:
                if offset < 0:
                    break
                line = self.source[offset].lstrip()
                if not line:
                    break
                if line[0] != '#':
                    break
                offset -= 1
            offset += 1
            self.source[offset] = cr + self.source[offset]
            modified_lines.append(1 + offset)  # Line indexed at 1.
        return modified_lines

    def fix_e401(self, result):
        """Put imports on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if not target.lstrip().startswith('import'):
            return []

        indentation = re.split(pattern=r'\bimport\b',
                               string=target, maxsplit=1)[0]
        fixed = (target[:offset].rstrip('\t ,') + '\n' +
                 indentation + 'import ' + target[offset:].lstrip('\t ,'))
        self.source[line_index] = fixed

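    # Example of the rewrite performed by fix_e401 above (hypothetical
    # input):
    #
    #     import os, sys   -->   import os
    #                            import sys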
    def fix_e402(self, result):
        """Fix module level import not at top of file."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        for i in range(1, 100):
            line = "".join(self.source[line_index:line_index+i])
            try:
                generate_tokens(line)
            except (SyntaxError, tokenize.TokenError):
                continue
            break
        if not (target in self.imports and self.imports[target] != line_index):
            mod_offset = get_module_imports_on_top_of_file(self.source,
                                                           line_index)
            self.source[mod_offset] = line + self.source[mod_offset]
        for offset in range(i):
            self.source[line_index+offset] = ''

    def fix_long_line_logically(self, result, logical):
        """Try to make lines fit within --max-line-length characters."""
        if (
            not logical or
            len(logical[2]) == 1 or
            self.source[result['line'] - 1].lstrip().startswith('#')
        ):
            return self.fix_long_line_physically(result)

        start_line_index = logical[0][0]
        end_line_index = logical[1][0]
        logical_lines = logical[2]

        previous_line = get_item(self.source, start_line_index - 1, default='')
        next_line = get_item(self.source, end_line_index + 1, default='')

        single_line = join_logical_line(''.join(logical_lines))

        try:
            fixed = self.fix_long_line(
                target=single_line,
                previous_line=previous_line,
                next_line=next_line,
                original=''.join(logical_lines))
        except (SyntaxError, tokenize.TokenError):
            return self.fix_long_line_physically(result)

        if fixed:
            for line_index in range(start_line_index, end_line_index + 1):
                self.source[line_index] = ''
            self.source[start_line_index] = fixed
            return range(start_line_index + 1, end_line_index + 1)

        return []

    def fix_long_line_physically(self, result):
        """Try to make lines fit within --max-line-length characters."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        previous_line = get_item(self.source, line_index - 1, default='')
        next_line = get_item(self.source, line_index + 1, default='')

        try:
            fixed = self.fix_long_line(
                target=target,
                previous_line=previous_line,
                next_line=next_line,
                original=target)
        except (SyntaxError, tokenize.TokenError):
            return []

        if fixed:
            self.source[line_index] = fixed
            return [line_index + 1]

        return []

    def fix_long_line(self, target, previous_line,
                      next_line, original):
        cache_entry = (target, previous_line, next_line)
        if cache_entry in self.long_line_ignore_cache:
            return []

        if target.lstrip().startswith('#'):
            if self.options.aggressive:
                # Wrap commented lines.
                return shorten_comment(
                    line=target,
                    max_line_length=self.options.max_line_length,
                    last_comment=not next_line.lstrip().startswith('#'))
            return []

        fixed = get_fixed_long_line(
            target=target,
            previous_line=previous_line,
            original=original,
            indent_word=self.indent_word,
            max_line_length=self.options.max_line_length,
            aggressive=self.options.aggressive,
            experimental=self.options.experimental,
            verbose=self.options.verbose)

        if fixed and not code_almost_equal(original, fixed):
            return fixed

        self.long_line_ignore_cache.add(cache_entry)
        return None

    def fix_e502(self, result):
        """Remove extraneous escape of newline."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'

    def fix_e701(self, result):
        """Put colon-separated compound statement on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        c = result['column']

        fixed_source = (target[:c] + '\n' +
                        _get_indentation(target) + self.indent_word +
                        target[c:].lstrip('\n\r \t\\'))
        self.source[result['line'] - 1] = fixed_source
        return [result['line'], result['line'] + 1]

    def fix_e702(self, result, logical):
        """Put semicolon-separated compound statement on separate lines."""
        if not logical:
            return []  # pragma: no cover
        logical_lines = logical[2]

        # Avoid applying this when indented.
        # https://docs.python.org/reference/compound_stmts.html
        for line in logical_lines:
            if (result['id'] == 'E702' and ':' in line
                    and STARTSWITH_INDENT_STATEMENT_REGEX.match(line)):
                if self.options.verbose:
                    print(
                        '---> avoid fixing {error} with '
                        'other compound statements'.format(error=result['id']),
                        file=sys.stderr
                    )
                return []

        line_index = result['line'] - 1
        target = self.source[line_index]

        if target.rstrip().endswith('\\'):
            # Normalize '1; \\\n2' into '1; 2'.
            self.source[line_index] = target.rstrip('\n \r\t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]

        if target.rstrip().endswith(';'):
            self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
            return [line_index + 1]

        offset = result['column'] - 1
        first = target[:offset].rstrip(';').rstrip()
        second = (_get_indentation(logical_lines[0]) +
                  target[offset:].lstrip(';').lstrip())

        # Find inline comment.
        inline_comment = None
        if target[offset:].lstrip(';').lstrip()[:2] == '# ':
            inline_comment = target[offset:].lstrip(';')

        if inline_comment:
            self.source[line_index] = first + inline_comment
        else:
            self.source[line_index] = first + '\n' + second
        return [line_index + 1]

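    # Example of the rewrite performed by fix_e702 above (hypothetical
    # input):
    #
    #     x = 1; y = 2   -->   x = 1
    #                          y = 2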
    def fix_e704(self, result):
        """Fix multiple statements on one line (def)."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = STARTSWITH_DEF_REGEX.match(target)
        if match:
            self.source[line_index] = '{}\n{}{}'.format(
                match.group(0),
                _get_indentation(target) + self.indent_word,
                target[match.end(0):].lstrip())

    def fix_e711(self, result):
        """Fix comparison with None."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        right_offset = offset + 2
        if right_offset >= len(target):
            return []

        left = target[:offset].rstrip()
        center = target[offset:right_offset]
        right = target[right_offset:].lstrip()

        if center.strip() == '==':
            new_center = 'is'
        elif center.strip() == '!=':
            new_center = 'is not'
        else:
            return []

        self.source[line_index] = ' '.join([left, new_center, right])

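    # Example of the rewrite performed by fix_e711 above (hypothetical
    # input):
    #
    #     if x == None:   -->   if x is None:
    #     if x != None:   -->   if x is not None: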
    def fix_e712(self, result):
        """Fix (trivial case of) comparison with boolean."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Handle very easy "not" special cases.
        if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
            self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
                                             r'if not \1:', target, count=1)
        elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
            self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
                                             r'if not \1:', target, count=1)
        else:
            right_offset = offset + 2
            if right_offset >= len(target):
                return []

            left = target[:offset].rstrip()
            center = target[offset:right_offset]
            right = target[right_offset:].lstrip()

            # Handle simple cases only.
            new_right = None
            if center.strip() == '==':
                if re.match(r'\bTrue\b', right):
                    new_right = re.sub(r'\bTrue\b *', '', right, count=1)
            elif center.strip() == '!=':
                if re.match(r'\bFalse\b', right):
                    new_right = re.sub(r'\bFalse\b *', '', right, count=1)

            if new_right is None:
                return []

            if new_right[0].isalnum():
                new_right = ' ' + new_right

            self.source[line_index] = left + new_right

    def fix_e713(self, result):
        """Fix (trivial case of) non-membership check."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Temporarily convert an existing 'not in' to 'in' (restored below).
        before_target = target[:offset]
        target = target[offset:]
        match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
        notin_pos_start, notin_pos_end = 0, 0
        if match_notin:
            notin_pos_start = match_notin.start(1)
            notin_pos_end = match_notin.end()
            target = '{}{} {}'.format(
                target[:notin_pos_start], 'in', target[notin_pos_end:])

        # Fix 'not x in y' -> 'x not in y'.
        match = COMPARE_NEGATIVE_REGEX.search(target)
        if match:
            if match.group(3) == 'in':
                pos_start = match.start(1)
                new_target = '{5}{0}{1} {2} {3} {4}'.format(
                    target[:pos_start], match.group(2), match.group(1),
                    match.group(3), target[match.end():], before_target)
                if match_notin:
                    # Revert 'in' -> 'not in'.
                    pos_start = notin_pos_start + offset
                    pos_end = notin_pos_end + offset - 4  # len('not ')
                    new_target = '{}{} {}'.format(
                        new_target[:pos_start], 'not in', new_target[pos_end:])
                self.source[line_index] = new_target

    def fix_e714(self, result):
        """Fix "object identity should be 'is not'" case."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Temporarily convert an existing 'is not' (restored below).
        before_target = target[:offset]
        target = target[offset:]
        match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
        isnot_pos_start, isnot_pos_end = 0, 0
        if match_isnot:
            isnot_pos_start = match_isnot.start(1)
            isnot_pos_end = match_isnot.end()
            target = '{}{} {}'.format(
                target[:isnot_pos_start], 'in', target[isnot_pos_end:])

        match = COMPARE_NEGATIVE_REGEX.search(target)
        if match:
            if match.group(3).startswith('is'):
                pos_start = match.start(1)
                new_target = '{5}{0}{1} {2} {3} {4}'.format(
                    target[:pos_start], match.group(2), match.group(3),
                    match.group(1), target[match.end():], before_target)
                if match_isnot:
                    # Revert to 'is not'.
                    pos_start = isnot_pos_start + offset
                    pos_end = isnot_pos_end + offset - 4  # len('not ')
                    new_target = '{}{} {}'.format(
                        new_target[:pos_start], 'is not', new_target[pos_end:])
                self.source[line_index] = new_target

    def fix_e722(self, result):
        """Fix bare except."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = BARE_EXCEPT_REGEX.search(target)
        if match:
            self.source[line_index] = '{}{}{}'.format(
                target[:result['column'] - 1], "except BaseException:",
                target[match.end():])

    def fix_e731(self, result):
        """Fix "do not assign a lambda expression" check."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = LAMBDA_REGEX.search(target)
        if match:
            end = match.end()
            self.source[line_index] = '{}def {}({}): return {}'.format(
                target[:match.start(0)], match.group(1), match.group(2),
                target[end:].lstrip())

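    # Example of the rewrite performed by fix_e731 above (hypothetical
    # input):
    #
    #     f = lambda x, y: x + y   -->   def f(x, y): return x + y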
    def fix_w291(self, result):
        """Remove trailing whitespace."""
        fixed_line = self.source[result['line'] - 1].rstrip()
        self.source[result['line'] - 1] = fixed_line + '\n'

    def fix_w391(self, _):
        """Remove trailing blank lines."""
        blank_count = 0
        for line in reversed(self.source):
            line = line.rstrip()
            if line:
                break
            else:
                blank_count += 1

        original_length = len(self.source)
        self.source = self.source[:original_length - blank_count]
        return range(1, 1 + original_length)

    def fix_w503(self, result):
        """Fix line break before binary operator."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        one_string_token = target.split()[0]
        try:
            ts = generate_tokens(one_string_token)
        except (SyntaxError, tokenize.TokenError):
            return
        if not _is_binary_operator(ts[0][0], one_string_token):
            return
        # find comment
        comment_index = 0
        found_not_comment_only_line = False
        comment_only_linenum = 0
        for i in range(5):
            # NOTE: try to parse the code up to 5 lines back
            if (line_index - i) < 0:
                break
            from_index = line_index - i - 1
            if from_index < 0 or len(self.source) <= from_index:
                break
            to_index = line_index + 1
            strip_line = self.source[from_index].lstrip()
            if (
                not found_not_comment_only_line and
                strip_line and strip_line[0] == '#'
            ):
                comment_only_linenum += 1
                continue
            found_not_comment_only_line = True
            try:
                ts = generate_tokens("".join(self.source[from_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[newline_index[-3]:]
            else:
                tts = ts
            old = []
            for t in tts:
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_count -= 1
                    if newline_count <= 1:
                        break
                if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
                    comment_index = old[3][1]
                    break
                old = t
            break
        i = target.index(one_string_token)
        fix_target_line = line_index - 1 - comment_only_linenum
        self.source[line_index] = '{}{}'.format(
            target[:i], target[i + len(one_string_token):].lstrip())
        nl = find_newline(self.source[fix_target_line:line_index])
        before_line = self.source[fix_target_line]
        bl = before_line.index(nl)
        if comment_index:
            self.source[fix_target_line] = '{} {} {}'.format(
                before_line[:comment_index], one_string_token,
                before_line[comment_index + 1:])
        else:
            if before_line[:bl].endswith("#"):
                # special case
                # see: https://github.com/hhatto/autopep8/issues/503
                self.source[fix_target_line] = '{}{} {}'.format(
                    before_line[:bl-2], one_string_token, before_line[bl-2:])
            else:
                self.source[fix_target_line] = '{} {}{}'.format(
                    before_line[:bl], one_string_token, before_line[bl:])

    def fix_w504(self, result):
        """Fix line break after binary operator."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        # NOTE: this violation is not reported by pycodestyle==2.4.0
        comment_index = 0
        operator_position = None  # (start_position, end_position)
        for i in range(1, 6):
            to_index = line_index + i
            try:
                ts = generate_tokens("".join(self.source[line_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if _is_binary_operator(t[0], t[1]):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[:newline_index[-3]]
            else:
                tts = ts
            old = []
            for t in tts:
                if tokenize.COMMENT == t[0] and old:
                    comment_row, comment_index = old[3]
                    break
                old = t
            break
        if not operator_position:
            return
        target_operator = target[operator_position[0]:operator_position[1]]

        if comment_index and comment_row == 1:
            self.source[line_index] = '{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[comment_index:])
        else:
            self.source[line_index] = '{}{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[operator_position[1]:].lstrip(),
                target[operator_position[1]:])

        next_line = self.source[line_index + 1]
        next_line_indent = 0
        m = re.match(r'\s*', next_line)
        if m:
            next_line_indent = m.span()[1]
        self.source[line_index + 1] = '{}{} {}'.format(
            next_line[:next_line_indent], target_operator,
            next_line[next_line_indent:])

    def fix_w605(self, result):
        """Fix invalid escape sequence."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        self.source[line_index] = '{}\\{}'.format(
            target[:offset + 1], target[offset + 1:])

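    # Example of the rewrite performed by fix_w605 above (hypothetical
    # input): the backslash in the invalid escape '\d' is doubled:
    #
    #     regex = '\d+'   -->   regex = '\\d+'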
1445 | ||
1446 | def get_module_imports_on_top_of_file(source, import_line_index): | |
1447 | """return import or from keyword position | |
1448 | ||
1449 | example: | |
1450 | > 0: import sys | |
1451 | 1: import os | |
1452 | 2: | |
1453 | 3: def function(): | |
1454 | """ | |
1455 | def is_string_literal(line): | |
1456 | if line[0] in 'uUbB': | |
1457 | line = line[1:] | |
1458 | if line and line[0] in 'rR': | |
1459 | line = line[1:] | |
1460 | return line and (line[0] == '"' or line[0] == "'") | |
1461 | ||
1462 | def is_future_import(line): | |
1463 | nodes = ast.parse(line) | |
1464 | for n in nodes.body: | |
1465 | if isinstance(n, ast.ImportFrom) and n.module == '__future__': | |
1466 | return True | |
1467 | return False | |
1468 | ||
1469 | def has_future_import(source): | |
1470 | offset = 0 | |
1471 | line = '' | |
1472 | for _, next_line in source: | |
1473 | for line_part in next_line.strip().splitlines(True): | |
1474 | line = line + line_part | |
1475 | try: | |
1476 | return is_future_import(line), offset | |
1477 | except SyntaxError: | |
1478 | continue | |
1479 | offset += 1 | |
1480 | return False, offset | |
1481 | ||
1482 | allowed_try_keywords = ('try', 'except', 'else', 'finally') | |
1483 | in_docstring = False | |
1484 | docstring_kind = '"""' | |
1485 | source_stream = iter(enumerate(source)) | |
1486 | for cnt, line in source_stream: | |
1487 | if not in_docstring: | |
1488 | m = DOCSTRING_START_REGEX.match(line.lstrip()) | |
1489 | if m is not None: | |
1490 | in_docstring = True | |
1491 | docstring_kind = m.group('kind') | |
1492 | remain = line[m.end(): m.endpos].rstrip() | |
1493 | if remain[-3:] == docstring_kind:  # one-line docstring | |
1494 | in_docstring = False | |
1495 | continue | |
1496 | if in_docstring: | |
1497 | if line.rstrip()[-3:] == docstring_kind: | |
1498 | in_docstring = False | |
1499 | continue | |
1500 | ||
1501 | if not line.rstrip(): | |
1502 | continue | |
1503 | elif line.startswith('#'): | |
1504 | continue | |
1505 | ||
1506 | if line.startswith('import '): | |
1507 | if cnt == import_line_index: | |
1508 | continue | |
1509 | return cnt | |
1510 | elif line.startswith('from '): | |
1511 | if cnt == import_line_index: | |
1512 | continue | |
1513 | hit, offset = has_future_import( | |
1514 | itertools.chain([(cnt, line)], source_stream) | |
1515 | ) | |
1516 | if hit: | |
1517 | # Move the import to just after the __future__ import block. | |
1518 | return cnt + offset + 1 | |
1519 | return cnt | |
1520 | elif pycodestyle.DUNDER_REGEX.match(line): | |
1521 | return cnt | |
1522 | elif any(line.startswith(kw) for kw in allowed_try_keywords): | |
1523 | continue | |
1524 | elif is_string_literal(line):  # a bare string literal ends the import block | |
1525 | return cnt | |
1526 | else:  # any other statement ends the import block too | |
1527 | return cnt | |
1528 | return 0 | |
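| ||
| # A doctest-style sketch of the behavior (illustrative only): a one-line | |
| # module docstring is skipped, so with the checked import living elsewhere | |
| # the returned position is line 1. | |
| # >>> src = ['"""Docstring."""\n', 'import os\n', 'print(os)\n'] | |
| # >>> get_module_imports_on_top_of_file(src, 5) | |
| # 1 | |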
1529 | ||
1530 | ||
1531 | def get_index_offset_contents(result, source): | |
1532 | """Return (line_index, column_offset, line_contents).""" | |
1533 | line_index = result['line'] - 1 | |
1534 | return (line_index, | |
1535 | result['column'] - 1, | |
1536 | source[line_index]) | |
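| ||
| # Illustrative sketch: pycodestyle reports 1-based positions, which this | |
| # helper converts to 0-based indexes plus the raw line. | |
| # >>> get_index_offset_contents({'line': 2, 'column': 5}, ['a\n', 'bcdef\n']) | |
| # (1, 4, 'bcdef\n') | |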
1537 | ||
1538 | ||
1539 | def get_fixed_long_line(target, previous_line, original, | |
1540 | indent_word='    ', max_line_length=79, | |
1541 | aggressive=False, experimental=False, verbose=False): | |
1542 | """Break up long line and return result. | |
1543 | ||
1544 | Do this by generating multiple reformatted candidates and then | |
1545 | ranking the candidates to heuristically select the best option. | |
1546 | ||
1547 | """ | |
1548 | indent = _get_indentation(target) | |
1549 | source = target[len(indent):] | |
1550 | assert source.lstrip() == source | |
1551 | assert not target.lstrip().startswith('#') | |
1552 | ||
1553 | # Check for partial multiline. | |
1554 | tokens = list(generate_tokens(source)) | |
1555 | ||
1556 | candidates = shorten_line( | |
1557 | tokens, source, indent, | |
1558 | indent_word, | |
1559 | max_line_length, | |
1560 | aggressive=aggressive, | |
1561 | experimental=experimental, | |
1562 | previous_line=previous_line) | |
1563 | ||
1564 | # Also sort alphabetically as a tie breaker (for determinism). | |
1565 | candidates = sorted( | |
1566 | sorted(set(candidates).union([target, original])), | |
1567 | key=lambda x: line_shortening_rank( | |
1568 | x, | |
1569 | indent_word, | |
1570 | max_line_length, | |
1571 | experimental=experimental)) | |
1572 | ||
1573 | if verbose >= 4: | |
1574 | print(('-' * 79 + '\n').join([''] + candidates + ['']), | |
1575 | file=wrap_output(sys.stderr, 'utf-8')) | |
1576 | ||
1577 | if candidates: | |
1578 | best_candidate = candidates[0] | |
1579 | ||
1580 | # Don't allow things to get longer. | |
1581 | if longest_line_length(best_candidate) > longest_line_length(original): | |
1582 | return None | |
1583 | ||
1584 | return best_candidate | |
1585 | ||
1586 | ||
1587 | def longest_line_length(code): | |
1588 | """Return length of longest line.""" | |
1589 | if not code: | |
1590 | return 0 | |
1591 | return max(len(line) for line in code.splitlines()) | |
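| ||
| # For example (illustrative): | |
| # >>> longest_line_length('x = 1\nlonger_name = 2\n') | |
| # 15 | |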
1592 | ||
1593 | ||
1594 | def join_logical_line(logical_line): | |
1595 | """Return single line based on logical line input.""" | |
1596 | indentation = _get_indentation(logical_line) | |
1597 | ||
1598 | return indentation + untokenize_without_newlines( | |
1599 | generate_tokens(logical_line.lstrip())) + '\n' | |
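| ||
| # Illustrative sketch: a wrapped logical line is rejoined onto one line, | |
| # preserving the original leading indentation. | |
| # >>> join_logical_line('    x = (1 +\n         2)\n') | |
| # '    x = (1 + 2)\n' | |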
1600 | ||
1601 | ||
1602 | def untokenize_without_newlines(tokens): | |
1603 | """Return source code based on tokens.""" | |
1604 | text = '' | |
1605 | last_row = 0 | |
1606 | last_column = -1 | |
1607 | ||
1608 | for t in tokens: | |
1609 | token_string = t[1] | |
1610 | (start_row, start_column) = t[2] | |
1611 | (end_row, end_column) = t[3] | |
1612 | ||
1613 | if start_row > last_row: | |
1614 | last_column = 0 | |
1615 | if ( | |
1616 | (start_column > last_column or token_string == '\n') and | |
1617 | not text.endswith(' ') | |
1618 | ): | |
1619 | text += ' ' | |
1620 | ||
1621 | if token_string != '\n': | |
1622 | text += token_string | |
1623 | ||
1624 | last_row = end_row | |
1625 | last_column = end_column | |
1626 | ||
1627 | return text.rstrip() | |
1628 | ||
1629 | ||
1630 | def _find_logical(source_lines): | |
1631 | # Collect the (row, column) positions where logical lines start and end. | |
1632 | logical_start = [] | |
1633 | logical_end = [] | |
1634 | last_newline = True | |
1635 | parens = 0 | |
1636 | for t in generate_tokens(''.join(source_lines)): | |
1637 | if t[0] in [tokenize.COMMENT, tokenize.DEDENT, | |
1638 | tokenize.INDENT, tokenize.NL, | |
1639 | tokenize.ENDMARKER]: | |
1640 | continue | |
1641 | if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]: | |
1642 | last_newline = True | |
1643 | logical_end.append((t[3][0] - 1, t[2][1])) | |
1644 | continue | |
1645 | if last_newline and not parens: | |
1646 | logical_start.append((t[2][0] - 1, t[2][1])) | |
1647 | last_newline = False | |
1648 | if t[0] == tokenize.OP: | |
1649 | if t[1] in '([{': | |
1650 | parens += 1 | |
1651 | elif t[1] in '}])': | |
1652 | parens -= 1 | |
1653 | return (logical_start, logical_end) | |
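| ||
| # Illustrative sketch: positions are 0-based (row, column) pairs, one | |
| # start/end pair per logical line. | |
| # >>> _find_logical(['x = (1 +\n', '     2)\n', 'y = 3\n']) | |
| # ([(0, 0), (2, 0)], [(1, 7), (2, 5)]) | |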
1654 | ||
1655 | ||
1656 | def _get_logical(source_lines, result, logical_start, logical_end): | |
1657 | """Return the logical line corresponding to the result. | |
1658 | ||
1659 | Assumes input is already E702-clean. | |
1660 | ||
1661 | """ | |
1662 | row = result['line'] - 1 | |
1663 | col = result['column'] - 1 | |
1664 | ls = None | |
1665 | le = None | |
1666 | for i in range(len(logical_start)): | |
1667 | assert logical_end | |
1668 | x = logical_end[i] | |
1669 | if x[0] > row or (x[0] == row and x[1] > col): | |
1670 | le = x | |
1671 | ls = logical_start[i] | |
1672 | break | |
1673 | if ls is None: | |
1674 | return None | |
1675 | original = source_lines[ls[0]:le[0] + 1] | |
1676 | return ls, le, original | |
1677 | ||
1678 | ||
1679 | def get_item(items, index, default=None): | |
1680 | if 0 <= index < len(items): | |
1681 | return items[index] | |
1682 | ||
1683 | return default | |
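| ||
| # For example (illustrative): | |
| # >>> get_item([1, 2, 3], 1) | |
| # 2 | |
| # >>> get_item([1, 2, 3], 7, default=0) | |
| # 0 | |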
1684 | ||
1685 | ||
1686 | def reindent(source, indent_size, leave_tabs=False): | |
1687 | """Reindent all lines.""" | |
1688 | reindenter = Reindenter(source, leave_tabs) | |
1689 | return reindenter.run(indent_size) | |
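| ||
| # Illustrative sketch: two-space indentation rewritten to four spaces. | |
| # >>> reindent('if True:\n  x = 1\n', 4) | |
| # 'if True:\n    x = 1\n' | |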
1690 | ||
1691 | ||
1692 | def code_almost_equal(a, b): | |
1693 | """Return True if code is similar. | |
1694 | ||
1695 | Ignore whitespace when comparing specific line. | |
1696 | ||
1697 | """ | |
1698 | split_a = split_and_strip_non_empty_lines(a) | |
1699 | split_b = split_and_strip_non_empty_lines(b) | |
1700 | ||
1701 | if len(split_a) != len(split_b): | |
1702 | return False | |
1703 | ||
1704 | for (index, _) in enumerate(split_a): | |
1705 | if ''.join(split_a[index].split()) != ''.join(split_b[index].split()): | |
1706 | return False | |
1707 | ||
1708 | return True | |
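| ||
| # For example, only intra-line whitespace differs here (illustrative): | |
| # >>> code_almost_equal('x = [1,\n 2]\n', 'x = [1,\n      2]\n') | |
| # True | |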
1709 | ||
1710 | ||
1711 | def split_and_strip_non_empty_lines(text): | |
1712 | """Return lines split by newline. | |
1713 | ||
1714 | Ignore empty lines. | |
1715 | ||
1716 | """ | |
1717 | return [line.strip() for line in text.splitlines() if line.strip()] | |
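| ||
| # For example (illustrative): | |
| # >>> split_and_strip_non_empty_lines('a\n\n  b\n') | |
| # ['a', 'b'] | |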
1718 | ||
1719 | ||
1720 | def refactor(source, fixer_names, ignore=None, filename=''): | |
1721 | """Return refactored code using lib2to3. | |
1722 | ||
1723 | Skip if ignore string is produced in the refactored code. | |
1724 | ||
1725 | """ | |
1726 | not_found_end_of_file_newline = source and source.rstrip("\r\n") == source | |
1727 | if not_found_end_of_file_newline: | |
1728 | input_source = source + "\n" | |
1729 | else: | |
1730 | input_source = source | |
1731 | ||
1732 | from lib2to3 import pgen2 | |
1733 | try: | |
1734 | new_text = refactor_with_2to3(input_source, | |
1735 | fixer_names=fixer_names, | |
1736 | filename=filename) | |
1737 | except (pgen2.parse.ParseError, | |
1738 | SyntaxError, | |
1739 | UnicodeDecodeError, | |
1740 | UnicodeEncodeError): | |
1741 | return source | |
1742 | ||
1743 | if ignore: | |
1744 | if ignore in new_text and ignore not in source: | |
1745 | return source | |
1746 | ||
1747 | if not_found_end_of_file_newline: | |
1748 | return new_text.rstrip("\r\n") | |
1749 | ||
1750 | return new_text | |
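| ||
| # A hedged sketch, assuming lib2to3 is still available (it is deprecated | |
| # and absent from newer Pythons); 'print' is a standard lib2to3 fixer name. | |
| # >>> refactor('print "hello"\n', ['print']) | |
| # 'print("hello")\n' | |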
1751 | ||
1752 | ||
1753 | def code_to_2to3(select, ignore, where='', verbose=False): | |
1754 | fixes = set() | |
1755 | for code, fix in CODE_TO_2TO3.items(): | |
1756 | if code_match(code, select=select, ignore=ignore): | |
1757 | if verbose: | |
1758 | print('---> Applying {} fix for {}'.format(where, | |
1759 | code.upper()), | |
1760 | file=sys.stderr) | |
1761 | fixes |= set(fix) | |
1762 | return fixes | |
1763 | ||
1764 | ||
1765 | def fix_2to3(source, | |
1766 | aggressive=True, select=None, ignore=None, filename='', | |
1767 | where='global', verbose=False): | |
1768 | """Fix various deprecated code (via lib2to3).""" | |
1769 | if not aggressive: | |
1770 | return source | |
1771 | ||
1772 | select = select or [] | |
1773 | ignore = ignore or [] | |
1774 | ||
1775 | return refactor(source, | |
1776 | code_to_2to3(select=select, | |
1777 | ignore=ignore, | |
1778 | where=where, | |
1779 | verbose=verbose), | |
1780 | filename=filename) | |
1781 | ||
1782 | ||
1783 | def find_newline(source): | |
1784 | """Return type of newline used in source. | |
1785 | ||
1786 | Input is a list of lines. | |
1787 | ||
1788 | """ | |
1789 | assert not isinstance(source, str) | |
1790 | ||
1791 | counter = collections.defaultdict(int) | |
1792 | for line in source: | |
1793 | if line.endswith(CRLF): | |
1794 | counter[CRLF] += 1 | |
1795 | elif line.endswith(CR): | |
1796 | counter[CR] += 1 | |
1797 | elif line.endswith(LF): | |
1798 | counter[LF] += 1 | |
1799 | ||
1800 | return (sorted(counter, key=counter.get, reverse=True) or [LF])[0] | |
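| ||
| # Illustrative: the most common line ending wins (CRLF here). | |
| # >>> find_newline(['a\r\n', 'b\r\n', 'c\n']) | |
| # '\r\n' | |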
1801 | ||
1802 | ||
1803 | def _get_indentword(source): | |
1804 | """Return indentation type.""" | |
1805 | indent_word = '    '  # Default in case source has no indentation | |
1806 | try: | |
1807 | for t in generate_tokens(source): | |
1808 | if t[0] == token.INDENT: | |
1809 | indent_word = t[1] | |
1810 | break | |
1811 | except (SyntaxError, tokenize.TokenError): | |
1812 | pass | |
1813 | return indent_word | |
1814 | ||
1815 | ||
1816 | def _get_indentation(line): | |
1817 | """Return leading whitespace.""" | |
1818 | if line.strip(): | |
1819 | non_whitespace_index = len(line) - len(line.lstrip()) | |
1820 | return line[:non_whitespace_index] | |
1821 | ||
1822 | return '' | |
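| ||
| # For example (illustrative): | |
| # >>> _get_indentation('    x = 1\n') | |
| # '    ' | |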
1823 | ||
1824 | ||
1825 | def get_diff_text(old, new, filename): | |
1826 | """Return text of unified diff between old and new.""" | |
1827 | newline = '\n' | |
1828 | diff = difflib.unified_diff( | |
1829 | old, new, | |
1830 | 'original/' + filename, | |
1831 | 'fixed/' + filename, | |
1832 | lineterm=newline) | |
1833 | ||
1834 | text = '' | |
1835 | for line in diff: | |
1836 | text += line | |
1837 | ||
1838 | # Work around missing newline (http://bugs.python.org/issue2142). | |
1839 | if text and not line.endswith(newline): | |
1840 | text += newline + r'\ No newline at end of file' + newline | |
1841 | ||
1842 | return text | |
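| ||
| # Illustrative sketch of the diff text produced: | |
| # >>> print(get_diff_text(['x=1\n'], ['x = 1\n'], 'example.py'), end='') | |
| # --- original/example.py | |
| # +++ fixed/example.py | |
| # @@ -1 +1 @@ | |
| # -x=1 | |
| # +x = 1 | |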
1843 | ||
1844 | ||
1845 | def _priority_key(pep8_result): | |
1846 | """Key for sorting PEP8 results. | |
1847 | ||
1848 | Global fixes should be done first. This is important for things like | |
1849 | indentation. | |
1850 | ||
1851 | """ | |
1852 | priority = [ | |
1853 | # Fix multiline colon-based statements before semicolon-based ones. | |
1854 | 'e701', | |
1855 | # Break multiline statements early. | |
1856 | 'e702', | |
1857 | # Things that make lines longer. | |
1858 | 'e225', 'e231', | |
1859 | # Remove extraneous whitespace before breaking lines. | |
1860 | 'e201', | |
1861 | # Shorten whitespace in comment before resorting to wrapping. | |
1862 | 'e262' | |
1863 | ] | |
1864 | middle_index = 10000 | |
1865 | lowest_priority = [ | |
1866 | # We need to shorten lines last since the logical fixer can get in a | |
1867 | # loop, which causes us to exit early. | |
1868 | 'e501', | |
1869 | ] | |
1870 | key = pep8_result['id'].lower() | |
1871 | try: | |
1872 | return priority.index(key) | |
1873 | except ValueError: | |
1874 | try: | |
1875 | return middle_index + lowest_priority.index(key) + 1 | |
1876 | except ValueError: | |
1877 | return middle_index | |
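| ||
| # Illustrative ordering: listed codes sort first, E501 sorts last, and | |
| # everything else lands on the middle index. | |
| # >>> (_priority_key({'id': 'E701'}), _priority_key({'id': 'E999'}), | |
| # ...  _priority_key({'id': 'E501'})) | |
| # (0, 10000, 10001) | |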
1878 | ||
1879 | ||
1880 | def shorten_line(tokens, source, indentation, indent_word, max_line_length, | |
1881 | aggressive=False, experimental=False, previous_line=''): | |
1882 | """Separate line at OPERATOR. | |
1883 | ||
1884 | Multiple candidates will be yielded. | |
1885 | ||
1886 | """ | |
1887 | for candidate in _shorten_line(tokens=tokens, | |
1888 | source=source, | |
1889 | indentation=indentation, | |
1890 | indent_word=indent_word, | |
1891 | aggressive=aggressive, | |
1892 | previous_line=previous_line): | |
1893 | yield candidate | |
1894 | ||
1895 | if aggressive: | |
1896 | for key_token_strings in SHORTEN_OPERATOR_GROUPS: | |
1897 | shortened = _shorten_line_at_tokens( | |
1898 | tokens=tokens, | |
1899 | source=source, | |
1900 | indentation=indentation, | |
1901 | indent_word=indent_word, | |
1902 | key_token_strings=key_token_strings, | |
1903 | aggressive=aggressive) | |
1904 | ||
1905 | if shortened is not None and shortened != source: | |
1906 | yield shortened | |
1907 | ||
1908 | if experimental: | |
1909 | for shortened in _shorten_line_at_tokens_new( | |
1910 | tokens=tokens, | |
1911 | source=source, | |
1912 | indentation=indentation, | |
1913 | max_line_length=max_line_length): | |
1914 | ||
1915 | yield shortened | |
1916 | ||
1917 | ||
1918 | def _shorten_line(tokens, source, indentation, indent_word, | |
1919 | aggressive=False, previous_line=''): | |
1920 | """Separate line at OPERATOR. | |
1921 | ||
1922 | The input is expected to be free of newlines except for inside multiline | |
1923 | strings and at the end. | |
1924 | ||
1925 | Multiple candidates will be yielded. | |
1926 | ||
1927 | """ | |
1928 | for (token_type, | |
1929 | token_string, | |
1930 | start_offset, | |
1931 | end_offset) in token_offsets(tokens): | |
1932 | ||
1933 | if ( | |
1934 | token_type == tokenize.COMMENT and | |
1935 | not is_probably_part_of_multiline(previous_line) and | |
1936 | not is_probably_part_of_multiline(source) and | |
1937 | not source[start_offset + 1:].strip().lower().startswith( | |
1938 | ('noqa', 'pragma:', 'pylint:')) | |
1939 | ): | |
1940 | # Move inline comments to previous line. | |
1941 | first = source[:start_offset] | |
1942 | second = source[start_offset:] | |
1943 | yield (indentation + second.strip() + '\n' + | |
1944 | indentation + first.strip() + '\n') | |
1945 | elif token_type == token.OP and token_string != '=': | |
1946 | # Don't break on '=' after keyword as this violates PEP 8. | |
1947 | ||
1948 | assert token_type != token.INDENT | |
1949 | ||
1950 | first = source[:end_offset] | |
1951 | ||
1952 | second_indent = indentation | |
1953 | if (first.rstrip().endswith('(') and | |
1954 | source[end_offset:].lstrip().startswith(')')): | |
1955 | pass | |
1956 | elif first.rstrip().endswith('('): | |
1957 | second_indent += indent_word | |
1958 | elif '(' in first: | |
1959 | second_indent += ' ' * (1 + first.find('(')) | |
1960 | else: | |
1961 | second_indent += indent_word | |
1962 | ||
1963 | second = (second_indent + source[end_offset:].lstrip()) | |
1964 | if ( | |
1965 | not second.strip() or | |
1966 | second.lstrip().startswith('#') | |
1967 | ): | |
1968 | continue | |
1969 | ||
1970 | # Do not begin a line with a comma | |
1971 | if second.lstrip().startswith(','): | |
1972 | continue | |
1973 | # Do not end a line with a dot | |
1974 | if first.rstrip().endswith('.'): | |
1975 | continue | |
1976 | if token_string in '+-*/': | |
1977 | fixed = first + ' \\' + '\n' + second | |
1978 | else: | |
1979 | fixed = first + '\n' + second | |
1980 | ||
1981 | # Only fix if syntax is okay. | |
1982 | if check_syntax(normalize_multiline(fixed) | |
1983 | if aggressive else fixed): | |
1984 | yield indentation + fixed | |
1985 | ||
1986 | ||
1987 | def _is_binary_operator(token_type, text): | |
1988 | return ((token_type == tokenize.OP or text in ['and', 'or']) and | |
1989 | text not in '()[]{},:.;@=%~') | |
1990 | ||
1991 | ||
1992 | # A convenient way to handle tokens. | |
1993 | Token = collections.namedtuple('Token', ['token_type', 'token_string', | |
1994 | 'spos', 'epos', 'line']) | |
1995 | ||
1996 | ||
1997 | class ReformattedLines(object): | |
1998 | ||
1999 | """The reflowed lines of atoms. | |
2000 | ||
2001 | Each part of the line is represented as an "atom." They can be moved | |
2002 | around when need be to get the optimal formatting. | |
2003 | ||
2004 | """ | |
2005 | ||
2006 | ########################################################################### | |
2007 | # Private Classes | |
2008 | ||
2009 | class _Indent(object): | |
2010 | ||
2011 | """Represent an indentation in the atom stream.""" | |
2012 | ||
2013 | def __init__(self, indent_amt): | |
2014 | self._indent_amt = indent_amt | |
2015 | ||
2016 | def emit(self): | |
2017 | return ' ' * self._indent_amt | |
2018 | ||
2019 | @property | |
2020 | def size(self): | |
2021 | return self._indent_amt | |
2022 | ||
2023 | class _Space(object): | |
2024 | ||
2025 | """Represent a space in the atom stream.""" | |
2026 | ||
2027 | def emit(self): | |
2028 | return ' ' | |
2029 | ||
2030 | @property | |
2031 | def size(self): | |
2032 | return 1 | |
2033 | ||
2034 | class _LineBreak(object): | |
2035 | ||
2036 | """Represent a line break in the atom stream.""" | |
2037 | ||
2038 | def emit(self): | |
2039 | return '\n' | |
2040 | ||
2041 | @property | |
2042 | def size(self): | |
2043 | return 0 | |
2044 | ||
2045 | def __init__(self, max_line_length): | |
2046 | self._max_line_length = max_line_length | |
2047 | self._lines = [] | |
2048 | self._bracket_depth = 0 | |
2049 | self._prev_item = None | |
2050 | self._prev_prev_item = None | |
2051 | ||
2052 | def __repr__(self): | |
2053 | return self.emit() | |
2054 | ||
2055 | ########################################################################### | |
2056 | # Public Methods | |
2057 | ||
2058 | def add(self, obj, indent_amt, break_after_open_bracket): | |
2059 | if isinstance(obj, Atom): | |
2060 | self._add_item(obj, indent_amt) | |
2061 | return | |
2062 | ||
2063 | self._add_container(obj, indent_amt, break_after_open_bracket) | |
2064 | ||
2065 | def add_comment(self, item): | |
2066 | num_spaces = 2 | |
2067 | if len(self._lines) > 1: | |
2068 | if isinstance(self._lines[-1], self._Space): | |
2069 | num_spaces -= 1 | |
2070 | if len(self._lines) > 2: | |
2071 | if isinstance(self._lines[-2], self._Space): | |
2072 | num_spaces -= 1 | |
2073 | ||
2074 | while num_spaces > 0: | |
2075 | self._lines.append(self._Space()) | |
2076 | num_spaces -= 1 | |
2077 | self._lines.append(item) | |
2078 | ||
2079 | def add_indent(self, indent_amt): | |
2080 | self._lines.append(self._Indent(indent_amt)) | |
2081 | ||
2082 | def add_line_break(self, indent): | |
2083 | self._lines.append(self._LineBreak()) | |
2084 | self.add_indent(len(indent)) | |
2085 | ||
2086 | def add_line_break_at(self, index, indent_amt): | |
2087 | self._lines.insert(index, self._LineBreak()) | |
2088 | self._lines.insert(index + 1, self._Indent(indent_amt)) | |
2089 | ||
2090 | def add_space_if_needed(self, curr_text, equal=False): | |
2091 | if ( | |
2092 | not self._lines or isinstance( | |
2093 | self._lines[-1], (self._LineBreak, self._Indent, self._Space)) | |
2094 | ): | |
2095 | return | |
2096 | ||
2097 | prev_text = str(self._prev_item) | |
2098 | prev_prev_text = ( | |
2099 | str(self._prev_prev_item) if self._prev_prev_item else '') | |
2100 | ||
2101 | if ( | |
2102 | # The previous item was a keyword or identifier and the current | |
2103 | # item isn't an operator that doesn't require a space. | |
2104 | ((self._prev_item.is_keyword or self._prev_item.is_string or | |
2105 | self._prev_item.is_name or self._prev_item.is_number) and | |
2106 | (curr_text[0] not in '([{.,:}])' or | |
2107 | (curr_text[0] == '=' and equal))) or | |
2108 | ||
2109 | # Don't place spaces around a '.', unless it's in an 'import' | |
2110 | # statement. | |
2111 | ((prev_prev_text != 'from' and prev_text[-1] != '.' and | |
2112 | curr_text != 'import') and | |
2113 | ||
2114 | # Don't place a space before a colon. | |
2115 | curr_text[0] != ':' and | |
2116 | ||
2117 | # Don't split up ending brackets by spaces. | |
2118 | ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or | |
2119 | ||
2120 | # Put a space after a colon or comma. | |
2121 | prev_text[-1] in ':,' or | |
2122 | ||
2123 | # Put space around '=' if asked to. | |
2124 | (equal and prev_text == '=') or | |
2125 | ||
2126 | # Put spaces around non-unary arithmetic operators. | |
2127 | ((self._prev_prev_item and | |
2128 | (prev_text not in '+-' and | |
2129 | (self._prev_prev_item.is_name or | |
2130 | self._prev_prev_item.is_number or | |
2131 | self._prev_prev_item.is_string)) and | |
2132 | prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in'))))) | |
2133 | ): | |
2134 | self._lines.append(self._Space()) | |
2135 | ||
2136 | def previous_item(self): | |
2137 | """Return the previous non-whitespace item.""" | |
2138 | return self._prev_item | |
2139 | ||
2140 | def fits_on_current_line(self, item_extent): | |
2141 | return self.current_size() + item_extent <= self._max_line_length | |
2142 | ||
2143 | def current_size(self): | |
2144 | """The size of the current line minus the indentation.""" | |
2145 | size = 0 | |
2146 | for item in reversed(self._lines): | |
2147 | size += item.size | |
2148 | if isinstance(item, self._LineBreak): | |
2149 | break | |
2150 | ||
2151 | return size | |
2152 | ||
2153 | def line_empty(self): | |
2154 | return (self._lines and | |
2155 | isinstance(self._lines[-1], | |
2156 | (self._LineBreak, self._Indent))) | |
2157 | ||
2158 | def emit(self): | |
2159 | string = '' | |
2160 | for item in self._lines: | |
2161 | if isinstance(item, self._LineBreak): | |
2162 | string = string.rstrip() | |
2163 | string += item.emit() | |
2164 | ||
2165 | return string.rstrip() + '\n' | |
2166 | ||
2167 | ########################################################################### | |
2168 | # Private Methods | |
2169 | ||
2170 | def _add_item(self, item, indent_amt): | |
2171 | """Add an item to the line. | |
2172 | ||
2173 | Reflow the line to get the best formatting after the item is | |
2174 | inserted. The bracket depth indicates if the item is being | |
2175 | inserted inside of a container or not. | |
2176 | ||
2177 | """ | |
2178 | if self._prev_item and self._prev_item.is_string and item.is_string: | |
2179 | # Place consecutive string literals on separate lines. | |
2180 | self._lines.append(self._LineBreak()) | |
2181 | self._lines.append(self._Indent(indent_amt)) | |
2182 | ||
2183 | item_text = str(item) | |
2184 | if self._lines and self._bracket_depth: | |
2185 | # Adding the item into a container. | |
2186 | self._prevent_default_initializer_splitting(item, indent_amt) | |
2187 | ||
2188 | if item_text in '.,)]}': | |
2189 | self._split_after_delimiter(item, indent_amt) | |
2190 | ||
2191 | elif self._lines and not self.line_empty(): | |
2192 | # Adding the item outside of a container. | |
2193 | if self.fits_on_current_line(len(item_text)): | |
2194 | self._enforce_space(item) | |
2195 | ||
2196 | else: | |
2197 | # Line break for the new item. | |
2198 | self._lines.append(self._LineBreak()) | |
2199 | self._lines.append(self._Indent(indent_amt)) | |
2200 | ||
2201 | self._lines.append(item) | |
2202 | self._prev_item, self._prev_prev_item = item, self._prev_item | |
2203 | ||
2204 | if item_text in '([{': | |
2205 | self._bracket_depth += 1 | |
2206 | ||
2207 | elif item_text in '}])': | |
2208 | self._bracket_depth -= 1 | |
2209 | assert self._bracket_depth >= 0 | |
2210 | ||
2211 | def _add_container(self, container, indent_amt, break_after_open_bracket): | |
2212 | actual_indent = indent_amt + 1 | |
2213 | ||
2214 | if ( | |
2215 | str(self._prev_item) != '=' and | |
2216 | not self.line_empty() and | |
2217 | not self.fits_on_current_line( | |
2218 | container.size + self._bracket_depth + 2) | |
2219 | ): | |
2220 | ||
2221 | if str(container)[0] == '(' and self._prev_item.is_name: | |
2222 | # Don't split before the opening bracket of a call. | |
2223 | break_after_open_bracket = True | |
2224 | actual_indent = indent_amt + 4 | |
2225 | elif ( | |
2226 | break_after_open_bracket or | |
2227 | str(self._prev_item) not in '([{' | |
2228 | ): | |
2229 | # If the container doesn't fit on the current line and the | |
2230 | # current line isn't empty, place the container on the next | |
2231 | # line. | |
2232 | self._lines.append(self._LineBreak()) | |
2233 | self._lines.append(self._Indent(indent_amt)) | |
2234 | break_after_open_bracket = False | |
2235 | else: | |
2236 | actual_indent = self.current_size() + 1 | |
2237 | break_after_open_bracket = False | |
2238 | ||
2239 | if isinstance(container, (ListComprehension, IfExpression)): | |
2240 | actual_indent = indent_amt | |
2241 | ||
2242 | # Increase the continued indentation only if recursing on a | |
2243 | # container. | |
2244 | container.reflow(self, ' ' * actual_indent, | |
2245 | break_after_open_bracket=break_after_open_bracket) | |
2246 | ||
2247 | def _prevent_default_initializer_splitting(self, item, indent_amt): | |
2248 | """Prevent splitting between a default initializer. | |
2249 | ||
2250 | When there is a default initializer, it's best to keep it all on | |
2251 | the same line. It's nicer and more readable, even if it goes | |
2252 | over the maximum allowable line length. This goes back along the | |
2253 | current line to determine if we have a default initializer, and, | |
2254 | if so, to remove extraneous whitespaces and add a line | |
2255 | break/indent before it if needed. | |
2256 | ||
2257 | """ | |
2258 | if str(item) == '=': | |
2259 | # This is the assignment in the initializer. Just remove spaces for | |
2260 | # now. | |
2261 | self._delete_whitespace() | |
2262 | return | |
2263 | ||
2264 | if (not self._prev_item or not self._prev_prev_item or | |
2265 | str(self._prev_item) != '='): | |
2266 | return | |
2267 | ||
2268 | self._delete_whitespace() | |
2269 | prev_prev_index = self._lines.index(self._prev_prev_item) | |
2270 | ||
2271 | if ( | |
2272 | isinstance(self._lines[prev_prev_index - 1], self._Indent) or | |
2273 | self.fits_on_current_line(item.size + 1) | |
2274 | ): | |
2275 | # The default initializer is already the only item on this line. | |
2276 | # Don't insert a newline here. | |
2277 | return | |
2278 | ||
2279 | # Replace the space with a newline/indent combo. | |
2280 | if isinstance(self._lines[prev_prev_index - 1], self._Space): | |
2281 | del self._lines[prev_prev_index - 1] | |
2282 | ||
2283 | self.add_line_break_at(self._lines.index(self._prev_prev_item), | |
2284 | indent_amt) | |
2285 | ||
2286 | def _split_after_delimiter(self, item, indent_amt): | |
2287 | """Split the line only after a delimiter.""" | |
2288 | self._delete_whitespace() | |
2289 | ||
2290 | if self.fits_on_current_line(item.size): | |
2291 | return | |
2292 | ||
2293 | last_space = None | |
2294 | for current_item in reversed(self._lines): | |
2295 | if ( | |
2296 | last_space and | |
2297 | (not isinstance(current_item, Atom) or | |
2298 | not current_item.is_colon) | |
2299 | ): | |
2300 | break | |
2301 | else: | |
2302 | last_space = None | |
2303 | if isinstance(current_item, self._Space): | |
2304 | last_space = current_item | |
2305 | if isinstance(current_item, (self._LineBreak, self._Indent)): | |
2306 | return | |
2307 | ||
2308 | if not last_space: | |
2309 | return | |
2310 | ||
2311 | self.add_line_break_at(self._lines.index(last_space), indent_amt) | |
2312 | ||
2313 | def _enforce_space(self, item): | |
2314 | """Enforce a space in certain situations. | |
2315 | ||
2316 | There are cases where we will want a space where normally we | |
2317 | wouldn't put one. This just enforces the addition of a space. | |
2318 | ||
2319 | """ | |
2320 | if isinstance(self._lines[-1], | |
2321 | (self._Space, self._LineBreak, self._Indent)): | |
2322 | return | |
2323 | ||
2324 | if not self._prev_item: | |
2325 | return | |
2326 | ||
2327 | item_text = str(item) | |
2328 | prev_text = str(self._prev_item) | |
2329 | ||
2330 | # Prefer a space around a '.' in an import statement, and between the | |
2331 | # 'import' and '('. | |
2332 | if ( | |
2333 | (item_text == '.' and prev_text == 'from') or | |
2334 | (item_text == 'import' and prev_text == '.') or | |
2335 | (item_text == '(' and prev_text == 'import') | |
2336 | ): | |
2337 | self._lines.append(self._Space()) | |
2338 | ||
2339 | def _delete_whitespace(self): | |
2340 | """Delete all whitespace from the end of the line.""" | |
2341 | while isinstance(self._lines[-1], (self._Space, self._LineBreak, | |
2342 | self._Indent)): | |
2343 | del self._lines[-1] | |
2344 | ||
2345 | ||
2346 | class Atom(object): | |
2347 | ||
2348 | """The smallest unbreakable unit that can be reflowed.""" | |
2349 | ||
2350 | def __init__(self, atom): | |
2351 | self._atom = atom | |
2352 | ||
2353 | def __repr__(self): | |
2354 | return self._atom.token_string | |
2355 | ||
2356 | def __len__(self): | |
2357 | return self.size | |
2358 | ||
2359 | def reflow( | |
2360 | self, reflowed_lines, continued_indent, extent, | |
2361 | break_after_open_bracket=False, | |
2362 | is_list_comp_or_if_expr=False, | |
2363 | next_is_dot=False | |
2364 | ): | |
2365 | if self._atom.token_type == tokenize.COMMENT: | |
2366 | reflowed_lines.add_comment(self) | |
2367 | return | |
2368 | ||
2369 | total_size = extent if extent else self.size | |
2370 | ||
2371 | if self._atom.token_string not in ',:([{}])': | |
2372 | # Some atoms will need an extra 1-sized space token after them. | |
2373 | total_size += 1 | |
2374 | ||
2375 | prev_item = reflowed_lines.previous_item() | |
2376 | if ( | |
2377 | not is_list_comp_or_if_expr and | |
2378 | not reflowed_lines.fits_on_current_line(total_size) and | |
2379 | not (next_is_dot and | |
2380 | reflowed_lines.fits_on_current_line(self.size + 1)) and | |
2381 | not reflowed_lines.line_empty() and | |
2382 | not self.is_colon and | |
2383 | not (prev_item and prev_item.is_name and | |
2384 | str(self) == '(') | |
2385 | ): | |
2386 | # Start a new line if there is already something on the line and | |
2387 | # adding this atom would make it go over the max line length. | |
2388 | reflowed_lines.add_line_break(continued_indent) | |
2389 | else: | |
2390 | reflowed_lines.add_space_if_needed(str(self)) | |
2391 | ||
2392 | reflowed_lines.add(self, len(continued_indent), | |
2393 | break_after_open_bracket) | |
2394 | ||
2395 | def emit(self): | |
2396 | return self.__repr__() | |
2397 | ||
2398 | @property | |
2399 | def is_keyword(self): | |
2400 | return keyword.iskeyword(self._atom.token_string) | |
2401 | ||
2402 | @property | |
2403 | def is_string(self): | |
2404 | return self._atom.token_type == tokenize.STRING | |
2405 | ||
2406 | @property | |
2407 | def is_name(self): | |
2408 | return self._atom.token_type == tokenize.NAME | |
2409 | ||
2410 | @property | |
2411 | def is_number(self): | |
2412 | return self._atom.token_type == tokenize.NUMBER | |
2413 | ||
2414 | @property | |
2415 | def is_comma(self): | |
2416 | return self._atom.token_string == ',' | |
2417 | ||
2418 | @property | |
2419 | def is_colon(self): | |
2420 | return self._atom.token_string == ':' | |
2421 | ||
2422 | @property | |
2423 | def size(self): | |
2424 | return len(self._atom.token_string) | |
2425 | ||
2426 | ||
2427 | class Container(object): | |
2428 | ||
2429 | """Base class for all container types.""" | |
2430 | ||
2431 | def __init__(self, items): | |
2432 | self._items = items | |
2433 | ||
2434 | def __repr__(self): | |
2435 | string = '' | |
2436 | last_was_keyword = False | |
2437 | ||
2438 | for item in self._items: | |
2439 | if item.is_comma: | |
2440 | string += ', ' | |
2441 | elif item.is_colon: | |
2442 | string += ': ' | |
2443 | else: | |
2444 | item_string = str(item) | |
2445 | if ( | |
2446 | string and | |
2447 | (last_was_keyword or | |
2448 | (not string.endswith(tuple('([{,.:}]) ')) and | |
2449 | not item_string.startswith(tuple('([{,.:}])')))) | |
2450 | ): | |
2451 | string += ' ' | |
2452 | string += item_string | |
2453 | ||
2454 | last_was_keyword = item.is_keyword | |
2455 | return string | |
2456 | ||
2457 | def __iter__(self): | |
2458 | for element in self._items: | |
2459 | yield element | |
2460 | ||
2461 | def __getitem__(self, idx): | |
2462 | return self._items[idx] | |
2463 | ||
2464 | def reflow(self, reflowed_lines, continued_indent, | |
2465 | break_after_open_bracket=False): | |
2466 | last_was_container = False | |
2467 | for (index, item) in enumerate(self._items): | |
2468 | next_item = get_item(self._items, index + 1) | |
2469 | ||
2470 | if isinstance(item, Atom): | |
2471 | is_list_comp_or_if_expr = ( | |
2472 | isinstance(self, (ListComprehension, IfExpression))) | |
2473 | item.reflow(reflowed_lines, continued_indent, | |
2474 | self._get_extent(index), | |
2475 | is_list_comp_or_if_expr=is_list_comp_or_if_expr, | |
2476 | next_is_dot=(next_item and | |
2477 | str(next_item) == '.')) | |
2478 | if last_was_container and item.is_comma: | |
2479 | reflowed_lines.add_line_break(continued_indent) | |
2480 | last_was_container = False | |
2481 | else: # isinstance(item, Container) | |
2482 | reflowed_lines.add(item, len(continued_indent), | |
2483 | break_after_open_bracket) | |
2484 | last_was_container = not isinstance(item, (ListComprehension, | |
2485 | IfExpression)) | |
2486 | ||
2487 | if ( | |
2488 | break_after_open_bracket and index == 0 and | |
2489 | # Prefer to keep empty containers together instead of | |
2490 | # separating them. | |
2491 | str(item) == self.open_bracket and | |
2492 | (not next_item or str(next_item) != self.close_bracket) and | |
2493 | (len(self._items) != 3 or not isinstance(next_item, Atom)) | |
2494 | ): | |
2495 | reflowed_lines.add_line_break(continued_indent) | |
2496 | break_after_open_bracket = False | |
2497 | else: | |
2498 | next_next_item = get_item(self._items, index + 2) | |
2499 | if ( | |
2500 | str(item) not in ['.', '%', 'in'] and | |
2501 | next_item and not isinstance(next_item, Container) and | |
2502 | str(next_item) != ':' and | |
2503 | next_next_item and (not isinstance(next_next_item, Atom) or | |
2504 | str(next_item) == 'not') and | |
2505 | not reflowed_lines.line_empty() and | |
2506 | not reflowed_lines.fits_on_current_line( | |
2507 | self._get_extent(index + 1) + 2) | |
2508 | ): | |
2509 | reflowed_lines.add_line_break(continued_indent) | |
2510 | ||
2511 | def _get_extent(self, index): | |
2512 | """The extent of the full element. | |
2513 | ||
2514 | E.g., the length of a function call or keyword. | |
2515 | ||
2516 | """ | |
2517 | extent = 0 | |
2518 | prev_item = get_item(self._items, index - 1) | |
2519 | seen_dot = prev_item and str(prev_item) == '.' | |
2520 | while index < len(self._items): | |
2521 | item = get_item(self._items, index) | |
2522 | index += 1 | |
2523 | ||
2524 | if isinstance(item, (ListComprehension, IfExpression)): | |
2525 | break | |
2526 | ||
2527 | if isinstance(item, Container): | |
2528 | if prev_item and prev_item.is_name: | |
2529 | if seen_dot: | |
2530 | extent += 1 | |
2531 | else: | |
2532 | extent += item.size | |
2533 | ||
2534 | prev_item = item | |
2535 | continue | |
2536 | elif (str(item) not in ['.', '=', ':', 'not'] and | |
2537 | not item.is_name and not item.is_string): | |
2538 | break | |
2539 | ||
2540 | if str(item) == '.': | |
2541 | seen_dot = True | |
2542 | ||
2543 | extent += item.size | |
2544 | prev_item = item | |
2545 | ||
2546 | return extent | |
2547 | ||
2548 | @property | |
2549 | def is_string(self): | |
2550 | return False | |
2551 | ||
2552 | @property | |
2553 | def size(self): | |
2554 | return len(self.__repr__()) | |
2555 | ||
2556 | @property | |
2557 | def is_keyword(self): | |
2558 | return False | |
2559 | ||
2560 | @property | |
2561 | def is_name(self): | |
2562 | return False | |
2563 | ||
2564 | @property | |
2565 | def is_comma(self): | |
2566 | return False | |
2567 | ||
2568 | @property | |
2569 | def is_colon(self): | |
2570 | return False | |
2571 | ||
2572 | @property | |
2573 | def open_bracket(self): | |
2574 | return None | |
2575 | ||
2576 | @property | |
2577 | def close_bracket(self): | |
2578 | return None | |
2579 | ||
2580 | ||
2581 | class Tuple(Container): | |
2582 | ||
2583 | """A high-level representation of a tuple.""" | |
2584 | ||
2585 | @property | |
2586 | def open_bracket(self): | |
2587 | return '(' | |
2588 | ||
2589 | @property | |
2590 | def close_bracket(self): | |
2591 | return ')' | |
2592 | ||
2593 | ||
2594 | class List(Container): | |
2595 | ||
2596 | """A high-level representation of a list.""" | |
2597 | ||
2598 | @property | |
2599 | def open_bracket(self): | |
2600 | return '[' | |
2601 | ||
2602 | @property | |
2603 | def close_bracket(self): | |
2604 | return ']' | |
2605 | ||
2606 | ||
2607 | class DictOrSet(Container): | |
2608 | ||
2609 | """A high-level representation of a dictionary or set.""" | |
2610 | ||
2611 | @property | |
2612 | def open_bracket(self): | |
2613 | return '{' | |
2614 | ||
2615 | @property | |
2616 | def close_bracket(self): | |
2617 | return '}' | |
2618 | ||
2619 | ||
2620 | class ListComprehension(Container): | |
2621 | ||
2622 | """A high-level representation of a list comprehension.""" | |
2623 | ||
2624 | @property | |
2625 | def size(self): | |
2626 | length = 0 | |
2627 | for item in self._items: | |
2628 | if isinstance(item, IfExpression): | |
2629 | break | |
2630 | length += item.size | |
2631 | return length | |
2632 | ||
2633 | ||
2634 | class IfExpression(Container): | |
2635 | ||
2636 | """A high-level representation of an if-expression.""" | |
2637 | ||
2638 | ||
2639 | def _parse_container(tokens, index, for_or_if=None): | |
2640 | """Parse a high-level container, such as a list, tuple, etc.""" | |
2641 | ||
2642 | # Store the opening bracket. | |
2643 | items = [Atom(Token(*tokens[index]))] | |
2644 | index += 1 | |
2645 | ||
2646 | num_tokens = len(tokens) | |
2647 | while index < num_tokens: | |
2648 | tok = Token(*tokens[index]) | |
2649 | ||
2650 | if tok.token_string in ',)]}': | |
2651 | # First check if we're at the end of a list comprehension or | |
2652 | # if-expression. Don't add the ending token as part of the list | |
2653 | # comprehension or if-expression, because they aren't part of those | |
2654 | # constructs. | |
2655 | if for_or_if == 'for': | |
2656 | return (ListComprehension(items), index - 1) | |
2657 | ||
2658 | elif for_or_if == 'if': | |
2659 | return (IfExpression(items), index - 1) | |
2660 | ||
2661 | # We've reached the end of a container. | |
2662 | items.append(Atom(tok)) | |
2663 | ||
2664 | # Then determine which container type the closing bracket ends. | |
2665 | if tok.token_string == ')': | |
2666 | # The end of a tuple. | |
2667 | return (Tuple(items), index) | |
2668 | ||
2669 | elif tok.token_string == ']': | |
2670 | # The end of a list. | |
2671 | return (List(items), index) | |
2672 | ||
2673 | elif tok.token_string == '}': | |
2674 | # The end of a dictionary or set. | |
2675 | return (DictOrSet(items), index) | |
2676 | ||
2677 | elif tok.token_string in '([{': | |
2678 | # A sub-container is being defined. | |
2679 | (container, index) = _parse_container(tokens, index) | |
2680 | items.append(container) | |
2681 | ||
2682 | elif tok.token_string == 'for': | |
2683 | (container, index) = _parse_container(tokens, index, 'for') | |
2684 | items.append(container) | |
2685 | ||
2686 | elif tok.token_string == 'if': | |
2687 | (container, index) = _parse_container(tokens, index, 'if') | |
2688 | items.append(container) | |
2689 | ||
2690 | else: | |
2691 | items.append(Atom(tok)) | |
2692 | ||
2693 | index += 1 | |
2694 | ||
2695 | return (None, None) | |
2696 | ||
2697 | ||
2698 | def _parse_tokens(tokens): | |
2699 | """Parse the tokens. | |
2700 | ||
2701 | This converts the tokens into a form where we can manipulate them | |
2702 | more easily. | |
2703 | ||
2704 | """ | |
2705 | ||
2706 | index = 0 | |
2707 | parsed_tokens = [] | |
2708 | ||
2709 | num_tokens = len(tokens) | |
2710 | while index < num_tokens: | |
2711 | tok = Token(*tokens[index]) | |
2712 | ||
2713 | assert tok.token_type != token.INDENT | |
2714 | if tok.token_type == tokenize.NEWLINE: | |
2715 | # There's only one newline and it's at the end. | |
2716 | break | |
2717 | ||
2718 | if tok.token_string in '([{': | |
2719 | (container, index) = _parse_container(tokens, index) | |
2720 | if not container: | |
2721 | return None | |
2722 | parsed_tokens.append(container) | |
2723 | else: | |
2724 | parsed_tokens.append(Atom(tok)) | |
2725 | ||
2726 | index += 1 | |
2727 | ||
2728 | return parsed_tokens | |
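| ||
| # Illustrative sketch: '[1, (2, 3)]' parses into a single List container | |
| # (which itself holds an Atom and a nested Tuple). | |
| # >>> parsed = _parse_tokens(generate_tokens('[1, (2, 3)]')) | |
| # >>> [type(p).__name__ for p in parsed] | |
| # ['List'] | |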
2729 | ||
2730 | ||
2731 | def _reflow_lines(parsed_tokens, indentation, max_line_length, | |
2732 | start_on_prefix_line): | |
2733 | """Reflow the lines so that it looks nice.""" | |
2734 | ||
2735 | if str(parsed_tokens[0]) == 'def': | |
2736 | # A function definition gets indented a bit more. | |
2737 | continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE | |
2738 | else: | |
2739 | continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE | |
2740 | ||
2741 | break_after_open_bracket = not start_on_prefix_line | |
2742 | ||
2743 | lines = ReformattedLines(max_line_length) | |
2744 | lines.add_indent(len(indentation.lstrip('\r\n'))) | |
2745 | ||
2746 | if not start_on_prefix_line: | |
2747 | # If splitting after the opening bracket will cause the first element | |
2748 | # to be aligned weirdly, don't try it. | |
2749 | first_token = get_item(parsed_tokens, 0) | |
2750 | second_token = get_item(parsed_tokens, 1) | |
2751 | ||
2752 | if ( | |
2753 | first_token and second_token and | |
2754 | str(second_token)[0] == '(' and | |
2755 | len(indentation) + len(first_token) + 1 == len(continued_indent) | |
2756 | ): | |
2757 | return None | |
2758 | ||
2759 | for item in parsed_tokens: | |
2760 | lines.add_space_if_needed(str(item), equal=True) | |
2761 | ||
2762 | save_continued_indent = continued_indent | |
2763 | if start_on_prefix_line and isinstance(item, Container): | |
2764 | start_on_prefix_line = False | |
2765 | continued_indent = ' ' * (lines.current_size() + 1) | |
2766 | ||
2767 | item.reflow(lines, continued_indent, break_after_open_bracket) | |
2768 | continued_indent = save_continued_indent | |
2769 | ||
2770 | return lines.emit() | |
2771 | ||
2772 | ||
2773 | def _shorten_line_at_tokens_new(tokens, source, indentation, | |
2774 | max_line_length): | |
2775 | """Shorten the line taking its length into account. | |
2776 | ||
2777 | The input is expected to be free of newlines except for inside | |
2778 | multiline strings and at the end. | |
2779 | ||
2780 | """ | |
2781 | # Yield the original source first so it can compete with the shortened | |
2782 | # candidate lines generated below. | |
2783 | yield indentation + source | |
2784 | ||
2785 | parsed_tokens = _parse_tokens(tokens) | |
2786 | ||
2787 | if parsed_tokens: | |
2788 | # Perform two reflows. The first one starts on the same line as the | |
2789 | # prefix. The second starts on the line after the prefix. | |
2790 | fixed = _reflow_lines(parsed_tokens, indentation, max_line_length, | |
2791 | start_on_prefix_line=True) | |
2792 | if fixed and check_syntax(normalize_multiline(fixed.lstrip())): | |
2793 | yield fixed | |
2794 | ||
2795 | fixed = _reflow_lines(parsed_tokens, indentation, max_line_length, | |
2796 | start_on_prefix_line=False) | |
2797 | if fixed and check_syntax(normalize_multiline(fixed.lstrip())): | |
2798 | yield fixed | |
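| ||
| # A hedged usage sketch: the generator always yields the original line | |
| # first, then up to two syntax-checked reflowed variants. | |
| # >>> src = 'foo = bar(some_argument, another_argument)' | |
| # >>> for candidate in _shorten_line_at_tokens_new( | |
| # ...         generate_tokens(src), src, indentation='', max_line_length=20): | |
| # ...     pass  # inspect each candidate | |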
2799 | ||
2800 | ||
2801 | def _shorten_line_at_tokens(tokens, source, indentation, indent_word, | |
2802 | key_token_strings, aggressive): | |
2803 | """Separate line by breaking at tokens in key_token_strings. | |
2804 | ||
2805 | The input is expected to be free of newlines except for inside | |
2806 | multiline strings and at the end. | |
2807 | ||
2808 | """ | |
2809 | offsets = [] | |
2810 | for (index, _t) in enumerate(token_offsets(tokens)): | |
2811 | (token_type, | |
2812 | token_string, | |
2813 | start_offset, | |
2814 | end_offset) = _t | |
2815 | ||
2816 | assert token_type != token.INDENT | |
2817 | ||
2818 | if token_string in key_token_strings: | |
2819 | # Do not break in containers with zero or one items. | |
2820 | unwanted_next_token = { | |
2821 | '(': ')', | |
2822 | '[': ']', | |
2823 | '{': '}'}.get(token_string) | |
2824 | if unwanted_next_token: | |
2825 | if ( | |
2826 | get_item(tokens, | |
2827 | index + 1, | |
2828 | default=[None, None])[1] == unwanted_next_token or | |
2829 | get_item(tokens, | |
2830 | index + 2, | |
2831 | default=[None, None])[1] == unwanted_next_token | |
2832 | ): | |
2833 | continue | |
2834 | ||
2835 | if ( | |
2836 | index > 2 and token_string == '(' and | |
2837 | tokens[index - 1][1] in ',(%[' | |
2838 | ): | |
2839 | # Don't split after a tuple start, or before a tuple start if | |
2840 | # the tuple is in a list. | |
2841 | continue | |
2842 | ||
2843 | if end_offset < len(source) - 1: | |
2844 | # Don't split right before newline. | |
2845 | offsets.append(end_offset) | |
2846 | else: | |
2847 | # Break at adjacent strings. These were probably meant to be on | |
2848 | # separate lines in the first place. | |
2849 | previous_token = get_item(tokens, index - 1) | |
2850 | if ( | |
2851 | token_type == tokenize.STRING and | |
2852 | previous_token and previous_token[0] == tokenize.STRING | |
2853 | ): | |
2854 | offsets.append(start_offset) | |
2855 | ||
2856 | current_indent = None | |
2857 | fixed = None | |
2858 | for line in split_at_offsets(source, offsets): | |
2859 | if fixed: | |
2860 | fixed += '\n' + current_indent + line | |
2861 | ||
2862 | for symbol in '([{': | |
2863 | if line.endswith(symbol): | |
2864 | current_indent += indent_word | |
2865 | else: | |
2866 | # First line. | |
2867 | fixed = line | |
2868 | assert not current_indent | |
2869 | current_indent = indent_word | |
2870 | ||
2871 | assert fixed is not None | |
2872 | ||
2873 | if check_syntax(normalize_multiline(fixed) | |
2874 | if aggressive > 1 else fixed): | |
2875 | return indentation + fixed | |
2876 | ||
2877 | return None | |
2878 | ||
2879 | ||
2880 | def token_offsets(tokens): | |
2881 | """Yield tokens and offsets.""" | |
2882 | end_offset = 0 | |
2883 | previous_end_row = 0 | |
2884 | previous_end_column = 0 | |
2885 | for t in tokens: | |
2886 | token_type = t[0] | |
2887 | token_string = t[1] | |
2888 | (start_row, start_column) = t[2] | |
2889 | (end_row, end_column) = t[3] | |
2890 | ||
2891 | # Account for the whitespace between tokens. | |
2892 | end_offset += start_column | |
2893 | if previous_end_row == start_row: | |
2894 | end_offset -= previous_end_column | |
2895 | ||
2896 | # Record the start offset of the token. | |
2897 | start_offset = end_offset | |
2898 | ||
2899 | # Account for the length of the token itself. | |
2900 | end_offset += len(token_string) | |
2901 | ||
2902 | yield (token_type, | |
2903 | token_string, | |
2904 | start_offset, | |
2905 | end_offset) | |
2906 | ||
2907 | previous_end_row = end_row | |
2908 | previous_end_column = end_column | |
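| ||
| # Illustrative sketch: offsets are 0-based column positions within the | |
| # single logical line. | |
| # >>> [(s, a, b) for (_, s, a, b) in token_offsets(generate_tokens('x = 1'))][:3] | |
| # [('x', 0, 1), ('=', 2, 3), ('1', 4, 5)] | |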
2909 | ||
2910 | ||
2911 | def normalize_multiline(line): | |
2912 | """Normalize multiline-related code that will cause syntax error. | |
2913 | ||
2914 | This is for purposes of checking syntax. | |
2915 | ||
2916 | """ | |
2917 | if line.startswith('def ') and line.rstrip().endswith(':'): | |
2918 | return line + ' pass' | |
2919 | elif line.startswith('return '): | |
2920 | return 'def _(): ' + line | |
2921 | elif line.startswith('@'): | |
2922 | return line + 'def _(): pass' | |
2923 | elif line.startswith('class '): | |
2924 | return line + ' pass' | |
2925 | elif line.startswith(('if ', 'elif ', 'for ', 'while ')): | |
2926 | return line + ' pass' | |
2927 | ||
2928 | return line | |
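| ||
| # For example (illustrative): | |
| # >>> normalize_multiline('def foo():') | |
| # 'def foo(): pass' | |
| # >>> normalize_multiline('return x') | |
| # 'def _(): return x' | |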
2929 | ||
2930 | ||
2931 | def fix_whitespace(line, offset, replacement): | |
2932 | """Replace whitespace at offset and return fixed line.""" | |
2933 | # Replace escaped newlines too | |
2934 | left = line[:offset].rstrip('\n\r \t\\') | |
2935 | right = line[offset:].lstrip('\n\r \t\\') | |
2936 | if right.startswith('#'): | |
2937 | return line | |
2938 | ||
2939 | return left + replacement + right | |
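| ||
| # Illustrative: collapse the run of whitespace at offset 1 to one space. | |
| # >>> fix_whitespace('x  = 1', 1, ' ') | |
| # 'x = 1' | |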
2940 | ||
2941 | ||
2942 | def _execute_pep8(pep8_options, source): | |
2943 | """Execute pycodestyle via python method calls.""" | |
2944 | class QuietReport(pycodestyle.BaseReport): | |
2945 | ||
2946 | """Version of checker that does not print.""" | |
2947 | ||
2948 | def __init__(self, options): | |
2949 | super(QuietReport, self).__init__(options) | |
2950 | self.__full_error_results = [] | |
2951 | ||
2952 | def error(self, line_number, offset, text, check): | |
2953 | """Collect errors.""" | |
2954 | code = super(QuietReport, self).error(line_number, | |
2955 | offset, | |
2956 | text, | |
2957 | check) | |
2958 | if code: | |
2959 | self.__full_error_results.append( | |
2960 | {'id': code, | |
2961 | 'line': line_number, | |
2962 | 'column': offset + 1, | |
2963 | 'info': text}) | |
2964 | ||
2965 | def full_error_results(self): | |
2966 | """Return error results in detail. | |
2967 | ||
2968 | Results are in the form of a list of dictionaries. Each | |
2969 | dictionary contains 'id', 'line', 'column', and 'info'. | |
2970 | ||
2971 | """ | |
2972 | return self.__full_error_results | |
2973 | ||
2974 | checker = pycodestyle.Checker('', lines=source, reporter=QuietReport, | |
2975 | **pep8_options) | |
2976 | checker.check_all() | |
2977 | return checker.report.full_error_results() | |
2978 | ||
2979 | ||
2980 | def _remove_leading_and_normalize(line, with_rstrip=True): | |
2981 | # Strip spaces, tabs, and vertical tabs, but leave form feeds (FF) intact. | |
2982 | if with_rstrip: | |
2983 | return line.lstrip(' \t\v').rstrip(CR + LF) + '\n' | |
2984 | return line.lstrip(' \t\v') | |
2985 | ||
2986 | ||
2987 | class Reindenter(object): | |
2988 | ||
2989 | """Reindents badly-indented code to uniformly use four-space indentation. | |
2990 | ||
2991 | Released to the public domain, by Tim Peters, 03 October 2000. | |
2992 | ||
2993 | """ | |
2994 | ||
2995 | def __init__(self, input_text, leave_tabs=False): | |
2996 | sio = io.StringIO(input_text) | |
2997 | source_lines = sio.readlines() | |
2998 | ||
2999 | self.string_content_line_numbers = multiline_string_lines(input_text) | |
3000 | ||
3001 | # File lines, rstripped & tab-expanded. Dummy at start is so | |
3002 | # that we can use tokenize's 1-based line numbering easily. | |
3003 | # Note that a line is all-blank iff it is a newline. | |
3004 | self.lines = [] | |
3005 | for line_number, line in enumerate(source_lines, start=1): | |
3006 | # Do not modify if inside a multiline string. | |
3007 | if line_number in self.string_content_line_numbers: | |
3008 | self.lines.append(line) | |
3009 | else: | |
3010 | # Only expand leading tabs. | |
3011 | with_rstrip = line_number != len(source_lines) | |
3012 | if leave_tabs: | |
3013 | self.lines.append( | |
3014 | _get_indentation(line) + | |
3015 | _remove_leading_and_normalize(line, with_rstrip) | |
3016 | ) | |
3017 | else: | |
3018 | self.lines.append( | |
3019 | _get_indentation(line).expandtabs() + | |
3020 | _remove_leading_and_normalize(line, with_rstrip) | |
3021 | ) | |
3022 | ||
3023 | self.lines.insert(0, None) | |
3024 | self.index = 1 # index into self.lines of next line | |
3025 | self.input_text = input_text | |
3026 | ||
3027 | def run(self, indent_size=DEFAULT_INDENT_SIZE): | |
3028 | """Fix indentation and return modified line numbers. | |
3029 | ||
3030 | Line numbers are indexed at 1. | |
3031 | ||
3032 | """ | |
3033 | if indent_size < 1: | |
3034 | return self.input_text | |
3035 | ||
3036 | try: | |
3037 | stats = _reindent_stats(tokenize.generate_tokens(self.getline)) | |
3038 | except (SyntaxError, tokenize.TokenError): | |
3039 | return self.input_text | |
3040 | # Work on the tab-expanded, normalized lines prepared above. | |
3041 | lines = self.lines | |
3042 | # Sentinel. | |
3043 | stats.append((len(lines), 0)) | |
3044 | # Map count of leading spaces to # we want. | |
3045 | have2want = {} | |
3046 | # Program after transformation. | |
3047 | after = [] | |
3048 | # Copy over initial empty lines -- there's nothing to do until | |
3049 | # we see a line with *something* on it. | |
3050 | i = stats[0][0] | |
3051 | after.extend(lines[1:i]) | |
3052 | for i in range(len(stats) - 1): | |
3053 | thisstmt, thislevel = stats[i] | |
3054 | nextstmt = stats[i + 1][0] | |
3055 | have = _leading_space_count(lines[thisstmt]) | |
3056 | want = thislevel * indent_size | |
3057 | if want < 0: | |
3058 | # A comment line. | |
3059 | if have: | |
3060 | # An indented comment line. If we saw the same | |
3061 | # indentation before, reuse what it most recently | |
3062 | # mapped to. | |
3063 | want = have2want.get(have, -1) | |
3064 | if want < 0: | |
3065 | # Then it probably belongs to the next real stmt. | |
3066 | for j in range(i + 1, len(stats) - 1): | |
3067 | jline, jlevel = stats[j] | |
3068 | if jlevel >= 0: | |
3069 | if have == _leading_space_count(lines[jline]): | |
3070 | want = jlevel * indent_size | |
3071 | break | |
3072 | # Maybe it's a hanging comment like this one, | |
3073 | if want < 0: | |
3074 | # in which case we should shift it like its base | |
3075 | # line got shifted. | |
3076 | for j in range(i - 1, -1, -1): | |
3077 | jline, jlevel = stats[j] | |
3078 | if jlevel >= 0: | |
3079 | want = (have + _leading_space_count( | |
3080 | after[jline - 1]) - | |
3081 | _leading_space_count(lines[jline])) | |
3082 | break | |
3083 | if want < 0: | |
3084 | # Still no luck -- leave it alone. | |
3085 | want = have | |
3086 | else: | |
3087 | want = 0 | |
3088 | assert want >= 0 | |
3089 | have2want[have] = want | |
3090 | diff = want - have | |
3091 | if diff == 0 or have == 0: | |
3092 | after.extend(lines[thisstmt:nextstmt]) | |
3093 | else: | |
3094 | for line_number, line in enumerate(lines[thisstmt:nextstmt], | |
3095 | start=thisstmt): | |
3096 | if line_number in self.string_content_line_numbers: | |
3097 | after.append(line) | |
3098 | elif diff > 0: | |
3099 | if line == '\n': | |
3100 | after.append(line) | |
3101 | else: | |
3102 | after.append(' ' * diff + line) | |
3103 | else: | |
3104 | remove = min(_leading_space_count(line), -diff) | |
3105 | after.append(line[remove:]) | |
3106 | ||
3107 | return ''.join(after) | |
3108 | ||
3109 | def getline(self): | |
3110 | """Line-getter for tokenize.""" | |
3111 | if self.index >= len(self.lines): | |
3112 | line = '' | |
3113 | else: | |
3114 | line = self.lines[self.index] | |
3115 | self.index += 1 | |
3116 | return line | |
3117 | ||
3118 | ||
3119 | def _reindent_stats(tokens): | |
3120 | """Return list of (lineno, indentlevel) pairs. | |
3121 | ||
3122 | One for each stmt and comment line. indentlevel is -1 for comment | |
3123 | lines, as a signal that tokenize doesn't know what to do about them; | |
3124 | indeed, they're our headache! | |
3125 | ||
3126 | """ | |
3127 | find_stmt = 1 # Next token begins a fresh stmt? | |
3128 | level = 0 # Current indent level. | |
3129 | stats = [] | |
3130 | ||
3131 | for t in tokens: | |
3132 | token_type = t[0] | |
3133 | sline = t[2][0] | |
3134 | line = t[4] | |
3135 | ||
3136 | if token_type == tokenize.NEWLINE: | |
3137 | # A program statement, or ENDMARKER, will eventually follow, | |
3138 | # after some (possibly empty) run of tokens of the form | |
3139 | # (NL | COMMENT)* (INDENT | DEDENT+)? | |
3140 | find_stmt = 1 | |
3141 | ||
3142 | elif token_type == tokenize.INDENT: | |
3143 | find_stmt = 1 | |
3144 | level += 1 | |
3145 | ||
3146 | elif token_type == tokenize.DEDENT: | |
3147 | find_stmt = 1 | |
3148 | level -= 1 | |
3149 | ||
3150 | elif token_type == tokenize.COMMENT: | |
3151 | if find_stmt: | |
3152 | stats.append((sline, -1)) | |
3153 | # But we're still looking for a new stmt, so leave | |
3154 | # find_stmt alone. | |
3155 | ||
3156 | elif token_type == tokenize.NL: | |
3157 | pass | |
3158 | ||
3159 | elif find_stmt: | |
3160 | # This is the first "real token" following a NEWLINE, so it | |
3161 | # must be the first token of the next program statement, or an | |
3162 | # ENDMARKER. | |
3163 | find_stmt = 0 | |
3164 | if line: # Not endmarker. | |
3165 | stats.append((sline, level)) | |
3166 | ||
3167 | return stats | |
3168 | ||
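# Illustrative sketch of the statistics returned above (values traced
# by hand from the token stream; a comment-only line would report an
# indent level of -1):
#
#     >>> _reindent_stats(generate_tokens('if x:\n    y = 1\n'))
#     [(1, 0), (2, 1)]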
3169 | ||
3170 | def _leading_space_count(line): | |
3171 | """Return number of leading spaces in line.""" | |
3172 | i = 0 | |
3173 | while i < len(line) and line[i] == ' ': | |
3174 | i += 1 | |
3175 | return i | |
3176 | ||
3177 | ||
3178 | def refactor_with_2to3(source_text, fixer_names, filename=''): | |
3179 | """Use lib2to3 to refactor the source. | |
3180 | ||
3181 | Return the refactored source code. | |
3182 | ||
3183 | """ | |
3184 | from lib2to3.refactor import RefactoringTool | |
3185 | fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names] | |
3186 | tool = RefactoringTool(fixer_names=fixers, explicit=fixers) | |
3187 | ||
3188 | from lib2to3.pgen2 import tokenize as lib2to3_tokenize | |
3189 | try: | |
3190 | # The name parameter is necessary particularly for the "import" fixer. | |
3191 | return str(tool.refactor_string(source_text, name=filename)) | |
3192 | except lib2to3_tokenize.TokenError: | |
3193 | return source_text | |
3194 | ||
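# Hedged usage sketch: lib2to3 is deprecated (and removed in newer
# CPython releases), so this only works on interpreters that still
# ship it:
#
#     >>> refactor_with_2to3('print "hi"\n', ['print'])
#     "print('hi')\n"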
3195 | ||
3196 | def check_syntax(code): | |
3197 | """Return True if syntax is okay.""" | |
3198 | try: | |
3199 | return compile(code, '<string>', 'exec', dont_inherit=True) | |
3200 | except (SyntaxError, TypeError, ValueError): | |
3201 | return False | |
3202 | ||
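# Example: the return value is the compiled code object on success,
# so callers should treat it as a boolean:
#
#     >>> bool(check_syntax('x = 1'))
#     True
#     >>> check_syntax('x = = 1')
#     False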
3203 | ||
3204 | def find_with_line_numbers(pattern, contents): | |
3205 | """A wrapper around 're.finditer' to find line numbers. | |
3206 | ||
3207 | Returns a list of line numbers where pattern was found in contents. | |
3208 | """ | |
3209 | matches = list(re.finditer(pattern, contents)) | |
3210 | if not matches: | |
3211 | return [] | |
3212 | ||
3213 | end = matches[-1].start() | |
3214 | ||
3215 | # -1 so a failed `rfind` maps to the first line. | |
3216 | newline_offsets = { | |
3217 | -1: 0 | |
3218 | } | |
3219 | for line_num, m in enumerate(re.finditer(r'\n', contents), 1): | |
3220 | offset = m.start() | |
3221 | if offset > end: | |
3222 | break | |
3223 | newline_offsets[offset] = line_num | |
3224 | ||
3225 | def get_line_num(match, contents): | |
3226 | """Get the line number of string in a files contents. | |
3227 | ||
3228 | Failing to find the newline is OK, -1 maps to 0 | |
3229 | ||
3230 | """ | |
3231 | newline_offset = contents.rfind('\n', 0, match.start()) | |
3232 | return newline_offsets[newline_offset] | |
3233 | ||
3234 | return [get_line_num(match, contents) + 1 for match in matches] | |
3235 | ||
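# Example (line numbers are 1-indexed):
#
#     >>> find_with_line_numbers(r'TODO', 'x = 1\n# TODO: fix\ny = 2\n')
#     [2]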
3236 | ||
3237 | def get_disabled_ranges(source): | |
3238 | """Returns a list of tuples representing the disabled ranges. | |
3239 | ||
3240 | If a disable has no matching re-enable, the rest of the file is disabled. | |
3241 | ||
3242 | """ | |
3243 | enable_line_nums = find_with_line_numbers(ENABLE_REGEX, source) | |
3244 | disable_line_nums = find_with_line_numbers(DISABLE_REGEX, source) | |
3245 | total_lines = len(re.findall("\n", source)) + 1 | |
3246 | ||
3247 | enable_commands = {} | |
3248 | for num in enable_line_nums: | |
3249 | enable_commands[num] = True | |
3250 | for num in disable_line_nums: | |
3251 | enable_commands[num] = False | |
3252 | ||
3253 | disabled_ranges = [] | |
3254 | currently_enabled = True | |
3255 | disabled_start = None | |
3256 | ||
3257 | for line, commanded_enabled in sorted(enable_commands.items()): | |
3258 | if commanded_enabled is False and currently_enabled is True: | |
3259 | disabled_start = line | |
3260 | currently_enabled = False | |
3261 | elif commanded_enabled is True and currently_enabled is False: | |
3262 | disabled_ranges.append((disabled_start, line)) | |
3263 | currently_enabled = True | |
3264 | ||
3265 | if currently_enabled is False: | |
3266 | disabled_ranges.append((disabled_start, total_lines)) | |
3267 | ||
3268 | return disabled_ranges | |
3269 | ||
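# Sketch of the expected result, assuming ENABLE_REGEX/DISABLE_REGEX
# match the usual "# autopep8: on" / "# autopep8: off" pragmas:
#
#     >>> get_disabled_ranges('a\n# autopep8: off\nb\n# autopep8: on\n')
#     [(2, 4)]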
3270 | ||
3271 | def filter_disabled_results(result, disabled_ranges): | |
3272 | """Filter out reports based on tuple of disabled ranges. | |
3273 | ||
3274 | """ | |
3275 | line = result['line'] | |
3276 | for disabled_range in disabled_ranges: | |
3277 | if disabled_range[0] <= line <= disabled_range[1]: | |
3278 | return False | |
3279 | return True | |
3280 | ||
3281 | ||
3282 | def filter_results(source, results, aggressive): | |
3283 | """Filter out spurious reports from pycodestyle. | |
3284 | ||
3285 | Higher aggressive levels allow more possibly-unsafe fixes (E711, E712). | |
3286 | ||
3287 | """ | |
3288 | non_docstring_string_line_numbers = multiline_string_lines( | |
3289 | source, include_docstrings=False) | |
3290 | all_string_line_numbers = multiline_string_lines( | |
3291 | source, include_docstrings=True) | |
3292 | ||
3293 | commented_out_code_line_numbers = commented_out_code_lines(source) | |
3294 | ||
3295 | # Filter out the disabled ranges | |
3296 | disabled_ranges = get_disabled_ranges(source) | |
3297 | if disabled_ranges: | |
3298 | results = [ | |
3299 | result for result in results if filter_disabled_results( | |
3300 | result, | |
3301 | disabled_ranges, | |
3302 | ) | |
3303 | ] | |
3304 | ||
3305 | has_e901 = any(result['id'].lower() == 'e901' for result in results) | |
3306 | ||
3307 | for r in results: | |
3308 | issue_id = r['id'].lower() | |
3309 | ||
3310 | if r['line'] in non_docstring_string_line_numbers: | |
3311 | if issue_id.startswith(('e1', 'e501', 'w191')): | |
3312 | continue | |
3313 | ||
3314 | if r['line'] in all_string_line_numbers: | |
3315 | if issue_id == 'e501': | |
3316 | continue | |
3317 | ||
3318 | # We must offset by 1 for lines that contain the trailing contents of | |
3319 | # multiline strings. | |
3320 | if not aggressive and (r['line'] + 1) in all_string_line_numbers: | |
3321 | # Do not modify multiline strings in non-aggressive mode. Removing | |
3322 | # trailing whitespace could break doctests. | |
3323 | if issue_id.startswith(('w29', 'w39')): | |
3324 | continue | |
3325 | ||
3326 | if aggressive <= 0: | |
3327 | if issue_id.startswith(('e711', 'e72', 'w6')): | |
3328 | continue | |
3329 | ||
3330 | if aggressive <= 1: | |
3331 | if issue_id.startswith(('e712', 'e713', 'e714')): | |
3332 | continue | |
3333 | ||
3334 | if aggressive <= 2: | |
3335 | if issue_id.startswith(('e704',)): | |
3336 | continue | |
3337 | ||
3338 | if r['line'] in commented_out_code_line_numbers: | |
3339 | if issue_id.startswith(('e261', 'e262', 'e501')): | |
3340 | continue | |
3341 | ||
3342 | # Do not touch indentation if there is a token error caused by | |
3343 | # incomplete multi-line statement. Otherwise, we risk screwing up the | |
3344 | # indentation. | |
3345 | if has_e901: | |
3346 | if issue_id.startswith(('e1', 'e7')): | |
3347 | continue | |
3348 | ||
3349 | yield r | |
3350 | ||
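# filter_results() is a generator over pycodestyle-style result dicts;
# only the 'id' and 'line' keys are consulted here (the dicts below are
# minimal illustrations, not full pycodestyle results):
#
#     >>> results = [{'id': 'E711', 'line': 1}, {'id': 'E501', 'line': 1}]
#     >>> [r['id'] for r in filter_results('x == None\n', results, 0)]
#     ['E501']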
3351 | ||
3352 | def multiline_string_lines(source, include_docstrings=False): | |
3353 | """Return line numbers that are within multiline strings. | |
3354 | ||
3355 | The line numbers are indexed at 1. | |
3356 | ||
3357 | Docstrings are ignored unless include_docstrings is True. | |
3358 | ||
3359 | """ | |
3360 | line_numbers = set() | |
3361 | previous_token_type = '' | |
3362 | try: | |
3363 | for t in generate_tokens(source): | |
3364 | token_type = t[0] | |
3365 | start_row = t[2][0] | |
3366 | end_row = t[3][0] | |
3367 | ||
3368 | if token_type == tokenize.STRING and start_row != end_row: | |
3369 | if ( | |
3370 | include_docstrings or | |
3371 | previous_token_type != tokenize.INDENT | |
3372 | ): | |
3373 | # We increment by one since we want the contents of the | |
3374 | # string. | |
3375 | line_numbers |= set(range(1 + start_row, 1 + end_row)) | |
3376 | ||
3377 | previous_token_type = token_type | |
3378 | except (SyntaxError, tokenize.TokenError): | |
3379 | pass | |
3380 | ||
3381 | return line_numbers | |
3382 | ||
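# Example: only the continuation lines of the string are reported:
#
#     >>> multiline_string_lines('x = """\nhello\n"""\n')
#     {2, 3}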
3383 | ||
3384 | def commented_out_code_lines(source): | |
3385 | """Return line numbers of comments that are likely code. | |
3386 | ||
3387 | Commented-out code is bad practice, but modifying it just adds even | |
3388 | more clutter. | |
3389 | ||
3390 | """ | |
3391 | line_numbers = [] | |
3392 | try: | |
3393 | for t in generate_tokens(source): | |
3394 | token_type = t[0] | |
3395 | token_string = t[1] | |
3396 | start_row = t[2][0] | |
3397 | line = t[4] | |
3398 | ||
3399 | # Ignore inline comments. | |
3400 | if not line.lstrip().startswith('#'): | |
3401 | continue | |
3402 | ||
3403 | if token_type == tokenize.COMMENT: | |
3404 | stripped_line = token_string.lstrip('#').strip() | |
3405 | with warnings.catch_warnings(): | |
3406 | # ignore SyntaxWarning in Python3.8+ | |
3407 | # refs: | |
3408 | # https://bugs.python.org/issue15248 | |
3409 | # https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes | |
3410 | warnings.filterwarnings("ignore", category=SyntaxWarning) | |
3411 | if ( | |
3412 | ' ' in stripped_line and | |
3413 | '#' not in stripped_line and | |
3414 | check_syntax(stripped_line) | |
3415 | ): | |
3416 | line_numbers.append(start_row) | |
3417 | except (SyntaxError, tokenize.TokenError): | |
3418 | pass | |
3419 | ||
3420 | return line_numbers | |
3421 | ||
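# Example: the first comment parses as code, the second does not:
#
#     >>> commented_out_code_lines('# x = 1\n# hello world\ny = 2\n')
#     [1]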
3422 | ||
3423 | def shorten_comment(line, max_line_length, last_comment=False): | |
3424 | """Return trimmed or split long comment line. | |
3425 | ||
3426 | If there are no comments immediately following it, do a text wrap. | |
3427 | Doing this wrapping on all comments in general would lead to jagged | |
3428 | comment text. | |
3429 | ||
3430 | """ | |
3431 | assert len(line) > max_line_length | |
3432 | line = line.rstrip() | |
3433 | ||
3434 | # PEP 8 recommends 72 characters for comment text. | |
3435 | indentation = _get_indentation(line) + '# ' | |
3436 | max_line_length = min(max_line_length, | |
3437 | len(indentation) + 72) | |
3438 | ||
3439 | MIN_CHARACTER_REPEAT = 5 | |
3440 | if ( | |
3441 | len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and | |
3442 | not line[-1].isalnum() | |
3443 | ): | |
3444 | # Trim comments that end with things like --------- | |
3445 | return line[:max_line_length] + '\n' | |
3446 | elif last_comment and re.match(r'\s*#+\s*\w+', line): | |
3447 | split_lines = textwrap.wrap(line.lstrip(' \t#'), | |
3448 | initial_indent=indentation, | |
3449 | subsequent_indent=indentation, | |
3450 | width=max_line_length, | |
3451 | break_long_words=False, | |
3452 | break_on_hyphens=False) | |
3453 | return '\n'.join(split_lines) + '\n' | |
3454 | ||
3455 | return line + '\n' | |
3456 | ||
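# Example: a long banner of repeated punctuation is truncated at the
# limit rather than wrapped:
#
#     >>> shorten_comment('# ' + 60 * '-', 40) == '# ' + 38 * '-' + '\n'
#     True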
3457 | ||
3458 | def normalize_line_endings(lines, newline): | |
3459 | """Return fixed line endings. | |
3460 | ||
3461 | All lines will be modified to use the given line ending. | |
3462 | """ | |
3463 | fixed = [line.rstrip('\n\r') + newline for line in lines] | |
3464 | if fixed and lines[-1] == lines[-1].rstrip('\n\r'): | |
3465 | fixed[-1] = fixed[-1].rstrip('\n\r') | |
3466 | return fixed | |
3467 | ||
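# Example: endings are unified, and a missing final newline is kept
# missing:
#
#     >>> normalize_line_endings(['a\r\n', 'b\n', 'c'], '\n')
#     ['a\n', 'b\n', 'c']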
3468 | ||
3469 | def mutual_startswith(a, b): | |
3470 | return b.startswith(a) or a.startswith(b) | |
3471 | ||
3472 | ||
3473 | def code_match(code, select, ignore): | |
3474 | if ignore: | |
3475 | assert not isinstance(ignore, str) | |
3476 | for ignored_code in [c.strip() for c in ignore]: | |
3477 | if mutual_startswith(code.lower(), ignored_code.lower()): | |
3478 | return False | |
3479 | ||
3480 | if select: | |
3481 | assert not isinstance(select, str) | |
3482 | for selected_code in [c.strip() for c in select]: | |
3483 | if mutual_startswith(code.lower(), selected_code.lower()): | |
3484 | return True | |
3485 | return False | |
3486 | ||
3487 | return True | |
3488 | ||
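# code_match() does prefix matching in both directions, so a selector
# like 'E1' matches 'E101' and vice versa:
#
#     >>> code_match('E101', select=['E1'], ignore=[])
#     True
#     >>> code_match('E101', select=[], ignore=['E10'])
#     False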
3489 | ||
3490 | def fix_code(source, options=None, encoding=None, apply_config=False): | |
3491 | """Return fixed source code. | |
3492 | ||
3493 | "encoding" will be used to decode "source" if it is a byte string. | |
3494 | ||
3495 | """ | |
3496 | options = _get_options(options, apply_config) | |
3497 | # Normalize the ignore/select code lists to upper case. | |
3498 | options.ignore = [opt.upper() for opt in options.ignore] | |
3499 | options.select = [opt.upper() for opt in options.select] | |
3500 | ||
3501 | # check ignore args | |
3502 | # NOTE: If no W50x code is ignored, ignore W50 by default; | |
3503 | # otherwise the fix result is nondeterministic. | |
3504 | ignore_opt = options.ignore | |
3505 | if not {"W50", "W503", "W504"} & set(ignore_opt): | |
3506 | options.ignore.append("W50") | |
3507 | ||
3508 | if not isinstance(source, str): | |
3509 | source = source.decode(encoding or get_encoding()) | |
3510 | ||
3511 | sio = io.StringIO(source) | |
3512 | return fix_lines(sio.readlines(), options=options) | |
3513 | ||
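# Typical library usage (output shown for a simple E225 fix; the exact
# result depends on the enabled codes):
#
#     >>> fix_code('x=1\n')
#     'x = 1\n'
#     >>> fix_code('x=1\n', options={'aggressive': 1})
#     'x = 1\n'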
3514 | ||
3515 | def _get_options(raw_options, apply_config): | |
3516 | """Return parsed options.""" | |
3517 | if not raw_options: | |
3518 | return parse_args([''], apply_config=apply_config) | |
3519 | ||
3520 | if isinstance(raw_options, dict): | |
3521 | options = parse_args([''], apply_config=apply_config) | |
3522 | for name, value in raw_options.items(): | |
3523 | if not hasattr(options, name): | |
3524 | raise ValueError("No such option '{}'".format(name)) | |
3525 | ||
3526 | # Check for very basic type errors. | |
3527 | expected_type = type(getattr(options, name)) | |
3528 | if expected_type is not str: | |
3529 | if isinstance(value, (str, )): | |
3530 | raise ValueError( | |
3531 | "Option '{}' should not be a string".format(name)) | |
3532 | setattr(options, name, value) | |
3533 | else: | |
3534 | options = raw_options | |
3535 | ||
3536 | return options | |
3537 | ||
3538 | ||
3539 | def fix_lines(source_lines, options, filename=''): | |
3540 | """Return fixed source code.""" | |
3541 | # Transform everything to line feeds. Then change them back to the | |
3542 | # original line endings before returning the fixed source code. | |
3543 | original_newline = find_newline(source_lines) | |
3544 | tmp_source = ''.join(normalize_line_endings(source_lines, '\n')) | |
3545 | ||
3546 | # Keep a history to break out of cycles. | |
3547 | previous_hashes = set() | |
3548 | ||
3549 | if options.line_range: | |
3550 | # Disable "apply_local_fixes()" for now due to issue #175. | |
3551 | fixed_source = tmp_source | |
3552 | else: | |
3553 | # Apply global fixes only once (for efficiency). | |
3554 | fixed_source = apply_global_fixes(tmp_source, | |
3555 | options, | |
3556 | filename=filename) | |
3557 | ||
3558 | passes = 0 | |
3559 | long_line_ignore_cache = set() | |
3560 | while hash(fixed_source) not in previous_hashes: | |
3561 | if options.pep8_passes >= 0 and passes > options.pep8_passes: | |
3562 | break | |
3563 | passes += 1 | |
3564 | ||
3565 | previous_hashes.add(hash(fixed_source)) | |
3566 | ||
3567 | tmp_source = copy.copy(fixed_source) | |
3568 | ||
3569 | fix = FixPEP8( | |
3570 | filename, | |
3571 | options, | |
3572 | contents=tmp_source, | |
3573 | long_line_ignore_cache=long_line_ignore_cache) | |
3574 | ||
3575 | fixed_source = fix.fix() | |
3576 | ||
3577 | sio = io.StringIO(fixed_source) | |
3578 | return ''.join(normalize_line_endings(sio.readlines(), original_newline)) | |
3579 | ||
3580 | ||
3581 | def fix_file(filename, options=None, output=None, apply_config=False): | |
3582 | if not options: | |
3583 | options = parse_args([filename], apply_config=apply_config) | |
3584 | ||
3585 | original_source = readlines_from_file(filename) | |
3586 | ||
3587 | fixed_source = original_source | |
3588 | ||
3589 | if options.in_place or options.diff or output: | |
3590 | encoding = detect_encoding(filename) | |
3591 | ||
3592 | if output: | |
3593 | output = LineEndingWrapper(wrap_output(output, encoding=encoding)) | |
3594 | ||
3595 | fixed_source = fix_lines(fixed_source, options, filename=filename) | |
3596 | ||
3597 | if options.diff: | |
3598 | new = io.StringIO(fixed_source) | |
3599 | new = new.readlines() | |
3600 | diff = get_diff_text(original_source, new, filename) | |
3601 | if output: | |
3602 | output.write(diff) | |
3603 | output.flush() | |
3604 | elif options.jobs > 1: | |
3605 | diff = diff.encode(encoding) | |
3606 | return diff | |
3607 | elif options.in_place: | |
3608 | original = "".join(original_source).splitlines() | |
3609 | fixed = fixed_source.splitlines() | |
3610 | original_source_last_line = ( | |
3611 | original_source[-1].split("\n")[-1] if original_source else "" | |
3612 | ) | |
3613 | fixed_source_last_line = fixed_source.split("\n")[-1] | |
3614 | if original != fixed or ( | |
3615 | original_source_last_line != fixed_source_last_line | |
3616 | ): | |
3617 | with open_with_encoding(filename, 'w', encoding=encoding) as fp: | |
3618 | fp.write(fixed_source) | |
3619 | return fixed_source | |
3620 | return None | |
3621 | else: | |
3622 | if output: | |
3623 | output.write(fixed_source) | |
3624 | output.flush() | |
3625 | return fixed_source | |
3626 | ||
3627 | ||
3628 | def global_fixes(): | |
3629 | """Yield multiple (code, function) tuples.""" | |
3630 | for function in list(globals().values()): | |
3631 | if inspect.isfunction(function): | |
3632 | arguments = _get_parameters(function) | |
3633 | if arguments[:1] != ['source']: | |
3634 | continue | |
3635 | ||
3636 | code = extract_code_from_function(function) | |
3637 | if code: | |
3638 | yield (code, function) | |
3639 | ||
3640 | ||
3641 | def _get_parameters(function): | |
3642 | # pylint: disable=deprecated-method | |
3643 | if sys.version_info.major >= 3: | |
3644 | # We need to match "getargspec()", which includes "self" as the first | |
3645 | # value for methods. | |
3646 | # https://bugs.python.org/issue17481#msg209469 | |
3647 | if inspect.ismethod(function): | |
3648 | function = function.__func__ | |
3649 | ||
3650 | return list(inspect.signature(function).parameters) | |
3651 | else: | |
3652 | return inspect.getargspec(function)[0] | |
3653 | ||
3654 | ||
3655 | def apply_global_fixes(source, options, where='global', filename='', | |
3656 | codes=None): | |
3657 | """Run global fixes on source code. | |
3658 | ||
3659 | These are fixes that only need be done once (unlike those in | |
3660 | FixPEP8, which are dependent on pycodestyle). | |
3661 | ||
3662 | """ | |
3663 | if codes is None: | |
3664 | codes = [] | |
3665 | if any(code_match(code, select=options.select, ignore=options.ignore) | |
3666 | for code in ['E101', 'E111']): | |
3667 | source = reindent( | |
3668 | source, | |
3669 | indent_size=options.indent_size, | |
3670 | leave_tabs=not ( | |
3671 | code_match( | |
3672 | 'W191', | |
3673 | select=options.select, | |
3674 | ignore=options.ignore | |
3675 | ) | |
3676 | ) | |
3677 | ) | |
3678 | ||
3679 | for (code, function) in global_fixes(): | |
3680 | if code_match(code, select=options.select, ignore=options.ignore): | |
3681 | if options.verbose: | |
3682 | print('---> Applying {} fix for {}'.format(where, | |
3683 | code.upper()), | |
3684 | file=sys.stderr) | |
3685 | source = function(source, | |
3686 | aggressive=options.aggressive) | |
3687 | ||
3688 | source = fix_2to3(source, | |
3689 | aggressive=options.aggressive, | |
3690 | select=options.select, | |
3691 | ignore=options.ignore, | |
3692 | filename=filename, | |
3693 | where=where, | |
3694 | verbose=options.verbose) | |
3695 | ||
3696 | return source | |
3697 | ||
3698 | ||
3699 | def extract_code_from_function(function): | |
3700 | """Return code handled by function.""" | |
3701 | if not function.__name__.startswith('fix_'): | |
3702 | return None | |
3703 | ||
3704 | code = re.sub('^fix_', '', function.__name__) | |
3705 | if not code: | |
3706 | return None | |
3707 | ||
3708 | try: | |
3709 | int(code[1:]) | |
3710 | except ValueError: | |
3711 | return None | |
3712 | ||
3713 | return code | |
3714 | ||
3715 | ||
3716 | def _get_package_version(): | |
3717 | packages = ["pycodestyle: {}".format(pycodestyle.__version__)] | |
3718 | return ", ".join(packages) | |
3719 | ||
3720 | ||
3721 | def create_parser(): | |
3722 | """Return command-line parser.""" | |
3723 | parser = argparse.ArgumentParser(description=docstring_summary(__doc__), | |
3724 | prog='autopep8') | |
3725 | parser.add_argument('--version', action='version', | |
3726 | version='%(prog)s {} ({})'.format( | |
3727 | __version__, _get_package_version())) | |
3728 | parser.add_argument('-v', '--verbose', action='count', | |
3729 | default=0, | |
3730 | help='print verbose messages; ' | |
3731 | 'multiple -v result in more verbose messages') | |
3732 | parser.add_argument('-d', '--diff', action='store_true', | |
3733 | help='print the diff for the fixed source') | |
3734 | parser.add_argument('-i', '--in-place', action='store_true', | |
3735 | help='make changes to files in place') | |
3736 | parser.add_argument('--global-config', metavar='filename', | |
3737 | default=DEFAULT_CONFIG, | |
3738 | help='path to a global pep8 config file; if this file ' | |
3739 | 'does not exist then this is ignored ' | |
3740 | '(default: {})'.format(DEFAULT_CONFIG)) | |
3741 | parser.add_argument('--ignore-local-config', action='store_true', | |
3742 | help="don't look for and apply local config files; " | |
3743 | 'if not passed, defaults are updated with any ' | |
3744 | "config files in the project's root directory") | |
3745 | parser.add_argument('-r', '--recursive', action='store_true', | |
3746 | help='run recursively over directories; ' | |
3747 | 'must be used with --in-place or --diff') | |
3748 | parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1, | |
3749 | help='number of parallel jobs; ' | |
3750 | 'match CPU count if value is less than 1') | |
3751 | parser.add_argument('-p', '--pep8-passes', metavar='n', | |
3752 | default=-1, type=int, | |
3753 | help='maximum number of additional pep8 passes ' | |
3754 | '(default: infinite)') | |
3755 | parser.add_argument('-a', '--aggressive', action='count', default=0, | |
3756 | help='enable non-whitespace changes; ' | |
3757 | 'multiple -a result in more aggressive changes') | |
3758 | parser.add_argument('--experimental', action='store_true', | |
3759 | help='enable experimental fixes') | |
3760 | parser.add_argument('--exclude', metavar='globs', | |
3761 | help='exclude file/directory names that match these ' | |
3762 | 'comma-separated globs') | |
3763 | parser.add_argument('--list-fixes', action='store_true', | |
3764 | help='list codes for fixes; ' | |
3765 | 'used by --ignore and --select') | |
3766 | parser.add_argument('--ignore', metavar='errors', default='', | |
3767 | help='do not fix these errors/warnings ' | |
3768 | '(default: {})'.format(DEFAULT_IGNORE)) | |
3769 | parser.add_argument('--select', metavar='errors', default='', | |
3770 | help='fix only these errors/warnings (e.g. E4,W)') | |
3771 | parser.add_argument('--max-line-length', metavar='n', default=79, type=int, | |
3772 | help='set maximum allowed line length ' | |
3773 | '(default: %(default)s)') | |
3774 | parser.add_argument('--line-range', '--range', metavar='line', | |
3775 | default=None, type=int, nargs=2, | |
3776 | help='only fix errors found within this inclusive ' | |
3777 | 'range of line numbers (e.g. 1 99); ' | |
3778 | 'line numbers are indexed at 1') | |
3779 | parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE, | |
3780 | type=int, help=argparse.SUPPRESS) | |
3781 | parser.add_argument('--hang-closing', action='store_true', | |
3782 | help='hang-closing option passed to pycodestyle') | |
3783 | parser.add_argument('--exit-code', action='store_true', | |
3784 | help='change the behavior of the exit code: ' | |
3785 | 'by default, 0 means no differences and 1 ' | |
3786 | 'means an error occurred; with this option, ' | |
3787 | '2 is returned when differences exist') | |
3788 | parser.add_argument('files', nargs='*', | |
3789 | help="files to format or '-' for standard in") | |
3790 | ||
3791 | return parser | |
3792 | ||
3793 | ||
3794 | def _expand_codes(codes, ignore_codes): | |
3795 | """expand to individual E/W codes""" | |
3796 | ret = set() | |
3797 | ||
3798 | is_conflict = False | |
3799 | if all( | |
3800 | any( | |
3801 | conflicting_code.startswith(code) | |
3802 | for code in codes | |
3803 | ) | |
3804 | for conflicting_code in CONFLICTING_CODES | |
3805 | ): | |
3806 | is_conflict = True | |
3807 | ||
3808 | is_ignore_w503 = "W503" in ignore_codes | |
3809 | is_ignore_w504 = "W504" in ignore_codes | |
3810 | ||
3811 | for code in codes: | |
3812 | if code == "W": | |
3813 | if is_ignore_w503 and is_ignore_w504: | |
3814 | ret.update({"W1", "W2", "W3", "W505", "W6"}) | |
3815 | elif is_ignore_w503: | |
3816 | ret.update({"W1", "W2", "W3", "W504", "W505", "W6"}) | |
3817 | else: | |
3818 | ret.update({"W1", "W2", "W3", "W503", "W505", "W6"}) | |
3819 | elif code in ("W5", "W50"): | |
3820 | if is_ignore_w503 and is_ignore_w504: | |
3821 | ret.update({"W505"}) | |
3822 | elif is_ignore_w503: | |
3823 | ret.update({"W504", "W505"}) | |
3824 | else: | |
3825 | ret.update({"W503", "W505"}) | |
3826 | elif not (code in ("W503", "W504") and is_conflict): | |
3827 | ret.add(code) | |
3828 | ||
3829 | return ret | |
3830 | ||
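# Sketch, assuming CONFLICTING_CODES is the W503/W504 pair used
# elsewhere in this module:
#
#     >>> sorted(_expand_codes({'W', 'E501'}, ignore_codes=[]))
#     ['E501', 'W1', 'W2', 'W3', 'W503', 'W505', 'W6']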
3831 | ||
3832 | def parse_args(arguments, apply_config=False): | |
3833 | """Parse command-line options.""" | |
3834 | parser = create_parser() | |
3835 | args = parser.parse_args(arguments) | |
3836 | ||
3837 | if not args.files and not args.list_fixes: | |
3838 | parser.exit(EXIT_CODE_ARGPARSE_ERROR, 'incorrect number of arguments') | |
3839 | ||
3840 | args.files = [decode_filename(name) for name in args.files] | |
3841 | ||
3842 | if apply_config: | |
3843 | parser = read_config(args, parser) | |
3844 | # prioritize pyproject.toml's tool.autopep8 section when it exists | |
3845 | try: | |
3846 | parser_with_pyproject_toml = read_pyproject_toml(args, parser) | |
3847 | except Exception: | |
3848 | parser_with_pyproject_toml = None | |
3849 | if parser_with_pyproject_toml: | |
3850 | parser = parser_with_pyproject_toml | |
3851 | args = parser.parse_args(arguments) | |
3852 | args.files = [decode_filename(name) for name in args.files] | |
3853 | ||
3854 | if '-' in args.files: | |
3855 | if len(args.files) > 1: | |
3856 | parser.exit( | |
3857 | EXIT_CODE_ARGPARSE_ERROR, | |
3858 | 'cannot mix stdin and regular files', | |
3859 | ) | |
3860 | ||
3861 | if args.diff: | |
3862 | parser.exit( | |
3863 | EXIT_CODE_ARGPARSE_ERROR, | |
3864 | '--diff cannot be used with standard input', | |
3865 | ) | |
3866 | ||
3867 | if args.in_place: | |
3868 | parser.exit( | |
3869 | EXIT_CODE_ARGPARSE_ERROR, | |
3870 | '--in-place cannot be used with standard input', | |
3871 | ) | |
3872 | ||
3873 | if args.recursive: | |
3874 | parser.exit( | |
3875 | EXIT_CODE_ARGPARSE_ERROR, | |
3876 | '--recursive cannot be used with standard input', | |
3877 | ) | |
3878 | ||
3879 | if len(args.files) > 1 and not (args.in_place or args.diff): | |
3880 | parser.exit( | |
3881 | EXIT_CODE_ARGPARSE_ERROR, | |
3882 | 'autopep8 only takes one filename as argument ' | |
3883 | 'unless the "--in-place" or "--diff" args are used', | |
3884 | ) | |
3885 | ||
3886 | if args.recursive and not (args.in_place or args.diff): | |
3887 | parser.exit( | |
3888 | EXIT_CODE_ARGPARSE_ERROR, | |
3889 | '--recursive must be used with --in-place or --diff', | |
3890 | ) | |
3891 | ||
3892 | if args.in_place and args.diff: | |
3893 | parser.exit( | |
3894 | EXIT_CODE_ARGPARSE_ERROR, | |
3895 | '--in-place and --diff are mutually exclusive', | |
3896 | ) | |
3897 | ||
3898 | if args.max_line_length <= 0: | |
3899 | parser.exit( | |
3900 | EXIT_CODE_ARGPARSE_ERROR, | |
3901 | '--max-line-length must be greater than 0', | |
3902 | ) | |
3903 | ||
3904 | if args.indent_size <= 0: | |
3905 | parser.exit( | |
3906 | EXIT_CODE_ARGPARSE_ERROR, | |
3907 | '--indent-size must be greater than 0', | |
3908 | ) | |
3909 | ||
3910 | if args.select: | |
3911 | args.select = _expand_codes( | |
3912 | _split_comma_separated(args.select), | |
3913 | (_split_comma_separated(args.ignore) if args.ignore else []) | |
3914 | ) | |
3915 | ||
3916 | if args.ignore: | |
3917 | args.ignore = _split_comma_separated(args.ignore) | |
3918 | if all( | |
3919 | not any( | |
3920 | conflicting_code.startswith(ignore_code) | |
3921 | for ignore_code in args.ignore | |
3922 | ) | |
3923 | for conflicting_code in CONFLICTING_CODES | |
3924 | ): | |
3925 | args.ignore.update(CONFLICTING_CODES) | |
3926 | elif not args.select: | |
3927 | if args.aggressive: | |
3928 | # Enable everything by default if aggressive. | |
3929 | args.select = {'E', 'W1', 'W2', 'W3', 'W6'} | |
3930 | else: | |
3931 | args.ignore = _split_comma_separated(DEFAULT_IGNORE) | |
3932 | ||
3933 | if args.exclude: | |
3934 | args.exclude = _split_comma_separated(args.exclude) | |
3935 | else: | |
3936 | args.exclude = set() | |
3937 | ||
3938 | if args.jobs < 1: | |
3939 | # Do not import multiprocessing globally in case it is not supported | |
3940 | # on the platform. | |
3941 | import multiprocessing | |
3942 | args.jobs = multiprocessing.cpu_count() | |
3943 | ||
3944 | if args.jobs > 1 and not (args.in_place or args.diff): | |
3945 | parser.exit( | |
3946 | EXIT_CODE_ARGPARSE_ERROR, | |
3947 | 'parallel jobs requires --in-place or --diff', | |
3948 | ) | |
3949 | ||
3950 | if args.line_range: | |
3951 | if args.line_range[0] <= 0: | |
3952 | parser.exit( | |
3953 | EXIT_CODE_ARGPARSE_ERROR, | |
3954 | '--range must be positive numbers', | |
3955 | ) | |
3956 | if args.line_range[0] > args.line_range[1]: | |
3957 | parser.exit( | |
3958 | EXIT_CODE_ARGPARSE_ERROR, | |
3959 | 'First value of --range should be less than or equal ' | |
3960 | 'to the second', | |
3961 | ) | |
3962 | ||
3963 | return args | |
3964 | ||
3965 | ||
3966 | def _get_normalize_options(args, config, section, option_list): | |
3967 | for (k, v) in config.items(section): | |
3968 | norm_opt = k.lstrip('-').replace('-', '_') | |
3969 | if not option_list.get(norm_opt): | |
3970 | continue | |
3971 | opt_type = option_list[norm_opt] | |
3972 | if opt_type is int: | |
3973 | if v.strip() == "auto": | |
3974 | # skip the special-cased "auto" value | |
3975 | if args.verbose: | |
3976 | print(f"ignore config: {k}={v}") | |
3977 | continue | |
3978 | value = config.getint(section, k) | |
3979 | elif opt_type is bool: | |
3980 | value = config.getboolean(section, k) | |
3981 | else: | |
3982 | value = config.get(section, k) | |
3983 | yield norm_opt, k, value | |
3984 | ||
3985 | ||
3986 | def read_config(args, parser): | |
3987 | """Read both user configuration and local configuration.""" | |
3988 | config = SafeConfigParser() | |
3989 | ||
3990 | try: | |
3991 | if args.verbose and os.path.exists(args.global_config): | |
3992 | print("read config path: {}".format(args.global_config)) | |
3993 | config.read(args.global_config) | |
3994 | ||
3995 | if not args.ignore_local_config: | |
3996 | parent = tail = args.files and os.path.abspath( | |
3997 | os.path.commonprefix(args.files)) | |
3998 | while tail: | |
3999 | if config.read([os.path.join(parent, fn) | |
4000 | for fn in PROJECT_CONFIG]): | |
4001 | if args.verbose: | |
4002 | for fn in PROJECT_CONFIG: | |
4003 | config_file = os.path.join(parent, fn) | |
4004 | if not os.path.exists(config_file): | |
4005 | continue | |
4006 | print( | |
4007 | "read config path: {}".format( | |
4008 | os.path.join(parent, fn) | |
4009 | ) | |
4010 | ) | |
4011 | break | |
4012 | (parent, tail) = os.path.split(parent) | |
4013 | ||
4014 | defaults = {} | |
4015 | option_list = {o.dest: o.type or type(o.default) | |
4016 | for o in parser._actions} | |
4017 | ||
4018 | for section in ['pep8', 'pycodestyle', 'flake8']: | |
4019 | if not config.has_section(section): | |
4020 | continue | |
4021 | for norm_opt, k, value in _get_normalize_options( | |
4022 | args, config, section, option_list | |
4023 | ): | |
4024 | if args.verbose: | |
4025 | print("enable config: section={}, key={}, value={}".format( | |
4026 | section, k, value)) | |
4027 | defaults[norm_opt] = value | |
4028 | ||
4029 | parser.set_defaults(**defaults) | |
4030 | except Error: | |
4031 | # Ignore for now. | |
4032 | pass | |
4033 | ||
4034 | return parser | |
4035 | ||
4036 | ||
4037 | def read_pyproject_toml(args, parser): | |
4038 | """Read pyproject.toml and load configuration.""" | |
4039 | if sys.version_info >= (3, 11): | |
4040 | import tomllib | |
4041 | else: | |
4042 | import tomli as tomllib | |
4043 | ||
4044 | config = None | |
4045 | ||
4046 | if os.path.exists(args.global_config): | |
4047 | with open(args.global_config, "rb") as fp: | |
4048 | config = tomllib.load(fp) | |
4049 | ||
4050 | if not args.ignore_local_config: | |
4051 | parent = tail = args.files and os.path.abspath( | |
4052 | os.path.commonprefix(args.files)) | |
4053 | while tail: | |
4054 | pyproject_toml = os.path.join(parent, "pyproject.toml") | |
4055 | if os.path.exists(pyproject_toml): | |
4056 | with open(pyproject_toml, "rb") as fp: | |
4057 | config = tomllib.load(fp) | |
4058 | break | |
4059 | (parent, tail) = os.path.split(parent) | |
4060 | ||
4061 | if not config: | |
4062 | return None | |
4063 | ||
4064 | if config.get("tool", {}).get("autopep8") is None: | |
4065 | return None | |
4066 | ||
4067 | config = config.get("tool").get("autopep8") | |
4068 | ||
4069 | defaults = {} | |
4070 | option_list = {o.dest: o.type or type(o.default) | |
4071 | for o in parser._actions} | |
4072 | ||
4073 | TUPLED_OPTIONS = ("ignore", "select") | |
4074 | for (k, v) in config.items(): | |
4075 | norm_opt = k.lstrip('-').replace('-', '_') | |
4076 | if not option_list.get(norm_opt): | |
4077 | continue | |
4078 | if type(v) in (list, tuple) and norm_opt in TUPLED_OPTIONS: | |
4079 | value = ",".join(v) | |
4080 | else: | |
4081 | value = v | |
4082 | if args.verbose: | |
4083 | print("enable pyproject.toml config: " | |
4084 | "key={}, value={}".format(k, value)) | |
4085 | defaults[norm_opt] = value | |
4086 | ||
4087 | if defaults: | |
4088 | # apply defaults only when key-value pairs were collected | |
4089 | parser.set_defaults(**defaults) | |
4090 | ||
4091 | return parser | |
4092 | ||
4093 | ||
4094 | def _split_comma_separated(string): | |
4095 | """Return a set of strings.""" | |
4096 | return {text.strip() for text in string.split(',') if text.strip()} | |
4097 | ||
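# Example: whitespace and empty items are dropped:
#
#     >>> sorted(_split_comma_separated('E501, W6,,'))
#     ['E501', 'W6']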
4098 | ||
4099 | def decode_filename(filename): | |
4100 | """Return Unicode filename.""" | |
4101 | if isinstance(filename, str): | |
4102 | return filename | |
4103 | ||
4104 | return filename.decode(sys.getfilesystemencoding()) | |
4105 | ||
4106 | ||
4107 | def supported_fixes(): | |
4108 | """Yield pep8 error codes that autopep8 fixes. | |
4109 | ||
4110 | Each item we yield is a tuple of the code followed by its | |
4111 | description. | |
4112 | ||
4113 | """ | |
4114 | yield ('E101', docstring_summary(reindent.__doc__)) | |
4115 | ||
4116 | instance = FixPEP8(filename=None, options=None, contents='') | |
4117 | for attribute in dir(instance): | |
4118 | code = re.match('fix_([ew][0-9][0-9][0-9])', attribute) | |
4119 | if code: | |
4120 | yield ( | |
4121 | code.group(1).upper(), | |
4122 | re.sub(r'\s+', ' ', | |
4123 | docstring_summary(getattr(instance, attribute).__doc__)) | |
4124 | ) | |
4125 | ||
4126 | for (code, function) in sorted(global_fixes()): | |
4127 | yield (code.upper() + (4 - len(code)) * ' ', | |
4128 | re.sub(r'\s+', ' ', docstring_summary(function.__doc__))) | |
4129 | ||
4130 | for code in sorted(CODE_TO_2TO3): | |
4131 | yield (code.upper() + (4 - len(code)) * ' ', | |
4132 | re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__))) | |
4133 | ||
4134 | ||
4135 | def docstring_summary(docstring): | |
4136 | """Return summary of docstring.""" | |
4137 | return docstring.split('\n')[0] if docstring else '' | |
4138 | ||
4139 | ||
4140 | def line_shortening_rank(candidate, indent_word, max_line_length, | |
4141 | experimental=False): | |
4142 | """Return rank of candidate. | |
4143 | ||
4144 | This is for sorting candidates. | |
4145 | ||
4146 | """ | |
4147 | if not candidate.strip(): | |
4148 | return 0 | |
4149 | ||
4150 | rank = 0 | |
4151 | lines = candidate.rstrip().split('\n') | |
4152 | ||
4153 | offset = 0 | |
4154 | if ( | |
4155 | not lines[0].lstrip().startswith('#') and | |
4156 | lines[0].rstrip()[-1] not in '([{' | |
4157 | ): | |
4158 | for (opening, closing) in ('()', '[]', '{}'): | |
4159 | # Don't penalize empty containers that aren't split up. Things like | |
4160 | # this "foo(\n )" aren't particularly good. | |
4161 | opening_loc = lines[0].find(opening) | |
4162 | closing_loc = lines[0].find(closing) | |
4163 | if opening_loc >= 0: | |
4164 | if closing_loc < 0 or closing_loc != opening_loc + 1: | |
4165 | offset = max(offset, 1 + opening_loc) | |
4166 | ||
4167 | current_longest = max(offset + len(x.strip()) for x in lines) | |
4168 | ||
4169 | rank += 4 * max(0, current_longest - max_line_length) | |
4170 | ||
4171 | rank += len(lines) | |
4172 | ||
4173 | # Too much variation in line length is ugly. | |
4174 | rank += 2 * standard_deviation(len(line) for line in lines) | |
4175 | ||
4176 | bad_starting_symbol = { | |
4177 | '(': ')', | |
4178 | '[': ']', | |
4179 | '{': '}'}.get(lines[0][-1]) | |
4180 | ||
4181 | if len(lines) > 1: | |
4182 | if ( | |
4183 | bad_starting_symbol and | |
4184 | lines[1].lstrip().startswith(bad_starting_symbol) | |
4185 | ): | |
4186 | rank += 20 | |
4187 | ||
4188 | for lineno, current_line in enumerate(lines): | |
4189 | current_line = current_line.strip() | |
4190 | ||
4191 | if current_line.startswith('#'): | |
4192 | continue | |
4193 | ||
4194 | for bad_start in ['.', '%', '+', '-', '/']: | |
4195 | if current_line.startswith(bad_start): | |
4196 | rank += 100 | |
4197 | ||
4198 | # Do not tolerate operators on their own line. | |
4199 | if current_line == bad_start: | |
4200 | rank += 1000 | |
4201 | ||
4202 | if ( | |
4203 | current_line.endswith(('.', '%', '+', '-', '/')) and | |
4204 | "': " in current_line | |
4205 | ): | |
4206 | rank += 1000 | |
4207 | ||
4208 | if current_line.endswith(('(', '[', '{', '.')): | |
4209 | # Avoid lonely opening. They result in longer lines. | |
4210 | if len(current_line) <= len(indent_word): | |
4211 | rank += 100 | |
4212 | ||
4213 | # Avoid the ugliness of ", (\n". | |
4214 | if ( | |
4215 | current_line.endswith('(') and | |
4216 | current_line[:-1].rstrip().endswith(',') | |
4217 | ): | |
4218 | rank += 100 | |
4219 | ||
4220 | # Avoid the ugliness of "something[\n" and something[index][\n. | |
4221 | if ( | |
4222 | current_line.endswith('[') and | |
4223 | len(current_line) > 1 and | |
4224 | (current_line[-2].isalnum() or current_line[-2] in ']') | |
4225 | ): | |
4226 | rank += 300 | |
4227 | ||
4228 | # Also avoid the ugliness of "foo.\nbar" | |
4229 | if current_line.endswith('.'): | |
4230 | rank += 100 | |
4231 | ||
4232 | if has_arithmetic_operator(current_line): | |
4233 | rank += 100 | |
4234 | ||
4235 | # Avoid breaking at unary operators. | |
4236 | if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')): | |
4237 | rank += 1000 | |
4238 | ||
4239 | if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')): | |
4240 | rank += 1000 | |
4241 | ||
4242 | if current_line.endswith(('%', '(', '[', '{')): | |
4243 | rank -= 20 | |
4244 | ||
4245 | # Try to break list comprehensions at the "for". | |
4246 | if current_line.startswith('for '): | |
4247 | rank -= 50 | |
4248 | ||
4249 | if current_line.endswith('\\'): | |
4250 | # If a line ends in \-newline, it may be part of a | |
4251 | # multiline string. In that case, we would like to know | |
4252 | # how long that line is without the \-newline. If it's | |
4253 | # longer than the maximum, or has comments, then we assume | |
4254 | # that the \-newline is an okay candidate and only | |
4255 | # penalize it a bit. | |
4256 | total_len = len(current_line) | |
4257 | lineno += 1 | |
4258 | while lineno < len(lines): | |
4259 | total_len += len(lines[lineno]) | |
4260 | ||
4261 | if lines[lineno].lstrip().startswith('#'): | |
4262 | total_len = max_line_length | |
4263 | break | |
4264 | ||
4265 | if not lines[lineno].endswith('\\'): | |
4266 | break | |
4267 | ||
4268 | lineno += 1 | |
4269 | ||
4270 | if total_len < max_line_length: | |
4271 | rank += 10 | |
4272 | else: | |
4273 | rank += 100 if experimental else 1 | |
4274 | ||
4275 | # Prefer breaking at commas rather than colon. | |
4276 | if ',' in current_line and current_line.endswith(':'): | |
4277 | rank += 10 | |
4278 | ||
4279 | # Avoid splitting dictionaries between key and value. | |
4280 | if current_line.endswith(':'): | |
4281 | rank += 100 | |
4282 | ||
4283 | rank += 10 * count_unbalanced_brackets(current_line) | |
4284 | ||
4285 | return max(0, rank) | |
4286 | ||
4287 | ||
4288 | def standard_deviation(numbers): | |
4289 | """Return standard deviation.""" | |
4290 | numbers = list(numbers) | |
4291 | if not numbers: | |
4292 | return 0 | |
4293 | mean = sum(numbers) / len(numbers) | |
4294 | return (sum((n - mean) ** 2 for n in numbers) / | |
4295 | len(numbers)) ** .5 | |
4296 | ||
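# Population standard deviation, e.g.:
#
#     >>> standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])
#     2.0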
4297 | ||
4298 | def has_arithmetic_operator(line): | |
4299 | """Return True if line contains any arithmetic operators.""" | |
4300 | for operator in pycodestyle.ARITHMETIC_OP: | |
4301 | if operator in line: | |
4302 | return True | |
4303 | ||
4304 | return False | |
4305 | ||
4306 | ||
4307 | def count_unbalanced_brackets(line): | |
4308 | """Return number of unmatched open/close brackets.""" | |
4309 | count = 0 | |
4310 | for opening, closing in ['()', '[]', '{}']: | |
4311 | count += abs(line.count(opening) - line.count(closing)) | |
4312 | ||
4313 | return count | |
4314 | ||
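# Example: one unmatched '(' plus one unmatched '{':
#
#     >>> count_unbalanced_brackets('foo(bar[0], {')
#     2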
4315 | ||
4316 | def split_at_offsets(line, offsets): | |
4317 | """Split line at offsets. | |
4318 | ||
4319 | Return list of strings. | |
4320 | ||
4321 | """ | |
4322 | result = [] | |
4323 | ||
4324 | previous_offset = 0 | |
4325 | current_offset = 0 | |
4326 | for current_offset in sorted(offsets): | |
4327 | if current_offset < len(line) and previous_offset != current_offset: | |
4328 | result.append(line[previous_offset:current_offset].strip()) | |
4329 | previous_offset = current_offset | |
4330 | ||
4331 | result.append(line[current_offset:]) | |
4332 | ||
4333 | return result | |
4334 | ||
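# Example: offsets are sorted before splitting, and every piece except
# the final one is stripped:
#
#     >>> split_at_offsets('abcdef', [4, 2])
#     ['ab', 'cd', 'ef']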
4335 | ||
4336 | class LineEndingWrapper(object): | |
4337 | ||
4338 | r"""Replace line endings to work with sys.stdout. | |
4339 | ||
4340 | It seems that sys.stdout expects only '\n' as the line ending, no matter | |
4341 | the platform. Otherwise, we get repeated line endings. | |
4342 | ||
4343 | """ | |
4344 | ||
4345 | def __init__(self, output): | |
4346 | self.__output = output | |
4347 | ||
4348 | def write(self, s): | |
4349 | self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n')) | |
4350 | ||
4351 | def flush(self): | |
4352 | self.__output.flush() | |
4353 | ||
4354 | ||
4355 | def match_file(filename, exclude): | |
4356 | """Return True if file is okay for modifying/recursing.""" | |
4357 | base_name = os.path.basename(filename) | |
4358 | ||
4359 | if base_name.startswith('.'): | |
4360 | return False | |
4361 | ||
4362 | for pattern in exclude: | |
4363 | if fnmatch.fnmatch(base_name, pattern): | |
4364 | return False | |
4365 | if fnmatch.fnmatch(filename, pattern): | |
4366 | return False | |
4367 | ||
4368 | if not os.path.isdir(filename) and not is_python_file(filename): | |
4369 | return False | |
4370 | ||
4371 | return True | |
4372 | ||
4373 | ||
4374 | def find_files(filenames, recursive, exclude): | |
4375 | """Yield filenames.""" | |
4376 | while filenames: | |
4377 | name = filenames.pop(0) | |
4378 | if recursive and os.path.isdir(name): | |
4379 | for root, directories, children in os.walk(name): | |
4380 | filenames += [os.path.join(root, f) for f in children | |
4381 | if match_file(os.path.join(root, f), | |
4382 | exclude)] | |
4383 | directories[:] = [d for d in directories | |
4384 | if match_file(os.path.join(root, d), | |
4385 | exclude)] | |
4386 | else: | |
4387 | is_exclude_match = False | |
4388 | for pattern in exclude: | |
4389 | if fnmatch.fnmatch(name, pattern): | |
4390 | is_exclude_match = True | |
4391 | break | |
4392 | if not is_exclude_match: | |
4393 | yield name | |
4394 | ||
4395 | ||
4396 | def _fix_file(parameters): | |
4397 | """Helper function for optionally running fix_file() in parallel.""" | |
4398 | if parameters[1].verbose: | |
4399 | print('[file:{}]'.format(parameters[0]), file=sys.stderr) | |
4400 | try: | |
4401 | return fix_file(*parameters) | |
4402 | except IOError as error: | |
4403 | print(str(error), file=sys.stderr) | |
4404 | raise error | |
4405 | ||
4406 | ||
4407 | def fix_multiple_files(filenames, options, output=None): | |
4408 | """Fix list of files. | |
4409 | ||
4410 | Optionally fix files recursively. | |
4411 | ||
4412 | """ | |
4413 | results = [] | |
4414 | filenames = find_files(filenames, options.recursive, options.exclude) | |
4415 | if options.jobs > 1: | |
4416 | import multiprocessing | |
4417 | pool = multiprocessing.Pool(options.jobs) | |
4418 | rets = [] | |
4419 | for name in filenames: | |
4420 | ret = pool.apply_async(_fix_file, ((name, options),)) | |
4421 | rets.append(ret) | |
4422 | pool.close() | |
4423 | pool.join() | |
4424 | if options.diff: | |
4425 | for r in rets: | |
4426 | sys.stdout.write(r.get().decode()) | |
4427 | sys.stdout.flush() | |
4428 | results.extend([x.get() for x in rets if x.get() is not None]) | |
4429 | else: | |
4430 | for name in filenames: | |
4431 | ret = _fix_file((name, options, output)) | |
4432 | if ret is None: | |
4433 | continue | |
4434 | if options.diff: | |
4435 | if ret != '': | |
4436 | results.append(ret) | |
4437 | elif options.in_place: | |
4438 | results.append(ret) | |
4439 | else: | |
4440 | original_source = readlines_from_file(name) | |
4441 | if "".join(original_source).splitlines() != ret.splitlines(): | |
4442 | results.append(ret) | |
4443 | return results | |
4444 | ||
4445 | ||
4446 | def is_python_file(filename): | |
4447 | """Return True if filename is Python file.""" | |
4448 | if filename.endswith('.py'): | |
4449 | return True | |
4450 | ||
4451 | try: | |
4452 | with open_with_encoding( | |
4453 | filename, | |
4454 | limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f: | |
4455 | text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES) | |
4456 | if not text: | |
4457 | return False | |
4458 | first_line = text.splitlines()[0] | |
4459 | except (IOError, IndexError): | |
4460 | return False | |
4461 | ||
4462 | if not PYTHON_SHEBANG_REGEX.match(first_line): | |
4463 | return False | |
4464 | ||
4465 | return True | |
4466 | ||
4467 | ||
4468 | def is_probably_part_of_multiline(line): | |
4469 | """Return True if line is likely part of a multiline string. | |
4470 | ||
4471 | When multiline strings are involved, pep8 reports the error as being | |
4472 | at the start of the multiline string, which doesn't work for us. | |
4473 | ||
4474 | """ | |
4475 | return ( | |
4476 | '"""' in line or | |
4477 | "'''" in line or | |
4478 | line.rstrip().endswith('\\') | |
4479 | ) | |
4480 | ||
4481 | ||
4482 | def wrap_output(output, encoding): | |
4483 | """Return output with specified encoding.""" | |
4484 | return codecs.getwriter(encoding)(output.buffer | |
4485 | if hasattr(output, 'buffer') | |
4486 | else output) | |
4487 | ||
4488 | ||
4489 | def get_encoding(): | |
4490 | """Return preferred encoding.""" | |
4491 | return locale.getpreferredencoding() or sys.getdefaultencoding() | |
4492 | ||
4493 | ||
4494 | def main(argv=None, apply_config=True): | |
4495 | """Command-line entry.""" | |
4496 | if argv is None: | |
4497 | argv = sys.argv | |
4498 | ||
4499 | try: | |
4500 | # Exit on broken pipe. | |
4501 | signal.signal(signal.SIGPIPE, signal.SIG_DFL) | |
4502 | except AttributeError: # pragma: no cover | |
4503 | # SIGPIPE is not available on Windows. | |
4504 | pass | |
4505 | ||
4506 | try: | |
4507 | args = parse_args(argv[1:], apply_config=apply_config) | |
4508 | ||
4509 | if args.list_fixes: | |
4510 | for code, description in sorted(supported_fixes()): | |
4511 | print('{code} - {description}'.format( | |
4512 | code=code, description=description)) | |
4513 | return EXIT_CODE_OK | |
4514 | ||
4515 | if args.files == ['-']: | |
4516 | assert not args.in_place | |
4517 | ||
4518 | encoding = sys.stdin.encoding or get_encoding() | |
4519 | read_stdin = sys.stdin.read() | |
4520 | fixed_stdin = fix_code(read_stdin, args, encoding=encoding) | |
4521 | ||
4522 | # LineEndingWrapper is unnecessary here due to the symmetry between | |
4523 | # standard in and standard out. | |
4524 | wrap_output(sys.stdout, encoding=encoding).write(fixed_stdin) | |
4525 | ||
4526 | if hash(read_stdin) != hash(fixed_stdin): | |
4527 | if args.exit_code: | |
4528 | return EXIT_CODE_EXISTS_DIFF | |
4529 | else: | |
4530 | if args.in_place or args.diff: | |
4531 | args.files = list(set(args.files)) | |
4532 | else: | |
4533 | assert len(args.files) == 1 | |
4534 | assert not args.recursive | |
4535 | ||
4536 | results = fix_multiple_files(args.files, args, sys.stdout) | |
4537 | if args.diff: | |
4538 | ret = any(len(r) != 0 for r in results) | |
4539 | else: | |
4540 | # with in-place option | |
4541 | ret = any(r is not None for r in results) | |
4542 | if args.exit_code and ret: | |
4543 | return EXIT_CODE_EXISTS_DIFF | |
4544 | except IOError: | |
4545 | return EXIT_CODE_ERROR | |
4546 | except KeyboardInterrupt: | |
4547 | return EXIT_CODE_ERROR # pragma: no cover | |
4548 | ||
4549 | ||
4550 | class CachedTokenizer(object): | |
4551 | ||
4552 | """A one-element cache around tokenize.generate_tokens(). | |
4553 | ||
4554 | Original code written by Ned Batchelder, in coverage.py. | |
4555 | ||
4556 | """ | |
4557 | ||
4558 | def __init__(self): | |
4559 | self.last_text = None | |
4560 | self.last_tokens = None | |
4561 | ||
4562 | def generate_tokens(self, text): | |
4563 | """A stand-in for tokenize.generate_tokens().""" | |
4564 | if text != self.last_text: | |
4565 | string_io = io.StringIO(text) | |
4566 | self.last_tokens = list( | |
4567 | tokenize.generate_tokens(string_io.readline) | |
4568 | ) | |
4569 | self.last_text = text | |
4570 | return self.last_tokens | |
4571 | ||
4572 | ||
4573 | _cached_tokenizer = CachedTokenizer() | |
4574 | generate_tokens = _cached_tokenizer.generate_tokens | |
4575 | ||
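# Because of the one-element cache, repeated calls with identical text
# return the very same token list object:
#
#     >>> toks = generate_tokens('x = 1\n')
#     >>> generate_tokens('x = 1\n') is toks
#     True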
4576 | ||
4577 | if __name__ == '__main__': | |
4578 | sys.exit(main()) |