# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
50 """Automatically formats Python code to conform to the PEP 8 style guide.
52 Fixes that only need be done once can be added by adding a function of the form
53 "fix_<code>(source)" to this module. They should return the fixed source code.
54 These fixes are picked up by apply_global_fixes().
56 Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
57 class documentation for more information.
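
# A minimal illustrative sketch of the global-fix convention described in the
# docstring above (this hypothetical fixer is not part of the module): it
# takes the full source string, returns the fixed source, and would be picked
# up automatically by apply_global_fixes() thanks to its "fix_<code>" name.
#
#     def fix_w000(source):
#         """Hypothetical example: strip trailing whitespace everywhere."""
#         return ''.join(line.rstrip() + '\n'
#                        for line in source.splitlines())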

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import ast
import collections
import difflib
import io
import itertools
import os
import re
import sys
import token
import tokenize

from configparser import ConfigParser as SafeConfigParser, Error

import pycodestyle
from pycodestyle import STARTSWITH_INDENT_STATEMENT_REGEX

PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')

EXIT_CODE_EXISTS_DIFF = 2
EXIT_CODE_ARGPARSE_ERROR = 99

# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])

DEFAULT_IGNORE = 'E226,E24,W50,W690'  # TODO: use pycodestyle.DEFAULT_IGNORE
DEFAULT_INDENT_SIZE = 4
# These fixes conflict with each other; if the `--ignore` setting causes both
# to be enabled, disable both of them.
CONFLICTING_CODES = ('W503', 'W504')

# W602 is handled separately due to the need to avoid "with_traceback".
CODE_TO_2TO3 = {
    'E231': ['ws_comma'],
}

if sys.platform == 'win32':  # pragma: no cover
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'),
                                  'pycodestyle')
# fallback, use .pep8
if not os.path.exists(DEFAULT_CONFIG):  # pragma: no cover
    if sys.platform == 'win32':
        DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
    else:
        DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')


MAX_PYTHON_FILE_DETECTION_BYTES = 1024


def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
    """Return opened file with a specific encoding."""
    if not encoding:
        encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)

    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings


def detect_encoding(filename, limit_byte_check=-1):
    """Return file encoding."""
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        with open_with_encoding(filename, encoding=encoding) as test_file:
            test_file.read(limit_byte_check)

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'


def readlines_from_file(filename):
    """Return contents of file."""
    with open_with_encoding(filename) as input_file:
        return input_file.readlines()


def extended_blank_lines(logical_line,
                         blank_lines,
                         blank_before,
                         indent_level,
                         previous_logical):
    """Check for missing blank lines after class declaration."""
    if previous_logical.startswith('def '):
        if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
            yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
    elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
        # Missing blank line between class docstring and method declaration.
        if (
            indent_level and
            not blank_lines and
            not blank_before and
            logical_line.startswith(('def ')) and
            '(self' in logical_line
        ):
            yield (0, 'E301 expected 1 blank line, found 0')


pycodestyle.register_check(extended_blank_lines)
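
# For example (illustrative), the check above reports E301 for a method that
# directly follows a class docstring with no separating blank line:
#
#     class Example(object):
#         """Docstring."""
#         def method(self):  # E301 expected 1 blank line, found 0
#             pass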


def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa):
    """Override pycodestyle's function to provide indentation information."""
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented. Assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line. In turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (
        (DEFAULT_INDENT_SIZE,)
        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
                                     2 * DEFAULT_INDENT_SIZE)
    )

    # Remember how many brackets were opened on each line.
    parens = [0] * nrows

    # Relative indents of physical lines.
    rel_indent = [0] * nrows

    # For each depth, collect a list of opening rows.
    open_rows = [[0]]
    # For each depth, memorize the hanging indentation.
    hangs = [None]

    # Visual indents.
    indent_chances = {}
    last_indent = tokens[0][2]
    indent = [last_indent[1]]

    last_token_multiline = None
    line = None
    last_line = ''
    last_line_begins_with_multiline = False
    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))
            last_line_begins_with_multiline = last_token_multiline

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start

            # Record the initial indent.
            rel_indent[row] = pycodestyle.expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])

            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for visual indent.
                if start[1] != indent[depth]:
                    yield (start, 'E124 {}'.format(indent[depth]))
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening bracket's line
                if hang_closing:
                    yield (start, 'E133 {}'.format(indent[depth]))
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # Visual indent is broken.
                    yield (start, 'E128 {}'.format(indent[depth]))
            elif (hanging_indent or
                  (indent_next and
                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
                # Hanging indent is verified.
                if close_bracket and not hang_closing:
                    yield (start, 'E123 {}'.format(indent_level +
                                                   rel_indent[open_row]))
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # Ignore token lined up with matching one from a previous line.
                pass
            else:
                one_indented = (indent_level + rel_indent[open_row] +
                                DEFAULT_INDENT_SIZE)
                # Indent is broken.
                if hang <= 0:
                    error = ('E122', one_indented)
                elif indent[depth]:
                    error = ('E127', indent[depth])
                elif not close_bracket and hangs[depth]:
                    error = ('E131', one_indented)
                elif hang > DEFAULT_INDENT_SIZE:
                    error = ('E126', one_indented)
                else:
                    hangs[depth] = hang
                    error = ('E121', one_indented)

                yield (start, '{} {}'.format(*error))

        # Look for visual indenting.
        if (
            parens[row] and
            token_type not in (tokenize.NL, tokenize.COMMENT) and
            not indent[depth]
        ):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # Special case for the "if" statement because len("if (") is equal to
        # len("elif").
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if (
                start[1] not in indent_chances and
                # This is for purposes of speeding up E121 (GitHub #90).
                not last_line.rstrip().endswith(',')
            ):
                # Allow to line up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

        last_line = line

    if (
        indent_next and
        not last_line_begins_with_multiline and
        pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
    ):
        pos = (start[0], indent[0] + 4)
        desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
        if visual_indent:
            yield (pos, 'E129 {}'.format(desired_indent))
        else:
            yield (pos, 'E125 {}'.format(desired_indent))


del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
pycodestyle.register_check(continued_indentation)
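
# Note: unlike pycodestyle's original check, the override above encodes the
# desired indentation as a number in the message (e.g. "E128 12" instead of
# "E128 continuation line under-indented for visual indent"), which lets
# FixPEP8._fix_reindent() parse result['info'] and reindent the line directly.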


class FixPEP8(object):

    """Fix invalid code.

    Fixer methods are prefixed "fix_". The _fix_source() method looks for
    these automatically.

    The fixer method can take either one or two arguments (in addition to
    self). The first argument is "result", which is the error information from
    pycodestyle. The second argument, "logical", is required only for
    logical-line fixes.

    The fixer method can return the list of modified lines or None. An empty
    list would mean that no changes were made. None would mean that only the
    line reported in the pycodestyle error was modified. Note that the modified
    line numbers that are returned are indexed at 1. This typically would
    correspond with the line number reported in the pycodestyle error
    information.

        - e111,e114,e115,e116
        - e121,e122,e123,e124,e125,e126,e127,e128,e129
        - e221,e222,e223,e224,e225
        - e271,e272,e273,e274,e275
        - e301,e302,e303,e304,e305,e306
        - e701,e702,e703,e704
        - e711,e712,e713,e714

    """

    def __init__(self, filename,
                 options,
                 contents=None,
                 long_line_ignore_cache=None):
        self.filename = filename
        if contents is None:
            self.source = readlines_from_file(filename)
        else:
            sio = io.StringIO(contents)
            self.source = sio.readlines()
        self.options = options
        self.indent_word = _get_indentword(''.join(self.source))

        # collect imports line
        self.imports = {}
        for i, line in enumerate(self.source):
            if (line.find("import ") == 0 or line.find("from ") == 0) and \
                    line not in self.imports:
                # collect only import statements that first appeared
                self.imports[line] = i

        self.long_line_ignore_cache = (
            set() if long_line_ignore_cache is None
            else long_line_ignore_cache)

        # Many fixers are the same even though pycodestyle categorizes them
        # differently.
        self.fix_e115 = self.fix_e112
        self.fix_e121 = self._fix_reindent
        self.fix_e122 = self._fix_reindent
        self.fix_e123 = self._fix_reindent
        self.fix_e124 = self._fix_reindent
        self.fix_e126 = self._fix_reindent
        self.fix_e127 = self._fix_reindent
        self.fix_e128 = self._fix_reindent
        self.fix_e129 = self._fix_reindent
        self.fix_e133 = self.fix_e131
        self.fix_e202 = self.fix_e201
        self.fix_e203 = self.fix_e201
        self.fix_e211 = self.fix_e201
        self.fix_e221 = self.fix_e271
        self.fix_e222 = self.fix_e271
        self.fix_e223 = self.fix_e271
        self.fix_e226 = self.fix_e225
        self.fix_e227 = self.fix_e225
        self.fix_e228 = self.fix_e225
        self.fix_e241 = self.fix_e271
        self.fix_e242 = self.fix_e224
        self.fix_e252 = self.fix_e225
        self.fix_e261 = self.fix_e262
        self.fix_e272 = self.fix_e271
        self.fix_e273 = self.fix_e271
        self.fix_e274 = self.fix_e271
        self.fix_e275 = self.fix_e271
        self.fix_e306 = self.fix_e301
        self.fix_e501 = (
            self.fix_long_line_logically if
            options and (options.aggressive >= 2 or options.experimental) else
            self.fix_long_line_physically)
        self.fix_e703 = self.fix_e702
        self.fix_w292 = self.fix_w291
        self.fix_w293 = self.fix_w291

    def _fix_source(self, results):
        try:
            (logical_start, logical_end) = _find_logical(self.source)
            logical_support = True
        except (SyntaxError, tokenize.TokenError):  # pragma: no cover
            logical_support = False

        completed_lines = set()
        for result in sorted(results, key=_priority_key):
            if result['line'] in completed_lines:
                continue

            fixed_methodname = 'fix_' + result['id'].lower()
            if hasattr(self, fixed_methodname):
                fix = getattr(self, fixed_methodname)

                line_index = result['line'] - 1
                original_line = self.source[line_index]

                is_logical_fix = len(_get_parameters(fix)) > 2
                if is_logical_fix:
                    logical = None
                    if logical_support:
                        logical = _get_logical(self.source,
                                               result,
                                               logical_start,
                                               logical_end)
                        if logical and set(range(
                            logical[1][0] + 1)).intersection(
                                completed_lines):
                            continue

                    modified_lines = fix(result, logical)
                else:
                    modified_lines = fix(result)

                if modified_lines is None:
                    # Force logical fixes to report what they modified.
                    assert not is_logical_fix

                    if self.source[line_index] == original_line:
                        modified_lines = []

                if modified_lines:
                    completed_lines.update(modified_lines)
                elif modified_lines == []:  # Empty list means no fix
                    if self.options.verbose >= 2:
                        print(
                            '---> Not fixing {error} on line {line}'.format(
                                error=result['id'], line=result['line']),
                            file=sys.stderr)
                else:  # We assume one-line fix when None.
                    completed_lines.add(result['line'])
            else:
                if self.options.verbose >= 3:
                    print(
                        "---> '{}' is not defined.".format(fixed_methodname),
                        file=sys.stderr)

                    info = result['info'].strip()
                    print('---> {}:{}:{}:{}'.format(self.filename,
                                                    result['line'],
                                                    result['column'],
                                                    info),
                          file=sys.stderr)
590 """Return a version of the source code with PEP 8 violations fixed."""
592 'ignore': self
.options
.ignore
,
593 'select': self
.options
.select
,
594 'max_line_length': self
.options
.max_line_length
,
595 'hang_closing': self
.options
.hang_closing
,
597 results
= _execute_pep8(pep8_options
, self
.source
)
599 if self
.options
.verbose
:
602 if r
['id'] not in progress
:
603 progress
[r
['id']] = set()
604 progress
[r
['id']].add(r
['line'])
605 print('---> {n} issue(s) to fix {progress}'.format(
606 n
=len(results
), progress
=progress
), file=sys
.stderr
)
608 if self
.options
.line_range
:
609 start
, end
= self
.options
.line_range
610 results
= [r
for r
in results
611 if start
<= r
['line'] <= end
]
613 self
._fix
_source
(filter_results(source
=''.join(self
.source
),
615 aggressive
=self
.options
.aggressive
))
617 if self
.options
.line_range
:
618 # If number of lines has changed then change line_range.
619 count
= sum(sline
.count('\n')
620 for sline
in self
.source
[start
- 1:end
])
621 self
.options
.line_range
[1] = start
+ count
- 1
623 return ''.join(self
.source
)
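
    # Illustrative usage of this class (the "options" namespace is assumed to
    # come from this module's command-line parser; hypothetical values here):
    #
    #     fixer = FixPEP8('example.py', options=options)
    #     fixed_source = fixer.fix()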

    def _fix_reindent(self, result):
        """Fix a badly indented line.

        This is done by adding or removing from its initial indent only.

        """
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()

    def fix_e112(self, result):
        """Fix under-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        if not target.lstrip().startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = self.indent_word + target

    def fix_e113(self, result):
        """Fix unexpected indentation."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        indent = _get_indentation(target)
        stripped = target.lstrip()
        self.source[line_index] = indent[1:] + stripped

    def fix_e116(self, result):
        """Fix over-indented comments."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        stripped = target.lstrip()

        if not stripped.startswith('#'):
            # Don't screw with invalid syntax.
            return []

        self.source[line_index] = indent[1:] + stripped

    def fix_e117(self, result):
        """Fix over-indented."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent = _get_indentation(target)
        if indent == '\t':
            return []

        stripped = target.lstrip()

        self.source[line_index] = indent[1:] + stripped

    def fix_e125(self, result):
        """Fix indentation that is indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        spaces_to_add = num_indent_spaces - len(_get_indentation(target))
        indent = len(_get_indentation(target))
        modified_lines = []

        while len(_get_indentation(self.source[line_index])) >= indent:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
            modified_lines.append(1 + line_index)  # Line indexed at 1.
            line_index -= 1

        return modified_lines

    def fix_e131(self, result):
        """Fix indentation that is indistinguishable from the next logical line."""
        num_indent_spaces = int(result['info'].split()[1])
        line_index = result['line'] - 1
        target = self.source[line_index]

        indent_length = len(_get_indentation(target))
        spaces_to_add = num_indent_spaces - indent_length
        if num_indent_spaces == 0 and indent_length == 0:
            spaces_to_add = 4

        if spaces_to_add >= 0:
            self.source[line_index] = (' ' * spaces_to_add +
                                       self.source[line_index])
        else:
            offset = abs(spaces_to_add)
            self.source[line_index] = self.source[line_index][offset:]

    def fix_e201(self, result):
        """Remove extraneous whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement='')

        self.source[line_index] = fixed

    def fix_e224(self, result):
        """Remove extraneous whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + target[offset:].replace('\t', ' ')
        self.source[result['line'] - 1] = fixed

    def fix_e225(self, result):
        """Fix missing whitespace around operator."""
        target = self.source[result['line'] - 1]
        offset = result['column'] - 1
        fixed = target[:offset] + ' ' + target[offset:]

        # Only proceed if non-whitespace characters match.
        # And make sure we don't break the indentation.
        if (
            fixed.replace(' ', '') == target.replace(' ', '') and
            _get_indentation(fixed) == _get_indentation(target)
        ):
            self.source[result['line'] - 1] = fixed
            error_code = result.get('id', 0)
            try:
                ts = generate_tokens(fixed)
            except (SyntaxError, tokenize.TokenError):
                return
            if not check_syntax(fixed.lstrip()):
                return
            try:
                _missing_whitespace = (
                    pycodestyle.missing_whitespace_around_operator
                )
            except AttributeError:
                # pycodestyle >= 2.11.0
                _missing_whitespace = pycodestyle.missing_whitespace
            errors = list(_missing_whitespace(fixed, ts))
            for e in reversed(errors):
                if error_code != e[1].split()[0]:
                    continue
                offset = e[0][1]
                fixed = fixed[:offset] + ' ' + fixed[offset:]
            self.source[result['line'] - 1] = fixed
        else:
            return []

    def fix_e231(self, result):
        """Add missing whitespace."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column']
        fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
        self.source[line_index] = fixed

    def fix_e251(self, result):
        """Remove whitespace around parameter '=' sign."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        # This is necessary since pycodestyle sometimes reports columns that
        # go past the end of the physical line. This happens in cases like,
        # foo(bar\n=None)
        c = min(result['column'] - 1,
                len(target) - 1)

        if target[c].strip():
            fixed = target
        else:
            fixed = target[:c].rstrip() + target[c:].lstrip()

        # There could be an escaped newline
        #
        #     def foo(a=\
        #             1)
        if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
            self.source[line_index] = fixed.rstrip('\n\r \t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]  # Line indexed at 1

        self.source[result['line'] - 1] = fixed

    def fix_e262(self, result):
        """Fix spacing after inline comment hash."""
        target = self.source[result['line'] - 1]
        offset = result['column']

        code = target[:offset].rstrip(' \t#')
        comment = target[offset:].lstrip(' \t#')

        fixed = code + ('  # ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e265(self, result):
        """Fix spacing after block comment hash."""
        target = self.source[result['line'] - 1]

        indent = _get_indentation(target)
        line = target.lstrip(' \t')
        pos = next((index for index, c in enumerate(line) if c != '#'))
        hashes = line[:pos]
        comment = line[pos:].lstrip(' \t')

        # Ignore special comments, even in the middle of the file.
        if comment.startswith('!'):
            return

        fixed = indent + hashes + (' ' + comment if comment.strip() else '\n')

        self.source[result['line'] - 1] = fixed

    def fix_e266(self, result):
        """Fix too many block comment hashes."""
        target = self.source[result['line'] - 1]

        # Leave stylistic outlined blocks alone.
        if target.strip().endswith('#'):
            return

        indentation = _get_indentation(target)
        fixed = indentation + '# ' + target.lstrip('# \t')

        self.source[result['line'] - 1] = fixed

    def fix_e271(self, result):
        """Fix extraneous whitespace around keywords."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        fixed = fix_whitespace(target,
                               offset=offset,
                               replacement=' ')

        if fixed == target:
            return []
        else:
            self.source[line_index] = fixed

    def fix_e301(self, result):
        """Add missing blank line."""
        cr = '\n'
        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]

    def fix_e302(self, result):
        """Add missing 2 blank lines."""
        add_linenum = 2 - int(result['info'].split()[-1])
        offset = 1
        if self.source[result['line'] - 2].strip() == "\\":
            offset = 2
        cr = '\n' * add_linenum
        self.source[result['line'] - offset] = (
            cr + self.source[result['line'] - offset]
        )

    def fix_e303(self, result):
        """Remove extra blank lines."""
        delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
        delete_linenum = max(1, delete_linenum)

        # We need to count because pycodestyle reports an offset line number if
        # there are comments.
        cnt = 0
        line = result['line'] - 2
        modified_lines = []
        while cnt < delete_linenum and line >= 0:
            if not self.source[line].strip():
                self.source[line] = ''
                modified_lines.append(1 + line)  # Line indexed at 1
                cnt += 1
            line -= 1

        return modified_lines

    def fix_e304(self, result):
        """Remove blank line following function decorator."""
        line = result['line'] - 2
        if not self.source[line].strip():
            self.source[line] = ''

    def fix_e305(self, result):
        """Add missing 2 blank lines after end of function or class."""
        add_delete_linenum = 2 - int(result['info'].split()[-1])
        cnt = 0
        offset = result['line'] - 2
        modified_lines = []
        if add_delete_linenum < 0:
            # delete cr
            add_delete_linenum = abs(add_delete_linenum)
            while cnt < add_delete_linenum and offset >= 0:
                if not self.source[offset].strip():
                    self.source[offset] = ''
                    modified_lines.append(1 + offset)  # Line indexed at 1
                    cnt += 1
                offset -= 1
        else:
            # add cr
            cr = '\n'
            # check comment line
            while True:
                if offset < 0:
                    break
                line = self.source[offset].lstrip()
                if not line:
                    break
                if line[0] != '#':
                    break
                offset -= 1
            offset += 1
            self.source[offset] = cr + self.source[offset]
            modified_lines.append(1 + offset)  # Line indexed at 1.
        return modified_lines

    def fix_e401(self, result):
        """Put imports on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        offset = result['column'] - 1

        if not target.lstrip().startswith('import'):
            return []

        indentation = re.split(pattern=r'\bimport\b',
                               string=target, maxsplit=1)[0]
        fixed = (target[:offset].rstrip('\t ,') + '\n' +
                 indentation + 'import ' + target[offset:].lstrip('\t ,'))
        self.source[line_index] = fixed

    def fix_e402(self, result):
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        for i in range(1, 100):
            line = "".join(self.source[line_index:line_index+i])
            try:
                generate_tokens("".join(line))
            except (SyntaxError, tokenize.TokenError):
                continue
            break
        if not (target in self.imports and self.imports[target] != line_index):
            mod_offset = get_module_imports_on_top_of_file(self.source,
                                                           line_index)
            self.source[mod_offset] = line + self.source[mod_offset]
        for offset in range(i):
            self.source[line_index+offset] = ''

    def fix_long_line_logically(self, result, logical):
        """Try to make lines fit within --max-line-length characters."""
        if (
            not logical or
            len(logical[2]) == 1 or
            self.source[result['line'] - 1].lstrip().startswith('#')
        ):
            return self.fix_long_line_physically(result)

        start_line_index = logical[0][0]
        end_line_index = logical[1][0]
        logical_lines = logical[2]

        previous_line = get_item(self.source, start_line_index - 1, default='')
        next_line = get_item(self.source, end_line_index + 1, default='')

        single_line = join_logical_line(''.join(logical_lines))

        try:
            fixed = self.fix_long_line(
                target=single_line,
                previous_line=previous_line,
                next_line=next_line,
                original=''.join(logical_lines))
        except (SyntaxError, tokenize.TokenError):
            return self.fix_long_line_physically(result)

        if fixed:
            for line_index in range(start_line_index, end_line_index + 1):
                self.source[line_index] = ''
            self.source[start_line_index] = fixed
            return range(start_line_index + 1, end_line_index + 1)

        return []

    def fix_long_line_physically(self, result):
        """Try to make lines fit within --max-line-length characters."""
        line_index = result['line'] - 1
        target = self.source[line_index]

        previous_line = get_item(self.source, line_index - 1, default='')
        next_line = get_item(self.source, line_index + 1, default='')

        try:
            fixed = self.fix_long_line(
                target=target,
                previous_line=previous_line,
                next_line=next_line,
                original=target)
        except (SyntaxError, tokenize.TokenError):
            return []

        if fixed:
            self.source[line_index] = fixed
            return [line_index + 1]

        return []

    def fix_long_line(self, target, previous_line,
                      next_line, original):
        cache_entry = (target, previous_line, next_line)
        if cache_entry in self.long_line_ignore_cache:
            return []

        if target.lstrip().startswith('#'):
            if self.options.aggressive:
                # Wrap commented lines.
                return shorten_comment(
                    line=target,
                    max_line_length=self.options.max_line_length,
                    last_comment=not next_line.lstrip().startswith('#'))
            return []

        fixed = get_fixed_long_line(
            target=target,
            previous_line=previous_line,
            original=original,
            indent_word=self.indent_word,
            max_line_length=self.options.max_line_length,
            aggressive=self.options.aggressive,
            experimental=self.options.experimental,
            verbose=self.options.verbose)

        if fixed and not code_almost_equal(original, fixed):
            return fixed

        self.long_line_ignore_cache.add(cache_entry)
        return None

    def fix_e502(self, result):
        """Remove extraneous escape of newline."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'

    def fix_e701(self, result):
        """Put colon-separated compound statement on separate lines."""
        line_index = result['line'] - 1
        target = self.source[line_index]
        c = result['column']

        fixed_source = (target[:c] + '\n' +
                        _get_indentation(target) + self.indent_word +
                        target[c:].lstrip('\n\r \t\\'))
        self.source[result['line'] - 1] = fixed_source
        return [result['line'], result['line'] + 1]

    def fix_e702(self, result, logical):
        """Put semicolon-separated compound statement on separate lines."""
        if not logical:
            return []  # pragma: no cover
        logical_lines = logical[2]

        # Avoid applying this when indented.
        # https://docs.python.org/reference/compound_stmts.html
        for line in logical_lines:
            if (result['id'] == 'E702' and ':' in line
                    and STARTSWITH_INDENT_STATEMENT_REGEX.match(line)):
                if self.options.verbose:
                    print(
                        '---> avoid fixing {error} with '
                        'other compound statements'.format(error=result['id']),
                        file=sys.stderr)
                return []

        line_index = result['line'] - 1
        target = self.source[line_index]

        if target.rstrip().endswith('\\'):
            # Normalize '1; \\\n2' into '1; 2'.
            self.source[line_index] = target.rstrip('\n \r\t\\')
            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
            return [line_index + 1, line_index + 2]

        if target.rstrip().endswith(';'):
            self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
            return [line_index + 1]

        offset = result['column'] - 1
        first = target[:offset].rstrip(';').rstrip()
        second = (_get_indentation(logical_lines[0]) +
                  target[offset:].lstrip(';').lstrip())

        # Find inline comment.
        inline_comment = None
        if target[offset:].lstrip(';').lstrip()[:2] == '# ':
            inline_comment = target[offset:].lstrip(';')

        if inline_comment:
            self.source[line_index] = first + inline_comment
        else:
            self.source[line_index] = first + '\n' + second
        return [line_index + 1]

    def fix_e704(self, result):
        """Fix multiple statements on one line def."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = STARTSWITH_DEF_REGEX.match(target)
        if match:
            self.source[line_index] = '{}\n{}{}'.format(
                match.group(0),
                _get_indentation(target) + self.indent_word,
                target[match.end(0):].lstrip())

    def fix_e711(self, result):
        """Fix comparison with None."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        right_offset = offset + 2
        if right_offset >= len(target):
            return []

        left = target[:offset].rstrip()
        center = target[offset:right_offset]
        right = target[right_offset:].lstrip()

        if center.strip() == '==':
            new_center = 'is'
        elif center.strip() == '!=':
            new_center = 'is not'
        else:
            return []

        self.source[line_index] = ' '.join([left, new_center, right])
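        # For example (illustrative): "if x == None:" becomes "if x is None:"
        # and "if x != None:" becomes "if x is not None:".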

    def fix_e712(self, result):
        """Fix (trivial case of) comparison with boolean."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # Handle very easy "not" special cases.
        if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
            self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
                                             r'if not \1:', target, count=1)
        elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
            self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
                                             r'if not \1:', target, count=1)
        else:
            right_offset = offset + 2
            if right_offset >= len(target):
                return []

            left = target[:offset].rstrip()
            center = target[offset:right_offset]
            right = target[right_offset:].lstrip()

            # Handle simple cases only.
            new_right = None
            if center.strip() == '==':
                if re.match(r'\bTrue\b', right):
                    new_right = re.sub(r'\bTrue\b *', '', right, count=1)
            elif center.strip() == '!=':
                if re.match(r'\bFalse\b', right):
                    new_right = re.sub(r'\bFalse\b *', '', right, count=1)

            if new_right is None:
                return []

            if new_right[0].isalnum():
                new_right = ' ' + new_right

            self.source[line_index] = left + new_right
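        # For example (illustrative): "if x == True:" becomes "if x:" and
        # "if x == False:" becomes "if not x:".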

    def fix_e713(self, result):
        """Fix (trivial case of) non-membership check."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # to convert once 'not in' -> 'in'
        before_target = target[:offset]
        target = target[offset:]
        match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
        notin_pos_start, notin_pos_end = 0, 0
        if match_notin:
            notin_pos_start = match_notin.start(1)
            notin_pos_end = match_notin.end()
            target = '{}{} {}'.format(
                target[:notin_pos_start], 'in', target[notin_pos_end:])

        match = COMPARE_NEGATIVE_REGEX.search(target)
        if match:
            if match.group(3) == 'in':
                pos_start = match.start(1)
                new_target = '{5}{0}{1} {2} {3} {4}'.format(
                    target[:pos_start], match.group(2), match.group(1),
                    match.group(3), target[match.end():], before_target)
                if match_notin:
                    # revert 'in' -> 'not in'
                    pos_start = notin_pos_start + offset
                    pos_end = notin_pos_end + offset - 4  # len('not ')
                    new_target = '{}{} {}'.format(
                        new_target[:pos_start], 'not in', new_target[pos_end:])
                self.source[line_index] = new_target

    def fix_e714(self, result):
        """Fix object identity check that should use 'is not'."""
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)

        # to convert once 'is not' -> 'is'
        before_target = target[:offset]
        target = target[offset:]
        match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
        isnot_pos_start, isnot_pos_end = 0, 0
        if match_isnot:
            isnot_pos_start = match_isnot.start(1)
            isnot_pos_end = match_isnot.end()
            target = '{}{} {}'.format(
                target[:isnot_pos_start], 'in', target[isnot_pos_end:])

        match = COMPARE_NEGATIVE_REGEX.search(target)
        if match:
            if match.group(3).startswith('is'):
                pos_start = match.start(1)
                new_target = '{5}{0}{1} {2} {3} {4}'.format(
                    target[:pos_start], match.group(2), match.group(3),
                    match.group(1), target[match.end():], before_target)
                if match_isnot:
                    # revert 'is' -> 'is not'
                    pos_start = isnot_pos_start + offset
                    pos_end = isnot_pos_end + offset - 4  # len('not ')
                    new_target = '{}{} {}'.format(
                        new_target[:pos_start], 'is not', new_target[pos_end:])
                self.source[line_index] = new_target

    def fix_e722(self, result):
        """Fix bare except."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = BARE_EXCEPT_REGEX.search(target)
        if match:
            self.source[line_index] = '{}{}{}'.format(
                target[:result['column'] - 1], "except BaseException:",
                target[match.end():])

    def fix_e731(self, result):
        """Fix do not assign a lambda expression check."""
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        match = LAMBDA_REGEX.search(target)
        if match:
            end = match.end()
            self.source[line_index] = '{}def {}({}): return {}'.format(
                target[:match.start(0)], match.group(1), match.group(2),
                target[end:].lstrip())
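        # For example (illustrative): "square = lambda x: x * x" becomes
        # "def square(x): return x * x".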

    def fix_w291(self, result):
        """Remove trailing whitespace."""
        fixed_line = self.source[result['line'] - 1].rstrip()
        self.source[result['line'] - 1] = fixed_line + '\n'

    def fix_w391(self, _):
        """Remove trailing blank lines."""
        blank_count = 0
        for line in reversed(self.source):
            line = line.rstrip()
            if line:
                break
            else:
                blank_count += 1

        original_length = len(self.source)
        self.source = self.source[:original_length - blank_count]
        return range(1, 1 + original_length)

    def fix_w503(self, result):
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        one_string_token = target.split()[0]
        try:
            ts = generate_tokens(one_string_token)
        except (SyntaxError, tokenize.TokenError):
            return
        if not _is_binary_operator(ts[0][0], one_string_token):
            return
        # find comment
        comment_index = 0
        found_not_comment_only_line = False
        comment_only_linenum = 0
        for i in range(5):
            # NOTE: try to parse the code up to 5 times
            if (line_index - i) < 0:
                break
            from_index = line_index - i - 1
            if from_index < 0 or len(self.source) <= from_index:
                break
            to_index = line_index + 1
            strip_line = self.source[from_index].lstrip()
            if (
                not found_not_comment_only_line and
                strip_line and strip_line[0] == '#'
            ):
                comment_only_linenum += 1
                continue
            found_not_comment_only_line = True
            try:
                ts = generate_tokens("".join(self.source[from_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[newline_index[-3]:]
            else:
                tts = ts
            old = []
            for t in tts:
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_count -= 1
                if newline_count <= 1:
                    break
                if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
                    comment_index = old[3][1]
                    break
                old = t
            break
        i = target.index(one_string_token)
        fix_target_line = line_index - 1 - comment_only_linenum
        self.source[line_index] = '{}{}'.format(
            target[:i], target[i + len(one_string_token):].lstrip())
        nl = find_newline(self.source[fix_target_line:line_index])
        before_line = self.source[fix_target_line]
        bl = before_line.index(nl)
        if comment_index:
            self.source[fix_target_line] = '{} {} {}'.format(
                before_line[:comment_index], one_string_token,
                before_line[comment_index + 1:])
        else:
            if before_line[:bl].endswith("#"):
                # special case
                # see: https://github.com/hhatto/autopep8/issues/503
                self.source[fix_target_line] = '{}{} {}'.format(
                    before_line[:bl-2], one_string_token, before_line[bl-2:])
            else:
                self.source[fix_target_line] = '{} {}{}'.format(
                    before_line[:bl], one_string_token, before_line[bl:])

    def fix_w504(self, result):
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        # NOTE: this is not pointed out by pycodestyle==2.4.0
        comment_index = 0
        operator_position = None  # (start_position, end_position)
        for i in range(1, 6):
            to_index = line_index + i
            try:
                ts = generate_tokens("".join(self.source[line_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if _is_binary_operator(t[0], t[1]):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[:newline_index[-3]]
            else:
                tts = ts
            old = []
            for t in tts:
                if tokenize.COMMENT == t[0] and old:
                    comment_row, comment_index = old[3]
                    break
                old = t
            break
        if not operator_position:
            return
        target_operator = target[operator_position[0]:operator_position[1]]

        if comment_index and comment_row == 1:
            self.source[line_index] = '{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[comment_index:])
        else:
            self.source[line_index] = '{}{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[operator_position[1]:].lstrip(),
                target[operator_position[1]:])

        next_line = self.source[line_index + 1]
        next_line_indent = 0
        m = re.match(r'\s*', next_line)
        if m:
            next_line_indent = m.span()[1]
        self.source[line_index + 1] = '{}{} {}'.format(
            next_line[:next_line_indent], target_operator,
            next_line[next_line_indent:])

    def fix_w605(self, result):
        (line_index, offset, target) = get_index_offset_contents(result,
                                                                 self.source)
        self.source[line_index] = '{}\\{}'.format(
            target[:offset + 1], target[offset + 1:])


def get_module_imports_on_top_of_file(source, import_line_index):
    """Return the line position of the top-most import or from keyword."""
    def is_string_literal(line):
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    def is_future_import(line):
        nodes = ast.parse(line)
        for n in nodes.body:
            if isinstance(n, ast.ImportFrom) and n.module == '__future__':
                return True
        return False

    def has_future_import(source):
        offset = 0
        line = ''
        for _, next_line in source:
            for line_part in next_line.strip().splitlines(True):
                line = line + line_part
                try:
                    return is_future_import(line), offset
                except SyntaxError:
                    offset += 1
        return False, offset

    allowed_try_keywords = ('try', 'except', 'else', 'finally')
    in_docstring = False
    docstring_kind = '"""'
    source_stream = iter(enumerate(source))
    for cnt, line in source_stream:
        if not in_docstring:
            m = DOCSTRING_START_REGEX.match(line.lstrip())
            if m is not None:
                in_docstring = True
                docstring_kind = m.group('kind')
                remain = line[m.end(): m.endpos].rstrip()
                if remain[-3:] == docstring_kind:  # one line doc
                    in_docstring = False
                continue
        if in_docstring:
            if line.rstrip()[-3:] == docstring_kind:
                in_docstring = False
            continue

        if not line.rstrip():
            continue
        elif line.startswith('#'):
            continue

        if line.startswith('import '):
            if cnt == import_line_index:
                continue
            return cnt
        elif line.startswith('from '):
            if cnt == import_line_index:
                continue
            hit, offset = has_future_import(
                itertools.chain([(cnt, line)], source_stream)
            )
            if hit:
                # move to the back
                return cnt + offset + 1
            return cnt
        elif pycodestyle.DUNDER_REGEX.match(line):
            return cnt
        elif any(line.startswith(kw) for kw in allowed_try_keywords):
            continue
        elif is_string_literal(line):
            return cnt
        else:
            return cnt
    return 0


def get_index_offset_contents(result, source):
    """Return (line_index, column_offset, line_contents)."""
    line_index = result['line'] - 1
    return (line_index,
            result['column'] - 1,
            source[line_index])


def get_fixed_long_line(target, previous_line, original,
                        indent_word='    ', max_line_length=79,
                        aggressive=False, experimental=False, verbose=False):
    """Break up long line and return result.

    Do this by generating multiple reformatted candidates and then
    ranking the candidates to heuristically select the best option.

    """
    indent = _get_indentation(target)
    source = target[len(indent):]
    assert source.lstrip() == source
    assert not target.lstrip().startswith('#')

    # Check for partial multiline.
    tokens = list(generate_tokens(source))

    candidates = shorten_line(
        tokens, source, indent,
        indent_word,
        max_line_length,
        aggressive=aggressive,
        experimental=experimental,
        previous_line=previous_line)

    # Also sort alphabetically as a tie breaker (for determinism).
    candidates = sorted(
        sorted(set(candidates).union([target, original])),
        key=lambda x: line_shortening_rank(
            x,
            indent_word,
            max_line_length,
            experimental=experimental))

    if verbose >= 4:
        print(('-' * 79 + '\n').join([''] + candidates + ['']),
              file=wrap_output(sys.stderr, 'utf-8'))

    if candidates:
        best_candidate = candidates[0]

        # Don't allow things to get longer.
        if longest_line_length(best_candidate) > longest_line_length(original):
            return None

        return best_candidate


def longest_line_length(code):
    """Return length of longest line."""
    if len(code) == 0:
        return 0
    return max(len(line) for line in code.splitlines())


def join_logical_line(logical_line):
    """Return single line based on logical line input."""
    indentation = _get_indentation(logical_line)

    return indentation + untokenize_without_newlines(
        generate_tokens(logical_line.lstrip())) + '\n'


def untokenize_without_newlines(tokens):
    """Return source code based on tokens."""
    text = ''
    last_row = 0
    last_column = -1

    for t in tokens:
        token_string = t[1]
        (start_row, start_column) = t[2]
        (end_row, end_column) = t[3]

        if start_row > last_row:
            last_column = 0
        if (
            (start_column > last_column or token_string == '\n') and
            not text.endswith(' ')
        ):
            text += ' '

        if token_string != '\n':
            text += token_string

        last_row = end_row
        last_column = end_column

    return text.rstrip()


def _find_logical(source_lines):
    # Make a variable which is the index of all the starts of lines.
    logical_start = []
    logical_end = []
    last_newline = True
    parens = 0
    for t in generate_tokens(''.join(source_lines)):
        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
                    tokenize.INDENT, tokenize.NL,
                    tokenize.ENDMARKER]:
            continue
        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
            last_newline = True
            logical_end.append((t[3][0] - 1, t[2][1]))
            continue
        if last_newline and not parens:
            logical_start.append((t[2][0] - 1, t[2][1]))
            last_newline = False
        if t[0] == tokenize.OP:
            if t[1] in '([{':
                parens += 1
            elif t[1] in ')]}':
                parens -= 1
    return (logical_start, logical_end)


def _get_logical(source_lines, result, logical_start, logical_end):
    """Return the logical line corresponding to the result.

    Assumes input is already E702-clean.

    """
    row = result['line'] - 1
    col = result['column'] - 1
    ls = None
    le = None
    for i in range(0, len(logical_start), 1):
        assert logical_end
        x = logical_end[i]
        if x[0] > row or (x[0] == row and x[1] > col):
            le = x
            ls = logical_start[i]
            break
    if ls is None:
        return None
    original = source_lines[ls[0]:le[0] + 1]
    return ls, le, original


def get_item(items, index, default=None):
    if 0 <= index < len(items):
        return items[index]

    return default


def reindent(source, indent_size, leave_tabs=False):
    """Reindent all lines."""
    reindenter = Reindenter(source, leave_tabs)
    return reindenter.run(indent_size)


def code_almost_equal(a, b):
    """Return True if code is similar.

    Ignore whitespace when comparing specific line.

    """
    split_a = split_and_strip_non_empty_lines(a)
    split_b = split_and_strip_non_empty_lines(b)

    if len(split_a) != len(split_b):
        return False

    for (index, _) in enumerate(split_a):
        if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
            return False

    return True
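
# For example (illustrative), code_almost_equal('(1, 2)\n', '(1,  2)\n') is
# True: the two snippets differ only in whitespace within matching lines.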


def split_and_strip_non_empty_lines(text):
    """Return lines split by newline.

    Also strip the lines and skip empty lines.

    """
    return [line.strip() for line in text.splitlines() if line.strip()]


def refactor(source, fixer_names, ignore=None, filename=''):
    """Return refactored code using lib2to3.

    Skip if ignore string is produced in the refactored code.

    """
    not_found_end_of_file_newline = source and source.rstrip("\r\n") == source
    if not_found_end_of_file_newline:
        input_source = source + "\n"
    else:
        input_source = source

    from lib2to3 import pgen2
    try:
        new_text = refactor_with_2to3(input_source,
                                      fixer_names=fixer_names,
                                      filename=filename)
    except (pgen2.parse.ParseError,
            SyntaxError,
            UnicodeDecodeError,
            UnicodeEncodeError):
        return source

    if ignore:
        if ignore in new_text and ignore not in source:
            return source

    if not_found_end_of_file_newline:
        return new_text.rstrip("\r\n")

    return new_text


def code_to_2to3(select, ignore, where='', verbose=False):
    fixes = set()
    for code, fix in CODE_TO_2TO3.items():
        if code_match(code, select=select, ignore=ignore):
            if verbose:
                print('---> Applying {} fix for {}'.format(where,
                                                           code.upper()),
                      file=sys.stderr)
            fixes |= set(fix)
    return fixes


def fix_2to3(source,
             aggressive=True, select=None, ignore=None, filename='',
             where='global', verbose=False):
    """Fix various deprecated code (via lib2to3)."""
    if not aggressive:
        return source

    select = select or []
    ignore = ignore or []

    return refactor(source,
                    code_to_2to3(select=select,
                                 ignore=ignore,
                                 where=where,
                                 verbose=verbose),
                    filename=filename)
):
1784 """Return type of newline used in source.
1786 Input is a list of lines.
1789 assert not isinstance(source
, str)
1791 counter
= collections
.defaultdict(int)
1793 if line
.endswith(CRLF
):
1795 elif line
.endswith(CR
):
1797 elif line
.endswith(LF
):
1800 return (sorted(counter
, key
=counter
.get
, reverse
=True) or [LF
])[0]
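
# For example (illustrative), find_newline(['a\r\n', 'b\r\n', 'c\n']) returns
# CRLF ('\r\n'), the most frequent line ending in the input.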


def _get_indentword(source):
    """Return indentation type."""
    indent_word = '    '  # Default in case source has no indentation
    try:
        for t in generate_tokens(source):
            if t[0] == token.INDENT:
                indent_word = t[1]
                break
    except (SyntaxError, tokenize.TokenError):
        pass
    return indent_word


def _get_indentation(line):
    """Return leading whitespace."""
    if line.strip():
        non_whitespace_index = len(line) - len(line.lstrip())
        return line[:non_whitespace_index]

    return line
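
# For example (illustrative), _get_indentation('    x = 1\n') returns four
# spaces, while _get_indentation('\n') returns the line unchanged.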


def get_diff_text(old, new, filename):
    """Return text of unified diff between old and new."""
    newline = '\n'
    diff = difflib.unified_diff(
        old, new,
        'original/' + filename,
        'fixed/' + filename,
        lineterm=newline)

    text = ''
    for line in diff:
        text += line

        # Work around missing newline (http://bugs.python.org/issue2142).
        if text and not line.endswith(newline):
            text += newline + r'\ No newline at end of file' + newline

    return text
):
1846 """Key for sorting PEP8 results.
1848 Global fixes should be done first. This is important for things like
1853 # Fix multiline colon-based before semicolon based.
1855 # Break multiline statements early.
1857 # Things that make lines longer.
1859 # Remove extraneous whitespace before breaking lines.
1861 # Shorten whitespace in comment before resorting to wrapping.
1864 middle_index
= 10000
1866 # We need to shorten lines last since the logical fixer can get in a
1867 # loop, which causes us to exit early.
1870 key
= pep8_result
['id'].lower()
1872 return priority
.index(key
)
1875 return middle_index
+ lowest_priority
.index(key
) + 1


def shorten_line(tokens, source, indentation, indent_word, max_line_length,
                 aggressive=False, experimental=False, previous_line=''):
    """Separate line at OPERATOR.

    Multiple candidates will be yielded.

    """
    for candidate in _shorten_line(tokens=tokens,
                                   source=source,
                                   indentation=indentation,
                                   indent_word=indent_word,
                                   aggressive=aggressive,
                                   previous_line=previous_line):
        yield candidate

    if aggressive:
        for key_token_strings in SHORTEN_OPERATOR_GROUPS:
            shortened = _shorten_line_at_tokens(
                tokens=tokens,
                source=source,
                indentation=indentation,
                indent_word=indent_word,
                key_token_strings=key_token_strings,
                aggressive=aggressive)

            if shortened is not None and shortened != source:
                yield shortened

    if experimental:
        for shortened in _shorten_line_at_tokens_new(
                tokens=tokens,
                source=source,
                indentation=indentation,
                max_line_length=max_line_length):
            yield shortened
, source
, indentation
, indent_word
,
1919 aggressive
=False, previous_line
=''):
1920 """Separate line at OPERATOR.
1922 The input is expected to be free of newlines except for inside multiline
1923 strings and at the end.
1925 Multiple candidates will be yielded.
1931 end_offset
) in token_offsets(tokens
):
1934 token_type
== tokenize
.COMMENT
and
1935 not is_probably_part_of_multiline(previous_line
) and
1936 not is_probably_part_of_multiline(source
) and
1937 not source
[start_offset
+ 1:].strip().lower().startswith(
1938 ('noqa', 'pragma:', 'pylint:'))
1940 # Move inline comments to previous line.
1941 first
= source
[:start_offset
]
1942 second
= source
[start_offset
:]
1943 yield (indentation
+ second
.strip() + '\n' +
1944 indentation
+ first
.strip() + '\n')
1945 elif token_type
== token
.OP
and token_string
!= '=':
1946 # Don't break on '=' after keyword as this violates PEP 8.
1948 assert token_type
!= token
.INDENT
1950 first
= source
[:end_offset
]
1952 second_indent
= indentation
1953 if (first
.rstrip().endswith('(') and
1954 source
[end_offset
:].lstrip().startswith(')')):
1956 elif first
.rstrip().endswith('('):
1957 second_indent
+= indent_word
1959 second_indent
+= ' ' * (1 + first
.find('('))
1961 second_indent
+= indent_word
1963 second
= (second_indent
+ source
[end_offset
:].lstrip())
1965 not second
.strip() or
1966 second
.lstrip().startswith('#')
1970 # Do not begin a line with a comma
1971 if second
.lstrip().startswith(','):
1973 # Do end a line with a dot
1974 if first
.rstrip().endswith('.'):
1976 if token_string
in '+-*/':
1977 fixed
= first
+ ' \\' + '\n' + second
1979 fixed
= first
+ '\n' + second
1981 # Only fix if syntax is okay.
1982 if check_syntax(normalize_multiline(fixed
)
1983 if aggressive
else fixed
):
1984 yield indentation
+ fixed
1987 def _is_binary_operator(token_type
, text
):
1988 return ((token_type
== tokenize
.OP
or text
in ['and', 'or']) and
1989 text
not in '()[]{},:.;@=%~')


# A convenient way to handle tokens.
Token = collections.namedtuple('Token', ['token_type', 'token_string',
                                         'spos', 'epos', 'line'])
1997 class ReformattedLines(object):
1999 """The reflowed lines of atoms.
2001 Each part of the line is represented as an "atom." They can be moved
2002 around when need be to get the optimal formatting.
2006 ###########################################################################
2009 class _Indent(object):
2011 """Represent an indentation in the atom stream."""
2013 def __init__(self
, indent_amt
):
2014 self
._indent
_amt
= indent_amt
2017 return ' ' * self
._indent
_amt
2021 return self
._indent
_amt
2023 class _Space(object):
2025 """Represent a space in the atom stream."""
2034 class _LineBreak(object):
2036 """Represent a line break in the atom stream."""
2045 def __init__(self
, max_line_length
):
2046 self
._max
_line
_length
= max_line_length
2048 self
._bracket
_depth
= 0
2049 self
._prev
_item
= None
2050 self
._prev
_prev
_item
= None
2055 ###########################################################################
2058 def add(self
, obj
, indent_amt
, break_after_open_bracket
):
2059 if isinstance(obj
, Atom
):
2060 self
._add
_item
(obj
, indent_amt
)
2063 self
._add
_container
(obj
, indent_amt
, break_after_open_bracket
)
2065 def add_comment(self
, item
):
2067 if len(self
._lines
) > 1:
2068 if isinstance(self
._lines
[-1], self
._Space
):
2070 if len(self
._lines
) > 2:
2071 if isinstance(self
._lines
[-2], self
._Space
):
2074 while num_spaces
> 0:
2075 self
._lines
.append(self
._Space
())
2077 self
._lines
.append(item
)
2079 def add_indent(self
, indent_amt
):
2080 self
._lines
.append(self
._Indent
(indent_amt
))
2082 def add_line_break(self
, indent
):
2083 self
._lines
.append(self
._LineBreak
())
2084 self
.add_indent(len(indent
))
2086 def add_line_break_at(self
, index
, indent_amt
):
2087 self
._lines
.insert(index
, self
._LineBreak
())
2088 self
._lines
.insert(index
+ 1, self
._Indent
(indent_amt
))
2090 def add_space_if_needed(self
, curr_text
, equal
=False):
2092 not self
._lines
or isinstance(
2093 self
._lines
[-1], (self
._LineBreak
, self
._Indent
, self
._Space
))
2097 prev_text
= str(self
._prev
_item
)
2099 str(self
._prev
_prev
_item
) if self
._prev
_prev
_item
else '')
2102 # The previous item was a keyword or identifier and the current
2103 # item isn't an operator that doesn't require a space.
2104 ((self
._prev
_item
.is_keyword
or self
._prev
_item
.is_string
or
2105 self
._prev
_item
.is_name
or self
._prev
_item
.is_number
) and
2106 (curr_text
[0] not in '([{.,:}])' or
2107 (curr_text
[0] == '=' and equal
))) or
2109 # Don't place spaces around a '.', unless it's in an 'import'
2111 ((prev_prev_text
!= 'from' and prev_text
[-1] != '.' and
2112 curr_text
!= 'import') and
2114 # Don't place a space before a colon.
2115 curr_text
[0] != ':' and
2117 # Don't split up ending brackets by spaces.
2118 ((prev_text
[-1] in '}])' and curr_text
[0] not in '.,}])') or
2120 # Put a space after a colon or comma.
2121 prev_text
[-1] in ':,' or
2123 # Put space around '=' if asked to.
2124 (equal
and prev_text
== '=') or
2126 # Put spaces around non-unary arithmetic operators.
2127 ((self
._prev
_prev
_item
and
2128 (prev_text
not in '+-' and
2129 (self
._prev
_prev
_item
.is_name
or
2130 self
._prev
_prev
_item
.is_number
or
2131 self
._prev
_prev
_item
.is_string
)) and
2132 prev_text
in ('+', '-', '%', '*', '/', '//', '**', 'in')))))
2134 self
._lines
.append(self
._Space
())

    def previous_item(self):
        """Return the previous non-whitespace item."""
        return self._prev_item

    def fits_on_current_line(self, item_extent):
        return self.current_size() + item_extent <= self._max_line_length

    def current_size(self):
        """The size of the current line minus the indentation."""
        size = 0
        for item in reversed(self._lines):
            size += item.size
            if isinstance(item, self._LineBreak):
                break

        return size

    def line_empty(self):
        return (self._lines and
                isinstance(self._lines[-1],
                           (self._LineBreak, self._Indent)))

    def emit(self):
        string = ''
        for item in self._lines:
            if isinstance(item, self._LineBreak):
                string = string.rstrip()
            string += item.emit()

        return string.rstrip() + '\n'

    ###########################################################################
    # Private Methods

    def _add_item(self, item, indent_amt):
        """Add an item to the line.

        Reflow the line to get the best formatting after the item is
        inserted. The bracket depth indicates if the item is being
        inserted inside of a container or not.

        """
        if self._prev_item and self._prev_item.is_string and item.is_string:
            # Place consecutive string literals on separate lines.
            self._lines.append(self._LineBreak())
            self._lines.append(self._Indent(indent_amt))

        item_text = str(item)
        if self._lines and self._bracket_depth:
            # Adding the item into a container.
            self._prevent_default_initializer_splitting(item, indent_amt)

            if item_text in '.,)]}':
                self._split_after_delimiter(item, indent_amt)

        elif self._lines and not self.line_empty():
            # Adding the item outside of a container.
            if self.fits_on_current_line(len(item_text)):
                self._enforce_space(item)
            else:
                # Line break for the new item.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))

        self._lines.append(item)
        self._prev_item, self._prev_prev_item = item, self._prev_item

        if item_text in '([{':
            self._bracket_depth += 1

        elif item_text in '}])':
            self._bracket_depth -= 1
            assert self._bracket_depth >= 0

    def _add_container(self, container, indent_amt, break_after_open_bracket):
        actual_indent = indent_amt + 1

        if (
            str(self._prev_item) != '=' and
            not self.line_empty() and
            not self.fits_on_current_line(
                container.size + self._bracket_depth + 2)
        ):
            if str(container)[0] == '(' and self._prev_item.is_name:
                # Don't split before the opening bracket of a call.
                break_after_open_bracket = True
                actual_indent = indent_amt + 4
            elif (
                break_after_open_bracket or
                str(self._prev_item) not in '([{'
            ):
                # If the container doesn't fit on the current line and the
                # current line isn't empty, place the container on the next
                # line.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))
                break_after_open_bracket = False
        else:
            actual_indent = self.current_size() + 1
            break_after_open_bracket = False

        if isinstance(container, (ListComprehension, IfExpression)):
            actual_indent = indent_amt

        # Increase the continued indentation only if recursing on a
        # container.
        container.reflow(self, ' ' * actual_indent,
                         break_after_open_bracket=break_after_open_bracket)

    def _prevent_default_initializer_splitting(self, item, indent_amt):
        """Prevent splitting between a default initializer.

        When there is a default initializer, it's best to keep it all on
        the same line. It's nicer and more readable, even if it goes
        over the maximum allowable line length. This goes back along the
        current line to determine if we have a default initializer, and,
        if so, to remove extraneous whitespaces and add a line
        break/indent before it if needed.

        """
        if str(item) == '=':
            # This is the assignment in the initializer. Just remove spaces
            # for now.
            self._delete_whitespace()
            return

        if (not self._prev_item or not self._prev_prev_item or
                str(self._prev_item) != '='):
            return

        self._delete_whitespace()
        prev_prev_index = self._lines.index(self._prev_prev_item)

        if (
            isinstance(self._lines[prev_prev_index - 1], self._Indent) or
            self.fits_on_current_line(item.size + 1)
        ):
            # The default initializer is already the only item on this line.
            # Don't insert a newline here.
            return

        # Replace the space with a newline/indent combo.
        if isinstance(self._lines[prev_prev_index - 1], self._Space):
            del self._lines[prev_prev_index - 1]

        self.add_line_break_at(self._lines.index(self._prev_prev_item),
                               indent_amt)

    def _split_after_delimiter(self, item, indent_amt):
        """Split the line only after a delimiter."""
        self._delete_whitespace()

        if self.fits_on_current_line(item.size):
            return

        last_space = None
        for current_item in reversed(self._lines):
            if (
                last_space and
                (not isinstance(current_item, Atom) or
                 not current_item.is_colon)
            ):
                break
            else:
                last_space = None
            if isinstance(current_item, self._Space):
                last_space = current_item
            if isinstance(current_item, (self._LineBreak, self._Indent)):
                return

        if not last_space:
            return

        self.add_line_break_at(self._lines.index(last_space), indent_amt)

    def _enforce_space(self, item):
        """Enforce a space in certain situations.

        There are cases where we will want a space where normally we
        wouldn't put one. This just enforces the addition of a space.

        """
        if isinstance(self._lines[-1],
                      (self._Space, self._LineBreak, self._Indent)):
            return

        if not self._prev_item:
            return

        item_text = str(item)
        prev_text = str(self._prev_item)

        # Prefer a space around a '.' in an import statement, and between the
        # 'import' and '('.
        if (
            (item_text == '.' and prev_text == 'from') or
            (item_text == 'import' and prev_text == '.') or
            (item_text == '(' and prev_text == 'import')
        ):
            self._lines.append(self._Space())

    def _delete_whitespace(self):
        """Delete all whitespace from the end of the line."""
        while isinstance(self._lines[-1], (self._Space, self._LineBreak,
                                           self._Indent)):
            del self._lines[-1]
2348 """The smallest unbreakable unit that can be reflowed."""
2350 def __init__(self
, atom
):
2354 return self
._atom
.token_string
2360 self
, reflowed_lines
, continued_indent
, extent
,
2361 break_after_open_bracket
=False,
2362 is_list_comp_or_if_expr
=False,
2365 if self
._atom
.token_type
== tokenize
.COMMENT
:
2366 reflowed_lines
.add_comment(self
)
2369 total_size
= extent
if extent
else self
.size
2371 if self
._atom
.token_string
not in ',:([{}])':
2372 # Some atoms will need an extra 1-sized space token after them.
2375 prev_item
= reflowed_lines
.previous_item()
2377 not is_list_comp_or_if_expr
and
2378 not reflowed_lines
.fits_on_current_line(total_size
) and
2379 not (next_is_dot
and
2380 reflowed_lines
.fits_on_current_line(self
.size
+ 1)) and
2381 not reflowed_lines
.line_empty() and
2382 not self
.is_colon
and
2383 not (prev_item
and prev_item
.is_name
and
2386 # Start a new line if there is already something on the line and
2387 # adding this atom would make it go over the max line length.
2388 reflowed_lines
.add_line_break(continued_indent
)
2390 reflowed_lines
.add_space_if_needed(str(self
))
2392 reflowed_lines
.add(self
, len(continued_indent
),
2393 break_after_open_bracket
)
2396 return self
.__repr
__()
2399 def is_keyword(self
):
2400 return keyword
.iskeyword(self
._atom
.token_string
)
2403 def is_string(self
):
2404 return self
._atom
.token_type
== tokenize
.STRING
2408 return self
._atom
.token_type
== tokenize
.NAME
2411 def is_number(self
):
2412 return self
._atom
.token_type
== tokenize
.NUMBER
2416 return self
._atom
.token_string
== ','
2420 return self
._atom
.token_string
== ':'
2424 return len(self
._atom
.token_string
)


class Container(object):

    """Base class for all container types."""

    def __init__(self, items):
        self._items = items

    def __repr__(self):
        string = ''
        last_was_keyword = False

        for item in self._items:
            if item.is_comma:
                string += ', '
            elif item.is_colon:
                string += ': '
            else:
                item_string = str(item)
                if (
                    string and
                    (last_was_keyword or
                     (not string.endswith(tuple('([{,.:}]) ')) and
                      not item_string.startswith(tuple('([{,.:}])'))))
                ):
                    string += ' '
                string += item_string

            last_was_keyword = item.is_keyword
        return string

    def __iter__(self):
        for element in self._items:
            yield element

    def __getitem__(self, idx):
        return self._items[idx]

    def reflow(self, reflowed_lines, continued_indent,
               break_after_open_bracket=False):
        last_was_container = False
        for (index, item) in enumerate(self._items):
            next_item = get_item(self._items, index + 1)

            if isinstance(item, Atom):
                is_list_comp_or_if_expr = (
                    isinstance(self, (ListComprehension, IfExpression)))
                item.reflow(reflowed_lines, continued_indent,
                            self._get_extent(index),
                            is_list_comp_or_if_expr=is_list_comp_or_if_expr,
                            next_is_dot=(next_item and
                                         str(next_item) == '.'))
                if last_was_container and item.is_comma:
                    reflowed_lines.add_line_break(continued_indent)
                last_was_container = False
            else:  # isinstance(item, Container)
                reflowed_lines.add(item, len(continued_indent),
                                   break_after_open_bracket)
                last_was_container = not isinstance(item, (ListComprehension,
                                                           IfExpression))

            if (
                break_after_open_bracket and index == 0 and
                # Prefer to keep empty containers together instead of
                # separating them.
                str(item) == self.open_bracket and
                (not next_item or str(next_item) != self.close_bracket) and
                (len(self._items) != 3 or not isinstance(next_item, Atom))
            ):
                reflowed_lines.add_line_break(continued_indent)
                break_after_open_bracket = False
            else:
                next_next_item = get_item(self._items, index + 2)
                if (
                    str(item) not in ['.', '%', 'in'] and
                    next_item and not isinstance(next_item, Container) and
                    str(next_item) != ':' and
                    next_next_item and (not isinstance(next_next_item, Atom) or
                                        str(next_item) == 'not') and
                    not reflowed_lines.line_empty() and
                    not reflowed_lines.fits_on_current_line(
                        self._get_extent(index + 1) + 2)
                ):
                    reflowed_lines.add_line_break(continued_indent)

    def _get_extent(self, index):
        """The extent of the full element.

        E.g., the length of a function call or keyword.

        """
        extent = 0
        prev_item = get_item(self._items, index - 1)
        seen_dot = prev_item and str(prev_item) == '.'
        while index < len(self._items):
            item = get_item(self._items, index)
            index += 1

            if isinstance(item, (ListComprehension, IfExpression)):
                break

            if isinstance(item, Container):
                if prev_item and prev_item.is_name:
                    if seen_dot:
                        extent += 1
                    else:
                        extent += item.size

                    prev_item = item
                    continue
            elif (str(item) not in ['.', '=', ':', 'not'] and
                  not item.is_name and not item.is_string):
                break

            if str(item) == '.':
                seen_dot = True

            extent += item.size
            prev_item = item

        return extent

    @property
    def is_string(self):
        return False

    @property
    def size(self):
        return len(self.__repr__())

    @property
    def is_keyword(self):
        return False

    @property
    def is_name(self):
        return False

    @property
    def is_comma(self):
        return False

    @property
    def is_colon(self):
        return False

    @property
    def open_bracket(self):
        return None

    @property
    def close_bracket(self):
        return None


class Tuple(Container):

    """A high-level representation of a tuple."""

    @property
    def open_bracket(self):
        return '('

    @property
    def close_bracket(self):
        return ')'


class List(Container):

    """A high-level representation of a list."""

    @property
    def open_bracket(self):
        return '['

    @property
    def close_bracket(self):
        return ']'


class DictOrSet(Container):

    """A high-level representation of a dictionary or set."""

    @property
    def open_bracket(self):
        return '{'

    @property
    def close_bracket(self):
        return '}'


class ListComprehension(Container):

    """A high-level representation of a list comprehension."""

    @property
    def size(self):
        length = 0
        for item in self._items:
            if isinstance(item, IfExpression):
                break
            length += item.size
        return length


class IfExpression(Container):

    """A high-level representation of an if-expression."""


def _parse_container(tokens, index, for_or_if=None):
    """Parse a high-level container, such as a list, tuple, etc."""

    # Store the opening bracket.
    items = [Atom(Token(*tokens[index]))]
    index += 1

    num_tokens = len(tokens)
    while index < num_tokens:
        tok = Token(*tokens[index])

        if tok.token_string in ',)]}':
            # First check if we're at the end of a list comprehension or
            # if-expression. Don't add the ending token as part of the list
            # comprehension or if-expression, because they aren't part of
            # those constructs.
            if for_or_if == 'for':
                return (ListComprehension(items), index - 1)

            elif for_or_if == 'if':
                return (IfExpression(items), index - 1)

            # We've reached the end of a container.
            items.append(Atom(tok))

            # If not, then we are at the end of a container.
            if tok.token_string == ')':
                # The end of a tuple.
                return (Tuple(items), index)

            elif tok.token_string == ']':
                # The end of a list.
                return (List(items), index)

            elif tok.token_string == '}':
                # The end of a dictionary or set.
                return (DictOrSet(items), index)

        elif tok.token_string in '([{':
            # A sub-container is being defined.
            (container, index) = _parse_container(tokens, index)
            items.append(container)

        elif tok.token_string == 'for':
            (container, index) = _parse_container(tokens, index, 'for')
            items.append(container)

        elif tok.token_string == 'if':
            (container, index) = _parse_container(tokens, index, 'if')
            items.append(container)

        else:
            items.append(Atom(tok))

        index += 1

    return (None, None)


def _parse_tokens(tokens):
    """Parse the tokens.

    This converts the tokens into a form where we can manipulate them
    more easily.

    """
    index = 0
    parsed_tokens = []

    num_tokens = len(tokens)
    while index < num_tokens:
        tok = Token(*tokens[index])

        assert tok.token_type != token.INDENT
        if tok.token_type == tokenize.NEWLINE:
            # There's only one newline and it's at the end.
            break

        if tok.token_string in '([{':
            (container, index) = _parse_container(tokens, index)
            if not container:
                return None
            parsed_tokens.append(container)
        else:
            parsed_tokens.append(Atom(tok))

        index += 1

    return parsed_tokens
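
# Illustrative sketch (added; not part of the original source): for a short
# call expression, _parse_tokens() is expected to group everything between
# the brackets into a single container. Roughly:
#
#     >>> tokens = list(generate_tokens('foo(1, 2)\n'))
#     >>> [type(item).__name__ for item in _parse_tokens(tokens)]
#     ['Atom', 'Tuple']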


def _reflow_lines(parsed_tokens, indentation, max_line_length,
                  start_on_prefix_line):
    """Reflow the lines so that it looks nice."""

    if str(parsed_tokens[0]) == 'def':
        # A function definition gets indented a bit more.
        continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
    else:
        continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE

    break_after_open_bracket = not start_on_prefix_line

    lines = ReformattedLines(max_line_length)
    lines.add_indent(len(indentation.lstrip('\r\n')))

    if not start_on_prefix_line:
        # If splitting after the opening bracket will cause the first element
        # to be aligned weirdly, don't try it.
        first_token = get_item(parsed_tokens, 0)
        second_token = get_item(parsed_tokens, 1)

        if (
            first_token and second_token and
            str(second_token)[0] == '(' and
            len(indentation) + len(first_token) + 1 == len(continued_indent)
        ):
            return None

    for item in parsed_tokens:
        lines.add_space_if_needed(str(item), equal=True)

        save_continued_indent = continued_indent
        if start_on_prefix_line and isinstance(item, Container):
            start_on_prefix_line = False
            continued_indent = ' ' * (lines.current_size() + 1)

        item.reflow(lines, continued_indent, break_after_open_bracket)
        continued_indent = save_continued_indent

    return lines.emit()
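
# Illustrative sketch (assumption: typical inputs, not taken from the original
# source): _shorten_line_at_tokens_new() below calls this with both values of
# start_on_prefix_line, producing the two candidate layouts that get ranked:
#
#     foo(a,        # start_on_prefix_line=True: hang from the prefix
#         b)
#
#     foo(
#         a, b)     # start_on_prefix_line=False: break after the bracket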


def _shorten_line_at_tokens_new(tokens, source, indentation,
                                max_line_length):
    """Shorten the line taking its length into account.

    The input is expected to be free of newlines except for inside
    multiline strings and at the end.

    """
    # Yield the original source so to see if it's a better choice than the
    # shortened candidate lines we generate here.
    yield indentation + source

    parsed_tokens = _parse_tokens(tokens)

    if parsed_tokens:
        # Perform two reflows. The first one starts on the same line as the
        # prefix. The second starts on the line after the prefix.
        fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
                              start_on_prefix_line=True)
        if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
            yield fixed

        fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
                              start_on_prefix_line=False)
        if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
            yield fixed


def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
                            key_token_strings, aggressive):
    """Separate line by breaking at tokens in key_token_strings.

    The input is expected to be free of newlines except for inside
    multiline strings and at the end.

    """
    offsets = []
    for (index, _t) in enumerate(token_offsets(tokens)):
        (token_type,
         token_string,
         start_offset,
         end_offset) = _t

        assert token_type != token.INDENT

        if token_string in key_token_strings:
            # Do not break in containers with zero or one items.
            unwanted_next_token = {
                '(': ')',
                '[': ']',
                '{': '}'}.get(token_string)
            if unwanted_next_token:
                if (
                    get_item(tokens, index + 1,
                             default=[None, None])[1] == unwanted_next_token or
                    get_item(tokens, index + 2,
                             default=[None, None])[1] == unwanted_next_token
                ):
                    continue

            if (
                index > 2 and token_string == '(' and
                tokens[index - 1][1] in ',(%['
            ):
                # Don't split after a tuple start, or before a tuple start if
                # the tuple is in a list.
                continue

            if end_offset < len(source) - 1:
                # Don't split right before newline.
                offsets.append(end_offset)
        else:
            # Break at adjacent strings. These were probably meant to be on
            # separate lines in the first place.
            previous_token = get_item(tokens, index - 1)
            if (
                token_type == tokenize.STRING and
                previous_token and previous_token[0] == tokenize.STRING
            ):
                offsets.append(start_offset)

    current_indent = None
    fixed = None
    for line in split_at_offsets(source, offsets):
        if fixed:
            fixed += '\n' + current_indent + line

            for symbol in '([{':
                if line.endswith(symbol):
                    current_indent += indent_word
        else:
            # The first line.
            fixed = line
            assert not current_indent
            current_indent = indent_word

    assert fixed is not None

    if check_syntax(normalize_multiline(fixed)
                    if aggressive > 1 else fixed):
        return indentation + fixed

    return None


def token_offsets(tokens):
    """Yield tokens and offsets."""
    end_offset = 0
    previous_end_row = 0
    previous_end_column = 0
    for t in tokens:
        token_type = t[0]
        token_string = t[1]
        (start_row, start_column) = t[2]
        (end_row, end_column) = t[3]

        # Account for the whitespace between tokens.
        end_offset += start_column
        if previous_end_row == start_row:
            end_offset -= previous_end_column

        # Record the start offset of the token.
        start_offset = end_offset

        # Account for the length of the token itself.
        end_offset += len(token_string)

        yield (token_type,
               token_string,
               start_offset,
               end_offset)

        previous_end_row = end_row
        previous_end_column = end_column
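
# Worked example (added for illustration): for the single-line source
# "x = 1", token_offsets() yields roughly
#     (NAME, 'x', 0, 1), (OP, '=', 2, 3), (NUMBER, '1', 4, 5), ...
# i.e. character offsets into the line, with inter-token whitespace covered
# by the start_column/previous_end_column arithmetic above.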


def normalize_multiline(line):
    """Normalize multiline-related code that will cause syntax error.

    This is for purposes of checking syntax.

    """
    if line.startswith('def ') and line.rstrip().endswith(':'):
        return line + ' pass'
    elif line.startswith('return '):
        return 'def _(): ' + line
    elif line.startswith('@'):
        return line + 'def _(): pass'
    elif line.startswith('class '):
        return line + ' pass'
    elif line.startswith(('if ', 'elif ', 'for ', 'while ')):
        return line + ' pass'

    return line
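
# Worked examples (added for illustration): the rewrites keep compile()
# happy on fragments that are not stand-alone statements:
#
#     >>> normalize_multiline('def foo(x):')
#     'def foo(x): pass'
#     >>> normalize_multiline('return x')
#     'def _(): return x'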


def fix_whitespace(line, offset, replacement):
    """Replace whitespace at offset and return fixed line."""
    # Replace escaped newlines too.
    left = line[:offset].rstrip('\n\r \t\\')
    right = line[offset:].lstrip('\n\r \t\\')
    if right.startswith('#'):
        return line
    else:
        return left + replacement + right
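
# Worked example (added for illustration): the run of whitespace around the
# offset is collapsed into the replacement:
#
#     >>> fix_whitespace('x  = 1', offset=1, replacement=' ')
#     'x = 1'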


def _execute_pep8(pep8_options, source):
    """Execute pycodestyle via python method calls."""
    class QuietReport(pycodestyle.BaseReport):

        """Version of checker that does not print."""

        def __init__(self, options):
            super(QuietReport, self).__init__(options)
            self.__full_error_results = []

        def error(self, line_number, offset, text, check):
            """Collect errors."""
            code = super(QuietReport, self).error(line_number,
                                                  offset,
                                                  text,
                                                  check)
            if code:
                self.__full_error_results.append(
                    {'id': code,
                     'line': line_number,
                     'column': offset + 1,
                     'info': text})

        def full_error_results(self):
            """Return error results in detail.

            Results are in the form of a list of dictionaries. Each
            dictionary contains 'id', 'line', 'column', and 'info'.

            """
            return self.__full_error_results

    checker = pycodestyle.Checker('', lines=source, reporter=QuietReport,
                                  **pep8_options)
    checker.check_all()
    return checker.report.full_error_results()


def _remove_leading_and_normalize(line, with_rstrip=True):
    # ignore FF in first lstrip()
    if with_rstrip:
        return line.lstrip(' \t\v').rstrip(CR + LF) + '\n'
    return line.lstrip(' \t\v')


class Reindenter(object):

    """Reindents badly-indented code to uniformly use four-space indentation.

    Released to the public domain, by Tim Peters, 03 October 2000.

    """

    def __init__(self, input_text, leave_tabs=False):
        sio = io.StringIO(input_text)
        source_lines = sio.readlines()

        self.string_content_line_numbers = multiline_string_lines(input_text)

        # File lines, rstripped & tab-expanded. Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it is a newline.
        self.lines = []
        for line_number, line in enumerate(source_lines, start=1):
            # Do not modify if inside a multiline string.
            if line_number in self.string_content_line_numbers:
                self.lines.append(line)
            else:
                # Only expand leading tabs.
                with_rstrip = line_number != len(source_lines)
                if leave_tabs:
                    self.lines.append(
                        _get_indentation(line) +
                        _remove_leading_and_normalize(line, with_rstrip))
                else:
                    self.lines.append(
                        _get_indentation(line).expandtabs() +
                        _remove_leading_and_normalize(line, with_rstrip))

        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line
        self.input_text = input_text

    def run(self, indent_size=DEFAULT_INDENT_SIZE):
        """Fix indentation and return modified line numbers.

        Line numbers are indexed at 1.

        """
        if indent_size < 1:
            return self.input_text

        try:
            stats = _reindent_stats(tokenize.generate_tokens(self.getline))
        except (SyntaxError, tokenize.TokenError):
            return self.input_text
        # Remove trailing empty lines.
        lines = self.lines
        # Sentinel.
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = _leading_space_count(lines[thisstmt])
            want = thislevel * indent_size
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line. If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == _leading_space_count(lines[jline]):
                                    want = jlevel * indent_size
                                break
                    # Maybe it's a hanging comment like this one,
                    if want < 0:
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + _leading_space_count(
                                    after[jline - 1]) -
                                    _leading_space_count(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line_number, line in enumerate(lines[thisstmt:nextstmt],
                                                   start=thisstmt):
                    if line_number in self.string_content_line_numbers:
                        after.append(line)
                    elif diff > 0:
                        if line == '\n':
                            after.append(line)
                        else:
                            after.append(' ' * diff + line)
                    else:
                        remove = min(_leading_space_count(line), -diff)
                        after.append(line[remove:])

        return ''.join(after)

    def getline(self):
        """Line-getter for tokenize."""
        if self.index >= len(self.lines):
            line = ''
        else:
            line = self.lines[self.index]
            self.index += 1
        return line


def _reindent_stats(tokens):
    """Return list of (lineno, indentlevel) pairs.

    One for each stmt and comment line. indentlevel is -1 for comment
    lines, as a signal that tokenize doesn't know what to do about them;
    indeed, they're our headache!

    """
    find_stmt = 1  # Next token begins a fresh stmt?
    level = 0  # Current indent level.
    stats = []

    for t in tokens:
        token_type = t[0]
        sline = t[2][0]
        line = t[4]

        if token_type == tokenize.NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            # (NL | COMMENT)* (INDENT | DEDENT+)?
            find_stmt = 1
        elif token_type == tokenize.INDENT:
            find_stmt = 1
            level += 1
        elif token_type == tokenize.DEDENT:
            find_stmt = 1
            level -= 1
        elif token_type == tokenize.COMMENT:
            if find_stmt:
                stats.append((sline, -1))
                # But we're still looking for a new stmt, so leave
                # find_stmt alone.
        elif token_type == tokenize.NL:
            pass
        elif find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            find_stmt = 0
            if line:  # Not endmarker.
                stats.append((sline, level))

    return stats


def _leading_space_count(line):
    """Return number of leading spaces in line."""
    i = 0
    while i < len(line) and line[i] == ' ':
        i += 1
    return i


def refactor_with_2to3(source_text, fixer_names, filename=''):
    """Use lib2to3 to refactor the source.

    Return the refactored source code.

    """
    from lib2to3.refactor import RefactoringTool
    fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
    tool = RefactoringTool(fixer_names=fixers, explicit=fixers)

    from lib2to3.pgen2 import tokenize as lib2to3_tokenize
    try:
        # The name parameter is necessary particularly for the "import" fixer.
        return str(tool.refactor_string(source_text, name=filename))
    except lib2to3_tokenize.TokenError:
        return source_text


def check_syntax(code):
    """Return True if syntax is okay."""
    try:
        return compile(code, '<string>', 'exec', dont_inherit=True)
    except (SyntaxError, TypeError, ValueError):
        return False


def find_with_line_numbers(pattern, contents):
    """A wrapper around 're.finditer' to find line numbers.

    Returns a list of line numbers where pattern was found in contents.

    """
    matches = list(re.finditer(pattern, contents))
    if not matches:
        return []

    end = matches[-1].start()

    # -1 so a failed `rfind` maps to the first line.
    newline_offsets = {-1: 0}
    for line_num, m in enumerate(re.finditer(r'\n', contents), 1):
        offset = m.start()
        if offset > end:
            break
        newline_offsets[offset] = line_num

    def get_line_num(match, contents):
        """Get the line number of a match in a file's contents.

        Failing to find the newline is OK; -1 maps to 0.

        """
        newline_offset = contents.rfind('\n', 0, match.start())
        return newline_offsets[newline_offset]

    return [get_line_num(match, contents) + 1 for match in matches]
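
# Worked example (added for illustration); line numbers come out 1-indexed
# because of the "+ 1" above:
#
#     >>> find_with_line_numbers(r'b', 'a\nb\nb')
#     [2, 3]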


def get_disabled_ranges(source):
    """Return a list of tuples representing the disabled ranges.

    If a disable comment is never re-enabled, the range extends to the
    end of the file.

    """
    enable_line_nums = find_with_line_numbers(ENABLE_REGEX, source)
    disable_line_nums = find_with_line_numbers(DISABLE_REGEX, source)
    total_lines = len(re.findall("\n", source)) + 1

    enable_commands = {}
    for num in enable_line_nums:
        enable_commands[num] = True
    for num in disable_line_nums:
        enable_commands[num] = False

    disabled_ranges = []
    currently_enabled = True
    disabled_start = None

    for line, commanded_enabled in sorted(enable_commands.items()):
        if commanded_enabled is False and currently_enabled is True:
            disabled_start = line
            currently_enabled = False
        elif commanded_enabled is True and currently_enabled is False:
            disabled_ranges.append((disabled_start, line))
            currently_enabled = True

    if currently_enabled is False:
        disabled_ranges.append((disabled_start, total_lines))

    return disabled_ranges
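
# Illustrative sketch (added; behavior inferred from the code above): with
# the module-level DISABLE_REGEX/ENABLE_REGEX matching the off/on control
# comments, a file that disables fixing on line 3 and re-enables it on
# line 7 yields [(3, 7)], while a trailing "off" with no matching "on"
# disables through total_lines.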


def filter_disabled_results(result, disabled_ranges):
    """Filter out reports based on tuple of disabled ranges."""
    line = result['line']
    for disabled_range in disabled_ranges:
        if disabled_range[0] <= line <= disabled_range[1]:
            return False
    return True


def filter_results(source, results, aggressive):
    """Filter out spurious reports from pycodestyle.

    If aggressive is True, we allow possibly unsafe fixes (E711, E712).

    """
    non_docstring_string_line_numbers = multiline_string_lines(
        source, include_docstrings=False)
    all_string_line_numbers = multiline_string_lines(
        source, include_docstrings=True)

    commented_out_code_line_numbers = commented_out_code_lines(source)

    # Filter out the disabled ranges.
    disabled_ranges = get_disabled_ranges(source)
    if disabled_ranges:
        results = [
            result for result in results if filter_disabled_results(
                result, disabled_ranges)]

    has_e901 = any(result['id'].lower() == 'e901' for result in results)

    for r in results:
        issue_id = r['id'].lower()

        if r['line'] in non_docstring_string_line_numbers:
            if issue_id.startswith(('e1', 'e501', 'w191')):
                continue

        if r['line'] in all_string_line_numbers:
            if issue_id in ['e501']:
                continue

        # We must offset by 1 for lines that contain the trailing contents of
        # multiline strings.
        if not aggressive and (r['line'] + 1) in all_string_line_numbers:
            # Do not modify multiline strings in non-aggressive mode. Removing
            # trailing whitespace could break doctests.
            if issue_id.startswith(('w29', 'w39')):
                continue

        if aggressive <= 0:
            if issue_id.startswith(('e711', 'e72', 'w6')):
                continue

        if aggressive <= 1:
            if issue_id.startswith(('e712', 'e713', 'e714')):
                continue

        if aggressive <= 2:
            if issue_id.startswith(('e704')):
                continue

        if r['line'] in commented_out_code_line_numbers:
            if issue_id.startswith(('e261', 'e262', 'e501')):
                continue

        # Do not touch indentation if there is a token error caused by
        # incomplete multi-line statement. Otherwise, we risk screwing up the
        # indentation.
        if has_e901:
            if issue_id.startswith(('e1', 'e7')):
                continue

        yield r


def multiline_string_lines(source, include_docstrings=False):
    """Return line numbers that are within multiline strings.

    The line numbers are indexed at 1.

    Docstrings are ignored.

    """
    line_numbers = set()
    previous_token_type = ''
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            start_row = t[2][0]
            end_row = t[3][0]

            if token_type == tokenize.STRING and start_row != end_row:
                if (
                    include_docstrings or
                    previous_token_type != tokenize.INDENT
                ):
                    # We increment by one since we want the contents of the
                    # string.
                    line_numbers |= set(range(1 + start_row, 1 + end_row))

            previous_token_type = token_type
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers


def commented_out_code_lines(source):
    """Return line numbers of comments that are likely code.

    Commented-out code is bad practice, but modifying it just adds even
    more clutter.

    """
    line_numbers = []
    try:
        for t in generate_tokens(source):
            token_type = t[0]
            token_string = t[1]
            start_row = t[2][0]
            line = t[4]

            # Ignore inline comments.
            if not line.lstrip().startswith('#'):
                continue

            if token_type == tokenize.COMMENT:
                stripped_line = token_string.lstrip('#').strip()
                with warnings.catch_warnings():
                    # ignore SyntaxWarning in Python3.8+
                    # https://bugs.python.org/issue15248
                    # https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
                    warnings.filterwarnings("ignore", category=SyntaxWarning)
                    if (
                        ' ' in stripped_line and
                        '#' not in stripped_line and
                        check_syntax(stripped_line)
                    ):
                        line_numbers.append(start_row)
    except (SyntaxError, tokenize.TokenError):
        pass

    return line_numbers


def shorten_comment(line, max_line_length, last_comment=False):
    """Return trimmed or split long comment line.

    If there are no comments immediately following it, do a text wrap.
    Doing this wrapping on all comments in general would lead to jagged
    comment text.

    """
    assert len(line) > max_line_length
    line = line.rstrip()

    # PEP 8 recommends 72 characters for comment text.
    indentation = _get_indentation(line) + '# '
    max_line_length = min(max_line_length,
                          len(indentation) + 72)

    MIN_CHARACTER_REPEAT = 5
    if (
        len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
        not line[-1].isalnum()
    ):
        # Trim comments that end with things like ---------
        return line[:max_line_length] + '\n'
    elif last_comment and re.match(r'\s*#+\s*\w+', line):
        split_lines = textwrap.wrap(line.lstrip(' \t#'),
                                    initial_indent=indentation,
                                    subsequent_indent=indentation,
                                    width=max_line_length,
                                    break_long_words=False,
                                    break_on_hyphens=False)
        return '\n'.join(split_lines) + '\n'

    return line + '\n'


def normalize_line_endings(lines, newline):
    """Return fixed line endings.

    All lines will be modified to use the most common line ending.

    """
    line = [line.rstrip('\n\r') + newline for line in lines]
    if line and lines[-1] == lines[-1].rstrip('\n\r'):
        line[-1] = line[-1].rstrip('\n\r')
    return line
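
# Worked example (added for illustration): mixed endings are unified, and a
# final line with no terminator stays unterminated:
#
#     >>> normalize_line_endings(['a\r\n', 'b\n', 'c'], '\n')
#     ['a\n', 'b\n', 'c']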


def mutual_startswith(a, b):
    return b.startswith(a) or a.startswith(b)


def code_match(code, select, ignore):
    if ignore:
        assert not isinstance(ignore, str)
        for ignored_code in [c.strip() for c in ignore]:
            if mutual_startswith(code.lower(), ignored_code.lower()):
                return False

    if select:
        assert not isinstance(select, str)
        for selected_code in [c.strip() for c in select]:
            if mutual_startswith(code.lower(), selected_code.lower()):
                return True
        return False

    return True
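
# Worked examples (added for illustration): matching is prefix-based in both
# directions, so 'E5' selects 'E501' and vice versa:
#
#     >>> code_match('E501', select=['E5'], ignore=[])
#     True
#     >>> code_match('E501', select=[], ignore=['E501'])
#     False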


def fix_code(source, options=None, encoding=None, apply_config=False):
    """Return fixed source code.

    "encoding" will be used to decode "source" if it is a byte string.

    """
    options = _get_options(options, apply_config)
    options.ignore = [opt.upper() for opt in options.ignore]
    options.select = [opt.upper() for opt in options.select]

    # NOTE: If W50x is not included, add W50x because the code
    # correction result is indefinite.
    ignore_opt = options.ignore
    if not {"W50", "W503", "W504"} & set(ignore_opt):
        options.ignore.append("W50")

    if not isinstance(source, str):
        source = source.decode(encoding or get_encoding())

    sio = io.StringIO(source)
    return fix_lines(sio.readlines(), options=options)
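
# Hedged usage sketch (added; the aggressive example assumes the aggressive
# fixes shown in the project README are enabled in this build):
#
#     >>> import autopep8
#     >>> autopep8.fix_code('x=       123\n')
#     'x = 123\n'
#     >>> autopep8.fix_code('x.has_key(y)\n', options={'aggressive': 1})
#     'y in x\n'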


def _get_options(raw_options, apply_config):
    """Return parsed options."""
    if not raw_options:
        return parse_args([''], apply_config=apply_config)

    if isinstance(raw_options, dict):
        options = parse_args([''], apply_config=apply_config)
        for name, value in raw_options.items():
            if not hasattr(options, name):
                raise ValueError("No such option '{}'".format(name))

            # Check for very basic type errors.
            expected_type = type(getattr(options, name))
            if not isinstance(expected_type, (str, )):
                if isinstance(value, (str, )):
                    raise ValueError(
                        "Option '{}' should not be a string".format(name))
            setattr(options, name, value)
    else:
        options = raw_options

    return options


def fix_lines(source_lines, options, filename=''):
    """Return fixed source code."""
    # Transform everything to line feed. Then change them back to original
    # before returning fixed source code.
    original_newline = find_newline(source_lines)
    tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))

    # Keep a history to break out of cycles.
    previous_hashes = set()

    if options.line_range:
        # Disable "apply_local_fixes()" for now due to issue #175.
        fixed_source = tmp_source
    else:
        # Apply global fixes only once (for efficiency).
        fixed_source = apply_global_fixes(tmp_source,
                                          options,
                                          filename=filename)

    passes = 0
    long_line_ignore_cache = set()
    while hash(fixed_source) not in previous_hashes:
        if options.pep8_passes >= 0 and passes > options.pep8_passes:
            break
        passes += 1

        previous_hashes.add(hash(fixed_source))

        tmp_source = copy.copy(fixed_source)

        fix = FixPEP8(
            filename,
            options,
            contents=tmp_source,
            long_line_ignore_cache=long_line_ignore_cache)

        fixed_source = fix.fix()

    sio = io.StringIO(fixed_source)
    return ''.join(normalize_line_endings(sio.readlines(), original_newline))


def fix_file(filename, options=None, output=None, apply_config=False):
    if not options:
        options = parse_args([filename], apply_config=apply_config)

    original_source = readlines_from_file(filename)

    fixed_source = original_source

    if options.in_place or options.diff or output:
        encoding = detect_encoding(filename)

    if output:
        output = LineEndingWrapper(wrap_output(output, encoding=encoding))

    fixed_source = fix_lines(fixed_source, options, filename=filename)

    if options.diff:
        new = io.StringIO(fixed_source)
        new = new.readlines()
        diff = get_diff_text(original_source, new, filename)
        if output:
            output.write(diff)
            output.flush()
        elif options.jobs > 1:
            diff = diff.encode(encoding)
        return diff
    elif options.in_place:
        original = "".join(original_source).splitlines()
        fixed = fixed_source.splitlines()
        original_source_last_line = (
            original_source[-1].split("\n")[-1] if original_source else ""
        )
        fixed_source_last_line = fixed_source.split("\n")[-1]
        if original != fixed or (
            original_source_last_line != fixed_source_last_line
        ):
            with open_with_encoding(filename, 'w', encoding=encoding) as fp:
                fp.write(fixed_source)
            return fixed_source
        return None
    else:
        if output:
            output.write(fixed_source)
            output.flush()
        return None
3629 """Yield multiple (code, function) tuples."""
3630 for function
in list(globals().values()):
3631 if inspect
.isfunction(function
):
3632 arguments
= _get_parameters(function
)
3633 if arguments
[:1] != ['source']:
3636 code
= extract_code_from_function(function
)
3638 yield (code
, function
)


def _get_parameters(function):
    # pylint: disable=deprecated-method
    if sys.version_info.major >= 3:
        # We need to match "getargspec()", which includes "self" as the first
        # value for methods.
        # https://bugs.python.org/issue17481#msg209469
        if inspect.ismethod(function):
            function = function.__func__

        return list(inspect.signature(function).parameters)
    else:
        return inspect.getargspec(function)[0]


def apply_global_fixes(source, options, where='global', filename='',
                       codes=None):
    """Run global fixes on source code.

    These are fixes that only need be done once (unlike those in
    FixPEP8, which are dependent on pycodestyle).

    """
    if codes is None:
        codes = []
    if any(code_match(code, select=options.select, ignore=options.ignore)
           for code in ['E101', 'E111']):
        source = reindent(
            source,
            indent_size=options.indent_size,
            leave_tabs=not code_match(
                'W191',
                select=options.select,
                ignore=options.ignore))

    for (code, function) in global_fixes():
        if code_match(code, select=options.select, ignore=options.ignore):
            if options.verbose:
                print('---> Applying {} fix for {}'.format(where,
                                                           code.upper()),
                      file=sys.stderr)
            source = function(source,
                              aggressive=options.aggressive)

    source = fix_2to3(source,
                      aggressive=options.aggressive,
                      select=options.select,
                      ignore=options.ignore,
                      filename=filename,
                      where=where,
                      verbose=options.verbose)

    return source


def extract_code_from_function(function):
    """Return code handled by function."""
    if not function.__name__.startswith('fix_'):
        return None

    code = re.sub('^fix_', '', function.__name__)
    if not code:
        return None

    try:
        int(code[1:])
    except ValueError:
        return None

    return code


def _get_package_version():
    packages = ["pycodestyle: {}".format(pycodestyle.__version__)]
    return ", ".join(packages)


def create_parser():
    """Return command-line parser."""
    parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
                                     prog='autopep8')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {} ({})'.format(
                            __version__, _get_package_version()))
    parser.add_argument('-v', '--verbose', action='count',
                        default=0,
                        help='print verbose messages; '
                        'multiple -v result in more verbose messages')
    parser.add_argument('-d', '--diff', action='store_true',
                        help='print the diff for the fixed source')
    parser.add_argument('-i', '--in-place', action='store_true',
                        help='make changes to files in place')
    parser.add_argument('--global-config', metavar='filename',
                        default=DEFAULT_CONFIG,
                        help='path to a global pep8 config file; if this file '
                        'does not exist then this is ignored '
                        '(default: {})'.format(DEFAULT_CONFIG))
    parser.add_argument('--ignore-local-config', action='store_true',
                        help="don't look for and apply local config files; "
                        'if not passed, defaults are updated with any '
                        "config files in the project's root directory")
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='run recursively over directories; '
                        'must be used with --in-place or --diff')
    parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
                        help='number of parallel jobs; '
                        'match CPU count if value is less than 1')
    parser.add_argument('-p', '--pep8-passes', metavar='n',
                        default=-1, type=int,
                        help='maximum number of additional pep8 passes '
                        '(default: infinite)')
    parser.add_argument('-a', '--aggressive', action='count', default=0,
                        help='enable non-whitespace changes; '
                        'multiple -a result in more aggressive changes')
    parser.add_argument('--experimental', action='store_true',
                        help='enable experimental fixes')
    parser.add_argument('--exclude', metavar='globs',
                        help='exclude file/directory names that match these '
                        'comma-separated globs')
    parser.add_argument('--list-fixes', action='store_true',
                        help='list codes for fixes; '
                        'used by --ignore and --select')
    parser.add_argument('--ignore', metavar='errors', default='',
                        help='do not fix these errors/warnings '
                        '(default: {})'.format(DEFAULT_IGNORE))
    parser.add_argument('--select', metavar='errors', default='',
                        help='fix only these errors/warnings (e.g. E4,W)')
    parser.add_argument('--max-line-length', metavar='n', default=79,
                        type=int,
                        help='set maximum allowed line length '
                        '(default: %(default)s)')
    parser.add_argument('--line-range', '--range', metavar='line',
                        default=None, type=int, nargs=2,
                        help='only fix errors found within this inclusive '
                        'range of line numbers (e.g. 1 99); '
                        'line numbers are indexed at 1')
    parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
                        type=int, help=argparse.SUPPRESS)
    parser.add_argument('--hang-closing', action='store_true',
                        help='hang-closing option passed to pycodestyle')
    parser.add_argument('--exit-code', action='store_true',
                        help='change the behavior of the exit code: '
                        'by default, 0 means no differences and 1 means an '
                        'error occurred; with this option, 2 is returned '
                        'when differences exist')
    parser.add_argument('files', nargs='*',
                        help="files to format or '-' for standard in")

    return parser


def _expand_codes(codes, ignore_codes):
    """Expand to individual E/W codes."""
    ret = set()

    is_conflict = False
    if all(
            any(
                conflicting_code.startswith(code)
                for code in codes
            )
            for conflicting_code in CONFLICTING_CODES
    ):
        is_conflict = True

    is_ignore_w503 = "W503" in ignore_codes
    is_ignore_w504 = "W504" in ignore_codes

    for code in codes:
        if code == "W":
            if is_ignore_w503 and is_ignore_w504:
                ret.update({"W1", "W2", "W3", "W505", "W6"})
            elif is_ignore_w503:
                ret.update({"W1", "W2", "W3", "W504", "W505", "W6"})
            else:
                ret.update({"W1", "W2", "W3", "W503", "W505", "W6"})
        elif code in ("W5", "W50"):
            if is_ignore_w503 and is_ignore_w504:
                ret.update({"W505"})
            elif is_ignore_w503:
                ret.update({"W504", "W505"})
            else:
                ret.update({"W503", "W505"})
        elif not (code in ("W503", "W504") and is_conflict):
            ret.add(code)

    return ret


def parse_args(arguments, apply_config=False):
    """Parse command-line options."""
    parser = create_parser()
    args = parser.parse_args(arguments)

    if not args.files and not args.list_fixes:
        parser.exit(EXIT_CODE_ARGPARSE_ERROR, 'incorrect number of arguments')

    args.files = [decode_filename(name) for name in args.files]

    if apply_config:
        parser = read_config(args, parser)
        # Prioritize settings when a pyproject.toml tool.autopep8 section
        # exists.
        try:
            parser_with_pyproject_toml = read_pyproject_toml(args, parser)
        except Exception:
            parser_with_pyproject_toml = None
        if parser_with_pyproject_toml:
            parser = parser_with_pyproject_toml
        args = parser.parse_args(arguments)
        args.files = [decode_filename(name) for name in args.files]

    if '-' in args.files:
        if len(args.files) > 1:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                'cannot mix stdin and regular files',
            )

        if args.diff:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                '--diff cannot be used with standard input',
            )

        if args.in_place:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                '--in-place cannot be used with standard input',
            )

        if args.recursive:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                '--recursive cannot be used with standard input',
            )

    if len(args.files) > 1 and not (args.in_place or args.diff):
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            'autopep8 only takes one filename as argument '
            'unless the "--in-place" or "--diff" args are used',
        )

    if args.recursive and not (args.in_place or args.diff):
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            '--recursive must be used with --in-place or --diff',
        )

    if args.in_place and args.diff:
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            '--in-place and --diff are mutually exclusive',
        )

    if args.max_line_length <= 0:
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            '--max-line-length must be greater than 0',
        )

    if args.indent_size <= 0:
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            '--indent-size must be greater than 0',
        )

    if args.select:
        args.select = _expand_codes(
            _split_comma_separated(args.select),
            (_split_comma_separated(args.ignore) if args.ignore else [])
        )

    if args.ignore:
        args.ignore = _split_comma_separated(args.ignore)
        if all(
                not any(
                    conflicting_code.startswith(ignore_code)
                    for ignore_code in args.ignore
                )
                for conflicting_code in CONFLICTING_CODES
        ):
            args.ignore.update(CONFLICTING_CODES)
    elif not args.select:
        if args.aggressive:
            # Enable everything by default if aggressive.
            args.select = {'E', 'W1', 'W2', 'W3', 'W6'}
        else:
            args.ignore = _split_comma_separated(DEFAULT_IGNORE)

    if args.exclude:
        args.exclude = _split_comma_separated(args.exclude)
    else:
        args.exclude = {}

    if args.jobs < 1:
        # Do not import multiprocessing globally in case it is not supported
        # on the platform.
        import multiprocessing
        args.jobs = multiprocessing.cpu_count()

    if args.jobs > 1 and not (args.in_place or args.diff):
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            'parallel jobs requires --in-place',
        )

    if args.line_range:
        if args.line_range[0] <= 0:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                '--range must be positive numbers',
            )
        if args.line_range[0] > args.line_range[1]:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                'First value of --range should be less than or equal '
                'to the second',
            )

    return args


def _get_normalize_options(args, config, section, option_list):
    for (k, v) in config.items(section):
        norm_opt = k.lstrip('-').replace('-', '_')
        if not option_list.get(norm_opt):
            continue
        opt_type = option_list[norm_opt]
        if opt_type is int:
            if v.strip() == "auto":
                # Skip this special case.
                if args.verbose:
                    print(f"ignore config: {k}={v}")
                continue
            value = config.getint(section, k)
        elif opt_type is bool:
            value = config.getboolean(section, k)
        else:
            value = config.get(section, k)
        yield norm_opt, k, value


def read_config(args, parser):
    """Read both user configuration and local configuration."""
    config = SafeConfigParser()

    try:
        if args.verbose and os.path.exists(args.global_config):
            print("read config path: {}".format(args.global_config))
        config.read(args.global_config)

        if not args.ignore_local_config:
            parent = tail = args.files and os.path.abspath(
                os.path.commonprefix(args.files))
            while tail:
                if config.read([os.path.join(parent, fn)
                                for fn in PROJECT_CONFIG]):
                    if args.verbose:
                        for fn in PROJECT_CONFIG:
                            config_file = os.path.join(parent, fn)
                            if not os.path.exists(config_file):
                                continue
                            print(
                                "read config path: {}".format(
                                    os.path.join(parent, fn)
                                )
                            )
                    break
                (parent, tail) = os.path.split(parent)

        defaults = {}
        option_list = {o.dest: o.type or type(o.default)
                       for o in parser._actions}

        for section in ['pep8', 'pycodestyle', 'flake8']:
            if not config.has_section(section):
                continue
            for norm_opt, k, value in _get_normalize_options(
                args, config, section, option_list
            ):
                if args.verbose:
                    print("enable config: section={}, key={}, value={}".format(
                        section, k, value))
                defaults[norm_opt] = value

        parser.set_defaults(**defaults)
    except Error:
        # Ignore for now.
        pass

    return parser


def read_pyproject_toml(args, parser):
    """Read pyproject.toml and load configuration."""
    if sys.version_info >= (3, 11):
        import tomllib
    else:
        import tomli as tomllib

    config = None

    if os.path.exists(args.global_config):
        with open(args.global_config, "rb") as fp:
            config = tomllib.load(fp)

    if not args.ignore_local_config:
        parent = tail = args.files and os.path.abspath(
            os.path.commonprefix(args.files))
        while tail:
            pyproject_toml = os.path.join(parent, "pyproject.toml")
            if os.path.exists(pyproject_toml):
                with open(pyproject_toml, "rb") as fp:
                    config = tomllib.load(fp)
                    break
            (parent, tail) = os.path.split(parent)

    if not config:
        return None

    if config.get("tool", {}).get("autopep8") is None:
        return None

    config = config.get("tool").get("autopep8")

    defaults = {}
    option_list = {o.dest: o.type or type(o.default)
                   for o in parser._actions}

    TUPLED_OPTIONS = ("ignore", "select")
    for (k, v) in config.items():
        norm_opt = k.lstrip('-').replace('-', '_')
        if not option_list.get(norm_opt):
            continue
        if type(v) in (list, tuple) and norm_opt in TUPLED_OPTIONS:
            value = ",".join(v)
        else:
            value = v
        if args.verbose:
            print("enable pyproject.toml config: "
                  "key={}, value={}".format(k, value))
        defaults[norm_opt] = value

    if defaults:
        # Set values when the key-value pairs exist in the defaults dict.
        parser.set_defaults(**defaults)

    return parser


def _split_comma_separated(string):
    """Return a set of strings."""
    return {text.strip() for text in string.split(',') if text.strip()}
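
# Worked example (added for illustration): empty entries are dropped, so
# trailing commas are harmless:
#
#     >>> sorted(_split_comma_separated('E1, W6,,'))
#     ['E1', 'W6']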


def decode_filename(filename):
    """Return Unicode filename."""
    if isinstance(filename, str):
        return filename

    return filename.decode(sys.getfilesystemencoding())


def supported_fixes():
    """Yield pep8 error codes that autopep8 fixes.

    Each item we yield is a tuple of the code followed by its
    description.

    """
    yield ('E101', docstring_summary(reindent.__doc__))

    instance = FixPEP8(filename=None, options=None, contents='')
    for attribute in dir(instance):
        code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
        if code:
            yield (
                code.group(1).upper(),
                re.sub(r'\s+', ' ',
                       docstring_summary(getattr(instance,
                                                 attribute).__doc__)))

    for (code, function) in sorted(global_fixes()):
        yield (code.upper() + (4 - len(code)) * ' ',
               re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))

    for code in sorted(CODE_TO_2TO3):
        yield (code.upper() + (4 - len(code)) * ' ',
               re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))


def docstring_summary(docstring):
    """Return summary of docstring."""
    return docstring.split('\n')[0] if docstring else ''


def line_shortening_rank(candidate, indent_word, max_line_length,
                         experimental=False):
    """Return rank of candidate.

    This is for sorting candidates.

    """
    if not candidate.strip():
        return 0

    rank = 0
    lines = candidate.rstrip().split('\n')

    offset = 0
    if (
        not lines[0].lstrip().startswith('#') and
        lines[0].rstrip()[-1] not in '([{'
    ):
        for (opening, closing) in ('()', '[]', '{}'):
            # Don't penalize empty containers that aren't split up. Things
            # like this "foo(\n    )" aren't particularly good.
            opening_loc = lines[0].find(opening)
            closing_loc = lines[0].find(closing)
            if opening_loc >= 0:
                if closing_loc < 0 or closing_loc != opening_loc + 1:
                    offset = max(offset, 1 + opening_loc)

    current_longest = max(offset + len(x.strip()) for x in lines)

    rank += 4 * max(0, current_longest - max_line_length)

    rank += len(lines)

    # Too much variation in line length is ugly.
    rank += 2 * standard_deviation(len(line) for line in lines)

    bad_staring_symbol = {
        '(': ')',
        '[': ']',
        '{': '}'}.get(lines[0][-1])

    if len(lines) > 1:
        if (
            bad_staring_symbol and
            lines[1].lstrip().startswith(bad_staring_symbol)
        ):
            rank += 20

    for lineno, current_line in enumerate(lines):
        current_line = current_line.strip()

        if current_line.startswith('#'):
            continue

        for bad_start in ['.', '%', '+', '-', '/']:
            if current_line.startswith(bad_start):
                rank += 100

            # Do not tolerate operators on their own line.
            if current_line == bad_start:
                rank += 1000

        if (
            current_line.endswith(('.', '%', '+', '-', '/')) and
            "': " in current_line
        ):
            rank += 1000

        if current_line.endswith(('(', '[', '{', '.')):
            # Avoid lonely opening. They result in longer lines.
            if len(current_line) <= len(indent_word):
                rank += 100

            # Avoid the ugliness of ", (\n".
            if (
                current_line.endswith('(') and
                current_line[:-1].rstrip().endswith(',')
            ):
                rank += 100

            # Avoid the ugliness of "something[\n" and "something[index][\n".
            if (
                current_line.endswith('[') and
                len(current_line) > 1 and
                (current_line[-2].isalnum() or current_line[-2] in ']')
            ):
                rank += 300

            # Also avoid the ugliness of "foo.\nbar".
            if current_line.endswith('.'):
                rank += 100

            if has_arithmetic_operator(current_line):
                rank += 100

        # Avoid breaking at unary operators.
        if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')):
            rank += 1000

        if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')):
            rank += 1000

        if current_line.endswith(('%', '(', '[', '{')):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if current_line.startswith('for '):
            rank -= 50

        if current_line.endswith('\\'):
            # If a line ends in \-newline, it may be part of a
            # multiline string. In that case, we would like to know
            # how long that line is without the \-newline. If it's
            # longer than the maximum, or has comments, then we assume
            # that the \-newline is an okay candidate and only
            # penalize it a bit.
            total_len = len(current_line)
            lineno += 1
            while lineno < len(lines):
                total_len += len(lines[lineno])

                if lines[lineno].lstrip().startswith('#'):
                    total_len = max_line_length
                    break

                if not lines[lineno].endswith('\\'):
                    break

                lineno += 1

            if total_len < max_line_length:
                rank += 10
            else:
                rank += 100 if experimental else 1

        # Prefer breaking at commas rather than colon.
        if ',' in current_line and current_line.endswith(':'):
            rank += 10

        # Avoid splitting dictionaries between key and value.
        if current_line.endswith(':'):
            rank += 100

        rank += 10 * count_unbalanced_brackets(current_line)

    return max(0, rank)


def standard_deviation(numbers):
    """Return standard deviation."""
    numbers = list(numbers)
    if not numbers:
        return 0

    mean = sum(numbers) / len(numbers)
    return (sum((n - mean) ** 2 for n in numbers) /
            len(numbers)) ** .5
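
# Worked example (added for illustration): this is the population standard
# deviation, sqrt(sum((n - mean)**2) / len(numbers)):
#
#     >>> standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])
#     2.0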


def has_arithmetic_operator(line):
    """Return True if line contains any arithmetic operators."""
    for operator in pycodestyle.ARITHMETIC_OP:
        if operator in line:
            return True

    return False


def count_unbalanced_brackets(line):
    """Return number of unmatched open/close brackets."""
    count = 0
    for opening, closing in ['()', '[]', '{}']:
        count += abs(line.count(opening) - line.count(closing))

    return count


def split_at_offsets(line, offsets):
    """Split line at offsets.

    Return list of strings.

    """
    result = []

    previous_offset = 0
    current_offset = 0
    for current_offset in sorted(offsets):
        if current_offset < len(line) and previous_offset != current_offset:
            result.append(line[previous_offset:current_offset].strip())
        previous_offset = current_offset

    result.append(line[current_offset:])

    return result
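
# Worked example (added for illustration): offsets are character positions
# into the line:
#
#     >>> split_at_offsets('abcdef', [2, 4])
#     ['ab', 'cd', 'ef']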


class LineEndingWrapper(object):

    r"""Replace line endings to work with sys.stdout.

    It seems that sys.stdout expects only '\n' as the line ending, no matter
    the platform. Otherwise, we get repeated line endings.

    """

    def __init__(self, output):
        self.__output = output

    def write(self, s):
        self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))

    def flush(self):
        self.__output.flush()


def match_file(filename, exclude):
    """Return True if file is okay for modifying/recursing."""
    base_name = os.path.basename(filename)

    if base_name.startswith('.'):
        return False

    for pattern in exclude:
        if fnmatch.fnmatch(base_name, pattern):
            return False
        if fnmatch.fnmatch(filename, pattern):
            return False

    if not os.path.isdir(filename) and not is_python_file(filename):
        return False

    return True


def find_files(filenames, recursive, exclude):
    """Yield filenames."""
    while filenames:
        name = filenames.pop(0)
        if recursive and os.path.isdir(name):
            for root, directories, children in os.walk(name):
                filenames += [os.path.join(root, f) for f in children
                              if match_file(os.path.join(root, f),
                                            exclude)]
                directories[:] = [d for d in directories
                                  if match_file(os.path.join(root, d),
                                                exclude)]
        else:
            is_exclude_match = False
            for pattern in exclude:
                if fnmatch.fnmatch(name, pattern):
                    is_exclude_match = True
                    break
            if not is_exclude_match:
                yield name


def _fix_file(parameters):
    """Helper function for optionally running fix_file() in parallel."""
    if parameters[1].verbose:
        print('[file:{}]'.format(parameters[0]), file=sys.stderr)
    try:
        return fix_file(*parameters)
    except IOError as error:
        print(str(error), file=sys.stderr)
        raise error


def fix_multiple_files(filenames, options, output=None):
    """Fix list of files.

    Optionally fix files recursively.

    """
    results = []
    filenames = find_files(filenames, options.recursive, options.exclude)
    if options.jobs > 1:
        import multiprocessing
        pool = multiprocessing.Pool(options.jobs)
        rets = []
        for name in filenames:
            ret = pool.apply_async(_fix_file, ((name, options),))
            rets.append(ret)
        pool.close()
        pool.join()
        if options.diff:
            for r in rets:
                sys.stdout.write(r.get().decode())
                sys.stdout.flush()
        results.extend([x.get() for x in rets if x is not None])
    else:
        for name in filenames:
            ret = _fix_file((name, options, output))
            if ret is None:
                continue
            if options.diff:
                if ret != '':
                    results.append(ret)
            elif options.in_place:
                results.append(ret)
            else:
                original_source = readlines_from_file(name)
                if "".join(original_source).splitlines() != ret.splitlines():
                    results.append(ret)
    return results


def is_python_file(filename):
    """Return True if filename is Python file."""
    if filename.endswith('.py'):
        return True

    try:
        with open_with_encoding(
                filename,
                limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f:
            text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES)
            if not text:
                return False
            first_line = text.splitlines()[0]
    except (IOError, IndexError):
        return False

    if not PYTHON_SHEBANG_REGEX.match(first_line):
        return False

    return True


def is_probably_part_of_multiline(line):
    """Return True if line is likely part of a multiline string.

    When multiline strings are involved, pep8 reports the error as being
    at the start of the multiline string, which doesn't work for us.

    """
    return (
        '"""' in line or
        "'''" in line or
        line.rstrip().endswith('\\')
    )


def wrap_output(output, encoding):
    """Return output with specified encoding."""
    return codecs.getwriter(encoding)(output.buffer
                                      if hasattr(output, 'buffer')
                                      else output)


def get_encoding():
    """Return preferred encoding."""
    return locale.getpreferredencoding() or sys.getdefaultencoding()


def main(argv=None, apply_config=True):
    """Command-line entry."""
    if argv is None:
        argv = sys.argv

    try:
        # Exit on broken pipe.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    except AttributeError:  # pragma: no cover
        # SIGPIPE is not available on Windows.
        pass

    try:
        args = parse_args(argv[1:], apply_config=apply_config)

        if args.list_fixes:
            for code, description in sorted(supported_fixes()):
                print('{code} - {description}'.format(
                    code=code, description=description))
            return EXIT_CODE_OK

        if args.files == ['-']:
            assert not args.in_place

            encoding = sys.stdin.encoding or get_encoding()
            read_stdin = sys.stdin.read()
            fixed_stdin = fix_code(read_stdin, args, encoding=encoding)

            # LineEndingWrapper is unnecessary here due to the symmetry
            # between standard in and standard out.
            wrap_output(sys.stdout, encoding=encoding).write(fixed_stdin)

            if hash(read_stdin) != hash(fixed_stdin):
                if args.exit_code:
                    return EXIT_CODE_EXISTS_DIFF
        else:
            if args.in_place or args.diff:
                args.files = list(set(args.files))
            else:
                assert len(args.files) == 1
                assert not args.recursive

            results = fix_multiple_files(args.files, args, sys.stdout)
            if args.diff:
                ret = any([len(ret) != 0 for ret in results])
            else:
                # With the in-place option.
                ret = any([ret is not None for ret in results])
            if args.exit_code and ret:
                return EXIT_CODE_EXISTS_DIFF
    except IOError:
        return EXIT_CODE_ERROR
    except KeyboardInterrupt:
        return EXIT_CODE_ERROR  # pragma: no cover


class CachedTokenizer(object):

    """A one-element cache around tokenize.generate_tokens().

    Original code written by Ned Batchelder, in coverage.py.

    """

    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for tokenize.generate_tokens()."""
        if text != self.last_text:
            string_io = io.StringIO(text)
            self.last_tokens = list(
                tokenize.generate_tokens(string_io.readline)
            )
            self.last_text = text
        return self.last_tokens


_cached_tokenizer = CachedTokenizer()
generate_tokens = _cached_tokenizer.generate_tokens
== '__main__':