]> crepu.dev Git - config.git/blame_incremental - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/autopep8.py
Reorganización de directorios
[config.git] / djavu-asus / emacs / elpy / rpc-venv / lib / python3.11 / site-packages / autopep8.py
... / ...
CommitLineData
1#!/usr/bin/env python
2
3# Copyright (C) 2010-2011 Hideo Hattori
4# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
5# Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
6#
7# Permission is hereby granted, free of charge, to any person obtaining
8# a copy of this software and associated documentation files (the
9# "Software"), to deal in the Software without restriction, including
10# without limitation the rights to use, copy, modify, merge, publish,
11# distribute, sublicense, and/or sell copies of the Software, and to
12# permit persons to whom the Software is furnished to do so, subject to
13# the following conditions:
14#
15# The above copyright notice and this permission notice shall be
16# included in all copies or substantial portions of the Software.
17#
18# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
23# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25# SOFTWARE.
26
27# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
28# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
29#
30# Permission is hereby granted, free of charge, to any person
31# obtaining a copy of this software and associated documentation files
32# (the "Software"), to deal in the Software without restriction,
33# including without limitation the rights to use, copy, modify, merge,
34# publish, distribute, sublicense, and/or sell copies of the Software,
35# and to permit persons to whom the Software is furnished to do so,
36# subject to the following conditions:
37#
38# The above copyright notice and this permission notice shall be
39# included in all copies or substantial portions of the Software.
40#
41# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
42# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
43# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
44# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
45# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
46# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
47# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
48# SOFTWARE.
49
50"""Automatically formats Python code to conform to the PEP 8 style guide.
51
52Fixes that only need be done once can be added by adding a function of the form
53"fix_<code>(source)" to this module. They should return the fixed source code.
54These fixes are picked up by apply_global_fixes().
55
56Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
57class documentation for more information.
58
59"""
60
61from __future__ import absolute_import
62from __future__ import division
63from __future__ import print_function
64from __future__ import unicode_literals
65
66import argparse
67import codecs
68import collections
69import copy
70import difflib
71import fnmatch
72import inspect
73import io
74import itertools
75import keyword
76import locale
77import os
78import re
79import signal
80import sys
81import textwrap
82import token
83import tokenize
84import warnings
85import ast
86from configparser import ConfigParser as SafeConfigParser, Error
87
88import pycodestyle
89from pycodestyle import STARTSWITH_INDENT_STATEMENT_REGEX
90
91
# Version string of this module.
__version__ = '2.0.4'


# Line-ending sequences.
CR = '\r'
LF = '\n'
CRLF = '\r\n'


# A "#!" line whose last word is "python", "python2" or "python3".
PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
# "<name> = lambda <args>:"; captures the assigned name and the arguments.
LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
# "not <operand> in|is"; captures "not", the operand and the operator.
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
# The two-word operators "not in" and "is not".
COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
# An "except" clause that names no exception type.
BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
# A line starting with "def"/"async def" and containing "):".
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
# Opening triple quote with optional u/r prefix; the quote style is
# captured in the "kind" group.
DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
# "# fmt: on" / "# autopep8: on" and the matching "off" marker comments.
ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')

# Numeric exit codes (presumably the process exit status; the code that
# uses them is outside this view).
EXIT_CODE_OK = 0
EXIT_CODE_ERROR = 1
EXIT_CODE_EXISTS_DIFF = 2
EXIT_CODE_ARGPARSE_ERROR = 99

# For generating line shortening candidates.
SHORTEN_OPERATOR_GROUPS = frozenset([
    frozenset([',']),
    frozenset(['%']),
    frozenset([',', '(', '[', '{']),
    frozenset(['%', '(', '[', '{']),
    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
    frozenset(['%', '+', '-', '*', '/', '//']),
])


DEFAULT_IGNORE = 'E226,E24,W50,W690'    # TODO: use pycodestyle.DEFAULT_IGNORE
DEFAULT_INDENT_SIZE = 4
# these fixes conflict with each other, if the `--ignore` setting causes both
# to be enabled, disable both of them
CONFLICTING_CODES = ('W503', 'W504')

# Maps an error code to a list of fixer names (judging by the constant's
# name these are lib2to3 fixers -- confirm against the code that reads it).
# W602 is handled separately due to the need to avoid "with_traceback".
CODE_TO_2TO3 = {
    'E231': ['ws_comma'],
    'E721': ['idioms'],
    'W690': ['apply',
             'except',
             'exitfunc',
             'numliterals',
             'operator',
             'paren',
             'reduce',
             'renames',
             'standarderror',
             'sys_exc',
             'throw',
             'tuple_params',
             'xreadlines']}


# Per-user configuration file: "~\.pycodestyle" on Windows, otherwise
# "pycodestyle" under $XDG_CONFIG_HOME (or "~/.config" when that is unset
# or empty).
if sys.platform == 'win32':  # pragma: no cover
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'),
                                  'pycodestyle')
# fallback, use .pep8
# NOTE(review): the non-Windows fallback always uses "~/.config" and does not
# consult XDG_CONFIG_HOME.
if not os.path.exists(DEFAULT_CONFIG):  # pragma: no cover
    if sys.platform == 'win32':
        DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
    else:
        DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
# File names of per-project configuration files.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')


# Byte limit; presumably the amount read when sniffing whether a file is
# Python (the reader is outside this view).
MAX_PYTHON_FILE_DETECTION_BYTES = 1024
167
168
def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
    """Return opened file with a specific encoding.

    When *encoding* is empty it is determined with detect_encoding(),
    which receives *limit_byte_check* unchanged. The file is opened with
    ``newline=''`` so the original line endings are preserved.

    """
    if not encoding:
        encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)

    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings


def detect_encoding(filename, limit_byte_check=-1):
    """Return file encoding.

    The encoding is taken from the BOM / coding cookie via the standard
    library's tokenize.detect_encoding() (lib2to3, used previously, is
    deprecated and no longer ships with Python 3.13+). The file is then
    read back with that encoding (*limit_byte_check* characters, -1 for
    the whole file) to make sure it really decodes.

    Returns 'latin-1' when the cookie is invalid, names an unknown codec,
    or the content cannot be decoded.

    """
    try:
        with open(filename, 'rb') as input_file:
            encoding = tokenize.detect_encoding(input_file.readline)[0]

        # Trial read: surfaces UnicodeDecodeError for a wrong guess.
        with open_with_encoding(filename, encoding=encoding) as test_file:
            test_file.read(limit_byte_check)

        return encoding
    except (LookupError, SyntaxError, UnicodeDecodeError):
        return 'latin-1'
191
192
def readlines_from_file(filename):
    """Read *filename* and return its lines as a list."""
    with open_with_encoding(filename) as source_file:
        lines = source_file.readlines()
    return lines
197
198
def extended_blank_lines(logical_line,
                         blank_lines,
                         blank_before,
                         indent_level,
                         previous_logical):
    """Report blank-line problems around docstrings.

    Yields E303 when blank lines separate a "def" line from the docstring
    that follows it, and E301 when an indented method definition (a "def"
    line containing "(self") directly follows a docstring with no blank
    line in between.

    """
    docstring_regex = pycodestyle.DOCSTRING_REGEX

    if previous_logical.startswith('def '):
        if blank_lines and docstring_regex.match(logical_line):
            yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
        return

    if not docstring_regex.match(previous_logical):
        return

    # Missing blank line between class docstring and method declaration.
    separated = blank_lines or blank_before
    is_method = logical_line.startswith('def ') and '(self' in logical_line
    if indent_level and not separated and is_method:
        yield (0, 'E301 expected 1 blank line, found 0')
218
219
# Register the additional blank-line check with pycodestyle.
pycodestyle.register_check(extended_blank_lines)
221
222
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa):
    """Override pycodestyle's function to provide indentation information.

    Generator yielding ``(position, message)`` pairs for badly indented
    continuation lines. Each message is an error code followed by a
    number: the indentation column this check considers correct for the
    offending line.

    Nothing is yielded when *noqa* is true or the logical line occupies a
    single physical row.

    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented. Assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line. In turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # Accepted hanging-indent widths; tab-indented code may also hang by
    # twice the default size.
    valid_hangs = (
        (DEFAULT_INDENT_SIZE,)
        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
                                     2 * DEFAULT_INDENT_SIZE)
    )

    # Remember how many brackets were opened on each line.
    parens = [0] * nrows

    # Relative indents of physical lines.
    rel_indent = [0] * nrows

    # For each depth, collect a list of opening rows.
    open_rows = [[0]]
    # For each depth, memorize the hanging indentation.
    hangs = [None]

    # Visual indents.
    # Maps a column to True, to ``str`` or to a token's text; see the
    # assignments below for when each marker is stored.
    indent_chances = {}
    last_indent = tokens[0][2]
    indent = [last_indent[1]]

    last_token_multiline = None
    line = None
    last_line = ''
    last_line_begins_with_multiline = False
    for token_type, text, start, end, line in tokens:

        # True when this token is on a later physical row than the
        # previous one.
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            # A row reached from inside a multi-line token, or one that
            # starts with NL/NEWLINE, is not treated as a continuation line.
            newline = (not last_token_multiline and
                       token_type not in (tokenize.NL, tokenize.NEWLINE))
            last_line_begins_with_multiline = last_token_multiline

        if newline:
            # This is the beginning of a continuation line.
            last_indent = start

            # Record the initial indent.
            rel_indent[row] = pycodestyle.expand_indent(line) - indent_level

            # Identify closing bracket.
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # Is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            # A hang already recorded for this depth overrides the default
            # widths.
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])

            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # Closing bracket for visual indent.
                if start[1] != indent[depth]:
                    yield (start, 'E124 {}'.format(indent[depth]))
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening bracket's line
                if hang_closing:
                    yield (start, 'E133 {}'.format(indent[depth]))
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # Visual indent is broken.
                    yield (start, 'E128 {}'.format(indent[depth]))
            elif (hanging_indent or
                  (indent_next and
                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
                # Hanging indent is verified.
                if close_bracket and not hang_closing:
                    yield (start, 'E123 {}'.format(indent_level +
                                                   rel_indent[open_row]))
                hangs[depth] = hang
            elif visual_indent is True:
                # Visual indent is verified.
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # Ignore token lined up with matching one from a previous line.
                pass
            else:
                # Column of one default indent past the opening row.
                one_indented = (indent_level + rel_indent[open_row] +
                                DEFAULT_INDENT_SIZE)
                # Indent is broken.
                if hang <= 0:
                    error = ('E122', one_indented)
                elif indent[depth]:
                    error = ('E127', indent[depth])
                elif not close_bracket and hangs[depth]:
                    error = ('E131', one_indented)
                elif hang > DEFAULT_INDENT_SIZE:
                    error = ('E126', one_indented)
                else:
                    hangs[depth] = hang
                    error = ('E121', one_indented)

                yield (start, '{} {}'.format(*error))

        # Look for visual indenting.
        if (
            parens[row] and
            token_type not in (tokenize.NL, tokenize.COMMENT) and
            not indent[depth]
        ):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
        # Deal with implicit string concatenation.
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # Special case for the "if" statement because len("if (") is equal to
        # 4.
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        # A ':' followed only by whitespace makes this row an opening row
        # for the current depth.
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # Keep track of bracket depth.
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
            elif text in ')]}' and depth > 0:
                # Parent indents should not be more than this one.
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # Decrement the bracket count of the nearest row at or
                # above this one that still has one open.
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if (
                start[1] not in indent_chances and
                # This is for purposes of speeding up E121 (GitHub #90).
                not last_line.rstrip().endswith(',')
            ):
                # Allow to line up tokens.
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            # The row where a multi-line token ends inherits the relative
            # indent of the row where it started.
            rel_indent[end[0] - first_row] = rel_indent[row]

        last_line = line

    # The last physical line of a block opener (logical line ending in ':')
    # sits exactly one default indent deep: ask for one indent more.
    if (
        indent_next and
        not last_line_begins_with_multiline and
        pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
    ):
        pos = (start[0], indent[0] + 4)
        desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
        if visual_indent:
            yield (pos, 'E129 {}'.format(desired_indent))
        else:
            yield (pos, 'E125 {}'.format(desired_indent))
412
413
# Replace pycodestyle's own continued_indentation check with this module's
# continued_indentation.
del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
pycodestyle.register_check(continued_indentation)
416
417
418class FixPEP8(object):
419
420 """Fix invalid code.
421
422 Fixer methods are prefixed "fix_". The _fix_source() method looks for these
423 automatically.
424
425 The fixer method can take either one or two arguments (in addition to
426 self). The first argument is "result", which is the error information from
427 pycodestyle. The second argument, "logical", is required only for
428 logical-line fixes.
429
430 The fixer method can return the list of modified lines or None. An empty
431 list would mean that no changes were made. None would mean that only the
432 line reported in the pycodestyle error was modified. Note that the modified
433 line numbers that are returned are indexed at 1. This typically would
434 correspond with the line number reported in the pycodestyle error
435 information.
436
437 [fixed method list]
438 - e111,e114,e115,e116
439 - e121,e122,e123,e124,e125,e126,e127,e128,e129
440 - e201,e202,e203
441 - e211
442 - e221,e222,e223,e224,e225
443 - e231
444 - e251,e252
445 - e261,e262
446 - e271,e272,e273,e274,e275
447 - e301,e302,e303,e304,e305,e306
448 - e401,e402
449 - e502
450 - e701,e702,e703,e704
451 - e711,e712,e713,e714
452 - e722
453 - e731
454 - w291
455 - w503,504
456
457 """
458
def __init__(self, filename,
             options,
             contents=None,
             long_line_ignore_cache=None):
    """Load the source and set up the fixer aliases.

    filename -- path of the file; read only when *contents* is None.
    options -- option object consulted by the fixers.
    contents -- source text to fix instead of reading *filename*.
    long_line_ignore_cache -- collection consulted by fix_long_line();
        a fresh empty set is used when None.

    """
    self.filename = filename
    self.source = (
        readlines_from_file(filename) if contents is None
        else io.StringIO(contents).readlines()
    )
    self.options = options
    self.indent_word = _get_indentword(''.join(self.source))

    # Remember the index of the first occurrence of every import line.
    self.imports = {}
    for index, text in enumerate(self.source):
        if text.startswith(('import ', 'from ')):
            self.imports.setdefault(text, index)

    if long_line_ignore_cache is None:
        long_line_ignore_cache = set()
    self.long_line_ignore_cache = long_line_ignore_cache

    # Many fixers are the same even though pycodestyle categorizes them
    # differently: each fixer below also serves the listed codes.
    shared_fixers = (
        (self.fix_e112, ('e115',)),
        (self._fix_reindent, ('e121', 'e122', 'e123', 'e124',
                              'e126', 'e127', 'e128', 'e129')),
        (self.fix_e131, ('e133',)),
        (self.fix_e201, ('e202', 'e203', 'e211')),
        (self.fix_e271, ('e221', 'e222', 'e223', 'e241',
                         'e272', 'e273', 'e274', 'e275')),
        (self.fix_e225, ('e226', 'e227', 'e228', 'e252')),
        (self.fix_e224, ('e242',)),
        (self.fix_e262, ('e261',)),
        (self.fix_e301, ('e306',)),
        (self.fix_e702, ('e703',)),
        (self.fix_w291, ('w292', 'w293')),
    )
    for fixer, codes in shared_fixers:
        for code in codes:
            setattr(self, 'fix_' + code, fixer)

    # Long lines are fixed per logical line only when requested via
    # aggressive level 2+ or the experimental flag.
    if options and (options.aggressive >= 2 or options.experimental):
        self.fix_e501 = self.fix_long_line_logically
    else:
        self.fix_e501 = self.fix_long_line_physically
521
def _fix_source(self, results):
    """Apply the matching "fix_<code>" method to each reported error.

    *results* is an iterable of dicts; the keys read here are 'line'
    (1-based), 'id', 'info' and 'column'. Errors are processed in the
    order given by _priority_key, and an error is skipped when its line
    (or, for logical-line fixers, any line of its logical line) was
    already modified by an earlier fix in this pass.

    """
    try:
        (logical_start, logical_end) = _find_logical(self.source)
        logical_support = True
    except (SyntaxError, tokenize.TokenError):  # pragma: no cover
        logical_support = False

    # 1-based numbers of the lines already changed during this pass.
    completed_lines = set()
    for result in sorted(results, key=_priority_key):
        if result['line'] in completed_lines:
            continue

        fixed_methodname = 'fix_' + result['id'].lower()
        if hasattr(self, fixed_methodname):
            fix = getattr(self, fixed_methodname)

            line_index = result['line'] - 1
            original_line = self.source[line_index]

            # A fixer with more than two parameters is treated as a
            # logical-line fixer and also receives the logical line.
            is_logical_fix = len(_get_parameters(fix)) > 2
            if is_logical_fix:
                logical = None
                if logical_support:
                    logical = _get_logical(self.source,
                                           result,
                                           logical_start,
                                           logical_end)
                    # Skip when any line in this range is already done.
                    if logical and set(range(
                        logical[0][0] + 1,
                        logical[1][0] + 1)).intersection(
                            completed_lines):
                        continue

                modified_lines = fix(result, logical)
            else:
                modified_lines = fix(result)

            if modified_lines is None:
                # Force logical fixes to report what they modified.
                assert not is_logical_fix

                # None with an unchanged line means nothing was fixed.
                if self.source[line_index] == original_line:
                    modified_lines = []

            if modified_lines:
                completed_lines.update(modified_lines)
            elif modified_lines == []:  # Empty list means no fix
                if self.options.verbose >= 2:
                    print(
                        '---> Not fixing {error} on line {line}'.format(
                            error=result['id'], line=result['line']),
                        file=sys.stderr)
            else:  # We assume one-line fix when None.
                completed_lines.add(result['line'])
        else:
            # No fixer exists for this code; only reported when verbose >= 3.
            if self.options.verbose >= 3:
                print(
                    "---> '{}' is not defined.".format(fixed_methodname),
                    file=sys.stderr)

                info = result['info'].strip()
                print('---> {}:{}:{}:{}'.format(self.filename,
                                                result['line'],
                                                result['column'],
                                                info),
                      file=sys.stderr)
588
def fix(self):
    """Return a version of the source code with PEP 8 violations fixed.

    Runs the checker over self.source, optionally restricts the findings
    to options.line_range, applies the fixers and returns the joined
    source. When a line range is set, its end is updated to reflect the
    number of newlines now found in that slice of the source.

    """
    results = _execute_pep8(
        {
            'ignore': self.options.ignore,
            'select': self.options.select,
            'max_line_length': self.options.max_line_length,
            'hang_closing': self.options.hang_closing,
        },
        self.source)

    if self.options.verbose:
        # Group the affected line numbers by error code for the report.
        progress = {}
        for found in results:
            progress.setdefault(found['id'], set()).add(found['line'])
        print('---> {n} issue(s) to fix {progress}'.format(
            n=len(results), progress=progress), file=sys.stderr)

    line_range = self.options.line_range
    if line_range:
        start, end = line_range
        results = [found for found in results
                   if start <= found['line'] <= end]

    filtered = filter_results(source=''.join(self.source),
                              results=results,
                              aggressive=self.options.aggressive)
    self._fix_source(filtered)

    if line_range:
        # If number of lines has changed then change line_range.
        newlines = 0
        for chunk in self.source[start - 1:end]:
            newlines += chunk.count('\n')
        self.options.line_range[1] = start + newlines - 1

    return ''.join(self.source)
624
625 def _fix_reindent(self, result):
626 """Fix a badly indented line.
627
628 This is done by adding or removing from its initial indent only.
629
630 """
631 num_indent_spaces = int(result['info'].split()[1])
632 line_index = result['line'] - 1
633 target = self.source[line_index]
634
635 self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
636
def fix_e112(self, result):
    """Indent an under-indented comment by one indent word.

    Returns [] (nothing changed) when the line is not a comment.

    """
    row = result['line'] - 1
    text = self.source[row]

    if text.lstrip().startswith('#'):
        self.source[row] = self.indent_word + text
        return None

    # Don't screw with invalid syntax.
    return []
647
def fix_e113(self, result):
    """Fix unexpected indentation.

    Drops the first character of the line's indentation (as returned by
    _get_indentation).

    """
    row = result['line'] - 1
    text = self.source[row]
    leading = _get_indentation(text)
    self.source[row] = leading[1:] + text.lstrip()
655
def fix_e116(self, result):
    """Fix an over-indented comment.

    Drops the first character of the line's indentation. Returns []
    (nothing changed) when the line is not a comment.

    """
    row = result['line'] - 1
    text = self.source[row]

    leading = _get_indentation(text)
    body = text.lstrip()

    if body.startswith('#'):
        self.source[row] = leading[1:] + body
        return None

    # Don't screw with invalid syntax.
    return []
669
def fix_e117(self, result):
    """Fix an over-indented line.

    Drops the first character of the line's indentation, unless that
    indentation is exactly one tab, in which case [] is returned and the
    line is left alone.

    """
    row = result['line'] - 1
    text = self.source[row]

    leading = _get_indentation(text)
    if leading == '\t':
        return []

    self.source[row] = leading[1:] + text.lstrip()
682
def fix_e125(self, result):
    """Fix indentation undistinguish from the next logical line.

    The second word of result['info'] is the desired indentation. The
    reported line, and every line directly above it that is indented at
    least as deeply, is shifted right by the difference.

    Returns the 1-based numbers of the lines that were visited.

    """
    num_indent_spaces = int(result['info'].split()[1])
    line_index = result['line'] - 1

    indent = len(_get_indentation(self.source[line_index]))
    padding = ' ' * (num_indent_spaces - indent)
    modified_lines = []

    # Stop at the top of the file: a negative index would wrap around
    # and start re-indenting lines at the end of the source.
    while (
        line_index >= 0 and
        len(_get_indentation(self.source[line_index])) >= indent
    ):
        self.source[line_index] = padding + self.source[line_index]
        modified_lines.append(1 + line_index)  # Line indexed at 1.
        line_index -= 1

    return modified_lines
700
def fix_e131(self, result):
    """Move the reported line to the indentation named in the error.

    The second word of result['info'] is the desired indentation width.
    Spaces are added in front of the line, or leading characters are cut
    off, to reach it. When both the desired and the current width are 0
    the line is indented by 4 spaces instead.

    """
    wanted = int(result['info'].split()[1])
    row = result['line'] - 1
    current = len(_get_indentation(self.source[row]))

    if wanted == 0 and current == 0:
        delta = 4
    else:
        delta = wanted - current

    if delta < 0:
        # Over-indented: cut the surplus characters off the front.
        self.source[row] = self.source[row][-delta:]
    else:
        self.source[row] = ' ' * delta + self.source[row]
720
def fix_e201(self, result):
    """Remove extraneous whitespace at the reported column."""
    row = result['line'] - 1
    text = self.source[row]
    column = result['column'] - 1

    self.source[row] = fix_whitespace(text, offset=column, replacement='')
732
def fix_e224(self, result):
    """Replace tabs with spaces from the reported column onwards.

    Text before the reported column is left untouched.

    """
    row = result['line'] - 1
    text = self.source[row]
    cut = result['column'] - 1
    head, tail = text[:cut], text[cut:]
    self.source[row] = head + tail.replace('\t', ' ')
739
def fix_e225(self, result):
    """Fix missing whitespace around operator.

    Inserts a space at the reported column, then re-runs pycodestyle's
    missing-whitespace check on the new line and inserts a space at every
    position it reports with the same error code.

    Returns [] when inserting the space would change the indentation;
    otherwise None.

    """
    row = result['line'] - 1
    original = self.source[row]
    column = result['column'] - 1
    candidate = original[:column] + ' ' + original[column:]

    # Only proceed if non-whitespace characters match.
    # And make sure we don't break the indentation.
    unchanged_text = candidate.replace(' ', '') == original.replace(' ', '')
    if not (unchanged_text and
            _get_indentation(candidate) == _get_indentation(original)):
        return []

    self.source[row] = candidate
    wanted_code = result.get('id', 0)

    try:
        line_tokens = generate_tokens(candidate)
    except (SyntaxError, tokenize.TokenError):
        return None
    if not check_syntax(candidate.lstrip()):
        return None

    try:
        find_missing = pycodestyle.missing_whitespace_around_operator
    except AttributeError:
        # pycodestyle >= 2.11.0
        find_missing = pycodestyle.missing_whitespace

    # Work right to left so earlier offsets stay valid after an insertion.
    for found in reversed(list(find_missing(candidate, line_tokens))):
        if found[1].split()[0] == wanted_code:
            position = found[0][1]
            candidate = candidate[:position] + ' ' + candidate[position:]

    self.source[row] = candidate
    return None
776
def fix_e231(self, result):
    """Add missing whitespace after the reported column.

    The line is split at result['column']; whitespace touching the split
    on either side is dropped and a single space is put in its place.

    """
    row = result['line'] - 1
    text = self.source[row]
    split_at = result['column']

    left = text[:split_at].rstrip()
    right = text[split_at:].lstrip()
    self.source[row] = ''.join((left, ' ', right))
784
def fix_e251(self, result):
    """Remove whitespace around parameter '=' sign.

    Returns the two affected 1-based line numbers when the '=' is
    followed by an escaped newline and the next line had to be joined;
    otherwise None.

    """
    row = result['line'] - 1
    text = self.source[row]

    # This is necessary since pycodestyle sometimes reports columns that
    # goes past the end of the physical line. This happens in cases like,
    # foo(bar\n=None)
    pos = min(result['column'] - 1, len(text) - 1)

    at_whitespace = not text[pos].strip()
    fixed = (text[:pos].rstrip() + text[pos:].lstrip()
             if at_whitespace else text)

    # There could be an escaped newline
    #
    #     def foo(a=\
    #             1)
    escaped_newline = fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r'))
    if not escaped_newline:
        self.source[row] = fixed
        return None

    self.source[row] = fixed.rstrip('\n\r \t\\')
    self.source[row + 1] = self.source[row + 1].lstrip()
    return [row + 1, row + 2]  # Line indexed at 1
811
def fix_e262(self, result):
    """Fix spacing around the hash of an inline comment.

    The line is split at result['column']. Trailing spaces, tabs and
    hashes are stripped from the code part, leading ones from the comment
    part, and the two are rejoined as "<code>  # <comment>": two spaces
    before the hash and one after it. A comment with no text left is
    removed altogether.

    __init__ also installs this method as the E261 fixer, and E261
    requires at least two spaces before an inline comment, so the
    separator must be two spaces wide.

    """
    target = self.source[result['line'] - 1]
    offset = result['column']

    code = target[:offset].rstrip(' \t#')
    comment = target[offset:].lstrip(' \t#')

    # Two spaces before '#': one would leave E261 unfixed.
    fixed = code + ('  # ' + comment if comment.strip() else '\n')

    self.source[result['line'] - 1] = fixed
823
def fix_e265(self, result):
    """Fix spacing after block comment hash.

    Keeps the indentation and the run of leading hashes, then puts exactly
    one space before the comment text. A comment with no text becomes a
    bare newline after the hashes. Comments whose text starts with '!'
    are left alone.

    """
    row = result['line'] - 1
    text = self.source[row]

    leading = _get_indentation(text)
    body = text.lstrip(' \t')
    first_non_hash = next(i for i, char in enumerate(body) if char != '#')
    hashes = body[:first_non_hash]
    comment = body[first_non_hash:].lstrip(' \t')

    # Ignore special comments, even in the middle of the file.
    if comment.startswith('!'):
        return

    tail = ' ' + comment if comment.strip() else '\n'
    self.source[row] = leading + hashes + tail
841
def fix_e266(self, result):
    """Fix too many block comment hashes.

    Replaces the leading run of hashes, spaces and tabs with "# ",
    keeping the line's indentation. Lines that also end with a hash are
    left untouched.

    """
    row = result['line'] - 1
    text = self.source[row]

    # Leave stylistic outlined blocks alone.
    if text.strip().endswith('#'):
        return

    self.source[row] = ''.join(
        (_get_indentation(text), '# ', text.lstrip('# \t')))
854
def fix_e271(self, result):
    """Fix extraneous whitespace around keywords.

    Returns [] when fix_whitespace() leaves the line unchanged.

    """
    row = result['line'] - 1
    text = self.source[row]

    fixed = fix_whitespace(text,
                           offset=result['column'] - 1,
                           replacement=' ')
    if fixed == text:
        return []

    self.source[row] = fixed
869
def fix_e301(self, result):
    """Add the missing blank line in front of the reported line."""
    row = result['line'] - 1
    self.source[row] = '\n' + self.source[row]
874
def fix_e302(self, result):
    """Add the blank lines missing in front of a definition.

    The last word of result['info'] is the number of blank lines found;
    enough newlines are added to reach two. When the preceding line
    consists of a lone backslash, the newlines go in front of that line
    instead of the reported one.

    """
    missing = 2 - int(result['info'].split()[-1])
    line_number = result['line']

    after_backslash = self.source[line_number - 2].strip() == "\\"
    row = line_number - (2 if after_backslash else 1)

    self.source[row] = '\n' * missing + self.source[row]
885
def fix_e303(self, result):
    """Remove extra blank lines above the reported line.

    The number in parentheses in result['info'] is the blank-line count
    reported by the checker; that count minus two (but at least one)
    blank lines are emptied, searching upwards from the line before the
    reported one.

    Returns the 1-based numbers of the emptied lines.

    """
    reported = int(result['info'].split('(')[1].split(')')[0])
    to_delete = max(1, reported - 2)

    # We need to count because pycodestyle reports an offset line number if
    # there are comments.
    emptied = []
    row = result['line'] - 2
    while len(emptied) < to_delete and row >= 0:
        if not self.source[row].strip():
            self.source[row] = ''
            emptied.append(row + 1)  # Line indexed at 1
        row -= 1

    return emptied
904
def fix_e304(self, result):
    """Remove blank line following function decorator.

    Only the line directly above the reported one is inspected.

    """
    row = result['line'] - 2
    is_blank = not self.source[row].strip()
    if is_blank:
        self.source[row] = ''
910
def fix_e305(self, result):
    """Adjust the blank lines after the end of a function or class.

    The last word of result['info'] is the number of blank lines found.
    With more than two, the surplus blank lines above the reported line
    are emptied. Otherwise a single newline is inserted, placed above any
    comment lines that directly precede the reported line.

    Returns the 1-based numbers of the modified lines.

    """
    delta = 2 - int(result['info'].split()[-1])
    row = result['line'] - 2
    touched = []

    if delta < 0:
        # delete cr
        surplus = -delta
        while len(touched) < surplus and row >= 0:
            if not self.source[row].strip():
                self.source[row] = ''
                touched.append(row + 1)  # Line indexed at 1
            row -= 1
        return touched

    # add cr
    # check comment line
    while row >= 0:
        stripped = self.source[row].lstrip()
        if not stripped or stripped[0] != '#':
            break
        row -= 1
    row += 1

    self.source[row] = '\n' + self.source[row]
    touched.append(row + 1)  # Line indexed at 1.
    return touched
943
def fix_e401(self, result):
    """Put imports on separate lines.

    Splits the line at the reported column and starts a new "import"
    statement, with the same leading text as the original one, for the
    remainder. Returns [] when the line does not start with "import".

    """
    row = result['line'] - 1
    text = self.source[row]
    cut = result['column'] - 1

    if not text.lstrip().startswith('import'):
        return []

    # Everything in front of the first "import" keyword.
    prefix = re.split(pattern=r'\bimport\b',
                      string=text, maxsplit=1)[0]
    first_part = text[:cut].rstrip('\t ,')
    remainder = text[cut:].lstrip('\t ,')

    self.source[row] = ''.join(
        (first_part, '\n', prefix, 'import ', remainder))
958
def fix_e402(self, result):
    """Move a late module-level import up to the other imports.

    The complete statement is collected by joining successive lines until
    they tokenize (at most 99 lines). It is put in front of the line
    returned by get_module_imports_on_top_of_file(), unless the same
    import line first appears elsewhere in the file. The original lines
    are emptied in either case.

    """
    (row, _column, target) = get_index_offset_contents(result,
                                                       self.source)
    for span in range(1, 100):
        statement = ''.join(self.source[row:row + span])
        try:
            generate_tokens(statement)
        except (SyntaxError, tokenize.TokenError):
            continue
        break

    # self.imports holds the index of each import line's first occurrence.
    if target not in self.imports or self.imports[target] == row:
        destination = get_module_imports_on_top_of_file(self.source, row)
        self.source[destination] = statement + self.source[destination]

    for delta in range(span):
        self.source[row + delta] = ''
975
def fix_long_line_logically(self, result, logical):
    """Try to make lines fit within --max-line-length characters.

    Works on the whole logical line. Falls back to
    fix_long_line_physically() when there is no logical line, when it
    consists of a single physical line, when the reported line is a
    comment, or when shortening raises a syntax/token error.

    Returns a range of 1-based line numbers on success, else [] (or
    whatever the physical fallback returns).

    """
    reported = self.source[result['line'] - 1]
    if (
        not logical or
        len(logical[2]) == 1 or
        reported.lstrip().startswith('#')
    ):
        return self.fix_long_line_physically(result)

    first = logical[0][0]
    last = logical[1][0]
    original_text = ''.join(logical[2])

    line_before = get_item(self.source, first - 1, default='')
    line_after = get_item(self.source, last + 1, default='')

    joined = join_logical_line(original_text)

    try:
        fixed = self.fix_long_line(
            target=joined,
            previous_line=line_before,
            next_line=line_after,
            original=original_text)
    except (SyntaxError, tokenize.TokenError):
        return self.fix_long_line_physically(result)

    if not fixed:
        return []

    # Blank out the old lines and store the replacement in the first slot.
    for row in range(first, last + 1):
        self.source[row] = ''
    self.source[first] = fixed
    return range(first + 1, last + 1)
1010
def fix_long_line_physically(self, result):
    """Try to make lines fit within --max-line-length characters.

    Only the single physical line named by ``result`` is considered.
    Returns ``[line_number]`` when the line was rewritten, else ``[]``
    (including when shortening raises a tokenizer/syntax error).
    """
    row = result['line'] - 1
    text = self.source[row]

    line_before = get_item(self.source, row - 1, default='')
    line_after = get_item(self.source, row + 1, default='')

    try:
        fixed = self.fix_long_line(
            target=text,
            previous_line=line_before,
            next_line=line_after,
            original=text)
    except (SyntaxError, tokenize.TokenError):
        return []

    if not fixed:
        return []

    self.source[row] = fixed
    return [row + 1]
1033
def fix_long_line(self, target, previous_line,
                  next_line, original):
    """Return a shortened replacement for ``target``, or a falsy value.

    ``(target, previous_line, next_line)`` triples that previously
    produced no usable result are remembered in
    ``self.long_line_ignore_cache`` and answered with ``[]`` right away.
    Comment lines are wrapped with ``shorten_comment`` only when the
    ``aggressive`` option is set; otherwise ``[]`` is returned.
    Returns ``None`` when no candidate differs meaningfully from
    ``original``.
    """
    key = (target, previous_line, next_line)
    if key in self.long_line_ignore_cache:
        return []

    if target.lstrip().startswith('#'):
        if not self.options.aggressive:
            return []
        # Wrap commented lines.
        return shorten_comment(
            line=target,
            max_line_length=self.options.max_line_length,
            last_comment=not next_line.lstrip().startswith('#'))

    candidate = get_fixed_long_line(
        target=target,
        previous_line=previous_line,
        original=original,
        indent_word=self.indent_word,
        max_line_length=self.options.max_line_length,
        aggressive=self.options.aggressive,
        experimental=self.options.experimental,
        verbose=self.options.verbose)

    if not candidate or code_almost_equal(original, candidate):
        # Nothing useful came out; remember that for next time.
        self.long_line_ignore_cache.add(key)
        return None

    return candidate
1064
def fix_e502(self, result):
    """Remove extraneous escape of newline.

    Strips trailing whitespace and backslashes from the reported line and
    terminates it with a single ``'\\n'``.
    """
    row, _, text = get_index_offset_contents(result, self.source)
    stripped = text.rstrip('\n\r \t\\')
    self.source[row] = '{}\n'.format(stripped)
1070
def fix_e701(self, result):
    """Put colon-separated compound statement on separate lines.

    The reported line is cut at ``result['column']``; the remainder is
    moved onto a new line indented one ``self.indent_word`` deeper than
    the original line.  Returns the two affected line numbers.
    """
    line_number = result['line']
    row = line_number - 1
    text = self.source[row]
    split_at = result['column']

    header = text[:split_at]
    body = text[split_at:].lstrip('\n\r \t\\')
    body_indent = _get_indentation(text) + self.indent_word

    self.source[row] = '{}\n{}{}'.format(header, body_indent, body)
    return [line_number, line_number + 1]
1082
def fix_e702(self, result, logical):
    """Put semicolon-separated compound statement on separate lines.

    Returns the list of modified line numbers (1-indexed), or ``[]`` when
    there is no logical line or the fix is deliberately skipped.
    """
    if not logical:
        return []  # pragma: no cover
    logical_lines = logical[2]

    # Avoid applying this when indented.
    # https://docs.python.org/reference/compound_stmts.html
    for line in logical_lines:
        if (result['id'] == 'E702' and ':' in line
                and STARTSWITH_INDENT_STATEMENT_REGEX.match(line)):
            if self.options.verbose:
                print(
                    '---> avoid fixing {error} with '
                    'other compound statements'.format(error=result['id']),
                    file=sys.stderr
                )
            return []

    line_index = result['line'] - 1
    target = self.source[line_index]

    if target.rstrip().endswith('\\'):
        # Normalize '1; \\\n2' into '1; 2'.
        # The continuation is merged into one physical line here; this
        # call does not split at the semicolon itself.
        self.source[line_index] = target.rstrip('\n \r\t\\')
        self.source[line_index + 1] = self.source[line_index + 1].lstrip()
        return [line_index + 1, line_index + 2]

    if target.rstrip().endswith(';'):
        # A trailing semicolon with nothing after it is simply dropped.
        self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
        return [line_index + 1]

    offset = result['column'] - 1
    first = target[:offset].rstrip(';').rstrip()
    # The second statement takes the indentation of the logical line's
    # first physical line.
    second = (_get_indentation(logical_lines[0]) +
              target[offset:].lstrip(';').lstrip())

    # Find inline comment.
    inline_comment = None
    if target[offset:].lstrip(';').lstrip()[:2] == '# ':
        inline_comment = target[offset:].lstrip(';')

    if inline_comment:
        # Only a comment follows the semicolon: keep it on the same line.
        self.source[line_index] = first + inline_comment
    else:
        self.source[line_index] = first + '\n' + second
    return [line_index + 1]
1130
def fix_e704(self, result):
    """Fix multiple statements on one line def.

    When ``STARTSWITH_DEF_REGEX`` matches the reported line, the text
    after the match is moved to a new line indented one
    ``self.indent_word`` deeper.  Lines that do not match are left alone.
    """
    row, _, text = get_index_offset_contents(result, self.source)
    header = STARTSWITH_DEF_REGEX.match(text)
    if not header:
        return

    body_indent = _get_indentation(text) + self.indent_word
    body = text[header.end(0):].lstrip()
    self.source[row] = '{}\n{}{}'.format(header.group(0), body_indent, body)
1141
def fix_e711(self, result):
    """Fix comparison with None.

    Rewrites the two-character operator at the reported column: ``==``
    becomes ``is`` and ``!=`` becomes ``is not``.  Returns ``[]`` when
    the operator would run past the end of the line or is neither of
    those; otherwise returns ``None`` after updating the source.
    """
    row, column, text = get_index_offset_contents(result, self.source)

    operator_end = column + 2
    if operator_end >= len(text):
        return []

    replacements = {'==': 'is', '!=': 'is not'}
    new_operator = replacements.get(text[column:operator_end].strip())
    if new_operator is None:
        return []

    lhs = text[:column].rstrip()
    rhs = text[operator_end:].lstrip()
    self.source[row] = ' '.join([lhs, new_operator, rhs])
1163
def fix_e712(self, result):
    """Fix (trivial case of) comparison with boolean.

    Handled forms:

    * ``if <expr> == False:`` and ``if <expr> != True:`` on a line of
      their own become ``if not <expr>:``.
    * ``<left> == True<rest>`` and ``<left> != False<rest>`` drop the
      comparison and keep ``<left><rest>``.

    Returns ``[]`` when the line is not one of these simple cases;
    otherwise returns ``None`` after updating ``self.source``.
    """
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)

    # Handle very easy "not" special cases.
    if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
        self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
                                         r'if not \1:', target, count=1)
    elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
        self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
                                         r'if not \1:', target, count=1)
    else:
        right_offset = offset + 2
        if right_offset >= len(target):
            return []

        left = target[:offset].rstrip()
        center = target[offset:right_offset]
        right = target[right_offset:].lstrip()

        # Handle simple cases only.
        new_right = None
        if center.strip() == '==':
            if re.match(r'\bTrue\b', right):
                new_right = re.sub(r'\bTrue\b *', '', right, count=1)
        elif center.strip() == '!=':
            if re.match(r'\bFalse\b', right):
                new_right = re.sub(r'\bFalse\b *', '', right, count=1)

        if new_right is None:
            return []

        # ``new_right`` is empty when the boolean literal was the last
        # thing on a final line without a trailing newline; indexing it
        # unguarded would raise IndexError.
        if new_right and new_right[0].isalnum():
            new_right = ' ' + new_right

        self.source[line_index] = left + new_right
1201
def fix_e713(self, result):
    """Fix (trivial case of) non-membership check.

    Only the part of the line from the reported column onwards is
    rewritten; the text before it is re-attached unchanged.
    """
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)

    # to convert once 'not in' -> 'in'
    # NOTE(review): COMPARE_NEGATIVE_REGEX_THROUGH is defined elsewhere;
    # presumably it finds an already-correct negative comparison that must
    # be hidden from the second regex below -- confirm.
    before_target = target[:offset]
    target = target[offset:]
    match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
    notin_pos_start, notin_pos_end = 0, 0
    if match_notin:
        notin_pos_start = match_notin.start(1)
        notin_pos_end = match_notin.end()
        target = '{}{} {}'.format(
            target[:notin_pos_start], 'in', target[notin_pos_end:])

    # fix 'not in'
    match = COMPARE_NEGATIVE_REGEX.search(target)
    if match:
        if match.group(3) == 'in':
            pos_start = match.start(1)
            # Output order: text before the column, text before the match,
            # group 2, group 1, group 3, then the rest of the line.
            new_target = '{5}{0}{1} {2} {3} {4}'.format(
                target[:pos_start], match.group(2), match.group(1),
                match.group(3), target[match.end():], before_target)
            if match_notin:
                # revert 'in' -> 'not in'
                # Positions were measured relative to the sliced target, so
                # ``offset`` is added back to index into the full line.
                pos_start = notin_pos_start + offset
                pos_end = notin_pos_end + offset - 4  # len('not ')
                new_target = '{}{} {}'.format(
                    new_target[:pos_start], 'not in', new_target[pos_end:])
            self.source[line_index] = new_target
1233
def fix_e714(self, result):
    """Fix object identity should be 'is not' case.

    Only the part of the line from the reported column onwards is
    rewritten; the text before it is re-attached unchanged.
    """
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)

    # to convert once 'is not' -> 'is'
    # NOTE(review): the temporary replacement text written below is the
    # literal 'in', not 'is'; it is overwritten with 'is not' further down
    # when the main regex matches -- confirm this placeholder is intended.
    before_target = target[:offset]
    target = target[offset:]
    match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
    isnot_pos_start, isnot_pos_end = 0, 0
    if match_isnot:
        isnot_pos_start = match_isnot.start(1)
        isnot_pos_end = match_isnot.end()
        target = '{}{} {}'.format(
            target[:isnot_pos_start], 'in', target[isnot_pos_end:])

    match = COMPARE_NEGATIVE_REGEX.search(target)
    if match:
        if match.group(3).startswith('is'):
            pos_start = match.start(1)
            # Output order: text before the column, text before the match,
            # group 2, group 3, group 1, then the rest of the line.
            new_target = '{5}{0}{1} {2} {3} {4}'.format(
                target[:pos_start], match.group(2), match.group(3),
                match.group(1), target[match.end():], before_target)
            if match_isnot:
                # revert 'is' -> 'is not'
                # Positions were measured relative to the sliced target, so
                # ``offset`` is added back to index into the full line.
                pos_start = isnot_pos_start + offset
                pos_end = isnot_pos_end + offset - 4  # len('not ')
                new_target = '{}{} {}'.format(
                    new_target[:pos_start], 'is not', new_target[pos_end:])
            self.source[line_index] = new_target
1264
def fix_e722(self, result):
    """Fix bare except.

    When ``BARE_EXCEPT_REGEX`` is found in the reported line, the text
    from the reported column up to the end of the match is replaced by
    ``except BaseException:``.
    """
    row, _, text = get_index_offset_contents(result, self.source)
    found = BARE_EXCEPT_REGEX.search(text)
    if not found:
        return

    prefix = text[:result['column'] - 1]
    suffix = text[found.end():]
    self.source[row] = '{}{}{}'.format(
        prefix, "except BaseException:", suffix)
1274
def fix_e731(self, result):
    """Fix do not assign a lambda expression check.

    When ``LAMBDA_REGEX`` is found in the reported line, the match is
    replaced by a one-line ``def`` built from its first two groups, with
    the remainder of the line as the returned expression.
    """
    row, _, text = get_index_offset_contents(result, self.source)
    found = LAMBDA_REGEX.search(text)
    if not found:
        return

    prefix = text[:found.start(0)]
    expression = text[found.end():].lstrip()
    self.source[row] = '{}def {}({}): return {}'.format(
        prefix, found.group(1), found.group(2), expression)
1285
def fix_w291(self, result):
    """Remove trailing whitespace.

    The reported line is right-stripped and terminated with ``'\\n'``.
    """
    row = result['line'] - 1
    self.source[row] = '{}\n'.format(self.source[row].rstrip())
1290
def fix_w391(self, _):
    """Remove trailing blank lines.

    Drops every whitespace-only line at the end of ``self.source`` and
    returns the 1-indexed range covering all lines of the original source.
    """
    total = len(self.source)

    # Walk backwards from the end until a line with content is found.
    keep = total
    while keep > 0 and not self.source[keep - 1].rstrip():
        keep -= 1

    self.source = self.source[:keep]
    return range(1, total + 1)
1304
def fix_w503(self, result):
    """Move a binary operator that starts a line to the end of an earlier line.

    The first whitespace-separated word of the reported line is removed
    from it and inserted into the closest preceding line that is not a
    comment-only line, before that line's newline or before its inline
    comment.  Returns ``None``; nothing is changed when the first word is
    not a binary operator.
    """
    (line_index, _, target) = get_index_offset_contents(result,
                                                        self.source)
    one_string_token = target.split()[0]
    try:
        ts = generate_tokens(one_string_token)
    except (SyntaxError, tokenize.TokenError):
        return
    if not _is_binary_operator(ts[0][0], one_string_token):
        return
    # find comment
    comment_index = 0
    found_not_comment_only_line = False
    comment_only_linenum = 0
    for i in range(5):
        # NOTE: try to parse code in 5 times
        if (line_index - i) < 0:
            break
        from_index = line_index - i - 1
        if from_index < 0 or len(self.source) <= from_index:
            break
        to_index = line_index + 1
        strip_line = self.source[from_index].lstrip()
        # Count comment-only lines sitting directly above the operator
        # line; they are skipped when choosing the line to modify.
        if (
            not found_not_comment_only_line and
            strip_line and strip_line[0] == '#'
        ):
            comment_only_linenum += 1
            continue
        found_not_comment_only_line = True
        try:
            ts = generate_tokens("".join(self.source[from_index:to_index]))
        except (SyntaxError, tokenize.TokenError):
            continue
        newline_count = 0
        newline_index = []
        for index, t in enumerate(ts):
            if t[0] in (tokenize.NEWLINE, tokenize.NL):
                newline_index.append(index)
                newline_count += 1
        # With more than two newline tokens, only scan from the
        # third-from-last one onwards.
        if newline_count > 2:
            tts = ts[newline_index[-3]:]
        else:
            tts = ts
        old = []
        for t in tts:
            if t[0] in (tokenize.NEWLINE, tokenize.NL):
                newline_count -= 1
            if newline_count <= 1:
                break
            # A comment that follows a non-NL token is an inline comment;
            # remember the end column of the token before it.
            if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
                comment_index = old[3][1]
                break
            old = t
        break
    i = target.index(one_string_token)
    fix_target_line = line_index - 1 - comment_only_linenum
    # Drop the operator (and the whitespace after it) from the reported line.
    self.source[line_index] = '{}{}'.format(
        target[:i], target[i + len(one_string_token):].lstrip())
    nl = find_newline(self.source[fix_target_line:line_index])
    before_line = self.source[fix_target_line]
    bl = before_line.index(nl)
    if comment_index:
        # Insert the operator between the code and its inline comment.
        self.source[fix_target_line] = '{} {} {}'.format(
            before_line[:comment_index], one_string_token,
            before_line[comment_index + 1:])
    else:
        if before_line[:bl].endswith("#"):
            # special case
            # see: https://github.com/hhatto/autopep8/issues/503
            self.source[fix_target_line] = '{}{} {}'.format(
                before_line[:bl-2], one_string_token, before_line[bl-2:])
        else:
            # Append the operator just before the line's newline.
            self.source[fix_target_line] = '{} {}{}'.format(
                before_line[:bl], one_string_token, before_line[bl:])
1380
def fix_w504(self, result):
    """Move a binary operator that ends a line to the start of the next line.

    The operator is located by tokenizing the reported line together with
    up to four following lines.  Returns ``None``; nothing is changed when
    no operator is found on the first tokenized row.
    """
    (line_index, _, target) = get_index_offset_contents(result,
                                                        self.source)
    # NOTE: the position is not reported correctly by pycodestyle==2.4.0,
    # so the operator is searched for here instead.
    comment_index = 0
    operator_position = None    # (start_position, end_position)
    for i in range(1, 6):
        to_index = line_index + i
        try:
            ts = generate_tokens("".join(self.source[line_index:to_index]))
        except (SyntaxError, tokenize.TokenError):
            continue
        newline_count = 0
        newline_index = []
        for index, t in enumerate(ts):
            # Keep the last operator that starts and ends on row 1, i.e. on
            # the reported line itself.
            if _is_binary_operator(t[0], t[1]):
                if t[2][0] == 1 and t[3][0] == 1:
                    operator_position = (t[2][1], t[3][1])
            elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
                if t[2][0] == 1 and t[3][0] == 1:
                    operator_position = (t[2][1], t[3][1])
            elif t[0] in (tokenize.NEWLINE, tokenize.NL):
                newline_index.append(index)
                newline_count += 1
        if newline_count > 2:
            tts = ts[:newline_index[-3]]
        else:
            tts = ts
        old = []
        for t in tts:
            # Remember where the token before the first comment ends.
            if tokenize.COMMENT == t[0] and old:
                comment_row, comment_index = old[3]
                break
            old = t
        break
    if not operator_position:
        return
    target_operator = target[operator_position[0]:operator_position[1]]

    # ``comment_row`` is only bound when a comment was found; the
    # short-circuit on ``comment_index`` guards the access.
    if comment_index and comment_row == 1:
        self.source[line_index] = '{}{}'.format(
            target[:operator_position[0]].rstrip(),
            target[comment_index:])
    else:
        self.source[line_index] = '{}{}{}'.format(
            target[:operator_position[0]].rstrip(),
            target[operator_position[1]:].lstrip(),
            target[operator_position[1]:])

    # Re-insert the operator after the next line's leading whitespace.
    next_line = self.source[line_index + 1]
    next_line_indent = 0
    m = re.match(r'\s*', next_line)
    if m:
        next_line_indent = m.span()[1]
    self.source[line_index + 1] = '{}{} {}'.format(
        next_line[:next_line_indent], target_operator,
        next_line[next_line_indent:])
1438
def fix_w605(self, result):
    """Insert an extra backslash one character after the reported column."""
    row, column, text = get_index_offset_contents(result, self.source)
    split_at = column + 1
    self.source[row] = '{}\\{}'.format(text[:split_at], text[split_at:])
1444
1445
def get_module_imports_on_top_of_file(source, import_line_index):
    """Return the index of the line in front of which an import can be put.

    ``source`` is a list of lines and ``import_line_index`` is the index
    of the import being moved; that line itself is never chosen.  Returns
    0 when no suitable line is found.

    example:
      > 0: import sys
        1: import os
        2:
        3: def function():
    """
    def is_string_literal(line):
        # Strip an optional u/U/b/B prefix, then an optional r/R prefix,
        # and check whether a quote character follows.
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    def is_future_import(line):
        # Raises SyntaxError (from ast.parse) when ``line`` is incomplete.
        nodes = ast.parse(line)
        for n in nodes.body:
            if isinstance(n, ast.ImportFrom) and n.module == '__future__':
                return True
        return False

    def has_future_import(source):
        # ``source`` yields (index, line) pairs.  Text is accumulated until
        # it parses; the answer for that first parsable statement is
        # returned together with the number of extra lines consumed.
        offset = 0
        line = ''
        for _, next_line in source:
            for line_part in next_line.strip().splitlines(True):
                line = line + line_part
                try:
                    return is_future_import(line), offset
                except SyntaxError:
                    continue
            offset += 1
        return False, offset

    allowed_try_keywords = ('try', 'except', 'else', 'finally')
    in_docstring = False
    docstring_kind = '"""'
    # A single shared iterator, so that has_future_import() can consume
    # continuation lines from the same stream.
    source_stream = iter(enumerate(source))
    for cnt, line in source_stream:
        if not in_docstring:
            m = DOCSTRING_START_REGEX.match(line.lstrip())
            if m is not None:
                in_docstring = True
                docstring_kind = m.group('kind')
                remain = line[m.end(): m.endpos].rstrip()
                if remain[-3:] == docstring_kind:  # one line doc
                    in_docstring = False
                continue
        if in_docstring:
            if line.rstrip()[-3:] == docstring_kind:
                in_docstring = False
            continue

        # Blank lines and column-0 comments are skipped.
        if not line.rstrip():
            continue
        elif line.startswith('#'):
            continue

        if line.startswith('import '):
            if cnt == import_line_index:
                continue
            return cnt
        elif line.startswith('from '):
            if cnt == import_line_index:
                continue
            hit, offset = has_future_import(
                itertools.chain([(cnt, line)], source_stream)
            )
            if hit:
                # move to the back
                return cnt + offset + 1
            return cnt
        elif pycodestyle.DUNDER_REGEX.match(line):
            return cnt
        elif any(line.startswith(kw) for kw in allowed_try_keywords):
            continue
        elif is_string_literal(line):
            return cnt
        else:
            return cnt
    return 0
1529
1530
def get_index_offset_contents(result, source):
    """Return (line_index, column_offset, line_contents).

    ``result['line']`` and ``result['column']`` are 1-based; the returned
    index and offset are 0-based.
    """
    row = result['line'] - 1
    column = result['column'] - 1
    return row, column, source[row]
1537
1538
def get_fixed_long_line(target, previous_line, original,
                        indent_word=' ', max_line_length=79,
                        aggressive=False, experimental=False, verbose=False):
    """Break up long line and return result.

    Candidate rewrites come from ``shorten_line``; ``target`` and
    ``original`` are always added to the pool.  The pool is ordered by
    ``line_shortening_rank`` and the best entry is returned, unless its
    longest line is longer than the longest line of ``original``, in
    which case ``None`` is returned.

    """
    # NOTE(review): this view of the file collapses runs of whitespace, so
    # confirm the ``indent_word`` default literal against the real source.
    indent = _get_indentation(target)
    source = target[len(indent):]
    assert source.lstrip() == source
    assert not target.lstrip().startswith('#')

    # Check for partial multiline.
    tokens = list(generate_tokens(source))

    generated = shorten_line(
        tokens, source, indent,
        indent_word,
        max_line_length,
        aggressive=aggressive,
        experimental=experimental,
        previous_line=previous_line)

    def rank(candidate):
        return line_shortening_rank(
            candidate,
            indent_word,
            max_line_length,
            experimental=experimental)

    # Sort alphabetically first so that ties in rank resolve
    # deterministically (the second sort is stable).
    pool = sorted(set(generated).union([target, original]))
    candidates = sorted(pool, key=rank)

    if verbose >= 4:
        print(('-' * 79 + '\n').join([''] + candidates + ['']),
              file=wrap_output(sys.stderr, 'utf-8'))

    if not candidates:
        return None

    best_candidate = candidates[0]

    # Don't allow things to get longer.
    if longest_line_length(best_candidate) > longest_line_length(original):
        return None

    return best_candidate
1585
1586
def longest_line_length(code):
    """Return length of longest line (0 for empty input)."""
    if not len(code):
        return 0
    return max(map(len, code.splitlines()))
1592
1593
def join_logical_line(logical_line):
    """Return single line based on logical line input.

    The leading indentation is kept, the rest is re-emitted from its
    tokens without newlines, and a single ``'\\n'`` is appended.
    """
    leading = _get_indentation(logical_line)
    tokens = generate_tokens(logical_line.lstrip())
    return '{}{}\n'.format(leading, untokenize_without_newlines(tokens))
1600
1601
def untokenize_without_newlines(tokens):
    """Return source code based on tokens.

    Each token is a sequence whose items 1, 2 and 3 are the token string,
    the ``(row, column)`` start and the ``(row, column)`` end.  Newline
    tokens are replaced by a single space and gaps between tokens are
    collapsed to one space; trailing whitespace is stripped.
    """
    text = ''
    previous_row = 0
    previous_column = -1

    for current in tokens:
        piece = current[1]
        start_row, start_column = current[2]
        end_row, end_column = current[3]

        # A token on a new row is measured from column 0.
        if start_row > previous_row:
            previous_column = 0

        is_newline = piece == '\n'
        needs_gap = start_column > previous_column or is_newline
        if needs_gap and not text.endswith(' '):
            text += ' '

        if not is_newline:
            text += piece

        previous_row, previous_column = end_row, end_column

    return text.rstrip()
1628
1629
def _find_logical(source_lines):
    """Return ``(logical_start, logical_end)`` position lists.

    Each entry is a ``(row, column)`` pair with a 0-based row.  A start is
    recorded for the first significant token after a statement end; an
    end is recorded for each statement-ending token seen outside brackets.
    """
    starts = []
    ends = []
    expecting_start = True
    depth = 0
    ignored = [tokenize.COMMENT, tokenize.DEDENT,
               tokenize.INDENT, tokenize.NL,
               tokenize.ENDMARKER]

    for tok in generate_tokens(''.join(source_lines)):
        kind = tok[0]
        if kind in ignored:
            continue

        if not depth:
            if kind in [tokenize.NEWLINE, tokenize.SEMI]:
                expecting_start = True
                ends.append((tok[3][0] - 1, tok[2][1]))
                continue
            if expecting_start:
                starts.append((tok[2][0] - 1, tok[2][1]))
                expecting_start = False

        # Track bracket nesting so that newlines inside brackets do not
        # end the logical line.
        if kind == tokenize.OP:
            if tok[1] in '([{':
                depth += 1
            elif tok[1] in '}])':
                depth -= 1

    return (starts, ends)
1654
1655
1656def _get_logical(source_lines, result, logical_start, logical_end):
1657 """Return the logical line corresponding to the result.
1658
1659 Assumes input is already E702-clean.
1660
1661 """
1662 row = result['line'] - 1
1663 col = result['column'] - 1
1664 ls = None
1665 le = None
1666 for i in range(0, len(logical_start), 1):
1667 assert logical_end
1668 x = logical_end[i]
1669 if x[0] > row or (x[0] == row and x[1] > col):
1670 le = x
1671 ls = logical_start[i]
1672 break
1673 if ls is None:
1674 return None
1675 original = source_lines[ls[0]:le[0] + 1]
1676 return ls, le, original
1677
1678
def get_item(items, index, default=None):
    """Return ``items[index]``, or ``default`` when ``index`` is negative
    or past the end."""
    in_bounds = 0 <= index < len(items)
    return items[index] if in_bounds else default
1684
1685
def reindent(source, indent_size, leave_tabs=False):
    """Reindent all lines.

    Thin wrapper that runs a ``Reindenter`` built from ``source`` and
    ``leave_tabs`` with the requested ``indent_size``.
    """
    return Reindenter(source, leave_tabs).run(indent_size)
1690
1691
def code_almost_equal(a, b):
    """Return True if code is similar.

    Blank lines are dropped, then the remaining lines are compared
    pairwise with all whitespace removed.

    """
    lines_a = split_and_strip_non_empty_lines(a)
    lines_b = split_and_strip_non_empty_lines(b)

    if len(lines_a) != len(lines_b):
        return False

    return all(
        ''.join(left.split()) == ''.join(right.split())
        for left, right in zip(lines_a, lines_b))
1709
1710
def split_and_strip_non_empty_lines(text):
    """Return the stripped lines of ``text``.

    Lines that are empty after stripping are left out.

    """
    stripped_lines = map(str.strip, text.splitlines())
    return list(filter(None, stripped_lines))
1718
1719
def refactor(source, fixer_names, ignore=None, filename=''):
    """Return refactored code using lib2to3.

    ``source`` is returned unchanged when lib2to3 fails to parse or
    decode it, or when the refactored text contains the ``ignore`` string
    although ``source`` did not.  A newline is temporarily appended for
    input that lacks a final one and removed again from the result.

    """
    missing_final_newline = source and source.rstrip("\r\n") == source
    input_source = source + "\n" if missing_final_newline else source

    from lib2to3 import pgen2
    try:
        new_text = refactor_with_2to3(input_source,
                                      fixer_names=fixer_names,
                                      filename=filename)
    except (pgen2.parse.ParseError,
            SyntaxError,
            UnicodeDecodeError,
            UnicodeEncodeError):
        return source

    if ignore and ignore in new_text and ignore not in source:
        return source

    return new_text.rstrip("\r\n") if missing_final_newline else new_text
1751
1752
def code_to_2to3(select, ignore, where='', verbose=False):
    """Return the set of lib2to3 fixer names for the enabled codes.

    Every entry of ``CODE_TO_2TO3`` whose code passes ``code_match`` for
    the given ``select``/``ignore`` contributes its fixers.  With
    ``verbose`` set, each applied code is announced on stderr.
    """
    fixes = set()
    for code, fixers in CODE_TO_2TO3.items():
        if not code_match(code, select=select, ignore=ignore):
            continue
        if verbose:
            print('---> Applying {} fix for {}'.format(where,
                                                       code.upper()),
                  file=sys.stderr)
        fixes.update(fixers)
    return fixes
1763
1764
def fix_2to3(source,
             aggressive=True, select=None, ignore=None, filename='',
             where='global', verbose=False):
    """Fix various deprecated code (via lib2to3).

    Returns ``source`` untouched unless ``aggressive`` is truthy.
    """
    if not aggressive:
        return source

    fixer_names = code_to_2to3(select=select or [],
                               ignore=ignore or [],
                               where=where,
                               verbose=verbose)
    return refactor(source, fixer_names, filename=filename)
1781
1782
def find_newline(source):
    """Return type of newline used in source.

    Input is a list of lines.  The most frequent line ending among
    ``CRLF``, ``CR`` and ``LF`` wins; ``LF`` is returned when no line has
    any of them.

    """
    assert not isinstance(source, str)

    counter = collections.defaultdict(int)
    for line in source:
        # CRLF must be tested before LF because it also ends with LF.
        for ending in (CRLF, CR, LF):
            if line.endswith(ending):
                counter[ending] += 1
                break

    if not counter:
        return LF
    # max() keeps the first-seen ending on ties, like a stable sort would.
    return max(counter, key=counter.get)
1801
1802
def _get_indentword(source):
    """Return indentation type.

    This is the string of the first INDENT token found in ``source``; the
    default below is kept when there is none or when tokenizing fails.
    """
    # NOTE(review): this view of the file collapses runs of whitespace, so
    # confirm the default literal against the real source.
    indent_word = ' '  # Default in case source has no indentation
    try:
        for t in generate_tokens(source):
            if t[0] == token.INDENT:
                indent_word = t[1]
                break
    except (SyntaxError, tokenize.TokenError):
        pass
    return indent_word
1814
1815
1816def _get_indentation(line):
1817 """Return leading whitespace."""
1818 if line.strip():
1819 non_whitespace_index = len(line) - len(line.lstrip())
1820 return line[:non_whitespace_index]
1821
1822 return ''
1823
1824
def get_diff_text(old, new, filename):
    """Return text of unified diff between old and new.

    ``old`` and ``new`` are lists of lines; the diff headers name them
    ``original/<filename>`` and ``fixed/<filename>``.
    """
    newline = '\n'
    marker = newline + r'\ No newline at end of file' + newline

    text = ''
    for diff_line in difflib.unified_diff(
            old, new,
            'original/' + filename,
            'fixed/' + filename,
            lineterm=newline):
        text += diff_line

        # Work around missing newline (http://bugs.python.org/issue2142).
        if text and not diff_line.endswith(newline):
            text += marker

    return text
1843
1844
1845def _priority_key(pep8_result):
1846 """Key for sorting PEP8 results.
1847
1848 Global fixes should be done first. This is important for things like
1849 indentation.
1850
1851 """
1852 priority = [
1853 # Fix multiline colon-based before semicolon based.
1854 'e701',
1855 # Break multiline statements early.
1856 'e702',
1857 # Things that make lines longer.
1858 'e225', 'e231',
1859 # Remove extraneous whitespace before breaking lines.
1860 'e201',
1861 # Shorten whitespace in comment before resorting to wrapping.
1862 'e262'
1863 ]
1864 middle_index = 10000
1865 lowest_priority = [
1866 # We need to shorten lines last since the logical fixer can get in a
1867 # loop, which causes us to exit early.
1868 'e501',
1869 ]
1870 key = pep8_result['id'].lower()
1871 try:
1872 return priority.index(key)
1873 except ValueError:
1874 try:
1875 return middle_index + lowest_priority.index(key) + 1
1876 except ValueError:
1877 return middle_index
1878
1879
def shorten_line(tokens, source, indentation, indent_word, max_line_length,
                 aggressive=False, experimental=False, previous_line=''):
    """Separate line at OPERATOR.

    Multiple candidates will be yielded: first those of
    ``_shorten_line``, then (with ``aggressive``) one per operator group
    in ``SHORTEN_OPERATOR_GROUPS``, then (with ``experimental``) those of
    ``_shorten_line_at_tokens_new``.

    """
    yield from _shorten_line(tokens=tokens,
                             source=source,
                             indentation=indentation,
                             indent_word=indent_word,
                             aggressive=aggressive,
                             previous_line=previous_line)

    if aggressive:
        for key_token_strings in SHORTEN_OPERATOR_GROUPS:
            shortened = _shorten_line_at_tokens(
                tokens=tokens,
                source=source,
                indentation=indentation,
                indent_word=indent_word,
                key_token_strings=key_token_strings,
                aggressive=aggressive)

            # Skip groups that produced nothing or left the line as it was.
            if shortened is None or shortened == source:
                continue
            yield shortened

    if experimental:
        yield from _shorten_line_at_tokens_new(
            tokens=tokens,
            source=source,
            indentation=indentation,
            max_line_length=max_line_length)
1916
1917
def _shorten_line(tokens, source, indentation, indent_word,
                  aggressive=False, previous_line=''):
    """Separate line at OPERATOR.

    The input is expected to be free of newlines except for inside multiline
    strings and at the end.

    Multiple candidates will be yielded: one that moves an inline comment
    onto its own line above the code, and one for every operator token
    (other than ``=``) after which breaking the line passes the syntax
    check.

    """
    for (token_type,
         token_string,
         start_offset,
         end_offset) in token_offsets(tokens):

        if (
            token_type == tokenize.COMMENT and
            not is_probably_part_of_multiline(previous_line) and
            not is_probably_part_of_multiline(source) and
            not source[start_offset + 1:].strip().lower().startswith(
                ('noqa', 'pragma:', 'pylint:'))
        ):
            # Move inline comments to previous line.
            first = source[:start_offset]
            second = source[start_offset:]
            yield (indentation + second.strip() + '\n' +
                   indentation + first.strip() + '\n')
        elif token_type == token.OP and token_string != '=':
            # Don't break on '=' after keyword as this violates PEP 8.

            assert token_type != token.INDENT

            first = source[:end_offset]

            # Choose the continuation indent: unchanged for an empty
            # "()", one indent word after a trailing "(", aligned with the
            # first "(" when one occurs earlier, else one indent word.
            second_indent = indentation
            if (first.rstrip().endswith('(') and
                    source[end_offset:].lstrip().startswith(')')):
                pass
            elif first.rstrip().endswith('('):
                second_indent += indent_word
            elif '(' in first:
                second_indent += ' ' * (1 + first.find('('))
            else:
                second_indent += indent_word

            second = (second_indent + source[end_offset:].lstrip())
            # Nothing (or only a comment) would be left for the new line.
            if (
                not second.strip() or
                second.lstrip().startswith('#')
            ):
                continue

            # Do not begin a line with a comma
            if second.lstrip().startswith(','):
                continue
            # Do not end a line with a dot
            if first.rstrip().endswith('.'):
                continue
            # A break after an arithmetic operator gets an explicit
            # backslash continuation.
            if token_string in '+-*/':
                fixed = first + ' \\' + '\n' + second
            else:
                fixed = first + '\n' + second

            # Only fix if syntax is okay.
            if check_syntax(normalize_multiline(fixed)
                            if aggressive else fixed):
                yield indentation + fixed
1985
1986
1987def _is_binary_operator(token_type, text):
1988 return ((token_type == tokenize.OP or text in ['and', 'or']) and
1989 text not in '()[]{},:.;@=%~')
1990
1991
# A convenient way to handle tokens: a named 5-tuple holding the token type,
# the token string, the start position, the end position and the line.
Token = collections.namedtuple('Token', ['token_type', 'token_string',
                                         'spos', 'epos', 'line'])
1995
1996
class ReformattedLines(object):

    """The reflowed lines of atoms.

    Each part of the line is represented as an "atom." They can be moved
    around when need be to get the optimal formatting.

    Internally the output is one flat list (``self._lines``) that mixes
    real items with the private ``_Indent``, ``_Space`` and ``_LineBreak``
    marker objects; ``emit()`` turns that list into text.

    """

    ###########################################################################
    # Private Classes

    class _Indent(object):

        """Represent an indentation in the atom stream."""

        def __init__(self, indent_amt):
            self._indent_amt = indent_amt

        def emit(self):
            return ' ' * self._indent_amt

        @property
        def size(self):
            return self._indent_amt

    class _Space(object):

        """Represent a space in the atom stream."""

        def emit(self):
            return ' '

        @property
        def size(self):
            return 1

    class _LineBreak(object):

        """Represent a line break in the atom stream."""

        def emit(self):
            return '\n'

        @property
        def size(self):
            return 0

    def __init__(self, max_line_length):
        self._max_line_length = max_line_length
        # Flat stream of items and whitespace markers, in output order.
        self._lines = []
        # Number of currently open brackets among the added items.
        self._bracket_depth = 0
        # The two most recently added items (never whitespace markers).
        self._prev_item = None
        self._prev_prev_item = None

    def __repr__(self):
        return self.emit()

    ###########################################################################
    # Public Methods

    def add(self, obj, indent_amt, break_after_open_bracket):
        """Add an atom or a container, reflowing as needed."""
        if isinstance(obj, Atom):
            self._add_item(obj, indent_amt)
            return

        self._add_container(obj, indent_amt, break_after_open_bracket)

    def add_comment(self, item):
        """Append a comment, padding with up to two spaces before it."""
        num_spaces = 2
        if len(self._lines) > 1:
            if isinstance(self._lines[-1], self._Space):
                num_spaces -= 1
            if len(self._lines) > 2:
                if isinstance(self._lines[-2], self._Space):
                    num_spaces -= 1

        while num_spaces > 0:
            self._lines.append(self._Space())
            num_spaces -= 1
        self._lines.append(item)

    def add_indent(self, indent_amt):
        """Append an indentation marker of ``indent_amt`` columns."""
        self._lines.append(self._Indent(indent_amt))

    def add_line_break(self, indent):
        """Append a line break followed by an indent as wide as ``indent``."""
        self._lines.append(self._LineBreak())
        self.add_indent(len(indent))

    def add_line_break_at(self, index, indent_amt):
        """Insert a line break and indent before the entry at ``index``."""
        self._lines.insert(index, self._LineBreak())
        self._lines.insert(index + 1, self._Indent(indent_amt))

    def add_space_if_needed(self, curr_text, equal=False):
        """Append a space before ``curr_text`` when the spacing rules ask.

        Nothing is added when the stream is empty or already ends in a
        whitespace marker. ``equal`` requests spaces around '='.

        """
        if (
            not self._lines or isinstance(
                self._lines[-1], (self._LineBreak, self._Indent, self._Space))
        ):
            return

        prev_text = str(self._prev_item)
        prev_prev_text = (
            str(self._prev_prev_item) if self._prev_prev_item else '')

        if (
            # The previous item was a keyword or identifier and the current
            # item isn't an operator that doesn't require a space.
            ((self._prev_item.is_keyword or self._prev_item.is_string or
              self._prev_item.is_name or self._prev_item.is_number) and
             (curr_text[0] not in '([{.,:}])' or
              (curr_text[0] == '=' and equal))) or

            # Don't place spaces around a '.', unless it's in an 'import'
            # statement.
            ((prev_prev_text != 'from' and prev_text[-1] != '.' and
              curr_text != 'import') and

             # Don't place a space before a colon.
             curr_text[0] != ':' and

             # Don't split up ending brackets by spaces.
             ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or

              # Put a space after a colon or comma.
              prev_text[-1] in ':,' or

              # Put space around '=' if asked to.
              (equal and prev_text == '=') or

              # Put spaces around non-unary arithmetic operators.
              ((self._prev_prev_item and
                (prev_text not in '+-' and
                 (self._prev_prev_item.is_name or
                  self._prev_prev_item.is_number or
                  self._prev_prev_item.is_string)) and
                prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in')))))
        ):
            self._lines.append(self._Space())

    def previous_item(self):
        """Return the previous non-whitespace item."""
        return self._prev_item

    def fits_on_current_line(self, item_extent):
        """Return True if ``item_extent`` more columns stay within the limit."""
        return self.current_size() + item_extent <= self._max_line_length

    def current_size(self):
        """Return the width of the current line, including its indentation.

        Sizes are summed backwards from the end of the stream up to (and
        including) the most recent line break, whose size is zero.

        """
        size = 0
        for item in reversed(self._lines):
            size += item.size
            if isinstance(item, self._LineBreak):
                break

        return size

    def line_empty(self):
        """Return True if the stream ends in a line break or an indent.

        An entirely empty stream yields False. The result is always a real
        bool (the empty list is never leaked to the caller).

        """
        return bool(self._lines and
                    isinstance(self._lines[-1],
                               (self._LineBreak, self._Indent)))

    def emit(self):
        """Render the stream as text with trailing whitespace stripped."""
        string = ''
        for item in self._lines:
            if isinstance(item, self._LineBreak):
                # Drop trailing spaces on the line that is being closed.
                string = string.rstrip()
            string += item.emit()

        return string.rstrip() + '\n'

    ###########################################################################
    # Private Methods

    def _add_item(self, item, indent_amt):
        """Add an item to the line.

        Reflow the line to get the best formatting after the item is
        inserted. The bracket depth indicates if the item is being
        inserted inside of a container or not.

        """
        if self._prev_item and self._prev_item.is_string and item.is_string:
            # Place consecutive string literals on separate lines.
            self._lines.append(self._LineBreak())
            self._lines.append(self._Indent(indent_amt))

        item_text = str(item)
        if self._lines and self._bracket_depth:
            # Adding the item into a container.
            self._prevent_default_initializer_splitting(item, indent_amt)

            if item_text in '.,)]}':
                self._split_after_delimiter(item, indent_amt)

        elif self._lines and not self.line_empty():
            # Adding the item outside of a container.
            if self.fits_on_current_line(len(item_text)):
                self._enforce_space(item)

            else:
                # Line break for the new item.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))

        self._lines.append(item)
        self._prev_item, self._prev_prev_item = item, self._prev_item

        if item_text in '([{':
            self._bracket_depth += 1

        elif item_text in '}])':
            self._bracket_depth -= 1
            assert self._bracket_depth >= 0

    def _add_container(self, container, indent_amt, break_after_open_bracket):
        """Add a container, choosing where it starts and how it is indented."""
        actual_indent = indent_amt + 1

        if (
            str(self._prev_item) != '=' and
            not self.line_empty() and
            not self.fits_on_current_line(
                container.size + self._bracket_depth + 2)
        ):

            if str(container)[0] == '(' and self._prev_item.is_name:
                # Don't split before the opening bracket of a call.
                break_after_open_bracket = True
                actual_indent = indent_amt + 4
            elif (
                break_after_open_bracket or
                str(self._prev_item) not in '([{'
            ):
                # If the container doesn't fit on the current line and the
                # current line isn't empty, place the container on the next
                # line.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))
                break_after_open_bracket = False
        else:
            actual_indent = self.current_size() + 1
            break_after_open_bracket = False

        if isinstance(container, (ListComprehension, IfExpression)):
            actual_indent = indent_amt

        # Increase the continued indentation only if recursing on a
        # container.
        container.reflow(self, ' ' * actual_indent,
                         break_after_open_bracket=break_after_open_bracket)

    def _prevent_default_initializer_splitting(self, item, indent_amt):
        """Prevent splitting between a default initializer.

        When there is a default initializer, it's best to keep it all on
        the same line. It's nicer and more readable, even if it goes
        over the maximum allowable line length. This goes back along the
        current line to determine if we have a default initializer, and,
        if so, to remove extraneous whitespaces and add a line
        break/indent before it if needed.

        """
        if str(item) == '=':
            # This is the assignment in the initializer. Just remove spaces for
            # now.
            self._delete_whitespace()
            return

        if (not self._prev_item or not self._prev_prev_item or
                str(self._prev_item) != '='):
            return

        self._delete_whitespace()
        prev_prev_index = self._lines.index(self._prev_prev_item)

        if (
            isinstance(self._lines[prev_prev_index - 1], self._Indent) or
            self.fits_on_current_line(item.size + 1)
        ):
            # The default initializer is already the only item on this line.
            # Don't insert a newline here.
            return

        # Replace the space with a newline/indent combo.
        if isinstance(self._lines[prev_prev_index - 1], self._Space):
            del self._lines[prev_prev_index - 1]

        self.add_line_break_at(self._lines.index(self._prev_prev_item),
                               indent_amt)

    def _split_after_delimiter(self, item, indent_amt):
        """Split the line only after a delimiter."""
        self._delete_whitespace()

        if self.fits_on_current_line(item.size):
            return

        last_space = None
        for current_item in reversed(self._lines):
            if (
                last_space and
                (not isinstance(current_item, Atom) or
                 not current_item.is_colon)
            ):
                break
            else:
                last_space = None
            if isinstance(current_item, self._Space):
                last_space = current_item
            if isinstance(current_item, (self._LineBreak, self._Indent)):
                return

        if not last_space:
            return

        self.add_line_break_at(self._lines.index(last_space), indent_amt)

    def _enforce_space(self, item):
        """Enforce a space in certain situations.

        There are cases where we will want a space where normally we
        wouldn't put one. This just enforces the addition of a space.

        """
        if isinstance(self._lines[-1],
                      (self._Space, self._LineBreak, self._Indent)):
            return

        if not self._prev_item:
            return

        item_text = str(item)
        prev_text = str(self._prev_item)

        # Prefer a space around a '.' in an import statement, and between the
        # 'import' and '('.
        if (
            (item_text == '.' and prev_text == 'from') or
            (item_text == 'import' and prev_text == '.') or
            (item_text == '(' and prev_text == 'import')
        ):
            self._lines.append(self._Space())

    def _delete_whitespace(self):
        """Delete all whitespace from the end of the line.

        Stops cleanly when the stream becomes empty instead of indexing
        into an empty list.

        """
        while self._lines and isinstance(
                self._lines[-1],
                (self._Space, self._LineBreak, self._Indent)):
            del self._lines[-1]
2344
2345
class Atom(object):

    """A single token wrapper: the indivisible unit placed during reflow."""

    def __init__(self, atom):
        self._atom = atom

    def __repr__(self):
        return self._atom.token_string

    def __len__(self):
        return self.size

    def reflow(
        self, reflowed_lines, continued_indent, extent,
        break_after_open_bracket=False,
        is_list_comp_or_if_expr=False,
        next_is_dot=False
    ):
        """Place this atom into ``reflowed_lines``.

        Comments are handed to ``add_comment``. Anything else is preceded
        by either a line break (when it would overflow) or an optional
        space, and is then added.

        """
        if self._atom.token_type == tokenize.COMMENT:
            reflowed_lines.add_comment(self)
            return

        needed = extent if extent else self.size
        if self._atom.token_string not in ',:([{}])':
            # Some atoms will need an extra 1-sized space token after them.
            needed += 1

        if self._wants_line_break(reflowed_lines, needed,
                                  is_list_comp_or_if_expr, next_is_dot):
            # Something is already on the line and this atom would push it
            # over the maximum length, so continue on a fresh line.
            reflowed_lines.add_line_break(continued_indent)
        else:
            reflowed_lines.add_space_if_needed(str(self))

        reflowed_lines.add(self, len(continued_indent),
                           break_after_open_bracket)

    def _wants_line_break(self, reflowed_lines, needed,
                          in_comp_or_if_expr, next_is_dot):
        """Return True when this atom must start on a new line."""
        if in_comp_or_if_expr:
            return False
        if reflowed_lines.fits_on_current_line(needed):
            return False
        if next_is_dot and reflowed_lines.fits_on_current_line(self.size + 1):
            return False
        if reflowed_lines.line_empty():
            return False
        if self.is_colon:
            return False

        previous = reflowed_lines.previous_item()
        if previous and previous.is_name and str(self) == '(':
            # Never separate a name from the '(' that follows it.
            return False
        return True

    def emit(self):
        return self.__repr__()

    @property
    def is_keyword(self):
        """True if the token text is a Python keyword."""
        text = self._atom.token_string
        return keyword.iskeyword(text)

    @property
    def is_string(self):
        """True for STRING tokens."""
        return tokenize.STRING == self._atom.token_type

    @property
    def is_name(self):
        """True for NAME tokens."""
        return tokenize.NAME == self._atom.token_type

    @property
    def is_number(self):
        """True for NUMBER tokens."""
        return tokenize.NUMBER == self._atom.token_type

    @property
    def is_comma(self):
        """True if the token text is a comma."""
        return ',' == self._atom.token_string

    @property
    def is_colon(self):
        """True if the token text is a colon."""
        return ':' == self._atom.token_string

    @property
    def size(self):
        """Length of the token text."""
        text = self._atom.token_string
        return len(text)
2425
2426
class Container(object):

    """Shared behaviour for every grouped sequence of atoms/containers."""

    def __init__(self, items):
        self._items = items

    def __repr__(self):
        text = ''
        after_keyword = False
        tight_chars = tuple('([{,.:}])')

        for element in self._items:
            if element.is_comma:
                text += ', '
            elif element.is_colon:
                text += ': '
            else:
                element_text = str(element)
                # A space goes after a keyword, or between two pieces that
                # are not bracket/punctuation neighbours.
                wants_space = after_keyword or not (
                    text.endswith(tight_chars + (' ',)) or
                    element_text.startswith(tight_chars))
                if text and wants_space:
                    text += ' '
                text += element_text

            after_keyword = element.is_keyword
        return text

    def __iter__(self):
        for member in self._items:
            yield member

    def __getitem__(self, idx):
        return self._items[idx]

    def reflow(self, reflowed_lines, continued_indent,
               break_after_open_bracket=False):
        """Feed every item of this container into ``reflowed_lines``."""
        in_comp_or_if_expr = isinstance(
            self, (ListComprehension, IfExpression))
        indent_width = len(continued_indent)
        previous_was_container = False

        for (position, current) in enumerate(self._items):
            following = get_item(self._items, position + 1)

            if isinstance(current, Atom):
                current.reflow(reflowed_lines, continued_indent,
                               self._get_extent(position),
                               is_list_comp_or_if_expr=in_comp_or_if_expr,
                               next_is_dot=(following and
                                            str(following) == '.'))
                if previous_was_container and current.is_comma:
                    reflowed_lines.add_line_break(continued_indent)
                previous_was_container = False
            else:
                # Nested container.
                reflowed_lines.add(current, indent_width,
                                   break_after_open_bracket)
                previous_was_container = not isinstance(
                    current, (ListComprehension, IfExpression))

            if (
                break_after_open_bracket and position == 0 and
                # Keep empty containers together rather than splitting
                # them right after the opening bracket.
                str(current) == self.open_bracket and
                (not following or str(following) != self.close_bracket) and
                (len(self._items) != 3 or not isinstance(following, Atom))
            ):
                reflowed_lines.add_line_break(continued_indent)
                break_after_open_bracket = False
                continue

            after_following = get_item(self._items, position + 2)
            if (
                str(current) not in ['.', '%', 'in'] and
                following and not isinstance(following, Container) and
                str(following) != ':' and
                after_following and (
                    not isinstance(after_following, Atom) or
                    str(following) == 'not') and
                not reflowed_lines.line_empty() and
                not reflowed_lines.fits_on_current_line(
                    self._get_extent(position + 1) + 2)
            ):
                reflowed_lines.add_line_break(continued_indent)

    def _get_extent(self, index):
        """Return the width of the full element that starts at ``index``.

        E.g., the length of a function call or keyword.

        """
        total = 0
        previous = get_item(self._items, index - 1)
        dotted = previous and str(previous) == '.'

        for position in range(index, len(self._items)):
            current = get_item(self._items, position)

            if isinstance(current, (ListComprehension, IfExpression)):
                break

            if isinstance(current, Container):
                if previous and previous.is_name:
                    # A bracketed group right after a name (call/subscript).
                    total += 1 if dotted else current.size
                    previous = current
                    continue
            elif (str(current) not in ['.', '=', ':', 'not'] and
                  not current.is_name and not current.is_string):
                break

            if str(current) == '.':
                dotted = True

            total += current.size
            previous = current

        return total

    @property
    def is_string(self):
        return False

    @property
    def size(self):
        return len(repr(self))

    @property
    def is_keyword(self):
        return False

    @property
    def is_name(self):
        return False

    @property
    def is_comma(self):
        return False

    @property
    def is_colon(self):
        return False

    @property
    def open_bracket(self):
        return None

    @property
    def close_bracket(self):
        return None
2579
2580
class Tuple(Container):

    """Container for a parenthesised group: ``( ... )``."""

    @property
    def open_bracket(self):
        """The character that opens this container."""
        return '('

    @property
    def close_bracket(self):
        """The character that closes this container."""
        return ')'
2592
2593
class List(Container):

    """Container for a square-bracketed group: ``[ ... ]``."""

    @property
    def open_bracket(self):
        """The character that opens this container."""
        return '['

    @property
    def close_bracket(self):
        """The character that closes this container."""
        return ']'
2605
2606
class DictOrSet(Container):

    """Container for a brace-delimited group: ``{ ... }``."""

    @property
    def open_bracket(self):
        """The character that opens this container."""
        return '{'

    @property
    def close_bracket(self):
        """The character that closes this container."""
        return '}'
2618
2619
class ListComprehension(Container):

    """Container for the ``for ...`` part of a comprehension."""

    @property
    def size(self):
        """Sum of the item sizes up to the first nested if-expression."""
        total = 0
        for member in self._items:
            if isinstance(member, IfExpression):
                return total
            total += member.size
        return total
2632
2633
class IfExpression(Container):

    """Container for the items of an ``if ...`` clause inside brackets."""
2637
2638
def _parse_container(tokens, index, for_or_if=None):
    """Parse a high-level container, such as a list, tuple, etc.

    ``tokens[index]`` is the token that opens the construct: a bracket, or
    the ``for``/``if`` keyword when ``for_or_if`` is 'for' or 'if'.

    Returns a ``(container, index)`` pair. For bracketed containers the
    index is that of the closing bracket; for comprehensions and
    if-expressions it is the index just before the token that ended them.
    Returns ``(None, None)`` when the tokens run out before the construct
    is closed, including when that happens inside a nested construct.

    """

    # Store the opening bracket.
    items = [Atom(Token(*tokens[index]))]
    index += 1

    num_tokens = len(tokens)
    while index < num_tokens:
        tok = Token(*tokens[index])

        if tok.token_string in ',)]}':
            # First check if we're at the end of a list comprehension or
            # if-expression. Don't add the ending token as part of the list
            # comprehension or if-expression, because they aren't part of those
            # constructs.
            if for_or_if == 'for':
                return (ListComprehension(items), index - 1)

            elif for_or_if == 'if':
                return (IfExpression(items), index - 1)

            # We've reached the end of a container.
            items.append(Atom(tok))

            # If not, then we are at the end of a container.
            if tok.token_string == ')':
                # The end of a tuple.
                return (Tuple(items), index)

            elif tok.token_string == ']':
                # The end of a list.
                return (List(items), index)

            elif tok.token_string == '}':
                # The end of a dictionary or set.
                return (DictOrSet(items), index)

        elif tok.token_string in '([{' or tok.token_string in ('for', 'if'):
            # A sub-container, comprehension or if-expression starts here.
            if tok.token_string in '([{':
                nested_kind = None
            else:
                nested_kind = tok.token_string

            (container, index) = _parse_container(tokens, index, nested_kind)
            if container is None:
                # The nested construct was never closed. Propagate the
                # failure instead of appending None and then crashing on
                # ``None + 1`` below.
                return (None, None)
            items.append(container)

        else:
            items.append(Atom(tok))

        index += 1

    return (None, None)
2696
2697
def _parse_tokens(tokens):
    """Turn raw tokens into a list of ``Atom`` and container objects.

    Parsing stops at the first NEWLINE token. Returns None when a
    bracketed container cannot be parsed.

    """
    parsed = []
    position = 0
    total = len(tokens)

    while position < total:
        tok = Token(*tokens[position])

        assert tok.token_type != token.INDENT
        if tok.token_type == tokenize.NEWLINE:
            # There's only one newline and it's at the end.
            break

        if tok.token_string not in '([{':
            parsed.append(Atom(tok))
        else:
            container, position = _parse_container(tokens, position)
            if not container:
                return None
            parsed.append(container)

        position += 1

    return parsed
2729
2730
def _reflow_lines(parsed_tokens, indentation, max_line_length,
                  start_on_prefix_line):
    """Reflow the lines so that it looks nice.

    ``parsed_tokens`` is the non-empty list produced by ``_parse_tokens``.
    When ``start_on_prefix_line`` is true the first container continues on
    the same line as what precedes it; otherwise breaking right after the
    opening bracket is allowed.

    Returns the reflowed text, or None when splitting after the opening
    bracket would line the first element up with the continuation indent.

    """

    if str(parsed_tokens[0]) == 'def':
        # A function definition gets indented a bit more.
        continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
    else:
        continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE

    break_after_open_bracket = not start_on_prefix_line

    lines = ReformattedLines(max_line_length)
    lines.add_indent(len(indentation.lstrip('\r\n')))

    if not start_on_prefix_line:
        # If splitting after the opening bracket will cause the first element
        # to be aligned weirdly, don't try it.
        first_token = get_item(parsed_tokens, 0)
        second_token = get_item(parsed_tokens, 1)

        if (
            first_token and second_token and
            str(second_token)[0] == '(' and
            # Use ``size`` rather than ``len()``: containers expose ``size``
            # but define no ``__len__``, so ``len()`` raised TypeError when
            # the line began with a bracketed expression being called.
            len(indentation) + first_token.size + 1 == len(continued_indent)
        ):
            return None

    for item in parsed_tokens:
        lines.add_space_if_needed(str(item), equal=True)

        save_continued_indent = continued_indent
        if start_on_prefix_line and isinstance(item, Container):
            start_on_prefix_line = False
            continued_indent = ' ' * (lines.current_size() + 1)

        # NOTE(review): for atoms the third positional parameter of
        # ``reflow`` is ``extent``, so this boolean lands there. Left as-is
        # because changing it would alter the produced layout; confirm
        # whether that is intended.
        item.reflow(lines, continued_indent, break_after_open_bracket)
        continued_indent = save_continued_indent

    return lines.emit()
2771
2772
def _shorten_line_at_tokens_new(tokens, source, indentation,
                                max_line_length):
    """Yield candidate rewrites of a long line.

    The input is expected to be free of newlines except for inside
    multiline strings and at the end.

    The unchanged source comes first so the caller can compare it with
    the reflowed candidates that follow.

    """
    yield indentation + source

    parsed_tokens = _parse_tokens(tokens)
    if not parsed_tokens:
        return

    # Two reflows: one that starts on the same line as the prefix, then
    # one that starts on the line after it.
    for on_prefix_line in (True, False):
        candidate = _reflow_lines(parsed_tokens, indentation,
                                  max_line_length,
                                  start_on_prefix_line=on_prefix_line)
        if candidate and check_syntax(
                normalize_multiline(candidate.lstrip())):
            yield candidate
2799
2800
def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
                            key_token_strings, aggressive):
    """Break a line after the tokens listed in key_token_strings.

    The input is expected to be free of newlines except for inside
    multiline strings and at the end.

    Returns the indented, re-broken source when it passes the syntax
    check, otherwise None.

    """
    closer_for = {'(': ')', '[': ']', '{': '}'}
    missing = [None, None]

    split_points = []
    for position, token_info in enumerate(token_offsets(tokens)):
        token_type, token_string, start_offset, end_offset = token_info

        assert token_type != token.INDENT

        if token_string not in key_token_strings:
            # Break at adjacent strings. These were probably meant to be on
            # separate lines in the first place.
            before = get_item(tokens, position - 1)
            if (
                token_type == tokenize.STRING and
                before and before[0] == tokenize.STRING
            ):
                split_points.append(start_offset)
            continue

        # Do not break in containers with zero or one items.
        closer = closer_for.get(token_string)
        if closer and (
            get_item(tokens, position + 1, default=missing)[1] == closer or
            get_item(tokens, position + 2, default=missing)[1] == closer
        ):
            continue

        if (
            position > 2 and token_string == '(' and
            tokens[position - 1][1] in ',(%['
        ):
            # Don't split after a tuple start, or before a tuple start if
            # the tuple is in a list.
            continue

        if end_offset < len(source) - 1:
            # Don't split right before newline.
            split_points.append(end_offset)

    current_indent = None
    fixed = None
    for piece in split_at_offsets(source, split_points):
        if not fixed:
            # First line.
            fixed = piece
            assert not current_indent
            current_indent = indent_word
            continue

        fixed += '\n' + current_indent + piece
        if piece.endswith(('(', '[', '{')):
            # Lines that follow an opening bracket are indented one more.
            current_indent += indent_word

    assert fixed is not None

    to_check = normalize_multiline(fixed) if aggressive > 1 else fixed
    if check_syntax(to_check):
        return indentation + fixed

    return None
2878
2879
def token_offsets(tokens):
    """Yield ``(type, string, start_offset, end_offset)`` for each token.

    Offsets are running character positions computed from the token
    columns and the length of each token string.

    """
    cursor = 0
    last_row = 0
    last_column = 0

    for tok in tokens:
        kind, text = tok[0], tok[1]
        (start_row, start_column) = tok[2]
        (end_row, end_column) = tok[3]

        # Whitespace between this token and the previous one.
        if start_row == last_row:
            gap = start_column - last_column
        else:
            gap = start_column

        begin = cursor + gap
        cursor = begin + len(text)

        yield (kind, text, begin, cursor)

        last_row, last_column = end_row, end_column
2909
2910
def normalize_multiline(line):
    """Normalize multiline-related code that will cause syntax error.

    This is for purposes of checking syntax: a statement fragment is
    wrapped or completed so that it can be compiled on its own.

    - ``def`` / ``async def`` headers ending in ':' get a ``pass`` body.
    - ``return`` statements are placed inside a dummy function.
    - Decorator lines are followed by a dummy function definition.
    - ``class``, ``if``, ``for`` and ``while`` lines get a ``pass`` body.
    - ``elif`` lines additionally get a dummy ``if`` in front, because an
      ``elif`` can never compile by itself.

    Any other line is returned unchanged.

    """
    if (
        line.startswith(('def ', 'async def ')) and
        line.rstrip().endswith(':')
    ):
        return line + ' pass'
    elif line.startswith('return '):
        return 'def _(): ' + line
    elif line.startswith('@'):
        return line + 'def _(): pass'
    elif line.startswith('class '):
        return line + ' pass'
    elif line.startswith('elif '):
        # Appending ' pass' alone still leaves an orphan 'elif', which is
        # always a syntax error; give it an 'if' to attach to.
        return 'if 0: pass\n' + line + ' pass'
    elif line.startswith(('if ', 'for ', 'while ')):
        return line + ' pass'

    return line
2929
2930
def fix_whitespace(line, offset, replacement):
    """Swap the whitespace around ``offset`` for ``replacement``.

    Escaped newlines (backslashes) count as whitespace here. The line is
    returned untouched when what follows the offset is a comment.

    """
    blanks = '\n\r \t\\'

    tail = line[offset:].lstrip(blanks)
    if tail.startswith('#'):
        return line

    head = line[:offset].rstrip(blanks)
    return head + replacement + tail
2940
2941
def _execute_pep8(pep8_options, source):
    """Run the pycodestyle checker in-process and return what it found."""
    class QuietReport(pycodestyle.BaseReport):

        """Reporter that records errors instead of printing them."""

        def __init__(self, options):
            super(QuietReport, self).__init__(options)
            self.__full_error_results = []

        def error(self, line_number, offset, text, check):
            """Record the error when the base reporter yields a code."""
            code = super(QuietReport, self).error(
                line_number, offset, text, check)
            if not code:
                return

            entry = {
                'id': code,
                'line': line_number,
                'column': offset + 1,
                'info': text,
            }
            self.__full_error_results.append(entry)

        def full_error_results(self):
            """Return the recorded errors.

            The result is a list of dictionaries, each holding the keys
            'id', 'line', 'column', and 'info'.

            """
            return self.__full_error_results

    checker = pycodestyle.Checker(
        '', lines=source, reporter=QuietReport, **pep8_options)
    checker.check_all()
    report = checker.report
    return report.full_error_results()
2978
2979
2980def _remove_leading_and_normalize(line, with_rstrip=True):
2981 # ignore FF in first lstrip()
2982 if with_rstrip:
2983 return line.lstrip(' \t\v').rstrip(CR + LF) + '\n'
2984 return line.lstrip(' \t\v')
2985
2986
class Reindenter(object):

    """Reindents badly-indented code to uniformly use four-space indentation.

    Released to the public domain, by Tim Peters, 03 October 2000.

    """

    def __init__(self, input_text, leave_tabs=False):
        source_lines = io.StringIO(input_text).readlines()
        last_line_number = len(source_lines)

        self.string_content_line_numbers = multiline_string_lines(input_text)

        # Normalised file lines. Lines inside multiline strings are kept
        # verbatim; every other line gets its leading whitespace
        # normalised, and all but the last get a plain '\n' ending.
        self.lines = []
        for line_number, line in enumerate(source_lines, start=1):
            if line_number in self.string_content_line_numbers:
                self.lines.append(line)
                continue

            # Only leading tabs are expanded, and only when allowed.
            indentation = _get_indentation(line)
            if not leave_tabs:
                indentation = indentation.expandtabs()

            self.lines.append(
                indentation +
                _remove_leading_and_normalize(
                    line, line_number != last_line_number))

        # Dummy first entry so tokenize's 1-based line numbers can be used
        # as indexes directly.
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line
        self.input_text = input_text

    def run(self, indent_size=DEFAULT_INDENT_SIZE):
        """Return the source text with its indentation fixed.

        The original input text is returned when ``indent_size`` is less
        than one or when the lines cannot be tokenized.

        """
        if indent_size < 1:
            return self.input_text

        try:
            stats = _reindent_stats(tokenize.generate_tokens(self.getline))
        except (SyntaxError, tokenize.TokenError):
            return self.input_text

        lines = self.lines
        # Sentinel.
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        first_stmt = stats[0][0]
        after.extend(lines[1:first_stmt])

        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = _leading_space_count(lines[thisstmt])
            want = thislevel * indent_size
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line. If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == _leading_space_count(lines[jline]):
                                    want = jlevel * indent_size
                                break
                    # Maybe it's a hanging comment like this one,
                    if want < 0:
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + _leading_space_count(
                                        after[jline - 1]) -
                                        _leading_space_count(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have

            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
                continue

            for line_number, line in enumerate(lines[thisstmt:nextstmt],
                                               start=thisstmt):
                if line_number in self.string_content_line_numbers:
                    # Never touch the inside of a multiline string.
                    after.append(line)
                elif diff > 0:
                    if line == '\n':
                        after.append(line)
                    else:
                        after.append(' ' * diff + line)
                else:
                    remove = min(_leading_space_count(line), -diff)
                    after.append(line[remove:])

        return ''.join(after)

    def getline(self):
        """Line-getter for tokenize."""
        if self.index < len(self.lines):
            line = self.lines[self.index]
        else:
            line = ''
        self.index += 1
        return line
3117
3118
3119def _reindent_stats(tokens):
3120 """Return list of (lineno, indentlevel) pairs.
3121
3122 One for each stmt and comment line. indentlevel is -1 for comment
3123 lines, as a signal that tokenize doesn't know what to do about them;
3124 indeed, they're our headache!
3125
3126 """
3127 find_stmt = 1 # Next token begins a fresh stmt?
3128 level = 0 # Current indent level.
3129 stats = []
3130
3131 for t in tokens:
3132 token_type = t[0]
3133 sline = t[2][0]
3134 line = t[4]
3135
3136 if token_type == tokenize.NEWLINE:
3137 # A program statement, or ENDMARKER, will eventually follow,
3138 # after some (possibly empty) run of tokens of the form
3139 # (NL | COMMENT)* (INDENT | DEDENT+)?
3140 find_stmt = 1
3141
3142 elif token_type == tokenize.INDENT:
3143 find_stmt = 1
3144 level += 1
3145
3146 elif token_type == tokenize.DEDENT:
3147 find_stmt = 1
3148 level -= 1
3149
3150 elif token_type == tokenize.COMMENT:
3151 if find_stmt:
3152 stats.append((sline, -1))
3153 # But we're still looking for a new stmt, so leave
3154 # find_stmt alone.
3155
3156 elif token_type == tokenize.NL:
3157 pass
3158
3159 elif find_stmt:
3160 # This is the first "real token" following a NEWLINE, so it
3161 # must be the first token of the next program statement, or an
3162 # ENDMARKER.
3163 find_stmt = 0
3164 if line: # Not endmarker.
3165 stats.append((sline, level))
3166
3167 return stats
3168
3169
3170def _leading_space_count(line):
3171 """Return number of leading spaces in line."""
3172 i = 0
3173 while i < len(line) and line[i] == ' ':
3174 i += 1
3175 return i
3176
3177
def refactor_with_2to3(source_text, fixer_names, filename=''):
    """Run the named lib2to3 fixers over ``source_text``.

    Returns the refactored source, or the input unchanged when lib2to3
    cannot tokenize it.

    """
    from lib2to3.refactor import RefactoringTool
    from lib2to3.pgen2 import tokenize as lib2to3_tokenize

    qualified = ['lib2to3.fixes.fix_' + short for short in fixer_names]
    tool = RefactoringTool(fixer_names=qualified, explicit=qualified)

    try:
        # The name parameter is necessary particularly for the "import" fixer.
        tree = tool.refactor_string(source_text, name=filename)
    except lib2to3_tokenize.TokenError:
        return source_text
    return str(tree)
3194
3195
def check_syntax(code):
    """Return a true value if the syntax of code is okay.

    On success the compiled code object is returned (which is truthy);
    False is returned when compiling raises SyntaxError, TypeError or
    ValueError.

    """
    try:
        compiled = compile(code, '<string>', 'exec', dont_inherit=True)
    except (SyntaxError, TypeError, ValueError):
        return False
    return compiled
3202
3203
def find_with_line_numbers(pattern, contents):
    """Find the line numbers on which 're.finditer' matches start.

    Returns a list with one 1-indexed line number per match of pattern
    in contents, in match order.
    """
    found = list(re.finditer(pattern, contents))
    if not found:
        return []

    last_start = found[-1].start()

    # Map the offset of every relevant newline to the number of newlines
    # seen up to and including it. The -1 key makes a failed `rfind`
    # (no newline before the match) map to the first line.
    newlines_up_to = {-1: 0}
    for ordinal, newline in enumerate(re.finditer(r'\n', contents), 1):
        position = newline.start()
        if position > last_start:
            break
        newlines_up_to[position] = ordinal

    line_numbers = []
    for hit in found:
        preceding_newline = contents.rfind('\n', 0, hit.start())
        line_numbers.append(newlines_up_to[preceding_newline] + 1)
    return line_numbers
3235
3236
def get_disabled_ranges(source):
    """Returns a list of (start, end) line tuples for disabled ranges.

    A range opens at a line matching DISABLE_REGEX and closes at the
    next line matching ENABLE_REGEX. A range that is never re-enabled
    runs to the last line of the source.

    """
    enable_lines = find_with_line_numbers(ENABLE_REGEX, source)
    disable_lines = find_with_line_numbers(DISABLE_REGEX, source)
    last_line = source.count("\n") + 1

    # A line matching both patterns ends up as a "disable" command.
    commands = dict.fromkeys(enable_lines, True)
    commands.update(dict.fromkeys(disable_lines, False))

    ranges = []
    open_start = None  # First line of the range being built, if any.

    for line_number, enable in sorted(commands.items()):
        if enable:
            if open_start is not None:
                ranges.append((open_start, line_number))
                open_start = None
        elif open_start is None:
            open_start = line_number

    if open_start is not None:
        ranges.append((open_start, last_line))

    return ranges
3269
3270
def filter_disabled_results(result, disabled_ranges):
    """Return False if the report lies inside any disabled range.

    "result" is a report with a 'line' entry; "disabled_ranges" holds
    (start, end) pairs whose bounds are both inclusive.

    """
    reported_line = result['line']
    return not any(
        bounds[0] <= reported_line <= bounds[1]
        for bounds in disabled_ranges
    )
3280
3281
def filter_results(source, results, aggressive):
    """Yield the pycodestyle reports that should actually be fixed.

    Reports inside disabled ranges, multiline strings or commented-out
    code are dropped where fixing them would be unsafe. The higher
    "aggressive" is, the more possibly unsafe fixes (E711, E712, ...)
    are let through.

    """
    plain_string_rows = multiline_string_lines(
        source, include_docstrings=False)
    any_string_rows = multiline_string_lines(
        source, include_docstrings=True)

    commented_code_rows = commented_out_code_lines(source)

    # Filter out the disabled ranges
    disabled_ranges = get_disabled_ranges(source)
    if disabled_ranges:
        results = [
            report for report in results
            if filter_disabled_results(report, disabled_ranges)
        ]

    has_e901 = any(report['id'].lower() == 'e901' for report in results)

    for report in results:
        code = report['id'].lower()
        row = report['line']

        if (
            row in plain_string_rows and
            code.startswith(('e1', 'e501', 'w191'))
        ):
            continue

        if row in any_string_rows and code in ['e501']:
            continue

        # We must offset by 1 for lines that contain the trailing contents of
        # multiline strings. Multiline strings are left alone in
        # non-aggressive mode, since removing trailing whitespace could
        # break doctests.
        if (
            not aggressive and
            (row + 1) in any_string_rows and
            code.startswith(('w29', 'w39'))
        ):
            continue

        if aggressive <= 0 and code.startswith(('e711', 'e72', 'w6')):
            continue

        if aggressive <= 1 and code.startswith(('e712', 'e713', 'e714')):
            continue

        if aggressive <= 2 and code.startswith(('e704')):
            continue

        if (
            row in commented_code_rows and
            code.startswith(('e261', 'e262', 'e501'))
        ):
            continue

        # Do not touch indentation if there is a token error caused by
        # incomplete multi-line statement. Otherwise, we risk screwing up the
        # indentation.
        if has_e901 and code.startswith(('e1', 'e7')):
            continue

        yield report
3350
3351
def multiline_string_lines(source, include_docstrings=False):
    """Return the set of line numbers that are within multiline strings.

    The line numbers are indexed at 1 and cover the lines after the one
    on which the string starts.

    Strings that directly follow an INDENT token (docstrings) are
    ignored unless include_docstrings is true. Lines found before a
    tokenizing error are still returned.

    """
    rows = set()
    last_kind = ''
    try:
        for tok in generate_tokens(source):
            kind = tok[0]
            first_row = tok[2][0]
            last_row = tok[3][0]

            spans_lines = kind == tokenize.STRING and first_row != last_row
            if spans_lines and (
                include_docstrings or last_kind != tokenize.INDENT
            ):
                # Start one past the opening row since we want the
                # contents of the string.
                rows.update(range(first_row + 1, last_row + 1))

            last_kind = kind
    except (SyntaxError, tokenize.TokenError):
        pass

    return rows
3382
3383
def commented_out_code_lines(source):
    """Return line numbers of comments that are likely code.

    A whole-line comment counts as code when its text contains a space,
    contains no further '#', and compiles. Commented-out code is bad
    practice, but modifying it just adds even more clutter.

    """
    rows = []
    try:
        for tok in generate_tokens(source):
            kind = tok[0]
            text = tok[1]
            row = tok[2][0]
            physical_line = tok[4]

            # Ignore inline comments.
            if not physical_line.lstrip().startswith('#'):
                continue

            if kind != tokenize.COMMENT:
                continue

            candidate = text.lstrip('#').strip()
            with warnings.catch_warnings():
                # ignore SyntaxWarning in Python3.8+
                # refs:
                #   https://bugs.python.org/issue15248
                #   https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
                warnings.filterwarnings("ignore", category=SyntaxWarning)
                looks_like_code = (
                    ' ' in candidate and
                    '#' not in candidate and
                    check_syntax(candidate)
                )
                if looks_like_code:
                    rows.append(row)
    except (SyntaxError, tokenize.TokenError):
        pass

    return rows
3421
3422
def shorten_comment(line, max_line_length, last_comment=False):
    """Return trimmed or split long comment line.

    A comment ending in a long run of one non-alphanumeric character
    (like a ruler of dashes) is cut off at the maximum length. Any other
    comment is text-wrapped, but only when last_comment is true: doing
    this wrapping on all comments in general would lead to jagged
    comment text. Otherwise the line is returned with its trailing
    whitespace replaced by a single newline.

    """
    assert len(line) > max_line_length
    line = line.rstrip()

    # PEP 8 recommends 72 characters for comment text.
    indentation = _get_indentation(line) + '# '
    max_line_length = min(max_line_length,
                          len(indentation) + 72)

    MIN_CHARACTER_REPEAT = 5
    final_char = line[-1]
    trailing_run = len(line) - len(line.rstrip(final_char))
    if trailing_run >= MIN_CHARACTER_REPEAT and not final_char.isalnum():
        # Trim comments that end with things like ---------
        return line[:max_line_length] + '\n'

    if last_comment and re.match(r'\s*#+\s*\w+', line):
        wrapped = textwrap.wrap(line.lstrip(' \t#'),
                                initial_indent=indentation,
                                subsequent_indent=indentation,
                                width=max_line_length,
                                break_long_words=False,
                                break_on_hyphens=False)
        return '\n'.join(wrapped) + '\n'

    return line + '\n'
3456
3457
def normalize_line_endings(lines, newline):
    """Return the lines with every line ending replaced by newline.

    If the last input line has no line ending, the last output line has
    none either.
    """
    converted = []
    for text in lines:
        converted.append(text.rstrip('\n\r') + newline)

    if converted and lines[-1] == lines[-1].rstrip('\n\r'):
        converted[-1] = converted[-1].rstrip('\n\r')
    return converted
3467
3468
def mutual_startswith(a, b):
    """Return True if either string is a prefix of the other."""
    shorter, longer = (a, b) if len(a) <= len(b) else (b, a)
    return longer.startswith(shorter)
3471
3472
def code_match(code, select, ignore):
    """Return True if code should be handled under select and ignore.

    Codes are compared case-insensitively and by mutual prefix, so "E1"
    matches "E101" and the other way round. An ignore match always wins.
    With a non-empty select, only matching codes are accepted; with an
    empty one, everything that is not ignored is accepted.

    """
    if ignore:
        assert not isinstance(ignore, str)
        ignored_codes = [entry.strip() for entry in ignore]
        if any(
            mutual_startswith(code.lower(), ignored.lower())
            for ignored in ignored_codes
        ):
            return False

    if select:
        assert not isinstance(select, str)
        selected_codes = [entry.strip() for entry in select]
        return any(
            mutual_startswith(code.lower(), selected.lower())
            for selected in selected_codes
        )

    return True
3488
3489
def fix_code(source, options=None, encoding=None, apply_config=False):
    """Return fixed source code.

    "options" is turned into parsed options by _get_options().
    "encoding" will be used to decode "source" if it is a byte string.

    """
    options = _get_options(options, apply_config)

    # Normalize the codes to upper case.
    options.ignore = [code.upper() for code in options.ignore]
    options.select = [code.upper() for code in options.select]

    # NOTE: If W50x is not included, add W50x because the code
    #       correction result is indefinite.
    if {"W50", "W503", "W504"}.isdisjoint(options.ignore):
        options.ignore.append("W50")

    if not isinstance(source, str):
        source = source.decode(encoding or get_encoding())

    return fix_lines(io.StringIO(source).readlines(), options=options)
3513
3514
def _get_options(raw_options, apply_config):
    """Return parsed options.

    A falsy raw_options yields the defaults. A dict is applied on top of
    the defaults, raising ValueError for unknown names and for string
    values. Anything else is returned as is.

    """
    if not raw_options:
        return parse_args([''], apply_config=apply_config)

    if not isinstance(raw_options, dict):
        return raw_options

    options = parse_args([''], apply_config=apply_config)
    for name, value in raw_options.items():
        if not hasattr(options, name):
            raise ValueError("No such option '{}'".format(name))

        # Check for very basic type errors.
        # NOTE(review): expected_type is a type object, never a str
        # instance, so the first test is always true and every string
        # value is rejected, whatever the option's default is.
        expected_type = type(getattr(options, name))
        if (
            not isinstance(expected_type, (str, )) and
            isinstance(value, (str, ))
        ):
            raise ValueError(
                "Option '{}' should not be a string".format(name))
        setattr(options, name, value)

    return options
3537
3538
def fix_lines(source_lines, options, filename=''):
    """Return fixed source code.

    The lines are converted to line feeds for processing and converted
    back to the original line ending before being returned as a single
    string.

    """
    newline_style = find_newline(source_lines)
    current = ''.join(normalize_line_endings(source_lines, '\n'))

    # With a line range, "apply_global_fixes()" stays disabled for now
    # due to issue #175. Otherwise the global fixes are applied only once
    # (for efficiency).
    if not options.line_range:
        current = apply_global_fixes(current,
                                     options,
                                     filename=filename)

    # Keep a history to break out of cycles.
    seen_hashes = set()
    long_line_ignore_cache = set()
    completed_passes = 0

    while True:
        digest = hash(current)
        if digest in seen_hashes:
            break
        if (
            options.pep8_passes >= 0 and
            completed_passes > options.pep8_passes
        ):
            break
        completed_passes += 1
        seen_hashes.add(digest)

        fixer = FixPEP8(
            filename,
            options,
            contents=copy.copy(current),
            long_line_ignore_cache=long_line_ignore_cache)
        current = fixer.fix()

    fixed_lines = io.StringIO(current).readlines()
    return ''.join(normalize_line_endings(fixed_lines, newline_style))
3579
3580
def fix_file(filename, options=None, output=None, apply_config=False):
    """Fix the given file and report the result as the options request.

    Without options, they are parsed from the filename alone.

    The return value depends on the mode:

    - options.diff: the diff text, which is also written to output when
      one is given; without output and with more than one job it is
      returned encoded as bytes.
    - options.in_place: the fixed source if the file was rewritten,
      None if nothing changed.
    - otherwise: the fixed source, which is also written to output when
      one is given.

    """
    if not options:
        options = parse_args([filename], apply_config=apply_config)

    original_source = readlines_from_file(filename)

    fixed_source = original_source

    # "encoding" is only bound here; every later use sits in a branch
    # that requires one of these three conditions.
    if options.in_place or options.diff or output:
        encoding = detect_encoding(filename)

    if output:
        output = LineEndingWrapper(wrap_output(output, encoding=encoding))

    fixed_source = fix_lines(fixed_source, options, filename=filename)

    if options.diff:
        new = io.StringIO(fixed_source)
        new = new.readlines()
        diff = get_diff_text(original_source, new, filename)
        if output:
            output.write(diff)
            output.flush()
        elif options.jobs > 1:
            diff = diff.encode(encoding)
        return diff
    elif options.in_place:
        original = "".join(original_source).splitlines()
        fixed = fixed_source.splitlines()
        # splitlines() hides a trailing newline, so the text after the
        # last "\n" is compared separately.
        original_source_last_line = (
            original_source[-1].split("\n")[-1] if original_source else ""
        )
        fixed_source_last_line = fixed_source.split("\n")[-1]
        if original != fixed or (
            original_source_last_line != fixed_source_last_line
        ):
            with open_with_encoding(filename, 'w', encoding=encoding) as fp:
                fp.write(fixed_source)
            return fixed_source
        return None
    else:
        if output:
            output.write(fixed_source)
            output.flush()
    return fixed_source
3626
3627
def global_fixes():
    """Yield multiple (code, function) tuples.

    Every module-level function whose first parameter is named "source"
    and whose name yields a code through extract_code_from_function()
    is reported.

    """
    for candidate in list(globals().values()):
        if not inspect.isfunction(candidate):
            continue

        if _get_parameters(candidate)[:1] != ['source']:
            continue

        code = extract_code_from_function(candidate)
        if code:
            yield (code, candidate)
3639
3640
def _get_parameters(function):
    """Return the parameter names of function as a list.

    For a bound method the list starts with the name of its first
    parameter ("self"), matching what the legacy "getargspec()"
    reported.
    https://bugs.python.org/issue17481#msg209469

    """
    # This module already requires Python 3 (it uses f-strings), so the
    # former Python 2 fallback to "inspect.getargspec()" could never run;
    # that function no longer exists in current Python versions either.
    if inspect.ismethod(function):
        # signature() drops the bound first parameter; go through the
        # underlying function to keep it.
        function = function.__func__

    return list(inspect.signature(function).parameters)
3653
3654
def apply_global_fixes(source, options, where='global', filename='',
                       codes=None):
    """Run global fixes on source code.

    These are fixes that only need be done once (unlike those in
    FixPEP8, which are dependent on pycodestyle). "where" only appears
    in the verbose messages. "codes" is accepted but not used here.

    """
    def enabled(code):
        return code_match(code, select=options.select, ignore=options.ignore)

    if any(enabled(code) for code in ['E101', 'E111']):
        source = reindent(
            source,
            indent_size=options.indent_size,
            leave_tabs=not enabled('W191'),
        )

    for (code, function) in global_fixes():
        if not enabled(code):
            continue
        if options.verbose:
            print('--->  Applying {} fix for {}'.format(where,
                                                        code.upper()),
                  file=sys.stderr)
        source = function(source,
                          aggressive=options.aggressive)

    return fix_2to3(source,
                    aggressive=options.aggressive,
                    select=options.select,
                    ignore=options.ignore,
                    filename=filename,
                    where=where,
                    verbose=options.verbose)
3697
3698
def extract_code_from_function(function):
    """Return code handled by function.

    The code is the part of the function name after "fix_". None is
    returned when the name lacks that prefix, when nothing follows it,
    or when what follows the code's first character is not an integer.

    """
    prefix = 'fix_'
    name = function.__name__
    if not name.startswith(prefix):
        return None

    code = name[len(prefix):]
    if not code:
        return None

    try:
        int(code[1:])
    except ValueError:
        return None

    return code
3714
3715
def _get_package_version():
    """Return a comma-separated "name: version" list of the checker packages."""
    pycodestyle_entry = "pycodestyle: {}".format(pycodestyle.__version__)
    return ", ".join((pycodestyle_entry,))
3719
3720
def create_parser():
    """Return command-line parser.

    The parser only declares the options and their defaults. Checks
    that involve several options, and the splitting of the
    comma-separated values, are left to parse_args().

    """
    parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
                                     prog='autopep8')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {} ({})'.format(
                            __version__, _get_package_version()))
    parser.add_argument('-v', '--verbose', action='count',
                        default=0,
                        help='print verbose messages; '
                             'multiple -v result in more verbose messages')
    parser.add_argument('-d', '--diff', action='store_true',
                        help='print the diff for the fixed source')
    parser.add_argument('-i', '--in-place', action='store_true',
                        help='make changes to files in place')
    parser.add_argument('--global-config', metavar='filename',
                        default=DEFAULT_CONFIG,
                        help='path to a global pep8 config file; if this file '
                             'does not exist then this is ignored '
                             '(default: {})'.format(DEFAULT_CONFIG))
    parser.add_argument('--ignore-local-config', action='store_true',
                        help="don't look for and apply local config files; "
                             'if not passed, defaults are updated with any '
                             "config files in the project's root directory")
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='run recursively over directories; '
                             'must be used with --in-place or --diff')
    parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
                        help='number of parallel jobs; '
                             'match CPU count if value is less than 1')
    parser.add_argument('-p', '--pep8-passes', metavar='n',
                        default=-1, type=int,
                        help='maximum number of additional pep8 passes '
                             '(default: infinite)')
    parser.add_argument('-a', '--aggressive', action='count', default=0,
                        help='enable non-whitespace changes; '
                             'multiple -a result in more aggressive changes')
    parser.add_argument('--experimental', action='store_true',
                        help='enable experimental fixes')
    parser.add_argument('--exclude', metavar='globs',
                        help='exclude file/directory names that match these '
                             'comma-separated globs')
    parser.add_argument('--list-fixes', action='store_true',
                        help='list codes for fixes; '
                             'used by --ignore and --select')
    # The real default for --ignore is applied in parse_args(); here it
    # only shows up in the help text.
    parser.add_argument('--ignore', metavar='errors', default='',
                        help='do not fix these errors/warnings '
                             '(default: {})'.format(DEFAULT_IGNORE))
    parser.add_argument('--select', metavar='errors', default='',
                        help='fix only these errors/warnings (e.g. E4,W)')
    parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
                        help='set maximum allowed line length '
                             '(default: %(default)s)')
    parser.add_argument('--line-range', '--range', metavar='line',
                        default=None, type=int, nargs=2,
                        help='only fix errors found within this inclusive '
                             'range of line numbers (e.g. 1 99); '
                             'line numbers are indexed at 1')
    # Hidden from the help output.
    parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
                        type=int, help=argparse.SUPPRESS)
    parser.add_argument('--hang-closing', action='store_true',
                        help='hang-closing option passed to pycodestyle')
    parser.add_argument('--exit-code', action='store_true',
                        help='change to behavior of exit code.'
                             ' default behavior of return value, 0 is no '
                             'differences, 1 is error exit. return 2 when'
                             ' add this option. 2 is exists differences.')
    parser.add_argument('files', nargs='*',
                        help="files to format or '-' for standard in")

    return parser
3792
3793
def _expand_codes(codes, ignore_codes):
    """Expand the broad W/W5/W50 selectors to individual E/W codes.

    W503 and W504 are never both produced by an expansion: W503 is
    used unless it is ignored, then W504 unless that is ignored too.
    When the given codes cover every entry of CONFLICTING_CODES, an
    explicit "W503" or "W504" in codes is dropped. Returns a set.

    """
    is_conflict = all(
        any(conflicting_code.startswith(code) for code in codes)
        for conflicting_code in CONFLICTING_CODES
    )

    w503_ignored = "W503" in ignore_codes
    w504_ignored = "W504" in ignore_codes

    # What the W5 family expands to under the current ignore list.
    if w503_ignored and w504_ignored:
        w5_codes = {"W505"}
    elif w503_ignored:
        w5_codes = {"W504", "W505"}
    else:
        w5_codes = {"W503", "W505"}

    expanded = set()
    for code in codes:
        if code == "W":
            expanded.update({"W1", "W2", "W3", "W6"})
            expanded.update(w5_codes)
        elif code in ("W5", "W50"):
            expanded.update(w5_codes)
        elif is_conflict and code in ("W503", "W504"):
            continue
        else:
            expanded.add(code)

    return expanded
3830
3831
def parse_args(arguments, apply_config=False):
    """Parse command-line options.

    "arguments" is the list of command-line arguments. With
    apply_config, defaults from the configuration files are loaded
    first and the arguments are parsed again on top of them.

    Invalid option combinations end the program through parser.exit()
    with EXIT_CODE_ARGPARSE_ERROR. On return, a given select, ignore or
    exclude value has been split into a set of strings, and jobs is at
    least 1.

    """
    parser = create_parser()
    args = parser.parse_args(arguments)

    if not args.files and not args.list_fixes:
        parser.exit(EXIT_CODE_ARGPARSE_ERROR, 'incorrect number of arguments')

    args.files = [decode_filename(name) for name in args.files]

    if apply_config:
        parser = read_config(args, parser)
        # prioritize settings when exist pyproject.toml's tool.autopep8 section
        try:
            parser_with_pyproject_toml = read_pyproject_toml(args, parser)
        except Exception:
            # Any failure while reading pyproject.toml just means it is
            # not used.
            parser_with_pyproject_toml = None
        if parser_with_pyproject_toml:
            parser = parser_with_pyproject_toml
        # Parse again so that the configured defaults take effect.
        args = parser.parse_args(arguments)
        args.files = [decode_filename(name) for name in args.files]

    # '-' stands for standard input.
    if '-' in args.files:
        if len(args.files) > 1:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                'cannot mix stdin and regular files',
            )

        if args.diff:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                '--diff cannot be used with standard input',
            )

        if args.in_place:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                '--in-place cannot be used with standard input',
            )

        if args.recursive:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                '--recursive cannot be used with standard input',
            )

    if len(args.files) > 1 and not (args.in_place or args.diff):
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            'autopep8 only takes one filename as argument '
            'unless the "--in-place" or "--diff" args are used',
        )

    if args.recursive and not (args.in_place or args.diff):
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            '--recursive must be used with --in-place or --diff',
        )

    if args.in_place and args.diff:
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            '--in-place and --diff are mutually exclusive',
        )

    if args.max_line_length <= 0:
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            '--max-line-length must be greater than 0',
        )

    if args.indent_size <= 0:
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            '--indent-size must be greater than 0',
        )

    if args.select:
        args.select = _expand_codes(
            _split_comma_separated(args.select),
            (_split_comma_separated(args.ignore) if args.ignore else [])
        )

    if args.ignore:
        args.ignore = _split_comma_separated(args.ignore)
        # If the ignore list covers none of the conflicting codes, all of
        # them are added to it.
        if all(
            not any(
                conflicting_code.startswith(ignore_code)
                for ignore_code in args.ignore
            )
            for conflicting_code in CONFLICTING_CODES
        ):
            args.ignore.update(CONFLICTING_CODES)
    elif not args.select:
        if args.aggressive:
            # Enable everything by default if aggressive.
            args.select = {'E', 'W1', 'W2', 'W3', 'W6'}
        else:
            args.ignore = _split_comma_separated(DEFAULT_IGNORE)

    if args.exclude:
        args.exclude = _split_comma_separated(args.exclude)
    else:
        args.exclude = {}

    if args.jobs < 1:
        # Do not import multiprocessing globally in case it is not supported
        # on the platform.
        import multiprocessing
        args.jobs = multiprocessing.cpu_count()

    # NOTE(review): the condition also accepts --diff, although the
    # message only mentions --in-place.
    if args.jobs > 1 and not (args.in_place or args.diff):
        parser.exit(
            EXIT_CODE_ARGPARSE_ERROR,
            'parallel jobs requires --in-place',
        )

    if args.line_range:
        if args.line_range[0] <= 0:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                '--range must be positive numbers',
            )
        if args.line_range[0] > args.line_range[1]:
            parser.exit(
                EXIT_CODE_ARGPARSE_ERROR,
                'First value of --range should be less than or equal '
                'to the second',
            )

    return args
3964
3965
def _get_normalize_options(args, config, section, option_list):
    """Yield (option name, config key, value) for the known options.

    The config keys of section are normalized to option names (leading
    dashes dropped, dashes turned into underscores). Keys without a
    truthy entry in option_list are skipped. The value is read as int
    or bool when option_list says so, and as a string otherwise; an int
    option set to "auto" is skipped.

    """
    typed_readers = {int: config.getint, bool: config.getboolean}

    for (k, v) in config.items(section):
        norm_opt = k.lstrip('-').replace('-', '_')
        opt_type = option_list.get(norm_opt)
        if not opt_type:
            continue

        if opt_type is int and v.strip() == "auto":
            # skip to special case
            if args.verbose:
                print(f"ignore config: {k}={v}")
            continue

        read_value = typed_readers.get(opt_type, config.get)
        yield norm_opt, k, read_value(section, k)
3984
3985
def read_config(args, parser):
    """Read both user configuration and local configuration.

    The global config file is read first. Unless local config files are
    to be ignored, the directories from the common prefix of the given
    files upwards are then searched, stopping at the first one in which
    a PROJECT_CONFIG file could be read. Known options found in the
    "pep8", "pycodestyle" and "flake8" sections become parser defaults;
    later sections override earlier ones.

    Returns the parser. A config "Error" is ignored, in which case the
    parser defaults are left as they were.

    """
    config = SafeConfigParser()

    try:
        if args.verbose and os.path.exists(args.global_config):
            print("read config path: {}".format(args.global_config))
        config.read(args.global_config)

        if not args.ignore_local_config:
            # Without files, "parent" and "tail" are the empty list and
            # the loop below does not run.
            parent = tail = args.files and os.path.abspath(
                os.path.commonprefix(args.files))
            while tail:
                if config.read([os.path.join(parent, fn)
                                for fn in PROJECT_CONFIG]):
                    if args.verbose:
                        for fn in PROJECT_CONFIG:
                            config_file = os.path.join(parent, fn)
                            if not os.path.exists(config_file):
                                continue
                            print(
                                "read config path: {}".format(
                                    os.path.join(parent, fn)
                                )
                            )
                    break
                # Move one directory up; "tail" becomes empty at the root.
                (parent, tail) = os.path.split(parent)

        defaults = {}
        # Maps every option name to its declared type, or to the type of
        # its default when no type was declared.
        option_list = {o.dest: o.type or type(o.default)
                       for o in parser._actions}

        for section in ['pep8', 'pycodestyle', 'flake8']:
            if not config.has_section(section):
                continue
            for norm_opt, k, value in _get_normalize_options(
                args, config, section, option_list
            ):
                if args.verbose:
                    print("enable config: section={}, key={}, value={}".format(
                        section, k, value))
                defaults[norm_opt] = value

        parser.set_defaults(**defaults)
    except Error:
        # Ignore for now.
        pass

    return parser
4035
4036
def read_pyproject_toml(args, parser):
    """Read pyproject.toml and load configuration.

    The global config file is loaded as TOML when it exists. Unless
    local config files are to be ignored, the nearest pyproject.toml
    found from the common prefix of the given files upwards replaces
    it. Known options of the [tool.autopep8] table become parser
    defaults; list values of "ignore" and "select" are joined with
    commas.

    Returns the parser, or None when no configuration or no
    [tool.autopep8] table was found.

    """
    if sys.version_info >= (3, 11):
        import tomllib
    else:
        import tomli as tomllib

    def load_toml(path):
        # NOTE(review): a file that is not valid TOML makes this raise;
        # that includes an existing global config in another format.
        with open(path, "rb") as fp:
            return tomllib.load(fp)

    config = None

    if os.path.exists(args.global_config):
        config = load_toml(args.global_config)

    if not args.ignore_local_config:
        parent = tail = args.files and os.path.abspath(
            os.path.commonprefix(args.files))
        while tail:
            pyproject_toml = os.path.join(parent, "pyproject.toml")
            if os.path.exists(pyproject_toml):
                config = load_toml(pyproject_toml)
                break
            (parent, tail) = os.path.split(parent)

    if not config:
        return None

    autopep8_table = config.get("tool", {}).get("autopep8")
    if autopep8_table is None:
        return None

    option_list = {o.dest: o.type or type(o.default)
                   for o in parser._actions}

    TUPLED_OPTIONS = ("ignore", "select")
    defaults = {}
    for (k, v) in autopep8_table.items():
        norm_opt = k.lstrip('-').replace('-', '_')
        if not option_list.get(norm_opt):
            continue

        is_sequence = type(v) in (list, tuple)
        value = ",".join(v) if is_sequence and norm_opt in TUPLED_OPTIONS else v

        if args.verbose:
            print("enable pyproject.toml config: "
                  "key={}, value={}".format(k, value))
        defaults[norm_opt] = value

    if defaults:
        # set value when exists key-value in defaults dict
        parser.set_defaults(**defaults)

    return parser
4092
4093
def _split_comma_separated(string):
    """Return the set of non-empty, stripped items of a comma-separated string."""
    pieces = (chunk.strip() for chunk in string.split(','))
    return {piece for piece in pieces if piece}
4097
4098
def decode_filename(filename):
    """Return Unicode filename.

    A str is returned as is; anything else is decoded with the file
    system encoding.

    """
    if not isinstance(filename, str):
        return filename.decode(sys.getfilesystemencoding())

    return filename
4105
4106
def supported_fixes():
    """Yield pep8 error codes that autopep8 fixes.

    Each item we yield is a tuple of the code followed by its
    description. The items come from reindent, from the fix_* methods
    of FixPEP8, from the global fixes and from CODE_TO_2TO3, in that
    order.

    """
    def one_line_summary(docstring):
        return re.sub(r'\s+', ' ', docstring_summary(docstring))

    def padded(code):
        return code.upper() + (4 - len(code)) * ' '

    yield ('E101', docstring_summary(reindent.__doc__))

    instance = FixPEP8(filename=None, options=None, contents='')
    for attribute in dir(instance):
        matched = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
        if not matched:
            continue
        yield (
            matched.group(1).upper(),
            one_line_summary(getattr(instance, attribute).__doc__)
        )

    for (code, function) in sorted(global_fixes()):
        yield (padded(code), one_line_summary(function.__doc__))

    for code in sorted(CODE_TO_2TO3):
        yield (padded(code), one_line_summary(fix_2to3.__doc__))
4133
4134
def docstring_summary(docstring):
    """Return the first line of docstring, or '' if there is no docstring."""
    if not docstring:
        return ''
    return docstring.partition('\n')[0]
4138
4139
def line_shortening_rank(candidate, indent_word, max_line_length,
                         experimental=False):
    """Return rank of candidate.

    This is for sorting candidates.

    "candidate" is the text of one possible way of breaking up a line.
    Breaks that are considered ugly add to the rank and preferred kinds
    of breaks subtract from it, so a lower rank presumably marks the
    better candidate. The result is never negative, and a blank
    candidate gets rank 0.

    """
    if not candidate.strip():
        return 0

    rank = 0
    lines = candidate.rstrip().split('\n')

    # Extra width charged to every line when the first line opens a
    # bracket that is not closed right away.
    offset = 0
    if (
        not lines[0].lstrip().startswith('#') and
        lines[0].rstrip()[-1] not in '([{'
    ):
        for (opening, closing) in ('()', '[]', '{}'):
            # Don't penalize empty containers that aren't split up. Things like
            # this "foo(\n    )" aren't particularly good.
            opening_loc = lines[0].find(opening)
            closing_loc = lines[0].find(closing)
            if opening_loc >= 0:
                if closing_loc < 0 or closing_loc != opening_loc + 1:
                    offset = max(offset, 1 + opening_loc)

    current_longest = max(offset + len(x.strip()) for x in lines)

    # Lines that are still too long weigh heavily.
    rank += 4 * max(0, current_longest - max_line_length)

    # Fewer lines are better.
    rank += len(lines)

    # Too much variation in line length is ugly.
    rank += 2 * standard_deviation(len(line) for line in lines)

    # The closing bracket matching an opening bracket at the end of the
    # first line, if any.
    bad_staring_symbol = {
        '(': ')',
        '[': ']',
        '{': '}'}.get(lines[0][-1])

    if len(lines) > 1:
        # Penalize a second line that starts by closing that bracket.
        if (
            bad_staring_symbol and
            lines[1].lstrip().startswith(bad_staring_symbol)
        ):
            rank += 20

    for lineno, current_line in enumerate(lines):
        current_line = current_line.strip()

        if current_line.startswith('#'):
            continue

        for bad_start in ['.', '%', '+', '-', '/']:
            if current_line.startswith(bad_start):
                rank += 100

            # Do not tolerate operators on their own line.
            if current_line == bad_start:
                rank += 1000

        if (
            current_line.endswith(('.', '%', '+', '-', '/')) and
            "': " in current_line
        ):
            rank += 1000

        if current_line.endswith(('(', '[', '{', '.')):
            # Avoid lonely opening. They result in longer lines.
            if len(current_line) <= len(indent_word):
                rank += 100

            # Avoid the ugliness of ", (\n".
            if (
                current_line.endswith('(') and
                current_line[:-1].rstrip().endswith(',')
            ):
                rank += 100

            # Avoid the ugliness of "something[\n" and something[index][\n.
            if (
                current_line.endswith('[') and
                len(current_line) > 1 and
                (current_line[-2].isalnum() or current_line[-2] in ']')
            ):
                rank += 300

            # Also avoid the ugliness of "foo.\nbar"
            if current_line.endswith('.'):
                rank += 100

            if has_arithmetic_operator(current_line):
                rank += 100

        # Avoid breaking at unary operators.
        if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')):
            rank += 1000

        if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')):
            rank += 1000

        if current_line.endswith(('%', '(', '[', '{')):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if current_line.startswith('for '):
            rank -= 50

        if current_line.endswith('\\'):
            # If a line ends in \-newline, it may be part of a
            # multiline string. In that case, we would like to know
            # how long that line is without the \-newline. If it's
            # longer than the maximum, or has comments, then we assume
            # that the \-newline is an okay candidate and only
            # penalize it a bit.
            total_len = len(current_line)
            # Rebinding the loop variable only affects this look-ahead;
            # enumerate() supplies a fresh value on the next iteration.
            lineno += 1
            while lineno < len(lines):
                total_len += len(lines[lineno])

                if lines[lineno].lstrip().startswith('#'):
                    total_len = max_line_length
                    break

                if not lines[lineno].endswith('\\'):
                    break

                lineno += 1

            if total_len < max_line_length:
                rank += 10
            else:
                rank += 100 if experimental else 1

        # Prefer breaking at commas rather than colon.
        if ',' in current_line and current_line.endswith(':'):
            rank += 10

        # Avoid splitting dictionaries between key and value.
        if current_line.endswith(':'):
            rank += 100

        rank += 10 * count_unbalanced_brackets(current_line)

    return max(0, rank)
4286
4287
def standard_deviation(numbers):
    """Return the population standard deviation of ``numbers``.

    ``numbers`` may be any iterable (it is materialised into a list first,
    so one-shot generators are fine). An empty input yields 0.

    """
    values = list(numbers)
    count = len(values)
    if count == 0:
        return 0

    average = sum(values) / count
    variance = sum((value - average) ** 2 for value in values) / count
    return variance ** .5
4296
4297
def has_arithmetic_operator(line):
    """Return True if line contains any arithmetic operators.

    The operators checked are the entries of ``pycodestyle.ARITHMETIC_OP``;
    the test is a plain substring search, not a tokenised one.

    """
    return any(symbol in line for symbol in pycodestyle.ARITHMETIC_OP)
4305
4306
def count_unbalanced_brackets(line):
    """Return number of unmatched open/close brackets.

    For each of the pairs ``()``, ``[]`` and ``{}`` the absolute difference
    between the number of opening and closing characters is added up.
    Ordering and nesting are not considered, only raw character counts.

    """
    bracket_pairs = ('()', '[]', '{}')
    return sum(
        abs(line.count(pair[0]) - line.count(pair[1]))
        for pair in bracket_pairs
    )
4314
4315
def split_at_offsets(line, offsets):
    """Split line at offsets.

    Return list of strings.

    ``offsets`` may be given in any order and may contain duplicates; they
    are processed in sorted order. An offset is only used as a split point
    when it is smaller than ``len(line)`` and differs from the previous
    split point. Every piece except the last one is stripped of surrounding
    whitespace; the last piece is returned as-is.

    """
    result = []

    previous_offset = 0
    line_length = len(line)
    for current_offset in sorted(offsets):
        if current_offset < line_length and previous_offset != current_offset:
            result.append(line[previous_offset:current_offset].strip())
            previous_offset = current_offset

    # Slice from the last split point that was actually applied. Slicing
    # from the last offset seen would drop the remainder of the line
    # whenever that offset lies at or beyond the end of the line.
    result.append(line[previous_offset:])

    return result
4334
4335
class LineEndingWrapper(object):

    r"""Normalise line endings before handing text to a wrapped stream.

    Written for use with sys.stdout, which seems to expect only '\n' as the
    line ending regardless of platform; passing other endings through
    results in repeated line endings.

    """

    def __init__(self, output):
        # The wrapped stream; only its write() and flush() are used.
        self.__output = output

    def write(self, s):
        """Write ``s`` with every '\\r\\n' and lone '\\r' turned into '\\n'."""
        # Collapse the two-character ending first so that its '\r' is not
        # converted on its own and turned into a doubled newline.
        normalized = s.replace('\r\n', '\n')
        normalized = normalized.replace('\r', '\n')
        self.__output.write(normalized)

    def flush(self):
        """Flush the wrapped stream."""
        self.__output.flush()
4353
4354
def match_file(filename, exclude):
    """Return True if file is okay for modifying/recursing.

    A path is rejected when its base name starts with a dot, when either
    its base name or the full path matches one of the ``exclude`` fnmatch
    patterns, or when it is neither a directory nor a Python file.

    """
    base_name = os.path.basename(filename)

    if base_name.startswith('.'):
        return False

    is_excluded = any(
        fnmatch.fnmatch(base_name, pattern) or
        fnmatch.fnmatch(filename, pattern)
        for pattern in exclude
    )
    if is_excluded:
        return False

    # Directories are always acceptable (they get recursed into); anything
    # else has to look like Python source.
    return os.path.isdir(filename) or is_python_file(filename)
4372
4373
def find_files(filenames, recursive, exclude):
    """Yield filenames.

    ``filenames`` is used as a work queue and is consumed in place: entries
    are popped from the front until the list is empty. When ``recursive``
    is true and an entry is a directory, the directory is walked; files
    accepted by match_file() are appended to the queue and subdirectories
    rejected by match_file() are pruned from the walk. Every other entry is
    yielded unless it matches one of the ``exclude`` fnmatch patterns.

    """
    while filenames:
        candidate = filenames.pop(0)

        if recursive and os.path.isdir(candidate):
            for root, directories, children in os.walk(candidate):
                child_paths = [os.path.join(root, child)
                               for child in children]
                filenames.extend(
                    [path for path in child_paths
                     if match_file(path, exclude)])

                # Assign through the slice so os.walk() sees the pruned
                # list and skips the rejected subdirectories.
                directories[:] = [
                    directory for directory in directories
                    if match_file(os.path.join(root, directory), exclude)
                ]
            continue

        excluded = any(fnmatch.fnmatch(candidate, pattern)
                       for pattern in exclude)
        if not excluded:
            yield candidate
4394
4395
def _fix_file(parameters):
    """Helper function for optionally running fix_file() in parallel.

    ``parameters`` is the positional argument tuple for fix_file(); its
    first item is the filename and its second the options object. In
    verbose mode the filename is announced on stderr first. An IOError
    raised by fix_file() is reported on stderr and then re-raised.

    """
    options = parameters[1]
    if options.verbose:
        print('[file:{}]'.format(parameters[0]), file=sys.stderr)

    try:
        result = fix_file(*parameters)
    except IOError as error:
        print(str(error), file=sys.stderr)
        raise error
    return result
4405
4406
def fix_multiple_files(filenames, options, output=None):
    """Fix list of files.

    Optionally fix files recursively.

    With ``options.jobs > 1`` the files are dispatched to a multiprocessing
    pool and the result of every job is returned; in diff mode each result
    is also decoded and written to sys.stdout. Otherwise the files are
    processed one by one, passing ``output`` on to the fixer, and only
    results that represent a change are returned.

    """
    candidates = find_files(filenames, options.recursive, options.exclude)

    if options.jobs > 1:
        import multiprocessing
        pool = multiprocessing.Pool(options.jobs)
        pending = [
            pool.apply_async(_fix_file, ((name, options),))
            for name in candidates
        ]
        pool.close()
        pool.join()

        if options.diff:
            for job in pending:
                sys.stdout.write(job.get().decode())
                sys.stdout.flush()

        return [job.get() for job in pending]

    results = []
    for name in candidates:
        fixed = _fix_file((name, options, output))
        if fixed is None:
            continue

        if options.diff:
            # An empty diff means nothing changed.
            changed = fixed != ''
        elif options.in_place:
            changed = True
        else:
            # Compare line by line so that differing line terminators alone
            # do not count as a change.
            original_source = readlines_from_file(name)
            changed = (
                "".join(original_source).splitlines() != fixed.splitlines()
            )

        if changed:
            results.append(fixed)

    return results
4444
4445
def is_python_file(filename):
    """Return True if filename is Python file.

    A name ending in '.py' is accepted without opening the file. Otherwise
    up to MAX_PYTHON_FILE_DETECTION_BYTES are read and the first line has
    to match PYTHON_SHEBANG_REGEX. Unreadable or empty files are not
    considered Python.

    """
    if filename.endswith('.py'):
        return True

    try:
        with open_with_encoding(
                filename,
                limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES
        ) as source_file:
            head = source_file.read(MAX_PYTHON_FILE_DETECTION_BYTES)
            if not head:
                return False
            first_line = head.splitlines()[0]
    except (IOError, IndexError):
        return False

    return bool(PYTHON_SHEBANG_REGEX.match(first_line))
4466
4467
def is_probably_part_of_multiline(line):
    """Return True if line is likely part of a multiline string.

    When multiline strings are involved, pep8 reports the error as being
    at the start of the multiline string, which doesn't work for us.

    The heuristic: the line contains a triple-quote delimiter, or its last
    non-whitespace character is a backslash.

    """
    if '"""' in line or "'''" in line:
        return True

    return line.rstrip().endswith('\\')
4480
4481
def wrap_output(output, encoding):
    """Return output with specified encoding.

    The result is a codecs stream writer for ``encoding``. It wraps
    ``output.buffer`` when ``output`` has that attribute and ``output``
    itself otherwise.

    """
    writer_factory = codecs.getwriter(encoding)
    target = getattr(output, 'buffer', output)
    return writer_factory(target)
4487
4488
def get_encoding():
    """Return preferred encoding.

    Uses the locale's preferred encoding and falls back to the
    interpreter's default encoding when the locale reports nothing.

    """
    preferred = locale.getpreferredencoding()
    if preferred:
        return preferred
    return sys.getdefaultencoding()
4492
4493
def main(argv=None, apply_config=True):
    """Command-line entry.

    ``argv`` defaults to sys.argv; its first item is skipped and the rest
    is handed to parse_args() together with ``apply_config``.

    Returns EXIT_CODE_OK after listing the available fixes,
    EXIT_CODE_EXISTS_DIFF when ``--exit-code`` was requested and changes
    were found, EXIT_CODE_ERROR on IOError or KeyboardInterrupt, and None
    in every other case.

    """
    if argv is None:
        argv = sys.argv

    try:
        # Exit on broken pipe.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    except AttributeError:  # pragma: no cover
        # SIGPIPE is not available on Windows.
        pass

    try:
        args = parse_args(argv[1:], apply_config=apply_config)

        if args.list_fixes:
            for code, description in sorted(supported_fixes()):
                print('{code} - {description}'.format(
                    code=code, description=description))
            return EXIT_CODE_OK

        if args.files == ['-']:
            assert not args.in_place

            encoding = sys.stdin.encoding or get_encoding()
            read_stdin = sys.stdin.read()
            fixed_stdin = fix_code(read_stdin, args, encoding=encoding)

            # LineEndingWrapper is unnecessary here due to the symmetry between
            # standard in and standard out.
            wrap_output(sys.stdout, encoding=encoding).write(fixed_stdin)

            # Compare the text itself. Comparing hashes would miss a change
            # whenever the two strings happen to share a hash value.
            if read_stdin != fixed_stdin:
                if args.exit_code:
                    return EXIT_CODE_EXISTS_DIFF
        else:
            if args.in_place or args.diff:
                # Drop duplicate paths so no file is processed twice.
                args.files = list(set(args.files))
            else:
                assert len(args.files) == 1
                assert not args.recursive

            results = fix_multiple_files(args.files, args, sys.stdout)
            if args.diff:
                ret = any([len(ret) != 0 for ret in results])
            else:
                # with in-place option
                ret = any([ret is not None for ret in results])
            if args.exit_code and ret:
                return EXIT_CODE_EXISTS_DIFF
    except IOError:
        return EXIT_CODE_ERROR
    except KeyboardInterrupt:
        return EXIT_CODE_ERROR  # pragma: no cover
4548
4549
class CachedTokenizer(object):

    """A one-element cache around tokenize.generate_tokens().

    Only the most recently tokenized text is remembered; asking for the
    same text again returns the stored token list without re-tokenizing.

    Original code written by Ned Batchelder, in coverage.py.

    """

    def __init__(self):
        # Text of the last successful tokenization and its token list.
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for tokenize.generate_tokens().

        Takes the source text itself rather than a readline callable and
        returns a list of tokens instead of a generator.

        """
        if text == self.last_text:
            return self.last_tokens

        readline = io.StringIO(text).readline
        tokens = list(tokenize.generate_tokens(readline))

        # Update the cache only after tokenizing succeeded.
        self.last_tokens = tokens
        self.last_text = text
        return tokens
4571
4572
# Single module-level tokenizer cache. ``generate_tokens`` is bound to it so
# that callers share one cache and can use it like a plain function.
_cached_tokenizer = CachedTokenizer()
generate_tokens = _cached_tokenizer.generate_tokens


# Script entry point: the value returned by main() becomes the exit status.
if __name__ == '__main__':
    sys.exit(main())