djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/autopep8.py

   1 #!/usr/bin/env python
   2
   3 # Copyright (C) 2010-2011 Hideo Hattori
   4 # Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
   5 # Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining
   8 # a copy of this software and associated documentation files (the
   9 # "Software"), to deal in the Software without restriction, including
  10 # without limitation the rights to use, copy, modify, merge, publish,
  11 # distribute, sublicense, and/or sell copies of the Software, and to
  12 # permit persons to whom the Software is furnished to do so, subject to
  13 # the following conditions:
  14 #
  15 # The above copyright notice and this permission notice shall be
  16 # included in all copies or substantial portions of the Software.
  17 #
  18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  21 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  22 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  23 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  25 # SOFTWARE.
  26
  27 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
  28 # Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
  29 #
  30 # Permission is hereby granted, free of charge, to any person
  31 # obtaining a copy of this software and associated documentation files
  32 # (the "Software"), to deal in the Software without restriction,
  33 # including without limitation the rights to use, copy, modify, merge,
  34 # publish, distribute, sublicense, and/or sell copies of the Software,
  35 # and to permit persons to whom the Software is furnished to do so,
  36 # subject to the following conditions:
  37 #
  38 # The above copyright notice and this permission notice shall be
  39 # included in all copies or substantial portions of the Software.
  40 #
  41 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  42 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  43 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  44 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  45 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  46 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  47 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  48 # SOFTWARE.
  49
  50 """Automatically formats Python code to conform to the PEP 8 style guide.
  51
  52 Fixes that only need be done once can be added by adding a function of the form
  53 "fix_<code>(source)" to this module. They should return the fixed source code.
  54 These fixes are picked up by apply_global_fixes().
  55
  56 Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
  57 class documentation for more information.
  58
  59 """
  60
  61 from __future__ import absolute_import
  62 from __future__ import division
  63 from __future__ import print_function
  64 from __future__ import unicode_literals
  65
  66 import argparse
  67 import codecs
  68 import collections
  69 import copy
  70 import difflib
  71 import fnmatch
  72 import inspect
  73 import io
  74 import itertools
  75 import keyword
  76 import locale
  77 import os
  78 import re
  79 import signal
  80 import sys
  81 import textwrap
  82 import token
  83 import tokenize
  84 import warnings
  85 import ast
  86 from configparser import ConfigParser as SafeConfigParser, Error
  87
  88 import pycodestyle
  89 from pycodestyle import STARTSWITH_INDENT_STATEMENT_REGEX
  90
  91
  92 __version__ = '2.0.4'
  93
  94
  95 CR = '\r'
  96 LF = '\n'
  97 CRLF = '\r\n'
  98
  99
 100 PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
 101 LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
 102 COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
 103 COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
 104 BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
 105 STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
 106 DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
 107 ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
 108 DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')
 109
 110 EXIT_CODE_OK = 0
 111 EXIT_CODE_ERROR = 1
 112 EXIT_CODE_EXISTS_DIFF = 2
 113 EXIT_CODE_ARGPARSE_ERROR = 99
 114
 115 # For generating line shortening candidates.
 116 SHORTEN_OPERATOR_GROUPS = frozenset([
 117     frozenset([',']),
 118     frozenset(['%']),
 119     frozenset([',', '(', '[', '{']),
 120     frozenset(['%', '(', '[', '{']),
 121     frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
 122     frozenset(['%', '+', '-', '*', '/', '//']),
 123 ])
 124
 125
 126 DEFAULT_IGNORE = 'E226,E24,W50,W690'    # TODO: use pycodestyle.DEFAULT_IGNORE
 127 DEFAULT_INDENT_SIZE = 4
 128 # these fixes conflict with each other, if the `--ignore` setting causes both
 129 # to be enabled, disable both of them
 130 CONFLICTING_CODES = ('W503', 'W504')
 131
 132 # W602 is handled separately due to the need to avoid "with_traceback".
 133 CODE_TO_2TO3 = {
 134     'E231': ['ws_comma'],
 135     'E721': ['idioms'],
 136     'W690': ['apply',
 137              'except',
 138              'exitfunc',
 139              'numliterals',
 140              'operator',
 141              'paren',
 142              'reduce',
 143              'renames',
 144              'standarderror',
 145              'sys_exc',
 146              'throw',
 147              'tuple_params',
 148              'xreadlines']}
 149
 150
 151 if sys.platform == 'win32':  # pragma: no cover
 152     DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
 153 else:
 154     DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
 155                                   os.path.expanduser('~/.config'),
 156                                   'pycodestyle')
 157 # fallback, use .pep8
 158 if not os.path.exists(DEFAULT_CONFIG):  # pragma: no cover
 159     if sys.platform == 'win32':
 160         DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
 161     else:
 162         DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
 163 PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')
 164
 165
 166 MAX_PYTHON_FILE_DETECTION_BYTES = 1024
 167
 168
 169 def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
 170     """Return opened file with a specific encoding."""
 171     if not encoding:
 172         encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)
 173
 174     return io.open(filename, mode=mode, encoding=encoding,
 175                    newline='')  # Preserve line endings
 176
 177
 178 def detect_encoding(filename, limit_byte_check=-1):
 179     """Return file encoding."""
 180     try:
 181         with open(filename, 'rb') as input_file:
 182             from lib2to3.pgen2 import tokenize as lib2to3_tokenize
 183             encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
 184
 185         with open_with_encoding(filename, encoding=encoding) as test_file:
 186             test_file.read(limit_byte_check)
 187
 188         return encoding
 189     except (LookupError, SyntaxError, UnicodeDecodeError):
 190         return 'latin-1'
 191
 192
 193 def readlines_from_file(filename):
 194     """Return contents of file."""
 195     with open_with_encoding(filename) as input_file:
 196         return input_file.readlines()
 197
 198
 199 def extended_blank_lines(logical_line,
 200                          blank_lines,
 201                          blank_before,
 202                          indent_level,
 203                          previous_logical):
 204     """Check for missing blank lines after class declaration."""
 205     if previous_logical.startswith('def '):
 206         if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
 207             yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
 208     elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
 209         # Missing blank line between class docstring and method declaration.
 210         if (
 211             indent_level and
 212             not blank_lines and
 213             not blank_before and
 214             logical_line.startswith(('def ')) and
 215             '(self' in logical_line
 216         ):
 217             yield (0, 'E301 expected 1 blank line, found 0')
 218
 219
 220 pycodestyle.register_check(extended_blank_lines)
 221
 222
 223 def continued_indentation(logical_line, tokens, indent_level, hang_closing,
 224                           indent_char, noqa):
 225     """Override pycodestyle's function to provide indentation information."""
 226     first_row = tokens[0][2][0]
 227     nrows = 1 + tokens[-1][2][0] - first_row
 228     if noqa or nrows == 1:
 229         return
 230
 231     # indent_next tells us whether the next block is indented. Assuming
 232     # that it is indented by 4 spaces, then we should not allow 4-space
 233     # indents on the final continuation line. In turn, some other
 234     # indents are allowed to have an extra 4 spaces.
 235     indent_next = logical_line.endswith(':')
 236
 237     row = depth = 0
 238     valid_hangs = (
 239         (DEFAULT_INDENT_SIZE,)
 240         if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
 241                                      2 * DEFAULT_INDENT_SIZE)
 242     )
 243
 244     # Remember how many brackets were opened on each line.
 245     parens = [0] * nrows
 246
 247     # Relative indents of physical lines.
 248     rel_indent = [0] * nrows
 249
 250     # For each depth, collect a list of opening rows.
 251     open_rows = [[0]]
 252     # For each depth, memorize the hanging indentation.
 253     hangs = [None]
 254
 255     # Visual indents.
 256     indent_chances = {}
 257     last_indent = tokens[0][2]
 258     indent = [last_indent[1]]
 259
 260     last_token_multiline = None
 261     line = None
 262     last_line = ''
 263     last_line_begins_with_multiline = False
 264     for token_type, text, start, end, line in tokens:
 265
 266         newline = row < start[0] - first_row
 267         if newline:
 268             row = start[0] - first_row
 269             newline = (not last_token_multiline and
 270                        token_type not in (tokenize.NL, tokenize.NEWLINE))
 271             last_line_begins_with_multiline = last_token_multiline
 272
 273         if newline:
 274             # This is the beginning of a continuation line.
 275             last_indent = start
 276
 277             # Record the initial indent.
 278             rel_indent[row] = pycodestyle.expand_indent(line) - indent_level
 279
 280             # Identify closing bracket.
 281             close_bracket = (token_type == tokenize.OP and text in ']})')
 282
 283             # Is the indent relative to an opening bracket line?
 284             for open_row in reversed(open_rows[depth]):
 285                 hang = rel_indent[row] - rel_indent[open_row]
 286                 hanging_indent = hang in valid_hangs
 287                 if hanging_indent:
 288                     break
 289             if hangs[depth]:
 290                 hanging_indent = (hang == hangs[depth])
 291
 292             visual_indent = (not close_bracket and hang > 0 and
 293                              indent_chances.get(start[1]))
 294
 295             if close_bracket and indent[depth]:
 296                 # Closing bracket for visual indent.
 297                 if start[1] != indent[depth]:
 298                     yield (start, 'E124 {}'.format(indent[depth]))
 299             elif close_bracket and not hang:
 300                 # closing bracket matches indentation of opening bracket's line
 301                 if hang_closing:
 302                     yield (start, 'E133 {}'.format(indent[depth]))
 303             elif indent[depth] and start[1] < indent[depth]:
 304                 if visual_indent is not True:
 305                     # Visual indent is broken.
 306                     yield (start, 'E128 {}'.format(indent[depth]))
 307             elif (hanging_indent or
 308                   (indent_next and
 309                    rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
 310                 # Hanging indent is verified.
 311                 if close_bracket and not hang_closing:
 312                     yield (start, 'E123 {}'.format(indent_level +
 313                                                    rel_indent[open_row]))
 314                 hangs[depth] = hang
 315             elif visual_indent is True:
 316                 # Visual indent is verified.
 317                 indent[depth] = start[1]
 318             elif visual_indent in (text, str):
 319                 # Ignore token lined up with matching one from a previous line.
 320                 pass
 321             else:
 322                 one_indented = (indent_level + rel_indent[open_row] +
 323                                 DEFAULT_INDENT_SIZE)
 324                 # Indent is broken.
 325                 if hang <= 0:
 326                     error = ('E122', one_indented)
 327                 elif indent[depth]:
 328                     error = ('E127', indent[depth])
 329                 elif not close_bracket and hangs[depth]:
 330                     error = ('E131', one_indented)
 331                 elif hang > DEFAULT_INDENT_SIZE:
 332                     error = ('E126', one_indented)
 333                 else:
 334                     hangs[depth] = hang
 335                     error = ('E121', one_indented)
 336
 337                 yield (start, '{} {}'.format(*error))
 338
 339         # Look for visual indenting.
 340         if (
 341             parens[row] and
 342             token_type not in (tokenize.NL, tokenize.COMMENT) and
 343             not indent[depth]
 344         ):
 345             indent[depth] = start[1]
 346             indent_chances[start[1]] = True
 347         # Deal with implicit string concatenation.
 348         elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
 349               text in ('u', 'ur', 'b', 'br')):
 350             indent_chances[start[1]] = str
 351         # Special case for the "if" statement because len("if (") is equal to
 352         # 4.
 353         elif not indent_chances and not row and not depth and text == 'if':
 354             indent_chances[end[1] + 1] = True
 355         elif text == ':' and line[end[1]:].isspace():
 356             open_rows[depth].append(row)
 357
 358         # Keep track of bracket depth.
 359         if token_type == tokenize.OP:
 360             if text in '([{':
 361                 depth += 1
 362                 indent.append(0)
 363                 hangs.append(None)
 364                 if len(open_rows) == depth:
 365                     open_rows.append([])
 366                 open_rows[depth].append(row)
 367                 parens[row] += 1
 368             elif text in ')]}' and depth > 0:
 369                 # Parent indents should not be more than this one.
 370                 prev_indent = indent.pop() or last_indent[1]
 371                 hangs.pop()
 372                 for d in range(depth):
 373                     if indent[d] > prev_indent:
 374                         indent[d] = 0
 375                 for ind in list(indent_chances):
 376                     if ind >= prev_indent:
 377                         del indent_chances[ind]
 378                 del open_rows[depth + 1:]
 379                 depth -= 1
 380                 if depth:
 381                     indent_chances[indent[depth]] = True
 382                 for idx in range(row, -1, -1):
 383                     if parens[idx]:
 384                         parens[idx] -= 1
 385                         break
 386             assert len(indent) == depth + 1
 387             if (
 388                 start[1] not in indent_chances and
 389                 # This is for purposes of speeding up E121 (GitHub #90).
 390                 not last_line.rstrip().endswith(',')
 391             ):
 392                 # Allow to line up tokens.
 393                 indent_chances[start[1]] = text
 394
 395         last_token_multiline = (start[0] != end[0])
 396         if last_token_multiline:
 397             rel_indent[end[0] - first_row] = rel_indent[row]
 398
 399         last_line = line
 400
 401     if (
 402         indent_next and
 403         not last_line_begins_with_multiline and
 404         pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
 405     ):
 406         pos = (start[0], indent[0] + 4)
 407         desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
 408         if visual_indent:
 409             yield (pos, 'E129 {}'.format(desired_indent))
 410         else:
 411             yield (pos, 'E125 {}'.format(desired_indent))
 412
 413
 414 del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
 415 pycodestyle.register_check(continued_indentation)
 416
 417
 418 class FixPEP8(object):
 419
 420     """Fix invalid code.
 421
 422     Fixer methods are prefixed "fix_". The _fix_source() method looks for these
 423     automatically.
 424
 425     The fixer method can take either one or two arguments (in addition to
 426     self). The first argument is "result", which is the error information from
 427     pycodestyle. The second argument, "logical", is required only for
 428     logical-line fixes.
 429
 430     The fixer method can return the list of modified lines or None. An empty
 431     list would mean that no changes were made. None would mean that only the
 432     line reported in the pycodestyle error was modified. Note that the modified
 433     line numbers that are returned are indexed at 1. This typically would
 434     correspond with the line number reported in the pycodestyle error
 435     information.
 436
 437     [fixed method list]
 438         - e111,e114,e115,e116
 439         - e121,e122,e123,e124,e125,e126,e127,e128,e129
 440         - e201,e202,e203
 441         - e211
 442         - e221,e222,e223,e224,e225
 443         - e231
 444         - e251,e252
 445         - e261,e262
 446         - e271,e272,e273,e274,e275
 447         - e301,e302,e303,e304,e305,e306
 448         - e401,e402
 449         - e502
 450         - e701,e702,e703,e704
 451         - e711,e712,e713,e714
 452         - e722
 453         - e731
 454         - w291
 455         - w503,504
 456
 457     """
 458
 459     def __init__(self, filename,
 460                  options,
 461                  contents=None,
 462                  long_line_ignore_cache=None):
 463         self.filename = filename
 464         if contents is None:
 465             self.source = readlines_from_file(filename)
 466         else:
 467             sio = io.StringIO(contents)
 468             self.source = sio.readlines()
 469         self.options = options
 470         self.indent_word = _get_indentword(''.join(self.source))
 471
 472         # collect imports line
 473         self.imports = {}
 474         for i, line in enumerate(self.source):
 475             if (line.find("import ") == 0 or line.find("from ") == 0) and \
 476                     line not in self.imports:
 477                 # collect only import statements that first appeared
 478                 self.imports[line] = i
 479
 480         self.long_line_ignore_cache = (
 481             set() if long_line_ignore_cache is None
 482             else long_line_ignore_cache)
 483
 484         # Many fixers are the same even though pycodestyle categorizes them
 485         # differently.
 486         self.fix_e115 = self.fix_e112
 487         self.fix_e121 = self._fix_reindent
 488         self.fix_e122 = self._fix_reindent
 489         self.fix_e123 = self._fix_reindent
 490         self.fix_e124 = self._fix_reindent
 491         self.fix_e126 = self._fix_reindent
 492         self.fix_e127 = self._fix_reindent
 493         self.fix_e128 = self._fix_reindent
 494         self.fix_e129 = self._fix_reindent
 495         self.fix_e133 = self.fix_e131
 496         self.fix_e202 = self.fix_e201
 497         self.fix_e203 = self.fix_e201
 498         self.fix_e211 = self.fix_e201
 499         self.fix_e221 = self.fix_e271
 500         self.fix_e222 = self.fix_e271
 501         self.fix_e223 = self.fix_e271
 502         self.fix_e226 = self.fix_e225
 503         self.fix_e227 = self.fix_e225
 504         self.fix_e228 = self.fix_e225
 505         self.fix_e241 = self.fix_e271
 506         self.fix_e242 = self.fix_e224
 507         self.fix_e252 = self.fix_e225
 508         self.fix_e261 = self.fix_e262
 509         self.fix_e272 = self.fix_e271
 510         self.fix_e273 = self.fix_e271
 511         self.fix_e274 = self.fix_e271
 512         self.fix_e275 = self.fix_e271
 513         self.fix_e306 = self.fix_e301
 514         self.fix_e501 = (
 515             self.fix_long_line_logically if
 516             options and (options.aggressive >= 2 or options.experimental) else
 517             self.fix_long_line_physically)
 518         self.fix_e703 = self.fix_e702
 519         self.fix_w292 = self.fix_w291
 520         self.fix_w293 = self.fix_w291
 521
 522     def _fix_source(self, results):
 523         try:
 524             (logical_start, logical_end) = _find_logical(self.source)
 525             logical_support = True
 526         except (SyntaxError, tokenize.TokenError):  # pragma: no cover
 527             logical_support = False
 528
 529         completed_lines = set()
 530         for result in sorted(results, key=_priority_key):
 531             if result['line'] in completed_lines:
 532                 continue
 533
 534             fixed_methodname = 'fix_' + result['id'].lower()
 535             if hasattr(self, fixed_methodname):
 536                 fix = getattr(self, fixed_methodname)
 537
 538                 line_index = result['line'] - 1
 539                 original_line = self.source[line_index]
 540
 541                 is_logical_fix = len(_get_parameters(fix)) > 2
 542                 if is_logical_fix:
 543                     logical = None
 544                     if logical_support:
 545                         logical = _get_logical(self.source,
 546                                                result,
 547                                                logical_start,
 548                                                logical_end)
 549                         if logical and set(range(
 550                             logical[0][0] + 1,
 551                             logical[1][0] + 1)).intersection(
 552                                 completed_lines):
 553                             continue
 554
 555                     modified_lines = fix(result, logical)
 556                 else:
 557                     modified_lines = fix(result)
 558
 559                 if modified_lines is None:
 560                     # Force logical fixes to report what they modified.
 561                     assert not is_logical_fix
 562
 563                     if self.source[line_index] == original_line:
 564                         modified_lines = []
 565
 566                 if modified_lines:
 567                     completed_lines.update(modified_lines)
 568                 elif modified_lines == []:  # Empty list means no fix
 569                     if self.options.verbose >= 2:
 570                         print(
 571                             '--->  Not fixing {error} on line {line}'.format(
 572                                 error=result['id'], line=result['line']),
 573                             file=sys.stderr)
 574                 else:  # We assume one-line fix when None.
 575                     completed_lines.add(result['line'])
 576             else:
 577                 if self.options.verbose >= 3:
 578                     print(
 579                         "--->  '{}' is not defined.".format(fixed_methodname),
 580                         file=sys.stderr)
 581
 582                     info = result['info'].strip()
 583                     print('--->  {}:{}:{}:{}'.format(self.filename,
 584                                                      result['line'],
 585                                                      result['column'],
 586                                                      info),
 587                           file=sys.stderr)
 588
 589     def fix(self):
 590         """Return a version of the source code with PEP 8 violations fixed."""
 591         pep8_options = {
 592             'ignore': self.options.ignore,
 593             'select': self.options.select,
 594             'max_line_length': self.options.max_line_length,
 595             'hang_closing': self.options.hang_closing,
 596         }
 597         results = _execute_pep8(pep8_options, self.source)
 598
 599         if self.options.verbose:
 600             progress = {}
 601             for r in results:
 602                 if r['id'] not in progress:
 603                     progress[r['id']] = set()
 604                 progress[r['id']].add(r['line'])
 605             print('--->  {n} issue(s) to fix {progress}'.format(
 606                 n=len(results), progress=progress), file=sys.stderr)
 607
 608         if self.options.line_range:
 609             start, end = self.options.line_range
 610             results = [r for r in results
 611                        if start <= r['line'] <= end]
 612
 613         self._fix_source(filter_results(source=''.join(self.source),
 614                                         results=results,
 615                                         aggressive=self.options.aggressive))
 616
 617         if self.options.line_range:
 618             # If number of lines has changed then change line_range.
 619             count = sum(sline.count('\n')
 620                         for sline in self.source[start - 1:end])
 621             self.options.line_range[1] = start + count - 1
 622
 623         return ''.join(self.source)
 624
 625     def _fix_reindent(self, result):
 626         """Fix a badly indented line.
 627
 628         This is done by adding or removing from its initial indent only.
 629
 630         """
 631         num_indent_spaces = int(result['info'].split()[1])
 632         line_index = result['line'] - 1
 633         target = self.source[line_index]
 634
 635         self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
 636
 637     def fix_e112(self, result):
 638         """Fix under-indented comments."""
 639         line_index = result['line'] - 1
 640         target = self.source[line_index]
 641
 642         if not target.lstrip().startswith('#'):
 643             # Don't screw with invalid syntax.
 644             return []
 645
 646         self.source[line_index] = self.indent_word + target
 647
 648     def fix_e113(self, result):
 649         """Fix unexpected indentation."""
 650         line_index = result['line'] - 1
 651         target = self.source[line_index]
 652         indent = _get_indentation(target)
 653         stripped = target.lstrip()
 654         self.source[line_index] = indent[1:] + stripped
 655
 656     def fix_e116(self, result):
 657         """Fix over-indented comments."""
 658         line_index = result['line'] - 1
 659         target = self.source[line_index]
 660
 661         indent = _get_indentation(target)
 662         stripped = target.lstrip()
 663
 664         if not stripped.startswith('#'):
 665             # Don't screw with invalid syntax.
 666             return []
 667
 668         self.source[line_index] = indent[1:] + stripped
 669
 670     def fix_e117(self, result):
 671         """Fix over-indented."""
 672         line_index = result['line'] - 1
 673         target = self.source[line_index]
 674
 675         indent = _get_indentation(target)
 676         if indent == '\t':
 677             return []
 678
 679         stripped = target.lstrip()
 680
 681         self.source[line_index] = indent[1:] + stripped
 682
 683     def fix_e125(self, result):
 684         """Fix indentation undistinguish from the next logical line."""
 685         num_indent_spaces = int(result['info'].split()[1])
 686         line_index = result['line'] - 1
 687         target = self.source[line_index]
 688
 689         spaces_to_add = num_indent_spaces - len(_get_indentation(target))
 690         indent = len(_get_indentation(target))
 691         modified_lines = []
 692
 693         while len(_get_indentation(self.source[line_index])) >= indent:
 694             self.source[line_index] = (' ' * spaces_to_add +
 695                                        self.source[line_index])
 696             modified_lines.append(1 + line_index)  # Line indexed at 1.
 697             line_index -= 1
 698
 699         return modified_lines
 700
 701     def fix_e131(self, result):
 702         """Fix indentation undistinguish from the next logical line."""
 703         num_indent_spaces = int(result['info'].split()[1])
 704         line_index = result['line'] - 1
 705         target = self.source[line_index]
 706
 707         spaces_to_add = num_indent_spaces - len(_get_indentation(target))
 708
 709         indent_length = len(_get_indentation(target))
 710         spaces_to_add = num_indent_spaces - indent_length
 711         if num_indent_spaces == 0 and indent_length == 0:
 712             spaces_to_add = 4
 713
 714         if spaces_to_add >= 0:
 715             self.source[line_index] = (' ' * spaces_to_add +
 716                                        self.source[line_index])
 717         else:
 718             offset = abs(spaces_to_add)
 719             self.source[line_index] = self.source[line_index][offset:]
 720
 721     def fix_e201(self, result):
 722         """Remove extraneous whitespace."""
 723         line_index = result['line'] - 1
 724         target = self.source[line_index]
 725         offset = result['column'] - 1
 726
 727         fixed = fix_whitespace(target,
 728                                offset=offset,
 729                                replacement='')
 730
 731         self.source[line_index] = fixed
 732
 733     def fix_e224(self, result):
 734         """Remove extraneous whitespace around operator."""
 735         target = self.source[result['line'] - 1]
 736         offset = result['column'] - 1
 737         fixed = target[:offset] + target[offset:].replace('\t', ' ')
 738         self.source[result['line'] - 1] = fixed
 739
 740     def fix_e225(self, result):
 741         """Fix missing whitespace around operator."""
 742         target = self.source[result['line'] - 1]
 743         offset = result['column'] - 1
 744         fixed = target[:offset] + ' ' + target[offset:]
 745
 746         # Only proceed if non-whitespace characters match.
 747         # And make sure we don't break the indentation.
 748         if (
 749             fixed.replace(' ', '') == target.replace(' ', '') and
 750             _get_indentation(fixed) == _get_indentation(target)
 751         ):
 752             self.source[result['line'] - 1] = fixed
 753             error_code = result.get('id', 0)
 754             try:
 755                 ts = generate_tokens(fixed)
 756             except (SyntaxError, tokenize.TokenError):
 757                 return
 758             if not check_syntax(fixed.lstrip()):
 759                 return
 760             try:
 761                 _missing_whitespace = (
 762                     pycodestyle.missing_whitespace_around_operator
 763                 )
 764             except AttributeError:
 765                 # pycodestyle >= 2.11.0
 766                 _missing_whitespace = pycodestyle.missing_whitespace
 767             errors = list(_missing_whitespace(fixed, ts))
 768             for e in reversed(errors):
 769                 if error_code != e[1].split()[0]:
 770                     continue
 771                 offset = e[0][1]
 772                 fixed = fixed[:offset] + ' ' + fixed[offset:]
 773             self.source[result['line'] - 1] = fixed
 774         else:
 775             return []
 776
 777     def fix_e231(self, result):
 778         """Add missing whitespace."""
 779         line_index = result['line'] - 1
 780         target = self.source[line_index]
 781         offset = result['column']
 782         fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
 783         self.source[line_index] = fixed
 784
 785     def fix_e251(self, result):
 786         """Remove whitespace around parameter '=' sign."""
 787         line_index = result['line'] - 1
 788         target = self.source[line_index]
 789
 790         # This is necessary since pycodestyle sometimes reports columns that
 791         # goes past the end of the physical line. This happens in cases like,
 792         # foo(bar\n=None)
 793         c = min(result['column'] - 1,
 794                 len(target) - 1)
 795
 796         if target[c].strip():
 797             fixed = target
 798         else:
 799             fixed = target[:c].rstrip() + target[c:].lstrip()
 800
 801         # There could be an escaped newline
 802         #
 803         #     def foo(a=\
 804         #             1)
 805         if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
 806             self.source[line_index] = fixed.rstrip('\n\r \t\\')
 807             self.source[line_index + 1] = self.source[line_index + 1].lstrip()
 808             return [line_index + 1, line_index + 2]  # Line indexed at 1
 809
 810         self.source[result['line'] - 1] = fixed
 811
 812     def fix_e262(self, result):
 813         """Fix spacing after inline comment hash."""
 814         target = self.source[result['line'] - 1]
 815         offset = result['column']
 816
 817         code = target[:offset].rstrip(' \t#')
 818         comment = target[offset:].lstrip(' \t#')
 819
 820         fixed = code + ('  # ' + comment if comment.strip() else '\n')
 821
 822         self.source[result['line'] - 1] = fixed
 823
 824     def fix_e265(self, result):
 825         """Fix spacing after block comment hash."""
 826         target = self.source[result['line'] - 1]
 827
 828         indent = _get_indentation(target)
 829         line = target.lstrip(' \t')
 830         pos = next((index for index, c in enumerate(line) if c != '#'))
 831         hashes = line[:pos]
 832         comment = line[pos:].lstrip(' \t')
 833
 834         # Ignore special comments, even in the middle of the file.
 835         if comment.startswith('!'):
 836             return
 837
 838         fixed = indent + hashes + (' ' + comment if comment.strip() else '\n')
 839
 840         self.source[result['line'] - 1] = fixed
 841
 842     def fix_e266(self, result):
 843         """Fix too many block comment hashes."""
 844         target = self.source[result['line'] - 1]
 845
 846         # Leave stylistic outlined blocks alone.
 847         if target.strip().endswith('#'):
 848             return
 849
 850         indentation = _get_indentation(target)
 851         fixed = indentation + '# ' + target.lstrip('# \t')
 852
 853         self.source[result['line'] - 1] = fixed
 854
 855     def fix_e271(self, result):
 856         """Fix extraneous whitespace around keywords."""
 857         line_index = result['line'] - 1
 858         target = self.source[line_index]
 859         offset = result['column'] - 1
 860
 861         fixed = fix_whitespace(target,
 862                                offset=offset,
 863                                replacement=' ')
 864
 865         if fixed == target:
 866             return []
 867         else:
 868             self.source[line_index] = fixed
 869
 870     def fix_e301(self, result):
 871         """Add missing blank line."""
 872         cr = '\n'
 873         self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
 874
 875     def fix_e302(self, result):
 876         """Add missing 2 blank lines."""
 877         add_linenum = 2 - int(result['info'].split()[-1])
 878         offset = 1
 879         if self.source[result['line'] - 2].strip() == "\\":
 880             offset = 2
 881         cr = '\n' * add_linenum
 882         self.source[result['line'] - offset] = (
 883             cr + self.source[result['line'] - offset]
 884         )
 885
 886     def fix_e303(self, result):
 887         """Remove extra blank lines."""
 888         delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
 889         delete_linenum = max(1, delete_linenum)
 890
 891         # We need to count because pycodestyle reports an offset line number if
 892         # there are comments.
 893         cnt = 0
 894         line = result['line'] - 2
 895         modified_lines = []
 896         while cnt < delete_linenum and line >= 0:
 897             if not self.source[line].strip():
 898                 self.source[line] = ''
 899                 modified_lines.append(1 + line)  # Line indexed at 1
 900                 cnt += 1
 901             line -= 1
 902
 903         return modified_lines
 904
 905     def fix_e304(self, result):
 906         """Remove blank line following function decorator."""
 907         line = result['line'] - 2
 908         if not self.source[line].strip():
 909             self.source[line] = ''
 910
 911     def fix_e305(self, result):
 912         """Add missing 2 blank lines after end of function or class."""
 913         add_delete_linenum = 2 - int(result['info'].split()[-1])
 914         cnt = 0
 915         offset = result['line'] - 2
 916         modified_lines = []
 917         if add_delete_linenum < 0:
 918             # delete cr
 919             add_delete_linenum = abs(add_delete_linenum)
 920             while cnt < add_delete_linenum and offset >= 0:
 921                 if not self.source[offset].strip():
 922                     self.source[offset] = ''
 923                     modified_lines.append(1 + offset)  # Line indexed at 1
 924                     cnt += 1
 925                 offset -= 1
 926         else:
 927             # add cr
 928             cr = '\n'
 929             # check comment line
 930             while True:
 931                 if offset < 0:
 932                     break
 933                 line = self.source[offset].lstrip()
 934                 if not line:
 935                     break
 936                 if line[0] != '#':
 937                     break
 938                 offset -= 1
 939             offset += 1
 940             self.source[offset] = cr + self.source[offset]
 941             modified_lines.append(1 + offset)   # Line indexed at 1.
 942         return modified_lines
 943
 944     def fix_e401(self, result):
 945         """Put imports on separate lines."""
 946         line_index = result['line'] - 1
 947         target = self.source[line_index]
 948         offset = result['column'] - 1
 949
 950         if not target.lstrip().startswith('import'):
 951             return []
 952
 953         indentation = re.split(pattern=r'\bimport\b',
 954                                string=target, maxsplit=1)[0]
 955         fixed = (target[:offset].rstrip('\t ,') + '\n' +
 956                  indentation + 'import ' + target[offset:].lstrip('\t ,'))
 957         self.source[line_index] = fixed
 958
 959     def fix_e402(self, result):
 960         (line_index, offset, target) = get_index_offset_contents(result,
 961                                                                  self.source)
 962         for i in range(1, 100):
 963             line = "".join(self.source[line_index:line_index+i])
 964             try:
 965                 generate_tokens("".join(line))
 966             except (SyntaxError, tokenize.TokenError):
 967                 continue
 968             break
 969         if not (target in self.imports and self.imports[target] != line_index):
 970             mod_offset = get_module_imports_on_top_of_file(self.source,
 971                                                            line_index)
 972             self.source[mod_offset] = line + self.source[mod_offset]
 973         for offset in range(i):
 974             self.source[line_index+offset] = ''
 975
 976     def fix_long_line_logically(self, result, logical):
 977         """Try to make lines fit within --max-line-length characters."""
 978         if (
 979             not logical or
 980             len(logical[2]) == 1 or
 981             self.source[result['line'] - 1].lstrip().startswith('#')
 982         ):
 983             return self.fix_long_line_physically(result)
 984
 985         start_line_index = logical[0][0]
 986         end_line_index = logical[1][0]
 987         logical_lines = logical[2]
 988
 989         previous_line = get_item(self.source, start_line_index - 1, default='')
 990         next_line = get_item(self.source, end_line_index + 1, default='')
 991
 992         single_line = join_logical_line(''.join(logical_lines))
 993
 994         try:
 995             fixed = self.fix_long_line(
 996                 target=single_line,
 997                 previous_line=previous_line,
 998                 next_line=next_line,
 999                 original=''.join(logical_lines))
1000         except (SyntaxError, tokenize.TokenError):
1001             return self.fix_long_line_physically(result)
1002
1003         if fixed:
1004             for line_index in range(start_line_index, end_line_index + 1):
1005                 self.source[line_index] = ''
1006             self.source[start_line_index] = fixed
1007             return range(start_line_index + 1, end_line_index + 1)
1008
1009         return []
1010
1011     def fix_long_line_physically(self, result):
1012         """Try to make lines fit within --max-line-length characters."""
1013         line_index = result['line'] - 1
1014         target = self.source[line_index]
1015
1016         previous_line = get_item(self.source, line_index - 1, default='')
1017         next_line = get_item(self.source, line_index + 1, default='')
1018
1019         try:
1020             fixed = self.fix_long_line(
1021                 target=target,
1022                 previous_line=previous_line,
1023                 next_line=next_line,
1024                 original=target)
1025         except (SyntaxError, tokenize.TokenError):
1026             return []
1027
1028         if fixed:
1029             self.source[line_index] = fixed
1030             return [line_index + 1]
1031
1032         return []
1033
1034     def fix_long_line(self, target, previous_line,
1035                       next_line, original):
1036         cache_entry = (target, previous_line, next_line)
1037         if cache_entry in self.long_line_ignore_cache:
1038             return []
1039
1040         if target.lstrip().startswith('#'):
1041             if self.options.aggressive:
1042                 # Wrap commented lines.
1043                 return shorten_comment(
1044                     line=target,
1045                     max_line_length=self.options.max_line_length,
1046                     last_comment=not next_line.lstrip().startswith('#'))
1047             return []
1048
1049         fixed = get_fixed_long_line(
1050             target=target,
1051             previous_line=previous_line,
1052             original=original,
1053             indent_word=self.indent_word,
1054             max_line_length=self.options.max_line_length,
1055             aggressive=self.options.aggressive,
1056             experimental=self.options.experimental,
1057             verbose=self.options.verbose)
1058
1059         if fixed and not code_almost_equal(original, fixed):
1060             return fixed
1061
1062         self.long_line_ignore_cache.add(cache_entry)
1063         return None
1064
1065     def fix_e502(self, result):
1066         """Remove extraneous escape of newline."""
1067         (line_index, _, target) = get_index_offset_contents(result,
1068                                                             self.source)
1069         self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
1070
1071     def fix_e701(self, result):
1072         """Put colon-separated compound statement on separate lines."""
1073         line_index = result['line'] - 1
1074         target = self.source[line_index]
1075         c = result['column']
1076
1077         fixed_source = (target[:c] + '\n' +
1078                         _get_indentation(target) + self.indent_word +
1079                         target[c:].lstrip('\n\r \t\\'))
1080         self.source[result['line'] - 1] = fixed_source
1081         return [result['line'], result['line'] + 1]
1082
1083     def fix_e702(self, result, logical):
1084         """Put semicolon-separated compound statement on separate lines."""
1085         if not logical:
1086             return []  # pragma: no cover
1087         logical_lines = logical[2]
1088
1089         # Avoid applying this when indented.
1090         # https://docs.python.org/reference/compound_stmts.html
1091         for line in logical_lines:
1092             if (result['id'] == 'E702' and ':' in line
1093                     and STARTSWITH_INDENT_STATEMENT_REGEX.match(line)):
1094                 if self.options.verbose:
1095                     print(
1096                         '---> avoid fixing {error} with '
1097                         'other compound statements'.format(error=result['id']),
1098                         file=sys.stderr
1099                     )
1100                 return []
1101
1102         line_index = result['line'] - 1
1103         target = self.source[line_index]
1104
1105         if target.rstrip().endswith('\\'):
1106             # Normalize '1; \\\n2' into '1; 2'.
1107             self.source[line_index] = target.rstrip('\n \r\t\\')
1108             self.source[line_index + 1] = self.source[line_index + 1].lstrip()
1109             return [line_index + 1, line_index + 2]
1110
1111         if target.rstrip().endswith(';'):
1112             self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
1113             return [line_index + 1]
1114
1115         offset = result['column'] - 1
1116         first = target[:offset].rstrip(';').rstrip()
1117         second = (_get_indentation(logical_lines[0]) +
1118                   target[offset:].lstrip(';').lstrip())
1119
1120         # Find inline comment.
1121         inline_comment = None
1122         if target[offset:].lstrip(';').lstrip()[:2] == '# ':
1123             inline_comment = target[offset:].lstrip(';')
1124
1125         if inline_comment:
1126             self.source[line_index] = first + inline_comment
1127         else:
1128             self.source[line_index] = first + '\n' + second
1129         return [line_index + 1]
1130
1131     def fix_e704(self, result):
1132         """Fix multiple statements on one line def"""
1133         (line_index, _, target) = get_index_offset_contents(result,
1134                                                             self.source)
1135         match = STARTSWITH_DEF_REGEX.match(target)
1136         if match:
1137             self.source[line_index] = '{}\n{}{}'.format(
1138                 match.group(0),
1139                 _get_indentation(target) + self.indent_word,
1140                 target[match.end(0):].lstrip())
1141
1142     def fix_e711(self, result):
1143         """Fix comparison with None."""
1144         (line_index, offset, target) = get_index_offset_contents(result,
1145                                                                  self.source)
1146
1147         right_offset = offset + 2
1148         if right_offset >= len(target):
1149             return []
1150
1151         left = target[:offset].rstrip()
1152         center = target[offset:right_offset]
1153         right = target[right_offset:].lstrip()
1154
1155         if center.strip() == '==':
1156             new_center = 'is'
1157         elif center.strip() == '!=':
1158             new_center = 'is not'
1159         else:
1160             return []
1161
1162         self.source[line_index] = ' '.join([left, new_center, right])
1163
1164     def fix_e712(self, result):
1165         """Fix (trivial case of) comparison with boolean."""
1166         (line_index, offset, target) = get_index_offset_contents(result,
1167                                                                  self.source)
1168
1169         # Handle very easy "not" special cases.
1170         if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
1171             self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
1172                                              r'if not \1:', target, count=1)
1173         elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
1174             self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
1175                                              r'if not \1:', target, count=1)
1176         else:
1177             right_offset = offset + 2
1178             if right_offset >= len(target):
1179                 return []
1180
1181             left = target[:offset].rstrip()
1182             center = target[offset:right_offset]
1183             right = target[right_offset:].lstrip()
1184
1185             # Handle simple cases only.
1186             new_right = None
1187             if center.strip() == '==':
1188                 if re.match(r'\bTrue\b', right):
1189                     new_right = re.sub(r'\bTrue\b *', '', right, count=1)
1190             elif center.strip() == '!=':
1191                 if re.match(r'\bFalse\b', right):
1192                     new_right = re.sub(r'\bFalse\b *', '', right, count=1)
1193
1194             if new_right is None:
1195                 return []
1196
1197             if new_right[0].isalnum():
1198                 new_right = ' ' + new_right
1199
1200             self.source[line_index] = left + new_right
1201
1202     def fix_e713(self, result):
1203         """Fix (trivial case of) non-membership check."""
1204         (line_index, offset, target) = get_index_offset_contents(result,
1205                                                                  self.source)
1206
1207         # to convert once 'not in' -> 'in'
1208         before_target = target[:offset]
1209         target = target[offset:]
1210         match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
1211         notin_pos_start, notin_pos_end = 0, 0
1212         if match_notin:
1213             notin_pos_start = match_notin.start(1)
1214             notin_pos_end = match_notin.end()
1215             target = '{}{} {}'.format(
1216                 target[:notin_pos_start], 'in', target[notin_pos_end:])
1217
1218         # fix 'not in'
1219         match = COMPARE_NEGATIVE_REGEX.search(target)
1220         if match:
1221             if match.group(3) == 'in':
1222                 pos_start = match.start(1)
1223                 new_target = '{5}{0}{1} {2} {3} {4}'.format(
1224                     target[:pos_start], match.group(2), match.group(1),
1225                     match.group(3), target[match.end():], before_target)
1226                 if match_notin:
1227                     # revert 'in' -> 'not in'
1228                     pos_start = notin_pos_start + offset
1229                     pos_end = notin_pos_end + offset - 4     # len('not ')
1230                     new_target = '{}{} {}'.format(
1231                         new_target[:pos_start], 'not in', new_target[pos_end:])
1232                 self.source[line_index] = new_target
1233
1234     def fix_e714(self, result):
1235         """Fix object identity should be 'is not' case."""
1236         (line_index, offset, target) = get_index_offset_contents(result,
1237                                                                  self.source)
1238
1239         # to convert once 'is not' -> 'is'
1240         before_target = target[:offset]
1241         target = target[offset:]
1242         match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
1243         isnot_pos_start, isnot_pos_end = 0, 0
1244         if match_isnot:
1245             isnot_pos_start = match_isnot.start(1)
1246             isnot_pos_end = match_isnot.end()
1247             target = '{}{} {}'.format(
1248                 target[:isnot_pos_start], 'in', target[isnot_pos_end:])
1249
1250         match = COMPARE_NEGATIVE_REGEX.search(target)
1251         if match:
1252             if match.group(3).startswith('is'):
1253                 pos_start = match.start(1)
1254                 new_target = '{5}{0}{1} {2} {3} {4}'.format(
1255                     target[:pos_start], match.group(2), match.group(3),
1256                     match.group(1), target[match.end():], before_target)
1257                 if match_isnot:
1258                     # revert 'is' -> 'is not'
1259                     pos_start = isnot_pos_start + offset
1260                     pos_end = isnot_pos_end + offset - 4     # len('not ')
1261                     new_target = '{}{} {}'.format(
1262                         new_target[:pos_start], 'is not', new_target[pos_end:])
1263                 self.source[line_index] = new_target
1264
1265     def fix_e722(self, result):
1266         """fix bare except"""
1267         (line_index, _, target) = get_index_offset_contents(result,
1268                                                             self.source)
1269         match = BARE_EXCEPT_REGEX.search(target)
1270         if match:
1271             self.source[line_index] = '{}{}{}'.format(
1272                 target[:result['column'] - 1], "except BaseException:",
1273                 target[match.end():])
1274
1275     def fix_e731(self, result):
1276         """Fix do not assign a lambda expression check."""
1277         (line_index, _, target) = get_index_offset_contents(result,
1278                                                             self.source)
1279         match = LAMBDA_REGEX.search(target)
1280         if match:
1281             end = match.end()
1282             self.source[line_index] = '{}def {}({}): return {}'.format(
1283                 target[:match.start(0)], match.group(1), match.group(2),
1284                 target[end:].lstrip())
1285
1286     def fix_w291(self, result):
1287         """Remove trailing whitespace."""
1288         fixed_line = self.source[result['line'] - 1].rstrip()
1289         self.source[result['line'] - 1] = fixed_line + '\n'
1290
1291     def fix_w391(self, _):
1292         """Remove trailing blank lines."""
1293         blank_count = 0
1294         for line in reversed(self.source):
1295             line = line.rstrip()
1296             if line:
1297                 break
1298             else:
1299                 blank_count += 1
1300
1301         original_length = len(self.source)
1302         self.source = self.source[:original_length - blank_count]
1303         return range(1, 1 + original_length)
1304
1305     def fix_w503(self, result):
1306         (line_index, _, target) = get_index_offset_contents(result,
1307                                                             self.source)
1308         one_string_token = target.split()[0]
1309         try:
1310             ts = generate_tokens(one_string_token)
1311         except (SyntaxError, tokenize.TokenError):
1312             return
1313         if not _is_binary_operator(ts[0][0], one_string_token):
1314             return
1315         # find comment
1316         comment_index = 0
1317         found_not_comment_only_line = False
1318         comment_only_linenum = 0
1319         for i in range(5):
1320             # NOTE: try to parse code in 5 times
1321             if (line_index - i) < 0:
1322                 break
1323             from_index = line_index - i - 1
1324             if from_index < 0 or len(self.source) <= from_index:
1325                 break
1326             to_index = line_index + 1
1327             strip_line = self.source[from_index].lstrip()
1328             if (
1329                 not found_not_comment_only_line and
1330                 strip_line and strip_line[0] == '#'
1331             ):
1332                 comment_only_linenum += 1
1333                 continue
1334             found_not_comment_only_line = True
1335             try:
1336                 ts = generate_tokens("".join(self.source[from_index:to_index]))
1337             except (SyntaxError, tokenize.TokenError):
1338                 continue
1339             newline_count = 0
1340             newline_index = []
1341             for index, t in enumerate(ts):
1342                 if t[0] in (tokenize.NEWLINE, tokenize.NL):
1343                     newline_index.append(index)
1344                     newline_count += 1
1345             if newline_count > 2:
1346                 tts = ts[newline_index[-3]:]
1347             else:
1348                 tts = ts
1349             old = []
1350             for t in tts:
1351                 if t[0] in (tokenize.NEWLINE, tokenize.NL):
1352                     newline_count -= 1
1353                 if newline_count <= 1:
1354                     break
1355                 if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
1356                     comment_index = old[3][1]
1357                     break
1358                 old = t
1359             break
1360         i = target.index(one_string_token)
1361         fix_target_line = line_index - 1 - comment_only_linenum
1362         self.source[line_index] = '{}{}'.format(
1363             target[:i], target[i + len(one_string_token):].lstrip())
1364         nl = find_newline(self.source[fix_target_line:line_index])
1365         before_line = self.source[fix_target_line]
1366         bl = before_line.index(nl)
1367         if comment_index:
1368             self.source[fix_target_line] = '{} {} {}'.format(
1369                 before_line[:comment_index], one_string_token,
1370                 before_line[comment_index + 1:])
1371         else:
1372             if before_line[:bl].endswith("#"):
1373                 # special case
1374                 # see: https://github.com/hhatto/autopep8/issues/503
1375                 self.source[fix_target_line] = '{}{} {}'.format(
1376                     before_line[:bl-2], one_string_token, before_line[bl-2:])
1377             else:
1378                 self.source[fix_target_line] = '{} {}{}'.format(
1379                     before_line[:bl], one_string_token, before_line[bl:])
1380
1381     def fix_w504(self, result):
1382         (line_index, _, target) = get_index_offset_contents(result,
1383                                                             self.source)
1384         # NOTE: is not collect pointed out in pycodestyle==2.4.0
1385         comment_index = 0
1386         operator_position = None    # (start_position, end_position)
1387         for i in range(1, 6):
1388             to_index = line_index + i
1389             try:
1390                 ts = generate_tokens("".join(self.source[line_index:to_index]))
1391             except (SyntaxError, tokenize.TokenError):
1392                 continue
1393             newline_count = 0
1394             newline_index = []
1395             for index, t in enumerate(ts):
1396                 if _is_binary_operator(t[0], t[1]):
1397                     if t[2][0] == 1 and t[3][0] == 1:
1398                         operator_position = (t[2][1], t[3][1])
1399                 elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
1400                     if t[2][0] == 1 and t[3][0] == 1:
1401                         operator_position = (t[2][1], t[3][1])
1402                 elif t[0] in (tokenize.NEWLINE, tokenize.NL):
1403                     newline_index.append(index)
1404                     newline_count += 1
1405             if newline_count > 2:
1406                 tts = ts[:newline_index[-3]]
1407             else:
1408                 tts = ts
1409             old = []
1410             for t in tts:
1411                 if tokenize.COMMENT == t[0] and old:
1412                     comment_row, comment_index = old[3]
1413                     break
1414                 old = t
1415             break
1416         if not operator_position:
1417             return
1418         target_operator = target[operator_position[0]:operator_position[1]]
1419
1420         if comment_index and comment_row == 1:
1421             self.source[line_index] = '{}{}'.format(
1422                 target[:operator_position[0]].rstrip(),
1423                 target[comment_index:])
1424         else:
1425             self.source[line_index] = '{}{}{}'.format(
1426                 target[:operator_position[0]].rstrip(),
1427                 target[operator_position[1]:].lstrip(),
1428                 target[operator_position[1]:])
1429
1430         next_line = self.source[line_index + 1]
1431         next_line_indent = 0
1432         m = re.match(r'\s*', next_line)
1433         if m:
1434             next_line_indent = m.span()[1]
1435         self.source[line_index + 1] = '{}{} {}'.format(
1436             next_line[:next_line_indent], target_operator,
1437             next_line[next_line_indent:])
1438
1439     def fix_w605(self, result):
1440         (line_index, offset, target) = get_index_offset_contents(result,
1441                                                                  self.source)
1442         self.source[line_index] = '{}\\{}'.format(
1443             target[:offset + 1], target[offset + 1:])
1444
1445
1446 def get_module_imports_on_top_of_file(source, import_line_index):
1447     """return import or from keyword position
1448
1449     example:
1450       > 0: import sys
1451         1: import os
1452         2:
1453         3: def function():
1454     """
1455     def is_string_literal(line):
1456         if line[0] in 'uUbB':
1457             line = line[1:]
1458         if line and line[0] in 'rR':
1459             line = line[1:]
1460         return line and (line[0] == '"' or line[0] == "'")
1461
1462     def is_future_import(line):
1463         nodes = ast.parse(line)
1464         for n in nodes.body:
1465             if isinstance(n, ast.ImportFrom) and n.module == '__future__':
1466                 return True
1467         return False
1468
1469     def has_future_import(source):
1470         offset = 0
1471         line = ''
1472         for _, next_line in source:
1473             for line_part in next_line.strip().splitlines(True):
1474                 line = line + line_part
1475                 try:
1476                     return is_future_import(line), offset
1477                 except SyntaxError:
1478                     continue
1479             offset += 1
1480         return False, offset
1481
1482     allowed_try_keywords = ('try', 'except', 'else', 'finally')
1483     in_docstring = False
1484     docstring_kind = '"""'
1485     source_stream = iter(enumerate(source))
1486     for cnt, line in source_stream:
1487         if not in_docstring:
1488             m = DOCSTRING_START_REGEX.match(line.lstrip())
1489             if m is not None:
1490                 in_docstring = True
1491                 docstring_kind = m.group('kind')
1492                 remain = line[m.end(): m.endpos].rstrip()
1493                 if remain[-3:] == docstring_kind:  # one line doc
1494                     in_docstring = False
1495                 continue
1496         if in_docstring:
1497             if line.rstrip()[-3:] == docstring_kind:
1498                 in_docstring = False
1499             continue
1500
1501         if not line.rstrip():
1502             continue
1503         elif line.startswith('#'):
1504             continue
1505
1506         if line.startswith('import '):
1507             if cnt == import_line_index:
1508                 continue
1509             return cnt
1510         elif line.startswith('from '):
1511             if cnt == import_line_index:
1512                 continue
1513             hit, offset = has_future_import(
1514                 itertools.chain([(cnt, line)], source_stream)
1515             )
1516             if hit:
1517                 # move to the back
1518                 return cnt + offset + 1
1519             return cnt
1520         elif pycodestyle.DUNDER_REGEX.match(line):
1521             return cnt
1522         elif any(line.startswith(kw) for kw in allowed_try_keywords):
1523             continue
1524         elif is_string_literal(line):
1525             return cnt
1526         else:
1527             return cnt
1528     return 0
1529
1530
1531 def get_index_offset_contents(result, source):
1532     """Return (line_index, column_offset, line_contents)."""
1533     line_index = result['line'] - 1
1534     return (line_index,
1535             result['column'] - 1,
1536             source[line_index])
1537
1538
1539 def get_fixed_long_line(target, previous_line, original,
1540                         indent_word='    ', max_line_length=79,
1541                         aggressive=False, experimental=False, verbose=False):
1542     """Break up long line and return result.
1543
1544     Do this by generating multiple reformatted candidates and then
1545     ranking the candidates to heuristically select the best option.
1546
1547     """
1548     indent = _get_indentation(target)
1549     source = target[len(indent):]
1550     assert source.lstrip() == source
1551     assert not target.lstrip().startswith('#')
1552
1553     # Check for partial multiline.
1554     tokens = list(generate_tokens(source))
1555
1556     candidates = shorten_line(
1557         tokens, source, indent,
1558         indent_word,
1559         max_line_length,
1560         aggressive=aggressive,
1561         experimental=experimental,
1562         previous_line=previous_line)
1563
1564     # Also sort alphabetically as a tie breaker (for determinism).
1565     candidates = sorted(
1566         sorted(set(candidates).union([target, original])),
1567         key=lambda x: line_shortening_rank(
1568             x,
1569             indent_word,
1570             max_line_length,
1571             experimental=experimental))
1572
1573     if verbose >= 4:
1574         print(('-' * 79 + '\n').join([''] + candidates + ['']),
1575               file=wrap_output(sys.stderr, 'utf-8'))
1576
1577     if candidates:
1578         best_candidate = candidates[0]
1579
1580         # Don't allow things to get longer.
1581         if longest_line_length(best_candidate) > longest_line_length(original):
1582             return None
1583
1584         return best_candidate
1585
1586
1587 def longest_line_length(code):
1588     """Return length of longest line."""
1589     if len(code) == 0:
1590         return 0
1591     return max(len(line) for line in code.splitlines())
1592
1593
1594 def join_logical_line(logical_line):
1595     """Return single line based on logical line input."""
1596     indentation = _get_indentation(logical_line)
1597
1598     return indentation + untokenize_without_newlines(
1599         generate_tokens(logical_line.lstrip())) + '\n'
1600
1601
1602 def untokenize_without_newlines(tokens):
1603     """Return source code based on tokens."""
1604     text = ''
1605     last_row = 0
1606     last_column = -1
1607
1608     for t in tokens:
1609         token_string = t[1]
1610         (start_row, start_column) = t[2]
1611         (end_row, end_column) = t[3]
1612
1613         if start_row > last_row:
1614             last_column = 0
1615         if (
1616             (start_column > last_column or token_string == '\n') and
1617             not text.endswith(' ')
1618         ):
1619             text += ' '
1620
1621         if token_string != '\n':
1622             text += token_string
1623
1624         last_row = end_row
1625         last_column = end_column
1626
1627     return text.rstrip()
1628
1629
1630 def _find_logical(source_lines):
1631     # Make a variable which is the index of all the starts of lines.
1632     logical_start = []
1633     logical_end = []
1634     last_newline = True
1635     parens = 0
1636     for t in generate_tokens(''.join(source_lines)):
1637         if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
1638                     tokenize.INDENT, tokenize.NL,
1639                     tokenize.ENDMARKER]:
1640             continue
1641         if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
1642             last_newline = True
1643             logical_end.append((t[3][0] - 1, t[2][1]))
1644             continue
1645         if last_newline and not parens:
1646             logical_start.append((t[2][0] - 1, t[2][1]))
1647             last_newline = False
1648         if t[0] == tokenize.OP:
1649             if t[1] in '([{':
1650                 parens += 1
1651             elif t[1] in '}])':
1652                 parens -= 1
1653     return (logical_start, logical_end)
1654
1655
1656 def _get_logical(source_lines, result, logical_start, logical_end):
1657     """Return the logical line corresponding to the result.
1658
1659     Assumes input is already E702-clean.
1660
1661     """
1662     row = result['line'] - 1
1663     col = result['column'] - 1
1664     ls = None
1665     le = None
1666     for i in range(0, len(logical_start), 1):
1667         assert logical_end
1668         x = logical_end[i]
1669         if x[0] > row or (x[0] == row and x[1] > col):
1670             le = x
1671             ls = logical_start[i]
1672             break
1673     if ls is None:
1674         return None
1675     original = source_lines[ls[0]:le[0] + 1]
1676     return ls, le, original
1677
1678
1679 def get_item(items, index, default=None):
1680     if 0 <= index < len(items):
1681         return items[index]
1682
1683     return default
1684
1685
1686 def reindent(source, indent_size, leave_tabs=False):
1687     """Reindent all lines."""
1688     reindenter = Reindenter(source, leave_tabs)
1689     return reindenter.run(indent_size)
1690
1691
1692 def code_almost_equal(a, b):
1693     """Return True if code is similar.
1694
1695     Ignore whitespace when comparing specific line.
1696
1697     """
1698     split_a = split_and_strip_non_empty_lines(a)
1699     split_b = split_and_strip_non_empty_lines(b)
1700
1701     if len(split_a) != len(split_b):
1702         return False
1703
1704     for (index, _) in enumerate(split_a):
1705         if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
1706             return False
1707
1708     return True
1709
1710
1711 def split_and_strip_non_empty_lines(text):
1712     """Return lines split by newline.
1713
1714     Ignore empty lines.
1715
1716     """
1717     return [line.strip() for line in text.splitlines() if line.strip()]
1718
1719
1720 def refactor(source, fixer_names, ignore=None, filename=''):
1721     """Return refactored code using lib2to3.
1722
1723     Skip if ignore string is produced in the refactored code.
1724
1725     """
1726     not_found_end_of_file_newline = source and source.rstrip("\r\n") == source
1727     if not_found_end_of_file_newline:
1728         input_source = source + "\n"
1729     else:
1730         input_source = source
1731
1732     from lib2to3 import pgen2
1733     try:
1734         new_text = refactor_with_2to3(input_source,
1735                                       fixer_names=fixer_names,
1736                                       filename=filename)
1737     except (pgen2.parse.ParseError,
1738             SyntaxError,
1739             UnicodeDecodeError,
1740             UnicodeEncodeError):
1741         return source
1742
1743     if ignore:
1744         if ignore in new_text and ignore not in source:
1745             return source
1746
1747     if not_found_end_of_file_newline:
1748         return new_text.rstrip("\r\n")
1749
1750     return new_text
1751
1752
1753 def code_to_2to3(select, ignore, where='', verbose=False):
1754     fixes = set()
1755     for code, fix in CODE_TO_2TO3.items():
1756         if code_match(code, select=select, ignore=ignore):
1757             if verbose:
1758                 print('--->  Applying {} fix for {}'.format(where,
1759                                                             code.upper()),
1760                       file=sys.stderr)
1761             fixes |= set(fix)
1762     return fixes
1763
1764
1765 def fix_2to3(source,
1766              aggressive=True, select=None, ignore=None, filename='',
1767              where='global', verbose=False):
1768     """Fix various deprecated code (via lib2to3)."""
1769     if not aggressive:
1770         return source
1771
1772     select = select or []
1773     ignore = ignore or []
1774
1775     return refactor(source,
1776                     code_to_2to3(select=select,
1777                                  ignore=ignore,
1778                                  where=where,
1779                                  verbose=verbose),
1780                     filename=filename)
1781
1782
1783 def find_newline(source):
1784     """Return type of newline used in source.
1785
1786     Input is a list of lines.
1787
1788     """
1789     assert not isinstance(source, str)
1790
1791     counter = collections.defaultdict(int)
1792     for line in source:
1793         if line.endswith(CRLF):
1794             counter[CRLF] += 1
1795         elif line.endswith(CR):
1796             counter[CR] += 1
1797         elif line.endswith(LF):
1798             counter[LF] += 1
1799
1800     return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
1801
1802
1803 def _get_indentword(source):
1804     """Return indentation type."""
1805     indent_word = '    '  # Default in case source has no indentation
1806     try:
1807         for t in generate_tokens(source):
1808             if t[0] == token.INDENT:
1809                 indent_word = t[1]
1810                 break
1811     except (SyntaxError, tokenize.TokenError):
1812         pass
1813     return indent_word
1814
1815
1816 def _get_indentation(line):
1817     """Return leading whitespace."""
1818     if line.strip():
1819         non_whitespace_index = len(line) - len(line.lstrip())
1820         return line[:non_whitespace_index]
1821
1822     return ''
1823
1824
1825 def get_diff_text(old, new, filename):
1826     """Return text of unified diff between old and new."""
1827     newline = '\n'
1828     diff = difflib.unified_diff(
1829         old, new,
1830         'original/' + filename,
1831         'fixed/' + filename,
1832         lineterm=newline)
1833
1834     text = ''
1835     for line in diff:
1836         text += line
1837
1838         # Work around missing newline (http://bugs.python.org/issue2142).
1839         if text and not line.endswith(newline):
1840             text += newline + r'\ No newline at end of file' + newline
1841
1842     return text
1843
1844
1845 def _priority_key(pep8_result):
1846     """Key for sorting PEP8 results.
1847
1848     Global fixes should be done first. This is important for things like
1849     indentation.
1850
1851     """
1852     priority = [
1853         # Fix multiline colon-based before semicolon based.
1854         'e701',
1855         # Break multiline statements early.
1856         'e702',
1857         # Things that make lines longer.
1858         'e225', 'e231',
1859         # Remove extraneous whitespace before breaking lines.
1860         'e201',
1861         # Shorten whitespace in comment before resorting to wrapping.
1862         'e262'
1863     ]
1864     middle_index = 10000
1865     lowest_priority = [
1866         # We need to shorten lines last since the logical fixer can get in a
1867         # loop, which causes us to exit early.
1868         'e501',
1869     ]
1870     key = pep8_result['id'].lower()
1871     try:
1872         return priority.index(key)
1873     except ValueError:
1874         try:
1875             return middle_index + lowest_priority.index(key) + 1
1876         except ValueError:
1877             return middle_index
1878
1879
1880 def shorten_line(tokens, source, indentation, indent_word, max_line_length,
1881                  aggressive=False, experimental=False, previous_line=''):
1882     """Separate line at OPERATOR.
1883
1884     Multiple candidates will be yielded.
1885
1886     """
1887     for candidate in _shorten_line(tokens=tokens,
1888                                    source=source,
1889                                    indentation=indentation,
1890                                    indent_word=indent_word,
1891                                    aggressive=aggressive,
1892                                    previous_line=previous_line):
1893         yield candidate
1894
1895     if aggressive:
1896         for key_token_strings in SHORTEN_OPERATOR_GROUPS:
1897             shortened = _shorten_line_at_tokens(
1898                 tokens=tokens,
1899                 source=source,
1900                 indentation=indentation,
1901                 indent_word=indent_word,
1902                 key_token_strings=key_token_strings,
1903                 aggressive=aggressive)
1904
1905             if shortened is not None and shortened != source:
1906                 yield shortened
1907
1908     if experimental:
1909         for shortened in _shorten_line_at_tokens_new(
1910                 tokens=tokens,
1911                 source=source,
1912                 indentation=indentation,
1913                 max_line_length=max_line_length):
1914
1915             yield shortened
1916
1917
1918 def _shorten_line(tokens, source, indentation, indent_word,
1919                   aggressive=False, previous_line=''):
1920     """Separate line at OPERATOR.
1921
1922     The input is expected to be free of newlines except for inside multiline
1923     strings and at the end.
1924
1925     Multiple candidates will be yielded.
1926
1927     """
1928     for (token_type,
1929          token_string,
1930          start_offset,
1931          end_offset) in token_offsets(tokens):
1932
1933         if (
1934             token_type == tokenize.COMMENT and
1935             not is_probably_part_of_multiline(previous_line) and
1936             not is_probably_part_of_multiline(source) and
1937             not source[start_offset + 1:].strip().lower().startswith(
1938                 ('noqa', 'pragma:', 'pylint:'))
1939         ):
1940             # Move inline comments to previous line.
1941             first = source[:start_offset]
1942             second = source[start_offset:]
1943             yield (indentation + second.strip() + '\n' +
1944                    indentation + first.strip() + '\n')
1945         elif token_type == token.OP and token_string != '=':
1946             # Don't break on '=' after keyword as this violates PEP 8.
1947
1948             assert token_type != token.INDENT
1949
1950             first = source[:end_offset]
1951
1952             second_indent = indentation
1953             if (first.rstrip().endswith('(') and
1954                     source[end_offset:].lstrip().startswith(')')):
1955                 pass
1956             elif first.rstrip().endswith('('):
1957                 second_indent += indent_word
1958             elif '(' in first:
1959                 second_indent += ' ' * (1 + first.find('('))
1960             else:
1961                 second_indent += indent_word
1962
1963             second = (second_indent + source[end_offset:].lstrip())
1964             if (
1965                 not second.strip() or
1966                 second.lstrip().startswith('#')
1967             ):
1968                 continue
1969
1970             # Do not begin a line with a comma
1971             if second.lstrip().startswith(','):
1972                 continue
1973             # Do end a line with a dot
1974             if first.rstrip().endswith('.'):
1975                 continue
1976             if token_string in '+-*/':
1977                 fixed = first + ' \\' + '\n' + second
1978             else:
1979                 fixed = first + '\n' + second
1980
1981             # Only fix if syntax is okay.
1982             if check_syntax(normalize_multiline(fixed)
1983                             if aggressive else fixed):
1984                 yield indentation + fixed
1985
1986
1987 def _is_binary_operator(token_type, text):
1988     return ((token_type == tokenize.OP or text in ['and', 'or']) and
1989             text not in '()[]{},:.;@=%~')
1990
1991
1992 # A convenient way to handle tokens.
1993 Token = collections.namedtuple('Token', ['token_type', 'token_string',
1994                                          'spos', 'epos', 'line'])
1995
1996
1997 class ReformattedLines(object):
1998
1999     """The reflowed lines of atoms.
2000
2001     Each part of the line is represented as an "atom." They can be moved
2002     around when need be to get the optimal formatting.
2003
2004     """
2005
2006     ###########################################################################
2007     # Private Classes
2008
2009     class _Indent(object):
2010
2011         """Represent an indentation in the atom stream."""
2012
2013         def __init__(self, indent_amt):
2014             self._indent_amt = indent_amt
2015
2016         def emit(self):
2017             return ' ' * self._indent_amt
2018
2019         @property
2020         def size(self):
2021             return self._indent_amt
2022
2023     class _Space(object):
2024
2025         """Represent a space in the atom stream."""
2026
2027         def emit(self):
2028             return ' '
2029
2030         @property
2031         def size(self):
2032             return 1
2033
2034     class _LineBreak(object):
2035
2036         """Represent a line break in the atom stream."""
2037
2038         def emit(self):
2039             return '\n'
2040
2041         @property
2042         def size(self):
2043             return 0
2044
2045     def __init__(self, max_line_length):
2046         self._max_line_length = max_line_length
2047         self._lines = []
2048         self._bracket_depth = 0
2049         self._prev_item = None
2050         self._prev_prev_item = None
2051
2052     def __repr__(self):
2053         return self.emit()
2054
2055     ###########################################################################
2056     # Public Methods
2057
2058     def add(self, obj, indent_amt, break_after_open_bracket):
2059         if isinstance(obj, Atom):
2060             self._add_item(obj, indent_amt)
2061             return
2062
2063         self._add_container(obj, indent_amt, break_after_open_bracket)
2064
2065     def add_comment(self, item):
2066         num_spaces = 2
2067         if len(self._lines) > 1:
2068             if isinstance(self._lines[-1], self._Space):
2069                 num_spaces -= 1
2070             if len(self._lines) > 2:
2071                 if isinstance(self._lines[-2], self._Space):
2072                     num_spaces -= 1
2073
2074         while num_spaces > 0:
2075             self._lines.append(self._Space())
2076             num_spaces -= 1
2077         self._lines.append(item)
2078
2079     def add_indent(self, indent_amt):
2080         self._lines.append(self._Indent(indent_amt))
2081
2082     def add_line_break(self, indent):
2083         self._lines.append(self._LineBreak())
2084         self.add_indent(len(indent))
2085
2086     def add_line_break_at(self, index, indent_amt):
2087         self._lines.insert(index, self._LineBreak())
2088         self._lines.insert(index + 1, self._Indent(indent_amt))
2089
2090     def add_space_if_needed(self, curr_text, equal=False):
2091         if (
2092             not self._lines or isinstance(
2093                 self._lines[-1], (self._LineBreak, self._Indent, self._Space))
2094         ):
2095             return
2096
2097         prev_text = str(self._prev_item)
2098         prev_prev_text = (
2099             str(self._prev_prev_item) if self._prev_prev_item else '')
2100
2101         if (
2102             # The previous item was a keyword or identifier and the current
2103             # item isn't an operator that doesn't require a space.
2104             ((self._prev_item.is_keyword or self._prev_item.is_string or
2105               self._prev_item.is_name or self._prev_item.is_number) and
2106              (curr_text[0] not in '([{.,:}])' or
2107               (curr_text[0] == '=' and equal))) or
2108
2109             # Don't place spaces around a '.', unless it's in an 'import'
2110             # statement.
2111             ((prev_prev_text != 'from' and prev_text[-1] != '.' and
2112               curr_text != 'import') and
2113
2114              # Don't place a space before a colon.
2115              curr_text[0] != ':' and
2116
2117              # Don't split up ending brackets by spaces.
2118              ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
2119
2120               # Put a space after a colon or comma.
2121               prev_text[-1] in ':,' or
2122
2123               # Put space around '=' if asked to.
2124               (equal and prev_text == '=') or
2125
2126               # Put spaces around non-unary arithmetic operators.
2127               ((self._prev_prev_item and
2128                 (prev_text not in '+-' and
2129                  (self._prev_prev_item.is_name or
2130                   self._prev_prev_item.is_number or
2131                   self._prev_prev_item.is_string)) and
2132                 prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in')))))
2133         ):
2134             self._lines.append(self._Space())
2135
2136     def previous_item(self):
2137         """Return the previous non-whitespace item."""
2138         return self._prev_item
2139
2140     def fits_on_current_line(self, item_extent):
2141         return self.current_size() + item_extent <= self._max_line_length
2142
2143     def current_size(self):
2144         """The size of the current line minus the indentation."""
2145         size = 0
2146         for item in reversed(self._lines):
2147             size += item.size
2148             if isinstance(item, self._LineBreak):
2149                 break
2150
2151         return size
2152
2153     def line_empty(self):
2154         return (self._lines and
2155                 isinstance(self._lines[-1],
2156                            (self._LineBreak, self._Indent)))
2157
2158     def emit(self):
2159         string = ''
2160         for item in self._lines:
2161             if isinstance(item, self._LineBreak):
2162                 string = string.rstrip()
2163             string += item.emit()
2164
2165         return string.rstrip() + '\n'
2166
2167     ###########################################################################
2168     # Private Methods
2169
2170     def _add_item(self, item, indent_amt):
2171         """Add an item to the line.
2172
2173         Reflow the line to get the best formatting after the item is
2174         inserted. The bracket depth indicates if the item is being
2175         inserted inside of a container or not.
2176
2177         """
2178         if self._prev_item and self._prev_item.is_string and item.is_string:
2179             # Place consecutive string literals on separate lines.
2180             self._lines.append(self._LineBreak())
2181             self._lines.append(self._Indent(indent_amt))
2182
2183         item_text = str(item)
2184         if self._lines and self._bracket_depth:
2185             # Adding the item into a container.
2186             self._prevent_default_initializer_splitting(item, indent_amt)
2187
2188             if item_text in '.,)]}':
2189                 self._split_after_delimiter(item, indent_amt)
2190
2191         elif self._lines and not self.line_empty():
2192             # Adding the item outside of a container.
2193             if self.fits_on_current_line(len(item_text)):
2194                 self._enforce_space(item)
2195
2196             else:
2197                 # Line break for the new item.
2198                 self._lines.append(self._LineBreak())
2199                 self._lines.append(self._Indent(indent_amt))
2200
2201         self._lines.append(item)
2202         self._prev_item, self._prev_prev_item = item, self._prev_item
2203
2204         if item_text in '([{':
2205             self._bracket_depth += 1
2206
2207         elif item_text in '}])':
2208             self._bracket_depth -= 1
2209             assert self._bracket_depth >= 0
2210
2211     def _add_container(self, container, indent_amt, break_after_open_bracket):
2212         actual_indent = indent_amt + 1
2213
2214         if (
2215             str(self._prev_item) != '=' and
2216             not self.line_empty() and
2217             not self.fits_on_current_line(
2218                 container.size + self._bracket_depth + 2)
2219         ):
2220
2221             if str(container)[0] == '(' and self._prev_item.is_name:
2222                 # Don't split before the opening bracket of a call.
2223                 break_after_open_bracket = True
2224                 actual_indent = indent_amt + 4
2225             elif (
2226                 break_after_open_bracket or
2227                 str(self._prev_item) not in '([{'
2228             ):
2229                 # If the container doesn't fit on the current line and the
2230                 # current line isn't empty, place the container on the next
2231                 # line.
2232                 self._lines.append(self._LineBreak())
2233                 self._lines.append(self._Indent(indent_amt))
2234                 break_after_open_bracket = False
2235         else:
2236             actual_indent = self.current_size() + 1
2237             break_after_open_bracket = False
2238
2239         if isinstance(container, (ListComprehension, IfExpression)):
2240             actual_indent = indent_amt
2241
2242         # Increase the continued indentation only if recursing on a
2243         # container.
2244         container.reflow(self, ' ' * actual_indent,
2245                          break_after_open_bracket=break_after_open_bracket)
2246
2247     def _prevent_default_initializer_splitting(self, item, indent_amt):
2248         """Prevent splitting between a default initializer.
2249
2250         When there is a default initializer, it's best to keep it all on
2251         the same line. It's nicer and more readable, even if it goes
2252         over the maximum allowable line length. This goes back along the
2253         current line to determine if we have a default initializer, and,
2254         if so, to remove extraneous whitespaces and add a line
2255         break/indent before it if needed.
2256
2257         """
2258         if str(item) == '=':
2259             # This is the assignment in the initializer. Just remove spaces for
2260             # now.
2261             self._delete_whitespace()
2262             return
2263
2264         if (not self._prev_item or not self._prev_prev_item or
2265                 str(self._prev_item) != '='):
2266             return
2267
2268         self._delete_whitespace()
2269         prev_prev_index = self._lines.index(self._prev_prev_item)
2270
2271         if (
2272             isinstance(self._lines[prev_prev_index - 1], self._Indent) or
2273             self.fits_on_current_line(item.size + 1)
2274         ):
2275             # The default initializer is already the only item on this line.
2276             # Don't insert a newline here.
2277             return
2278
2279         # Replace the space with a newline/indent combo.
2280         if isinstance(self._lines[prev_prev_index - 1], self._Space):
2281             del self._lines[prev_prev_index - 1]
2282
2283         self.add_line_break_at(self._lines.index(self._prev_prev_item),
2284                                indent_amt)
2285
2286     def _split_after_delimiter(self, item, indent_amt):
2287         """Split the line only after a delimiter."""
2288         self._delete_whitespace()
2289
2290         if self.fits_on_current_line(item.size):
2291             return
2292
2293         last_space = None
2294         for current_item in reversed(self._lines):
2295             if (
2296                 last_space and
2297                 (not isinstance(current_item, Atom) or
2298                  not current_item.is_colon)
2299             ):
2300                 break
2301             else:
2302                 last_space = None
2303             if isinstance(current_item, self._Space):
2304                 last_space = current_item
2305             if isinstance(current_item, (self._LineBreak, self._Indent)):
2306                 return
2307
2308         if not last_space:
2309             return
2310
2311         self.add_line_break_at(self._lines.index(last_space), indent_amt)
2312
2313     def _enforce_space(self, item):
2314         """Enforce a space in certain situations.
2315
2316         There are cases where we will want a space where normally we
2317         wouldn't put one. This just enforces the addition of a space.
2318
2319         """
2320         if isinstance(self._lines[-1],
2321                       (self._Space, self._LineBreak, self._Indent)):
2322             return
2323
2324         if not self._prev_item:
2325             return
2326
2327         item_text = str(item)
2328         prev_text = str(self._prev_item)
2329
2330         # Prefer a space around a '.' in an import statement, and between the
2331         # 'import' and '('.
2332         if (
2333             (item_text == '.' and prev_text == 'from') or
2334             (item_text == 'import' and prev_text == '.') or
2335             (item_text == '(' and prev_text == 'import')
2336         ):
2337             self._lines.append(self._Space())
2338
2339     def _delete_whitespace(self):
2340         """Delete all whitespace from the end of the line."""
2341         while isinstance(self._lines[-1], (self._Space, self._LineBreak,
2342                                            self._Indent)):
2343             del self._lines[-1]
2344
2345
2346 class Atom(object):
2347
2348     """The smallest unbreakable unit that can be reflowed."""
2349
2350     def __init__(self, atom):
2351         self._atom = atom
2352
2353     def __repr__(self):
2354         return self._atom.token_string
2355
2356     def __len__(self):
2357         return self.size
2358
2359     def reflow(
2360         self, reflowed_lines, continued_indent, extent,
2361         break_after_open_bracket=False,
2362         is_list_comp_or_if_expr=False,
2363         next_is_dot=False
2364     ):
2365         if self._atom.token_type == tokenize.COMMENT:
2366             reflowed_lines.add_comment(self)
2367             return
2368
2369         total_size = extent if extent else self.size
2370
2371         if self._atom.token_string not in ',:([{}])':
2372             # Some atoms will need an extra 1-sized space token after them.
2373             total_size += 1
2374
2375         prev_item = reflowed_lines.previous_item()
2376         if (
2377             not is_list_comp_or_if_expr and
2378             not reflowed_lines.fits_on_current_line(total_size) and
2379             not (next_is_dot and
2380                  reflowed_lines.fits_on_current_line(self.size + 1)) and
2381             not reflowed_lines.line_empty() and
2382             not self.is_colon and
2383             not (prev_item and prev_item.is_name and
2384                  str(self) == '(')
2385         ):
2386             # Start a new line if there is already something on the line and
2387             # adding this atom would make it go over the max line length.
2388             reflowed_lines.add_line_break(continued_indent)
2389         else:
2390             reflowed_lines.add_space_if_needed(str(self))
2391
2392         reflowed_lines.add(self, len(continued_indent),
2393                            break_after_open_bracket)
2394
2395     def emit(self):
2396         return self.__repr__()
2397
2398     @property
2399     def is_keyword(self):
2400         return keyword.iskeyword(self._atom.token_string)
2401
2402     @property
2403     def is_string(self):
2404         return self._atom.token_type == tokenize.STRING
2405
2406     @property
2407     def is_name(self):
2408         return self._atom.token_type == tokenize.NAME
2409
2410     @property
2411     def is_number(self):
2412         return self._atom.token_type == tokenize.NUMBER
2413
2414     @property
2415     def is_comma(self):
2416         return self._atom.token_string == ','
2417
2418     @property
2419     def is_colon(self):
2420         return self._atom.token_string == ':'
2421
2422     @property
2423     def size(self):
2424         return len(self._atom.token_string)
2425
2426
2427 class Container(object):
2428
2429     """Base class for all container types."""
2430
2431     def __init__(self, items):
2432         self._items = items
2433
2434     def __repr__(self):
2435         string = ''
2436         last_was_keyword = False
2437
2438         for item in self._items:
2439             if item.is_comma:
2440                 string += ', '
2441             elif item.is_colon:
2442                 string += ': '
2443             else:
2444                 item_string = str(item)
2445                 if (
2446                     string and
2447                     (last_was_keyword or
2448                      (not string.endswith(tuple('([{,.:}]) ')) and
2449                       not item_string.startswith(tuple('([{,.:}])'))))
2450                 ):
2451                     string += ' '
2452                 string += item_string
2453
2454             last_was_keyword = item.is_keyword
2455         return string
2456
2457     def __iter__(self):
2458         for element in self._items:
2459             yield element
2460
2461     def __getitem__(self, idx):
2462         return self._items[idx]
2463
2464     def reflow(self, reflowed_lines, continued_indent,
2465                break_after_open_bracket=False):
2466         last_was_container = False
2467         for (index, item) in enumerate(self._items):
2468             next_item = get_item(self._items, index + 1)
2469
2470             if isinstance(item, Atom):
2471                 is_list_comp_or_if_expr = (
2472                     isinstance(self, (ListComprehension, IfExpression)))
2473                 item.reflow(reflowed_lines, continued_indent,
2474                             self._get_extent(index),
2475                             is_list_comp_or_if_expr=is_list_comp_or_if_expr,
2476                             next_is_dot=(next_item and
2477                                          str(next_item) == '.'))
2478                 if last_was_container and item.is_comma:
2479                     reflowed_lines.add_line_break(continued_indent)
2480                 last_was_container = False
2481             else:  # isinstance(item, Container)
2482                 reflowed_lines.add(item, len(continued_indent),
2483                                    break_after_open_bracket)
2484                 last_was_container = not isinstance(item, (ListComprehension,
2485                                                            IfExpression))
2486
2487             if (
2488                 break_after_open_bracket and index == 0 and
2489                 # Prefer to keep empty containers together instead of
2490                 # separating them.
2491                 str(item) == self.open_bracket and
2492                 (not next_item or str(next_item) != self.close_bracket) and
2493                 (len(self._items) != 3 or not isinstance(next_item, Atom))
2494             ):
2495                 reflowed_lines.add_line_break(continued_indent)
2496                 break_after_open_bracket = False
2497             else:
2498                 next_next_item = get_item(self._items, index + 2)
2499                 if (
2500                     str(item) not in ['.', '%', 'in'] and
2501                     next_item and not isinstance(next_item, Container) and
2502                     str(next_item) != ':' and
2503                     next_next_item and (not isinstance(next_next_item, Atom) or
2504                                         str(next_item) == 'not') and
2505                     not reflowed_lines.line_empty() and
2506                     not reflowed_lines.fits_on_current_line(
2507                         self._get_extent(index + 1) + 2)
2508                 ):
2509                     reflowed_lines.add_line_break(continued_indent)
2510
2511     def _get_extent(self, index):
2512         """The extent of the full element.
2513
2514         E.g., the length of a function call or keyword.
2515
2516         """
2517         extent = 0
2518         prev_item = get_item(self._items, index - 1)
2519         seen_dot = prev_item and str(prev_item) == '.'
2520         while index < len(self._items):
2521             item = get_item(self._items, index)
2522             index += 1
2523
2524             if isinstance(item, (ListComprehension, IfExpression)):
2525                 break
2526
2527             if isinstance(item, Container):
2528                 if prev_item and prev_item.is_name:
2529                     if seen_dot:
2530                         extent += 1
2531                     else:
2532                         extent += item.size
2533
2534                     prev_item = item
2535                     continue
2536             elif (str(item) not in ['.', '=', ':', 'not'] and
2537                   not item.is_name and not item.is_string):
2538                 break
2539
2540             if str(item) == '.':
2541                 seen_dot = True
2542
2543             extent += item.size
2544             prev_item = item
2545
2546         return extent
2547
2548     @property
2549     def is_string(self):
2550         return False
2551
2552     @property
2553     def size(self):
2554         return len(self.__repr__())
2555
2556     @property
2557     def is_keyword(self):
2558         return False
2559
2560     @property
2561     def is_name(self):
2562         return False
2563
2564     @property
2565     def is_comma(self):
2566         return False
2567
2568     @property
2569     def is_colon(self):
2570         return False
2571
2572     @property
2573     def open_bracket(self):
2574         return None
2575
2576     @property
2577     def close_bracket(self):
2578         return None
2579
2580
2581 class Tuple(Container):
2582
2583     """A high-level representation of a tuple."""
2584
2585     @property
2586     def open_bracket(self):
2587         return '('
2588
2589     @property
2590     def close_bracket(self):
2591         return ')'
2592
2593
2594 class List(Container):
2595
2596     """A high-level representation of a list."""
2597
2598     @property
2599     def open_bracket(self):
2600         return '['
2601
2602     @property
2603     def close_bracket(self):
2604         return ']'
2605
2606
2607 class DictOrSet(Container):
2608
2609     """A high-level representation of a dictionary or set."""
2610
2611     @property
2612     def open_bracket(self):
2613         return '{'
2614
2615     @property
2616     def close_bracket(self):
2617         return '}'
2618
2619
2620 class ListComprehension(Container):
2621
2622     """A high-level representation of a list comprehension."""
2623
2624     @property
2625     def size(self):
2626         length = 0
2627         for item in self._items:
2628             if isinstance(item, IfExpression):
2629                 break
2630             length += item.size
2631         return length
2632
2633
2634 class IfExpression(Container):
2635
2636     """A high-level representation of an if-expression."""
2637
2638
2639 def _parse_container(tokens, index, for_or_if=None):
2640     """Parse a high-level container, such as a list, tuple, etc."""
2641
2642     # Store the opening bracket.
2643     items = [Atom(Token(*tokens[index]))]
2644     index += 1
2645
2646     num_tokens = len(tokens)
2647     while index < num_tokens:
2648         tok = Token(*tokens[index])
2649
2650         if tok.token_string in ',)]}':
2651             # First check if we're at the end of a list comprehension or
2652             # if-expression. Don't add the ending token as part of the list
2653             # comprehension or if-expression, because they aren't part of those
2654             # constructs.
2655             if for_or_if == 'for':
2656                 return (ListComprehension(items), index - 1)
2657
2658             elif for_or_if == 'if':
2659                 return (IfExpression(items), index - 1)
2660
2661             # We've reached the end of a container.
2662             items.append(Atom(tok))
2663
2664             # If not, then we are at the end of a container.
2665             if tok.token_string == ')':
2666                 # The end of a tuple.
2667                 return (Tuple(items), index)
2668
2669             elif tok.token_string == ']':
2670                 # The end of a list.
2671                 return (List(items), index)
2672
2673             elif tok.token_string == '}':
2674                 # The end of a dictionary or set.
2675                 return (DictOrSet(items), index)
2676
2677         elif tok.token_string in '([{':
2678             # A sub-container is being defined.
2679             (container, index) = _parse_container(tokens, index)
2680             items.append(container)
2681
2682         elif tok.token_string == 'for':
2683             (container, index) = _parse_container(tokens, index, 'for')
2684             items.append(container)
2685
2686         elif tok.token_string == 'if':
2687             (container, index) = _parse_container(tokens, index, 'if')
2688             items.append(container)
2689
2690         else:
2691             items.append(Atom(tok))
2692
2693         index += 1
2694
2695     return (None, None)
2696
2697
2698 def _parse_tokens(tokens):
2699     """Parse the tokens.
2700
2701     This converts the tokens into a form where we can manipulate them
2702     more easily.
2703
2704     """
2705
2706     index = 0
2707     parsed_tokens = []
2708
2709     num_tokens = len(tokens)
2710     while index < num_tokens:
2711         tok = Token(*tokens[index])
2712
2713         assert tok.token_type != token.INDENT
2714         if tok.token_type == tokenize.NEWLINE:
2715             # There's only one newline and it's at the end.
2716             break
2717
2718         if tok.token_string in '([{':
2719             (container, index) = _parse_container(tokens, index)
2720             if not container:
2721                 return None
2722             parsed_tokens.append(container)
2723         else:
2724             parsed_tokens.append(Atom(tok))
2725
2726         index += 1
2727
2728     return parsed_tokens
2729
2730
2731 def _reflow_lines(parsed_tokens, indentation, max_line_length,
2732                   start_on_prefix_line):
2733     """Reflow the lines so that it looks nice."""
2734
2735     if str(parsed_tokens[0]) == 'def':
2736         # A function definition gets indented a bit more.
2737         continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
2738     else:
2739         continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
2740
2741     break_after_open_bracket = not start_on_prefix_line
2742
2743     lines = ReformattedLines(max_line_length)
2744     lines.add_indent(len(indentation.lstrip('\r\n')))
2745
2746     if not start_on_prefix_line:
2747         # If splitting after the opening bracket will cause the first element
2748         # to be aligned weirdly, don't try it.
2749         first_token = get_item(parsed_tokens, 0)
2750         second_token = get_item(parsed_tokens, 1)
2751
2752         if (
2753             first_token and second_token and
2754             str(second_token)[0] == '(' and
2755             len(indentation) + len(first_token) + 1 == len(continued_indent)
2756         ):
2757             return None
2758
2759     for item in parsed_tokens:
2760         lines.add_space_if_needed(str(item), equal=True)
2761
2762         save_continued_indent = continued_indent
2763         if start_on_prefix_line and isinstance(item, Container):
2764             start_on_prefix_line = False
2765             continued_indent = ' ' * (lines.current_size() + 1)
2766
2767         item.reflow(lines, continued_indent, break_after_open_bracket)
2768         continued_indent = save_continued_indent
2769
2770     return lines.emit()
2771
2772
2773 def _shorten_line_at_tokens_new(tokens, source, indentation,
2774                                 max_line_length):
2775     """Shorten the line taking its length into account.
2776
2777     The input is expected to be free of newlines except for inside
2778     multiline strings and at the end.
2779
2780     """
2781     # Yield the original source so to see if it's a better choice than the
2782     # shortened candidate lines we generate here.
2783     yield indentation + source
2784
2785     parsed_tokens = _parse_tokens(tokens)
2786
2787     if parsed_tokens:
2788         # Perform two reflows. The first one starts on the same line as the
2789         # prefix. The second starts on the line after the prefix.
2790         fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2791                               start_on_prefix_line=True)
2792         if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2793             yield fixed
2794
2795         fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2796                               start_on_prefix_line=False)
2797         if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2798             yield fixed
2799
2800
2801 def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
2802                             key_token_strings, aggressive):
2803     """Separate line by breaking at tokens in key_token_strings.
2804
2805     The input is expected to be free of newlines except for inside
2806     multiline strings and at the end.
2807
2808     """
2809     offsets = []
2810     for (index, _t) in enumerate(token_offsets(tokens)):
2811         (token_type,
2812          token_string,
2813          start_offset,
2814          end_offset) = _t
2815
2816         assert token_type != token.INDENT
2817
2818         if token_string in key_token_strings:
2819             # Do not break in containers with zero or one items.
2820             unwanted_next_token = {
2821                 '(': ')',
2822                 '[': ']',
2823                 '{': '}'}.get(token_string)
2824             if unwanted_next_token:
2825                 if (
2826                     get_item(tokens,
2827                              index + 1,
2828                              default=[None, None])[1] == unwanted_next_token or
2829                     get_item(tokens,
2830                              index + 2,
2831                              default=[None, None])[1] == unwanted_next_token
2832                 ):
2833                     continue
2834
2835             if (
2836                 index > 2 and token_string == '(' and
2837                 tokens[index - 1][1] in ',(%['
2838             ):
2839                 # Don't split after a tuple start, or before a tuple start if
2840                 # the tuple is in a list.
2841                 continue
2842
2843             if end_offset < len(source) - 1:
2844                 # Don't split right before newline.
2845                 offsets.append(end_offset)
2846         else:
2847             # Break at adjacent strings. These were probably meant to be on
2848             # separate lines in the first place.
2849             previous_token = get_item(tokens, index - 1)
2850             if (
2851                 token_type == tokenize.STRING and
2852                 previous_token and previous_token[0] == tokenize.STRING
2853             ):
2854                 offsets.append(start_offset)
2855
2856     current_indent = None
2857     fixed = None
2858     for line in split_at_offsets(source, offsets):
2859         if fixed:
2860             fixed += '\n' + current_indent + line
2861
2862             for symbol in '([{':
2863                 if line.endswith(symbol):
2864                     current_indent += indent_word
2865         else:
2866             # First line.
2867             fixed = line
2868             assert not current_indent
2869             current_indent = indent_word
2870
2871     assert fixed is not None
2872
2873     if check_syntax(normalize_multiline(fixed)
2874                     if aggressive > 1 else fixed):
2875         return indentation + fixed
2876
2877     return None
2878
2879
2880 def token_offsets(tokens):
2881     """Yield tokens and offsets."""
2882     end_offset = 0
2883     previous_end_row = 0
2884     previous_end_column = 0
2885     for t in tokens:
2886         token_type = t[0]
2887         token_string = t[1]
2888         (start_row, start_column) = t[2]
2889         (end_row, end_column) = t[3]
2890
2891         # Account for the whitespace between tokens.
2892         end_offset += start_column
2893         if previous_end_row == start_row:
2894             end_offset -= previous_end_column
2895
2896         # Record the start offset of the token.
2897         start_offset = end_offset
2898
2899         # Account for the length of the token itself.
2900         end_offset += len(token_string)
2901
2902         yield (token_type,
2903                token_string,
2904                start_offset,
2905                end_offset)
2906
2907         previous_end_row = end_row
2908         previous_end_column = end_column
2909
2910
2911 def normalize_multiline(line):
2912     """Normalize multiline-related code that will cause syntax error.
2913
2914     This is for purposes of checking syntax.
2915
2916     """
2917     if line.startswith('def ') and line.rstrip().endswith(':'):
2918         return line + ' pass'
2919     elif line.startswith('return '):
2920         return 'def _(): ' + line
2921     elif line.startswith('@'):
2922         return line + 'def _(): pass'
2923     elif line.startswith('class '):
2924         return line + ' pass'
2925     elif line.startswith(('if ', 'elif ', 'for ', 'while ')):
2926         return line + ' pass'
2927
2928     return line
2929
2930
2931 def fix_whitespace(line, offset, replacement):
2932     """Replace whitespace at offset and return fixed line."""
2933     # Replace escaped newlines too
2934     left = line[:offset].rstrip('\n\r \t\\')
2935     right = line[offset:].lstrip('\n\r \t\\')
2936     if right.startswith('#'):
2937         return line
2938
2939     return left + replacement + right
2940
2941
2942 def _execute_pep8(pep8_options, source):
2943     """Execute pycodestyle via python method calls."""
2944     class QuietReport(pycodestyle.BaseReport):
2945
2946         """Version of checker that does not print."""
2947
2948         def __init__(self, options):
2949             super(QuietReport, self).__init__(options)
2950             self.__full_error_results = []
2951
2952         def error(self, line_number, offset, text, check):
2953             """Collect errors."""
2954             code = super(QuietReport, self).error(line_number,
2955                                                   offset,
2956                                                   text,
2957                                                   check)
2958             if code:
2959                 self.__full_error_results.append(
2960                     {'id': code,
2961                      'line': line_number,
2962                      'column': offset + 1,
2963                      'info': text})
2964
2965         def full_error_results(self):
2966             """Return error results in detail.
2967
2968             Results are in the form of a list of dictionaries. Each
2969             dictionary contains 'id', 'line', 'column', and 'info'.
2970
2971             """
2972             return self.__full_error_results
2973
2974     checker = pycodestyle.Checker('', lines=source, reporter=QuietReport,
2975                                   **pep8_options)
2976     checker.check_all()
2977     return checker.report.full_error_results()
2978
2979
2980 def _remove_leading_and_normalize(line, with_rstrip=True):
2981     # ignore FF in first lstrip()
2982     if with_rstrip:
2983         return line.lstrip(' \t\v').rstrip(CR + LF) + '\n'
2984     return line.lstrip(' \t\v')
2985
2986
2987 class Reindenter(object):
2988
2989     """Reindents badly-indented code to uniformly use four-space indentation.
2990
2991     Released to the public domain, by Tim Peters, 03 October 2000.
2992
2993     """
2994
2995     def __init__(self, input_text, leave_tabs=False):
2996         sio = io.StringIO(input_text)
2997         source_lines = sio.readlines()
2998
2999         self.string_content_line_numbers = multiline_string_lines(input_text)
3000
3001         # File lines, rstripped & tab-expanded. Dummy at start is so
3002         # that we can use tokenize's 1-based line numbering easily.
3003         # Note that a line is all-blank iff it is a newline.
3004         self.lines = []
3005         for line_number, line in enumerate(source_lines, start=1):
3006             # Do not modify if inside a multiline string.
3007             if line_number in self.string_content_line_numbers:
3008                 self.lines.append(line)
3009             else:
3010                 # Only expand leading tabs.
3011                 with_rstrip = line_number != len(source_lines)
3012                 if leave_tabs:
3013                     self.lines.append(
3014                         _get_indentation(line) +
3015                         _remove_leading_and_normalize(line, with_rstrip)
3016                     )
3017                 else:
3018                     self.lines.append(
3019                         _get_indentation(line).expandtabs() +
3020                         _remove_leading_and_normalize(line, with_rstrip)
3021                     )
3022
3023         self.lines.insert(0, None)
3024         self.index = 1  # index into self.lines of next line
3025         self.input_text = input_text
3026
3027     def run(self, indent_size=DEFAULT_INDENT_SIZE):
3028         """Fix indentation and return modified line numbers.
3029
3030         Line numbers are indexed at 1.
3031
3032         """
3033         if indent_size < 1:
3034             return self.input_text
3035
3036         try:
3037             stats = _reindent_stats(tokenize.generate_tokens(self.getline))
3038         except (SyntaxError, tokenize.TokenError):
3039             return self.input_text
3040         # Remove trailing empty lines.
3041         lines = self.lines
3042         # Sentinel.
3043         stats.append((len(lines), 0))
3044         # Map count of leading spaces to # we want.
3045         have2want = {}
3046         # Program after transformation.
3047         after = []
3048         # Copy over initial empty lines -- there's nothing to do until
3049         # we see a line with *something* on it.
3050         i = stats[0][0]
3051         after.extend(lines[1:i])
3052         for i in range(len(stats) - 1):
3053             thisstmt, thislevel = stats[i]
3054             nextstmt = stats[i + 1][0]
3055             have = _leading_space_count(lines[thisstmt])
3056             want = thislevel * indent_size
3057             if want < 0:
3058                 # A comment line.
3059                 if have:
3060                     # An indented comment line. If we saw the same
3061                     # indentation before, reuse what it most recently
3062                     # mapped to.
3063                     want = have2want.get(have, -1)
3064                     if want < 0:
3065                         # Then it probably belongs to the next real stmt.
3066                         for j in range(i + 1, len(stats) - 1):
3067                             jline, jlevel = stats[j]
3068                             if jlevel >= 0:
3069                                 if have == _leading_space_count(lines[jline]):
3070                                     want = jlevel * indent_size
3071                                 break
3072                     # Maybe it's a hanging comment like this one,
3073                     if want < 0:
3074                         # in which case we should shift it like its base
3075                         # line got shifted.
3076                         for j in range(i - 1, -1, -1):
3077                             jline, jlevel = stats[j]
3078                             if jlevel >= 0:
3079                                 want = (have + _leading_space_count(
3080                                         after[jline - 1]) -
3081                                         _leading_space_count(lines[jline]))
3082                                 break
3083                     if want < 0:
3084                         # Still no luck -- leave it alone.
3085                         want = have
3086                 else:
3087                     want = 0
3088             assert want >= 0
3089             have2want[have] = want
3090             diff = want - have
3091             if diff == 0 or have == 0:
3092                 after.extend(lines[thisstmt:nextstmt])
3093             else:
3094                 for line_number, line in enumerate(lines[thisstmt:nextstmt],
3095                                                    start=thisstmt):
3096                     if line_number in self.string_content_line_numbers:
3097                         after.append(line)
3098                     elif diff > 0:
3099                         if line == '\n':
3100                             after.append(line)
3101                         else:
3102                             after.append(' ' * diff + line)
3103                     else:
3104                         remove = min(_leading_space_count(line), -diff)
3105                         after.append(line[remove:])
3106
3107         return ''.join(after)
3108
3109     def getline(self):
3110         """Line-getter for tokenize."""
3111         if self.index >= len(self.lines):
3112             line = ''
3113         else:
3114             line = self.lines[self.index]
3115             self.index += 1
3116         return line
3117
3118
3119 def _reindent_stats(tokens):
3120     """Return list of (lineno, indentlevel) pairs.
3121
3122     One for each stmt and comment line. indentlevel is -1 for comment
3123     lines, as a signal that tokenize doesn't know what to do about them;
3124     indeed, they're our headache!
3125
3126     """
3127     find_stmt = 1  # Next token begins a fresh stmt?
3128     level = 0  # Current indent level.
3129     stats = []
3130
3131     for t in tokens:
3132         token_type = t[0]
3133         sline = t[2][0]
3134         line = t[4]
3135
3136         if token_type == tokenize.NEWLINE:
3137             # A program statement, or ENDMARKER, will eventually follow,
3138             # after some (possibly empty) run of tokens of the form
3139             #     (NL | COMMENT)* (INDENT | DEDENT+)?
3140             find_stmt = 1
3141
3142         elif token_type == tokenize.INDENT:
3143             find_stmt = 1
3144             level += 1
3145
3146         elif token_type == tokenize.DEDENT:
3147             find_stmt = 1
3148             level -= 1
3149
3150         elif token_type == tokenize.COMMENT:
3151             if find_stmt:
3152                 stats.append((sline, -1))
3153                 # But we're still looking for a new stmt, so leave
3154                 # find_stmt alone.
3155
3156         elif token_type == tokenize.NL:
3157             pass
3158
3159         elif find_stmt:
3160             # This is the first "real token" following a NEWLINE, so it
3161             # must be the first token of the next program statement, or an
3162             # ENDMARKER.
3163             find_stmt = 0
3164             if line:   # Not endmarker.
3165                 stats.append((sline, level))
3166
3167     return stats
3168
3169
3170 def _leading_space_count(line):
3171     """Return number of leading spaces in line."""
3172     i = 0
3173     while i < len(line) and line[i] == ' ':
3174         i += 1
3175     return i
3176
3177
3178 def refactor_with_2to3(source_text, fixer_names, filename=''):
3179     """Use lib2to3 to refactor the source.
3180
3181     Return the refactored source code.
3182
3183     """
3184     from lib2to3.refactor import RefactoringTool
3185     fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
3186     tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
3187
3188     from lib2to3.pgen2 import tokenize as lib2to3_tokenize
3189     try:
3190         # The name parameter is necessary particularly for the "import" fixer.
3191         return str(tool.refactor_string(source_text, name=filename))
3192     except lib2to3_tokenize.TokenError:
3193         return source_text
3194
3195
3196 def check_syntax(code):
3197     """Return True if syntax is okay."""
3198     try:
3199         return compile(code, '<string>', 'exec', dont_inherit=True)
3200     except (SyntaxError, TypeError, ValueError):
3201         return False
3202
3203
3204 def find_with_line_numbers(pattern, contents):
3205     """A wrapper around 're.finditer' to find line numbers.
3206
3207     Returns a list of line numbers where pattern was found in contents.
3208     """
3209     matches = list(re.finditer(pattern, contents))
3210     if not matches:
3211         return []
3212
3213     end = matches[-1].start()
3214
3215     # -1 so a failed `rfind` maps to the first line.
3216     newline_offsets = {
3217         -1: 0
3218     }
3219     for line_num, m in enumerate(re.finditer(r'\n', contents), 1):
3220         offset = m.start()
3221         if offset > end:
3222             break
3223         newline_offsets[offset] = line_num
3224
3225     def get_line_num(match, contents):
3226         """Get the line number of string in a files contents.
3227
3228         Failing to find the newline is OK, -1 maps to 0
3229
3230         """
3231         newline_offset = contents.rfind('\n', 0, match.start())
3232         return newline_offsets[newline_offset]
3233
3234     return [get_line_num(match, contents) + 1 for match in matches]
3235
3236
3237 def get_disabled_ranges(source):
3238     """Returns a list of tuples representing the disabled ranges.
3239
3240     If disabled and no re-enable will disable for rest of file.
3241
3242     """
3243     enable_line_nums = find_with_line_numbers(ENABLE_REGEX, source)
3244     disable_line_nums = find_with_line_numbers(DISABLE_REGEX, source)
3245     total_lines = len(re.findall("\n", source)) + 1
3246
3247     enable_commands = {}
3248     for num in enable_line_nums:
3249         enable_commands[num] = True
3250     for num in disable_line_nums:
3251         enable_commands[num] = False
3252
3253     disabled_ranges = []
3254     currently_enabled = True
3255     disabled_start = None
3256
3257     for line, commanded_enabled in sorted(enable_commands.items()):
3258         if commanded_enabled is False and currently_enabled is True:
3259             disabled_start = line
3260             currently_enabled = False
3261         elif commanded_enabled is True and currently_enabled is False:
3262             disabled_ranges.append((disabled_start, line))
3263             currently_enabled = True
3264
3265     if currently_enabled is False:
3266         disabled_ranges.append((disabled_start, total_lines))
3267
3268     return disabled_ranges
3269
3270
3271 def filter_disabled_results(result, disabled_ranges):
3272     """Filter out reports based on tuple of disabled ranges.
3273
3274     """
3275     line = result['line']
3276     for disabled_range in disabled_ranges:
3277         if disabled_range[0] <= line <= disabled_range[1]:
3278             return False
3279     return True
3280
3281
3282 def filter_results(source, results, aggressive):
3283     """Filter out spurious reports from pycodestyle.
3284
3285     If aggressive is True, we allow possibly unsafe fixes (E711, E712).
3286
3287     """
3288     non_docstring_string_line_numbers = multiline_string_lines(
3289         source, include_docstrings=False)
3290     all_string_line_numbers = multiline_string_lines(
3291         source, include_docstrings=True)
3292
3293     commented_out_code_line_numbers = commented_out_code_lines(source)
3294
3295     # Filter out the disabled ranges
3296     disabled_ranges = get_disabled_ranges(source)
3297     if disabled_ranges:
3298         results = [
3299             result for result in results if filter_disabled_results(
3300                 result,
3301                 disabled_ranges,
3302             )
3303         ]
3304
3305     has_e901 = any(result['id'].lower() == 'e901' for result in results)
3306
3307     for r in results:
3308         issue_id = r['id'].lower()
3309
3310         if r['line'] in non_docstring_string_line_numbers:
3311             if issue_id.startswith(('e1', 'e501', 'w191')):
3312                 continue
3313
3314         if r['line'] in all_string_line_numbers:
3315             if issue_id in ['e501']:
3316                 continue
3317
3318         # We must offset by 1 for lines that contain the trailing contents of
3319         # multiline strings.
3320         if not aggressive and (r['line'] + 1) in all_string_line_numbers:
3321             # Do not modify multiline strings in non-aggressive mode. Remove
3322             # trailing whitespace could break doctests.
3323             if issue_id.startswith(('w29', 'w39')):
3324                 continue
3325
3326         if aggressive <= 0:
3327             if issue_id.startswith(('e711', 'e72', 'w6')):
3328                 continue
3329
3330         if aggressive <= 1:
3331             if issue_id.startswith(('e712', 'e713', 'e714')):
3332                 continue
3333
3334         if aggressive <= 2:
3335             if issue_id.startswith(('e704')):
3336                 continue
3337
3338         if r['line'] in commented_out_code_line_numbers:
3339             if issue_id.startswith(('e261', 'e262', 'e501')):
3340                 continue
3341
3342         # Do not touch indentation if there is a token error caused by
3343         # incomplete multi-line statement. Otherwise, we risk screwing up the
3344         # indentation.
3345         if has_e901:
3346             if issue_id.startswith(('e1', 'e7')):
3347                 continue
3348
3349         yield r
3350
3351
3352 def multiline_string_lines(source, include_docstrings=False):
3353     """Return line numbers that are within multiline strings.
3354
3355     The line numbers are indexed at 1.
3356
3357     Docstrings are ignored.
3358
3359     """
3360     line_numbers = set()
3361     previous_token_type = ''
3362     try:
3363         for t in generate_tokens(source):
3364             token_type = t[0]
3365             start_row = t[2][0]
3366             end_row = t[3][0]
3367
3368             if token_type == tokenize.STRING and start_row != end_row:
3369                 if (
3370                     include_docstrings or
3371                     previous_token_type != tokenize.INDENT
3372                 ):
3373                     # We increment by one since we want the contents of the
3374                     # string.
3375                     line_numbers |= set(range(1 + start_row, 1 + end_row))
3376
3377             previous_token_type = token_type
3378     except (SyntaxError, tokenize.TokenError):
3379         pass
3380
3381     return line_numbers
3382
3383
3384 def commented_out_code_lines(source):
3385     """Return line numbers of comments that are likely code.
3386
3387     Commented-out code is bad practice, but modifying it just adds even
3388     more clutter.
3389
3390     """
3391     line_numbers = []
3392     try:
3393         for t in generate_tokens(source):
3394             token_type = t[0]
3395             token_string = t[1]
3396             start_row = t[2][0]
3397             line = t[4]
3398
3399             # Ignore inline comments.
3400             if not line.lstrip().startswith('#'):
3401                 continue
3402
3403             if token_type == tokenize.COMMENT:
3404                 stripped_line = token_string.lstrip('#').strip()
3405                 with warnings.catch_warnings():
3406                     # ignore SyntaxWarning in Python3.8+
3407                     # refs:
3408                     #   https://bugs.python.org/issue15248
3409                     #   https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
3410                     warnings.filterwarnings("ignore", category=SyntaxWarning)
3411                     if (
3412                         ' ' in stripped_line and
3413                         '#' not in stripped_line and
3414                         check_syntax(stripped_line)
3415                     ):
3416                         line_numbers.append(start_row)
3417     except (SyntaxError, tokenize.TokenError):
3418         pass
3419
3420     return line_numbers
3421
3422
3423 def shorten_comment(line, max_line_length, last_comment=False):
3424     """Return trimmed or split long comment line.
3425
3426     If there are no comments immediately following it, do a text wrap.
3427     Doing this wrapping on all comments in general would lead to jagged
3428     comment text.
3429
3430     """
3431     assert len(line) > max_line_length
3432     line = line.rstrip()
3433
3434     # PEP 8 recommends 72 characters for comment text.
3435     indentation = _get_indentation(line) + '# '
3436     max_line_length = min(max_line_length,
3437                           len(indentation) + 72)
3438
3439     MIN_CHARACTER_REPEAT = 5
3440     if (
3441         len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
3442         not line[-1].isalnum()
3443     ):
3444         # Trim comments that end with things like ---------
3445         return line[:max_line_length] + '\n'
3446     elif last_comment and re.match(r'\s*#+\s*\w+', line):
3447         split_lines = textwrap.wrap(line.lstrip(' \t#'),
3448                                     initial_indent=indentation,
3449                                     subsequent_indent=indentation,
3450                                     width=max_line_length,
3451                                     break_long_words=False,
3452                                     break_on_hyphens=False)
3453         return '\n'.join(split_lines) + '\n'
3454
3455     return line + '\n'
3456
3457
3458 def normalize_line_endings(lines, newline):
3459     """Return fixed line endings.
3460
3461     All lines will be modified to use the most common line ending.
3462     """
3463     line = [line.rstrip('\n\r') + newline for line in lines]
3464     if line and lines[-1] == lines[-1].rstrip('\n\r'):
3465         line[-1] = line[-1].rstrip('\n\r')
3466     return line
3467
3468
3469 def mutual_startswith(a, b):
3470     return b.startswith(a) or a.startswith(b)
3471
3472
3473 def code_match(code, select, ignore):
3474     if ignore:
3475         assert not isinstance(ignore, str)
3476         for ignored_code in [c.strip() for c in ignore]:
3477             if mutual_startswith(code.lower(), ignored_code.lower()):
3478                 return False
3479
3480     if select:
3481         assert not isinstance(select, str)
3482         for selected_code in [c.strip() for c in select]:
3483             if mutual_startswith(code.lower(), selected_code.lower()):
3484                 return True
3485         return False
3486
3487     return True
3488
3489
3490 def fix_code(source, options=None, encoding=None, apply_config=False):
3491     """Return fixed source code.
3492
3493     "encoding" will be used to decode "source" if it is a byte string.
3494
3495     """
3496     options = _get_options(options, apply_config)
3497     # normalize
3498     options.ignore = [opt.upper() for opt in options.ignore]
3499     options.select = [opt.upper() for opt in options.select]
3500
3501     # check ignore args
3502     # NOTE: If W50x is not included, add W50x because the code
3503     #       correction result is indefinite.
3504     ignore_opt = options.ignore
3505     if not {"W50", "W503", "W504"} & set(ignore_opt):
3506         options.ignore.append("W50")
3507
3508     if not isinstance(source, str):
3509         source = source.decode(encoding or get_encoding())
3510
3511     sio = io.StringIO(source)
3512     return fix_lines(sio.readlines(), options=options)
3513
3514
3515 def _get_options(raw_options, apply_config):
3516     """Return parsed options."""
3517     if not raw_options:
3518         return parse_args([''], apply_config=apply_config)
3519
3520     if isinstance(raw_options, dict):
3521         options = parse_args([''], apply_config=apply_config)
3522         for name, value in raw_options.items():
3523             if not hasattr(options, name):
3524                 raise ValueError("No such option '{}'".format(name))
3525
3526             # Check for very basic type errors.
3527             expected_type = type(getattr(options, name))
3528             if not isinstance(expected_type, (str, )):
3529                 if isinstance(value, (str, )):
3530                     raise ValueError(
3531                         "Option '{}' should not be a string".format(name))
3532             setattr(options, name, value)
3533     else:
3534         options = raw_options
3535
3536     return options
3537
3538
3539 def fix_lines(source_lines, options, filename=''):
3540     """Return fixed source code."""
3541     # Transform everything to line feed. Then change them back to original
3542     # before returning fixed source code.
3543     original_newline = find_newline(source_lines)
3544     tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
3545
3546     # Keep a history to break out of cycles.
3547     previous_hashes = set()
3548
3549     if options.line_range:
3550         # Disable "apply_local_fixes()" for now due to issue #175.
3551         fixed_source = tmp_source
3552     else:
3553         # Apply global fixes only once (for efficiency).
3554         fixed_source = apply_global_fixes(tmp_source,
3555                                           options,
3556                                           filename=filename)
3557
3558     passes = 0
3559     long_line_ignore_cache = set()
3560     while hash(fixed_source) not in previous_hashes:
3561         if options.pep8_passes >= 0 and passes > options.pep8_passes:
3562             break
3563         passes += 1
3564
3565         previous_hashes.add(hash(fixed_source))
3566
3567         tmp_source = copy.copy(fixed_source)
3568
3569         fix = FixPEP8(
3570             filename,
3571             options,
3572             contents=tmp_source,
3573             long_line_ignore_cache=long_line_ignore_cache)
3574
3575         fixed_source = fix.fix()
3576
3577     sio = io.StringIO(fixed_source)
3578     return ''.join(normalize_line_endings(sio.readlines(), original_newline))
3579
3580
3581 def fix_file(filename, options=None, output=None, apply_config=False):
3582     if not options:
3583         options = parse_args([filename], apply_config=apply_config)
3584
3585     original_source = readlines_from_file(filename)
3586
3587     fixed_source = original_source
3588
3589     if options.in_place or options.diff or output:
3590         encoding = detect_encoding(filename)
3591
3592     if output:
3593         output = LineEndingWrapper(wrap_output(output, encoding=encoding))
3594
3595     fixed_source = fix_lines(fixed_source, options, filename=filename)
3596
3597     if options.diff:
3598         new = io.StringIO(fixed_source)
3599         new = new.readlines()
3600         diff = get_diff_text(original_source, new, filename)
3601         if output:
3602             output.write(diff)
3603             output.flush()
3604         elif options.jobs > 1:
3605             diff = diff.encode(encoding)
3606         return diff
3607     elif options.in_place:
3608         original = "".join(original_source).splitlines()
3609         fixed = fixed_source.splitlines()
3610         original_source_last_line = (
3611             original_source[-1].split("\n")[-1] if original_source else ""
3612         )
3613         fixed_source_last_line = fixed_source.split("\n")[-1]
3614         if original != fixed or (
3615             original_source_last_line != fixed_source_last_line
3616         ):
3617             with open_with_encoding(filename, 'w', encoding=encoding) as fp:
3618                 fp.write(fixed_source)
3619             return fixed_source
3620         return None
3621     else:
3622         if output:
3623             output.write(fixed_source)
3624             output.flush()
3625     return fixed_source
3626
3627
3628 def global_fixes():
3629     """Yield multiple (code, function) tuples."""
3630     for function in list(globals().values()):
3631         if inspect.isfunction(function):
3632             arguments = _get_parameters(function)
3633             if arguments[:1] != ['source']:
3634                 continue
3635
3636             code = extract_code_from_function(function)
3637             if code:
3638                 yield (code, function)
3639
3640
3641 def _get_parameters(function):
3642     # pylint: disable=deprecated-method
3643     if sys.version_info.major >= 3:
3644         # We need to match "getargspec()", which includes "self" as the first
3645         # value for methods.
3646         # https://bugs.python.org/issue17481#msg209469
3647         if inspect.ismethod(function):
3648             function = function.__func__
3649
3650         return list(inspect.signature(function).parameters)
3651     else:
3652         return inspect.getargspec(function)[0]
3653
3654
3655 def apply_global_fixes(source, options, where='global', filename='',
3656                        codes=None):
3657     """Run global fixes on source code.
3658
3659     These are fixes that only need be done once (unlike those in
3660     FixPEP8, which are dependent on pycodestyle).
3661
3662     """
3663     if codes is None:
3664         codes = []
3665     if any(code_match(code, select=options.select, ignore=options.ignore)
3666            for code in ['E101', 'E111']):
3667         source = reindent(
3668             source,
3669             indent_size=options.indent_size,
3670             leave_tabs=not (
3671                 code_match(
3672                     'W191',
3673                     select=options.select,
3674                     ignore=options.ignore
3675                 )
3676             )
3677         )
3678
3679     for (code, function) in global_fixes():
3680         if code_match(code, select=options.select, ignore=options.ignore):
3681             if options.verbose:
3682                 print('--->  Applying {} fix for {}'.format(where,
3683                                                             code.upper()),
3684                       file=sys.stderr)
3685             source = function(source,
3686                               aggressive=options.aggressive)
3687
3688     source = fix_2to3(source,
3689                       aggressive=options.aggressive,
3690                       select=options.select,
3691                       ignore=options.ignore,
3692                       filename=filename,
3693                       where=where,
3694                       verbose=options.verbose)
3695
3696     return source
3697
3698
3699 def extract_code_from_function(function):
3700     """Return code handled by function."""
3701     if not function.__name__.startswith('fix_'):
3702         return None
3703
3704     code = re.sub('^fix_', '', function.__name__)
3705     if not code:
3706         return None
3707
3708     try:
3709         int(code[1:])
3710     except ValueError:
3711         return None
3712
3713     return code
3714
3715
3716 def _get_package_version():
3717     packages = ["pycodestyle: {}".format(pycodestyle.__version__)]
3718     return ", ".join(packages)
3719
3720
3721 def create_parser():
3722     """Return command-line parser."""
3723     parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
3724                                      prog='autopep8')
3725     parser.add_argument('--version', action='version',
3726                         version='%(prog)s {} ({})'.format(
3727                             __version__, _get_package_version()))
3728     parser.add_argument('-v', '--verbose', action='count',
3729                         default=0,
3730                         help='print verbose messages; '
3731                              'multiple -v result in more verbose messages')
3732     parser.add_argument('-d', '--diff', action='store_true',
3733                         help='print the diff for the fixed source')
3734     parser.add_argument('-i', '--in-place', action='store_true',
3735                         help='make changes to files in place')
3736     parser.add_argument('--global-config', metavar='filename',
3737                         default=DEFAULT_CONFIG,
3738                         help='path to a global pep8 config file; if this file '
3739                              'does not exist then this is ignored '
3740                              '(default: {})'.format(DEFAULT_CONFIG))
3741     parser.add_argument('--ignore-local-config', action='store_true',
3742                         help="don't look for and apply local config files; "
3743                              'if not passed, defaults are updated with any '
3744                              "config files in the project's root directory")
3745     parser.add_argument('-r', '--recursive', action='store_true',
3746                         help='run recursively over directories; '
3747                              'must be used with --in-place or --diff')
3748     parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
3749                         help='number of parallel jobs; '
3750                              'match CPU count if value is less than 1')
3751     parser.add_argument('-p', '--pep8-passes', metavar='n',
3752                         default=-1, type=int,
3753                         help='maximum number of additional pep8 passes '
3754                              '(default: infinite)')
3755     parser.add_argument('-a', '--aggressive', action='count', default=0,
3756                         help='enable non-whitespace changes; '
3757                              'multiple -a result in more aggressive changes')
3758     parser.add_argument('--experimental', action='store_true',
3759                         help='enable experimental fixes')
3760     parser.add_argument('--exclude', metavar='globs',
3761                         help='exclude file/directory names that match these '
3762                              'comma-separated globs')
3763     parser.add_argument('--list-fixes', action='store_true',
3764                         help='list codes for fixes; '
3765                         'used by --ignore and --select')
3766     parser.add_argument('--ignore', metavar='errors', default='',
3767                         help='do not fix these errors/warnings '
3768                              '(default: {})'.format(DEFAULT_IGNORE))
3769     parser.add_argument('--select', metavar='errors', default='',
3770                         help='fix only these errors/warnings (e.g. E4,W)')
3771     parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
3772                         help='set maximum allowed line length '
3773                              '(default: %(default)s)')
3774     parser.add_argument('--line-range', '--range', metavar='line',
3775                         default=None, type=int, nargs=2,
3776                         help='only fix errors found within this inclusive '
3777                              'range of line numbers (e.g. 1 99); '
3778                              'line numbers are indexed at 1')
3779     parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
3780                         type=int, help=argparse.SUPPRESS)
3781     parser.add_argument('--hang-closing', action='store_true',
3782                         help='hang-closing option passed to pycodestyle')
3783     parser.add_argument('--exit-code', action='store_true',
3784                         help='change to behavior of exit code.'
3785                              ' default behavior of return value, 0 is no '
3786                              'differences, 1 is error exit. return 2 when'
3787                              ' add this option. 2 is exists differences.')
3788     parser.add_argument('files', nargs='*',
3789                         help="files to format or '-' for standard in")
3790
3791     return parser
3792
3793
3794 def _expand_codes(codes, ignore_codes):
3795     """expand to individual E/W codes"""
3796     ret = set()
3797
3798     is_conflict = False
3799     if all(
3800             any(
3801                 conflicting_code.startswith(code)
3802                 for code in codes
3803             )
3804             for conflicting_code in CONFLICTING_CODES
3805     ):
3806         is_conflict = True
3807
3808     is_ignore_w503 = "W503" in ignore_codes
3809     is_ignore_w504 = "W504" in ignore_codes
3810
3811     for code in codes:
3812         if code == "W":
3813             if is_ignore_w503 and is_ignore_w504:
3814                 ret.update({"W1", "W2", "W3", "W505", "W6"})
3815             elif is_ignore_w503:
3816                 ret.update({"W1", "W2", "W3", "W504", "W505", "W6"})
3817             else:
3818                 ret.update({"W1", "W2", "W3", "W503", "W505", "W6"})
3819         elif code in ("W5", "W50"):
3820             if is_ignore_w503 and is_ignore_w504:
3821                 ret.update({"W505"})
3822             elif is_ignore_w503:
3823                 ret.update({"W504", "W505"})
3824             else:
3825                 ret.update({"W503", "W505"})
3826         elif not (code in ("W503", "W504") and is_conflict):
3827             ret.add(code)
3828
3829     return ret
3830
3831
3832 def parse_args(arguments, apply_config=False):
3833     """Parse command-line options."""
3834     parser = create_parser()
3835     args = parser.parse_args(arguments)
3836
3837     if not args.files and not args.list_fixes:
3838         parser.exit(EXIT_CODE_ARGPARSE_ERROR, 'incorrect number of arguments')
3839
3840     args.files = [decode_filename(name) for name in args.files]
3841
3842     if apply_config:
3843         parser = read_config(args, parser)
3844         # prioritize settings when exist pyproject.toml's tool.autopep8 section
3845         try:
3846             parser_with_pyproject_toml = read_pyproject_toml(args, parser)
3847         except Exception:
3848             parser_with_pyproject_toml = None
3849         if parser_with_pyproject_toml:
3850             parser = parser_with_pyproject_toml
3851         args = parser.parse_args(arguments)
3852         args.files = [decode_filename(name) for name in args.files]
3853
3854     if '-' in args.files:
3855         if len(args.files) > 1:
3856             parser.exit(
3857                 EXIT_CODE_ARGPARSE_ERROR,
3858                 'cannot mix stdin and regular files',
3859             )
3860
3861         if args.diff:
3862             parser.exit(
3863                 EXIT_CODE_ARGPARSE_ERROR,
3864                 '--diff cannot be used with standard input',
3865             )
3866
3867         if args.in_place:
3868             parser.exit(
3869                 EXIT_CODE_ARGPARSE_ERROR,
3870                 '--in-place cannot be used with standard input',
3871             )
3872
3873         if args.recursive:
3874             parser.exit(
3875                 EXIT_CODE_ARGPARSE_ERROR,
3876                 '--recursive cannot be used with standard input',
3877             )
3878
3879     if len(args.files) > 1 and not (args.in_place or args.diff):
3880         parser.exit(
3881             EXIT_CODE_ARGPARSE_ERROR,
3882             'autopep8 only takes one filename as argument '
3883             'unless the "--in-place" or "--diff" args are used',
3884         )
3885
3886     if args.recursive and not (args.in_place or args.diff):
3887         parser.exit(
3888             EXIT_CODE_ARGPARSE_ERROR,
3889             '--recursive must be used with --in-place or --diff',
3890         )
3891
3892     if args.in_place and args.diff:
3893         parser.exit(
3894             EXIT_CODE_ARGPARSE_ERROR,
3895             '--in-place and --diff are mutually exclusive',
3896         )
3897
3898     if args.max_line_length <= 0:
3899         parser.exit(
3900             EXIT_CODE_ARGPARSE_ERROR,
3901             '--max-line-length must be greater than 0',
3902         )
3903
3904     if args.indent_size <= 0:
3905         parser.exit(
3906             EXIT_CODE_ARGPARSE_ERROR,
3907             '--indent-size must be greater than 0',
3908         )
3909
3910     if args.select:
3911         args.select = _expand_codes(
3912             _split_comma_separated(args.select),
3913             (_split_comma_separated(args.ignore) if args.ignore else [])
3914         )
3915
3916     if args.ignore:
3917         args.ignore = _split_comma_separated(args.ignore)
3918         if all(
3919                 not any(
3920                     conflicting_code.startswith(ignore_code)
3921                     for ignore_code in args.ignore
3922                 )
3923                 for conflicting_code in CONFLICTING_CODES
3924         ):
3925             args.ignore.update(CONFLICTING_CODES)
3926     elif not args.select:
3927         if args.aggressive:
3928             # Enable everything by default if aggressive.
3929             args.select = {'E', 'W1', 'W2', 'W3', 'W6'}
3930         else:
3931             args.ignore = _split_comma_separated(DEFAULT_IGNORE)
3932
3933     if args.exclude:
3934         args.exclude = _split_comma_separated(args.exclude)
3935     else:
3936         args.exclude = {}
3937
3938     if args.jobs < 1:
3939         # Do not import multiprocessing globally in case it is not supported
3940         # on the platform.
3941         import multiprocessing
3942         args.jobs = multiprocessing.cpu_count()
3943
3944     if args.jobs > 1 and not (args.in_place or args.diff):
3945         parser.exit(
3946             EXIT_CODE_ARGPARSE_ERROR,
3947             'parallel jobs requires --in-place',
3948         )
3949
3950     if args.line_range:
3951         if args.line_range[0] <= 0:
3952             parser.exit(
3953                 EXIT_CODE_ARGPARSE_ERROR,
3954                 '--range must be positive numbers',
3955             )
3956         if args.line_range[0] > args.line_range[1]:
3957             parser.exit(
3958                 EXIT_CODE_ARGPARSE_ERROR,
3959                 'First value of --range should be less than or equal '
3960                 'to the second',
3961             )
3962
3963     return args
3964
3965
3966 def _get_normalize_options(args, config, section, option_list):
3967     for (k, v) in config.items(section):
3968         norm_opt = k.lstrip('-').replace('-', '_')
3969         if not option_list.get(norm_opt):
3970             continue
3971         opt_type = option_list[norm_opt]
3972         if opt_type is int:
3973             if v.strip() == "auto":
3974                 # skip to special case
3975                 if args.verbose:
3976                     print(f"ignore config: {k}={v}")
3977                 continue
3978             value = config.getint(section, k)
3979         elif opt_type is bool:
3980             value = config.getboolean(section, k)
3981         else:
3982             value = config.get(section, k)
3983         yield norm_opt, k, value
3984
3985
3986 def read_config(args, parser):
3987     """Read both user configuration and local configuration."""
3988     config = SafeConfigParser()
3989
3990     try:
3991         if args.verbose and os.path.exists(args.global_config):
3992             print("read config path: {}".format(args.global_config))
3993         config.read(args.global_config)
3994
3995         if not args.ignore_local_config:
3996             parent = tail = args.files and os.path.abspath(
3997                 os.path.commonprefix(args.files))
3998             while tail:
3999                 if config.read([os.path.join(parent, fn)
4000                                 for fn in PROJECT_CONFIG]):
4001                     if args.verbose:
4002                         for fn in PROJECT_CONFIG:
4003                             config_file = os.path.join(parent, fn)
4004                             if not os.path.exists(config_file):
4005                                 continue
4006                             print(
4007                                 "read config path: {}".format(
4008                                     os.path.join(parent, fn)
4009                                 )
4010                             )
4011                     break
4012                 (parent, tail) = os.path.split(parent)
4013
4014         defaults = {}
4015         option_list = {o.dest: o.type or type(o.default)
4016                        for o in parser._actions}
4017
4018         for section in ['pep8', 'pycodestyle', 'flake8']:
4019             if not config.has_section(section):
4020                 continue
4021             for norm_opt, k, value in _get_normalize_options(
4022                 args, config, section, option_list
4023             ):
4024                 if args.verbose:
4025                     print("enable config: section={}, key={}, value={}".format(
4026                         section, k, value))
4027                 defaults[norm_opt] = value
4028
4029         parser.set_defaults(**defaults)
4030     except Error:
4031         # Ignore for now.
4032         pass
4033
4034     return parser
4035
4036
4037 def read_pyproject_toml(args, parser):
4038     """Read pyproject.toml and load configuration."""
4039     if sys.version_info >= (3, 11):
4040         import tomllib
4041     else:
4042         import tomli as tomllib
4043
4044     config = None
4045
4046     if os.path.exists(args.global_config):
4047         with open(args.global_config, "rb") as fp:
4048             config = tomllib.load(fp)
4049
4050     if not args.ignore_local_config:
4051         parent = tail = args.files and os.path.abspath(
4052             os.path.commonprefix(args.files))
4053         while tail:
4054             pyproject_toml = os.path.join(parent, "pyproject.toml")
4055             if os.path.exists(pyproject_toml):
4056                 with open(pyproject_toml, "rb") as fp:
4057                     config = tomllib.load(fp)
4058                     break
4059             (parent, tail) = os.path.split(parent)
4060
4061     if not config:
4062         return None
4063
4064     if config.get("tool", {}).get("autopep8") is None:
4065         return None
4066
4067     config = config.get("tool").get("autopep8")
4068
4069     defaults = {}
4070     option_list = {o.dest: o.type or type(o.default)
4071                    for o in parser._actions}
4072
4073     TUPLED_OPTIONS = ("ignore", "select")
4074     for (k, v) in config.items():
4075         norm_opt = k.lstrip('-').replace('-', '_')
4076         if not option_list.get(norm_opt):
4077             continue
4078         if type(v) in (list, tuple) and norm_opt in TUPLED_OPTIONS:
4079             value = ",".join(v)
4080         else:
4081             value = v
4082         if args.verbose:
4083             print("enable pyproject.toml config: "
4084                   "key={}, value={}".format(k, value))
4085         defaults[norm_opt] = value
4086
4087     if defaults:
4088         # set value when exists key-value in defaults dict
4089         parser.set_defaults(**defaults)
4090
4091     return parser
4092
4093
4094 def _split_comma_separated(string):
4095     """Return a set of strings."""
4096     return {text.strip() for text in string.split(',') if text.strip()}
4097
4098
4099 def decode_filename(filename):
4100     """Return Unicode filename."""
4101     if isinstance(filename, str):
4102         return filename
4103
4104     return filename.decode(sys.getfilesystemencoding())
4105
4106
4107 def supported_fixes():
4108     """Yield pep8 error codes that autopep8 fixes.
4109
4110     Each item we yield is a tuple of the code followed by its
4111     description.
4112
4113     """
4114     yield ('E101', docstring_summary(reindent.__doc__))
4115
4116     instance = FixPEP8(filename=None, options=None, contents='')
4117     for attribute in dir(instance):
4118         code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
4119         if code:
4120             yield (
4121                 code.group(1).upper(),
4122                 re.sub(r'\s+', ' ',
4123                        docstring_summary(getattr(instance, attribute).__doc__))
4124             )
4125
4126     for (code, function) in sorted(global_fixes()):
4127         yield (code.upper() + (4 - len(code)) * ' ',
4128                re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
4129
4130     for code in sorted(CODE_TO_2TO3):
4131         yield (code.upper() + (4 - len(code)) * ' ',
4132                re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
4133
4134
4135 def docstring_summary(docstring):
4136     """Return summary of docstring."""
4137     return docstring.split('\n')[0] if docstring else ''
4138
4139
4140 def line_shortening_rank(candidate, indent_word, max_line_length,
4141                          experimental=False):
4142     """Return rank of candidate.
4143
4144     This is for sorting candidates.
4145
4146     """
4147     if not candidate.strip():
4148         return 0
4149
4150     rank = 0
4151     lines = candidate.rstrip().split('\n')
4152
4153     offset = 0
4154     if (
4155         not lines[0].lstrip().startswith('#') and
4156         lines[0].rstrip()[-1] not in '([{'
4157     ):
4158         for (opening, closing) in ('()', '[]', '{}'):
4159             # Don't penalize empty containers that aren't split up. Things like
4160             # this "foo(\n    )" aren't particularly good.
4161             opening_loc = lines[0].find(opening)
4162             closing_loc = lines[0].find(closing)
4163             if opening_loc >= 0:
4164                 if closing_loc < 0 or closing_loc != opening_loc + 1:
4165                     offset = max(offset, 1 + opening_loc)
4166
4167     current_longest = max(offset + len(x.strip()) for x in lines)
4168
4169     rank += 4 * max(0, current_longest - max_line_length)
4170
4171     rank += len(lines)
4172
4173     # Too much variation in line length is ugly.
4174     rank += 2 * standard_deviation(len(line) for line in lines)
4175
4176     bad_staring_symbol = {
4177         '(': ')',
4178         '[': ']',
4179         '{': '}'}.get(lines[0][-1])
4180
4181     if len(lines) > 1:
4182         if (
4183             bad_staring_symbol and
4184             lines[1].lstrip().startswith(bad_staring_symbol)
4185         ):
4186             rank += 20
4187
4188     for lineno, current_line in enumerate(lines):
4189         current_line = current_line.strip()
4190
4191         if current_line.startswith('#'):
4192             continue
4193
4194         for bad_start in ['.', '%', '+', '-', '/']:
4195             if current_line.startswith(bad_start):
4196                 rank += 100
4197
4198             # Do not tolerate operators on their own line.
4199             if current_line == bad_start:
4200                 rank += 1000
4201
4202         if (
4203             current_line.endswith(('.', '%', '+', '-', '/')) and
4204             "': " in current_line
4205         ):
4206             rank += 1000
4207
4208         if current_line.endswith(('(', '[', '{', '.')):
4209             # Avoid lonely opening. They result in longer lines.
4210             if len(current_line) <= len(indent_word):
4211                 rank += 100
4212
4213             # Avoid the ugliness of ", (\n".
4214             if (
4215                 current_line.endswith('(') and
4216                 current_line[:-1].rstrip().endswith(',')
4217             ):
4218                 rank += 100
4219
4220             # Avoid the ugliness of "something[\n" and something[index][\n.
4221             if (
4222                 current_line.endswith('[') and
4223                 len(current_line) > 1 and
4224                 (current_line[-2].isalnum() or current_line[-2] in ']')
4225             ):
4226                 rank += 300
4227
4228             # Also avoid the ugliness of "foo.\nbar"
4229             if current_line.endswith('.'):
4230                 rank += 100
4231
4232             if has_arithmetic_operator(current_line):
4233                 rank += 100
4234
4235         # Avoid breaking at unary operators.
4236         if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')):
4237             rank += 1000
4238
4239         if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')):
4240             rank += 1000
4241
4242         if current_line.endswith(('%', '(', '[', '{')):
4243             rank -= 20
4244
4245         # Try to break list comprehensions at the "for".
4246         if current_line.startswith('for '):
4247             rank -= 50
4248
4249         if current_line.endswith('\\'):
4250             # If a line ends in \-newline, it may be part of a
4251             # multiline string. In that case, we would like to know
4252             # how long that line is without the \-newline. If it's
4253             # longer than the maximum, or has comments, then we assume
4254             # that the \-newline is an okay candidate and only
4255             # penalize it a bit.
4256             total_len = len(current_line)
4257             lineno += 1
4258             while lineno < len(lines):
4259                 total_len += len(lines[lineno])
4260
4261                 if lines[lineno].lstrip().startswith('#'):
4262                     total_len = max_line_length
4263                     break
4264
4265                 if not lines[lineno].endswith('\\'):
4266                     break
4267
4268                 lineno += 1
4269
4270             if total_len < max_line_length:
4271                 rank += 10
4272             else:
4273                 rank += 100 if experimental else 1
4274
4275         # Prefer breaking at commas rather than colon.
4276         if ',' in current_line and current_line.endswith(':'):
4277             rank += 10
4278
4279         # Avoid splitting dictionaries between key and value.
4280         if current_line.endswith(':'):
4281             rank += 100
4282
4283         rank += 10 * count_unbalanced_brackets(current_line)
4284
4285     return max(0, rank)
4286
4287
4288 def standard_deviation(numbers):
4289     """Return standard deviation."""
4290     numbers = list(numbers)
4291     if not numbers:
4292         return 0
4293     mean = sum(numbers) / len(numbers)
4294     return (sum((n - mean) ** 2 for n in numbers) /
4295             len(numbers)) ** .5
4296
4297
4298 def has_arithmetic_operator(line):
4299     """Return True if line contains any arithmetic operators."""
4300     for operator in pycodestyle.ARITHMETIC_OP:
4301         if operator in line:
4302             return True
4303
4304     return False
4305
4306
4307 def count_unbalanced_brackets(line):
4308     """Return number of unmatched open/close brackets."""
4309     count = 0
4310     for opening, closing in ['()', '[]', '{}']:
4311         count += abs(line.count(opening) - line.count(closing))
4312
4313     return count
4314
4315
4316 def split_at_offsets(line, offsets):
4317     """Split line at offsets.
4318
4319     Return list of strings.
4320
4321     """
4322     result = []
4323
4324     previous_offset = 0
4325     current_offset = 0
4326     for current_offset in sorted(offsets):
4327         if current_offset < len(line) and previous_offset != current_offset:
4328             result.append(line[previous_offset:current_offset].strip())
4329         previous_offset = current_offset
4330
4331     result.append(line[current_offset:])
4332
4333     return result
4334
4335
4336 class LineEndingWrapper(object):
4337
4338     r"""Replace line endings to work with sys.stdout.
4339
4340     It seems that sys.stdout expects only '\n' as the line ending, no matter
4341     the platform. Otherwise, we get repeated line endings.
4342
4343     """
4344
4345     def __init__(self, output):
4346         self.__output = output
4347
4348     def write(self, s):
4349         self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
4350
4351     def flush(self):
4352         self.__output.flush()
4353
4354
4355 def match_file(filename, exclude):
4356     """Return True if file is okay for modifying/recursing."""
4357     base_name = os.path.basename(filename)
4358
4359     if base_name.startswith('.'):
4360         return False
4361
4362     for pattern in exclude:
4363         if fnmatch.fnmatch(base_name, pattern):
4364             return False
4365         if fnmatch.fnmatch(filename, pattern):
4366             return False
4367
4368     if not os.path.isdir(filename) and not is_python_file(filename):
4369         return False
4370
4371     return True
4372
4373
4374 def find_files(filenames, recursive, exclude):
4375     """Yield filenames."""
4376     while filenames:
4377         name = filenames.pop(0)
4378         if recursive and os.path.isdir(name):
4379             for root, directories, children in os.walk(name):
4380                 filenames += [os.path.join(root, f) for f in children
4381                               if match_file(os.path.join(root, f),
4382                                             exclude)]
4383                 directories[:] = [d for d in directories
4384                                   if match_file(os.path.join(root, d),
4385                                                 exclude)]
4386         else:
4387             is_exclude_match = False
4388             for pattern in exclude:
4389                 if fnmatch.fnmatch(name, pattern):
4390                     is_exclude_match = True
4391                     break
4392             if not is_exclude_match:
4393                 yield name
4394
4395
4396 def _fix_file(parameters):
4397     """Helper function for optionally running fix_file() in parallel."""
4398     if parameters[1].verbose:
4399         print('[file:{}]'.format(parameters[0]), file=sys.stderr)
4400     try:
4401         return fix_file(*parameters)
4402     except IOError as error:
4403         print(str(error), file=sys.stderr)
4404         raise error
4405
4406
4407 def fix_multiple_files(filenames, options, output=None):
4408     """Fix list of files.
4409
4410     Optionally fix files recursively.
4411
4412     """
4413     results = []
4414     filenames = find_files(filenames, options.recursive, options.exclude)
4415     if options.jobs > 1:
4416         import multiprocessing
4417         pool = multiprocessing.Pool(options.jobs)
4418         rets = []
4419         for name in filenames:
4420             ret = pool.apply_async(_fix_file, ((name, options),))
4421             rets.append(ret)
4422         pool.close()
4423         pool.join()
4424         if options.diff:
4425             for r in rets:
4426                 sys.stdout.write(r.get().decode())
4427                 sys.stdout.flush()
4428         results.extend([x.get() for x in rets if x is not None])
4429     else:
4430         for name in filenames:
4431             ret = _fix_file((name, options, output))
4432             if ret is None:
4433                 continue
4434             if options.diff:
4435                 if ret != '':
4436                     results.append(ret)
4437             elif options.in_place:
4438                 results.append(ret)
4439             else:
4440                 original_source = readlines_from_file(name)
4441                 if "".join(original_source).splitlines() != ret.splitlines():
4442                     results.append(ret)
4443     return results
4444
4445
4446 def is_python_file(filename):
4447     """Return True if filename is Python file."""
4448     if filename.endswith('.py'):
4449         return True
4450
4451     try:
4452         with open_with_encoding(
4453                 filename,
4454                 limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f:
4455             text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES)
4456             if not text:
4457                 return False
4458             first_line = text.splitlines()[0]
4459     except (IOError, IndexError):
4460         return False
4461
4462     if not PYTHON_SHEBANG_REGEX.match(first_line):
4463         return False
4464
4465     return True
4466
4467
4468 def is_probably_part_of_multiline(line):
4469     """Return True if line is likely part of a multiline string.
4470
4471     When multiline strings are involved, pep8 reports the error as being
4472     at the start of the multiline string, which doesn't work for us.
4473
4474     """
4475     return (
4476         '"""' in line or
4477         "'''" in line or
4478         line.rstrip().endswith('\\')
4479     )
4480
4481
4482 def wrap_output(output, encoding):
4483     """Return output with specified encoding."""
4484     return codecs.getwriter(encoding)(output.buffer
4485                                       if hasattr(output, 'buffer')
4486                                       else output)
4487
4488
4489 def get_encoding():
4490     """Return preferred encoding."""
4491     return locale.getpreferredencoding() or sys.getdefaultencoding()
4492
4493
4494 def main(argv=None, apply_config=True):
4495     """Command-line entry."""
4496     if argv is None:
4497         argv = sys.argv
4498
4499     try:
4500         # Exit on broken pipe.
4501         signal.signal(signal.SIGPIPE, signal.SIG_DFL)
4502     except AttributeError:  # pragma: no cover
4503         # SIGPIPE is not available on Windows.
4504         pass
4505
4506     try:
4507         args = parse_args(argv[1:], apply_config=apply_config)
4508
4509         if args.list_fixes:
4510             for code, description in sorted(supported_fixes()):
4511                 print('{code} - {description}'.format(
4512                     code=code, description=description))
4513             return EXIT_CODE_OK
4514
4515         if args.files == ['-']:
4516             assert not args.in_place
4517
4518             encoding = sys.stdin.encoding or get_encoding()
4519             read_stdin = sys.stdin.read()
4520             fixed_stdin = fix_code(read_stdin, args, encoding=encoding)
4521
4522             # LineEndingWrapper is unnecessary here due to the symmetry between
4523             # standard in and standard out.
4524             wrap_output(sys.stdout, encoding=encoding).write(fixed_stdin)
4525
4526             if hash(read_stdin) != hash(fixed_stdin):
4527                 if args.exit_code:
4528                     return EXIT_CODE_EXISTS_DIFF
4529         else:
4530             if args.in_place or args.diff:
4531                 args.files = list(set(args.files))
4532             else:
4533                 assert len(args.files) == 1
4534                 assert not args.recursive
4535
4536             results = fix_multiple_files(args.files, args, sys.stdout)
4537             if args.diff:
4538                 ret = any([len(ret) != 0 for ret in results])
4539             else:
4540                 # with in-place option
4541                 ret = any([ret is not None for ret in results])
4542             if args.exit_code and ret:
4543                 return EXIT_CODE_EXISTS_DIFF
4544     except IOError:
4545         return EXIT_CODE_ERROR
4546     except KeyboardInterrupt:
4547         return EXIT_CODE_ERROR  # pragma: no cover
4548
4549
4550 class CachedTokenizer(object):
4551
4552     """A one-element cache around tokenize.generate_tokens().
4553
4554     Original code written by Ned Batchelder, in coverage.py.
4555
4556     """
4557
4558     def __init__(self):
4559         self.last_text = None
4560         self.last_tokens = None
4561
4562     def generate_tokens(self, text):
4563         """A stand-in for tokenize.generate_tokens()."""
4564         if text != self.last_text:
4565             string_io = io.StringIO(text)
4566             self.last_tokens = list(
4567                 tokenize.generate_tokens(string_io.readline)
4568             )
4569             self.last_text = text
4570         return self.last_tokens
4571
4572
4573 _cached_tokenizer = CachedTokenizer()
4574 generate_tokens = _cached_tokenizer.generate_tokens
4575
4576
4577 if __name__ == '__main__':
4578     sys.exit(main())