]> crepu.dev Git - config.git/blob - djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/pycodestyle.py
Actualizado el Readme
[config.git] / djavu-asus / elpy / rpc-venv / lib / python3.11 / site-packages / pycodestyle.py
1 #!/usr/bin/env python
2 # pycodestyle.py - Check Python source code formatting, according to
3 # PEP 8
4 #
5 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
6 # Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
7 # Copyright (C) 2014-2016 Ian Lee <ianlee1521@gmail.com>
8 #
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation files
11 # (the "Software"), to deal in the Software without restriction,
12 # including without limitation the rights to use, copy, modify, merge,
13 # publish, distribute, sublicense, and/or sell copies of the Software,
14 # and to permit persons to whom the Software is furnished to do so,
15 # subject to the following conditions:
16 #
17 # The above copyright notice and this permission notice shall be
18 # included in all copies or substantial portions of the Software.
19 #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 # SOFTWARE.
28 r"""
29 Check Python source code formatting, according to PEP 8.
30
31 For usage and a list of options, try this:
32 $ python pycodestyle.py -h
33
34 This program and its regression test suite live here:
35 https://github.com/pycqa/pycodestyle
36
37 Groups of errors and warnings:
38 E errors
39 W warnings
40 100 indentation
41 200 whitespace
42 300 blank lines
43 400 imports
44 500 line length
45 600 deprecation
46 700 statements
47 900 syntax error
48 """
49 import bisect
50 import configparser
51 import inspect
52 import io
53 import keyword
54 import os
55 import re
56 import sys
57 import time
58 import tokenize
59 import warnings
60 from fnmatch import fnmatch
61 from functools import lru_cache
62 from optparse import OptionParser
63
# This is a performance hack: on interpreters older than 3.10, wrap the
# private ``tokenize._compile`` helper in ``lru_cache`` (when it exists
# and is callable) so repeated calls with the same arguments are served
# from the cache.  See https://bugs.python.org/issue43014
if (
        sys.version_info < (3, 10) and
        callable(getattr(tokenize, '_compile', None))
):  # pragma: no cover (<py310)
    tokenize._compile = lru_cache(tokenize._compile)  # type: ignore
70
__version__ = '2.11.0'

# Comma-separated default lists (presumably consumed by the option
# parser defined elsewhere in this file -- verify against the caller).
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704,W503,W504'
# Location of the per-user configuration file: ``~\.pycodestyle`` on
# Windows, otherwise ``pycodestyle`` inside $XDG_CONFIG_HOME (falling
# back to ``~/.config`` when that variable is unset or empty).
# NOTE(review): nothing in the ``try`` body visibly raises ImportError;
# confirm whether the ``None`` fallback is still reachable.
try:
    if sys.platform == 'win32':  # pragma: win32 cover
        USER_CONFIG = os.path.expanduser(r'~\.pycodestyle')
    else:  # pragma: win32 no cover
        USER_CONFIG = os.path.join(
            os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'),
            'pycodestyle'
        )
except ImportError:
    USER_CONFIG = None

# File names of project-level configuration files.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini')
MAX_LINE_LENGTH = 79
# Number of blank lines between various code parts.
BLANK_LINES_CONFIG = {
    # Top level class and function.
    'top_level': 2,
    # Methods and nested class and function.
    'method': 1,
}
MAX_DOC_LENGTH = 72
INDENT_SIZE = 4
# %-style templates for one reported issue, keyed by format name; the
# placeholders are path, row, col, code and text.
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}
101
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
# Every keyword (plus the legacy 'print') except the three singletons.
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
# Operators that may appear in unary / unpacking position.
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-', '@'])
# Arithmetic operators plus the bitwise, shift and modulo operators.
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=',
    'and', 'in', 'is', 'or', '->', ':='])
# Space, tab and no-break space.
WHITESPACE = frozenset(' \t\xa0')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

# Captures the leading run of spaces and tabs of a line (may be empty).
INDENT_REGEX = re.compile(r'([ \t]*)')
# One upper-case letter followed by exactly three digits, e.g. "E501";
# register_check() uses it to pull codes out of check docstrings.
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
# Optional u / r prefix followed by a quote character.
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
# Space or tab right after an opening bracket, or right before a closing
# bracket, comma, semicolon or colon that is not followed by '='.
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({][ \t]|[ \t][\]}),;:](?!=)')
# Matches a comma, semicolon or colon followed by extraneous whitespace:
# either two (or more) spaces or whitespace that contains a tab.  The
# alternation must require TWO spaces; with a single space the pattern
# would also match the regular "a, b" form, which is listed as Okay in
# the whitespace_around_comma docstring.
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
# "==" / "!=" with None, False or True on either side.  Group 3 only
# participates when group 1 (a singleton on the left) did not match.
COMPARE_SINGLETON_REGEX = re.compile(r'(\bNone|\bFalse|\bTrue)?\s*([=!]=)'
                                     r'\s*(?(1)|(None|False|True))\b')
# "not <operand> in" / "not <operand> is", unless "not" follows "is ".
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(?<!is\s)(not)\s+[^][)(}{ ]+\s+'
                                    r'(in|is)\s')
# "== type(...)" / "!= type(...)" or "type(...) ==" / "type(...) !=".
COMPARE_TYPE_REGEX = re.compile(
    r'[=!]=\s+type(?:\s*\(\s*([^)]*[^ )])\s*\))'
    r'|\btype(?:\s*\(\s*([^)]*[^ )])\s*\))\s+[=!]='
)
# Any word from KEYWORDS; groups 1 and 2 capture the whitespace before
# and after it.
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
# An operator run (or ":="); groups 1 and 2 capture the whitespace
# before and after it.  The preceding character must not be a comma or
# whitespace.
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
# Unified-diff hunk header; captures the start line and the optional
# line count of the "+" side.
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
# Line starting with "def ", "async def ", "class " or a decorator "@".
STARTSWITH_TOP_LEVEL_REGEX = re.compile(r'^(async\s+def\s+|def\s+|class\s+|@)')
# Optionally indented line starting with one of the listed compound
# statement keywords; a space inside an entry matches any whitespace run.
STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
    r'^\s*({})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
        'def', 'async def',
        'for', 'async for',
        'if', 'elif', 'else',
        'try', 'except', 'finally',
        'with', 'async with',
        'class',
        'while',
    )))
)
# Assignment to a __dunder__ name, with an optional annotation.
DUNDER_REGEX = re.compile(r"^__([^\s]+)__(?::\s*[a-zA-Z.0-9_\[\]\"]+)? = ")
BLANK_EXCEPT_REGEX = re.compile(r"except\s*:")

# The f-string token types only exist from Python 3.12 on; older
# interpreters get -1 placeholders for all three names.
if sys.version_info >= (3, 12):  # pragma: >=3.12 cover
    FSTRING_START = tokenize.FSTRING_START
    FSTRING_MIDDLE = tokenize.FSTRING_MIDDLE
    FSTRING_END = tokenize.FSTRING_END
else:  # pragma: <3.12 cover
    FSTRING_START = FSTRING_MIDDLE = FSTRING_END = -1

# Registry filled by register_check(): for each kind of check, maps the
# check object to a (codes, argument names) tuple.
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
160
161
162 def _get_parameters(function):
163 return [parameter.name
164 for parameter
165 in inspect.signature(function).parameters.values()
166 if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]
167
168
def register_check(check, codes=None):
    """Register a new check object.

    A function is registered under the kind named by its first
    parameter when that is ``physical_line`` or ``logical_line``; if no
    *codes* are given they are extracted from the function's docstring.
    A class is registered as a ``tree`` check when its ``__init__``
    starts with the parameters ``self, tree``.  Anything else is left
    unregistered.  The check object is always returned unchanged, so
    this can be used as a decorator.
    """
    def _store(kind, check_codes, argument_names):
        registry = _checks[kind]
        if check in registry:
            # Already known: only add the extra codes.
            registry[check][0].extend(check_codes or [])
        else:
            registry[check] = (check_codes or [''], argument_names)

    if inspect.isfunction(check):
        argument_names = _get_parameters(check)
        if (argument_names and
                argument_names[0] in ('physical_line', 'logical_line')):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _store(argument_names[0], codes, argument_names)
    elif inspect.isclass(check):
        if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
            _store('tree', codes, None)
    return check
186
187
188 ########################################################################
189 # Plugins (check functions) for physical lines
190 ########################################################################
191
@register_check
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a
    mixture of tabs and spaces should be converted to using spaces
    exclusively.

    The first character of the leading whitespace that differs from
    indent_char is reported, together with its offset.

    Okay: if a == 0:\n    a = 1\n    b = 1
    """
    leading = INDENT_REGEX.match(physical_line).group(1)
    mismatches = (
        position for position, char in enumerate(leading)
        if char != indent_char
    )
    first_bad = next(mismatches, None)
    if first_bad is not None:
        return first_bad, "E101 indentation contains mixed spaces and tabs"
210
211
@register_check
def tabs_obsolete(physical_line):
    r"""On new projects, spaces-only are strongly recommended over tabs.

    Reports the offset of the first tab found in the leading whitespace.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    leading = INDENT_REGEX.match(physical_line).group(1)
    tab_at = leading.find('\t')
    if tab_at >= 0:
        return tab_at, "W191 indentation contains tabs"
222
223
@register_check
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    The warning returned varies on whether the line itself is blank,
    for easier filtering for those who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Drop the line terminators one kind at a time, in this order:
    # newline chr(10), carriage return chr(13), form feed chr(12).
    for terminator in ('\n', '\r', '\x0c'):
        physical_line = physical_line.rstrip(terminator)
    content = physical_line.rstrip(' \t\v')
    if content == physical_line:
        return None
    if content:
        return len(content), "W291 trailing whitespace"
    return 0, "W293 blank line contains whitespace"
244
245
@register_check
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Only the last physical line of the file is examined.

    Okay: spam(1)
    W391: spam(1)\n

    However the last line should end with a new line (warning W292).
    """
    if line_number != total_lines:
        return None
    without_eol = physical_line.rstrip('\r\n')
    if physical_line and not without_eol:
        # The last line consists of line terminators only.
        return 0, "W391 blank line at end of file"
    if without_eol == physical_line:
        # Nothing was stripped, so the line has no terminator.
        return len(lines[-1]), "W292 no newline at end of file"
    return None
261
262
@register_check
def maximum_line_length(physical_line, max_line_length, multiline,
                        line_number, noqa):
    r"""Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to
    have several windows side-by-side.  The default wrapping on such
    devices looks ugly.  Therefore, please limit all lines to a maximum
    of 79 characters.  For flowing long blocks of text (docstrings or
    comments), limiting the length to 72 characters is recommended.

    Reports error E501.
    """
    stripped = physical_line.rstrip()
    width = len(stripped)
    if noqa or width <= max_line_length:
        return None
    # Special case: ignore long shebang lines.
    if line_number == 1 and stripped.startswith('#!'):
        return None
    # Special case for long URLs in multi-line docstrings or comments,
    # but still report the error when the 72 first chars are
    # whitespaces.
    words = stripped.split()
    lone_token = (
        (len(words) == 1 and multiline) or
        (len(words) == 2 and words[0] == '#')
    )
    if lone_token and width - len(words[-1]) < max_line_length - 7:
        return None
    return (max_line_length, "E501 line too long "
            "(%d > %d characters)" % (width, max_line_length))
294
295
296 ########################################################################
297 # Plugins (check functions) for logical lines
298 ########################################################################
299
300
def _is_one_liner(logical_line, indent_level, lines, line_number):
    """Tell whether the definition at line_number is a one-liner.

    Returns False when logical_line does not start a def/class/decorator,
    when the preceding physical line is indented deeper than
    indent_level, or when no def/class line is found from line_number
    onwards.  Otherwise the first non-blank line after the def/class
    line decides: the definition counts as a one-liner if that line is
    indented no deeper than indent_level, or if there is no such line.
    """
    if STARTSWITH_TOP_LEVEL_REGEX.match(logical_line) is None:
        return False

    start = line_number - 1
    previous_indent = expand_indent(lines[start - 1]) if start >= 1 else 0
    if previous_indent > indent_level:
        return False

    total = len(lines)
    # Skip decorators until the actual def/class line.
    for definition_idx in range(start, total):
        candidate = lines[definition_idx].strip()
        if (not candidate.startswith('@') and
                STARTSWITH_TOP_LEVEL_REGEX.match(candidate)):
            break
    else:
        return False  # invalid syntax: EOF while searching for def/class

    # Find the next line that is not blank.
    for following_idx in range(definition_idx + 1, total):
        if lines[following_idx].strip():
            break
    else:
        return True  # line is last in the file

    return expand_indent(lines[following_idx]) <= indent_level
334
335
@register_check
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical,
                previous_unindented_logical_line, previous_indent_level,
                lines):
    r"""Separate top-level function and class definitions with two blank
    lines.

    Method definitions inside a class are separated by a single blank
    line.

    Extra blank lines may be used (sparingly) to separate groups of
    related functions.  Blank lines may be omitted between a bunch of
    related one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical
    sections.

    The expected counts are read from BLANK_LINES_CONFIG ('top_level'
    and 'method').

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\nasync def b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
    Okay: default = 1\nfoo = 1
    Okay: classify = 1\nfoo = 1

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E302: def a():\n    pass\n\nasync def b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    E305: def a():\n    pass\na()
    E306: def a():\n    def b():\n        pass\n    def c():\n        pass
    """  # noqa
    top_level_lines = BLANK_LINES_CONFIG['top_level']
    method_lines = BLANK_LINES_CONFIG['method']

    if not previous_logical and blank_before < top_level_lines:
        return  # Don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        # Directly after a decorator no blank line is allowed at all.
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
    elif (blank_lines > top_level_lines or
            (indent_level and blank_lines == method_lines + 1)
          ):
        # More than the top-level allowance anywhere, or exactly one more
        # than the method allowance inside an indented block.
        yield 0, "E303 too many blank lines (%d)" % blank_lines
    elif STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        # allow a group of one-liners
        if (
            _is_one_liner(logical_line, indent_level, lines, line_number) and
            blank_before == 0
        ):
            return
        if indent_level:
            # Indented definition: fine when separated by the method
            # allowance, when it opens a deeper block, or when the
            # previous logical line starts like a docstring.
            if not (blank_before == method_lines or
                    previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)
                    ):
                ancestor_level = indent_level
                nested = False
                # Search backwards for a def ancestor or tree root
                # (top level).
                for line in lines[line_number - top_level_lines::-1]:
                    if line.strip() and expand_indent(line) < ancestor_level:
                        ancestor_level = expand_indent(line)
                        nested = STARTSWITH_DEF_REGEX.match(line.lstrip())
                        if nested or ancestor_level == 0:
                            break
                if nested:
                    yield 0, "E306 expected %s blank line before a " \
                        "nested definition, found 0" % (method_lines,)
                else:
                    yield 0, "E301 expected {} blank line, found 0".format(
                        method_lines)
        elif blank_before != top_level_lines:
            yield 0, "E302 expected %s blank lines, found %d" % (
                top_level_lines, blank_before)
    elif (logical_line and
            not indent_level and
            blank_before != top_level_lines and
            previous_unindented_logical_line.startswith(('def ', 'class '))
          ):
        # Unindented code following a top-level def/class body.
        yield 0, "E305 expected %s blank lines after " \
            "class or function definition, found %d" % (
                top_level_lines, blank_before)
420
421
@register_check
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for hit in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        matched = hit.group()
        symbol = matched.strip()
        offset = hit.start()
        if matched[-1].isspace():
            # The match is an opening bracket followed by whitespace.
            yield offset + 1, "E201 whitespace after '%s'" % symbol
            continue
        if logical_line[offset - 1] == ',':
            # Whitespace that directly follows a comma is not reported.
            continue
        code = 'E203' if symbol not in '}])' else 'E202'
        yield offset, f"{code} whitespace before '{symbol}'"
453
454
@register_check
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    # (regex group, message for a tab, message for several spaces);
    # group 1 is the whitespace before the keyword, group 2 after it.
    messages = (
        (1, "E274 tab before keyword",
         "E272 multiple spaces before keyword"),
        (2, "E273 tab after keyword",
         "E271 multiple spaces after keyword"),
    )
    for hit in KEYWORD_REGEX.finditer(logical_line):
        for group, tab_message, spaces_message in messages:
            gap = hit.group(group)
            if '\t' in gap:
                yield hit.start(group), tab_message
            elif len(gap) > 1:
                yield hit.start(group), spaces_message
477
478
@register_check
def missing_whitespace_after_keyword(logical_line, tokens):
    r"""Keywords should be followed by whitespace.

    Okay: from foo import (bar, baz)
    E275: from foo import(bar, baz)
    E275: from importable.module import(bar, baz)
    E275: if(foo): bar
    """
    for current, following in zip(tokens, tokens[1:]):
        # Only tokens that touch each other can be missing a space.
        if current.end != following.start:
            continue
        word = current.string
        # This must exclude the True/False/None singletons, which can
        # appear e.g. as "if x is None:", and async/await, which were
        # valid identifier names in old Python versions.
        if not keyword.iskeyword(word) or word in SINGLETONS:
            continue
        upcoming = following.string
        if word == 'except' and upcoming == '*':
            continue
        if word == 'yield' and upcoming == ')':
            continue
        if upcoming in ':\n':
            continue
        yield current.end, "E275 missing whitespace after keyword"
499
500
@register_check
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level,
                indent_size):
    r"""Use indent_size (PEP8 says 4) spaces per indentation level.

    For really old code that you don't want to mess up, you can continue
    to use 8-space tabs.

    The over-indentation check allows one indent_size step after a line
    ending with a colon (at least 8 columns when indenting with tabs).

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1
    E114:   # a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass
    E115: for item in items:\n# Hi\n    pass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    E116: a = 1\n    # b = 2
    """
    # An empty logical_line means a comment-only line: the codes are
    # shifted by 3 and the message gets a "(comment)" suffix.
    c = 0 if logical_line else 3
    tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)"
    if indent_level % indent_size:
        yield 0, tmpl % (
            1 + c,
            "indentation is not a multiple of " + str(indent_size),
        )
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        yield 0, tmpl % (2 + c, "expected an indented block")
    elif not indent_expect and indent_level > previous_indent_level:
        yield 0, tmpl % (3 + c, "unexpected indentation")

    if indent_expect:
        # One configured indentation step is allowed.  The step used to
        # be hard-coded to 4 columns, which reported conforming blocks
        # as over-indented whenever indent_size was larger than 4.  With
        # tab indentation at least 8 columns stay allowed, as before.
        if indent_char == '\t':
            expected_indent_amount = max(8, indent_size)
        else:
            expected_indent_amount = indent_size
        expected_indent_level = previous_indent_level + expected_indent_amount
        if indent_level > expected_indent_level:
            yield 0, tmpl % (7, 'over-indented')
541
542
@register_check
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, indent_size, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself
      as a continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    # Rows are counted relative to the first physical row of the
    # logical line; a logical line on a single row has nothing to check.
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # With tab indentation a double-size hang is accepted as well.
    valid_hangs = (indent_size,) if indent_char != '\t' \
        else (indent_size, indent_size * 2)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents: maps a column to True, to ``str`` (string or
    # comment start) or to the text of the token that started there
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    last_token_multiline = False
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        # A new row starts a continuation line unless the previous token
        # spanned several rows or this token is itself a newline token.
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening
                # bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and
                                    rel_indent[row] == 2 * indent_size):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a
                # previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    hangs[depth] = hang
                    if hang > indent_size:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and
                token_type not in (tokenize.NL, tokenize.COMMENT) and
                not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print(f"bracket depth {depth} indent to {start[1]}")
        # deal with implicit string concatenation
        elif token_type in (tokenize.STRING, tokenize.COMMENT, FSTRING_START):
            indent_chances[start[1]] = str
        # visual indent after assert/raise/with
        elif not row and not depth and text in ["assert", "raise", "with"]:
            indent_chances[end[1] + 1] = True
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        # a colon that ends its physical line: this row also counts as
        # an opening row for the current depth
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # give back one bracket to the closest earlier row that
                # still has an open one
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow lining up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            # the row where a multi-row token ends inherits the indent
            # of the row where it started
            rel_indent[end[0] - first_row] = rel_indent[row]

    # After the loop, line/start refer to the last token: the final
    # continuation line must not sit at the indent of the next block.
    if indent_next and expand_indent(line) == indent_level + indent_size:
        pos = (start[0], indent[0] + indent_size)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
747
748
@register_check
def whitespace_before_parameters(logical_line, tokens):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in the following situations:
    - before the open parenthesis that starts the argument list of a
      function call.
    - before the open parenthesis that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_kind, prev_string, _, prev_stop, _ = tokens[0]
    for index, (kind, string, begin, stop, _) in enumerate(tokens[1:], 1):
        # An opening "(" or "[" that does not touch the previous token.
        candidate = (
            kind == tokenize.OP and string in '([' and begin != prev_stop
        )
        if candidate and (prev_kind == tokenize.NAME or
                          prev_string in '}])'):
            # Syntax "class A (B):" is allowed, but avoid it
            after_class = index >= 2 and tokens[index - 2][1] == 'class'
            # Allow "return (a.foo for a in range(5))"
            if not after_class and not keyword.iskeyword(prev_string):
                if (
                        sys.version_info < (3, 9) or
                        # 3.12+: type is a soft keyword but no braces after
                        prev_string == 'type' or
                        not keyword.issoftkeyword(prev_string)
                ):
                    yield prev_stop, "E211 whitespace before '%s'" % string
        prev_kind, prev_string, prev_stop = kind, string, stop
788
789
@register_check
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    # (regex group, message for a tab, message for several spaces);
    # group 1 is the whitespace before the operator, group 2 after it.
    messages = (
        (1, "E223 tab before operator",
         "E221 multiple spaces before operator"),
        (2, "E224 tab after operator",
         "E222 multiple spaces after operator"),
    )
    for hit in OPERATOR_REGEX.finditer(logical_line):
        for group, tab_message, spaces_message in messages:
            gap = hit.group(group)
            if '\t' in gap:
                yield hit.start(group), tab_message
            elif len(gap) > 1:
                yield hit.start(group), spaces_message
812
813
@register_check
def missing_whitespace(logical_line, tokens):
    r"""Surround operators with the correct amount of whitespace.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Each comma, semicolon or colon should be followed by whitespace.

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]
    Okay: [a, b]
    Okay: (3,)
    Okay: a[3,] = 1
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E225: z = 1and 1
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    # need_space is a small state machine for the operator seen last:
    #   False          -> nothing pending
    #   True           -> whitespace is required after the operator
    #   (pos, spaced)  -> whitespace is optional, but the trailing side
    #                     must match the leading side; ``spaced`` tells
    #                     whether there was whitespace before it
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    operator_types = (tokenize.OP, tokenize.NAME)
    # Stack of currently open contexts: the opening bracket itself,
    # 'f' for an f-string and 'l' for a lambda's parameter list.
    brace_stack = []
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.OP and text in {'[', '(', '{'}:
            brace_stack.append(text)
        elif token_type == FSTRING_START:  # pragma: >=3.12 cover
            brace_stack.append('f')
        elif token_type == tokenize.NAME and text == 'lambda':
            brace_stack.append('l')
        elif brace_stack:
            if token_type == tokenize.OP and text in {']', ')', '}'}:
                brace_stack.pop()
            elif token_type == FSTRING_END:  # pragma: >=3.12 cover
                brace_stack.pop()
            elif (
                    brace_stack[-1] == 'l' and
                    token_type == tokenize.OP and
                    text == ':'
            ):
                # the colon ends the lambda's parameter list
                brace_stack.pop()

        if token_type in SKIP_COMMENTS:
            continue

        if token_type == tokenize.OP and text in {',', ';', ':'}:
            # the character right after the separator ('' at end of line)
            next_char = line[end[1]:end[1] + 1]
            if next_char not in WHITESPACE and next_char not in '\r\n':
                # slice
                if text == ':' and brace_stack[-1:] == ['[']:
                    pass
                # 3.12+ fstring format specifier
                elif text == ':' and brace_stack[-2:] == ['f', '{']:  # pragma: >=3.12 cover  # noqa: E501
                    pass
                # tuple (and list for some reason?)
                elif text == ',' and next_char in ')]':
                    pass
                else:
                    yield start, f'E231 missing whitespace after {text!r}'

        if need_space:
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif (
                    # def f(a, /, b):
                    #           ^
                    # def f(a, b, /):
                    #              ^
                    # f = lambda a, /:
                    #                ^
                    prev_text == '/' and text in {',', ')', ':'} or
                    # def f(a, b, /):
                    #               ^
                    prev_text == ')' and text == ':'
            ):
                # Tolerate the "/" operator in function definition
                # For more info see PEP570
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                elif prev_text != '**':
                    # optional-whitespace operator written without any
                    # whitespace: pick the code by operator family
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type in operator_types and prev_end is not None:
            if (
                    text == '=' and (
                        # allow lambda default args: lambda x=None: None
                        brace_stack[-1:] == ['l'] or
                        # allow keyword args or defaults: foo(bar=None).
                        brace_stack[-1:] == ['('] or
                        # allow python 3.8 fstring repr specifier
                        brace_stack[-2:] == ['f', '{']
                    )
            ):
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP and prev_text in '}])' or (
                    prev_type != tokenize.OP and
                    prev_text not in KEYWORDS and (
                        sys.version_info < (3, 9) or
                        not keyword.issoftkeyword(prev_text)
                    )
                ):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
973
974
975 @register_check
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Every match of WHITESPACE_AFTER_COMMA_REGEX is reported one column
    after the start of the match; a match containing a tab is E242,
    any other match is E241.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    for hit in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        matched = hit.group()
        offset = hit.start() + 1
        separator = matched[0]
        if '\t' in matched:
            yield offset, "E242 tab after '%s'" % separator
        else:
            yield offset, "E241 multiple spaces after '%s'" % separator
992
993
994 @register_check
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value, except when
    using a type annotation.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)
    Okay: def foo(arg: int = 42):
    Okay: async def foo(arg: int = 42):

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    E252: def complex(real, image: float=0.0):

    Yields (position, message) pairs.  Whitespace is detected by
    comparing the start of a token with the end of the previous one.
    """
    # Nesting depth of '(' / '[' (braces are not counted here).
    parens = 0
    # Set after an '=' that must NOT be followed by whitespace; the
    # check itself is done when the next token is seen.
    no_space = False
    # Set after an '=' that MUST be followed by whitespace.
    require_space = False
    prev_end = None
    # True between the ':' of an annotated parameter and the next ','
    # at depth 1 (or until the brackets are closed).
    annotated_func_arg = False
    in_def = bool(STARTSWITH_DEF_REGEX.match(logical_line))

    message = "E251 unexpected spaces around keyword / parameter equals"
    missing_message = "E252 missing whitespace around parameter equals"

    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        # Deferred checks for the token that follows an '='.
        if no_space:
            no_space = False
            if start != prev_end:
                yield (prev_end, message)
        if require_space:
            require_space = False
            if start == prev_end:
                yield (prev_end, missing_message)
        if token_type == tokenize.OP:
            if text in '([':
                parens += 1
            elif text in ')]':
                parens -= 1
            elif in_def and text == ':' and parens == 1:
                annotated_func_arg = True
            elif parens == 1 and text == ',':
                annotated_func_arg = False
            elif parens and text == '=':
                if annotated_func_arg and parens == 1:
                    # Annotated default: whitespace required on both
                    # sides; the left side is checked right here.
                    require_space = True
                    if start == prev_end:
                        yield (prev_end, missing_message)
                else:
                    # Keyword argument / plain default: no whitespace
                    # allowed on either side.
                    no_space = True
                    if start != prev_end:
                        yield (prev_end, message)
            if not parens:
                annotated_func_arg = False

        prev_end = end
1058
1059
1060 @register_check
def whitespace_before_comment(logical_line, tokens):
    """Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.
    Inline comments should be separated by at least two spaces from the
    statement.  They should start with a # and a single space.

    Each line of a block comment starts with a # and one or multiple
    spaces as there can be indented text inside the comment.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comments:
    Okay: #  - Block comment list
    Okay: # \xa0- Block comment list
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    E262: x = x + 1  # \xa0Increment x
    E265: #Block comment
    E266: ### Block comment
    """
    # End position of the last token that is neither a comment nor NL.
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            # Non-empty when something other than whitespace precedes
            # the comment on its physical line, i.e. it is inline.
            inline_comment = line[:start[1]].strip()
            if inline_comment:
                if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                    yield (prev_end,
                           "E261 at least two spaces before inline comment")
            symbol, sp, comment = text.partition(' ')
            # False when the text before the first space is '#' or
            # '#:' (or empty); otherwise the first character that
            # follows the leading '#' run, or '#' if there is none.
            bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
            if inline_comment:
                if bad_prefix or comment[:1] in WHITESPACE:
                    yield start, "E262 inline comment should start with '# '"
            # A '#!' prefix is tolerated on the first row only.
            elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
                if bad_prefix != '#':
                    yield start, "E265 block comment should start with '# '"
                elif comment:
                    yield start, "E266 too many leading '#' for block comment"
        elif token_type != tokenize.NL:
            prev_end = end
1103
1104
1105 @register_check
def imports_on_separate_lines(logical_line):
    r"""Place imports on separate lines.

    Only lines starting with ``import `` are inspected.  The first
    comma is reported unless a ';' appears before it.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    if not logical_line.startswith('import '):
        return
    comma = logical_line.find(',')
    if comma == -1:
        return
    if ';' in logical_line[:comma]:
        return
    yield comma, "E401 multiple imports on one line"
1123
1124
1125 @register_check
def module_imports_on_top_of_file(
        logical_line, indent_level, checker_state, noqa):
    r"""Place imports at the top of the file.

    Always put imports at the top of the file, just after any module
    comments and docstrings, and before module globals and constants.

    Okay: import os
    Okay: # this is a comment\nimport os
    Okay: '''this is a module docstring'''\nimport os
    Okay: r'''this is a module docstring'''\nimport os
    E402: a=1\nimport os
    E402: 'One string'\n"Two string"\nimport os
    E402: a=1\nfrom sys import x
    E402: iffy=1\nimport os

    Okay: if x:\n    import os

    ``checker_state`` carries two flags between logical lines:
    'seen_docstring' (a string-literal statement was already seen) and
    'seen_non_imports' (some other module-level code was already seen).
    """  # noqa
    def is_string_literal(line):
        # Skip an optional u/U/b/B prefix and an optional r/R prefix,
        # then look for an opening quote.
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    allowed_keywords = (
        'try', 'except', 'else', 'finally', 'with', 'if', 'elif')
    # The keyword has to end at a word boundary: a bare prefix test
    # would also accept identifiers such as "iffy" or "without".
    allowed_keyword_regex = r'(?:%s)\b' % '|'.join(allowed_keywords)

    if indent_level:  # Allow imports in conditional statement/function
        return
    if not logical_line:   # Allow empty lines or comments
        return
    if noqa:
        return
    line = logical_line
    if line.startswith('import ') or line.startswith('from '):
        if checker_state.get('seen_non_imports', False):
            yield 0, "E402 module level import not at top of file"
    elif re.match(DUNDER_REGEX, line):
        return
    elif re.match(allowed_keyword_regex, line):
        # Allow certain keywords intermixed with imports in order to
        # support conditional or filtered importing
        return
    elif is_string_literal(line):
        # The first literal is a docstring, allow it. Otherwise, report
        # error.
        if checker_state.get('seen_docstring', False):
            checker_state['seen_non_imports'] = True
        else:
            checker_state['seen_docstring'] = True
    else:
        checker_state['seen_non_imports'] = True
1178
1179
1180 @register_check
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally
    discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Always use a def statement instead of an assignment statement that
    binds a lambda expression directly to a name.

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()
    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    E704: def f(x): return 2*x
    E731: f = lambda x: 2*x
    """
    line = logical_line
    last_char = len(line) - 1
    found = line.find(':')
    prev_found = 0
    # Running totals of each bracket character seen before the colon
    # currently being examined.
    counts = {char: 0 for char in '{}[]()'}
    # A colon in the very last position ends the line and is ignored.
    while -1 < found < last_char:
        update_counts(line[prev_found:found], counts)
        # Only a colon outside of any open bracket, and not part of
        # ':=', can separate two statements.
        if (
                counts['{'] <= counts['}'] and  # {'a': 1} (dict)
                counts['['] <= counts[']'] and  # [1:2] (slice)
                counts['('] <= counts[')'] and  # (annotation)
                line[found + 1] != '='  # assignment expression
        ):
            lambda_kw = LAMBDA_REGEX.search(line, 0, found)
            if lambda_kw:
                before = line[:lambda_kw.start()].rstrip()
                # E731 only when the text before the lambda is exactly
                # "<identifier> =".
                if before[-1:] == '=' and before[:-1].strip().isidentifier():
                    yield 0, ("E731 do not assign a lambda expression, use a "
                              "def")
                # Stop scanning colons once a lambda colon was found.
                break
            if STARTSWITH_DEF_REGEX.match(line):
                yield 0, "E704 multiple statements on one line (def)"
            elif STARTSWITH_INDENT_STATEMENT_REGEX.match(line):
                yield found, "E701 multiple statements on one line (colon)"
        prev_found = found
        found = line.find(':', found + 1)
    # Second pass: every semicolon is reported, E703 when it is the
    # last character of the line and E702 otherwise.
    found = line.find(';')
    while -1 < found:
        if found < last_char:
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)
1243
1244
1245 @register_check
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's
    implied line continuation inside parentheses, brackets and braces.
    Long lines can be broken over multiple lines by wrapping expressions
    in parentheses.  These should be used in preference to using a
    backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    Okay: aaa = 123  # \\
    """
    # prev_start / prev_end hold row numbers; parens is the current
    # nesting depth of (), [] and {}.
    prev_start = prev_end = parens = 0
    # Becomes True at the first COMMENT token and is not reset again
    # inside this function.
    comment = False
    # (row, column) of a trailing backslash on the physical line most
    # recently inspected, or None when that line has none.
    backslash = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            comment = True
        # First token on a new row: report the remembered backslash if
        # we are inside brackets and no comment has been seen.
        if start[0] != prev_start and parens and backslash and not comment:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
1284
1285
1286 # The % character is strictly speaking a binary operator, but the
1287 # common usage seems to be to put it next to the format parameters,
1288 # after a line break.
1289 _SYMBOLIC_OPS = frozenset("()[]{},:.;@=%~") | frozenset(("...",))
1290
1291
1292 def _is_binary_operator(token_type, text):
1293 return (
1294 token_type == tokenize.OP or
1295 text in {'and', 'or'}
1296 ) and (
1297 text not in _SYMBOLIC_OPS
1298 )
1299
1300
1301 def _break_around_binary_operators(tokens):
1302 """Private function to reduce duplication.
1303
1304 This factors out the shared details between
1305 :func:`break_before_binary_operator` and
1306 :func:`break_after_binary_operator`.
1307 """
1308 line_break = False
1309 unary_context = True
1310 # Previous non-newline token types and text
1311 previous_token_type = None
1312 previous_text = None
1313 for token_type, text, start, end, line in tokens:
1314 if token_type == tokenize.COMMENT:
1315 continue
1316 if ('\n' in text or '\r' in text) and token_type != tokenize.STRING:
1317 line_break = True
1318 else:
1319 yield (token_type, text, previous_token_type, previous_text,
1320 line_break, unary_context, start)
1321 unary_context = text in '([{,;'
1322 line_break = False
1323 previous_token_type = token_type
1324 previous_text = text
1325
1326
1327 @register_check
def break_before_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks before binary operators.

    The preferred place to break around a binary operator is after the
    operator, not before it.

    W503: (width == 0\n + height == 0)
    W503: (width == 0\n and height == 0)
    W503: var = (1\n       & ~2)
    W503: var = (1\n       / -2)
    W503: var = (1\n       + -1\n       + -2)

    Okay: foo(\n    -x)
    Okay: foo(x\n    [])
    Okay: x = '''\n''' + ''
    Okay: foo(x,\n    -y)
    Okay: foo(x,  # comment\n    -y)
    """
    for (token_type, text, previous_token_type, previous_text,
         line_break, unary_context, start) in (
            _break_around_binary_operators(tokens)):
        # Only a token that opens a new line outside of a unary
        # context can be a misplaced binary operator.
        if not line_break or unary_context:
            continue
        if not _is_binary_operator(token_type, text):
            continue
        # An operator that directly follows another operator is not
        # reported.
        if _is_binary_operator(previous_token_type, previous_text):
            continue
        yield start, "W503 line break before binary operator"
1355
1356
1357 @register_check
def break_after_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks after binary operators.

    The preferred place to break around a binary operator is before the
    operator, not after it.

    W504: (width == 0 +\n height == 0)
    W504: (width == 0 and\n height == 0)
    W504: var = (1 &\n       ~2)

    Okay: foo(\n    -x)
    Okay: foo(x\n    [])
    Okay: x = '''\n''' + ''
    Okay: x = '' + '''\n'''
    Okay: foo(x,\n    -y)
    Okay: foo(x,  # comment\n    -y)

    The following should be W504 but unary_context is tricky with these
    Okay: var = (1 /\n       -2)
    Okay: var = (1 +\n       -1 +\n       -2)
    """
    # Start position of the previously yielded token, i.e. of the
    # operator that precedes the line break.
    operator_start = None
    for context in _break_around_binary_operators(tokens):
        (token_type, text, previous_token_type, previous_text,
         line_break, unary_context, start) = context
        broke_after_operator = (
            _is_binary_operator(previous_token_type, previous_text) and
            line_break and
            not unary_context and
            not _is_binary_operator(token_type, text)
        )
        if broke_after_operator:
            yield operator_start, "W504 line break after binary operator"
        operator_start = start
1390
1391
1392 @register_check
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E711: if None == arg:
    E712: if arg == True:
    E712: if False == arg:

    Also, beware of writing if x when you really mean if x is not None
    -- e.g. when testing whether a variable or argument that defaults to
    None was set to some other value.  The other value might have a type
    (such as a container) that could be false in a boolean context!
    """
    if noqa:
        return

    for found in COMPARE_SINGLETON_REGEX.finditer(logical_line):
        # The singleton is captured either left (group 1) or right
        # (group 3) of the operator (group 2).
        singleton = found.group(1) or found.group(3)
        same = found.group(2) == '=='

        if same:
            identity_test = singleton
        else:
            identity_test = 'not ' + singleton
        msg = "'if cond is %s:'" % identity_test

        if singleton == 'None':
            code = 'E711'
        else:
            code = 'E712'
            nonzero = ((singleton == 'True' and same) or
                       (singleton == 'False' and not same))
            if nonzero:
                msg += " or 'if %scond:'" % ''
            else:
                msg += " or 'if %scond:'" % 'not '
        yield found.start(2), ("%s comparison to %s should be %s" %
                               (code, singleton, msg))
1427
1428
1429 @register_check
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Only the first match of COMPARE_NEGATIVE_REGEX in the line is
    reported, at the start of its first group.

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    found = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if not found:
        return
    offset = found.start(1)
    if found.group(2) == 'in':
        yield offset, "E713 test for membership should be 'not in'"
    else:
        yield offset, "E714 test for object identity should be 'is not'"
1449
1450
1451 @register_check
def comparison_type(logical_line, noqa):
    r"""Object type comparisons should `is` / `is not` / `isinstance()`.

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    Okay: if type(obj) is int:
    E721: if type(obj) == type(1):
    """
    found = COMPARE_TYPE_REGEX.search(logical_line)
    if not found or noqa:
        return
    inst = found.group(1)
    # Allow comparison for types which are not obvious: a plain
    # identifier that is not one of the singletons is tolerated.
    tolerated = inst and inst.isidentifier() and inst not in SINGLETONS
    if tolerated:
        return
    yield (
        found.start(),
        "E721 do not compare types, for exact checks use `is` / `is not`, "
        "for instance checks use `isinstance()`",
    )
1471
1472
1473 @register_check
def bare_except(logical_line, noqa):
    r"""When catching exceptions, mention specific exceptions when
    possible.

    Okay: except Exception:
    Okay: except BaseException:
    E722: except:
    """
    found = None if noqa else BLANK_EXCEPT_REGEX.match(logical_line)
    if found:
        yield found.start(), "E722 do not use bare 'except'"
1488
1489
1490 @register_check
def ambiguous_identifier(logical_line, tokens):
    r"""Never use the characters 'l', 'O', or 'I' as variable names.

    In some fonts, these characters are indistinguishable from the
    numerals one and zero. When tempted to use 'l', use 'L' instead.

    Okay: L = 0
    Okay: o = 123
    Okay: i = 42
    E741: l = 0
    E741: O = 123
    E741: I = 42

    Variables can be bound in several other contexts, including class
    and function definitions, lambda functions, 'global' and 'nonlocal'
    statements, exception handlers, and 'with' and 'for' statements.
    In addition, we have a special handling for function parameters.

    Okay: except AttributeError as o:
    Okay: with lock as L:
    Okay: foo(l=12)
    Okay: foo(l=I)
    Okay: for a in foo(l=12):
    Okay: lambda arg: arg * l
    Okay: lambda a=l[I:5]: None
    Okay: lambda x=a.I: None
    Okay: if l >= 12:
    E741: except AttributeError as O:
    E741: with lock as l:
    E741: global I
    E741: nonlocal l
    E741: def foo(l):
    E741: def foo(l=12):
    E741: l = foo(l=12)
    E741: for l in range(10):
    E741: [l for l in lines if l]
    E741: lambda l: None
    E741: lambda a=x[1:5], l: None
    E741: lambda **l:
    E741: def f(**l):
    E742: class I(object):
    E743: def l(x):

    Yields (position, message) pairs.  ``tokens`` must hold at least
    one token; the first one only seeds the "previous token" state.
    """
    func_depth = None  # set to brace depth if 'def' or 'lambda' is found
    seen_colon = False  # set to true if we're done with function parameters
    brace_depth = 0
    idents_to_avoid = ('l', 'O', 'I')
    prev_text, prev_start = tokens[0][1:3]
    for index in range(1, len(tokens)):
        token_type, text, start, end, line = tokens[index]
        ident = pos = None
        # find function definitions
        if prev_text in {'def', 'lambda'}:
            func_depth = brace_depth
            seen_colon = False
        elif (
                func_depth is not None and
                text == ':' and
                brace_depth == func_depth
        ):
            seen_colon = True
        # update parameter parentheses level
        # Only operator tokens can be brackets.  Without the type
        # check, tokens with empty text (such as DEDENT) satisfy the
        # substring test below and corrupt the depth.
        if token_type == tokenize.OP:
            if text in '([{':
                brace_depth += 1
            elif text in ')]}':
                brace_depth -= 1
        # identifiers on the lhs of an assignment operator
        if text == ':=' or (text == '=' and brace_depth == 0):
            if prev_text in idents_to_avoid:
                ident = prev_text
                pos = prev_start
        # identifiers bound to values with 'as', 'for',
        # 'global', or 'nonlocal'
        if prev_text in ('as', 'for', 'global', 'nonlocal'):
            if text in idents_to_avoid:
                ident = text
                pos = start
        # function / lambda parameter definitions
        if (
                func_depth is not None and
                not seen_colon and
                index < len(tokens) - 1 and tokens[index + 1][1] in ':,=)' and
                prev_text in {'lambda', ',', '*', '**', '('} and
                text in idents_to_avoid
        ):
            ident = text
            pos = start
        if prev_text == 'class':
            if text in idents_to_avoid:
                yield start, "E742 ambiguous class definition '%s'" % text
        if prev_text == 'def':
            if text in idents_to_avoid:
                yield start, "E743 ambiguous function definition '%s'" % text
        if ident:
            yield pos, "E741 ambiguous variable name '%s'" % ident
        prev_text = text
        prev_start = start
1588
1589
1590 @register_check
def python_3000_invalid_escape_sequence(logical_line, tokens, noqa):
    r"""Invalid escape sequences are deprecated in Python 3.6.

    Okay: regex = r'\.png$'
    W605: regex = '\.png$'

    Every backslash in a string token (or in the literal part of an
    f-string) whose prefix has no 'r' is inspected; the character that
    follows it must be one of the recognised escape characters.
    Yields ((row, column), message) pairs pointing at the backslash.
    """
    if noqa:
        return

    # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
    valid = [
        '\n',
        '\\',
        '\'',
        '"',
        'a',
        'b',
        'f',
        'n',
        'r',
        't',
        'v',
        '0', '1', '2', '3', '4', '5', '6', '7',
        'x',

        # Escape sequences only recognized in string literals
        'N',
        'u',
        'U',
    ]

    # Stack of lowercased string prefixes; the top entry belongs to
    # the string (or f-string) currently being processed.
    prefixes = []
    for token_type, text, start, _, _ in tokens:
        if token_type in {tokenize.STRING, FSTRING_START}:
            # Extract string modifiers (e.g. u or r)
            prefixes.append(text[:text.index(text[-1])].lower())

        if token_type in {tokenize.STRING, FSTRING_MIDDLE}:
            if 'r' not in prefixes[-1]:
                start_line, start_col = start
                pos = text.find('\\')
                while pos >= 0:
                    pos += 1
                    if pos >= len(text):
                        # The token text ends with the backslash, so
                        # there is no following character to classify
                        # (indexing it would raise IndexError).
                        break
                    if text[pos] not in valid:
                        line = start_line + text.count('\n', 0, pos)
                        if line == start_line:
                            col = start_col + pos
                        else:
                            col = pos - text.rfind('\n', 0, pos) - 1
                        yield (
                            (line, col - 1),
                            f"W605 invalid escape sequence '\\{text[pos]}'"
                        )
                    # Resume after the escaped character so that "\\\\"
                    # is treated as one escape, not two.
                    pos = text.find('\\', pos + 1)

        if token_type in {tokenize.STRING, FSTRING_END}:
            prefixes.pop()
1648
1649
1650 ########################################################################
1651 @register_check
def maximum_doc_length(logical_line, max_doc_length, noqa, tokens):
    r"""Limit all doc lines to a maximum of 72 characters.

    For flowing long blocks of text (docstrings or comments), limiting
    the length to 72 characters is recommended.

    Reports warning W505

    Nothing is checked when ``max_doc_length`` is None or ``noqa`` is
    set.  Yields ((row, max_doc_length), message) pairs.
    """
    if max_doc_length is None or noqa:
        return

    prev_token = None
    skip_lines = set()
    # Collect the source lines of every token that is neither in
    # SKIP_COMMENTS nor a string, i.e. lines that hold real code.
    for token_type, text, start, end, line in tokens:
        if token_type not in SKIP_COMMENTS.union([tokenize.STRING]):
            skip_lines.add(line)

    for token_type, text, start, end, line in tokens:
        # Skip lines that aren't pure strings
        # (any collected line at all disables the check for strings;
        # ``continue`` also leaves prev_token untouched).
        if token_type == tokenize.STRING and skip_lines:
            continue
        if token_type in (tokenize.STRING, tokenize.COMMENT):
            # Only check comment-only lines
            if prev_token is None or prev_token in SKIP_TOKENS:
                lines = line.splitlines()
                for line_num, physical_line in enumerate(lines):
                    # A '#!' line on row 1 ends the whole check.
                    if start[0] + line_num == 1 and line.startswith('#!'):
                        return
                    length = len(physical_line)
                    chunks = physical_line.split()
                    # Tolerate a long last word (two-word comment, or
                    # a one-word line that is not the last one) when
                    # the text before it is shorter than the
                    # module-level MAX_DOC_LENGTH.
                    if token_type == tokenize.COMMENT:
                        if (len(chunks) == 2 and
                                length - len(chunks[-1]) < MAX_DOC_LENGTH):
                            continue
                    if len(chunks) == 1 and line_num + 1 < len(lines):
                        if (len(chunks) == 1 and
                                length - len(chunks[-1]) < MAX_DOC_LENGTH):
                            continue
                    if length > max_doc_length:
                        doc_error = (start[0] + line_num, max_doc_length)
                        yield (doc_error, "W505 doc line too long "
                                          "(%d > %d characters)"
                               % (length, max_doc_length))
        prev_token = token_type
1697
1698
1699 ########################################################################
1700 # Helper functions
1701 ########################################################################
1702
1703
def readlines(filename):
    """Return the lines of the source file *filename* as a list.

    The file is opened with ``tokenize.open``.  When opening or reading
    it raises LookupError, SyntaxError or UnicodeError, the file is
    read again as latin-1 instead.
    """
    try:
        with tokenize.open(filename) as source:
            content = source.readlines()
    except (LookupError, SyntaxError, UnicodeError):
        # Fall back if file encoding is improperly declared
        with open(filename, encoding='latin-1') as source:
            content = source.readlines()
    return content
1713
1714
def stdin_get_value():
    """Read everything from stdin and return it as text.

    The binary stdin buffer is wrapped in a text layer that ignores
    decoding errors.
    """
    reader = io.TextIOWrapper(sys.stdin.buffer, errors='ignore')
    return reader.read()
1718
1719
# Cached search for a "# noqa" / "# nopep8" marker (case-insensitive);
# returns a match object or None.  Up to 512 results are memoized.
noqa = lru_cache(512)(re.compile(r'# no(?:qa|pep8)\b', re.I).search)
1721
1722
def expand_indent(line):
    r"""Return the amount of indentation.

    Tabs are expanded to the next multiple of 8.  Trailing '\n' / '\r'
    characters are ignored.
    """
    content = line.rstrip('\n\r')
    if '\t' not in content:
        # No tab anywhere: the indent is the stripped leading part.
        return len(content) - len(content.lstrip())
    width = 0
    for char in content:
        if char == ' ':
            width += 1
        elif char == '\t':
            width = (width // 8 + 1) * 8
        else:
            break
    return width
1740
1741
def mute_string(text):
    """Replace contents with 'xxx' to prevent syntax matching.

    The string prefix and the quotes are kept; every character between
    the quotes becomes an 'x'.
    """
    # Triple quotes are three characters wide, plain quotes one.
    quote_width = 3 if text[-3:] in ('"""', "'''") else 1
    # The first occurrence of the closing quote character marks the end
    # of the string modifiers (e.g. u or r).
    body_start = text.index(text[-1]) + quote_width
    body_end = len(text) - quote_width
    return ''.join((text[:body_start],
                    'x' * (body_end - body_start),
                    text[body_end:]))
1752
1753
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines.

    ``diff`` is the text of a unified diff.  The result maps
    ``os.path.join(parent, path)`` to the set of row numbers covered by
    the hunks of that file.  Files without rows, or whose path is
    rejected by ``filename_match(path, patterns)``, are left out.
    """
    # For each file of the diff, the entry key is the filename,
    # and the value is a set of row numbers to consider.
    rv = {}
    path = nrows = None
    for line in diff.splitlines():
        if nrows:
            # Inside a hunk: every line that is not a removal counts
            # against the remaining rows of the hunk.
            if line[:1] != '-':
                nrows -= 1
            continue
        if line[:3] == '@@ ':
            hunk_match = HUNK_REGEX.match(line)
            # A missing group defaults to 1.
            (row, nrows) = (int(g or '1') for g in hunk_match.groups())
            rv[path].update(range(row, row + nrows))
        elif line[:3] == '+++':
            # Drop the '+++ ' marker and anything after a tab.
            path = line[4:].split('\t', 1)[0]
            # Git diff will use (i)ndex, (w)ork tree, (c)ommit and
            # (o)bject instead of a/b/c/d as prefixes for patches
            if path[:2] in ('b/', 'w/', 'i/'):
                path = path[2:]
            rv[path] = set()
    return {
        os.path.join(parent, filepath): rows
        for (filepath, rows) in rv.items()
        if rows and filename_match(filepath, patterns)
    }
1781
1782
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    A falsy *value* gives an empty list and a list is returned as is.
    Otherwise each comma-separated item is stripped; items containing
    a '/' are made absolute relative to *parent*, and trailing '/'
    characters are removed from every item.
    """
    if not value:
        return []
    if isinstance(value, list):
        return value

    def normalize(item):
        item = item.strip()
        if '/' in item:
            item = os.path.abspath(os.path.join(parent, item))
        return item.rstrip('/')

    return [normalize(item) for item in value.split(',')]
1799
1800
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is empty or unspecified, *default* is returned.
    """
    if not patterns:
        return default
    for pattern in patterns:
        if fnmatch(filename, pattern):
            return True
    return False
1809
1810
def update_counts(s, counts):
    r"""Adds one to the counts of each appearance of characters in s,
    for characters in counts

    *counts* is updated in place; characters that are not among its
    keys are ignored.
    """
    tracked = (char for char in s if char in counts)
    for char in tracked:
        counts[char] += 1
1817
1818
def _is_eol_token(token):
    """Tell whether *token* ends its physical line.

    True for a token whose type is in NEWLINE, or when only a
    backslash continuation follows the token on its source line.
    """
    if token[0] in NEWLINE:
        return True
    end_column = token[3][1]
    remainder = token[4][end_column:]
    return remainder.lstrip() == '\\\n'
1821
1822
1823 ########################################################################
1824 # Framework to run all checks
1825 ########################################################################
1826
1827
1828 class Checker:
1829 """Load a Python source file, tokenize it, check coding style."""
1830
1831 def __init__(self, filename=None, lines=None,
1832 options=None, report=None, **kwargs):
1833 if options is None:
1834 options = StyleGuide(kwargs).options
1835 else:
1836 assert not kwargs
1837 self._io_error = None
1838 self._physical_checks = options.physical_checks
1839 self._logical_checks = options.logical_checks
1840 self._ast_checks = options.ast_checks
1841 self.max_line_length = options.max_line_length
1842 self.max_doc_length = options.max_doc_length
1843 self.indent_size = options.indent_size
1844 self.fstring_start = 0
1845 self.multiline = False # in a multiline string?
1846 self.hang_closing = options.hang_closing
1847 self.indent_size = options.indent_size
1848 self.verbose = options.verbose
1849 self.filename = filename
1850 # Dictionary where a checker can store its custom state.
1851 self._checker_states = {}
1852 if filename is None:
1853 self.filename = 'stdin'
1854 self.lines = lines or []
1855 elif filename == '-':
1856 self.filename = 'stdin'
1857 self.lines = stdin_get_value().splitlines(True)
1858 elif lines is None:
1859 try:
1860 self.lines = readlines(filename)
1861 except OSError:
1862 (exc_type, exc) = sys.exc_info()[:2]
1863 self._io_error = f'{exc_type.__name__}: {exc}'
1864 self.lines = []
1865 else:
1866 self.lines = lines
1867 if self.lines:
1868 ord0 = ord(self.lines[0][0])
1869 if ord0 in (0xef, 0xfeff): # Strip the UTF-8 BOM
1870 if ord0 == 0xfeff:
1871 self.lines[0] = self.lines[0][1:]
1872 elif self.lines[0][:3] == '\xef\xbb\xbf':
1873 self.lines[0] = self.lines[0][3:]
1874 self.report = report or options.report
1875 self.report_error = self.report.error
1876 self.noqa = False
1877
def report_invalid_syntax(self):
    """Report the exception currently being handled as E901.

    Must be called while an exception is being handled.  The position
    is taken from the second exception argument when there is one
    (its items 1 and 2 if it has more than two items); otherwise row
    1, column 0 is used.
    """
    exc_type, exc = sys.exc_info()[:2]
    details = exc.args
    if len(details) <= 1:
        position = (1, 0)
    else:
        position = details[1]
        if len(position) > 2:
            position = position[1:3]
    row = position[0]
    column = position[1] or 0
    message = f'E901 {exc_type.__name__}: {exc.args[0]}'
    self.report_error(row, column, message, self.report_invalid_syntax)
1890
def readline(self):
    """Get the next line from the input buffer.

    Returns '' once ``self.line_number`` has reached
    ``self.total_lines``.  The first line seen that starts with a
    character in WHITESPACE sets ``self.indent_char`` if it is still
    None.
    """
    index = self.line_number
    if index >= self.total_lines:
        return ''
    current = self.lines[index]
    self.line_number = index + 1
    if self.indent_char is None and current[:1] in WHITESPACE:
        self.indent_char = current[0]
    return current
1900
def run_check(self, check, argument_names):
    """Run a check plugin.

    Each name in *argument_names* is looked up as an attribute of the
    checker; the values are passed positionally to *check* and its
    result is returned.
    """
    values = [getattr(self, name) for name in argument_names]
    return check(*values)
1907
def init_checker_state(self, name, argument_names):
    """Prepare custom state for the specific checker plugin.

    Only plugins that ask for a 'checker_state' argument get one: the
    dictionary stored under *name* (created on first use) is exposed
    as ``self.checker_state``.
    """
    if 'checker_state' not in argument_names:
        return
    states = self._checker_states
    if name not in states:
        states[name] = {}
    self.checker_state = states[name]
1912
def check_physical(self, line):
    """Run all physical checks on a raw input line.

    A check that returns something other than None is expected to
    return an ``(offset, text)`` pair, which is reported at the current
    line number.  After an E101 report the indent character is reset
    to the first character of *line*.
    """
    self.physical_line = line
    for name, check, argument_names in self._physical_checks:
        self.init_checker_state(name, argument_names)
        outcome = self.run_check(check, argument_names)
        if outcome is None:
            continue
        offset, text = outcome
        self.report_error(self.line_number, offset, text, check)
        if text[:4] == 'E101':
            self.indent_char = line[0]
1924
def build_tokens_line(self):
    """Join the pending tokens into ``self.logical_line``.

    String contents are muted and comments are left out of the logical
    line. Comment text is collected separately and used to set
    ``self.noqa``.

    Returns a list of ``(logical_offset, (row, col))`` pairs that map
    positions in the logical line back to the source, or None when no
    token contributed.
    """
    pieces = []
    comment_texts = []
    mapping = None
    logical_length = 0
    last_row = last_col = None
    for tok_type, tok_text, tok_start, tok_end, tok_line in self.tokens:
        if tok_type in SKIP_TOKENS:
            continue
        if not mapping:
            mapping = [(0, tok_start)]
        if tok_type == tokenize.COMMENT:
            comment_texts.append(tok_text)
            continue
        if tok_type == tokenize.STRING:
            tok_text = mute_string(tok_text)
        elif tok_type == FSTRING_MIDDLE:  # pragma: >=3.12 cover
            tok_text = 'x' * len(tok_text)
        if last_row:
            row, col = tok_start
            if last_row != row:
                # The token starts on a new physical row: join with one
                # space, except right after an opening bracket when the
                # token is a closing bracket.
                before = self.lines[last_row - 1][last_col - 1]
                needs_space = before == ',' or (
                    before not in '{[(' and tok_text not in '}])')
                if needs_space:
                    tok_text = ' ' + tok_text
            elif last_col != col:
                # Same row: keep the original text between the tokens.
                tok_text = tok_line[last_col:col] + tok_text
        pieces.append(tok_text)
        logical_length += len(tok_text)
        mapping.append((logical_length, tok_end))
        last_row, last_col = tok_end
    self.logical_line = ''.join(pieces)
    self.noqa = comment_texts and noqa(''.join(comment_texts))
    return mapping
1959
def check_logical(self):
    """Turn the pending tokens into a logical line and run the logical checks.

    Offsets reported by a check are either ``(row, col)`` tuples, used
    as they are, or integer positions in the logical line, which are
    translated back to a source position through the token mapping.
    The token buffer and the blank-line counter are reset afterwards.
    """
    self.report.increment_logical_line()
    mapping = self.build_tokens_line()
    if not mapping:
        return

    first_row, first_col = mapping[0][1]
    indent_text = self.lines[first_row - 1][:first_col]
    self.indent_level = expand_indent(indent_text)
    self.blank_before = max(self.blank_before, self.blank_lines)
    if self.verbose >= 2:
        print(self.logical_line[:80].rstrip())

    logical_offsets = [logical_offset for logical_offset, _ in mapping]
    for name, check, argument_names in self._logical_checks:
        if self.verbose >= 4:
            print(' ' + name)
        self.init_checker_state(name, argument_names)
        findings = self.run_check(check, argument_names) or ()
        for offset, text in findings:
            if isinstance(offset, tuple):
                row, col = offset[0], offset[1]
            else:
                # The mapping is ordered, so bisecting finds the token
                # that covers this logical offset.
                index = bisect.bisect_left(logical_offsets, offset)
                token_offset, pos = mapping[index]
                row, col = pos[0], pos[1] + offset - token_offset
            self.report_error(row, col, text, check)

    if self.logical_line:
        self.previous_indent_level = self.indent_level
        self.previous_logical = self.logical_line
        if not self.indent_level:
            self.previous_unindented_logical_line = self.logical_line
    self.blank_lines = 0
    self.tokens = []
1994
def check_ast(self):
    """Compile the buffered lines to an AST and run each AST checker on it.

    If the source cannot be compiled, the failure is reported through
    ``report_invalid_syntax`` and no AST check runs. Findings on a line
    marked with noqa are not reported.
    """
    try:
        tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
    except (ValueError, SyntaxError, TypeError):
        return self.report_invalid_syntax()
    for _name, checker_cls, _unused in self._ast_checks:
        checker = checker_cls(tree, self.filename)
        for lineno, offset, text, check in checker.run():
            suppressed = self.lines and noqa(self.lines[lineno - 1])
            if not suppressed:
                self.report_error(lineno, offset, text, check)
2006
def generate_tokens(self):
    """Yield the file's tokens, running physical-line checks along the way.

    A pending I/O error is reported first as E902. Tokens that start
    beyond the last buffered line end the iteration, and tokenizer
    failures are passed to ``report_invalid_syntax``.
    """
    if self._io_error:
        self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
    token_stream = tokenize.generate_tokens(self.readline)
    try:
        previous_line = ''
        for token in token_stream:
            start_row = token[2][0]
            if start_row > self.total_lines:
                return
            physical = token[4]
            self.noqa = physical and noqa(physical)
            self.maybe_check_physical(token, previous_line)
            yield token
            previous_line = token[4]
    except (SyntaxError, tokenize.TokenError):
        self.report_invalid_syntax()
2023
def maybe_check_physical(self, token, prev_physical):
    """Run the physical-line checks when ``token`` completes a line.

    Called for every token, but only three kinds trigger work:

    - an f-string start records the row the f-string begins on;
    - an end-of-line token checks the physical line it closes;
    - a string containing newlines, or an f-string end, checks every
      physical line of the string except the last one.
    """
    if token.type == FSTRING_START:  # pragma: >=3.12 cover
        self.fstring_start = token.start[0]
        return

    # A newline token ends a single physical line.
    if _is_eol_token(token):
        # If the file does not end with a newline, the parser inserts
        # the NEWLINE token itself and its `line` is empty, so fall
        # back to the previous physical line.
        if token.line == '':
            self.check_physical(prev_physical)
        else:
            self.check_physical(token.line)
        return

    spans_lines = (
        token.type == tokenize.STRING and '\n' in token.string or
        token.type == FSTRING_END
    )
    if not spans_lines:
        return

    # A string with embedded newlines is multiline (triple-quoted or
    # with backslash-escaped newlines). Its last line is skipped here
    # because that line's newline lies outside the string and is
    # checked like any other physical line. A "# noqa" on that last
    # line still disables the physical checks for the whole string.
    if noqa(token.line):
        return
    if token.type == FSTRING_END:  # pragma: >=3.12 cover
        first_row = self.fstring_start
    else:
        first_row = token.start[0]
    last_row = token.end[0]

    # self.line_number points at the end of the string at this stage;
    # rewind it so check_physical() reports accurate rows.
    self.multiline = True
    self.line_number = first_row
    for row in range(first_row, last_row):
        self.check_physical(self.lines[row - 1] + '\n')
        self.line_number += 1
    self.multiline = False
2072
def check_all(self, expected=None, line_offset=0):
    """Run the AST, physical and logical checks over the whole input.

    Resets the per-file state, walks the token stream and runs the
    logical checks whenever a logical line ends outside brackets.
    Returns the value of the report's ``get_file_results()``.
    """
    self.report.init_file(self.filename, self.lines, expected, line_offset)
    self.total_lines = len(self.lines)
    if self._ast_checks:
        self.check_ast()

    # Reset the per-file state.
    self.line_number = 0
    self.indent_char = None
    self.indent_level = self.previous_indent_level = 0
    self.previous_logical = ''
    self.previous_unindented_logical_line = ''
    self.tokens = []
    self.blank_lines = self.blank_before = 0

    depth = 0  # number of currently open brackets
    for token in self.generate_tokens():
        self.tokens.append(token)
        token_type, text, start, end = token[0:4]
        if self.verbose >= 3:
            if start[0] == end[0]:
                pos = '[{}:{}]'.format(start[1] or '', end[1])
            else:
                pos = 'l.%s' % end[0]
            print('l.%s\t%s\t%s\t%r' %
                  (start[0], pos, tokenize.tok_name[token_type], text))
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
            elif text in '}])':
                depth -= 1
            continue
        if depth or token_type not in NEWLINE:
            continue
        if token_type == tokenize.NEWLINE:
            self.check_logical()
            self.blank_before = 0
        elif len(self.tokens) == 1:
            # The physical line contains only this token.
            self.blank_lines += 1
            del self.tokens[0]
        else:
            self.check_logical()

    if self.tokens:
        # Tokens remain after the stream ended: check what is left.
        self.check_physical(self.lines[-1])
        self.check_logical()
    return self.report.get_file_results()
2117
2118
class BaseReport:
    """Keep counters and totals for the errors found by the checks."""

    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Results
        self.elapsed = 0
        self.total_errors = 0
        self.counters = {key: 0 for key in self._benchmark_keys}
        self.messages = {}

    def start(self):
        """Record the time at which checking begins."""
        self._start_time = time.time()

    def stop(self):
        """Store the time elapsed since ``start()`` in ``self.elapsed``."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Reset the per-file state and count the new file and its lines."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Count one more logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Record one error; return its code, or None if it is not counted.

        Ignored codes are dropped entirely. Expected codes update the
        per-code counter but not the file or total error counts.
        """
        code = text[:4]
        if self._ignore_code(code):
            return
        if code not in self.counters:
            # First occurrence: remember the message text as well.
            self.counters[code] = 1
            self.messages[code] = text[5:]
        else:
            self.counters[code] += 1
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the number of errors and warnings counted for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the summed counters of all codes starting with ``prefix``."""
        total = 0
        for code in self.messages:
            if code.startswith(prefix):
                total += self.counters[code]
        return total

    def get_statistics(self, prefix=''):
        """Return one formatted line per code that starts with ``prefix``.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        stats = []
        for code in sorted(self.messages):
            if not code.startswith(prefix):
                continue
            stats.append('%-7s %s %s' % (self.counters[code], code,
                                         self.messages[code]))
        return stats

    def print_statistics(self, prefix=''):
        """Print the statistics lines for codes starting with ``prefix``."""
        for entry in self.get_statistics(prefix):
            print(entry)

    def print_benchmark(self):
        """Print the elapsed time and the per-second rate of each benchmark key."""
        print('{:<7.2f} {}'.format(self.elapsed, 'seconds elapsed'))
        if not self.elapsed:
            return
        for key in self._benchmark_keys:
            count = self.counters[key]
            print('%-7d %s per second (%d total)' %
                  (count / self.elapsed, key, count))
2207
2208
class FileReport(BaseReport):
    """Report that also prints the name of each file with errors."""

    # Checked by BaseReport.error() before a file's first counted error.
    print_filename = True
2213
2214
class StandardReport(BaseReport):
    """Report that prints each collected error once a file is finished."""

    def __init__(self, options):
        super().__init__(options)
        requested = options.format
        # A known format name selects its template; any other value is
        # used as the template itself.
        self._fmt = REPORT_FORMAT.get(requested.lower(), requested)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Start a new file with an empty list of pending messages."""
        self._deferred_print = []
        return super().init_file(filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Count the error and queue it for printing when appropriate.

        An error is queued on the first occurrence of its code, or on
        every occurrence when repeating is enabled.
        """
        code = super().error(line_number, offset, text, check)
        if not code:
            return code
        if self.counters[code] == 1 or self._repeat:
            entry = (line_number, offset, code, text[5:], check.__doc__)
            self._deferred_print.append(entry)
        return code

    def get_file_results(self):
        """Print the queued errors in sorted order; return the file's count."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            fields = {
                'path': self.filename,
                'row': self.line_offset + line_number,
                'col': offset + 1,
                'code': code,
                'text': text,
            }
            print(self._fmt % fields)
            if self._show_source:
                beyond_end = line_number > len(self.lines)
                line = '' if beyond_end else self.lines[line_number - 1]
                print(line.rstrip())
                print(re.sub(r'\S', ' ', line[:offset]) + '^')
            if self._show_pep8 and doc:
                print(' ' + doc.strip())

            # stdout is block buffered when not stdout.isatty(), and a
            # line can be split at a buffer boundary when other
            # processes write to the same file. Flushing after each
            # message avoids that; the typical buffer size is 8192, so
            # a line is written safely when len(line) < 8192.
            sys.stdout.flush()
        return self.file_errors
2267
2268
class DiffReport(StandardReport):
    """Report that keeps only errors on the lines selected per file."""

    def __init__(self, options):
        super().__init__(options)
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        """Forward the error only if its line is selected for this file."""
        if line_number in self._selected[self.filename]:
            return super().error(line_number, offset, text, check)
        return None
2280
2281
class StyleGuide:
    """Hold the processed options and drive the checks over files."""

    def __init__(self, *args, **kwargs):
        # Settings consumed here rather than stored as options.
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', False)
        parser = kwargs.pop('parser', None)
        # Everything else overrides the processed options.
        overrides = dict(*args, **kwargs)
        if parse_argv:
            arglist = None
        else:
            arglist = overrides.get('paths', None)
        verbose = overrides.get('verbose', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser, verbose)
        if overrides:
            options.__dict__.update(overrides)
            if 'paths' in overrides:
                self.paths = overrides['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            if options.quiet:
                options.reporter = BaseReport
            else:
                options.reporter = StandardReport

        options.select = tuple(options.select or ())
        if not (options.select or options.ignore) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        elif options.select:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',)
        else:
            options.ignore = tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Create the report, store it on the options and return it.

        ``reporter`` overrides the reporter class held in the options.
        """
        report_class = reporter or self.options.reporter
        self.options.report = report_class(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Check every path (default: ``self.paths``) and return the report.

        Directories are walked; other paths are checked unless they are
        excluded. A KeyboardInterrupt stops the run early.
        """
        targets = self.paths if paths is None else paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in targets:
                if os.path.isdir(path):
                    self.input_dir(path)
                    continue
                if not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Check one source file and return the checker's result."""
        if self.options.verbose:
            print('checking %s' % filename)
        checker = self.checker_class(
            filename, lines=lines, options=self.options)
        return checker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Walk ``dirname`` and check every matching, non-excluded file."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        patterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Removing entries from `dirs` in place keeps os.walk from
            # descending into excluded sub-directories.
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    dirs.remove(subdir)
            for filename in sorted(files):
                if not filename_match(filename, patterns):
                    continue
                if self.excluded(filename, root):
                    continue
                runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Tell whether ``filename`` matches a pattern in 'options.exclude'.

        The base name is tried first, then the absolute path (joined
        with ``parent`` when given).
        """
        patterns = self.options.exclude
        if not patterns:
            return False
        if filename_match(os.path.basename(filename), patterns):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        return filename_match(os.path.abspath(filename),
                              self.options.exclude)

    def ignore_code(self, code):
        """Tell whether the error code should be ignored.

        A code shorter than four characters that is a prefix of a
        selected entry is never ignored. Otherwise the code is ignored
        when 'options.ignore' contains a prefix of it and
        'options.select' does not.
        """
        if len(code) < 4:
            for selected in self.options.select:
                if selected.startswith(code):
                    return False
        if not code.startswith(self.options.ignore):
            return False
        return not code.startswith(self.options.select)

    def get_checks(self, argument_name):
        """Return the sorted checks registered for ``argument_name``.

        A check is kept when at least one of its codes is empty or not
        ignored. Each entry is a ``(name, function, args)`` tuple.
        """
        selected = []
        for check, (codes, args) in _checks[argument_name].items():
            all_ignored = all(code and self.ignore_code(code)
                              for code in codes)
            if not all_ignored:
                selected.append((check.__name__, check, args))
        return sorted(selected)
2417
2418
def get_parser(prog='pycodestyle', version=__version__):
    """Build the option parser with all command-line options.

    ``parser.config_options`` lists the option names that may also be
    set from a configuration file.
    """
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'max-doc-length', 'indent-size', 'hang-closing', 'count', 'format',
        'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose']

    add = parser.add_option

    # Verbosity and repetition
    add('-v', '--verbose', default=0, action='count',
        help="print status messages, or debug with -vv")
    add('-q', '--quiet', default=0, action='count',
        help="report only file names, or nothing with -qq")
    add('-r', '--repeat', default=True, action='store_true',
        help="(obsolete) show all occurrences of the same error")
    add('--first', action='store_false', dest='repeat',
        help="show first occurrence of each error")

    # File and error selection
    add('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
        help="exclude files or directories which match these "
             "comma separated patterns (default: %default)")
    add('--filename', metavar='patterns', default='*.py',
        help="when parsing directories, only check filenames "
             "matching these comma separated patterns "
             "(default: %default)")
    add('--select', metavar='errors', default='',
        help="select errors and warnings (e.g. E,W6)")
    add('--ignore', metavar='errors', default='',
        help="skip errors and warnings (e.g. E4,W) "
             "(default: %s)" % DEFAULT_IGNORE)

    # Output
    add('--show-source', action='store_true',
        help="show source code for each error")
    add('--show-pep8', action='store_true',
        help="show text of PEP 8 for each error "
             "(implies --first)")
    add('--statistics', action='store_true',
        help="count errors and warnings")
    add('--count', action='store_true',
        help="print total number of errors and warnings "
             "to standard error and set exit code to 1 if "
             "total is not null")

    # Limits and layout
    add('--max-line-length', type='int', metavar='n',
        default=MAX_LINE_LENGTH,
        help="set maximum allowed line length "
             "(default: %default)")
    add('--max-doc-length', type='int', metavar='n',
        default=None,
        help="set maximum allowed doc line length and perform "
             "these checks (unchecked if not set)")
    add('--indent-size', type='int', metavar='n',
        default=INDENT_SIZE,
        help="set how many spaces make up an indent "
             "(default: %default)")
    add('--hang-closing', action='store_true',
        help="hang closing bracket instead of matching "
             "indentation of opening bracket's line")
    add('--format', metavar='format', default='default',
        help="set the error format [default|pylint|<custom>]")
    add('--diff', action='store_true',
        help="report changes only within line number ranges in "
             "the unified diff received on STDIN")

    testing = parser.add_option_group("Testing Options")
    testing.add_option('--benchmark', action='store_true',
                       help="measure processing speed")
    return parser
2482
2483
def read_config(options, args, arglist, parser):
    """Read the configuration files and merge them into the options.

    Three sources are read into one ConfigParser, in this order: the
    user configuration (USER_CONFIG), the first project configuration
    found in the common parent of ``args`` or above it, and the file
    given with "--config". Later reads override earlier ones.

    If a section named after ``parser.prog`` (or the deprecated
    [pep8]) exists, its values are applied on top of the parser
    defaults and the command line is then parsed again over them.
    Otherwise ``options`` is returned unchanged.
    """
    config = configparser.RawConfigParser()
    cli_conf = options.config
    local_dir = os.curdir

    if USER_CONFIG and os.path.isfile(USER_CONFIG):
        if options.verbose:
            print('user configuration: %s' % USER_CONFIG)
        config.read(USER_CONFIG)

    # Walk upwards from the common prefix of the inputs until a project
    # configuration file is found.
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG):
            local_dir = parent
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        parent, tail = os.path.split(parent)

    if cli_conf and os.path.isfile(cli_conf):
        if options.verbose:
            print('cli configuration: %s' % cli_conf)
        config.read(cli_conf)

    if config.has_section(parser.prog):
        section = parser.prog
    elif config.has_section('pep8'):
        section = 'pep8'  # Deprecated
        warnings.warn('[pep8] section is deprecated. Use [pycodestyle].')
    else:
        section = None

    if not section:
        return options

    option_kinds = {}
    for option in parser.option_list:
        option_kinds[option.dest] = option.type or option.action

    # First, read the default values
    new_options, _ = parser.parse_args([])

    # Second, parse the configuration
    for opt in config.options(section):
        if opt.replace('_', '-') not in parser.config_options:
            print(" unknown option '%s' ignored" % opt)
            continue
        if options.verbose > 1:
            print(" {} = {}".format(opt, config.get(section, opt)))
        dest = opt.replace('-', '_')
        kind = option_kinds[dest]
        if kind in ('int', 'count'):
            value = config.getint(section, opt)
        elif kind in ('store_true', 'store_false'):
            value = config.getboolean(section, opt)
        else:
            value = config.get(section, opt)
            if dest == 'exclude':
                value = normalize_paths(value, local_dir)
        setattr(new_options, dest, value)

    # Third, overwrite with the command-line options
    options, _ = parser.parse_args(arglist, values=new_options)
    return options
2556
2557
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None, verbose=None):
    """Parse the arguments and configuration into ``(options, args)``.

    ``arglist`` is parsed when given; with ``parse_argv`` true and no
    ``arglist`` the parser reads the command line instead. Passing
    ``config_file`` lets other tools, such as flake8, supply their own
    default for the "--config" option.
    """
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        description = (
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed. Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options)))
        config_group = parser.add_option_group(
            "Configuration", description=description)
        config_group.add_option('--config', metavar='path',
                                default=config_file,
                                help="user config file location")

    # Don't read the command line if the module is used as a library.
    if not (arglist or parse_argv):
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    options, args = parser.parse_args(arglist)
    options.reporter = None

    # If explicitly specified verbosity, override any `-v` CLI flag
    if verbose is not None:
        options.verbose = verbose

    if parse_argv and not args:
        has_project_config = options.diff or any(
            os.path.exists(name) for name in PROJECT_CONFIG)
        if has_project_config:
            args = ['.']
        else:
            parser.error('input not specified')

    options = read_config(options, args, arglist, parser)
    options.reporter = parse_argv and options.quiet == 1 and FileReport

    options.filename = _parse_multi_options(options.filename)
    options.exclude = normalize_paths(options.exclude)
    options.select = _parse_multi_options(options.select)
    options.ignore = _parse_multi_options(options.ignore)

    if options.diff:
        options.reporter = DiffReport
        diff_text = stdin_get_value()
        options.selected_lines = parse_udiff(
            diff_text, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
2608
2609
2610 def _parse_multi_options(options, split_token=','):
2611 r"""Split and strip and discard empties.
2612
2613 Turns the following:
2614
2615 A,
2616 B,
2617
2618 into ["A", "B"]
2619 """
2620 if options:
2621 return [o.strip() for o in options.split(split_token) if o.strip()]
2622 else:
2623 return options
2624
2625
def _main():
    """Command-line entry point: parse the options and check the inputs.

    Exits with status 1 when any error was counted.
    """
    import signal

    # Handle "Broken pipe" gracefully
    try:
        signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
    except AttributeError:
        pass  # not supported on Windows

    style_guide = StyleGuide(parse_argv=True)
    options = style_guide.options
    report = style_guide.check_files()

    if options.statistics:
        report.print_statistics()
    if options.benchmark:
        report.print_benchmark()

    if not report.total_errors:
        return
    if options.count:
        sys.stderr.write(str(report.total_errors) + '\n')
    sys.exit(1)
2651
2652
2653 if __name__ == '__main__':
2654 _main()