#!/usr/bin/env python
# pycodestyle.py - Check Python source code formatting, according to
# PEP 8
#
# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
# Copyright (C) 2014-2016 Ian Lee <ianlee1521@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
r"""
Check Python source code formatting, according to PEP 8.

For usage and a list of options, try this:
$ python pycodestyle.py -h

This program and its regression test suite live here:
https://github.com/pycqa/pycodestyle

Groups of errors and warnings:
E errors
W warnings
100 indentation
200 whitespace
300 blank lines
400 imports
500 line length
600 deprecation
700 statements
900 syntax error
"""
import bisect
import configparser
import inspect
import io
import keyword
import os
import re
import sys
import time
import tokenize
import warnings
from fnmatch import fnmatch
from functools import lru_cache
from optparse import OptionParser

# this is a performance hack.  see https://bugs.python.org/issue43014
if (
        sys.version_info < (3, 10) and
        callable(getattr(tokenize, '_compile', None))
):  # pragma: no cover (<py310)
    tokenize._compile = lru_cache(tokenize._compile)  # type: ignore

__version__ = '2.11.0'

DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704,W503,W504'
try:
    if sys.platform == 'win32':  # pragma: win32 cover
        USER_CONFIG = os.path.expanduser(r'~\.pycodestyle')
    else:  # pragma: win32 no cover
        USER_CONFIG = os.path.join(
            os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'),
            'pycodestyle'
        )
except ImportError:
    USER_CONFIG = None

PROJECT_CONFIG = ('setup.cfg', 'tox.ini')
MAX_LINE_LENGTH = 79
# Number of blank lines between various code parts.
BLANK_LINES_CONFIG = {
    # Top level class and function.
    'top_level': 2,
    # Methods and nested class and function.
    'method': 1,
}
MAX_DOC_LENGTH = 72
INDENT_SIZE = 4
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-', '@'])
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=',
    'and', 'in', 'is', 'or', '->', ':='])
WHITESPACE = frozenset(' \t\xa0')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({][ \t]|[ \t][\]}),;:](?!=)')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'(\bNone|\bFalse|\bTrue)?\s*([=!]=)'
                                     r'\s*(?(1)|(None|False|True))\b')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(?<!is\s)(not)\s+[^][)(}{ ]+\s+'
                                    r'(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(
    r'[=!]=\s+type(?:\s*\(\s*([^)]*[^ )])\s*\))'
    r'|\btype(?:\s*\(\s*([^)]*[^ )])\s*\))\s+[=!]='
)
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
STARTSWITH_TOP_LEVEL_REGEX = re.compile(r'^(async\s+def\s+|def\s+|class\s+|@)')
STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
    r'^\s*({})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
        'def', 'async def',
        'for', 'async for',
        'if', 'elif', 'else',
        'try', 'except', 'finally',
        'with', 'async with',
        'class',
        'while',
    )))
)
DUNDER_REGEX = re.compile(r"^__([^\s]+)__(?::\s*[a-zA-Z.0-9_\[\]\"]+)? = ")
BLANK_EXCEPT_REGEX = re.compile(r"except\s*:")

if sys.version_info >= (3, 12):  # pragma: >=3.12 cover
    FSTRING_START = tokenize.FSTRING_START
    FSTRING_MIDDLE = tokenize.FSTRING_MIDDLE
    FSTRING_END = tokenize.FSTRING_END
else:  # pragma: <3.12 cover
    FSTRING_START = FSTRING_MIDDLE = FSTRING_END = -1

_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}


def _get_parameters(function):
    return [parameter.name
            for parameter
            in inspect.signature(function).parameters.values()
            if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]
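# Illustrative note: only plain (positional-or-keyword) parameters are
# kept above, so for "def f(a, b=1, *args, **kwargs)" this helper
# returns ['a', 'b'].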


def register_check(check, codes=None):
    """Register a new check object."""
    def _add_check(check, kind, codes, args):
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    if inspect.isfunction(check):
        args = _get_parameters(check)
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
    return check

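# A minimal sketch of the plugin convention (illustrative only, not a
# check that ships with pycodestyle): the first parameter name selects
# the kind of check, the error codes are harvested from the docstring
# by ERRORCODE_REGEX, and any further parameter names select checker
# attributes to pass in at run time.
#
#     @register_check
#     def no_fixme(physical_line):
#         r"""Flag FIXME markers (hypothetical code W999)."""
#         pos = physical_line.find('FIXME')
#         if pos != -1:
#             return pos, "W999 fixme marker found"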

########################################################################
# Plugins (check functions) for physical lines
########################################################################

@register_check
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a
    mixture of tabs and spaces should be converted to using spaces
    exclusively.  When invoking the Python 2 command line interpreter
    with the -t option, it issues warnings about code that illegally
    mixes tabs and spaces; with -tt these warnings become errors.  In
    Python 3, inconsistent mixing of tabs and spaces in indentation is
    always an error.

    Okay: if a == 0:\n        a = 1\n        b = 1
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    for offset, char in enumerate(indent):
        if char != indent_char:
            return offset, "E101 indentation contains mixed spaces and tabs"

@register_check
def tabs_obsolete(physical_line):
    r"""On new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    if '\t' in indent:
        return indent.index('\t'), "W191 indentation contains tabs"

@register_check
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    The warning returned varies depending on whether the line itself is
    blank, for easier filtering by those who want to indent their blank
    lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    physical_line = physical_line.rstrip('\n')    # chr(10), newline
    physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
    physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
    stripped = physical_line.rstrip(' \t\v')
    if physical_line != stripped:
        if stripped:
            return len(stripped), "W291 trailing whitespace"
        else:
            return 0, "W293 blank line contains whitespace"


@register_check
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    However, the last line should end with a new line (warning W292).
    """
    if line_number == total_lines:
        stripped_last_line = physical_line.rstrip('\r\n')
        if physical_line and not stripped_last_line:
            return 0, "W391 blank line at end of file"
        if stripped_last_line == physical_line:
            return len(lines[-1]), "W292 no newline at end of file"

@register_check
def maximum_line_length(physical_line, max_line_length, multiline,
                        line_number, noqa):
    r"""Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to
    have several windows side-by-side.  The default wrapping on such
    devices looks ugly.  Therefore, please limit all lines to a maximum
    of 79 characters.  For flowing long blocks of text (docstrings or
    comments), limiting the length to 72 characters is recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    if length > max_line_length and not noqa:
        # Special case: ignore long shebang lines.
        if line_number == 1 and line.startswith('#!'):
            return
        # Special case for long URLs in multi-line docstrings or
        # comments, but still report the error when the first 72 chars
        # are whitespace.
        chunks = line.split()
        if ((len(chunks) == 1 and multiline) or
            (len(chunks) == 2 and chunks[0] == '#')) and \
                len(line) - len(chunks[-1]) < max_line_length - 7:
            return
        if length > max_line_length:
            return (max_line_length, "E501 line too long "
                    "(%d > %d characters)" % (length, max_line_length))

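# Note: like every check, E501 is suppressed on lines carrying a
# "# noqa" comment (the noqa argument above); the special cases
# additionally exempt shebang lines and otherwise unsplittable long
# tokens such as URLs in comments or docstrings.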

########################################################################
# Plugins (check functions) for logical lines
########################################################################


def _is_one_liner(logical_line, indent_level, lines, line_number):
    if not STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        return False

    line_idx = line_number - 1

    if line_idx < 1:
        prev_indent = 0
    else:
        prev_indent = expand_indent(lines[line_idx - 1])

    if prev_indent > indent_level:
        return False

    while line_idx < len(lines):
        line = lines[line_idx].strip()
        if not line.startswith('@') and STARTSWITH_TOP_LEVEL_REGEX.match(line):
            break
        else:
            line_idx += 1
    else:
        return False  # invalid syntax: EOF while searching for def/class

    next_idx = line_idx + 1
    while next_idx < len(lines):
        if lines[next_idx].strip():
            break
        else:
            next_idx += 1
    else:
        return True  # line is last in the file

    return expand_indent(lines[next_idx]) <= indent_level
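# Illustrative example of the "group of one-liners" this helper
# recognizes (blank_lines below uses it to allow omitting the usual
# blank lines between such defs):
#     def fa(): pass
#     def fb(): pass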


@register_check
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical,
                previous_unindented_logical_line, previous_indent_level,
                lines):
    r"""Separate top-level function and class definitions with two blank
    lines.

    Method definitions inside a class are separated by a single blank
    line.

    Extra blank lines may be used (sparingly) to separate groups of
    related functions.  Blank lines may be omitted between a bunch of
    related one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical
    sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\nasync def b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
    Okay: default = 1\nfoo = 1
    Okay: classify = 1\nfoo = 1

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E302: def a():\n    pass\n\nasync def b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    E305: def a():\n    pass\na()
    E306: def a():\n    def b():\n        pass\n    def c():\n        pass
    """  # noqa
    top_level_lines = BLANK_LINES_CONFIG['top_level']
    method_lines = BLANK_LINES_CONFIG['method']

    if not previous_logical and blank_before < top_level_lines:
        return  # Don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
    elif (blank_lines > top_level_lines or
            (indent_level and blank_lines == method_lines + 1)
          ):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
    elif STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        # allow a group of one-liners
        if (
            _is_one_liner(logical_line, indent_level, lines, line_number) and
            blank_before == 0
        ):
            return
        if indent_level:
            if not (blank_before == method_lines or
                    previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)
                    ):
                ancestor_level = indent_level
                nested = False
                # Search backwards for a def ancestor or tree root
                # (top level).
                for line in lines[line_number - top_level_lines::-1]:
                    if line.strip() and expand_indent(line) < ancestor_level:
                        ancestor_level = expand_indent(line)
                        nested = STARTSWITH_DEF_REGEX.match(line.lstrip())
                        if nested or ancestor_level == 0:
                            break
                if nested:
                    yield 0, "E306 expected %s blank line before a " \
                        "nested definition, found 0" % (method_lines,)
                else:
                    yield 0, "E301 expected {} blank line, found 0".format(
                        method_lines)
        elif blank_before != top_level_lines:
            yield 0, "E302 expected %s blank lines, found %d" % (
                top_level_lines, blank_before)
    elif (logical_line and
            not indent_level and
            blank_before != top_level_lines and
            previous_unindented_logical_line.startswith(('def ', 'class '))
          ):
        yield 0, "E305 expected %s blank lines after " \
            "class or function definition, found %d" % (
                top_level_lines, blank_before)


@register_check
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    line = logical_line
    for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
        text = match.group()
        char = text.strip()
        found = match.start()
        if text[-1].isspace():
            # assert char in '([{'
            yield found + 1, "E201 whitespace after '%s'" % char
        elif line[found - 1] != ',':
            code = ('E202' if char in '}])' else 'E203')  # if char in ',;:'
            yield found, f"{code} whitespace before '{char}'"


@register_check
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    for match in KEYWORD_REGEX.finditer(logical_line):
        before, after = match.groups()

        if '\t' in before:
            yield match.start(1), "E274 tab before keyword"
        elif len(before) > 1:
            yield match.start(1), "E272 multiple spaces before keyword"

        if '\t' in after:
            yield match.start(2), "E273 tab after keyword"
        elif len(after) > 1:
            yield match.start(2), "E271 multiple spaces after keyword"


@register_check
def missing_whitespace_after_keyword(logical_line, tokens):
    r"""Keywords should be followed by whitespace.

    Okay: from foo import (bar, baz)
    E275: from foo import(bar, baz)
    E275: from importable.module import(bar, baz)
    E275: if(foo): bar
    """
    for tok0, tok1 in zip(tokens, tokens[1:]):
        # This must exclude the True/False/None singletons, which can
        # appear e.g. as "if x is None:", and async/await, which were
        # valid identifier names in old Python versions.
        if (tok0.end == tok1.start and
                keyword.iskeyword(tok0.string) and
                tok0.string not in SINGLETONS and
                not (tok0.string == 'except' and tok1.string == '*') and
                not (tok0.string == 'yield' and tok1.string == ')') and
                tok1.string not in ':\n'):
            yield tok0.end, "E275 missing whitespace after keyword"


@register_check
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level,
                indent_size):
    r"""Use indent_size (PEP8 says 4) spaces per indentation level.

    For really old code that you don't want to mess up, you can continue
    to use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1
    E114:   # a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass
    E115: for item in items:\n# Hi\n    pass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    E116: a = 1\n    # b = 2
    """
    c = 0 if logical_line else 3
    tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)"
    if indent_level % indent_size:
        yield 0, tmpl % (
            1 + c,
            "indentation is not a multiple of " + str(indent_size),
        )
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        yield 0, tmpl % (2 + c, "expected an indented block")
    elif not indent_expect and indent_level > previous_indent_level:
        yield 0, tmpl % (3 + c, "unexpected indentation")

    if indent_expect:
        expected_indent_amount = 8 if indent_char == '\t' else 4
        expected_indent_level = previous_indent_level + expected_indent_amount
        if indent_level > expected_indent_level:
            yield 0, tmpl % (7, 'over-indented')

@register_check
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, indent_size, noqa, verbose):
    r"""Indentation of continuation lines.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself
      as a continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (indent_size,) if indent_char != '\t' \
        else (indent_size, indent_size * 2)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    last_token_multiline = False
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening
                # bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and
                                    rel_indent[row] == 2 * indent_size):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a
                # previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    hangs[depth] = hang
                    if hang > indent_size:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and
                token_type not in (tokenize.NL, tokenize.COMMENT) and
                not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print(f"bracket depth {depth} indent to {start[1]}")
        # deal with implicit string concatenation
        elif token_type in (tokenize.STRING, tokenize.COMMENT, FSTRING_START):
            indent_chances[start[1]] = str
        # visual indent after assert/raise/with
        elif not row and not depth and text in ["assert", "raise", "with"]:
            indent_chances[end[1] + 1] = True
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow lining up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

    if indent_next and expand_indent(line) == indent_level + indent_size:
        pos = (start[0], indent[0] + indent_size)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code

@register_check
def whitespace_before_parameters(logical_line, tokens):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in the following situations:
    - before the open parenthesis that starts the argument list of a
      function call.
    - before the open parenthesis that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, __, prev_end, __ = tokens[0]
    for index in range(1, len(tokens)):
        token_type, text, start, end, __ = tokens[index]
        if (
            token_type == tokenize.OP and
            text in '([' and
            start != prev_end and
            (prev_type == tokenize.NAME or prev_text in '}])') and
            # Syntax "class A (B):" is allowed, but avoid it
            (index < 2 or tokens[index - 2][1] != 'class') and
            # Allow "return (a.foo for a in range(5))"
            not keyword.iskeyword(prev_text) and
            (
                sys.version_info < (3, 9) or
                # 3.12+: type is a soft keyword but no braces after
                prev_text == 'type' or
                not keyword.issoftkeyword(prev_text)
            )
        ):
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type = token_type
        prev_text = text
        prev_end = end


@register_check
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    for match in OPERATOR_REGEX.finditer(logical_line):
        before, after = match.groups()

        if '\t' in before:
            yield match.start(1), "E223 tab before operator"
        elif len(before) > 1:
            yield match.start(1), "E221 multiple spaces before operator"

        if '\t' in after:
            yield match.start(2), "E224 tab after operator"
        elif len(after) > 1:
            yield match.start(2), "E222 multiple spaces after operator"


@register_check
def missing_whitespace(logical_line, tokens):
    r"""Surround operators with the correct amount of whitespace.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Each comma, semicolon or colon should be followed by whitespace.

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]
    Okay: [a, b]
    Okay: (3,)
    Okay: a[3,] = 1
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E225: z = 1and 1
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    operator_types = (tokenize.OP, tokenize.NAME)
    brace_stack = []
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.OP and text in {'[', '(', '{'}:
            brace_stack.append(text)
        elif token_type == FSTRING_START:  # pragma: >=3.12 cover
            brace_stack.append('f')
        elif token_type == tokenize.NAME and text == 'lambda':
            brace_stack.append('l')
        elif brace_stack:
            if token_type == tokenize.OP and text in {']', ')', '}'}:
                brace_stack.pop()
            elif token_type == FSTRING_END:  # pragma: >=3.12 cover
                brace_stack.pop()
            elif (
                    brace_stack[-1] == 'l' and
                    token_type == tokenize.OP and
                    text == ':'
            ):
                brace_stack.pop()

        if token_type in SKIP_COMMENTS:
            continue

        if token_type == tokenize.OP and text in {',', ';', ':'}:
            next_char = line[end[1]:end[1] + 1]
            if next_char not in WHITESPACE and next_char not in '\r\n':
                # slice
                if text == ':' and brace_stack[-1:] == ['[']:
                    pass
                # 3.12+ fstring format specifier
                elif text == ':' and brace_stack[-2:] == ['f', '{']:  # pragma: >=3.12 cover  # noqa: E501
                    pass
                # tuple (and list for some reason?)
                elif text == ',' and next_char in ')]':
                    pass
                else:
                    yield start, f'E231 missing whitespace after {text!r}'

        if need_space:
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif (
                    # def f(a, /, b):
                    #           ^
                    # def f(a, b, /):
                    #             ^
                    # f = lambda a, /:
                    #                ^
                    prev_text == '/' and text in {',', ')', ':'} or
                    # def f(a, b, /):
                    #               ^
                    prev_text == ')' and text == ':'
            ):
                # Tolerate the "/" operator in function definition
                # For more info see PEP570
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                elif prev_text != '**':
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type in operator_types and prev_end is not None:
            if (
                    text == '=' and (
                        # allow lambda default args: lambda x=None: None
                        brace_stack[-1:] == ['l'] or
                        # allow keyword args or defaults: foo(bar=None).
                        brace_stack[-1:] == ['('] or
                        # allow python 3.8 fstring repr specifier
                        brace_stack[-2:] == ['f', '{']
                    )
            ):
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP and prev_text in '}])' or (
                    prev_type != tokenize.OP and
                    prev_text not in KEYWORDS and (
                        sys.version_info < (3, 9) or
                        not keyword.issoftkeyword(prev_text)
                    )
                ):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end


@register_check
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    line = logical_line
    for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):
        found = m.start() + 1
        if '\t' in m.group():
            yield found, "E242 tab after '%s'" % m.group()[0]
        else:
            yield found, "E241 multiple spaces after '%s'" % m.group()[0]


@register_check
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value, except when
    using a type annotation.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)
    Okay: def foo(arg: int = 42):
    Okay: async def foo(arg: int = 42):

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    E252: def complex(real, image: float=0.0):
    """
    parens = 0
    no_space = False
    require_space = False
    prev_end = None
    annotated_func_arg = False
    in_def = bool(STARTSWITH_DEF_REGEX.match(logical_line))

    message = "E251 unexpected spaces around keyword / parameter equals"
    missing_message = "E252 missing whitespace around parameter equals"

    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if no_space:
            no_space = False
            if start != prev_end:
                yield (prev_end, message)
        if require_space:
            require_space = False
            if start == prev_end:
                yield (prev_end, missing_message)
        if token_type == tokenize.OP:
            if text in '([':
                parens += 1
            elif text in ')]':
                parens -= 1
            elif in_def and text == ':' and parens == 1:
                annotated_func_arg = True
            elif parens == 1 and text == ',':
                annotated_func_arg = False
            elif parens and text == '=':
                if annotated_func_arg and parens == 1:
                    require_space = True
                    if start == prev_end:
                        yield (prev_end, missing_message)
                else:
                    no_space = True
                    if start != prev_end:
                        yield (prev_end, message)
            if not parens:
                annotated_func_arg = False

        prev_end = end


@register_check
def whitespace_before_comment(logical_line, tokens):
    """Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.
    Inline comments should be separated by at least two spaces from the
    statement.  They should start with a # and a single space.

    Each line of a block comment starts with a # and one or multiple
    spaces as there can be indented text inside the comment.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comments:
    Okay: #  - Block comment list
    Okay: # \xa0- Block comment list
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    E262: x = x + 1  # \xa0Increment x
    E265: #Block comment
    E266: ### Block comment
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            inline_comment = line[:start[1]].strip()
            if inline_comment:
                if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                    yield (prev_end,
                           "E261 at least two spaces before inline comment")
            symbol, sp, comment = text.partition(' ')
            bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
            if inline_comment:
                if bad_prefix or comment[:1] in WHITESPACE:
                    yield start, "E262 inline comment should start with '# '"
            elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
                if bad_prefix != '#':
                    yield start, "E265 block comment should start with '# '"
                elif comment:
                    yield start, "E266 too many leading '#' for block comment"
        elif token_type != tokenize.NL:
            prev_end = end

@register_check
def imports_on_separate_lines(logical_line):
    r"""Place imports on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclass import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    line = logical_line
    if line.startswith('import '):
        found = line.find(',')
        if -1 < found and ';' not in line[:found]:
            yield found, "E401 multiple imports on one line"

@register_check
def module_imports_on_top_of_file(
        logical_line, indent_level, checker_state, noqa):
    r"""Place imports at the top of the file.

    Always put imports at the top of the file, just after any module
    comments and docstrings, and before module globals and constants.

    Okay: import os
    Okay: # this is a comment\nimport os
    Okay: '''this is a module docstring'''\nimport os
    Okay: r'''this is a module docstring'''\nimport os
    E402: a=1\nimport os
    E402: 'One string'\n"Two string"\nimport os
    E402: a=1\nfrom sys import x

    Okay: if x:\n    import os
    """  # noqa
    def is_string_literal(line):
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    allowed_keywords = (
        'try', 'except', 'else', 'finally', 'with', 'if', 'elif')

    if indent_level:  # Allow imports in conditional statement/function
        return
    if not logical_line:  # Allow empty lines or comments
        return
    if noqa:
        return
    line = logical_line
    if line.startswith('import ') or line.startswith('from '):
        if checker_state.get('seen_non_imports', False):
            yield 0, "E402 module level import not at top of file"
    elif re.match(DUNDER_REGEX, line):
        return
    elif any(line.startswith(kw) for kw in allowed_keywords):
        # Allow certain keywords intermixed with imports in order to
        # support conditional or filtered importing
        return
    elif is_string_literal(line):
        # The first literal is a docstring, allow it. Otherwise, report
        # error.
        if checker_state.get('seen_docstring', False):
            checker_state['seen_non_imports'] = True
        else:
            checker_state['seen_docstring'] = True
    else:
        checker_state['seen_non_imports'] = True


@register_check
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally
    discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Always use a def statement instead of an assignment statement that
    binds a lambda expression directly to a name.

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()
    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    E704: def f(x): return 2*x
    E731: f = lambda x: 2*x
    """
    line = logical_line
    last_char = len(line) - 1
    found = line.find(':')
    prev_found = 0
    counts = {char: 0 for char in '{}[]()'}
    while -1 < found < last_char:
        update_counts(line[prev_found:found], counts)
        if (
                counts['{'] <= counts['}'] and  # {'a': 1} (dict)
                counts['['] <= counts[']'] and  # [1:2] (slice)
                counts['('] <= counts[')'] and  # (annotation)
                line[found + 1] != '='  # assignment expression
        ):
            lambda_kw = LAMBDA_REGEX.search(line, 0, found)
            if lambda_kw:
                before = line[:lambda_kw.start()].rstrip()
                if before[-1:] == '=' and before[:-1].strip().isidentifier():
                    yield 0, ("E731 do not assign a lambda expression, use a "
                              "def")
                break
            if STARTSWITH_DEF_REGEX.match(line):
                yield 0, "E704 multiple statements on one line (def)"
            elif STARTSWITH_INDENT_STATEMENT_REGEX.match(line):
                yield found, "E701 multiple statements on one line (colon)"
        prev_found = found
        found = line.find(':', found + 1)
    found = line.find(';')
    while -1 < found:
        if found < last_char:
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)


@register_check
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's
    implied line continuation inside parentheses, brackets and braces.
    Long lines can be broken over multiple lines by wrapping expressions
    in parentheses.  These should be used in preference to using a
    backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    Okay: aaa = 123  # \\
    """
    prev_start = prev_end = parens = 0
    comment = False
    backslash = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            comment = True
        if start[0] != prev_start and parens and backslash and not comment:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1


# The % character is strictly speaking a binary operator, but the
# common usage seems to be to put it next to the format parameters,
# after a line break.
_SYMBOLIC_OPS = frozenset("()[]{},:.;@=%~") | frozenset(("...",))


def _is_binary_operator(token_type, text):
    return (
        token_type == tokenize.OP or
        text in {'and', 'or'}
    ) and (
        text not in _SYMBOLIC_OPS
    )
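# Illustrative examples: _is_binary_operator(tokenize.OP, '+') is True,
# while '%' and '=' are excluded via _SYMBOLIC_OPS, and of the keyword
# operators only 'and' / 'or' are counted.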


def _break_around_binary_operators(tokens):
    """Private function to reduce duplication.

    This factors out the shared details between
    :func:`break_before_binary_operator` and
    :func:`break_after_binary_operator`.
    """
    line_break = False
    unary_context = True
    # Previous non-newline token types and text
    previous_token_type = None
    previous_text = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            continue
        if ('\n' in text or '\r' in text) and token_type != tokenize.STRING:
            line_break = True
        else:
            yield (token_type, text, previous_token_type, previous_text,
                   line_break, unary_context, start)
            unary_context = text in '([{,;'
            line_break = False
            previous_token_type = token_type
            previous_text = text


@register_check
def break_before_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks before binary operators.

    The preferred place to break around a binary operator is after the
    operator, not before it.

    W503: (width == 0\n + height == 0)
    W503: (width == 0\n and height == 0)
    W503: var = (1\n       & ~2)
    W503: var = (1\n       / -2)
    W503: var = (1\n       + -1\n       + -2)

    Okay: foo(\n    -x)
    Okay: foo(x\n    [])
    Okay: x = '''\n''' + ''
    Okay: foo(x,\n    -y)
    Okay: foo(x,  # comment\n    -y)
    """
    for context in _break_around_binary_operators(tokens):
        (token_type, text, previous_token_type, previous_text,
         line_break, unary_context, start) = context
        if (_is_binary_operator(token_type, text) and line_break and
                not unary_context and
                not _is_binary_operator(previous_token_type,
                                        previous_text)):
            yield start, "W503 line break before binary operator"


@register_check
def break_after_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks after binary operators.

    The preferred place to break around a binary operator is before the
    operator, not after it.

    W504: (width == 0 +\n height == 0)
    W504: (width == 0 and\n height == 0)
    W504: var = (1 &\n       ~2)

    Okay: foo(\n    -x)
    Okay: foo(x\n    [])
    Okay: x = '''\n''' + ''
    Okay: x = '' + '''\n'''
    Okay: foo(x,\n    -y)
    Okay: foo(x,  # comment\n    -y)

    The following should be W504 but unary_context is tricky with these
    Okay: var = (1 /\n       -2)
    Okay: var = (1 +\n       -1 +\n       -2)
    """
    prev_start = None
    for context in _break_around_binary_operators(tokens):
        (token_type, text, previous_token_type, previous_text,
         line_break, unary_context, start) = context
        if (_is_binary_operator(previous_token_type, previous_text) and
                line_break and
                not unary_context and
                not _is_binary_operator(token_type, text)):
            yield prev_start, "W504 line break after binary operator"
        prev_start = start
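# Note: W503 and W504 are mutually exclusive views of the same break;
# both appear in DEFAULT_IGNORE above, so neither is reported unless
# explicitly selected.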
1390 | ||
1391 | ||
1392 | @register_check | |
1393 | def comparison_to_singleton(logical_line, noqa): | |
1394 | r"""Comparison to singletons should use "is" or "is not". | |
1395 | ||
1396 | Comparisons to singletons like None should always be done | |
1397 | with "is" or "is not", never the equality operators. | |
1398 | ||
1399 | Okay: if arg is not None: | |
1400 | E711: if arg != None: | |
1401 | E711: if None == arg: | |
1402 | E712: if arg == True: | |
1403 | E712: if False == arg: | |
1404 | ||
1405 | Also, beware of writing if x when you really mean if x is not None | |
1406 | -- e.g. when testing whether a variable or argument that defaults to | |
1407 | None was set to some other value. The other value might have a type | |
1408 | (such as a container) that could be false in a boolean context! | |
1409 | """ | |
1410 | if noqa: | |
1411 | return | |
1412 | ||
1413 | for match in COMPARE_SINGLETON_REGEX.finditer(logical_line): | |
1414 | singleton = match.group(1) or match.group(3) | |
1415 | same = (match.group(2) == '==') | |
1416 | ||
1417 | msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton) | |
1418 | if singleton in ('None',): | |
1419 | code = 'E711' | |
1420 | else: | |
1421 | code = 'E712' | |
1422 | nonzero = ((singleton == 'True' and same) or | |
1423 | (singleton == 'False' and not same)) | |
1424 | msg += " or 'if %scond:'" % ('' if nonzero else 'not ') | |
1425 | yield match.start(2), ("%s comparison to %s should be %s" % | |
1426 | (code, singleton, msg)) | |
1427 | ||
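| # For example, the logical line "if x == False:" yields the message: | |
| #     E712 comparison to False should be 'if cond is False:' or 'if not cond:' | |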
1428 | ||
1429 | @register_check | |
1430 | def comparison_negative(logical_line): | |
1431 | r"""Negative comparison should be done using "not in" and "is not". | |
1432 | ||
1433 | Okay: if x not in y:\n pass | |
1434 | Okay: assert (X in Y or X is Z) | |
1435 | Okay: if not (X in Y):\n pass | |
1436 | Okay: zz = x is not y | |
1437 | E713: Z = not X in Y | |
1438 | E713: if not X.B in Y:\n pass | |
1439 | E714: if not X is Y:\n pass | |
1440 | E714: Z = not X.B is Y | |
1441 | """ | |
1442 | match = COMPARE_NEGATIVE_REGEX.search(logical_line) | |
1443 | if match: | |
1444 | pos = match.start(1) | |
1445 | if match.group(2) == 'in': | |
1446 | yield pos, "E713 test for membership should be 'not in'" | |
1447 | else: | |
1448 | yield pos, "E714 test for object identity should be 'is not'" | |
1449 | ||
1450 | ||
1451 | @register_check | |
1452 | def comparison_type(logical_line, noqa): | |
1453 | r"""Object type comparisons should `is` / `is not` / `isinstance()`. | |
1454 | ||
1455 | Do not compare types directly. | |
1456 | ||
1457 | Okay: if isinstance(obj, int): | |
1458 | Okay: if type(obj) is int: | |
1459 | E721: if type(obj) == type(1): | |
1460 | """ | |
1461 | match = COMPARE_TYPE_REGEX.search(logical_line) | |
1462 | if match and not noqa: | |
1463 | inst = match.group(1) | |
1464 | if inst and inst.isidentifier() and inst not in SINGLETONS: | |
1465 | return # Allow comparison for types which are not obvious | |
1466 | yield ( | |
1467 | match.start(), | |
1468 | "E721 do not compare types, for exact checks use `is` / `is not`, " | |
1469 | "for instance checks use `isinstance()`", | |
1470 | ) | |
1471 | ||
1472 | ||
1473 | @register_check | |
1474 | def bare_except(logical_line, noqa): | |
1475 | r"""When catching exceptions, mention specific exceptions when | |
1476 | possible. | |
1477 | ||
1478 | Okay: except Exception: | |
1479 | Okay: except BaseException: | |
1480 | E722: except: | |
1481 | """ | |
1482 | if noqa: | |
1483 | return | |
1484 | ||
1485 | match = BLANK_EXCEPT_REGEX.match(logical_line) | |
1486 | if match: | |
1487 | yield match.start(), "E722 do not use bare 'except'" | |
1488 | ||
1489 | ||
1490 | @register_check | |
1491 | def ambiguous_identifier(logical_line, tokens): | |
1492 | r"""Never use the characters 'l', 'O', or 'I' as variable names. | |
1493 | ||
1494 | In some fonts, these characters are indistinguishable from the | |
1495 | numerals one and zero. When tempted to use 'l', use 'L' instead. | |
1496 | ||
1497 | Okay: L = 0 | |
1498 | Okay: o = 123 | |
1499 | Okay: i = 42 | |
1500 | E741: l = 0 | |
1501 | E741: O = 123 | |
1502 | E741: I = 42 | |
1503 | ||
1504 | Variables can be bound in several other contexts, including class | |
1505 | and function definitions, lambda functions, 'global' and 'nonlocal' | |
1506 | statements, exception handlers, and 'with' and 'for' statements. | |
1507 | In addition, function parameters receive special handling. | |
1508 | ||
1509 | Okay: except AttributeError as o: | |
1510 | Okay: with lock as L: | |
1511 | Okay: foo(l=12) | |
1512 | Okay: foo(l=I) | |
1513 | Okay: for a in foo(l=12): | |
1514 | Okay: lambda arg: arg * l | |
1515 | Okay: lambda a=l[I:5]: None | |
1516 | Okay: lambda x=a.I: None | |
1517 | Okay: if l >= 12: | |
1518 | E741: except AttributeError as O: | |
1519 | E741: with lock as l: | |
1520 | E741: global I | |
1521 | E741: nonlocal l | |
1522 | E741: def foo(l): | |
1523 | E741: def foo(l=12): | |
1524 | E741: l = foo(l=12) | |
1525 | E741: for l in range(10): | |
1526 | E741: [l for l in lines if l] | |
1527 | E741: lambda l: None | |
1528 | E741: lambda a=x[1:5], l: None | |
1529 | E741: lambda **l: | |
1530 | E741: def f(**l): | |
1531 | E742: class I(object): | |
1532 | E743: def l(x): | |
1533 | """ | |
1534 | func_depth = None # set to brace depth if 'def' or 'lambda' is found | |
1535 | seen_colon = False # set to true if we're done with function parameters | |
1536 | brace_depth = 0 | |
1537 | idents_to_avoid = ('l', 'O', 'I') | |
1538 | prev_type, prev_text, prev_start, prev_end, __ = tokens[0] | |
1539 | for index in range(1, len(tokens)): | |
1540 | token_type, text, start, end, line = tokens[index] | |
1541 | ident = pos = None | |
1542 | # find function definitions | |
1543 | if prev_text in {'def', 'lambda'}: | |
1544 | func_depth = brace_depth | |
1545 | seen_colon = False | |
1546 | elif ( | |
1547 | func_depth is not None and | |
1548 | text == ':' and | |
1549 | brace_depth == func_depth | |
1550 | ): | |
1551 | seen_colon = True | |
1552 | # update parameter parentheses level | |
1553 | if text in '([{': | |
1554 | brace_depth += 1 | |
1555 | elif text in ')]}': | |
1556 | brace_depth -= 1 | |
1557 | # identifiers on the lhs of an assignment operator | |
1558 | if text == ':=' or (text == '=' and brace_depth == 0): | |
1559 | if prev_text in idents_to_avoid: | |
1560 | ident = prev_text | |
1561 | pos = prev_start | |
1562 | # identifiers bound to values with 'as', 'for', | |
1563 | # 'global', or 'nonlocal' | |
1564 | if prev_text in ('as', 'for', 'global', 'nonlocal'): | |
1565 | if text in idents_to_avoid: | |
1566 | ident = text | |
1567 | pos = start | |
1568 | # function / lambda parameter definitions | |
1569 | if ( | |
1570 | func_depth is not None and | |
1571 | not seen_colon and | |
1572 | index < len(tokens) - 1 and tokens[index + 1][1] in ':,=)' and | |
1573 | prev_text in {'lambda', ',', '*', '**', '('} and | |
1574 | text in idents_to_avoid | |
1575 | ): | |
1576 | ident = text | |
1577 | pos = start | |
1578 | if prev_text == 'class': | |
1579 | if text in idents_to_avoid: | |
1580 | yield start, "E742 ambiguous class definition '%s'" % text | |
1581 | if prev_text == 'def': | |
1582 | if text in idents_to_avoid: | |
1583 | yield start, "E743 ambiguous function definition '%s'" % text | |
1584 | if ident: | |
1585 | yield pos, "E741 ambiguous variable name '%s'" % ident | |
1586 | prev_text = text | |
1587 | prev_start = start | |
1588 | ||
1589 | ||
1590 | @register_check | |
1591 | def python_3000_invalid_escape_sequence(logical_line, tokens, noqa): | |
1592 | r"""Invalid escape sequences are deprecated in Python 3.6. | |
1593 | ||
1594 | Okay: regex = r'\.png$' | |
1595 | W605: regex = '\.png$' | |
1596 | """ | |
1597 | if noqa: | |
1598 | return | |
1599 | ||
1600 | # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals | |
1601 | valid = [ | |
1602 | '\n', | |
1603 | '\\', | |
1604 | '\'', | |
1605 | '"', | |
1606 | 'a', | |
1607 | 'b', | |
1608 | 'f', | |
1609 | 'n', | |
1610 | 'r', | |
1611 | 't', | |
1612 | 'v', | |
1613 | '0', '1', '2', '3', '4', '5', '6', '7', | |
1614 | 'x', | |
1615 | ||
1616 | # Escape sequences only recognized in string literals | |
1617 | 'N', | |
1618 | 'u', | |
1619 | 'U', | |
1620 | ] | |
1621 | ||
1622 | prefixes = [] | |
1623 | for token_type, text, start, _, _ in tokens: | |
1624 | if token_type in {tokenize.STRING, FSTRING_START}: | |
1625 | # Extract string modifiers (e.g. u or r) | |
1626 | prefixes.append(text[:text.index(text[-1])].lower()) | |
1627 | ||
1628 | if token_type in {tokenize.STRING, FSTRING_MIDDLE}: | |
1629 | if 'r' not in prefixes[-1]: | |
1630 | start_line, start_col = start | |
1631 | pos = text.find('\\') | |
1632 | while pos >= 0: | |
1633 | pos += 1 | |
1634 | if text[pos] not in valid: | |
1635 | line = start_line + text.count('\n', 0, pos) | |
1636 | if line == start_line: | |
1637 | col = start_col + pos | |
1638 | else: | |
1639 | col = pos - text.rfind('\n', 0, pos) - 1 | |
1640 | yield ( | |
1641 | (line, col - 1), | |
1642 | f"W605 invalid escape sequence '\\{text[pos]}'" | |
1643 | ) | |
1644 | pos = text.find('\\', pos + 1) | |
1645 | ||
1646 | if token_type in {tokenize.STRING, FSTRING_END}: | |
1647 | prefixes.pop() | |
1648 | ||
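| # Illustrative: the string '\d' (no r prefix) is flagged as | |
| #     W605 invalid escape sequence '\d' | |
| # while r'\d' passes, since raw strings are skipped by the check above. | |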
1649 | ||
1650 | ######################################################################## | |
1651 | @register_check | |
1652 | def maximum_doc_length(logical_line, max_doc_length, noqa, tokens): | |
1653 | r"""Limit all doc lines to a maximum of 72 characters. | |
1654 | ||
1655 | For flowing long blocks of text (docstrings or comments), limiting | |
1656 | the length to 72 characters is recommended. | |
1657 | ||
1658 | Reports warning W505 | |
1659 | """ | |
1660 | if max_doc_length is None or noqa: | |
1661 | return | |
1662 | ||
1663 | prev_token = None | |
1664 | skip_lines = set() | |
1665 | # Collect lines that contain tokens other than comments and strings | |
1666 | for token_type, text, start, end, line in tokens: | |
1667 | if token_type not in SKIP_COMMENTS.union([tokenize.STRING]): | |
1668 | skip_lines.add(line) | |
1669 | ||
1670 | for token_type, text, start, end, line in tokens: | |
1671 | # Skip lines that aren't pure strings | |
1672 | if token_type == tokenize.STRING and line in skip_lines: | |
1673 | continue | |
1674 | if token_type in (tokenize.STRING, tokenize.COMMENT): | |
1675 | # Only check comment-only lines | |
1676 | if prev_token is None or prev_token in SKIP_TOKENS: | |
1677 | lines = line.splitlines() | |
1678 | for line_num, physical_line in enumerate(lines): | |
1679 | if start[0] + line_num == 1 and line.startswith('#!'): | |
1680 | return | |
1681 | length = len(physical_line) | |
1682 | chunks = physical_line.split() | |
1683 | if token_type == tokenize.COMMENT: | |
1684 | if (len(chunks) == 2 and | |
1685 | length - len(chunks[-1]) < MAX_DOC_LENGTH): | |
1686 | continue | |
1687 | if (len(chunks) == 1 and | |
1688 | line_num + 1 < len(lines) and | |
1689 | length - len(chunks[-1]) < MAX_DOC_LENGTH): | |
1690 | continue | |
1691 | if length > max_doc_length: | |
1692 | doc_error = (start[0] + line_num, max_doc_length) | |
1693 | yield (doc_error, "W505 doc line too long " | |
1694 | "(%d > %d characters)" | |
1695 | % (length, max_doc_length)) | |
1696 | prev_token = token_type | |
1697 | ||
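| # W505 is off by default; it is enabled by setting a limit, e.g.: | |
| #     pycodestyle --max-doc-length=72 example.py | |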
1698 | ||
1699 | ######################################################################## | |
1700 | # Helper functions | |
1701 | ######################################################################## | |
1702 | ||
1703 | ||
1704 | def readlines(filename): | |
1705 | """Read the source code.""" | |
1706 | try: | |
1707 | with tokenize.open(filename) as f: | |
1708 | return f.readlines() | |
1709 | except (LookupError, SyntaxError, UnicodeError): | |
1710 | # Fall back if file encoding is improperly declared | |
1711 | with open(filename, encoding='latin-1') as f: | |
1712 | return f.readlines() | |
1713 | ||
1714 | ||
1715 | def stdin_get_value(): | |
1716 | """Read the value from stdin.""" | |
1717 | return io.TextIOWrapper(sys.stdin.buffer, errors='ignore').read() | |
1718 | ||
1719 | ||
1720 | noqa = lru_cache(512)(re.compile(r'# no(?:qa|pep8)\b', re.I).search) | |
1721 | ||
1722 | ||
1723 | def expand_indent(line): | |
1724 | r"""Return the amount of indentation. | |
1725 | ||
1726 | Tabs are expanded to the next multiple of 8. | |
1727 | """ | |
1728 | line = line.rstrip('\n\r') | |
1729 | if '\t' not in line: | |
1730 | return len(line) - len(line.lstrip()) | |
1731 | result = 0 | |
1732 | for char in line: | |
1733 | if char == '\t': | |
1734 | result = result // 8 * 8 + 8 | |
1735 | elif char == ' ': | |
1736 | result += 1 | |
1737 | else: | |
1738 | break | |
1739 | return result | |
1740 | ||
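| # A quick sketch of the behaviour (doctest-style, for illustration): | |
| #     >>> expand_indent('    abc') | |
| #     4 | |
| #     >>> expand_indent('\tabc') | |
| #     8 | |
| #     >>> expand_indent('  \tabc') | |
| #     8 | |
| #     >>> expand_indent('\t  abc') | |
| #     10 | |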
1741 | ||
1742 | def mute_string(text): | |
1743 | """Replace contents with 'xxx' to prevent syntax matching.""" | |
1744 | # String modifiers (e.g. u or r) | |
1745 | start = text.index(text[-1]) + 1 | |
1746 | end = len(text) - 1 | |
1747 | # Triple quotes | |
1748 | if text[-3:] in ('"""', "'''"): | |
1749 | start += 2 | |
1750 | end -= 2 | |
1751 | return text[:start] + 'x' * (end - start) + text[end:] | |
1752 | ||
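| # Illustrative examples (doctest-style): | |
| #     >>> mute_string('"abc"') | |
| #     '"xxx"' | |
| #     >>> mute_string("'''abc'''") | |
| #     "'''xxx'''" | |
| #     >>> mute_string("r'abc'") | |
| #     "r'xxx'" | |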
1753 | ||
1754 | def parse_udiff(diff, patterns=None, parent='.'): | |
1755 | """Return a dictionary of matching lines.""" | |
1756 | # For each file of the diff, the entry key is the filename, | |
1757 | # and the value is a set of row numbers to consider. | |
1758 | rv = {} | |
1759 | path = nrows = None | |
1760 | for line in diff.splitlines(): | |
1761 | if nrows: | |
1762 | if line[:1] != '-': | |
1763 | nrows -= 1 | |
1764 | continue | |
1765 | if line[:3] == '@@ ': | |
1766 | hunk_match = HUNK_REGEX.match(line) | |
1767 | (row, nrows) = (int(g or '1') for g in hunk_match.groups()) | |
1768 | rv[path].update(range(row, row + nrows)) | |
1769 | elif line[:3] == '+++': | |
1770 | path = line[4:].split('\t', 1)[0] | |
1771 | # Git diff will use (i)ndex, (w)ork tree, (c)ommit and | |
1772 | # (o)bject instead of a/b/c/d as prefixes for patches | |
1773 | if path[:2] in ('b/', 'w/', 'i/'): | |
1774 | path = path[2:] | |
1775 | rv[path] = set() | |
1776 | return { | |
1777 | os.path.join(parent, filepath): rows | |
1778 | for (filepath, rows) in rv.items() | |
1779 | if rows and filename_match(filepath, patterns) | |
1780 | } | |
1781 | ||
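| # For instance, a hunk header "@@ -10,3 +12,4 @@" under "+++ b/foo.py" | |
| # marks rows 12-15 of foo.py as changed (a sketch; the exact groups | |
| # come from HUNK_REGEX, defined earlier in this module). | |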
1782 | ||
1783 | def normalize_paths(value, parent=os.curdir): | |
1784 | """Parse a comma-separated list of paths. | |
1785 | ||
1786 | Return a list of absolute paths. | |
1787 | """ | |
1788 | if not value: | |
1789 | return [] | |
1790 | if isinstance(value, list): | |
1791 | return value | |
1792 | paths = [] | |
1793 | for path in value.split(','): | |
1794 | path = path.strip() | |
1795 | if '/' in path: | |
1796 | path = os.path.abspath(os.path.join(parent, path)) | |
1797 | paths.append(path.rstrip('/')) | |
1798 | return paths | |
1799 | ||
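| # Illustration (assuming the working directory is /project): | |
| #     normalize_paths('.tox, ./build/*') | |
| #     -> ['.tox', '/project/build/*'] | |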
1800 | ||
1801 | def filename_match(filename, patterns, default=True): | |
1802 | """Check if patterns contains a pattern that matches filename. | |
1803 | ||
1804 | If patterns is unspecified, this always returns True. | |
1805 | """ | |
1806 | if not patterns: | |
1807 | return default | |
1808 | return any(fnmatch(filename, pattern) for pattern in patterns) | |
1809 | ||
1810 | ||
1811 | def update_counts(s, counts): | |
1812 | r"""Adds one to the counts of each appearance of characters in s, | |
1813 | for characters in counts""" | |
1814 | for char in s: | |
1815 | if char in counts: | |
1816 | counts[char] += 1 | |
1817 | ||
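| # For example: | |
| #     counts = {'(': 0, ')': 0} | |
| #     update_counts('foo(bar())', counts)   # counts -> {'(': 2, ')': 2} | |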
1818 | ||
1819 | def _is_eol_token(token): | |
1820 | return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n' | |
1821 | ||
1822 | ||
1823 | ######################################################################## | |
1824 | # Framework to run all checks | |
1825 | ######################################################################## | |
1826 | ||
1827 | ||
1828 | class Checker: | |
1829 | """Load a Python source file, tokenize it, check coding style.""" | |
1830 | ||
1831 | def __init__(self, filename=None, lines=None, | |
1832 | options=None, report=None, **kwargs): | |
1833 | if options is None: | |
1834 | options = StyleGuide(kwargs).options | |
1835 | else: | |
1836 | assert not kwargs | |
1837 | self._io_error = None | |
1838 | self._physical_checks = options.physical_checks | |
1839 | self._logical_checks = options.logical_checks | |
1840 | self._ast_checks = options.ast_checks | |
1841 | self.max_line_length = options.max_line_length | |
1842 | self.max_doc_length = options.max_doc_length | |
1843 | self.indent_size = options.indent_size | |
1844 | self.fstring_start = 0 | |
1845 | self.multiline = False # in a multiline string? | |
1846 | self.hang_closing = options.hang_closing | |
1847 | self.indent_size = options.indent_size | |
1848 | self.verbose = options.verbose | |
1849 | self.filename = filename | |
1850 | # Dictionary where a checker can store its custom state. | |
1851 | self._checker_states = {} | |
1852 | if filename is None: | |
1853 | self.filename = 'stdin' | |
1854 | self.lines = lines or [] | |
1855 | elif filename == '-': | |
1856 | self.filename = 'stdin' | |
1857 | self.lines = stdin_get_value().splitlines(True) | |
1858 | elif lines is None: | |
1859 | try: | |
1860 | self.lines = readlines(filename) | |
1861 | except OSError: | |
1862 | (exc_type, exc) = sys.exc_info()[:2] | |
1863 | self._io_error = f'{exc_type.__name__}: {exc}' | |
1864 | self.lines = [] | |
1865 | else: | |
1866 | self.lines = lines | |
1867 | if self.lines: | |
1868 | ord0 = ord(self.lines[0][0]) | |
1869 | if ord0 in (0xef, 0xfeff): # Strip the UTF-8 BOM | |
1870 | if ord0 == 0xfeff: | |
1871 | self.lines[0] = self.lines[0][1:] | |
1872 | elif self.lines[0][:3] == '\xef\xbb\xbf': | |
1873 | self.lines[0] = self.lines[0][3:] | |
1874 | self.report = report or options.report | |
1875 | self.report_error = self.report.error | |
1876 | self.noqa = False | |
1877 | ||
1878 | def report_invalid_syntax(self): | |
1879 | """Check if the syntax is valid.""" | |
1880 | (exc_type, exc) = sys.exc_info()[:2] | |
1881 | if len(exc.args) > 1: | |
1882 | offset = exc.args[1] | |
1883 | if len(offset) > 2: | |
1884 | offset = offset[1:3] | |
1885 | else: | |
1886 | offset = (1, 0) | |
1887 | self.report_error(offset[0], offset[1] or 0, | |
1888 | f'E901 {exc_type.__name__}: {exc.args[0]}', | |
1889 | self.report_invalid_syntax) | |
1890 | ||
1891 | def readline(self): | |
1892 | """Get the next line from the input buffer.""" | |
1893 | if self.line_number >= self.total_lines: | |
1894 | return '' | |
1895 | line = self.lines[self.line_number] | |
1896 | self.line_number += 1 | |
1897 | if self.indent_char is None and line[:1] in WHITESPACE: | |
1898 | self.indent_char = line[0] | |
1899 | return line | |
1900 | ||
1901 | def run_check(self, check, argument_names): | |
1902 | """Run a check plugin.""" | |
1903 | arguments = [] | |
1904 | for name in argument_names: | |
1905 | arguments.append(getattr(self, name)) | |
1906 | return check(*arguments) | |
1907 | ||
1908 | def init_checker_state(self, name, argument_names): | |
1909 | """Prepare custom state for the specific checker plugin.""" | |
1910 | if 'checker_state' in argument_names: | |
1911 | self.checker_state = self._checker_states.setdefault(name, {}) | |
1912 | ||
1913 | def check_physical(self, line): | |
1914 | """Run all physical checks on a raw input line.""" | |
1915 | self.physical_line = line | |
1916 | for name, check, argument_names in self._physical_checks: | |
1917 | self.init_checker_state(name, argument_names) | |
1918 | result = self.run_check(check, argument_names) | |
1919 | if result is not None: | |
1920 | (offset, text) = result | |
1921 | self.report_error(self.line_number, offset, text, check) | |
1922 | if text[:4] == 'E101': | |
1923 | self.indent_char = line[0] | |
1924 | ||
1925 | def build_tokens_line(self): | |
1926 | """Build a logical line from tokens.""" | |
1927 | logical = [] | |
1928 | comments = [] | |
1929 | length = 0 | |
1930 | prev_row = prev_col = mapping = None | |
1931 | for token_type, text, start, end, line in self.tokens: | |
1932 | if token_type in SKIP_TOKENS: | |
1933 | continue | |
1934 | if not mapping: | |
1935 | mapping = [(0, start)] | |
1936 | if token_type == tokenize.COMMENT: | |
1937 | comments.append(text) | |
1938 | continue | |
1939 | if token_type == tokenize.STRING: | |
1940 | text = mute_string(text) | |
1941 | elif token_type == FSTRING_MIDDLE: # pragma: >=3.12 cover | |
1942 | text = 'x' * len(text) | |
1943 | if prev_row: | |
1944 | (start_row, start_col) = start | |
1945 | if prev_row != start_row: # different row | |
1946 | prev_text = self.lines[prev_row - 1][prev_col - 1] | |
1947 | if prev_text == ',' or (prev_text not in '{[(' and | |
1948 | text not in '}])'): | |
1949 | text = ' ' + text | |
1950 | elif prev_col != start_col: # different column | |
1951 | text = line[prev_col:start_col] + text | |
1952 | logical.append(text) | |
1953 | length += len(text) | |
1954 | mapping.append((length, end)) | |
1955 | (prev_row, prev_col) = end | |
1956 | self.logical_line = ''.join(logical) | |
1957 | self.noqa = comments and noqa(''.join(comments)) | |
1958 | return mapping | |
1959 | ||
1960 | def check_logical(self): | |
1961 | """Build a line from tokens and run all logical checks on it.""" | |
1962 | self.report.increment_logical_line() | |
1963 | mapping = self.build_tokens_line() | |
1964 | if not mapping: | |
1965 | return | |
1966 | ||
1967 | mapping_offsets = [offset for offset, _ in mapping] | |
1968 | (start_row, start_col) = mapping[0][1] | |
1969 | start_line = self.lines[start_row - 1] | |
1970 | self.indent_level = expand_indent(start_line[:start_col]) | |
1971 | if self.blank_before < self.blank_lines: | |
1972 | self.blank_before = self.blank_lines | |
1973 | if self.verbose >= 2: | |
1974 | print(self.logical_line[:80].rstrip()) | |
1975 | for name, check, argument_names in self._logical_checks: | |
1976 | if self.verbose >= 4: | |
1977 | print(' ' + name) | |
1978 | self.init_checker_state(name, argument_names) | |
1979 | for offset, text in self.run_check(check, argument_names) or (): | |
1980 | if not isinstance(offset, tuple): | |
1981 | # As mappings are ordered, bisecting is a fast way | |
1982 | # to find a given offset in them. | |
1983 | token_offset, pos = mapping[bisect.bisect_left( | |
1984 | mapping_offsets, offset)] | |
1985 | offset = (pos[0], pos[1] + offset - token_offset) | |
1986 | self.report_error(offset[0], offset[1], text, check) | |
1987 | if self.logical_line: | |
1988 | self.previous_indent_level = self.indent_level | |
1989 | self.previous_logical = self.logical_line | |
1990 | if not self.indent_level: | |
1991 | self.previous_unindented_logical_line = self.logical_line | |
1992 | self.blank_lines = 0 | |
1993 | self.tokens = [] | |
1994 | ||
1995 | def check_ast(self): | |
1996 | """Build the file's AST and run all AST checks.""" | |
1997 | try: | |
1998 | tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST) | |
1999 | except (ValueError, SyntaxError, TypeError): | |
2000 | return self.report_invalid_syntax() | |
2001 | for name, cls, __ in self._ast_checks: | |
2002 | checker = cls(tree, self.filename) | |
2003 | for lineno, offset, text, check in checker.run(): | |
2004 | if not self.lines or not noqa(self.lines[lineno - 1]): | |
2005 | self.report_error(lineno, offset, text, check) | |
2006 | ||
2007 | def generate_tokens(self): | |
2008 | """Tokenize file, run physical line checks and yield tokens.""" | |
2009 | if self._io_error: | |
2010 | self.report_error(1, 0, 'E902 %s' % self._io_error, readlines) | |
2011 | tokengen = tokenize.generate_tokens(self.readline) | |
2012 | try: | |
2013 | prev_physical = '' | |
2014 | for token in tokengen: | |
2015 | if token[2][0] > self.total_lines: | |
2016 | return | |
2017 | self.noqa = token[4] and noqa(token[4]) | |
2018 | self.maybe_check_physical(token, prev_physical) | |
2019 | yield token | |
2020 | prev_physical = token[4] | |
2021 | except (SyntaxError, tokenize.TokenError): | |
2022 | self.report_invalid_syntax() | |
2023 | ||
2024 | def maybe_check_physical(self, token, prev_physical): | |
2025 | """If appropriate for token, check current physical line(s).""" | |
2026 | # Called after every token, but act only on end of line. | |
2027 | ||
2028 | if token.type == FSTRING_START: # pragma: >=3.12 cover | |
2029 | self.fstring_start = token.start[0] | |
2030 | # a newline token ends a single physical line. | |
2031 | elif _is_eol_token(token): | |
2032 | # if the file does not end with a newline, the NEWLINE | |
2033 | # token is inserted by the parser, but it does not contain | |
2034 | # the previous physical line in `token[4]` | |
2035 | if token.line == '': | |
2036 | self.check_physical(prev_physical) | |
2037 | else: | |
2038 | self.check_physical(token.line) | |
2039 | elif ( | |
2040 | token.type == tokenize.STRING and '\n' in token.string or | |
2041 | token.type == FSTRING_END | |
2042 | ): | |
2043 | # Less obviously, a string that contains newlines is a | |
2044 | # multiline string, either triple-quoted or with internal | |
2045 | # newlines backslash-escaped. Check every physical line in | |
2046 | # the string *except* for the last one: its newline is | |
2047 | # outside of the multiline string, so we consider it a | |
2048 | # regular physical line, and will check it like any other | |
2049 | # physical line. | |
2050 | # | |
2051 | # Subtleties: | |
2052 | # - we don't *completely* ignore the last line; if it | |
2053 | # contains the magical "# noqa" comment, we disable all | |
2054 | # physical checks for the entire multiline string | |
2055 | # - have to wind self.line_number back because initially it | |
2056 | # points to the last line of the string, and we want | |
2057 | # check_physical() to give accurate feedback | |
2058 | if noqa(token.line): | |
2059 | return | |
2060 | if token.type == FSTRING_END: # pragma: >=3.12 cover | |
2061 | start = self.fstring_start | |
2062 | else: | |
2063 | start = token.start[0] | |
2064 | end = token.end[0] | |
2065 | ||
2066 | self.multiline = True | |
2067 | self.line_number = start | |
2068 | for line_number in range(start, end): | |
2069 | self.check_physical(self.lines[line_number - 1] + '\n') | |
2070 | self.line_number += 1 | |
2071 | self.multiline = False | |
2072 | ||
2073 | def check_all(self, expected=None, line_offset=0): | |
2074 | """Run all checks on the input file.""" | |
2075 | self.report.init_file(self.filename, self.lines, expected, line_offset) | |
2076 | self.total_lines = len(self.lines) | |
2077 | if self._ast_checks: | |
2078 | self.check_ast() | |
2079 | self.line_number = 0 | |
2080 | self.indent_char = None | |
2081 | self.indent_level = self.previous_indent_level = 0 | |
2082 | self.previous_logical = '' | |
2083 | self.previous_unindented_logical_line = '' | |
2084 | self.tokens = [] | |
2085 | self.blank_lines = self.blank_before = 0 | |
2086 | parens = 0 | |
2087 | for token in self.generate_tokens(): | |
2088 | self.tokens.append(token) | |
2089 | token_type, text = token[0:2] | |
2090 | if self.verbose >= 3: | |
2091 | if token[2][0] == token[3][0]: | |
2092 | pos = '[{}:{}]'.format(token[2][1] or '', token[3][1]) | |
2093 | else: | |
2094 | pos = 'l.%s' % token[3][0] | |
2095 | print('l.%s\t%s\t%s\t%r' % | |
2096 | (token[2][0], pos, tokenize.tok_name[token[0]], text)) | |
2097 | if token_type == tokenize.OP: | |
2098 | if text in '([{': | |
2099 | parens += 1 | |
2100 | elif text in '}])': | |
2101 | parens -= 1 | |
2102 | elif not parens: | |
2103 | if token_type in NEWLINE: | |
2104 | if token_type == tokenize.NEWLINE: | |
2105 | self.check_logical() | |
2106 | self.blank_before = 0 | |
2107 | elif len(self.tokens) == 1: | |
2108 | # The physical line contains only this token. | |
2109 | self.blank_lines += 1 | |
2110 | del self.tokens[0] | |
2111 | else: | |
2112 | self.check_logical() | |
2113 | if self.tokens: | |
2114 | self.check_physical(self.lines[-1]) | |
2115 | self.check_logical() | |
2116 | return self.report.get_file_results() | |
2117 | ||
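| # Typical direct usage (a sketch; the filename is illustrative): | |
| #     checker = Checker('example.py', show_source=True) | |
| #     file_errors = checker.check_all() | |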
2118 | ||
2119 | class BaseReport: | |
2120 | """Collect the results of the checks.""" | |
2121 | ||
2122 | print_filename = False | |
2123 | ||
2124 | def __init__(self, options): | |
2125 | self._benchmark_keys = options.benchmark_keys | |
2126 | self._ignore_code = options.ignore_code | |
2127 | # Results | |
2128 | self.elapsed = 0 | |
2129 | self.total_errors = 0 | |
2130 | self.counters = dict.fromkeys(self._benchmark_keys, 0) | |
2131 | self.messages = {} | |
2132 | ||
2133 | def start(self): | |
2134 | """Start the timer.""" | |
2135 | self._start_time = time.time() | |
2136 | ||
2137 | def stop(self): | |
2138 | """Stop the timer.""" | |
2139 | self.elapsed = time.time() - self._start_time | |
2140 | ||
2141 | def init_file(self, filename, lines, expected, line_offset): | |
2142 | """Signal a new file.""" | |
2143 | self.filename = filename | |
2144 | self.lines = lines | |
2145 | self.expected = expected or () | |
2146 | self.line_offset = line_offset | |
2147 | self.file_errors = 0 | |
2148 | self.counters['files'] += 1 | |
2149 | self.counters['physical lines'] += len(lines) | |
2150 | ||
2151 | def increment_logical_line(self): | |
2152 | """Signal a new logical line.""" | |
2153 | self.counters['logical lines'] += 1 | |
2154 | ||
2155 | def error(self, line_number, offset, text, check): | |
2156 | """Report an error, according to options.""" | |
2157 | code = text[:4] | |
2158 | if self._ignore_code(code): | |
2159 | return | |
2160 | if code in self.counters: | |
2161 | self.counters[code] += 1 | |
2162 | else: | |
2163 | self.counters[code] = 1 | |
2164 | self.messages[code] = text[5:] | |
2165 | # Don't care about expected errors or warnings | |
2166 | if code in self.expected: | |
2167 | return | |
2168 | if self.print_filename and not self.file_errors: | |
2169 | print(self.filename) | |
2170 | self.file_errors += 1 | |
2171 | self.total_errors += 1 | |
2172 | return code | |
2173 | ||
2174 | def get_file_results(self): | |
2175 | """Return the count of errors and warnings for this file.""" | |
2176 | return self.file_errors | |
2177 | ||
2178 | def get_count(self, prefix=''): | |
2179 | """Return the total count of errors and warnings.""" | |
2180 | return sum(self.counters[key] | |
2181 | for key in self.messages if key.startswith(prefix)) | |
2182 | ||
2183 | def get_statistics(self, prefix=''): | |
2184 | """Get statistics for message codes that start with the prefix. | |
2185 | ||
2186 | prefix='' matches all errors and warnings | |
2187 | prefix='E' matches all errors | |
2188 | prefix='W' matches all warnings | |
2189 | prefix='E4' matches all errors that have to do with imports | |
2190 | """ | |
2191 | return ['%-7s %s %s' % (self.counters[key], key, self.messages[key]) | |
2192 | for key in sorted(self.messages) if key.startswith(prefix)] | |
2193 | ||
2194 | def print_statistics(self, prefix=''): | |
2195 | """Print overall statistics (number of errors and warnings).""" | |
2196 | for line in self.get_statistics(prefix): | |
2197 | print(line) | |
2198 | ||
2199 | def print_benchmark(self): | |
2200 | """Print benchmark numbers.""" | |
2201 | print('{:<7.2f} {}'.format(self.elapsed, 'seconds elapsed')) | |
2202 | if self.elapsed: | |
2203 | for key in self._benchmark_keys: | |
2204 | print('%-7d %s per second (%d total)' % | |
2205 | (self.counters[key] / self.elapsed, key, | |
2206 | self.counters[key])) | |
2207 | ||
2208 | ||
2209 | class FileReport(BaseReport): | |
2210 | """Collect the results of the checks and print the filenames.""" | |
2211 | ||
2212 | print_filename = True | |
2213 | ||
2214 | ||
2215 | class StandardReport(BaseReport): | |
2216 | """Collect and print the results of the checks.""" | |
2217 | ||
2218 | def __init__(self, options): | |
2219 | super().__init__(options) | |
2220 | self._fmt = REPORT_FORMAT.get(options.format.lower(), | |
2221 | options.format) | |
2222 | self._repeat = options.repeat | |
2223 | self._show_source = options.show_source | |
2224 | self._show_pep8 = options.show_pep8 | |
2225 | ||
2226 | def init_file(self, filename, lines, expected, line_offset): | |
2227 | """Signal a new file.""" | |
2228 | self._deferred_print = [] | |
2229 | return super().init_file( | |
2230 | filename, lines, expected, line_offset) | |
2231 | ||
2232 | def error(self, line_number, offset, text, check): | |
2233 | """Report an error, according to options.""" | |
2234 | code = super().error(line_number, offset, text, check) | |
2235 | if code and (self.counters[code] == 1 or self._repeat): | |
2236 | self._deferred_print.append( | |
2237 | (line_number, offset, code, text[5:], check.__doc__)) | |
2238 | return code | |
2239 | ||
2240 | def get_file_results(self): | |
2241 | """Print results and return the overall count for this file.""" | |
2242 | self._deferred_print.sort() | |
2243 | for line_number, offset, code, text, doc in self._deferred_print: | |
2244 | print(self._fmt % { | |
2245 | 'path': self.filename, | |
2246 | 'row': self.line_offset + line_number, 'col': offset + 1, | |
2247 | 'code': code, 'text': text, | |
2248 | }) | |
2249 | if self._show_source: | |
2250 | if line_number > len(self.lines): | |
2251 | line = '' | |
2252 | else: | |
2253 | line = self.lines[line_number - 1] | |
2254 | print(line.rstrip()) | |
2255 | print(re.sub(r'\S', ' ', line[:offset]) + '^') | |
2256 | if self._show_pep8 and doc: | |
2257 | print(' ' + doc.strip()) | |
2258 | ||
2259 | # stdout is block buffered when not stdout.isatty(). | |
2260 | # A line can be split at a buffer boundary when other | |
2261 | # processes write to the same file concurrently, so | |
2262 | # flush() after print() to keep each line intact. | |
2263 | # The typical buffer size is 8192 bytes; a line is written | |
2264 | # safely when len(line) < 8192. | |
2265 | sys.stdout.flush() | |
2266 | return self.file_errors | |
2267 | ||
2268 | ||
2269 | class DiffReport(StandardReport): | |
2270 | """Collect and print the results for the changed lines only.""" | |
2271 | ||
2272 | def __init__(self, options): | |
2273 | super().__init__(options) | |
2274 | self._selected = options.selected_lines | |
2275 | ||
2276 | def error(self, line_number, offset, text, check): | |
2277 | if line_number not in self._selected[self.filename]: | |
2278 | return | |
2279 | return super().error(line_number, offset, text, check) | |
2280 | ||
2281 | ||
2282 | class StyleGuide: | |
2283 | """Initialize a PEP-8 instance with few options.""" | |
2284 | ||
2285 | def __init__(self, *args, **kwargs): | |
2286 | # build options from the command line | |
2287 | self.checker_class = kwargs.pop('checker_class', Checker) | |
2288 | parse_argv = kwargs.pop('parse_argv', False) | |
2289 | config_file = kwargs.pop('config_file', False) | |
2290 | parser = kwargs.pop('parser', None) | |
2291 | # build options from dict | |
2292 | options_dict = dict(*args, **kwargs) | |
2293 | arglist = None if parse_argv else options_dict.get('paths', None) | |
2294 | verbose = options_dict.get('verbose', None) | |
2295 | options, self.paths = process_options( | |
2296 | arglist, parse_argv, config_file, parser, verbose) | |
2297 | if options_dict: | |
2298 | options.__dict__.update(options_dict) | |
2299 | if 'paths' in options_dict: | |
2300 | self.paths = options_dict['paths'] | |
2301 | ||
2302 | self.runner = self.input_file | |
2303 | self.options = options | |
2304 | ||
2305 | if not options.reporter: | |
2306 | options.reporter = BaseReport if options.quiet else StandardReport | |
2307 | ||
2308 | options.select = tuple(options.select or ()) | |
2309 | if not (options.select or options.ignore) and DEFAULT_IGNORE: | |
2310 | # The default choice: ignore controversial checks | |
2311 | options.ignore = tuple(DEFAULT_IGNORE.split(',')) | |
2312 | else: | |
2313 | # Ignore all checks which are not explicitly selected | |
2314 | options.ignore = ('',) if options.select else tuple(options.ignore) | |
2315 | options.benchmark_keys = BENCHMARK_KEYS[:] | |
2316 | options.ignore_code = self.ignore_code | |
2317 | options.physical_checks = self.get_checks('physical_line') | |
2318 | options.logical_checks = self.get_checks('logical_line') | |
2319 | options.ast_checks = self.get_checks('tree') | |
2320 | self.init_report() | |
2321 | ||
2322 | def init_report(self, reporter=None): | |
2323 | """Initialize the report instance.""" | |
2324 | self.options.report = (reporter or self.options.reporter)(self.options) | |
2325 | return self.options.report | |
2326 | ||
2327 | def check_files(self, paths=None): | |
2328 | """Run all checks on the paths.""" | |
2329 | if paths is None: | |
2330 | paths = self.paths | |
2331 | report = self.options.report | |
2332 | runner = self.runner | |
2333 | report.start() | |
2334 | try: | |
2335 | for path in paths: | |
2336 | if os.path.isdir(path): | |
2337 | self.input_dir(path) | |
2338 | elif not self.excluded(path): | |
2339 | runner(path) | |
2340 | except KeyboardInterrupt: | |
2341 | print('... stopped') | |
2342 | report.stop() | |
2343 | return report | |
2344 | ||
2345 | def input_file(self, filename, lines=None, expected=None, line_offset=0): | |
2346 | """Run all checks on a Python source file.""" | |
2347 | if self.options.verbose: | |
2348 | print('checking %s' % filename) | |
2349 | fchecker = self.checker_class( | |
2350 | filename, lines=lines, options=self.options) | |
2351 | return fchecker.check_all(expected=expected, line_offset=line_offset) | |
2352 | ||
2353 | def input_dir(self, dirname): | |
2354 | """Check all files in this directory and all subdirectories.""" | |
2355 | dirname = dirname.rstrip('/') | |
2356 | if self.excluded(dirname): | |
2357 | return 0 | |
2358 | counters = self.options.report.counters | |
2359 | verbose = self.options.verbose | |
2360 | filepatterns = self.options.filename | |
2361 | runner = self.runner | |
2362 | for root, dirs, files in os.walk(dirname): | |
2363 | if verbose: | |
2364 | print('directory ' + root) | |
2365 | counters['directories'] += 1 | |
2366 | for subdir in sorted(dirs): | |
2367 | if self.excluded(subdir, root): | |
2368 | dirs.remove(subdir) | |
2369 | for filename in sorted(files): | |
2370 | # check the file if its name matches a pattern and is not excluded | |
2371 | if ( | |
2372 | filename_match(filename, filepatterns) and | |
2373 | not self.excluded(filename, root) | |
2374 | ): | |
2375 | runner(os.path.join(root, filename)) | |
2376 | ||
2377 | def excluded(self, filename, parent=None): | |
2378 | """Check if the file should be excluded. | |
2379 | ||
2380 | Check if 'options.exclude' contains a pattern matching filename. | |
2381 | """ | |
2382 | if not self.options.exclude: | |
2383 | return False | |
2384 | basename = os.path.basename(filename) | |
2385 | if filename_match(basename, self.options.exclude): | |
2386 | return True | |
2387 | if parent: | |
2388 | filename = os.path.join(parent, filename) | |
2389 | filename = os.path.abspath(filename) | |
2390 | return filename_match(filename, self.options.exclude) | |
2391 | ||
2392 | def ignore_code(self, code): | |
2393 | """Check if the error code should be ignored. | |
2394 | ||
2395 | If 'options.select' contains a prefix of the error code, | |
2396 | return False. Else, if 'options.ignore' contains a prefix of | |
2397 | the error code, return True. | |
2398 | """ | |
2399 | if len(code) < 4 and any(s.startswith(code) | |
2400 | for s in self.options.select): | |
2401 | return False | |
2402 | return (code.startswith(self.options.ignore) and | |
2403 | not code.startswith(self.options.select)) | |
2404 | ||
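| # For example, with options.select = ('E4',) and options.ignore = ('E',): | |
| #     ignore_code('E401') -> False   (a selected prefix wins) | |
| #     ignore_code('E501') -> True    (ignored, not selected) | |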
2405 | def get_checks(self, argument_name): | |
2406 | """Get all the checks for this category. | |
2407 | ||
2408 | Find all globally visible functions where the first argument | |
2409 | name starts with argument_name and which contain selected tests. | |
2410 | """ | |
2411 | checks = [] | |
2412 | for check, attrs in _checks[argument_name].items(): | |
2413 | (codes, args) = attrs | |
2414 | if any(not (code and self.ignore_code(code)) for code in codes): | |
2415 | checks.append((check.__name__, check, args)) | |
2416 | return sorted(checks) | |
2417 | ||
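| # Library usage sketch (filenames are illustrative): | |
| #     style = StyleGuide(quiet=True) | |
| #     report = style.check_files(['file1.py', 'file2.py']) | |
| #     if report.total_errors: | |
| #         ... | |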
2418 | ||
2419 | def get_parser(prog='pycodestyle', version=__version__): | |
2420 | """Create the parser for the program.""" | |
2421 | parser = OptionParser(prog=prog, version=version, | |
2422 | usage="%prog [options] input ...") | |
2423 | parser.config_options = [ | |
2424 | 'exclude', 'filename', 'select', 'ignore', 'max-line-length', | |
2425 | 'max-doc-length', 'indent-size', 'hang-closing', 'count', 'format', | |
2426 | 'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose'] | |
2427 | parser.add_option('-v', '--verbose', default=0, action='count', | |
2428 | help="print status messages, or debug with -vv") | |
2429 | parser.add_option('-q', '--quiet', default=0, action='count', | |
2430 | help="report only file names, or nothing with -qq") | |
2431 | parser.add_option('-r', '--repeat', default=True, action='store_true', | |
2432 | help="(obsolete) show all occurrences of the same error") | |
2433 | parser.add_option('--first', action='store_false', dest='repeat', | |
2434 | help="show first occurrence of each error") | |
2435 | parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, | |
2436 | help="exclude files or directories which match these " | |
2437 | "comma separated patterns (default: %default)") | |
2438 | parser.add_option('--filename', metavar='patterns', default='*.py', | |
2439 | help="when parsing directories, only check filenames " | |
2440 | "matching these comma separated patterns " | |
2441 | "(default: %default)") | |
2442 | parser.add_option('--select', metavar='errors', default='', | |
2443 | help="select errors and warnings (e.g. E,W6)") | |
2444 | parser.add_option('--ignore', metavar='errors', default='', | |
2445 | help="skip errors and warnings (e.g. E4,W) " | |
2446 | "(default: %s)" % DEFAULT_IGNORE) | |
2447 | parser.add_option('--show-source', action='store_true', | |
2448 | help="show source code for each error") | |
2449 | parser.add_option('--show-pep8', action='store_true', | |
2450 | help="show text of PEP 8 for each error " | |
2451 | "(implies --first)") | |
2452 | parser.add_option('--statistics', action='store_true', | |
2453 | help="count errors and warnings") | |
2454 | parser.add_option('--count', action='store_true', | |
2455 | help="print total number of errors and warnings " | |
2456 | "to standard error and set exit code to 1 if " | |
2457 | "total is not null") | |
2458 | parser.add_option('--max-line-length', type='int', metavar='n', | |
2459 | default=MAX_LINE_LENGTH, | |
2460 | help="set maximum allowed line length " | |
2461 | "(default: %default)") | |
2462 | parser.add_option('--max-doc-length', type='int', metavar='n', | |
2463 | default=None, | |
2464 | help="set maximum allowed doc line length and perform " | |
2465 | "these checks (unchecked if not set)") | |
2466 | parser.add_option('--indent-size', type='int', metavar='n', | |
2467 | default=INDENT_SIZE, | |
2468 | help="set how many spaces make up an indent " | |
2469 | "(default: %default)") | |
2470 | parser.add_option('--hang-closing', action='store_true', | |
2471 | help="hang closing bracket instead of matching " | |
2472 | "indentation of opening bracket's line") | |
2473 | parser.add_option('--format', metavar='format', default='default', | |
2474 | help="set the error format [default|pylint|<custom>]") | |
2475 | parser.add_option('--diff', action='store_true', | |
2476 | help="report changes only within line number ranges in " | |
2477 | "the unified diff received on STDIN") | |
2478 | group = parser.add_option_group("Testing Options") | |
2479 | group.add_option('--benchmark', action='store_true', | |
2480 | help="measure processing speed") | |
2481 | return parser | |
2482 | ||
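| # Example invocations (paths are illustrative): | |
| #     pycodestyle --first example.py | |
| #     pycodestyle --show-source --show-pep8 example.py | |
| #     pycodestyle --statistics -qq src/ | |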
2483 | ||
2484 | def read_config(options, args, arglist, parser): | |
2485 | """Read and parse configurations. | |
2486 | ||
2487 | If a config file is specified on the command line with the | |
2488 | "--config" option, then only it is used for configuration. | |
2489 | ||
2490 | Otherwise, the user configuration (~/.config/pycodestyle) and any | |
2491 | local configurations in the current directory or above will be | |
2492 | merged together (in that order) using the read method of | |
2493 | ConfigParser. | |
2494 | """ | |
2495 | config = configparser.RawConfigParser() | |
2496 | ||
2497 | cli_conf = options.config | |
2498 | ||
2499 | local_dir = os.curdir | |
2500 | ||
2501 | if USER_CONFIG and os.path.isfile(USER_CONFIG): | |
2502 | if options.verbose: | |
2503 | print('user configuration: %s' % USER_CONFIG) | |
2504 | config.read(USER_CONFIG) | |
2505 | ||
2506 | parent = tail = args and os.path.abspath(os.path.commonprefix(args)) | |
2507 | while tail: | |
2508 | if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG): | |
2509 | local_dir = parent | |
2510 | if options.verbose: | |
2511 | print('local configuration: in %s' % parent) | |
2512 | break | |
2513 | (parent, tail) = os.path.split(parent) | |
2514 | ||
2515 | if cli_conf and os.path.isfile(cli_conf): | |
2516 | if options.verbose: | |
2517 | print('cli configuration: %s' % cli_conf) | |
2518 | config.read(cli_conf) | |
2519 | ||
2520 | pycodestyle_section = None | |
2521 | if config.has_section(parser.prog): | |
2522 | pycodestyle_section = parser.prog | |
2523 | elif config.has_section('pep8'): | |
2524 | pycodestyle_section = 'pep8' # Deprecated | |
2525 | warnings.warn('[pep8] section is deprecated. Use [pycodestyle].') | |
2526 | ||
2527 | if pycodestyle_section: | |
2528 | option_list = {o.dest: o.type or o.action for o in parser.option_list} | |
2529 | ||
2530 | # First, read the default values | |
2531 | (new_options, __) = parser.parse_args([]) | |
2532 | ||
2533 | # Second, parse the configuration | |
2534 | for opt in config.options(pycodestyle_section): | |
2535 | if opt.replace('_', '-') not in parser.config_options: | |
2536 | print(" unknown option '%s' ignored" % opt) | |
2537 | continue | |
2538 | if options.verbose > 1: | |
2539 | print(" {} = {}".format(opt, | |
2540 | config.get(pycodestyle_section, opt))) | |
2541 | normalized_opt = opt.replace('-', '_') | |
2542 | opt_type = option_list[normalized_opt] | |
2543 | if opt_type in ('int', 'count'): | |
2544 | value = config.getint(pycodestyle_section, opt) | |
2545 | elif opt_type in ('store_true', 'store_false'): | |
2546 | value = config.getboolean(pycodestyle_section, opt) | |
2547 | else: | |
2548 | value = config.get(pycodestyle_section, opt) | |
2549 | if normalized_opt == 'exclude': | |
2550 | value = normalize_paths(value, local_dir) | |
2551 | setattr(new_options, normalized_opt, value) | |
2552 | ||
2553 | # Third, overwrite with the command-line options | |
2554 | (options, __) = parser.parse_args(arglist, values=new_options) | |
2555 | return options | |
2556 | ||
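| # A minimal project configuration, e.g. in setup.cfg or tox.ini | |
| # (the values shown are illustrative): | |
| #     [pycodestyle] | |
| #     max-line-length = 100 | |
| #     ignore = E226,E302 | |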
2557 | ||
2558 | def process_options(arglist=None, parse_argv=False, config_file=None, | |
2559 | parser=None, verbose=None): | |
2560 | """Process options passed either via arglist or command line args. | |
2561 | ||
2562 | Passing in the ``config_file`` parameter allows other tools, such as | |
2563 | flake8, to specify their own options to be processed in pycodestyle. | |
2564 | """ | |
2565 | if not parser: | |
2566 | parser = get_parser() | |
2567 | if not parser.has_option('--config'): | |
2568 | group = parser.add_option_group("Configuration", description=( | |
2569 | "The project options are read from the [%s] section of the " | |
2570 | "tox.ini file or the setup.cfg file located in any parent folder " | |
2571 | "of the path(s) being processed. Allowed options are: %s." % | |
2572 | (parser.prog, ', '.join(parser.config_options)))) | |
2573 | group.add_option('--config', metavar='path', default=config_file, | |
2574 | help="user config file location") | |
2575 | # Don't read the command line if the module is used as a library. | |
2576 | if not arglist and not parse_argv: | |
2577 | arglist = [] | |
2578 | # If parse_argv is True and arglist is None, arguments are | |
2579 | # parsed from the command line (sys.argv) | |
2580 | (options, args) = parser.parse_args(arglist) | |
2581 | options.reporter = None | |
2582 | ||
2583 | # If explicitly specified verbosity, override any `-v` CLI flag | |
2584 | if verbose is not None: | |
2585 | options.verbose = verbose | |
2586 | ||
2587 | if parse_argv and not args: | |
2588 | if options.diff or any(os.path.exists(name) | |
2589 | for name in PROJECT_CONFIG): | |
2590 | args = ['.'] | |
2591 | else: | |
2592 | parser.error('input not specified') | |
2593 | options = read_config(options, args, arglist, parser) | |
2594 | options.reporter = parse_argv and options.quiet == 1 and FileReport | |
2595 | ||
2596 | options.filename = _parse_multi_options(options.filename) | |
2597 | options.exclude = normalize_paths(options.exclude) | |
2598 | options.select = _parse_multi_options(options.select) | |
2599 | options.ignore = _parse_multi_options(options.ignore) | |
2600 | ||
2601 | if options.diff: | |
2602 | options.reporter = DiffReport | |
2603 | stdin = stdin_get_value() | |
2604 | options.selected_lines = parse_udiff(stdin, options.filename, args[0]) | |
2605 | args = sorted(options.selected_lines) | |
2606 | ||
2607 | return options, args | |
2608 | ||
2609 | ||
2610 | def _parse_multi_options(options, split_token=','): | |
2611 | r"""Split and strip and discard empties. | |
2612 | ||
2613 | Turns the following: | |
2614 | ||
2615 | A, | |
2616 | B, | |
2617 | ||
2618 | into ["A", "B"] | |
2619 | """ | |
2620 | if options: | |
2621 | return [o.strip() for o in options.split(split_token) if o.strip()] | |
2622 | else: | |
2623 | return options | |
2624 | ||
2625 | ||
2626 | def _main(): | |
2627 | """Parse options and run checks on Python source.""" | |
2628 | import signal | |
2629 | ||
2630 | # Handle "Broken pipe" gracefully | |
2631 | try: | |
2632 | signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1)) | |
2633 | except AttributeError: | |
2634 | pass # not supported on Windows | |
2635 | ||
2636 | style_guide = StyleGuide(parse_argv=True) | |
2637 | options = style_guide.options | |
2638 | ||
2639 | report = style_guide.check_files() | |
2640 | ||
2641 | if options.statistics: | |
2642 | report.print_statistics() | |
2643 | ||
2644 | if options.benchmark: | |
2645 | report.print_benchmark() | |
2646 | ||
2647 | if report.total_errors: | |
2648 | if options.count: | |
2649 | sys.stderr.write(str(report.total_errors) + '\n') | |
2650 | sys.exit(1) | |
2651 | ||
2652 | ||
2653 | if __name__ == '__main__': | |
2654 | _main() |