1 """Utility methods for flake8."""
2 from __future__
import annotations
4 import fnmatch
as _fnmatch
14 from typing
import NamedTuple
15 from typing
import Pattern
16 from typing
import Sequence
18 from flake8
import exceptions

COMMA_SEPARATED_LIST_RE = re.compile(r"[,\s]")
LOCAL_PLUGIN_LIST_RE = re.compile(r"[,\t\n\r\f\v]")
NORMALIZE_PACKAGE_NAME_RE = re.compile(r"[-_.]+")


def parse_comma_separated_list(
    value: str, regexp: Pattern[str] = COMMA_SEPARATED_LIST_RE
) -> list[str]:
    """Parse a comma-separated list.

    :param value:
        String to be parsed and normalized.
    :param regexp:
        Compiled regular expression used to split the value when it is a
        string.
    :returns:
        List of values with whitespace stripped.
    """
    assert isinstance(value, str), value

    separated = regexp.split(value)
    item_gen = (item.strip() for item in separated)
    return [item for item in item_gen if item]
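
# Illustrative usage (example added for clarity, not part of the module):
#     parse_comma_separated_list("E121,E123, E126\nE24")
#     == ["E121", "E123", "E126", "E24"]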


class _Token(NamedTuple):
    tp: str
    src: str


_CODE, _FILE, _COLON, _COMMA, _WS = "code", "file", "colon", "comma", "ws"
_EOF = "eof"
_FILE_LIST_TOKEN_TYPES = [
    (re.compile(r"[A-Z]+[0-9]*(?=$|\s|,)"), _CODE),
    (re.compile(r"[^\s:,]+"), _FILE),
    (re.compile(r"\s*:\s*"), _COLON),
    (re.compile(r"\s*,\s*"), _COMMA),
    (re.compile(r"\s+"), _WS),
]
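
# Note (added comment): the patterns above are tried in order, so an all-caps
# error code such as "E501" (when followed by a space, a comma, or the end of
# the string) is tokenized as _CODE before the more general _FILE pattern
# gets a chance to match it.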


def _tokenize_files_to_codes_mapping(value: str) -> list[_Token]:
    tokens = []
    i = 0
    while i < len(value):
        for token_re, token_name in _FILE_LIST_TOKEN_TYPES:
            match = token_re.match(value, i)
            if match:
                tokens.append(_Token(token_name, match.group().strip()))
                i = match.end()
                break
        else:
            raise AssertionError("unreachable", value, i)
    tokens.append(_Token(_EOF, ""))
    return tokens


def parse_files_to_codes_mapping(  # noqa: C901
    value_: Sequence[str] | str,
) -> list[tuple[str, list[str]]]:
    """Parse a files-to-codes mapping.

    A files-to-codes mapping is a sequence of values specified as
    `filenames list:codes list ...`.  Each of the lists may be separated by
    either comma or whitespace tokens.

    :param value: String to be parsed and normalized.
    """
    if not isinstance(value_, str):
        value = "\n".join(value_)
    else:
        value = value_

    ret: list[tuple[str, list[str]]] = []
    if not value.strip():
        return ret

    class State:
        seen_sep = True
        seen_colon = False
        filenames: list[str] = []
        codes: list[str] = []

    def _reset() -> None:
        if State.codes:
            for filename in State.filenames:
                ret.append((filename, State.codes))
        State.seen_sep = True
        State.seen_colon = False
        State.filenames = []
        State.codes = []
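
    # Added note: _reset() flushes the filenames/codes gathered for one
    # `patterns: codes` group into `ret` and re-arms the state machine for
    # the next group.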

    def _unexpected_token() -> exceptions.ExecutionError:
        return exceptions.ExecutionError(
            f"Expected `per-file-ignores` to be a mapping from file exclude "
            f"patterns to ignore codes.\n\n"
            f"Configured `per-file-ignores` setting:\n\n"
            f"{textwrap.indent(value.strip(), '    ')}"
        )

    for token in _tokenize_files_to_codes_mapping(value):
        # legal in any state: separator sets the sep bit
        if token.tp in {_COMMA, _WS}:
            State.seen_sep = True
        # looking for filenames
        elif not State.seen_colon:
            if token.tp == _COLON:
                State.seen_colon = True
                State.seen_sep = True
            elif State.seen_sep and token.tp == _FILE:
                State.filenames.append(token.src)
                State.seen_sep = False
            else:
                raise _unexpected_token()
        # looking for codes
        else:
            if token.tp == _EOF:
                _reset()
            elif State.seen_sep and token.tp == _CODE:
                State.codes.append(token.src)
                State.seen_sep = False
            elif State.seen_sep and token.tp == _FILE:
                _reset()
                State.filenames.append(token.src)
                State.seen_sep = False
            else:
                raise _unexpected_token()

    return ret
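
# Illustrative usage (example added for clarity, not part of the module):
#     parse_files_to_codes_mapping("setup.py:E121,E123 t.py:E501")
#     == [("setup.py", ["E121", "E123"]), ("t.py", ["E501"])]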


def normalize_paths(
    paths: Sequence[str], parent: str = os.curdir
) -> list[str]:
    """Normalize a list of paths relative to a parent directory.

    :returns:
        The normalized paths.
    """
    assert isinstance(paths, list), paths
    return [normalize_path(p, parent) for p in paths]


def normalize_path(path: str, parent: str = os.curdir) -> str:
    """Normalize a single path.

    :returns:
        The normalized path.
    """
    # NOTE(sigmavirus24): Using os.path.sep and os.path.altsep allow for
    # Windows compatibility with both Windows-style paths (c:\foo\bar) and
    # Unix style paths (/foo/bar).
    separator = os.path.sep
    # NOTE(sigmavirus24): os.path.altsep may be None
    alternate_separator = os.path.altsep or ""
    if (
        path == "."
        or separator in path
        or (alternate_separator and alternate_separator in path)
    ):
        path = os.path.abspath(os.path.join(parent, path))
    return path.rstrip(separator + alternate_separator)
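
# Illustrative behaviour (added comment): on a POSIX system,
#     normalize_path("sub/dir/", "/project") == "/project/sub/dir"
# while a bare name like "setup.py" (no separator, not ".") is returned
# unchanged.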


@functools.lru_cache(maxsize=1)
def stdin_get_value() -> str:
    """Get the value of stdin and cache it so plugins can use it."""
    stdin_value = sys.stdin.buffer.read()
    fd = io.BytesIO(stdin_value)
    try:
        coding, _ = tokenize.detect_encoding(fd.readline)
        fd.seek(0)
        return io.TextIOWrapper(fd, coding).read()
    except (LookupError, SyntaxError, UnicodeError):
        return stdin_value.decode("utf-8")


def stdin_get_lines() -> list[str]:
    """Return lines of stdin split according to file splitting."""
    return list(io.StringIO(stdin_get_value()))


def is_using_stdin(paths: list[str]) -> bool:
    """Determine if we're going to read from stdin.

    :param paths:
        The paths that we're going to check.
    :returns:
        True if stdin (-) is in the paths, otherwise False.
    """
    return "-" in paths


def fnmatch(filename: str, patterns: Sequence[str]) -> bool:
    """Wrap :func:`fnmatch.fnmatch` to add some functionality.

    :param filename:
        Name of the file we're trying to match.
    :param patterns:
        Patterns we're using to try to match the filename.
    :returns:
        True if a pattern matches the filename, False if it doesn't.
        ``True`` if patterns is empty.
    """
    if not patterns:
        return True
    return any(_fnmatch.fnmatch(filename, pattern) for pattern in patterns)
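
# Illustrative usage (added comment): fnmatch("setup.py", ["*.py"]) is True,
# and fnmatch("setup.py", []) is also True, so an empty pattern list matches
# every filename.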


def matches_filename(
    path: str,
    patterns: Sequence[str],
    log_message: str,
    logger: logging.Logger,
) -> bool:
    """Use fnmatch to discern if a path exists in patterns.

    :param path:
        The path to the file under question.
    :param patterns:
        The patterns to match the path against.
    :param log_message:
        The message used for logging purposes.
    :returns:
        True if path matches patterns, False otherwise.
    """
    if not patterns:
        return False
    basename = os.path.basename(path)
    if basename not in {".", ".."} and fnmatch(basename, patterns):
        logger.debug(log_message, {"path": basename, "whether": ""})
        return True

    absolute_path = os.path.abspath(path)
    match = fnmatch(absolute_path, patterns)
    logger.debug(
        log_message,
        {"path": absolute_path, "whether": "" if match else "not "},
    )
    return match


def get_python_version() -> str:
    """Find and format the python implementation and version.

    :returns:
        Implementation name, version, and platform as a string.
    """
    return "{} {} on {}".format(
        platform.python_implementation(),
        platform.python_version(),
        platform.system(),
    )


def normalize_pypi_name(s: str) -> str:
    """Normalize a distribution name according to PEP 503."""
    return NORMALIZE_PACKAGE_NAME_RE.sub("-", s).lower()