]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | """Utility methods for flake8.""" |
2 | from __future__ import annotations | |
3 | ||
4 | import fnmatch as _fnmatch | |
5 | import functools | |
6 | import io | |
7 | import logging | |
8 | import os | |
9 | import platform | |
10 | import re | |
11 | import sys | |
12 | import textwrap | |
13 | import tokenize | |
14 | from typing import NamedTuple | |
15 | from typing import Pattern | |
16 | from typing import Sequence | |
17 | ||
18 | from flake8 import exceptions | |
19 | ||
# Separator for comma-separated option values: a single comma or any single
# whitespace character.  Default splitter of ``parse_comma_separated_list``.
COMMA_SEPARATED_LIST_RE = re.compile(r"[,\s]")
# Same idea, but a plain space is not in the character class, so items may
# contain spaces.  NOTE(review): presumably used for local plugin lists, as
# the name suggests -- no caller is visible in this file.
LOCAL_PLUGIN_LIST_RE = re.compile(r"[,\t\n\r\f\v]")
# Any run of "-", "_" or "."; ``normalize_pypi_name`` replaces each run with
# a single "-".
NORMALIZE_PACKAGE_NAME_RE = re.compile(r"[-_.]+")
23 | ||
24 | ||
def parse_comma_separated_list(
    value: str, regexp: Pattern[str] = COMMA_SEPARATED_LIST_RE
) -> list[str]:
    """Split ``value`` into its non-empty, whitespace-stripped items.

    :param value:
        The raw string to split.
    :param regexp:
        Compiled regular expression whose matches act as the separators.
        The default splits on commas and whitespace.
    :returns:
        The items in their original order, each stripped of surrounding
        whitespace, with empty items dropped.
    """
    assert isinstance(value, str), value

    items = []
    for chunk in regexp.split(value):
        chunk = chunk.strip()
        # consecutive separators produce empty chunks; skip them
        if chunk:
            items.append(chunk)
    return items
43 | ||
44 | ||
class _Token(NamedTuple):
    """Lexical token produced while scanning a files-to-codes mapping."""

    # token type: one of the ``_CODE`` / ``_FILE`` / ``_COLON`` / ``_COMMA`` /
    # ``_WS`` / ``_EOF`` names
    tp: str
    # the matched source text with surrounding whitespace stripped
    src: str
48 | ||
49 | ||
# token type names stored in ``_Token.tp``
_CODE, _FILE, _COLON, _COMMA, _WS = "code", "file", "colon", "comma", "ws"
# type of the sentinel token appended after the last real token
_EOF = "eof"
# (pattern, token type) pairs tried in this order at every scan position;
# the first pattern that matches wins.  Order therefore matters:
# - ``_CODE`` comes before the more general ``_FILE`` so that an upper-case
#   code followed by end-of-input, whitespace or a comma is not taken for a
#   filename;
# - ``_COLON`` and ``_COMMA`` absorb their surrounding whitespace and come
#   before the bare-whitespace ``_WS``.
_FILE_LIST_TOKEN_TYPES = [
    (re.compile(r"[A-Z]+[0-9]*(?=$|\s|,)"), _CODE),
    (re.compile(r"[^\s:,]+"), _FILE),
    (re.compile(r"\s*:\s*"), _COLON),
    (re.compile(r"\s*,\s*"), _COMMA),
    (re.compile(r"\s+"), _WS),
]
59 | ||
60 | ||
def _tokenize_files_to_codes_mapping(value: str) -> list[_Token]:
    """Split ``value`` into ``_Token`` objects, ending with an EOF token.

    At every position the patterns of ``_FILE_LIST_TOKEN_TYPES`` are tried
    in order and the first one that matches produces the next token.

    :param value:
        The raw mapping text.
    :returns:
        The tokens in source order followed by one ``_EOF`` token.
    """
    result = []
    position = 0
    length = len(value)
    while position < length:
        found = None
        for pattern, kind in _FILE_LIST_TOKEN_TYPES:
            found = pattern.match(value, position)
            if found:
                break
        if not found:
            # no pattern matched at this position
            raise AssertionError("unreachable", value, position)
        result.append(_Token(kind, found.group().strip()))
        position = found.end()

    result.append(_Token(_EOF, ""))
    return result
76 | ||
77 | ||
def parse_files_to_codes_mapping(  # noqa: C901
    value_: Sequence[str] | str,
) -> list[tuple[str, list[str]]]:
    """Parse a files-to-codes mapping.

    A files-to-codes mapping is a sequence of values specified as
    `filenames list:codes list ...`.  Each of the lists may be separated by
    either comma or whitespace tokens.

    :param value_:
        String to be parsed and normalized.  A non-string sequence is
        joined with newlines before parsing.
    :returns:
        One ``(filename, codes)`` pair per filename, in order of appearance.
        Filenames whose group has no codes contribute nothing.  Every pair
        owns its own ``codes`` list.
    :raises flake8.exceptions.ExecutionError:
        If a token appears where the grammar does not allow it.
    """
    if not isinstance(value_, str):
        value = "\n".join(value_)
    else:
        value = value_

    ret: list[tuple[str, list[str]]] = []
    if not value.strip():
        return ret

    # Parser state lives on a local class so the nested helpers can rebind
    # it without ``nonlocal``; the class is re-created on every call.
    class State:
        seen_sep = True
        seen_colon = False
        filenames: list[str] = []
        codes: list[str] = []

    def _reset() -> None:
        # flush the finished `filenames:codes` group, then start a new one
        if State.codes:
            for filename in State.filenames:
                # copy so that result entries never share one mutable list
                ret.append((filename, list(State.codes)))
        State.seen_sep = True
        State.seen_colon = False
        State.filenames = []
        State.codes = []

    def _unexpected_token() -> exceptions.ExecutionError:
        return exceptions.ExecutionError(
            f"Expected `per-file-ignores` to be a mapping from file exclude "
            f"patterns to ignore codes.\n\n"
            f"Configured `per-file-ignores` setting:\n\n"
            f"{textwrap.indent(value.strip(), '    ')}"
        )

    for token in _tokenize_files_to_codes_mapping(value):
        # legal in any state: separator sets the sep bit
        if token.tp in {_COMMA, _WS}:
            State.seen_sep = True
        # looking for filenames
        elif not State.seen_colon:
            if token.tp == _COLON:
                State.seen_colon = True
                State.seen_sep = True
            elif State.seen_sep and token.tp == _FILE:
                State.filenames.append(token.src)
                State.seen_sep = False
            else:
                raise _unexpected_token()
        # looking for codes
        else:
            if token.tp == _EOF:
                _reset()
            elif State.seen_sep and token.tp == _CODE:
                State.codes.append(token.src)
                State.seen_sep = False
            elif State.seen_sep and token.tp == _FILE:
                # a filename after codes starts the next group
                _reset()
                State.filenames.append(token.src)
                State.seen_sep = False
            else:
                raise _unexpected_token()

    return ret
150 | ||
151 | ||
def normalize_paths(
    paths: Sequence[str], parent: str = os.curdir
) -> list[str]:
    """Apply :func:`normalize_path` to every entry of ``paths``.

    :param paths:
        The paths to normalize; must be a ``list``.
    :param parent:
        Directory that relative paths are resolved against.
    :returns:
        A new list holding the normalized paths in the same order.
    """
    assert isinstance(paths, list), paths
    normalized = [normalize_path(entry, parent) for entry in paths]
    return normalized
162 | ||
163 | ||
def normalize_path(path: str, parent: str = os.curdir) -> str:
    """Normalize a single path.

    A path that is ``"."`` or that contains a directory separator is made
    absolute relative to ``parent``.  Any other value (a bare file name or
    pattern) is returned as given.

    :param path:
        The path or pattern to normalize.
    :param parent:
        Directory that a relative ``path`` is resolved against.
    :returns:
        The normalized path without trailing separators.  A filesystem
        root is returned intact rather than being stripped down to an
        empty string or a bare drive.
    """
    # NOTE(sigmavirus24): Using os.path.sep and os.path.altsep allow for
    # Windows compatibility with both Windows-style paths (c:\foo\bar) and
    # Unix style paths (/foo/bar).
    separator = os.path.sep
    # NOTE(sigmavirus24): os.path.altsep may be None
    alternate_separator = os.path.altsep or ""
    separators = separator + alternate_separator
    if (
        path == "."
        or separator in path
        or (alternate_separator and alternate_separator in path)
    ):
        path = os.path.abspath(os.path.join(parent, path))

    _, tail = os.path.splitdrive(path)
    if tail and not tail.strip(separators):
        # Everything after the drive is separators, i.e. ``path`` is a
        # root.  Stripping it would yield "" (or a drive-relative "C:"),
        # which no longer names the root directory.
        return path
    return path.rstrip(separators)
183 | ||
184 | ||
@functools.lru_cache(maxsize=1)
def stdin_get_value() -> str:
    """Read all of stdin once and return it decoded as text.

    The result is cached so that later calls (for example from plugins)
    receive the same text without reading stdin again.

    :returns:
        The stdin contents, decoded with the encoding detected by
        :func:`tokenize.detect_encoding`, or as UTF-8 when detection or
        decoding with the detected encoding fails.
    """
    raw = sys.stdin.buffer.read()
    buffer = io.BytesIO(raw)
    try:
        encoding, _ = tokenize.detect_encoding(buffer.readline)
        # detection consumed the first line(s); rewind before decoding
        buffer.seek(0)
        text = io.TextIOWrapper(buffer, encoding).read()
    except (LookupError, SyntaxError, UnicodeError):
        text = raw.decode("utf-8")
    return text
196 | ||
197 | ||
def stdin_get_lines() -> list[str]:
    """Return the cached stdin text as a list of lines.

    :returns:
        The lines of :func:`stdin_get_value`, split the way iterating a
        text stream splits them (line endings are kept).
    """
    stream = io.StringIO(stdin_get_value())
    return stream.readlines()
201 | ||
202 | ||
def is_using_stdin(paths: list[str]) -> bool:
    """Tell whether stdin is among the paths to be checked.

    :param paths:
        The paths that are going to be checked.
    :returns:
        ``True`` if the stdin marker ``-`` is one of ``paths``, otherwise
        ``False``.
    """
    stdin_marker = "-"
    return stdin_marker in paths
212 | ||
213 | ||
def fnmatch(filename: str, patterns: Sequence[str]) -> bool:
    """Match ``filename`` against several :func:`fnmatch.fnmatch` patterns.

    :param filename:
        Name of the file being tested.
    :param patterns:
        The patterns to try, in order.
    :returns:
        ``True`` if ``patterns`` is empty or at least one pattern matches
        ``filename``; ``False`` otherwise.
    """
    # an empty pattern list matches everything
    if not patterns:
        return True
    for pattern in patterns:
        if _fnmatch.fnmatch(filename, pattern):
            return True
    return False
230 | ||
231 | ||
def matches_filename(
    path: str,
    patterns: Sequence[str],
    log_message: str,
    logger: logging.Logger,
) -> bool:
    """Decide whether ``path`` matches any of ``patterns``.

    The base name of ``path`` is tried first (unless it is ``.`` or ``..``)
    and then the absolute path.

    :param path:
        The path to the file under question.
    :param patterns:
        The patterns to match the path against.
    :param log_message:
        Debug-log format string; it is given a mapping with the keys
        ``path`` and ``whether``.
    :param logger:
        Logger that receives the debug message.
    :returns:
        ``True`` if the base name or the absolute path matches a pattern,
        ``False`` otherwise, including when ``patterns`` is empty.
    """
    if not patterns:
        return False

    name = os.path.basename(path)
    if name not in {".", ".."}:
        if fnmatch(name, patterns):
            logger.debug(log_message, {"path": name, "whether": ""})
            return True

    full_path = os.path.abspath(path)
    matched = fnmatch(full_path, patterns)
    if matched:
        negation = ""
    else:
        negation = "not "
    logger.debug(log_message, {"path": full_path, "whether": negation})
    return matched
263 | ||
264 | ||
def get_python_version() -> str:
    """Describe the running interpreter and the platform it runs on.

    :returns:
        A string of the form ``<implementation> <version> on <system>``,
        built from the :mod:`platform` module.
    """
    implementation = platform.python_implementation()
    version = platform.python_version()
    system = platform.system()
    return f"{implementation} {version} on {system}"
276 | ||
277 | ||
def normalize_pypi_name(s: str) -> str:
    """Normalize a distribution name according to PEP 503.

    :param s:
        The distribution name.
    :returns:
        The lower-cased name with every run of ``-``, ``_`` and ``.``
        replaced by a single ``-``.
    """
    collapsed = NORMALIZE_PACKAGE_NAME_RE.sub("-", s)
    return collapsed.lower()