]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | from dataclasses import dataclass | |
3 | from functools import lru_cache | |
4 | from typing import Final, Iterator, List, Optional, Union | |
5 | ||
6 | from black.nodes import ( | |
7 | CLOSING_BRACKETS, | |
8 | STANDALONE_COMMENT, | |
9 | WHITESPACE, | |
10 | container_of, | |
11 | first_leaf_of, | |
12 | preceding_leaf, | |
13 | syms, | |
14 | ) | |
15 | from blib2to3.pgen2 import token | |
16 | from blib2to3.pytree import Leaf, Node | |
17 | ||
# Either kind of syntax-tree element handled in this module.
LN = Union[Leaf, Node]

# Normalized comment texts that act as formatting markers.
FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"}
FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"}
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}
# Every marker that excludes code from formatting: all "off" and "skip" spellings.
FMT_PASS: Final = FMT_OFF | FMT_SKIP

# Characters allowed directly after the hash sign of a comment; `make_comment`
# does not insert a space in front of them.
COMMENT_EXCEPTIONS = " !:#'"
27 | ||
28 | ||
@dataclass
class ProtoComment:
    """Plain-data description of one comment found in a prefix.

    A comment is deliberately not represented as a
    :class:`blib2to3.pytree.Leaf` here, for two reasons:

    * instances can be cached, whereas a `Leaf` should not be reused more than
      once because it stores its lineno, column, prefix, and parent information;
    * `newlines` and `consumed` stay separate from `value`, which simplifies
      handling of special marker comments like ``# fmt: off/on``.
    """

    # Token kind: token.COMMENT or STANDALONE_COMMENT.
    type: int
    # Text of the comment.
    value: str
    # Number of newlines found before the comment.
    newlines: int
    # Number of characters of the original leaf's prefix consumed so far.
    consumed: int
45 | ||
46 | ||
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Yield one parentless comment leaf per comment found in `leaf`'s prefix.

    lib2to3 stores comments inside the whitespace prefix of the following
    token (this happens in `pgen2/driver.py:Driver.parse_tokens()`).  That keeps
    the grammar free of comment rules, but it also means a comment has no
    natural place in the tree, so the leaves produced here have no parent.

    The only distinction that matters is whether a comment shares its line
    with code:

    * inline comments are emitted as regular token.COMMENT leaves;
    * standalone comments are emitted with the fake STANDALONE_COMMENT token
      identifier.

    Each yielded leaf gets a prefix made of as many newlines as preceded the
    comment.  Prefixes of an ENDMARKER leaf are parsed with `is_endmarker` set.
    """
    at_endmarker = leaf.type == token.ENDMARKER
    for proto in list_comments(leaf.prefix, is_endmarker=at_endmarker):
        leading_newlines = "\n" * proto.newlines
        yield Leaf(proto.type, proto.value, prefix=leading_newlines)
68 | ||
69 | ||
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    The prefix is processed line by line.  Lines that do not start with ``#``
    (after stripping leading whitespace) produce no comment; empty ones are
    counted as newlines preceding the next comment.

    A comment is typed token.COMMENT (a trailing comment) only when it sits on
    the first line of the prefix, not counting preceding lines that end in a
    backslash, and `is_endmarker` is false.  Every other comment is typed
    STANDALONE_COMMENT.

    `ProtoComment.consumed` is the number of prefix characters up to and
    including the line terminator of the comment's line.  Both ``\\n`` and
    ``\\r\\n`` terminators are counted with their real length, so the value can
    be used to slice the original prefix.

    Results are memoized with `lru_cache`, so equal arguments return the same
    list object.
    """
    result: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return result

    # The capturing group makes `re.split` keep the terminators, so `pieces`
    # alternates: line, terminator, line, ..., line.
    pieces = re.split(r"(\r?\n)", prefix)
    lines = pieces[0::2]
    terminators = pieces[1::2]

    consumed = 0
    nlines = 0
    ignored_lines = 0
    for index, line in enumerate(lines):
        if index < len(terminators):
            terminator_length = len(terminators[index])
        else:
            # The last line has no terminator.  One character is still counted
            # for it, as before; slicing the prefix past its end is harmless.
            terminator_length = 1
        consumed += len(line) + terminator_length
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        result.append(
            ProtoComment(
                type=comment_type, value=comment, newlines=nlines, consumed=consumed
            )
        )
        nlines = 0
    return result
105 | ||
106 | ||
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    Trailing whitespace is removed.  All comments (except for "##", "#!", "#:",
    "#'") should have a single space between the hash sign and the content,
    so one is inserted unless the content already starts with a space or with
    one of the other characters in `COMMENT_EXCEPTIONS`.

    A non-breaking space directly after the hash sign is replaced by a regular
    space, unless the rest of the comment starts with ``type:``.

    If `content` didn't start with a hash sign, one is provided.  Empty content
    yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Spelled as an escape on purpose: a literal U+00A0 is indistinguishable
    # from a plain space in source and is easily flattened to one, which would
    # silently turn the check below into a no-op.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    if content and content[0] not in COMMENT_EXCEPTIONS:
        content = " " + content
    return "#" + content
131 | ||
132 | ||
def normalize_fmt_off(node: Node) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments.

    Keeps calling `convert_one_fmt_off_pair` on `node` until it reports that
    nothing was converted.
    """
    while convert_one_fmt_off_pair(node):
        pass
138 | ||
139 | ||
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Replace the code covered by one formatting marker with a standalone comment.

    Scans the leaves of `node` for the first marker from `FMT_PASS`
    (`# fmt: off`, `# fmt: skip` and their alternative spellings) that has
    nodes to ignore.  Those nodes are removed from the tree and a single
    STANDALONE_COMMENT leaf, whose value is their source text combined with
    the marker, is inserted where the first of them used to be.

    Returns True as soon as one marker has been converted, and False when no
    marker in `node` could be converted.
    """
    for leaf in node.leaves():
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            marker = comment.value
            if marker not in FMT_PASS:
                previous_consumed = comment.consumed
                continue

            is_off = marker in FMT_OFF
            is_skip = marker in FMT_SKIP

            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            if comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    after_whitespace = prev.type in WHITESPACE
                    if is_off and not after_whitespace:
                        continue
                    if is_skip and after_whitespace:
                        continue

            ignored_nodes = list(generate_ignored_nodes(leaf, comment))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            parent = first.parent
            prefix = first.prefix
            if is_off:
                # Drop the marker (and everything before it) from the prefix of
                # the first ignored node.
                first.prefix = prefix[comment.consumed :]
            if is_skip:
                first.prefix = ""
                standalone_comment_prefix = prefix
            else:
                leading_newlines = "\n" * comment.newlines
                standalone_comment_prefix = prefix[:previous_consumed] + leading_newlines

            hidden_value = "".join(str(ignored) for ignored in ignored_nodes)
            if is_off:
                hidden_value = comment.value + "\n" + hidden_value
            if is_skip:
                # NOTE(review): only one space separates the code from the
                # marker here, while PEP 8 asks for two before an inline
                # comment -- confirm this literal is what is intended.
                hidden_value += " " + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]

            # Detach the ignored nodes, remembering the first position reported
            # by `remove()`; the replacement leaf is inserted there.
            insert_at: Optional[int] = None
            for ignored in ignored_nodes:
                removed_index = ignored.remove()
                if insert_at is None:
                    insert_at = removed_index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert insert_at is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            replacement = Leaf(
                STANDALONE_COMMENT,
                hidden_value,
                prefix=standalone_comment_prefix,
                fmt_pass_converted_first_leaf=first_leaf_of(first),
            )
            parent.insert_child(insert_at, replacement)
            return True

    return False
206 | ||
207 | ||
def generate_ignored_nodes(leaf: Leaf, comment: ProtoComment) -> Iterator[LN]:
    """Yield the nodes that the marker `comment` found on `leaf` excludes.

    For a `# fmt: skip` marker the work is delegated to
    `_generate_ignored_nodes_from_fmt_skip`.

    Otherwise, starting from the container of `leaf`, nodes are generated
    sibling by sibling until formatting is switched back on by `# fmt: on`,
    the ENDMARKER is reached, or the block ends.
    """
    if comment.value in FMT_SKIP:
        yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment)
        return

    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        if is_fmt_on(container):
            return

        if not children_contains_fmt_on(container):
            if container.type == token.DEDENT and container.next_sibling is None:
                # This can happen when there is no matching `# fmt: on` comment at
                # the same level as `# fmt: off`. We need to keep this DEDENT.
                return
            yield container
            container = container.next_sibling
            continue

        # `# fmt: on` is somewhere among the children: yield only the children
        # that come before it.
        for position, child in enumerate(container.children):
            if isinstance(child, Leaf) and is_fmt_on(child):
                if child.type in CLOSING_BRACKETS:
                    # This means `# fmt: on` is placed at a different bracket level
                    # than `# fmt: off`. This is an invalid use, but as a courtesy,
                    # we include this closing bracket in the ignored nodes.
                    # The alternative is to fail the formatting.
                    yield child
                return

            # True when `# fmt: on` is placed right after an indentation level;
            # in that case the INDENT token must not be swallowed.
            indent_before_fmt_on = (
                child.type == token.INDENT
                and position < len(container.children) - 1
                and children_contains_fmt_on(container.children[position + 1])
            )
            if indent_before_fmt_on or children_contains_fmt_on(child):
                return
            yield child
251 | ||
252 | ||
def _generate_ignored_nodes_from_fmt_skip(
    leaf: Leaf, comment: ProtoComment
) -> Iterator[LN]:
    """Yield, in source order, the nodes covered by the `# fmt: skip` on `leaf`.

    Nothing is yielded unless `comment` is the first comment in `leaf`'s
    prefix.  When nodes are yielded, `leaf`'s prefix is emptied first.
    """
    sibling = leaf.prev_sibling
    parent = leaf.parent
    # The prefix is parsed again so that its first comment is in the same
    # formatted form as `comment.value` and the two can be compared.
    leading_comments = list_comments(leaf.prefix, is_endmarker=False)
    if not leading_comments or leading_comments[0].value != comment.value:
        return

    if sibling is not None:
        leaf.prefix = ""
        # Walk backwards through the previous siblings, stopping at the first
        # one whose prefix contains a newline (or when there are no more).
        backwards = [sibling]
        while "\n" not in sibling.prefix and sibling.prev_sibling is not None:
            sibling = sibling.prev_sibling
            backwards.append(sibling)
        yield from reversed(backwards)
        return

    if parent is None or parent.type != syms.suite or leaf.type != token.NEWLINE:
        return

    # The `# fmt: skip` is on the colon line of the if/while/def/class/...
    # statements. The ignored nodes should be previous siblings of the
    # parent suite node.
    leaf.prefix = ""
    header_backwards: List[LN] = []
    cursor = parent.prev_sibling
    while cursor is not None and cursor.type != syms.suite:
        header_backwards.append(cursor)
        cursor = cursor.prev_sibling
    # Special case for `async_stmt` where the ASYNC token is on the
    # grandparent node.
    grandparent = parent.parent
    if (
        grandparent is not None
        and grandparent.prev_sibling is not None
        and grandparent.prev_sibling.type == token.ASYNC
    ):
        header_backwards.append(grandparent.prev_sibling)
    yield from reversed(header_backwards)
293 | ||
294 | ||
def is_fmt_on(container: LN) -> bool:
    """Tell whether the prefix of `container` leaves formatting switched on.

    The last on/off marker among the comments in the prefix decides; when the
    prefix has no such marker the answer is False.
    """
    switches = [
        comment.value in FMT_ON
        for comment in list_comments(container.prefix, is_endmarker=False)
        if comment.value in FMT_ON or comment.value in FMT_OFF
    ]
    return bool(switches) and switches[-1]
306 | ||
307 | ||
def children_contains_fmt_on(container: LN) -> bool:
    """Tell whether any child of `container` starts with formatting switched on.

    Each child is judged by `is_fmt_on` applied to its first leaf; children
    without a first leaf are skipped.
    """
    first_leaves = (first_leaf_of(child) for child in container.children)
    return any(leaf is not None and is_fmt_on(leaf) for leaf in first_leaves)
316 | ||
317 | ||
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """
    Returns:
        True iff at least one comment in @comment_list starts with a pragma
        prefix used by one of the more common static analysis tools for python
        (e.g. mypy, flake8, pylint).
    """
    pragma_prefixes = ("# type:", "# noqa", "# pylint:")
    return any(comment.value.startswith(pragma_prefixes) for comment in comment_list)