]> crepu.dev Git - config.git/blame_incremental - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/black/strings.py
Reorganización de directorios
[config.git] / djavu-asus / emacs / elpy / rpc-venv / lib / python3.11 / site-packages / black / strings.py
... / ...
CommitLineData
1"""
2Simple formatting on strings. Further string formatting code is in trans.py.
3"""
4
5import re
6import sys
7from functools import lru_cache
8from typing import Final, List, Match, Pattern
9
10from black._width_table import WIDTH_TABLE
11from blib2to3.pytree import Leaf
12
# Every character that may legally appear in a string literal's prefix.
STRING_PREFIX_CHARS: Final = "furbFURB"

# Group 1 is the (possibly empty) run of prefix characters, group 2 is the rest
# of the literal; DOTALL lets the remainder span newlines.
STRING_PREFIX_RE: Final = re.compile(
    rf"^([{STRING_PREFIX_CHARS}]*)(.*)$", re.DOTALL
)

# Group 1 is the first non-whitespace character that follows leading
# whitespace containing at least one tab.
FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)")

# A run of backslashes followed by the body of a \u, \U, \x or \N{...} escape.
# Each kind of escape captures its payload in a group named after it.
UNICODE_ESCAPE_RE: Final = re.compile(
    r"(?P<backslashes>\\+)(?P<body>"
    r"(u(?P<u>[a-fA-F0-9]{4}))"  # \uxxxx: 16-bit hex value
    r"|(U(?P<U>[a-fA-F0-9]{8}))"  # \Uxxxxxxxx: 32-bit hex value
    r"|(x(?P<x>[a-fA-F0-9]{2}))"  # \xhh: hex value
    r"|(N\{(?P<N>[a-zA-Z0-9 \-]{2,})\})"  # \N{name}: Unicode database name
    r")",
    re.VERBOSE,
)
27
28
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply the `regex` -> `replacement` substitution to `original` two times.

    A single pass cannot rewrite matches that overlap; string normalization
    relies on the second pass to pick up the ones the first pass skipped.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
36
37
def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string, once its leading prefix characters are stripped,
        opens with three double quotes or three single quotes.
    """
    unprefixed = string.lstrip(STRING_PREFIX_CHARS)
    return unprefixed.startswith(('"""', "'''"))
45
46
def lines_with_leading_tabs_expanded(s: str) -> List[str]:
    """
    Splits string into lines and expands only leading tabs (following the normal
    Python rules)

    Only the leading whitespace of a line is tab-expanded; tabs that appear
    after the first non-whitespace character are kept as they are, and lines
    that are empty or consist solely of whitespace are returned unchanged.

    The leading whitespace is located with `str.lstrip` rather than a regular
    expression: a pattern with several adjacent whitespace quantifiers
    backtracks polynomially on long runs of tabs/spaces, which makes
    formatting time explode on such input.

    Returns:
        The lines of @s (as produced by `str.splitlines`), each with the tabs
        in its leading whitespace expanded.
    """
    lines = []
    for line in s.splitlines():
        stripped_line = line.lstrip()
        if not stripped_line or stripped_line == line:
            # Whitespace-only line or no indentation at all: nothing to expand.
            lines.append(line)
        else:
            # expandtabs() is a no-op when the indentation contains no tab, so
            # it can be applied to every indented line.
            indentation = line[: len(line) - len(stripped_line)]
            lines.append(indentation.expandtabs() + stripped_line)
    return lines
67
68
def fix_docstring(docstring: str, prefix: str) -> str:
    """Re-indent the lines of `docstring` so continuation lines start with `prefix`.

    The first line is stripped on both sides. For the remaining lines the
    smallest indentation found on a non-blank line is removed, trailing
    whitespace is dropped, and `prefix` is prepended. Blank lines become empty
    strings, except the very last line, which always receives `prefix`.
    """
    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    if not docstring:
        return ""

    lines = lines_with_leading_tabs_expanded(docstring)
    first, rest = lines[0], lines[1:]

    # Smallest indentation among the non-blank continuation lines; stays at
    # sys.maxsize when there is no such line.
    indent = min(
        (len(line) - len(line.lstrip()) for line in rest if line.lstrip()),
        default=sys.maxsize,
    )

    trimmed = [first.strip()]
    if indent < sys.maxsize:
        final_idx = len(rest) - 1
        for idx, line in enumerate(rest):
            content = line[indent:].rstrip()
            if content or idx == final_idx:
                trimmed.append(prefix + content)
            else:
                trimmed.append("")
    return "\n".join(trimmed)
91
92
def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        The leading run of prefix characters of @string (e.g. '', 'r', 'f',
        or 'rf').
    """
    assert_is_leaf_string(string)

    # Advance past every prefix character; the slice up to that point is the
    # prefix.
    end = 0
    while string[end] in STRING_PREFIX_CHARS:
        end += 1
    return string[:end]
110
111
def assert_is_leaf_string(string: str) -> None:
    """
    Verifies that @string looks like the `leaf.value` of a Leaf whose
    `leaf.type == token.STRING`. The exact conditions that are checked are
    listed below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    # Position of the earliest quote character of either kind, or -1 when the
    # string contains no quote character at all.
    found = [pos for pos in (string.find('"'), string.find("'")) if pos != -1]
    quote_idx = min(found, default=-1)

    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string.endswith(
        ("'", '"')
    ), f"{string!r} is missing an ending quote character (' or \")."

    leading_chars = set(string[:quote_idx])
    allowed_chars = set(STRING_PREFIX_CHARS)
    assert (
        leading_chars <= allowed_chars
    ), f"{leading_chars} is NOT a subset of {allowed_chars}."
145
146
def normalize_string_prefix(s: str) -> str:
    """Return `s` with its prefix normalized.

    "F" and "B" are lowercased, "u"/"U" are dropped, and a two-character
    prefix is reordered so that the "r"/"R" comes first.
    """
    match = STRING_PREFIX_RE.match(s)
    assert match is not None, f"failed to match string {s!r}"
    old_prefix, remainder = match.groups()

    # One pass: F -> f, B -> b, and delete every U/u.
    new_prefix = old_prefix.translate(str.maketrans("FB", "fb", "Uu"))

    # Python syntax guarantees max 2 prefixes and that one of them is "r"
    if len(new_prefix) == 2 and new_prefix[0].lower() != "r":
        new_prefix = new_prefix[::-1]
    return new_prefix + remainder
163
164
# The `re` module keeps its own cache of compiled patterns, but going through
# lru_cache is still 5-9% faster on a long list literal of strings because
# lru_cache's lookup overhead is much lower.
@lru_cache(maxsize=64)
def _cached_compile(pattern: str) -> Pattern[str]:
    """Compile `pattern`, memoizing the compiled object per pattern string."""
    compiled = re.compile(pattern)
    return compiled
171
172
def normalize_string_quotes(s: str) -> str:
    """Switch `s` to double quotes unless that would require more escaping.

    Backslashes in front of quotes are added or removed as needed. Strings
    nested inside f-strings are not parsed or fixed. Raw strings never gain or
    lose backslashes, and literals that already use triple double quotes are
    returned unchanged.
    """
    value = s.lstrip(STRING_PREFIX_CHARS)
    if value[:3] == '"""':
        return s

    if value[:3] == "'''":
        orig_quote, new_quote = "'''", '"""'
    elif value[0] == '"':
        orig_quote, new_quote = '"', "'"
    else:
        orig_quote, new_quote = "'", '"'

    first_quote_pos = s.find(orig_quote)
    if first_quote_pos == -1:
        return s  # There's an internal error

    prefix = s[:first_quote_pos]
    folded_prefix = prefix.casefold()
    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]

    if "r" in folded_prefix:
        if unescaped_new_quote.search(body):
            # An unescaped new_quote inside a raw string cannot be escaped
            # without changing the value, so the conversion is impossible.
            return s

        # Raw strings keep their backslashes exactly as written.
        new_body = body
    else:
        # Drop escapes that are unnecessary under the current quote style.
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # From here on, the de-escaped form counts as the original.
            body = new_body
            s = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)

    if "f" in folded_prefix:
        matches = re.findall(
            r"""
            (?:(?<!\{)|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:(?!\})|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        if any("\\" in str(expression) for expression in matches):
            # Do not introduce backslashes in interpolated expressions
            return s

    if new_quote == '"""' and new_body.endswith('"'):
        # Edge case: a body ending in a double quote would merge with the
        # closing triple quote, so that last quote has to be escaped.
        new_body = new_body[:-1] + '\\"'

    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return s  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return s  # Prefer double quotes

    return f"{prefix}{new_quote}{new_body}{new_quote}"
245
246
def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
    """Replace hex codes in Unicode escape sequences with lowercase representation.

    The hex digits of `\\x`, `\\u` and `\\U` escapes are lowercased and the name
    inside `\\N{...}` is uppercased. `leaf.value` is rewritten in place.

    Nothing is changed when:
      * the literal is raw (its prefix contains "r"/"R"), or
      * the escape is preceded by an even number of backslashes, i.e. the
        backslashes escape each other and the text that follows is literal.

    In bytes literals (prefix contains "b"/"B") only `\\x` is an escape
    sequence; `\\u`, `\\U` and `\\N{...}` are ordinary characters there, so
    changing their case would change the value of the literal. They are
    therefore left untouched.
    """
    text = leaf.value
    prefix = get_string_prefix(text).lower()
    if "r" in prefix:
        return  # Raw strings contain no escape sequences.
    is_bytes = "b" in prefix

    def replace(m: Match[str]) -> str:
        groups = m.groupdict()
        back_slashes = groups["backslashes"]

        if len(back_slashes) % 2 == 0:
            # Every backslash is itself escaped: the body is literal text.
            return back_slashes + groups["body"]

        if groups["x"]:
            # \x
            return back_slashes + "x" + groups["x"].lower()
        if is_bytes:
            # \u, \U and \N{} are not escapes in bytes literals.
            return m.group(0)
        if groups["u"]:
            # \u
            return back_slashes + "u" + groups["u"].lower()
        if groups["U"]:
            # \U
            return back_slashes + "U" + groups["U"].lower()

        assert groups["N"], f"Unexpected match: {m}"
        # \N{}
        return back_slashes + "N{" + groups["N"].upper() + "}"

    leaf.value = UNICODE_ESCAPE_RE.sub(replace, text)
276
277
@lru_cache(maxsize=4096)
def char_width(char: str) -> int:
    """Return how many columns `char` occupies when displayed in a terminal or
    editor (which respects Unicode East Asian Width).

    The codepoint is looked up in WIDTH_TABLE by binary search over its
    (start, end, width) entries. A negative table width is reported as 0, and
    a codepoint that falls in no entry is reported as 1.
    """
    codepoint = ord(char)
    low, high = 0, len(WIDTH_TABLE) - 1
    probe = high // 2
    while True:
        range_start, range_end, width = WIDTH_TABLE[probe]
        if range_start <= codepoint <= range_end:
            return max(width, 0)
        if codepoint < range_start:
            high = probe - 1
        else:
            low = probe + 1
        if low > high:
            # Search interval exhausted: the codepoint is not in the table.
            return 1
        probe = (low + high) // 2
303
304
def str_width(line_str: str) -> int:
    """Return how many columns `line_str` occupies when displayed in a
    terminal or editor (which respects Unicode East Asian Width).

    Useful, for example, to decide whether a string is too wide to display in
    a terminal or editor.
    """
    if not line_str.isascii():
        return sum(char_width(char) for char in line_str)
    # Fast path: a pure-ASCII line is as wide as it is long.
    return len(line_str)
316
317
def count_chars_in_width(line_str: str, max_width: int) -> int:
    """Return how many leading characters of `line_str` fit within `max_width`
    columns of a terminal or editor (which respects Unicode East Asian Width).
    """
    used_width = 0
    for position, char in enumerate(line_str):
        used_width += char_width(char)
        if used_width > max_width:
            # This character is the first one that no longer fits.
            return position
    return len(line_str)
329 return len(line_str)