[config.git] / djavu-asus / elpy / rpc-venv / lib / python3.11 / site-packages / black / strings.py

"""
Simple formatting on strings. Further string formatting code is in trans.py.
"""

import re
import sys
from functools import lru_cache
from typing import Final, List, Match, Pattern

from black._width_table import WIDTH_TABLE
from blib2to3.pytree import Leaf

STRING_PREFIX_CHARS: Final = "furbFURB"  # All possible string prefix characters.
# Splits a string token into its prefix (group 1: any run of prefix characters,
# possibly empty) and everything after it (group 2). DOTALL lets group 2 span
# newlines so multi-line strings match as a whole.
STRING_PREFIX_RE: Final = re.compile(
    r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL
)
# Matches whitespace containing at least one tab, followed by a non-whitespace
# character; group 1 is that first non-whitespace character.
FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)")
# Matches a run of one or more backslashes followed by a \u, \U, \x or \N{...}
# escape body. The run length is captured in "backslashes" so that callers can
# tell an escaped backslash apart from a real escape sequence; the payload of
# each escape kind is captured in the group named after it ("u", "U", "x", "N").
UNICODE_ESCAPE_RE: Final = re.compile(
    r"(?P<backslashes>\\+)(?P<body>"
    r"(u(?P<u>[a-fA-F0-9]{4}))"  # Character with 16-bit hex value xxxx
    r"|(U(?P<U>[a-fA-F0-9]{8}))"  # Character with 32-bit hex value xxxxxxxx
    r"|(x(?P<x>[a-fA-F0-9]{2}))"  # Character with hex value hh
    r"|(N\{(?P<N>[a-zA-Z0-9 \-]{2,})\})"  # Character named name in the Unicode database
    r")",
    re.VERBOSE,
)


def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Apply `regex.sub(replacement, ...)` to `original` two times in a row.

    A single substitution pass cannot rewrite matches that overlap each
    other; string normalization relies on the second pass to catch the
    matches that the first one had to skip.
    """
    after_first_pass = regex.sub(replacement, original)
    after_second_pass = regex.sub(replacement, after_first_pass)
    return after_second_pass


def has_triple_quotes(string: str) -> bool:
    """
    Returns:
        True iff @string, once its leading prefix characters are removed,
        begins with three identical quotation characters.
    """
    unprefixed = string.lstrip(STRING_PREFIX_CHARS)
    return unprefixed.startswith(('"""', "'''"))


def lines_with_leading_tabs_expanded(s: str) -> List[str]:
    """
    Break @s into lines, expanding tabs only in the leading whitespace of each
    line (following the normal Python rules). Tabs located after the first
    non-whitespace character are left alone.
    """

    def expand_leading(line: str) -> str:
        # The pattern only matches when the leading whitespace contains at
        # least one tab; group 1 marks where the real content begins.
        found = FIRST_NON_WHITESPACE_RE.match(line)
        if found is None:
            return line
        content_start = found.start(1)
        return line[:content_start].expandtabs() + line[content_start:]

    return [expand_leading(line) for line in s.splitlines()]


def fix_docstring(docstring: str, prefix: str) -> str:
    """Re-indent @docstring so that its continuation lines start with @prefix.

    The common indentation of all non-blank lines after the first one is
    removed, trailing whitespace is stripped, and @prefix is put in front of
    every non-blank continuation line as well as the very last line. The first
    line is merely stripped. If no continuation line has any content, only the
    stripped first line is returned.
    """
    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
    if not docstring:
        return ""
    all_lines = lines_with_leading_tabs_expanded(docstring)
    first_line, continuation = all_lines[0], all_lines[1:]
    # Smallest indentation among non-blank continuation lines; sys.maxsize
    # acts as the "nothing found" marker.
    indent = min(
        (len(line) - len(line.lstrip()) for line in continuation if line.lstrip()),
        default=sys.maxsize,
    )
    result = [first_line.strip()]
    if indent < sys.maxsize:
        final_pos = len(continuation) - 1
        for pos, line in enumerate(continuation):
            content = line[indent:].rstrip()
            # The last line always receives the prefix, even when blank.
            keep_prefix = bool(content) or pos == final_pos
            result.append(prefix + content if keep_prefix else "")
    return "\n".join(result)


def get_string_prefix(string: str) -> str:
    """
    Pre-conditions:
        * assert_is_leaf_string(@string)

    Returns:
        The leading run of prefix characters of @string
        (e.g. '', 'r', 'f', or 'rf').
    """
    assert_is_leaf_string(string)

    # Advance past every prefix character; the scan stops at the first
    # character that is not one (the opening quote of a valid leaf string).
    end_of_prefix = 0
    while string[end_of_prefix] in STRING_PREFIX_CHARS:
        end_of_prefix += 1

    return string[:end_of_prefix]


def assert_is_leaf_string(string: str) -> None:
    """
    Verifies that @string looks like the `leaf.value` of a Leaf whose
    `leaf.type == token.STRING`. The exact conditions checked are listed
    below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    # Position of the earliest quote character of either kind, or -1 when
    # the string contains no quote at all.
    found = [pos for pos in (string.find('"'), string.find("'")) if pos != -1]
    quote_idx = min(found) if found else -1

    # The opening quote must exist and must not be the final character.
    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string[-1] in {
        "'",
        '"',
    }, f"{string!r} is missing an ending quote character (' or \")."
    # Everything in front of the opening quote has to be a prefix character.
    assert set(string[:quote_idx]) <= set(
        STRING_PREFIX_CHARS
    ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."


def normalize_string_prefix(s: str) -> str:
    """Return @s with its string prefix normalized.

    "F" and "B" are lowercased, "u"/"U" are dropped entirely, and "r"/"R" is
    kept as written. A two-character prefix is reordered so that the "r"/"R"
    comes first.
    """
    parsed = STRING_PREFIX_RE.match(s)
    assert parsed is not None, f"failed to match string {s!r}"
    raw_prefix, remainder = parsed.groups()
    # One pass: map F->f and B->b, delete both spellings of the "u" prefix.
    cleaned_prefix = raw_prefix.translate(str.maketrans("FB", "fb", "Uu"))

    # Python syntax guarantees max 2 prefixes and that one of them is "r"
    if len(cleaned_prefix) == 2 and cleaned_prefix[0].lower() != "r":
        cleaned_prefix = cleaned_prefix[::-1]
    return cleaned_prefix + remainder


# Re(gex) does actually cache patterns internally but this still improves
# performance on a long list literal of strings by 5-9% since lru_cache's
# caching overhead is much lower.
@lru_cache(maxsize=64)
def _cached_compile(pattern: str) -> Pattern[str]:
    """Compile `pattern` with `re.compile`, memoizing up to 64 distinct patterns."""
    return re.compile(pattern)


def normalize_string_quotes(s: str) -> str:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Adds or removes backslashes as appropriate. Doesn't parse and fix
    strings nested in f-strings.

    Returns @s itself (possibly with unnecessary quote escapes removed) when
    switching quotes is impossible or not preferable, otherwise the string
    rewritten with the other quote style.
    """
    # Look past the prefix to see which quote style the string currently uses.
    value = s.lstrip(STRING_PREFIX_CHARS)
    if value[:3] == '"""':
        # Already uses the preferred triple quotes; nothing to do.
        return s

    elif value[:3] == "'''":
        orig_quote = "'''"
        new_quote = '"""'
    elif value[0] == '"':
        # Already double-quoted: single quotes are only considered as a way
        # to reduce escaping (see the final checks below).
        orig_quote = '"'
        new_quote = "'"
    else:
        orig_quote = "'"
        new_quote = '"'
    first_quote_pos = s.find(orig_quote)
    if first_quote_pos == -1:
        return s  # There's an internal error

    prefix = s[:first_quote_pos]
    # new_quote preceded by an even number (possibly zero) of backslashes,
    # i.e. a quote that is not escaped.
    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    # new_quote / orig_quote preceded by an odd number of backslashes,
    # i.e. a quote that is escaped.
    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    # Text between the opening and closing quotes.
    body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return s

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if body != new_body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            s = f"{prefix}{orig_quote}{body}{orig_quote}"
        # Build the candidate body for the new quote style: the original
        # quote no longer needs escaping, while the new quote now does.
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if "f" in prefix.casefold():
        # Collect the contents of single-brace `{...}` fields in the candidate
        # body (doubled braces are skipped by the pattern).
        matches = re.findall(
            r"""
            (?:(?<!\{)|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:(?!\})|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        for m in matches:
            if "\\" in str(m):
                # Do not introduce backslashes in interpolated expressions
                return s

    if new_quote == '"""' and new_body[-1:] == '"':
        # edge case:
        # a body ending in a double quote would run into the closing `"""`,
        # so that final quote is escaped.
        new_body = new_body[:-1] + '\\"'
    # Compare the amount of escaping needed before and after the switch.
    orig_escape_count = body.count("\\")
    new_escape_count = new_body.count("\\")
    if new_escape_count > orig_escape_count:
        return s  # Do not introduce more escaping

    if new_escape_count == orig_escape_count and orig_quote == '"':
        return s  # Prefer double quotes

    return f"{prefix}{new_quote}{new_body}{new_quote}"


def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
    """Normalize the case of escape sequences in `leaf.value`, in place.

    Hex digits of \\x, \\u and \\U escapes are lowercased and the character
    name of \\N{...} escapes is uppercased. Strings with an "r" prefix are
    left untouched, as are sequences preceded by an even number of
    backslashes (the backslash itself is escaped there).
    """
    text = leaf.value
    if "r" in get_string_prefix(text).lower():
        return

    def normalize(m: Match[str]) -> str:
        groups = m.groupdict()
        slashes = groups["backslashes"]

        # An even run of backslashes means no real escape sequence follows.
        if len(slashes) % 2 == 0:
            return slashes + groups["body"]

        # \x, \u and \U all carry hex digits that only need lowercasing.
        for marker in ("u", "U", "x"):
            digits = groups[marker]
            if digits:
                return slashes + marker + digits.lower()

        # Only the named form \N{} is left.
        assert groups["N"], f"Unexpected match: {m}"
        return slashes + "N{" + groups["N"].upper() + "}"

    leaf.value = UNICODE_ESCAPE_RE.sub(normalize, text)


@lru_cache(maxsize=4096)
def char_width(char: str) -> int:
    """Return the display width of a single character in a terminal or
    editor (which respects Unicode East Asian Width).

    The width is looked up by binary search over the codepoint ranges in
    WIDTH_TABLE. A negative table width is reported as 0, and a codepoint
    that falls in none of the ranges has width 1.
    """
    codepoint = ord(char)
    lowest, highest = 0, len(WIDTH_TABLE) - 1
    while True:
        middle = (highest + lowest) // 2
        range_start, range_end, width = WIDTH_TABLE[middle]
        if range_start <= codepoint <= range_end:
            return max(width, 0)
        if codepoint < range_start:
            highest = middle - 1
        else:
            lowest = middle + 1
        if highest < lowest:
            # Search interval exhausted: the codepoint is not in the table.
            return 1


def str_width(line_str: str) -> int:
    """Return how wide `line_str` would be displayed in a terminal or editor
    (which respects Unicode East Asian Width).

    Useful, for example, to decide whether a string is too wide to be shown
    in a terminal or editor.
    """
    if not line_str.isascii():
        return sum(char_width(char) for char in line_str)
    # Fast path: a pure-ASCII line is as wide as it is long.
    return len(line_str)


def count_chars_in_width(line_str: str, max_width: int) -> int:
    """Return how many leading characters of `line_str` fit into a terminal
    or editor that is `max_width` wide (which respects Unicode East Asian
    Width).
    """
    used_width = 0
    for position, char in enumerate(line_str):
        used_width += char_width(char)
        if used_width > max_width:
            # This character is the first one that no longer fits.
            return position
    return len(line_str)
Commit	Line	Data
53e6db90 DC	1	"""
	2	Simple formatting on strings. Further string formatting code is in trans.py.
	3	"""
	4
	5	import re
	6	import sys
	7	from functools import lru_cache
	8	from typing import Final, List, Match, Pattern
	9
	10	from black._width_table import WIDTH_TABLE
	11	from blib2to3.pytree import Leaf
	12
	13	STRING_PREFIX_CHARS: Final = "furbFURB" # All possible string prefix characters.
	14	STRING_PREFIX_RE: Final = re.compile(
	15	r"^([" + STRING_PREFIX_CHARS + r"])(.)$", re.DOTALL
	16	)
	17	FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s\t+\s(\S)")
	18	UNICODE_ESCAPE_RE: Final = re.compile(
	19	r"(?P<backslashes>\\+)(?P<body>"
	20	r"(u(?P<u>[a-fA-F0-9]{4}))" # Character with 16-bit hex value xxxx
	21	r"\|(U(?P<U>[a-fA-F0-9]{8}))" # Character with 32-bit hex value xxxxxxxx
	22	r"\|(x(?P<x>[a-fA-F0-9]{2}))" # Character with hex value hh
	23	r"\|(N\{(?P<N>[a-zA-Z0-9 \-]{2,})\})" # Character named name in the Unicode database
	24	r")",
	25	re.VERBOSE,
	26	)
	27
	28
	29	def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
	30	"""Replace `regex` with `replacement` twice on `original`.
	31
	32	This is used by string normalization to perform replaces on
	33	overlapping matches.
	34	"""
	35	return regex.sub(replacement, regex.sub(replacement, original))
	36
	37
	38	def has_triple_quotes(string: str) -> bool:
	39	"""
	40	Returns:
	41	True iff @string starts with three quotation characters.
	42	"""
	43	raw_string = string.lstrip(STRING_PREFIX_CHARS)
	44	return raw_string[:3] in {'"""', "'''"}
	45
	46
	47	def lines_with_leading_tabs_expanded(s: str) -> List[str]:
	48	"""
	49	Splits string into lines and expands only leading tabs (following the normal
	50	Python rules)
	51	"""
	52	lines = []
	53	for line in s.splitlines():
	54	# Find the index of the first non-whitespace character after a string of
	55	# whitespace that includes at least one tab
	56	match = FIRST_NON_WHITESPACE_RE.match(line)
	57	if match:
	58	first_non_whitespace_idx = match.start(1)
	59
	60	lines.append(
	61	line[:first_non_whitespace_idx].expandtabs()
	62	+ line[first_non_whitespace_idx:]
	63	)
	64	else:
65	lines.append(line)
66	return lines
67
68
69	def fix_docstring(docstring: str, prefix: str) -> str:
70	# https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
71	if not docstring:
72	return ""
73	lines = lines_with_leading_tabs_expanded(docstring)
74	# Determine minimum indentation (first line doesn't count):
75	indent = sys.maxsize
76	for line in lines[1:]:
77	stripped = line.lstrip()
78	if stripped:
79	indent = min(indent, len(line) - len(stripped))
80	# Remove indentation (first line is special):
81	trimmed = [lines[0].strip()]
82	if indent < sys.maxsize:
83	last_line_idx = len(lines) - 2
84	for i, line in enumerate(lines[1:]):
85	stripped_line = line[indent:].rstrip()
86	if stripped_line or i == last_line_idx:
87	trimmed.append(prefix + stripped_line)
88	else:
89	trimmed.append("")
90	return "\n".join(trimmed)
91
92
93	def get_string_prefix(string: str) -> str:
94	"""
95	Pre-conditions:
96	* assert_is_leaf_string(@string)
97
98	Returns:
99	@string's prefix (e.g. '', 'r', 'f', or 'rf').
100	"""
101	assert_is_leaf_string(string)
102
103	prefix = ""
104	prefix_idx = 0
105	while string[prefix_idx] in STRING_PREFIX_CHARS:
106	prefix += string[prefix_idx]
107	prefix_idx += 1
108
109	return prefix
110
111
112	def assert_is_leaf_string(string: str) -> None:
113	"""
114	Checks the pre-condition that @string has the format that you would expect
115	of `leaf.value` where `leaf` is some Leaf such that `leaf.type ==
116	token.STRING`. A more precise description of the pre-conditions that are
117	checked are listed below.
118
119	Pre-conditions:
120	* @string starts with either ', ", <prefix>', or <prefix>" where
121	`set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
122	* @string ends with a quote character (' or ").
123
124	Raises:
125	AssertionError(...) if the pre-conditions listed above are not
126	satisfied.
127	"""
128	dquote_idx = string.find('"')
129	squote_idx = string.find("'")
130	if -1 in [dquote_idx, squote_idx]:
131	quote_idx = max(dquote_idx, squote_idx)
132	else:
133	quote_idx = min(squote_idx, dquote_idx)
134
135	assert (
136	0 <= quote_idx < len(string) - 1
137	), f"{string!r} is missing a starting quote character (' or \")."
138	assert string[-1] in (
139	"'",
140	'"',
141	), f"{string!r} is missing an ending quote character (' or \")."
142	assert set(string[:quote_idx]).issubset(
143	set(STRING_PREFIX_CHARS)
144	), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."
145
146
147	def normalize_string_prefix(s: str) -> str:
148	"""Make all string prefixes lowercase."""
149	match = STRING_PREFIX_RE.match(s)
150	assert match is not None, f"failed to match string {s!r}"
151	orig_prefix = match.group(1)
152	new_prefix = (
153	orig_prefix.replace("F", "f")
154	.replace("B", "b")
155	.replace("U", "")
156	.replace("u", "")
157	)
158
159	# Python syntax guarantees max 2 prefixes and that one of them is "r"
160	if len(new_prefix) == 2 and "r" != new_prefix[0].lower():
161	new_prefix = new_prefix[::-1]
162	return f"{new_prefix}{match.group(2)}"
163
164
165	# Re(gex) does actually cache patterns internally but this still improves
166	# performance on a long list literal of strings by 5-9% since lru_cache's
167	# caching overhead is much lower.
168	@lru_cache(maxsize=64)
169	def _cached_compile(pattern: str) -> Pattern[str]:
170	return re.compile(pattern)
171
172
173	def normalize_string_quotes(s: str) -> str:
174	"""Prefer double quotes but only if it doesn't cause more escaping.
175
176	Adds or removes backslashes as appropriate. Doesn't parse and fix
177	strings nested in f-strings.
178	"""
179	value = s.lstrip(STRING_PREFIX_CHARS)
180	if value[:3] == '"""':
181	return s
182
183	elif value[:3] == "'''":
184	orig_quote = "'''"
185	new_quote = '"""'
186	elif value[0] == '"':
187	orig_quote = '"'
188	new_quote = "'"
189	else:
190	orig_quote = "'"
191	new_quote = '"'
192	first_quote_pos = s.find(orig_quote)
193	if first_quote_pos == -1:
194	return s # There's an internal error
195
196	prefix = s[:first_quote_pos]
197	unescaped_new_quote = _cached_compile(rf"(([^\\]\|^)(\\\\)*){new_quote}")
198	escaped_new_quote = _cached_compile(rf"([^\\]\|^)\\((?:\\\\)*){new_quote}")
199	escaped_orig_quote = _cached_compile(rf"([^\\]\|^)\\((?:\\\\)*){orig_quote}")
200	body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)]
201	if "r" in prefix.casefold():
202	if unescaped_new_quote.search(body):
203	# There's at least one unescaped new_quote in this raw string
204	# so converting is impossible
205	return s
206
207	# Do not introduce or remove backslashes in raw strings
208	new_body = body
209	else:
210	# remove unnecessary escapes
211	new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
212	if body != new_body:
213	# Consider the string without unnecessary escapes as the original
214	body = new_body
215	s = f"{prefix}{orig_quote}{body}{orig_quote}"
216	new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
217	new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
218	if "f" in prefix.casefold():
219	matches = re.findall(
220	r"""
221	(?:(?<!\{)\|^)\{ # start of the string or a non-{ followed by a single {
222	([^{].*?) # contents of the brackets except if begins with {{
223	\}(?:(?!\})\|$) # A } followed by end of the string or a non-}
224	""",
225	new_body,
226	re.VERBOSE,
227	)
228	for m in matches:
229	if "\\" in str(m):
230	# Do not introduce backslashes in interpolated expressions
231	return s
232
233	if new_quote == '"""' and new_body[-1:] == '"':
234	# edge case:
235	new_body = new_body[:-1] + '\\"'
236	orig_escape_count = body.count("\\")
237	new_escape_count = new_body.count("\\")
238	if new_escape_count > orig_escape_count:
239	return s # Do not introduce more escaping
240
241	if new_escape_count == orig_escape_count and orig_quote == '"':
242	return s # Prefer double quotes
243
244	return f"{prefix}{new_quote}{new_body}{new_quote}"
245
246
247	def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
248	"""Replace hex codes in Unicode escape sequences with lowercase representation."""
249	text = leaf.value
250	prefix = get_string_prefix(text)
251	if "r" in prefix.lower():
252	return
253
254	def replace(m: Match[str]) -> str:
255	groups = m.groupdict()
256	back_slashes = groups["backslashes"]
257
258	if len(back_slashes) % 2 == 0:
259	return back_slashes + groups["body"]
260
261	if groups["u"]:
262	# \u
263	return back_slashes + "u" + groups["u"].lower()
264	elif groups["U"]:
265	# \U
266	return back_slashes + "U" + groups["U"].lower()
267	elif groups["x"]:
268	# \x
269	return back_slashes + "x" + groups["x"].lower()
270	else:
271	assert groups["N"], f"Unexpected match: {m}"
272	# \N{}
273	return back_slashes + "N{" + groups["N"].upper() + "}"
274
275	leaf.value = re.sub(UNICODE_ESCAPE_RE, replace, text)
276
277
278	@lru_cache(maxsize=4096)
279	def char_width(char: str) -> int:
280	"""Return the width of a single character as it would be displayed in a
281	terminal or editor (which respects Unicode East Asian Width).
282
283	Full width characters are counted as 2, while half width characters are
284	counted as 1. Also control characters are counted as 0.
285	"""
286	table = WIDTH_TABLE
287	codepoint = ord(char)
288	highest = len(table) - 1
289	lowest = 0
290	idx = highest // 2
291	while True:
292	start_codepoint, end_codepoint, width = table[idx]
293	if codepoint < start_codepoint:
294	highest = idx - 1
295	elif codepoint > end_codepoint:
296	lowest = idx + 1
297	else:
298	return 0 if width < 0 else width
299	if highest < lowest:
300	break
301	idx = (highest + lowest) // 2
302	return 1
303
304
305	def str_width(line_str: str) -> int:
306	"""Return the width of `line_str` as it would be displayed in a terminal
307	or editor (which respects Unicode East Asian Width).
308
309	You could utilize this function to determine, for example, if a string
310	is too wide to display in a terminal or editor.
311	"""
312	if line_str.isascii():
313	# Fast path for a line consisting of only ASCII characters
314	return len(line_str)
315	return sum(map(char_width, line_str))
316
317
318	def count_chars_in_width(line_str: str, max_width: int) -> int:
319	"""Count the number of characters in `line_str` that would fit in a
320	terminal or editor of `max_width` (which respects Unicode East Asian
321	Width).
322	"""
323	total_width = 0
324	for i, char in enumerate(line_str):
325	width = char_width(char)
326	if width + total_width > max_width:
327	return i
328	total_width += width
329	return len(line_str)