# Source: crepu.dev Git - config.git/blob - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/black/strings.py

"""
Simple formatting on strings. Further string formatting code is in trans.py.
"""

5 import re

6 import sys

7 from functools import lru_cache

 from typing import Final, List, Match, Pattern

10 from black._width_table import WIDTH_TABLE

11 from blib2to3.pytree import Leaf

# Every character that can appear in a string literal's prefix, in both cases.
STRING_PREFIX_CHARS: Final = "furbFURB"

# Splits a string literal into its (possibly empty) prefix (group 1) and
# everything after it (group 2); DOTALL lets group 2 span newlines.
STRING_PREFIX_RE: Final = re.compile(
    rf"^([{STRING_PREFIX_CHARS}]*)(.*)$",
    re.DOTALL,
)

# Matches leading whitespace that contains at least one tab; group 1 is the
# first non-whitespace character that follows it.
FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)")

# Matches a run of backslashes followed by the body of a \u, \U, \x or \N{...}
# escape.  The named groups `u`, `U`, `x` and `N` capture the hex digits (or
# the character name) of whichever alternative matched.
UNICODE_ESCAPE_RE: Final = re.compile(
    r"(?P<backslashes>\\+)(?P<body>"
    + "|".join(
        (
            # Character with 16-bit hex value xxxx
            r"(u(?P<u>[a-fA-F0-9]{4}))",
            # Character with 32-bit hex value xxxxxxxx
            r"(U(?P<U>[a-fA-F0-9]{8}))",
            # Character with hex value hh
            r"(x(?P<x>[a-fA-F0-9]{2}))",
            # Character named name in the Unicode database
            r"(N\{(?P<N>[a-zA-Z0-9 \-]{2,})\})",
        )
    )
    + r")",
    re.VERBOSE,
)

 def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
     """Run the `regex` -> `replacement` substitution over `original` two times.

     One `sub` pass cannot rewrite matches that overlap each other; string
     normalization uses the second pass to pick up the matches the first one
     had to skip.
     """
     once = regex.sub(replacement, original)
     return regex.sub(replacement, once)

 def has_triple_quotes(string: str) -> bool:
     """
     Returns:
         True iff @string, ignoring any leading prefix characters, begins with
         three double quotes or three single quotes.
     """
     unprefixed = string.lstrip(STRING_PREFIX_CHARS)
     return unprefixed.startswith(('"""', "'''"))

 def lines_with_leading_tabs_expanded(s: str) -> List[str]:
     """
     Splits string into lines and expands only leading tabs (following the normal
     Python rules)

     The leading whitespace of each line is located with `str.lstrip` instead
     of a backtracking regular expression, so a long run of whitespace that is
     not followed by any text is handled in linear time.

     Returns:
         The lines of @s, split as `str.splitlines` does. Tabs in a line's
         leading whitespace are expanded with `str.expandtabs`; tabs after the
         first non-whitespace character and whitespace-only lines are left
         untouched.
     """
     lines = []
     for line in s.splitlines():
         stripped_line = line.lstrip()
         if not stripped_line or stripped_line == line:
             # Whitespace-only line, or no leading whitespace: nothing to expand.
             lines.append(line)
         else:
             prefix_length = len(line) - len(stripped_line)
             # expandtabs() is a no-op when the leading whitespace has no tab.
             lines.append(line[:prefix_length].expandtabs() + stripped_line)
     return lines

 def fix_docstring(docstring: str, prefix: str) -> str:
     """Re-indent the continuation lines of `docstring` with `prefix`.

     Follows the indentation handling described in PEP 257:
     https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation

     The first line is stripped of surrounding whitespace. For the remaining
     lines, the smallest indentation found on a non-blank line is removed,
     trailing whitespace is stripped and `prefix` is prepended. Lines that end
     up empty become empty strings, except the last line, which always
     receives `prefix`. If every remaining line is blank, only the first line
     is returned. An empty `docstring` yields an empty string.
     """
     if not docstring:
         return ""
     lines = lines_with_leading_tabs_expanded(docstring)
     continuation = lines[1:]
     # Indentation widths of the non-blank continuation lines (the first line
     # does not take part in the computation).
     margins = [
         len(text) - len(text.lstrip()) for text in continuation if text.lstrip()
     ]
     result = [lines[0].strip()]
     if margins:
         margin = min(margins)
         final_pos = len(continuation) - 1
         for pos, text in enumerate(continuation):
             content = text[margin:].rstrip()
             keep_prefix = bool(content) or pos == final_pos
             result.append(prefix + content if keep_prefix else "")
     return "\n".join(result)

 def get_string_prefix(string: str) -> str:
     """
     Pre-conditions:
         * assert_is_leaf_string(@string)

     Returns:
         The leading run of prefix characters of @string (e.g. '', 'r', 'f',
         or 'rf').
     """
     assert_is_leaf_string(string)

     # Walk forward until the first character that is not a prefix character.
     end = 0
     while string[end] in STRING_PREFIX_CHARS:
         end += 1

     return string[:end]

def assert_is_leaf_string(string: str) -> None:
    """
    Verifies that @string looks like the `leaf.value` of a Leaf whose
    `leaf.type == token.STRING`. The exact conditions that are checked are
    listed below.

    Pre-conditions:
        * @string starts with either ', ", <prefix>', or <prefix>" where
        `set(<prefix>)` is some subset of `set(STRING_PREFIX_CHARS)`.
        * @string ends with a quote character (' or ").

    Raises:
        AssertionError(...) if the pre-conditions listed above are not
        satisfied.
    """
    # Position of the earliest quote character of either kind, or -1 if
    # @string contains no quote at all.
    found = [idx for idx in (string.find('"'), string.find("'")) if idx != -1]
    quote_idx = min(found) if found else -1

    assert (
        0 <= quote_idx < len(string) - 1
    ), f"{string!r} is missing a starting quote character (' or \")."
    assert string.endswith(
        ("'", '"')
    ), f"{string!r} is missing an ending quote character (' or \")."
    assert set(string[:quote_idx]) <= set(
        STRING_PREFIX_CHARS
    ), f"{set(string[:quote_idx])} is NOT a subset of {set(STRING_PREFIX_CHARS)}."

def normalize_string_prefix(s: str) -> str:
    """Normalize the prefix of the string literal `s`.

    "F" and "B" are lowercased, "u"/"U" are dropped, and in a two-character
    prefix the "r"/"R" is moved to the front. The rest of the literal is
    returned unchanged.
    """
    match = STRING_PREFIX_RE.match(s)
    assert match is not None, f"failed to match string {s!r}"
    orig_prefix, remainder = match.group(1), match.group(2)
    new_prefix = orig_prefix.translate(
        {ord("F"): "f", ord("B"): "b", ord("U"): None, ord("u"): None}
    )

    # Python syntax guarantees max 2 prefixes and that one of them is "r"
    if len(new_prefix) == 2 and new_prefix[0].lower() != "r":
        new_prefix = new_prefix[::-1]
    return new_prefix + remainder

# `re` keeps its own cache of compiled patterns, yet going through `lru_cache`
# first still improves performance on a long list literal of strings by 5-9%
# because lru_cache's caching overhead is much lower.
@lru_cache(maxsize=64)
def _cached_compile(pattern: str) -> Pattern[str]:
    """Compile `pattern`, reusing the result for recently compiled patterns."""
    compiled = re.compile(pattern)
    return compiled

def normalize_string_quotes(s: str) -> str:
    """Prefer double quotes but only if it doesn't cause more escaping.

    Backslashes are added or removed as the quote change requires. Strings
    nested inside f-string replacement fields are not parsed or fixed.

    Returns:
        @s rewritten with the other quote style, or @s itself (possibly with
        unnecessary escapes of the other quote removed) when switching is not
        possible or would not be an improvement.
    """
    unprefixed = s.lstrip(STRING_PREFIX_CHARS)
    opening = unprefixed[:3]
    if opening == '"""':
        return s

    if opening == "'''":
        orig_quote, new_quote = "'''", '"""'
    elif unprefixed[0] == '"':
        orig_quote, new_quote = '"', "'"
    else:
        orig_quote, new_quote = "'", '"'

    quote_start = s.find(orig_quote)
    if quote_start == -1:
        return s  # There's an internal error

    prefix = s[:quote_start]
    folded_prefix = prefix.casefold()
    unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
    escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
    escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}")
    quote_len = len(orig_quote)
    body = s[quote_start + quote_len : -quote_len]

    if "r" in folded_prefix:
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
            # so converting is impossible
            return s

        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
        # remove unnecessary escapes
        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
        if new_body != body:
            # Consider the string without unnecessary escapes as the original
            body = new_body
            s = f"{prefix}{orig_quote}{body}{orig_quote}"
        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)

    if "f" in folded_prefix:
        interpolations = re.findall(
            r"""
            (?:(?<!\{)|^)\{  # start of the string or a non-{ followed by a single {
                ([^{].*?)  # contents of the brackets except if begins with {{
            \}(?:(?!\})|$)  # A } followed by end of the string or a non-}
            """,
            new_body,
            re.VERBOSE,
        )
        if any("\\" in str(expr) for expr in interpolations):
            # Do not introduce backslashes in interpolated expressions
            return s

    if new_quote == '"""' and new_body.endswith('"'):
        # edge case: a trailing double quote would run into the closing
        # triple quote, so it has to be escaped
        new_body = new_body[:-1] + '\\"'

    escapes_before = body.count("\\")
    escapes_after = new_body.count("\\")
    if escapes_after > escapes_before:
        return s  # Do not introduce more escaping

    if escapes_after == escapes_before and orig_quote == '"':
        return s  # Prefer double quotes

    return f"{prefix}{new_quote}{new_body}{new_quote}"

def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
    """Replace hex codes in Unicode escape sequences with lowercase representation.

    `leaf.value` is rewritten in place: the hex digits of `\\x`, `\\u` and
    `\\U` escapes are lowercased and the name inside `\\N{...}` is uppercased.
    Raw strings are left untouched. In bytes literals only `\\x` is
    normalized, because `\\u`, `\\U` and `\\N{...}` are not escape sequences
    there and changing their case would change the value of the literal.
    """
    text = leaf.value
    prefix = get_string_prefix(text).lower()
    if "r" in prefix:
        return  # There's no escaping in raw strings

    is_bytes = "b" in prefix

    def replace(m: Match[str]) -> str:
        groups = m.groupdict()
        back_slashes = groups["backslashes"]

        if len(back_slashes) % 2 == 0:
            # An even number of backslashes escapes itself, so what follows
            # is ordinary text rather than an escape sequence.
            return back_slashes + groups["body"]

        if groups["x"]:
            # \x is an escape in both str and bytes literals
            return back_slashes + "x" + groups["x"].lower()

        if is_bytes:
            # \u, \U and \N{} are literal text in a bytes literal
            return back_slashes + groups["body"]

        if groups["u"]:
            # \u
            return back_slashes + "u" + groups["u"].lower()
        elif groups["U"]:
            # \U
            return back_slashes + "U" + groups["U"].lower()
        else:
            assert groups["N"], f"Unexpected match: {m}"
            # \N{}
            return back_slashes + "N{" + groups["N"].upper() + "}"

    leaf.value = UNICODE_ESCAPE_RE.sub(replace, text)

@lru_cache(maxsize=4096)
def char_width(char: str) -> int:
    """Return the width of a single character as it would be displayed in a
    terminal or editor (which respects Unicode East Asian Width).

    The width is looked up by binary search over the code point ranges in
    WIDTH_TABLE. A negative table width is reported as 0, and a code point
    that falls in no range is reported as 1.
    """
    target = ord(char)
    lo = 0
    hi = len(WIDTH_TABLE) - 1
    mid = hi // 2
    while True:
        range_start, range_end, cell_width = WIDTH_TABLE[mid]
        if range_start <= target <= range_end:
            return cell_width if cell_width >= 0 else 0
        if target < range_start:
            hi = mid - 1
        else:
            lo = mid + 1
        if hi < lo:
            # The search interval is empty: no range contains the code point.
            return 1
        mid = (lo + hi) // 2

def str_width(line_str: str) -> int:
    """Return the width of `line_str` as it would be displayed in a terminal
    or editor (which respects Unicode East Asian Width).

    Useful, for example, to decide whether a string is too wide to display
    in a terminal or editor.
    """
    if not line_str.isascii():
        return sum(char_width(char) for char in line_str)
    # Fast path for a line consisting of only ASCII characters
    return len(line_str)

def count_chars_in_width(line_str: str, max_width: int) -> int:
    """Count the number of characters in `line_str` that would fit in a
    terminal or editor of `max_width` (which respects Unicode East Asian
    Width).
    """
    remaining = max_width
    for count, char in enumerate(line_str):
        remaining -= char_width(char)
        if remaining < 0:
            # This character no longer fits; `count` characters did.
            return count
    return len(line_str)