crepu.dev Git - config.git/blame_incremental - djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/parso/utils.py

... / ...

Commit	Line	Data
	1	import re
	2	import sys
	3	from ast import literal_eval
	4	from functools import total_ordering
	5	from typing import NamedTuple, Sequence, Union
	6
	7	# The following is a list in Python that are line breaks in str.splitlines, but
	8	# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
	9	# 0xA) are allowed to split lines.
	10	_NON_LINE_BREAKS = (
	11	'\v', # Vertical Tabulation 0xB
	12	'\f', # Form Feed 0xC
	13	'\x1C', # File Separator
	14	'\x1D', # Group Separator
	15	'\x1E', # Record Separator
	16	'\x85', # Next Line (NEL - Equivalent to CR+LF.
	17	# Used to mark end-of-line on some IBM mainframes.)
	18	'\u2028', # Line Separator
	19	'\u2029', # Paragraph Separator
	20	)
	21
	22
	23	class Version(NamedTuple):
	24	major: int
	25	minor: int
	26	micro: int
	27
	28
	29	def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
	30	r"""
	31	Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,
	32	looks at form feeds and other special characters as normal text. Just
	33	splits ``\n`` and ``\r\n``.
	34	Also different: Returns ``[""]`` for an empty string input.
	35
	36	In Python 2.7 form feeds are used as normal characters when using
	37	str.splitlines. However in Python 3 somewhere there was a decision to split
	38	also on form feeds.
	39	"""
	40	if keepends:
	41	lst = string.splitlines(True)
	42
	43	# We have to merge lines that were broken by form feed characters.
	44	merge = []
	45	for i, line in enumerate(lst):
	46	try:
	47	last_chr = line[-1]
	48	except IndexError:
	49	pass
	50	else:
	51	if last_chr in _NON_LINE_BREAKS:
	52	merge.append(i)
	53
	54	for index in reversed(merge):
	55	try:
	56	lst[index] = lst[index] + lst[index + 1]
	57	del lst[index + 1]
	58	except IndexError:
	59	# index + 1 can be empty and therefore there's no need to
	60	# merge.
	61	pass
	62
	63	# The stdlib's implementation of the end is inconsistent when calling
	64	# it with/without keepends. One time there's an empty string in the
	65	# end, one time there's none.
	66	if string.endswith('\n') or string.endswith('\r') or string == '':
	67	lst.append('')
	68	return lst
	69	else:
	70	return re.split(r'\n\|\r\n\|\r', string)
	71
	72
	73	def python_bytes_to_unicode(
	74	source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict'
	75	) -> str:
	76	"""
	77	Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a
	78	unicode object like in :py:meth:`bytes.decode`.
	79
	80	:param encoding: See :py:meth:`bytes.decode` documentation.
	81	:param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
	82	``'strict'``, ``'replace'`` or ``'ignore'``.
	83	"""
	84	def detect_encoding():
	85	"""
	86	For the implementation of encoding definitions in Python, look at:
	87	- http://www.python.org/dev/peps/pep-0263/
	88	- http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
	89	"""
	90	byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
	91	if source.startswith(byte_mark):
	92	# UTF-8 byte-order mark
	93	return 'utf-8'
	94
	95	first_two_lines = re.match(br'(?:[^\r\n]*(?:\r\n\|\r\|\n)){0,2}', source).group(0)
	96	possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
	97	first_two_lines)
	98	if possible_encoding:
	99	e = possible_encoding.group(1)
	100	if not isinstance(e, str):
	101	e = str(e, 'ascii', 'replace')
	102	return e
	103	else:
	104	# the default if nothing else has been set -> PEP 263
	105	return encoding
	106
	107	if isinstance(source, str):
	108	# only cast str/bytes
	109	return source
	110
	111	encoding = detect_encoding()
	112	try:
	113	# Cast to unicode
	114	return str(source, encoding, errors)
	115	except LookupError:
	116	if errors == 'replace':
	117	# This is a weird case that can happen if the given encoding is not
	118	# a valid encoding. This usually shouldn't happen with provided
	119	# encodings, but can happen if somebody uses encoding declarations
	120	# like `# coding: foo-8`.
	121	return str(source, 'utf-8', errors)
	122	raise
	123
	124
	125	def version_info() -> Version:
	126	"""
	127	Returns a namedtuple of parso's version, similar to Python's
	128	``sys.version_info``.
	129	"""
	130	from parso import __version__
	131	tupl = re.findall(r'[a-z]+\|\d+', __version__)
	132	return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)])
	133
	134
	135	class _PythonVersionInfo(NamedTuple):
	136	major: int
	137	minor: int
	138
	139
	140	@total_ordering
	141	class PythonVersionInfo(_PythonVersionInfo):
	142	def __gt__(self, other):
	143	if isinstance(other, tuple):
	144	if len(other) != 2:
	145	raise ValueError("Can only compare to tuples of length 2.")
	146	return (self.major, self.minor) > other
	147	super().__gt__(other)
	148
	149	return (self.major, self.minor)
	150
	151	def __eq__(self, other):
	152	if isinstance(other, tuple):
	153	if len(other) != 2:
	154	raise ValueError("Can only compare to tuples of length 2.")
	155	return (self.major, self.minor) == other
	156	super().__eq__(other)
	157
	158	def __ne__(self, other):
	159	return not self.__eq__(other)
	160
	161
	162	def _parse_version(version) -> PythonVersionInfo:
	163	match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a\|b\|rc)\d)?$', version)
	164	if match is None:
	165	raise ValueError('The given version is not in the right format. '
	166	'Use something like "3.8" or "3".')
	167
	168	major = int(match.group(1))
	169	minor = match.group(2)
	170	if minor is None:
	171	# Use the latest Python in case it's not exactly defined, because the
	172	# grammars are typically backwards compatible?
	173	if major == 2:
	174	minor = "7"
	175	elif major == 3:
	176	minor = "6"
	177	else:
	178	raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
	179	minor = int(minor)
	180	return PythonVersionInfo(major, minor)
	181
	182
	183	def parse_version_string(version: str = None) -> PythonVersionInfo:
	184	"""
	185	Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and
	186	returns a corresponding version info that is always two characters long in
	187	decimal.
	188	"""
	189	if version is None:
	190	version = '%s.%s' % sys.version_info[:2]
	191	if not isinstance(version, str):
	192	raise TypeError('version must be a string like "3.8"')
	193
	194	return _parse_version(version)