crepu.dev Git - config.git/blob - djavu-asus/emacs/elpy/rpc-venv/lib/python3.11/site-packages/parso/utils.py

1 import re

2 import sys

3 from ast import literal_eval

4 from functools import total_ordering

5 from typing import NamedTuple, Sequence, Union

# Characters that str.splitlines treats as line boundaries although the
# Python grammar does not. In Python source code only \r (Carriage Return,
# 0xD) and \n (Line Feed, 0xA) end a line, so lines split on any of the
# characters below have to be glued back together.
_NON_LINE_BREAKS = (
    '\v',      # 0x0B - Vertical Tabulation
    '\f',      # 0x0C - Form Feed
    '\x1C',    # File Separator
    '\x1D',    # Group Separator
    '\x1E',    # Record Separator
    '\x85',    # Next Line (NEL, equivalent to CR+LF; used to mark
               # end-of-line on some IBM mainframes)
    '\u2028',  # Line Separator
    '\u2029',  # Paragraph Separator
)

class Version(NamedTuple):
    """Three-part ``(major, minor, micro)`` version number."""

    major: int  # first component of the version
    minor: int  # second component of the version
    micro: int  # third component of the version

 def split_lines(string: str, keepends: bool = False) -> Sequence[str]:

30 r"""

31 Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,

32 looks at form feeds and other special characters as normal text. Just

33 splits ``\n`` and ``\r\n``.

34 Also different: Returns ``[""]`` for an empty string input.

36 In Python 2.7 form feeds are used as normal characters when using

37 str.splitlines. However in Python 3 somewhere there was a decision to split

38 also on form feeds.

39 """

40 if keepends:

         lst = string.splitlines(True)

43 # We have to merge lines that were broken by form feed characters.

44 merge = []

         for i, line in enumerate(lst):

46 try:

47 last_chr = line[-1]

48 except IndexError:

49 pass

50 else:

51 if last_chr in _NON_LINE_BREAKS:

52 merge.append(i)

         for index in reversed(merge):

55 try:

                 lst[index] = lst[index] + lst[index + 1]

                 del lst[index + 1]

58 except IndexError:

59 # index + 1 can be empty and therefore there's no need to

60 # merge.

61 pass

63 # The stdlib's implementation of the end is inconsistent when calling

64 # it with/without keepends. One time there's an empty string in the

65 # end, one time there's none.

         if string.endswith('\n') or string.endswith('\r') or string == '':

             lst.append('')

68 return lst

69 else:

         return re.split(r'\n|\r\n|\r', string)

def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict'
) -> str:
    """
    Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a
    unicode object like in :py:meth:`bytes.decode`.

    A ``str`` input is returned unchanged. For bytes, a leading UTF-8
    byte-order mark selects ``utf-8``; otherwise a ``coding[:=]`` declaration
    within the first two lines selects the encoding; otherwise ``encoding``
    is used.

    :param source: The source code, either already decoded or as raw bytes.
    :param encoding: See :py:meth:`bytes.decode` documentation. Used when
        neither a BOM nor an encoding declaration is found.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    :raises LookupError: If the declared encoding is unknown and ``errors``
        is not ``'replace'``.
    """
    def detect_encoding():
        """
        For the implementation of encoding definitions in Python, look at:
        - http://www.python.org/dev/peps/pep-0263/
        - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
        """
        if source.startswith(b'\xef\xbb\xbf'):
            # UTF-8 byte-order mark
            return 'utf-8'

        # The line terminator is optional so that a declaration on an
        # unterminated first or second line (e.g. a one-line file without a
        # trailing newline) is still inside the inspected prefix.
        first_two_lines = re.match(
            br'(?:[^\r\n]*(?:\r\n|\r|\n)?){0,2}', source
        ).group(0)
        possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
                                      first_two_lines)
        if possible_encoding:
            # The pattern only captures ASCII characters, so this decode is
            # merely a bytes -> str conversion.
            return possible_encoding.group(1).decode('ascii', 'replace')
        # the default if nothing else has been set -> PEP 263
        return encoding

    if isinstance(source, str):
        # only cast str/bytes
        return source

    detected = detect_encoding()
    try:
        # Cast to unicode
        return str(source, detected, errors)
    except LookupError:
        if errors == 'replace':
            # This is a weird case that can happen if the given encoding is not
            # a valid encoding. This usually shouldn't happen with provided
            # encodings, but can happen if somebody uses encoding declarations
            # like `# coding: foo-8`.
            return str(source, 'utf-8', errors)
        raise

123

124

def version_info() -> Version:
    """
    Returns a namedtuple of parso's version, similar to Python's
    ``sys.version_info``.

    Only the three leading components of ``parso.__version__`` are used,
    because ``Version`` has exactly the fields ``major``, ``minor`` and
    ``micro``; anything after them (such as a pre-release suffix) is ignored
    instead of being passed on as surplus arguments.

    :raises ValueError: If one of the three leading components is not numeric.
    """
    from parso import __version__
    components = re.findall(r'[a-z]+|\d+', __version__)
    return Version(*(int(part) for part in components[:3]))

133

134

class _PythonVersionInfo(NamedTuple):
    # Plain (major, minor) record; the comparison rules live in the subclass.
    major: int
    minor: int


@total_ordering
class PythonVersionInfo(_PythonVersionInfo):
    """
    A ``(major, minor)`` Python version.

    ``>``, ``==`` and ``!=`` accept any tuple of length two and raise
    ``ValueError`` for tuples of another length. For operands that are not
    tuples the comparison is delegated to the inherited tuple implementation,
    so Python's normal fallback rules apply.

    Because ``__eq__`` is overridden without ``__hash__``, instances are not
    hashable.

    NOTE(review): the tuple base class already defines every rich comparison,
    so ``total_ordering`` may not generate anything here and ``<``, ``<=`` and
    ``>=`` would then skip the length check - confirm.
    """

    def __gt__(self, other):
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) > other
        # Not a tuple: return the parent's answer so the interpreter can try
        # the reflected operation or raise TypeError itself.
        return super().__gt__(other)

    def __eq__(self, other):
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) == other
        # Not a tuple: return the parent's answer rather than falling off the
        # end of the method with None.
        return super().__eq__(other)

    def __ne__(self, other):
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            # NotImplemented must be passed through, never negated.
            return outcome
        return not outcome

160

161

 def _parse_version(version) -> PythonVersionInfo:

     match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)

164 if match is None:

165 raise ValueError('The given version is not in the right format. '

166 'Use something like "3.8" or "3".')

167

     major = int(match.group(1))

     minor = match.group(2)

170 if minor is None:

171 # Use the latest Python in case it's not exactly defined, because the

172 # grammars are typically backwards compatible?

173 if major == 2:

174 minor = "7"

175 elif major == 3:

176 minor = "6"

177 else:

             raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")

179 minor = int(minor)

     return PythonVersionInfo(major, minor)

181

182

 def parse_version_string(version: str = None) -> PythonVersionInfo:

184 """

185 Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and

186 returns a corresponding version info that is always two characters long in

187 decimal.

188 """

189 if version is None:

         version = '%s.%s' % sys.version_info[:2]

     if not isinstance(version, str):

         raise TypeError('version must be a string like "3.8"')

193

194 return _parse_version(version)