djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/parso/utils.py

   1 import re
   2 import sys
   3 from ast import literal_eval
   4 from functools import total_ordering
   5 from typing import NamedTuple, Sequence, Union
   6
# The characters below are treated as line breaks by str.splitlines, but they
# do not end a line in Python source code. In Python only \r (Carriage Return,
# 0xD) and \n (Line Feed, 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
    '\v',  # Vertical Tabulation 0xB
    '\f',  # Form Feed 0xC
    '\x1C',  # File Separator
    '\x1D',  # Group Separator
    '\x1E',  # Record Separator
    '\x85',  # Next Line (NEL - Equivalent to CR+LF.
             # Used to mark end-of-line on some IBM mainframes.)
    '\u2028',  # Line Separator
    '\u2029',  # Paragraph Separator
)
  21
  22
class Version(NamedTuple):
    """
    Three-part ``(major, minor, micro)`` version number.

    This is the return type of :func:`version_info`.
    """
    major: int  # first numeric component of the version
    minor: int  # second numeric component of the version
    micro: int  # third numeric component of the version
  27
  28
  29 def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
  30     r"""
  31     Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,
  32     looks at form feeds and other special characters as normal text. Just
  33     splits ``\n`` and ``\r\n``.
  34     Also different: Returns ``[""]`` for an empty string input.
  35
  36     In Python 2.7 form feeds are used as normal characters when using
  37     str.splitlines. However in Python 3 somewhere there was a decision to split
  38     also on form feeds.
  39     """
  40     if keepends:
  41         lst = string.splitlines(True)
  42
  43         # We have to merge lines that were broken by form feed characters.
  44         merge = []
  45         for i, line in enumerate(lst):
  46             try:
  47                 last_chr = line[-1]
  48             except IndexError:
  49                 pass
  50             else:
  51                 if last_chr in _NON_LINE_BREAKS:
  52                     merge.append(i)
  53
  54         for index in reversed(merge):
  55             try:
  56                 lst[index] = lst[index] + lst[index + 1]
  57                 del lst[index + 1]
  58             except IndexError:
  59                 # index + 1 can be empty and therefore there's no need to
  60                 # merge.
  61                 pass
  62
  63         # The stdlib's implementation of the end is inconsistent when calling
  64         # it with/without keepends. One time there's an empty string in the
  65         # end, one time there's none.
  66         if string.endswith('\n') or string.endswith('\r') or string == '':
  67             lst.append('')
  68         return lst
  69     else:
  70         return re.split(r'\n|\r\n|\r', string)
  71
  72
  73 def python_bytes_to_unicode(
  74     source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict'
  75 ) -> str:
  76     """
  77     Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a
  78     unicode object like in :py:meth:`bytes.decode`.
  79
  80     :param encoding: See :py:meth:`bytes.decode` documentation.
  81     :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
  82         ``'strict'``, ``'replace'`` or ``'ignore'``.
  83     """
  84     def detect_encoding():
  85         """
  86         For the implementation of encoding definitions in Python, look at:
  87         - http://www.python.org/dev/peps/pep-0263/
  88         - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
  89         """
  90         byte_mark = literal_eval(r"b'\xef\xbb\xbf'")
  91         if source.startswith(byte_mark):
  92             # UTF-8 byte-order mark
  93             return 'utf-8'
  94
  95         first_two_lines = re.match(br'(?:[^\r\n]*(?:\r\n|\r|\n)){0,2}', source).group(0)
  96         possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
  97                                       first_two_lines)
  98         if possible_encoding:
  99             e = possible_encoding.group(1)
 100             if not isinstance(e, str):
 101                 e = str(e, 'ascii', 'replace')
 102             return e
 103         else:
 104             # the default if nothing else has been set -> PEP 263
 105             return encoding
 106
 107     if isinstance(source, str):
 108         # only cast str/bytes
 109         return source
 110
 111     encoding = detect_encoding()
 112     try:
 113         # Cast to unicode
 114         return str(source, encoding, errors)
 115     except LookupError:
 116         if errors == 'replace':
 117             # This is a weird case that can happen if the given encoding is not
 118             # a valid encoding. This usually shouldn't happen with provided
 119             # encodings, but can happen if somebody uses encoding declarations
 120             # like `# coding: foo-8`.
 121             return str(source, 'utf-8', errors)
 122         raise
 123
 124
 125 def version_info() -> Version:
 126     """
 127     Returns a namedtuple of parso's version, similar to Python's
 128     ``sys.version_info``.
 129     """
 130     from parso import __version__
 131     tupl = re.findall(r'[a-z]+|\d+', __version__)
 132     return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)])
 133
 134
 135 class _PythonVersionInfo(NamedTuple):
 136     major: int
 137     minor: int
 138
 139
 140 @total_ordering
 141 class PythonVersionInfo(_PythonVersionInfo):
 142     def __gt__(self, other):
 143         if isinstance(other, tuple):
 144             if len(other) != 2:
 145                 raise ValueError("Can only compare to tuples of length 2.")
 146             return (self.major, self.minor) > other
 147         super().__gt__(other)
 148
 149         return (self.major, self.minor)
 150
 151     def __eq__(self, other):
 152         if isinstance(other, tuple):
 153             if len(other) != 2:
 154                 raise ValueError("Can only compare to tuples of length 2.")
 155             return (self.major, self.minor) == other
 156         super().__eq__(other)
 157
 158     def __ne__(self, other):
 159         return not self.__eq__(other)
 160
 161
 162 def _parse_version(version) -> PythonVersionInfo:
 163     match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
 164     if match is None:
 165         raise ValueError('The given version is not in the right format. '
 166                          'Use something like "3.8" or "3".')
 167
 168     major = int(match.group(1))
 169     minor = match.group(2)
 170     if minor is None:
 171         # Use the latest Python in case it's not exactly defined, because the
 172         # grammars are typically backwards compatible?
 173         if major == 2:
 174             minor = "7"
 175         elif major == 3:
 176             minor = "6"
 177         else:
 178             raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
 179     minor = int(minor)
 180     return PythonVersionInfo(major, minor)
 181
 182
 183 def parse_version_string(version: str = None) -> PythonVersionInfo:
 184     """
 185     Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and
 186     returns a corresponding version info that is always two characters long in
 187     decimal.
 188     """
 189     if version is None:
 190         version = '%s.%s' % sys.version_info[:2]
 191     if not isinstance(version, str):
 192         raise TypeError('version must be a string like "3.8"')
 193
 194     return _parse_version(version)