djavu-asus/elpy/rpc-venv/lib/python3.11/site-packages/parso/python/tokenize.py

   1 # -*- coding: utf-8 -*-
   2 """
   3 This tokenizer has been copied from the ``tokenize.py`` standard library
   4 tokenizer. The reason was simple: The standard library tokenizer fails
   5 if the indentation is not right. To make it possible to do error recovery the
   6     tokenizer needed to be rewritten.
   7
   8 Basically this is a stripped down version of the standard library module, so
   9 you can read the documentation there. Additionally we included some speed and
  10 memory optimizations here.
  11 """
  12 from __future__ import absolute_import
  13
  14 import sys
  15 import re
  16 import itertools as _itertools
  17 from codecs import BOM_UTF8
  18 from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
  19     Pattern, Set
  20
  21 from parso.python.token import PythonTokenTypes
  22 from parso.utils import split_lines, PythonVersionInfo, parse_version_string
  23
  24
  25 # Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
  26 MAX_UNICODE = '\U0010ffff'
  27
  28 STRING = PythonTokenTypes.STRING
  29 NAME = PythonTokenTypes.NAME
  30 NUMBER = PythonTokenTypes.NUMBER
  31 OP = PythonTokenTypes.OP
  32 NEWLINE = PythonTokenTypes.NEWLINE
  33 INDENT = PythonTokenTypes.INDENT
  34 DEDENT = PythonTokenTypes.DEDENT
  35 ENDMARKER = PythonTokenTypes.ENDMARKER
  36 ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
  37 ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
  38 FSTRING_START = PythonTokenTypes.FSTRING_START
  39 FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
  40 FSTRING_END = PythonTokenTypes.FSTRING_END
  41
  42
  43 class TokenCollection(NamedTuple):
  44     pseudo_token: Pattern
  45     single_quoted: Set[str]
  46     triple_quoted: Set[str]
  47     endpats: Dict[str, Pattern]
  48     whitespace: Pattern
  49     fstring_pattern_map: Dict[str, str]
  50     always_break_tokens: Tuple[str]
  51
  52
  53 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
  54
  55 _token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {}
  56
  57
  58 def group(*choices, capture=False, **kwargs):
  59     assert not kwargs
  60
  61     start = '('
  62     if not capture:
  63         start += '?:'
  64     return start + '|'.join(choices) + ')'
  65
  66
  67 def maybe(*choices):
  68     return group(*choices) + '?'
  69
  70
  71 # Return the empty string, plus all of the valid string prefixes.
  72 def _all_string_prefixes(*, include_fstring=False, only_fstring=False):
  73     def different_case_versions(prefix):
  74         for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
  75             yield ''.join(s)
  76     # The valid string prefixes. Only contain the lower case versions,
  77     #  and don't contain any permuations (include 'fr', but not
  78     #  'rf'). The various permutations will be generated.
  79     valid_string_prefixes = ['b', 'r', 'u', 'br']
  80
  81     result = {''}
  82     if include_fstring:
  83         f = ['f', 'fr']
  84         if only_fstring:
  85             valid_string_prefixes = f
  86             result = set()
  87         else:
  88             valid_string_prefixes += f
  89     elif only_fstring:
  90         return set()
  91
  92     # if we add binary f-strings, add: ['fb', 'fbr']
  93     for prefix in valid_string_prefixes:
  94         for t in _itertools.permutations(prefix):
  95             # create a list with upper and lower versions of each
  96             #  character
  97             result.update(different_case_versions(t))
  98     return result
  99
 100
 101 def _compile(expr):
 102     return re.compile(expr, re.UNICODE)
 103
 104
 105 def _get_token_collection(version_info):
 106     try:
 107         return _token_collection_cache[tuple(version_info)]
 108     except KeyError:
 109         _token_collection_cache[tuple(version_info)] = result = \
 110             _create_token_collection(version_info)
 111         return result
 112
 113
 114 unicode_character_name = r'[A-Za-z0-9\-]+(?: [A-Za-z0-9\-]+)*'
 115 fstring_string_single_line = _compile(
 116     r'(?:\{\{|\}\}|\\N\{' + unicode_character_name
 117     + r'\}|\\(?:\r\n?|\n)|\\[^\r\nN]|[^{}\r\n\\])+'
 118 )
 119 fstring_string_multi_line = _compile(
 120     r'(?:\{\{|\}\}|\\N\{' + unicode_character_name + r'\}|\\[^N]|[^{}\\])+'
 121 )
 122 fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
 123 fstring_format_spec_multi_line = _compile(r'[^{}]+')
 124
 125
 126 def _create_token_collection(version_info):
 127     # Note: we use unicode matching for names ("\w") but ascii matching for
 128     # number literals.
 129     Whitespace = r'[ \f\t]*'
 130     whitespace = _compile(Whitespace)
 131     Comment = r'#[^\r\n]*'
 132     Name = '([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'
 133
 134     Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
 135     Binnumber = r'0[bB](?:_?[01])+'
 136     Octnumber = r'0[oO](?:_?[0-7])+'
 137     Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
 138     Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
 139     Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
 140     Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
 141                        r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
 142     Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
 143     Floatnumber = group(Pointfloat, Expfloat)
 144     Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
 145     Number = group(Imagnumber, Floatnumber, Intnumber)
 146
 147     # Note that since _all_string_prefixes includes the empty string,
 148     #  StringPrefix can be the empty string (making it optional).
 149     possible_prefixes = _all_string_prefixes()
 150     StringPrefix = group(*possible_prefixes)
 151     StringPrefixWithF = group(*_all_string_prefixes(include_fstring=True))
 152     fstring_prefixes = _all_string_prefixes(include_fstring=True, only_fstring=True)
 153     FStringStart = group(*fstring_prefixes)
 154
 155     # Tail end of ' string.
 156     Single = r"(?:\\.|[^'\\])*'"
 157     # Tail end of " string.
 158     Double = r'(?:\\.|[^"\\])*"'
 159     # Tail end of ''' string.
 160     Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
 161     # Tail end of """ string.
 162     Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
 163     Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
 164
 165     # Because of leftmost-then-longest match semantics, be sure to put the
 166     # longest operators first (e.g., if = came before ==, == would get
 167     # recognized as two instances of =).
 168     Operator = group(r"\*\*=?", r">>=?", r"<<=?",
 169                      r"//=?", r"->",
 170                      r"[+\-*/%&@`|^!=<>]=?",
 171                      r"~")
 172
 173     Bracket = '[][(){}]'
 174
 175     special_args = [r'\.\.\.', r'\r\n?', r'\n', r'[;.,@]']
 176     if version_info >= (3, 8):
 177         special_args.insert(0, ":=?")
 178     else:
 179         special_args.insert(0, ":")
 180     Special = group(*special_args)
 181
 182     Funny = group(Operator, Bracket, Special)
 183
 184     # First (or only) line of ' or " string.
 185     ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*"
 186                     + group("'", r'\\(?:\r\n?|\n)'),
 187                     StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*'
 188                     + group('"', r'\\(?:\r\n?|\n)'))
 189     pseudo_extra_pool = [Comment, Triple]
 190     all_quotes = '"', "'", '"""', "'''"
 191     if fstring_prefixes:
 192         pseudo_extra_pool.append(FStringStart + group(*all_quotes))
 193
 194     PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
 195     PseudoToken = group(Whitespace, capture=True) + \
 196         group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
 197
 198     # For a given string prefix plus quotes, endpats maps it to a regex
 199     #  to match the remainder of that string. _prefix can be empty, for
 200     #  a normal single or triple quoted string (with no prefix).
 201     endpats = {}
 202     for _prefix in possible_prefixes:
 203         endpats[_prefix + "'"] = _compile(Single)
 204         endpats[_prefix + '"'] = _compile(Double)
 205         endpats[_prefix + "'''"] = _compile(Single3)
 206         endpats[_prefix + '"""'] = _compile(Double3)
 207
 208     # A set of all of the single and triple quoted string prefixes,
 209     #  including the opening quotes.
 210     single_quoted = set()
 211     triple_quoted = set()
 212     fstring_pattern_map = {}
 213     for t in possible_prefixes:
 214         for quote in '"', "'":
 215             single_quoted.add(t + quote)
 216
 217         for quote in '"""', "'''":
 218             triple_quoted.add(t + quote)
 219
 220     for t in fstring_prefixes:
 221         for quote in all_quotes:
 222             fstring_pattern_map[t + quote] = quote
 223
 224     ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
 225                            'finally', 'while', 'with', 'return', 'continue',
 226                            'break', 'del', 'pass', 'global', 'assert', 'nonlocal')
 227     pseudo_token_compiled = _compile(PseudoToken)
 228     return TokenCollection(
 229         pseudo_token_compiled, single_quoted, triple_quoted, endpats,
 230         whitespace, fstring_pattern_map, set(ALWAYS_BREAK_TOKENS)
 231     )
 232
 233
 234 class Token(NamedTuple):
 235     type: PythonTokenTypes
 236     string: str
 237     start_pos: Tuple[int, int]
 238     prefix: str
 239
 240     @property
 241     def end_pos(self) -> Tuple[int, int]:
 242         lines = split_lines(self.string)
 243         if len(lines) > 1:
 244             return self.start_pos[0] + len(lines) - 1, 0
 245         else:
 246             return self.start_pos[0], self.start_pos[1] + len(self.string)
 247
 248
 249 class PythonToken(Token):
 250     def __repr__(self):
 251         return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
 252                 self._replace(type=self.type.name))
 253
 254
 255 class FStringNode:
 256     def __init__(self, quote):
 257         self.quote = quote
 258         self.parentheses_count = 0
 259         self.previous_lines = ''
 260         self.last_string_start_pos = None
 261         # In the syntax there can be multiple format_spec's nested:
 262         # {x:{y:3}}
 263         self.format_spec_count = 0
 264
 265     def open_parentheses(self, character):
 266         self.parentheses_count += 1
 267
 268     def close_parentheses(self, character):
 269         self.parentheses_count -= 1
 270         if self.parentheses_count == 0:
 271             # No parentheses means that the format spec is also finished.
 272             self.format_spec_count = 0
 273
 274     def allow_multiline(self):
 275         return len(self.quote) == 3
 276
 277     def is_in_expr(self):
 278         return self.parentheses_count > self.format_spec_count
 279
 280     def is_in_format_spec(self):
 281         return not self.is_in_expr() and self.format_spec_count
 282
 283
 284 def _close_fstring_if_necessary(fstring_stack, string, line_nr, column, additional_prefix):
 285     for fstring_stack_index, node in enumerate(fstring_stack):
 286         lstripped_string = string.lstrip()
 287         len_lstrip = len(string) - len(lstripped_string)
 288         if lstripped_string.startswith(node.quote):
 289             token = PythonToken(
 290                 FSTRING_END,
 291                 node.quote,
 292                 (line_nr, column + len_lstrip),
 293                 prefix=additional_prefix+string[:len_lstrip],
 294             )
 295             additional_prefix = ''
 296             assert not node.previous_lines
 297             del fstring_stack[fstring_stack_index:]
 298             return token, '', len(node.quote) + len_lstrip
 299     return None, additional_prefix, 0
 300
 301
 302 def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
 303     tos = fstring_stack[-1]
 304     allow_multiline = tos.allow_multiline()
 305     if tos.is_in_format_spec():
 306         if allow_multiline:
 307             regex = fstring_format_spec_multi_line
 308         else:
 309             regex = fstring_format_spec_single_line
 310     else:
 311         if allow_multiline:
 312             regex = fstring_string_multi_line
 313         else:
 314             regex = fstring_string_single_line
 315
 316     match = regex.match(line, pos)
 317     if match is None:
 318         return tos.previous_lines, pos
 319
 320     if not tos.previous_lines:
 321         tos.last_string_start_pos = (lnum, pos)
 322
 323     string = match.group(0)
 324     for fstring_stack_node in fstring_stack:
 325         end_match = endpats[fstring_stack_node.quote].match(string)
 326         if end_match is not None:
 327             string = end_match.group(0)[:-len(fstring_stack_node.quote)]
 328
 329     new_pos = pos
 330     new_pos += len(string)
 331     # even if allow_multiline is False, we still need to check for trailing
 332     # newlines, because a single-line f-string can contain line continuations
 333     if string.endswith('\n') or string.endswith('\r'):
 334         tos.previous_lines += string
 335         string = ''
 336     else:
 337         string = tos.previous_lines + string
 338
 339     return string, new_pos
 340
 341
 342 def tokenize(
 343     code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
 344 ) -> Iterator[PythonToken]:
 345     """Generate tokens from a the source code (string)."""
 346     lines = split_lines(code, keepends=True)
 347     return tokenize_lines(lines, version_info=version_info, start_pos=start_pos)
 348
 349
 350 def _print_tokens(func):
 351     """
 352     A small helper function to help debug the tokenize_lines function.
 353     """
 354     def wrapper(*args, **kwargs):
 355         for token in func(*args, **kwargs):
 356             print(token)  # This print is intentional for debugging!
 357             yield token
 358
 359     return wrapper
 360
 361
 362 # @_print_tokens
 363 def tokenize_lines(
 364     lines: Iterable[str],
 365     *,
 366     version_info: PythonVersionInfo,
 367     indents: List[int] = None,
 368     start_pos: Tuple[int, int] = (1, 0),
 369     is_first_token=True,
 370 ) -> Iterator[PythonToken]:
 371     """
 372     A heavily modified Python standard library tokenizer.
 373
 374     Additionally to the default information, yields also the prefix of each
 375     token. This idea comes from lib2to3. The prefix contains all information
 376     that is irrelevant for the parser like newlines in parentheses or comments.
 377     """
 378     def dedent_if_necessary(start):
 379         while start < indents[-1]:
 380             if start > indents[-2]:
 381                 yield PythonToken(ERROR_DEDENT, '', (lnum, start), '')
 382                 indents[-1] = start
 383                 break
 384             indents.pop()
 385             yield PythonToken(DEDENT, '', spos, '')
 386
 387     pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
 388         fstring_pattern_map, always_break_tokens, = \
 389         _get_token_collection(version_info)
 390     paren_level = 0  # count parentheses
 391     if indents is None:
 392         indents = [0]
 393     max_ = 0
 394     numchars = '0123456789'
 395     contstr = ''
 396     contline: str
 397     contstr_start: Tuple[int, int]
 398     endprog: Pattern
 399     # We start with a newline. This makes indent at the first position
 400     # possible. It's not valid Python, but still better than an INDENT in the
 401     # second line (and not in the first). This makes quite a few things in
 402     # Jedi's fast parser possible.
 403     new_line = True
 404     prefix = ''  # Should never be required, but here for safety
 405     additional_prefix = ''
 406     lnum = start_pos[0] - 1
 407     fstring_stack: List[FStringNode] = []
 408     for line in lines:  # loop over lines in stream
 409         lnum += 1
 410         pos = 0
 411         max_ = len(line)
 412         if is_first_token:
 413             if line.startswith(BOM_UTF8_STRING):
 414                 additional_prefix = BOM_UTF8_STRING
 415                 line = line[1:]
 416                 max_ = len(line)
 417
 418             # Fake that the part before was already parsed.
 419             line = '^' * start_pos[1] + line
 420             pos = start_pos[1]
 421             max_ += start_pos[1]
 422
 423             is_first_token = False
 424
 425         if contstr:                                         # continued string
 426             endmatch = endprog.match(line)  # noqa: F821
 427             if endmatch:
 428                 pos = endmatch.end(0)
 429                 yield PythonToken(
 430                     STRING, contstr + line[:pos],
 431                     contstr_start, prefix)  # noqa: F821
 432                 contstr = ''
 433                 contline = ''
 434             else:
 435                 contstr = contstr + line
 436                 contline = contline + line
 437                 continue
 438
 439         while pos < max_:
 440             if fstring_stack:
 441                 tos = fstring_stack[-1]
 442                 if not tos.is_in_expr():
 443                     string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
 444                     if string:
 445                         yield PythonToken(
 446                             FSTRING_STRING, string,
 447                             tos.last_string_start_pos,
 448                             # Never has a prefix because it can start anywhere and
 449                             # include whitespace.
 450                             prefix=''
 451                         )
 452                         tos.previous_lines = ''
 453                         continue
 454                     if pos == max_:
 455                         break
 456
 457                 rest = line[pos:]
 458                 fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
 459                     fstring_stack,
 460                     rest,
 461                     lnum,
 462                     pos,
 463                     additional_prefix,
 464                 )
 465                 pos += quote_length
 466                 if fstring_end_token is not None:
 467                     yield fstring_end_token
 468                     continue
 469
 470             # in an f-string, match until the end of the string
 471             if fstring_stack:
 472                 string_line = line
 473                 for fstring_stack_node in fstring_stack:
 474                     quote = fstring_stack_node.quote
 475                     end_match = endpats[quote].match(line, pos)
 476                     if end_match is not None:
 477                         end_match_string = end_match.group(0)
 478                         if len(end_match_string) - len(quote) + pos < len(string_line):
 479                             string_line = line[:pos] + end_match_string[:-len(quote)]
 480                 pseudomatch = pseudo_token.match(string_line, pos)
 481             else:
 482                 pseudomatch = pseudo_token.match(line, pos)
 483
 484             if pseudomatch:
 485                 prefix = additional_prefix + pseudomatch.group(1)
 486                 additional_prefix = ''
 487                 start, pos = pseudomatch.span(2)
 488                 spos = (lnum, start)
 489                 token = pseudomatch.group(2)
 490                 if token == '':
 491                     assert prefix
 492                     additional_prefix = prefix
 493                     # This means that we have a line with whitespace/comments at
 494                     # the end, which just results in an endmarker.
 495                     break
 496                 initial = token[0]
 497             else:
 498                 match = whitespace.match(line, pos)
 499                 initial = line[match.end()]
 500                 start = match.end()
 501                 spos = (lnum, start)
 502
 503             if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None):
 504                 new_line = False
 505                 if paren_level == 0 and not fstring_stack:
 506                     indent_start = start
 507                     if indent_start > indents[-1]:
 508                         yield PythonToken(INDENT, '', spos, '')
 509                         indents.append(indent_start)
 510                     yield from dedent_if_necessary(indent_start)
 511
 512             if not pseudomatch:  # scan for tokens
 513                 match = whitespace.match(line, pos)
 514                 if new_line and paren_level == 0 and not fstring_stack:
 515                     yield from dedent_if_necessary(match.end())
 516                 pos = match.end()
 517                 new_line = False
 518                 yield PythonToken(
 519                     ERRORTOKEN, line[pos], (lnum, pos),
 520                     additional_prefix + match.group(0)
 521                 )
 522                 additional_prefix = ''
 523                 pos += 1
 524                 continue
 525
 526             if (initial in numchars                      # ordinary number
 527                     or (initial == '.' and token != '.' and token != '...')):
 528                 yield PythonToken(NUMBER, token, spos, prefix)
 529             elif pseudomatch.group(3) is not None:            # ordinary name
 530                 if token in always_break_tokens and (fstring_stack or paren_level):
 531                     fstring_stack[:] = []
 532                     paren_level = 0
 533                     # We only want to dedent if the token is on a new line.
 534                     m = re.match(r'[ \f\t]*$', line[:start])
 535                     if m is not None:
 536                         yield from dedent_if_necessary(m.end())
 537                 if token.isidentifier():
 538                     yield PythonToken(NAME, token, spos, prefix)
 539                 else:
 540                     yield from _split_illegal_unicode_name(token, spos, prefix)
 541             elif initial in '\r\n':
 542                 if any(not f.allow_multiline() for f in fstring_stack):
 543                     fstring_stack.clear()
 544
 545                 if not new_line and paren_level == 0 and not fstring_stack:
 546                     yield PythonToken(NEWLINE, token, spos, prefix)
 547                 else:
 548                     additional_prefix = prefix + token
 549                 new_line = True
 550             elif initial == '#':  # Comments
 551                 assert not token.endswith("\n") and not token.endswith("\r")
 552                 if fstring_stack and fstring_stack[-1].is_in_expr():
 553                     # `#` is not allowed in f-string expressions
 554                     yield PythonToken(ERRORTOKEN, initial, spos, prefix)
 555                     pos = start + 1
 556                 else:
 557                     additional_prefix = prefix + token
 558             elif token in triple_quoted:
 559                 endprog = endpats[token]
 560                 endmatch = endprog.match(line, pos)
 561                 if endmatch:                                # all on one line
 562                     pos = endmatch.end(0)
 563                     token = line[start:pos]
 564                     yield PythonToken(STRING, token, spos, prefix)
 565                 else:
 566                     contstr_start = spos                    # multiple lines
 567                     contstr = line[start:]
 568                     contline = line
 569                     break
 570
 571             # Check up to the first 3 chars of the token to see if
 572             #  they're in the single_quoted set. If so, they start
 573             #  a string.
 574             # We're using the first 3, because we're looking for
 575             #  "rb'" (for example) at the start of the token. If
 576             #  we switch to longer prefixes, this needs to be
 577             #  adjusted.
 578             # Note that initial == token[:1].
 579             # Also note that single quote checking must come after
 580             #  triple quote checking (above).
 581             elif initial in single_quoted or \
 582                     token[:2] in single_quoted or \
 583                     token[:3] in single_quoted:
 584                 if token[-1] in '\r\n':                       # continued string
 585                     # This means that a single quoted string ends with a
 586                     # backslash and is continued.
 587                     contstr_start = lnum, start
 588                     endprog = (endpats.get(initial) or endpats.get(token[1])
 589                                or endpats.get(token[2]))
 590                     contstr = line[start:]
 591                     contline = line
 592                     break
 593                 else:                                       # ordinary string
 594                     yield PythonToken(STRING, token, spos, prefix)
 595             elif token in fstring_pattern_map:  # The start of an fstring.
 596                 fstring_stack.append(FStringNode(fstring_pattern_map[token]))
 597                 yield PythonToken(FSTRING_START, token, spos, prefix)
 598             elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
 599                 additional_prefix += prefix + line[start:]
 600                 break
 601             else:
 602                 if token in '([{':
 603                     if fstring_stack:
 604                         fstring_stack[-1].open_parentheses(token)
 605                     else:
 606                         paren_level += 1
 607                 elif token in ')]}':
 608                     if fstring_stack:
 609                         fstring_stack[-1].close_parentheses(token)
 610                     else:
 611                         if paren_level:
 612                             paren_level -= 1
 613                 elif token.startswith(':') and fstring_stack \
 614                         and fstring_stack[-1].parentheses_count \
 615                         - fstring_stack[-1].format_spec_count == 1:
 616                     # `:` and `:=` both count
 617                     fstring_stack[-1].format_spec_count += 1
 618                     token = ':'
 619                     pos = start + 1
 620
 621                 yield PythonToken(OP, token, spos, prefix)
 622
 623     if contstr:
 624         yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
 625         if contstr.endswith('\n') or contstr.endswith('\r'):
 626             new_line = True
 627
 628     if fstring_stack:
 629         tos = fstring_stack[-1]
 630         if tos.previous_lines:
 631             yield PythonToken(
 632                 FSTRING_STRING, tos.previous_lines,
 633                 tos.last_string_start_pos,
 634                 # Never has a prefix because it can start anywhere and
 635                 # include whitespace.
 636                 prefix=''
 637             )
 638
 639     end_pos = lnum, max_
 640     # As the last position we just take the maximally possible position. We
 641     # remove -1 for the last new line.
 642     for indent in indents[1:]:
 643         indents.pop()
 644         yield PythonToken(DEDENT, '', end_pos, '')
 645     yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
 646
 647
 648 def _split_illegal_unicode_name(token, start_pos, prefix):
 649     def create_token():
 650         return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix)
 651
 652     found = ''
 653     is_illegal = False
 654     pos = start_pos
 655     for i, char in enumerate(token):
 656         if is_illegal:
 657             if char.isidentifier():
 658                 yield create_token()
 659                 found = char
 660                 is_illegal = False
 661                 prefix = ''
 662                 pos = start_pos[0], start_pos[1] + i
 663             else:
 664                 found += char
 665         else:
 666             new_found = found + char
 667             if new_found.isidentifier():
 668                 found = new_found
 669             else:
 670                 if found:
 671                     yield create_token()
 672                     prefix = ''
 673                     pos = start_pos[0], start_pos[1] + i
 674                 found = char
 675                 is_illegal = True
 676
 677     if found:
 678         yield create_token()
 679
 680
 681 if __name__ == "__main__":
 682     path = sys.argv[1]
 683     with open(path) as f:
 684         code = f.read()
 685
 686     for token in tokenize(code, version_info=parse_version_string('3.10')):
 687         print(token)