# -*- coding: utf-8 -*-
"""
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: the standard library tokenizer fails when
the indentation is not right. To make error recovery possible, the tokenizer
needed to be rewritten.

Basically this is a stripped-down version of the standard library module, so
you can read the documentation there. Additionally, some speed and memory
optimizations were included here.
"""
from __future__ import absolute_import

import sys
import re
import itertools as _itertools
from codecs import BOM_UTF8
from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
    Pattern, Set

from parso.python.token import PythonTokenTypes
from parso.utils import split_lines, PythonVersionInfo, parse_version_string


# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
MAX_UNICODE = '\U0010ffff'

STRING = PythonTokenTypes.STRING
NAME = PythonTokenTypes.NAME
NUMBER = PythonTokenTypes.NUMBER
OP = PythonTokenTypes.OP
NEWLINE = PythonTokenTypes.NEWLINE
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END


class TokenCollection(NamedTuple):
    pseudo_token: Pattern
    single_quoted: Set[str]
    triple_quoted: Set[str]
    endpats: Dict[str, Pattern]
    whitespace: Pattern
    fstring_pattern_map: Dict[str, str]
    always_break_tokens: Tuple[str]


BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

_token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {}


def group(*choices, capture=False, **kwargs):
    assert not kwargs

    start = '('
    if not capture:
        start += '?:'
    return start + '|'.join(choices) + ')'


def maybe(*choices):
    return group(*choices) + '?'

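# Editor's note (illustrative, not part of the upstream module): the two
# helpers above just build regex source strings, e.g.
#   group('a', 'b')                -> '(?:a|b)'
#   group('a', 'b', capture=True)  -> '(a|b)'
#   maybe('a', 'b')                -> '(?:a|b)?'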

# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes(*, include_fstring=False, only_fstring=False):
    def different_case_versions(prefix):
        for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
            yield ''.join(s)
    # The valid string prefixes. Only contain the lower case versions,
    # and don't contain any permutations (include 'fr', but not
    # 'rf'). The various permutations will be generated.
    valid_string_prefixes = ['b', 'r', 'u', 'br']

    result = {''}
    if include_fstring:
        f = ['f', 'fr']
        if only_fstring:
            valid_string_prefixes = f
            result = set()
        else:
            valid_string_prefixes += f
    elif only_fstring:
        return set()

    # if we add binary f-strings, add: ['fb', 'fbr']
    for prefix in valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            # create a list with upper and lower versions of each
            # character
            result.update(different_case_versions(t))
    return result

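# Editor's note (illustrative, not exhaustive): with the defaults above,
# _all_string_prefixes() contains '', 'b', 'B', 'r', 'R', 'u', 'U' and all
# case/order variants of 'br' ('br', 'bR', 'rB', 'RB', ...), while
# _all_string_prefixes(include_fstring=True, only_fstring=True) contains only
# the f-string prefixes ('f', 'F', 'fr', 'rF', ...).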

def _compile(expr):
    return re.compile(expr, re.UNICODE)


def _get_token_collection(version_info):
    try:
        return _token_collection_cache[tuple(version_info)]
    except KeyError:
        _token_collection_cache[tuple(version_info)] = result = \
            _create_token_collection(version_info)
        return result


unicode_character_name = r'[A-Za-z0-9\-]+(?: [A-Za-z0-9\-]+)*'
fstring_string_single_line = _compile(
    r'(?:\{\{|\}\}|\\N\{' + unicode_character_name
    + r'\}|\\(?:\r\n?|\n)|\\[^\r\nN]|[^{}\r\n\\])+'
)
fstring_string_multi_line = _compile(
    r'(?:\{\{|\}\}|\\N\{' + unicode_character_name + r'\}|\\[^N]|[^{}\\])+'
)
fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
fstring_format_spec_multi_line = _compile(r'[^{}]+')

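# Editor's note (a reading of the patterns above, not upstream documentation):
# these regexes match the literal text between expressions in an f-string.
# For example, in f"a{x:>{w}}b" the string patterns match 'a' and 'b', the
# format-spec patterns match '>', and doubled braces like '{{' or '}}' are
# consumed as escaped text rather than as expression delimiters.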

def _create_token_collection(version_info):
    # Note: we use unicode matching for names ("\w") but ascii matching for
    # number literals.
    Whitespace = r'[ \f\t]*'
    whitespace = _compile(Whitespace)
    Comment = r'#[^\r\n]*'
    Name = '([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'

    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
    Binnumber = r'0[bB](?:_?[01])+'
    Octnumber = r'0[oO](?:_?[0-7])+'
    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
    Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
    Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                       r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
    Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
    Floatnumber = group(Pointfloat, Expfloat)
    Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
    Number = group(Imagnumber, Floatnumber, Intnumber)

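    # Editor's note (illustrative examples only): literals accepted by the
    # groups above include 0xFF, 0b1010, 0o17 and 1_000 (Intnumber),
    # 3.14, .5, 10. and 1e-9 (Floatnumber), and 3j or 2.5J (Imagnumber).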
    # Note that since _all_string_prefixes includes the empty string,
    # StringPrefix can be the empty string (making it optional).
    possible_prefixes = _all_string_prefixes()
    StringPrefix = group(*possible_prefixes)
    StringPrefixWithF = group(*_all_string_prefixes(include_fstring=True))
    fstring_prefixes = _all_string_prefixes(include_fstring=True, only_fstring=True)
    FStringStart = group(*fstring_prefixes)

    # Tail end of ' string.
    Single = r"(?:\\.|[^'\\])*'"
    # Tail end of " string.
    Double = r'(?:\\.|[^"\\])*"'
    # Tail end of ''' string.
    Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
    # Tail end of """ string.
    Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
    Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')

    # Because of leftmost-then-longest match semantics, be sure to put the
    # longest operators first (e.g., if = came before ==, == would get
    # recognized as two instances of =).
    Operator = group(r"\*\*=?", r">>=?", r"<<=?",
                     r"//=?", r"->",
                     r"[+\-*/%&@`|^!=<>]=?",
                     r"~")

    Bracket = '[][(){}]'

    special_args = [r'\.\.\.', r'\r\n?', r'\n', r'[;.,@]']
    if version_info >= (3, 8):
        special_args.insert(0, ":=?")
    else:
        special_args.insert(0, ":")
    Special = group(*special_args)

    Funny = group(Operator, Bracket, Special)

    # First (or only) line of ' or " string.
    ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*"
                    + group("'", r'\\(?:\r\n?|\n)'),
                    StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*'
                    + group('"', r'\\(?:\r\n?|\n)'))
    pseudo_extra_pool = [Comment, Triple]
    all_quotes = '"', "'", '"""', "'''"
    if fstring_prefixes:
        pseudo_extra_pool.append(FStringStart + group(*all_quotes))

    PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
    PseudoToken = group(Whitespace, capture=True) + \
        group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)

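    # Editor's note (an interpretation of how the combined pattern is used
    # later, not an upstream comment): in a pseudomatch, group(1) is the
    # leading whitespace that becomes the token's "prefix", group(2) is the
    # token text itself, and group(3) is non-None only when the Name
    # alternative matched (see the "ordinary name" branch in tokenize_lines).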
    # For a given string prefix plus quotes, endpats maps it to a regex
    # to match the remainder of that string. _prefix can be empty, for
    # a normal single or triple quoted string (with no prefix).
    endpats = {}
    for _prefix in possible_prefixes:
        endpats[_prefix + "'"] = _compile(Single)
        endpats[_prefix + '"'] = _compile(Double)
        endpats[_prefix + "'''"] = _compile(Single3)
        endpats[_prefix + '"""'] = _compile(Double3)

    # A set of all of the single and triple quoted string prefixes,
    # including the opening quotes.
    single_quoted = set()
    triple_quoted = set()
    fstring_pattern_map = {}
    for t in possible_prefixes:
        for quote in '"', "'":
            single_quoted.add(t + quote)

        for quote in '"""', "'''":
            triple_quoted.add(t + quote)

    for t in fstring_prefixes:
        for quote in all_quotes:
            fstring_pattern_map[t + quote] = quote

    ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
                           'finally', 'while', 'with', 'return', 'continue',
                           'break', 'del', 'pass', 'global', 'assert', 'nonlocal')
    pseudo_token_compiled = _compile(PseudoToken)
    return TokenCollection(
        pseudo_token_compiled, single_quoted, triple_quoted, endpats,
        whitespace, fstring_pattern_map, set(ALWAYS_BREAK_TOKENS)
    )


class Token(NamedTuple):
    type: PythonTokenTypes
    string: str
    start_pos: Tuple[int, int]
    prefix: str

    @property
    def end_pos(self) -> Tuple[int, int]:
        lines = split_lines(self.string)
        if len(lines) > 1:
            return self.start_pos[0] + len(lines) - 1, 0
        else:
            return self.start_pos[0], self.start_pos[1] + len(self.string)

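# Editor's note (illustrative): for a single-line token such as
# Token(NAME, 'foo', (3, 10), '') the end_pos above is (3, 13); for a token
# whose string spans a newline, e.g. a NEWLINE token '\n' starting at (3, 10),
# end_pos is (4, 0).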

class PythonToken(Token):
    def __repr__(self):
        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
                self._replace(type=self.type.name))


class FStringNode:
    def __init__(self, quote):
        self.quote = quote
        self.parentheses_count = 0
        self.previous_lines = ''
        self.last_string_start_pos = None
        # In the syntax there can be multiple format_spec's nested:
        # {x:{y:3}}
        self.format_spec_count = 0

    def open_parentheses(self, character):
        self.parentheses_count += 1

    def close_parentheses(self, character):
        self.parentheses_count -= 1
        if self.parentheses_count == 0:
            # No parentheses means that the format spec is also finished.
            self.format_spec_count = 0

    def allow_multiline(self):
        return len(self.quote) == 3

    def is_in_expr(self):
        return self.parentheses_count > self.format_spec_count

    def is_in_format_spec(self):
        return not self.is_in_expr() and self.format_spec_count

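# Editor's note (a worked trace of the counters above, assuming an input like
# f"{x:{y}}"): the opening '{' sets parentheses_count to 1, so is_in_expr() is
# True while `x` is read; the ':' bumps format_spec_count to 1, putting the
# node in the format spec; the inner '{' raises parentheses_count to 2, so
# is_in_expr() is True again for `y`.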

def _close_fstring_if_necessary(fstring_stack, string, line_nr, column, additional_prefix):
    for fstring_stack_index, node in enumerate(fstring_stack):
        lstripped_string = string.lstrip()
        len_lstrip = len(string) - len(lstripped_string)
        if lstripped_string.startswith(node.quote):
            token = PythonToken(
                FSTRING_END,
                node.quote,
                (line_nr, column + len_lstrip),
                prefix=additional_prefix+string[:len_lstrip],
            )
            additional_prefix = ''
            assert not node.previous_lines
            del fstring_stack[fstring_stack_index:]
            return token, '', len(node.quote) + len_lstrip
    return None, additional_prefix, 0


def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
    tos = fstring_stack[-1]
    allow_multiline = tos.allow_multiline()
    if tos.is_in_format_spec():
        if allow_multiline:
            regex = fstring_format_spec_multi_line
        else:
            regex = fstring_format_spec_single_line
    else:
        if allow_multiline:
            regex = fstring_string_multi_line
        else:
            regex = fstring_string_single_line

    match = regex.match(line, pos)
    if match is None:
        return tos.previous_lines, pos

    if not tos.previous_lines:
        tos.last_string_start_pos = (lnum, pos)

    string = match.group(0)
    for fstring_stack_node in fstring_stack:
        end_match = endpats[fstring_stack_node.quote].match(string)
        if end_match is not None:
            string = end_match.group(0)[:-len(fstring_stack_node.quote)]

    new_pos = pos
    new_pos += len(string)
    # Even if allow_multiline is False, we still need to check for trailing
    # newlines, because a single-line f-string can contain line continuations.
    if string.endswith('\n') or string.endswith('\r'):
        tos.previous_lines += string
        string = ''
    else:
        string = tos.previous_lines + string

    return string, new_pos


def tokenize(
    code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
) -> Iterator[PythonToken]:
    """Generate tokens from the source code (string)."""
    lines = split_lines(code, keepends=True)
    return tokenize_lines(lines, version_info=version_info, start_pos=start_pos)

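# Editor's note, a minimal usage sketch (output abbreviated and indicative
# only; exact positions depend on the input):
#
#   for t in tokenize("x = 1\n", version_info=parse_version_string('3.8')):
#       print(t)
#   # TokenInfo(type=NAME, string='x', start_pos=(1, 0), prefix='')
#   # TokenInfo(type=OP, string='=', start_pos=(1, 2), prefix=' ')
#   # TokenInfo(type=NUMBER, string='1', start_pos=(1, 4), prefix=' ')
#   # ...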

def _print_tokens(func):
    """
    A small decorator to help debug the tokenize_lines function.
    """
    def wrapper(*args, **kwargs):
        for token in func(*args, **kwargs):
            print(token)  # This print is intentional for debugging!
            yield token

    return wrapper


# @_print_tokens
def tokenize_lines(
    lines: Iterable[str],
    *,
    version_info: PythonVersionInfo,
    indents: List[int] = None,
    start_pos: Tuple[int, int] = (1, 0),
    is_first_token=True,
) -> Iterator[PythonToken]:
    """
    A heavily modified Python standard library tokenizer.

    In addition to the default information, it also yields the prefix of each
    token. This idea comes from lib2to3. The prefix contains all information
    that is irrelevant for the parser, like newlines in parentheses or
    comments.
    """
    def dedent_if_necessary(start):
        while start < indents[-1]:
            if start > indents[-2]:
                yield PythonToken(ERROR_DEDENT, '', (lnum, start), '')
                indents[-1] = start
                break
            indents.pop()
            yield PythonToken(DEDENT, '', spos, '')

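    # Editor's note (illustrative, not upstream): with indents == [0, 4, 8], a
    # line starting at column 4 pops the 8 and yields one DEDENT, while a line
    # starting at column 6 matches no outer level, so an ERROR_DEDENT is
    # yielded and the top of the stack is clamped to 6 (indents == [0, 4, 6]).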
    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
        fstring_pattern_map, always_break_tokens, = \
        _get_token_collection(version_info)
    paren_level = 0  # count parentheses
    if indents is None:
        indents = [0]
    max_ = 0
    numchars = '0123456789'
    contstr = ''
    contline: str
    contstr_start: Tuple[int, int]
    endprog: Pattern
    # We start with a newline. This makes indent at the first position
    # possible. It's not valid Python, but still better than an INDENT in the
    # second line (and not in the first). This makes quite a few things in
    # Jedi's fast parser possible.
    new_line = True
    prefix = ''  # Should never be required, but here for safety
    additional_prefix = ''
    lnum = start_pos[0] - 1
    fstring_stack: List[FStringNode] = []
    for line in lines:  # loop over lines in stream
        lnum += 1
        pos = 0
        max_ = len(line)
        if is_first_token:
            if line.startswith(BOM_UTF8_STRING):
                additional_prefix = BOM_UTF8_STRING
                line = line[1:]
                max_ = len(line)

            # Fake that the part before was already parsed.
            line = '^' * start_pos[1] + line
            pos = start_pos[1]
            max_ += start_pos[1]

            is_first_token = False

        if contstr:  # continued string
            endmatch = endprog.match(line)  # noqa: F821
            if endmatch:
                pos = endmatch.end(0)
                yield PythonToken(
                    STRING, contstr + line[:pos],
                    contstr_start, prefix)  # noqa: F821
                contstr = ''
                contline = ''
            else:
                contstr = contstr + line
                contline = contline + line
                continue

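        # Editor's note (not upstream): `contstr` accumulates the body of a
        # string that continues past the current line; until `endprog` matches
        # the closing quote(s), whole lines are appended above and the regular
        # per-line tokenization below is skipped.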
        while pos < max_:
            if fstring_stack:
                tos = fstring_stack[-1]
                if not tos.is_in_expr():
                    string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
                    if string:
                        yield PythonToken(
                            FSTRING_STRING, string,
                            tos.last_string_start_pos,
                            # Never has a prefix because it can start anywhere and
                            # include whitespace.
                            prefix=''
                        )
                        tos.previous_lines = ''
                        continue
                    if pos == max_:
                        break

                rest = line[pos:]
                fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
                    fstring_stack,
                    rest,
                    lnum,
                    pos,
                    additional_prefix,
                )
                pos += quote_length
                if fstring_end_token is not None:
                    yield fstring_end_token
                    continue

            # in an f-string, match until the end of the string
            if fstring_stack:
                string_line = line
                for fstring_stack_node in fstring_stack:
                    quote = fstring_stack_node.quote
                    end_match = endpats[quote].match(line, pos)
                    if end_match is not None:
                        end_match_string = end_match.group(0)
                        if len(end_match_string) - len(quote) + pos < len(string_line):
                            string_line = line[:pos] + end_match_string[:-len(quote)]
                pseudomatch = pseudo_token.match(string_line, pos)
            else:
                pseudomatch = pseudo_token.match(line, pos)

            if pseudomatch:
                prefix = additional_prefix + pseudomatch.group(1)
                additional_prefix = ''
                start, pos = pseudomatch.span(2)
                spos = (lnum, start)
                token = pseudomatch.group(2)
                if token == '':
                    assert prefix
                    additional_prefix = prefix
                    # This means that we have a line with whitespace/comments at
                    # the end, which just results in an endmarker.
                    break
                initial = token[0]
            else:
                match = whitespace.match(line, pos)
                initial = line[match.end()]
                start = match.end()
                spos = (lnum, start)

            if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None):
                new_line = False
                if paren_level == 0 and not fstring_stack:
                    indent_start = start
                    if indent_start > indents[-1]:
                        yield PythonToken(INDENT, '', spos, '')
                        indents.append(indent_start)
                    yield from dedent_if_necessary(indent_start)

            if not pseudomatch:  # scan for tokens
                match = whitespace.match(line, pos)
                if new_line and paren_level == 0 and not fstring_stack:
                    yield from dedent_if_necessary(match.end())
                pos = match.end()
                new_line = False
                yield PythonToken(
                    ERRORTOKEN, line[pos], (lnum, pos),
                    additional_prefix + match.group(0)
                )
                additional_prefix = ''
                pos += 1
                continue

            if (initial in numchars  # ordinary number
                    or (initial == '.' and token != '.' and token != '...')):
                yield PythonToken(NUMBER, token, spos, prefix)
            elif pseudomatch.group(3) is not None:  # ordinary name
                if token in always_break_tokens and (fstring_stack or paren_level):
                    fstring_stack[:] = []
                    paren_level = 0
                    # We only want to dedent if the token is on a new line.
                    m = re.match(r'[ \f\t]*$', line[:start])
                    if m is not None:
                        yield from dedent_if_necessary(m.end())
                if token.isidentifier():
                    yield PythonToken(NAME, token, spos, prefix)
                else:
                    yield from _split_illegal_unicode_name(token, spos, prefix)
            elif initial in '\r\n':
                if any(not f.allow_multiline() for f in fstring_stack):
                    fstring_stack.clear()

                if not new_line and paren_level == 0 and not fstring_stack:
                    yield PythonToken(NEWLINE, token, spos, prefix)
                else:
                    additional_prefix = prefix + token
                new_line = True
            elif initial == '#':  # Comments
                assert not token.endswith("\n") and not token.endswith("\r")
                if fstring_stack and fstring_stack[-1].is_in_expr():
                    # `#` is not allowed in f-string expressions
                    yield PythonToken(ERRORTOKEN, initial, spos, prefix)
                    pos = start + 1
                else:
                    additional_prefix = prefix + token
            elif token in triple_quoted:
                endprog = endpats[token]
                endmatch = endprog.match(line, pos)
                if endmatch:  # all on one line
                    pos = endmatch.end(0)
                    token = line[start:pos]
                    yield PythonToken(STRING, token, spos, prefix)
                else:
                    contstr_start = spos  # multiple lines
                    contstr = line[start:]
                    contline = line
                    break

            # Check up to the first 3 chars of the token to see if
            # they're in the single_quoted set. If so, they start
            # a string.
            # We're using the first 3, because we're looking for
            # "rb'" (for example) at the start of the token. If
            # we switch to longer prefixes, this needs to be
            # adjusted.
            # Note that initial == token[:1].
            # Also note that single quote checking must come after
            # triple quote checking (above).
            elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                if token[-1] in '\r\n':  # continued string
                    # This means that a single quoted string ends with a
                    # backslash and is continued.
                    contstr_start = lnum, start
                    endprog = (endpats.get(initial) or endpats.get(token[1])
                               or endpats.get(token[2]))
                    contstr = line[start:]
                    contline = line
                    break
                else:  # ordinary string
                    yield PythonToken(STRING, token, spos, prefix)
            elif token in fstring_pattern_map:  # The start of an fstring.
                fstring_stack.append(FStringNode(fstring_pattern_map[token]))
                yield PythonToken(FSTRING_START, token, spos, prefix)
            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
            else:
                if token in '([{':
                    if fstring_stack:
                        fstring_stack[-1].open_parentheses(token)
                    else:
                        paren_level += 1
                elif token in ')]}':
                    if fstring_stack:
                        fstring_stack[-1].close_parentheses(token)
                    else:
                        if paren_level:
                            paren_level -= 1
                elif token.startswith(':') and fstring_stack \
                        and fstring_stack[-1].parentheses_count \
                        - fstring_stack[-1].format_spec_count == 1:
                    # `:` and `:=` both count
                    fstring_stack[-1].format_spec_count += 1
                    token = ':'
                    pos = start + 1

                yield PythonToken(OP, token, spos, prefix)

    if contstr:
        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
        if contstr.endswith('\n') or contstr.endswith('\r'):
            new_line = True

    if fstring_stack:
        tos = fstring_stack[-1]
        if tos.previous_lines:
            yield PythonToken(
                FSTRING_STRING, tos.previous_lines,
                tos.last_string_start_pos,
                # Never has a prefix because it can start anywhere and
                # include whitespace.
                prefix=''
            )

    end_pos = lnum, max_
    # As the last position we just take the maximally possible position. We
    # remove -1 for the last new line.
    for indent in indents[1:]:
        indents.pop()
        yield PythonToken(DEDENT, '', end_pos, '')
    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)


def _split_illegal_unicode_name(token, start_pos, prefix):
    def create_token():
        return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix)

    found = ''
    is_illegal = False
    pos = start_pos
    for i, char in enumerate(token):
        if is_illegal:
            if char.isidentifier():
                yield create_token()
                found = char
                is_illegal = False
                prefix = ''
                pos = start_pos[0], start_pos[1] + i
            else:
                found += char
        else:
            new_found = found + char
            if new_found.isidentifier():
                found = new_found
            else:
                if found:
                    yield create_token()
                    prefix = ''
                    pos = start_pos[0], start_pos[1] + i
                found = char
                is_illegal = True

    if found:
        yield create_token()

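# Editor's note (illustrative): for a token like 'abc€def', where '€' is not a
# valid identifier character, the generator above yields NAME 'abc',
# ERRORTOKEN '€' and NAME 'def', advancing the column of `pos` as it goes.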

if __name__ == "__main__":
    path = sys.argv[1]
    with open(path) as f:
        code = f.read()

    for token in tokenize(code, version_info=parse_version_string('3.10')):
        print(token)