]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | from codecs import BOM_UTF8 | |
3 | from typing import Tuple | |
4 | ||
5 | from parso.python.tokenize import group | |
6 | ||
# The UTF-8 byte order mark as a (single character) text string.
unicode_bom = str(BOM_UTF8, encoding='utf-8')
8 | ||
9 | ||
class PrefixPart:
    """A single piece of a leaf's prefix together with its position.

    Attributes:
        parent: The leaf this part belongs to.
        type: Name of the kind of part (a string such as ``'spacing'``).
        value: The text of the part itself.
        spacing: The text stored as spacing in front of ``value``.
        start_pos: ``(line, column)`` at which ``value`` starts.
    """

    def __init__(self, leaf, typ, value, spacing='', start_pos=None):
        # ``start_pos`` only has a default so it can be given by keyword;
        # it is mandatory in practice.
        assert start_pos is not None
        self.start_pos: Tuple[int, int] = start_pos
        self.spacing = spacing
        self.value = value
        self.type = typ
        self.parent = leaf

    @property
    def end_pos(self) -> Tuple[int, int]:
        """Return the ``(line, column)`` position right after ``value``."""
        text = self.value
        if text.endswith(('\n', '\r')):
            # A line break moves the end to the start of the next line.
            return self.start_pos[0] + 1, 0
        if text == unicode_bom:
            # The bom doesn't have a length at the start of a Python file.
            return self.start_pos
        line, column = self.start_pos[0], self.start_pos[1]
        return line, column + len(text)

    def create_spacing_part(self):
        """Return a new ``'spacing'`` part for the spacing before this part.

        The new part ends where this part starts, so its column is this
        part's column minus the length of the spacing.
        """
        spacing_start = (
            self.start_pos[0],
            self.start_pos[1] - len(self.spacing),
        )
        return PrefixPart(
            self.parent, 'spacing', self.spacing, start_pos=spacing_start
        )

    def __repr__(self):
        fields = (
            self.__class__.__name__,
            self.type,
            repr(self.value),
            self.start_pos,
        )
        return '%s(%s, %s, %s)' % fields

    def search_ancestor(self, *node_types):
        """Return the closest ancestor whose ``type`` is in ``node_types``.

        The search starts at ``self.parent`` and follows ``parent`` links;
        ``None`` is returned when no ancestor matches.
        """
        candidate = self.parent
        while candidate is not None and candidate.type not in node_types:
            candidate = candidate.parent
        return candidate
50 | ||
51 | ||
# Pattern fragments, one for each kind of part a prefix is split into.
_comment = r'#[^\n\r\f]*'
_backslash = r'\\\r?\n|\\\r'
_newline = r'\r?\n|\r'
_form_feed = r'\f'
_only_spacing = '$'
_spacing = r'[ \t]*'
_bom = unicode_bom

# Alternation of everything that may follow the leading spacing.
_part_alternatives = group(
    _comment, _backslash, _newline, _form_feed, _only_spacing, _bom,
    capture=True
)
# Leading spaces/tabs first, then exactly one of the alternatives above
# (presumably capture groups 1 and 2 - see how split_prefix reads them).
_regex = re.compile(group(_spacing, capture=True) + _part_alternatives)


# Maps the first character of a matched part to the name of its type.
_types = {
    '#': 'comment',
    '\\': 'backslash',
    '\f': 'formfeed',
    '\n': 'newline',
    '\r': 'newline',
    unicode_bom: 'bom'
}
75 | ||
76 | ||
def split_prefix(leaf, start_pos):
    """Split ``leaf.prefix`` into ``PrefixPart`` objects and yield them.

    The prefix is consumed from left to right with ``_regex``. Every match
    yields one part whose type is looked up in ``_types`` by the first
    character of the matched value. A final part of type ``'spacing'`` is
    always yielded last; it holds the trailing spaces/tabs that are not
    followed by any other part (possibly the empty string).

    Args:
        leaf: Object with a ``prefix`` string; it becomes the ``parent`` of
            every yielded part.
        start_pos: ``(line, column)`` at which the prefix starts.

    Yields:
        ``PrefixPart`` instances in the order they occur in the prefix.

    Note:
        The prefix is expected to consist only of text ``_regex`` can match;
        anything else makes ``_regex.match`` return ``None`` and the
        attribute access on it fail.
    """
    line, column = start_pos
    start = 0
    value = spacing = ''
    while start != len(leaf.prefix):
        match = _regex.match(leaf.prefix, start)
        spacing = match.group(1)
        value = match.group(2)
        if not value:
            # Only trailing spacing is left; it is yielded after the loop.
            break
        type_ = _types[value[0]]
        yield PrefixPart(
            leaf, type_, value, spacing,
            start_pos=(line, column + start + len(spacing))
        )
        if type_ == 'bom':
            # The BOM takes up one index in the prefix string but no column
            # (PrefixPart.end_pos gives it zero width). Shift the column
            # base so that everything after it on the same line, including
            # the trailing spacing part, is not pushed one column right.
            column -= 1

        start = match.end(0)
        if value.endswith(('\n', '\r')):
            line += 1
            # ``column + start`` is now the offset from the start of the new
            # line. This also discards the BOM shift, which is only valid
            # for the line that contains the BOM.
            column = -start

    if value:
        # The loop ended on a real part, so there is no trailing spacing.
        spacing = ''
    yield PrefixPart(
        leaf, 'spacing', spacing,
        start_pos=(line, column + start)
    )