from parso.python import tree
from parso.python.token import PythonTokenTypes
from parso.parser import BaseParser


NAME = PythonTokenTypes.NAME
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT


class Parser(BaseParser):
    """
    This class is used to parse a Python file; it then divides it into a
    class structure of different scopes.

    :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
    """

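    # Maps a grammar nonterminal name to the tree class used for it;
    # anything not listed here becomes a ``default_node`` (PythonNode).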
    node_map = {
        'expr_stmt': tree.ExprStmt,
        'classdef': tree.Class,
        'funcdef': tree.Function,
        'file_input': tree.Module,
        'import_name': tree.ImportName,
        'import_from': tree.ImportFrom,
        'break_stmt': tree.KeywordStatement,
        'continue_stmt': tree.KeywordStatement,
        'return_stmt': tree.ReturnStmt,
        'raise_stmt': tree.KeywordStatement,
        'yield_expr': tree.YieldExpr,
        'del_stmt': tree.KeywordStatement,
        'pass_stmt': tree.KeywordStatement,
        'global_stmt': tree.GlobalStmt,
        'nonlocal_stmt': tree.KeywordStatement,
        'print_stmt': tree.KeywordStatement,
        'assert_stmt': tree.AssertStmt,
        'if_stmt': tree.IfStmt,
        'with_stmt': tree.WithStmt,
        'for_stmt': tree.ForStmt,
        'while_stmt': tree.WhileStmt,
        'try_stmt': tree.TryStmt,
        'sync_comp_for': tree.SyncCompFor,
        # Not sure if this is the best idea, but IMO it's the easiest way to
        # avoid extreme amounts of work around the subtle difference of 2/3
        # grammar in list comprehensions.
        'decorator': tree.Decorator,
        'lambdef': tree.Lambda,
        'lambdef_nocond': tree.Lambda,
        'namedexpr_test': tree.NamedExpr,
    }
    default_node = tree.PythonNode

    # Names/Keywords are handled separately
    _leaf_map = {
        PythonTokenTypes.STRING: tree.String,
        PythonTokenTypes.NUMBER: tree.Number,
        PythonTokenTypes.NEWLINE: tree.Newline,
        PythonTokenTypes.ENDMARKER: tree.EndMarker,
        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
        PythonTokenTypes.FSTRING_START: tree.FStringStart,
        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
    }

    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
        super().__init__(pgen_grammar, start_nonterminal,
                         error_recovery=error_recovery)

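        # Bookkeeping for _recovery_tokenize(): indentation levels whose
        # DEDENT must be skipped, and the current indentation depth.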
        self.syntax_errors = []
        self._omit_dedent_list = []
        self._indent_counter = 0

    def parse(self, tokens):
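        """
        Parse the given token stream into a parso syntax tree.

        With error recovery enabled, the stream is first wrapped in
        _recovery_tokenize() so that DEDENTs belonging to INDENTs dropped
        during recovery can be filtered out.
        """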
        if self._error_recovery:
            if self._start_nonterminal != 'file_input':
                raise NotImplementedError

            tokens = self._recovery_tokenize(tokens)

        return super().parse(tokens)

    def convert_node(self, nonterminal, children):
        """
        Convert raw node information to a PythonBaseNode instance.

        This is passed to the parser driver which calls it whenever a
        reduction of a grammar rule produces a new complete node, so that the
        tree is built strictly bottom-up.
        """
        try:
            node = self.node_map[nonterminal](children)
        except KeyError:
            if nonterminal == 'suite':
                # We don't want the INDENT/DEDENT in our parser tree. Those
                # leaves are just cancer. They are virtual leaves and not
                # real ones and therefore have pseudo start/end positions and
                # no prefixes. Just ignore them.
                children = [children[0]] + children[2:-1]
            node = self.default_node(nonterminal, children)
        return node

    def convert_leaf(self, type, value, prefix, start_pos):
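        """
        Convert a single token into the matching leaf class: NAME tokens
        become Keyword or Name leaves depending on whether the value is a
        reserved syntax string of the grammar; everything else is looked up
        in ``_leaf_map`` and falls back to Operator.
        """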
        # print('leaf', repr(value), token.tok_name[type])
        if type == NAME:
            if value in self._pgen_grammar.reserved_syntax_strings:
                return tree.Keyword(value, start_pos, prefix)
            else:
                return tree.Name(value, start_pos, prefix)

        return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)

    def error_recovery(self, token):
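        """
        Called by the base parser when a token cannot be shifted.

        Two cases are handled: a file that ends without a trailing newline
        (recovered even when error recovery is disabled), and, with error
        recovery enabled, broken input that is wrapped into PythonErrorLeaf
        and PythonErrorNode instances so that parsing can continue.
        """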
        tos_nodes = self.stack[-1].nodes
        if tos_nodes:
            last_leaf = tos_nodes[-1].get_last_leaf()
        else:
            last_leaf = None

        if self._start_nonterminal == 'file_input' and \
                (token.type == PythonTokenTypes.ENDMARKER
                 or token.type == DEDENT and not last_leaf.value.endswith('\n')
                 and not last_leaf.value.endswith('\r')):
            # In Python, statements need to end with a newline. But since
            # it's possible (and valid in Python) that there's no newline at
            # the end of a file, we have to recover even if the user doesn't
            # want error recovery.
            if self.stack[-1].dfa.from_rule == 'simple_stmt':
                try:
                    plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
                except KeyError:
                    pass
                else:
                    if plan.next_dfa.is_final and not plan.dfa_pushes:
                        # We are ignoring here that the newline would be
                        # required for a simple_stmt.
                        self.stack[-1].dfa = plan.next_dfa
                        self._add_token(token)
                        return

        if not self._error_recovery:
            return super().error_recovery(token)

        def current_suite(stack):
            # For now just discard everything that is not a suite or
            # file_input, if we detect an error.
            for until_index, stack_node in reversed(list(enumerate(stack))):
                # `suite` can sometimes be only simple_stmt, not stmt.
                if stack_node.nonterminal == 'file_input':
                    break
                elif stack_node.nonterminal == 'suite':
                    # In the case where we just have a newline we don't want
                    # to do error recovery here. In all other cases, we want
                    # to do error recovery.
                    if len(stack_node.nodes) != 1:
                        break
            return until_index

        until_index = current_suite(self.stack)

        if self._stack_removal(until_index + 1):
            self._add_token(token)
        else:
            typ, value, start_pos, prefix = token
            if typ == INDENT:
                # For every deleted INDENT we have to delete a DEDENT as
                # well. Otherwise the parser will get into trouble and
                # DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)

            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
            self.stack[-1].nodes.append(error_leaf)

        tos = self.stack[-1]
        if tos.nonterminal == 'suite':
            # Need at least one statement in the suite. This happened with
            # the error recovery above.
            try:
                tos.dfa = tos.dfa.arcs['stmt']
            except KeyError:
                # We're already in a final state.
                pass

    def _stack_removal(self, start_index):
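        """
        Cut the stack down to ``start_index`` and collect the orphaned nodes
        into a PythonErrorNode that is attached to the new top of the stack.
        Returns True if any nodes were collected.
        """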
        all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]

        if all_nodes:
            node = tree.PythonErrorNode(all_nodes)
            self.stack[start_index - 1].nodes.append(node)

        self.stack[start_index:] = []
        return bool(all_nodes)

    def _recovery_tokenize(self, tokens):
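        """
        Wrap the token stream to keep ``_indent_counter`` up to date and to
        drop DEDENT tokens whose matching INDENT was removed during error
        recovery (see ``_omit_dedent_list``).
        """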
        for token in tokens:
            typ = token[0]
            if typ == DEDENT:
                # We need to count indents, because if we just omit any
                # DEDENT, we might omit them in the wrong place.
                o = self._omit_dedent_list
                if o and o[-1] == self._indent_counter:
                    o.pop()
                    self._indent_counter -= 1
                    continue

                self._indent_counter -= 1
            elif typ == INDENT:
                self._indent_counter += 1
            yield token
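

# A minimal usage sketch, not part of this module: this Parser is normally
# driven through parso's public API rather than instantiated directly, e.g.
#
#     import parso
#     module = parso.parse('def f(x):\n    return x\n')
#     print(module.children[0].type)  # 'funcdef'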