]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | # Grammar for 2to3. This grammar supports Python 2.x and 3.x. |
2 | ||
3 | # NOTE WELL: You should also follow all the steps listed at | |
4 | # https://devguide.python.org/grammar/ | |
5 | ||
6 | # Start symbols for the grammar: | |
7 | # file_input is a module or sequence of commands read from an input file; | |
8 | # single_input is a single interactive statement; | |
9 | # eval_input is the input for the eval() and input() functions. | |
10 | # NB: compound_stmt in single_input is followed by extra NEWLINE! | |
11 | file_input: (NEWLINE | stmt)* ENDMARKER | |
12 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE | |
13 | eval_input: testlist NEWLINE* ENDMARKER | |
14 | ||
15 | typevar: NAME [':' expr] | |
16 | paramspec: '**' NAME | |
17 | typevartuple: '*' NAME | |
18 | typeparam: typevar | paramspec | typevartuple | |
19 | typeparams: '[' typeparam (',' typeparam)* [','] ']' | |
20 | ||
21 | decorator: '@' namedexpr_test NEWLINE | |
22 | decorators: decorator+ | |
23 | decorated: decorators (classdef | funcdef | async_funcdef) | |
24 | async_funcdef: ASYNC funcdef | |
25 | funcdef: 'def' NAME [typeparams] parameters ['->' test] ':' suite | |
26 | parameters: '(' [typedargslist] ')' | |
27 | ||
28 | # The following definition for typedargslist is equivalent to this set of rules: | |
29 | # | |
30 | # arguments = argument (',' argument)* | |
31 | # argument = tfpdef ['=' test] | |
32 | # kwargs = '**' tname [','] | |
33 | # args = '*' [tname_star] | |
34 | # kwonly_kwargs = (',' argument)* [',' [kwargs]] | |
35 | # args_kwonly_kwargs = args kwonly_kwargs | kwargs | |
36 | # poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] | |
37 | # typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs | |
38 | # typedargslist = (arguments ',' '/' [',' [typedargslist_no_posonly]])|(typedargslist_no_posonly) | |
39 | # | |
40 | # It needs to be fully expanded to allow our LL(1) parser to work on it. | |
41 | ||
42 | typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [ | |
43 | ',' [((tfpdef ['=' test] ',')* ('*' [tname_star] (',' tname ['=' test])* | |
44 | [',' ['**' tname [',']]] | '**' tname [',']) | |
45 | | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])] | |
46 | ] | ((tfpdef ['=' test] ',')* ('*' [tname_star] (',' tname ['=' test])* | |
47 | [',' ['**' tname [',']]] | '**' tname [',']) | |
48 | | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) | |
49 | ||
50 | tname: NAME [':' test] | |
51 | tname_star: NAME [':' (test|star_expr)] | |
52 | tfpdef: tname | '(' tfplist ')' | |
53 | tfplist: tfpdef (',' tfpdef)* [','] | |
54 | ||
55 | # The following definition for varargslist is equivalent to this set of rules: | |
56 | # | |
57 | # arguments = argument (',' argument )* | |
58 | # argument = vfpdef ['=' test] | |
59 | # kwargs = '**' vname [','] | |
60 | # args = '*' [vname] | |
61 | # kwonly_kwargs = (',' argument )* [',' [kwargs]] | |
62 | # args_kwonly_kwargs = args kwonly_kwargs | kwargs | |
63 | # poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]] | |
64 | # varargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs | |
65 | # varargslist = arguments ',' '/' [','[(varargslist_no_posonly)]] | (varargslist_no_posonly) | |
66 | # | |
67 | # It needs to be fully expanded to allow our LL(1) parser to work on it. | |
68 | ||
69 | varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ | |
70 | ((vfpdef ['=' test] ',')* ('*' [vname] (',' vname ['=' test])* | |
71 | [',' ['**' vname [',']]] | '**' vname [',']) | |
72 | | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) | |
73 | ]] | ((vfpdef ['=' test] ',')* | |
74 | ('*' [vname] (',' vname ['=' test])* [',' ['**' vname [',']]]| '**' vname [',']) | |
75 | | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) | |
76 | ||
77 | vname: NAME | |
78 | vfpdef: vname | '(' vfplist ')' | |
79 | vfplist: vfpdef (',' vfpdef)* [','] | |
80 | ||
81 | stmt: simple_stmt | compound_stmt | |
82 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE | |
83 | small_stmt: (type_stmt | expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | | |
84 | import_stmt | global_stmt | exec_stmt | assert_stmt) | |
85 | expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | | |
86 | ('=' (yield_expr|testlist_star_expr))*) | |
87 | annassign: ':' test ['=' (yield_expr|testlist_star_expr)] | |
88 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] | |
89 | augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | | |
90 | '<<=' | '>>=' | '**=' | '//=') | |
91 | # For normal and annotated assignments, additional restrictions enforced by the interpreter | |
92 | print_stmt: 'print' ( [ test (',' test)* [','] ] | | |
93 | '>>' test [ (',' test)+ [','] ] ) | |
94 | del_stmt: 'del' exprlist | |
95 | pass_stmt: 'pass' | |
96 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt | |
97 | break_stmt: 'break' | |
98 | continue_stmt: 'continue' | |
99 | return_stmt: 'return' [testlist_star_expr] | |
100 | yield_stmt: yield_expr | |
101 | raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]] | |
102 | import_stmt: import_name | import_from | |
103 | import_name: 'import' dotted_as_names | |
104 | import_from: ('from' ('.'* dotted_name | '.'+) | |
105 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) | |
106 | import_as_name: NAME ['as' NAME] | |
107 | dotted_as_name: dotted_name ['as' NAME] | |
108 | import_as_names: import_as_name (',' import_as_name)* [','] | |
109 | dotted_as_names: dotted_as_name (',' dotted_as_name)* | |
110 | dotted_name: NAME ('.' NAME)* | |
111 | global_stmt: ('global' | 'nonlocal') NAME (',' NAME)* | |
112 | exec_stmt: 'exec' expr ['in' test [',' test]] | |
113 | assert_stmt: 'assert' test [',' test] | |
114 | type_stmt: "type" NAME [typeparams] '=' expr | |
115 | ||
116 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt | |
117 | async_stmt: ASYNC (funcdef | with_stmt | for_stmt) | |
118 | if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] | |
119 | while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] | |
120 | for_stmt: 'for' exprlist 'in' testlist_star_expr ':' suite ['else' ':' suite] | |
121 | try_stmt: ('try' ':' suite | |
122 | ((except_clause ':' suite)+ | |
123 | ['else' ':' suite] | |
124 | ['finally' ':' suite] | | |
125 | 'finally' ':' suite)) | |
126 | with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite | |
127 | ||
128 | # NB compile.c makes sure that the default except clause is last | |
129 | except_clause: 'except' ['*'] [test [(',' | 'as') test]] | |
130 | suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT | |
131 | ||
132 | # Backward compatibility cruft to support: | |
133 | # [ x for x in lambda: True, lambda: False if x() ] | |
134 | # even while also allowing: | |
135 | # lambda x: 5 if x else 2 | |
136 | # (But not a mix of the two) | |
137 | testlist_safe: old_test [(',' old_test)+ [',']] | |
138 | old_test: or_test | old_lambdef | |
139 | old_lambdef: 'lambda' [varargslist] ':' old_test | |
140 | ||
141 | namedexpr_test: asexpr_test [':=' asexpr_test] | |
142 | ||
143 | # This is not actually a real rule, but since the parser is very | |
144 | # limited in terms of its strategy for match/case rules, we are inserting | |
145 | # a virtual case (<expr> as <expr>) as a valid expression. Unless a better | |
146 | # approach is thought of, the only side effect of this seems to be allowing | |
147 | # more stuff to be parsed (which would then fail on the AST). | |
148 | asexpr_test: test ['as' test] | |
149 | ||
150 | test: or_test ['if' or_test 'else' test] | lambdef | |
151 | or_test: and_test ('or' and_test)* | |
152 | and_test: not_test ('and' not_test)* | |
153 | not_test: 'not' not_test | comparison | |
154 | comparison: expr (comp_op expr)* | |
155 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' | |
156 | star_expr: '*' expr | |
157 | expr: xor_expr ('|' xor_expr)* | |
158 | xor_expr: and_expr ('^' and_expr)* | |
159 | and_expr: shift_expr ('&' shift_expr)* | |
160 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* | |
161 | arith_expr: term (('+'|'-') term)* | |
162 | term: factor (('*'|'@'|'/'|'%'|'//') factor)* | |
163 | factor: ('+'|'-'|'~') factor | power | |
164 | power: [AWAIT] atom trailer* ['**' factor] | |
165 | atom: ('(' [yield_expr|testlist_gexp] ')' | | |
166 | '[' [listmaker] ']' | | |
167 | '{' [dictsetmaker] '}' | | |
168 | '`' testlist1 '`' | | |
169 | NAME | NUMBER | STRING+ | '.' '.' '.') | |
170 | listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) | |
171 | testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] ) | |
172 | lambdef: 'lambda' [varargslist] ':' test | |
173 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME | |
174 | subscriptlist: (subscript|star_expr) (',' (subscript|star_expr))* [','] | |
175 | subscript: test [':=' test] | [test] ':' [test] [sliceop] | |
176 | sliceop: ':' [test] | |
177 | exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] | |
178 | testlist: test (',' test)* [','] | |
179 | dictsetmaker: ( ((test ':' asexpr_test | '**' expr) | |
180 | (comp_for | (',' (test ':' asexpr_test | '**' expr))* [','])) | | |
181 | ((test [':=' test] | star_expr) | |
182 | (comp_for | (',' (test [':=' test] | star_expr))* [','])) ) | |
183 | ||
184 | classdef: 'class' NAME [typeparams] ['(' [arglist] ')'] ':' suite | |
185 | ||
186 | arglist: argument (',' argument)* [','] | |
187 | ||
188 | # "test '=' test" is really "keyword '=' test", but we have no such token. | |
189 | # These need to be in a single rule to avoid grammar that is ambiguous | |
190 | # to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, | |
191 | # we explicitly match '*' here, too, to give it proper precedence. | |
192 | # Illegal combinations and orderings are blocked in ast.c: | |
193 | # multiple (test comp_for) arguments are blocked; keyword unpackings | |
194 | # that precede iterable unpackings are blocked; etc. | |
195 | argument: ( test [comp_for] | | |
196 | test ':=' test [comp_for] | | |
197 | test 'as' test | | |
198 | test '=' asexpr_test | | |
199 | '**' test | | |
200 | '*' test ) | |
201 | ||
202 | comp_iter: comp_for | comp_if | |
203 | comp_for: [ASYNC] 'for' exprlist 'in' or_test [comp_iter] | |
204 | comp_if: 'if' old_test [comp_iter] | |
205 | ||
206 | # As noted above, testlist_safe extends the syntax allowed in list | |
207 | # comprehensions and generators. We can't use it indiscriminately in all | |
208 | # derivations using a comp_for-like pattern because the testlist_safe derivation | |
209 | # contains comma which clashes with trailing comma in arglist. | |
210 | # | |
211 | # This was an issue because the parser would not follow the correct derivation | |
212 | # when parsing syntactically valid Python code. Since testlist_safe was created | |
213 | # specifically to handle list comprehensions and generator expressions enclosed | |
214 | # with parentheses, it's safe to only use it in those. That avoids the issue; we | |
215 | # can parse code like set(x for x in [],). | |
216 | # | |
217 | # The syntax supported by this set of rules is not valid Python 3 syntax, | |
218 | # hence the prefix "old". | |
219 | # | |
220 | # See https://bugs.python.org/issue27494 | |
221 | old_comp_iter: old_comp_for | old_comp_if | |
222 | old_comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [old_comp_iter] | |
223 | old_comp_if: 'if' old_test [old_comp_iter] | |
224 | ||
225 | testlist1: test (',' test)* | |
226 | ||
227 | # not used in grammar, but may appear in "node" passed from Parser to Compiler | |
228 | encoding_decl: NAME | |
229 | ||
230 | yield_expr: 'yield' [yield_arg] | |
231 | yield_arg: 'from' test | testlist_star_expr | |
232 | ||
233 | ||
234 | # 3.10 match statement definition | |
235 | ||
236 | # PS: normally the grammar is much, much more restricted, but | |
237 | # for now, rather than bothering to encode the exact same DSL | |
238 | # in an LL(1) parser, we will just accept an expression | |
239 | # and let the ast.parse() step of the safe mode reject invalid | |
240 | # grammar. | |
241 | ||
242 | # The reason it is more restricted is that patterns are a | |
243 | # sort of DSL (more advanced than our LHS on assignments, but | |
244 | # still in a very limited Python subset). They are not really | |
245 | # expressions, but who cares. If we can parse them, that is enough | |
246 | # to reformat them. | |
247 | ||
248 | match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT | |
249 | ||
250 | # This is more permissive than the actual version. For example it | |
251 | # accepts `match *something:`, even though single-item starred expressions | |
252 | # are forbidden. | |
253 | subject_expr: (namedexpr_test|star_expr) (',' (namedexpr_test|star_expr))* [','] | |
254 | ||
255 | # cases | |
256 | case_block: "case" patterns [guard] ':' suite | |
257 | guard: 'if' namedexpr_test | |
258 | patterns: pattern (',' pattern)* [','] | |
259 | pattern: (expr|star_expr) ['as' expr] |