# Copyright 2022 Bill Wendling, All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | """Simple Python Parser | |
15 | ||
16 | Parse Python code into a list of logical lines, represented by LogicalLine | |
17 | objects. This uses Python's tokenizer to generate the tokens. As such, YAPF must | |
18 | be run with the appropriate Python version---Python >=3.7 for Python 3.7 code, | |
19 | Python >=3.8 for Python 3.8 code, etc. | |
20 | ||
21 | This parser uses Python's native "tokenizer" module to generate a list of tokens | |
22 | for the source code. It then uses Python's native "ast" module to assign | |
23 | subtypes, calculate split penalties, etc. | |
24 | ||
25 | A "logical line" produced by Python's "tokenizer" module ends with a | |
26 | tokenize.NEWLINE, rather than a tokenize.NL, making it easy to separate them | |
27 | out. Comments all end with a tokentizer.NL, so we need to make sure we don't | |
28 | errantly pick up non-comment tokens when parsing comment blocks. | |
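
For example, tokenizing the snippet

  # standalone comment
  x = 1

produces COMMENT and NL tokens for the comment line, while the tokens for
"x = 1" end with a NEWLINE token.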

ParseCode(): parse the code, producing a list of logical lines.
"""

# TODO: Call from yapf_api.FormatCode.

import ast
import os
import token
import tokenize
from io import StringIO
from tokenize import TokenInfo

from yapf.pyparser import split_penalty_visitor
from yapf.yapflib import format_token
from yapf.yapflib import logical_line

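# The tokenize module has no dedicated token type for a backslash line
# continuation, so we claim the first id past the predefined types
# (token.N_TOKENS) for the synthetic CONTINUATION tokens created below.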
CONTINUATION = token.N_TOKENS


def ParseCode(unformatted_source, filename='<unknown>'):
  """Parse a string of Python code into logical lines.

  This provides an alternative entry point to YAPF.

  Arguments:
    unformatted_source: (unicode) The code to format.
    filename: (unicode) The name of the file being reformatted.

  Returns:
    A list of LogicalLines.

  Raises:
    An exception is raised if there's an error during AST parsing.
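
  Example (illustrative):
    logical_lines = ParseCode('a = b if c else d')
    # 'logical_lines' holds a single LogicalLine for the whole statement.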
64 | """ | |
65 | if not unformatted_source.endswith(os.linesep): | |
66 | unformatted_source += os.linesep | |
67 | ||
68 | try: | |
69 | ast_tree = ast.parse(unformatted_source, filename) | |
70 | ast.fix_missing_locations(ast_tree) | |
71 | readline = StringIO(unformatted_source).readline | |
72 | tokens = tokenize.generate_tokens(readline) | |
73 | except Exception: | |
74 | raise | |
75 | ||
76 | logical_lines = _CreateLogicalLines(tokens) | |
77 | ||
78 | # Process the logical lines. | |
79 | split_penalty_visitor.SplitPenalty(logical_lines).visit(ast_tree) | |
80 | ||
81 | return logical_lines | |
82 | ||
83 | ||
def _CreateLogicalLines(tokens):
  """Separate tokens into logical lines.

  Arguments:
    tokens: (iterable of tokenize.TokenInfo) Tokens generated by the
      tokenizer.

  Returns:
    A list of LogicalLines.
  """
  formatted_tokens = []

  # Convert tokens into "TokenInfo" and add tokens for continuation markers.
  prev_tok = None
  for tok in tokens:
    tok = TokenInfo(*tok)

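    # A trailing backslash on the previous physical line, with the next token
    # starting on a later line, marks an explicit line continuation; emit a
    # synthetic CONTINUATION token for it.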
    if (prev_tok and prev_tok.line.rstrip().endswith('\\') and
        prev_tok.start[0] < tok.start[0]):
      ctok = TokenInfo(
          type=CONTINUATION,
          string='\\',
          start=(prev_tok.start[0], prev_tok.start[1] + 1),
          end=(prev_tok.end[0], prev_tok.end[1] + 2),
          line=prev_tok.line)
      ctok.lineno = ctok.start[0]
      ctok.column = ctok.start[1]
      ctok.value = '\\'
      formatted_tokens.append(format_token.FormatToken(ctok, 'CONTINUATION'))

    tok.lineno = tok.start[0]
    tok.column = tok.start[1]
    tok.value = tok.string
    formatted_tokens.append(
        format_token.FormatToken(tok, token.tok_name[tok.type]))
    prev_tok = tok

  # Generate logical lines.
  logical_lines, cur_logical_line = [], []
  depth = 0
  for tok in formatted_tokens:
    if tok.type == tokenize.ENDMARKER:
      break

    if tok.type == tokenize.NEWLINE:
      # End of a logical line.
      logical_lines.append(logical_line.LogicalLine(depth, cur_logical_line))
      cur_logical_line = []
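    # INDENT/DEDENT tokens track the block nesting level, which becomes the
    # depth recorded on each LogicalLine.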
    elif tok.type == tokenize.INDENT:
      depth += 1
    elif tok.type == tokenize.DEDENT:
      depth -= 1
    elif tok.type == tokenize.NL:
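      # A tokenize.NL is a non-logical newline (e.g., a blank line or the end
      # of a comment-only line); it doesn't terminate the logical line.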
      pass
    else:
      if (cur_logical_line and tok.type != tokenize.COMMENT and
          cur_logical_line[0].type == tokenize.COMMENT):
        # We were parsing a comment block, but now we have real code to worry
        # about. Store the comment and carry on.
        logical_lines.append(logical_line.LogicalLine(depth, cur_logical_line))
        cur_logical_line = []

      cur_logical_line.append(tok)

  # Link the FormatTokens in each line together to form a doubly linked list.
  for line in logical_lines:
    previous = line.first
    bracket_stack = [previous] if previous.OpensScope() else []
    for tok in line.tokens[1:]:
      tok.previous_token = previous
      previous.next_token = tok
      previous = tok

      # Set up the "matching_bracket" attribute.
      if tok.OpensScope():
        bracket_stack.append(tok)
      elif tok.ClosesScope():
        bracket_stack[-1].matching_bracket = tok
        tok.matching_bracket = bracket_stack.pop()

  return logical_lines