]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | import hashlib |
2 | import os | |
3 | from typing import Generic, TypeVar, Union, Dict, Optional, Any | |
4 | from pathlib import Path | |
5 | ||
6 | from parso._compatibility import is_pypy | |
7 | from parso.pgen2 import generate_grammar | |
8 | from parso.utils import split_lines, python_bytes_to_unicode, \ | |
9 | PythonVersionInfo, parse_version_string | |
10 | from parso.python.diff import DiffParser | |
11 | from parso.python.tokenize import tokenize_lines, tokenize | |
12 | from parso.python.token import PythonTokenTypes | |
13 | from parso.cache import parser_cache, load_module, try_to_save_module | |
14 | from parso.parser import BaseParser | |
15 | from parso.python.parser import Parser as PythonParser | |
16 | from parso.python.errors import ErrorFinderConfig | |
17 | from parso.python import pep8 | |
18 | from parso.file_io import FileIO, KnownContentFileIO | |
19 | from parso.normalizer import RefactoringNormalizer, NormalizerConfig | |
20 | ||
# Cache of grammars that were already built, keyed by the resolved grammar
# file path computed in load_grammar(), so each file is read and compiled once.
_loaded_grammars: Dict[str, 'Grammar'] = {}

# Type variable for the node type returned by Grammar.parse().
_NodeT = TypeVar("_NodeT")
24 | ||
25 | ||
class Grammar(Generic[_NodeT]):
    """
    :py:func:`parso.load_grammar` returns instances of this class.

    Creating custom non-Python grammars by calling this is not supported, yet.

    :param text: A BNF representation of your grammar.
    :param tokenizer: Callable that receives the list of source lines and
        returns the tokens that are handed to the parser.
    :param parser: Parser class, instantiated with the generated grammar for
        every (non-diff) parse.
    :param diff_parser: Optional diff parser class; required for
        ``parse(..., diff_cache=True)``.
    """
    # Start rule used by parse() when no ``start_symbol`` is passed.
    _start_nonterminal: str
    # Normalizer config used by iter_errors(); None means "not available".
    _error_normalizer_config: Optional[ErrorFinderConfig] = None
    # Token namespace passed to generate_grammar(); subclasses must set it.
    _token_namespace: Any = None
    # Fallback config for _normalize() / _get_normalizer_issues().
    _default_normalizer_config: NormalizerConfig = pep8.PEP8NormalizerConfig()

    def __init__(self, text: str, *, tokenizer, parser=BaseParser, diff_parser=None):
        self._pgen_grammar = generate_grammar(
            text,
            token_namespace=self._get_token_namespace()
        )
        self._parser = parser
        self._tokenizer = tokenizer
        self._diff_parser = diff_parser
        # Hex digest of the grammar text. parse() uses it as the key under
        # which parsed modules of this grammar are cached.
        self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()

    def parse(self,
              code: Optional[Union[str, bytes]] = None,
              *,
              error_recovery=True,
              path: Optional[Union[os.PathLike, str]] = None,
              start_symbol: Optional[str] = None,
              cache=False,
              diff_cache=False,
              cache_path: Optional[Union[os.PathLike, str]] = None,
              file_io: Optional[FileIO] = None) -> _NodeT:
        """
        If you want to parse a Python file you want to start here, most likely.

        If you need finer grained control over the parsed instance, there will be
        other ways to access it.

        :param code: A unicode or bytes string. When it's not possible to
            decode bytes to a string, a :py:class:`UnicodeDecodeError` is
            raised.
        :param bool error_recovery: If enabled, any code will be returned. If
            it is invalid, it will be returned as an error node. If disabled,
            you will get a ParseError when encountering syntax errors in your
            code.
        :param str start_symbol: The grammar rule (nonterminal) that you want
            to parse. Only allowed to be used when error_recovery is False.
        :param path: The path to the file you want to open. Only needed for
            caching. Strings are converted to :py:class:`pathlib.Path`.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
            files on disk have not changed. Note that this stores pickle files
            on your file system (e.g. for Linux in ``~/.cache/parso/``).
        :param bool diff_cache: Diffs the cached python module against the new
            code and tries to parse only the parts that have changed. Returns
            the same (changed) module that is found in cache. Using this option
            requires you to not do anything anymore with the cached modules
            under that path, because the contents of it might change. This
            option is still somewhat experimental. If you want stability,
            please don't use it.
        :param cache_path: If given saves the parso cache in this
            directory. If not given, defaults to the default cache places on
            each platform.
        :param file_io: A file IO object to read the code from. If omitted,
            one is created from ``path`` and/or ``code``.

        :raises TypeError: If none of ``code``, ``path`` and ``file_io`` is
            given, or if ``diff_cache`` is used without a diff parser.
        :raises NotImplementedError: If ``error_recovery`` is enabled and the
            start symbol is not ``'file_input'``.

        :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
            :py:class:`parso.python.tree.Module`.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        if isinstance(path, str):
            path = Path(path)
        if isinstance(cache_path, str):
            cache_path = Path(cache_path)

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        if file_io is None:
            if code is None:
                file_io = FileIO(path)  # type: ignore
            else:
                file_io = KnownContentFileIO(path, code)

        # First try the module cache; this can avoid reading the code at all.
        if cache and file_io.path is not None:
            module_node = load_module(self._hashed, file_io, cache_path=cache_path)
            if module_node is not None:
                return module_node  # type: ignore

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                # Nothing cached for this path yet: fall through to a full parse.
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    # Unchanged source, the cached tree is still valid.
                    return module_node  # type: ignore

                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer, module_node
                ).update(
                    old_lines=old_lines,
                    new_lines=lines
                )
                try_to_save_module(self._hashed, file_io, new_node, lines,
                                   # Never pickle in pypy, it's slow as hell.
                                   pickling=cache and not is_pypy,
                                   cache_path=cache_path)
                return new_node  # type: ignore

        tokens = self._tokenizer(lines)

        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            start_nonterminal=start_symbol
        )
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            try_to_save_module(self._hashed, file_io, root_node, lines,
                               # Never pickle in pypy, it's slow as hell.
                               pickling=cache and not is_pypy,
                               cache_path=cache_path)
        return root_node  # type: ignore

    def _get_token_namespace(self):
        """
        Returns the class-level token namespace.

        :raises ValueError: If ``_token_namespace`` was not set.
        """
        ns = self._token_namespace
        if ns is None:
            raise ValueError("The token namespace should be set.")
        return ns

    def iter_errors(self, node):
        """
        Given a :py:class:`parso.tree.NodeOrLeaf` returns the
        :py:class:`parso.normalizer.Issue` objects that the error normalizer
        collected while walking it. For Python these are
        syntax/indentation errors.

        :raises ValueError: If this grammar has no error normalizer config.
        """
        if self._error_normalizer_config is None:
            raise ValueError("No error normalizer specified for this grammar.")

        return self._get_normalizer_issues(node, self._error_normalizer_config)

    def refactor(self, base_node, node_to_str_map):
        """
        Walks ``base_node`` with a :py:class:`RefactoringNormalizer` built
        from ``node_to_str_map`` and returns the result of that walk.
        """
        return RefactoringNormalizer(node_to_str_map).walk(base_node)

    def _get_normalizer(self, normalizer_config):
        """
        Creates a normalizer from ``normalizer_config``, falling back to the
        grammar's default config when ``None`` is passed.

        :raises ValueError: If neither a config nor a default is available.
        """
        if normalizer_config is None:
            normalizer_config = self._default_normalizer_config
            if normalizer_config is None:
                raise ValueError("You need to specify a normalizer, because "
                                 "there's no default normalizer for this tree.")
        return normalizer_config.create_normalizer(self)

    def _normalize(self, node, normalizer_config=None):
        """
        TODO this is not public, yet.
        The returned code will be normalized, e.g. PEP8 for Python.
        """
        normalizer = self._get_normalizer(normalizer_config)
        return normalizer.walk(node)

    def _get_normalizer_issues(self, node, normalizer_config=None):
        """
        Walks ``node`` with a normalizer and returns the issues it recorded.
        """
        normalizer = self._get_normalizer(normalizer_config)
        normalizer.walk(node)
        return normalizer.issues

    def __repr__(self):
        # Show only the first three nonterminals to keep the repr short.
        nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
        txt = ' '.join(list(nonterminals)[:3]) + ' ...'
        return '<%s:%s>' % (self.__class__.__name__, txt)
210 | ||
211 | ||
class PythonGrammar(Grammar):
    """
    Grammar flavour for Python source, bound to one Python version.

    :param version_info: The Python version that the tokenizer is run for.
    :param bnf_text: The BNF text of the grammar.
    """
    _start_nonterminal = 'file_input'
    _token_namespace = PythonTokenTypes
    _error_normalizer_config = ErrorFinderConfig()

    def __init__(self, version_info: PythonVersionInfo, bnf_text: str):
        super().__init__(
            bnf_text, tokenizer=self._tokenize_lines,
            parser=PythonParser, diff_parser=DiffParser,
        )
        self.version_info = version_info

    def _tokenize_lines(self, lines, **kwargs):
        """Tokenizes a list of lines for this grammar's Python version."""
        return tokenize_lines(
            lines,
            version_info=self.version_info,
            **kwargs
        )

    def _tokenize(self, code):
        """Tokenizes a code string for this grammar's Python version."""
        # Used by Jedi.
        return tokenize(
            code,
            version_info=self.version_info
        )
232 | ||
233 | ||
def load_grammar(*, version: Optional[str] = None, path: Optional[str] = None):
    """
    Loads a :py:class:`parso.Grammar`. The default version is the current Python
    version.

    Loaded grammars are kept in a module-level cache keyed by the resolved
    grammar file path, so repeated calls return the same object.

    :param str version: A python version string, e.g. ``version='3.8'``.
    :param str path: A path to a grammar file
    :raises NotImplementedError: If the grammar file does not exist.
    """
    version_info = parse_version_string(version)

    grammar_file = path or os.path.join(
        'python',
        'grammar%s%s.txt' % (version_info.major, version_info.minor)
    )
    # Relative names are resolved against this module's directory; an absolute
    # ``path`` passes through os.path.join unchanged.
    full_path = os.path.join(os.path.dirname(__file__), grammar_file)

    try:
        return _loaded_grammars[full_path]
    except KeyError:
        pass

    # Only the file read is guarded, so that a FileNotFoundError raised while
    # building the grammar is not misreported as an unsupported version.
    try:
        # Decode explicitly instead of relying on the locale's default
        # encoding; the grammar text is hashed as UTF-8 later on.
        with open(full_path, encoding="utf-8") as f:
            bnf_text = f.read()
    except FileNotFoundError:
        message = "Python version %s.%s is currently not supported." % (
            version_info.major, version_info.minor
        )
        raise NotImplementedError(message)

    grammar = PythonGrammar(version_info, bnf_text)
    # setdefault keeps the first stored grammar if one was added meanwhile.
    return _loaded_grammars.setdefault(full_path, grammar)