3 from typing
import Generic
, TypeVar
, Union
, Dict
, Optional
, Any
4 from pathlib
import Path
6 from parso
._compatibility
import is_pypy
7 from parso
.pgen2
import generate_grammar
8 from parso
.utils
import split_lines
, python_bytes_to_unicode
, \
9 PythonVersionInfo
, parse_version_string
10 from parso
.python
.diff
import DiffParser
11 from parso
.python
.tokenize
import tokenize_lines
, tokenize
12 from parso
.python
.token
import PythonTokenTypes
13 from parso
.cache
import parser_cache
, load_module
, try_to_save_module
14 from parso
.parser
import BaseParser
15 from parso
.python
.parser
import Parser
as PythonParser
16 from parso
.python
.errors
import ErrorFinderConfig
17 from parso
.python
import pep8
18 from parso
.file_io
import FileIO
, KnownContentFileIO
19 from parso
.normalizer
import RefactoringNormalizer
, NormalizerConfig
# Module-level cache: absolute grammar-file path -> loaded Grammar instance,
# so repeated load_grammar() calls return the same object (see load_grammar).
_loaded_grammars: Dict[str, 'Grammar'] = {}

# Type variable for the root node type produced by Grammar.parse().
_NodeT = TypeVar("_NodeT")
class Grammar(Generic[_NodeT]):
    """
    :py:func:`parso.load_grammar` returns instances of this class.

    Creating custom none-python grammars by calling this is not supported, yet.

    :param text: A BNF representation of your grammar.
    """
    # Nonterminal used as the default parse entry point; subclasses must set it.
    _start_nonterminal: str
    _error_normalizer_config: Optional[ErrorFinderConfig] = None
    _token_namespace: Any = None
    _default_normalizer_config: NormalizerConfig = pep8.PEP8NormalizerConfig()

    def __init__(self, text: str, *, tokenizer, parser=BaseParser, diff_parser=None):
        self._pgen_grammar = generate_grammar(
            text,
            token_namespace=self._get_token_namespace()
        )
        self._parser = parser
        self._tokenizer = tokenizer
        self._diff_parser = diff_parser
        # Hash of the BNF text, used as a cache key so that cached parse trees
        # are invalidated whenever the grammar itself changes.
        self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()

    def parse(self,
              code: Union[str, bytes] = None,
              *,
              error_recovery=True,
              path: Union[os.PathLike, str] = None,
              start_symbol: str = None,
              cache=False,
              diff_cache=False,
              cache_path: Union[os.PathLike, str] = None,
              file_io: FileIO = None) -> _NodeT:
        """
        If you want to parse a Python file you want to start here, most likely.

        If you need finer grained control over the parsed instance, there will be
        other ways to access it.

        :param str code: A unicode or bytes string. When it's not possible to
            decode bytes to a string, returns a
            :py:class:`UnicodeDecodeError`.
        :param bool error_recovery: If enabled, any code will be returned. If
            it is invalid, it will be returned as an error node. If disabled,
            you will get a ParseError when encountering syntax errors in your
            code.
        :param str start_symbol: The grammar rule (nonterminal) that you want
            to parse. Only allowed to be used when error_recovery is False.
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
            files on disk have not changed. Note that this stores pickle files
            on your file system (e.g. for Linux in ``~/.cache/parso/``).
        :param bool diff_cache: Diffs the cached python module against the new
            code and tries to parse only the parts that have changed. Returns
            the same (changed) module that is found in cache. Using this option
            requires you to not do anything anymore with the cached modules
            under that path, because the contents of it might change. This
            option is still somewhat experimental. If you want stability,
            please don't use it.
        :param bool cache_path: If given saves the parso cache in this
            directory. If not given, defaults to the default cache places on
            each platform.

        :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
            :py:class:`parso.python.tree.Module`.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        if isinstance(path, str):
            path = Path(path)
        if isinstance(cache_path, str):
            cache_path = Path(cache_path)

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        if file_io is None:
            if code is None:
                file_io = FileIO(path)  # type: ignore
            else:
                file_io = KnownContentFileIO(path, code)

        if cache and file_io.path is not None:
            module_node = load_module(self._hashed, file_io, cache_path=cache_path)
            if module_node is not None:
                return module_node  # type: ignore

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                # Nothing cached for this file yet; fall through to a full parse.
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    # Source unchanged since the cached parse; reuse the tree.
                    return module_node  # type: ignore

                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer, module_node
                ).update(
                    old_lines=old_lines,
                    new_lines=lines
                )
                try_to_save_module(self._hashed, file_io, new_node, lines,
                                   # Never pickle in pypy, it's slow as hell.
                                   pickling=cache and not is_pypy,
                                   cache_path=cache_path)
                return new_node  # type: ignore

        tokens = self._tokenizer(lines)

        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            start_nonterminal=start_symbol
        )
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            try_to_save_module(self._hashed, file_io, root_node, lines,
                               # Never pickle in pypy, it's slow as hell.
                               pickling=cache and not is_pypy,
                               cache_path=cache_path)
        return root_node  # type: ignore

    def _get_token_namespace(self):
        # Subclasses provide the token namespace (e.g. PythonTokenTypes).
        ns = self._token_namespace
        if ns is None:
            raise ValueError("The token namespace should be set.")
        return ns

    def iter_errors(self, node):
        """
        Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of
        :py:class:`parso.normalizer.Issue` objects. For Python this is
        a list of syntax/indentation errors.
        """
        if self._error_normalizer_config is None:
            raise ValueError("No error normalizer specified for this grammar.")

        return self._get_normalizer_issues(node, self._error_normalizer_config)

    def refactor(self, base_node, node_to_str_map):
        # Rewrites the nodes listed in node_to_str_map while walking base_node.
        return RefactoringNormalizer(node_to_str_map).walk(base_node)

    def _get_normalizer(self, normalizer_config):
        if normalizer_config is None:
            normalizer_config = self._default_normalizer_config
            if normalizer_config is None:
                raise ValueError("You need to specify a normalizer, because "
                                 "there's no default normalizer for this tree.")
        return normalizer_config.create_normalizer(self)

    def _normalize(self, node, normalizer_config=None):
        """
        TODO this is not public, yet.
        The returned code will be normalized, e.g. PEP8 for Python.
        """
        normalizer = self._get_normalizer(normalizer_config)
        return normalizer.walk(node)

    def _get_normalizer_issues(self, node, normalizer_config=None):
        # Walks the tree for its side effects and returns the collected issues.
        normalizer = self._get_normalizer(normalizer_config)
        normalizer.walk(node)
        return normalizer.issues

    def __repr__(self):
        nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
        txt = ' '.join(list(nonterminals)[:3]) + ' ...'
        return '<%s:%s>' % (self.__class__.__name__, txt)
class PythonGrammar(Grammar):
    """Grammar specialized for parsing Python source code."""
    _error_normalizer_config = ErrorFinderConfig()
    _token_namespace = PythonTokenTypes
    _start_nonterminal = 'file_input'

    def __init__(self, version_info: PythonVersionInfo, bnf_text: str):
        super().__init__(
            bnf_text,
            tokenizer=self._tokenize_lines,
            parser=PythonParser,
            diff_parser=DiffParser
        )
        # Target Python version; the tokenizer needs it for version-specific
        # syntax (e.g. f-strings, walrus operator).
        self.version_info = version_info

    def _tokenize_lines(self, lines, **kwargs):
        return tokenize_lines(lines, version_info=self.version_info, **kwargs)

    def _tokenize(self, code):
        # Convenience wrapper around tokenize() for whole-string input.
        return tokenize(code, version_info=self.version_info)
def load_grammar(*, version: str = None, path: str = None):
    """
    Loads a :py:class:`parso.Grammar`. The default version is the current Python
    version.

    :param str version: A python version string, e.g. ``version='3.8'``.
    :param str path: A path to a grammar file
    :raises NotImplementedError: If no bundled grammar file exists for the
        requested Python version.
    """
    version_info = parse_version_string(version)

    # Bundled grammar files live next to this module, e.g. python/grammar38.txt.
    file = path or os.path.join(
        'python',
        'grammar%s%s.txt' % (version_info.major, version_info.minor)
    )

    global _loaded_grammars
    path = os.path.join(os.path.dirname(__file__), file)
    try:
        # Reuse an already-loaded grammar for this path if we have one.
        return _loaded_grammars[path]
    except KeyError:
        try:
            with open(path) as f:
                bnf_text = f.read()

            grammar = PythonGrammar(version_info, bnf_text)
            # setdefault guards against a concurrent load of the same path.
            return _loaded_grammars.setdefault(path, grammar)
        except FileNotFoundError:
            message = "Python version %s.%s is currently not supported." % (
                version_info.major, version_info.minor
            )
            raise NotImplementedError(message)