]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | """ |
2 | Docstrings are another source of information for functions and classes. | |
3 | :mod:`jedi.inference.dynamic_params` tries to find all executions of functions, | |
4 | while the docstring parsing is much easier. There are three different types of | |
5 | docstrings that |jedi| understands: | |
6 | ||
7 | - `Sphinx <http://sphinx-doc.org/markup/desc.html#info-field-lists>`_ | |
8 | - `Epydoc <http://epydoc.sourceforge.net/manual-fields.html>`_ | |
9 | - `Numpydoc <https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt>`_ | |
10 | ||
11 | For example, the sphinx annotation ``:type foo: str`` clearly states that the | |
12 | type of ``foo`` is ``str``. | |
13 | ||
14 | As an addition to parameter searching, this module also provides return | |
15 | annotations. | |
16 | """ | |
17 | ||
18 | import re | |
19 | import warnings | |
20 | ||
21 | from parso import parse, ParserSyntaxError | |
22 | ||
23 | from jedi import debug | |
24 | from jedi.inference.cache import inference_state_method_cache | |
25 | from jedi.inference.base_value import iterator_to_value_set, ValueSet, \ | |
26 | NO_VALUES | |
27 | from jedi.inference.lazy_value import LazyKnownValues | |
28 | ||
29 | ||
# Regex templates that locate the documented type of a single parameter.
# Each template contains one ``%s`` placeholder for the parameter name and
# captures the raw type text in group 1.
DOCSTRING_PARAM_PATTERNS = [
    r'\s*:type\s+%s:\s*([^\n]+)',  # Sphinx
    r'\s*:param\s+(\w+)\s+%s:[^\n]*',  # Sphinx param with type
    r'\s*@type\s+%s:\s*([^\n]+)',  # Epydoc
]

# Compiled patterns that capture the documented return type in group 1.
DOCSTRING_RETURN_PATTERNS = [
    re.compile(regex, re.M)
    for regex in (
        r'\s*:rtype:\s*([^\n]+)',  # Sphinx
        r'\s*@rtype:\s*([^\n]+)',  # Epydoc
    )
]

# Matches a ReST role such as :class:`Name`; group 1 is the text between the
# backticks.
REST_ROLE_PATTERN = re.compile(r':[^`]+:`([^`]+)`')
42 | ||
43 | ||
# Memoized outcome of importing numpydoc: ``None`` before the first attempt,
# afterwards either the ``NumpyDocString`` class or the exception raised by
# the failed import.
_numpy_doc_string_cache = None


def _get_numpy_doc_string_cls():
    """
    Return numpydoc's ``NumpyDocString`` class, importing it on first use.

    The outcome of the first import attempt is stored in
    ``_numpy_doc_string_cache``. A successful import is returned directly on
    later calls; a failed import is remembered as well, so that the import is
    not retried on every lookup and the stored exception is raised again
    instead.

    :raises ImportError: if ``numpydoc.docscrape`` cannot be imported.
    :raises SyntaxError: if importing ``numpydoc.docscrape`` fails to compile.
    """
    global _numpy_doc_string_cache
    if isinstance(_numpy_doc_string_cache, (ImportError, SyntaxError)):
        raise _numpy_doc_string_cache
    if _numpy_doc_string_cache is None:
        try:
            from numpydoc.docscrape import NumpyDocString  # type: ignore[import]
        except (ImportError, SyntaxError) as e:
            # Remember the failure so that later calls fail fast.
            _numpy_doc_string_cache = e
            raise
        _numpy_doc_string_cache = NumpyDocString
    return _numpy_doc_string_cache
54 | ||
55 | ||
def _search_param_in_numpydocstr(docstr, param_str):
    """
    Look up the type(-s) documented for `param_str` in a numpydoc `docstr`.

    Returns a list of type strings. The list is empty when the docstring
    cannot be parsed with numpydoc (any exception is swallowed) or when no
    parameter entry is named exactly `param_str`.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            # ``_parsed_data`` is not a public numpydoc API. Should it ever
            # change, fail gracefully instead of crashing.
            parsed = _get_numpy_doc_string_cls()(docstr)
            documented_params = parsed._parsed_data['Parameters']
        except Exception:
            return []

    for name, type_spec, _description in documented_params:
        if name != param_str:
            continue
        # Drop a trailing ", optional" marker from the type specification.
        without_optional = re.match(r'([^,]+(,[^,]+)*?)(,[ ]*optional)?$', type_spec)
        if without_optional:
            type_spec = without_optional.group(1)
        return list(_expand_typestr(type_spec))
    return []
73 | ||
74 | ||
def _search_return_in_numpydocstr(docstr):
    """
    Yield the type(-s) a numpydoc `docstr` documents as returned or yielded.

    Nothing is yielded when the docstring cannot be parsed with numpydoc or
    when its parsed sections cannot be read.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            parsed = _get_numpy_doc_string_cls()(docstr)
        except Exception:
            return

    try:
        # ``_parsed_data`` is not a public numpydoc API. Should it ever
        # change, fail gracefully instead of crashing.
        sections = parsed._parsed_data
        entries = sections['Returns']
        entries += sections['Yields']
    except Exception:
        return

    for name, type_spec, _description in entries:
        # The name of a return value is optional; when it is left out, the
        # type ends up in the name field.
        yield from _expand_typestr(type_spec or name)
97 | ||
98 | ||
def _expand_typestr(type_str):
    """
    Yield the individual type names described by the type string `type_str`.

    The following forms are understood:

    - ``A or B``: every alternative is yielded; for an alternative such as
      ``list of int`` only the part before ``of`` is kept.
    - ``list of int``: only the part before ``of`` is yielded.
    - ``{'C', 'F', 1}``: a set of literal values; yields ``'str'``,
      ``'bytes'``, ``'int'`` or ``'float'`` for each string/number literal.
    - Anything else is yielded unchanged.

    ``or`` and ``of`` are only treated as separators when they are whole
    words, so names that merely contain these letters (``Iterator``,
    ``Profile``) are left intact.
    """
    # Check if alternative types are specified with 'or'
    if re.search(r'\bor\b', type_str):
        for alternative in re.split(r'\bor\b', type_str):
            yield re.split(r'\bof\b', alternative)[0].strip()
    # Check if like "list of `type`" and set type to list
    elif re.search(r'\bof\b', type_str):
        yield re.split(r'\bof\b', type_str)[0].strip()
    # Check if the type is a set of valid literal values eg: {'C', 'F', 'A'}
    elif type_str.startswith('{'):
        node = parse(type_str, version='3.7').children[0]
        if node.type == 'atom':
            inner = node.children[1]
            # With several elements ``inner`` is a node whose children are
            # the literals; with a single element it is the literal leaf
            # itself, which has no ``children``.
            for leaf in getattr(inner, "children", [inner]):
                if leaf.type == 'number':
                    if '.' in leaf.value:
                        yield 'float'
                    else:
                        yield 'int'
                elif leaf.type == 'string':
                    if 'b' in leaf.string_prefix.lower():
                        yield 'bytes'
                    else:
                        yield 'str'
                # Ignore everything else.

    # Otherwise just work with what we have.
    else:
        yield type_str
130 | ||
131 | ||
def _search_param_in_docstr(docstr, param_str):
    """
    Return the type string(-s) that `docstr` documents for `param_str`.

    The Sphinx and Epydoc patterns are tried first; the first one that
    matches wins. If none matches, the docstring is searched as numpydoc.

    >>> _search_param_in_docstr(':type param: int', 'param')
    ['int']
    >>> _search_param_in_docstr('@type param: int', 'param')
    ['int']
    >>> _search_param_in_docstr(
    ...     ':type param: :class:`threading.Thread`', 'param')
    ['threading.Thread']
    >>> bool(_search_param_in_docstr('no document', 'param'))
    False
    >>> _search_param_in_docstr(':param int param: some description', 'param')
    ['int']

    """
    # look at #40 to see definitions of those params
    escaped_name = re.escape(param_str)
    for template in DOCSTRING_PARAM_PATTERNS:
        found = re.compile(template % escaped_name).search(docstr)
        if found:
            return [_strip_rst_role(found.group(1))]

    return _search_param_in_numpydocstr(docstr, param_str)
158 | ||
159 | ||
def _strip_rst_role(type_str):
    """
    Remove a leading ReST role from `type_str`, keeping only its target.

    >>> _strip_rst_role(':class:`ClassName`')  # strip off :class:
    'ClassName'
    >>> _strip_rst_role(':py:obj:`module.Object`')  # works with domain
    'module.Object'
    >>> _strip_rst_role('ClassName')  # do nothing when not ReST role
    'ClassName'

    See also:
    http://sphinx-doc.org/domains.html#cross-referencing-python-objects

    """
    role_match = REST_ROLE_PATTERN.match(type_str)
    return role_match.group(1) if role_match else type_str
180 | ||
181 | ||
def _infer_for_statement_string(module_context, string):
    """
    Infer the values described by the type expression `string`.

    The expression is parsed as a tiny module, preceded by ``import``
    statements for the dotted prefixes found in it, and its last statement
    is inferred in a docstring module that is built on top of
    `module_context`.

    Returns a list of values. The list is empty if `string` is ``None``, if
    it does not parse, or if the parsed statement is not a name, atom or
    atom expression.
    """
    if string is None:
        return []

    # Try to import module part in dotted name.
    # (e.g., 'threading' in 'threading.Thread').
    dotted_prefixes = re.findall(r'((?:\w+\.)*\w+)\.', string)
    import_lines = "\n".join(f"import {p}" for p in dotted_prefixes)
    string = f'{import_lines}\n{string}'

    debug.dbg('Parse docstring code %s', string, color='BLUE')
    try:
        module = module_context.inference_state.grammar.parse(
            string,
            error_recovery=False,
        )
    except ParserSyntaxError:
        return []

    try:
        # The very last child is an end marker, so take the one before it.
        stmt = module.children[-2]
    except (AttributeError, IndexError):
        return []
    if stmt.type not in ('name', 'atom', 'atom_expr'):
        return []

    # Here we basically use a fake module that also uses the filters in
    # the actual module.
    from jedi.inference.docstring_utils import DocstringModule
    docstring_module = DocstringModule(
        in_module_context=module_context,
        inference_state=module_context.inference_state,
        module_node=module,
        code_lines=[],
    )
    docstring_context = docstring_module.as_context()
    return list(_execute_types_in_stmt(docstring_context, stmt))
217 | ||
218 | ||
def _execute_types_in_stmt(module_context, stmt):
    """
    Infer `stmt` and execute every resulting type or general element.

    Tuple, list and dict literals are not executed themselves, because the
    things they contain are executed instead. (Used as type information).
    """
    inferred = module_context.infer_node(stmt)
    executed = (
        _execute_array_values(module_context.inference_state, value)
        for value in inferred
    )
    return ValueSet.from_sets(executed)
230 | ||
231 | ||
def _execute_array_values(inference_state, array):
    """
    Execute `array` as an annotation, descending into tuple/list literals.

    Tuples indicate that there's not just one return value, but the listed
    ones. `(str, int)` means that it returns a tuple with both types. For a
    tuple or list literal every element is executed recursively and the
    result is a set holding one fake tuple/list of those results; anything
    else is executed as an annotation directly.
    """
    from jedi.inference.value.iterable import SequenceLiteralValue, FakeTuple, FakeList

    is_sequence_literal = (
        isinstance(array, SequenceLiteralValue)
        and array.array_type in ('tuple', 'list')
    )
    if not is_sequence_literal:
        return array.execute_annotation()

    lazy_members = [
        LazyKnownValues(ValueSet.from_sets(
            _execute_array_values(inference_state, typ)
            for typ in lazy_value.infer()
        ))
        for lazy_value in array.py__iter__()
    ]
    fake_cls = FakeTuple if array.array_type == 'tuple' else FakeList
    return {fake_cls(inference_state, lazy_members)}
250 | ||
251 | ||
@inference_state_method_cache()
def infer_param(function_value, param):
    """
    Infer the values of `param` from the docstring of `function_value`.

    Parameters of a lambda yield ``NO_VALUES``. For a bound ``__init__``
    method the docstring of the class is searched in addition to the
    docstring of the method itself.
    """
    module_context = function_value.get_root_context()

    def infer_docstring(docstring):
        # Infer every type string that the docstring gives for the param.
        return ValueSet(
            value
            for type_str in _search_param_in_docstr(docstring, param.name.value)
            for value in _infer_for_statement_string(module_context, type_str)
        )

    if param.get_parent_function().type == 'lambdef':
        return NO_VALUES

    types = infer_docstring(function_value.py__doc__())
    is_bound_init = (
        function_value.is_bound_method()
        and function_value.py__name__() == '__init__'
    )
    if is_bound_init:
        types |= infer_docstring(function_value.class_context.py__doc__())

    debug.dbg('Found param types for docstring: %s', types, color='BLUE')
    return types
272 | ||
273 | ||
@inference_state_method_cache()
@iterator_to_value_set
def infer_return_types(function_value):
    """
    Yield the values that the docstring of `function_value` documents as
    its return type(-s).
    """
    def search_return_in_docstr(code):
        # Sphinx / Epydoc style return hints.
        for pattern in DOCSTRING_RETURN_PATTERNS:
            found = pattern.search(code)
            if found:
                yield _strip_rst_role(found.group(1))
        # Numpy style return hints.
        yield from _search_return_in_numpydocstr(code)

    docstring = function_value.py__doc__()
    for type_str in search_return_in_docstr(docstring):
        yield from _infer_for_statement_string(
            function_value.get_root_context(),
            type_str,
        )
283 | yield from _search_return_in_numpydocstr(code) | |
284 | ||
285 | for type_str in search_return_in_docstr(function_value.py__doc__()): | |
286 | yield from _infer_for_statement_string(function_value.get_root_context(), type_str) |