import os
import re

from parso import python_bytes_to_unicode

from jedi.debug import dbg
from jedi.file_io import KnownContentFileIO, FolderIO
from jedi.inference.names import SubModuleName
from jedi.inference.imports import load_module_from_path
from jedi.inference.filters import ParserTreeFilter
from jedi.inference.gradual.conversion import convert_names

_IGNORE_FOLDERS = ('.tox', '.venv', '.mypy_cache', 'venv', '__pycache__')

_OPENED_FILE_LIMIT = 2000
"""
Stats from a 2016 Lenovo notebook running Linux:
With os.walk, it takes about 10s to scan 11'000 files (without filesystem
caching). Once cached it only takes 5s. So it is expected that reading all
those files might take a few seconds, but not a lot more.
"""
_PARSED_FILE_LIMIT = 30
"""
For now we keep the number of parsed files really low, since parsing can
easily take 100ms for bigger files.
"""


def _resolve_names(definition_names, avoid_names=()):
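    """
    Yield the given definition names and, for modules, the names their
    ``goto()`` resolves to. ``avoid_names`` prevents endless recursion,
    because a goto on a module name lands on the same module again.
    """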
    for name in definition_names:
        if name in avoid_names:
            # Avoiding recursions here, because a goto on a module name lands
            # on the same module again.
            continue

        if not isinstance(name, SubModuleName):
            # SubModuleNames are not actually existing names but are created
            # when importing something like `import foo.bar.baz`.
            yield name

        if name.api_type == 'module':
            yield from _resolve_names(name.goto(), definition_names)


def _dictionarize(names):
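    """
    Build a dict of the given names, keyed by their tree name (or the name
    itself if it has none), so that duplicates pointing at the same tree
    position collapse into a single entry.
    """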
    return dict(
        (n if n.tree_name is None else n.tree_name, n)
        for n in names
    )


def _find_defining_names(module_context, tree_name):
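    """
    Collect all names defining ``tree_name``: the goto results, their
    stub/non-stub counterparts, matching global variables, and further
    definitions of the same name within the same contexts.
    """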
    found_names = _find_names(module_context, tree_name)

    for name in list(found_names):
        # Convert from/to stubs, because those might also be usages.
        found_names |= set(convert_names(
            [name],
            only_stubs=not name.get_root_context().is_stub(),
            prefer_stub_to_compiled=False
        ))

    found_names |= set(_find_global_variables(found_names, tree_name.value))
    for name in list(found_names):
        if name.api_type == 'param' or name.tree_name is None \
                or name.tree_name.parent.type == 'trailer':
            continue
        found_names |= set(_add_names_in_same_context(name.parent_context,
                                                      name.string_name))
    return set(_resolve_names(found_names))


def _find_names(module_context, tree_name):
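    """
    Resolve ``tree_name`` within its module and return the goto results,
    including the name itself.
    """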
    name = module_context.create_name(tree_name)
    found_names = set(name.goto())
    found_names.add(name)

    return set(_resolve_names(found_names))


def _add_names_in_same_context(context, string_name):
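    """
    Yield every definition of ``string_name`` in the given context. Each
    round filters only up to the earliest position found so far, so earlier
    assignments to the same name are picked up as well.
    """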
    if context.tree_node is None:
        return

    until_position = None
    while True:
        filter_ = ParserTreeFilter(
            parent_context=context,
            until_position=until_position,
        )
        names = set(filter_.get(string_name))
        if not names:
            break
        yield from names
        ordered = sorted(names, key=lambda x: x.start_pos)
        until_position = ordered[0].start_pos


def _find_global_variables(names, search_name):
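    """
    Yield definitions of ``search_name`` found through the global filters of
    the modules the given names live in; module contexts without a global
    filter are simply skipped.
    """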
    for name in names:
        if name.tree_name is None:
            continue
        module_context = name.get_root_context()
        try:
            method = module_context.get_global_filter
        except AttributeError:
            continue
        else:
            for global_name in method().get(search_name):
                yield global_name
                c = module_context.create_context(global_name.tree_name)
                yield from _add_names_in_same_context(c, global_name.string_name)


def find_references(module_context, tree_name, only_in_module=False):
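    """
    Find all references to ``tree_name``. Unless ``only_in_module`` is set,
    other project modules whose sources mention the searched name are
    scanned as well.
    """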
    inf = module_context.inference_state
    search_name = tree_name.value

    # We disable flow analysis, because if we have `if` branches that are
    # only true in certain cases, we want the references from both sides.
    try:
        inf.flow_analysis_enabled = False
        found_names = _find_defining_names(module_context, tree_name)
    finally:
        inf.flow_analysis_enabled = True

    found_names_dct = _dictionarize(found_names)

    module_contexts = [module_context]
    if not only_in_module:
        for m in set(d.get_root_context() for d in found_names):
            if m != module_context and m.tree_node is not None \
                    and inf.project.path in m.py__file__().parents:
                module_contexts.append(m)
    # For params, no search in other modules is necessary.
    if only_in_module or any(n.api_type == 'param' for n in found_names):
        potential_modules = module_contexts
    else:
        potential_modules = get_module_contexts_containing_name(
            inf,
            module_contexts,
            search_name,
        )

    non_matching_reference_maps = {}
    for module_context in potential_modules:
        for name_leaf in module_context.tree_node.get_used_names().get(search_name, []):
            new = _dictionarize(_find_names(module_context, name_leaf))
            if any(tree_name in found_names_dct for tree_name in new):
                found_names_dct.update(new)
                for tree_name in new:
                    for dct in non_matching_reference_maps.get(tree_name, []):
                        # A reference that previously didn't match anything
                        # now matches a found name, so merge its map in.
                        found_names_dct.update(dct)
                    try:
                        del non_matching_reference_maps[tree_name]
                    except KeyError:
                        pass
            else:
                for name in new:
                    non_matching_reference_maps.setdefault(name, []).append(new)
    result = found_names_dct.values()
    if only_in_module:
        return [n for n in result if n.get_root_context() == module_context]
    return result


def _check_fs(inference_state, file_io, regex):
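    """
    Return a module context for ``file_io`` if its content matches ``regex``,
    otherwise ``None`` (also for files that vanished or turn out to be
    compiled modules).
    """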
    try:
        code = file_io.read()
    except FileNotFoundError:
        return None
    code = python_bytes_to_unicode(code, errors='replace')
    if not regex.search(code):
        return None
    new_file_io = KnownContentFileIO(file_io.path, code)
    m = load_module_from_path(inference_state, new_file_io)
    if m.is_compiled():
        return None
    return m.as_context()


def gitignored_paths(folder_io, file_io):
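    """
    Parse a ``.gitignore`` into two sets: absolute paths for entries that
    contain a slash, and ``(folder, name)`` pairs for plain names. Comments,
    negations and glob patterns are skipped, so this is only a rough
    approximation of git's real matching rules.
    """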
    ignored_paths_abs = set()
    ignored_paths_rel = set()

    for l in file_io.read().splitlines():
        if not l or l.startswith(b'#') or l.startswith(b'!') or b'*' in l:
            continue

        p = l.decode('utf-8', 'ignore').rstrip('/')
        if '/' in p:
            name = p.lstrip('/')
            ignored_paths_abs.add(os.path.join(folder_io.path, name))
        else:
            name = p
            ignored_paths_rel.add((folder_io.path, name))

    return ignored_paths_abs, ignored_paths_rel


def expand_relative_ignore_paths(folder_io, relative_paths):
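    """
    Turn relative ``(folder, name)`` ignore entries into absolute paths,
    considering only entries whose folder is a prefix of the current one.
    """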
    curr_path = folder_io.path
    return {os.path.join(curr_path, p[1])
            for p in relative_paths if curr_path.startswith(p[0])}


def recurse_find_python_folders_and_files(folder_io, except_paths=()):
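    """
    Walk ``folder_io`` recursively, yielding ``(None, file_io)`` for Python
    files and ``(folder_io, None)`` for folders, while honoring
    ``.gitignore`` entries and skipping the folders in ``_IGNORE_FOLDERS``.
    """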
    except_paths = set(except_paths)
    except_paths_relative = set()

    for root_folder_io, folder_ios, file_ios in folder_io.walk():
        for file_io in file_ios:
            path = file_io.path
            if path.suffix in ('.py', '.pyi'):
                if path not in except_paths:
                    yield None, file_io

            if path.name == '.gitignore':
                ignored_paths_abs, ignored_paths_rel = gitignored_paths(
                    root_folder_io, file_io
                )
                except_paths |= ignored_paths_abs
                except_paths_relative |= ignored_paths_rel

        except_paths_relative_expanded = expand_relative_ignore_paths(
            root_folder_io, except_paths_relative
        )

        # Prune folders that we don't want to iterate over, in place, so the
        # walk skips them.
        folder_ios[:] = [
            folder_io
            for folder_io in folder_ios
            if folder_io.path not in except_paths
            and folder_io.path not in except_paths_relative_expanded
            and folder_io.get_base_name() not in _IGNORE_FOLDERS
        ]
        for folder_io in folder_ios:
            yield folder_io, None


def recurse_find_python_files(folder_io, except_paths=()):
    for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths):
        if file_io is not None:
            yield file_io


def _find_python_files_in_sys_path(inference_state, module_contexts):
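    """
    Starting at each module's folder, walk upwards as long as the path is
    still within ``sys.path`` and yield all Python files found on the way,
    skipping the modules' own files.
    """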
    sys_path = inference_state.get_sys_path()
    except_paths = set()
    yielded_paths = [m.py__file__() for m in module_contexts]
    for module_context in module_contexts:
        file_io = module_context.get_value().file_io
        if file_io is None:
            continue

        folder_io = file_io.get_parent_folder()
        while True:
            path = folder_io.path
            if not any(path.startswith(p) for p in sys_path) or path in except_paths:
                break
            for file_io in recurse_find_python_files(folder_io, except_paths):
                if file_io.path not in yielded_paths:
                    yield file_io
            except_paths.add(path)
            folder_io = folder_io.get_parent_folder()


def _find_project_modules(inference_state, module_contexts):
    except_ = [m.py__file__() for m in module_contexts]
    yield from recurse_find_python_files(FolderIO(inference_state.project.path), except_)


def get_module_contexts_containing_name(inference_state, module_contexts, name,
                                        limit_reduction=1):
    """
    Search for a name in the directories of the given modules.

    :param limit_reduction: Divides the limits on opening/parsing files by
        this factor.
    """
    # Skip non-Python modules
    for module_context in module_contexts:
        if module_context.is_compiled():
            continue
        yield module_context

    # Very short names are not searched in other modules for now to avoid
    # lots of file lookups.
    if len(name) <= 2:
        return

    # Currently unused, because there's only `scope=project` and `scope=file`;
    # at the moment there is no such thing as `scope=sys.path`.
    # file_io_iterator = _find_python_files_in_sys_path(inference_state, module_contexts)
    file_io_iterator = _find_project_modules(inference_state, module_contexts)
    yield from search_in_file_ios(inference_state, file_io_iterator, name,
                                  limit_reduction=limit_reduction)


def search_in_file_ios(inference_state, file_io_iterator, name,
                       limit_reduction=1, complete=False):
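    """
    Open the given files, search their contents for ``name`` with a regular
    expression, and yield a module context for every file that mentions it.
    Both the number of opened and of parsed files is capped (divided by
    ``limit_reduction``); with ``complete=True`` a prefix match is enough,
    presumably because completions only know the start of the name.
    """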
    parse_limit = _PARSED_FILE_LIMIT / limit_reduction
    open_limit = _OPENED_FILE_LIMIT / limit_reduction
    file_io_count = 0
    parsed_file_count = 0
    regex = re.compile(r'\b' + re.escape(name) + (r'' if complete else r'\b'))
    for file_io in file_io_iterator:
        file_io_count += 1
        m = _check_fs(inference_state, file_io, regex)
        if m is not None:
            parsed_file_count += 1
            yield m
            if parsed_file_count >= parse_limit:
                dbg('Hit limit of parsed files: %s', parse_limit)
                break

        if file_io_count >= open_limit:
            dbg('Hit limit of opened files: %s', open_limit)
            break