import os
import re

from parso import python_bytes_to_unicode

from jedi.debug import dbg
from jedi.file_io import KnownContentFileIO, FolderIO
from jedi.inference.names import SubModuleName
from jedi.inference.imports import load_module_from_path
from jedi.inference.filters import ParserTreeFilter
from jedi.inference.gradual.conversion import convert_names
13 _IGNORE_FOLDERS
= ('.tox', '.venv', '.mypy_cache', 'venv', '__pycache__')
15 _OPENED_FILE_LIMIT
= 2000
17 Stats from a 2016 Lenovo Notebook running Linux:
18 With os.walk, it takes about 10s to scan 11'000 files (without filesystem
19 caching). Once cached it only takes 5s. So it is expected that reading all
20 those files might take a few seconds, but not a lot more.
22 _PARSED_FILE_LIMIT
= 30
24 For now we keep the amount of parsed files really low, since parsing might take
25 easily 100ms for bigger files.
def _resolve_names(definition_names, avoid_names=()):
    """Yield the actual names behind *definition_names*.

    Synthetic ``SubModuleName`` objects are skipped; names whose
    ``api_type`` is ``'module'`` are additionally followed via ``goto()``
    and resolved recursively.  *avoid_names* breaks recursion cycles.
    """
    for name in definition_names:
        if name in avoid_names:
            # Avoiding recursions here, because goto on a module name lands
            # on the same module.
            continue

        if not isinstance(name, SubModuleName):
            # SubModuleNames are not actually existing names but created
            # names when importing something like `import foo.bar.baz`.
            yield name

        if name.api_type == 'module':
            yield from _resolve_names(name.goto(), definition_names)
45 def _dictionarize(names
):
47 (n
if n
.tree_name
is None else n
.tree_name
, n
)
def _find_defining_names(module_context, tree_name):
    """Return the set of names that define *tree_name*.

    Expands the initial goto results with stub/compiled conversions,
    ``global`` declarations and same-context redefinitions, then resolves
    everything through ``_resolve_names``.
    """
    found_names = _find_names(module_context, tree_name)

    for name in list(found_names):
        # Convert from/to stubs, because those might also be usages.
        found_names |= set(convert_names(
            [name],
            only_stubs=not name.get_root_context().is_stub(),
            prefer_stub_to_compiled=False
        ))

    found_names |= set(_find_global_variables(found_names, tree_name.value))
    for name in list(found_names):
        if name.api_type == 'param' or name.tree_name is None \
                or name.tree_name.parent.type == 'trailer':
            # Params and attribute accesses (trailers) are not module-level
            # definitions; nothing else in the same context to collect.
            continue
        found_names |= set(_add_names_in_same_context(name.parent_context,
                                                      name.string_name))
    return set(_resolve_names(found_names))
def _find_names(module_context, tree_name):
    """Goto on *tree_name* and return the resolved set of names.

    The name created for the leaf itself is included, because a definition
    is also a usage of itself.
    """
    name = module_context.create_name(tree_name)
    found_names = set(name.goto())
    found_names.add(name)

    return set(_resolve_names(found_names))
80 def _add_names_in_same_context(context
, string_name
):
81 if context
.tree_node
is None:
86 filter_
= ParserTreeFilter(
87 parent_context
=context
,
88 until_position
=until_position
,
90 names
= set(filter_
.get(string_name
))
94 ordered
= sorted(names
, key
=lambda x
: x
.start_pos
)
95 until_position
= ordered
[0].start_pos
98 def _find_global_variables(names
, search_name
):
100 if name
.tree_name
is None:
102 module_context
= name
.get_root_context()
104 method
= module_context
.get_global_filter
105 except AttributeError:
108 for global_name
in method().get(search_name
):
110 c
= module_context
.create_context(global_name
.tree_name
)
111 yield from _add_names_in_same_context(c
, global_name
.string_name
)
def find_references(module_context, tree_name, only_in_module=False):
    """Find all references of *tree_name*, optionally project-wide.

    Returns the found names; when *only_in_module* is true, only names
    rooted in *module_context* are returned.
    """
    inf = module_context.inference_state
    search_name = tree_name.value

    # We disable flow analysis, because if we have ifs that are only true in
    # certain cases, we want both sides.
    try:
        inf.flow_analysis_enabled = False
        found_names = _find_defining_names(module_context, tree_name)
    finally:
        # Always restore the flag, even if name resolution raises.
        inf.flow_analysis_enabled = True

    found_names_dct = _dictionarize(found_names)

    module_contexts = [module_context]
    if not only_in_module:
        for m in set(d.get_root_context() for d in found_names):
            if m != module_context and m.tree_node is not None \
                    and inf.project.path in m.py__file__().parents:
                module_contexts.append(m)
    # For param no search for other modules is necessary.
    if only_in_module or any(n.api_type == 'param' for n in found_names):
        potential_modules = module_contexts
    else:
        potential_modules = get_module_contexts_containing_name(
            inf,
            module_contexts,
            search_name,
        )

    non_matching_reference_maps = {}
    for module_context in potential_modules:
        for name_leaf in module_context.tree_node.get_used_names().get(search_name, []):
            new = _dictionarize(_find_names(module_context, name_leaf))
            if any(tree_name in found_names_dct for tree_name in new):
                found_names_dct.update(new)
                for tree_name in new:
                    for dct in non_matching_reference_maps.get(tree_name, []):
                        # A reference that was previously searched for matches
                        # with a now found name. Merge.
                        found_names_dct.update(dct)
                    try:
                        del non_matching_reference_maps[tree_name]
                    except KeyError:
                        pass
            else:
                for name in new:
                    non_matching_reference_maps.setdefault(name, []).append(new)
    result = found_names_dct.values()
    if only_in_module:
        return [n for n in result if n.get_root_context() == module_context]
    return result
168 def _check_fs(inference_state
, file_io
, regex
):
170 code
= file_io
.read()
171 except FileNotFoundError
:
173 code
= python_bytes_to_unicode(code
, errors
='replace')
174 if not regex
.search(code
):
176 new_file_io
= KnownContentFileIO(file_io
.path
, code
)
177 m
= load_module_from_path(inference_state
, new_file_io
)
180 return m
.as_context()
def gitignored_paths(folder_io, file_io):
    """Parse a ``.gitignore`` file into ignored path sets.

    Returns ``(absolute_paths, relative_pairs)`` where relative pairs are
    ``(anchor_folder_path, name)`` tuples.  Comment (``#``), negation
    (``!``) and wildcard (``*``) lines are skipped — only plain entries
    are honoured (a deliberate approximation of gitignore semantics).
    """
    ignored_paths_abs = set()
    ignored_paths_rel = set()

    for line in file_io.read().splitlines():
        if not line or line.startswith(b'#') or line.startswith(b'!') or b'*' in line:
            continue

        p = line.decode('utf-8', 'ignore').rstrip('/')
        if '/' in p:
            # Entries containing a slash are anchored to this folder.
            name = p.lstrip('/')
            ignored_paths_abs.add(os.path.join(folder_io.path, name))
        else:
            # Bare names apply to any folder below this one.
            name = p
            ignored_paths_rel.add((folder_io.path, name))

    return ignored_paths_abs, ignored_paths_rel
def expand_relative_ignore_paths(folder_io, relative_paths):
    """Expand gitignore ``(anchor, name)`` pairs below *folder_io*.

    A pair contributes ``anchor-relative name joined onto the current
    folder`` only when the current folder path starts with the anchor.
    """
    base = folder_io.path
    expanded = set()
    for anchor, entry_name in relative_paths:
        if base.startswith(anchor):
            expanded.add(os.path.join(base, entry_name))
    return expanded
def recurse_find_python_folders_and_files(folder_io, except_paths=()):
    """Walk *folder_io*, yielding ``(folder_io, None)`` / ``(None, file_io)``.

    Python files (``.py``/``.pyi``) are yielded as they are found;
    folders are yielded after pruning.  ``.gitignore`` files encountered
    along the way extend the exclusion sets, and folders listed in
    *except_paths*, the expanded gitignore entries or ``_IGNORE_FOLDERS``
    are removed from the walk in place.
    """
    except_paths = set(except_paths)
    except_paths_relative = set()

    for root_folder_io, folder_ios, file_ios in folder_io.walk():
        # Delete folders that we don't want to iterate over.
        for file_io in file_ios:
            path = file_io.path
            if path.suffix in ('.py', '.pyi'):
                if path not in except_paths:
                    yield None, file_io

            if path.name == '.gitignore':
                ignored_paths_abs, ignored_paths_rel = gitignored_paths(
                    root_folder_io, file_io
                )
                except_paths |= ignored_paths_abs
                except_paths_relative |= ignored_paths_rel

        except_paths_relative_expanded = expand_relative_ignore_paths(
            root_folder_io, except_paths_relative
        )

        # In-place assignment prunes the ongoing walk.
        folder_ios[:] = [
            folder_io
            for folder_io in folder_ios
            if folder_io.path not in except_paths
            and folder_io.path not in except_paths_relative_expanded
            and folder_io.get_base_name() not in _IGNORE_FOLDERS
        ]
        for folder_io in folder_ios:
            yield folder_io, None
def recurse_find_python_files(folder_io, except_paths=()):
    """Yield only the Python file IOs found recursively under *folder_io*."""
    for folder_io, file_io in recurse_find_python_folders_and_files(folder_io,
                                                                    except_paths):
        if file_io is not None:
            yield file_io
def _find_python_files_in_sys_path(inference_state, module_contexts):
    """Yield Python files reachable from the modules' folders along sys.path.

    Walks upward from each module's folder while the folder is still inside
    a sys.path entry, skipping files belonging to *module_contexts* and
    folders already visited.  NOTE(review): appears unused in this file
    (see the commented-out call site) — confirm before removing.
    """
    sys_path = inference_state.get_sys_path()
    except_paths = set()
    yielded_paths = [m.py__file__() for m in module_contexts]
    for module_context in module_contexts:
        file_io = module_context.get_value().file_io
        if file_io is None:
            continue

        folder_io = file_io.get_parent_folder()
        while True:
            path = folder_io.path
            if not any(path.startswith(p) for p in sys_path) or path in except_paths:
                break
            for file_io in recurse_find_python_files(folder_io, except_paths):
                if file_io.path not in yielded_paths:
                    yield file_io
            # Remember the folder so parent iterations do not rescan it.
            except_paths.add(path)
            folder_io = folder_io.get_parent_folder()
def _find_project_modules(inference_state, module_contexts):
    """Yield the project's Python files, excluding the given modules' own."""
    already_known = [m.py__file__() for m in module_contexts]
    project_folder = FolderIO(inference_state.project.path)
    yield from recurse_find_python_files(project_folder, already_known)
def get_module_contexts_containing_name(inference_state, module_contexts, name,
                                        limit_reduction=1):
    """
    Search a name in the directories of modules.

    :param limit_reduction: Divides the limits on opening/parsing files by this
        number.
    """
    # Skip non python modules
    for module_context in module_contexts:
        if module_context.is_compiled():
            continue
        yield module_context

    # Very short names are not searched in other modules for now to avoid lots
    # of file lookups.
    if len(name) <= 2:
        return

    # Currently not used, because there's only `scope=project` and `scope=file`
    # At the moment there is no such thing as `scope=sys.path`.
    # file_io_iterator = _find_python_files_in_sys_path(inference_state, module_contexts)
    file_io_iterator = _find_project_modules(inference_state, module_contexts)
    yield from search_in_file_ios(inference_state, file_io_iterator, name,
                                  limit_reduction=limit_reduction)
def search_in_file_ios(inference_state, file_io_iterator, name,
                       limit_reduction=1, complete=False):
    """Yield module contexts whose source code mentions *name*.

    Opens at most ``_OPENED_FILE_LIMIT / limit_reduction`` files and parses
    at most ``_PARSED_FILE_LIMIT / limit_reduction`` of them, logging via
    ``dbg`` when a limit is hit.  With ``complete=True`` the trailing word
    boundary is dropped so prefixes match (used for completion searches).
    """
    parse_limit = _PARSED_FILE_LIMIT / limit_reduction
    open_limit = _OPENED_FILE_LIMIT / limit_reduction
    file_io_count = 0
    parsed_file_count = 0
    regex = re.compile(r'\b' + re.escape(name) + (r'' if complete else r'\b'))
    for file_io in file_io_iterator:
        file_io_count += 1
        m = _check_fs(inference_state, file_io, regex)
        if m is not None:
            parsed_file_count += 1
            yield m
            if parsed_file_count >= parse_limit:
                dbg('Hit limit of parsed files: %s', parse_limit)
                break

        if file_io_count >= open_limit:
            dbg('Hit limit of opened files: %s', open_limit)
            break