]>
Commit | Line | Data |
---|---|---|
53e6db90 DC |
1 | import os |
2 | import re | |
3 | from pathlib import Path | |
4 | from importlib.machinery import all_suffixes | |
5 | ||
6 | from jedi.inference.cache import inference_state_method_cache | |
7 | from jedi.inference.base_value import ContextualizedNode | |
8 | from jedi.inference.helpers import is_string, get_str_or_none | |
9 | from jedi.parser_utils import get_cached_code_lines | |
10 | from jedi.file_io import FileIO | |
11 | from jedi import settings | |
12 | from jedi import debug | |
13 | ||
# Size threshold that discover_buildout_paths() checks after every path it
# adds to its result set.
_BUILDOUT_PATH_INSERTION_LIMIT = 10
15 | ||
16 | ||
17 | def _abs_path(module_context, str_path: str): | |
18 | path = Path(str_path) | |
19 | if path.is_absolute(): | |
20 | return path | |
21 | ||
22 | module_path = module_context.py__file__() | |
23 | if module_path is None: | |
24 | # In this case we have no idea where we actually are in the file | |
25 | # system. | |
26 | return None | |
27 | ||
28 | base_dir = module_path.parent | |
29 | return base_dir.joinpath(path).absolute() | |
30 | ||
31 | ||
32 | def _paths_from_assignment(module_context, expr_stmt): | |
33 | """ | |
34 | Extracts the assigned strings from an assignment that looks as follows:: | |
35 | ||
36 | sys.path[0:0] = ['module/path', 'another/module/path'] | |
37 | ||
38 | This function is in general pretty tolerant (and therefore 'buggy'). | |
39 | However, it's not a big issue usually to add more paths to Jedi's sys_path, | |
40 | because it will only affect Jedi in very random situations and by adding | |
41 | more paths than necessary, it usually benefits the general user. | |
42 | """ | |
43 | for assignee, operator in zip(expr_stmt.children[::2], expr_stmt.children[1::2]): | |
44 | try: | |
45 | assert operator in ['=', '+='] | |
46 | assert assignee.type in ('power', 'atom_expr') and \ | |
47 | len(assignee.children) > 1 | |
48 | c = assignee.children | |
49 | assert c[0].type == 'name' and c[0].value == 'sys' | |
50 | trailer = c[1] | |
51 | assert trailer.children[0] == '.' and trailer.children[1].value == 'path' | |
52 | # TODO Essentially we're not checking details on sys.path | |
53 | # manipulation. Both assigment of the sys.path and changing/adding | |
54 | # parts of the sys.path are the same: They get added to the end of | |
55 | # the current sys.path. | |
56 | """ | |
57 | execution = c[2] | |
58 | assert execution.children[0] == '[' | |
59 | subscript = execution.children[1] | |
60 | assert subscript.type == 'subscript' | |
61 | assert ':' in subscript.children | |
62 | """ | |
63 | except AssertionError: | |
64 | continue | |
65 | ||
66 | cn = ContextualizedNode(module_context.create_context(expr_stmt), expr_stmt) | |
67 | for lazy_value in cn.infer().iterate(cn): | |
68 | for value in lazy_value.infer(): | |
69 | if is_string(value): | |
70 | abs_path = _abs_path(module_context, value.get_safe_value()) | |
71 | if abs_path is not None: | |
72 | yield abs_path | |
73 | ||
74 | ||
75 | def _paths_from_list_modifications(module_context, trailer1, trailer2): | |
76 | """ extract the path from either "sys.path.append" or "sys.path.insert" """ | |
77 | # Guarantee that both are trailers, the first one a name and the second one | |
78 | # a function execution with at least one param. | |
79 | if not (trailer1.type == 'trailer' and trailer1.children[0] == '.' | |
80 | and trailer2.type == 'trailer' and trailer2.children[0] == '(' | |
81 | and len(trailer2.children) == 3): | |
82 | return | |
83 | ||
84 | name = trailer1.children[1].value | |
85 | if name not in ['insert', 'append']: | |
86 | return | |
87 | arg = trailer2.children[1] | |
88 | if name == 'insert' and len(arg.children) in (3, 4): # Possible trailing comma. | |
89 | arg = arg.children[2] | |
90 | ||
91 | for value in module_context.create_context(arg).infer_node(arg): | |
92 | p = get_str_or_none(value) | |
93 | if p is None: | |
94 | continue | |
95 | abs_path = _abs_path(module_context, p) | |
96 | if abs_path is not None: | |
97 | yield abs_path | |
98 | ||
99 | ||
@inference_state_method_cache(default=[])
def check_sys_path_modifications(module_context):
    """
    Detect sys.path modifications within module.

    Looks at every usage of the name ``path`` in the module tree, keeps the
    ones that are the attribute of a ``sys.path...`` expression and collects
    the paths from list modifications and from assignments.

    Returns a list of the collected paths; the list is empty when the module
    has no tree node or never uses the name ``path``.
    """
    def iter_sys_path_nodes(names):
        # Yield (name, node) for each `path` name whose grandparent is a
        # power/atom_expr node that starts with `sys.path`.
        for name in names:
            node = name.parent.parent
            if node is None or node.type not in ('power', 'atom_expr'):
                continue
            children = node.children
            head = children[0]
            if not (head.type == 'name' and head.value == 'sys'):
                continue
            if children[1].type != 'trailer':
                continue
            attribute = children[1].children[1]
            if attribute.type == 'name' and attribute.value == 'path':
                yield name, node

    tree_node = module_context.tree_node
    if tree_node is None:
        return []

    try:
        candidates = tree_node.get_used_names()['path']
    except KeyError:
        # The name `path` does not occur anywhere in this module.
        return []

    collected = []
    for _, power in iter_sys_path_nodes(candidates):
        if len(power.children) >= 4:
            # At least two nodes follow `sys.path`, e.g. `.append` + `(...)`.
            collected.extend(
                _paths_from_list_modifications(module_context, *power.children[2:4])
            )
            continue
        statement = power.parent
        if statement is not None and statement.type == 'expr_stmt':
            collected.extend(_paths_from_assignment(module_context, statement))
    return collected
136 | ||
137 | ||
def discover_buildout_paths(inference_state, script_path):
    """
    Collect sys.path additions made by the buildout scripts that belong to
    ``script_path``.

    Every script yielded by ``_get_buildout_script_paths(script_path)`` is
    scanned with ``_get_paths_from_buildout_script``. Collection stops as soon
    as ``_BUILDOUT_PATH_INSERTION_LIMIT`` distinct paths have been gathered.

    Returns a set of paths (possibly empty).
    """
    buildout_script_paths = set()

    for buildout_script_path in _get_buildout_script_paths(script_path):
        for path in _get_paths_from_buildout_script(inference_state, buildout_script_path):
            buildout_script_paths.add(path)
            if len(buildout_script_paths) >= _BUILDOUT_PATH_INSERTION_LIMIT:
                # Return instead of `break`: a break would only leave the
                # inner loop, so every remaining script would still be parsed
                # and could push the result past the limit.
                return buildout_script_paths

    return buildout_script_paths
148 | ||
149 | ||
def _get_paths_from_buildout_script(inference_state, buildout_script_path):
    """
    Yield the sys.path modifications detected in one buildout script.

    The script is parsed through ``inference_state.parse`` (with caching
    enabled), wrapped into a module context and handed to
    ``check_sys_path_modifications``. If parsing raises ``IOError`` a debug
    warning is emitted and nothing is yielded.
    """
    script_io = FileIO(str(buildout_script_path))
    try:
        tree = inference_state.parse(
            file_io=script_io, cache=True, cache_path=settings.cache_directory,
        )
    except IOError:
        debug.warning('Error trying to read buildout_script: %s', buildout_script_path)
        return

    # Imported here rather than at module level, as in the original code.
    from jedi.inference.value import ModuleValue
    code_lines = get_cached_code_lines(inference_state.grammar, buildout_script_path)
    module_value = ModuleValue(
        inference_state,
        tree,
        file_io=script_io,
        string_names=None,
        code_lines=code_lines,
    )
    yield from check_sys_path_modifications(module_value.as_context())
170 | ||
171 | ||
172 | def _get_parent_dir_with_file(path: Path, filename): | |
173 | for parent in path.parents: | |
174 | try: | |
175 | if parent.joinpath(filename).is_file(): | |
176 | return parent | |
177 | except OSError: | |
178 | continue | |
179 | return None | |
180 | ||
181 | ||
def _get_buildout_script_paths(search_path: Path):
    """
    If there is a 'buildout.cfg' file in one of the parent directories of the
    given module, yield all files in the buildout bin directory that look
    like python files (first line is a shebang that mentions ``python``).

    This is a generator; it yields nothing when no buildout project or no
    readable ``bin`` directory is found.

    :param search_path: absolute path to the module.
    """
    project_root = _get_parent_dir_with_file(search_path, 'buildout.cfg')
    if not project_root:
        return
    bin_path = project_root.joinpath('bin')
    try:
        # is_dir() rather than exists(): a regular file named `bin` would
        # otherwise reach os.listdir() and raise NotADirectoryError.
        if not bin_path.is_dir():
            return
        filenames = os.listdir(bin_path)
    except OSError as e:
        # Unreadable directory or a race with its removal; this lookup is
        # best effort, so just report and give up.
        debug.warning(str(e))
        return

    for filename in filenames:
        filepath = bin_path.joinpath(filename)
        try:
            with open(filepath, 'r') as f:
                firstline = f.readline()
        except (UnicodeDecodeError, IOError) as e:
            # Probably a binary file; permission error or race cond. because
            # file got deleted. Ignore it.
            debug.warning(str(e))
            continue
        # Yield only after the file is closed so that no handle stays open
        # while the consumer works with the result.
        if firstline.startswith('#!') and 'python' in firstline:
            yield filepath
209 | ||
210 | ||
def remove_python_path_suffix(path):
    """
    Return ``path`` without its final suffix if that suffix is one of
    ``importlib.machinery.all_suffixes()`` or ``.pyi``; otherwise return
    ``path`` unchanged.
    """
    recognized = all_suffixes() + ['.pyi']
    if path.suffix in recognized:
        return path.with_name(path.stem)
    return path
217 | ||
218 | ||
def transform_path_to_dotted(sys_path, module_path):
    """
    Returns the dotted path inside a sys.path as a list of names. e.g.

    >>> transform_path_to_dotted([str(Path("/foo").absolute())], Path('/foo/bar/baz.py').absolute())
    (('bar', 'baz'), False)

    Returns (None, False) if the path doesn't really resolve to anything.
    The second return part is if it is a package.

    :param sys_path: iterable of sys.path entries as strings.
    :param module_path: ``Path`` of the module file.
    """
    # First remove the suffix.
    module_path = remove_python_path_suffix(module_path)
    if module_path.name.startswith('.'):
        return None, False

    # Once the suffix was removed we are using the files as we know them. This
    # means that if someone uses an ending like .vim for a Python file, .vim
    # will be part of the returned dotted part.

    is_package = module_path.name == '__init__'
    if is_package:
        module_path = module_path.parent

    module_str = str(module_path)
    # On Windows a path can also use a slash.
    separators = (os.path.sep, '/')

    def iter_potential_solutions():
        for p in sys_path:
            if not module_str.startswith(p):
                continue
            rest = module_str[len(p):]
            if rest.startswith(separators):
                # Strip the separator between the sys.path entry and the rest.
                rest = rest[1:]
            elif rest and not p.endswith(separators):
                # The entry is only a textual prefix that ends in the middle
                # of a path component (e.g. entry `/foo` for `/foobar/baz`),
                # so the module is not located inside it.
                continue

            if rest:
                split = rest.split(os.path.sep)
                if not all(split):
                    # This means that part of the file path was empty, this
                    # is very strange and is probably a file that is called
                    # `.py`.
                    return
                # Stub folders for foo can end with foo-stubs. Just remove
                # it.
                yield tuple(re.sub(r'-stubs$', '', s) for s in split)

    potential_solutions = tuple(iter_potential_solutions())
    if not potential_solutions:
        return None, False
    # Try to find the shortest path, this makes more sense usually, because the
    # user usually has venvs somewhere. This means that a path like
    # .tox/py37/lib/python3.7/os.py can be normal for a file. However in that
    # case we definitely want to return ['os'] as a path and not a crazy
    # ['.tox', 'py37', 'lib', 'python3.7', 'os']. Keep in mind that this is a
    # heuristic and there's no way to "always" do it right.
    # min() returns the first of several equally short candidates, just like
    # taking element 0 of a (stable) sort by length.
    return min(potential_solutions, key=len), is_package