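"""
In-memory and file system caching of parsed modules.

Parsed trees are kept in ``parser_cache`` and, when possible, pickled to a
per-Python-version cache directory on disk so they can be reused later.
"""
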
import time
import os
import sys
import hashlib
import gc
import shutil
import platform
import logging
import warnings
import pickle
from pathlib import Path
from typing import Dict, Any

LOG = logging.getLogger(__name__)

_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
"""
Cached files should survive in memory for at least 10 minutes before they
may be garbage collected.
"""

_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30  # 30 days
"""
Maximum time for a cached file to survive if it has not been accessed
within that period.
"""

_CACHED_SIZE_TRIGGER = 600
"""
This setting limits the number of cached files kept in memory. It's basically
a way to trigger garbage collection.

The reasoning for this limit being as large as it is:

Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
makes Jedi use ~500 MB of memory. Since we might want a bit more than those
few libraries, we just increase the limit a bit.
"""

_PICKLE_VERSION = 33
"""
Version number (integer) for the file system cache.

Increment this number when there are any incompatible changes in the parser
tree classes. For example, the following changes are regarded as incompatible:

- A class name is changed.
- A class is moved to another module.
- The __slots__ of a class are changed.
"""

_VERSION_TAG = '%s-%s%s-%s' % (
    platform.python_implementation(),
    sys.version_info[0],
    sys.version_info[1],
    _PICKLE_VERSION
)
"""
Short name to distinguish Python implementations and versions.

It's a bit similar to `sys.implementation.cache_tag`.
See: http://docs.python.org/3/library/sys.html#sys.implementation
"""


def _get_default_cache_path():
    if platform.system().lower() == 'windows':
        dir_ = Path(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
    elif platform.system().lower() == 'darwin':
        dir_ = Path('~', 'Library', 'Caches', 'Parso')
    else:
        dir_ = Path(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
    return dir_.expanduser()


_default_cache_path = _get_default_cache_path()
"""
The path where the cache is stored.

On Linux, this defaults to ``~/.cache/parso/``, on OS X to
``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
"""

_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24  # one day


def _get_cache_clear_lock_path(cache_path=None):
    """
    The path where the cache lock is stored.

    The cache lock prevents continuous cache clearing and only allows garbage
    collection once a day (configurable via _CACHE_CLEAR_THRESHOLD).
    """
    cache_path = cache_path or _default_cache_path
    return cache_path.joinpath("PARSO-CACHE-LOCK")


parser_cache: Dict[str, Any] = {}
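# Maps hashed grammar -> {module path: _NodeCacheItem}; this is the in-memory
# cache that backs the pickled files on disk (see _set_cache_item below).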


class _NodeCacheItem:
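    """
    A single cache entry: the parsed node, its source lines, and the
    timestamps used for invalidation and in-memory garbage collection.
    """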
    def __init__(self, node, lines, change_time=None):
        self.node = node
        self.lines = lines
        if change_time is None:
            change_time = time.time()
        self.change_time = change_time
        self.last_used = change_time


def load_module(hashed_grammar, file_io, cache_path=None):
    """
    Returns the cached module node, or None if there is no up-to-date cache entry.
    """
    p_time = file_io.get_last_modified()
    if p_time is None:
        return None

    try:
        module_cache_item = parser_cache[hashed_grammar][file_io.path]
        if p_time <= module_cache_item.change_time:
            module_cache_item.last_used = time.time()
            return module_cache_item.node
    except KeyError:
        return _load_from_file_system(
            hashed_grammar,
            file_io.path,
            p_time,
            cache_path=cache_path
        )


def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
    cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
    try:
        if p_time > os.path.getmtime(cache_path):
            # Cache is outdated.
            return None

        with open(cache_path, 'rb') as f:
            # Disable the cyclic garbage collector while unpickling; creating
            # many small objects with gc enabled is slow.
            gc.disable()
            try:
                module_cache_item = pickle.load(f)
            finally:
                gc.enable()
    except FileNotFoundError:
        return None
    else:
        _set_cache_item(hashed_grammar, path, module_cache_item)
        LOG.debug('pickle loaded: %s', path)
        return module_cache_item.node


def _set_cache_item(hashed_grammar, path, module_cache_item):
    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
        # Garbage collection of old in-memory cache entries: we basically
        # throw everything away that hasn't been accessed in the last
        # 10 minutes.
        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
        for key, path_to_item_map in parser_cache.items():
            parser_cache[key] = {
                path: node_item
                for path, node_item in path_to_item_map.items()
                if node_item.last_used > cutoff_time
            }

    parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item


def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
    path = file_io.path
    try:
        p_time = None if path is None else file_io.get_last_modified()
    except OSError:
        p_time = None
        pickling = False

    item = _NodeCacheItem(module, lines, p_time)
    _set_cache_item(hashed_grammar, path, item)
    if pickling and path is not None:
        try:
            _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
        except PermissionError:
            # It's not really a big issue if the cache cannot be saved to the
            # file system. It's still in RAM in that case. However we should
            # still warn the user that this is happening.
            warnings.warn(
                'Tried to save a file to %s, but got permission denied.' % path,
                Warning
            )
        else:
            _remove_cache_and_update_lock(cache_path=cache_path)


def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
    with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f:
        pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)


def clear_cache(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path
    shutil.rmtree(cache_path)
    parser_cache.clear()


def clear_inactive_cache(
    cache_path=None,
    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
):
    if cache_path is None:
        cache_path = _default_cache_path
    if not cache_path.exists():
        return False
    for dirname in os.listdir(cache_path):
        version_path = cache_path.joinpath(dirname)
        if not version_path.is_dir():
            continue
        for file in os.scandir(version_path):
            if file.stat().st_atime + inactivity_threshold <= time.time():
                try:
                    os.remove(file.path)
                except OSError:  # silently ignore all failures
                    continue
    return True


def _touch(path):
    try:
        os.utime(path, None)
    except FileNotFoundError:
        try:
            file = open(path, 'a')
            file.close()
        except (OSError, IOError):  # TODO Maybe log this?
            return False
    return True


def _remove_cache_and_update_lock(cache_path=None):
    lock_path = _get_cache_clear_lock_path(cache_path=cache_path)
    try:
        clear_lock_time = os.path.getmtime(lock_path)
    except FileNotFoundError:
        clear_lock_time = None
    if (
        clear_lock_time is None  # first time
        or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
    ):
        if not _touch(lock_path):
            # First make sure that as few as possible other cleanup jobs also
            # get started. There is still a race condition but it's probably
            # not a big problem.
            return False

        clear_inactive_cache(cache_path=cache_path)


def _get_hashed_path(hashed_grammar, path, cache_path=None):
    directory = _get_cache_directory_path(cache_path=cache_path)

    # Cache file name: "<hashed_grammar>-<sha256 of the module path>.pkl".
    file_hash = hashlib.sha256(str(path).encode("utf-8")).hexdigest()
    return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))


def _get_cache_directory_path(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path
    directory = cache_path.joinpath(_VERSION_TAG)
    if not directory.exists():
        os.makedirs(directory)
    return directory
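

# Rough usage sketch (illustrative only; ``file_io`` stands for a file IO
# wrapper providing ``.path`` and ``.get_last_modified()``, and ``lines`` are
# the source lines of the file):
#
#     module = load_module(hashed_grammar, file_io)
#     if module is None:
#         module = ...  # parse the source here
#         try_to_save_module(hashed_grammar, file_io, module, lines)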