import hashlib
import logging
import os
import pickle
import platform
import shutil
import sys
import time
import warnings
from pathlib import Path
from typing import Dict, Any

LOG = logging.getLogger(__name__)

_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
"""
Cached files should survive at least a few minutes.
"""


_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30  # 30 days
"""
Maximum time for a cached file to survive if it is not accessed within this
period.
"""


_CACHED_SIZE_TRIGGER = 600
"""
This setting limits the number of cached files. It's basically a way to start
garbage collection.

The reasoning for such a large limit is the following:

Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
makes Jedi use ~500 MB of memory. Since we might want a bit more than those
few libraries, we just increase it a bit.
"""


_PICKLE_VERSION = 33
"""
Version number (integer) for the file system cache.

Increment this number when there are any incompatible changes in
the parser tree classes. For example, the following changes
are regarded as incompatible:

- A class name is changed.
- A class is moved to another module.
- A __slot__ of a class is changed.
"""


_VERSION_TAG = '%s-%s%s-%s' % (
    platform.python_implementation(),
    sys.version_info[0],
    sys.version_info[1],
    _PICKLE_VERSION
)
"""
Short name for distinguishing Python implementations and versions.

It's a bit similar to `sys.implementation.cache_tag`.
See: http://docs.python.org/3/library/sys.html#sys.implementation
"""
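
# Illustrative example (not part of the original module): on CPython 3.8 with
# _PICKLE_VERSION = 33, the tag above renders as 'CPython-38-33'. Caches
# written by other implementations or versions land in sibling directories and
# therefore never collide.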


def _get_default_cache_path():
    if platform.system().lower() == 'windows':
        dir_ = Path(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
    elif platform.system().lower() == 'darwin':
        dir_ = Path('~', 'Library', 'Caches', 'Parso')
    else:
        dir_ = Path(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
    return dir_.expanduser()


_default_cache_path = _get_default_cache_path()
"""
The path where the cache is stored.

On Linux, this defaults to ``~/.cache/parso/``, on OS X to
``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
On Linux, if the environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default.
"""


_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24  # 1 day


def _get_cache_clear_lock_path(cache_path=None):
    """
    The path where the cache lock is stored.

    The cache lock prevents continuous cache clearing and only allows garbage
    collection once a day (configurable via _CACHE_CLEAR_THRESHOLD).
    """
    cache_path = cache_path or _default_cache_path
    return cache_path.joinpath("PARSO-CACHE-LOCK")


parser_cache: Dict[str, Any] = {}
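
# Sketch of the in-memory layout, with hypothetical values: the outer key is a
# grammar hash, the inner key a file path, and the value a _NodeCacheItem
# (defined below):
#
#     parser_cache == {'<grammar-hash>': {'/some/file.py': <_NodeCacheItem>}}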


class _NodeCacheItem:
    def __init__(self, node, lines, change_time=None):
        self.node = node
        self.lines = lines
        if change_time is None:
            change_time = time.time()
        self.change_time = change_time
        self.last_used = change_time


def load_module(hashed_grammar, file_io, cache_path=None):
    """
    Returns a module or None, if it fails.
    """
    p_time = file_io.get_last_modified()
    if p_time is None:
        return None

    try:
        # Check the in-memory cache first.
        module_cache_item = parser_cache[hashed_grammar][file_io.path]
        if p_time <= module_cache_item.change_time:
            module_cache_item.last_used = time.time()
            return module_cache_item.node
        # The in-memory item is outdated; fall through and return None.
    except KeyError:
        return _load_from_file_system(
            hashed_grammar,
            file_io.path,
            p_time,
            cache_path=cache_path
        )


def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
    cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
    try:
        if p_time > os.path.getmtime(cache_path):
            # The source file is newer than the pickled cache; ignore it.
            return None

        with open(cache_path, 'rb') as f:
            module_cache_item = pickle.load(f)
    except FileNotFoundError:
        return None
    else:
        _set_cache_item(hashed_grammar, path, module_cache_item)
        LOG.debug('pickle loaded: %s', path)
        return module_cache_item.node
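
# A worked example of the freshness check above (hypothetical timestamps): if
# the source file was modified at mtime 200 (p_time) and the pickle written at
# mtime 150, `p_time > os.path.getmtime(cache_path)` holds and the stale pickle
# is skipped; with the pickle written at mtime 250, it is loaded and re-cached
# in memory via _set_cache_item().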


def _set_cache_item(hashed_grammar, path, module_cache_item):
    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
        # Garbage collection of old cache files.
        # We are basically throwing everything away that hasn't been accessed
        # within _CACHED_FILE_MINIMUM_SURVIVAL (10 minutes).
        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
        for key, path_to_item_map in parser_cache.items():
            parser_cache[key] = {
                path: node_item
                for path, node_item in path_to_item_map.items()
                if node_item.last_used > cutoff_time
            }

    parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item


def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True,
                       cache_path=None):
    path = file_io.path
    try:
        p_time = None if path is None else file_io.get_last_modified()
    except OSError:
        p_time = None
        pickling = False

    item = _NodeCacheItem(module, lines, p_time)
    _set_cache_item(hashed_grammar, path, item)
    if pickling and path is not None:
        try:
            _save_to_file_system(hashed_grammar, path, item,
                                 cache_path=cache_path)
        except PermissionError:
            # It's not really a big issue if the cache cannot be saved to the
            # file system. It's still in RAM in that case. However we should
            # still warn the user that this is happening.
            warnings.warn(
                'Tried to save a file to %s, but got permission denied.' % path,
                Warning
            )
        else:
            _remove_cache_and_update_lock(cache_path=cache_path)
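
# Minimal round-trip sketch (hypothetical names; `parse_file` and `file_io`
# are assumptions, not part of this module):
#
#     module = load_module(hashed_grammar, file_io)
#     if module is None:
#         module, lines = parse_file(file_io.path)  # hypothetical parser call
#         try_to_save_module(hashed_grammar, file_io, module, lines)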


def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
    with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path),
              'wb') as f:
        pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)


def clear_cache(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path
    shutil.rmtree(cache_path)
    parser_cache.clear()


def clear_inactive_cache(
    cache_path=None,
    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
):
    if cache_path is None:
        cache_path = _default_cache_path
    if not cache_path.exists():
        return False
    for dirname in os.listdir(cache_path):
        version_path = cache_path.joinpath(dirname)
        if not version_path.is_dir():
            continue
        for file in os.scandir(version_path):
            if file.stat().st_atime + inactivity_threshold <= time.time():
                try:
                    os.remove(file.path)
                except OSError:  # silently ignore all failures
                    continue
    return True


def _touch(path):
    try:
        os.utime(path, None)
    except FileNotFoundError:
        try:
            file = open(path, 'a')
            file.close()
        except (OSError, IOError):  # TODO Maybe log this?
            return False
    return True


def _remove_cache_and_update_lock(cache_path=None):
    lock_path = _get_cache_clear_lock_path(cache_path=cache_path)
    try:
        clear_lock_time = os.path.getmtime(lock_path)
    except FileNotFoundError:
        clear_lock_time = None
    if (
        clear_lock_time is None  # first time
        or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
    ):
        if not _touch(lock_path):
            # First make sure that as few as possible other cleanup jobs also
            # get started. There is still a race condition, but it's probably
            # not a big problem.
            return False

        clear_inactive_cache(cache_path=cache_path)
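
# Illustrative timing (hypothetical numbers): if the lock file was last touched
# at 09:00 on Monday and _CACHE_CLEAR_THRESHOLD is 24 hours, every call before
# 09:00 on Tuesday returns without clearing; the first call after that touches
# the lock again and triggers clear_inactive_cache().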


def _get_hashed_path(hashed_grammar, path, cache_path=None):
    directory = _get_cache_directory_path(cache_path=cache_path)

    file_hash = hashlib.sha256(str(path).encode("utf-8")).hexdigest()
    return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
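
# Shape of the resulting cache file path (illustrative, hypothetical values):
#
#     <cache_path>/<_VERSION_TAG>/<hashed_grammar>-<sha256 of the file path>.pkl
#     e.g. ~/.cache/parso/CPython-38-33/<grammar-hash>-<64-hex-digest>.pkl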


def _get_cache_directory_path(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path
    directory = cache_path.joinpath(_VERSION_TAG)
    if not directory.exists():
        os.makedirs(directory)
    return directory