-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cache files for different CachingFileManager objects separately #4879
Merged
+150
−71
Merged
Changes from all commits
Commits
Show all changes
36 commits
Select commit
Hold shift + click to select a range
a80cfd2
Cache files for different CachingFileManager objects separately
shoyer d8b3bb7
Merge branch 'main' into file-manager-not-shared
shoyer 6bc80e7
Fix whats-new message location
shoyer e637165
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] d587bfc
Add id to CachingFileManager
shoyer 4f4ba13
Merge branch 'main' into file-manager-not-shared
shoyer e93f1f5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 257eb00
restrict new test to only netCDF files
shoyer 7d857f3
fix whats-new message
shoyer a3556d1
skip test on windows
shoyer 7c7c4e8
Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks"
shoyer c51e81e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 89c2b55
Revert "Fix whats-new message location"
shoyer c320acb
fixups
shoyer 2cab733
fix syntax
shoyer 997c3d4
Merge branch 'main' into file-manager-not-shared
shoyer a3486c8
tweaks
shoyer d24914e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 7105ec2
fix types for mypy
shoyer 38c2a16
add uuid
shoyer 6d6e2dd
restore ref_counts
shoyer d95f9f0
Merge branch 'main' into file-manager-not-shared
shoyer a837f3b
doc tweaks
shoyer 25706eb
close files inside test_open_mfdataset_list_attr
shoyer 1466c82
remove unused itertools
shoyer 382d734
don't use refcounts
shoyer 46f4fef
re-enable ref counting
shoyer 3ec678e
cleanup
shoyer 929e5d1
Merge branch 'main' into file-manager-not-shared
dcherian fe7b3c3
Apply typing suggestions from code review
dcherian 915976d
Merge branch 'main' into file-manager-not-shared
dcherian 06c5d51
fix import of Hashable
dcherian cb16f88
Merge branch 'main' into file-manager-not-shared
dcherian e05cb3b
ignore __init__ type
shoyer 4dfbfc4
Merge branch 'main' into file-manager-not-shared
dcherian a5bf621
fix whats-new
dcherian File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,21 +3,21 @@ | |
import contextlib | ||
import io | ||
import threading | ||
import uuid | ||
import warnings | ||
from typing import Any | ||
from typing import Any, Hashable | ||
|
||
from ..core import utils | ||
from ..core.options import OPTIONS | ||
from .locks import acquire | ||
from .lru_cache import LRUCache | ||
|
||
# Global cache for storing open files. | ||
FILE_CACHE: LRUCache[str, io.IOBase] = LRUCache( | ||
FILE_CACHE: LRUCache[Any, io.IOBase] = LRUCache( | ||
maxsize=OPTIONS["file_cache_maxsize"], on_evict=lambda k, v: v.close() | ||
) | ||
assert FILE_CACHE.maxsize, "file cache must be at least size one" | ||
|
||
|
||
REF_COUNTS: dict[Any, int] = {} | ||
|
||
_DEFAULT_MODE = utils.ReprObject("<unused>") | ||
|
@@ -85,12 +85,13 @@ def __init__( | |
kwargs=None, | ||
lock=None, | ||
cache=None, | ||
manager_id: Hashable | None = None, | ||
ref_counts=None, | ||
): | ||
"""Initialize a FileManager. | ||
"""Initialize a CachingFileManager. | ||
|
||
The cache and ref_counts arguments exist solely to facilitate | ||
dependency injection, and should only be set for tests. | ||
The cache, manager_id and ref_counts arguments exist solely to | ||
facilitate dependency injection, and should only be set for tests. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -120,6 +121,8 @@ def __init__( | |
global variable and contains non-picklable file objects, an | ||
unpickled FileManager objects will be restored with the default | ||
cache. | ||
manager_id : hashable, optional | ||
Identifier for this CachingFileManager. | ||
ref_counts : dict, optional | ||
Optional dict to use for keeping track the number of references to | ||
the same file. | ||
|
@@ -129,13 +132,17 @@ def __init__( | |
self._mode = mode | ||
self._kwargs = {} if kwargs is None else dict(kwargs) | ||
|
||
self._default_lock = lock is None or lock is False | ||
self._lock = threading.Lock() if self._default_lock else lock | ||
self._use_default_lock = lock is None or lock is False | ||
self._lock = threading.Lock() if self._use_default_lock else lock | ||
|
||
# cache[self._key] stores the file associated with this object. | ||
if cache is None: | ||
cache = FILE_CACHE | ||
self._cache = cache | ||
if manager_id is None: | ||
# Each call to CachingFileManager should separately open files. | ||
manager_id = str(uuid.uuid4()) | ||
self._manager_id = manager_id | ||
self._key = self._make_key() | ||
|
||
# ref_counts[self._key] stores the number of CachingFileManager objects | ||
|
@@ -153,6 +160,7 @@ def _make_key(self): | |
self._args, | ||
"a" if self._mode == "w" else self._mode, | ||
tuple(sorted(self._kwargs.items())), | ||
self._manager_id, | ||
) | ||
return _HashedSequence(value) | ||
|
||
|
@@ -223,20 +231,14 @@ def close(self, needs_lock=True): | |
if file is not None: | ||
file.close() | ||
|
||
def __del__(self): | ||
# If we're the only CachingFileManger referencing a unclosed file, we | ||
# should remove it from the cache upon garbage collection. | ||
def __del__(self) -> None: | ||
# If we're the only CachingFileManger referencing a unclosed file, | ||
# remove it from the cache upon garbage collection. | ||
# | ||
# Keeping our own count of file references might seem like overkill, | ||
# but it's actually pretty common to reopen files with the same | ||
# variable name in a notebook or command line environment, e.g., to | ||
# fix the parameters used when opening a file: | ||
# >>> ds = xarray.open_dataset('myfile.nc') | ||
# >>> ds = xarray.open_dataset('myfile.nc', decode_times=False) | ||
# This second assignment to "ds" drops CPython's ref-count on the first | ||
# "ds" argument to zero, which can trigger garbage collections. So if | ||
# we didn't check whether another object is referencing 'myfile.nc', | ||
# the newly opened file would actually be immediately closed! | ||
# We keep track of our own reference count because we don't want to | ||
# close files if another identical file manager needs it. This can | ||
# happen if a CachingFileManager is pickled and unpickled without | ||
# closing the original file. | ||
ref_count = self._ref_counter.decrement(self._key) | ||
|
||
if not ref_count and self._key in self._cache: | ||
|
@@ -249,30 +251,40 @@ def __del__(self): | |
|
||
if OPTIONS["warn_for_unclosed_files"]: | ||
warnings.warn( | ||
"deallocating {}, but file is not already closed. " | ||
"This may indicate a bug.".format(self), | ||
f"deallocating {self}, but file is not already closed. " | ||
"This may indicate a bug.", | ||
RuntimeWarning, | ||
stacklevel=2, | ||
) | ||
|
||
def __getstate__(self): | ||
"""State for pickling.""" | ||
# cache and ref_counts are intentionally omitted: we don't want to try | ||
# to serialize these global objects. | ||
lock = None if self._default_lock else self._lock | ||
return (self._opener, self._args, self._mode, self._kwargs, lock) | ||
# cache is intentionally omitted: we don't want to try to serialize | ||
# these global objects. | ||
lock = None if self._use_default_lock else self._lock | ||
return ( | ||
self._opener, | ||
self._args, | ||
self._mode, | ||
self._kwargs, | ||
lock, | ||
self._manager_id, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know enough what exactly this is used for, but make sure that you don't need to do a similar thing as for lock (replace with None in case it is default). |
||
) | ||
|
||
def __setstate__(self, state): | ||
def __setstate__(self, state) -> None: | ||
"""Restore from a pickle.""" | ||
opener, args, mode, kwargs, lock = state | ||
self.__init__(opener, *args, mode=mode, kwargs=kwargs, lock=lock) | ||
opener, args, mode, kwargs, lock, manager_id = state | ||
self.__init__( # type: ignore | ||
opener, *args, mode=mode, kwargs=kwargs, lock=lock, manager_id=manager_id | ||
) | ||
|
||
def __repr__(self): | ||
def __repr__(self) -> str: | ||
args_string = ", ".join(map(repr, self._args)) | ||
if self._mode is not _DEFAULT_MODE: | ||
args_string += f", mode={self._mode!r}" | ||
return "{}({!r}, {}, kwargs={})".format( | ||
type(self).__name__, self._opener, args_string, self._kwargs | ||
return ( | ||
f"{type(self).__name__}({self._opener!r}, {args_string}, " | ||
f"kwargs={self._kwargs}, manager_id={self._manager_id!r})" | ||
) | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Any's can be replaced with narrower versions, I couldn't figure them out on a quick glance.