Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Unique file ids
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkgr committed Aug 5, 2021
1 parent b7d4a92 commit 153bade
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions allennlp/common/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,13 @@ def _serialize(data):
return np.frombuffer(buffer, dtype=np.uint8)


_active_tensor_caches: MutableMapping[str, "TensorCache"] = weakref.WeakValueDictionary()
_active_tensor_caches: MutableMapping[int, "TensorCache"] = weakref.WeakValueDictionary()


def _unique_file_id(path: Union[str, PathLike]) -> int:
result = os.stat(path).st_ino
assert result != 0
return result


class TensorCache(MutableMapping[str, Tensor], ABC):
Expand All @@ -584,7 +590,10 @@ def __new__(cls, filename: Union[str, PathLike], *, read_only: bool = False, **k
# This mechanism makes sure we re-use open lmdb file handles. Lmdb has a problem when the same file is
# opened by the same process multiple times. This is our workaround.
filename = str(filename)
result = _active_tensor_caches.get(filename)
try:
result = _active_tensor_caches.get(_unique_file_id(filename))
except FileNotFoundError:
result = None
if result is None:
result = super(TensorCache, cls).__new__(
cls, filename, read_only=read_only, **kwargs
Expand Down Expand Up @@ -691,7 +700,7 @@ def __init__(
readonly=read_only,
lock=use_lock,
)
_active_tensor_caches[self.lmdb_env.path()] = self
_active_tensor_caches[_unique_file_id(filename)] = self

# We have another cache here that makes sure we return the same object for the same key. Without it,
# you would get a different tensor, using different memory, every time you call __getitem__(), even
Expand Down

0 comments on commit 153bade

Please sign in to comment.