From e4bb3b608dacb711318e77e0a404f582d5df0506 Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Tue, 29 Mar 2022 17:36:26 -0700 Subject: [PATCH] unify into ZipStorage --- src/sourmash/index/__init__.py | 2 +- src/sourmash/sbt.py | 10 +++--- src/sourmash/sbt_storage.py | 65 +++++++++++++++++++++------------- src/sourmash/sourmash_args.py | 2 +- tests/test_sbt.py | 2 +- 5 files changed, 50 insertions(+), 31 deletions(-) diff --git a/src/sourmash/index/__init__.py b/src/sourmash/index/__init__.py index 1feddd0852..5fa521db66 100644 --- a/src/sourmash/index/__init__.py +++ b/src/sourmash/index/__init__.py @@ -607,7 +607,7 @@ def load(cls, location, traverse_yield_all=False, use_manifest=True): if not os.path.exists(location): raise FileNotFoundError(location) - storage = ZipStorage(location, read_only=True) + storage = ZipStorage(location) return cls(storage, traverse_yield_all=traverse_yield_all, use_manifest=use_manifest) diff --git a/src/sourmash/sbt.py b/src/sourmash/sbt.py index d8f816edbc..bb001fd940 100644 --- a/src/sourmash/sbt.py +++ b/src/sourmash/sbt.py @@ -625,7 +625,7 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): kind = "Zip" if not path.endswith('.sbt.zip'): path += '.sbt.zip' - storage = ZipStorage(path, read_only=False) + storage = ZipStorage(path, mode="w") backend = "FSStorage" assert path[-8:] == '.sbt.zip' @@ -1435,12 +1435,13 @@ def filter_distance(filter_a, filter_b, n=1000): def convert_cmd(name, backend): "Convert an SBT to use a different back end." from .sbtmh import SigLeaf - from .sbt_storage import RwZipStorage options = backend.split('(') backend = options.pop(0) backend = backend.lower().strip("'") + kwargs = {} + if options: print(options) options = options[0].split(')') @@ -1454,7 +1455,8 @@ def convert_cmd(name, backend): elif backend.lower() in ('redis', 'redisstorage'): backend = RedisStorage elif backend.lower() in ('zip', 'zipstorage'): - backend = RwZipStorage + backend = ZipStorage + kwargs['mode'] = 'w' elif backend.lower() in ('fs', 'fsstorage'): backend = FSStorage if options: @@ -1470,6 +1472,6 @@ def convert_cmd(name, backend): else: error('backend not recognized: {}'.format(backend)) - with backend(*options) as storage: + with backend(*options, **kwargs) as storage: sbt = SBT.load(name, leaf_loader=SigLeaf.load) sbt.save(name, storage=storage) diff --git a/src/sourmash/sbt_storage.py b/src/sourmash/sbt_storage.py index 08d6ee6693..398e11c877 100644 --- a/src/sourmash/sbt_storage.py +++ b/src/sourmash/sbt_storage.py @@ -94,40 +94,45 @@ def load(self, path): return path.read_bytes() -class ZipStorage(Storage): +class ZipStorage(RustObject, Storage): - def __new__(self, path, *, read_only=True): - if read_only: - return RustZipStorage(path) + __dealloc_func__ = lib.zipstorage_free + + def __init__(self, path, *, mode="r"): + if mode == "w": + self.__inner = _RwZipStorage(path) else: - return RwZipStorage(path) + self.__inner = None + path = os.path.abspath(path) + self._objptr = rustcall(lib.zipstorage_new, to_bytes(path), len(path)) @staticmethod def can_open(location): return zipfile.is_zipfile(location) - -class RustZipStorage(RustObject, Storage): - - __dealloc_func__ = lib.zipstorage_free - - def __init__(self, path): - path = os.path.abspath(path) - self._objptr = rustcall(lib.zipstorage_new, to_bytes(path), len(path)) - @property def path(self): + if self.__inner: + return self.__inner.path return decode_str(self._methodcall(lib.zipstorage_path)) @property def subdir(self): + if self.__inner: + return self.__inner.subdir return decode_str(self._methodcall(lib.zipstorage_subdir)) @subdir.setter - def name(self, value): - self._methodcall(lib.zipstorage_set_subdir, to_bytes(value), len(value)) + def subdir(self, value): + if self.__inner: + self.__inner.subdir = value + else: + self._methodcall(lib.zipstorage_set_subdir, to_bytes(value), len(value)) def _filenames(self): + if self.__inner: + return self.__inner._filenames() + size = ffi.new("uintptr_t *") paths_ptr = self._methodcall(lib.zipstorage_filenames, size) size = size[0] @@ -140,9 +145,14 @@ def _filenames(self): return paths def save(self, path, content, *, overwrite=False, compress=False): + if self.__inner: + return self.__inner.save(path, content, overwrite=overwrite, compress=compress) raise NotImplementedError() def load(self, path): + if self.__inner: + return self.__inner.load(path) + try: size = ffi.new("uintptr_t *") rawbuf = self._methodcall(lib.zipstorage_load, to_bytes(path), len(path), size) @@ -157,6 +167,9 @@ def load(self, path): raise FileNotFoundError(path) def list_sbts(self): + if self.__inner: + return self.__inner.list_sbts() + size = ffi.new("uintptr_t *") paths_ptr = self._methodcall(lib.zipstorage_list_sbts, size) size = size[0] @@ -168,12 +181,23 @@ def list_sbts(self): return paths + def init_args(self): + return {'path': self.path} + + def flush(self): + if self.__inner: + self.__inner.flush() + + def close(self): + if self.__inner: + self.__inner.close() + @staticmethod def can_open(location): return zipfile.is_zipfile(location) -class RwZipStorage(Storage): +class _RwZipStorage(Storage): def __init__(self, path): self.path = os.path.abspath(path) @@ -296,9 +320,6 @@ def load(self, path): else: raise FileNotFoundError(path) - def init_args(self): - return {'path': self.path} - def close(self): # TODO: this is not ideal; checking for zipfile.fp is looking at # internal implementation details from CPython... @@ -378,10 +399,6 @@ def list_sbts(self): def __del__(self): self.close() - @staticmethod - def can_open(location): - return zipfile.is_zipfile(location) - class IPFSStorage(Storage): diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py index 08b03e8abf..c98b4fce0a 100644 --- a/src/sourmash/sourmash_args.py +++ b/src/sourmash/sourmash_args.py @@ -963,7 +963,7 @@ def open(self): if os.path.exists(self.location): do_create = False - storage = ZipStorage(self.location, read_only=False) + storage = ZipStorage(self.location, mode="w") if not storage.subdir: storage.subdir = 'signatures' diff --git a/tests/test_sbt.py b/tests/test_sbt.py index 7f4f214e08..d188cfa291 100644 --- a/tests/test_sbt.py +++ b/tests/test_sbt.py @@ -367,7 +367,7 @@ def test_sbt_zipstorage(tmpdir): old_result = {str(s.signature) for s in tree.find(search_obj, to_search.data)} print(*old_result, sep='\n') - with ZipStorage(str(tmpdir.join("tree.sbt.zip")), read_only=False) as storage: + with ZipStorage(str(tmpdir.join("tree.sbt.zip")), mode="w") as storage: tree.save(str(tmpdir.join("tree.sbt.json")), storage=storage) with ZipStorage(str(tmpdir.join("tree.sbt.zip"))) as storage: