Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Node: add the copy_tree method #5114

Merged
merged 2 commits into from
Sep 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions aiida/orm/nodes/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,14 @@ def walk(self, path: FilePath = None) -> Iterable[Tuple[pathlib.PurePosixPath, L
"""
yield from self._repository.walk(path)

def copy_tree(self, target: Union[str, pathlib.Path], path: FilePath = None) -> None:
    """Write the repository content of this node to a directory on the local file system.

    The call is forwarded unchanged to ``copy_tree`` of the repository instance backing this node.

    :param target: absolute path of the directory into which the content should be copied.
    :param path: optional relative path inside the repository; if given, only its contents are copied.
    """
    repository = self._repository
    repository.copy_tree(target, path)

def delete_object(self, path: str):
"""Delete the object from the repository.

Expand Down
37 changes: 37 additions & 0 deletions aiida/repository/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,3 +473,40 @@ def walk(self, path: FilePath = None) -> Iterable[Tuple[pathlib.PurePosixPath, L
yield from self.walk(path / dirname)

yield path, dirnames, filenames

def copy_tree(self, target: Union[str, pathlib.Path], path: FilePath = None) -> None:
    """Copy the contents of the entire node repository to another location on the local file system.

    Every directory and file yielded by :meth:`walk` for ``path`` is recreated on disk at ``target`` joined with
    the root that :meth:`walk` reports for it. Directories that already exist on disk are reused.

    :param target: absolute path of the directory where to copy the contents to.
    :param path: optional relative path whose contents to copy.
    :raises TypeError: if ``target`` is of incorrect type or not absolute.
    :raises NotADirectoryError: if ``path`` does not reference a directory.
    """
    # Local import so that this method does not rely on a module-level import being present.
    import shutil

    path = self._pre_process_path(path)
    file_object = self.get_object(path)

    if file_object.file_type != FileType.DIRECTORY:
        raise NotADirectoryError(f'object with path `{path}` is not a directory.')

    if isinstance(target, str):
        target = pathlib.Path(target)

    if not isinstance(target, pathlib.Path):
        # Report the offending ``target`` value; the message previously interpolated ``path`` by mistake.
        raise TypeError(f'path `{target}` is not of type `str` nor `pathlib.Path`.')

    if not target.is_absolute():
        raise TypeError(f'provided target `{target}` is not an absolute path.')

    for root, dirnames, filenames in self.walk(path):
        dirpath = target / root

        for dirname in dirnames:
            (dirpath / dirname).mkdir(parents=True, exist_ok=True)

        for filename in filenames:
            # The parent may not exist yet, e.g. for files that live directly in the top-level directory.
            dirpath.mkdir(parents=True, exist_ok=True)

            # Stream the content in chunks instead of loading each file into memory in its entirety.
            with self.open(root / filename) as source, (dirpath / filename).open('wb') as sink:
                shutil.copyfileobj(source, sink)
52 changes: 52 additions & 0 deletions docs/source/topics/repository.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,57 @@ Note that these methods can only be used to read content from the repository and
To write files to the repository, use the methods that are described in the section on :ref:`writing to the repository <topics:repository:writing>`.


.. _topics:repository:copying:

Copying from the repository
===========================

If you want to copy specific files from a node's repository, the section on :ref:`reading from the repository<topics:repository:reading>` shows how to read their content which can then be written elsewhere.
However, sometimes you want to copy the entire contents of the node's repository, or a subdirectory of it.
The :meth:`~aiida.orm.nodes.repository.NodeRepositoryMixin.copy_tree` method makes this easy and can be used as follows:

.. code:: python

node.copy_tree('/some/target/directory')

which will write the entire repository content of ``node`` to the directory ``/some/target/directory`` on the local file system.
If you only want to copy a particular subdirectory of the repository, you can pass this as the second ``path`` argument:

.. code:: python

node.copy_tree('/some/target/directory', path='sub/directory')

This method, combined with :meth:`~aiida.orm.nodes.repository.NodeRepositoryMixin.put_object_from_tree`, makes it easy to copy the entire repository content (or a subdirectory) from one node to another:

.. code:: python

import tempfile
node_source = load_node(<PK>)
node_target = Node()

with tempfile.TemporaryDirectory() as dirpath:
node_source.copy_tree(dirpath)
node_target.put_object_from_tree(dirpath)

Note that this method is not the most efficient as the files are first written from ``node_source`` to a temporary directory on disk, before they are read into memory again and written to the repository of ``node_target``.
A more efficient approach, which requires a bit more code, directly uses the :meth:`~aiida.orm.nodes.repository.NodeRepositoryMixin.walk` method explained in the section on :ref:`listing repository content <topics:repository:listing>`:

.. code:: python

node_source = load_node(<PK>)
node_target = Node()

for root, dirnames, filenames in node_source.walk():
for filename in filenames:
filepath = root / filename
with node_source.open(filepath) as handle:
node_target.put_object_from_filelike(handle, filepath)

.. note:: In the example above, only the files are explicitly copied over.
Any intermediate nested directories will be automatically created in the virtual hierarchy.
However, currently it is not possible to create a directory explicitly.
Empty directories are not yet supported.


.. |os.walk| replace:: ``os.walk`` method of the Python standard library
.. _os.walk: https://docs.python.org/3/library/os.html#os.walk
15 changes: 15 additions & 0 deletions tests/orm/node/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,18 @@ def test_walk():
(pathlib.Path('.'), ['relative'], []),
(pathlib.Path('relative'), [], ['path']),
]


@pytest.mark.usefixtures('clear_database_before_test')
def test_copy_tree(tmp_path):
    """Check that ``copy_tree`` on a node writes its repository content to the given directory."""
    relpath = 'relative/path'

    node = Data()
    node.put_object_from_filelike(io.BytesIO(b'content'), relpath)
    node.copy_tree(tmp_path)

    copied_file = tmp_path / 'relative' / 'path'
    assert copied_file.parent.is_dir()
    assert copied_file.is_file()

    # The copy on disk should hold exactly the bytes stored in the node repository.
    with node.open(relpath, 'rb') as handle:
        expected = handle.read()
    assert copied_file.read_bytes() == expected
51 changes: 51 additions & 0 deletions tests/repository/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import contextlib
import io
import pathlib
import typing as t

import pytest

Expand Down Expand Up @@ -57,6 +58,20 @@ def repository_uninitialised(request, tmp_path_factory) -> Repository:
yield repository


@pytest.fixture(scope='function', params=[True, False])
def tmp_path_parametrized(request, tmp_path_factory) -> t.Union[str, pathlib.Path]:
    """Provide a fresh temporary directory, once as a ``str`` and once as a ``pathlib.Path``.

    Tests requesting this fixture run twice, which covers methods that accept both path types without having to
    parametrize each test by hand.
    """
    as_string = request.param
    target = tmp_path_factory.mktemp('target')

    yield str(target) if as_string else target


def test_uuid(repository_uninitialised):
"""Test the ``uuid`` property."""
repository = repository_uninitialised
Expand Down Expand Up @@ -566,6 +581,42 @@ def test_walk(repository, generate_directory):
]


@pytest.mark.parametrize('path', ('.', 'relative'))
def test_copy_tree(repository, generate_directory, tmp_path_parametrized, path):
    """Check that ``Repository.copy_tree`` reproduces directories and file content on disk."""
    hierarchy = {'file_a': None, 'relative': {'file_b': None, 'sub': {'file_c': None}}}
    repository.put_object_from_tree(str(generate_directory(hierarchy)))

    repository.copy_tree(tmp_path_parametrized, path=path)

    # The fixture yields either a ``str`` or a ``pathlib.Path``; normalize it once for the checks below.
    target = pathlib.Path(tmp_path_parametrized)

    for root, dirnames, filenames in repository.walk(path):
        copied_root = target / root

        for dirname in dirnames:
            assert (copied_root / dirname).is_dir()

        for filename in filenames:
            copied_file = copied_root / filename
            assert copied_file.is_file()
            with repository.open(root / filename) as handle:
                assert copied_file.read_bytes() == handle.read()


@pytest.mark.parametrize(
    ('argument', 'value', 'exception', 'match'),
    (
        ('target', None, TypeError, r'path .* is not of type `str` nor `pathlib.Path`.'),
        ('target', 'relative/path', TypeError, r'provided target `.*` is not an absolute path.'),
        ('target', pathlib.Path('.'), TypeError, r'provided target `.*` is not an absolute path.'),
        ('path', pathlib.Path('file_a'), NotADirectoryError, r'object with path `.*` is not a directory.'),
    ),
)
def test_copy_tree_invalid(tmp_path, repository, generate_directory, argument, value, exception, match):
    """Check that ``Repository.copy_tree`` raises the expected exception for invalid ``target`` or ``path``."""
    directory = generate_directory({'file_a': None})
    repository.put_object_from_tree(str(directory))

    # Only the argument under test receives the invalid value; a bad ``path`` is paired with a valid target.
    if argument == 'target':
        kwargs = {'target': value}
    else:
        kwargs = {'target': tmp_path, 'path': value}

    with pytest.raises(exception, match=match):
        repository.copy_tree(**kwargs)


def test_clone(repository, generate_directory):
"""Test the ``Repository.clone`` method."""
directory = generate_directory({'file_a': None, 'relative': {'file_b': None, 'sub': {'file_c': None}}})
Expand Down