Skip to content

Commit

Permalink
Improve the backup mechanism of the configuration file
Browse files Browse the repository at this point in the history
The `Config` class was creating a new backup each time `store` was
called, which also happens if nothing really changed to the configuration
content. With the recently introduced change that backup files are now
made unique through a timestamp, this led to a lot of backups being
created that were essentially clones.

To improve this, the `Config.store` method now only writes the file to
disk if the contents in memory have changed. This is done by comparing
the checksum of the on disk file and that memory contents as written to
a temporary file. If the checksums differ a backup is created of the
existing file and the new contents written to the temporary file are
copied to the actual configuration file location. This latter new
approach also guarantees that the backup is always created before
overwriting the original one.
  • Loading branch information
sphuber committed Nov 27, 2019
1 parent 80bd663 commit 71479b8
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 23 deletions.
7 changes: 6 additions & 1 deletion aiida/backends/tests/utils/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def temporary_config_instance():
current_profile_name = None
temporary_config_directory = None

from aiida.common.utils import Capturing
from aiida.manage import configuration
from aiida.manage.configuration import settings, load_profile, reset_profile

Expand All @@ -75,7 +76,11 @@ def temporary_config_instance():

# Create the instance base directory structure, the config file and a dummy profile
create_instance_directories()
configuration.CONFIG = configuration.load_config(create=True)

# The constructor of `Config` called by `load_config` will print warning messages about migrating it
with Capturing():
configuration.CONFIG = configuration.load_config(create=True)

profile = create_mock_profile(name=profile_name, repository_dirpath=temporary_config_directory)

# Add the created profile and set it as the default
Expand Down
7 changes: 0 additions & 7 deletions aiida/manage/configuration/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ def load_config(create=False):
:rtype: :class:`~aiida.manage.configuration.config.Config`
:raises aiida.common.MissingConfigurationError: if the configuration file could not be found and create=False
"""
import io
import os
from aiida.common import exceptions
from .config import Config
Expand Down Expand Up @@ -116,12 +115,6 @@ def load_config(create=False):
if not os.path.isfile(filepath) and not create:
raise exceptions.MissingConfigurationError('configuration file {} does not exist'.format(filepath))

# If it doesn't exist, just create the file
if not os.path.isfile(filepath):
from aiida.common import json
with io.open(filepath, 'ab') as handle:
json.dump({}, handle)

try:
config = Config.from_file(filepath)
except ValueError:
Expand Down
61 changes: 46 additions & 15 deletions aiida/manage/configuration/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from .options import get_option, parse_option, NO_DEFAULT
from .profile import Profile
from .settings import DEFAULT_UMASK, DEFAULT_CONFIG_INDENT_SIZE

__all__ = ('Config',)

Expand All @@ -39,14 +38,22 @@ class Config(object): # pylint: disable=too-many-public-methods
def from_file(cls, filepath):
"""Instantiate a configuration object from the contents of a given file.
.. note:: if the filepath does not exist it will be created with the default configuration.
:param filepath: the absolute path to the configuration file
:return: `Config` instance
"""
from aiida.cmdline.utils import echo
from .migrations import check_and_migrate_config, config_needs_migrating

with io.open(filepath, 'r', encoding='utf8') as handle:
config = json.load(handle)
try:
with io.open(filepath, 'r', encoding='utf8') as handle:
config = json.load(handle)
except (IOError, OSError):
# The file does not exist so "touch" it and create an empty configuration
with io.open(filepath, 'ab'):
pass
config = {}

# If the configuration file needs to be migrated, first create a specific backup so it can easily be reverted
if config_needs_migrating(config):
Expand All @@ -73,11 +80,7 @@ def _backup(cls, filepath):
while not filepath_backup or os.path.isfile(filepath_backup):
filepath_backup = '{}.{}'.format(filepath, timezone.now().strftime('%Y%m%d-%H%M%S.%f'))

try:
umask = os.umask(DEFAULT_UMASK)
shutil.copy(filepath, filepath_backup)
finally:
os.umask(umask)
shutil.copy(filepath, filepath_backup)

return filepath_backup

Expand Down Expand Up @@ -366,16 +369,44 @@ def get_option(self, option_name, scope=None, default=True):
return value

def store(self):
"""Write the current config to file."""
if os.path.isfile(self.filepath):
self._backup(self.filepath)
"""Write the current config to file.
.. note:: if the configuration file already exists on disk it will only be overwritten if there are changes
made to the configuration in memory. In that case, a backup of the original file on disk will be created.
:return: self
"""
import tempfile
from aiida.common.files import md5_from_filelike, md5_file

# If the filepath of this configuration does not yet exist, simply write it.
if not os.path.isfile(self.filepath):
with io.open(self.filepath, 'wb') as handle:
self._write(handle)
return self

# Otherwise, we write the content to a temporary file and compare its md5 checksum with the current config on
# disk. Only when the checksums differ do we first create a backup and then overwrite the existing file.
with tempfile.NamedTemporaryFile() as handle:
self._write(handle)
handle.seek(0)

if md5_from_filelike(handle) != md5_file(self.filepath):
self._backup(self.filepath)
shutil.copy(handle.name, self.filepath)

return self

def _write(self, filelike):
"""Write the contents of `self.dictionary` to the given file handle.
:param filelike: the filelike object to write the current configuration to
"""
from .settings import DEFAULT_UMASK, DEFAULT_CONFIG_INDENT_SIZE

umask = os.umask(DEFAULT_UMASK)

try:
with io.open(self.filepath, 'wb') as handle:
json.dump(self.dictionary, handle, indent=DEFAULT_CONFIG_INDENT_SIZE)
json.dump(self.dictionary, filelike, indent=DEFAULT_CONFIG_INDENT_SIZE)
finally:
os.umask(umask)

return self

0 comments on commit 71479b8

Please sign in to comment.