diff --git a/openml/_api_calls.py b/openml/_api_calls.py index f039bb7c3..d451ac5c8 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -155,7 +155,7 @@ def _read_url_files(url, data=None, file_elements=None): def __read_url(url, request_method, data=None, md5_checksum=None): data = {} if data is None else data - if config.apikey is not None: + if config.apikey: data["api_key"] = config.apikey return _send_request( request_method=request_method, url=url, data=data, md5_checksum=md5_checksum diff --git a/openml/config.py b/openml/config.py index 237e71170..b9a9788ac 100644 --- a/openml/config.py +++ b/openml/config.py @@ -7,6 +7,8 @@ import logging import logging.handlers import os +from pathlib import Path +import platform from typing import Tuple, cast from io import StringIO @@ -19,7 +21,7 @@ file_handler = None -def _create_log_handlers(): +def _create_log_handlers(create_file_handler=True): """ Creates but does not attach the log handlers. """ global console_handler, file_handler if console_handler is not None or file_handler is not None: @@ -32,12 +34,13 @@ def _create_log_handlers(): console_handler = logging.StreamHandler() console_handler.setFormatter(output_formatter) - one_mb = 2 ** 20 - log_path = os.path.join(cache_directory, "openml_python.log") - file_handler = logging.handlers.RotatingFileHandler( - log_path, maxBytes=one_mb, backupCount=1, delay=True - ) - file_handler.setFormatter(output_formatter) + if create_file_handler: + one_mb = 2 ** 20 + log_path = os.path.join(cache_directory, "openml_python.log") + file_handler = logging.handlers.RotatingFileHandler( + log_path, maxBytes=one_mb, backupCount=1, delay=True + ) + file_handler.setFormatter(output_formatter) def _convert_log_levels(log_level: int) -> Tuple[int, int]: @@ -83,15 +86,18 @@ def set_file_log_level(file_output_level: int): # Default values (see also /~https://github.com/openml/OpenML/wiki/Client-API-Standards) _defaults = { - "apikey": None, + "apikey": "", "server": "https://www.openml.org/api/v1/xml", - "cachedir": os.path.expanduser(os.path.join("~", ".openml", "cache")), + "cachedir": ( + os.environ.get("XDG_CACHE_HOME", os.path.join("~", ".cache", "openml",)) + if platform.system() == "Linux" + else os.path.join("~", ".openml") + ), "avoid_duplicate_runs": "True", - "connection_n_retries": 10, - "max_retries": 20, + "connection_n_retries": "10", + "max_retries": "20", } -config_file = os.path.expanduser(os.path.join("~", ".openml", "config")) # Default values are actually added here in the _setup() function which is # called at the end of this module @@ -116,8 +122,8 @@ def get_server_base_url() -> str: avoid_duplicate_runs = True if _defaults["avoid_duplicate_runs"] == "True" else False # Number of retries if the connection breaks -connection_n_retries = _defaults["connection_n_retries"] -max_retries = _defaults["max_retries"] +connection_n_retries = int(_defaults["connection_n_retries"]) +max_retries = int(_defaults["max_retries"]) class ConfigurationForExamples: @@ -187,30 +193,53 @@ def _setup(): global connection_n_retries global max_retries - # read config file, create cache directory - try: - os.mkdir(os.path.expanduser(os.path.join("~", ".openml"))) - except FileExistsError: - # For other errors, we want to propagate the error as openml does not work without cache - pass + if platform.system() == "Linux": + config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml")) + else: + config_dir = Path("~") / ".openml" + # Still use os.path.expanduser to trigger the mock in the unit test + config_dir = Path(os.path.expanduser(config_dir)) + config_file = config_dir / "config" + + # read config file, create directory for config file + if not os.path.exists(config_dir): + try: + os.mkdir(config_dir) + cache_exists = True + except PermissionError: + cache_exists = False + else: + cache_exists = True + + if cache_exists: + _create_log_handlers() + else: + _create_log_handlers(create_file_handler=False) + openml_logger.warning( + "No permission to create OpenML directory at %s! This can result in OpenML-Python " + "not working properly." % config_dir + ) - config = _parse_config() + config = _parse_config(config_file) apikey = config.get("FAKE_SECTION", "apikey") server = config.get("FAKE_SECTION", "server") - short_cache_dir = config.get("FAKE_SECTION", "cachedir") - cache_directory = os.path.expanduser(short_cache_dir) + cache_dir = config.get("FAKE_SECTION", "cachedir") + cache_directory = os.path.expanduser(cache_dir) # create the cache subdirectory - try: - os.mkdir(cache_directory) - except FileExistsError: - # For other errors, we want to propagate the error as openml does not work without cache - pass + if not os.path.exists(cache_directory): + try: + os.mkdir(cache_directory) + except PermissionError: + openml_logger.warning( + "No permission to create openml cache directory at %s! This can result in " + "OpenML-Python not working properly." % cache_directory + ) avoid_duplicate_runs = config.getboolean("FAKE_SECTION", "avoid_duplicate_runs") - connection_n_retries = config.get("FAKE_SECTION", "connection_n_retries") - max_retries = config.get("FAKE_SECTION", "max_retries") + connection_n_retries = int(config.get("FAKE_SECTION", "connection_n_retries")) + max_retries = int(config.get("FAKE_SECTION", "max_retries")) if connection_n_retries > max_retries: raise ValueError( "A higher number of retries than {} is not allowed to keep the " @@ -218,31 +247,24 @@ def _setup(): ) -def _parse_config(): +def _parse_config(config_file: str): """ Parse the config file, set up defaults. """ config = configparser.RawConfigParser(defaults=_defaults) - if not os.path.exists(config_file): - # Create an empty config file if there was none so far - fh = open(config_file, "w") - fh.close() - logger.info( - "Could not find a configuration file at %s. Going to " - "create an empty file there." % config_file - ) - + # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. + # Cheat the ConfigParser module by adding a fake section header + config_file_ = StringIO() + config_file_.write("[FAKE_SECTION]\n") try: - # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. - # Cheat the ConfigParser module by adding a fake section header - config_file_ = StringIO() - config_file_.write("[FAKE_SECTION]\n") with open(config_file) as fh: for line in fh: config_file_.write(line) - config_file_.seek(0) - config.read_file(config_file_) + except FileNotFoundError: + logger.info("No config file found at %s, using default configuration.", config_file) except OSError as e: - logger.info("Error opening file %s: %s", config_file, e.message) + logger.info("Error opening file %s: %s", config_file, e.args[0]) + config_file_.seek(0) + config.read_file(config_file_) return config @@ -257,11 +279,7 @@ def get_cache_directory(): """ url_suffix = urlparse(server).netloc reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) - if not cache_directory: - _cachedir = _defaults(cache_directory) - else: - _cachedir = cache_directory - _cachedir = os.path.join(_cachedir, reversed_url_suffix) + _cachedir = os.path.join(cache_directory, reversed_url_suffix) return _cachedir @@ -297,4 +315,3 @@ def set_cache_directory(cachedir): ] _setup() -_create_log_handlers() diff --git a/openml/testing.py b/openml/testing.py index 31bd87b9a..f8e22bb4c 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -8,15 +8,8 @@ import time from typing import Dict, Union, cast import unittest -import warnings import pandas as pd -# Currently, importing oslo raises a lot of warning that it will stop working -# under python3.8; remove this once they disappear -with warnings.catch_warnings(): - warnings.simplefilter("ignore") - from oslo_concurrency import lockutils - import openml from openml.tasks import TaskType from openml.exceptions import OpenMLServerException @@ -100,13 +93,6 @@ def setUp(self, n_levels: int = 1): openml.config.avoid_duplicate_runs = False openml.config.cache_directory = self.workdir - # If we're on travis, we save the api key in the config file to allow - # the notebook tests to read them. - if os.environ.get("TRAVIS") or os.environ.get("APPVEYOR"): - with lockutils.external_lock("config", lock_path=self.workdir): - with open(openml.config.config_file, "w") as fh: - fh.write("apikey = %s" % openml.config.apikey) - # Increase the number of retries to avoid spurious server failures self.connection_n_retries = openml.config.connection_n_retries openml.config.connection_n_retries = 10 diff --git a/openml/utils.py b/openml/utils.py index 96102f5dd..a482bf0bc 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -244,7 +244,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters): limit=batch_size, offset=current_offset, output_format=output_format, - **active_filters + **active_filters, ) except openml.exceptions.OpenMLServerNoResult: # we want to return an empty dict in this case @@ -277,9 +277,11 @@ def _create_cache_directory(key): cache = config.get_cache_directory() cache_dir = os.path.join(cache, key) try: - os.makedirs(cache_dir) - except OSError: - pass + os.makedirs(cache_dir, exist_ok=True) + except Exception as e: + raise openml.exceptions.OpenMLCacheException( + f"Cannot create cache directory {cache_dir}." + ) from e return cache_dir diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 88136dbd9..73507aabb 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -1,15 +1,29 @@ # License: BSD 3-Clause +import tempfile import os +import unittest.mock import openml.config import openml.testing class TestConfig(openml.testing.TestBase): - def test_config_loading(self): - self.assertTrue(os.path.exists(openml.config.config_file)) - self.assertTrue(os.path.isdir(os.path.expanduser("~/.openml"))) + @unittest.mock.patch("os.path.expanduser") + @unittest.mock.patch("openml.config.openml_logger.warning") + @unittest.mock.patch("openml.config._create_log_handlers") + def test_non_writable_home(self, log_handler_mock, warnings_mock, expanduser_mock): + with tempfile.TemporaryDirectory(dir=self.workdir) as td: + expanduser_mock.side_effect = ( + os.path.join(td, "openmldir"), + os.path.join(td, "cachedir"), + ) + os.chmod(td, 0o444) + openml.config._setup() + + self.assertEqual(warnings_mock.call_count, 2) + self.assertEqual(log_handler_mock.call_count, 1) + self.assertFalse(log_handler_mock.call_args_list[0][1]["create_file_handler"]) class TestConfigurationForExamples(openml.testing.TestBase): diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index b5ef7b2bf..2a6d44f2d 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -1,12 +1,11 @@ -from openml.testing import TestBase +import os +import tempfile +import unittest.mock + import numpy as np -import openml -import sys -if sys.version_info[0] >= 3: - from unittest import mock -else: - import mock +import openml +from openml.testing import TestBase class OpenMLTaskTest(TestBase): @@ -20,7 +19,7 @@ def mocked_perform_api_call(call, request_method): def test_list_all(self): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) - @mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) + @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) def test_list_all_few_results_available(self, _perform_api_call): # we want to make sure that the number of api calls is only 1. # Although we have multiple versions of the iris dataset, there is only @@ -86,3 +85,18 @@ def test_list_all_for_evaluations(self): # might not be on test server after reset, please rerun test at least once if fails self.assertEqual(len(evaluations), required_size) + + @unittest.mock.patch("openml.config.get_cache_directory") + def test__create_cache_directory(self, config_mock): + with tempfile.TemporaryDirectory(dir=self.workdir) as td: + config_mock.return_value = td + openml.utils._create_cache_directory("abc") + self.assertTrue(os.path.exists(os.path.join(td, "abc"))) + subdir = os.path.join(td, "def") + os.mkdir(subdir) + os.chmod(subdir, 0o444) + config_mock.return_value = subdir + with self.assertRaisesRegex( + openml.exceptions.OpenMLCacheException, r"Cannot create cache directory", + ): + openml.utils._create_cache_directory("ghi")