Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add/1034 #1352

Merged
merged 10 commits into from
Sep 22, 2024
Previous commit
Next commit
Ensure metadata is downloaded to verify dataset is processed
  • Loading branch information
PGijsbers committed Sep 19, 2024
commit 879bd251f0d140f197704b600652c79126d98f1c
24 changes: 7 additions & 17 deletions tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1188,28 +1188,18 @@ def test_publish_fetch_ignore_attribute(self):
downloaded_dataset = self._wait_for_dataset_being_processed(dataset.id)
assert downloaded_dataset.ignore_attribute == ignore_attribute

def _wait_for_dataset_being_processed(
    self,
    dataset_id,
    poll_delay: int = 10,
    max_waiting_time_seconds: int = 600,
):
    """Poll the server until the dataset can be fetched together with its qualities.

    The dataset is requested repeatedly with ``download_qualities=True``;
    a successful fetch is returned immediately. Each attempt that raises
    ``OpenMLServerException`` is logged and followed by a pause of
    ``poll_delay`` seconds before the next attempt.

    Parameters
    ----------
    dataset_id
        Identifier of the dataset to fetch from the server.
    poll_delay : int
        Seconds to sleep after a failed attempt.
    max_waiting_time_seconds : int
        Time budget in seconds, measured with ``time.time()`` from the
        first attempt. No new attempt is started once it is exhausted.

    Returns
    -------
    The object returned by ``openml.datasets.get_dataset``.

    Raises
    ------
    ValueError
        If no attempt succeeded within ``max_waiting_time_seconds``.
    """
    start_time = time.time()
    while (time.time() - start_time) < max_waiting_time_seconds:
        try:
            # being able to download qualities is a sign that the dataset is processed
            return openml.datasets.get_dataset(dataset_id, download_qualities=True)
        except OpenMLServerException as e:
            # Server codes noted for this situation:
            #   273: Dataset not processed yet
            #   362: No qualities found
            TestBase.logger.error(
                f"Failed to fetch dataset:{dataset_id} with '{e!s}'.",
            )
            time.sleep(poll_delay)
    raise ValueError(f"TIMEOUT: Failed to fetch uploaded dataset - {dataset_id}")

def test_create_dataset_row_id_attribute_error(self):
# meta-information
Expand Down Expand Up @@ -1900,7 +1890,7 @@ def _assert_datasets_retrieved_successfully( dids: Iterable[int], with_qualities
assert has_data if with_data else not has_data

@pytest.fixture()
def isolate_for_test(tmp_path):
def isolate_for_test():
t = TestOpenMLDataset()
t.setUp(tmpdir_suffix=uuid.uuid4().hex)
yield
Expand Down