From c9c86557cc996c74877bed1158471b225456554f Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Mon, 10 Jun 2024 17:45:03 +0200 Subject: [PATCH 1/2] [Streaming] retry on requests errors --- src/datasets/utils/file_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/datasets/utils/file_utils.py b/src/datasets/utils/file_utils.py index 261ea258992..a2db0ec8cab 100644 --- a/src/datasets/utils/file_utils.py +++ b/src/datasets/utils/file_utils.py @@ -1103,7 +1103,12 @@ def read_with_retries(*args, **kwargs): try: out = read(*args, **kwargs) break - except (ClientError, TimeoutError) as err: + except ( + ClientError, + TimeoutError, + requests.exceptions.ConnectTimeout, + requests.exceptions.ConnectionError, + ) as err: disconnect_err = err logger.warning( f"Got disconnected from remote data host. Retrying in {config.STREAMING_READ_RETRY_INTERVAL}sec [{retry}/{max_retries}]" From 6cd13c11c876de387f48370c667b9ba5ac65418b Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Fri, 28 Jun 2024 11:34:48 +0200 Subject: [PATCH 2/2] lucain's comment --- src/datasets/utils/file_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/datasets/utils/file_utils.py b/src/datasets/utils/file_utils.py index 41b0a48cd44..2fb6ca20438 100644 --- a/src/datasets/utils/file_utils.py +++ b/src/datasets/utils/file_utils.py @@ -4,6 +4,7 @@ Copyright by the AllenNLP authors. """ +import asyncio import copy import glob import io @@ -20,7 +21,6 @@ import warnings import xml.dom.minidom import zipfile -from asyncio import TimeoutError from contextlib import closing, contextmanager from functools import partial from io import BytesIO @@ -31,10 +31,10 @@ from urllib.parse import urljoin, urlparse from xml.etree import ElementTree as ET +import aiohttp.client_exceptions import fsspec import huggingface_hub import requests -from aiohttp.client_exceptions import ClientError from fsspec.core import strip_protocol, url_to_fs from fsspec.utils import can_be_local from huggingface_hub.utils import EntryNotFoundError, insecure_hashlib @@ -1094,8 +1094,8 @@ def read_with_retries(*args, **kwargs): out = read(*args, **kwargs) break except ( - ClientError, - TimeoutError, + aiohttp.client_exceptions.ClientError, + asyncio.TimeoutError, requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError, ) as err: