Skip to content

Commit

Permalink
pythonGH-89727: Fix shutil.rmtree() recursion error on deep trees (p…
Browse files Browse the repository at this point in the history
…ython#119808)

Implement `shutil._rmtree_safe_fd()` using a list as a stack to avoid emitting recursion errors on deeply nested trees.

`shutil._rmtree_unsafe()` was fixed in a150679.
  • Loading branch information
barneygale authored Jun 1, 2024
1 parent 63111bf commit 53b1981
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 97 deletions.
162 changes: 66 additions & 96 deletions Lib/shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,81 +635,76 @@ def onerror(err):
onexc(os.rmdir, path, err)

# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onexc):
def _rmtree_safe_fd(stack, onexc):
# Each stack item has four elements:
# * func: The first operation to perform: os.lstat, os.close or os.rmdir.
# Walking a directory starts with an os.lstat() to detect symlinks; in
# this case, func is updated before subsequent operations and passed to
# onexc() if an error occurs.
# * dirfd: Open file descriptor, or None if we're processing the top-level
# directory given to rmtree() and the user didn't supply dir_fd.
# * path: Path of file to operate upon. This is passed to onexc() if an
# error occurs.
# * orig_entry: os.DirEntry, or None if we're processing the top-level
# directory given to rmtree(). We used the cached stat() of the entry to
# save a call to os.lstat() when walking subdirectories.
func, dirfd, path, orig_entry = stack.pop()
name = path if orig_entry is None else orig_entry.name
try:
if func is os.close:
os.close(dirfd)
return
if func is os.rmdir:
os.rmdir(name, dir_fd=dirfd)
return

# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
assert func is os.lstat
if orig_entry is None:
orig_st = os.lstat(name, dir_fd=dirfd)
else:
orig_st = orig_entry.stat(follow_symlinks=False)

func = os.open # For error reporting.
topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd)

func = os.path.islink # For error reporting.
try:
if not os.path.samestat(orig_st, os.fstat(topfd)):
# Symlinks to directories are forbidden, see GH-46010.
raise OSError("Cannot call rmtree on a symbolic link")
stack.append((os.rmdir, dirfd, path, orig_entry))
finally:
stack.append((os.close, topfd, path, orig_entry))

func = os.scandir # For error reporting.
with os.scandir(topfd) as scandir_it:
entries = list(scandir_it)
except FileNotFoundError:
return
except OSError as err:
err.filename = path
onexc(os.scandir, path, err)
return
for entry in entries:
fullname = os.path.join(path, entry.name)
try:
is_dir = entry.is_dir(follow_symlinks=False)
except FileNotFoundError:
continue
except OSError:
is_dir = False
else:
if is_dir:
try:
orig_st = entry.stat(follow_symlinks=False)
is_dir = stat.S_ISDIR(orig_st.st_mode)
except FileNotFoundError:
continue
except OSError as err:
onexc(os.lstat, fullname, err)
continue
if is_dir:
for entry in entries:
fullname = os.path.join(path, entry.name)
try:
dirfd = os.open(entry.name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=topfd)
dirfd_closed = False
if entry.is_dir(follow_symlinks=False):
# Traverse into sub-directory.
stack.append((os.lstat, topfd, fullname, entry))
continue
except FileNotFoundError:
continue
except OSError as err:
onexc(os.open, fullname, err)
else:
try:
if os.path.samestat(orig_st, os.fstat(dirfd)):
_rmtree_safe_fd(dirfd, fullname, onexc)
try:
os.close(dirfd)
except OSError as err:
# close() should not be retried after an error.
dirfd_closed = True
onexc(os.close, fullname, err)
dirfd_closed = True
try:
os.rmdir(entry.name, dir_fd=topfd)
except FileNotFoundError:
continue
except OSError as err:
onexc(os.rmdir, fullname, err)
else:
try:
# This can only happen if someone replaces
# a directory with a symlink after the call to
# os.scandir or stat.S_ISDIR above.
raise OSError("Cannot call rmtree on a symbolic "
"link")
except OSError as err:
onexc(os.path.islink, fullname, err)
finally:
if not dirfd_closed:
try:
os.close(dirfd)
except OSError as err:
onexc(os.close, fullname, err)
else:
except OSError:
pass
try:
os.unlink(entry.name, dir_fd=topfd)
except FileNotFoundError:
continue
except OSError as err:
onexc(os.unlink, fullname, err)
except FileNotFoundError as err:
if orig_entry is None or func is os.close:
err.filename = path
onexc(func, path, err)
except OSError as err:
err.filename = path
onexc(func, path, err)

_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
os.supports_dir_fd and
Expand Down Expand Up @@ -762,41 +757,16 @@ def onexc(*args):
# While the unsafe rmtree works fine on bytes, the fd based does not.
if isinstance(path, bytes):
path = os.fsdecode(path)
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
try:
orig_st = os.lstat(path, dir_fd=dir_fd)
except OSError as err:
onexc(os.lstat, path, err)
return
stack = [(os.lstat, dir_fd, path, None)]
try:
fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dir_fd)
fd_closed = False
except OSError as err:
onexc(os.open, path, err)
return
try:
if os.path.samestat(orig_st, os.fstat(fd)):
_rmtree_safe_fd(fd, path, onexc)
try:
os.close(fd)
except OSError as err:
# close() should not be retried after an error.
fd_closed = True
onexc(os.close, path, err)
fd_closed = True
try:
os.rmdir(path, dir_fd=dir_fd)
except OSError as err:
onexc(os.rmdir, path, err)
else:
try:
# symlinks to directories are forbidden, see bug #1669
raise OSError("Cannot call rmtree on a symbolic link")
except OSError as err:
onexc(os.path.islink, path, err)
while stack:
_rmtree_safe_fd(stack, onexc)
finally:
if not fd_closed:
# Close any file descriptors still on the stack.
while stack:
func, fd, path, entry = stack.pop()
if func is not os.close:
continue
try:
os.close(fd)
except OSError as err:
Expand Down
1 change: 0 additions & 1 deletion Lib/test/test_shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,6 @@ def _onexc(fn, path, exc):
shutil.rmtree(TESTFN)
raise

@unittest.skipIf(shutil._use_fd_functions, "fd-based functions remain unfixed (GH-89727)")
def test_rmtree_above_recursion_limit(self):
recursion_limit = 40
# directory_depth > recursion_limit
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix issue with :func:`shutil.rmtree` where a :exc:`RecursionError` is raised
on deep directory trees.

0 comments on commit 53b1981

Please sign in to comment.