Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-89727: Fix os.walk RecursionError on deep trees #99803

Merged
merged 16 commits into from
Dec 19, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 87 additions & 77 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,89 +340,99 @@ def walk(top, topdown=True, onerror=None, followlinks=False):

"""
sys.audit("os.walk", top, topdown, onerror, followlinks)
return _walk(fspath(top), topdown, onerror, followlinks)

def _walk(top, topdown, onerror, followlinks):
dirs = []
nondirs = []
walk_dirs = []

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
try:
# Note that scandir is global in this module due
# to earlier import-*.
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
return

with scandir_it:
while True:
try:
from collections import deque

stack = deque([(False, fspath(top))])
while stack:
must_yield, top = stack.pop()
if must_yield:
yield top
continue

dirs = []
nondirs = []
walk_dirs = []

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
jonburdo marked this conversation as resolved.
Show resolved Hide resolved
try:
# Note that scandir is global in this module due
# to earlier import-*.
jonburdo marked this conversation as resolved.
Show resolved Hide resolved
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
continue

cont = False
with scandir_it:
while True:
try:
entry = next(scandir_it)
except StopIteration:
try:
entry = next(scandir_it)
except StopIteration:
break
except OSError as error:
if onerror is not None:
onerror(error)
cont = True
break
except OSError as error:
if onerror is not None:
onerror(error)
return

try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider that the entry is not
# a directory, same behaviour than os.path.isdir().
is_dir = False

if is_dir:
dirs.append(entry.name)
else:
nondirs.append(entry.name)
try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider that the entry is not
# a directory, same behaviour than os.path.isdir().
jonburdo marked this conversation as resolved.
Show resolved Hide resolved
is_dir = False

if not topdown and is_dir:
# Bottom-up: recurse into sub-directory, but exclude symlinks to
# directories if followlinks is False
if followlinks:
walk_into = True
if is_dir:
dirs.append(entry.name)
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider that the
# entry is not a symbolic link, same behaviour than
# os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)

# Yield before recursion if going top down
if topdown:
yield top, dirs, nondirs

# Recurse into sub-directories
islink, join = path.islink, path.join
for dirname in dirs:
new_path = join(top, dirname)
# Issue #23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
yield from _walk(new_path, topdown, onerror, followlinks)
else:
# Recurse into sub-directories
for new_path in walk_dirs:
yield from _walk(new_path, topdown, onerror, followlinks)
# Yield after recursion if going bottom up
yield top, dirs, nondirs
nondirs.append(entry.name)

if not topdown and is_dir:
# Bottom-up: traverse into sub-directory, but exclude symlinks to
# directories if followlinks is False
if followlinks:
walk_into = True
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider that the
# entry is not a symbolic link, same behaviour than
# os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)
if cont:
continue

# Yield before sub-directory traversal if going top down
if topdown:
yield top, dirs, nondirs
# Traverse into sub-directories
islink, join = path.islink, path.join
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're caching these functions in locals, might as well do it outside the main while loop to micro-optimize a bit more.

for dirname in reversed(dirs):
new_path = join(top, dirname)
# Issue #23605: os.path.islink() is used instead of caching
jonburdo marked this conversation as resolved.
Show resolved Hide resolved
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
stack.append((False, new_path))
else:
# Yield after sub-directory traversal if going bottom up
stack.append((True, (top, dirs, nondirs)))
# Traverse into sub-directories
for new_path in reversed(walk_dirs):
stack.append((False, new_path))
JelleZijlstra marked this conversation as resolved.
Show resolved Hide resolved

__all__.append("walk")

Expand Down
16 changes: 10 additions & 6 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2178,19 +2178,23 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds):
testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds)

@contextlib.contextmanager
def set_recursion_limit(limit):
"""Temporarily change the recursion limit."""
original_limit = sys.getrecursionlimit()
try:
sys.setrecursionlimit(limit)
yield
finally:
sys.setrecursionlimit(original_limit)

def infinite_recursion(max_depth=75):
"""Set a lower limit for tests that interact with infinite recursions
(e.g test_ast.ASTHelpers_Test.test_recursion_direct) since on some
debug windows builds, due to not enough functions being inlined the
stack size might not handle the default recursion limit (1000). See
bpo-11105 for details."""
return set_recursion_limit(max_depth)

original_depth = sys.getrecursionlimit()
try:
sys.setrecursionlimit(max_depth)
yield
finally:
sys.setrecursionlimit(original_depth)

def ignore_deprecations_from(module: str, *, like: str) -> object:
token = object()
Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from test.support import import_helper
from test.support import os_helper
from test.support import socket_helper
from test.support import set_recursion_limit
from test.support import warnings_helper
from platform import win32_is_iot

Expand Down Expand Up @@ -1471,6 +1472,12 @@ def test_walk_many_open_files(self):
self.assertEqual(next(it), expected)
p = os.path.join(p, 'd')

def test_walk_above_recursion_limit(self):
os.makedirs(os.path.join(self.walk_path, *(['d'] * 50)))
with set_recursion_limit(50):
all = list(self.walk(self.walk_path))
self.assertEqual(len(all), 54)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also test that the return value is correct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, good idea



@unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()")
class FwalkTests(WalkTests):
Expand Down Expand Up @@ -1545,6 +1552,8 @@ def test_fd_leak(self):

# fwalk() keeps file descriptors open
test_walk_many_open_files = None
# fwalk() still uses recursion
test_walk_above_recursion_limit = None


class BytesWalkTests(WalkTests):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix issue with :func:`os.walk` where a :exc:`RecursionError` would occur on
deep directory structures by adjusting the implementation of
:func:`os.walk` to be iterative instead of recursive.