Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-89727: Fix os.walk RecursionError on deep trees #99803

Merged
merged 16 commits into from
Dec 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 83 additions & 77 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,89 +340,95 @@ def walk(top, topdown=True, onerror=None, followlinks=False):

"""
sys.audit("os.walk", top, topdown, onerror, followlinks)
return _walk(fspath(top), topdown, onerror, followlinks)

def _walk(top, topdown, onerror, followlinks):
dirs = []
nondirs = []
walk_dirs = []

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
try:
# Note that scandir is global in this module due
# to earlier import-*.
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
return

with scandir_it:
while True:
try:
stack = [(False, fspath(top))]
islink, join = path.islink, path.join
while stack:
must_yield, top = stack.pop()
if must_yield:
yield top
continue

dirs = []
nondirs = []
walk_dirs = []

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains.
# We suppress the exception here, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit.
try:
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
continue

cont = False
with scandir_it:
while True:
try:
entry = next(scandir_it)
except StopIteration:
try:
entry = next(scandir_it)
except StopIteration:
break
except OSError as error:
if onerror is not None:
onerror(error)
cont = True
break
except OSError as error:
if onerror is not None:
onerror(error)
return

try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider that the entry is not
# a directory, same behaviour than os.path.isdir().
is_dir = False

if is_dir:
dirs.append(entry.name)
else:
nondirs.append(entry.name)
try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider the entry not to
# be a directory, same behaviour as os.path.isdir().
is_dir = False

if not topdown and is_dir:
# Bottom-up: recurse into sub-directory, but exclude symlinks to
# directories if followlinks is False
if followlinks:
walk_into = True
if is_dir:
dirs.append(entry.name)
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider that the
# entry is not a symbolic link, same behaviour than
# os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)

# Yield before recursion if going top down
if topdown:
yield top, dirs, nondirs

# Recurse into sub-directories
islink, join = path.islink, path.join
for dirname in dirs:
new_path = join(top, dirname)
# Issue #23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
yield from _walk(new_path, topdown, onerror, followlinks)
else:
# Recurse into sub-directories
for new_path in walk_dirs:
yield from _walk(new_path, topdown, onerror, followlinks)
# Yield after recursion if going bottom up
yield top, dirs, nondirs
nondirs.append(entry.name)

if not topdown and is_dir:
# Bottom-up: traverse into sub-directory, but exclude
# symlinks to directories if followlinks is False
if followlinks:
walk_into = True
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider the
# entry not to be a symbolic link, same behaviour
# as os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)
if cont:
continue

if topdown:
# Yield before sub-directory traversal if going top down
yield top, dirs, nondirs
# Traverse into sub-directories
for dirname in reversed(dirs):
new_path = join(top, dirname)
# bpo-23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
stack.append((False, new_path))
else:
# Yield after sub-directory traversal if going bottom up
stack.append((True, (top, dirs, nondirs)))
# Traverse into sub-directories
for new_path in reversed(walk_dirs):
stack.append((False, new_path))
JelleZijlstra marked this conversation as resolved.
Show resolved Hide resolved

__all__.append("walk")

Expand Down
16 changes: 10 additions & 6 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2178,19 +2178,23 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds):
testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds)

@contextlib.contextmanager
def set_recursion_limit(limit):
"""Temporarily change the recursion limit."""
original_limit = sys.getrecursionlimit()
try:
sys.setrecursionlimit(limit)
yield
finally:
sys.setrecursionlimit(original_limit)

def infinite_recursion(max_depth=75):
"""Set a lower limit for tests that interact with infinite recursions
(e.g test_ast.ASTHelpers_Test.test_recursion_direct) since on some
debug windows builds, due to not enough functions being inlined the
stack size might not handle the default recursion limit (1000). See
bpo-11105 for details."""
return set_recursion_limit(max_depth)

original_depth = sys.getrecursionlimit()
try:
sys.setrecursionlimit(max_depth)
yield
finally:
sys.setrecursionlimit(original_depth)

def ignore_deprecations_from(module: str, *, like: str) -> object:
token = object()
Expand Down
43 changes: 43 additions & 0 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from test.support import import_helper
from test.support import os_helper
from test.support import socket_helper
from test.support import set_recursion_limit
from test.support import warnings_helper
from platform import win32_is_iot

Expand Down Expand Up @@ -1471,6 +1472,46 @@ def test_walk_many_open_files(self):
self.assertEqual(next(it), expected)
p = os.path.join(p, 'd')

def test_walk_above_recursion_limit(self):
depth = 50
os.makedirs(os.path.join(self.walk_path, *(['d'] * depth)))
with set_recursion_limit(depth - 5):
all = list(self.walk(self.walk_path))

sub2_path = self.sub2_tree[0]
for root, dirs, files in all:
if root == sub2_path:
dirs.sort()
files.sort()

d_entries = []
d_path = self.walk_path
for _ in range(depth):
d_path = os.path.join(d_path, "d")
d_entries.append((d_path, ["d"], []))
d_entries[-1][1].clear()

# Sub-sequences where the order is known
sections = {
"SUB1": [
(self.sub1_path, ["SUB11"], ["tmp2"]),
(self.sub11_path, [], []),
],
"SUB2": [self.sub2_tree],
"d": d_entries,
}

# The ordering of sub-dirs is arbitrary but determines the order in
# which sub-sequences appear
dirs = all[0][1]
expected = [(self.walk_path, dirs, ["tmp1"])]
for d in dirs:
expected.extend(sections[d])

self.assertEqual(len(all), depth + 4)
self.assertEqual(sorted(dirs), ["SUB1", "SUB2", "d"])
self.assertEqual(all, expected)


@unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()")
class FwalkTests(WalkTests):
Expand Down Expand Up @@ -1545,6 +1586,8 @@ def test_fd_leak(self):

# fwalk() keeps file descriptors open
test_walk_many_open_files = None
# fwalk() still uses recursion
test_walk_above_recursion_limit = None


class BytesWalkTests(WalkTests):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix issue with :func:`os.walk` where a :exc:`RecursionError` would occur on
deep directory structures by adjusting the implementation of
:func:`os.walk` to be iterative instead of recursive.