Skip to content

Commit

Permalink
Updated implementation to match upstream at v3.13.0a4
Browse files Browse the repository at this point in the history
  • Loading branch information
jaraco committed May 28, 2024
1 parent a749fa9 commit 3ad87d8
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 35 deletions.
31 changes: 21 additions & 10 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
import struct
import copy
import re
import warnings

try:
import pwd
Expand Down Expand Up @@ -1641,7 +1640,7 @@ class TarFile(object):
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
errors="surrogateescape", pax_headers=None, debug=None,
errorlevel=None, copybufsize=None):
errorlevel=None, copybufsize=None, stream=False):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
Expand Down Expand Up @@ -1673,6 +1672,8 @@ def __init__(self, name=None, mode="r", fileobj=None, format=None,
self.name = os.path.abspath(name) if name else None
self.fileobj = fileobj

self.stream = stream

# Init attributes.
if format is not None:
self.format = format
Expand Down Expand Up @@ -2106,6 +2107,10 @@ def list(self, verbose=True, *, members=None):
output is produced. `members' is optional and must be a subset of the
list returned by getmembers().
"""
# Convert tarinfo type to stat type.
type2mode = {REGTYPE: stat.S_IFREG, SYMTYPE: stat.S_IFLNK,
FIFOTYPE: stat.S_IFIFO, CHRTYPE: stat.S_IFCHR,
DIRTYPE: stat.S_IFDIR, BLKTYPE: stat.S_IFBLK}
self._check()

if members is None:
Expand All @@ -2115,7 +2120,8 @@ def list(self, verbose=True, *, members=None):
if tarinfo.mode is None:
_safe_print("??????????")
else:
_safe_print(stat.filemode(tarinfo.mode))
modetype = type2mode.get(tarinfo.type, 0)
_safe_print(stat.filemode(modetype | tarinfo.mode))
_safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
tarinfo.gname or tarinfo.gid))
if tarinfo.ischr() or tarinfo.isblk():
Expand Down Expand Up @@ -2218,6 +2224,7 @@ def _get_filter_function(self, filter):
if filter is None:
filter = self.extraction_filter
if filter is None:
import warnings
warnings.warn(
'Python 3.14 will, by default, filter extracted tar '
+ 'archives and reject files or modify their metadata. '
Expand Down Expand Up @@ -2404,7 +2411,7 @@ def _extract_member(self, tarinfo, targetpath, set_attrs=True,
if upperdirs and not os.path.exists(upperdirs):
# Create directories that are not part of the archive with
# default permissions.
os.makedirs(upperdirs)
os.makedirs(upperdirs, exist_ok=True)

if tarinfo.islnk() or tarinfo.issym():
self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
Expand Down Expand Up @@ -2557,7 +2564,8 @@ def chown(self, tarinfo, targetpath, numeric_owner):
os.lchown(targetpath, u, g)
else:
os.chown(targetpath, u, g)
except OSError as e:
except (OSError, OverflowError) as e:
# OverflowError can be raised if an ID doesn't fit in `id_t`
raise ExtractError("could not change owner") from e

def chmod(self, tarinfo, targetpath):
Expand Down Expand Up @@ -2640,7 +2648,9 @@ def next(self):
break

if tarinfo is not None:
self.members.append(tarinfo)
# if streaming the file we do not want to cache the tarinfo
if not self.stream:
self.members.append(tarinfo)
else:
self._loaded = True

Expand Down Expand Up @@ -2691,11 +2701,12 @@ def _getmember(self, name, tarinfo=None, normalize=False):

def _load(self):
"""Read through the entire archive file and look for readable
members.
members. This should not run if the file is set to stream.
"""
while self.next() is not None:
pass
self._loaded = True
if not self.stream:
while self.next() is not None:
pass
self._loaded = True

def _check(self, mode=None):
"""Check if TarFile is still open, and if the operation's mode
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
96 changes: 71 additions & 25 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def sha256sum(data):

TEMPDIR = os.path.abspath(os_helper.TESTFN) + "-tardir"
tarextdir = TEMPDIR + '-extract-test'
tarname = support.findfile("testtar.tar")
tarname = support.findfile("testtar.tar", subdir="archivetestdata")
gzipname = os.path.join(TEMPDIR, "testtar.tar.gz")
bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2")
xzname = os.path.join(TEMPDIR, "testtar.tar.xz")
Expand Down Expand Up @@ -102,6 +102,14 @@ def setUp(self):
def tearDown(self):
self.tar.close()

class StreamModeTest(ReadTest):
    """ReadTest variant whose archive is opened with ``stream=True``."""

    def setUp(self):
        # Everything except the way the archive is opened is inherited
        # unchanged; only the stream flag is added here.
        open_options = {
            "mode": self.mode,
            "encoding": "iso8859-1",
            "stream": True,
        }
        self.tar = tarfile.open(self.tarname, **open_options)

class UstarReadTest(ReadTest, unittest.TestCase):

Expand Down Expand Up @@ -316,11 +324,23 @@ def test_list_verbose(self):
# accessories if verbose flag is being used
# ...
# ?rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/conttype
# ?rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/regtype
# -rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/regtype
# drwxr-xr-x tarfile/tarfile 0 2003-01-05 15:19:43 ustar/dirtype/
# ...
self.assertRegex(out, (br'\?rw-r--r-- tarfile/tarfile\s+7011 '
br'\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d '
br'ustar/\w+type ?\r?\n') * 2)
#
# Array of values to modify the regex below:
# ((file_type, file_permissions, file_length), ...)
type_perm_lengths = (
(br'\?', b'rw-r--r--', b'7011'), (b'-', b'rw-r--r--', b'7011'),
(b'd', b'rwxr-xr-x', b'0'), (b'd', b'rwxr-xr-x', b'255'),
(br'\?', b'rw-r--r--', b'0'), (b'l', b'rwxrwxrwx', b'0'),
(b'b', b'rw-rw----', b'3,0'), (b'c', b'rw-rw-rw-', b'1,3'),
(b'p', b'rw-r--r--', b'0'))
self.assertRegex(out, b''.join(
[(tp + (br'%s tarfile/tarfile\s+%s ' % (perm, ln) +
br'\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d '
br'ustar/\w+type[/>\sa-z-]*\n')) for tp, perm, ln
in type_perm_lengths]))
# Make sure it prints the source of link with verbose flag
self.assertIn(b'ustar/symtype -> regtype', out)
self.assertIn(b'./ustar/linktest2/symtype -> ../linktest1/regtype', out)
Expand Down Expand Up @@ -484,7 +504,7 @@ def test_length_zero_header(self):
# bpo-39017 (CVE-2019-20907): reading a zero-length header should fail
# with an exception
with self.assertRaisesRegex(tarfile.ReadError, "file could not be opened successfully"):
with tarfile.open(support.findfile('recursion.tar')) as tar:
with tarfile.open(support.findfile('recursion.tar', subdir='archivetestdata')):
pass

def test_extractfile_name(self):
Expand Down Expand Up @@ -854,6 +874,21 @@ class Bz2StreamReadTest(Bz2Test, StreamReadTest):
class LzmaStreamReadTest(LzmaTest, StreamReadTest):
pass

class TarStreamModeReadTest(StreamModeTest, unittest.TestCase):
    """Stream-mode read tests bound to ``unittest.TestCase``."""

    def test_stream_mode_no_cache(self):
        # Walk the whole archive, discarding every entry as it is produced.
        entries = iter(self.tar)
        while next(entries, None) is not None:
            pass
        # After a full pass in stream mode the members list must be empty.
        self.assertEqual(self.tar.members, [])

# Re-run the TarStreamModeReadTest cases with the GzipTest mixin applied.
# NOTE(review): GzipTest/Bz2Test/LzmaTest are defined outside this view;
# presumably they select the matching compressed archive -- confirm.
class GzipStreamModeReadTest(GzipTest, TarStreamModeReadTest):
pass

# Same stream-mode cases with the Bz2Test mixin.
class Bz2StreamModeReadTest(Bz2Test, TarStreamModeReadTest):
pass

# Same stream-mode cases with the LzmaTest mixin.
class LzmaStreamModeReadTest(LzmaTest, TarStreamModeReadTest):
pass

class DetectReadTest(TarTest, unittest.TestCase):
def _testfunc_file(self, name, mode):
Expand Down Expand Up @@ -2543,7 +2578,7 @@ def test__all__(self):
support.check__all__(self, tarfile, not_exported=not_exported)

def test_useful_error_message_when_modules_missing(self):
fname = os.path.join(os.path.dirname(__file__), 'testtar.tar.xz')
fname = os.path.join(os.path.dirname(__file__), 'archivetestdata', 'testtar.tar.xz')
with self.assertRaises(tarfile.ReadError) as excinfo:
error = tarfile.CompressionError('lzma module is not available'),
with unittest.mock.patch.object(tarfile.TarFile, 'xzopen', side_effect=error):
Expand Down Expand Up @@ -2608,7 +2643,7 @@ def test_test_command_verbose(self):
self.assertIn(b'is a tar archive.\n', out)

def test_test_command_invalid_file(self):
zipname = support.findfile('zipdir.zip')
zipname = support.findfile('zipdir.zip', subdir='archivetestdata')
rc, out, err = self.tarfilecmd_failure('-t', zipname)
self.assertIn(b' is not a tar archive.', err)
self.assertEqual(out, b'')
Expand Down Expand Up @@ -2650,7 +2685,7 @@ def test_list_command_verbose(self):
self.assertEqual(out, expected)

def test_list_command_invalid_file(self):
zipname = support.findfile('zipdir.zip')
zipname = support.findfile('zipdir.zip', subdir='archivetestdata')
rc, out, err = self.tarfilecmd_failure('-l', zipname)
self.assertIn(b' is not a tar archive.', err)
self.assertEqual(out, b'')
Expand Down Expand Up @@ -2775,7 +2810,7 @@ def test_extract_command_different_directory(self):
os_helper.rmtree(tarextdir)

def test_extract_command_invalid_file(self):
zipname = support.findfile('zipdir.zip')
zipname = support.findfile('zipdir.zip', subdir='archivetestdata')
with os_helper.temp_cwd(tarextdir):
rc, out, err = self.tarfilecmd_failure('-e', zipname)
self.assertIn(b' is not a tar archive.', err)
Expand Down Expand Up @@ -3794,16 +3829,19 @@ def test_modes(self):
arc.add('read_group_only', mode='?---r-----')
arc.add('no_bits', mode='?---------')
arc.add('dir/', mode='?---rwsrwt')
arc.add('dir_all_bits/', mode='?rwsrwsrwt')

# On some systems, setting the sticky bit is a no-op.
# Check if that's the case.
# On some systems, setting the uid, gid, and/or sticky bit is a no-op.
# Check which bits we can set, so we can compare tarfile machinery to
# a simple chmod.
tmp_filename = os.path.join(TEMPDIR, "tmp.file")
with open(tmp_filename, 'w'):
pass
try:
new_mode = (os.stat(tmp_filename).st_mode
| stat.S_ISVTX | stat.S_ISGID | stat.S_ISUID)
try:
os.chmod(tmp_filename,
os.stat(tmp_filename).st_mode | stat.S_ISVTX)
os.chmod(tmp_filename, new_mode)
except OSError as exc:
if exc.errno == getattr(errno, "EFTYPE", 0):
# gh-108948: On FreeBSD, regular users cannot set
Expand All @@ -3812,28 +3850,34 @@ def test_modes(self):
"regular users cannot set sticky bit")
else:
raise
have_sticky_files = (os.stat(tmp_filename).st_mode & stat.S_ISVTX)

got_mode = os.stat(tmp_filename).st_mode
_t_file = 't' if (got_mode & stat.S_ISVTX) else 'x'
_suid_file = 's' if (got_mode & stat.S_ISUID) else 'x'
_sgid_file = 's' if (got_mode & stat.S_ISGID) else 'x'
finally:
os.unlink(tmp_filename)

os.mkdir(tmp_filename)
os.chmod(tmp_filename, os.stat(tmp_filename).st_mode | stat.S_ISVTX)
have_sticky_dirs = (os.stat(tmp_filename).st_mode & stat.S_ISVTX)
new_mode = (os.stat(tmp_filename).st_mode
| stat.S_ISVTX | stat.S_ISGID | stat.S_ISUID)
os.chmod(tmp_filename, new_mode)
got_mode = os.stat(tmp_filename).st_mode
_t_dir = 't' if (got_mode & stat.S_ISVTX) else 'x'
_suid_dir = 's' if (got_mode & stat.S_ISUID) else 'x'
_sgid_dir = 's' if (got_mode & stat.S_ISGID) else 'x'
os.rmdir(tmp_filename)

with self.check_context(arc.open(), 'fully_trusted'):
if have_sticky_files:
self.expect_file('all_bits', mode='?rwsrwsrwt')
else:
self.expect_file('all_bits', mode='?rwsrwsrwx')
self.expect_file('all_bits',
mode=f'?rw{_suid_file}rw{_sgid_file}rw{_t_file}')
self.expect_file('perm_bits', mode='?rwxrwxrwx')
self.expect_file('exec_group_other', mode='?rw-rwxrwx')
self.expect_file('read_group_only', mode='?---r-----')
self.expect_file('no_bits', mode='?---------')
if have_sticky_dirs:
self.expect_file('dir/', mode='?---rwsrwt')
else:
self.expect_file('dir/', mode='?---rwsrwx')
self.expect_file('dir/', mode=f'?---rw{_sgid_dir}rw{_t_dir}')
self.expect_file('dir_all_bits/',
mode=f'?rw{_suid_dir}rw{_sgid_dir}rw{_t_dir}')

with self.check_context(arc.open(), 'tar'):
self.expect_file('all_bits', mode='?rwxr-xr-x')
Expand All @@ -3842,6 +3886,7 @@ def test_modes(self):
self.expect_file('read_group_only', mode='?---r-----')
self.expect_file('no_bits', mode='?---------')
self.expect_file('dir/', mode='?---r-xr-x')
self.expect_file('dir_all_bits/', mode='?rwxr-xr-x')

with self.check_context(arc.open(), 'data'):
normal_dir_mode = stat.filemode(stat.S_IMODE(
Expand All @@ -3852,6 +3897,7 @@ def test_modes(self):
self.expect_file('read_group_only', mode='?rw-r-----')
self.expect_file('no_bits', mode='?rw-------')
self.expect_file('dir/', mode=normal_dir_mode)
self.expect_file('dir_all_bits/', mode=normal_dir_mode)

def test_pipe(self):
# Test handling of a special file
Expand Down

0 comments on commit 3ad87d8

Please sign in to comment.