diff --git a/mesonpy/__init__.py b/mesonpy/__init__.py index 917bd6d04..9a0f2b360 100644 --- a/mesonpy/__init__.py +++ b/mesonpy/__init__.py @@ -865,64 +865,57 @@ def _meson_version(self) -> str: def sdist(self, directory: Path) -> pathlib.Path: """Generates a sdist (source distribution) in the specified directory.""" - # generate meson dist file + # Generate meson dist file. self._run(self._meson + ['dist', '--allow-dirty', '--no-tests', '--formats', 'gztar', *self._meson_args['dist']]) - # move meson dist file to output path dist_name = f'{self._metadata.distribution_name}-{self._metadata.version}' meson_dist_name = f'{self._meson_name}-{self._meson_version}' meson_dist_path = pathlib.Path(self._build_dir, 'meson-dist', f'{meson_dist_name}.tar.gz') - sdist = pathlib.Path(directory, f'{dist_name}.tar.gz') + sdist_path = pathlib.Path(directory, f'{dist_name}.tar.gz') - with tarfile.open(meson_dist_path, 'r:gz') as meson_dist, mesonpy._util.create_targz(sdist) as tar: + with tarfile.open(meson_dist_path, 'r:gz') as meson_dist, mesonpy._util.create_targz(sdist_path) as sdist: for member in meson_dist.getmembers(): - # calculate the file path in the source directory - assert member.name, member.name - member_parts = member.name.split('/') - if len(member_parts) <= 1: - continue - path = self._source_dir.joinpath(*member_parts[1:]) - - if not path.exists() and member.isfile(): - # File doesn't exists on the source directory but exists on - # the Meson dist, so it is generated file, which we need to - # include. - # See https://mesonbuild.com/Reference-manual_builtin_meson.html#mesonadd_dist_script - - # MESON_DIST_ROOT could have a different base name - # than the actual sdist basename, so we need to rename here + if member.isfile(): file = meson_dist.extractfile(member.name) - member.name = str(pathlib.Path(dist_name, *member_parts[1:]).as_posix()) - tar.addfile(member, file) - continue - - if not path.is_file(): - continue - info = tarfile.TarInfo(member.name) - file_stat = os.stat(path) - info.mtime = member.mtime - info.size = file_stat.st_size - info.mode = int(oct(file_stat.st_mode)[-3:], 8) - - # rewrite the path if necessary, to match the sdist distribution name - if dist_name != meson_dist_name: - info.name = pathlib.Path( - dist_name, - path.relative_to(self._source_dir) - ).as_posix() - - with path.open('rb') as f: - tar.addfile(info, fileobj=f) - - # add PKG-INFO to dist file to make it a sdist - pkginfo_info = tarfile.TarInfo(f'{dist_name}/PKG-INFO') - pkginfo_info.mtime = time.time() # type: ignore[assignment] + # Reset pax extended header. The tar archive member may be + # using pax headers to store some file metadata. The pax + # headers are not reset when the metadata is modified and + # they take precedence when the member is deserialized. + # This is relevant because when rewriting the member name, + # the length of the path may shrink from being more than + # 100 characters (requiring the path to be stored in the + # pax headers) to being less than 100 characters. When this + # happens, the tar archive member is serialized with the + # shorter name in the regular header and the longer one in + # the extended pax header. The archives handled here are + # not expected to use extended pax headers other than for + # the ones required to encode file metadata. The easiest + # solution is to reset the pax extended headers. + member.pax_headers = {} + + # Rewrite the path to match the sdist distribution name. + stem = member.name.split('/', 1)[1] + member.name = '/'.join((dist_name, stem)) + + # Reset owner and group to root:root. This mimics what + # 'git archive' does and makes the sdist reproducible upon + # being built by different users. + member.uname = member.gname = 'root' + member.uid = member.gid = 0 + + sdist.addfile(member, file) + + # Add 'PKG-INFO'. + member = tarfile.TarInfo(f'{dist_name}/PKG-INFO') + member.uid = member.gid = 0 + member.uname = member.gname = 'root' + member.mtime = time.time() metadata = bytes(self._metadata.as_rfc822()) - pkginfo_info.size = len(metadata) - tar.addfile(pkginfo_info, fileobj=io.BytesIO(metadata)) + member.size = len(metadata) + sdist.addfile(member, io.BytesIO(metadata)) - return sdist + return sdist_path def wheel(self, directory: Path) -> pathlib.Path: """Generates a wheel in the specified directory.""" diff --git a/tests/packages/long-path/meson.build b/tests/packages/long-path/meson.build new file mode 100644 index 000000000..272d7fe0f --- /dev/null +++ b/tests/packages/long-path/meson.build @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2024 The meson-python developers +# +# SPDX-License-Identifier: MIT + +project('very-long-project-name-that-makes-the-paths-within-the-sdist-exceed-100-characters-xxxxxxxxxxxxxxxxx', version: '1.0.0') diff --git a/tests/packages/long-path/pyproject.toml b/tests/packages/long-path/pyproject.toml new file mode 100644 index 000000000..70791dd42 --- /dev/null +++ b/tests/packages/long-path/pyproject.toml @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: 2021 The meson-python developers +# +# SPDX-License-Identifier: MIT + +[build-system] +build-backend = 'mesonpy' +requires = ['meson-python'] + +[project] +name = 'long-path' +dynamic = ['version'] diff --git a/tests/test_sdist.py b/tests/test_sdist.py index fb698b53d..0be265dd0 100644 --- a/tests/test_sdist.py +++ b/tests/test_sdist.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: MIT import os +import pathlib import re import stat import sys @@ -122,37 +123,35 @@ def test_contents_subdirs(sdist_subdirs): def test_contents_unstaged(package_pure, tmp_path): - new_data = textwrap.dedent(''' - def bar(): - return 'foo' + new = textwrap.dedent(''' + def bar(): + return 'foo' ''').strip() - with open('pure.py', 'r') as f: - old_data = f.read() - - try: - with in_git_repo_context(): - with open('pure.py', 'w') as f, open('crap', 'x'): - f.write(new_data) + old = pathlib.Path('pure.py').read_text() + with in_git_repo_context(): + try: + pathlib.Path('pure.py').write_text(new) + pathlib.Path('other.py').touch() sdist_path = mesonpy.build_sdist(os.fspath(tmp_path)) - finally: - with open('pure.py', 'w') as f: - f.write(old_data) - os.unlink('crap') + finally: + pathlib.Path('pure.py').write_text(old) + pathlib.Path('other.py').unlink() with tarfile.open(tmp_path / sdist_path, 'r:gz') as sdist: names = {member.name for member in sdist.getmembers()} mtimes = {member.mtime for member in sdist.getmembers()} - read_data = sdist.extractfile('pure-1.0.0/pure.py').read().replace(b'\r\n', b'\n') + data = sdist.extractfile('pure-1.0.0/pure.py').read().replace(b'\r\n', b'\n') + # Verify that uncommitted changes are not included in the sdist. assert names == { 'pure-1.0.0/PKG-INFO', 'pure-1.0.0/meson.build', 'pure-1.0.0/pure.py', 'pure-1.0.0/pyproject.toml', } - assert read_data == new_data.encode() + assert data == old.encode() # All the archive members have a valid mtime. assert 0 not in mtimes @@ -192,3 +191,17 @@ def test_generated_files(sdist_generated_files): # All the archive members have a valid mtime. assert 0 not in mtimes + + +def test_long_path(sdist_long_path): + # See https://github.com/mesonbuild/meson-python/pull/587#pullrequestreview-2020891328 + # and https://github.com/mesonbuild/meson-python/pull/587#issuecomment-2075973593 + + with tarfile.open(sdist_long_path, 'r:gz') as sdist: + names = {member.name for member in sdist.getmembers()} + + assert names == { + 'long_path-1.0.0/PKG-INFO', + 'long_path-1.0.0/meson.build', + 'long_path-1.0.0/pyproject.toml' + }