Skip to content

Commit

Permalink
Fix non-determinism of package_data in generated setup.py files (…
Browse files Browse the repository at this point in the history
…Cherry-pick of #15292) (#15299)

As described in #14843, (some) tests which consumed `runtime_package_dependencies` would always miss the cache.

This was because the dists generated for each run were slightly different, which came down to the `package_data` generated for `resources` being in a non-deterministic order due to iterating over a `set`. See #14195 (comment) for some thoughts on how to avoid this kind of issue in the future.

Fixes #14843: warm CI times should drop by about 1 minute from ~3m to ~2m.

[ci skip-rust]
  • Loading branch information
stuhood authored May 2, 2022
1 parent b0d92c2 commit 196d9b5
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/python/pants/backend/python/goals/setup_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ def maybe_add_resource(fp: str) -> None:
return (
tuple(sorted(packages)),
tuple(sorted(namespace_packages)),
tuple((pkg, tuple(sorted(files))) for pkg, files in package_data.items()),
tuple((pkg, tuple(sorted(files))) for pkg, files in sorted(package_data.items())),
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@

from __future__ import annotations

import hashlib
import os
import subprocess
import sys
import venv
from pathlib import Path
from tempfile import TemporaryDirectory
from textwrap import dedent

from pants.testutil.pants_integration_test import run_pants
from pants.testutil.pants_integration_test import run_pants, setup_tmpdir
from pants.util.dirutil import safe_rmtree


def test_native_code() -> None:
Expand Down Expand Up @@ -43,3 +47,64 @@ def test_native_code() -> None:
capture_output=True,
)
assert proc.stdout == b"Professor Native\n"


def package_determinism(expected_artifact_count: int, files: dict[str, str]) -> None:
"""Tests that the given sources can be `package`d reproducibly."""

def digest(path: str) -> tuple[str, str]:
d = hashlib.sha256(Path(path).read_bytes()).hexdigest()
return path, d

def run_and_digest(address: str) -> dict[str, str]:
safe_rmtree("dist")
pants_run = run_pants(
[
"--backend-packages=pants.backend.python",
"--no-pantsd",
"package",
address,
],
)
pants_run.assert_success()
return dict(digest(os.path.join("dist", f)) for f in os.listdir("dist"))

with setup_tmpdir(files) as source_dir:
one = run_and_digest(f"{source_dir}:dist")
two = run_and_digest(f"{source_dir}:dist")

assert len(one) == expected_artifact_count
assert one == two


def test_deterministic_package_data() -> None:
package_determinism(
2,
{
"BUILD": dedent(
"""\
python_distribution(
name="dist",
dependencies=["{tmpdir}/a", "{tmpdir}/b"],
provides=python_artifact(name="det", version="2.3.4"),
)
"""
),
"a/BUILD": dedent(
"""\
python_sources(dependencies=[":resources"])
resources(name="resources", sources=["*.txt"])
"""
),
"a/source.py": "",
"a/a.txt": "",
"b/BUILD": dedent(
"""\
python_sources(dependencies=[":resources"])
resources(name="resources", sources=["*.txt"])
"""
),
"b/source.py": "",
"b/b.txt": "",
},
)

0 comments on commit 196d9b5

Please sign in to comment.