From 3c084308dc88eb90edd14fbc9f4b4a3d000f04e4 Mon Sep 17 00:00:00 2001 From: ralbertazzi Date: Tue, 7 Mar 2023 17:51:23 +0100 Subject: [PATCH] refactor: extract cache utilities --- src/poetry/config/config.py | 4 + src/poetry/installation/chef.py | 84 +-------------- src/poetry/installation/executor.py | 61 ++++++++++- src/poetry/utils/cache.py | 37 +++++++ tests/installation/test_chef.py | 155 +--------------------------- tests/installation/test_executor.py | 119 +++++++++++++++++++-- tests/utils/test_cache.py | 33 +++++- 7 files changed, 248 insertions(+), 245 deletions(-) diff --git a/src/poetry/config/config.py b/src/poetry/config/config.py index 6eda9699870..1ea84b9c34c 100644 --- a/src/poetry/config/config.py +++ b/src/poetry/config/config.py @@ -212,6 +212,10 @@ def _get_environment_repositories() -> dict[str, dict[str, str]]: def repository_cache_directory(self) -> Path: return Path(self.get("cache-dir")) / "cache" / "repositories" + @property + def artifacts_cache_directory(self) -> Path: + return Path(self.get("cache-dir")) / "artifacts" + @property def virtualenvs_path(self) -> Path: path = self.get("virtualenvs.path") diff --git a/src/poetry/installation/chef.py b/src/poetry/installation/chef.py index 6eb1ae3f21b..b58d6de460b 100644 --- a/src/poetry/installation/chef.py +++ b/src/poetry/installation/chef.py @@ -1,7 +1,5 @@ from __future__ import annotations -import hashlib -import json import tarfile import tempfile import zipfile @@ -19,16 +17,13 @@ from poetry.core.utils.helpers import temporary_directory from pyproject_hooks import quiet_subprocess_runner # type: ignore[import] -from poetry.installation.chooser import InvalidWheelName -from poetry.installation.chooser import Wheel +from poetry.utils.cache import get_cache_directory_for_link from poetry.utils.env import ephemeral_environment if TYPE_CHECKING: from contextlib import AbstractContextManager - from poetry.core.packages.utils.link import Link - from poetry.config.config import Config from poetry.repositories import RepositoryPool from poetry.utils.env import Env @@ -89,9 +84,7 @@ class Chef: def __init__(self, config: Config, env: Env, pool: RepositoryPool) -> None: self._env = env self._pool = pool - self._cache_dir = ( - Path(config.get("cache-dir")).expanduser().joinpath("artifacts") - ) + self._cache_dir = config.artifacts_cache_directory def prepare( self, archive: Path, output_dir: Path | None = None, *, editable: bool = False @@ -181,7 +174,9 @@ def _prepare_sdist(self, archive: Path, destination: Path | None = None) -> Path sdist_dir = archive_dir if destination is None: - destination = self.get_cache_directory_for_link(Link(archive.as_uri())) + destination = get_cache_directory_for_link( + self._cache_dir, Link(archive.as_uri()) + ) destination.mkdir(parents=True, exist_ok=True) @@ -196,72 +191,3 @@ def _should_prepare(self, archive: Path) -> bool: @classmethod def _is_wheel(cls, archive: Path) -> bool: return archive.suffix == ".whl" - - def get_cached_archive_for_link(self, link: Link, *, strict: bool) -> Path | None: - archives = self.get_cached_archives_for_link(link) - if not archives: - return None - - candidates: list[tuple[float | None, Path]] = [] - for archive in archives: - if strict: - # in strict mode return the original cached archive instead of the - # prioritized archive type. - if link.filename == archive.name: - return archive - continue - if archive.suffix != ".whl": - candidates.append((float("inf"), archive)) - continue - - try: - wheel = Wheel(archive.name) - except InvalidWheelName: - continue - - if not wheel.is_supported_by_environment(self._env): - continue - - candidates.append( - (wheel.get_minimum_supported_index(self._env.supported_tags), archive), - ) - - if not candidates: - return None - - return min(candidates)[1] - - def get_cached_archives_for_link(self, link: Link) -> list[Path]: - cache_dir = self.get_cache_directory_for_link(link) - - archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"] - paths = [] - for archive_type in archive_types: - for archive in cache_dir.glob(f"*.{archive_type}"): - paths.append(Path(archive)) - - return paths - - def get_cache_directory_for_link(self, link: Link) -> Path: - key_parts = {"url": link.url_without_fragment} - - if link.hash_name is not None and link.hash is not None: - key_parts[link.hash_name] = link.hash - - if link.subdirectory_fragment: - key_parts["subdirectory"] = link.subdirectory_fragment - - key_parts["interpreter_name"] = self._env.marker_env["interpreter_name"] - key_parts["interpreter_version"] = "".join( - self._env.marker_env["interpreter_version"].split(".")[:2] - ) - - key = hashlib.sha256( - json.dumps( - key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True - ).encode("ascii") - ).hexdigest() - - split_key = [key[:2], key[2:4], key[4:6], key[6:]] - - return self._cache_dir.joinpath(*split_key) diff --git a/src/poetry/installation/executor.py b/src/poetry/installation/executor.py index 70e61a2cea6..4040beeedb0 100644 --- a/src/poetry/installation/executor.py +++ b/src/poetry/installation/executor.py @@ -20,12 +20,16 @@ from poetry.installation.chef import Chef from poetry.installation.chef import ChefBuildError from poetry.installation.chooser import Chooser +from poetry.installation.chooser import InvalidWheelName +from poetry.installation.chooser import Wheel from poetry.installation.operations import Install from poetry.installation.operations import Uninstall from poetry.installation.operations import Update from poetry.installation.wheel_installer import WheelInstaller from poetry.utils._compat import decode from poetry.utils.authenticator import Authenticator +from poetry.utils.cache import get_cache_directory_for_link +from poetry.utils.cache import get_cached_archives_for_link from poetry.utils.env import EnvCommandError from poetry.utils.helpers import atomic_open from poetry.utils.helpers import get_file_hash @@ -81,6 +85,7 @@ def __init__( ) self._chef = Chef(config, self._env, pool) self._chooser = Chooser(pool, self._env, config) + self._artifacts_cache_dir = config.artifacts_cache_directory self._executor = ThreadPoolExecutor(max_workers=self._max_workers) self._total_operations = 0 @@ -692,15 +697,19 @@ def _download(self, operation: Install | Update) -> Path: def _download_link(self, operation: Install | Update, link: Link) -> Path: package = operation.package - output_dir = self._chef.get_cache_directory_for_link(link) + output_dir = get_cache_directory_for_link(self._artifacts_cache_dir, link) # Try to get cached original package for the link provided - original_archive = self._chef.get_cached_archive_for_link(link, strict=True) + original_archive = self._get_cached_archive_for_link( + self._env, self._artifacts_cache_dir, link, strict=True + ) if original_archive is None: # No cached original distributions was found, so we download and prepare it try: original_archive = self._download_archive(operation, link) except BaseException: - cache_directory = self._chef.get_cache_directory_for_link(link) + cache_directory = get_cache_directory_for_link( + self._artifacts_cache_dir, link + ) cached_file = cache_directory.joinpath(link.filename) # We can't use unlink(missing_ok=True) because it's not available # prior to Python 3.8 @@ -711,7 +720,9 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path: # Get potential higher prioritized cached archive, otherwise it will fall back # to the original archive. - archive = self._chef.get_cached_archive_for_link(link, strict=False) + archive = self._get_cached_archive_for_link( + self._env, self._artifacts_cache_dir, link, strict=False + ) # 'archive' can at this point never be None. Since we previously downloaded # an archive, we now should have something cached that we can use here assert archive is not None @@ -775,7 +786,10 @@ def _download_archive(self, operation: Install | Update, link: Link) -> Path: progress.start() done = 0 - archive = self._chef.get_cache_directory_for_link(link) / link.filename + archive = ( + get_cache_directory_for_link(self._artifacts_cache_dir, link) + / link.filename + ) archive.parent.mkdir(parents=True, exist_ok=True) with atomic_open(archive) as f: for chunk in response.iter_content(chunk_size=4096): @@ -909,3 +923,40 @@ def _get_archive_info(self, package: Package) -> dict[str, Any]: archive_info["hashes"] = {algorithm: value} return archive_info + + @staticmethod + def _get_cached_archive_for_link( + env: Env, cache_dir: Path, link: Link, *, strict: bool + ) -> Path | None: + archives = get_cached_archives_for_link(cache_dir, link) + if not archives: + return None + + candidates: list[tuple[float | None, Path]] = [] + for archive in archives: + if strict: + # in strict mode return the original cached archive instead of the + # prioritized archive type. + if link.filename == archive.name: + return archive + continue + if archive.suffix != ".whl": + candidates.append((float("inf"), archive)) + continue + + try: + wheel = Wheel(archive.name) + except InvalidWheelName: + continue + + if not wheel.is_supported_by_environment(env): + continue + + candidates.append( + (wheel.get_minimum_supported_index(env.supported_tags), archive), + ) + + if not candidates: + return None + + return min(candidates)[1] diff --git a/src/poetry/utils/cache.py b/src/poetry/utils/cache.py index ba88a077055..e4530108c02 100644 --- a/src/poetry/utils/cache.py +++ b/src/poetry/utils/cache.py @@ -8,12 +8,17 @@ import time from pathlib import Path +from typing import TYPE_CHECKING from typing import Any from typing import Callable from typing import Generic from typing import TypeVar +if TYPE_CHECKING: + from poetry.core.packages.utils.link import Link + + # Used by Cachy for items that do not expire. MAX_DATE = 9999999999 T = TypeVar("T") @@ -196,3 +201,35 @@ def _deserialize(self, data_raw: bytes) -> CacheItem[T]: data = json.loads(data_str[10:]) expires = int(data_str[:10]) return CacheItem(data, expires) + + +def get_cached_archives_for_link(cache_dir: Path, link: Link) -> list[Path]: + cache_dir = get_cache_directory_for_link(cache_dir, link) + + archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"] + paths = [] + for archive_type in archive_types: + for archive in cache_dir.glob(f"*.{archive_type}"): + paths.append(Path(archive)) + + return paths + + +def get_cache_directory_for_link(cache_dir: Path, link: Link) -> Path: + key_parts = {"url": link.url_without_fragment} + + if link.hash_name is not None and link.hash is not None: + key_parts[link.hash_name] = link.hash + + if link.subdirectory_fragment: + key_parts["subdirectory"] = link.subdirectory_fragment + + key = hashlib.sha256( + json.dumps( + key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True + ).encode("ascii") + ).hexdigest() + + split_key = [key[:2], key[2:4], key[4:6], key[6:]] + + return cache_dir.joinpath(*split_key) diff --git a/tests/installation/test_chef.py b/tests/installation/test_chef.py index 19283cdeaaa..2de504c10b9 100644 --- a/tests/installation/test_chef.py +++ b/tests/installation/test_chef.py @@ -9,14 +9,13 @@ import pytest -from packaging.tags import Tag from poetry.core.packages.utils.link import Link from poetry.factory import Factory from poetry.installation.chef import Chef from poetry.repositories import RepositoryPool +from poetry.utils.cache import get_cache_directory_for_link from poetry.utils.env import EnvManager -from poetry.utils.env import MockEnv from tests.repositories.test_pypi_repository import MockRepository @@ -40,156 +39,6 @@ def setup(mocker: MockerFixture, pool: RepositoryPool) -> None: mocker.patch.object(Factory, "create_pool", return_value=pool) -@pytest.mark.parametrize( - ("link", "strict", "available_packages"), - [ - ( - "https://files.python-poetry.org/demo-0.1.0.tar.gz", - True, - [ - Path("/cache/demo-0.1.0-py2.py3-none-any"), - Path("/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl"), - Path("/cache/demo-0.1.0-cp37-cp37-macosx_10_15_x86_64.whl"), - ], - ), - ( - "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", - False, - [], - ), - ], -) -def test_get_not_found_cached_archive_for_link( - config: Config, - mocker: MockerFixture, - link: str, - strict: bool, - available_packages: list[Path], -): - chef = Chef( - config, - MockEnv( - version_info=(3, 8, 3), - marker_env={"interpreter_name": "cpython", "interpreter_version": "3.8.3"}, - supported_tags=[ - Tag("cp38", "cp38", "macosx_10_15_x86_64"), - Tag("py3", "none", "any"), - ], - ), - Factory.create_pool(config), - ) - - mocker.patch.object( - chef, "get_cached_archives_for_link", return_value=available_packages - ) - - archive = chef.get_cached_archive_for_link(Link(link), strict=strict) - - assert archive is None - - -@pytest.mark.parametrize( - ("link", "cached", "strict"), - [ - ( - "https://files.python-poetry.org/demo-0.1.0.tar.gz", - "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", - False, - ), - ( - "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", - "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", - False, - ), - ( - "https://files.python-poetry.org/demo-0.1.0.tar.gz", - "/cache/demo-0.1.0.tar.gz", - True, - ), - ( - "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", - "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", - True, - ), - ], -) -def test_get_found_cached_archive_for_link( - config: Config, mocker: MockerFixture, link: str, cached: str, strict: bool -): - chef = Chef( - config, - MockEnv( - version_info=(3, 8, 3), - marker_env={"interpreter_name": "cpython", "interpreter_version": "3.8.3"}, - supported_tags=[ - Tag("cp38", "cp38", "macosx_10_15_x86_64"), - Tag("py3", "none", "any"), - ], - ), - Factory.create_pool(config), - ) - - mocker.patch.object( - chef, - "get_cached_archives_for_link", - return_value=[ - Path("/cache/demo-0.1.0-py2.py3-none-any"), - Path("/cache/demo-0.1.0.tar.gz"), - Path("/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl"), - Path("/cache/demo-0.1.0-cp37-cp37-macosx_10_15_x86_64.whl"), - ], - ) - - archive = chef.get_cached_archive_for_link(Link(link), strict=strict) - - assert Path(cached) == archive - - -def test_get_cached_archives_for_link(config: Config, mocker: MockerFixture): - chef = Chef( - config, - MockEnv( - marker_env={"interpreter_name": "cpython", "interpreter_version": "3.8.3"} - ), - Factory.create_pool(config), - ) - - distributions = Path(__file__).parent.parent.joinpath("fixtures/distributions") - mocker.patch.object( - chef, - "get_cache_directory_for_link", - return_value=distributions, - ) - - archives = chef.get_cached_archives_for_link( - Link("https://files.python-poetry.org/demo-0.1.0.tar.gz") - ) - - assert archives - assert set(archives) == set(distributions.glob("demo-0.1.*")) - - -def test_get_cache_directory_for_link(config: Config, config_cache_dir: Path): - chef = Chef( - config, - MockEnv( - marker_env={"interpreter_name": "cpython", "interpreter_version": "3.8.3"} - ), - Factory.create_pool(config), - ) - - directory = chef.get_cache_directory_for_link( - Link("https://files.python-poetry.org/poetry-1.1.0.tar.gz") - ) - - expected = Path( - f"{config_cache_dir.as_posix()}/artifacts/ba/63/13/" - "283a3b3b7f95f05e9e6f84182d276f7bb0951d5b0cc24422b33f7a4648" - ) - - assert directory == expected - - def test_prepare_sdist(config: Config, config_cache_dir: Path) -> None: chef = Chef(config, EnvManager.get_system_env(), Factory.create_pool(config)) @@ -199,7 +48,7 @@ def test_prepare_sdist(config: Config, config_cache_dir: Path) -> None: .resolve() ) - destination = chef.get_cache_directory_for_link(Link(archive.as_uri())) + destination = get_cache_directory_for_link(chef._cache_dir, Link(archive.as_uri())) wheel = chef.prepare(archive) diff --git a/tests/installation/test_executor.py b/tests/installation/test_executor.py index 578b270354b..7d05d008386 100644 --- a/tests/installation/test_executor.py +++ b/tests/installation/test_executor.py @@ -20,6 +20,7 @@ from cleo.formatters.style import Style from cleo.io.buffered_io import BufferedIO from cleo.io.outputs.output import Verbosity +from packaging.tags import Tag from poetry.core.packages.package import Package from poetry.core.packages.utils.link import Link @@ -43,6 +44,7 @@ from poetry.config.config import Config from poetry.installation.operations.operation import Operation + from poetry.utils.env import Env from poetry.utils.env import VirtualEnv from tests.types import FixtureDirGetter @@ -537,11 +539,11 @@ def test_executor_should_delete_incomplete_downloads( side_effect=Exception("Download error"), ) mocker.patch( - "poetry.installation.chef.Chef.get_cached_archive_for_link", - side_effect=lambda link, strict: None, + "poetry.installation.executor.Executor._get_cached_archive_for_link", + return_value=None, ) mocker.patch( - "poetry.installation.chef.Chef.get_cache_directory_for_link", + "poetry.installation.executor.get_cache_directory_for_link", return_value=Path(tmp_dir), ) @@ -760,7 +762,7 @@ def test_executor_should_write_pep610_url_references_for_wheel_urls( if is_artifact_cached: link_cached = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" mocker.patch( - "poetry.installation.chef.Chef.get_cached_archive_for_link", + "poetry.installation.executor.Executor._get_cached_archive_for_link", return_value=link_cached, ) download_spy = mocker.spy(Executor, "_download_archive") @@ -839,7 +841,9 @@ def test_executor_should_write_pep610_url_references_for_non_wheel_urls( cached_sdist = fixture_dir("distributions") / "demo-0.1.0.tar.gz" cached_wheel = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" - def mock_get_cached_archive_for_link_func(_: Link, strict: bool): + def mock_get_cached_archive_for_link_func( + _: Env, __: Path, ___: Link, strict: bool + ): if is_wheel_cached and not strict: return cached_wheel if is_sdist_cached: @@ -847,7 +851,7 @@ def mock_get_cached_archive_for_link_func(_: Link, strict: bool): return None mocker.patch( - "poetry.installation.chef.Chef.get_cached_archive_for_link", + "poetry.installation.executor.Executor._get_cached_archive_for_link", side_effect=mock_get_cached_archive_for_link_func, ) @@ -1156,3 +1160,106 @@ def test_build_backend_errors_are_reported_correctly_if_caused_by_subprocess( output = io.fetch_output() assert output.startswith(expected_start) assert output.endswith(expected_end) + + +@pytest.mark.parametrize( + ("link", "strict", "available_packages"), + [ + ( + "https://files.python-poetry.org/demo-0.1.0.tar.gz", + True, + [ + Path("/cache/demo-0.1.0-py2.py3-none-any"), + Path("/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl"), + Path("/cache/demo-0.1.0-cp37-cp37-macosx_10_15_x86_64.whl"), + ], + ), + ( + "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + False, + [], + ), + ], +) +def test_get_not_found_cached_archive_for_link( + mocker: MockerFixture, + link: str, + strict: bool, + available_packages: list[Path], +) -> None: + env = MockEnv( + version_info=(3, 8, 3), + marker_env={"interpreter_name": "cpython", "interpreter_version": "3.8.3"}, + supported_tags=[ + Tag("cp38", "cp38", "macosx_10_15_x86_64"), + Tag("py3", "none", "any"), + ], + ) + + mocker.patch( + "poetry.installation.executor.get_cached_archives_for_link", + return_value=available_packages, + ) + + archive = Executor._get_cached_archive_for_link( + env, Path(), Link(link), strict=strict + ) + + assert archive is None + + +@pytest.mark.parametrize( + ("link", "cached", "strict"), + [ + ( + "https://files.python-poetry.org/demo-0.1.0.tar.gz", + "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + False, + ), + ( + "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + False, + ), + ( + "https://files.python-poetry.org/demo-0.1.0.tar.gz", + "/cache/demo-0.1.0.tar.gz", + True, + ), + ( + "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + True, + ), + ], +) +def test_get_found_cached_archive_for_link( + mocker: MockerFixture, + link: str, + cached: str, + strict: bool, +) -> None: + env = MockEnv( + version_info=(3, 8, 3), + marker_env={"interpreter_name": "cpython", "interpreter_version": "3.8.3"}, + supported_tags=[ + Tag("cp38", "cp38", "macosx_10_15_x86_64"), + Tag("py3", "none", "any"), + ], + ) + + mocker.patch( + "poetry.installation.executor.get_cached_archives_for_link", + return_value=[ + Path("/cache/demo-0.1.0-py2.py3-none-any"), + Path("/cache/demo-0.1.0.tar.gz"), + Path("/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl"), + Path("/cache/demo-0.1.0-cp37-cp37-macosx_10_15_x86_64.whl"), + ], + ) + + archive = Executor._get_cached_archive_for_link( + env, Path(), Link(link), strict=strict + ) + + assert Path(cached) == archive diff --git a/tests/utils/test_cache.py b/tests/utils/test_cache.py index c1bbae5071a..8cb6b5df068 100644 --- a/tests/utils/test_cache.py +++ b/tests/utils/test_cache.py @@ -1,5 +1,6 @@ from __future__ import annotations +from pathlib import Path from typing import TYPE_CHECKING from typing import Any from typing import TypeVar @@ -9,13 +10,14 @@ import pytest from cachy import CacheManager +from poetry.core.packages.utils.link import Link from poetry.utils.cache import FileCache +from poetry.utils.cache import get_cache_directory_for_link +from poetry.utils.cache import get_cached_archives_for_link if TYPE_CHECKING: - from pathlib import Path - from _pytest.monkeypatch import MonkeyPatch from pytest import FixtureRequest from pytest_mock import MockerFixture @@ -192,3 +194,30 @@ def test_cachy_compatibility( assert cachy_file_cache.get("key3") == test_str assert cachy_file_cache.get("key4") == test_obj + + +def test_get_cached_archives_for_link(mocker: MockerFixture) -> None: + distributions = Path(__file__).parent.parent.joinpath("fixtures/distributions") + mocker.patch( + "poetry.utils.cache.get_cache_directory_for_link", + return_value=distributions, + ) + archives = get_cached_archives_for_link( + Path(), Link("https://files.python-poetry.org/demo-0.1.0.tar.gz") + ) + + assert archives + assert set(archives) == set(distributions.glob("demo-0.1.*")) + + +def test_get_cache_directory_for_link(tmp_path: Path) -> None: + directory = get_cache_directory_for_link( + tmp_path, Link("https://files.python-poetry.org/poetry-1.1.0.tar.gz") + ) + + expected = Path( + f"{tmp_path.as_posix()}/11/4f/a8/" + "1c89d75547e4967082d30a28360401c82c83b964ddacee292201bf85f2" + ) + + assert directory == expected