refactor: extract cache utilities
ralbertazzi committed Mar 9, 2023
1 parent 40061f9 commit 3c08430
Showing 7 changed files with 248 additions and 245 deletions.
4 changes: 4 additions & 0 deletions src/poetry/config/config.py
@@ -212,6 +212,10 @@ def _get_environment_repositories() -> dict[str, dict[str, str]]:
def repository_cache_directory(self) -> Path:
return Path(self.get("cache-dir")) / "cache" / "repositories"

@property
def artifacts_cache_directory(self) -> Path:
return Path(self.get("cache-dir")) / "artifacts"

@property
def virtualenvs_path(self) -> Path:
path = self.get("virtualenvs.path")
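
The new property sits alongside the existing repository_cache_directory, so both cache roots resolve against the configured cache-dir. A quick sketch of how the two relate (hypothetical interactive session; the printed paths assume the default Linux cache-dir):

    from poetry.config.config import Config

    config = Config.create()
    print(config.repository_cache_directory)  # e.g. ~/.cache/pypoetry/cache/repositories
    print(config.artifacts_cache_directory)   # e.g. ~/.cache/pypoetry/artifacts
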
84 changes: 5 additions & 79 deletions src/poetry/installation/chef.py
@@ -1,7 +1,5 @@
from __future__ import annotations

import hashlib
import json
import tarfile
import tempfile
import zipfile
@@ -19,16 +17,13 @@
from poetry.core.utils.helpers import temporary_directory
from pyproject_hooks import quiet_subprocess_runner # type: ignore[import]

from poetry.installation.chooser import InvalidWheelName
from poetry.installation.chooser import Wheel
from poetry.utils.cache import get_cache_directory_for_link
from poetry.utils.env import ephemeral_environment


if TYPE_CHECKING:
from contextlib import AbstractContextManager

from poetry.core.packages.utils.link import Link

from poetry.config.config import Config
from poetry.repositories import RepositoryPool
from poetry.utils.env import Env
@@ -89,9 +84,7 @@ class Chef:
def __init__(self, config: Config, env: Env, pool: RepositoryPool) -> None:
self._env = env
self._pool = pool
self._cache_dir = (
Path(config.get("cache-dir")).expanduser().joinpath("artifacts")
)
self._cache_dir = config.artifacts_cache_directory

def prepare(
self, archive: Path, output_dir: Path | None = None, *, editable: bool = False
@@ -181,7 +174,9 @@ def _prepare_sdist(self, archive: Path, destination: Path | None = None) -> Path
sdist_dir = archive_dir

if destination is None:
destination = self.get_cache_directory_for_link(Link(archive.as_uri()))
destination = get_cache_directory_for_link(
self._cache_dir, Link(archive.as_uri())
)

destination.mkdir(parents=True, exist_ok=True)

@@ -196,72 +191,3 @@ def _should_prepare(self, archive: Path) -> bool:
@classmethod
def _is_wheel(cls, archive: Path) -> bool:
return archive.suffix == ".whl"

def get_cached_archive_for_link(self, link: Link, *, strict: bool) -> Path | None:
archives = self.get_cached_archives_for_link(link)
if not archives:
return None

candidates: list[tuple[float | None, Path]] = []
for archive in archives:
if strict:
# in strict mode return the original cached archive instead of the
# prioritized archive type.
if link.filename == archive.name:
return archive
continue
if archive.suffix != ".whl":
candidates.append((float("inf"), archive))
continue

try:
wheel = Wheel(archive.name)
except InvalidWheelName:
continue

if not wheel.is_supported_by_environment(self._env):
continue

candidates.append(
(wheel.get_minimum_supported_index(self._env.supported_tags), archive),
)

if not candidates:
return None

return min(candidates)[1]

def get_cached_archives_for_link(self, link: Link) -> list[Path]:
cache_dir = self.get_cache_directory_for_link(link)

archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
paths = []
for archive_type in archive_types:
for archive in cache_dir.glob(f"*.{archive_type}"):
paths.append(Path(archive))

return paths

def get_cache_directory_for_link(self, link: Link) -> Path:
key_parts = {"url": link.url_without_fragment}

if link.hash_name is not None and link.hash is not None:
key_parts[link.hash_name] = link.hash

if link.subdirectory_fragment:
key_parts["subdirectory"] = link.subdirectory_fragment

key_parts["interpreter_name"] = self._env.marker_env["interpreter_name"]
key_parts["interpreter_version"] = "".join(
self._env.marker_env["interpreter_version"].split(".")[:2]
)

key = hashlib.sha256(
json.dumps(
key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
).encode("ascii")
).hexdigest()

split_key = [key[:2], key[2:4], key[4:6], key[6:]]

return self._cache_dir.joinpath(*split_key)
61 changes: 56 additions & 5 deletions src/poetry/installation/executor.py
@@ -20,12 +20,16 @@
from poetry.installation.chef import Chef
from poetry.installation.chef import ChefBuildError
from poetry.installation.chooser import Chooser
from poetry.installation.chooser import InvalidWheelName
from poetry.installation.chooser import Wheel
from poetry.installation.operations import Install
from poetry.installation.operations import Uninstall
from poetry.installation.operations import Update
from poetry.installation.wheel_installer import WheelInstaller
from poetry.utils._compat import decode
from poetry.utils.authenticator import Authenticator
from poetry.utils.cache import get_cache_directory_for_link
from poetry.utils.cache import get_cached_archives_for_link
from poetry.utils.env import EnvCommandError
from poetry.utils.helpers import atomic_open
from poetry.utils.helpers import get_file_hash
@@ -81,6 +85,7 @@ def __init__(
)
self._chef = Chef(config, self._env, pool)
self._chooser = Chooser(pool, self._env, config)
self._artifacts_cache_dir = config.artifacts_cache_directory

self._executor = ThreadPoolExecutor(max_workers=self._max_workers)
self._total_operations = 0
@@ -692,15 +697,19 @@ def _download(self, operation: Install | Update) -> Path:
def _download_link(self, operation: Install | Update, link: Link) -> Path:
package = operation.package

output_dir = self._chef.get_cache_directory_for_link(link)
output_dir = get_cache_directory_for_link(self._artifacts_cache_dir, link)
# Try to get cached original package for the link provided
original_archive = self._chef.get_cached_archive_for_link(link, strict=True)
original_archive = self._get_cached_archive_for_link(
self._env, self._artifacts_cache_dir, link, strict=True
)
if original_archive is None:
# No cached original distribution was found, so we download and prepare it
try:
original_archive = self._download_archive(operation, link)
except BaseException:
cache_directory = self._chef.get_cache_directory_for_link(link)
cache_directory = get_cache_directory_for_link(
self._artifacts_cache_dir, link
)
cached_file = cache_directory.joinpath(link.filename)
# We can't use unlink(missing_ok=True) because it's not available
# prior to Python 3.8
@@ -711,7 +720,9 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path:

# Get potential higher prioritized cached archive, otherwise it will fall back
# to the original archive.
archive = self._chef.get_cached_archive_for_link(link, strict=False)
archive = self._get_cached_archive_for_link(
self._env, self._artifacts_cache_dir, link, strict=False
)
# 'archive' can at this point never be None. Since we previously downloaded
# an archive, we now should have something cached that we can use here
assert archive is not None
@@ -775,7 +786,10 @@ def _download_archive(self, operation: Install | Update, link: Link) -> Path:
progress.start()

done = 0
archive = self._chef.get_cache_directory_for_link(link) / link.filename
archive = (
get_cache_directory_for_link(self._artifacts_cache_dir, link)
/ link.filename
)
archive.parent.mkdir(parents=True, exist_ok=True)
with atomic_open(archive) as f:
for chunk in response.iter_content(chunk_size=4096):
@@ -909,3 +923,40 @@ def _get_archive_info(self, package: Package) -> dict[str, Any]:
archive_info["hashes"] = {algorithm: value}

return archive_info

@staticmethod
def _get_cached_archive_for_link(
env: Env, cache_dir: Path, link: Link, *, strict: bool
) -> Path | None:
archives = get_cached_archives_for_link(cache_dir, link)
if not archives:
return None

candidates: list[tuple[float | None, Path]] = []
for archive in archives:
if strict:
# in strict mode return the original cached archive instead of the
# prioritized archive type.
if link.filename == archive.name:
return archive
continue
if archive.suffix != ".whl":
candidates.append((float("inf"), archive))
continue

try:
wheel = Wheel(archive.name)
except InvalidWheelName:
continue

if not wheel.is_supported_by_environment(env):
continue

candidates.append(
(wheel.get_minimum_supported_index(env.supported_tags), archive),
)

if not candidates:
return None

return min(candidates)[1]
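
The ranking above is easiest to see with concrete values. A standalone toy sketch (made-up filenames, not Poetry code): each candidate is a (priority, archive) pair, a wheel's priority is the index of its best-supported tag (lower is better), and non-wheels rank last via float("inf"), so min() prefers the most specific usable wheel and falls back to the original archive only when no wheel fits.

    # Toy ranking: lower priority wins; sdists sort last via float("inf").
    candidates = [
        (float("inf"), "demo-1.0.0.tar.gz"),                        # sdist fallback
        (12.0, "demo-1.0.0-py3-none-any.whl"),                      # generic wheel
        (3.0, "demo-1.0.0-cp310-cp310-manylinux_2_17_x86_64.whl"),  # specific wheel
    ]
    assert min(candidates)[1] == "demo-1.0.0-cp310-cp310-manylinux_2_17_x86_64.whl"
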
37 changes: 37 additions & 0 deletions src/poetry/utils/cache.py
@@ -8,12 +8,17 @@
import time

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Generic
from typing import TypeVar


if TYPE_CHECKING:
from poetry.core.packages.utils.link import Link


# Used by Cachy for items that do not expire.
MAX_DATE = 9999999999
T = TypeVar("T")
@@ -196,3 +201,35 @@ def _deserialize(self, data_raw: bytes) -> CacheItem[T]:
data = json.loads(data_str[10:])
expires = int(data_str[:10])
return CacheItem(data, expires)


def get_cached_archives_for_link(cache_dir: Path, link: Link) -> list[Path]:
cache_dir = get_cache_directory_for_link(cache_dir, link)

archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
paths = []
for archive_type in archive_types:
for archive in cache_dir.glob(f"*.{archive_type}"):
paths.append(Path(archive))

return paths


def get_cache_directory_for_link(cache_dir: Path, link: Link) -> Path:
key_parts = {"url": link.url_without_fragment}

if link.hash_name is not None and link.hash is not None:
key_parts[link.hash_name] = link.hash

if link.subdirectory_fragment:
key_parts["subdirectory"] = link.subdirectory_fragment

key = hashlib.sha256(
json.dumps(
key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
).encode("ascii")
).hexdigest()

split_key = [key[:2], key[2:4], key[4:6], key[6:]]

return cache_dir.joinpath(*split_key)
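
The extracted helper now takes the cache root explicitly, which is what lets both Chef and Executor share it. A hypothetical usage sketch (the URL and cache root are made up): the key is the sha256 of the canonicalized key parts, and its 64-character hex digest is split 2/2/2/58 into nested directories:

    from pathlib import Path

    from poetry.core.packages.utils.link import Link

    from poetry.utils.cache import get_cache_directory_for_link

    # The key hashes the fragment-free URL, plus the hash and subdirectory
    # fragments when present, so the same link always maps to the same directory.
    link = Link("https://example.com/packages/demo-1.0.0-py3-none-any.whl")
    cache_root = Path.home() / ".cache" / "pypoetry" / "artifacts"
    print(get_cache_directory_for_link(cache_root, link))
    # -> .../artifacts/<2 hex>/<2 hex>/<2 hex>/<58 hex chars>
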