refactor: extract cache utilities
ralbertazzi committed Mar 9, 2023
1 parent 40061f9 commit 3c08430
Showing 7 changed files with 248 additions and 245 deletions.
4 changes: 4 additions & 0 deletions src/poetry/config/config.py
@@ -212,6 +212,10 @@ def _get_environment_repositories() -> dict[str, dict[str, str]]:
def repository_cache_directory(self) -> Path:
return Path(self.get("cache-dir")) / "cache" / "repositories"

@property
def artifacts_cache_directory(self) -> Path:
return Path(self.get("cache-dir")) / "artifacts"

@property
def virtualenvs_path(self) -> Path:
path = self.get("virtualenvs.path")
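
The new property sits alongside the existing repository_cache_directory, so both cache roots resolve against the configured cache-dir. A quick sketch of how the two relate (hypothetical interactive session; the printed paths assume the default Linux cache-dir):

    from poetry.config.config import Config

    config = Config.create()
    print(config.repository_cache_directory)  # e.g. ~/.cache/pypoetry/cache/repositories
    print(config.artifacts_cache_directory)   # e.g. ~/.cache/pypoetry/artifacts
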
84 changes: 5 additions & 79 deletions src/poetry/installation/chef.py
@@ -1,7 +1,5 @@
from __future__ import annotations

import hashlib
import json
import tarfile
import tempfile
import zipfile
@@ -19,16 +17,13 @@
from poetry.core.utils.helpers import temporary_directory
from pyproject_hooks import quiet_subprocess_runner # type: ignore[import]

from poetry.installation.chooser import InvalidWheelName
from poetry.installation.chooser import Wheel
from poetry.utils.cache import get_cache_directory_for_link
from poetry.utils.env import ephemeral_environment


if TYPE_CHECKING:
from contextlib import AbstractContextManager

from poetry.core.packages.utils.link import Link

from poetry.config.config import Config
from poetry.repositories import RepositoryPool
from poetry.utils.env import Env
@@ -89,9 +84,7 @@ class Chef:
def __init__(self, config: Config, env: Env, pool: RepositoryPool) -> None:
self._env = env
self._pool = pool
self._cache_dir = (
Path(config.get("cache-dir")).expanduser().joinpath("artifacts")
)
self._cache_dir = config.artifacts_cache_directory

def prepare(
self, archive: Path, output_dir: Path | None = None, *, editable: bool = False
@@ -181,7 +174,9 @@ def _prepare_sdist(self, archive: Path, destination: Path | None = None) -> Path
sdist_dir = archive_dir

if destination is None:
destination = self.get_cache_directory_for_link(Link(archive.as_uri()))
destination = get_cache_directory_for_link(
self._cache_dir, Link(archive.as_uri())
)

destination.mkdir(parents=True, exist_ok=True)

@@ -196,72 +191,3 @@ def _should_prepare(self, archive: Path) -> bool:
@classmethod
def _is_wheel(cls, archive: Path) -> bool:
return archive.suffix == ".whl"

def get_cached_archive_for_link(self, link: Link, *, strict: bool) -> Path | None:
archives = self.get_cached_archives_for_link(link)
if not archives:
return None

candidates: list[tuple[float | None, Path]] = []
for archive in archives:
if strict:
# in strict mode return the original cached archive instead of the
# prioritized archive type.
if link.filename == archive.name:
return archive
continue
if archive.suffix != ".whl":
candidates.append((float("inf"), archive))
continue

try:
wheel = Wheel(archive.name)
except InvalidWheelName:
continue

if not wheel.is_supported_by_environment(self._env):
continue

candidates.append(
(wheel.get_minimum_supported_index(self._env.supported_tags), archive),
)

if not candidates:
return None

return min(candidates)[1]

def get_cached_archives_for_link(self, link: Link) -> list[Path]:
cache_dir = self.get_cache_directory_for_link(link)

archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
paths = []
for archive_type in archive_types:
for archive in cache_dir.glob(f"*.{archive_type}"):
paths.append(Path(archive))

return paths

def get_cache_directory_for_link(self, link: Link) -> Path:
key_parts = {"url": link.url_without_fragment}

if link.hash_name is not None and link.hash is not None:
key_parts[link.hash_name] = link.hash

if link.subdirectory_fragment:
key_parts["subdirectory"] = link.subdirectory_fragment

key_parts["interpreter_name"] = self._env.marker_env["interpreter_name"]
key_parts["interpreter_version"] = "".join(
self._env.marker_env["interpreter_version"].split(".")[:2]
)

key = hashlib.sha256(
json.dumps(
key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
).encode("ascii")
).hexdigest()

split_key = [key[:2], key[2:4], key[4:6], key[6:]]

return self._cache_dir.joinpath(*split_key)
61 changes: 56 additions & 5 deletions src/poetry/installation/executor.py
@@ -20,12 +20,16 @@
from poetry.installation.chef import Chef
from poetry.installation.chef import ChefBuildError
from poetry.installation.chooser import Chooser
from poetry.installation.chooser import InvalidWheelName
from poetry.installation.chooser import Wheel
from poetry.installation.operations import Install
from poetry.installation.operations import Uninstall
from poetry.installation.operations import Update
from poetry.installation.wheel_installer import WheelInstaller
from poetry.utils._compat import decode
from poetry.utils.authenticator import Authenticator
from poetry.utils.cache import get_cache_directory_for_link
from poetry.utils.cache import get_cached_archives_for_link
from poetry.utils.env import EnvCommandError
from poetry.utils.helpers import atomic_open
from poetry.utils.helpers import get_file_hash
@@ -81,6 +85,7 @@ def __init__(
)
self._chef = Chef(config, self._env, pool)
self._chooser = Chooser(pool, self._env, config)
self._artifacts_cache_dir = config.artifacts_cache_directory

self._executor = ThreadPoolExecutor(max_workers=self._max_workers)
self._total_operations = 0
@@ -692,15 +697,19 @@ def _download(self, operation: Install | Update) -> Path:
def _download_link(self, operation: Install | Update, link: Link) -> Path:
package = operation.package

output_dir = self._chef.get_cache_directory_for_link(link)
output_dir = get_cache_directory_for_link(self._artifacts_cache_dir, link)
# Try to get cached original package for the link provided
original_archive = self._chef.get_cached_archive_for_link(link, strict=True)
original_archive = self._get_cached_archive_for_link(
self._env, self._artifacts_cache_dir, link, strict=True
)
if original_archive is None:
# No cached original distribution was found, so we download and prepare it
try:
original_archive = self._download_archive(operation, link)
except BaseException:
cache_directory = self._chef.get_cache_directory_for_link(link)
cache_directory = get_cache_directory_for_link(
self._artifacts_cache_dir, link
)
cached_file = cache_directory.joinpath(link.filename)
# We can't use unlink(missing_ok=True) because it's not available
# prior to Python 3.8
@@ -711,7 +720,9 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path:

# Get potential higher prioritized cached archive, otherwise it will fall back
# to the original archive.
archive = self._chef.get_cached_archive_for_link(link, strict=False)
archive = self._get_cached_archive_for_link(
self._env, self._artifacts_cache_dir, link, strict=False
)
# 'archive' can at this point never be None. Since we previously downloaded
# an archive, we now should have something cached that we can use here
assert archive is not None
@@ -775,7 +786,10 @@ def _download_archive(self, operation: Install | Update, link: Link) -> Path:
progress.start()

done = 0
archive = self._chef.get_cache_directory_for_link(link) / link.filename
archive = (
get_cache_directory_for_link(self._artifacts_cache_dir, link)
/ link.filename
)
archive.parent.mkdir(parents=True, exist_ok=True)
with atomic_open(archive) as f:
for chunk in response.iter_content(chunk_size=4096):
@@ -909,3 +923,40 @@ def _get_archive_info(self, package: Package) -> dict[str, Any]:
archive_info["hashes"] = {algorithm: value}

return archive_info

@staticmethod
def _get_cached_archive_for_link(
env: Env, cache_dir: Path, link: Link, *, strict: bool
) -> Path | None:
archives = get_cached_archives_for_link(cache_dir, link)
if not archives:
return None

candidates: list[tuple[float | None, Path]] = []
for archive in archives:
if strict:
# in strict mode return the original cached archive instead of the
# prioritized archive type.
if link.filename == archive.name:
return archive
continue
if archive.suffix != ".whl":
candidates.append((float("inf"), archive))
continue

try:
wheel = Wheel(archive.name)
except InvalidWheelName:
continue

if not wheel.is_supported_by_environment(env):
continue

candidates.append(
(wheel.get_minimum_supported_index(env.supported_tags), archive),
)

if not candidates:
return None

return min(candidates)[1]
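
The ranking above is easiest to see with concrete values. A standalone toy sketch (made-up filenames, not Poetry code): each candidate is a (priority, archive) pair, a wheel's priority is the index of its best-supported tag (lower is better), and non-wheels rank last via float("inf"), so min() prefers the most specific usable wheel and falls back to the original archive only when no wheel fits.

    # Toy ranking: lower priority wins; sdists sort last via float("inf").
    candidates = [
        (float("inf"), "demo-1.0.0.tar.gz"),                        # sdist fallback
        (12.0, "demo-1.0.0-py3-none-any.whl"),                      # generic wheel
        (3.0, "demo-1.0.0-cp310-cp310-manylinux_2_17_x86_64.whl"),  # specific wheel
    ]
    assert min(candidates)[1] == "demo-1.0.0-cp310-cp310-manylinux_2_17_x86_64.whl"
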
37 changes: 37 additions & 0 deletions src/poetry/utils/cache.py
@@ -8,12 +8,17 @@
import time

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Generic
from typing import TypeVar


if TYPE_CHECKING:
from poetry.core.packages.utils.link import Link


# Used by Cachy for items that do not expire.
MAX_DATE = 9999999999
T = TypeVar("T")
@@ -196,3 +201,35 @@ def _deserialize(self, data_raw: bytes) -> CacheItem[T]:
data = json.loads(data_str[10:])
expires = int(data_str[:10])
return CacheItem(data, expires)


def get_cached_archives_for_link(cache_dir: Path, link: Link) -> list[Path]:
cache_dir = get_cache_directory_for_link(cache_dir, link)

archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
paths = []
for archive_type in archive_types:
for archive in cache_dir.glob(f"*.{archive_type}"):
paths.append(Path(archive))

return paths


def get_cache_directory_for_link(cache_dir: Path, link: Link) -> Path:
key_parts = {"url": link.url_without_fragment}

if link.hash_name is not None and link.hash is not None:
key_parts[link.hash_name] = link.hash

if link.subdirectory_fragment:
key_parts["subdirectory"] = link.subdirectory_fragment

key = hashlib.sha256(
json.dumps(
key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
).encode("ascii")
).hexdigest()

split_key = [key[:2], key[2:4], key[4:6], key[6:]]

return cache_dir.joinpath(*split_key)
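
The extracted helper now takes the cache root explicitly, which is what lets both Chef and Executor share it. A hypothetical usage sketch (the URL and cache root are made up): the key is the sha256 of the canonicalized key parts, and its 64-character hex digest is split 2/2/2/58 into nested directories:

    from pathlib import Path

    from poetry.core.packages.utils.link import Link

    from poetry.utils.cache import get_cache_directory_for_link

    # The key hashes the fragment-free URL, plus the hash and subdirectory
    # fragments when present, so the same link always maps to the same directory.
    link = Link("https://example.com/packages/demo-1.0.0-py3-none-any.whl")
    cache_root = Path.home() / ".cache" / "pypoetry" / "artifacts"
    print(get_cache_directory_for_link(cache_root, link))
    # -> .../artifacts/<2 hex>/<2 hex>/<2 hex>/<58 hex chars>
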