Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: extract cache utilities #7621

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/poetry/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,11 @@ def _get_environment_repositories() -> dict[str, dict[str, str]]:

@property
def repository_cache_directory(self) -> Path:
    """Directory where repository metadata caches live.

    ``cache-dir`` may contain ``~``, so expand it before composing the path.
    (Diff residue had left both the old and new return statements; only the
    expanded-user variant is kept.)
    """
    return Path(self.get("cache-dir")).expanduser() / "cache" / "repositories"

@property
def artifacts_cache_directory(self) -> Path:
    """Directory where downloaded/built artifacts are cached."""
    base_dir = Path(self.get("cache-dir")).expanduser()
    return base_dir / "artifacts"

@property
def virtualenvs_path(self) -> Path:
Expand Down
89 changes: 8 additions & 81 deletions src/poetry/installation/chef.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import annotations

import hashlib
import json
import tarfile
import tempfile
import zipfile
Expand All @@ -19,18 +17,14 @@
from poetry.core.utils.helpers import temporary_directory
from pyproject_hooks import quiet_subprocess_runner # type: ignore[import]

from poetry.installation.chooser import InvalidWheelName
from poetry.installation.chooser import Wheel
from poetry.utils.env import ephemeral_environment


if TYPE_CHECKING:
from contextlib import AbstractContextManager

from poetry.core.packages.utils.link import Link

from poetry.config.config import Config
from poetry.repositories import RepositoryPool
from poetry.utils.cache import ArtifactCache
from poetry.utils.env import Env


Expand Down Expand Up @@ -86,12 +80,12 @@ def install(self, requirements: Collection[str]) -> None:


class Chef:
    """Prepares distribution archives (e.g. builds sdists into wheels)."""

    def __init__(
        self, artifact_cache: ArtifactCache, env: Env, pool: RepositoryPool
    ) -> None:
        # Store collaborators. The artifact cache replaces the previously
        # inlined cache-directory computation (diff residue had left both the
        # old config-based signature and the new one; only the merged,
        # post-refactor version is kept).
        self._env = env
        self._pool = pool
        self._artifact_cache = artifact_cache

def prepare(
self, archive: Path, output_dir: Path | None = None, *, editable: bool = False
Expand Down Expand Up @@ -181,7 +175,9 @@ def _prepare_sdist(self, archive: Path, destination: Path | None = None) -> Path
sdist_dir = archive_dir

if destination is None:
destination = self.get_cache_directory_for_link(Link(archive.as_uri()))
destination = self._artifact_cache.get_cache_directory_for_link(
Link(archive.as_uri())
)

destination.mkdir(parents=True, exist_ok=True)

Expand All @@ -196,72 +192,3 @@ def _should_prepare(self, archive: Path) -> bool:
@classmethod
def _is_wheel(cls, archive: Path) -> bool:
return archive.suffix == ".whl"

def get_cached_archive_for_link(self, link: Link, *, strict: bool) -> Path | None:
    """Pick the best cached archive for *link*, or ``None`` if none applies.

    In strict mode only the archive whose filename matches the link exactly
    (the originally downloaded file) is returned. Otherwise wheels supported
    by the current environment are preferred (lower tag index wins), with
    non-wheel archives ranking behind every supported wheel.
    """
    archives = self.get_cached_archives_for_link(link)
    if not archives:
        return None

    if strict:
        # Return the original cached archive rather than a prioritized type.
        for archive in archives:
            if archive.name == link.filename:
                return archive
        return None

    ranked: list[tuple[float | None, Path]] = []
    for archive in archives:
        if archive.suffix != ".whl":
            # Non-wheel archives always rank last.
            ranked.append((float("inf"), archive))
            continue

        try:
            wheel = Wheel(archive.name)
        except InvalidWheelName:
            continue

        if wheel.is_supported_by_environment(self._env):
            ranked.append(
                (wheel.get_minimum_supported_index(self._env.supported_tags), archive)
            )

    return min(ranked)[1] if ranked else None

def get_cached_archives_for_link(self, link: Link) -> list[Path]:
    """List every cached archive (wheel or sdist) stored for *link*."""
    cache_dir = self.get_cache_directory_for_link(link)

    # Collect all recognised archive extensions present in the cache dir.
    extensions = ("whl", "tar.gz", "tar.bz2", "bz2", "zip")
    return [
        Path(found)
        for ext in extensions
        for found in cache_dir.glob(f"*.{ext}")
    ]

def get_cache_directory_for_link(self, link: Link) -> Path:
    """Compute the per-link cache directory.

    The directory name is a sha256 over the link URL plus any hash and
    subdirectory fragments and the interpreter identity, split into nested
    two-character segments to keep directory fan-out small.
    """
    key_parts = {"url": link.url_without_fragment}

    if link.hash_name is not None and link.hash is not None:
        key_parts[link.hash_name] = link.hash
    if link.subdirectory_fragment:
        key_parts["subdirectory"] = link.subdirectory_fragment

    marker_env = self._env.marker_env
    key_parts["interpreter_name"] = marker_env["interpreter_name"]
    major_minor = marker_env["interpreter_version"].split(".")[:2]
    key_parts["interpreter_version"] = "".join(major_minor)

    # Stable serialization -> stable key for the same logical link.
    serialized = json.dumps(
        key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
    )
    digest = hashlib.sha256(serialized.encode("ascii")).hexdigest()

    return self._cache_dir.joinpath(digest[:2], digest[2:4], digest[4:6], digest[6:])
35 changes: 1 addition & 34 deletions src/poetry/installation/chooser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
from typing import TYPE_CHECKING
from typing import Any

from packaging.tags import Tag

from poetry.config.config import Config
from poetry.config.config import PackageFilterPolicy
from poetry.utils.patterns import wheel_file_re
from poetry.utils.wheel import Wheel


if TYPE_CHECKING:
Expand All @@ -25,37 +23,6 @@
logger = logging.getLogger(__name__)


class InvalidWheelName(Exception):
    """Raised when a filename cannot be parsed as a valid wheel name."""


class Wheel:
    """Parsed representation of a wheel filename (PEP 427 naming, PEP 425 tags)."""

    def __init__(self, filename: str) -> None:
        wheel_info = wheel_file_re.match(filename)
        if not wheel_info:
            # Bug fix: interpolate the offending filename into the message —
            # the f-string previously contained no placeholder.
            raise InvalidWheelName(f"{filename} is not a valid wheel filename.")

        self.filename = filename
        self.name = wheel_info.group("name").replace("_", "-")
        self.version = wheel_info.group("ver").replace("_", "-")
        self.build_tag = wheel_info.group("build")
        self.pyversions = wheel_info.group("pyver").split(".")
        self.abis = wheel_info.group("abi").split(".")
        self.plats = wheel_info.group("plat").split(".")

        # Full set of compatibility tags this wheel advertises.
        self.tags = {
            Tag(x, y, z) for x in self.pyversions for y in self.abis for z in self.plats
        }

    def get_minimum_supported_index(self, tags: list[Tag]) -> int | None:
        """Index of the most-preferred matching tag in *tags*, or None if none match."""
        indexes = [tags.index(t) for t in self.tags if t in tags]

        return min(indexes) if indexes else None

    def is_supported_by_environment(self, env: Env) -> bool:
        """Whether at least one of this wheel's tags is supported by *env*."""
        return bool(set(env.supported_tags).intersection(self.tags))


class Chooser:
"""
A Chooser chooses an appropriate release archive for packages.
Expand Down
24 changes: 18 additions & 6 deletions src/poetry/installation/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from poetry.puzzle.exceptions import SolverProblemError
from poetry.utils._compat import decode
from poetry.utils.authenticator import Authenticator
from poetry.utils.cache import ArtifactCache
from poetry.utils.env import EnvCommandError
from poetry.utils.helpers import atomic_open
from poetry.utils.helpers import get_file_hash
Expand Down Expand Up @@ -77,10 +78,11 @@ def __init__(
else:
self._max_workers = 1

self._artifact_cache = ArtifactCache(cache_dir=config.artifacts_cache_directory)
self._authenticator = Authenticator(
config, self._io, disable_cache=disable_cache, pool_size=self._max_workers
)
self._chef = Chef(config, self._env, pool)
self._chef = Chef(self._artifact_cache, self._env, pool)
self._chooser = Chooser(pool, self._env, config)

self._executor = ThreadPoolExecutor(max_workers=self._max_workers)
Expand Down Expand Up @@ -709,15 +711,19 @@ def _download(self, operation: Install | Update) -> Path:
def _download_link(self, operation: Install | Update, link: Link) -> Path:
package = operation.package

output_dir = self._chef.get_cache_directory_for_link(link)
output_dir = self._artifact_cache.get_cache_directory_for_link(link)
# Try to get cached original package for the link provided
original_archive = self._chef.get_cached_archive_for_link(link, strict=True)
original_archive = self._artifact_cache.get_cached_archive_for_link(
link, strict=True
)
if original_archive is None:
# No cached original distribution was found, so we download and prepare it
try:
original_archive = self._download_archive(operation, link)
except BaseException:
cache_directory = self._chef.get_cache_directory_for_link(link)
cache_directory = self._artifact_cache.get_cache_directory_for_link(
link
)
cached_file = cache_directory.joinpath(link.filename)
# We can't use unlink(missing_ok=True) because it's not available
# prior to Python 3.8
Expand All @@ -728,7 +734,11 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path:

# Get potential higher prioritized cached archive, otherwise it will fall back
# to the original archive.
archive = self._chef.get_cached_archive_for_link(link, strict=False)
archive = self._artifact_cache.get_cached_archive_for_link(
link,
strict=False,
env=self._env,
)
# 'archive' can at this point never be None. Since we previously downloaded
# an archive, we now should have something cached that we can use here
assert archive is not None
Expand Down Expand Up @@ -792,7 +802,9 @@ def _download_archive(self, operation: Install | Update, link: Link) -> Path:
progress.start()

done = 0
archive = self._chef.get_cache_directory_for_link(link) / link.filename
archive = (
self._artifact_cache.get_cache_directory_for_link(link) / link.filename
)
archive.parent.mkdir(parents=True, exist_ok=True)
with atomic_open(archive) as f:
for chunk in response.iter_content(chunk_size=4096):
Expand Down
90 changes: 90 additions & 0 deletions src/poetry/utils/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,21 @@
import time

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Generic
from typing import TypeVar

from poetry.utils.wheel import InvalidWheelName
from poetry.utils.wheel import Wheel


if TYPE_CHECKING:
from poetry.core.packages.utils.link import Link

from poetry.utils.env import Env


# Used by Cachy for items that do not expire.
MAX_DATE = 9999999999
Expand Down Expand Up @@ -196,3 +206,83 @@ def _deserialize(self, data_raw: bytes) -> CacheItem[T]:
data = json.loads(data_str[10:])
expires = int(data_str[:10])
return CacheItem(data, expires)


class ArtifactCache:
    """File-system cache for downloaded/built distribution artifacts.

    Artifacts are stored under a directory derived from a sha256 key of the
    link they were fetched from (plus hash and subdirectory fragments).
    """

    def __init__(self, *, cache_dir: Path) -> None:
        self._cache_dir = cache_dir

    def get_cache_directory_for_link(self, link: Link) -> Path:
        """Return the directory under which artifacts for *link* are cached."""
        key_parts = {"url": link.url_without_fragment}

        if link.hash_name is not None and link.hash is not None:
            key_parts[link.hash_name] = link.hash

        if link.subdirectory_fragment:
            key_parts["subdirectory"] = link.subdirectory_fragment

        # Stable serialization -> stable key for the same logical link.
        key = hashlib.sha256(
            json.dumps(
                key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
            ).encode("ascii")
        ).hexdigest()

        # Nest as xx/yy/zz/rest to keep per-directory fan-out small.
        split_key = [key[:2], key[2:4], key[4:6], key[6:]]

        return self._cache_dir.joinpath(*split_key)

    def get_cached_archive_for_link(
        self,
        link: Link,
        *,
        strict: bool,
        env: Env | None = None,
    ) -> Path | None:
        """Return the best cached archive for *link*, or None if none applies.

        strict: return only the archive whose filename matches the link
            exactly (the originally downloaded file).
        env: required when ``strict`` is False; used to rank wheels by how
            well their tags match the environment.
        """
        # env is only optional in strict mode, where it is never consulted.
        assert strict or env is not None

        archives = self._get_cached_archives_for_link(link)
        if not archives:
            return None

        candidates: list[tuple[float | None, Path]] = []
        for archive in archives:
            if strict:
                # in strict mode return the original cached archive instead of
                # the prioritized archive type.
                if link.filename == archive.name:
                    return archive
                continue

            assert env is not None

            if archive.suffix != ".whl":
                # Non-wheel archives rank behind every supported wheel.
                candidates.append((float("inf"), archive))
                continue

            try:
                wheel = Wheel(archive.name)
            except InvalidWheelName:
                continue

            if not wheel.is_supported_by_environment(env):
                continue

            candidates.append(
                (wheel.get_minimum_supported_index(env.supported_tags), archive),
            )

        if not candidates:
            return None

        return min(candidates)[1]

    def _get_cached_archives_for_link(self, link: Link) -> list[Path]:
        """List all cached archives of recognised types for *link*."""
        cache_dir = self.get_cache_directory_for_link(link)

        archive_types = ("whl", "tar.gz", "tar.bz2", "bz2", "zip")
        # glob() already yields Path objects — no re-wrapping needed;
        # manual append loop replaced with a comprehension.
        return [
            archive
            for archive_type in archive_types
            for archive in cache_dir.glob(f"*.{archive_type}")
        ]
Loading