Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

An almost working implementation to install from .whl #3

Closed
wants to merge 16 commits into from
144 changes: 144 additions & 0 deletions examples/whl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""Install from a .whl file.
"""

import argparse
import codecs
import contextlib
import errno
import os
import sys
import zipfile

import six

from installer._compat import pathlib
from installer._compat.typing import TYPE_CHECKING
from installer.layouts import DistInfo
from installer.records import RecordItem, parse_record_file, write_record_file

if TYPE_CHECKING:
from typing import Any, ContextManager, Dict, IO, Iterator


def _wrap_as_io_str(f):
    # type: (IO[six.binary_type]) -> Iterator[str]
    """Adapt a binary file object so it can be consumed as text lines.

    On Python 3 the stream is wrapped in a UTF-8 decoding reader. On any
    other interpreter the binary stream is handed back untouched.
    """
    if six.PY3:
        utf8_reader = codecs.getreader("utf-8")
        return utf8_reader(f)
    return f


def _makedirs_exist_ok(path):
# type: (pathlib.Path) -> None
"""Implement ``Path.mkdir(parents=True, exist_ok=True)`` Python 2.
"""
try:
os.makedirs(str(path))
except OSError as e:
if e.errno != errno.EEXIST:
raise


class ZipFileInstaller(object):
"""Install a local wheel.
"""

def __init__(self, name, dist_info, zip_file_handle):
    # type: (str, DistInfo, zipfile.ZipFile) -> None
    """Store the collaborators needed to install from an open wheel.

    :param name: Installer name; it is written to the INSTALLER metadata
        file and used as the suffix of temporary file names.
    :param dist_info: Layout object giving the paths of the metadata files.
    :param zip_file_handle: Open archive that the wheel entries are read
        from.
    """
    self._name = name
    self._dist_info = dist_info
    self._zip_file_handle = zip_file_handle

@classmethod
@contextlib.contextmanager
def from_wheel_path(cls, name, wheel_path):
    # type: (str, pathlib.Path) -> Iterator[ZipFileInstaller]
    """Open the wheel at ``wheel_path`` and yield an installer bound to it.

    The project name and version are taken from the first two
    dash-separated fields of the wheel's file stem and are used to locate
    the matching ``.dist-info`` entry among the archive's top-level names.
    The archive stays open only while the context is active.
    """
    project_name, project_version, _ = wheel_path.stem.split("-", 2)
    with zipfile.ZipFile(str(wheel_path)) as archive:
        # Only the first path component of every archive member matters.
        top_level_entries = (
            entry.lstrip("/").split("/", 1)[0] for entry in archive.namelist()
        )
        located = DistInfo.find(project_name, project_version, top_level_entries)
        yield cls(name, located, archive)

@contextlib.contextmanager
def _open_adjacent_tmp_for_write(self, path, **kwargs):
    # type: (pathlib.Path, Any) -> Iterator[IO[Any]]
    """Write to a temporary sibling of ``path`` and rename it into place.

    The parent directory is created if needed. The caller receives a file
    object opened with ``kwargs`` on ``<name>.tmp.<installer name>`` next to
    ``path``; once the ``with`` body finishes without error the temporary
    file replaces ``path``. If the body or the rename fails, the temporary
    file is removed so no stray ``.tmp`` file is left behind, and the
    original exception propagates.
    """
    _makedirs_exist_ok(path.parent)
    temp = path.with_name("{}.tmp.{}".format(path.name, self._name))
    committed = False
    try:
        with temp.open(**kwargs) as f:
            yield f
        temp.replace(path)
        committed = True
    finally:
        # A flag plus ``finally`` (rather than ``except ...: raise``) keeps
        # the original exception intact on Python 2 even though the cleanup
        # below may itself handle an OSError.
        if not committed:
            try:
                temp.unlink()
            except OSError:
                # The temporary file may never have been created.
                pass

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that it also replaces the target, I feel that just "open" is not an adequate name here.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then again what's the point of this tmp file rather than just direct-write?

Copy link
Member Author

@uranusjr uranusjr May 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I stole this pattern from pip and thought the original implementation must exist for a reason. Likely some kind of edge case about writing to an already-open file.

I’m not really in love with the naming of this either, but can’t think of a better one.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pradyunsg care to enlighten us? To me, at the moment, this seems like a needless complication 😊 At the very least, let's document why we need this awkward approach.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to trace this, and… it takes me straight back to the initial implementation of wheel support! cc @dholth

pypa/pip@aa5b33d#diff-2695f32c4432acd141c3dbe7e7e3a6b0R152

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's likely to prevent concurrency issues, the rename operation being atomic.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If so I consider this a bad approach, it silently swallows an important error. Sure you will not throw a failure... but instead, you're getting an environment that might not be what you expect. I'd rather throw the failure to notify the user. E.g. imagine two processes install in parallel different versions; with this approach, we'll not throw an error, but the created version will be a combination of version A and B. I'd rather throw and let the user know that something bad is happening.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not to mention that while you might not get failure on the .dist-info file, you can still get the same concurrency failure on .dist-info.tmp file.


def _open_target_for_write(self, path, binary=False):
# type: (pathlib.Path, bool) -> ContextManager[IO[Any]]
if binary:
kwargs = {"mode": "wb"}
else:
kwargs = {"mode": "w", "encoding": "utf-8"}
return self._open_adjacent_tmp_for_write(path, **kwargs)

def _open_csv_for_write(self, path):
# type: (pathlib.Path) -> ContextManager[IO[Any]]
if sys.version_info < (3,):
kwargs = {"mode": "wb"}
else:
kwargs = {"mode": "w", "newline": "", "encoding": "utf-8"}
return self._open_adjacent_tmp_for_write(path, **kwargs)

def _install_record_item(self, item, directory):
    # type: (RecordItem, pathlib.Path) -> None
    """Copy one recorded archive member into ``directory``.

    The member named by ``item.path`` is read from the wheel, checked with
    ``item.raise_for_validation``, and then written in binary mode to the
    same relative path below ``directory``.
    """
    archive_member = str(item.path)
    with self._zip_file_handle.open(archive_member) as source:
        payload = source.read()
    item.raise_for_validation(payload)
    destination = directory.joinpath(item.path)
    with self._open_target_for_write(destination, binary=True) as sink:
        sink.write(payload)
    # TODO: Handle file permission and other metadata.

def _iter_installed_record_items(self, directory):
    # type: (pathlib.Path) -> Iterator[RecordItem]
    """Install every entry listed in the wheel's RECORD, yielding each one.

    This is a generator: files are installed lazily, one per iteration
    step, and the RECORD member stays open while iteration is in progress.
    """
    record_name = str(self._dist_info.record)
    with self._zip_file_handle.open(record_name) as record_stream:
        text_lines = _wrap_as_io_str(record_stream)
        for record_item in parse_record_file(text_lines):
            self._install_record_item(record_item, directory)
            yield record_item

def _iter_installed_scripts(self, directory):
    # type: (pathlib.Path) -> Iterator[RecordItem]
    """Placeholder for script installation; currently yields nothing.

    ``directory`` is accepted but not used yet.
    """
    return iter(())  # TODO: Implement me.

def _write_additional_metadata(self, directory):
    # type: (pathlib.Path) -> Iterator[RecordItem]
    """Write installer-generated metadata files and yield their records.

    Currently only the INSTALLER file is produced; it contains the
    installer name given at construction time. The yielded record carries
    neither a hash nor a size.
    """
    installer = directory.joinpath(self._dist_info.installer)
    with self._open_target_for_write(installer) as f:
        # The target is opened in text mode, which on Python 2 only accepts
        # unicode; a native ``str`` name (e.g. from argparse) would raise
        # TypeError there. ``ensure_text`` is a no-op on Python 3.
        f.write(six.ensure_text(self._name))
    yield RecordItem(self._dist_info.installer, None, None)
    # TODO: Write direct_url.json.

def _write_record(self, directory, installed_items):
    # type: (pathlib.Path, Dict[pathlib.PurePosixPath, RecordItem]) -> None
    """Write the RECORD file describing everything that was installed.

    An entry for RECORD itself (without hash or size) is added to
    ``installed_items`` in place before the file is written.
    """
    record_path = self._dist_info.record
    installed_items[record_path] = RecordItem(record_path, None, None)
    destination = directory.joinpath(record_path)
    with self._open_csv_for_write(destination) as record_file:
        write_record_file(record_file, installed_items.values())

def install(self, directory):
    # type: (pathlib.Path) -> None
    """Install the wheel's contents into ``directory``.

    Installed items are collected in a dict keyed by their path so that the
    RECORD file can be written from them at the end.
    """
    # Consuming the generator is what actually installs each recorded file.
    items = {r.path: r for r in self._iter_installed_record_items(directory)}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the iter keyword, feels like redundant to me 🤔

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do like to spell out an iterator to distinguish it from sequences. The call site needs to be aware of their different lifetime characteristics, otherwise it’s too easy for a maintainer to accidentally iterate through an iterator twice.

# TODO: Install .data directory.
items.update((r.path, r) for r in self._iter_installed_scripts(directory))
items.update((r.path, r) for r in self._write_additional_metadata(directory))
self._write_record(directory, items)
# TODO: Compile .pyc files.


def main(args=None):
    """Parse command-line arguments and install a wheel into a directory.

    :param args: Argument list to parse; ``None`` makes argparse fall back
        to ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("wheel", type=pathlib.Path)
    parser.add_argument("dest", type=pathlib.Path)
    parser.add_argument("--installer", default="pypa-installer")

    options = parser.parse_args(args)
    # ``from_wheel_path`` is the alternate constructor ZipFileInstaller
    # defines; the class has no ``create`` attribute.
    open_installer = ZipFileInstaller.from_wheel_path
    with open_installer(options.installer, options.wheel) as installer:
        installer.install(options.dest)


if __name__ == "__main__":

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah this should go into the __main__ 👍

main()
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ description-file = "README.md"
classifiers = ["License :: OSI Approved :: MIT License"]
requires-python = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*"
requires = [
"pathlib2; python_version < '3.4'"
"pathlib2; python_version < '3.4'",
uranusjr marked this conversation as resolved.
Show resolved Hide resolved
"six",
uranusjr marked this conversation as resolved.
Show resolved Hide resolved
]

[tool.flit.metadata.requires-extra]
Expand Down
8 changes: 4 additions & 4 deletions src/installer/_compat/pathlib.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
__all__ = ["PurePosixPath"]

import sys

__all__ = ["Path", "PurePath", "PurePosixPath"]

if sys.version_info >= (3, 4): # pragma: no cover
from pathlib import PurePosixPath
from pathlib import Path, PurePath, PurePosixPath
else: # pragma: no cover
from pathlib2 import PurePosixPath
from pathlib2 import Path, PurePath, PurePosixPath
4 changes: 2 additions & 2 deletions src/installer/_compat/typing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__all__ = ["TYPE_CHECKING"]

try: # pragma: no cover
from typing import TYPE_CHECKING
except ImportError: # pragma: no cover
TYPE_CHECKING = False

__all__ = ["TYPE_CHECKING"]
27 changes: 27 additions & 0 deletions src/installer/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
__all__ = [
"InvalidWheel",
"MetadataNotFound",
"RecordItemError",
"RecordItemHashMismatch",
"RecordItemSizeMismatch",
]


class InvalidWheel(Exception):
    """Base class for errors reporting that a wheel cannot be installed as-is."""

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of pass consider adding a docstring that explains when the error may be thrown



class MetadataNotFound(InvalidWheel):
    """Raised when no ``.dist-info`` entry matches the project name and version."""


class RecordItemError(InvalidWheel):
    """Base class for errors found while validating a RECORD entry."""


class RecordItemHashMismatch(RecordItemError):
    """Raised when a file's digest differs from the hash stored in RECORD."""


class RecordItemSizeMismatch(RecordItemError):
    """Raised when a file's length differs from the size stored in RECORD."""
94 changes: 94 additions & 0 deletions src/installer/layouts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import os
import re

import six

from installer._compat import pathlib
from installer._compat.typing import TYPE_CHECKING
from installer.exceptions import MetadataNotFound

if TYPE_CHECKING:
from typing import Iterable, Union

if six.PY2:
FileName = Union[str, six.text_type]
else:
FileName = str

__all__ = ["DistInfo"]


# Matches each run of characters that are not ASCII letters or digits.
_NAME_ESCAPE_REGEX = re.compile(r"[^A-Za-z0-9]+")

# Same as above, except that dots are also left untouched.
_VERSION_ESCAPE_REGEX = re.compile(r"[^A-Za-z0-9\.]+")


def _name_escape(s):
    # type: (six.text_type) -> six.text_type
    """Return the filename-escaped form of a distribution name (PEP 376).

    Every run of characters other than ASCII letters and digits collapses
    into a single ``_``.
    """
    escaped = _NAME_ESCAPE_REGEX.sub("_", s)
    return escaped


def _version_escape(v):
    # type: (six.text_type) -> six.text_type
    """Return the filename-escaped form of a version string (PEP 376).

    Spaces are first turned into dots; afterwards every run of characters
    other than ASCII letters, digits and dots collapses into a single
    ``_``.
    """
    dotted = v.replace(" ", ".")
    return _VERSION_ESCAPE_REGEX.sub("_", dotted)


class DistInfo(object):
    """Describe the ``.dist-info`` directory that belongs to a distribution.

    An instance wraps the directory name and exposes the locations of the
    metadata files inside it as ``PurePosixPath`` objects.
    """

    _EXTENSION = ".dist-info"

    def __init__(self, directory_name):
        # type: (str) -> None
        self.directory_name = directory_name

    @classmethod
    def find(cls, project_name, project_version, entry_names):
        # type: (str, str, Iterable[FileName]) -> DistInfo
        """Return the first entry that is the project's ``.dist-info``.

        Names are compared case-insensitively after escaping; versions are
        compared after escaping.

        :raises MetadataNotFound: if no entry matches. The exception
            argument is the directory name that was expected.
        """
        wanted_name = _name_escape(project_name).lower()
        wanted_version = _version_escape(project_version)

        for candidate in entry_names:
            stem, extension = os.path.splitext(candidate)
            if extension.lower() != cls._EXTENSION:
                continue
            found_name, _, found_version = stem.partition("-")
            # An empty version part means the stem carried no usable
            # "<name>-<version>" split.
            is_match = (
                found_version
                and _name_escape(found_name).lower() == wanted_name
                and _version_escape(found_version) == wanted_version
            )
            if is_match:
                # The directory name needs to be str on Python 2 so we can
                # correctly build paths with pathlib2, which does not take
                # unicode.
                return cls(six.ensure_str(candidate))

        expected_name = "{}-{}{}".format(wanted_name, wanted_version, cls._EXTENSION)
        raise MetadataNotFound(expected_name)

    @property
    def record(self):
        # type: () -> pathlib.PurePosixPath
        """Path of the RECORD file inside the directory."""
        return pathlib.PurePosixPath(self.directory_name).joinpath("RECORD")

    @property
    def installer(self):
        # type: () -> pathlib.PurePosixPath
        """Path of the INSTALLER file inside the directory."""
        return pathlib.PurePosixPath(self.directory_name).joinpath("INSTALLER")

    @property
    def direct_url_json(self):
        # type: () -> pathlib.PurePosixPath
        """Path of the direct_url.json file inside the directory."""
        return pathlib.PurePosixPath(self.directory_name).joinpath("direct_url.json")
60 changes: 51 additions & 9 deletions src/installer/records.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,38 @@
__all__ = [
"Hash",
"RecordItem",
"SuperfulousRecordColumnsWarning",
"parse_record_file",
]

import base64
import csv
import hashlib
import warnings

import six

from installer._compat import pathlib
from installer._compat.typing import TYPE_CHECKING
from installer.exceptions import RecordItemHashMismatch, RecordItemSizeMismatch

if TYPE_CHECKING:
from typing import Iterator, Optional
from typing import Iterable, Iterator, Optional, Protocol, Tuple

class _Writable(Protocol):
def write(self, s):
# type: (str) -> int
pass


__all__ = [
"Hash",
"RecordItem",
"SuperfulousRecordColumnsWarning",
"parse_record_file",
]


class SuperfulousRecordColumnsWarning(UserWarning):
    """Warning category for RECORD rows that have more than three columns."""


class Hash(object):
__slots__ = ("name", "value")

def __init__(self, name, value):
# type: (str, str) -> None
self.name = name
Expand All @@ -34,6 +47,13 @@ def parse(cls, h):
name, value = h.split("=", 1)
return Hash(name, value)

def raise_for_validation(self, data):
    # type: (six.binary_type) -> None
    """Check ``data`` against this hash.

    The digest is computed with the algorithm named by ``self.name`` and
    compared in URL-safe base64 form with the trailing ``=`` padding
    stripped.

    :raises RecordItemHashMismatch: if the encoded digest differs from
        ``self.value``.
    """
    raw_digest = hashlib.new(self.name, data).digest()
    encoded = base64.urlsafe_b64encode(raw_digest)
    actual = encoded.decode("ascii").rstrip("=")
    if actual == self.value:
        return
    raise RecordItemHashMismatch(self, data)


class RecordItem(object):
def __init__(self, path, hash_, size):
Expand Down Expand Up @@ -68,9 +88,25 @@ def parse(cls, path, hash_, size):
size=int(size) if size else None,
)

def raise_for_validation(self, data):
    # type: (six.binary_type) -> None
    """Check ``data`` against the recorded size and hash, when present.

    :raises RecordItemSizeMismatch: if a size is recorded and differs from
        ``len(data)``.

    Any exception raised by the hash's own validation propagates as well.
    """
    size_recorded = self.size is not None
    if size_recorded and len(data) != self.size:
        raise RecordItemSizeMismatch(self.size, data)
    if self.hash_ is None:
        return
    self.hash_.raise_for_validation(data)

def as_row(self):
    # type: () -> Tuple[str, str, str]
    """Return this item as a ``(path, hash, size)`` row of strings.

    A missing hash or size is rendered as an empty string; a present hash
    is rendered as ``<name>=<value>``.
    """
    path_field = str(self.path)
    digest = self.hash_
    if digest is None:
        hash_field = ""
    else:
        hash_field = "{}={}".format(digest.name, digest.value)
    size_field = "" if self.size is None else str(self.size)
    return (path_field, hash_field, size_field)


def parse_record_file(f):
# type: (Iterator[str]) -> Iterator[RecordItem]
# type: (Iterable[str]) -> Iterator[RecordItem]
for row_index, row in enumerate(csv.reader(f)):
if len(row) > 3:
warnings.warn(
Expand All @@ -82,3 +118,9 @@ def parse_record_file(f):
except (IndexError, ValueError):
raise ValueError("invalid row {}: {!r}".format(row_index, row))
yield record


def write_record_file(f, items):
    # type: (_Writable, Iterable[RecordItem]) -> None
    """Write ``items`` to ``f`` as CSV rows, sorted by their row tuples."""
    rows = [item.as_row() for item in items]
    rows.sort()
    csv.writer(f).writerows(rows)
Loading