Skip to content

Commit

Permalink
Switch write path to transact in URLs
Browse files Browse the repository at this point in the history
Previously, the read path for slicedimage operated with URLs.  This allowed us to have manifests on local disk that referred to network resources for tiles.  This PR will use URLs as the destination for manifest files and tile data.

This PR also changes the way users can control the behavior of writing.  Previously, we allowed for two callbacks: one allowed callers to designate where a sub-manifest is located on disk, and the other allowed callers to open a file for writing tile data.  This turns out not to capture all the use cases for how we might want to control writing, so this PR creates WriterContract.  It has three callbacks: one to designate where a sub-manifest is located in URL-space, another to designate where a tile is located in URL space, and one to write tile data to a URL.

Furthermore, we provide a CompatibilityWriterContract that _mostly_ mimics the behavior of the old callbacks.  The one notable exception is that the tricks utilized to perform in-place tile writing no longer work.  CompatibilityWriterContract finds the destination in `tile_url_generator` by opening the tile data file, finding out where it is on disk, and then closing the file.  The new callback (`write_tile`) just opens the path and writes to it.  There is no way to maintain the file handle between `tile_url_generator` and `write_tile`.

Because this is a slight change in behavior, we bump the version of the library to 4.0.0.

Test plan: verified that the existing tests pass.  Verified that the existing starfish imagestack and experiment tests pass with this library (except for in-place tile construction).  Verified that the new in-place code for starfish passes with this library.
  • Loading branch information
Tony Tung committed Jul 12, 2019
1 parent 2d849bd commit dfbd13f
Show file tree
Hide file tree
Showing 13 changed files with 315 additions and 114 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setuptools.setup(
name="slicedimage",
version="3.2.0",
version="4.0.0",
description="Library to access sliced imaging data",
author="Tony Tung",
author_email="[email protected]",
Expand Down
2 changes: 1 addition & 1 deletion slicedimage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from ._collection import Collection
from ._tile import Tile
from ._tileset import TileSet
from .io import Reader, Writer, v0_0_0, v0_1_0, VERSIONS
from .io import Reader, v0_0_0, v0_1_0, Writer, WriterContract, VERSIONS
2 changes: 1 addition & 1 deletion slicedimage/backends/_disk.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self, basedir):
def read_contextmanager(self, name, checksum_sha256=None):
return _FileLikeContextManager(os.path.join(self._basedir, name), checksum_sha256)

def write_file_handle(self, name=None):
def write_file_handle(self, name):
return open(os.path.join(self._basedir, name), "wb")


Expand Down
4 changes: 3 additions & 1 deletion slicedimage/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from ._base import Reader, VERSIONS, Writer
from slicedimage.url.resolve import resolve_url, resolve_path_or_url

from ._base import Reader, VERSIONS, Writer, WriterContract
from ._v0_0_0 import v0_0_0
from ._v0_1_0 import v0_1_0
285 changes: 256 additions & 29 deletions slicedimage/io/_base.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,31 @@
import codecs
import json
import pathlib
import tempfile
import hashlib
import urllib.parse
import warnings
from abc import abstractmethod
from typing import MutableSequence, Sequence
from io import BytesIO
from pathlib import Path, PurePath, PurePosixPath
from typing import (
BinaryIO,
Callable,
cast,
Mapping,
MutableSequence,
Optional,
Sequence,
TextIO,
Union,
)

from packaging import version

from slicedimage.url.resolve import resolve_url
from slicedimage._collection import Collection
from slicedimage._compat import fspath
from slicedimage._formats import ImageFormat
from slicedimage._tile import Tile
from slicedimage._tileset import TileSet
from ._keys import CommonPartitionKeys


Expand Down Expand Up @@ -55,44 +72,254 @@ def parse(self, json_doc, baseurl, backend_config):

class Writer:
@staticmethod
def write_to_path(partition, path, pretty=False, version_class=None, *args, **kwargs):
def write_to_path(
partition: Union[Collection, TileSet],
path: Path,
pretty: bool = False,
version_class=None,
*args, **kwargs):
if isinstance(path, str):
warnings.warn("Paths should be passed in as pathlib.Path objects", DeprecationWarning)
path = pathlib.Path(path)
path = Path(path)
if version_class is None:
version_class = VERSIONS[-1]
document = version_class.Writer().generate_partition_document(
partition, path, pretty, *args, **kwargs)
indent = 4 if pretty else None
with open(str(path), "w") as fh:
json.dump(document, fh, indent=indent, sort_keys=pretty)

@staticmethod
def default_partition_path_generator(parent_partition_path, partition_name):
parent_partition_stem = parent_partition_path.stem
partition_file = tempfile.NamedTemporaryFile(
suffix=".json",
prefix="{}-".format(parent_partition_stem),
dir=str(parent_partition_path.parent),
delete=False,
)
return pathlib.Path(partition_file.name)
partition_path_generator = None # type: Optional[Callable[[PurePath, str], Path]]
if len(args) > 0:
partition_path_generator = args[0]
args = args[1:]
elif 'partition_path_generator' in kwargs:
partition_path_generator = kwargs.pop('partition_path_generator')
if partition_path_generator is not None:
warnings.warn(
"`partition_path_generator` is deprecated. Please use `WriterContract` to control "
"the behavior of image writing",
DeprecationWarning
)

tile_opener = None # type: Optional[Callable[[PurePath, Tile, str], BinaryIO]]
if len(args) > 0:
tile_opener = args[0]
args = args[1:]
elif 'tile_opener' in kwargs:
tile_opener = kwargs.pop('tile_opener')
if tile_opener is not None:
warnings.warn(
"`tile_opener` is deprecated. Please use `WriterContract` to control the behavior "
"of image writing",
DeprecationWarning
)

writer_contract = None # type: Optional[WriterContract]
if len(args) > 0:
writer_contract = args[0]
args = args[1:]
elif 'writer_contract' in kwargs:
writer_contract = kwargs.pop('writer_contract')

if partition_path_generator is not None or tile_opener is not None:
if writer_contract is not None:
raise ValueError(
"Cannot specify both `writer_contract` and `partition_path_generator` or "
"`tile_opener`")
kwargs['writer_contract'] = CompatibilityWriterContract(
partition_path_generator, tile_opener)
elif writer_contract is not None:
kwargs['writer_contract'] = writer_contract
else:
kwargs['writer_contract'] = WriterContract()

url = urllib.parse.urlunparse(("file", "", fspath(path), "", "", ""))

return Writer.write_to_url(partition, url, pretty, version_class, *args, **kwargs)

@staticmethod
def default_tile_opener(tileset_path, tile, ext):
tileset_stem = tileset_path.stem
return tempfile.NamedTemporaryFile(
suffix=".{}".format(ext),
prefix="{}-".format(tileset_stem),
dir=str(tileset_path.parent),
delete=False,
)
def write_to_url(
partition: Union[Collection, TileSet],
url: str,
pretty: bool = False,
version_class=None,
*args, **kwargs):
# Serialize `partition` to a JSON manifest and write it to `url` through the backend that
# `resolve_url` selects for that URL.  Extra positional and keyword arguments are forwarded
# unchanged to the version-specific `generate_partition_document`.
#
# When no format version is requested, use the last entry of VERSIONS.
if version_class is None:
version_class = VERSIONS[-1]

document = version_class.Writer().generate_partition_document(
partition, url, pretty, *args, **kwargs)
# `pretty` controls both the indentation and (below) whether the keys are sorted.
indent = 4 if pretty else None

backend, name, _ = resolve_url(url)
with backend.write_file_handle(name) as fh:
# Wrap the backend's handle in a UTF-8 encoding writer so that json.dump, which emits text,
# can write to it; ensure_ascii=False leaves non-ASCII characters unescaped in the output.
writer = cast(TextIO, codecs.getwriter("utf-8")(fh))
json.dump(document, writer, indent=indent, sort_keys=pretty, ensure_ascii=False)

@abstractmethod
def generate_partition_document(self, partition, path, pretty=False, *args, **kwargs):
def generate_partition_document(
self,
partition: Union[Collection, TileSet],
url: str,
pretty: bool = False,
*args, **kwargs):
raise NotImplementedError()


class WriterContract:
    """Hooks that decide where partitions and tiles are written and how tile data is stored.

    Subclass this and override any of the three methods to customize writing.  The default
    implementations place sub-partitions and tiles alongside their parent document, deriving
    the file names from the stem of the parent's file name.
    """

    def partition_url_generator(self, parent_partition_url: str, partition_name: str) -> str:
        """Given the url of the parent partition and the name of a partition to be added to the
        parent partition, return the url of the resulting partition.

        Parameters
        ----------
        parent_partition_url : str
            The URL of the parent partition.
        partition_name : str
            The name of the partition we're adding to the parent partition.

        Returns
        -------
        str :
            The URL of the partition being added.
        """
        parent_parsed_url = urllib.parse.urlparse(parent_partition_url)
        # URL paths always use forward slashes, hence PurePosixPath regardless of platform.
        parent_path = PurePosixPath(parent_parsed_url.path)
        partition_filename = "{}-{}.json".format(parent_path.stem, partition_name)
        partition_path = parent_path.parent / partition_filename
        # Only the path component changes; every other URL component is carried over.
        partition_parsed_url = parent_parsed_url._replace(path=str(partition_path))
        return urllib.parse.urlunparse(partition_parsed_url)

    def tile_url_generator(self, tileset_url: str, tile: Tile, ext: str) -> str:
        """Given the url of a tileset and a tile to be added to the tileset, return the url where
        the tile data is written to.

        Parameters
        ----------
        tileset_url : str
            The URL of the tileset
        tile : Tile
            The tile to be added to the tileset.
        ext : str
            The extension to be used for writing the tile data.

        Returns
        -------
        str :
            The URL of the tile being added.
        """
        tileset_parsed_url = urllib.parse.urlparse(tileset_url)
        tileset_path = PurePosixPath(tileset_parsed_url.path)
        # Sorting the index names makes the generated file name independent of the order in
        # which the indices were inserted into the tile.
        indices_sorted_str = "-".join(
            "{}{}".format(index_name, tile.indices[index_name])
            for index_name in sorted(tile.indices.keys())
        )
        tile_filename = "{}-{}.{}".format(tileset_path.stem, indices_sorted_str, ext)
        tile_path = tileset_path.parent / tile_filename
        tile_parsed_url = tileset_parsed_url._replace(path=str(tile_path))
        return urllib.parse.urlunparse(tile_parsed_url)

    def write_tile(
            self,
            tile_url: str,
            tile: Tile,
            tile_format: ImageFormat,
            backend_config: Optional[Mapping] = None,
    ) -> str:
        """Write the data for a tile to a given URL.

        Parameters
        ----------
        tile_url : str
            The URL of the tile.
        tile : Tile
            The tile to be written.
        tile_format : ImageFormat
            The format to write the tile in.
        backend_config : Optional[Mapping]
            Mapping from the backend names to the config

        Returns
        -------
        str :
            The sha256 of the tile being added.
        """
        backend, name, _ = resolve_url(tile_url, backend_config=backend_config)

        # Encode the tile into memory once, then reuse the same bytes for both the checksum
        # and the backend write.
        buffer_fh = BytesIO()
        tile.write(buffer_fh, tile_format)
        tile_bytes = buffer_fh.getvalue()

        sha256 = hashlib.sha256(tile_bytes).hexdigest()

        with backend.write_file_handle(name) as fh:
            fh.write(tile_bytes)

        return sha256


class CompatibilityWriterContract(WriterContract):
    """This provides a WriterContract to support the previous API of partition_path_generator and
    tile_opener.  This compatibility layer only works with URLs with the scheme ``file``.

    Either callback may be omitted, in which case the corresponding method falls back to the
    default ``WriterContract`` behavior.
    """

    def __init__(
            self,
            partition_path_generator: Optional[Callable[[PurePath, str], Path]] = None,
            tile_opener: Optional[Callable[[PurePath, Tile, str], BinaryIO]] = None,
    ):
        self.partition_path_generator = partition_path_generator
        self.tile_opener = tile_opener

    @staticmethod
    def _parse_file_url(url: str) -> urllib.parse.ParseResult:
        """Parse ``url`` and raise ``ValueError`` unless its scheme is ``file``.

        An explicit exception is used rather than ``assert`` so that the check still runs when
        Python is started with ``-O``.
        """
        parsed_url = urllib.parse.urlparse(url)
        if parsed_url.scheme != "file":
            raise ValueError(
                "CompatibilityWriterContract only supports URLs with the `file` scheme; "
                "got {!r}".format(url))
        return parsed_url

    def partition_url_generator(self, parent_partition_url: str, partition_name: str) -> str:
        """Given the url of the parent partition and the name of a partition to be added to the
        parent partition, return the url of the resulting partition.

        Parameters
        ----------
        parent_partition_url : str
            The URL of the parent partition.
        partition_name : str
            The name of the partition we're adding to the parent partition.

        Returns
        -------
        str :
            The URL of the partition being added.

        Raises
        ------
        ValueError :
            If a legacy ``partition_path_generator`` is set and the URL scheme is not ``file``.
        """
        if self.partition_path_generator is None:
            return super().partition_url_generator(parent_partition_url, partition_name)

        parent_parsed_url = self._parse_file_url(parent_partition_url)
        parent_path = PurePosixPath(parent_parsed_url.path)
        # The legacy callback works on filesystem paths; translate its answer back into a URL.
        partition_path = self.partition_path_generator(parent_path, partition_name)
        partition_parsed_url = parent_parsed_url._replace(path=str(partition_path))
        return urllib.parse.urlunparse(partition_parsed_url)

    def tile_url_generator(self, tileset_url: str, tile: Tile, ext: str) -> str:
        """Given the url of a tileset and a tile to be added to the tileset, return the url where
        the tile data is written to.

        Parameters
        ----------
        tileset_url : str
            The URL of the tileset
        tile : Tile
            The tile to be added to the tileset.
        ext : str
            The extension to be used for writing the tile data.

        Returns
        -------
        str :
            The URL of the tile being added.

        Raises
        ------
        ValueError :
            If a legacy ``tile_opener`` is set and the URL scheme is not ``file``.
        """
        if self.tile_opener is None:
            return super().tile_url_generator(tileset_url, tile, ext)

        tileset_parsed_url = self._parse_file_url(tileset_url)
        tileset_path = PurePosixPath(tileset_parsed_url.path)
        # The legacy callback returns an open file handle.  It is opened only to learn the
        # path it refers to and is closed again on leaving the ``with`` block; the handle is
        # not kept for the later data write.
        with self.tile_opener(tileset_path, tile, ext) as open_fh:
            tile_path = open_fh.name

        tile_parsed_url = tileset_parsed_url._replace(path=str(tile_path))
        return urllib.parse.urlunparse(tile_parsed_url)


def _parse_collection(parse_method, baseurl, backend_config):
"""Return a method that binds a parse method, a baseurl, and a backend config to a method that
accepts name and path of a partition belonging to a collection. The method should then return
Expand Down
Loading

0 comments on commit dfbd13f

Please sign in to comment.