diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..8332de34
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 120
+extend-ignore = E203
diff --git a/dlup/__init__.py b/dlup/__init__.py
index f96d5aff..04cb9805 100644
--- a/dlup/__init__.py
+++ b/dlup/__init__.py
@@ -1,8 +1,6 @@
-# coding=utf-8
-# Copyright (c) dlup contributors
-"""Top-level package for dlup."""
+# Copyright (c) dlup contributors
+"""Top-level package for dlup."""
 
-from ._exceptions import DlupError, UnsupportedSlideError
+from ._exceptions import UnsupportedSlideError
 from ._image import SlideImage
 from ._region import BoundaryMode, RegionView
diff --git a/dlup/_image.py b/dlup/_image.py
index 7abefa6d..a375e209 100644
--- a/dlup/_image.py
+++ b/dlup/_image.py
@@ -16,13 +16,13 @@
 from typing import Callable, Type, TypeVar, cast
 
 import numpy as np  # type: ignore
-import openslide  # type: ignore
 import PIL
 import PIL.Image  # type: ignore
 
 from dlup import UnsupportedSlideError
 from dlup._region import BoundaryMode, RegionView
-from dlup.experimental_backends import AbstractSlideBackend, ImageBackend
+from dlup.backends.common import AbstractSlideBackend
+from dlup.experimental_backends import ImageBackend
 from dlup.types import GenericFloatArray, GenericIntArray, GenericNumber, PathLike
 from dlup.utils.image import check_if_mpp_is_valid
@@ -116,7 +116,8 @@ def __init__(self, wsi: AbstractSlideBackend, identifier: str | None = None, **k
 
         if self._wsi.spacing is None:
             raise UnsupportedSlideError(
-                f"The spacing of {identifier} cannot be derived from image and is not explicitly set in the `overwrite_mpp` parameter."
+                f"The spacing of {identifier} cannot be derived from image and is "
+                "not explicitly set in the `overwrite_mpp` parameter."
             )
 
         check_if_mpp_is_valid(*self._wsi.spacing)
@@ -207,8 +208,8 @@ def read_region(
         Examples
         --------
         The locations are defined at the requested scaling (with respect to level 0), so if we want to extract at
-        location ``(location_x, location_y)`` of a scaling 0.5 (with respect to level 0), and have resulting tile size of
-        ``(tile_size, tile_size)`` with a scaling factor of 0.5, we can use:
+        location ``(location_x, location_y)`` of a scaling 0.5 (with respect to level 0), and have
+        resulting tile size of ``(tile_size, tile_size)`` with a scaling factor of 0.5, we can use:
         >>> wsi.read_region(location=(coordinate_x, coordinate_y), scaling=0.5, size=(tile_size, tile_size))
         """
         owsi = self._wsi
diff --git a/dlup/_region.py b/dlup/_region.py
index 18ecd819..6e7e0607 100644
--- a/dlup/_region.py
+++ b/dlup/_region.py
@@ -1,13 +1,12 @@
-# coding=utf-8
+# Copyright (c) dlup contributors
 """Defines the RegionView interface."""
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Iterable, Union, cast
+from typing import cast
 
 import numpy as np
-import numpy.typing as npt
 import PIL.Image
 
 from dlup.types import GenericFloatArray, GenericIntArray
diff --git a/dlup/annotations.py b/dlup/annotations.py
index 04a6e4b1..1a3a56cb 100644
--- a/dlup/annotations.py
+++ b/dlup/annotations.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """
 Annotation module for dlup.
@@ -256,13 +255,13 @@ def annotation_class(self, a_cls: AnnotationClass):
         self._type = a_cls.a_cls
         # TODO: We also need to rewrite all the polygons. This cannot yet be set in-place
         _annotations = []
-        for geometry in self._annotations:
-            if isinstance(geometry, shapely.geometry.Polygon):
-                _annotations.append(Polygon(geometry, a_cls=a_cls))
-            elif isinstance(geometry, shapely.geometry.Point):
-                _annotations.append(Point(geometry, a_cls=a_cls))
+        for _geometry in self._annotations:
+            if isinstance(_geometry, shapely.geometry.Polygon):
+                _annotations.append(Polygon(_geometry, a_cls=a_cls))
+            elif isinstance(_geometry, shapely.geometry.Point):
+                _annotations.append(Point(_geometry, a_cls=a_cls))
             else:
-                raise AnnotationError(f"Unknown annotation type {type(geometry)}.")
+                raise AnnotationError(f"Unknown annotation type {type(_geometry)}.")
         self._annotations = _annotations
@@ -539,7 +538,7 @@ def from_asap_xml(
             if isinstance(coordinates, shapely.geometry.collection.GeometryCollection):
                 split_up = [_ for _ in coordinates.geoms if _.area > 0]
                 if len(split_up) != 1:
-                    raise RuntimeError(f"Got unexpected object.")
+                    raise RuntimeError("Got unexpected object.")
                 coordinates = split_up[0]
 
             if coordinates.area == 0:
@@ -640,17 +639,17 @@ def from_darwin_json(cls, darwin_json: PathLike, scaling: float | None = None) -
             _cls = AnnotationClass(label=name, a_cls=annotation_type)
             if annotation_type == AnnotationType.POINT:
                 curr_point = Point((curr_data["x"], curr_data["y"]), a_cls=_cls)
-                curr_point = rescale_geometry(curr_point, scaling=scaling)
+                curr_point = rescale_geometry(curr_point, scaling=_scaling)
                 annotations[key].append(curr_point)
             elif annotation_type == AnnotationType.POLYGON:
                 if "path" in curr_data:  # This is a regular polygon
                     curr_polygon = Polygon([(_["x"], _["y"]) for _ in curr_data["path"]], a_cls=_cls)
-                    curr_polygon = rescale_geometry(curr_polygon, scaling=scaling)
+                    curr_polygon = rescale_geometry(curr_polygon, scaling=_scaling)
                     annotations[key].append(Polygon(curr_polygon, a_cls=_cls))
                 elif "paths" in curr_data:  # This is a complex polygon which needs to be parsed with the even-odd rule
                     curr_complex_polygon = _parse_darwin_complex_polygon(curr_data)
                     for curr_polygon in curr_complex_polygon.geoms:
-                        curr_polygon = rescale_geometry(curr_polygon, scaling=scaling)
+                        curr_polygon = rescale_geometry(curr_polygon, scaling=_scaling)
                         annotations[key].append(Polygon(curr_polygon, a_cls=_cls))
                 else:
                     raise ValueError(f"Got unexpected data keys: {curr_data.keys()}")
@@ -658,7 +657,7 @@ def from_darwin_json(cls, darwin_json: PathLike, scaling: float | None = None) -
             elif annotation_type == AnnotationType.BOX:
                 x, y, h, w = curr_data.values()
                 curr_polygon = shapely.geometry.box(x, y, x + w, y + h)
-                curr_polygon = rescale_geometry(curr_polygon, scaling=scaling)
+                curr_polygon = rescale_geometry(curr_polygon, scaling=_scaling)
                 annotations[key].append(Polygon(curr_polygon, a_cls=_cls))
             else:
                 ValueError(f"Annotation type {annotation_type} is not supported.")
@@ -884,8 +883,8 @@ def __contains__(self, item: Union[str, AnnotationClass]) -> bool:
     def __add__(self, other: WsiAnnotations) -> WsiAnnotations:
         if set(self.available_labels).intersection(other.available_labels) != set():
             raise AnnotationError(
-                f"Can only add annotations with different labels. "
-                f"Use `.relabel` or relabel during construction of the object."
+                "Can only add annotations with different labels. "
+                "Use `.relabel` or relabel during construction of the object."
             )
 
         curr_annotations = list(self._annotations.values())
diff --git a/dlup/backends/__init__.py b/dlup/backends/__init__.py
new file mode 100644
index 00000000..8d73ff9f
--- /dev/null
+++ b/dlup/backends/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) dlup contributors
diff --git a/dlup/experimental_backends/common.py b/dlup/backends/common.py
similarity index 98%
rename from dlup/experimental_backends/common.py
rename to dlup/backends/common.py
index 7b7afdcb..404f0ec0 100644
--- a/dlup/experimental_backends/common.py
+++ b/dlup/backends/common.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from __future__ import annotations
 
@@ -33,7 +32,7 @@ def numpy_to_pil(tile: np.ndarray) -> PIL.Image.Image:
     elif bands == 4:
         mode = "RGBA"
     else:
-        raise RuntimeError(f"Incorrect number of channels.")
+        raise RuntimeError("Incorrect number of channels.")
 
     return PIL.Image.fromarray(tile, mode=mode)
@@ -160,6 +159,7 @@ def get_thumbnail(self, size: int | tuple[int, int]) -> PIL.Image.Image:
         downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, size)))
         level = self.get_best_level_for_downsample(downsample)
+
         thumbnail = (
             self.read_region((0, 0), level, self.level_dimensions[level])
             .convert("RGB")
diff --git a/dlup/background.py b/dlup/background.py
index ccfea5e7..9d08e001 100644
--- a/dlup/background.py
+++ b/dlup/background.py
@@ -325,7 +325,9 @@ def _is_foreground_numpy(
     max_boundary = np.tile(mask_size, 2)
     min_boundary = np.zeros_like(max_boundary)
-    box = np.clip((*scaled_coordinates, *(scaled_coordinates + scaled_sizes)), min_boundary, max_boundary)  # type: ignore
+    box = np.clip(
+        (*scaled_coordinates, *(scaled_coordinates + scaled_sizes)), min_boundary, max_boundary
+    )  # type: ignore
     clipped_w, clipped_h = (box[2:] - box[:2]).astype(int)
 
     if clipped_h == 0 or clipped_w == 0:
diff --git a/dlup/cli/__init__.py b/dlup/cli/__init__.py
index ee7e3638..6f62dd4b 100644
--- a/dlup/cli/__init__.py
+++ b/dlup/cli/__init__.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """DLUP Command-line interface. This is the file which builds the main parser."""
 import argparse
diff --git a/dlup/cli/mask.py b/dlup/cli/mask.py
index 8bd651f5..c77f8a26 100644
--- a/dlup/cli/mask.py
+++ b/dlup/cli/mask.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """CLI utilities to handle masks"""
 import argparse
@@ -57,13 +56,13 @@ def mask_to_polygon(args: argparse.Namespace):
     for pair in args.labels.split(","):
         name, index = pair.split("=")
         if not index.isnumeric():
-            raise argparse.ArgumentTypeError(f"Expected a key-pair of the form 1=tumor,2=stroma")
+            raise argparse.ArgumentTypeError("Expected a key-pair of the form 1=tumor,2=stroma")
         index = float(index)
         if not index.is_integer():
-            raise argparse.ArgumentTypeError(f"Expected a key-pair of the form 1=tumor,2=stroma")
+            raise argparse.ArgumentTypeError("Expected a key-pair of the form 1=tumor,2=stroma")
         index = int(index)
         if index == 0:
-            raise argparse.ArgumentTypeError(f"0 is not a proper index. Needs to be at least 1.")
+            raise argparse.ArgumentTypeError("0 is not a proper index. Needs to be at least 1.")
         index_map[index] = name.strip()
 
     polygons = dataset_to_polygon(dataset, index_map=index_map, num_workers=args.num_workers, scaling=scaling)
@@ -94,7 +93,7 @@ def mask_to_polygon(args: argparse.Namespace):
             json.dump(slide_annotations.as_geojson(split_per_label=False), f, indent=2)
     else:
         jsons = slide_annotations.as_geojson(split_per_label=True)
-        if not type(jsons) == list[tuple[str, GeoJsonDict]]:
+        if not type(jsons) == list[tuple[str, GeoJsonDict]]:  # noqa
            raise ValueError("Expected a list of tuples")
         for label, json_dict in jsons:
             suffix = output_filename.suffix
diff --git a/dlup/data/dataset.py b/dlup/data/dataset.py
index 98c09083..5baec1a8 100644
--- a/dlup/data/dataset.py
+++ b/dlup/data/dataset.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """Datasets helpers to simplify the generation of a dataset made of tiles from a WSI.
 
@@ -11,12 +10,11 @@
 import functools
 import itertools
 import pathlib
-from typing import Any, Callable, Generic, Iterable, TypedDict, TypeVar, Union, cast
+from typing import Callable, Generic, Iterable, TypedDict, TypeVar, Union, cast
 
 import numpy as np
 import PIL
 from numpy.typing import NDArray
-from PIL import Image
 
 from dlup import BoundaryMode, SlideImage
 from dlup.annotations import WsiAnnotations
@@ -437,10 +435,11 @@ def from_standard_tiling(
 
         if limit_bounds:
             if rois is not None:
-                raise ValueError(f"Cannot use both `rois` and `limit_bounds` at the same time.")
+                raise ValueError("Cannot use both `rois` and `limit_bounds` at the same time.")
             if backend == ImageBackend.AUTODETECT or backend == "AUTODETECT":
                 raise ValueError(
-                    f"Cannot use AutoDetect as backend and use limit_bounds at the same time. This is related to issue #151. See https://github.com/NKI-AI/dlup/issues/151"
+                    "Cannot use AutoDetect as backend and use limit_bounds at the same time. "
+                    "This is related to issue #151. See https://github.com/NKI-AI/dlup/issues/151"
                 )
 
             offset, bounds = slide_image.slide_bounds
diff --git a/dlup/data/experimental/dataset.py b/dlup/data/experimental/dataset.py
index 77f5cf7c..a97c6cf2 100644
--- a/dlup/data/experimental/dataset.py
+++ b/dlup/data/experimental/dataset.py
@@ -62,7 +62,7 @@ def __init__(
         self._grids = grids
         self._num_scales = num_scales
         if len(list(grids)) % num_scales != 0:
-            raise ValueError(f"In a multiscale dataset the grids needs to be divisible by the number of scales.")
+            raise ValueError("In a multiscale dataset the grids needs to be divisible by the number of scales.")
 
         self._step_size = len(list(grids)[0][0])
         self._index_ranges = [
@@ -101,7 +101,7 @@ def multiscale_from_tiling(
     backend: Callable = ImageBackend.PYVIPS,
 ):
     if mpps != sorted(mpps):
-        raise ValueError(f"The mpp values should be in increasing order.")
+        raise ValueError("The mpp values should be in increasing order.")
 
     with SlideImage.from_file_path(path, backend=backend) as slide_image:
         original_mpp = slide_image.mpp
diff --git a/dlup/data/transforms.py b/dlup/data/transforms.py
index 6cff6758..7064a186 100644
--- a/dlup/data/transforms.py
+++ b/dlup/data/transforms.py
@@ -65,7 +65,7 @@ def convert_annotations(
     boxes: dict[str, list[tuple[tuple[int, int], tuple[int, int]]]] = defaultdict(list)
 
     roi_mask = np.zeros(region_size, dtype=np.int32)
-
+    has_roi = False
    for curr_annotation in annotations:
         holes_mask = None
         if isinstance(curr_annotation, dlup.annotations.Point):
@@ -82,6 +82,7 @@ def convert_annotations(
                 [np.asarray(curr_annotation.exterior.coords).round().astype(np.int32)],
                 1,
             )
+            has_roi = True
             continue
 
         if not (curr_annotation.label in index_map):
@@ -104,6 +105,10 @@ def convert_annotations(
             # TODO: This is a bit hacky to ignore mypy here, but I don't know how to fix it.
             mask = np.where(holes_mask == 1, original_values, mask)  # type: ignore
 
+    # This is a hard-to-find bug, so better to give an explicit error.
+    if not has_roi and roi_name is not None:
+        raise AnnotationError(f"ROI mask {roi_name} not found, please add a ROI mask to the annotations.")
+
     return dict(points), dict(boxes), mask, roi_mask if roi_name else None
diff --git a/dlup/experimental_backends/__init__.py b/dlup/experimental_backends/__init__.py
index 8282643a..42eb2e92 100644
--- a/dlup/experimental_backends/__init__.py
+++ b/dlup/experimental_backends/__init__.py
@@ -14,7 +14,7 @@
 
 from dlup import UnsupportedSlideError
 
-from .common import AbstractSlideBackend
+from ..backends.common import AbstractSlideBackend
 from .openslide_backend import OpenSlideSlide
 from .pyvips_backend import PyVipsSlide
 from .tifffile_backend import TifffileSlide
diff --git a/dlup/experimental_backends/openslide_backend.py b/dlup/experimental_backends/openslide_backend.py
index e8d9cea6..eb33498a 100644
--- a/dlup/experimental_backends/openslide_backend.py
+++ b/dlup/experimental_backends/openslide_backend.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from __future__ import annotations
 
@@ -8,7 +7,7 @@
 import openslide
 import PIL.Image
 
-from dlup.experimental_backends.common import AbstractSlideBackend
+from dlup.backends.common import AbstractSlideBackend
 from dlup.types import PathLike
 from dlup.utils.image import check_if_mpp_is_valid
@@ -56,7 +55,7 @@ def spacing(self) -> tuple[float, float] | None:
     @spacing.setter
     def spacing(self, value: tuple[float, float]) -> None:
         if not isinstance(value, tuple) and len(value) != 2:
-            raise ValueError(f"`.spacing` has to be of the form (mpp_x, mpp_y).")
+            raise ValueError("`.spacing` has to be of the form (mpp_x, mpp_y).")
 
         mpp_x, mpp_y = value
         check_if_mpp_is_valid(mpp_x, mpp_y)
diff --git a/dlup/experimental_backends/pyvips_backend.py b/dlup/experimental_backends/pyvips_backend.py
index c3c18ca4..165d20a0 100644
--- a/dlup/experimental_backends/pyvips_backend.py
+++ b/dlup/experimental_backends/pyvips_backend.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from __future__ import annotations
 
@@ -11,7 +10,7 @@
 import pyvips
 
 from dlup import UnsupportedSlideError
-from dlup.experimental_backends.common import AbstractSlideBackend, numpy_to_pil
+from dlup.backends.common import AbstractSlideBackend, numpy_to_pil
 from dlup.types import PathLike
 from dlup.utils.image import check_if_mpp_is_valid
@@ -157,7 +156,7 @@ def spacing(self) -> tuple[float, float] | None:
     @spacing.setter
     def spacing(self, value: tuple[float, float]) -> None:
         if not isinstance(value, tuple) and len(value) != 2:
-            raise ValueError(f"`.spacing` has to be of the form (mpp_x, mpp_y).")
+            raise ValueError("`.spacing` has to be of the form (mpp_x, mpp_y).")
 
         mpp_x, mpp_y = value
         check_if_mpp_is_valid(mpp_x, mpp_y)
@@ -191,7 +190,8 @@ def associated_images(self):
         """Images associated with this whole-slide image."""
         if not self._loader == "openslideload":
             return {}
-        associated_images = (_.strip() for _ in self.properties["slide-associated-images"].split(","))
+        # TODO: Fix this
+        # associated_images = (_.strip() for _ in self.properties["slide-associated-images"].split(","))
         raise NotImplementedError
 
     def set_cache(self, cache):
diff --git a/dlup/experimental_backends/tifffile_backend.py b/dlup/experimental_backends/tifffile_backend.py
index 9f6cbead..30bceec6 100644
--- a/dlup/experimental_backends/tifffile_backend.py
+++ b/dlup/experimental_backends/tifffile_backend.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from typing import Any
 
@@ -6,7 +5,7 @@
 import PIL.Image
 import tifffile
 
-from dlup.experimental_backends.common import AbstractSlideBackend, numpy_to_pil
+from dlup.backends.common import AbstractSlideBackend, numpy_to_pil
 from dlup.types import PathLike
 from dlup.utils.tifffile_utils import get_tile
diff --git a/dlup/tools.py b/dlup/tools.py
index a567179c..9f6fa70c 100644
--- a/dlup/tools.py
+++ b/dlup/tools.py
@@ -5,7 +5,6 @@
 
 import bisect
 import collections
-import functools
 import itertools
diff --git a/dlup/utils/__init__.py b/dlup/utils/__init__.py
index 5abbd0ee..d784f0fb 100644
--- a/dlup/utils/__init__.py
+++ b/dlup/utils/__init__.py
@@ -1,19 +1,18 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 import json
 import warnings
 
 import numpy as np
 
-from dlup.utils.imports import _PYTORCH_AVAILABLE
+from dlup.utils.imports import PYTORCH_AVAILABLE
 
-if _PYTORCH_AVAILABLE:
+if PYTORCH_AVAILABLE:
     import torch  # type: ignore # pylint: disable=import-error
 
 
 class ArrayEncoder(json.JSONEncoder):
     def default(self, obj):
-        if _PYTORCH_AVAILABLE and isinstance(obj, torch.Tensor):  # type: ignore
+        if PYTORCH_AVAILABLE and isinstance(obj, torch.Tensor):  # type: ignore
             obj = obj.numpy()
 
         if isinstance(obj, np.ndarray):
diff --git a/dlup/utils/image.py b/dlup/utils/image.py
index d6c46afa..7492d230 100644
--- a/dlup/utils/image.py
+++ b/dlup/utils/image.py
@@ -19,7 +19,7 @@ def check_if_mpp_is_valid(mpp_x: float, mpp_y: float, *, rel_tol: float = 0.015)
     None
     """
     if mpp_x == 0 or mpp_y == 0:
-        raise UnsupportedSlideError(f"Unable to parse mpp.")
+        raise UnsupportedSlideError("Unable to parse mpp.")
 
     if not mpp_x or not mpp_y or not math.isclose(mpp_x, mpp_y, rel_tol=rel_tol):
         raise UnsupportedSlideError(f"cannot deal with slides having anisotropic mpps. Got {mpp_x} and {mpp_y}.")
diff --git a/dlup/utils/imports.py b/dlup/utils/imports.py
index 66342e0d..2e3cf852 100644
--- a/dlup/utils/imports.py
+++ b/dlup/utils/imports.py
@@ -5,7 +5,7 @@
 
 
 def _module_available(module_path: str) -> bool:
-    """
+    r"""
     Check if a path is available in your environment
     >>> _module_available('os')
     True
@@ -14,13 +14,13 @@ def _module_available(module_path: str) -> bool:
     Adapted from:
     https://github.com/PyTorchLightning/pytorch-lightning/blob/ef7d41692ca04bb9877da5c743f80fceecc6a100/pytorch_lightning/utilities/imports.py#L27
     Under Apache 2.0 license.
-    """
+    """  # noqa: E501
     try:
         return find_spec(module_path) is not None
     except ModuleNotFoundError:
         return False
 
 
-_PYTORCH_AVAILABLE = _module_available("pytorch")
+PYTORCH_AVAILABLE = _module_available("pytorch")
 PYHALOXML_AVAILABLE = _module_available("pyhaloxml")
 DARWIN_SDK_AVAILABLE = _module_available("darwin")
diff --git a/dlup/utils/tifffile_utils.py b/dlup/utils/tifffile_utils.py
index 58cbd712..85823aa9 100644
--- a/dlup/utils/tifffile_utils.py
+++ b/dlup/utils/tifffile_utils.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from typing import Any
 
@@ -6,33 +5,7 @@
 import tifffile
 
 
-def get_tile(page: tifffile.TiffPage, coordinates: tuple[Any, ...], size: tuple[Any, ...]) -> np.ndarray:
-    """Extract a crop from a TIFF image file directory (IFD).
-
-    Only the tiles englobing the crop area are loaded and not the whole page.
-    This is useful for large Whole slide images that can't fit into RAM.
-
-    Code obtained from [1].
-
-    Parameters
-    ----------
-    page : TiffPage
-        TIFF image file directory (IFD) from which the crop must be extracted.
-    coordinates: (int, int)
-        Coordinates of the top left and right corner corner of the desired crop.
-    size: (int, int)
-        Desired crop height and width.
-
-    References
-    ----------
-    .. [1] https://gist.github.com/rfezzani/b4b8852c5a48a901c1e94e09feb34743
-
-    Returns
-    -------
-    out : ndarray of shape (imagedepth, h, w, sampleperpixel)
-        Extracted crop.
-
-    """
+def _validate_inputs(page, coordinates, size):
     x0, y0 = coordinates
     w, h = size
 
@@ -48,50 +21,116 @@ def get_tile(page: tifffile.TiffPage, coordinates: tuple[Any, ...], size: tuple[
     if y0 < 0 or x0 < 0 or y0 + h > image_height or x0 + w > image_width:
         raise ValueError("Requested crop area is out of image bounds.")
 
+
+def _compute_tile_indices(page, coordinates, size):
+    x0, y0 = coordinates
+    w, h = size
     tile_width, tile_height = page.tilewidth, page.tilelength
     y1, x1 = y0 + h, x0 + w
 
     tile_y0, tile_x0 = y0 // tile_height, x0 // tile_width
     tile_y1, tile_x1 = np.ceil([y1 / tile_height, x1 / tile_width]).astype(int)
 
-    tile_per_line = int(np.ceil(image_width / tile_width))
+    return tile_y0, tile_y1, tile_x0, tile_x1
+
+
+def retrieve_tile_data(page, tile_y0, tile_y1, tile_x0, tile_x1):
+    tile_per_line = int(np.ceil(page.imagewidth / page.tilewidth))
+    fh = page.parent.filehandle
+    jpeg_tables = page.tags.get("JPEGTables", None)
+    if jpeg_tables is not None:
+        jpeg_tables = jpeg_tables.value
+
+    tiles_data = []
+    for idx_y in range(tile_y0, tile_y1):
+        for idx_x in range(tile_x0, tile_x1):
+            index = int(idx_y * tile_per_line + idx_x)
+            offset = page.dataoffsets[index]
+            bytecount = page.databytecounts[index]
+
+            if not bytecount:
+                continue
+
+            fh.seek(offset)
+            data = fh.read(bytecount)
+            tile, _, _ = page.decode(data, index, jpegtables=jpeg_tables)
+            tiles_data.append(((idx_y, idx_x), tile))
+    return tiles_data
+
+
+def _get_tile_from_data(page, tiles_data, coordinates, size):
+    x0, y0 = coordinates
+    w, h = size
+    tile_width, tile_height = page.tilewidth, page.tilelength
+    tile_y0, tile_x0 = y0 // tile_height, x0 // tile_width
 
     out = np.zeros(
-        (
-            page.imagedepth,
-            (tile_y1 - tile_y0) * tile_height,
-            (tile_x1 - tile_x0) * tile_width,
-            page.samplesperpixel,
-        ),
+        (page.imagedepth, h, w, page.samplesperpixel),
         dtype=page.dtype,
     )
 
-    fh = page.parent.filehandle
+    for (idx_y, idx_x), tile in tiles_data:
+        image_y = (idx_y - tile_y0) * tile_height
+        image_x = (idx_x - tile_x0) * tile_width
+        out[:, image_y : image_y + tile_height, image_x : image_x + tile_width, :] = tile
 
+    image_y0 = y0 - tile_y0 * tile_height
+    image_x0 = x0 - tile_x0 * tile_width
+    return out[:, image_y0 : image_y0 + h, image_x0 : image_x0 + w, :]
+
+
+def _retrieve_tile_data(page, tile_y0, tile_y1, tile_x0, tile_x1):
+    tile_per_line = int(np.ceil(page.imagewidth / page.tilewidth))
+    fh = page.parent.filehandle
     jpeg_tables = page.tags.get("JPEGTables", None)
     if jpeg_tables is not None:
         jpeg_tables = jpeg_tables.value
 
+    tiles_data = []
     for idx_y in range(tile_y0, tile_y1):
         for idx_x in range(tile_x0, tile_x1):
             index = int(idx_y * tile_per_line + idx_x)
-
             offset = page.dataoffsets[index]
             bytecount = page.databytecounts[index]
 
-            # Some files written by ASAP have an empty bytecount if it is empty.
             if not bytecount:
                 continue
 
             fh.seek(offset)
             data = fh.read(bytecount)
-            tile, indices, shape = page.decode(data, index, jpegtables=jpeg_tables)
+            tile, _, _ = page.decode(data, index, jpegtables=jpeg_tables)
+            tiles_data.append(((idx_y, idx_x), tile))
+    return tiles_data
 
-            image_y = (idx_y - tile_y0) * tile_height
-            image_x = (idx_x - tile_x0) * tile_width
-            out[:, image_y : image_y + tile_height, image_x : image_x + tile_width, :] = tile
 
-    image_y0 = y0 - tile_y0 * tile_height
-    image_x0 = x0 - tile_x0 * tile_width
+def get_tile(page: tifffile.TiffPage, coordinates: tuple[Any, ...], size: tuple[Any, ...]) -> np.ndarray:
+    """Extract a crop from a TIFF image file directory (IFD).
 
-    return out[:, image_y0 : image_y0 + h, image_x0 : image_x0 + w, :]
+    Only the tiles enclosing the crop area are loaded and not the whole page.
+    This is useful for large whole-slide images that can't fit into RAM.
+
+    Code obtained from [1].
+
+    Parameters
+    ----------
+    page : TiffPage
+        TIFF image file directory (IFD) from which the crop must be extracted.
+    coordinates: (int, int)
+        Coordinates of the top left corner of the desired crop.
+    size: (int, int)
+        Desired crop height and width.
+
+    References
+    ----------
+    .. [1] https://gist.github.com/rfezzani/b4b8852c5a48a901c1e94e09feb34743
+
+    Returns
+    -------
+    out : ndarray of shape (imagedepth, h, w, samplesperpixel)
+        Extracted crop.
+
+    """
+    _validate_inputs(page, coordinates, size)
+    tile_y0, tile_y1, tile_x0, tile_x1 = _compute_tile_indices(page, coordinates, size)
+    tiles_data = _retrieve_tile_data(page, tile_y0, tile_y1, tile_x0, tile_x1)
+    return _get_tile_from_data(page, tiles_data, coordinates, size)
diff --git a/dlup/writers.py b/dlup/writers.py
index a5017b23..faaeb3ca 100644
--- a/dlup/writers.py
+++ b/dlup/writers.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """
 Classes to write image and mask files
@@ -209,7 +208,12 @@ def from_tiles_iterator(self, iterator: Iterator[np.ndarray]) -> None:
             tiff_reader = tifffile.TiffReader(temp_filename)
             page = tiff_reader.pages[level]
             tile_iterator = _tile_iterator_from_page(
-                page, self._tile_size, shapes[level], scale=2, is_rgb=is_rgb, interpolator=self._interpolator  # type: ignore
+                page,  # type: ignore
+                self._tile_size,
+                shapes[level],
+                scale=2,
+                is_rgb=is_rgb,
+                interpolator=self._interpolator,
             )
             self._write_page(
                 tiff_writer,
diff --git a/tests/backends/__init__.py b/tests/backends/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/backends/test_common.py b/tests/backends/test_common.py
new file mode 100644
index 00000000..eb6bfe93
--- /dev/null
+++ b/tests/backends/test_common.py
@@ -0,0 +1,115 @@
+# Copyright (c) dlup contributors
+
+import numpy as np
+import pytest
+from PIL import Image
+
+from dlup.backends.common import AbstractSlideBackend, numpy_to_pil
+
+
+def test_numpy_to_pil_single_channel():
+    arr = np.arange(100, dtype=np.uint8).reshape((10, 10, 1))
+    pil_img = numpy_to_pil(arr)
+    assert pil_img.mode == "L"
+
+
+def test_numpy_to_pil_rgb():
+    arr = (np.arange(300) / 300 * 255).astype(np.uint8).reshape((10, 10, 3))
+    pil_img = numpy_to_pil(arr)
+    assert pil_img.mode == "RGB"
+
+
+def test_numpy_to_pil_rgba():
+    arr = (np.arange(400) / 400 * 255).astype(np.uint8).reshape((10, 10, 4))
+    pil_img = numpy_to_pil(arr)
+    assert pil_img.mode == "RGBA"
+
+
+def test_numpy_to_pil_invalid_channels():
+    arr = (np.arange(500) / 500 * 255).astype(np.uint8).reshape((10, 10, 5))
+    with pytest.raises(RuntimeError):
+        numpy_to_pil(arr)
+
+
+class TestAbstractBackend:
+    class DummySlideBackend(AbstractSlideBackend):
+        # Minimal implementation to avoid the ABC restriction.
+        def __init__(self, filename: str):
+            super().__init__(filename)
+            # Dummy data for testing
+            self._level_count = 3
+            self._downsamples = [1.0, 2.0, 4.0]
+            self._spacings = [(0.5, 0.5), (1.0, 1.0), (2.0, 2.0)]
+            self._shapes = [(1000, 1000), (500, 500), (250, 250)]
+
+        def read_region(self, coordinates, level, size) -> Image.Image:
+            return Image.new("RGB", size, color="white")
+
+        @property
+        def properties(self):
+            return {}
+
+        @property
+        def magnification(self):
+            return 10.0
+
+        @property
+        def vendor(self):
+            return "TestVendor"
+
+        def close(self):
+            pass
+
+    def test_dummy_slide_backend_properties(self):
+        slide = self.DummySlideBackend("test_filename.tiff")
+
+        # Testing the level_count
+        assert slide.level_count == 3
+
+        # Testing the dimensions
+        assert slide.dimensions == (1000, 1000)
+
+        # Testing the spacing
+        assert slide.spacing == (0.5, 0.5)
+
+        # Testing the level_dimensions
+        assert slide.level_dimensions == [(1000, 1000), (500, 500), (250, 250)]
+
+        # Testing the level_spacings
+        assert slide.level_spacings == ((0.5, 0.5), (1.0, 1.0), (2.0, 2.0))
+
+        # Testing the level_downsamples
+        assert slide.level_downsamples == (1.0, 2.0, 4.0)
+
+        # Testing slide bounds
+        assert slide.slide_bounds == ((0, 0), (1000, 1000))
+
+        # Testing get_best_level_for_downsample
+        assert slide.get_best_level_for_downsample(0.5) == 0
+        assert slide.get_best_level_for_downsample(1.0) == 0
+        assert slide.get_best_level_for_downsample(2.0) == 1
+        assert slide.get_best_level_for_downsample(3.0) == 1
+        assert slide.get_best_level_for_downsample(4.5) == 2
+
+    def test_repr(self):
+        slide = self.DummySlideBackend("test_filename.tiff")
+        assert repr(slide) == ""
+
+    def test_spacing_without_set(self):
+        slide = self.DummySlideBackend("test_filename.tiff")
+        slide._spacings = None
+        assert slide.spacing is None
+
+    def test_get_thumbnail(self):
+        slide = self.DummySlideBackend("test_filename.tiff")
+
+        # Getting a 200x200 thumbnail
+        thumbnail = slide.get_thumbnail(200)
+        assert isinstance(thumbnail, Image.Image)
+        assert thumbnail.size == (200, 200)
+
+        # Getting a 300x150 thumbnail
+        thumbnail = slide.get_thumbnail((300, 150))
+        assert isinstance(thumbnail, Image.Image)
+        # The aspect ratio should be preserved, so width might be less than 300
+        assert thumbnail.size[1] == 150
diff --git a/tests/backends/test_tifffile_backend.py b/tests/backends/test_tifffile_backend.py
new file mode 100644
index 00000000..58d2479e
--- /dev/null
+++ b/tests/backends/test_tifffile_backend.py
@@ -0,0 +1,62 @@
+# Copyright (c) dlup contributors
+from unittest.mock import Mock, patch
+
+import pytest
+
+from dlup.experimental_backends.tifffile_backend import TifffileSlide
+
+
+@pytest.fixture
+def mock_tifffile_slide():
+    with patch("tifffile.TiffFile") as MockTiffFile:
+        # List to hold mock pages
+        mock_pages = []
+
+        # Starting values
+        size = 4096
+        res_value = (1, 1)  # Using a tuple since the code accesses the numerator and denominator
+
+        # Create 3 mock pages (or however many you need)
+        for _ in range(3):
+            # Create mock tags for the current page
+            x_res_mock = Mock(value=res_value)
+            y_res_mock = Mock(value=res_value)
+            unit_mock = Mock(value=3)
+
+            mock_page = Mock()
+            mock_page.shape = [3, size, size]
+            mock_page.tags = {"XResolution": x_res_mock, "YResolution": y_res_mock, "ResolutionUnit": unit_mock}
+
+            mock_pages.append(mock_page)
+
+            # Halve the values for the next iteration
+            size //= 2
+            res_value = (res_value[0], res_value[1] * 2)  # To halve the resolution
+
+        instance = MockTiffFile.return_value
+        instance.pages = mock_pages
+        yield TifffileSlide("path_to_image.tif")
+
+
+class TestTifffileSlide:
+    def test_initialization(self, mock_tifffile_slide):
+        slide = mock_tifffile_slide
+        assert slide._level_count == 3  # Checking the initialized _level_count
+
+    def test_properties(self, mock_tifffile_slide):
+        slide = mock_tifffile_slide
+
+        assert slide.vendor is None
+        assert slide.magnification is None
+
+        # Check the properties
+
+    def test_read_region_invalid_level(self, mock_tifffile_slide):
+        slide = mock_tifffile_slide
+        with pytest.raises(RuntimeError, match="Level 4 not present."):
+            slide.read_region((0, 0), 4, (100, 100))
+
+    def test_close(self, mock_tifffile_slide):
+        slide = mock_tifffile_slide
+        slide.close()
+        slide._image.close.assert_called_once()
diff --git a/tests/common.py b/tests/common.py
index 5041e38d..79df195a 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """Utilities to simplify the mocking of SlideImages."""
 
diff --git a/tests/conftest.py b/tests/conftest.py
index e7d72064..0f812dff 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,11 +1,11 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """Fixtures, hooks and plugins."""
+import pytest
 
 from dlup import SlideImage
 
-from .common import *
+from .common import OpenSlideImageMock, SlideConfig
 
 
 @pytest.fixture
diff --git a/tests/new_test.py b/tests/new_test.py
deleted file mode 100644
index a4c0e5e2..00000000
--- a/tests/new_test.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# import logging
-# from pathlib import Path
-# from typing import Generator, Optional
-#
-# from darwin.client import Client
-# from darwin.dataset import RemoteDataset
-# from darwin.dataset.release import Release
-# from darwin.exceptions import NotFound
-# from darwin.utils import parse_darwin_json
-#
-# from dlup.annotations import AnnotationClass, AnnotationType, WsiAnnotations
-#
-#
-# def _has_release(dataset: RemoteDataset, release_name: str) -> bool:
-#     return release_name in [_.name for _ in dataset.get_releases()]
-#
-#
-# def _get_release(dataset: RemoteDataset, release_name: str) -> Release:
-#     if not _has_release(dataset, release_name):
-#         raise ValueError(
-#             f"Release {release_name} does not exist in {dataset.team}/{dataset.slug}."
-# "Create one in the dataset overview on Darwin V7" -# ) -# return dataset.get_release(release_name) -# -# -# class DarwinV7ReleaseWrapper: -# def __init__(self, dataset_slug: str, release_name: str, api_key: Optional[str] = None): -# self._logger = logging.getLogger(type(self).__name__) -# if api_key is not None: -# client = Client.from_api_key(api_key) -# else: # This requires that you have run `darwin authenticate` -# client = Client.local() -# -# # TODO: Caching might help -# try: -# self._dataset: RemoteDataset = client.get_remote_dataset(dataset_slug) -# except NotFound: -# raise ValueError(f"Dataset {dataset_slug} not found") -# -# self._release_name: str = release_name -# self._release: Release = _get_release(self._dataset, release_name) -# -# self._annotated_files = [] -# self._release_path: Optional[Path] = None -# # TODO: Maybe only call this on demand -# self._pull_from_remote() -# -# def _pull_from_remote(self): -# self._release_path = self._dataset.local_releases_path / self._release.name / "annotations" -# if not (self._release_path / "completed").is_file(): -# self._dataset.pull(release=self._release, only_annotations=True) -# with open(self._release_path / "completed", "w") as f: -# f.write("") -# -# def annotated_files(self) -> Generator: -# for filename in self._release_path.glob("*.json"): -# parsed = parse_darwin_json(filename, 0) -# yield filename, parsed.filename -# -# def _get_annotation_json(self, filename: str): -# _path = self._release_path / filename -# if not (_path.with_suffix(".json")).is_file(): -# raise ValueError(f"Filename {filename} is not available in release {self._release_name}") -# -# return _path.with_suffix(".json") -# -# def build_dlup_wsi_annotations(self): -# output = {} -# for json_path, filename in self.annotated_files(): -# output[filename] = WsiAnnotations.from_darwin_json(json_path) -# return output -# -# -# if __name__ == "__main__": -# # z = WsiAnnotations.from_darwin_json( -# # "/Users/jteuwen/Downloads/test_complex_polygons.json" -# # ) -# # -# # z.filter(["lymphocyte (cell)", "ROI (detection)"]) -# -# # a = AnnotationClass("lymphocyte (cell)", AnnotationType.POINT) -# # b = AnnotationClass("lymphocyte cell", AnnotationType.POINT) -# # -# # z.relabel(((a, b),)) -# # -# # y = z.bounding_box -# # -# # b = z[b] -# # -# # x = z.as_geojson() -# # -# # w = z.read_region(y[0], 1, y[1]) -# -# release_name = "test" -# dataset_slug = "tcga-lung" -# wrapper = DarwinV7ReleaseWrapper(dataset_slug=dataset_slug, release_name=release_name) -# -# z = wrapper.build_dlup_wsi_annotations() -# -# pass diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 00000000..29e56b2e --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,39 @@ +import pathlib +from argparse import ArgumentTypeError +from unittest.mock import patch + +import pytest + +from dlup.cli import dir_path, file_path, main + + +def test_dir_path_valid_directory(tmpdir): + path = tmpdir.mkdir("subdir") + assert dir_path(str(path)) == pathlib.Path(path) + + +def test_dir_path_invalid_directory(): + with pytest.raises(ArgumentTypeError): + dir_path("/path/which/does/not/exist") + + +def test_file_path_valid_file(tmpdir): + path = tmpdir.join("test_file.txt") + path.write("content") + assert file_path(str(path)) == pathlib.Path(path) + + +def test_file_path_invalid_file(): + with pytest.raises(ArgumentTypeError): + file_path("/path/which/does/not/exist.txt") + + +def test_file_path_no_need_exists(): + _path = "/path/which/does/not/need/to/exist.txt" + assert file_path(_path, 
need_exists=False) == pathlib.Path(_path) + + +def test_main_no_arguments(capsys): + with patch("sys.argv", ["dlup"]): + with pytest.raises(SystemExit): + main() diff --git a/tests/test_image.py b/tests/test_image.py index 0c7be0e7..030a95df 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -8,20 +8,14 @@ interpolation as well as ensuring the tiles are extracted from the right level and locations of the original image. """ - -from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union - import numpy as np import openslide # type: ignore import PIL import pytest -from PIL.Image import Image -from pydantic import BaseModel, Field -from scipy import interpolate from dlup import SlideImage, UnsupportedSlideError -from .common import * +from .common import OpenSlideImageMock, SlideConfig, SlideProperties, get_sample_nonuniform_image class TestSlideImage: @@ -37,7 +31,7 @@ class TestSlideImage: def test_mpp_exceptions(self, openslide_image): """Test that we break if the slide has no isotropic resolution.""" with pytest.raises(UnsupportedSlideError): - wsi = SlideImage(openslide_image) + _ = SlideImage(openslide_image) def test_properties(self, openslide_image): """Test properties.""" @@ -88,8 +82,9 @@ def test_read_region( We want to be sure that reading a region at some scaling level is equivalent to downsampling the whole image and extracting that region using PIL. """ - base_image = openslide_image.image - base_image_size = np.array((base_image.width, base_image.height)) + # TODO: Use these + # base_image = openslide_image.image + # base_image_size = np.array((base_image.width, base_image.height)) # Compute output image global coordinates. out_region_location = np.array((out_region_x, out_region_y)) @@ -159,6 +154,8 @@ def test_border_region(self, shift_x): return extracted_region = wsi.read_region(out_region_location, scaling, out_region_size) + # TODO: Make this test smarter + assert extracted_region is not None def test_scaled_size(self, dlup_wsi): """Check the scale is greater than zero.""" diff --git a/tests/test_logging.py b/tests/test_logging.py new file mode 100644 index 00000000..f20ec14b --- /dev/null +++ b/tests/test_logging.py @@ -0,0 +1,44 @@ +import logging + +import pytest + +from dlup.logging import build_cli_logger, setup_logging + + +@pytest.mark.usefixtures("caplog") +class TestLogging: + def test_setup_logging_valid_log_level(self, caplog): + setup_logging(log_level="DEBUG") + assert len(caplog.records) == 0 + + def test_setup_logging_invalid_log_level(self): + with pytest.raises(ValueError, match="Unexpected log level got INVALID"): + setup_logging(log_level="INVALID") + + def test_setup_logging_filename_creation(self, tmp_path): + log_file = tmp_path / "log.txt" + setup_logging(filename=log_file) + assert log_file.exists() + + def test_setup_logging_log_message(self, caplog): + setup_logging(log_level="DEBUG") + logging.debug("This is a debug message.") + assert caplog.records[0].message == "This is a debug message." + + @pytest.mark.usefixtures("tmp_path") + class TestCLILogger: + def test_build_cli_logger_filename_creation(self, tmp_path): + build_cli_logger("test_logger", True, 1, tmp_path) + assert any(tmp_path.iterdir()) # checks if any file is created in tmp_path + + def test_build_cli_logger_valid_verbosity(self, caplog): + build_cli_logger("test_logger", True, 1) + logging.info("This is an info message.") + assert caplog.records[-1].message == "This is an info message." 
+
+        def test_build_cli_logger_warning_message(self, caplog):
+            build_cli_logger("test_logger", True, 1)
+            assert (
+                caplog.records[0].message
+                == "Beta software. In case you run into issues report at https://github.com/NKI-AI/dlup/."
+            )
diff --git a/tests/test_tiling.py b/tests/test_tiling.py
index 6b3b7368..772f1fac 100644
--- a/tests/test_tiling.py
+++ b/tests/test_tiling.py
@@ -1,8 +1,5 @@
 # coding=utf-8
 # Copyright (c) dlup contributors
-import functools
-from typing import Any, Dict, Optional, Tuple, TypeVar
-
 import numpy as np
 import pytest
 
@@ -47,9 +44,10 @@ def test_spanned_basis(self, size, tile_size, tile_overlap, mode):
         # First coordinate is always zero.
         assert basis[0] == 0
 
-        tile_overlap = np.remainder(tile_overlap, np.minimum(tile_size, size), casting="safe")
-        right = basis + tile_size
-        overlap = right - basis
+        # TODO: These are not used yet
+        # tile_overlap = np.remainder(tile_overlap, np.minimum(tile_size, size), casting="safe")
+        # right = basis + tile_size
+        # overlap = right - basis
         stride = np.diff(basis)
 
         tiled_size = basis[-1] + tile_size
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
new file mode 100644
index 00000000..ee1697c9
--- /dev/null
+++ b/tests/test_transforms.py
@@ -0,0 +1,76 @@
+# Copyright (c) dlup contributors
+import numpy as np
+import pytest
+
+from dlup.annotations import Point, Polygon
+from dlup.data.transforms import AnnotationClass, AnnotationType, RenameLabels, convert_annotations
+
+
+def test_convert_annotations_points_only():
+    point = Point((5, 5), AnnotationClass(label="point1", a_cls=AnnotationType.POINT))
+    points, boxes, mask, roi_mask = convert_annotations([point], (10, 10), {"point1": 1})
+
+    assert mask.sum() == 0
+    assert roi_mask is None
+    assert boxes == {}
+
+    assert points["point1"] == [(5.0, 5.0)]
+
+
+def test_convert_annotations_polygons_only():
+    polygon = Polygon(
+        [(2, 2), (2, 8), (8, 8), (8, 2)], AnnotationClass(label="polygon1", a_cls=AnnotationType.POLYGON)
+    )
+    points, boxes, mask, roi_mask = convert_annotations([polygon], (10, 10), {"polygon1": 2})
+
+    assert points == {}
+    assert boxes == {}
+    assert roi_mask is None
+
+    assert np.all(mask[2:8, 2:8] == 2)
+
+
+class MockBoxAnnotation:
+    def __init__(self, label):
+        self.a_cls = AnnotationClass(label=label, a_cls=AnnotationType.BOX)
+        self.label = label
+
+
+class MockPolygonAnnotation:
+    def __init__(self, label):
+        self.a_cls = AnnotationClass(label=label, a_cls=AnnotationType.POLYGON)
+        self.label = label
+
+
+class TestRenameLabels:
+    @pytest.fixture
+    def transformer(self):
+        return RenameLabels(remap_labels={"old_name": "new_name"})
+
+    def test_no_remap(self, transformer):
+        old_annotation = Polygon(
+            [(2, 2), (2, 8), (8, 8), (8, 2)], AnnotationClass(label="unchanged_name", a_cls=AnnotationType.POLYGON)
+        )
+        sample = {"annotations": [old_annotation]}
+        transformed_sample = transformer(sample)
+        assert transformed_sample["annotations"][0].label == "unchanged_name"
+
+    def test_remap_polygon(self, transformer):
+        old_annotation = Polygon(
+            [(2, 2), (2, 8), (8, 8), (8, 2)], AnnotationClass(label="old_name", a_cls=AnnotationType.POLYGON)
+        )
+        sample = {"annotations": [old_annotation]}
+        transformed_sample = transformer(sample)
+        assert transformed_sample["annotations"][0].label == "new_name"
+        assert isinstance(transformed_sample["annotations"][0], Polygon)
+
+    def test_unsupported_annotation(self, transformer):
+        class UnsupportedAnnotation:
+            def __init__(self):
+                self.a_cls = AnnotationClass(label="old_name", a_cls="UNSUPPORTED")
+                self.label = "old_name"
+
+        old_annotation = UnsupportedAnnotation()
+        sample = {"annotations": [old_annotation]}
+        with pytest.raises(Exception, match="Unsupported annotation type UNSUPPORTED"):
+            transformer(sample)
diff --git a/tests/utils/test_imports.py b/tests/utils/test_imports.py
new file mode 100644
index 00000000..6cad1fba
--- /dev/null
+++ b/tests/utils/test_imports.py
@@ -0,0 +1,24 @@
+# Copyright (c) dlup contributors
+
+from dlup.utils.imports import _module_available
+
+
+def test_module_available_existing_module():
+    assert _module_available("os") is True
+
+
+def test_module_available_non_existing_module():
+    assert _module_available("non.existing.module") is False
+
+
+def test_module_available_submodule():
+    assert _module_available("pytest") is True
+
+
+def test_module_available_non_existing_submodule():
+    assert _module_available("os.non_existing_submodule") is False
+
+
+def test_module_available_raises_module_not_found_error(mocker):
+    mocker.patch("importlib.util.find_spec", side_effect=ModuleNotFoundError)
+    assert _module_available("bla") is False
diff --git a/tests/utils/test_pyvips.py b/tests/utils/test_pyvips.py
index 23da6d1a..db8c0b2a 100644
--- a/tests/utils/test_pyvips.py
+++ b/tests/utils/test_pyvips.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 import numpy as np
 import pytest
diff --git a/tests/utils/test_tifffile.py b/tests/utils/test_tifffile.py
new file mode 100644
index 00000000..8e5e6a09
--- /dev/null
+++ b/tests/utils/test_tifffile.py
@@ -0,0 +1,60 @@
+# Copyright (c) dlup contributors
+from unittest.mock import Mock
+
+import pytest
+
+from dlup.utils.tifffile_utils import (
+    _compute_tile_indices,
+    _get_tile_from_data,
+    _retrieve_tile_data,
+    _validate_inputs,
+    get_tile,
+)
+
+
+# 1. Testing input validation
+def test_validate_inputs():
+    mock_page = Mock()
+    mock_page.is_tiled = True
+    mock_page.imagewidth = 500
+    mock_page.imagelength = 500
+
+    # Test for non-tiled page
+    mock_page.is_tiled = False
+    with pytest.raises(ValueError, match="Input page must be tiled."):
+        _validate_inputs(mock_page, (0, 0), (100, 100))
+
+    mock_page.is_tiled = True
+
+    # Test for negative coordinates
+    with pytest.raises(ValueError, match="Requested crop area is out of image bounds."):
+        _validate_inputs(mock_page, (-10, -10), (100, 100))
+
+    # Test for out-of-bounds coordinates
+    with pytest.raises(ValueError, match="Requested crop area is out of image bounds."):
+        _validate_inputs(mock_page, (450, 450), (100, 100))
+
+
+# 2. Testing computation of tile indices
+def test_compute_tile_indices():
+    mock_page = Mock()
+    mock_page.tilewidth = 200
+    mock_page.tilelength = 200
+
+    tile_y0, tile_y1, tile_x0, tile_x1 = _compute_tile_indices(mock_page, (50, 50), (100, 100))
+    assert (tile_y0, tile_y1, tile_x0, tile_x1) == (0, 1, 0, 1)
+
+
+def create_mock_page(imagewidth, imageheight, tilewidth, tilelength, dataoffsets, databytecounts):
+    mock_page = Mock()
+    mock_page.imagewidth = imagewidth
+    mock_page.imageheight = imageheight
+    mock_page.tilewidth = tilewidth
+    mock_page.tilelength = tilelength
+    mock_page.dataoffsets = dataoffsets
+    mock_page.databytecounts = databytecounts
+    mock_page.tags = {"JPEGTables": None}
+    mock_filehandle = Mock()
+    mock_filehandle.read.return_value = b"some_data"
+    mock_page.parent.filehandle = mock_filehandle
+    return mock_page
diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py
new file mode 100644
index 00000000..6bf301a3
--- /dev/null
+++ b/tests/utils/test_utils.py
@@ -0,0 +1,32 @@
+# Copyright (c) dlup contributors
+
+import json
+
+import numpy as np
+import pytest
+
+from dlup.utils import ArrayEncoder
+
+
+class TestArrayEncoder:
+    def test_encode_numpy_array(self):
+        arr = np.array([1, 2, 3, 4, 5])
+        result = json.dumps(arr, cls=ArrayEncoder)
+        assert result == "[1, 2, 3, 4, 5]"
+
+    def test_large_numpy_array_warning(self):
+        large_arr = np.zeros(int(10e4 + 1))
+        with pytest.warns(UserWarning, match=r"Trying to JSON serialize a very large array"):
+            json.dumps(large_arr, cls=ArrayEncoder)
+
+    def test_encode_numpy_integers(self):
+        int32_val = np.int32(42)
+        int64_val = np.int64(42)
+        result_int32 = json.dumps(int32_val, cls=ArrayEncoder)
+        result_int64 = json.dumps(int64_val, cls=ArrayEncoder)
+        assert result_int32 == "42"
+        assert result_int64 == "42"
+
+    def test_unhandled_data_type(self):
+        with pytest.raises(TypeError, match=r"Object of type .* is not JSON serializable"):
+            json.dumps({"key": object()}, cls=ArrayEncoder)  # Here `object()` is an unhandled data type