NKI-AI · jonasteuwen · Sep 28, 2023 · Sep 23, 2023 · Sep 23, 2023 · Sep 23, 2023
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 120
+extend-ignore = E203
diff --git a/dlup/__init__.py b/dlup/__init__.py
@@ -1,8 +1,7 @@
-# coding=utf-8
-# Copyright (c) dlup contributors
-"""Top-level package for dlup."""
+# Copyright (c) dlup contributors
+"""Top-level package for dlup."""
 
-from ._exceptions import DlupError, UnsupportedSlideError
+from ._exceptions import UnsupportedSlideError
 from ._image import SlideImage
 from ._region import BoundaryMode, RegionView
 

diff --git a/dlup/_image.py b/dlup/_image.py
@@ -16,13 +16,13 @@
 from typing import Callable, Type, TypeVar, cast
 
 import numpy as np  # type: ignore
-import openslide  # type: ignore
 import PIL
 import PIL.Image  # type: ignore
 
 from dlup import UnsupportedSlideError
 from dlup._region import BoundaryMode, RegionView
-from dlup.experimental_backends import AbstractSlideBackend, ImageBackend
+from dlup.backends.common import AbstractSlideBackend
+from dlup.experimental_backends import ImageBackend
 from dlup.types import GenericFloatArray, GenericIntArray, GenericNumber, PathLike
 from dlup.utils.image import check_if_mpp_is_valid
 
@@ -116,7 +116,8 @@ def __init__(self, wsi: AbstractSlideBackend, identifier: str | None = None, **k
 
         if self._wsi.spacing is None:
             raise UnsupportedSlideError(
-                f"The spacing of {identifier} cannot be derived from image and is not explicitly set in the `overwrite_mpp` parameter."
+                f"The spacing of {identifier} cannot be derived from image and is "
+                "not explicitly set in the `overwrite_mpp` parameter."
             )
 
         check_if_mpp_is_valid(*self._wsi.spacing)
@@ -207,8 +208,8 @@ def read_region(
         Examples
         --------
         The locations are defined at the requested scaling (with respect to level 0), so if we want to extract at
-        location ``(location_x, location_y)`` of a scaling 0.5 (with respect to level 0), and have resulting tile size of
-         ``(tile_size, tile_size)`` with a scaling factor of 0.5, we can use:
+        location ``(location_x, location_y)`` of a scaling 0.5 (with respect to level 0), and have
+        resulting tile size of ``(tile_size, tile_size)`` with a scaling factor of 0.5, we can use:
         >>>  wsi.read_region(location=(coordinate_x, coordinate_y), scaling=0.5, size=(tile_size, tile_size))
         """
         owsi = self._wsi

diff --git a/dlup/_region.py b/dlup/_region.py
@@ -1,13 +1,12 @@
-# coding=utf-8
+# Copyright (c) dlup contributors
 """Defines the RegionView interface."""
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Iterable, Union, cast
+from typing import cast
 
 import numpy as np
-import numpy.typing as npt
 import PIL.Image
 
 from dlup.types import GenericFloatArray, GenericIntArray

diff --git a/dlup/annotations.py b/dlup/annotations.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """
 Annotation module for dlup.
@@ -256,13 +255,13 @@ def annotation_class(self, a_cls: AnnotationClass):
         self._type = a_cls.a_cls
         # TODO: We also need to rewrite all the polygons. This cannot yet be set in-place
         _annotations = []
-        for geometry in self._annotations:
-            if isinstance(geometry, shapely.geometry.Polygon):
-                _annotations.append(Polygon(geometry, a_cls=a_cls))
-            elif isinstance(geometry, shapely.geometry.Point):
-                _annotations.append(Point(geometry, a_cls=a_cls))
+        for _geometry in self._annotations:
+            if isinstance(_geometry, shapely.geometry.Polygon):
+                _annotations.append(Polygon(_geometry, a_cls=a_cls))
+            elif isinstance(_geometry, shapely.geometry.Point):
+                _annotations.append(Point(_geometry, a_cls=a_cls))
             else:
-                raise AnnotationError(f"Unknown annotation type {type(geometry)}.")
+                raise AnnotationError(f"Unknown annotation type {type(_geometry)}.")
 
         self._annotations = _annotations
 
@@ -539,7 +538,7 @@ def from_asap_xml(
                 if isinstance(coordinates, shapely.geometry.collection.GeometryCollection):
                     split_up = [_ for _ in coordinates.geoms if _.area > 0]
                     if len(split_up) != 1:
-                        raise RuntimeError(f"Got unexpected object.")
+                        raise RuntimeError("Got unexpected object.")
                     coordinates = split_up[0]
 
                 if coordinates.area == 0:
@@ -640,25 +639,25 @@ def from_darwin_json(cls, darwin_json: PathLike, scaling: float | None = None) -
             _cls = AnnotationClass(label=name, a_cls=annotation_type)
             if annotation_type == AnnotationType.POINT:
                 curr_point = Point((curr_data["x"], curr_data["y"]), a_cls=_cls)
-                curr_point = rescale_geometry(curr_point, scaling=scaling)
+                curr_point = rescale_geometry(curr_point, scaling=_scaling)
                 annotations[key].append(curr_point)
             elif annotation_type == AnnotationType.POLYGON:
                 if "path" in curr_data:  # This is a regular polygon
                     curr_polygon = Polygon([(_["x"], _["y"]) for _ in curr_data["path"]], a_cls=_cls)
-                    curr_polygon = rescale_geometry(curr_polygon, scaling=scaling)
+                    curr_polygon = rescale_geometry(curr_polygon, scaling=_scaling)
                     annotations[key].append(Polygon(curr_polygon, a_cls=_cls))
                 elif "paths" in curr_data:  # This is a complex polygon which needs to be parsed with the even-odd rule
                     curr_complex_polygon = _parse_darwin_complex_polygon(curr_data)
                     for curr_polygon in curr_complex_polygon.geoms:
-                        curr_polygon = rescale_geometry(curr_polygon, scaling=scaling)
+                        curr_polygon = rescale_geometry(curr_polygon, scaling=_scaling)
                         annotations[key].append(Polygon(curr_polygon, a_cls=_cls))
                 else:
                     raise ValueError(f"Got unexpected data keys: {curr_data.keys()}")
 
             elif annotation_type == AnnotationType.BOX:
                 x, y, h, w = curr_data.values()
                 curr_polygon = shapely.geometry.box(x, y, x + w, y + h)
-                curr_polygon = rescale_geometry(curr_polygon, scaling=scaling)
+                curr_polygon = rescale_geometry(curr_polygon, scaling=_scaling)
                 annotations[key].append(Polygon(curr_polygon, a_cls=_cls))
             else:
                 ValueError(f"Annotation type {annotation_type} is not supported.")
@@ -884,8 +883,8 @@ def __contains__(self, item: Union[str, AnnotationClass]) -> bool:
     def __add__(self, other: WsiAnnotations) -> WsiAnnotations:
         if set(self.available_labels).intersection(other.available_labels) != set():
             raise AnnotationError(
-                f"Can only add annotations with different labels. "
-                f"Use `.relabel` or relabel during construction of the object."
+                "Can only add annotations with different labels. "
+                "Use `.relabel` or relabel during construction of the object."
             )
 
         curr_annotations = list(self._annotations.values())

diff --git a/dlup/backends/__init__.py b/dlup/backends/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) dlup contributors
diff --git a/dlup/experimental_backends/common.py → dlup/backends/common.py b/dlup/experimental_backends/common.py → dlup/backends/common.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from __future__ import annotations
 
@@ -33,7 +32,7 @@ def numpy_to_pil(tile: np.ndarray) -> PIL.Image.Image:
     elif bands == 4:
         mode = "RGBA"
     else:
-        raise RuntimeError(f"Incorrect number of channels.")
+        raise RuntimeError("Incorrect number of channels.")
 
     return PIL.Image.fromarray(tile, mode=mode)
 
@@ -160,6 +159,7 @@ def get_thumbnail(self, size: int | tuple[int, int]) -> PIL.Image.Image:
 
         downsample = max(*(dim / thumb for dim, thumb in zip(self.dimensions, size)))
         level = self.get_best_level_for_downsample(downsample)
+
         thumbnail = (
             self.read_region((0, 0), level, self.level_dimensions[level])
             .convert("RGB")

diff --git a/dlup/background.py b/dlup/background.py
@@ -325,7 +325,9 @@ def _is_foreground_numpy(
 
     max_boundary = np.tile(mask_size, 2)
     min_boundary = np.zeros_like(max_boundary)
-    box = np.clip((*scaled_coordinates, *(scaled_coordinates + scaled_sizes)), min_boundary, max_boundary)  # type: ignore
+    box = np.clip(
+        (*scaled_coordinates, *(scaled_coordinates + scaled_sizes)), min_boundary, max_boundary
+    )  # type: ignore
     clipped_w, clipped_h = (box[2:] - box[:2]).astype(int)
 
     if clipped_h == 0 or clipped_w == 0:

diff --git a/dlup/cli/__init__.py b/dlup/cli/__init__.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """DLUP Command-line interface. This is the file which builds the main parser."""
 import argparse

diff --git a/dlup/cli/mask.py b/dlup/cli/mask.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 """CLI utilities to handle masks"""
 import argparse
@@ -57,13 +56,13 @@ def mask_to_polygon(args: argparse.Namespace):
         for pair in args.labels.split(","):
             name, index = pair.split("=")
             if not index.isnumeric():
-                raise argparse.ArgumentTypeError(f"Expected a key-pair of the form 1=tumor,2=stroma")
+                raise argparse.ArgumentTypeError("Expected a key-pair of the form 1=tumor,2=stroma")
             index = float(index)
             if not index.is_integer():
-                raise argparse.ArgumentTypeError(f"Expected a key-pair of the form 1=tumor,2=stroma")
+                raise argparse.ArgumentTypeError("Expected a key-pair of the form 1=tumor,2=stroma")
             index = int(index)
             if index == 0:
-                raise argparse.ArgumentTypeError(f"0 is not a proper index. Needs to be at least 1.")
+                raise argparse.ArgumentTypeError("0 is not a proper index. Needs to be at least 1.")
             index_map[index] = name.strip()
 
     polygons = dataset_to_polygon(dataset, index_map=index_map, num_workers=args.num_workers, scaling=scaling)
@@ -94,7 +93,7 @@ def mask_to_polygon(args: argparse.Namespace):
             json.dump(slide_annotations.as_geojson(split_per_label=False), f, indent=2)
     else:
         jsons = slide_annotations.as_geojson(split_per_label=True)
-        if not type(jsons) == list[tuple[str, GeoJsonDict]]:
+        if not type(jsons) == list[tuple[str, GeoJsonDict]]:  # noqa
             raise ValueError("Expected a list of tuples")
         for label, json_dict in jsons:
             suffix = output_filename.suffix

diff --git a/dlup/data/dataset.py b/dlup/data/dataset.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 
 """Datasets helpers to simplify the generation of a dataset made of tiles from a WSI.
@@ -11,12 +10,11 @@
 import functools
 import itertools
 import pathlib
-from typing import Any, Callable, Generic, Iterable, TypedDict, TypeVar, Union, cast
+from typing import Callable, Generic, Iterable, TypedDict, TypeVar, Union, cast
 
 import numpy as np
 import PIL
 from numpy.typing import NDArray
-from PIL import Image
 
 from dlup import BoundaryMode, SlideImage
 from dlup.annotations import WsiAnnotations
@@ -437,10 +435,11 @@ def from_standard_tiling(
 
             if limit_bounds:
                 if rois is not None:
-                    raise ValueError(f"Cannot use both `rois` and `limit_bounds` at the same time.")
+                    raise ValueError("Cannot use both `rois` and `limit_bounds` at the same time.")
                 if backend == ImageBackend.AUTODETECT or backend == "AUTODETECT":
                     raise ValueError(
-                        f"Cannot use AutoDetect as backend and use limit_bounds at the same time. This is related to issue #151. See https://github.com/NKI-AI/dlup/issues/151"
+                        "Cannot use AutoDetect as backend and use limit_bounds at the same time. "
+                        "This is related to issue #151. See https://github.com/NKI-AI/dlup/issues/151"
                     )
 
                 offset, bounds = slide_image.slide_bounds

diff --git a/dlup/data/experimental/dataset.py b/dlup/data/experimental/dataset.py
@@ -62,7 +62,7 @@ def __init__(
         self._grids = grids
         self._num_scales = num_scales
         if len(list(grids)) % num_scales != 0:
-            raise ValueError(f"In a multiscale dataset the grids needs to be divisible by the number of scales.")
+            raise ValueError("In a multiscale dataset the grids needs to be divisible by the number of scales.")
 
         self._step_size = len(list(grids)[0][0])
         self._index_ranges = [
@@ -101,7 +101,7 @@ def multiscale_from_tiling(
         backend: Callable = ImageBackend.PYVIPS,
     ):
         if mpps != sorted(mpps):
-            raise ValueError(f"The mpp values should be in increasing order.")
+            raise ValueError("The mpp values should be in increasing order.")
 
         with SlideImage.from_file_path(path, backend=backend) as slide_image:
             original_mpp = slide_image.mpp

diff --git a/dlup/data/transforms.py b/dlup/data/transforms.py
@@ -65,7 +65,7 @@ def convert_annotations(
     boxes: dict[str, list[tuple[tuple[int, int], tuple[int, int]]]] = defaultdict(list)
 
     roi_mask = np.zeros(region_size, dtype=np.int32)
-
+    has_roi = False
     for curr_annotation in annotations:
         holes_mask = None
         if isinstance(curr_annotation, dlup.annotations.Point):
@@ -82,6 +82,7 @@ def convert_annotations(
                 [np.asarray(curr_annotation.exterior.coords).round().astype(np.int32)],
                 1,
             )
+            has_roi = True
             continue
 
         if not (curr_annotation.label in index_map):
@@ -104,6 +105,10 @@ def convert_annotations(
             # TODO: This is a bit hacky to ignore mypy here, but I don't know how to fix it.
             mask = np.where(holes_mask == 1, original_values, mask)  # type: ignore
 
+        # A missing ROI annotation is a hard-to-find bug, so raise an explicit error instead.
+        if not has_roi and roi_name is not None:
+            raise AnnotationError(f"ROI mask {roi_name} not found, please add a ROI mask to the annotations.")
+
     return dict(points), dict(boxes), mask, roi_mask if roi_name else None
 
 

diff --git a/dlup/experimental_backends/__init__.py b/dlup/experimental_backends/__init__.py
@@ -14,7 +14,7 @@
 
 from dlup import UnsupportedSlideError
 
-from .common import AbstractSlideBackend
+from ..backends.common import AbstractSlideBackend
 from .openslide_backend import OpenSlideSlide
 from .pyvips_backend import PyVipsSlide
 from .tifffile_backend import TifffileSlide

diff --git a/dlup/experimental_backends/openslide_backend.py b/dlup/experimental_backends/openslide_backend.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from __future__ import annotations
 
@@ -8,7 +7,7 @@
 import openslide
 import PIL.Image
 
-from dlup.experimental_backends.common import AbstractSlideBackend
+from dlup.backends.common import AbstractSlideBackend
 from dlup.types import PathLike
 from dlup.utils.image import check_if_mpp_is_valid
 
@@ -56,7 +55,7 @@ def spacing(self) -> tuple[float, float] | None:
     @spacing.setter
     def spacing(self, value: tuple[float, float]) -> None:
         if not isinstance(value, tuple) and len(value) != 2:
-            raise ValueError(f"`.spacing` has to be of the form (mpp_x, mpp_y).")
+            raise ValueError("`.spacing` has to be of the form (mpp_x, mpp_y).")
 
         mpp_x, mpp_y = value
         check_if_mpp_is_valid(mpp_x, mpp_y)

diff --git a/dlup/experimental_backends/pyvips_backend.py b/dlup/experimental_backends/pyvips_backend.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from __future__ import annotations
 
@@ -11,7 +10,7 @@
 import pyvips
 
 from dlup import UnsupportedSlideError
-from dlup.experimental_backends.common import AbstractSlideBackend, numpy_to_pil
+from dlup.backends.common import AbstractSlideBackend, numpy_to_pil
 from dlup.types import PathLike
 from dlup.utils.image import check_if_mpp_is_valid
 
@@ -157,7 +156,7 @@ def spacing(self) -> tuple[float, float] | None:
     @spacing.setter
     def spacing(self, value: tuple[float, float]) -> None:
         if not isinstance(value, tuple) and len(value) != 2:
-            raise ValueError(f"`.spacing` has to be of the form (mpp_x, mpp_y).")
+            raise ValueError("`.spacing` has to be of the form (mpp_x, mpp_y).")
 
         mpp_x, mpp_y = value
         check_if_mpp_is_valid(mpp_x, mpp_y)
@@ -191,7 +190,8 @@ def associated_images(self):
         """Images associated with this whole-slide image."""
         if not self._loader == "openslideload":
             return {}
-        associated_images = (_.strip() for _ in self.properties["slide-associated-images"].split(","))
+        # TODO: Implement associated-image loading; the name parsing below is disabled until then.
+        # associated_images = (_.strip() for _ in self.properties["slide-associated-images"].split(","))
         raise NotImplementedError
 
     def set_cache(self, cache):

diff --git a/dlup/experimental_backends/tifffile_backend.py b/dlup/experimental_backends/tifffile_backend.py
@@ -1,12 +1,11 @@
-# coding=utf-8
 # Copyright (c) dlup contributors
 from typing import Any
 
 import numpy as np
 import PIL.Image
 import tifffile
 
-from dlup.experimental_backends.common import AbstractSlideBackend, numpy_to_pil
+from dlup.backends.common import AbstractSlideBackend, numpy_to_pil
 from dlup.types import PathLike
 from dlup.utils.tifffile_utils import get_tile
 

diff --git a/dlup/tools.py b/dlup/tools.py
@@ -5,7 +5,6 @@
 
 import bisect
 import collections
-import functools
 import itertools