From 46f5fe46c5f825685616dc26130f3c541c65d1d8 Mon Sep 17 00:00:00 2001
From: David Meaux <48763344+dmeaux@users.noreply.github.com>
Date: Sat, 2 Mar 2024 21:33:51 +0100
Subject: [PATCH] Clarify RasterDataset documentation for is_image and dtype
 (#1811)

* Change DEMs from mask to image (is_image=True)

* fix to revert to upstream file

* fix unused type: ignore comment

* Update torchgeo/datasets/geo.py

Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>

* Update documentation to explain is_image and dtype. Update asterdem to override dtype.

* fix linting errors

* Made comment for is_image more succinct.

* change asterdem dtype back to float32 (same as RasterDataset)

* removed integer images from documentation

* change Digital Elevation Model to DEM

* Clarify is_image and dtype.
Revert DEMs to masks

* Finish reverting DEMs to masks

* address review comments

* Changed Aster Global DEM and EU-DEM Dataset types to "DEM"

* Reorganize some information

* Use better formatting

---------

Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
---
 docs/api/geo_datasets.csv                  |  4 +-
 docs/tutorials/custom_raster_dataset.ipynb |  6 ++-
 torchgeo/datasets/geo.py                   | 53 ++++++++++++++++------
 3 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/docs/api/geo_datasets.csv b/docs/api/geo_datasets.csv
index 54cf53b9f27..ef74a372a4c 100644
--- a/docs/api/geo_datasets.csv
+++ b/docs/api/geo_datasets.csv
@@ -1,6 +1,6 @@
 Dataset,Type,Source,Size (px),Resolution (m)
 `Aboveground Woody Biomass`_,Masks,"Landsat, LiDAR","40,000x40,000",30
-`Aster Global DEM`_,Masks,Aster,"3,601x3,601",30
+`Aster Global DEM`_,DEM,Aster,"3,601x3,601",30
 `Canadian Building Footprints`_,Geometries,Bing Imagery,-,-
 `Chesapeake Land Cover`_,"Imagery, Masks",NAIP,-,1
 `Global Mangrove Distribution`_,Masks,"Remote Sensing, In Situ Measurements",-,3
@@ -8,7 +8,7 @@ Dataset,Type,Source,Size (px),Resolution (m)
 `EDDMapS`_,Points,Citizen Scientists,-,-
 `EnviroAtlas`_,"Imagery, Masks","NAIP, NLCD, OpenStreetMap",-,1
 `Esri2020`_,Masks,Sentinel-2,-,10
-`EU-DEM`_,Masks,"Aster, SRTM, Russian Topomaps",-,25
+`EU-DEM`_,DEM,"Aster, SRTM, Russian Topomaps",-,25
 `GBIF`_,Points,Citizen Scientists,-,-
 `GlobBiomass`_,Masks,Landsat,"45,000x45,000",100
 `iNaturalist`_,Points,Citizen Scientists,-,-
diff --git a/docs/tutorials/custom_raster_dataset.ipynb b/docs/tutorials/custom_raster_dataset.ipynb
index 91021fc0ba7..7401e580edb 100644
--- a/docs/tutorials/custom_raster_dataset.ipynb
+++ b/docs/tutorials/custom_raster_dataset.ipynb
@@ -329,7 +329,11 @@
     "\n",
     "### `is_image`\n",
     "\n",
-    "If your data only contains image files, as is the case with Sentinel-2, use `is_image = True`. If your data only contains segmentation masks, use `is_image = False` instead.\n",
+    "If your data only contains model inputs (such as images), use `is_image = True`. If your data only contains ground truth model outputs (such as segmentation masks), use `is_image = False` instead.\n",
+    "\n",
+    "### `dtype`\n",
+    "\n",
+    "Defaults to float32 for `is_image == True` and long for `is_image == False`. This is what you want for 99% of datasets, but can be overridden for tasks like pixel-wise regression (where the target mask should be float32).\n",
     "\n",
     "### `separate_files`\n",
     "\n",
diff --git a/torchgeo/datasets/geo.py b/torchgeo/datasets/geo.py
index 662161b8193..99195793f91 100644
--- a/torchgeo/datasets/geo.py
+++ b/torchgeo/datasets/geo.py
@@ -53,9 +53,11 @@ class GeoDataset(Dataset[dict[str, Any]], abc.ABC):
     based on latitude/longitude. This allows users to do things like:
 
     * Combine image and target labels and sample from both simultaneously
-      (e.g. Landsat and CDL)
+      (e.g., Landsat and CDL)
     * Combine datasets for multiple image sources for multimodal learning or data fusion
-      (e.g. Landsat and Sentinel)
+      (e.g., Landsat and Sentinel)
+    * Combine image and other raster data (e.g., elevation, temperature, pressure)
+      and sample from both simultaneously (e.g., Landsat and Aster Global DEM)
 
     These combinations require that all queries are present in *both* datasets,
     and can be combined using an :class:`IntersectionDataset`:
@@ -67,9 +69,9 @@ class GeoDataset(Dataset[dict[str, Any]], abc.ABC):
     Users may also want to:
 
     * Combine datasets for multiple image sources and treat them as equivalent
-      (e.g. Landsat 7 and Landsat 8)
+      (e.g., Landsat 7 and Landsat 8)
     * Combine datasets for disparate geospatial locations
-      (e.g. Chesapeake NY and PA)
+      (e.g., Chesapeake NY and PA)
 
     These combinations require that all queries are present in *at least one* dataset,
     and can be combined using a :class:`UnionDataset`:
@@ -106,7 +108,7 @@ class GeoDataset(Dataset[dict[str, Any]], abc.ABC):
     def __init__(
         self, transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None
     ) -> None:
-        """Initialize a new Dataset instance.
+        """Initialize a new GeoDataset instance.
 
         Args:
             transforms: a function/transform that takes an input sample
@@ -339,7 +341,14 @@ class RasterDataset(GeoDataset):
     #: Not used if :attr:`filename_regex` does not contain a ``date`` group.
     date_format = "%Y%m%d"
 
-    #: True if dataset contains imagery, False if dataset contains mask
+    #: True if the dataset only contains model inputs (such as images). False if the
+    #: dataset only contains ground truth model outputs (such as segmentation masks).
+    #:
+    #: The sample returned by the dataset/data loader will use the "image" key if
+    #: *is_image* is True, otherwise it will use the "mask" key.
+    #:
+    #: For datasets with both model inputs and outputs, a custom
+    #: :meth:`~RasterDataset.__getitem__` method must be implemented.
     is_image = True
 
     #: True if data is stored in a separate file for each band, else False.
@@ -358,6 +367,10 @@ class RasterDataset(GeoDataset):
     def dtype(self) -> torch.dtype:
         """The dtype of the dataset (overrides the dtype of the data file via a cast).
 
+        Defaults to float32 if :attr:`~RasterDataset.is_image` is True, else long.
+        Can be overridden for tasks like pixel-wise regression where the mask should be
+        float32 instead of long.
+
         Returns:
             the dtype of the dataset
 
@@ -377,7 +390,7 @@ def __init__(
         transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
         cache: bool = True,
     ) -> None:
-        """Initialize a new Dataset instance.
+        """Initialize a new RasterDataset instance.
 
         Args:
             paths: one or more root directories to search or files to load
@@ -594,7 +607,7 @@ def __init__(
         transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
         label_name: Optional[str] = None,
     ) -> None:
-        """Initialize a new Dataset instance.
+        """Initialize a new VectorDataset instance.
 
         Args:
             paths: one or more root directories to search or files to load
@@ -840,9 +853,11 @@ class IntersectionDataset(GeoDataset):
     This allows users to do things like:
 
     * Combine image and target labels and sample from both simultaneously
-      (e.g. Landsat and CDL)
+      (e.g., Landsat and CDL)
     * Combine datasets for multiple image sources for multimodal learning or data fusion
-      (e.g. Landsat and Sentinel)
+      (e.g., Landsat and Sentinel)
+    * Combine image and other raster data (e.g., elevation, temperature, pressure)
+      and sample from both simultaneously (e.g., Landsat and Aster Global DEM)
 
     These combinations require that all queries are present in *both* datasets,
     and can be combined using an :class:`IntersectionDataset`:
@@ -863,7 +878,12 @@ def __init__(
         ] = concat_samples,
         transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
     ) -> None:
-        """Initialize a new Dataset instance.
+        """Initialize a new IntersectionDataset instance.
+
+        When computing the intersection between two datasets that both contain model
+        inputs (such as images) or model outputs (such as masks), the default behavior
+        is to stack the data along the channel dimension. The *collate_fn* parameter
+        can be used to change this behavior.
 
         Args:
             dataset1: the first dataset
@@ -993,9 +1013,9 @@ class UnionDataset(GeoDataset):
     This allows users to do things like:
 
     * Combine datasets for multiple image sources and treat them as equivalent
-      (e.g. Landsat 7 and Landsat 8)
+      (e.g., Landsat 7 and Landsat 8)
     * Combine datasets for disparate geospatial locations
-      (e.g. Chesapeake NY and PA)
+      (e.g., Chesapeake NY and PA)
 
     These combinations require that all queries are present in *at least one* dataset,
     and can be combined using a :class:`UnionDataset`:
@@ -1016,7 +1036,12 @@ def __init__(
         ] = merge_samples,
         transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
     ) -> None:
-        """Initialize a new Dataset instance.
+        """Initialize a new UnionDataset instance.
+
+        When computing the union between two datasets that both contain model inputs
+        (such as images) or model outputs (such as masks), the default behavior is to
+        merge the data to create a single image/mask. The *collate_fn* parameter can be
+        used to change this behavior.
 
         Args:
             dataset1: the first dataset