
Add BioMassters Dataset #1560

Merged: 18 commits, Sep 29, 2023
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
@@ -174,6 +174,11 @@ BigEarthNet

.. autoclass:: BigEarthNet

BioMassters
^^^^^^^^^^^

.. autoclass:: BioMassters

Cloud Cover Detection
^^^^^^^^^^^^^^^^^^^^^

1 change: 1 addition & 0 deletions docs/api/non_geo_datasets.csv
@@ -2,6 +2,7 @@ Dataset,Task,Source,# Samples,# Classes,Size (px),Resolution (m),Bands
`ADVANCE`_,C,"Google Earth, Freesound","5,075",13,512x512,0.5,RGB
`Benin Cashew Plantations`_,S,Airbus Pléiades,70,6,"1,122x1,186",10,MSI
`BigEarthNet`_,C,Sentinel-1/2,"590,326",19--43,120x120,10,"SAR, MSI"
`BioMassters`_,R,Sentinel-1/2 and Lidar,,,256x256,10,"SAR, MSI"
`Cloud Cover Detection`_,S,Sentinel-2,"22,728",2,512x512,10,MSI
`COWC`_,"C, R","CSUAV AFRL, ISPRS, LINZ, AGRC","388,435",2,256x256,0.15,RGB
`Kenya Crop Type`_,S,Sentinel-2,"4,688",7,"3,035x2,016",10,MSI
21 changes: 21 additions & 0 deletions tests/data/biomassters/The_BioMassters_-_features_metadata.csv.csv
@@ -0,0 +1,21 @@
filename,chip_id,satellite,split,month,size,cksum,s3path_us,s3path_eu,s3path_as,corresponding_agbm
0003d2eb_S1_00.tif,0003d2eb,S1,train,September,0,0,path,path,path,0003d2eb_agbm.tif
0003d2eb_S1_01.tif,0003d2eb,S1,train,October,0,0,path,path,path,0003d2eb_agbm.tif
0003d2eb_S1_02.tif,0003d2eb,S1,train,November,0,0,path,path,path,0003d2eb_agbm.tif
0003d2eb_S2_00.tif,0003d2eb,S2,train,September,0,0,path,path,path,0003d2eb_agbm.tif
0003d2eb_S2_02.tif,0003d2eb,S2,train,November,0,0,path,path,path,0003d2eb_agbm.tif
000aa810_S1_00.tif,000aa810,S1,train,September,0,0,path,path,path,000aa810_agbm.tif
000aa810_S1_01.tif,000aa810,S1,train,October,0,0,path,path,path,000aa810_agbm.tif
000aa810_S1_02.tif,000aa810,S1,train,November,0,0,path,path,path,000aa810_agbm.tif
000aa810_S2_00.tif,000aa810,S2,train,September,0,0,path,path,path,000aa810_agbm.tif
000aa810_S2_02.tif,000aa810,S2,train,November,0,0,path,path,path,000aa810_agbm.tif
0003d2eb_S1_00.tif,0003d2eb,S1,test,September,0,0,path,path,path,0003d2eb_agbm.tif
0003d2eb_S1_01.tif,0003d2eb,S1,test,October,0,0,path,path,path,0003d2eb_agbm.tif
0003d2eb_S1_02.tif,0003d2eb,S1,test,November,0,0,path,path,path,0003d2eb_agbm.tif
0003d2eb_S2_00.tif,0003d2eb,S2,test,September,0,0,path,path,path,0003d2eb_agbm.tif
0003d2eb_S2_02.tif,0003d2eb,S2,test,November,0,0,path,path,path,0003d2eb_agbm.tif
000aa810_S1_00.tif,000aa810,S1,test,September,0,0,path,path,path,000aa810_agbm.tif
000aa810_S1_01.tif,000aa810,S1,test,October,0,0,path,path,path,000aa810_agbm.tif
000aa810_S1_02.tif,000aa810,S1,test,November,0,0,path,path,path,000aa810_agbm.tif
000aa810_S2_00.tif,000aa810,S2,test,September,0,0,path,path,path,000aa810_agbm.tif
000aa810_S2_02.tif,000aa810,S2,test,November,0,0,path,path,path,000aa810_agbm.tif
137 changes: 137 additions & 0 deletions tests/data/biomassters/data.py
@@ -0,0 +1,137 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import csv
import hashlib
import os
import shutil

import numpy as np
import rasterio

metadata_train = "The_BioMassters_-_features_metadata.csv.csv"

csv_columns = [
"filename",
"chip_id",
"satellite",
"split",
"month",
"size",
"cksum",
"s3path_us",
"s3path_eu",
"s3path_as",
"corresponding_agbm",
]

targets = "train_agbm.zip"

splits = ["train", "test"]

sample_ids = ["0003d2eb", "000aa810"]

months = ["September", "October", "November"]

satellite = ["S1", "S2"]

SIZE = 32


def create_tif_file(path: str, num_channels: int, dtype: str) -> None:
"""Create S1 or S2 data with num channels.

Args:
path: path where to save tif
num_channels: number of channels (4 for S1, 11 for S2)
dtype: uint16 for image data and float 32 for target
"""
profile = {}
profile["driver"] = "GTiff"
profile["dtype"] = dtype
profile["count"] = num_channels
profile["crs"] = "epsg:4326"
profile["transform"] = rasterio.transform.from_bounds(0, 0, 1, 1, 1, 1)
profile["height"] = SIZE
profile["width"] = SIZE
profile["compress"] = "lzw"
profile["predictor"] = 2

if "float" in profile["dtype"]:
Z = np.random.randn(SIZE, SIZE).astype(profile["dtype"])
else:
Z = np.random.randint(
np.iinfo(profile["dtype"]).max, size=(SIZE, SIZE), dtype=profile["dtype"]
)

with rasterio.open(path, "w", **profile) as src:
for i in range(1, profile["count"] + 1):
src.write(Z, i)


# filename,chip_id,satellite,split,month,size,cksum,s3path_us,s3path_eu,s3path_as,corresponding_agbm
if __name__ == "__main__":
csv_rows = []
for split in splits:
os.makedirs(f"{split}_features", exist_ok=True)
if split == "train":
os.makedirs("train_agbm", exist_ok=True)
for id in sample_ids:
for sat in satellite:
path = id + "_" + str(sat)
for idx, month in enumerate(months):
# S2 data is not present for every month
if sat == "S2" and idx == 1:
continue
file_path = path + "_" + f"{idx:02d}" + ".tif"

csv_rows.append(
[
file_path,
id,
sat,
split,
month,
"0",
"0",
"path",
"path",
"path",
id + "_agbm.tif",
]
)

# file path to save
file_path = os.path.join(f"{split}_features", file_path)

if sat == "S1":
create_tif_file(file_path, num_channels=4, dtype="uint16")
else:
create_tif_file(file_path, num_channels=11, dtype="uint16")

# create target data one per id
if split == "train":
create_tif_file(
os.path.join(f"{split}_agbm", id + "_agbm.tif"),
num_channels=1,
dtype="float32",
)

# write out metadata

with open(metadata_train, "w") as csv_file:
wr = csv.writer(csv_file)
wr.writerow(csv_columns)
for row in csv_rows:
wr.writerow(row)

# zip up feature and target folders
zip_dirs = ["train_features", "test_features", "train_agbm"]
for dir in zip_dirs:
shutil.make_archive(dir, "zip", dir)
# Compute checksums
with open(dir + ".zip", "rb") as f:
md5 = hashlib.md5(f.read()).hexdigest()
print(f"{dir}: {md5}")
Binary file added tests/data/biomassters/test_features.zip
Binary file added tests/data/biomassters/train_agbm.zip
Binary file added tests/data/biomassters/train_features.zip
51 changes: 51 additions & 0 deletions tests/datasets/test_biomassters.py
@@ -0,0 +1,51 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.


import os
from itertools import product
from pathlib import Path

import matplotlib.pyplot as plt
import pytest
from _pytest.fixtures import SubRequest

from torchgeo.datasets import BioMassters


class TestBioMassters:
@pytest.fixture(
params=product(["train", "test"], [["S1"], ["S2"], ["S1", "S2"]], [True, False])
)
def dataset(self, request: SubRequest) -> BioMassters:
root = os.path.join("tests", "data", "biomassters")
split, sensors, as_time_series = request.param
return BioMassters(
root, split=split, sensors=sensors, as_time_series=as_time_series
)

def test_len_of_ds(self, dataset: BioMassters) -> None:
assert len(dataset) > 0

def test_invalid_split(self, dataset: BioMassters) -> None:
with pytest.raises(AssertionError):
BioMassters(dataset.root, split="foo")

def test_invalid_bands(self, dataset: BioMassters) -> None:
with pytest.raises(AssertionError):
BioMassters(dataset.root, sensors=["S3"])

def test_not_downloaded(self, tmp_path: Path) -> None:
match = "Dataset not found"
with pytest.raises(RuntimeError, match=match):
BioMassters(str(tmp_path))

def test_plot(self, dataset: BioMassters) -> None:
dataset.plot(dataset[0], suptitle="Test")
plt.close()

sample = dataset[0]
if dataset.split == "train":
sample["prediction"] = sample["label"]
dataset.plot(sample)
plt.close()
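
For reviewers, a minimal usage sketch of the API these tests exercise (the root path is a placeholder; the constructor arguments mirror the fixture above, and the sample keys follow the handling in test_plot):

from torchgeo.datasets import BioMassters

# Placeholder path; in the tests this points at tests/data/biomassters.
root = "path/to/biomassters"

# split, sensors and as_time_series are the parameters the fixture varies.
ds = BioMassters(root, split="train", sensors=["S1", "S2"], as_time_series=True)

sample = ds[0]  # indexing returns a sample dict
ds.plot(sample, suptitle="BioMassters sample")

# As in test_plot, a prediction can be shown alongside the target on the
# train split by copying the label into the "prediction" key.
sample["prediction"] = sample["label"]
ds.plot(sample)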
2 changes: 2 additions & 0 deletions torchgeo/datasets/__init__.py
@@ -8,6 +8,7 @@
from .astergdem import AsterGDEM
from .benin_cashews import BeninSmallHolderCashews
from .bigearthnet import BigEarthNet
from .biomassters import BioMassters
from .cbf import CanadianBuildingFootprints
from .cdl import CDL
from .chesapeake import (
@@ -173,6 +174,7 @@
"ADVANCE",
"BeninSmallHolderCashews",
"BigEarthNet",
"BioMassters",
"CloudCoverDetection",
"COWC",
"COWCCounting",