Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DeepGlobe dataset for land cover #578

Merged
merged 36 commits into from
Jul 2, 2022
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
c29b010
add class for Deep Globe Land Cover dataset
Jun 13, 2022
e6c5980
add Lightning data module implementation for deepglobe land cover
Jun 13, 2022
283645f
fix formatting errors
Jun 14, 2022
4f5ea5c
fix urls, formats and add link for paper
Jun 14, 2022
0a478a2
add tests for deepglobe dataset and datamodule
Jun 15, 2022
156af6c
fix a test case and a few more formatting error
Jun 15, 2022
48bf64b
add data.py and modify error match for data download
Jun 16, 2022
502d14e
modify draw_semantic_segmentation_masks for cases when mask is a subs…
Jun 16, 2022
b0b95e5
fix mypy error
Jun 17, 2022
20d346d
add to docs for documentation
Jun 27, 2022
5165830
add deepglobe to the dataset lists csv
Jun 27, 2022
68705d2
fix error in building docs
Jun 27, 2022
1a3852d
Update datamodules.rst
calebrob6 Jun 27, 2022
abb3651
Update datasets.rst
calebrob6 Jun 27, 2022
6bc68e6
Update data.py
calebrob6 Jun 27, 2022
0b86e13
Update utils.py
calebrob6 Jun 29, 2022
d664ba9
change file permissions of non_geo_datasets.csv
Jun 29, 2022
3989c33
Add versionadded
calebrob6 Jul 2, 2022
210d375
Update torchgeo/datasets/deepglobelandcover.py
calebrob6 Jul 2, 2022
073ea9a
Change end of line sequence
calebrob6 Jul 2, 2022
0dca2e1
Update tests/data/deepglobelandcover/data.py
calebrob6 Jul 2, 2022
ff6f2f6
exist_ok
calebrob6 Jul 2, 2022
b1358b6
Update tests/datasets/test_deepglobelandcover.py
calebrob6 Jul 2, 2022
21721cc
Remove datamodule tests
calebrob6 Jul 2, 2022
e65fba7
Remove split monkeypatch
calebrob6 Jul 2, 2022
7554def
Merge branch 'add-DeepGlobe-dataset' of github.com:saumyasinha/torchg…
calebrob6 Jul 2, 2022
6aa6f32
Running black
calebrob6 Jul 2, 2022
4d0212f
Add val percent to test conf
calebrob6 Jul 2, 2022
09d4a23
Sort filelist so indices are the same across platforms
calebrob6 Jul 2, 2022
e49e006
Simplified the file and mask fns
calebrob6 Jul 2, 2022
c63afd8
Re-adding datamodule tests for coverage
calebrob6 Jul 2, 2022
6b0a74b
Add sub-configs to test val_split_pct in the datamodule
calebrob6 Jul 2, 2022
3ccf788
Lets try it
calebrob6 Jul 2, 2022
015aa0f
Update tests/conf/deepglobelandcover_0.yaml
calebrob6 Jul 2, 2022
3142743
nulllllllll
calebrob6 Jul 2, 2022
7024047
ingore_zeros -> ignore_index
adamjstewart Jul 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/api/datamodules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ COWC

.. autoclass:: COWCCountingDataModule

Deep Globe Land Cover Challenge
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: DeepGlobeLandCoverDataModule

ETCI2021 Flood Detection
^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ Kenya Crop Type

.. autoclass:: CV4AKenyaCropType

Deep Globe Land Cover Challenge
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: DeepGlobeLandCover

DFC2022
^^^^^^^

Expand Down
67 changes: 34 additions & 33 deletions docs/api/non_geo_datasets.csv
Original file line number Diff line number Diff line change
@@ -1,33 +1,34 @@
Dataset,Task,Source,# Samples,# Classes,Size (px),Resolution (m),Bands
calebrob6 marked this conversation as resolved.
Show resolved Hide resolved
calebrob6 marked this conversation as resolved.
Show resolved Hide resolved
`ADVANCE`_,C,"Google Earth, Freesound","5,075",13,512x512,0.5,RGB
`Benin Cashew Plantations`_,S,Airbus Pléiades,70,6,"1,186x1,122",0.5,MSI
`BigEarthNet`_,C,Sentinel-1/2,"590,326",19--43,120x120,10,"SAR, MSI"
`COWC`_,"C, R","CSUAV AFRL, ISPRS, LINZ, AGRC","388,435",2,256x256,0.15,RGB
`Kenya Crop Type`_,S,Sentinel-2,"4,688",7,"3,035x2,016",10,MSI
`DFC2022`_,S,Aerial,,15,"2,000x2,000",0.5,RGB
`ETCI2021 Flood Detection`_,S,Sentinel-1,"66,810",2,256x256,5--20,SAR
`EuroSAT`_,C,Sentinel-2,"27,000",10,64x64,10,MSI
`FAIR1M`_,OD,Gaofen/Google Earth,"15,000",37,"1,024x1,024",0.3--0.8,RGB
`Forest Damage`_,OD,Drone imagery,"1,543",4,"1,500x1,500",,RGB
`GID-15`_,S,Gaofen-2,150,15,"6,800x7,200",3,RGB
`IDTReeS`_,"OD,C",Aerial,591,33,200x200,0.1--1,RGB
`Inria Aerial Image Labeling`_,S,Aerial,360,,"5,000x5,000",0.3,RGB
`LandCover.ai`_,S,Aerial,"10,674",5,512x512,0.25--0.5,RGB
`LEVIR-CD+`_,CD,Google Earth,985,2,"1,024x1,024",0.5,RGB
`LoveDA`_,S,Google Earth,"5,987",7,"1,024x1,024",0.3,RGB
`NASA Marine Debris`_,OD,PlanetScope,707,1,256x256,3,RGB
`NWPU VHR-10`_,I,"Google Earth, Vaihingen",800,10,"358--1,728",0.08--2,RGB
`OSCD`_,CD,Sentinel-2,24,2,"40--1,180",60,MSI
`PatternNet`_,C,Google Earth,"30,400",38,256x256,0.06--5,RGB
`Potsdam`_,S,Aerial,38,6,"6,000x6,000",0.05,MSI
`RESISC45`_,C,Google Earth,"31,500",45,256x256,0.2--30,RGB
`Seasonal Contrast`_,T,Sentinel-2,100K--1M,,264x264,10,MSI
`SEN12MS`_,S,"Sentinel-1/2, MODIS","180,662",33,256x256,10,"SAR, MSI"
`So2Sat`_,C,Sentinel-1/2,"400,673",17,32x32,10,"SAR, MSI"
`SpaceNet`_,I,WorldView-2/3 Planet Lab Dove,"1,889--28,728",2,102--900,0.5--4,MSI
`Tropical Cyclone`_,R,GOES 8--16,"108,110",,256x256,4K--8K,MSI
`UC Merced`_,C,USGS National Map,"21,000",21,256x256,0.3,RGB
`USAVars`_,S,NAIP Aerial,~100K,,,4,"RGB, NIR"
`Vaihingen`_,S,Aerial,33,6,"1,281--3,816",0.09,RGB
`xView2`_,CD,Maxar,"3,732",4,"1,024x1,024",0.8,RGB
`ZueriCrop`_,"I, T",Sentinel-2,116K,48,24x24,10,MSI
Dataset,Task,Source,# Samples,# Classes,Size (px),Resolution (m),Bands
`ADVANCE`_,C,"Google Earth, Freesound","5,075",13,512x512,0.5,RGB
`Benin Cashew Plantations`_,S,Airbus Pléiades,70,6,"1,186x1,122",0.5,MSI
`BigEarthNet`_,C,Sentinel-1/2,"590,326",19--43,120x120,10,"SAR, MSI"
`COWC`_,"C, R","CSUAV AFRL, ISPRS, LINZ, AGRC","388,435",2,256x256,0.15,RGB
`Kenya Crop Type`_,S,Sentinel-2,"4,688",7,"3,035x2,016",10,MSI
`Deep Globe Land Cover Challenge`_,S,DigitalGlobe +Vivid,803,7,"2,448x2,448",0.5,RGB
`DFC2022`_,S,Aerial,,15,"2,000x2,000",0.5,RGB
`ETCI2021 Flood Detection`_,S,Sentinel-1,"66,810",2,256x256,5--20,SAR
`EuroSAT`_,C,Sentinel-2,"27,000",10,64x64,10,MSI
`FAIR1M`_,OD,Gaofen/Google Earth,"15,000",37,"1,024x1,024",0.3--0.8,RGB
`Forest Damage`_,OD,Drone imagery,"1,543",4,"1,500x1,500",,RGB
`GID-15`_,S,Gaofen-2,150,15,"6,800x7,200",3,RGB
`IDTReeS`_,"OD,C",Aerial,591,33,200x200,0.1--1,RGB
`Inria Aerial Image Labeling`_,S,Aerial,360,,"5,000x5,000",0.3,RGB
`LandCover.ai`_,S,Aerial,"10,674",5,512x512,0.25--0.5,RGB
`LEVIR-CD+`_,CD,Google Earth,985,2,"1,024x1,024",0.5,RGB
`LoveDA`_,S,Google Earth,"5,987",7,"1,024x1,024",0.3,RGB
`NASA Marine Debris`_,OD,PlanetScope,707,1,256x256,3,RGB
`NWPU VHR-10`_,I,"Google Earth, Vaihingen",800,10,"358--1,728",0.08--2,RGB
`OSCD`_,CD,Sentinel-2,24,2,"40--1,180",60,MSI
`PatternNet`_,C,Google Earth,"30,400",38,256x256,0.06--5,RGB
`Potsdam`_,S,Aerial,38,6,"6,000x6,000",0.05,MSI
`RESISC45`_,C,Google Earth,"31,500",45,256x256,0.2--30,RGB
`Seasonal Contrast`_,T,Sentinel-2,100K--1M,,264x264,10,MSI
`SEN12MS`_,S,"Sentinel-1/2, MODIS","180,662",33,256x256,10,"SAR, MSI"
`So2Sat`_,C,Sentinel-1/2,"400,673",17,32x32,10,"SAR, MSI"
`SpaceNet`_,I,WorldView-2/3 Planet Lab Dove,"1,889--28,728",2,102--900,0.5--4,MSI
`Tropical Cyclone`_,R,GOES 8--16,"108,110",,256x256,4K--8K,MSI
`UC Merced`_,C,USGS National Map,"21,000",21,256x256,0.3,RGB
`USAVars`_,S,NAIP Aerial,~100K,,,4,"RGB, NIR"
`Vaihingen`_,S,Aerial,33,6,"1,281--3,816",0.09,RGB
`xView2`_,CD,Maxar,"3,732",4,"1,024x1,024",0.8,RGB
`ZueriCrop`_,"I, T",Sentinel-2,116K,48,24x24,10,MSI
74 changes: 74 additions & 0 deletions tests/data/deepglobelandcover/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil

import numpy as np
from PIL import Image
from torchvision.datasets.utils import calculate_md5


def generate_test_data(root: str, n_samples: int = 3) -> str:
    """Create the test data archive for the DeepGlobeLandCover dataset.

    Writes ``n_samples`` tiny RGB images and single-colour masks for both the
    training and the test split below ``<root>/data``, zips that folder to
    ``<root>/data.zip``, deletes the unzipped folder and returns the md5 hash
    of the archive. The images are drawn from NumPy's global random state, so
    the hash differs between runs unless the caller seeds that state first.

    Args:
        root: Path to store test data
        n_samples: Number of samples to create for each split. Training
            samples are numbered from 1 and test samples from 8, so the
            default of 3 yields ids 1-3 and 8-10.

    Returns:
        md5 hash of created archive
    """
    dtype = np.uint8
    size = 2
    # Upper bound handed to randint; it is exclusive, so pixels fall in
    # [0, dtype_max). Computed once because it does not depend on the sample.
    dtype_max = np.iinfo(dtype).max

    folder_path = os.path.join(root, "data")

    train_img_dir = os.path.join(folder_path, "data", "training_data", "images")
    train_mask_dir = os.path.join(folder_path, "data", "training_data", "masks")
    test_img_dir = os.path.join(folder_path, "data", "test_data", "images")
    test_mask_dir = os.path.join(folder_path, "data", "test_data", "masks")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    for directory in (train_img_dir, train_mask_dir, test_img_dir, test_mask_dir):
        os.makedirs(directory, exist_ok=True)

    # Derive the ids from n_samples instead of indexing fixed 3-element lists,
    # which raised IndexError for n_samples > 3 after files were written.
    train_ids = range(1, n_samples + 1)
    test_ids = range(8, 8 + n_samples)

    for train_id, test_id in zip(train_ids, test_ids):
        # Keep the train-then-test draw order so a seeded run is reproducible.
        train_arr = np.random.randint(dtype_max, size=(size, size, 3), dtype=dtype)
        train_img = Image.fromarray(train_arr)
        train_img.save(os.path.join(train_img_dir, f"{train_id}_sat.jpg"))

        test_arr = np.random.randint(dtype_max, size=(size, size, 3), dtype=dtype)
        test_img = Image.fromarray(test_arr)
        test_img.save(os.path.join(test_img_dir, f"{test_id}_sat.jpg"))

        # Masks are a single flat RGB colour per split.
        train_mask_arr = np.full((size, size, 3), (0, 255, 255), dtype=dtype)
        train_mask_img = Image.fromarray(train_mask_arr)
        train_mask_img.save(os.path.join(train_mask_dir, f"{train_id}_mask.png"))

        test_mask_arr = np.full((size, size, 3), (255, 0, 255), dtype=dtype)
        test_mask_img = Image.fromarray(test_mask_arr)
        test_mask_img.save(os.path.join(test_mask_dir, f"{test_id}_mask.png"))

    # Create archive
    shutil.make_archive(folder_path, "zip", folder_path)
    shutil.rmtree(folder_path)
    return calculate_md5(f"{folder_path}.zip")


if __name__ == "__main__":
    # Build the archive in the current working directory and report its md5.
    archive_md5 = generate_test_data(os.getcwd(), 3)
    print(f"{archive_md5}\n")
Binary file added tests/data/deepglobelandcover/data.zip
Binary file not shown.
33 changes: 33 additions & 0 deletions tests/datamodules/test_deepglobelandcover.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
adamjstewart marked this conversation as resolved.
Show resolved Hide resolved
# Licensed under the MIT License.

import os

import pytest
from _pytest.fixtures import SubRequest

from torchgeo.datamodules import DeepGlobeLandCoverDataModule


class TestDeepGlobeLandCoverDataModule:
    """Smoke tests that pull one batch from each DeepGlobeLandCover dataloader."""

    @pytest.fixture(scope="class", params=[0.0, 0.5])
    def datamodule(self, request: SubRequest) -> DeepGlobeLandCoverDataModule:
        """Return a prepared datamodule, once per ``val_split_pct`` parameter."""
        data_root = os.path.join("tests", "data", "deepglobelandcover")
        val_fraction = request.param
        batch_size, num_workers = 1, 0
        module = DeepGlobeLandCoverDataModule(
            data_root, batch_size, num_workers, val_split_pct=val_fraction
        )
        module.prepare_data()
        module.setup()
        return module

    def test_train_dataloader(self, datamodule: DeepGlobeLandCoverDataModule) -> None:
        """Fetch the first batch of the training dataloader."""
        batches = iter(datamodule.train_dataloader())
        next(batches)

    def test_val_dataloader(self, datamodule: DeepGlobeLandCoverDataModule) -> None:
        """Fetch the first batch of the validation dataloader."""
        batches = iter(datamodule.val_dataloader())
        next(batches)

    def test_test_dataloader(self, datamodule: DeepGlobeLandCoverDataModule) -> None:
        """Fetch the first batch of the test dataloader."""
        batches = iter(datamodule.test_dataloader())
        next(batches)
76 changes: 76 additions & 0 deletions tests/datasets/test_deepglobelandcover.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import pytest
import torch
import torch.nn as nn
from _pytest.fixtures import SubRequest
from _pytest.monkeypatch import MonkeyPatch

from torchgeo.datasets import DeepGlobeLandCover


class TestDeepGlobeLandCover:
    """Unit tests for the DeepGlobeLandCover dataset against the tiny test archive."""

    @pytest.fixture(params=["train", "test"])
    def dataset(
        self, monkeypatch: MonkeyPatch, request: SubRequest
    ) -> DeepGlobeLandCover:
        """Return a checksum-verified dataset for each parametrized split."""
        # Point the class at the md5 and split names used for the test archive.
        monkeypatch.setattr(
            DeepGlobeLandCover, "md5", "2cbd68d36b1485f09f32d874dde7c5c5"
        )
        monkeypatch.setattr(DeepGlobeLandCover, "splits", ["train", "test"])
        data_root = os.path.join("tests", "data", "deepglobelandcover")
        identity = nn.Identity()  # type: ignore[no-untyped-call]
        return DeepGlobeLandCover(data_root, request.param, identity, checksum=True)

    def test_getitem(self, dataset: DeepGlobeLandCover) -> None:
        """The first sample is a dict holding tensor image and mask entries."""
        sample = dataset[0]
        assert isinstance(sample, dict)
        for key in ("image", "mask"):
            assert isinstance(sample[key], torch.Tensor)

    def test_len(self, dataset: DeepGlobeLandCover) -> None:
        """Each split of the test archive has three samples."""
        assert len(dataset) == 3

    def test_extract(self, tmp_path: Path) -> None:
        """Constructing the dataset works when only the zip is present in root."""
        archive_name = "data.zip"
        source_dir = os.path.join("tests", "data", "deepglobelandcover")
        source = os.path.join(source_dir, archive_name)
        target = os.path.join(str(tmp_path), archive_name)
        shutil.copyfile(source, target)
        DeepGlobeLandCover(root=str(tmp_path))

    def test_corrupted(self, tmp_path: Path) -> None:
        """A bogus archive is rejected when checksum verification is enabled."""
        bad_archive = os.path.join(tmp_path, "data.zip")
        with open(bad_archive, "w") as handle:
            handle.write("bad")
        with pytest.raises(RuntimeError, match="Dataset found, but corrupted."):
            DeepGlobeLandCover(root=str(tmp_path), checksum=True)

    def test_invalid_split(self) -> None:
        """An unknown split name triggers an AssertionError."""
        with pytest.raises(AssertionError):
            DeepGlobeLandCover(split="foo")

    def test_not_downloaded(self, tmp_path: Path) -> None:
        """An empty root directory yields the 'not found' RuntimeError."""
        expected = (
            "Dataset not found in `root`, either"
            " specify a different `root` directory or manually download"
            " the dataset to this directory."
        )
        with pytest.raises(RuntimeError, match=expected):
            DeepGlobeLandCover(str(tmp_path))

    def test_plot(self, dataset: DeepGlobeLandCover) -> None:
        """Plotting runs with a suptitle, without titles, and with a prediction."""
        sample = dataset[0].copy()

        dataset.plot(sample, suptitle="Test")
        plt.close()

        dataset.plot(sample, show_titles=False)
        plt.close()

        # Reuse the mask as a stand-in prediction to exercise that plot branch.
        sample["prediction"] = sample["mask"].clone()
        dataset.plot(sample)
        plt.close()
2 changes: 2 additions & 0 deletions torchgeo/datamodules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .chesapeake import ChesapeakeCVPRDataModule
from .cowc import COWCCountingDataModule
from .cyclone import CycloneDataModule
from .deepglobelandcover import DeepGlobeLandCoverDataModule
from .etci2021 import ETCI2021DataModule
from .eurosat import EuroSATDataModule
from .fair1m import FAIR1MDataModule
Expand All @@ -32,6 +33,7 @@
# VisionDataset
"BigEarthNetDataModule",
"COWCCountingDataModule",
"DeepGlobeLandCoverDataModule",
"ETCI2021DataModule",
"EuroSATDataModule",
"FAIR1MDataModule",
Expand Down
Loading