Adding Esri 2020 Land Cover/Land Use Dataset (#390)
* Adding Esri 2020 dataset
* long url string
* requested changes
* Add period at end of sentence

Co-authored-by: Adam J. Stewart <[email protected]>
1 parent 34e4dfb · commit fa6e4e1
Showing 6 changed files with 238 additions and 0 deletions.
@@ -42,6 +42,11 @@ EnviroAtlas

.. autoclass:: EnviroAtlas

Esri2020
^^^^^^^^

.. autoclass:: Esri2020

Landsat
^^^^^^^
Binary file added: +4.84 KB
tests/data/esri2020/io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip
(binary file not shown)

Binary file added: +4.35 KB
...2020/io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01/00A_20200101-20210101.tif
(binary file not shown)
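The two binaries above are tiny synthetic fixtures, not real product tiles. The commit only adds the binaries themselves; as a rough illustration of how such a fixture could be produced, the sketch below writes a small single-band GeoTIFF with rasterio and zips it under the expected archive name. The raster size, CRS, transform, and pixel values are assumptions, and anything regenerated this way would have a different MD5 than the checksum pinned in the tests below.

# Hypothetical fixture generator (a sketch, not the script used for this commit)
import os
import zipfile

import numpy as np
import rasterio
from rasterio.crs import CRS
from rasterio.transform import from_origin

SIZE = 32  # keep the raster tiny so the zipped fixture stays a few KB
directory = "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01"
tile = "00A_20200101-20210101.tif"
os.makedirs(directory, exist_ok=True)

profile = {
    "driver": "GTiff",
    "dtype": "uint8",
    "count": 1,
    "width": SIZE,
    "height": SIZE,
    "crs": CRS.from_epsg(32601),  # arbitrary UTM zone, assumed for the sketch
    "transform": from_origin(100000.0, 5000000.0, 10.0, 10.0),  # 10 m pixels
}

# Random labels drawn from the 0-10 class range described in the dataset docstring
data = np.random.randint(0, 11, size=(1, SIZE, SIZE), dtype=np.uint8)
with rasterio.open(os.path.join(directory, tile), "w", **profile) as dst:
    dst.write(data)

# Zip the tile under the archive name the dataset expects
with zipfile.ZipFile(directory + ".zip", "w") as zf:
    zf.write(os.path.join(directory, tile))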
@@ -0,0 +1,93 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from pathlib import Path
from typing import Generator

import pytest
import torch
import torch.nn as nn
from _pytest.monkeypatch import MonkeyPatch
from rasterio.crs import CRS

import torchgeo.datasets.utils
from torchgeo.datasets import BoundingBox, Esri2020, IntersectionDataset, UnionDataset


def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
    shutil.copy(url, root)


class TestEsri2020:
    @pytest.fixture
    def dataset(
        self, monkeypatch: Generator[MonkeyPatch, None, None], tmp_path: Path
    ) -> Esri2020:
        monkeypatch.setattr(  # type: ignore[attr-defined]
            torchgeo.datasets.esri2020, "download_url", download_url
        )
        zipfile = "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip"
        monkeypatch.setattr(Esri2020, "zipfile", zipfile)  # type: ignore[attr-defined]

        md5 = "4932855fcd00735a34b74b1f87db3df0"
        monkeypatch.setattr(Esri2020, "md5", md5)  # type: ignore[attr-defined]
        url = os.path.join(
            "tests",
            "data",
            "esri2020",
            "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip",
        )
        monkeypatch.setattr(Esri2020, "url", url)  # type: ignore[attr-defined]
        root = str(tmp_path)
        transforms = nn.Identity()  # type: ignore[attr-defined]
        return Esri2020(root, transforms=transforms, download=True, checksum=True)

    def test_already_downloaded(self, tmp_path: Path) -> None:
        url = os.path.join(
            "tests",
            "data",
            "esri2020",
            "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip",
        )
        root = str(tmp_path)
        shutil.copy(url, root)
        Esri2020(root)

    def test_getitem(self, dataset: Esri2020) -> None:
        x = dataset[dataset.bounds]
        assert isinstance(x, dict)
        assert isinstance(x["crs"], CRS)
        assert isinstance(x["mask"], torch.Tensor)

    def test_already_extracted(self, dataset: Esri2020) -> None:
        Esri2020(root=dataset.root, download=True)

    def test_not_downloaded(self, tmp_path: Path) -> None:
        with pytest.raises(RuntimeError, match="Dataset not found"):
            Esri2020(str(tmp_path), checksum=True)

    def test_and(self, dataset: Esri2020) -> None:
        ds = dataset & dataset
        assert isinstance(ds, IntersectionDataset)

    def test_or(self, dataset: Esri2020) -> None:
        ds = dataset | dataset
        assert isinstance(ds, UnionDataset)

    def test_plot(self, dataset: Esri2020) -> None:
        query = dataset.bounds
        x = dataset[query]
        dataset.plot(x["mask"])

    def test_url(self) -> None:
        ds = Esri2020(os.path.join("tests", "data", "esri2020"))
        assert "ai4edataeuwest.blob.core.windows.net" in ds.url

    def test_invalid_query(self, dataset: Esri2020) -> None:
        query = BoundingBox(0, 0, 0, 0, 0, 0)
        with pytest.raises(
            IndexError, match="query: .* not found in index with bounds:"
        ):
            dataset[query]
@@ -0,0 +1,138 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Esri 2020 Land Cover Dataset."""

import abc
import glob
import os
from typing import Any, Callable, Dict, Optional

from rasterio.crs import CRS

from .geo import RasterDataset
from .utils import download_url, extract_archive


class Esri2020(RasterDataset, abc.ABC):
    """Esri 2020 Land Cover Dataset.

    The `Esri 2020 Land Cover dataset
    <https://www.arcgis.com/home/item.html?id=fc92d38533d440078f17678ebc20e8e2>`_
    consists of a global single-band land use/land cover map derived from ESA
    Sentinel-2 imagery at 10m resolution with a total of 10 classes.
    It was published in July 2021 and uses the Universal Transverse Mercator (UTM)
    projection. This dataset contains only labels, no raw satellite imagery.

    The classes are:

    0. No Data
    1. Water
    2. Trees
    3. Grass
    4. Flooded Vegetation
    5. Crops
    6. Scrub/Shrub
    7. Built Area
    8. Bare Ground
    9. Snow/Ice
    10. Clouds

    A more detailed explanation of the individual classes can be found
    `here <https://www.arcgis.com/home/item.html?id=fc92d38533d440078f17678ebc20e8e2>`_.

    If you use this dataset please cite the following paper:

    * https://ieeexplore.ieee.org/document/9553499

    .. versionadded:: 0.3
    """

    is_image = False
    filename_glob = "*_20200101-20210101.*"
    filename_regex = r"""^
        (?P<id>[0-9][0-9][A-Z])
        _(?P<date>\d{8})
        -(?P<processing_date>\d{8})
    """

    zipfile = "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip"
    md5 = "4932855fcd00735a34b74b1f87db3df0"

    url = (
        "https://ai4edataeuwest.blob.core.windows.net/io-lulc/"
        "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip"
    )

    def __init__(
        self,
        root: str = "data",
        crs: Optional[CRS] = None,
        res: Optional[float] = None,
        transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
        cache: bool = True,
        download: bool = False,
        checksum: bool = False,
    ) -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found
            crs: :term:`coordinate reference system (CRS)` to warp to
                (defaults to the CRS of the first file found)
            res: resolution of the dataset in units of CRS
                (defaults to the resolution of the first file found)
            transforms: a function/transform that takes an input sample
                and returns a transformed version
            cache: if True, cache file handle to speed up repeated sampling
            download: if True, download dataset and store it in the root directory
            checksum: if True, check the MD5 of the downloaded files (may be slow)

        Raises:
            FileNotFoundError: if no files are found in ``root``
            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
        """
        self.root = root
        self.download = download
        self.checksum = checksum

        self._verify()

        super().__init__(root, crs, res, transforms, cache)

    def _verify(self) -> None:
        """Verify the integrity of the dataset.

        Raises:
            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
        """
        # Check if the extracted file already exists
        pathname = os.path.join(self.root, "**", self.filename_glob)
        if glob.glob(pathname):
            return

        # Check if the zip file has already been downloaded
        pathname = os.path.join(self.root, self.zipfile)
        if glob.glob(pathname):
            self._extract()
            return

        # Check if the user requested to download the dataset
        if not self.download:
            raise RuntimeError(
                f"Dataset not found in `root={self.root}` and `download=False`, "
                "either specify a different `root` directory or use `download=True` "
                "to automatically download the dataset."
            )

        # Download the dataset
        self._download()
        self._extract()

    def _download(self) -> None:
        """Download the dataset."""
        download_url(self.url, self.root, filename=self.zipfile, md5=self.md5)

    def _extract(self) -> None:
        """Extract the dataset."""
        extract_archive(os.path.join(self.root, self.zipfile))
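For readers skimming the diff, here is a minimal usage sketch of the new dataset. The constructor arguments and the ``crs``/``mask`` sample keys come from the code and tests above; the root path and the idea of querying the full bounds are illustrative assumptions, and ``download=True`` pulls the full global zip from the ``url`` attribute, which is a sizable download.

# Minimal usage sketch (paths are placeholders)
import torch

from torchgeo.datasets import Esri2020

ds = Esri2020(root="data/esri2020", download=True, checksum=True)

# RasterDataset-style indexing: query by a BoundingBox, e.g. the full extent
sample = ds[ds.bounds]
mask = sample["mask"]  # torch.Tensor of class indices 0-10
print(sample["crs"], mask.shape, torch.unique(mask))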