From fa6e4e11b58a454ce7fdbf93edb77eef7f08747e Mon Sep 17 00:00:00 2001 From: Nils Lehmann <35272119+nilsleh@users.noreply.github.com> Date: Wed, 9 Feb 2022 21:34:22 +0100 Subject: [PATCH] Adding Esri 2020 Land Cover/Land Use Dataset (#390) * Adding Esri 2020 dataset * long url string * requested changes * Add period at end of sentence Co-authored-by: Adam J. Stewart --- docs/api/datasets.rst | 5 + ...1-composite-v03-supercell-v02-clip-v01.zip | Bin 0 -> 4958 bytes .../00A_20200101-20210101.tif | Bin 0 -> 4455 bytes tests/datasets/test_esri2020.py | 93 ++++++++++++ torchgeo/datasets/__init__.py | 2 + torchgeo/datasets/esri2020.py | 138 ++++++++++++++++++ 6 files changed, 238 insertions(+) create mode 100644 tests/data/esri2020/io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip create mode 100644 tests/data/esri2020/io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01/00A_20200101-20210101.tif create mode 100644 tests/datasets/test_esri2020.py create mode 100644 torchgeo/datasets/esri2020.py diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index a96bb4f389f..4c61a9d7067 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -42,6 +42,11 @@ EnviroAtlas .. autoclass:: EnviroAtlas +Esri2020 +^^^^^^^^ + +.. 
autoclass:: Esri2020 + Landsat ^^^^^^^ diff --git a/tests/data/esri2020/io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip b/tests/data/esri2020/io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip new file mode 100644 index 0000000000000000000000000000000000000000..217161b7a152ccb3511a781dbceaee9561095389 GIT binary patch literal 4958 zcmcJTS2P^%n}COC3BoABL=RCXdN6`uj54AVEqX5@dWlYkC}H$2q9lkeh&ozyA$o7o zW}=KvhE0CE|2=!Ld$W7?{m$FY`ObNt>qirQi+~33H}cIDb^jUuqeuZ*0S>NUXD??f zu#2m;jWbwKPzdb(C#_sv+*~~zJZ=7LB47_MHyd{=8)xS~n=shQ*}?7aCjmWOascsy z;kCuupZwdsd`SU#1lzX&fPaeSKZi*GB!HPn=zkuz0|EfL|F^?}f^ufUg2I1b|DgUE z{xm_MN1hJ0{}9=$;w=8=e?#OI@1;rOIFInzdE~(1%|n*}38{mNt0E%!$C<{ z8>*N5G2sO>Ne|DR!-%?HCx0F=x~UFcC12F zSDz;0L0{Kmyh1nYMtc`!$8K39?^`M6?~V$N`kelO#1VUxivJBWIFt$Zx@OsUO+c}!; zpSQPY9g4HdnpL*mf5abKr^ai#RN=O)Qr$^N8lGow}fk~<>kGRLy-nDwGF#)C$ z{|hH!A36dwqs>wBbSUC8CqbPvYP9>sBPfo(r#aDmqgATFs2;q{S9$TN z?(3F>1ETWP7TBq=nR*^kBFF4I8PH|7OKsW|@72LeivHk2-P|;78j21Ab2k-L65}42 zLVTMk#SUGi9lLucZ4Cp8#SXDSu#8`$P!H0ylFG=xQ@28iX3I}lQ2PLU*aBi1t3-JawzpGy2fDZQNTpj1qKAiF?vzL8flX36%;Hjcl} z2R5A7_bp3e>3ccyiV5LwPGx_Lxf2q!85cqHkjK^TNW=DG`OVsfG!i#%`E&;<%4*)f z`S|`P8$69+Hc?fCe`dhER@b-9{bc;73TE$4Iujng*;slcLCq&p`)W3(KvT@z1SsxVP($L#jcpgyRvP(@0)9Wg<>cue=$@tppS8vfiDMW+w&0oWhTp;3*)HWK>Yx~;NBk=Gn5gcZdiXBjPj4Mf? 
z8Fklac@Dh5H~Pr=8p7};T(dJk@*-bvG>4qh_L*f=*yZ`|B8j?uMTCQ@6LHktn5?ez7SiU^D(@nTI~y#LdVsLnk_EXk7#@YAt< z8~!$(&2`|7mak&zz1gF@u3YiT@x2GPjK37b8Z!$kG!$9xxf_YHn7V)FTxQO=3r5Su z(F-`X%&iAv*o{EviPZJ?EyBlQ%)h*R=uEDaQ;%XA0PlA)G)!HhsV-AJx>&s9guG;K zQxa0>aU4XXNM>udfUZ16m>p?FAGkoqdXu&fyL6vWt^;@XjbZvX1=VeDS4#=OX)~>3 z%{iu3j%B2(;d2opg&>E4XQwF~1{saTVnQua``r&kycBpWfZ!1&_K)hiE}NOs22P3BAvJ#eifj4d z;19#U`kmLDQW-7=Lg8Y2^a{an$tPX5)~xzfozIVx=$~^yGg&o;VvW%FqT*p%$ zdSgMnjmk_*h4(IZ7~>{|=TN{X!VkWMw`@RpP85gR8qoxV{9+It{88fXuPjedr77zV>-y!)dG%8q$zp7o+wy9nQhKUItHs7U*i% z2Luvkm6 zo@A*8r4*9}HWMjw#fq?*&JeFUzQWCRHopS_*DiV4|Abtk#}^)dC<{(AK#l_E4x*0squ7^nOEhPIcr zk*xu;+WNQ2=Zll*@|}E*(|2Y`NulL51UlK=k9%b_TUfU=DjE>2=tJLF8f`1S>~|pg zp-67sitD0}In6v~*9mUjXhZJ0XlQ@6$LHSzKY(T?@-4k%(gqv+6DjJU%)c>4rlX0d zS)(m3VAlgsXl312P6A9L;7-65I$o z$S^tOo1acF_U1l26lA{_*bHh%C!j@kuW`D1TQ)c*e%#gt=tGZFl%{WkiABKJCwb%2 z?782HDngAI_=ax`ynXKZ6LRmbKf}TV-|D-!NM!fmlGqGmr|YuR*vO>pzWE5Qgffpw;2yly7~(RhLW0j&C8JRmo9eP4;XTwE9!}wC8MzVd>*zc;n;eIN?c#Cac&L zEWe11G~`}YcVJ-}Z?Hp#Y3Ha)hJKI^C9+f#)#$|h#e*FgjCRl3%P%}`^8%f8#@O~B&LbsnFZZC#ZD-*`+U znWTWmY-bvR`^UG(&)=9(YXqHZnt=3#7ekkFm0+dFmI)dlX!}qA+4%9m;DOKZ{ZF-R z-`xc7PpWHO=@$}{3h35jG&T+{4NY=R*&UL!MGeR8C4bvO&OEPP5mh~=f*xhSIwyFW zPk@V2m&4kUT9AHV;)$wcO|k_BOuj^qimVQCHWfQTd?Aq%XApm|ys`5DRkx5?UTe9! 
zvEib3<~?^bB_jUf+H_-x206JCNVzy@dnfmm<9o}UPj=NJ#osV@xE84(Rf-PB*USjr z)-iEPCXJJ)H?y?|i{XUHr=AT%))fnL)<-RBB6}XYDragD8hKzcMxK zo_7v%HNbXj8kZ?oV6*977OgKS@hH_lqr7mBVI8Yx#!-FBFQdQX`Rh47*lZfHk~;#v zwn2`I42JKGNJD2cBt}>42(4~~Wsy6h!R3G;Y9)0Y~PdB<$GphUuqc~UZFH)N1C8X^iaI5hyd(h1u=EjojYe(k<{%*Woz&d7Ae*~4$@ zHb@eCkwxZP-L9lX`$NytXDVcvW>q9UoI$Xaw|RSzZHM{zdrz<0URQ#P(y1SxDGB2- zNF4*_PpL2IQnJn2Nw_gDes6wXmMc0RM(6sL`6WJy1IP3THecq&Sz777wyW8K)yT!u5B`Z(BU~N*G_3+(Az9Sx7wN0SH3ZLLKOOD` z+4uUZu1a@Z6%z^g2Abc_Zd;x)lk#^4(*QP%T+`2GvMGm$CXf91m$G}SALc2fk~J~& z4$>N`ND4se#@uI-e2o3T&d|=eFAr*nTuenFMZE@tc5Hs=qI8e^u<@_UuC(D+AD#tGuG*9GM{oPHw9*jkRua0!N8O z0y7)4vPj#$%i)d1$`ICKY1A-{Q}R9=E9VrDE#jjsrzH+H`S(wZhlEYcnoyOWi%=gz z7L4<_BPnEp*)H{6TcUkl>tNVx!g;2r$NMTyE$Za2X*Jr=0!T5W9r2>EIcp)muJ&^s z+Gh>^M*Qwn{g*JTKA142;-oMK%tdn#i*-RC$7;}q>hOO0eYPL$aI$&P6nzZFD zo^+bHT0Yg1N3UNuCR`Hru;emu5fKsgp#epD*GBjAcYUAq7arHnCYzc1;A`$?+G({d z*55XbMFOiT=%7uZ_IvWp%f)vjae-A)oXQ`Qec+$2e%;bs)$O7|S2TyG-xiw(u;6z5 zL_eRsuss{A;Ttw@dU@^8nm?f9@{o;!O1X-iVx!UiTkOys8G>;pn^DUkLd7O%&Z-hNMOrQL5 z+ooXMzBiRGe;X*8FQJ*+?x<`E95J$}=zE?*6(IT&Gv4pU5|b>NLMmt?OW&`ytSqrn zPh=c2D8Zt3jk!ge@Nk*SkIw^Oi^-zDb_ zA73UBHHm)hIwfen#gMNWYkAi3G6}zc+yJtBMny})bko_O7kyqtaCXOV?45biOm&X@ z$8R&ENpk^9>tfo=6ieChMli>W6szNu2GD^eHt&86FJ zCh)LuKx-|I_$-L7RcqMxMt%TXkY}mQLULooDG8|yNV{~SbV$e|-674=ElVTa!b(U;ED}qnbgMKjAibm@ z&WHEC?>l$qd!9M-|IIo7bLKgZx;ig_0001x0{}p50Pvn5_W}GDWBtQ_T-g8cA0OVo zm=cKdcdz|@f#zR%+JE@ZOuBzO015!$;lDik|6;O85&-tSxA#6GqDTOE_xj~N>SD10 zZ1+lZKffUs3&4EO8h;+~zrz9f5B^vGSNMNGuaPGZtU)WE?Pbj%K`kc#Ujy*(4e$W~ zocsO%+$P{og@ut8NyW?C6>d8zme^o{FG|U5P8J%wV`sSYpx6wpE!G@o;wXu+y`Kjy zkU7GuqVtF@&2)J*J{Xtn`FD@;@L?|6a;@UHA_?#3&8WO#pZy8XnAJrKqB_2FrJQIk ze#I3FreY>UZskwL;~I-&>C;j3DPhx&D}y0|Vt@Z_Ni?jg<0*|AW;tQy4IhPO_Z&O* z@HO_lMGa?nV*Btk$Q`G8Z}?1Y!ZX*Q4p?Pwb+m14JNL#jVBNO`DZBLTpnt?u8)uhC zRqT6@Dxp!eY1W=eO}w11VP^Xak=z7PFTiYS!k(dLU&tq0aMCAe-eUFY2GARpQ*4z5!}#XN z8ggpWnB<~-^J{MRtY3{*b2DTc*a%kG1Y2Ey;#62y9PmXy+`a75slAQ;hUH?!O!{DjrwLOp9_-pQ5AB0mUID%+yeeMl zL-uB!i>QU8hQ 
zpe+0qv`~xxd@^hYu88w`@=>d<8w()JipT&7@%GWJ#mWDw1B;-@!OOOXVeOV4+MAAZ zv3uHhJfVL>ea5gn@kz2!oH;6;5iMEG*DG${t5MHSVgKP$;)5E9yg(B4-o+^TTwiI7d4*k z@!qx(Ee(8#iQ0{@!RSRWOE&|ihM>+${|@Pzcj1IxYxQjjb<4it2fap{IGy;5fQKp) zxWdZpPsu{ZKHrRoSO(5WnM|~CCA$AK5eq=OhxqxFk<`CxtgoWGYC*{+c5n1Y(Yn`7DDF7naNQX9Ac+|H$=8h2Z^%enG?l7;uFw%RGnGy0Z(h)X>%MVz zxjC_sI@>VpdDeo zLYpP;ak;Orc(hps^f%35vcGH`7Fv2$s>iACu&Nmc>$UmK`ThK_30>UJr`J{ z3~y2m`FxJTXd3Qd@yCLZTv7qjjW4x^xb&uk0|N(e1CI>r2Cr`GcPOmc0Lu^BTW{jR zC`b$!Ro?f=?C%RGFUOphF_7}3fgv`NTaO%}r|&7`b-BNd5ObbISaI3 zBMFy>KO=;!-MDe+;zQN5A@nJMN@=_Sv74E8XGBRL_~LDn*$~GUd-v&~ige#Kwi%Eq z{Zh92po&QxjuZKj7=>k@kZ37<{Xvr8X0HdaqQ*n@JoXdnFX<&?bP2dRCZ;mAE4Q-! zOoW)e$SfBIfz`wAsaN_4e`@-3a&n}Lc?@#e^CfAuBG0pHwQ$xoD#=Ia7cp>LUo!Z6 zc#Xjh4gc|}Yo!nXMQ_S`qNV}l+wJ37*Yrc_IJJ;w3r{+mi6<)Is!0(=B9&9MdVK^K zpnW>N9Px#IS-HQ8$dA{-T0~(z-5;$ms6|;HY%hR<>1)R>-M)MT_Sq+28aC|GlVWs4 zASEpUt2$XHlszF{$Q$lre*M&lVMXE*G)+-J?@r}w56iQ#*9TQeB*mz!>Dhyy%nji* zr2(S+;u1@+fsc`=(K8USA@2qHUCXpDWNa6!tKTyF<`-{bxiA*jkyI3G1In9Czy zx%!uaR_>-;`D6SM&le$(CL%?L&b9I@4+mT^aj_Z)w;IQOf5!%=6YSuQ9DBWoqaiNjjlKN*ridrrGb7eX;}w!8-;_6me-%27*) zRWY@a!j#)}g6ny~!aS^|^OdCJ^dU)IBi!7_KDb^x!yDD63D;&SU#B`FcZ}d19%V+S zN5}jY*iUp*18OH$+#sKj%yqnJP&X%aOT|CgtWT zTlo4IY4X(i%7A0y*-vk>OT$WrJJ7rH$Ve5U*BZit13i94;ujU}YosoDBXw36>h`9i zS}CgEi*%vkg(?>PiEN&~imTIJC2f+6Buf}Bhu=)+o^C(Q0M8%n^*bmW*MW_a)K?SXt(Tk+1SlCZ#F> z{mAV6wqG+yu*_I)IngQ$!^Ti9la$oGpkJQpizL*V#lvmyw;qPebowxbHJZ=3dt=KUKQ|R>N=?4$lV^MS$#3i%nxVfgFRnko zDDjFfL3o#vomtP+ad(Rw)*R5-UyJnluPXGl09`v&MV^$kotDsvts`9+6OMu=@(G0v z!`7)J*3R-E-dzuus_#;7Up-*V`ADU5kz>Zo&(09NFS0tD6XWre?kA!ITUz;f7H&w! 
zdP?B~gNG*iueiOT3G6=KS>HIgZ!Hu6WLs|-ct4&H%*@sEx-Z_G#xWjuftC@dmsMmpU$*lod7JdN~8sHPIL+bf}Fq}4BQaC=v0AO>X1(XNt zfU>q=^FwZZpU3p+Fno|_Ajs%IWTW|GSJ)Yb5xlBe*E98f)V)$h@kY5F1gH6mc9m|s^ z>GE1{Y!c92kV)vkpooiYVucg(rB7XFk<5YH5yS-|!q#TMWLK&`OT3mR!&z%is38Q` zz~0`5eq$vC9tGsy zdPm8B^!^Lpw|e76xZ{&z8<8DEzP^@5a(f`@WBEdK6#;{nyQLO zy5;j?;gd+_w-UFiDadrqXYDLGA<*F0?^>s4ysPjWQ;Fq%{P;_=H_s17P*t;>Jh!`< zw2ug9w&MOwx1xoTK7}_q*6-^)#upg94O*}-qiBV+xygGEe%f~DMBirw!XfeNP@rkHo#>_v?!Sq6A%PtT*N zz3wu*kCt7i2FrEwPwNGgpKCD)x)bpPDRA)|7wwpmSOQ5f=dg6qAfKI@maPKrRw=xA z$#{Fmo2Ip>xZgxASW1vjvt)AGLKACb{NeqA6|}N$e+~aSWlg9*DGQGfXMli!}Wp?jI@pTL!N8U z1(>wc!Q`(#PyuoBL!x)m2VnZ{$;t3-g4r0q5NERpnL}VFsayUzY}I)p%M*ocOLpE None: + shutil.copy(url, root) + + +class TestEsri2020: + @pytest.fixture + def dataset( + self, monkeypatch: Generator[MonkeyPatch, None, None], tmp_path: Path + ) -> Esri2020: + monkeypatch.setattr( # type: ignore[attr-defined] + torchgeo.datasets.esri2020, "download_url", download_url + ) + zipfile = "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip" + monkeypatch.setattr(Esri2020, "zipfile", zipfile) # type: ignore[attr-defined] + + md5 = "4932855fcd00735a34b74b1f87db3df0" + monkeypatch.setattr(Esri2020, "md5", md5) # type: ignore[attr-defined] + url = os.path.join( + "tests", + "data", + "esri2020", + "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip", + ) + monkeypatch.setattr(Esri2020, "url", url) # type: ignore[attr-defined] + root = str(tmp_path) + transforms = nn.Identity() # type: ignore[attr-defined] + return Esri2020(root, transforms=transforms, download=True, checksum=True) + + def test_already_downloaded(self, tmp_path: Path) -> None: + url = os.path.join( + "tests", + "data", + "esri2020", + "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip", + ) + root = str(tmp_path) + shutil.copy(url, root) + Esri2020(root) + + def test_getitem(self, dataset: Esri2020) -> None: + x = dataset[dataset.bounds] + assert isinstance(x, dict) + assert 
isinstance(x["crs"], CRS) + assert isinstance(x["mask"], torch.Tensor) + + def test_already_extracted(self, dataset: Esri2020) -> None: + Esri2020(root=dataset.root, download=True) + + def test_not_downloaded(self, tmp_path: Path) -> None: + with pytest.raises(RuntimeError, match="Dataset not found"): + Esri2020(str(tmp_path), checksum=True) + + def test_and(self, dataset: Esri2020) -> None: + ds = dataset & dataset + assert isinstance(ds, IntersectionDataset) + + def test_or(self, dataset: Esri2020) -> None: + ds = dataset | dataset + assert isinstance(ds, UnionDataset) + + def test_plot(self, dataset: Esri2020) -> None: + query = dataset.bounds + x = dataset[query] + dataset.plot(x["mask"]) + + def test_url(self) -> None: + ds = Esri2020(os.path.join("tests", "data", "esri2020")) + assert "ai4edataeuwest.blob.core.windows.net" in ds.url + + def test_invalid_query(self, dataset: Esri2020) -> None: + query = BoundingBox(0, 0, 0, 0, 0, 0) + with pytest.raises( + IndexError, match="query: .* not found in index with bounds:" + ): + dataset[query] diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py index 9250920c301..e99e857eda4 100644 --- a/torchgeo/datasets/__init__.py +++ b/torchgeo/datasets/__init__.py @@ -26,6 +26,7 @@ from .cyclone import TropicalCycloneWindEstimation from .dfc2022 import DFC2022 from .enviroatlas import EnviroAtlas +from .esri2020 import Esri2020 from .etci2021 import ETCI2021 from .eurosat import EuroSAT from .fair1m import FAIR1M @@ -96,6 +97,7 @@ "ChesapeakeVA", "ChesapeakeWV", "ChesapeakeCVPR", + "Esri2020", "Landsat", "Landsat1", "Landsat2", diff --git a/torchgeo/datasets/esri2020.py b/torchgeo/datasets/esri2020.py new file mode 100644 index 00000000000..4d8c651a6a0 --- /dev/null +++ b/torchgeo/datasets/esri2020.py @@ -0,0 +1,138 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
"""Esri 2020 Land Cover Dataset."""

import abc
import glob
import os
from typing import Any, Callable, Dict, Optional

from rasterio.crs import CRS

from .geo import RasterDataset
from .utils import download_url, extract_archive


class Esri2020(RasterDataset, abc.ABC):
    """Esri 2020 Land Cover Dataset.

    The Esri 2020 Land Cover dataset consists of a global single band
    land use/land cover map derived from ESA Sentinel-2 imagery at 10m
    resolution with a total of 10 classes.
    It was published in July 2021 and used the Universal Transverse Mercator (UTM)
    projection. This dataset only contains labels, no raw satellite imagery.

    The 10 classes (preceded by index 0, which marks missing data) are:

    0. No Data
    1. Water
    2. Trees
    3. Grass
    4. Flooded Vegetation
    5. Crops
    6. Scrub/Shrub
    7. Built Area
    8. Bare Ground
    9. Snow/Ice
    10. Clouds

    A more detailed explanation of the individual classes is available in
    the dataset's official description.

    If you use this dataset please cite the following paper:

    * https://ieeexplore.ieee.org/document/9553499

    .. versionadded:: 0.3
    """

    # The files hold label masks rather than imagery.
    is_image = False

    # Tiles are named ``<tile id>_<start date>-<end date>.<ext>``.
    filename_glob = "*_20200101-20210101.*"
    # NOTE(review): the ``date`` group is presumably what ``RasterDataset``
    # parses to derive the temporal bounds of each tile -- confirm in geo.py.
    filename_regex = r"""^
        (?P<id>[0-9][0-9][A-Z])
        _(?P<date>\d{8})
        -(?P<processing_date>\d{8})
    """

    # Name of the archive inside ``root`` and its expected MD5 digest.
    zipfile = "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip"
    md5 = "4932855fcd00735a34b74b1f87db3df0"

    url = (
        "https://ai4edataeuwest.blob.core.windows.net/io-lulc/"
        "io-lulc-model-001-v01-composite-v03-supercell-v02-clip-v01.zip"
    )

    def __init__(
        self,
        root: str = "data",
        crs: Optional[CRS] = None,
        res: Optional[float] = None,
        transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
        cache: bool = True,
        download: bool = False,
        checksum: bool = False,
    ) -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found
            crs: :term:`coordinate reference system (CRS)` to warp to
                (defaults to the CRS of the first file found)
            res: resolution of the dataset in units of CRS
                (defaults to the resolution of the first file found)
            transforms: a function/transform that takes an input sample
                and returns a transformed version
            cache: if True, cache file handle to speed up repeated sampling
            download: if True, download dataset and store it in the root directory
            checksum: if True, check the MD5 of the downloaded files (may be slow)

        Raises:
            FileNotFoundError: if no files are found in ``root``
            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
        """
        self.root = root
        self.download = download
        self.checksum = checksum

        # Make sure the data is present (extracting/downloading if needed)
        # before the parent class indexes the files under ``root``.
        self._verify()

        super().__init__(root, crs, res, transforms, cache)

    def _verify(self) -> None:
        """Verify the integrity of the dataset.

        Looks for already-extracted files first, then for the downloaded
        archive, and only then falls back to downloading.

        Raises:
            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
        """
        # Check if the extracted files already exist. ``recursive=True`` is
        # required for ``**`` to match any directory depth (including zero);
        # without it ``**`` behaves like a single ``*``.
        pathname = os.path.join(self.root, "**", self.filename_glob)
        if next(glob.iglob(pathname, recursive=True), None) is not None:
            return

        # Check if the zip file has already been downloaded. This is a literal
        # path, so test it directly instead of treating it as a glob pattern.
        pathname = os.path.join(self.root, self.zipfile)
        if os.path.exists(pathname):
            self._extract()
            return

        # Check if the user requested to download the dataset
        if not self.download:
            raise RuntimeError(
                f"Dataset not found in `root={self.root}` and `download=False`, "
                "either specify a different `root` directory or use `download=True` "
                "to automaticaly download the dataset."
            )

        # Download the dataset
        self._download()
        self._extract()

    def _download(self) -> None:
        """Download the dataset archive into ``root``.

        The MD5 digest is only passed on for verification when the instance
        was created with ``checksum=True``.
        """
        download_url(
            self.url,
            self.root,
            filename=self.zipfile,
            md5=self.md5 if self.checksum else None,
        )

    def _extract(self) -> None:
        """Extract the downloaded archive located in ``root``."""
        extract_archive(os.path.join(self.root, self.zipfile))