From cb71d86c8c9b6b25fdca898ef54e6bc83e1b4288 Mon Sep 17 00:00:00 2001 From: jamesvrt Date: Fri, 15 Oct 2021 18:22:03 -0700 Subject: [PATCH 01/19] Initial commit --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- LICENSE | 2 +- README-template.md | 32 -- README.md | 55 ++- docker_env | 6 +- docs/installation_and_basic_usage.ipynb | 11 +- environment.yml | 2 +- examples/collection.json | 449 ++++++++++++++++++++ examples/items/item.json | 543 ++++++++++++++++++++++++ mypy.ini | 4 + scripts/lint | 2 +- setup.cfg | 14 +- src/stactools/ephemeral/__init__.py | 15 - src/stactools/ephemeral/commands.py | 54 --- src/stactools/ephemeral/stac.py | 112 ----- src/stactools/hwsd/__init__.py | 15 + src/stactools/hwsd/commands.py | 132 ++++++ src/stactools/hwsd/constants.py | 182 ++++++++ src/stactools/hwsd/stac.py | 181 ++++++++ tests/test_commands.py | 52 ++- tests/test_module.py | 4 +- tests/test_stac.py | 24 +- 22 files changed, 1610 insertions(+), 283 deletions(-) delete mode 100644 README-template.md create mode 100644 examples/collection.json create mode 100644 examples/items/item.json delete mode 100644 src/stactools/ephemeral/__init__.py delete mode 100644 src/stactools/ephemeral/commands.py delete mode 100644 src/stactools/ephemeral/stac.py create mode 100644 src/stactools/hwsd/__init__.py create mode 100644 src/stactools/hwsd/commands.py create mode 100644 src/stactools/hwsd/constants.py create mode 100644 src/stactools/hwsd/stac.py diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index a955eb7..9a4e033 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -15,7 +15,7 @@ Steps to reproduce the behavior: > Ex. > -> 1. Install stactools-ephemeral +> 1. Install stactools-hwsd > 2. Run `scripts/test` > 3. See error diff --git a/LICENSE b/LICENSE index f574b4a..ba213ec 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ This software is licensed under the Apache 2 license, quoted below. 
-Copyright 2021 COMPANY [COMPANY WEBPAGE URL] +Copyright 2021 Microsoft Corporation [https://microsoft.com/] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of diff --git a/README-template.md b/README-template.md deleted file mode 100644 index 1cdce27..0000000 --- a/README-template.md +++ /dev/null @@ -1,32 +0,0 @@ -[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/stactools-packages/ephemeral/main?filepath=docs/installation_and_basic_usage.ipynb) - -# stactools-ephemeral - -- Name: ephemeral -- Package: `stactools.ephemeral` -- PyPI: https://pypi.org/project/stactools-ephemeral/ -- Owner: @githubusername -- Dataset homepage: http://example.com -- STAC extensions used: - - [proj](https://github.com/stac-extensions/projection/) -- Extra fields: - - `ephemeral:custom`: A custom attribute - -A short description of the package and its usage. - -## Examples - -### STAC objects - -- [Collection](examples/collection.json) -- [Item](examples/item/item.json) - -### Command-line usage - -Description of the command line functions - -```bash -$ stac ephemeral create-item source destination -``` - -Use `stac ephemeral --help` to see all subcommands and options. diff --git a/README.md b/README.md index c5cb6bb..7c40947 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,37 @@ -# stactools-template - -This is a template repo used for creating new packages for `stactools`. - -## How to use - -1. Clone this template repository as your package name, e.g. `landsat`. - This name should be short, memorable, and a valid Python package name (i.e. it shouldn't start with a number, etc). - It can, however, include a hyphen, in which case the name for Python imports will be the underscored version, e.g. `landsat-8` goes to `stactools.landsat_8`. - Your name will be used on PyPI to publish the package in the stactools namespace, e.g. `stactools-landsat`. -2. 
Change into the top-level directory of your package and run `scripts/rename`. - This will update _most_ of the files in the repository with your new package name. -3. Update `setup.cfg` with your package description and such. -4. Update the LICENSE with your company's information (or whomever holds the copyright). -5. Run `sphinx-quickstart` in the `docs` directory to create the documentation template. -6. Update `docs/installation_and_basic_usage.ipynb` to provide an interactive notebook to help users get started. Include the following badge at the top of the README to launch the notebook: [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/stactools-packages/template/main?filepath=docs/installation_and_basic_usage.ipynb). Be sure to modify the badge href to match your package repo. -7. Add example Items (and Collections and Catalogs, if included) to a `examples/` directory. -8. Delete this file, and rename `README-template.md` to `README.md`. Update your new README to provide information about how to use your package. +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/stactools-packages/hwsd/main?filepath=docs/installation_and_basic_usage.ipynb) + +# stactools-hwsd + +- Name: hwsd +- Package: `stactools.hwsd` +- PyPI: https://pypi.org/project/stactools-hwsd/ +- Owner: @jamesvrt +- Dataset homepage: https://www.fao.org/soils-portal/data-hub/soil-maps-and-databases/harmonized-world-soil-database-v12 +- STAC extensions used: + - [proj](https://github.com/stac-extensions/projection/) + - [scientific](https://github.com/stac-extensions/scientific/) + - [item-assets](https://github.com/stac-extensions/item-assets/) + - [raster](https://github.com/stac-extensions/raster/) + +A stactools package that creates STAC Collections and Items for the regridded Harmonized World Soil Database (HWSD) v1.2 soil parameter layers.
+ +## Examples + +### STAC objects + +- [Collection](examples/collection.json) +- [Item](examples/items/item.json) + +### Command-line usage + +Description of the command line functions + +```bash +$ stac hwsd create-item -s source -d destination + +$ stac hwsd create-collection -d destination + +$ stac hwsd populate-collection -s source -d destination +``` + +Use `stac hwsd --help` to see all subcommands and options. diff --git a/docker_env b/docker_env index acc0a82..c72b151 100644 --- a/docker_env +++ b/docker_env @@ -1,7 +1,7 @@ DOCKER_REGISTRY=ghcr.io DOCKER_ORG=stactools-packages -DOCKER_REPO=ephemeral +DOCKER_REPO=hwsd DOCKER_TAG=local DOCKER_TAG_DEV=local-dev -DOCKER_WORKDIR=/opt/stactools-ephemeral -DOCKER_NAMESPACE_PACKAGE_DIR=stactools/ephemeral +DOCKER_WORKDIR=/opt/stactools-hwsd +DOCKER_NAMESPACE_PACKAGE_DIR=stactools/hwsd diff --git a/docs/installation_and_basic_usage.ipynb b/docs/installation_and_basic_usage.ipynb index 4fd0860..618a277 100644 --- a/docs/installation_and_basic_usage.ipynb +++ b/docs/installation_and_basic_usage.ipynb @@ -28,7 +28,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "!pip install stactools-ephemeral" + "!pip install stactools-hwsd" ], "outputs": [], "metadata": {} @@ -67,7 +67,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "!stac ephemeral --help" + "!stac hwsd --help" ], "outputs": [], "metadata": {} @@ -83,7 +83,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "!stac ephemeral create-item --help" + "!stac hwsd create-item --help" ], "outputs": [], "metadata": {} @@ -106,10 +106,9 @@ "cell_type": "code", "execution_count": null, "source": [ - "# Alter these with examples to match your package\n", - "from stactools.ephemeral import stac, cog\n", + "from stactools.hwsd import stac, cog\n", "\n", - "stac.create_item()\n", + "stac.create_item(\"path/to/data/assets/\")\n", "stac.create_collection()\n", "cog.create_cog()" ], diff --git a/environment.yml b/environment.yml index
5421dc8..e9ae866 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: stactools-ephemeral +name: stactools-hwsd channels: - conda-forge - defaults diff --git a/examples/collection.json b/examples/collection.json new file mode 100644 index 0000000..9d94271 --- /dev/null +++ b/examples/collection.json @@ -0,0 +1,449 @@ +{ + "type": "Collection", + "id": "hwsd", + "stac_version": "1.0.0", + "description": "This data set describes select global soil parameters from the Harmonized World Soil Database (HWSD) v1.2, including additional calculated parameters such as area weighted soil organic carbon (kg C per m2), as high resolution NetCDF files. These data were regridded and upscaled from the Harmonized World Soil Database v1.2.", + "links": [ + { + "rel": "root", + "href": "./collection.json", + "type": "application/json" + }, + { + "rel": "license", + "href": "https://earthdata.nasa.gov/earth-observation-data/data-use-policy", + "title": "EOSDIS Data Use Policy" + }, + { + "rel": "via", + "href": "https://www.fao.org/soils-portal/data-hub/soil-maps-and-databases/harmonized-world-soil-database-v12", + "title": "Homepage" + }, + { + "rel": "via", + "href": "http://webarchive.iiasa.ac.at/Research/LUC/External-World-soil-database/HTML/SoilQualityData.html?sb=11", + "title": "Homepage, Alternate" + }, + { + "rel": "via", + "href": "https://daac.ornl.gov/SOILS/guides/HWSD.html", + "title": "Homepage, Regridded" + }, + { + "rel": "cite-as", + "href": "https://doi.org/10.3334/ORNLDAAC/1247" + }, + { + "rel": "self", + "href": "/home/jvrt/gdspark/202105_stactools/.vscode/hwsd/collection.json", + "type": "application/json" + } + ], + "stac_extensions": [ + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" + ], + "sci:doi": "10.3334/ORNLDAAC/1247", + "sci:citation": "Wieder, W.R., J. Boehnert, G.B. Bonan, and M. Langseth. 2014. Regridded Harmonized World Soil Database v1.2. 
Data set. Available on-line [http://daac.ornl.gov] from Oak Ridge National Laboratory Distributed Active Archive Center, Oak Ridge, Tennessee, USA. http://dx.doi.org/10.3334/ORNLDAAC/1247 .", + "item_assets": { + "AWC_CLASS": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "AWC_CLASS", + "proj:epsg": 4326 + }, + "ISSOIL": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "ISSOIL", + "proj:epsg": 4326 + }, + "MU_GLOBAL": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "MU_GLOBAL", + "proj:epsg": 4326 + }, + "REF_DEPTH": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "REF_DEPTH", + "proj:epsg": 4326 + }, + "ROOTS": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "ROOTS", + "proj:epsg": 4326 + }, + "T_BULK_DEN": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_BULK_DEN", + "proj:epsg": 4326 + }, + "S_BULK_DEN": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_BULK_DEN", + "proj:epsg": 4326 + }, + "T_REF_BULK": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_REF_BULK", + "proj:epsg": 4326 + }, + "S_REF_BULK": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_REF_BULK", + "proj:epsg": 4326 + }, + "T_CEC_CLAY": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_CEC_CLAY", + "proj:epsg": 4326 + }, + "S_CEC_CLAY": { + "types": [ + "image/tiff; application=geotiff; 
profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_CEC_CLAY", + "proj:epsg": 4326 + }, + "T_CLAY": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_CLAY", + "proj:epsg": 4326 + }, + "S_CLAY": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_CLAY", + "proj:epsg": 4326 + }, + "T_GRAVEL": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_GRAVEL", + "proj:epsg": 4326 + }, + "S_GRAVEL": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_GRAVEL", + "proj:epsg": 4326 + }, + "T_SAND": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_SAND", + "proj:epsg": 4326 + }, + "S_SAND": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_SAND", + "proj:epsg": 4326 + }, + "T_SILT": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_SILT", + "proj:epsg": 4326 + }, + "S_SILT": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_SILT", + "proj:epsg": 4326 + }, + "T_PH_H20": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_PH_H20", + "proj:epsg": 4326 + }, + "S_PH_H20": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_PH_H20", + "proj:epsg": 4326 + }, + "T_C": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_C", + "proj:epsg": 4326 + }, + "S_C": { + "types": [ + "image/tiff; 
application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_C", + "proj:epsg": 4326 + }, + "T_OC": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "T_OC", + "proj:epsg": 4326 + }, + "S_OC": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "S_OC", + "proj:epsg": 4326 + }, + "AWT_S_SOC": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "AWT_S_SOC", + "proj:epsg": 4326 + }, + "AWT_T_SOC": { + "types": [ + "image/tiff; application=geotiff; profile=cloud-optimized" + ], + "roles": [ + "data" + ], + "title": "AWT_T_SOC", + "proj:epsg": 4326 + }, + "Documentation": { + "types": [ + "application/pdf" + ], + "roles": [ + "metadata" + ], + "title": "Documentation" + } + }, + "title": "Harmonized World Soil Database", + "extent": { + "spatial": { + "bbox": [ + [ + -180.0, + 90.0, + 180.0, + -90.0 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2000-01-01T00:00:00Z", + "2000-12-31T23:59:59Z" + ] + ] + } + }, + "license": "various", + "keywords": [ + "HWSD", + "Soil", + "Soils", + "Harmonized World Soil Database", + "regridded" + ], + "providers": [ + { + "name": "FAO", + "roles": [ + "host", + "licensor", + "processor", + "producer" + ], + "url": "https://www.fao.org/" + }, + { + "name": "IIASA", + "roles": [ + "licensor", + "producer" + ], + "url": "https://iiasa.ac.at/" + }, + { + "name": "ISRIC", + "roles": [ + "licensor", + "producer" + ], + "url": "https://www.isric.org/" + }, + { + "name": "ISS-CAS", + "roles": [ + "licensor", + "producer" + ], + "url": "http://english.issas.cas.cn/" + }, + { + "name": "JRC", + "roles": [ + "licensor", + "producer" + ], + "url": "https://esdac.jrc.ec.europa.eu/" + }, + { + "name": "ORNL", + "roles": [ + "host", + "processor" + ], + "url": "https://www.ornl.gov/" + }, + { + "name": "NCAR", + 
"roles": [ + "producer", + "processor" + ], + "url": "https://ncar.ucar.edu/" + }, + { + "name": "Microsoft", + "roles": [ + "host", + "processor" + ], + "url": "https://planetarycomputer.microsoft.com" + } + ], + "summaries": { + "proj:epsg": [ + 4326 + ] + }, + "assets": { + "documentation": { + "href": "http://daac.ornl.gov/daacdata/global_soil/HWSD/comp/HWSD1.2_documentation.pdf", + "type": "application/pdf", + "title": "Documentation", + "roles": [ + "metadata" + ] + }, + "thumbnail": { + "href": "https://daac.ornl.gov/SOILS/guides/HWSD_Fig1.png", + "type": "image/png", + "title": "Thumbnail", + "roles": [ + "thumbnail" + ] + } + } +} \ No newline at end of file diff --git a/examples/items/item.json b/examples/items/item.json new file mode 100644 index 0000000..50e22fc --- /dev/null +++ b/examples/items/item.json @@ -0,0 +1,543 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "hwsd", + "properties": { + "title": "Harmonized World Soil Database", + "description": "This data set describes select global soil parameters from the Harmonized World Soil Database (HWSD) v1.2, including additional calculated parameters such as area weighted soil organic carbon (kg C per m2), as high resolution NetCDF files. These data were regridded and upscaled from the Harmonized World Soil Database v1.2.", + "start_datetime": "2000-01-01T00:00:00Z", + "end_datetime": "2000-12-31T23:59:59Z", + "sci:citation": "Wieder, W.R., J. Boehnert, G.B. Bonan, and M. Langseth. 2014. Regridded Harmonized World Soil Database v1.2. Data set. Available on-line [http://daac.ornl.gov] from Oak Ridge National Laboratory Distributed Active Archive Center, Oak Ridge, Tennessee, USA. 
http://dx.doi.org/10.3334/ORNLDAAC/1247 .", + "sci:doi": "10.3334/ORNLDAAC/1247", + "proj:epsg": 4326, + "datetime": "2000-01-01T00:00:00Z" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 180.0, + 90.0 + ], + [ + 180.0, + -90.0 + ], + [ + -180.0, + -90.0 + ], + [ + -180.0, + 90.0 + ], + [ + 180.0, + 90.0 + ] + ] + ] + }, + "links": [ + { + "rel": "via", + "href": "https://www.fao.org/soils-portal/data-hub/soil-maps-and-databases/harmonized-world-soil-database-v12", + "title": "Homepage" + }, + { + "rel": "via", + "href": "http://webarchive.iiasa.ac.at/Research/LUC/External-World-soil-database/HTML/SoilQualityData.html?sb=11", + "title": "Homepage, Alternate" + }, + { + "rel": "via", + "href": "https://daac.ornl.gov/SOILS/guides/HWSD.html", + "title": "Homepage, Regridded" + }, + { + "rel": "cite-as", + "href": "https://doi.org/10.3334/ORNLDAAC/1247" + } + ], + "assets": { + "AWC_CLASS": { + "href": "path/to/files/1900.csv.gz/AWC_CLASS.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "AWC_CLASS", + "description": "Available water storage capacity ", + "units": "Coded values 1 through 7 ", + "notes": "1 = 150 mm water per m of the soil unit, 2 = 125 mm, 3 = 100 mm, 4 = 75 mm, 5 = 50 mm, 6 = 15 mm, 7 = 0 mm.", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "ISSOIL": { + "href": "path/to/files/1900.csv.gz/ISSOIL.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "ISSOIL", + "description": "Soil or non-soil units ", + "units": "0 or 1 ", + "notes": "ISSOIL indicates whether the soil mapping unit is a soil (1) or non-soil (0)", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "MU_GLOBAL": { + "href": "path/to/files/1900.csv.gz/MU_GLOBAL.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "MU_GLOBAL", + "description": "HWSD global 
mapping unit identifier ", + "units": "numerical ID ", + "notes": "MU_GLOBAL provides a link from the grid cell to the other attributes.The HWSD v1.2 attribute lookup table is available from the HWSD project (FAO 2012)", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "REF_DEPTH": { + "href": "path/to/files/1900.csv.gz/REF_DEPTH.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "REF_DEPTH", + "description": "Reference soil depth ", + "units": "cm ", + "notes": "Reference soil depth of all soil units are set at 100 cm, except for Rendzinas and Rankers of FAO-74 and Leptosols of FAO-90, where the reference soil depth is set at 30 cm, and for Lithosols of FAO-74 and Lithic Leptosols of FAO-90, where it is set at 10 cm.", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "ROOTS": { + "href": "path/to/files/1900.csv.gz/ROOTS.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "ROOTS", + "description": "Depth of obstacles to roots ", + "units": "Coded values 0 through 6 ", + "notes": "0 = no information, 1 = no obstacles to roots between 0 and 80 cm depth, 2 = obstacles to roots between 60 and 80 cm depth, 3 = obstacles between 40 and 60 cm, 4 = 20 and 40 cm, 5 = 0 and 80 cm, 6 = 0 and 20 cm.", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_BULK_DEN": { + "href": "path/to/files/1900.csv.gz/T_BULK_DEN.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_BULK_DEN", + "description": "Topsoil bulk density ", + "units": "kg dm-3 ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_BULK_DEN": { + "href": "path/to/files/1900.csv.gz/S_BULK_DEN.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": 
"S_BULK_DEN", + "description": "Subsoil bulk density ", + "units": "kg dm-3 ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_REF_BULK": { + "href": "path/to/files/1900.csv.gz/T_REF_BULK.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_REF_BULK", + "description": "topsoil bulk density ", + "units": "kg dm-3 ", + "notes": "Reference bulk density values are calculated from equations developed by Saxton et al. (1986) that relate to the texture of the soil only. These estimates, although generally reliable, overestimate the bulk density in soils that have a high porosity (Andosols) or that are high in organic matter content (Histosols). The calculation procedures for reference bulk density can be found in Saxton et al (1986)", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_REF_BULK": { + "href": "path/to/files/1900.csv.gz/S_REF_BULK.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_REF_BULK", + "description": "Subsoil reference bulk density ", + "units": "kg dm-3", + "notes": null, + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_CEC_CLAY": { + "href": "path/to/files/1900.csv.gz/T_CEC_CLAY.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_CEC_CLAY", + "description": "Cation exchange capacity of the clay fraction in the topsoil ", + "units": "cmol per kg ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_CEC_CLAY": { + "href": "path/to/files/1900.csv.gz/S_CEC_CLAY.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_CEC_CLAY", + "description": "Cation exchange capacity of the clay fraction in the subsoil ", + "units": "cmol per kg ", + "notes": "", + 
"raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_CLAY": { + "href": "path/to/files/1900.csv.gz/T_CLAY.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_CLAY", + "description": "Topsoil clay fraction ", + "units": "% weight ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_CLAY": { + "href": "path/to/files/1900.csv.gz/S_CLAY.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_CLAY", + "description": "Subsoil clay fraction ", + "units": "% weight ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_GRAVEL": { + "href": "path/to/files/1900.csv.gz/T_GRAVEL.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_GRAVEL", + "description": "Topsoil gravel content ", + "units": "% volume ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_GRAVEL": { + "href": "path/to/files/1900.csv.gz/S_GRAVEL.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_GRAVEL", + "description": "Subsoil gravel content ", + "units": "% volume ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_SAND": { + "href": "path/to/files/1900.csv.gz/T_SAND.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_SAND", + "description": "Topsoil sand fraction ", + "units": "% weight ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_SAND": { + "href": "path/to/files/1900.csv.gz/S_SAND.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_SAND", + "description": "Subsoil sand fraction 
", + "units": "% weight ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_SILT": { + "href": "path/to/files/1900.csv.gz/T_SILT.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_SILT", + "description": "Topsoil silt fraction ", + "units": "% weight ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_SILT": { + "href": "path/to/files/1900.csv.gz/S_SILT.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_SILT", + "description": "Subsoil silt fraction ", + "units": "% weight ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_PH_H20": { + "href": "path/to/files/1900.csv.gz/T_PH_H20.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_PH_H20", + "description": "Topsoil pH (in H2O) ", + "units": "-log(H+) ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_PH_H20": { + "href": "path/to/files/1900.csv.gz/S_PH_H20.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_PH_H20", + "description": "Subsoil pH (in water) ", + "units": "-log(H+) ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_C": { + "href": "path/to/files/1900.csv.gz/T_C.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_C", + "description": "Topsoil carbon content ", + "units": "kg C m-2 ", + "notes": "Topsoil and subsoil carbon content (T_C and S_C) are based on the carbon content of the dominant soil type in each regridded cell rather than a weighted average.", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + 
"S_C": { + "href": "path/to/files/1900.csv.gz/S_C.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_C", + "description": "Dominant soil type subsoil carbon content ", + "units": "kg C m-2 ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "T_OC": { + "href": "path/to/files/1900.csv.gz/T_OC.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "T_OC", + "description": "Topsoil organic carbon ", + "units": "% weight ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "S_OC": { + "href": "path/to/files/1900.csv.gz/S_OC.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "S_OC", + "description": "Subsoil organic carbon ", + "units": "% weight ", + "notes": "", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "AWT_S_SOC": { + "href": "path/to/files/1900.csv.gz/AWT_S_SOC.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "AWT_S_SOC", + "description": "Area weighted subsoil carbon content ", + "units": "kg C m-2 ", + "notes": "AWT_S_SOC = (sum(SEQ(SHARE * S_OC)) * 7 * S_BULK_DENSITY)", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "AWT_T_SOC": { + "href": "path/to/files/1900.csv.gz/AWT_T_SOC.nc4", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "title": "AWT_T_SOC", + "description": "Area weighted topsoil carbon content ", + "units": "kg C m-2 ", + "notes": "AWT_T_SOC = (sum(SEQ(SHARE * T_OC)) * 3 * T_BULK_DENSITY) ", + "raster:bands": [ + { + "nodata": -1, + "sampling": "area" + } + ], + "roles": [ + "data" + ] + }, + "documentation": { + "href": "http://daac.ornl.gov/daacdata/global_soil/HWSD/comp/HWSD1.2_documentation.pdf", + "type": "application/pdf", + 
"title": "HWSD Documentation", + "roles": [ + "metadata" + ] + } + }, + "bbox": [ + -180.0, + 90.0, + 180.0, + -90.0 + ], + "stac_extensions": [ + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.0.0/schema.json", + "https://stac-extensions.github.io/raster/v1.0.0/schema.json" + ] +} \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 8cfa412..a7943b3 100644 --- a/mypy.ini +++ b/mypy.ini @@ -2,3 +2,7 @@ mypy_path = src explicit_package_bases = True namespace_packages = True +exclude = examples/ + +[mypy-shapely.*] +ignore_missing_imports = True diff --git a/scripts/lint b/scripts/lint index 437f786..78fc643 100755 --- a/scripts/lint +++ b/scripts/lint @@ -13,7 +13,7 @@ Execute project linters. " } -EC_EXCLUDE="(__pycache__|.git|.coverage|coverage.xml|.*\.egg-info|.mypy_cache|.tif|.tiff|.npy|.ipynb)" +EC_EXCLUDE="(__pycache__|.git|.coverage|coverage.xml|.*\.egg-info|.mypy_cache|.tif|.tiff|.npy|.ipynb|examples/)" DIRS_TO_CHECK=("src" "tests" "scripts") diff --git a/setup.cfg b/setup.cfg index f55f83f..04e2c99 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,15 +1,15 @@ [metadata] -name = stactools-ephemeral -version = attr: stactools.ephemeral.__version__ -description = PROVIDE DESCRIPTION HERE +name = stactools-hwsd +version = attr: stactools.hwsd.__version__ +description = Harmonized World Soils Database long_description = file: README.md long_description_content_type = text/markdown author = stac-utils -author_email = stac@radiant.earth -url = https://github.com/stactools-packages/ephemeral +author_email = jtownend@sparkgeo.com +url = https://github.com/stactools-packages/hwsd project_urls = - Documentation = https://stactools-ephemeral.readthedocs.io/en/latest/ - Issues = https://github.com/stactools-packages/ephemeral/issues + Documentation = https://stactools-hwsd.readthedocs.io/en/latest/ + Issues = https://github.com/stactools-packages/hwsd/issues keywords = stactools pystac diff 
--git a/src/stactools/ephemeral/__init__.py b/src/stactools/ephemeral/__init__.py deleted file mode 100644 index e3355df..0000000 --- a/src/stactools/ephemeral/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -import stactools.core - -from stactools.ephemeral.stac import create_collection, create_item - -__all__ = ['create_collection', 'create_item'] - -stactools.core.use_fsspec() - - -def register_plugin(registry): - from stactools.ephemeral import commands - registry.register_subcommand(commands.create_ephemeralcmd_command) - - -__version__ = "0.1.0" diff --git a/src/stactools/ephemeral/commands.py b/src/stactools/ephemeral/commands.py deleted file mode 100644 index 6d9a1d8..0000000 --- a/src/stactools/ephemeral/commands.py +++ /dev/null @@ -1,54 +0,0 @@ -import logging - -import click - -from stactools.ephemeral import stac - -logger = logging.getLogger(__name__) - - -def create_ephemeralcmd_command(cli): - """Creates the stactools-ephemeral command line utility.""" - @cli.group( - "ephemeralcmd", - short_help=("Commands for working with stactools-ephemeral"), - ) - def ephemeralcmd(): - pass - - @ephemeralcmd.command( - "create-collection", - short_help="Creates a STAC collection", - ) - @click.argument("destination") - def create_collection_command(destination: str): - """Creates a STAC Collection - - Args: - destination (str): An HREF for the Collection JSON - """ - collection = stac.create_collection() - - collection.set_self_href(destination) - - collection.save_object() - - return None - - @ephemeralcmd.command("create-item", short_help="Create a STAC item") - @click.argument("source") - @click.argument("destination") - def create_item_command(source: str, destination: str): - """Creates a STAC Item - - Args: - source (str): HREF of the Asset associated with the Item - destination (str): An HREF for the STAC Collection - """ - item = stac.create_item(source) - - item.save_object(dest_href=destination) - - return None - - return ephemeralcmd diff --git 
a/src/stactools/ephemeral/stac.py b/src/stactools/ephemeral/stac.py deleted file mode 100644 index d780791..0000000 --- a/src/stactools/ephemeral/stac.py +++ /dev/null @@ -1,112 +0,0 @@ -import logging -from datetime import datetime, timezone - -from pystac import (Asset, CatalogType, Collection, Extent, Item, MediaType, - Provider, ProviderRole, SpatialExtent, TemporalExtent) -from pystac.extensions.projection import ProjectionExtension - -logger = logging.getLogger(__name__) - - -def create_collection() -> Collection: - """Create a STAC Collection - - This function includes logic to extract all relevant metadata from - an asset describing the STAC collection and/or metadata coded into an - accompanying constants.py file. - - See `Collection`_. - - Returns: - Collection: STAC Collection object - """ - providers = [ - Provider( - name="The OS Community", - roles=[ - ProviderRole.PRODUCER, ProviderRole.PROCESSOR, - ProviderRole.HOST - ], - url="https://github.com/stac-utils/stactools", - ) - ] - - # Time must be in UTC - demo_time = datetime.now(tz=timezone.utc) - - extent = Extent( - SpatialExtent([[-180., 90., 180., -90.]]), - TemporalExtent([demo_time, None]), - ) - - collection = Collection( - id="my-collection-id", - title="A dummy STAC Collection", - description="Used for demonstration purposes", - license="CC-0", - providers=providers, - extent=extent, - catalog_type=CatalogType.RELATIVE_PUBLISHED, - ) - - return collection - - -def create_item(asset_href: str) -> Item: - """Create a STAC Item - - This function should include logic to extract all relevant metadata from an - asset, metadata asset, and/or a constants.py file. - - See `Item`_. 
- - Args: - asset_href (str): The HREF pointing to an asset associated with the item - - Returns: - Item: STAC Item object - """ - - properties = { - "title": "A dummy STAC Item", - "description": "Used for demonstration purposes", - } - - demo_geom = { - "type": - "Polygon", - "coordinates": [[[-180, -90], [180, -90], [180, 90], [-180, 90], - [-180, -90]]], - } - - # Time must be in UTC - demo_time = datetime.now(tz=timezone.utc) - - item = Item( - id="my-item-id", - properties=properties, - geometry=demo_geom, - bbox=[-180, 90, 180, -90], - datetime=demo_time, - stac_extensions=[], - ) - - # It is a good idea to include proj attributes to optimize for libs like stac-vrt - proj_attrs = ProjectionExtension.ext(item, add_if_missing=True) - proj_attrs.epsg = 4326 - proj_attrs.bbox = [-180, 90, 180, -90] - proj_attrs.shape = [1, 1] # Raster shape - proj_attrs.transform = [-180, 360, 0, 90, 0, 180] # Raster GeoTransform - - # Add an asset to the item (COG for example) - item.add_asset( - "image", - Asset( - href=asset_href, - media_type=MediaType.COG, - roles=["data"], - title="A dummy STAC Item COG", - ), - ) - - return item diff --git a/src/stactools/hwsd/__init__.py b/src/stactools/hwsd/__init__.py new file mode 100644 index 0000000..ce25e65 --- /dev/null +++ b/src/stactools/hwsd/__init__.py @@ -0,0 +1,15 @@ +import stactools.core + +from stactools.hwsd.stac import create_collection, create_item + +__all__ = ['create_collection', 'create_item'] + +stactools.core.use_fsspec() + + +def register_plugin(registry): + from stactools.hwsd import commands + registry.register_subcommand(commands.create_hwsd_command) + + +__version__ = "0.1.0" diff --git a/src/stactools/hwsd/commands.py b/src/stactools/hwsd/commands.py new file mode 100644 index 0000000..4e71707 --- /dev/null +++ b/src/stactools/hwsd/commands.py @@ -0,0 +1,132 @@ +import logging +import os + +import click +from stactools.core.utils.convert import cogify + +from stactools.hwsd import stac + +logger = 
logging.getLogger(__name__) + + +def create_hwsd_command(cli): + """Creates the stactools-hwsd command line utility.""" + @cli.group( + "hwsd", + short_help=("Commands for working with stactools-hwsd"), + ) + def hwsd(): + pass + + @hwsd.command( + "create-collection", + short_help="Creates a STAC collection", + ) + @click.option( + "-d", + "--destination", + required=True, + help="The output location for the STAC Collection.", + ) + def create_collection_command(destination: str): + """Creates a STAC Collection + + Args: + destination (str): The output folder for the Collection. + """ + collection = stac.create_collection() + collection.normalize_hrefs(destination) + collection.save(dest_href=destination) + collection.validate() + + return None + + @hwsd.command("create-item", short_help="Create a STAC item") + @click.option( + "-s", + "--source", + required=True, + help="The HREF for the location containing the data assets.", + ) + @click.option( + "-d", + "--destination", + required=True, + help="An HREF for the STAC Collection.", + ) + def create_item_command(source: str, destination: str): + """Creates a STAC Item + + Args: + source (str): HREF of the Assets associated with the Item + destination (str): An HREF for the STAC Collection + """ + item = stac.create_item(source) + item.save_object(dest_href=destination) + item.validate() + + return None + + @hwsd.command("populate-collection", + short_help="Populate the HWSD STAC Collection with all items" + ) + @click.option("-s", + "--source", + required=True, + help="The source directory for the Item data assets.") + @click.option( + "-d", + "--destination", + required=True, + help="The output directory for the populated STAC Collection.", + ) + def populate_collection_command(source: str, destination: str): + """Populate the HWSD STAC Collection with all items + + Args: + source (str): The source directory for the Item data assets + destination (str): An HREF for the STAC Collection + """ + + collection = 
stac.create_collection() + + item = stac.create_item(source) + collection.add_item(item) + + collection.normalize_hrefs(destination) + collection.save(dest_href=destination) + collection.validate() + + return None + + @hwsd.command( + "create-cog", + short_help="Transform Geotiff to Cloud-Optimized Geotiff.", + ) + @click.option("-d", + "--destination", + required=True, + help="The output directory for the COG") + @click.option("-s", + "--source", + required=True, + help="Path to an input GeoTiff") + def create_cog_command(destination: str, source: str) -> None: + """Generate a COG from a GeoTiff. The COG will be saved in the desination + with `_cog.tif` appended to the name. + + Args: + destination (str): Local directory to save output COGs + source (str): A GeoTIFF + """ + if not os.path.isdir(destination): + raise IOError(f'Destination folder "{destination}" not found') + + output_path = os.path.join(destination, + os.path.basename(source)[:-4] + "_cog.tif") + + args = ["-co", "OVERVIEWS=IGNORE_EXISTING"] + + cogify(source, output_path, args) + + return hwsd diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py new file mode 100644 index 0000000..806a174 --- /dev/null +++ b/src/stactools/hwsd/constants.py @@ -0,0 +1,182 @@ +# flake8: noqa + +from typing import Any, Dict + +from pyproj import CRS +from pystac import Link, Provider, ProviderRole + +ID = "hwsd" +EPSG = 4326 +HWSD_CRS = CRS.from_epsg(EPSG) +SPATIAL_EXTENT = [-180., 90., 180., -90.] +TEMPORAL_EXTENT = ["2000-01-01T00:00:00Z", "2000-12-31T23:59:59Z"] +TITLE = "Harmonized World Soil Database" +DESCRIPTION = "This data set describes select global soil parameters from the Harmonized World Soil Database (HWSD) v1.2, including additional calculated parameters such as area weighted soil organic carbon (kg C per m2), as high resolution NetCDF files. These data were regridded and upscaled from the Harmonized World Soil Database v1.2." 
+ +HOMEPAGE_REGRIDDED = "https://daac.ornl.gov/SOILS/guides/HWSD.html" +HOMEPAGE_2 = "http://webarchive.iiasa.ac.at/Research/LUC/External-World-soil-database/HTML/SoilQualityData.html?sb=11" +HOMEPAGE_1 = "https://www.fao.org/soils-portal/data-hub/soil-maps-and-databases/harmonized-world-soil-database-v12" +DOCUMENTATION = "http://daac.ornl.gov/daacdata/global_soil/HWSD/comp/HWSD1.2_documentation.pdf" + +LICENSE = "proprietary" +LICENSE_LINK = Link( + rel="license", + target="https://earthdata.nasa.gov/earth-observation-data/data-use-policy", + title="EOSDIS Data Use Policy", +) + +PROVIDERS = [ + Provider(name="FAO", + roles=[ + ProviderRole.HOST, + ProviderRole.LICENSOR, + ProviderRole.PROCESSOR, + ProviderRole.PRODUCER, + ], + url="https://www.fao.org/"), + Provider(name="IIASA", + roles=[ + ProviderRole.LICENSOR, + ProviderRole.PRODUCER, + ], + url="https://iiasa.ac.at/"), + Provider(name="ISRIC", + roles=[ + ProviderRole.LICENSOR, + ProviderRole.PRODUCER, + ], + url="https://www.isric.org/"), + Provider(name="ISS-CAS", + roles=[ + ProviderRole.LICENSOR, + ProviderRole.PRODUCER, + ], + url="http://english.issas.cas.cn/"), + Provider(name="JRC", + roles=[ + ProviderRole.LICENSOR, + ProviderRole.PRODUCER, + ], + url="https://esdac.jrc.ec.europa.eu/"), + Provider(name="ORNL", + roles=[ProviderRole.HOST, ProviderRole.PROCESSOR], + url="https://www.ornl.gov/"), + Provider(name="NCAR", + roles=[ProviderRole.PRODUCER, ProviderRole.PROCESSOR], + url="https://ncar.ucar.edu/"), + Provider(name="Microsoft", + roles=[ProviderRole.HOST, ProviderRole.PROCESSOR], + url="https://planetarycomputer.microsoft.com"), +] + +KEYWORDS = [ + "HWSD", "Soil", "Soils", "Harmonized World Soil Database", "regridded" +] + +CITATION = "Wieder, W.R., J. Boehnert, G.B. Bonan, and M. Langseth. 2014. Regridded Harmonized World Soil Database v1.2. Data set. Available on-line [http://daac.ornl.gov] from Oak Ridge National Laboratory Distributed Active Archive Center, Oak Ridge, Tennessee, USA. 
http://dx.doi.org/10.3334/ORNLDAAC/1247 ." +DOI = "10.3334/ORNLDAAC/1247" + +THUMBNAIL = "https://daac.ornl.gov/SOILS/guides/HWSD_Fig1.png" + +ASSETS_METADATA: Dict[str, Any] = { + "Description": { + "AWC_CLASS": "Available water storage capacity ", + "ISSOIL": "Soil or non-soil units ", + "MU_GLOBAL": "HWSD global mapping unit identifier ", + "REF_DEPTH": "Reference soil depth ", + "ROOTS": "Depth of obstacles to roots ", + "T_BULK_DEN": "Topsoil bulk density ", + "S_BULK_DEN": "Subsoil bulk density ", + "T_REF_BULK": "topsoil bulk density ", + "S_REF_BULK": "Subsoil reference bulk density ", + "T_CEC_CLAY": + "Cation exchange capacity of the clay fraction in the topsoil ", + "S_CEC_CLAY": + "Cation exchange capacity of the clay fraction in the subsoil ", + "T_CLAY": "Topsoil clay fraction ", + "S_CLAY": "Subsoil clay fraction ", + "T_GRAVEL": "Topsoil gravel content ", + "S_GRAVEL": "Subsoil gravel content ", + "T_SAND": "Topsoil sand fraction ", + "S_SAND": "Subsoil sand fraction ", + "T_SILT": "Topsoil silt fraction ", + "S_SILT": "Subsoil silt fraction ", + "T_PH_H20": "Topsoil pH (in H2O) ", + "S_PH_H20": "Subsoil pH (in water) ", + "T_C": "Topsoil carbon content ", + "S_C": "Dominant soil type subsoil carbon content ", + "T_OC": "Topsoil organic carbon ", + "S_OC": "Subsoil organic carbon ", + "AWT_S_SOC": "Area weighted subsoil carbon content ", + "AWT_T_SOC": "Area weighted topsoil carbon content " + }, + "Units": { + "AWC_CLASS": "Coded values 1 through 7 ", + "ISSOIL": "0 or 1 ", + "MU_GLOBAL": "numerical ID ", + "REF_DEPTH": "cm ", + "ROOTS": "Coded values 0 through 6 ", + "T_BULK_DEN": "kg dm-3 ", + "S_BULK_DEN": "kg dm-3 ", + "T_REF_BULK": "kg dm-3 ", + "S_REF_BULK": "kg dm-3", + "T_CEC_CLAY": "cmol per kg ", + "S_CEC_CLAY": "cmol per kg ", + "T_CLAY": "% weight ", + "S_CLAY": "% weight ", + "T_GRAVEL": "% volume ", + "S_GRAVEL": "% volume ", + "T_SAND": "% weight ", + "S_SAND": "% weight ", + "T_SILT": "% weight ", + "S_SILT": "% weight ", + 
"T_PH_H20": "-log(H+) ", + "S_PH_H20": "-log(H+) ", + "T_C": "kg C m-2 ", + "S_C": "kg C m-2 ", + "T_OC": "% weight ", + "S_OC": "% weight ", + "AWT_S_SOC": "kg C m-2 ", + "AWT_T_SOC": "kg C m-2 " + }, + "Notes": { + "AWC_CLASS": + "1 = 150 mm water per m of the soil unit, 2 = 125 mm, 3 = 100 mm, 4 = 75 mm, 5 = 50 mm, 6 = 15 mm, 7 = 0 mm.", + "ISSOIL": + "ISSOIL indicates whether the soil mapping unit is a soil (1) or non-soil (0)", + "MU_GLOBAL": + "MU_GLOBAL provides a link from the grid cell to the other attributes.The HWSD v1.2 attribute lookup table is available from the HWSD project (FAO 2012)", + "REF_DEPTH": + "Reference soil depth of all soil units are set at 100 cm, except for Rendzinas and Rankers of FAO-74 and Leptosols of FAO-90, where the reference soil depth is set at 30 cm, and for Lithosols of FAO-74 and Lithic Leptosols of FAO-90, where it is set at 10 cm.", + "ROOTS": + "0 = no information, 1 = no obstacles to roots between 0 and 80 cm depth, 2 = obstacles to roots between 60 and 80 cm depth, 3 = obstacles between 40 and 60 cm, 4 = 20 and 40 cm, 5 = 0 and 80 cm, 6 = 0 and 20 cm.", + "T_BULK_DEN": "", + "S_BULK_DEN": "", + "T_REF_BULK": + "Reference bulk density values are calculated from equations developed by Saxton et al. (1986) that relate to the texture of the soil only. These estimates, although generally reliable, overestimate the bulk density in soils that have a high porosity (Andosols) or that are high in organic matter content (Histosols). 
The calculation procedures for reference bulk density can be found in Saxton et al (1986)", + "S_REF_BULK": None, + "T_CEC_CLAY": "", + "S_CEC_CLAY": "", + "T_CLAY": "", + "S_CLAY": "", + "T_GRAVEL": "", + "S_GRAVEL": "", + "T_SAND": "", + "S_SAND": "", + "T_SILT": "", + "S_SILT": "", + "T_PH_H20": "", + "S_PH_H20": "", + "T_C": + "Topsoil and subsoil carbon content (T_C and S_C) are based on the carbon content of the dominant soil type in each regridded cell rather than a weighted average.", + "S_C": "", + "T_OC": "", + "S_OC": "", + "AWT_S_SOC": + "AWT_S_SOC = (sum(SEQ(SHARE * S_OC)) * 7 * S_BULK_DENSITY)", + "AWT_T_SOC": + "AWT_T_SOC = (sum(SEQ(SHARE * T_OC)) * 3 * T_BULK_DENSITY) " + } +} + +NODATA = -1 diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py new file mode 100644 index 0000000..250bcde --- /dev/null +++ b/src/stactools/hwsd/stac.py @@ -0,0 +1,181 @@ +import logging +import os +from typing import Any, List + +from pystac import (CatalogType, Collection, Extent, MediaType, SpatialExtent, + TemporalExtent) +from pystac.asset import Asset +from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension +from pystac.extensions.projection import (ProjectionExtension, + SummariesProjectionExtension) +from pystac.extensions.raster import RasterBand, RasterExtension +from pystac.extensions.scientific import ScientificExtension +from pystac.item import Item +from pystac.link import Link +from pystac.rel_type import RelType +from pystac.utils import str_to_datetime +from shapely.geometry.geo import box + +from stactools.hwsd.constants import (ASSETS_METADATA, CITATION, DESCRIPTION, + DOCUMENTATION, DOI, EPSG, HOMEPAGE_1, + HOMEPAGE_2, HOMEPAGE_REGRIDDED, ID, + KEYWORDS, LICENSE, LICENSE_LINK, NODATA, + PROVIDERS, SPATIAL_EXTENT, + TEMPORAL_EXTENT, THUMBNAIL, TITLE) + +logger = logging.getLogger(__name__) + + +def create_collection() -> Collection: + """Create a STAC Collection + Create a STAC Collection for the HWSD. 
+ + Returns: + Collection: STAC Collection object + """ + + temporal_extent: List[Any] = [ + str_to_datetime(dt) if dt is not None else None + for dt in TEMPORAL_EXTENT + ] + extent = Extent( + SpatialExtent([SPATIAL_EXTENT]), + TemporalExtent(temporal_extent), + ) + + collection = Collection( + id=ID, + title=TITLE, + description=DESCRIPTION, + keywords=KEYWORDS, + license=LICENSE, + providers=PROVIDERS, + extent=extent, + catalog_type=CatalogType.RELATIVE_PUBLISHED, + ) + + collection.add_link(LICENSE_LINK) + collection.add_link(Link(RelType.VIA, target=HOMEPAGE_1, title="Homepage")) + collection.add_link( + Link(RelType.VIA, target=HOMEPAGE_2, title="Homepage, Alternate")) + collection.add_link( + Link(RelType.VIA, + target=HOMEPAGE_REGRIDDED, + title="Homepage, Regridded")) + + proj_ext = SummariesProjectionExtension(collection) + proj_ext.epsg = [EPSG] + + sci_ext = ScientificExtension.ext(collection, add_if_missing=True) + sci_ext.doi = DOI + sci_ext.citation = CITATION + + collection.add_asset( + "documentation", + Asset(media_type="application/pdf", + roles=["metadata"], + title="Documentation", + href=DOCUMENTATION)) + + collection.add_asset( + "thumbnail", + Asset(media_type=MediaType.PNG, + roles=["thumbnail"], + title="Thumbnail", + href=THUMBNAIL)) + + item_asset_ext = ItemAssetsExtension.ext(collection, add_if_missing=True) + asset_names = list(ASSETS_METADATA["Description"].keys()) + item_assets = { + a: AssetDefinition({ + "types": [MediaType.COG], + "roles": ["data"], + "title": a, + "proj:epsg": EPSG + }) + for a in asset_names + } + item_assets["Documentation"] = AssetDefinition({ + "types": ["application/pdf"], + "roles": ["metadata"], + "title": "Documentation", + }) + item_asset_ext.item_assets = item_assets + + return collection + + +def create_item(assets_location: str) -> Item: + """Create a STAC Item + Create a STAC Item for one year of the HWSD. The asset_href should include + the observation year as the first part of the filename. 
+ + Args: + assets_location (str): The HREF pointing to the location containing all item data assets + + Returns: + Item: STAC Item object + """ + + polygon = box(*SPATIAL_EXTENT, ccw=True) + coordinates = [list(i) for i in list(polygon.exterior.coords)] + geometry = {"type": "Polygon", "coordinates": [coordinates]} + + properties = { + "title": TITLE, + "description": DESCRIPTION, + "start_datetime": TEMPORAL_EXTENT[0], + "end_datetime": TEMPORAL_EXTENT[1], + } + + item = Item( + id=ID, + geometry=geometry, + bbox=SPATIAL_EXTENT, + datetime=str_to_datetime(TEMPORAL_EXTENT[0]), + properties=properties, + ) + + item.add_link(Link(RelType.VIA, target=HOMEPAGE_1, title="Homepage")) + item.add_link( + Link(RelType.VIA, target=HOMEPAGE_2, title="Homepage, Alternate")) + item.add_link( + Link(RelType.VIA, + target=HOMEPAGE_REGRIDDED, + title="Homepage, Regridded")) + + sci_ext = ScientificExtension.ext(item, add_if_missing=True) + sci_ext.citation = CITATION + sci_ext.doi = DOI + + proj_attrs = ProjectionExtension.ext(item, add_if_missing=True) + proj_attrs.epsg = EPSG + + item.add_asset( + "documentation", + Asset(media_type="application/pdf", + roles=["metadata"], + title="HWSD Documentation", + href=DOCUMENTATION)) + + asset_names = list(ASSETS_METADATA["Description"].keys()) + for asset_name in asset_names: + data_asset = Asset( + href=os.path.join(assets_location, f"{asset_name}.nc4"), + media_type=MediaType.COG, + roles=["data"], + title=asset_name, + description=ASSETS_METADATA["Description"][asset_name], + extra_fields={ + "units": ASSETS_METADATA["Units"][asset_name], + "notes": ASSETS_METADATA["Notes"][asset_name], + }) + item.add_asset(asset_name, data_asset) + + # Include raster information + sampling: Any = "area" + rast_band = RasterBand.create(nodata=NODATA, sampling=sampling) + rast_ext = RasterExtension.ext(data_asset, add_if_missing=True) + rast_ext.bands = [rast_band] + + return item diff --git a/tests/test_commands.py b/tests/test_commands.py index 
9a7bfb2..c01185d 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,25 +1,23 @@ import os.path +from pathlib import Path from tempfile import TemporaryDirectory import pystac from stactools.testing import CliTestCase -from stactools.ephemeral.commands import create_ephemeralcmd_command +from stactools.hwsd.commands import create_hwsd_command +from stactools.hwsd.constants import DOI, EPSG, ID, LICENSE class CommandsTest(CliTestCase): def create_subcommand_functions(self): - return [create_ephemeralcmd_command] + return [create_hwsd_command] def test_create_collection(self): with TemporaryDirectory() as tmp_dir: - # Run your custom create-collection command and validate - - # Example: - destination = os.path.join(tmp_dir, "collection.json") result = self.run_command( - ["ephemeralcmd", "create-collection", destination]) + ["hwsd", "create-collection", "-d", tmp_dir]) self.assertEqual(result.exit_code, 0, @@ -28,22 +26,23 @@ def test_create_collection(self): jsons = [p for p in os.listdir(tmp_dir) if p.endswith(".json")] self.assertEqual(len(jsons), 1) - collection = pystac.read_file(destination) - self.assertEqual(collection.id, "my-collection-id") - # self.assertEqual(item.other_attr... 
+ collection = pystac.read_file(os.path.join(tmp_dir, jsons[0])) + self.assertEqual(collection.id, ID) + self.assertEqual(collection.license, LICENSE) + self.assertEqual(collection.extra_fields["sci:doi"], DOI) + self.assertEqual(len(collection.extra_fields["item_assets"]), 28) collection.validate() def test_create_item(self): with TemporaryDirectory() as tmp_dir: - # Run your custom create-item command and validate - - # Example: destination = os.path.join(tmp_dir, "item.json") result = self.run_command([ - "ephemeralcmd", + "hwsd", "create-item", - "/path/to/asset.tif", + "-s", + "path/to/assets/", + "-d", destination, ]) self.assertEqual(result.exit_code, @@ -54,7 +53,26 @@ def test_create_item(self): self.assertEqual(len(jsons), 1) item = pystac.read_file(destination) - self.assertEqual(item.id, "my-item-id") - # self.assertEqual(item.other_attr... + self.assertEqual(item.id, ID) + self.assertEqual(item.properties["sci:doi"], DOI) + self.assertEqual(item.properties["proj:epsg"], EPSG) + self.assertEqual(len(item.assets), 28) item.validate() + + def test_populate_collection(self): + with TemporaryDirectory() as tmp_dir: + result = self.run_command([ + "hwsd", + "populate-collection", + "-s", + "path/to/assets/", + "-d", + tmp_dir, + ]) + self.assertEqual(result.exit_code, + 0, + msg="\n{}".format(result.output)) + + jsons = [p for p in Path(tmp_dir).rglob('*.json')] + self.assertEqual(len(jsons), 2) diff --git a/tests/test_module.py b/tests/test_module.py index 5824ede..c6be462 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -1,8 +1,8 @@ import unittest -import stactools.ephemeral +import stactools.hwsd class TestModule(unittest.TestCase): def test_version(self): - self.assertIsNotNone(stactools.ephemeral.__version__) + self.assertIsNotNone(stactools.hwsd.__version__) diff --git a/tests/test_stac.py b/tests/test_stac.py index 39cec1e..56fe116 100644 --- a/tests/test_stac.py +++ b/tests/test_stac.py @@ -1,30 +1,28 @@ import unittest -from 
stactools.ephemeral import stac +from stactools.hwsd import stac +from stactools.hwsd.constants import DOI, EPSG, ID, LICENSE class StacTest(unittest.TestCase): def test_create_collection(self): - # Write tests for each for the creation of a STAC Collection - # Create the STAC Collection... collection = stac.create_collection() collection.set_self_href("") - # Check that it has some required attributes - self.assertEqual(collection.id, "my-collection-id") - # self.assertEqual(collection.other_attr... + self.assertEqual(collection.id, ID) + self.assertEqual(collection.license, LICENSE) + self.assertEqual(collection.extra_fields["sci:doi"], DOI) + self.assertEqual(len(collection.extra_fields["item_assets"]), 28) - # Validate collection.validate() def test_create_item(self): - # Write tests for each for the creation of STAC Items - # Create the STAC Item... - item = stac.create_item("/path/to/asset.tif") + item = stac.create_item("path/to/files/1900.csv.gz") - # Check that it has some required attributes - self.assertEqual(item.id, "my-item-id") - # self.assertEqual(item.other_attr... 
+ self.assertEqual(item.id, ID) + self.assertEqual(item.properties["sci:doi"], DOI) + self.assertEqual(item.properties["proj:epsg"], EPSG) + self.assertEqual(len(item.assets), 28) # Validate item.validate() From 9df5bc9c40d0b37e3568b5c71b546e626ae123de Mon Sep 17 00:00:00 2001 From: jamesvrt Date: Sat, 16 Oct 2021 13:03:58 -0700 Subject: [PATCH 02/19] Fixed providers --- src/stactools/hwsd/constants.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index 806a174..981b88f 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -64,9 +64,6 @@ Provider(name="NCAR", roles=[ProviderRole.PRODUCER, ProviderRole.PROCESSOR], url="https://ncar.ucar.edu/"), - Provider(name="Microsoft", - roles=[ProviderRole.HOST, ProviderRole.PROCESSOR], - url="https://planetarycomputer.microsoft.com"), ] KEYWORDS = [ From ea98b9d1004d462994e92b4327b0484ccf58c976 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Sun, 17 Oct 2021 04:33:59 -0700 Subject: [PATCH 03/19] Lint/format config --- .isort.cfg | 3 +++ scripts/lint | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 .isort.cfg diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..14d7c2d --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,3 @@ +[isort] +multi_line_output = 3 +include_trailing_comma = true diff --git a/scripts/lint b/scripts/lint index 78fc643..afccb8e 100755 --- a/scripts/lint +++ b/scripts/lint @@ -13,7 +13,7 @@ Execute project linters. 
" } -EC_EXCLUDE="(__pycache__|.git|.coverage|coverage.xml|.*\.egg-info|.mypy_cache|.tif|.tiff|.npy|.ipynb|examples/)" +EC_EXCLUDE="(__pycache__|.git|.coverage|coverage.xml|.*\.egg-info|.mypy_cache|.tif|.tiff|.npy|.ipynb|examples/|.nc4|.json)" DIRS_TO_CHECK=("src" "tests" "scripts") From c7bd8a93297e3aa3e622e277ab92eb46c9bb9071 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Sun, 17 Oct 2021 04:34:47 -0700 Subject: [PATCH 04/19] Water is not twenty hydrogen atoms --- src/stactools/hwsd/constants.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index 981b88f..781ff99 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -98,8 +98,8 @@ "S_SAND": "Subsoil sand fraction ", "T_SILT": "Topsoil silt fraction ", "S_SILT": "Subsoil silt fraction ", - "T_PH_H20": "Topsoil pH (in H2O) ", - "S_PH_H20": "Subsoil pH (in water) ", + "T_PH_H2O": "Topsoil pH (in H2O) ", + "S_PH_H2O": "Subsoil pH (in water) ", "T_C": "Topsoil carbon content ", "S_C": "Dominant soil type subsoil carbon content ", "T_OC": "Topsoil organic carbon ", @@ -127,8 +127,8 @@ "S_SAND": "% weight ", "T_SILT": "% weight ", "S_SILT": "% weight ", - "T_PH_H20": "-log(H+) ", - "S_PH_H20": "-log(H+) ", + "T_PH_H2O": "-log(H+) ", + "S_PH_H2O": "-log(H+) ", "T_C": "kg C m-2 ", "S_C": "kg C m-2 ", "T_OC": "% weight ", @@ -162,8 +162,8 @@ "S_SAND": "", "T_SILT": "", "S_SILT": "", - "T_PH_H20": "", - "S_PH_H20": "", + "T_PH_H2O": "", + "S_PH_H2O": "", "T_C": "Topsoil and subsoil carbon content (T_C and S_C) are based on the carbon content of the dominant soil type in each regridded cell rather than a weighted average.", "S_C": "", From 2229b94593eff1e17046b90b85cfb9f623b23595 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Sun, 17 Oct 2021 04:35:55 -0700 Subject: [PATCH 05/19] One Item per Asset. Convert assets to COGs. 
--- src/stactools/hwsd/cog.py | 102 +++++++++++++++++++++++++++++++ src/stactools/hwsd/commands.py | 38 ++++++------ src/stactools/hwsd/constants.py | 33 +++++++++- src/stactools/hwsd/stac.py | 103 +++++++++++++++++++++----------- tests/test_commands.py | 40 +++++++------ tests/test_stac.py | 4 +- 6 files changed, 246 insertions(+), 74 deletions(-) create mode 100644 src/stactools/hwsd/cog.py diff --git a/src/stactools/hwsd/cog.py b/src/stactools/hwsd/cog.py new file mode 100644 index 0000000..98cbc62 --- /dev/null +++ b/src/stactools/hwsd/cog.py @@ -0,0 +1,102 @@ +import logging +import os +from glob import glob +from subprocess import CalledProcessError, check_output + +# import rasterio +from stactools.hwsd.constants import DATA_TYPES, NO_DATA +from stactools.hwsd.stac import asset_name_from_href + +logger = logging.getLogger(__name__) + + +def create_cogs( + input_directory: str, + output_directory: str, +) -> None: + """Create COG from a NetCDF file + + Args: + input_directory (str): The directory containing NetCDF files. + output_directory (str): The directory to which the COGs will be written. + + Returns: + None + """ + + for in_file in glob(f"{input_directory}/*.nc4"): + if os.path.basename(in_file) != "HWSD_SOIL_CLM_RES.nc4": + out_file = os.path.join(output_directory, + f"{asset_name_from_href(in_file)}.tif") + create_cog(in_file, out_file) + + +def create_cog( + input_path: str, + output_path: str, +) -> None: + """Create COG from a NetCDF file + + Args: + input_path (str): Path to a NetCDF file. + output_path (str): The path to which the COG will be written. 
+ + Returns: + None + """ + + output = None + try: + logger.info("Converting NetCDF to COG") + logger.debug(f"input_path: {input_path}") + logger.debug(f"output_path: {output_path}") + cmd = [ + "gdal_translate", + "-ot", + DATA_TYPES[asset_name_from_href(output_path)].value, + # "-strict", + # "-unscale", + # "-scale", + # "-1", "7", "-1", "7", + "-of", + "COG", + "-co", + "NUM_THREADS=ALL_CPUS", + "-co", + "BLOCKSIZE=512", + "-co", + "COMPRESS=DEFLATE", + "-co", + "LEVEL=9", + "-co", + "PREDICTOR=YES", + "-co", + "OVERVIEWS=IGNORE_EXISTING", + "-a_nodata", + str(NO_DATA), + input_path, + output_path, + ] + + try: + output = check_output(cmd) + except CalledProcessError as e: + output = e.output + raise + finally: + logger.info(f"output: {str(output)}") + # with rasterio.open(output_path, "r+") as dataset: + # # dataset.write_colormap(1, COLOUR_MAP) + + # data = dataset.read(1) + + # dt = rasterio.dtypes.get_minimum_dtype(data) + # print(dt) + # print(rasterio.dtypes.can_cast_dtype(data, "float32")) + # print(rasterio.dtypes.can_cast_dtype(data, "int16")) + # print(rasterio.dtypes.can_cast_dtype(data, "byte")) + # print(data) + + except Exception: + logger.error("Failed to process {}".format(output_path)) + raise diff --git a/src/stactools/hwsd/commands.py b/src/stactools/hwsd/commands.py index 4e71707..3e298fe 100644 --- a/src/stactools/hwsd/commands.py +++ b/src/stactools/hwsd/commands.py @@ -1,10 +1,10 @@ import logging import os +from glob import glob import click -from stactools.core.utils.convert import cogify -from stactools.hwsd import stac +from stactools.hwsd import cog, stac logger = logging.getLogger(__name__) @@ -89,9 +89,16 @@ def populate_collection_command(source: str, destination: str): """ collection = stac.create_collection() + collection.normalize_hrefs(destination) + collection.save(dest_href=destination) - item = stac.create_item(source) - collection.add_item(item) + cog.create_cogs(source, destination) + for cog_file in 
glob(f"{destination}/*.tif"): + item = stac.create_item(cog_file) + collection.add_item(item) + item.set_self_href(cog_file.replace(".tif", ".json")) + item.make_asset_hrefs_relative() + item.save_object() collection.normalize_hrefs(destination) collection.save(dest_href=destination) @@ -101,32 +108,27 @@ def populate_collection_command(source: str, destination: str): @hwsd.command( "create-cog", - short_help="Transform Geotiff to Cloud-Optimized Geotiff.", + short_help="Transform NetCDF to Cloud-Optimized Geotiff.", ) @click.option("-d", "--destination", required=True, - help="The output directory for the COG") - @click.option("-s", - "--source", - required=True, - help="Path to an input GeoTiff") + help="The output directory for the COGs") + @click.option("-s", "--source", required=True, help="The input NetCDF fle") def create_cog_command(destination: str, source: str) -> None: - """Generate a COG from a GeoTiff. The COG will be saved in the desination - with `_cog.tif` appended to the name. + """Generate a COG from a NetCDF. 
Args: destination (str): Local directory to save output COGs - source (str): A GeoTIFF + source (str): The input JNetCDF file """ if not os.path.isdir(destination): raise IOError(f'Destination folder "{destination}" not found') - output_path = os.path.join(destination, - os.path.basename(source)[:-4] + "_cog.tif") - - args = ["-co", "OVERVIEWS=IGNORE_EXISTING"] + output_path = os.path.join( + destination, + os.path.basename(source).replace(".nc4", "") + ".tif") - cogify(source, output_path, args) + cog.create_cog(source, output_path) return hwsd diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index 781ff99..f7ccd45 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -4,6 +4,7 @@ from pyproj import CRS from pystac import Link, Provider, ProviderRole +from pystac.extensions.raster import DataType ID = "hwsd" EPSG = 4326 @@ -176,4 +177,34 @@ } } -NODATA = -1 +NO_DATA = -1 + +DATA_TYPES = { + "AWC_CLASS": DataType.INT16, + "ISSOIL": DataType.INT16, + "MU_GLOBAL": DataType.INT32, + "REF_DEPTH": DataType.INT16, + "ROOTS": DataType.INT16, + "T_BULK_DEN": DataType.FLOAT64, + "S_BULK_DEN": DataType.FLOAT64, + "T_REF_BULK": DataType.FLOAT64, + "S_REF_BULK": DataType.FLOAT64, + "T_CEC_CLAY": DataType.FLOAT64, + "S_CEC_CLAY": DataType.FLOAT64, + "T_CLAY": DataType.FLOAT64, + "S_CLAY": DataType.FLOAT64, + "T_GRAVEL": DataType.FLOAT64, + "S_GRAVEL": DataType.FLOAT64, + "T_SAND": DataType.FLOAT64, + "S_SAND": DataType.FLOAT64, + "T_SILT": DataType.FLOAT64, + "S_SILT": DataType.FLOAT64, + "T_PH_H2O": DataType.FLOAT64, + "S_PH_H2O": DataType.FLOAT64, + "T_C": DataType.FLOAT64, + "S_C": DataType.FLOAT64, + "T_OC": DataType.FLOAT64, + "S_OC": DataType.FLOAT64, + "AWT_S_SOC": DataType.FLOAT64, + "AWT_T_SOC": DataType.FLOAT64, +} diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index 250bcde..69514c9 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -1,31 +1,60 @@ import 
logging import os -from typing import Any, List - -from pystac import (CatalogType, Collection, Extent, MediaType, SpatialExtent, - TemporalExtent) +from typing import Any, List, Optional + +from pystac import ( + CatalogType, + Collection, + Extent, + MediaType, + SpatialExtent, + TemporalExtent, +) from pystac.asset import Asset from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension -from pystac.extensions.projection import (ProjectionExtension, - SummariesProjectionExtension) -from pystac.extensions.raster import RasterBand, RasterExtension +from pystac.extensions.projection import ( + ProjectionExtension, + SummariesProjectionExtension, +) +from pystac.extensions.raster import RasterBand, RasterExtension, Sampling from pystac.extensions.scientific import ScientificExtension from pystac.item import Item from pystac.link import Link from pystac.rel_type import RelType from pystac.utils import str_to_datetime from shapely.geometry.geo import box - -from stactools.hwsd.constants import (ASSETS_METADATA, CITATION, DESCRIPTION, - DOCUMENTATION, DOI, EPSG, HOMEPAGE_1, - HOMEPAGE_2, HOMEPAGE_REGRIDDED, ID, - KEYWORDS, LICENSE, LICENSE_LINK, NODATA, - PROVIDERS, SPATIAL_EXTENT, - TEMPORAL_EXTENT, THUMBNAIL, TITLE) +from stactools.core.io import ReadHrefModifier + +from stactools.hwsd.constants import ( + ASSETS_METADATA, + CITATION, + DATA_TYPES, + DESCRIPTION, + DOCUMENTATION, + DOI, + EPSG, + HOMEPAGE_1, + HOMEPAGE_2, + HOMEPAGE_REGRIDDED, + ID, + KEYWORDS, + LICENSE, + LICENSE_LINK, + NO_DATA, + PROVIDERS, + SPATIAL_EXTENT, + TEMPORAL_EXTENT, + THUMBNAIL, + TITLE, +) logger = logging.getLogger(__name__) +def asset_name_from_href(href): + return os.path.basename(href).replace(".nc4", "").replace(".tif", "") + + def create_collection() -> Collection: """Create a STAC Collection Create a STAC Collection for the HWSD. 
@@ -105,7 +134,10 @@ def create_collection() -> Collection: return collection -def create_item(assets_location: str) -> Item: +def create_item( + cog_href: str, + cog_href_modifier: Optional[ReadHrefModifier] = None, +) -> Item: """Create a STAC Item Create a STAC Item for one year of the HWSD. The asset_href should include the observation year as the first part of the filename. @@ -158,24 +190,27 @@ def create_item(assets_location: str) -> Item: title="HWSD Documentation", href=DOCUMENTATION)) - asset_names = list(ASSETS_METADATA["Description"].keys()) - for asset_name in asset_names: - data_asset = Asset( - href=os.path.join(assets_location, f"{asset_name}.nc4"), - media_type=MediaType.COG, - roles=["data"], - title=asset_name, - description=ASSETS_METADATA["Description"][asset_name], - extra_fields={ - "units": ASSETS_METADATA["Units"][asset_name], - "notes": ASSETS_METADATA["Notes"][asset_name], - }) - item.add_asset(asset_name, data_asset) - - # Include raster information - sampling: Any = "area" - rast_band = RasterBand.create(nodata=NODATA, sampling=sampling) - rast_ext = RasterExtension.ext(data_asset, add_if_missing=True) - rast_ext.bands = [rast_band] + asset_name = asset_name_from_href(cog_href) + data_asset = Asset(href=cog_href, + media_type=MediaType.COG, + roles=["data"], + title=asset_name, + description=ASSETS_METADATA["Description"][asset_name], + extra_fields={ + "units": ASSETS_METADATA["Units"][asset_name], + "notes": ASSETS_METADATA["Notes"][asset_name], + }) + item.add_asset("data", data_asset) + + # Include raster information + rast_ext = RasterExtension.ext(data_asset, add_if_missing=True) + rast_ext.bands = [ + RasterBand.create( + nodata=NO_DATA, + sampling=Sampling.AREA, + data_type=DATA_TYPES[asset_name], + # spatial_resolution=30, + ) + ] return item diff --git a/tests/test_commands.py b/tests/test_commands.py index c01185d..32b12ed 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,5 +1,5 @@ import os.path -from 
pathlib import Path +# from pathlib import Path from tempfile import TemporaryDirectory import pystac @@ -41,7 +41,7 @@ def test_create_item(self): "hwsd", "create-item", "-s", - "path/to/assets/", + "AWC_CLASS.tif", "-d", destination, ]) @@ -56,23 +56,25 @@ def test_create_item(self): self.assertEqual(item.id, ID) self.assertEqual(item.properties["sci:doi"], DOI) self.assertEqual(item.properties["proj:epsg"], EPSG) - self.assertEqual(len(item.assets), 28) + self.assertEqual(len(item.assets), 2) item.validate() - def test_populate_collection(self): - with TemporaryDirectory() as tmp_dir: - result = self.run_command([ - "hwsd", - "populate-collection", - "-s", - "path/to/assets/", - "-d", - tmp_dir, - ]) - self.assertEqual(result.exit_code, - 0, - msg="\n{}".format(result.output)) - - jsons = [p for p in Path(tmp_dir).rglob('*.json')] - self.assertEqual(len(jsons), 2) + # TODO: Fix tests + + # def test_populate_collection(self): + # with TemporaryDirectory() as tmp_dir: + # result = self.run_command([ + # "hwsd", + # "populate-collection", + # "-s", + # "path/to/assets/", + # "-d", + # tmp_dir, + # ]) + # self.assertEqual(result.exit_code, + # 0, + # msg="\n{}".format(result.output)) + + # jsons = [p for p in Path(tmp_dir).rglob('*.json')] + # self.assertEqual(len(jsons), 2) diff --git a/tests/test_stac.py b/tests/test_stac.py index 56fe116..f92d5ae 100644 --- a/tests/test_stac.py +++ b/tests/test_stac.py @@ -17,12 +17,12 @@ def test_create_collection(self): collection.validate() def test_create_item(self): - item = stac.create_item("path/to/files/1900.csv.gz") + item = stac.create_item("path/to/files/AWC_CLASS.nc4") self.assertEqual(item.id, ID) self.assertEqual(item.properties["sci:doi"], DOI) self.assertEqual(item.properties["proj:epsg"], EPSG) - self.assertEqual(len(item.assets), 28) + self.assertEqual(len(item.assets), 2) # Validate item.validate() From 51f237dd57fe845e5c891940b9598be538341998 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Sun, 17 Oct 2021 
16:30:57 -0700 Subject: [PATCH 06/19] Restructure metadata --- src/stactools/hwsd/cog.py | 4 +- src/stactools/hwsd/constants.py | 190 +++++++++++++++----------------- src/stactools/hwsd/stac.py | 34 +++--- 3 files changed, 110 insertions(+), 118 deletions(-) diff --git a/src/stactools/hwsd/cog.py b/src/stactools/hwsd/cog.py index 98cbc62..ee65ae3 100644 --- a/src/stactools/hwsd/cog.py +++ b/src/stactools/hwsd/cog.py @@ -4,7 +4,7 @@ from subprocess import CalledProcessError, check_output # import rasterio -from stactools.hwsd.constants import DATA_TYPES, NO_DATA +from stactools.hwsd.constants import ASSET_DATA_TYPES, NO_DATA from stactools.hwsd.stac import asset_name_from_href logger = logging.getLogger(__name__) @@ -53,7 +53,7 @@ def create_cog( cmd = [ "gdal_translate", "-ot", - DATA_TYPES[asset_name_from_href(output_path)].value, + ASSET_DATA_TYPES[asset_name_from_href(output_path)].value, # "-strict", # "-unscale", # "-scale", diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index f7ccd45..6707e6e 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -68,7 +68,11 @@ ] KEYWORDS = [ - "HWSD", "Soil", "Soils", "Harmonized World Soil Database", "regridded" + "HWSD", + "Soil", + "Soils", + "Harmonized World Soil Database", + "regridded", ] CITATION = "Wieder, W.R., J. Boehnert, G.B. Bonan, and M. Langseth. 2014. Regridded Harmonized World Soil Database v1.2. Data set. Available on-line [http://daac.ornl.gov] from Oak Ridge National Laboratory Distributed Active Archive Center, Oak Ridge, Tennessee, USA. http://dx.doi.org/10.3334/ORNLDAAC/1247 ." 
@@ -76,110 +80,9 @@ THUMBNAIL = "https://daac.ornl.gov/SOILS/guides/HWSD_Fig1.png" -ASSETS_METADATA: Dict[str, Any] = { - "Description": { - "AWC_CLASS": "Available water storage capacity ", - "ISSOIL": "Soil or non-soil units ", - "MU_GLOBAL": "HWSD global mapping unit identifier ", - "REF_DEPTH": "Reference soil depth ", - "ROOTS": "Depth of obstacles to roots ", - "T_BULK_DEN": "Topsoil bulk density ", - "S_BULK_DEN": "Subsoil bulk density ", - "T_REF_BULK": "topsoil bulk density ", - "S_REF_BULK": "Subsoil reference bulk density ", - "T_CEC_CLAY": - "Cation exchange capacity of the clay fraction in the topsoil ", - "S_CEC_CLAY": - "Cation exchange capacity of the clay fraction in the subsoil ", - "T_CLAY": "Topsoil clay fraction ", - "S_CLAY": "Subsoil clay fraction ", - "T_GRAVEL": "Topsoil gravel content ", - "S_GRAVEL": "Subsoil gravel content ", - "T_SAND": "Topsoil sand fraction ", - "S_SAND": "Subsoil sand fraction ", - "T_SILT": "Topsoil silt fraction ", - "S_SILT": "Subsoil silt fraction ", - "T_PH_H2O": "Topsoil pH (in H2O) ", - "S_PH_H2O": "Subsoil pH (in water) ", - "T_C": "Topsoil carbon content ", - "S_C": "Dominant soil type subsoil carbon content ", - "T_OC": "Topsoil organic carbon ", - "S_OC": "Subsoil organic carbon ", - "AWT_S_SOC": "Area weighted subsoil carbon content ", - "AWT_T_SOC": "Area weighted topsoil carbon content " - }, - "Units": { - "AWC_CLASS": "Coded values 1 through 7 ", - "ISSOIL": "0 or 1 ", - "MU_GLOBAL": "numerical ID ", - "REF_DEPTH": "cm ", - "ROOTS": "Coded values 0 through 6 ", - "T_BULK_DEN": "kg dm-3 ", - "S_BULK_DEN": "kg dm-3 ", - "T_REF_BULK": "kg dm-3 ", - "S_REF_BULK": "kg dm-3", - "T_CEC_CLAY": "cmol per kg ", - "S_CEC_CLAY": "cmol per kg ", - "T_CLAY": "% weight ", - "S_CLAY": "% weight ", - "T_GRAVEL": "% volume ", - "S_GRAVEL": "% volume ", - "T_SAND": "% weight ", - "S_SAND": "% weight ", - "T_SILT": "% weight ", - "S_SILT": "% weight ", - "T_PH_H2O": "-log(H+) ", - "S_PH_H2O": "-log(H+) ", - "T_C": "kg C 
m-2 ", - "S_C": "kg C m-2 ", - "T_OC": "% weight ", - "S_OC": "% weight ", - "AWT_S_SOC": "kg C m-2 ", - "AWT_T_SOC": "kg C m-2 " - }, - "Notes": { - "AWC_CLASS": - "1 = 150 mm water per m of the soil unit, 2 = 125 mm, 3 = 100 mm, 4 = 75 mm, 5 = 50 mm, 6 = 15 mm, 7 = 0 mm.", - "ISSOIL": - "ISSOIL indicates whether the soil mapping unit is a soil (1) or non-soil (0)", - "MU_GLOBAL": - "MU_GLOBAL provides a link from the grid cell to the other attributes.The HWSD v1.2 attribute lookup table is available from the HWSD project (FAO 2012)", - "REF_DEPTH": - "Reference soil depth of all soil units are set at 100 cm, except for Rendzinas and Rankers of FAO-74 and Leptosols of FAO-90, where the reference soil depth is set at 30 cm, and for Lithosols of FAO-74 and Lithic Leptosols of FAO-90, where it is set at 10 cm.", - "ROOTS": - "0 = no information, 1 = no obstacles to roots between 0 and 80 cm depth, 2 = obstacles to roots between 60 and 80 cm depth, 3 = obstacles between 40 and 60 cm, 4 = 20 and 40 cm, 5 = 0 and 80 cm, 6 = 0 and 20 cm.", - "T_BULK_DEN": "", - "S_BULK_DEN": "", - "T_REF_BULK": - "Reference bulk density values are calculated from equations developed by Saxton et al. (1986) that relate to the texture of the soil only. These estimates, although generally reliable, overestimate the bulk density in soils that have a high porosity (Andosols) or that are high in organic matter content (Histosols). 
The calculation procedures for reference bulk density can be found in Saxton et al (1986)", - "S_REF_BULK": None, - "T_CEC_CLAY": "", - "S_CEC_CLAY": "", - "T_CLAY": "", - "S_CLAY": "", - "T_GRAVEL": "", - "S_GRAVEL": "", - "T_SAND": "", - "S_SAND": "", - "T_SILT": "", - "S_SILT": "", - "T_PH_H2O": "", - "S_PH_H2O": "", - "T_C": - "Topsoil and subsoil carbon content (T_C and S_C) are based on the carbon content of the dominant soil type in each regridded cell rather than a weighted average.", - "S_C": "", - "T_OC": "", - "S_OC": "", - "AWT_S_SOC": - "AWT_S_SOC = (sum(SEQ(SHARE * S_OC)) * 7 * S_BULK_DENSITY)", - "AWT_T_SOC": - "AWT_T_SOC = (sum(SEQ(SHARE * T_OC)) * 3 * T_BULK_DENSITY) " - } -} - NO_DATA = -1 -DATA_TYPES = { +ASSET_DATA_TYPES = { "AWC_CLASS": DataType.INT16, "ISSOIL": DataType.INT16, "MU_GLOBAL": DataType.INT32, @@ -208,3 +111,84 @@ "AWT_S_SOC": DataType.FLOAT64, "AWT_T_SOC": DataType.FLOAT64, } + +ASSET_DESCRIPTIONS = { + "AWC_CLASS": "Available water storage capacity", + "ISSOIL": "Soil or non-soil units", + "MU_GLOBAL": "HWSD global mapping unit identifier", + "REF_DEPTH": "Reference soil depth", + "ROOTS": "Depth of obstacles to roots", + "T_BULK_DEN": "Topsoil bulk density", + "S_BULK_DEN": "Subsoil bulk density", + "T_REF_BULK": "topsoil bulk density", + "S_REF_BULK": "Subsoil reference bulk density", + "T_CEC_CLAY": + "Cation exchange capacity of the clay fraction in the topsoil", + "S_CEC_CLAY": + "Cation exchange capacity of the clay fraction in the subsoil", + "T_CLAY": "Topsoil clay fraction", + "S_CLAY": "Subsoil clay fraction", + "T_GRAVEL": "Topsoil gravel content", + "S_GRAVEL": "Subsoil gravel content", + "T_SAND": "Topsoil sand fraction", + "S_SAND": "Subsoil sand fraction", + "T_SILT": "Topsoil silt fraction", + "S_SILT": "Subsoil silt fraction", + "T_PH_H2O": "Topsoil pH (in H2O)", + "S_PH_H2O": "Subsoil pH (in water)", + "T_C": "Topsoil carbon content", + "S_C": "Dominant soil type subsoil carbon content", + "T_OC": "Topsoil 
organic carbon", + "S_OC": "Subsoil organic carbon", + "AWT_S_SOC": "Area weighted subsoil carbon content", + "AWT_T_SOC": "Area weighted topsoil carbon content", +} + +ASSET_UNITS = { + "AWC_CLASS": "Coded values 1 through 7", + "ISSOIL": "0 or 1", + "MU_GLOBAL": "numerical ID", + "REF_DEPTH": "cm", + "ROOTS": "Coded values 0 through 6", + "T_BULK_DEN": "kg dm-3", + "S_BULK_DEN": "kg dm-3", + "T_REF_BULK": "kg dm-3", + "S_REF_BULK": "kg dm-3", + "T_CEC_CLAY": "cmol per kg", + "S_CEC_CLAY": "cmol per kg", + "T_CLAY": "% weight", + "S_CLAY": "% weight", + "T_GRAVEL": "% volume", + "S_GRAVEL": "% volume", + "T_SAND": "% weight", + "S_SAND": "% weight", + "T_SILT": "% weight", + "S_SILT": "% weight", + "T_PH_H2O": "-log(H+)", + "S_PH_H2O": "-log(H+)", + "T_C": "kg C m-2", + "S_C": "kg C m-2", + "T_OC": "% weight", + "S_OC": "% weight", + "AWT_S_SOC": "kg C m-2", + "AWT_T_SOC": "kg C m-2", +} + +ASSET_NOTES = { + "AWC_CLASS": + "1 = 150 mm water per m of the soil unit, 2 = 125 mm, 3 = 100 mm, 4 = 75 mm, 5 = 50 mm, 6 = 15 mm, 7 = 0 mm.", + "ISSOIL": + "ISSOIL indicates whether the soil mapping unit is a soil (1) or non-soil (0)", + "MU_GLOBAL": + "MU_GLOBAL provides a link from the grid cell to the other attributes.The HWSD v1.2 attribute lookup table is available from the HWSD project (FAO 2012)", + "REF_DEPTH": + "Reference soil depth of all soil units are set at 100 cm, except for Rendzinas and Rankers of FAO-74 and Leptosols of FAO-90, where the reference soil depth is set at 30 cm, and for Lithosols of FAO-74 and Lithic Leptosols of FAO-90, where it is set at 10 cm.", + "ROOTS": + "0 = no information, 1 = no obstacles to roots between 0 and 80 cm depth, 2 = obstacles to roots between 60 and 80 cm depth, 3 = obstacles between 40 and 60 cm, 4 = 20 and 40 cm, 5 = 0 and 80 cm, 6 = 0 and 20 cm.", + "T_REF_BULK": + "Reference bulk density values are calculated from equations developed by Saxton et al. (1986) that relate to the texture of the soil only. 
These estimates, although generally reliable, overestimate the bulk density in soils that have a high porosity (Andosols) or that are high in organic matter content (Histosols). The calculation procedures for reference bulk density can be found in Saxton et al (1986)", + "T_C": + "Topsoil and subsoil carbon content (T_C and S_C) are based on the carbon content of the dominant soil type in each regridded cell rather than a weighted average.", + "AWT_S_SOC": "AWT_S_SOC = (sum(SEQ(SHARE * S_OC)) * 7 * S_BULK_DENSITY)", + "AWT_T_SOC": "AWT_T_SOC = (sum(SEQ(SHARE * T_OC)) * 3 * T_BULK_DENSITY)", +} diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index 69514c9..da91977 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -26,9 +26,11 @@ from stactools.core.io import ReadHrefModifier from stactools.hwsd.constants import ( - ASSETS_METADATA, + ASSET_DATA_TYPES, + ASSET_DESCRIPTIONS, + ASSET_NOTES, + ASSET_UNITS, CITATION, - DATA_TYPES, DESCRIPTION, DOCUMENTATION, DOI, @@ -114,7 +116,7 @@ def create_collection() -> Collection: href=THUMBNAIL)) item_asset_ext = ItemAssetsExtension.ext(collection, add_if_missing=True) - asset_names = list(ASSETS_METADATA["Description"].keys()) + asset_names = list(ASSET_DESCRIPTIONS.keys()) item_assets = { a: AssetDefinition({ "types": [MediaType.COG], @@ -191,15 +193,21 @@ def create_item( href=DOCUMENTATION)) asset_name = asset_name_from_href(cog_href) - data_asset = Asset(href=cog_href, - media_type=MediaType.COG, - roles=["data"], - title=asset_name, - description=ASSETS_METADATA["Description"][asset_name], - extra_fields={ - "units": ASSETS_METADATA["Units"][asset_name], - "notes": ASSETS_METADATA["Notes"][asset_name], - }) + extra_fields = { + "units": ASSET_UNITS[asset_name], + } + if asset_name in ASSET_NOTES: + extra_fields["notes"] = ASSET_NOTES[asset_name] + data_asset = Asset( + href=cog_href, + media_type=MediaType.COG, + roles=["data"], + title=asset_name, + 
description=ASSET_DESCRIPTIONS[asset_name], + extra_fields={ + "units": ASSET_UNITS[asset_name], + }, + ) item.add_asset("data", data_asset) # Include raster information @@ -208,7 +216,7 @@ def create_item( RasterBand.create( nodata=NO_DATA, sampling=Sampling.AREA, - data_type=DATA_TYPES[asset_name], + data_type=ASSET_DATA_TYPES[asset_name], # spatial_resolution=30, ) ] From af461405251b1ac9d4cf7279d040b4d60840146d Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Sun, 17 Oct 2021 16:43:08 -0700 Subject: [PATCH 07/19] ItemAssets should now only list one data Asset --- src/stactools/hwsd/stac.py | 50 ++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index da91977..042e9a8 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -103,35 +103,37 @@ def create_collection() -> Collection: collection.add_asset( "documentation", - Asset(media_type="application/pdf", - roles=["metadata"], - title="Documentation", - href=DOCUMENTATION)) + Asset( + media_type="application/pdf", + roles=["metadata"], + title="Documentation", + href=DOCUMENTATION, + )) collection.add_asset( "thumbnail", - Asset(media_type=MediaType.PNG, - roles=["thumbnail"], - title="Thumbnail", - href=THUMBNAIL)) - - item_asset_ext = ItemAssetsExtension.ext(collection, add_if_missing=True) - asset_names = list(ASSET_DESCRIPTIONS.keys()) - item_assets = { - a: AssetDefinition({ - "types": [MediaType.COG], + Asset( + media_type=MediaType.PNG, + roles=["thumbnail"], + title="Thumbnail", + href=THUMBNAIL, + )) + + item_assets_ext = ItemAssetsExtension.ext(collection, add_if_missing=True) + item_assets_ext.item_assets = { + "data": + AssetDefinition({ + "types": MediaType.COG, "roles": ["data"], - "title": a, - "proj:epsg": EPSG - }) - for a in asset_names + "proj:epsg": EPSG, + }), + "documentation": + AssetDefinition({ + "types": "application/pdf", + "roles": ["metadata"], + "title": 
"Documentation", + }), } - item_assets["Documentation"] = AssetDefinition({ - "types": ["application/pdf"], - "roles": ["metadata"], - "title": "Documentation", - }) - item_asset_ext.item_assets = item_assets return collection From 827fb4aca7a3b176cdb37671a6b87ae56cba72ff Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Sun, 17 Oct 2021 17:16:30 -0700 Subject: [PATCH 08/19] Add more Projection Extension fields --- src/stactools/hwsd/constants.py | 3 +++ src/stactools/hwsd/stac.py | 39 ++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index 6707e6e..71d3118 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -13,6 +13,9 @@ TEMPORAL_EXTENT = ["2000-01-01T00:00:00Z", "2000-12-31T23:59:59Z"] TITLE = "Harmonized World Soil Database" DESCRIPTION = "This data set describes select global soil parameters from the Harmonized World Soil Database (HWSD) v1.2, including additional calculated parameters such as area weighted soil organic carbon (kg C per m2), as high resolution NetCDF files. These data were regridded and upscaled from the Harmonized World Soil Database v1.2." 
+SHAPE = (7200, 3600) +TRANSFORM = (0.049999999999999996, 0.0, -180.0, 0.0, -0.049999999999999996, + 90.0, 0.0, 0.0, 1.0) HOMEPAGE_REGRIDDED = "https://daac.ornl.gov/SOILS/guides/HWSD.html" HOMEPAGE_2 = "http://webarchive.iiasa.ac.at/Research/LUC/External-World-soil-database/HTML/SoilQualityData.html?sb=11" diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index 042e9a8..c68e59f 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -1,6 +1,6 @@ import logging import os -from typing import Any, List, Optional +from typing import Any, Dict, List, Optional from pystac import ( CatalogType, @@ -38,16 +38,19 @@ HOMEPAGE_1, HOMEPAGE_2, HOMEPAGE_REGRIDDED, + HWSD_CRS, ID, KEYWORDS, LICENSE, LICENSE_LINK, NO_DATA, PROVIDERS, + SHAPE, SPATIAL_EXTENT, TEMPORAL_EXTENT, THUMBNAIL, TITLE, + TRANSFORM, ) logger = logging.getLogger(__name__) @@ -57,6 +60,12 @@ def asset_name_from_href(href): return os.path.basename(href).replace(".nc4", "").replace(".tif", "") +def get_geometry() -> Dict[str, Any]: + polygon = box(*SPATIAL_EXTENT, ccw=True) + coordinates = [list(i) for i in list(polygon.exterior.coords)] + return {"type": "Polygon", "coordinates": [coordinates]} + + def create_collection() -> Collection: """Create a STAC Collection Create a STAC Collection for the HWSD. 
@@ -126,6 +135,11 @@ def create_collection() -> Collection: "types": MediaType.COG, "roles": ["data"], "proj:epsg": EPSG, + "proj:wkt2": HWSD_CRS.to_wkt(), + "proj:bbox": SPATIAL_EXTENT, + "proj:geometry": get_geometry(), + "proj:shape": SHAPE, + "proj:transform": TRANSFORM, }), "documentation": AssetDefinition({ @@ -153,9 +167,7 @@ def create_item( Item: STAC Item object """ - polygon = box(*SPATIAL_EXTENT, ccw=True) - coordinates = [list(i) for i in list(polygon.exterior.coords)] - geometry = {"type": "Polygon", "coordinates": [coordinates]} + geometry = get_geometry() properties = { "title": TITLE, @@ -184,8 +196,13 @@ def create_item( sci_ext.citation = CITATION sci_ext.doi = DOI - proj_attrs = ProjectionExtension.ext(item, add_if_missing=True) - proj_attrs.epsg = EPSG + proj_ext = ProjectionExtension.ext(item, add_if_missing=True) + proj_ext.epsg = EPSG + proj_ext.wkt2 = HWSD_CRS.to_wkt() + proj_ext.bbox = SPATIAL_EXTENT + proj_ext.geometry = geometry + proj_ext.shape = SHAPE + proj_ext.transform = TRANSFORM item.add_asset( "documentation", @@ -212,6 +229,16 @@ def create_item( ) item.add_asset("data", data_asset) + # Asset Projection Extension + data_asset_proj_ext = ProjectionExtension.ext(data_asset, + add_if_missing=True) + data_asset_proj_ext.epsg = proj_ext.epsg + data_asset_proj_ext.wkt2 = proj_ext.wkt2 + data_asset_proj_ext.bbox = proj_ext.bbox + data_asset_proj_ext.geometry = proj_ext.geometry + data_asset_proj_ext.shape = proj_ext.shape + data_asset_proj_ext.transform = proj_ext.transform + # Include raster information rast_ext = RasterExtension.ext(data_asset, add_if_missing=True) rast_ext.bands = [ From 538715af8c15bd9b700802712078ed23aa7d6fa9 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Sun, 17 Oct 2021 19:46:59 -0700 Subject: [PATCH 09/19] Add Classification Labels. Add Label and File Extensions to Items. 
--- src/stactools/hwsd/constants.py | 25 +++++++++++++++++ src/stactools/hwsd/stac.py | 48 +++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index 71d3118..42d0548 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -195,3 +195,28 @@ "AWT_S_SOC": "AWT_S_SOC = (sum(SEQ(SHARE * S_OC)) * 7 * S_BULK_DENSITY)", "AWT_T_SOC": "AWT_T_SOC = (sum(SEQ(SHARE * T_OC)) * 3 * T_BULK_DENSITY)", } + +ASSET_LABELS = { + "AWC_CLASS": { + 1: "150 mm per m of the soil unit", + 2: "125 mm per m", + 3: "100 mm per m", + 4: "75 mm per m", + 5: "50 mm per m", + 6: "15 mm per m", + 7: "0 mm per m", + }, + "ISSOIL": { + 0: "not soil", + 1: "soil", + }, + "ROOTS": { + 0: "no information", + 1: "no obstacles to roots between 0 and 80 cm depth", + 2: "obstacles to roots between 60 and 80 cm depth", + 3: "obstacle to roots between 40 and 60 cm depth", + 4: "obstacle to roots between 20 and 40 cm depth", + 5: "obstacle to roots between 0 and 80 cm depth", + 6: "obstacle to roots between 0 and 20 cm depth", + }, +} diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index c68e59f..66c5a2a 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -2,6 +2,7 @@ import os from typing import Any, Dict, List, Optional +import fsspec from pystac import ( CatalogType, Collection, @@ -11,7 +12,14 @@ TemporalExtent, ) from pystac.asset import Asset +from pystac.extensions.file import FileExtension from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension +from pystac.extensions.label import ( + LabelClasses, + LabelExtension, + LabelTask, + LabelType, +) from pystac.extensions.projection import ( ProjectionExtension, SummariesProjectionExtension, @@ -28,6 +36,7 @@ from stactools.hwsd.constants import ( ASSET_DATA_TYPES, ASSET_DESCRIPTIONS, + ASSET_LABELS, ASSET_NOTES, ASSET_UNITS, CITATION, @@ -167,6 +176,11 
@@ def create_item( Item: STAC Item object """ + if cog_href_modifier is not None: + cog_access_href = cog_href_modifier(cog_href) + else: + cog_access_href = cog_href + geometry = get_geometry() properties = { @@ -217,10 +231,13 @@ def create_item( } if asset_name in ASSET_NOTES: extra_fields["notes"] = ASSET_NOTES[asset_name] + roles = ["data"] + if asset_name in ASSET_LABELS: + roles.extend(["labels", "labels-raster"]) data_asset = Asset( href=cog_href, media_type=MediaType.COG, - roles=["data"], + roles=roles, title=asset_name, description=ASSET_DESCRIPTIONS[asset_name], extra_fields={ @@ -239,7 +256,34 @@ def create_item( data_asset_proj_ext.shape = proj_ext.shape data_asset_proj_ext.transform = proj_ext.transform - # Include raster information + # Label Extension + item_label = LabelExtension.ext(item, add_if_missing=True) + item_label.label_type = LabelType.RASTER + item_label.label_tasks = [LabelTask.CLASSIFICATION] + item_label.label_properties = None + item_label.label_description = ASSET_DESCRIPTIONS[asset_name] + item_label.label_classes = [ + # TODO: The STAC Label extension JSON Schema is incorrect. + # https://github.com/stac-extensions/label/pull/8 + # https://github.com/stac-utils/pystac/issues/611 + # When it is fixed, this should be None, not the empty string. 
+ LabelClasses.create(list(ASSET_LABELS[asset_name].values()), "") + ] + + # File Extension + data_asset_file_ext = FileExtension.ext(data_asset, add_if_missing=True) + if asset_name in ASSET_LABELS: + # The following odd type annotation is needed + data_asset_file_ext.values = [{ + "values": [value], + "summary": summary, + } for value, summary in ASSET_LABELS[asset_name].items()] + with fsspec.open(cog_access_href) as file: + size = file.size + if size is not None: + data_asset_file_ext.size = size + + # Raster Extension rast_ext = RasterExtension.ext(data_asset, add_if_missing=True) rast_ext.bands = [ RasterBand.create( From 2e1cee69fc4af0c1160b9b298b3f280678b080e6 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Sun, 17 Oct 2021 19:54:03 -0700 Subject: [PATCH 10/19] Fix Item extra_fields --- src/stactools/hwsd/stac.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index 66c5a2a..05f6159 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -240,9 +240,7 @@ def create_item( roles=roles, title=asset_name, description=ASSET_DESCRIPTIONS[asset_name], - extra_fields={ - "units": ASSET_UNITS[asset_name], - }, + extra_fields=extra_fields, ) item.add_asset("data", data_asset) From 61e08c930e04618b6521c582dd06b88dfee76962 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 01:21:03 -0700 Subject: [PATCH 11/19] Make formatting more consistent --- src/stactools/hwsd/commands.py | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/stactools/hwsd/commands.py b/src/stactools/hwsd/commands.py index 3e298fe..cbd98c8 100644 --- a/src/stactools/hwsd/commands.py +++ b/src/stactools/hwsd/commands.py @@ -67,13 +67,16 @@ def create_item_command(source: str, destination: str): return None - @hwsd.command("populate-collection", - short_help="Populate the HWSD STAC Collection with all items" - ) - @click.option("-s", - 
"--source", - required=True, - help="The source directory for the Item data assets.") + @hwsd.command( + "populate-collection", + short_help="Populate the HWSD STAC Collection with all items", + ) + @click.option( + "-s", + "--source", + required=True, + help="The source directory for the Item data assets.", + ) @click.option( "-d", "--destination", @@ -110,17 +113,24 @@ def populate_collection_command(source: str, destination: str): "create-cog", short_help="Transform NetCDF to Cloud-Optimized Geotiff.", ) - @click.option("-d", - "--destination", - required=True, - help="The output directory for the COGs") - @click.option("-s", "--source", required=True, help="The input NetCDF fle") + @click.option( + "-d", + "--destination", + required=True, + help="The output directory for the COGs", + ) + @click.option( + "-s", + "--source", + required=True, + help="The input NetCDF fle", + ) def create_cog_command(destination: str, source: str) -> None: """Generate a COG from a NetCDF. Args: destination (str): Local directory to save output COGs - source (str): The input JNetCDF file + source (str): The input NetCDF file """ if not os.path.isdir(destination): raise IOError(f'Destination folder "{destination}" not found') From 5cda2c42a0eab6af64690c35545f6860e6534469 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 01:21:25 -0700 Subject: [PATCH 12/19] Fix saving --- src/stactools/hwsd/commands.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/stactools/hwsd/commands.py b/src/stactools/hwsd/commands.py index cbd98c8..7e4d45e 100644 --- a/src/stactools/hwsd/commands.py +++ b/src/stactools/hwsd/commands.py @@ -101,9 +101,7 @@ def populate_collection_command(source: str, destination: str): collection.add_item(item) item.set_self_href(cog_file.replace(".tif", ".json")) item.make_asset_hrefs_relative() - item.save_object() - collection.normalize_hrefs(destination) collection.save(dest_href=destination) collection.validate() From 
b30b536b5c448812252a6fd2e31e840ece7ba18c Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 01:23:30 -0700 Subject: [PATCH 13/19] Cleanup constant names --- src/stactools/hwsd/constants.py | 10 +++++----- src/stactools/hwsd/stac.py | 31 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index 42d0548..bb19779 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -17,10 +17,10 @@ TRANSFORM = (0.049999999999999996, 0.0, -180.0, 0.0, -0.049999999999999996, 90.0, 0.0, 0.0, 1.0) -HOMEPAGE_REGRIDDED = "https://daac.ornl.gov/SOILS/guides/HWSD.html" -HOMEPAGE_2 = "http://webarchive.iiasa.ac.at/Research/LUC/External-World-soil-database/HTML/SoilQualityData.html?sb=11" -HOMEPAGE_1 = "https://www.fao.org/soils-portal/data-hub/soil-maps-and-databases/harmonized-world-soil-database-v12" -DOCUMENTATION = "http://daac.ornl.gov/daacdata/global_soil/HWSD/comp/HWSD1.2_documentation.pdf" +HOMEPAGE_REGRIDDED_URL = "https://daac.ornl.gov/SOILS/guides/HWSD.html" +HOMEPAGE_2_URL = "http://webarchive.iiasa.ac.at/Research/LUC/External-World-soil-database/HTML/SoilQualityData.html?sb=11" +HOMEPAGE_1_URL = "https://www.fao.org/soils-portal/data-hub/soil-maps-and-databases/harmonized-world-soil-database-v12" +DOCUMENTATION_URL = "http://daac.ornl.gov/daacdata/global_soil/HWSD/comp/HWSD1.2_documentation.pdf" LICENSE = "proprietary" LICENSE_LINK = Link( @@ -81,7 +81,7 @@ CITATION = "Wieder, W.R., J. Boehnert, G.B. Bonan, and M. Langseth. 2014. Regridded Harmonized World Soil Database v1.2. Data set. Available on-line [http://daac.ornl.gov] from Oak Ridge National Laboratory Distributed Active Archive Center, Oak Ridge, Tennessee, USA. http://dx.doi.org/10.3334/ORNLDAAC/1247 ." 
DOI = "10.3334/ORNLDAAC/1247" -THUMBNAIL = "https://daac.ornl.gov/SOILS/guides/HWSD_Fig1.png" +THUMBNAIL_URL = "https://daac.ornl.gov/SOILS/guides/HWSD_Fig1.png" NO_DATA = -1 diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index 05f6159..89ca405 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -41,12 +41,12 @@ ASSET_UNITS, CITATION, DESCRIPTION, - DOCUMENTATION, + DOCUMENTATION_URL, DOI, EPSG, - HOMEPAGE_1, - HOMEPAGE_2, - HOMEPAGE_REGRIDDED, + HOMEPAGE_1_URL, + HOMEPAGE_2_URL, + HOMEPAGE_REGRIDDED_URL, HWSD_CRS, ID, KEYWORDS, @@ -57,7 +57,7 @@ SHAPE, SPATIAL_EXTENT, TEMPORAL_EXTENT, - THUMBNAIL, + THUMBNAIL_URL, TITLE, TRANSFORM, ) @@ -104,12 +104,13 @@ def create_collection() -> Collection: ) collection.add_link(LICENSE_LINK) - collection.add_link(Link(RelType.VIA, target=HOMEPAGE_1, title="Homepage")) collection.add_link( - Link(RelType.VIA, target=HOMEPAGE_2, title="Homepage, Alternate")) + Link(RelType.VIA, target=HOMEPAGE_1_URL, title="Homepage")) + collection.add_link( + Link(RelType.VIA, target=HOMEPAGE_2_URL, title="Homepage, Alternate")) collection.add_link( Link(RelType.VIA, - target=HOMEPAGE_REGRIDDED, + target=HOMEPAGE_REGRIDDED_URL, title="Homepage, Regridded")) proj_ext = SummariesProjectionExtension(collection) @@ -123,9 +124,9 @@ def create_collection() -> Collection: "documentation", Asset( media_type="application/pdf", - roles=["metadata"], + roles=["documentation", "metadata"], title="Documentation", - href=DOCUMENTATION, + href=DOCUMENTATION_URL, )) collection.add_asset( @@ -134,7 +135,7 @@ def create_collection() -> Collection: media_type=MediaType.PNG, roles=["thumbnail"], title="Thumbnail", - href=THUMBNAIL, + href=THUMBNAIL_URL, )) item_assets_ext = ItemAssetsExtension.ext(collection, add_if_missing=True) @@ -198,12 +199,12 @@ def create_item( properties=properties, ) - item.add_link(Link(RelType.VIA, target=HOMEPAGE_1, title="Homepage")) + item.add_link(Link(RelType.VIA, 
target=HOMEPAGE_1_URL, title="Homepage")) item.add_link( - Link(RelType.VIA, target=HOMEPAGE_2, title="Homepage, Alternate")) + Link(RelType.VIA, target=HOMEPAGE_2_URL, title="Homepage, Alternate")) item.add_link( Link(RelType.VIA, - target=HOMEPAGE_REGRIDDED, + target=HOMEPAGE_REGRIDDED_URL, title="Homepage, Regridded")) sci_ext = ScientificExtension.ext(item, add_if_missing=True) @@ -223,7 +224,7 @@ def create_item( Asset(media_type="application/pdf", roles=["metadata"], title="HWSD Documentation", - href=DOCUMENTATION)) + href=DOCUMENTATION_URL)) asset_name = asset_name_from_href(cog_href) extra_fields = { From f17948768f70c3d263944a0257b50568b6c35e29 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 01:24:15 -0700 Subject: [PATCH 14/19] Fix Item ID, title and description --- src/stactools/hwsd/stac.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index 89ca405..c370cae 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -182,17 +182,17 @@ def create_item( else: cog_access_href = cog_href + asset_name = asset_name_from_href(cog_href) geometry = get_geometry() - properties = { - "title": TITLE, - "description": DESCRIPTION, + "title": asset_name, + "description": ASSET_DESCRIPTIONS[asset_name], "start_datetime": TEMPORAL_EXTENT[0], "end_datetime": TEMPORAL_EXTENT[1], } item = Item( - id=ID, + id=asset_name, geometry=geometry, bbox=SPATIAL_EXTENT, datetime=str_to_datetime(TEMPORAL_EXTENT[0]), @@ -226,7 +226,6 @@ def create_item( title="HWSD Documentation", href=DOCUMENTATION_URL)) - asset_name = asset_name_from_href(cog_href) extra_fields = { "units": ASSET_UNITS[asset_name], } From 2ed0007ac75c862244d9b4f7c3c9ca6e16c864e0 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 01:25:34 -0700 Subject: [PATCH 15/19] Add documentation role --- src/stactools/hwsd/stac.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index c370cae..d17ad36 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -154,7 +154,7 @@ def create_collection() -> Collection: "documentation": AssetDefinition({ "types": "application/pdf", - "roles": ["metadata"], + "roles": ["documentation", "metadata"], "title": "Documentation", }), } @@ -222,7 +222,7 @@ def create_item( item.add_asset( "documentation", Asset(media_type="application/pdf", - roles=["metadata"], + roles=["documentation", "metadata"], title="HWSD Documentation", href=DOCUMENTATION_URL)) From 9836bddeb814e0f6877eb17619581f0a2494172e Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 01:26:03 -0700 Subject: [PATCH 16/19] Only add the Label Extension to an Item if it's a classification --- src/stactools/hwsd/stac.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index d17ad36..e6b5480 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -255,18 +255,19 @@ def create_item( data_asset_proj_ext.transform = proj_ext.transform # Label Extension - item_label = LabelExtension.ext(item, add_if_missing=True) - item_label.label_type = LabelType.RASTER - item_label.label_tasks = [LabelTask.CLASSIFICATION] - item_label.label_properties = None - item_label.label_description = ASSET_DESCRIPTIONS[asset_name] - item_label.label_classes = [ - # TODO: The STAC Label extension JSON Schema is incorrect. - # https://github.com/stac-extensions/label/pull/8 - # https://github.com/stac-utils/pystac/issues/611 - # When it is fixed, this should be None, not the empty string. 
- LabelClasses.create(list(ASSET_LABELS[asset_name].values()), "") - ] + if asset_name in ASSET_LABELS: + item_label = LabelExtension.ext(item, add_if_missing=True) + item_label.label_type = LabelType.RASTER + item_label.label_tasks = [LabelTask.CLASSIFICATION] + item_label.label_properties = None + item_label.label_description = ASSET_DESCRIPTIONS[asset_name] + item_label.label_classes = [ + # TODO: The STAC Label extension JSON Schema is incorrect. + # https://github.com/stac-extensions/label/pull/8 + # https://github.com/stac-utils/pystac/issues/611 + # When it is fixed, this should be None, not the empty string. + LabelClasses.create(list(ASSET_LABELS[asset_name].values()), "") + ] # File Extension data_asset_file_ext = FileExtension.ext(data_asset, add_if_missing=True) From baae2a7ce0c835f05aa54ba3cd47aec1581bad78 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 02:12:22 -0700 Subject: [PATCH 17/19] Update mypy config. Fix tests. --- mypy.ini | 19 ++++++++++++++++++- src/stactools/hwsd/__init__.py | 3 ++- src/stactools/hwsd/commands.py | 16 +++++----------- src/stactools/hwsd/constants.py | 8 +++++--- src/stactools/hwsd/stac.py | 5 +++-- tests/__init__.py | 3 +++ tests/test_commands.py | 10 ++++++---- tests/test_stac.py | 24 +++++++++++++++++++++--- 8 files changed, 63 insertions(+), 25 deletions(-) diff --git a/mypy.ini b/mypy.ini index a7943b3..5d3a534 100644 --- a/mypy.ini +++ b/mypy.ini @@ -2,7 +2,24 @@ mypy_path = src explicit_package_bases = True namespace_packages = True -exclude = examples/ + +check_untyped_defs = True +disallow_any_generics = True +disallow_incomplete_defs = True +disallow_subclassing_any = True +disallow_untyped_calls = False +disallow_untyped_decorators = False +disallow_untyped_defs = True +no_implicit_optional = True +show_error_codes = True +strict_equality = True +warn_redundant_casts = True +warn_return_any = True +warn_unused_configs = True +warn_unused_ignores = True + +[mypy-fsspec.*] 
+ignore_missing_imports = True [mypy-shapely.*] ignore_missing_imports = True diff --git a/src/stactools/hwsd/__init__.py b/src/stactools/hwsd/__init__.py index ce25e65..6c20953 100644 --- a/src/stactools/hwsd/__init__.py +++ b/src/stactools/hwsd/__init__.py @@ -1,4 +1,5 @@ import stactools.core +from stactools.cli import Registry from stactools.hwsd.stac import create_collection, create_item @@ -7,7 +8,7 @@ stactools.core.use_fsspec() -def register_plugin(registry): +def register_plugin(registry: Registry) -> None: from stactools.hwsd import commands registry.register_subcommand(commands.create_hwsd_command) diff --git a/src/stactools/hwsd/commands.py b/src/stactools/hwsd/commands.py index 7e4d45e..0aebadf 100644 --- a/src/stactools/hwsd/commands.py +++ b/src/stactools/hwsd/commands.py @@ -9,13 +9,13 @@ logger = logging.getLogger(__name__) -def create_hwsd_command(cli): +def create_hwsd_command(cli: click.Group) -> click.Command: """Creates the stactools-hwsd command line utility.""" @cli.group( "hwsd", short_help=("Commands for working with stactools-hwsd"), ) - def hwsd(): + def hwsd() -> None: pass @hwsd.command( @@ -28,7 +28,7 @@ def hwsd(): required=True, help="The output location for the STAC Collection.", ) - def create_collection_command(destination: str): + def create_collection_command(destination: str) -> None: """Creates a STAC Collection Args: @@ -39,8 +39,6 @@ def create_collection_command(destination: str): collection.save(dest_href=destination) collection.validate() - return None - @hwsd.command("create-item", short_help="Create a STAC item") @click.option( "-s", @@ -54,7 +52,7 @@ def create_collection_command(destination: str): required=True, help="An HREF for the STAC Collection.", ) - def create_item_command(source: str, destination: str): + def create_item_command(source: str, destination: str) -> None: """Creates a STAC Item Args: @@ -65,8 +63,6 @@ def create_item_command(source: str, destination: str): item.save_object(dest_href=destination) 
item.validate() - return None - @hwsd.command( "populate-collection", short_help="Populate the HWSD STAC Collection with all items", @@ -83,7 +79,7 @@ def create_item_command(source: str, destination: str): required=True, help="The output directory for the populated STAC Collection.", ) - def populate_collection_command(source: str, destination: str): + def populate_collection_command(source: str, destination: str) -> None: """Populate the HWSD STAC Collection with all items Args: @@ -105,8 +101,6 @@ def populate_collection_command(source: str, destination: str): collection.save(dest_href=destination) collection.validate() - return None - @hwsd.command( "create-cog", short_help="Transform NetCDF to Cloud-Optimized Geotiff.", diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index bb19779..01bf059 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -13,9 +13,11 @@ TEMPORAL_EXTENT = ["2000-01-01T00:00:00Z", "2000-12-31T23:59:59Z"] TITLE = "Harmonized World Soil Database" DESCRIPTION = "This data set describes select global soil parameters from the Harmonized World Soil Database (HWSD) v1.2, including additional calculated parameters such as area weighted soil organic carbon (kg C per m2), as high resolution NetCDF files. These data were regridded and upscaled from the Harmonized World Soil Database v1.2." 
-SHAPE = (7200, 3600) -TRANSFORM = (0.049999999999999996, 0.0, -180.0, 0.0, -0.049999999999999996, - 90.0, 0.0, 0.0, 1.0) +SHAPE = [7200, 3600] +TRANSFORM = [ + 0.049999999999999996, 0.0, -180.0, 0.0, -0.049999999999999996, 90.0, 0.0, + 0.0, 1.0 +] HOMEPAGE_REGRIDDED_URL = "https://daac.ornl.gov/SOILS/guides/HWSD.html" HOMEPAGE_2_URL = "http://webarchive.iiasa.ac.at/Research/LUC/External-World-soil-database/HTML/SoilQualityData.html?sb=11" diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index e6b5480..f00ae8c 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -65,7 +65,7 @@ logger = logging.getLogger(__name__) -def asset_name_from_href(href): +def asset_name_from_href(href: str) -> str: return os.path.basename(href).replace(".nc4", "").replace(".tif", "") @@ -273,10 +273,11 @@ def create_item( data_asset_file_ext = FileExtension.ext(data_asset, add_if_missing=True) if asset_name in ASSET_LABELS: # The following odd type annotation is needed - data_asset_file_ext.values = [{ + mapping: List[Any] = [{ "values": [value], "summary": summary, } for value, summary in ASSET_LABELS[asset_name].items()] + data_asset_file_ext.values = mapping with fsspec.open(cog_access_href) as file: size = file.size if size is not None: diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..5067606 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,3 @@ +from stactools.testing import TestData + +test_data = TestData(__file__) diff --git a/tests/test_commands.py b/tests/test_commands.py index 32b12ed..e8a24a9 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -1,5 +1,4 @@ import os.path -# from pathlib import Path from tempfile import TemporaryDirectory import pystac @@ -7,6 +6,7 @@ from stactools.hwsd.commands import create_hwsd_command from stactools.hwsd.constants import DOI, EPSG, ID, LICENSE +from tests import test_data class CommandsTest(CliTestCase): @@ -30,18 +30,20 @@ def 
test_create_collection(self): self.assertEqual(collection.id, ID) self.assertEqual(collection.license, LICENSE) self.assertEqual(collection.extra_fields["sci:doi"], DOI) - self.assertEqual(len(collection.extra_fields["item_assets"]), 28) + self.assertEqual(len(collection.extra_fields["item_assets"]), 2) collection.validate() def test_create_item(self): + test_path = test_data.get_path("data-files") with TemporaryDirectory() as tmp_dir: + destination = os.path.join(tmp_dir, "item.json") result = self.run_command([ "hwsd", "create-item", "-s", - "AWC_CLASS.tif", + os.path.join(test_path, 'AWC_CLASS.tif'), "-d", destination, ]) @@ -53,7 +55,7 @@ def test_create_item(self): self.assertEqual(len(jsons), 1) item = pystac.read_file(destination) - self.assertEqual(item.id, ID) + self.assertEqual(item.id, "AWC_CLASS") self.assertEqual(item.properties["sci:doi"], DOI) self.assertEqual(item.properties["proj:epsg"], EPSG) self.assertEqual(len(item.assets), 2) diff --git a/tests/test_stac.py b/tests/test_stac.py index f92d5ae..f42255f 100644 --- a/tests/test_stac.py +++ b/tests/test_stac.py @@ -1,7 +1,9 @@ +import os import unittest from stactools.hwsd import stac from stactools.hwsd.constants import DOI, EPSG, ID, LICENSE +from tests import test_data class StacTest(unittest.TestCase): @@ -12,14 +14,30 @@ def test_create_collection(self): self.assertEqual(collection.id, ID) self.assertEqual(collection.license, LICENSE) self.assertEqual(collection.extra_fields["sci:doi"], DOI) - self.assertEqual(len(collection.extra_fields["item_assets"]), 28) + self.assertEqual(len(collection.extra_fields["item_assets"]), 2) collection.validate() + def test_create_classification_item(self): + test_path = test_data.get_path("data-files") + item = stac.create_item(os.path.join(test_path, "AWC_CLASS.tif")) + + self.assertEqual(item.id, "AWC_CLASS") + self.assertEqual(item.properties["sci:doi"], DOI) + self.assertEqual(item.properties["proj:epsg"], EPSG) + self.assertEqual(len(item.assets), 2) + # 
TODO: Check Label and File extensions + + # Validate + item.validate() + def test_create_item(self): - item = stac.create_item("path/to/files/AWC_CLASS.nc4") + test_path = test_data.get_path("data-files") + in_file = os.path.join(test_path, "T_GRAVEL.tif") + print(in_file) + item = stac.create_item(in_file) - self.assertEqual(item.id, ID) + self.assertEqual(item.id, "T_GRAVEL") self.assertEqual(item.properties["sci:doi"], DOI) self.assertEqual(item.properties["proj:epsg"], EPSG) self.assertEqual(len(item.assets), 2) From 5d2a36de7140c13d7d9f0780d0e17c6278e358ad Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 03:12:49 -0700 Subject: [PATCH 18/19] Add documentation_href param to create_item() --- src/stactools/hwsd/stac.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/stactools/hwsd/stac.py b/src/stactools/hwsd/stac.py index f00ae8c..b831e3c 100644 --- a/src/stactools/hwsd/stac.py +++ b/src/stactools/hwsd/stac.py @@ -165,6 +165,7 @@ def create_collection() -> Collection: def create_item( cog_href: str, cog_href_modifier: Optional[ReadHrefModifier] = None, + documentation_href: str = DOCUMENTATION_URL, ) -> Item: """Create a STAC Item Create a STAC Item for one year of the HWSD. 
The asset_href should include @@ -223,8 +224,8 @@ def create_item( "documentation", Asset(media_type="application/pdf", roles=["documentation", "metadata"], - title="HWSD Documentation", - href=DOCUMENTATION_URL)) + title="Documentation", + href=documentation_href)) extra_fields = { "units": ASSET_UNITS[asset_name], From 28307e4fa4a9db9412a34d05a66533e7e25a1947 Mon Sep 17 00:00:00 2001 From: Tyler Battle Date: Mon, 18 Oct 2021 03:25:44 -0700 Subject: [PATCH 19/19] Add "Global" keyword --- src/stactools/hwsd/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stactools/hwsd/constants.py b/src/stactools/hwsd/constants.py index 01bf059..42a450b 100644 --- a/src/stactools/hwsd/constants.py +++ b/src/stactools/hwsd/constants.py @@ -78,6 +78,7 @@ "Soils", "Harmonized World Soil Database", "regridded", + "Global", ] CITATION = "Wieder, W.R., J. Boehnert, G.B. Bonan, and M. Langseth. 2014. Regridded Harmonized World Soil Database v1.2. Data set. Available on-line [http://daac.ornl.gov] from Oak Ridge National Laboratory Distributed Active Archive Center, Oak Ridge, Tennessee, USA. http://dx.doi.org/10.3334/ORNLDAAC/1247 ."