Skip to content

Commit

Permalink
One Item per Asset. Convert assets to COGs.
Browse files Browse the repository at this point in the history
  • Loading branch information
cuttlefish committed Oct 17, 2021
1 parent c7bd8a9 commit 2229b94
Show file tree
Hide file tree
Showing 6 changed files with 246 additions and 74 deletions.
102 changes: 102 additions & 0 deletions src/stactools/hwsd/cog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import logging
import os
from glob import glob
from subprocess import CalledProcessError, check_output

# import rasterio
from stactools.hwsd.constants import DATA_TYPES, NO_DATA
from stactools.hwsd.stac import asset_name_from_href

logger = logging.getLogger(__name__)


def create_cogs(
input_directory: str,
output_directory: str,
) -> None:
"""Create COG from a NetCDF file
Args:
input_directory (str): The directory containing NetCDF files.
output_directory (str): The directory to which the COGs will be written.
Returns:
None
"""

for in_file in glob(f"{input_directory}/*.nc4"):
if os.path.basename(in_file) != "HWSD_SOIL_CLM_RES.nc4":
out_file = os.path.join(output_directory,
f"{asset_name_from_href(in_file)}.tif")
create_cog(in_file, out_file)


def create_cog(
input_path: str,
output_path: str,
) -> None:
"""Create COG from a NetCDF file
Args:
input_path (str): Path to a NetCDF file.
output_path (str): The path to which the COG will be written.
Returns:
None
"""

output = None
try:
logger.info("Converting NetCDF to COG")
logger.debug(f"input_path: {input_path}")
logger.debug(f"output_path: {output_path}")
cmd = [
"gdal_translate",
"-ot",
DATA_TYPES[asset_name_from_href(output_path)].value,
# "-strict",
# "-unscale",
# "-scale",
# "-1", "7", "-1", "7",
"-of",
"COG",
"-co",
"NUM_THREADS=ALL_CPUS",
"-co",
"BLOCKSIZE=512",
"-co",
"COMPRESS=DEFLATE",
"-co",
"LEVEL=9",
"-co",
"PREDICTOR=YES",
"-co",
"OVERVIEWS=IGNORE_EXISTING",
"-a_nodata",
str(NO_DATA),
input_path,
output_path,
]

try:
output = check_output(cmd)
except CalledProcessError as e:
output = e.output
raise
finally:
logger.info(f"output: {str(output)}")
# with rasterio.open(output_path, "r+") as dataset:
# # dataset.write_colormap(1, COLOUR_MAP)

# data = dataset.read(1)

# dt = rasterio.dtypes.get_minimum_dtype(data)
# print(dt)
# print(rasterio.dtypes.can_cast_dtype(data, "float32"))
# print(rasterio.dtypes.can_cast_dtype(data, "int16"))
# print(rasterio.dtypes.can_cast_dtype(data, "byte"))
# print(data)

except Exception:
logger.error("Failed to process {}".format(output_path))
raise
38 changes: 20 additions & 18 deletions src/stactools/hwsd/commands.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
import os
from glob import glob

import click
from stactools.core.utils.convert import cogify

from stactools.hwsd import stac
from stactools.hwsd import cog, stac

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -89,9 +89,16 @@ def populate_collection_command(source: str, destination: str):
"""

collection = stac.create_collection()
collection.normalize_hrefs(destination)
collection.save(dest_href=destination)

item = stac.create_item(source)
collection.add_item(item)
cog.create_cogs(source, destination)
for cog_file in glob(f"{destination}/*.tif"):
item = stac.create_item(cog_file)
collection.add_item(item)
item.set_self_href(cog_file.replace(".tif", ".json"))
item.make_asset_hrefs_relative()
item.save_object()

collection.normalize_hrefs(destination)
collection.save(dest_href=destination)
Expand All @@ -101,32 +108,27 @@ def populate_collection_command(source: str, destination: str):

@hwsd.command(
"create-cog",
short_help="Transform Geotiff to Cloud-Optimized Geotiff.",
short_help="Transform NetCDF to Cloud-Optimized Geotiff.",
)
@click.option("-d",
"--destination",
required=True,
help="The output directory for the COG")
@click.option("-s",
"--source",
required=True,
help="Path to an input GeoTiff")
help="The output directory for the COGs")
@click.option("-s", "--source", required=True, help="The input NetCDF fle")
def create_cog_command(destination: str, source: str) -> None:
"""Generate a COG from a GeoTiff. The COG will be saved in the desination
with `_cog.tif` appended to the name.
"""Generate a COG from a NetCDF.
Args:
destination (str): Local directory to save output COGs
source (str): A GeoTIFF
source (str): The input JNetCDF file
"""
if not os.path.isdir(destination):
raise IOError(f'Destination folder "{destination}" not found')

output_path = os.path.join(destination,
os.path.basename(source)[:-4] + "_cog.tif")

args = ["-co", "OVERVIEWS=IGNORE_EXISTING"]
output_path = os.path.join(
destination,
os.path.basename(source).replace(".nc4", "") + ".tif")

cogify(source, output_path, args)
cog.create_cog(source, output_path)

return hwsd
33 changes: 32 additions & 1 deletion src/stactools/hwsd/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from pyproj import CRS
from pystac import Link, Provider, ProviderRole
from pystac.extensions.raster import DataType

ID = "hwsd"
EPSG = 4326
Expand Down Expand Up @@ -176,4 +177,34 @@
}
}

NODATA = -1
NO_DATA = -1

DATA_TYPES = {
"AWC_CLASS": DataType.INT16,
"ISSOIL": DataType.INT16,
"MU_GLOBAL": DataType.INT32,
"REF_DEPTH": DataType.INT16,
"ROOTS": DataType.INT16,
"T_BULK_DEN": DataType.FLOAT64,
"S_BULK_DEN": DataType.FLOAT64,
"T_REF_BULK": DataType.FLOAT64,
"S_REF_BULK": DataType.FLOAT64,
"T_CEC_CLAY": DataType.FLOAT64,
"S_CEC_CLAY": DataType.FLOAT64,
"T_CLAY": DataType.FLOAT64,
"S_CLAY": DataType.FLOAT64,
"T_GRAVEL": DataType.FLOAT64,
"S_GRAVEL": DataType.FLOAT64,
"T_SAND": DataType.FLOAT64,
"S_SAND": DataType.FLOAT64,
"T_SILT": DataType.FLOAT64,
"S_SILT": DataType.FLOAT64,
"T_PH_H2O": DataType.FLOAT64,
"S_PH_H2O": DataType.FLOAT64,
"T_C": DataType.FLOAT64,
"S_C": DataType.FLOAT64,
"T_OC": DataType.FLOAT64,
"S_OC": DataType.FLOAT64,
"AWT_S_SOC": DataType.FLOAT64,
"AWT_T_SOC": DataType.FLOAT64,
}
103 changes: 69 additions & 34 deletions src/stactools/hwsd/stac.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,60 @@
import logging
import os
from typing import Any, List

from pystac import (CatalogType, Collection, Extent, MediaType, SpatialExtent,
TemporalExtent)
from typing import Any, List, Optional

from pystac import (
CatalogType,
Collection,
Extent,
MediaType,
SpatialExtent,
TemporalExtent,
)
from pystac.asset import Asset
from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension
from pystac.extensions.projection import (ProjectionExtension,
SummariesProjectionExtension)
from pystac.extensions.raster import RasterBand, RasterExtension
from pystac.extensions.projection import (
ProjectionExtension,
SummariesProjectionExtension,
)
from pystac.extensions.raster import RasterBand, RasterExtension, Sampling
from pystac.extensions.scientific import ScientificExtension
from pystac.item import Item
from pystac.link import Link
from pystac.rel_type import RelType
from pystac.utils import str_to_datetime
from shapely.geometry.geo import box

from stactools.hwsd.constants import (ASSETS_METADATA, CITATION, DESCRIPTION,
DOCUMENTATION, DOI, EPSG, HOMEPAGE_1,
HOMEPAGE_2, HOMEPAGE_REGRIDDED, ID,
KEYWORDS, LICENSE, LICENSE_LINK, NODATA,
PROVIDERS, SPATIAL_EXTENT,
TEMPORAL_EXTENT, THUMBNAIL, TITLE)
from stactools.core.io import ReadHrefModifier

from stactools.hwsd.constants import (
ASSETS_METADATA,
CITATION,
DATA_TYPES,
DESCRIPTION,
DOCUMENTATION,
DOI,
EPSG,
HOMEPAGE_1,
HOMEPAGE_2,
HOMEPAGE_REGRIDDED,
ID,
KEYWORDS,
LICENSE,
LICENSE_LINK,
NO_DATA,
PROVIDERS,
SPATIAL_EXTENT,
TEMPORAL_EXTENT,
THUMBNAIL,
TITLE,
)

logger = logging.getLogger(__name__)


def asset_name_from_href(href):
return os.path.basename(href).replace(".nc4", "").replace(".tif", "")


def create_collection() -> Collection:
"""Create a STAC Collection
Create a STAC Collection for the HWSD.
Expand Down Expand Up @@ -105,7 +134,10 @@ def create_collection() -> Collection:
return collection


def create_item(assets_location: str) -> Item:
def create_item(
cog_href: str,
cog_href_modifier: Optional[ReadHrefModifier] = None,
) -> Item:
"""Create a STAC Item
Create a STAC Item for one year of the HWSD. The asset_href should include
the observation year as the first part of the filename.
Expand Down Expand Up @@ -158,24 +190,27 @@ def create_item(assets_location: str) -> Item:
title="HWSD Documentation",
href=DOCUMENTATION))

asset_names = list(ASSETS_METADATA["Description"].keys())
for asset_name in asset_names:
data_asset = Asset(
href=os.path.join(assets_location, f"{asset_name}.nc4"),
media_type=MediaType.COG,
roles=["data"],
title=asset_name,
description=ASSETS_METADATA["Description"][asset_name],
extra_fields={
"units": ASSETS_METADATA["Units"][asset_name],
"notes": ASSETS_METADATA["Notes"][asset_name],
})
item.add_asset(asset_name, data_asset)

# Include raster information
sampling: Any = "area"
rast_band = RasterBand.create(nodata=NODATA, sampling=sampling)
rast_ext = RasterExtension.ext(data_asset, add_if_missing=True)
rast_ext.bands = [rast_band]
asset_name = asset_name_from_href(cog_href)
data_asset = Asset(href=cog_href,
media_type=MediaType.COG,
roles=["data"],
title=asset_name,
description=ASSETS_METADATA["Description"][asset_name],
extra_fields={
"units": ASSETS_METADATA["Units"][asset_name],
"notes": ASSETS_METADATA["Notes"][asset_name],
})
item.add_asset("data", data_asset)

# Include raster information
rast_ext = RasterExtension.ext(data_asset, add_if_missing=True)
rast_ext.bands = [
RasterBand.create(
nodata=NO_DATA,
sampling=Sampling.AREA,
data_type=DATA_TYPES[asset_name],
# spatial_resolution=30,
)
]

return item
Loading

0 comments on commit 2229b94

Please sign in to comment.