Skip to content

Commit

Permalink
Add an internal function to load GMT remote datasets (GenericMappingT…
Browse files Browse the repository at this point in the history
…ools#2200)

Co-authored-by: Dongdong Tian <[email protected]>
Co-authored-by: Wei Ji <[email protected]>
  • Loading branch information
3 people authored and Josh Sixsmith committed Dec 21, 2022
1 parent 47e1d7f commit 7587a5c
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 108 deletions.
61 changes: 10 additions & 51 deletions pygmt/datasets/earth_age.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
The grids are available in various resolutions.
"""
from pygmt.exceptions import GMTInvalidInput
from pygmt.datasets.load_remote_dataset import _load_remote_dataset
from pygmt.helpers import kwargs_to_strings
from pygmt.io import load_dataarray
from pygmt.src import grdcut, which


@kwargs_to_strings(region="sequence")
Expand Down Expand Up @@ -60,52 +58,13 @@ def load_earth_age(resolution="01d", region=None, registration=None):
Earth seafloor crustal age with resolutions of 5 arc-minutes or higher,
which are stored as smaller tiles.
"""

# earth seafloor crust age data stored as single grids for low resolutions
non_tiled_resolutions = ["01d", "30m", "20m", "15m", "10m", "06m"]
# earth seafloor crust age data stored as tiles for high resolutions
tiled_resolutions = ["05m", "04m", "03m", "02m", "01m"]

if registration in ("pixel", "gridline", None):
# If None, let GMT decide on Pixel/Gridline type
reg = f"_{registration[0]}" if registration else ""
else:
raise GMTInvalidInput(
f"Invalid grid registration: '{registration}', should be either "
"'pixel', 'gridline' or None. Default is None, where a "
"pixel-registered grid is returned unless only the "
"gridline-registered grid is available."
)

if resolution not in non_tiled_resolutions + tiled_resolutions:
raise GMTInvalidInput(f"Invalid Earth age resolution '{resolution}'.")

# Choose earth age data prefix
earth_age_prefix = "earth_age_"

# different ways to load tiled and non-tiled earth age data
# Known issue: tiled grids don't support slice operation
# See https://github.com/GenericMappingTools/pygmt/issues/524
if region is None:
if resolution not in non_tiled_resolutions:
raise GMTInvalidInput(
f"'region' is required for Earth age resolution '{resolution}'."
)
fname = which(f"@earth_age_{resolution}{reg}", download="a")
grid = load_dataarray(fname, engine="netcdf4")
else:
grid = grdcut(f"@{earth_age_prefix}{resolution}{reg}", region=region)

# Add some metadata to the grid
grid.name = "seafloor_age"
grid.attrs["long_name"] = "age of seafloor crust"
grid.attrs["units"] = "Myr"
grid.attrs["vertical_datum"] = "EMG96"
grid.attrs["horizontal_datum"] = "WGS84"
# Remove the actual range because it gets outdated when indexing the grid,
# which causes problems when exporting it to netCDF for usage on the
# command-line.
grid.attrs.pop("actual_range")
for coord in grid.coords:
grid[coord].attrs.pop("actual_range")
dataset_prefix = "earth_age_"
dataset_name = "earth_age"
grid = _load_remote_dataset(
dataset_name=dataset_name,
dataset_prefix=dataset_prefix,
resolution=resolution,
region=region,
registration=registration,
)
return grid
69 changes: 12 additions & 57 deletions pygmt/datasets/earth_relief.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
"""
from packaging.version import Version
from pygmt.clib import Session
from pygmt.datasets.load_remote_dataset import _load_remote_dataset
from pygmt.exceptions import GMTInvalidInput, GMTVersionError
from pygmt.helpers import kwargs_to_strings
from pygmt.io import load_dataarray
from pygmt.src import grdcut, which


@kwargs_to_strings(region="sequence")
Expand Down Expand Up @@ -118,36 +117,9 @@ def load_earth_relief(
... use_srtm=True,
... )
"""
# pylint: disable=too-many-branches
# earth relief data stored as single grids for low resolutions
non_tiled_resolutions = ["01d", "30m", "20m", "15m", "10m", "06m"]
# earth relief data stored as tiles for high resolutions
tiled_resolutions = ["05m", "04m", "03m", "02m", "01m", "30s", "15s", "03s", "01s"]
# resolutions of original land-only SRTM tiles from NASA
land_only_srtm_resolutions = ["03s", "01s"]

if registration in ("pixel", "gridline", None):
# If None, let GMT decide on Pixel/Gridline type
reg = f"_{registration[0]}" if registration else ""
else:
raise GMTInvalidInput(
f"Invalid grid registration: '{registration}', should be either "
"'pixel', 'gridline' or None. Default is None, where a "
"pixel-registered grid is returned unless only the "
"gridline-registered grid is available."
)

if resolution not in non_tiled_resolutions + tiled_resolutions:
raise GMTInvalidInput(f"Invalid Earth relief resolution '{resolution}'.")

# Check combination of resolution and registration.
if (resolution == "15s" and registration == "gridline") or (
resolution in ("03s", "01s") and registration == "pixel"
):
raise GMTInvalidInput(
f"{registration}-registered Earth relief data for "
f"resolution '{resolution}' is not supported."
)
earth_relief_sources = {
"igpp": "earth_relief_",
"gebco": "earth_gebco_",
Expand All @@ -169,38 +141,21 @@ def load_earth_relief(
# Choose earth relief data prefix
if use_srtm and resolution in land_only_srtm_resolutions:
if data_source == "igpp":
earth_relief_prefix = "srtm_relief_"
dataset_prefix = "srtm_relief_"
else:
raise GMTInvalidInput(
f"The {data_source} option is not available if 'use_srtm=True'."
" Set data_source to 'igpp'."
)
else:
earth_relief_prefix = earth_relief_sources.get(data_source)

# different ways to load tiled and non-tiled earth relief data
# Known issue: tiled grids don't support slice operation
# See https://github.com/GenericMappingTools/pygmt/issues/524
if region is None:
if resolution not in non_tiled_resolutions:
raise GMTInvalidInput(
f"'region' is required for Earth relief resolution '{resolution}'."
)
fname = which(f"@{earth_relief_prefix}{resolution}{reg}", download="a")
grid = load_dataarray(fname, engine="netcdf4")
else:
grid = grdcut(f"@{earth_relief_prefix}{resolution}{reg}", region=region)

# Add some metadata to the grid
grid.name = "elevation"
grid.attrs["long_name"] = "elevation relative to the geoid"
grid.attrs["units"] = "meters"
grid.attrs["vertical_datum"] = "EMG96"
grid.attrs["horizontal_datum"] = "WGS84"
# Remove the actual range because it gets outdated when indexing the grid,
# which causes problems when exporting it to netCDF for usage on the
# command-line.
grid.attrs.pop("actual_range")
for coord in grid.coords:
grid[coord].attrs.pop("actual_range")
dataset_prefix = earth_relief_sources[data_source]

dataset_name = "earth_relief"
grid = _load_remote_dataset(
dataset_name=dataset_name,
dataset_prefix=dataset_prefix,
resolution=resolution,
region=region,
registration=registration,
)
return grid
204 changes: 204 additions & 0 deletions pygmt/datasets/load_remote_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
"""
Internal function to load GMT remote datasets.
"""
from typing import Dict, NamedTuple

from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import kwargs_to_strings
from pygmt.io import load_dataarray
from pygmt.src import grdcut, which


class Resolution(NamedTuple):
"""
The available grid registrations for a given resolution and whether it is a
tiled grid.
Attributes
----------
registrations : list
A list of the accepted registrations for a given resolution.
Can be either "pixel" or "gridline".
tiled : bool
States if the given resolution is tiled, which requires an
argument for ``region``."
"""

registrations: list
tiled: bool


class GMTRemoteDataset(NamedTuple):
"""
Standard information about a dataset and grid metadata.
Attributes
----------
title : str
The title of the dataset, used in error messages.
name : str
The name assigned as an attribute to the DataArray.
long_name : str
The long name assigned as an attribute to the DataArray.
units : str
The units of the values in the DataArray.
resolutions : dict
Dictionary of available resolution as keys and the values are
Resolution objects.
extra_attributes : dict
A dictionary of extra or unique attributes of the dataset.
"""

title: str
name: str
long_name: str
units: str
resolutions: Dict[str, Resolution]
extra_attributes: dict


datasets = {
"earth_relief": GMTRemoteDataset(
title="Earth relief",
name="elevation",
long_name="Earth elevation relative to the geoid",
units="meters",
extra_attributes={"vertical_datum": "EGM96", "horizontal_datum": "WGS84"},
resolutions={
"01d": Resolution(["pixel", "gridline"], False),
"30m": Resolution(["pixel", "gridline"], False),
"20m": Resolution(["pixel", "gridline"], False),
"15m": Resolution(["pixel", "gridline"], False),
"10m": Resolution(["pixel", "gridline"], False),
"06m": Resolution(["pixel", "gridline"], False),
"05m": Resolution(["pixel", "gridline"], True),
"04m": Resolution(["pixel", "gridline"], True),
"03m": Resolution(["pixel", "gridline"], True),
"02m": Resolution(["pixel", "gridline"], True),
"01m": Resolution(["pixel", "gridline"], True),
"30s": Resolution(["pixel", "gridline"], True),
"15s": Resolution(["pixel"], True),
"03s": Resolution(["gridline"], True),
"01s": Resolution(["gridline"], True),
},
),
"earth_age": GMTRemoteDataset(
title="seafloor age",
name="seafloor_age",
long_name="age of seafloor crust",
units="Myr",
extra_attributes={"horizontal_datum": "WGS84"},
resolutions={
"01d": Resolution(["pixel", "gridline"], False),
"30m": Resolution(["pixel", "gridline"], False),
"20m": Resolution(["pixel", "gridline"], False),
"15m": Resolution(["pixel", "gridline"], False),
"10m": Resolution(["pixel", "gridline"], False),
"06m": Resolution(["pixel", "gridline"], False),
"05m": Resolution(["pixel", "gridline"], True),
"04m": Resolution(["pixel", "gridline"], True),
"03m": Resolution(["pixel", "gridline"], True),
"02m": Resolution(["pixel", "gridline"], True),
"01m": Resolution(["gridline"], True),
},
),
}


@kwargs_to_strings(region="sequence")
def _load_remote_dataset(
dataset_name, dataset_prefix, resolution, region, registration
):
r"""
Load GMT remote datasets.
Parameters
----------
dataset_name : str
The name for the dataset in the 'datasets' dictionary.
dataset_prefix : str
The prefix for the dataset that will be passed to the GMT C API.
resolution : str
The grid resolution. The suffix ``d``, ``m``, and ``s`` stand for
arc-degree, arc-minute, and arc-second respectively.
region : str or list
The subregion of the grid to load, in the forms of a list
[*xmin*, *xmax*, *ymin*, *ymax*] or a string *xmin/xmax/ymin/ymax*.
Required for tiled grids.
registration : str
Grid registration type. Either ``"pixel"`` for pixel registration or
``"gridline"`` for gridline registration. Default is ``None``, where
a pixel-registered grid is returned unless only the
gridline-registered grid is available.
Returns
-------
grid : :class:`xarray.DataArray`
The GMT remote dataset grid.
Note
----
The returned :class:`xarray.DataArray` doesn't support slice operation for
tiled grids.
"""

if registration in ("pixel", "gridline", None):
# If None, let GMT decide on Pixel/Gridline type
reg = f"_{registration[0]}" if registration else ""
else:
raise GMTInvalidInput(
f"Invalid grid registration: '{registration}', should be either "
"'pixel', 'gridline' or None. Default is None, where a "
"pixel-registered grid is returned unless only the "
"gridline-registered grid is available."
)
dataset = datasets[dataset_name]
if resolution not in dataset.resolutions.keys():
raise GMTInvalidInput(f"Invalid resolution '{resolution}'.")
if registration and (
registration not in dataset.resolutions[resolution].registrations
):
raise GMTInvalidInput(
f"{registration} registration is not available for the "
f"{resolution} {dataset.title} dataset. Only "
f"{dataset.resolutions[resolution].registrations[0]}"
" registration is available."
)

# different ways to load tiled and non-tiled grids.
# Known issue: tiled grids don't support slice operation
# See https://github.com/GenericMappingTools/pygmt/issues/524
if region is None:
if dataset.resolutions[resolution].tiled:
raise GMTInvalidInput(
f"'region' is required for {dataset.title}"
f"resolution '{resolution}'."
)
fname = which(f"@{dataset_prefix}{resolution}{reg}", download="a")
grid = load_dataarray(fname, engine="netcdf4")
else:
grid = grdcut(f"@{dataset_prefix}{resolution}{reg}", region=region)

# Add some metadata to the grid
grid.name = dataset.name
grid.attrs["long_name"] = dataset.long_name
grid.attrs["units"] = dataset.units
for key, value in dataset.extra_attributes.items():
grid.attrs[key] = value
# Remove the actual range because it gets outdated when indexing the grid,
# which causes problems when exporting it to netCDF for usage on the
# command-line.
grid.attrs.pop("actual_range")
for coord in grid.coords:
grid[coord].attrs.pop("actual_range")
return grid
Loading

0 comments on commit 7587a5c

Please sign in to comment.