-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds open_datatree and load_datatree to the tutorial module #10082
base: main
Are you sure you want to change the base?
Changes from all commits
9a42379
0caf54a
ef4cbaa
e092710
37d5cc0
d15e1a9
aab8e1d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,8 +16,10 @@ | |
import numpy as np | ||
|
||
from xarray.backends.api import open_dataset as _open_dataset | ||
from xarray.backends.api import open_datatree as _open_datatree | ||
from xarray.core.dataarray import DataArray | ||
from xarray.core.dataset import Dataset | ||
from xarray.core.datatree import DataTree | ||
|
||
if TYPE_CHECKING: | ||
from xarray.backends.api import T_Engine | ||
|
@@ -248,3 +250,140 @@ def scatter_example_dataset(*, seed: None | int = None) -> Dataset: | |
ds.B.attrs["units"] = "Bunits" | ||
|
||
return ds | ||
|
||
|
||
def open_datatree(
    name: str,
    cache: bool = True,
    cache_dir: None | str | os.PathLike = None,
    *,
    engine: T_Engine = None,
    **kws,
) -> DataTree:
    """
    Open a dataset as a `DataTree` from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z
    * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z
    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls.
        If False, the tree is loaded into memory and the downloaded file
        is deleted before returning.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    engine : str, optional
        Engine forwarded to xarray.open_datatree. When omitted and ``name``
        ends in ``.grib``, ``"cfgrib"`` is selected.
    **kws : dict, optional
        Passed to xarray.open_datatree

    Returns
    -------
    DataTree
        The opened (and, when ``cache=False``, fully loaded) tree.

    Raises
    ------
    ImportError
        If pooch is not installed, or if a ``.grib`` file is requested
        without an explicit engine and cfgrib is not installed.

    See Also
    --------
    tutorial.load_datatree
    open_datatree
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_datatree depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    # Keep pooch quiet unless something actually goes wrong.
    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        # Datasets hosted outside the tutorial repository have a fixed URL.
        url = external_urls[name]
    else:
        path = pathlib.Path(name)
        if not path.suffix:
            # process the name
            default_extension = ".nc"
            if engine is None:
                _check_netcdf_engine_installed(name)
            path = path.with_suffix(default_extension)
        elif path.suffix == ".grib":
            if engine is None:
                engine = "cfgrib"
                try:
                    import cfgrib  # noqa: F401
                except ImportError as e:
                    raise ImportError(
                        "Reading this tutorial dataset requires the cfgrib package."
                    ) from e

        url = f"{base_url}/raw/{version}/{path.name}"

    headers = {"User-Agent": f"xarray {sys.modules['xarray'].__version__}"}
    downloader = pooch.HTTPDownloader(headers=headers)

    # retrieve the file
    filepath = pooch.retrieve(
        url=url, known_hash=None, path=cache_dir, downloader=downloader
    )
    tree = _open_datatree(filepath, engine=engine, **kws)
    if not cache:
        # Pull everything into memory first so the tree stays usable once
        # the backing file is gone.
        # NOTE(review): the file is removed without closing the tree first;
        # confirm this is fine on platforms that lock open files.
        tree = tree.load()
        pathlib.Path(filepath).unlink()

    return tree
|
||
|
||
def load_datatree(*args, **kwargs) -> DataTree:
    """
    Fetch a tutorial dataset from the online repository (requires internet),
    read it fully into memory as a `DataTree`, and close the underlying file.

    A local copy is always preferred when one exists, to avoid network traffic.

    Available datasets:

    * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z
    * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z
    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_datatree

    All positional and keyword arguments are forwarded unchanged to
    ``tutorial.open_datatree``.

    See Also
    --------
    tutorial.open_datatree
    open_datatree
    """
    # The context manager releases the file once the data is in memory.
    with open_datatree(*args, **kwargs) as tree:
        loaded = tree.load()
    return loaded
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated this to use the `xarray.testing` module's `assert_identical` because `xarray.tests` didn't support DataTree objects.