Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds open_datatree and load_datatree to the tutorial module #10082

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1590,6 +1590,8 @@ Tutorial

tutorial.open_dataset
tutorial.load_dataset
tutorial.open_datatree
tutorial.load_datatree

Testing
=======
Expand Down
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ v2025.02.0 (unreleased)

New Features
~~~~~~~~~~~~
- Added :py:func:`tutorial.open_datatree` and :py:func:`tutorial.load_datatree` (:pull:`10082`).
  By `Eni Awowale <https://github.com/eni-awowale>`_.
- Added :py:meth:`Coordinates.from_xindex` as convenience for creating a new :py:class:`Coordinates` object
directly from an existing Xarray index object if the latter supports it (:pull:`10000`)
By `Benoit Bovy <https://github.com/benbovy>`_.
Expand Down
37 changes: 26 additions & 11 deletions xarray/tests/test_tutorial.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
from __future__ import annotations

import pytest

from xarray import DataArray, tutorial
from xarray.tests import assert_identical, network
from xarray import DataArray, DataTree, tutorial
from xarray.testing import assert_identical
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated this to use the xarray.testing module's assert_identical because xarray.tests didn't support DataTree objects.

from xarray.tests import network


@network
class TestLoadDataset:
    """Network tests for ``tutorial.open_dataset`` using the 'tiny' dataset."""

    def test_download_from_github(self, tmp_path) -> None:
        # Download into a per-test cache dir so tests don't share state.
        cache_dir = tmp_path / tutorial._default_cache_dir_name
        ds = tutorial.open_dataset("tiny", cache_dir=cache_dir).load()
        tiny = DataArray(range(5), name="tiny").to_dataset()
        assert_identical(ds, tiny)

    def test_download_from_github_load_without_cache(
        self, tmp_path, monkeypatch
    ) -> None:
        cache_dir = tmp_path / tutorial._default_cache_dir_name

        # With cache=False the file is removed after loading; the result
        # must still match a cached open of the same dataset.
        ds_nocache = tutorial.open_dataset(
            "tiny", cache=False, cache_dir=cache_dir
        ).load()
        ds_cache = tutorial.open_dataset("tiny", cache_dir=cache_dir).load()
        assert_identical(ds_cache, ds_nocache)


@network
class TestLoadDataTree:
    """Network tests for ``tutorial.open_datatree`` using the 'tiny' dataset."""

    def test_download_from_github(self, tmp_path) -> None:
        # Download into a per-test cache dir so tests don't share state.
        cache_dir = tmp_path / tutorial._default_cache_dir_name
        ds = tutorial.open_datatree("tiny", cache_dir=cache_dir).load()
        # 'tiny' maps onto a single-node tree rooted at "/".
        tiny = DataTree.from_dict({"/": DataArray(range(5), name="tiny").to_dataset()})
        assert_identical(ds, tiny)

    def test_download_from_github_load_without_cache(
        self, tmp_path, monkeypatch
    ) -> None:
        cache_dir = tmp_path / tutorial._default_cache_dir_name

        # With cache=False the file is removed after loading; the result
        # must still match a cached open of the same dataset.
        ds_nocache = tutorial.open_datatree(
            "tiny", cache=False, cache_dir=cache_dir
        ).load()
        ds_cache = tutorial.open_datatree("tiny", cache_dir=cache_dir).load()
        assert_identical(ds_cache, ds_nocache)
139 changes: 139 additions & 0 deletions xarray/tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
import numpy as np

from xarray.backends.api import open_dataset as _open_dataset
from xarray.backends.api import open_datatree as _open_datatree
from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset
from xarray.core.datatree import DataTree

if TYPE_CHECKING:
from xarray.backends.api import T_Engine
Expand Down Expand Up @@ -248,3 +250,140 @@ def scatter_example_dataset(*, seed: None | int = None) -> Dataset:
ds.B.attrs["units"] = "Bunits"

return ds


def open_datatree(
    name: str,
    cache: bool = True,
    cache_dir: None | str | os.PathLike = None,
    *,
    engine: T_Engine = None,
    **kws,
) -> DataTree:
    """
    Open a dataset as a `DataTree` from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z
    * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z
    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    engine : str, optional
        The engine to use for opening the file; inferred from the file
        extension when not given.
    **kws : dict, optional
        Passed to xarray.open_datatree

    See Also
    --------
    tutorial.load_datatree
    open_datatree
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_datatree depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    # Silence pooch's per-download INFO messages.
    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        path = pathlib.Path(name)
        if not path.suffix:
            # process the name: extension-less names default to netCDF files,
            # so an installed netCDF-capable engine is required.
            default_extension = ".nc"
            if engine is None:
                _check_netcdf_engine_installed(name)
            path = path.with_suffix(default_extension)
        elif path.suffix == ".grib":
            if engine is None:
                engine = "cfgrib"
                try:
                    import cfgrib  # noqa: F401
                except ImportError as e:
                    raise ImportError(
                        "Reading this tutorial dataset requires the cfgrib package."
                    ) from e

        url = f"{base_url}/raw/{version}/{path.name}"

    # Identify ourselves to the server with the xarray version.
    headers = {"User-Agent": f"xarray {sys.modules['xarray'].__version__}"}
    downloader = pooch.HTTPDownloader(headers=headers)

    # retrieve the file (pooch reuses a locally cached copy when present)
    filepath = pooch.retrieve(
        url=url, known_hash=None, path=cache_dir, downloader=downloader
    )
    ds = _open_datatree(filepath, engine=engine, **kws)
    if not cache:
        # Load eagerly and delete the downloaded file so nothing is cached.
        ds = ds.load()
        pathlib.Path(filepath).unlink()

    return ds


def load_datatree(*args, **kwargs) -> DataTree:
    """
    Open, load into memory (as a `DataTree`), and close a dataset from the online repository
    (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"imerghh_730"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T07:30:00.000Z
    * ``"imerghh_830"``: GPM IMERG Final Precipitation L3 Half Hourly 0.1 degree x 0.1 degree V07 from 2021-08-29T08:30:00.000Z
    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_datatree

    See Also
    --------
    tutorial.open_datatree
    open_datatree
    """
    # Open lazily, force everything into memory, then release the
    # underlying file handle regardless of whether load() raised.
    tree = open_datatree(*args, **kwargs)
    try:
        return tree.load()
    finally:
        tree.close()
Loading