Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add coordinate disambiguation with cf_xarray #83

Merged
merged 6 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,089 changes: 1,070 additions & 19 deletions docs/examples/example_002_coord_aliases.ipynb

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions docs/faq.rst
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Some common problems (and how to solve them)
============================================
FAQ
===

yt and xarray have many similarities in how they handle their Datasets, but
there are also many aspects that differ to varying degrees. This page describes
some of the difficulties you may encounter while using yt_xarray to communicate
between the two.
between the two and how to solve those issues.

xarray datasets with a mix of dimensionality
********************************************
Expand All @@ -26,7 +26,9 @@ yt datasets have a fixed expectation for coordinate names. In cartesian, these
coordinate names are ``'x'``, ``'y'``, ``'z'`` while for geographic coordinate systems
the coordinate names are ``'latitude'``, ``'longitude'`` and then either ``'altitude'``
or ``'depth'``. To work with xarray variables defined with coordinate names that
differ from these, yt_xarray provides some coordinate aliasing.
differ from these, yt_xarray provides some coordinate aliasing, which in part relies
on `cf_xarray <https://cf-xarray.readthedocs.io>`_ (if it is installed) for
additional conversion to standard names.

See :doc:`examples/example_002_coord_aliases` for an example.

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies=['yt>=4.2.0', 'h5py>=3.4.0', 'pooch>=1.5.1', 'xarray']
"Bug Tracker" = "https://github.com/data-exp-lab/yt_xarray/issues"

[project.optional-dependencies]
full = ["netCDF4", "scipy", "dask[complete]"]
full = ["netCDF4", "scipy", "dask[complete]", "cf_xarray"]
test = ["pytest", "pytest-cov", "cartopy"]
docs = ["Sphinx==7.2.6", "jinja2==3.1.2", "nbsphinx==0.9.3"]

Expand Down
2 changes: 1 addition & 1 deletion yt_xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
# import the xarray accessor so it is registered with xarray

from .accessor import YtAccessor
from .accessor._xr_to_yt import known_coord_aliases
from .accessor._xr_to_yt import known_coord_aliases, reset_coordinate_aliases
from .yt_xarray import open_dataset
73 changes: 64 additions & 9 deletions yt_xarray/accessor/_xr_to_yt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import collections.abc
import enum
from collections import defaultdict
from typing import List, Optional, Tuple

import numpy as np
Expand Down Expand Up @@ -57,7 +58,10 @@ def __init__(
self.time_index_number: int = None
self._process_selection(xr_ds)

self.yt_coord_names = _convert_to_yt_internal_coords(self.selected_coords)
xr_field = xr_ds.data_vars[fields[0]]
self.yt_coord_names = _convert_to_yt_internal_coords(
self.selected_coords, xr_field
)

def _find_units(self, xr_ds) -> dict:
units = {}
Expand Down Expand Up @@ -332,10 +336,26 @@ def interp_validation(self, geometry):
}


known_coord_aliases = {}
_default_known_coord_aliases = {}
for ky, vals in _coord_aliases.items():
for val in vals:
known_coord_aliases[val] = ky
_default_known_coord_aliases[val] = ky

known_coord_aliases = _default_known_coord_aliases.copy()


def reset_coordinate_aliases():
    """Restore ``known_coord_aliases`` to the default alias mapping.

    Any alias that was added is removed, and any default alias that was
    overwritten or deleted is restored.

    The dictionary is modified in place (never rebound), so references
    obtained earlier, e.g. via ``from yt_xarray import known_coord_aliases``,
    keep pointing at the live mapping.
    """
    known_coord_aliases.clear()
    known_coord_aliases.update(_default_known_coord_aliases)


_expected_yt_axes = {
"cartesian": set(["x", "y", "z"]),
Expand All @@ -351,20 +371,55 @@ def interp_validation(self, geometry):
_yt_coord_names += list(vals)


def _convert_to_yt_internal_coords(coord_list):
def _invert_cf_standard_names(standard_names: dict):
inverted_mapping = defaultdict(lambda: set())
for ky, vals in standard_names.items():
for val in vals:
inverted_mapping[val].add(ky)
return inverted_mapping


def _cf_xr_coord_disamb(
    cname: str, xr_field: xr.DataArray
) -> Tuple[Optional[str], bool]:
    """Try to resolve a coordinate name through its CF standard name.

    Parameters
    ----------
    cname
        The coordinate name to resolve.
    xr_field
        The data array whose ``.cf.standard_names`` mapping is consulted.

    Returns
    -------
    tuple
        ``(validated name, cf_xarray_is_installed)``. The name is ``None``
        when cf_xarray is not installed or when no standard name attached to
        ``cname`` is a key of ``known_coord_aliases``.
    """
    try:
        # imported for its side effect only (hence the noqa); presumably this
        # is what makes the ``.cf`` accessor used below available
        import cf_xarray as cfx  # noqa: F401
    except ImportError:
        return None, False

    nm_to_standard = _invert_cf_standard_names(xr_field.cf.standard_names)
    # .get() avoids inserting an empty entry into the defaultdict; sorting
    # makes the choice deterministic when several standard names are attached,
    # and every candidate is checked rather than a single arbitrary one.
    for cf_standard_name in sorted(nm_to_standard.get(cname, ())):
        if cf_standard_name in known_coord_aliases:
            # return the aliased target, as the direct-alias lookup in
            # _convert_to_yt_internal_coords does
            return known_coord_aliases[cf_standard_name], True
    return None, True


def _convert_to_yt_internal_coords(coord_list: List[str], xr_field: xr.DataArray):
yt_coords = []
for c in coord_list:
cname = c.lower()
cf_xarray_exists = None
if cname in known_coord_aliases:
yt_coords.append(known_coord_aliases[cname])
valid_coord_name = known_coord_aliases[cname]
elif cname in _yt_coord_names:
yt_coords.append(cname)
valid_coord_name = cname
else:
raise ValueError(
valid_coord_name, cf_xarray_exists = _cf_xr_coord_disamb(cname, xr_field)
if valid_coord_name is None:
msg = (
f"{c} is not a known coordinate. To load in yt, you "
f"must supply an alias via the yt_xarray.known_coord_aliases"
f" dictionary."
"must supply an alias via the yt_xarray.known_coord_aliases"
" dictionary"
)
if cf_xarray_exists is False:
msg += " or install cf_xarray to check for additional aliases."
raise ValueError(msg)

yt_coords.append(valid_coord_name)

return yt_coords

Expand Down
39 changes: 39 additions & 0 deletions yt_xarray/tests/test_xr_to_yt.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import builtins

import numpy as np
import pytest
import xarray as xr
Expand Down Expand Up @@ -535,6 +537,43 @@ def test_reversed_axis(stretched, use_callable, chunksizes):
assert np.all(np.isfinite(vals))


def test_cf_xarray_disambiguation():
    """Coordinates of the cf_xarray sample dataset resolve to lat/lon names."""
    # cf_xarray is an optional dependency: skip (not error) when it is absent
    airds = pytest.importorskip("cf_xarray.datasets").airds

    # run the whole selection (will internally run coord disambiguation)
    sel = xr2yt.Selection(
        airds, fields=["air"], sel_dict={"time": 0}, sel_dict_type="isel"
    )
    xr_da = airds.air
    selected_names = [
        xr2yt._cf_xr_coord_disamb(c, xr_da)[0] for c in sel.selected_coords
    ]

    assert "latitude" in selected_names
    assert "longitude" in selected_names


def test_missing_cfxarray(monkeypatch):
    """An unknown coordinate raises ValueError when cf_xarray cannot be imported."""
    # the sample dataset itself comes from cf_xarray, so skip when it is absent
    airds = pytest.importorskip("cf_xarray.datasets").airds

    real_import = builtins.__import__

    def _bad_import(name, globals=None, locals=None, fromlist=(), level=0):
        # simulate only cf_xarray being missing; every other import must keep
        # working so unrelated lazy imports cannot break the test
        if name == "cf_xarray" or name.startswith("cf_xarray."):
            raise ImportError(name)
        return real_import(name, globals, locals, fromlist, level)

    xr_da = airds.air
    clist = list(xr_da.dims)
    with monkeypatch.context() as m:
        m.setattr(builtins, "__import__", _bad_import)
        with pytest.raises(ValueError, match=f"{clist[0]} is not"):
            _ = xr2yt._convert_to_yt_internal_coords(clist, xr_da)


def test_coord_alias_reset():
    """A user-added alias is gone after reset_coordinate_aliases()."""
    aliases = xr2yt.known_coord_aliases
    aliases["blah"] = "lwkerj"

    xr2yt.reset_coordinate_aliases()

    assert "blah" not in xr2yt.known_coord_aliases


def test_reader_with_2d_space_time_and_reverse_axis():

# test for https://github.com/data-exp-lab/yt_xarray/issues/86
Expand Down
Loading