Skip to content

Commit

Permalink
fix multiindex unstack future warning (#427)
Browse files Browse the repository at this point in the history
<!-- Please ensure the PR fulfills the following requirements! -->
<!-- If this is your first PR, make sure to add your details to the
AUTHORS.rst! -->
### Pull Request Checklist:
- [ ] This PR addresses an already opened issue (for bug fixes /
features)
    - This PR fixes #xyz
- [ ] (If applicable) Documentation has been added / updated (for bug
fixes / features).
- [x] (If applicable) Tests have been added.
- [ ] This PR does not seem to break the templates.
- [x] CHANGELOG.rst has been updated (with summary of main changes).
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added.

### What kind of change does this PR introduce?

* Explicitly assign the lat and lon coords of the `loc` multiindex.
* in response to: 
```
/tmp/ipykernel_94931/2504572846.py:29: FutureWarning: the `pandas.MultiIndex` object(s) passed as 'loc' coordinate(s) or data variable(s) will no longer be implicitly promoted and wrapped into multiple indexed coordinates in the future (i.e., one coordinate for each multi-index level + one dimension coordinate). If you want to keep this behavior, you need to first wrap it explicitly using `mindex_coords = xarray.Coordinates.from_pandas_multiindex(mindex_obj, 'dim')` and pass it as coordinates, e.g., `xarray.Dataset(coords=mindex_coords)`, `dataset.assign_coords(mindex_coords)` or `dataarray.assign_coords(mindex_coords)`.
```

### Does this PR introduce a breaking change?
no

### Other information:
  • Loading branch information
juliettelavoie authored Jul 9, 2024
2 parents d30d0e7 + 8ea1ab8 commit bba8c97
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Internal changes
* Include domain in `weight_location` in ``regrid_dataset``. (:pull:`414`).
* Added pins to `xarray`, `xclim`, `h5py`, and `netcdf4`. (:pull:`414`).
* Add ``.zip`` and ``.zarr.zip`` as possible file extensions for Zarr datasets. (:pull:`426`).
* Explicitly assign coords of multiindex in `xs.unstack_fill_nan`. (:pull:`427`).

v0.9.1 (2024-06-04)
-------------------
Expand Down
20 changes: 10 additions & 10 deletions src/xscen/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,11 +453,11 @@ def unstack_fill_nan(
if crd.dims == (dim,) and name in coords_and_dims
]
)
out = (
ds.drop_vars(dims)
.assign_coords({dim: pd.MultiIndex.from_arrays(crds, names=dims)})
.unstack(dim)
)

mindex_obj = pd.MultiIndex.from_arrays(crds, names=dims)
mindex_coords = xr.Coordinates.from_pandas_multiindex(mindex_obj, dim)

out = ds.drop_vars(dims).assign_coords(mindex_coords).unstack(dim)

# only reindex with the dims
out = out.reindex(**coords_and_dims)
Expand All @@ -476,11 +476,11 @@ def unstack_fill_nan(
]
)

out = (
ds.drop_vars(dims)
.assign_coords({dim: pd.MultiIndex.from_arrays(crds, names=dims)})
.unstack(dim)
)
# explicitly get lat and lon
mindex_obj = pd.MultiIndex.from_arrays(crds, names=dims)
mindex_coords = xr.Coordinates.from_pandas_multiindex(mindex_obj, dim)

out = ds.drop_vars(dims).assign_coords(mindex_coords).unstack(dim)

if not isinstance(coords, (list, tuple)) and coords is not None:
out = out.reindex(**coords.coords)
Expand Down
59 changes: 59 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
import numpy as np
import pandas as pd
import pytest
import xarray as xr
from xclim.testing.helpers import test_timeseries as timeseries

import xscen as xs
from xscen.testing import datablock_3d


class TestDateParser:
Expand Down Expand Up @@ -83,3 +85,60 @@ def test_get_cat_attrs(self, prefix, var_as_str):
}
elif prefix == "dog:":
assert out == {"source": "CanESM5"}


class TestStackNan:
    """Round-trip checks: ``stack_drop_nans`` followed by ``unstack_fill_nan``."""

    @staticmethod
    def _build_dataset(values):
        """Wrap ``values`` in a "tas" dataset using the arguments shared by all tests."""
        return datablock_3d(
            values,
            "tas",
            "lon",
            -5,
            "lat",
            80.5,
            1,
            1,
            "2000-01-01",
            as_dataset=True,
        )

    @staticmethod
    def _valid_mask(ds):
        """Return a mask that is True where ``tas`` is not null at the first time step."""
        first_step = ds.tas.isel(time=0)
        return xr.where(first_step.isnull(), False, True).drop_vars("time")

    def test_no_nan(self):
        """Without missing values, all 100 points are kept and the round trip is lossless."""
        ds = self._build_dataset(np.zeros((20, 10, 10)))
        stacked = xs.utils.stack_drop_nans(ds, mask=self._valid_mask(ds))
        assert "loc" in stacked.dims
        assert stacked.sizes["loc"] == 100

        restored = xs.utils.unstack_fill_nan(stacked)
        assert restored.equals(ds)

    def test_nan(self, tmp_path):
        """One all-NaN point is dropped, coords are saved to file, and unstacking restores ``ds``."""
        values = np.zeros((20, 10, 10))
        values[:, 0, 0] = [np.nan] * 20
        ds = self._build_dataset(values)

        mask = self._valid_mask(ds)
        ds.attrs["cat:domain"] = "RegionEssai"
        # The same path template is used for writing the coords and for reading them back.
        coords_template = str(tmp_path / "coords_{domain}_{shape}.nc")
        stacked = xs.utils.stack_drop_nans(
            ds,
            mask=mask,
            new_dim="loc1",
            to_file=coords_template,
        )
        assert "loc1" in stacked.dims
        assert stacked.sizes["loc1"] == 99
        assert (tmp_path / "coords_RegionEssai_10x10.nc").is_file()

        restored = xs.utils.unstack_fill_nan(
            stacked, dim="loc1", coords=coords_template
        )
        assert restored.equals(ds)

0 comments on commit bba8c97

Please sign in to comment.