From 1a687ee85271e53cc99520e907e9223f2c7fabeb Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Tue, 26 Mar 2024 15:23:39 -0700 Subject: [PATCH 1/6] Add pyramid_create --- docs/generate-pyramids.md | 22 +++++++++++++ ndpyramid/coarsen.py | 68 ++++++++++++++++++++++++++++++++++++--- tests/test_pyramids.py | 28 +++++++++++++++- 3 files changed, 112 insertions(+), 6 deletions(-) diff --git a/docs/generate-pyramids.md b/docs/generate-pyramids.md index 2e1ca0e..831a911 100644 --- a/docs/generate-pyramids.md +++ b/docs/generate-pyramids.md @@ -23,3 +23,25 @@ pyramid = pyramid_reproject(ds, levels=2) # write the pyramid to zarr pyramid.to_zarr('./path/to/write') ``` + +There's also `pyramid_create`--a more versatile alternative to pyramid_coarsen. + +This function accepts a custom function with the signature: `ds`, `factor`, `dims`. + +Here, the `sel_coarsen` function uses `ds.sel` to perform coarsening: + +```python +def sel_coarsen(ds, factor, dims, **kwargs): + return ds.sel(**{dim: slice(None, None, factor) for dim in dims}) + +factors = [4, 2, 1] +pyramid = pyramid_create( + temperature, + dims=('lat', 'lon'), + factors=factors, + boundary='trim', + func=sel_coarsen, + method_label=method_label, + type_label='pick', +) +``` diff --git a/ndpyramid/coarsen.py b/ndpyramid/coarsen.py index 5e941bf..280c8c6 100644 --- a/ndpyramid/coarsen.py +++ b/ndpyramid/coarsen.py @@ -1,5 +1,7 @@ from __future__ import annotations # noqa: F401 +from typing import Callable + import datatree as dt import xarray as xr @@ -23,6 +25,56 @@ def pyramid_coarsen( Additional keyword arguments to pass to xarray.Dataset.coarsen. """ + def coarsen(ds: xr.Dataset, factor: int, **kwargs): + # merge dictionary via union operator + kwargs |= {d: factor for d in dims} + return ds.coarsen(**kwargs).mean() # type: ignore + + return pyramid_create( + ds, + factors=factors, + dims=dims, + func=coarsen, + method_label="pyramid_coarsen", + type_label="reduce", + **kwargs, + ) + + +def pyramid_create( + ds: xr.Dataset, + *, + factors: list[int], + dims: list[str], + func: Callable, + type_label: str = "reduce", + method_label: str | None = None, + **kwargs, +): + """Create a multiscale pyramid via a given function applied to a dataset. + The generalized version of pyramid_coarsen. + + Parameters + ---------- + ds : xarray.Dataset + The dataset to apply the function to. + factors : list[int] + The factors to coarsen by. + dims : list[str] + The dimensions to coarsen. + func : Callable + The function to apply to the dataset; must accept the + `ds`, `factor`, and `dims` as positional arguments. + type_label : str, optional + The type label to use as metadata for the multiscales spec. + The default is 'reduce'. + method_label : str, optional + The method label to use as metadata for the multiscales spec. + The default is the name of the function. + kwargs : dict + Additional keyword arguments to pass to the func. + + """ # multiscales spec save_kwargs = locals() del save_kwargs['ds'] @@ -30,8 +82,8 @@ def pyramid_coarsen( attrs = { 'multiscales': multiscales_template( datasets=[{'path': str(i)} for i in range(len(factors))], - type='reduce', - method='pyramid_coarsen', + type=type_label, + method=method_label or func.__name__, version=get_version(), kwargs=save_kwargs, ) @@ -42,9 +94,15 @@ def pyramid_coarsen( # pyramid data for key, factor in enumerate(factors): - # merge dictionary via union operator - kwargs |= {d: factor for d in dims} - plevels[str(key)] = ds.coarsen(**kwargs).mean() # type: ignore + plevels[str(key)] = func(ds, factor, dims, **kwargs) plevels['/'] = xr.Dataset(attrs=attrs) return dt.DataTree.from_dict(plevels) + + + +# + +""" + +""" \ No newline at end of file diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index e5f60be..d3bca9a 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -3,7 +3,7 @@ import xarray as xr from zarr.storage import MemoryStore -from ndpyramid import pyramid_coarsen, pyramid_regrid, pyramid_reproject +from ndpyramid import pyramid_coarsen, pyramid_create, pyramid_regrid, pyramid_reproject from ndpyramid.regrid import generate_weights_pyramid, make_grid_ds @@ -21,6 +21,32 @@ def test_xarray_coarsened_pyramid(temperature, benchmark): ) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) + assert pyramid.ds.attrs['multiscales'][0]['method'] == 'pyramid_coarsen' + assert pyramid.ds.attrs['multiscales'][0]['type'] == 'reduce' + pyramid.to_zarr(MemoryStore()) + + +@pytest.mark.parametrize("method_label", [None, "sel_coarsen"]) +def test_xarray_custom_coarsened_pyramid(temperature, benchmark, method_label): + def sel_coarsen(ds, factor, dims, **kwargs): + return ds.sel(**{dim: slice(None, None, factor) for dim in dims}) + + factors = [4, 2, 1] + pyramid = benchmark( + lambda: pyramid_create( + temperature, + dims=('lat', 'lon'), + factors=factors, + boundary='trim', + func=sel_coarsen, + method_label=method_label, + type_label='pick', + ) + ) + assert pyramid.ds.attrs['multiscales'] + assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) + assert pyramid.ds.attrs['multiscales'][0]['method'] == 'sel_coarsen' + assert pyramid.ds.attrs['multiscales'][0]['type'] == 'pick' pyramid.to_zarr(MemoryStore()) From 2ba5f0408d26bf342cf0bef640a29ddec84d22c2 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Tue, 26 Mar 2024 15:30:05 -0700 Subject: [PATCH 2/6] Add pyramid create --- ndpyramid/__init__.py | 1 + ndpyramid/coarsen.py | 75 ++---------------------------------------- ndpyramid/create.py | 67 +++++++++++++++++++++++++++++++++++++ tests/test_pyramids.py | 4 +-- 4 files changed, 73 insertions(+), 74 deletions(-) create mode 100644 ndpyramid/create.py diff --git a/ndpyramid/__init__.py b/ndpyramid/__init__.py index 23a2d95..9f126bd 100644 --- a/ndpyramid/__init__.py +++ b/ndpyramid/__init__.py @@ -1,5 +1,6 @@ # flake8: noqa +from .create import pyramid_create from .coarsen import pyramid_coarsen from .reproject import pyramid_reproject from .regrid import pyramid_regrid diff --git a/ndpyramid/coarsen.py b/ndpyramid/coarsen.py index 280c8c6..cc05023 100644 --- a/ndpyramid/coarsen.py +++ b/ndpyramid/coarsen.py @@ -1,11 +1,9 @@ from __future__ import annotations # noqa: F401 -from typing import Callable - import datatree as dt import xarray as xr -from .utils import get_version, multiscales_template +from .create import pyramid_create def pyramid_coarsen( @@ -35,74 +33,7 @@ def coarsen(ds: xr.Dataset, factor: int, **kwargs): factors=factors, dims=dims, func=coarsen, - method_label="pyramid_coarsen", - type_label="reduce", + method_label='pyramid_coarsen', + type_label='reduce', **kwargs, ) - - -def pyramid_create( - ds: xr.Dataset, - *, - factors: list[int], - dims: list[str], - func: Callable, - type_label: str = "reduce", - method_label: str | None = None, - **kwargs, -): - """Create a multiscale pyramid via a given function applied to a dataset. - The generalized version of pyramid_coarsen. - - Parameters - ---------- - ds : xarray.Dataset - The dataset to apply the function to. - factors : list[int] - The factors to coarsen by. - dims : list[str] - The dimensions to coarsen. - func : Callable - The function to apply to the dataset; must accept the - `ds`, `factor`, and `dims` as positional arguments. - type_label : str, optional - The type label to use as metadata for the multiscales spec. - The default is 'reduce'. - method_label : str, optional - The method label to use as metadata for the multiscales spec. - The default is the name of the function. - kwargs : dict - Additional keyword arguments to pass to the func. - - """ - # multiscales spec - save_kwargs = locals() - del save_kwargs['ds'] - - attrs = { - 'multiscales': multiscales_template( - datasets=[{'path': str(i)} for i in range(len(factors))], - type=type_label, - method=method_label or func.__name__, - version=get_version(), - kwargs=save_kwargs, - ) - } - - # set up pyramid - plevels = {} - - # pyramid data - for key, factor in enumerate(factors): - plevels[str(key)] = func(ds, factor, dims, **kwargs) - - plevels['/'] = xr.Dataset(attrs=attrs) - return dt.DataTree.from_dict(plevels) - - - -# - -""" - -""" \ No newline at end of file diff --git a/ndpyramid/create.py b/ndpyramid/create.py new file mode 100644 index 0000000..9f9d329 --- /dev/null +++ b/ndpyramid/create.py @@ -0,0 +1,67 @@ +from __future__ import annotations # noqa: F401 + +from typing import Callable + +import datatree as dt +import xarray as xr + +from .utils import get_version, multiscales_template + + +def pyramid_create( + ds: xr.Dataset, + *, + factors: list[int], + dims: list[str], + func: Callable, + type_label: str = 'reduce', + method_label: str | None = None, + **kwargs, +): + """Create a multiscale pyramid via a given function applied to a dataset. + The generalized version of pyramid_coarsen. + + Parameters + ---------- + ds : xarray.Dataset + The dataset to apply the function to. + factors : list[int] + The factors to coarsen by. + dims : list[str] + The dimensions to coarsen. + func : Callable + The function to apply to the dataset; must accept the + `ds`, `factor`, and `dims` as positional arguments. + type_label : str, optional + The type label to use as metadata for the multiscales spec. + The default is 'reduce'. + method_label : str, optional + The method label to use as metadata for the multiscales spec. + The default is the name of the function. + kwargs : dict + Additional keyword arguments to pass to the func. + + """ + # multiscales spec + save_kwargs = locals() + del save_kwargs['ds'] + + attrs = { + 'multiscales': multiscales_template( + datasets=[{'path': str(i)} for i in range(len(factors))], + type=type_label, + method=method_label or func.__name__, + version=get_version(), + kwargs=save_kwargs, + ) + } + + # set up pyramid + plevels = {} + + # pyramid data + for key, factor in enumerate(factors): + plevels[str(key)] = func(ds, factor, dims, **kwargs) + + plevels['/'] = xr.Dataset(attrs=attrs) + return dt.DataTree.from_dict(plevels) diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index d3bca9a..5f00d5f 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -26,7 +26,7 @@ def test_xarray_coarsened_pyramid(temperature, benchmark): pyramid.to_zarr(MemoryStore()) -@pytest.mark.parametrize("method_label", [None, "sel_coarsen"]) +@pytest.mark.parametrize('method_label', [None, 'sel_coarsen']) def test_xarray_custom_coarsened_pyramid(temperature, benchmark, method_label): def sel_coarsen(ds, factor, dims, **kwargs): return ds.sel(**{dim: slice(None, None, factor) for dim in dims}) @@ -45,7 +45,7 @@ def sel_coarsen(ds, factor, dims, **kwargs): ) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) - assert pyramid.ds.attrs['multiscales'][0]['method'] == 'sel_coarsen' + assert pyramid.ds.attrs['multiscales'][0]['method'] == 'sel_coarsen' assert pyramid.ds.attrs['multiscales'][0]['type'] == 'pick' pyramid.to_zarr(MemoryStore()) From 220de520a5b6a4ef99016a9c606ec7888963c11a Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Tue, 26 Mar 2024 15:50:10 -0700 Subject: [PATCH 3/6] Update docs/generate-pyramids.md --- docs/generate-pyramids.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/generate-pyramids.md b/docs/generate-pyramids.md index 831a911..d1c66d7 100644 --- a/docs/generate-pyramids.md +++ b/docs/generate-pyramids.md @@ -24,7 +24,7 @@ pyramid = pyramid_reproject(ds, levels=2) pyramid.to_zarr('./path/to/write') ``` -There's also `pyramid_create`--a more versatile alternative to pyramid_coarsen. +There's also `pyramid_create`--a more versatile alternative to `pyramid_coarsen`. This function accepts a custom function with the signature: `ds`, `factor`, `dims`. From ecdb722aaee3745938267a068fc59b37c6de1624 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Thu, 28 Mar 2024 14:20:35 -0700 Subject: [PATCH 4/6] Fix tests --- ndpyramid/coarsen.py | 2 +- ndpyramid/create.py | 3 +++ tests/test_pyramids.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ndpyramid/coarsen.py b/ndpyramid/coarsen.py index cc05023..5ef1076 100644 --- a/ndpyramid/coarsen.py +++ b/ndpyramid/coarsen.py @@ -23,7 +23,7 @@ def pyramid_coarsen( Additional keyword arguments to pass to xarray.Dataset.coarsen. """ - def coarsen(ds: xr.Dataset, factor: int, **kwargs): + def coarsen(ds: xr.Dataset, factor: int, dims: list[str], **kwargs): # merge dictionary via union operator kwargs |= {d: factor for d in dims} return ds.coarsen(**kwargs).mean() # type: ignore diff --git a/ndpyramid/create.py b/ndpyramid/create.py index 9f9d329..3bbeabb 100644 --- a/ndpyramid/create.py +++ b/ndpyramid/create.py @@ -45,6 +45,9 @@ def pyramid_create( # multiscales spec save_kwargs = locals() del save_kwargs['ds'] + del save_kwargs['func'] + del save_kwargs['type_label'] + del save_kwargs['method_label'] attrs = { 'multiscales': multiscales_template( diff --git a/tests/test_pyramids.py b/tests/test_pyramids.py index 5f00d5f..f38bc5f 100644 --- a/tests/test_pyramids.py +++ b/tests/test_pyramids.py @@ -21,7 +21,7 @@ def test_xarray_coarsened_pyramid(temperature, benchmark): ) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) - assert pyramid.ds.attrs['multiscales'][0]['method'] == 'pyramid_coarsen' + assert pyramid.ds.attrs['multiscales'][0]['metadata']['method'] == 'pyramid_coarsen' assert pyramid.ds.attrs['multiscales'][0]['type'] == 'reduce' pyramid.to_zarr(MemoryStore()) @@ -45,7 +45,7 @@ def sel_coarsen(ds, factor, dims, **kwargs): ) assert pyramid.ds.attrs['multiscales'] assert len(pyramid.ds.attrs['multiscales'][0]['datasets']) == len(factors) - assert pyramid.ds.attrs['multiscales'][0]['method'] == 'sel_coarsen' + assert pyramid.ds.attrs['multiscales'][0]['metadata']['method'] == 'sel_coarsen' assert pyramid.ds.attrs['multiscales'][0]['type'] == 'pick' pyramid.to_zarr(MemoryStore()) From b5b2ee6ff492aa385ba89fe30948fd2ce36cf9d3 Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Thu, 4 Apr 2024 10:46:37 -0700 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com> --- docs/generate-pyramids.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/generate-pyramids.md b/docs/generate-pyramids.md index d1c66d7..04464b9 100644 --- a/docs/generate-pyramids.md +++ b/docs/generate-pyramids.md @@ -24,13 +24,13 @@ pyramid = pyramid_reproject(ds, levels=2) pyramid.to_zarr('./path/to/write') ``` -There's also `pyramid_create`--a more versatile alternative to `pyramid_coarsen`. - +There's also `pyramid_create` -- a more versatile alternative to `pyramid_coarsen`. This function accepts a custom function with the signature: `ds`, `factor`, `dims`. - Here, the `sel_coarsen` function uses `ds.sel` to perform coarsening: ```python +from ndpyramid import pyramid_create + def sel_coarsen(ds, factor, dims, **kwargs): return ds.sel(**{dim: slice(None, None, factor) for dim in dims}) @@ -41,7 +41,7 @@ pyramid = pyramid_create( factors=factors, boundary='trim', func=sel_coarsen, - method_label=method_label, + method_label="slice_coarsen", type_label='pick', ) ``` From b869acd3356b1a653e24efe1c9deacd1fc3933f3 Mon Sep 17 00:00:00 2001 From: Andrew Huang Date: Thu, 4 Apr 2024 10:48:10 -0700 Subject: [PATCH 6/6] Add to api rst --- docs/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/api.rst b/docs/api.rst index 723ec9a..81d9f83 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -11,5 +11,6 @@ Top level API :toctree: generated/ pyramid_coarsen + pyramid_create pyramid_reproject pyramid_regrid