Skip to content

Commit

Permalink
fix docs?
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed Apr 18, 2024
1 parent 9736707 commit 32e70d4
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 35 deletions.
36 changes: 21 additions & 15 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,21 @@ DataArray
DataArrayGroupBy.dims
DataArrayGroupBy.groups

Grouper Objects
---------------

.. currentmodule:: xarray.core

.. autosummary::
:toctree: generated/

groupers.Grouper
groupers.Resampler
groupers.BinGrouper
groupers.UniqueGrouper
groupers.TimeResampler
groupers.EncodedGroups


Rolling objects
===============
Expand Down Expand Up @@ -1026,29 +1041,20 @@ DataArray
Accessors
=========

.. currentmodule:: xarray
.. currentmodule:: xarray.core

.. autosummary::
:toctree: generated/

core.accessor_dt.DatetimeAccessor
core.accessor_dt.TimedeltaAccessor
core.accessor_str.StringAccessor
accessor_dt.DatetimeAccessor
accessor_dt.TimedeltaAccessor
accessor_str.StringAccessor

Grouper Objects
===============
.. autosummary::
:toctree: generated/

groupers.Grouper
groupers.Resampler
groupers.BinGrouper
groupers.UniqueGrouper
groupers.TimeResampler
groupers.EncodedGroups

Custom Indexes
==============
.. currentmodule:: xarray

.. autosummary::
:toctree: generated/

Expand Down
85 changes: 65 additions & 20 deletions xarray/core/groupers.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,20 @@
class EncodedGroups:
"""
Dataclass for storing intermediate values for GroupBy operation.
Returned by factorize method on Grouper objects.
Returned by the ``factorize`` method on Grouper objects.
Parameters
----------
codes: integer codes for each group
full_index: pandas Index for the group coordinate
group_indices: optional, List of indices of array elements belonging
to each group. Inferred if not provided.
unique_coord: Unique group values present in dataset. Inferred if not provided
codes: DataArray
Same shape as the DataArray to group by. Values consist of a unique integer code for each group.
full_index: pd.Index
Pandas Index for the group coordinate containing unique group labels.
This can differ from ``unique_coord`` in the case of resampling and binning,
where certain groups in the output need not be present in the input.
group_indices: list of (int or slice or list of int), optional
List of indices of array elements belonging to each group. Inferred if not provided.
unique_coord: Variable, optional
Unique group values present in dataset. Inferred if not provided.
"""

codes: DataArray
Expand All @@ -69,18 +74,18 @@ def can_squeeze(self) -> bool:
return False

@abstractmethod
def factorize(self, group) -> EncodedGroups:
def factorize(self, group: T_Group) -> EncodedGroups:
"""
Takes the group, and creates intermediates necessary for GroupBy.
These intermediates are
1. codes - Same shape as `group` containing a unique integer code for each group.
2. group_indices - Indexes that let us index out the members of each group.
3. unique_coord - Unique groups present in the dataset.
4. full_index - Unique groups in the output. This differs from `unique_coord` in the
case of resampling and binning, where certain groups in the output are not present in
the input.
Returns an instance of EncodedGroups.
Parameters
----------
group: DataArray
DataArray we are grouping by.
Returns
-------
EncodedGroups
"""
pass

Expand Down Expand Up @@ -108,6 +113,7 @@ def is_unique_and_monotonic(self) -> bool:

@property
def group_as_index(self) -> pd.Index:
"""Return the group DataArray as a pandas Index, caching it on first access."""
if self._group_as_index is None:
self._group_as_index = self.group.to_index()
return self._group_as_index
Expand All @@ -118,7 +124,7 @@ def can_squeeze(self) -> bool:
is_dimension = self.group.dims == (self.group.name,)
return is_dimension and self.is_unique_and_monotonic

def factorize(self, group1d) -> EncodedGroups:
def factorize(self, group1d: T_Group) -> EncodedGroups:
self.group = group1d

if self.can_squeeze:
Expand Down Expand Up @@ -180,7 +186,7 @@ def __post_init__(self) -> None:
if duck_array_ops.isnull(self.bins).all():
raise ValueError("All bin edges are NaN.")

def factorize(self, group) -> EncodedGroups:
def factorize(self, group: T_Group) -> EncodedGroups:
from xarray.core.dataarray import DataArray

data = group.data
Expand Down Expand Up @@ -210,7 +216,46 @@ def factorize(self, group) -> EncodedGroups:

@dataclass
class TimeResampler(Resampler):
"""Grouper object specialized to resampling the time coordinate."""
"""
Grouper object specialized to resampling the time coordinate.
Parameters
----------
closed : {"left", "right"}, optional
Side of each interval to treat as closed.
label : {"left", "right"}, optional
Side of each interval to use for labeling.
base : int, optional
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
.. deprecated:: 2023.03.0
Following pandas, the ``base`` parameter is deprecated in favor
of the ``origin`` and ``offset`` parameters, and will be removed
in a future version of xarray.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
If a datetime is not used, these values are also supported:
- 'epoch': `origin` is 1970-01-01
- 'start': `origin` is the first value of the timeseries
- 'start_day': `origin` is the first day at midnight of the timeseries
- 'end': `origin` is the last value of the timeseries
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default None
An offset timedelta added to the origin.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.
"""

freq: str
closed: SideOptions | None = field(default=None)
Expand Down Expand Up @@ -310,7 +355,7 @@ def first_items(self) -> tuple[pd.Series, np.ndarray]:
_apply_loffset(self.loffset, first_items)
return first_items, codes

def factorize(self, group) -> EncodedGroups:
def factorize(self, group: T_Group) -> EncodedGroups:
self._init_properties(group)
full_index, first_items, codes_ = self._get_index_and_items()
sbins = first_items.values.astype(np.int64)
Expand Down

0 comments on commit 32e70d4

Please sign in to comment.