Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rewrite quantify and dequantify #17

Merged
merged 38 commits into from
Jul 17, 2020
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
6209c98
add a function to extract / retrieve unit attributes
keewis Jul 11, 2020
0edc9fc
add a function to zip mappings
keewis Jul 11, 2020
05ff46a
partially rewrite Dataset.pint.quantify
keewis Jul 11, 2020
a831408
add tests for the unit attribute extraction function
keewis Jul 13, 2020
a4dd9e5
also make sure that deleting attributes works
keewis Jul 13, 2020
96d64c9
add a function to attach units as string attributes
keewis Jul 13, 2020
5cb7605
also extract unit attributes from variable objects
keewis Jul 13, 2020
33e6027
refactor the unit attr extraction tests
keewis Jul 13, 2020
0dbec68
allow using a different attr name
keewis Jul 13, 2020
cb2fa65
use the conversion functions to implement dequantify
keewis Jul 13, 2020
29add91
update the docs on the return value
keewis Jul 13, 2020
a8b14af
update the conditions for _decide_units
keewis Jul 13, 2020
1fde9a0
rewrite the Dataset quantify and dequantify methods
keewis Jul 13, 2020
6868442
raise on unit stripped warnings
keewis Jul 14, 2020
68df60c
remove the test checking that dequantify raises if there were no quan…
keewis Jul 14, 2020
ce40356
make sure Dataset.pint.quantify raises only if we would overwrite a q…
keewis Jul 14, 2020
c4dca20
don't try to put units in indexes
keewis Jul 14, 2020
61cc2e0
check the attributes after dequantify more thoroughly
keewis Jul 14, 2020
43e91f0
make sure the attributes are strings
keewis Jul 14, 2020
c1c5e52
move the str conversion of pint.Unit to a util function
keewis Jul 14, 2020
1ead700
update the dequantify docstrings
keewis Jul 14, 2020
a34c753
update the quantify docstrings
keewis Jul 14, 2020
66cfdbd
add tests for Dataset.pint.dequantify
keewis Jul 14, 2020
ee036b3
update the parameter spec of Dataset.pint.quantify
keewis Jul 14, 2020
75d26ad
remove the old attach and quantify functions
keewis Jul 14, 2020
72c6cb9
fix the template used to format accessor attributes
keewis Jul 14, 2020
bb38e64
remove the attrs on quantify
keewis Jul 14, 2020
fc5d615
make the description of the units kwargs easier to understand
keewis Jul 14, 2020
c4f74db
mention the format of the units
keewis Jul 14, 2020
4fdf8e8
Merge branch 'master' into quantify
keewis Jul 14, 2020
ac63a01
don't use rst references in the descriptions in docstrings
keewis Jul 14, 2020
7e16985
warn about loading data into memory while quantifying
keewis Jul 14, 2020
131403c
don't try to quantify indexes
keewis Jul 15, 2020
f764586
replace the delete parameter with a strip function
keewis Jul 15, 2020
e703909
update the docstrings
keewis Jul 15, 2020
f6e0882
more docstring updates
keewis Jul 15, 2020
9d59e6f
Merge branch 'master' into quantify
keewis Jul 17, 2020
08b2c1e
use K instead of degK
keewis Jul 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 155 additions & 75 deletions pint_xarray/accessors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# TODO is it possible to import pint-xarray from within xarray if pint is present?
import itertools

import numpy as np
import pint
from pint.quantity import Quantity
Expand Down Expand Up @@ -35,6 +37,40 @@ def is_dict_like(obj):
return hasattr(obj, "keys") and hasattr(obj, "__getitem__")


def zip_mappings(*mappings, fill_value=None):
    """Zip mappings by combining the values of common keys into tuples.

    Works like itertools.zip_longest, so if a key is missing from a
    mapping, it is replaced by ``fill_value``.

    Parameters
    ----------
    *mappings : dict-like
        The mappings to zip. Each one must provide ``keys`` and ``get``.
    fill_value
        The value to use if a key is missing from a mapping.

    Returns
    -------
    zipped : dict
        Maps every key found in any of the mappings to a tuple with one
        entry per mapping, in the order the mappings were passed. Keys
        appear in the order in which they were first encountered.
    """
    # dict.fromkeys removes duplicates like a set does, but keeps the
    # first-seen order, so the result does not depend on hash ordering
    keys = dict.fromkeys(
        itertools.chain.from_iterable(mapping.keys() for mapping in mappings)
    )

    # TODO: could this be made more efficient using itertools.groupby?
    return {
        key: tuple(mapping.get(key, fill_value) for mapping in mappings)
        for key in keys
    }


def units_to_str_or_none(mapping):
    """Replace the unit objects in a mapping with their string form.

    Values that are ``Unit`` instances are converted with ``str``; every
    other value (for example ``None``) is passed through unchanged.

    Parameters
    ----------
    mapping : dict-like
        Maps names to units or other values.

    Returns
    -------
    converted : dict
        A new dict with the same keys as ``mapping``.
    """
    converted = {}
    for name, unit in mapping.items():
        if isinstance(unit, Unit):
            unit = str(unit)
        converted[name] = unit
    return converted


# based on xarray.core.utils.either_dict_or_kwargs
# https://github.com/pydata/xarray/blob/v0.15.1/xarray/core/utils.py#L249-L268
def either_dict_or_kwargs(positional, keywords, method_name):
Expand Down Expand Up @@ -106,11 +142,13 @@ def _get_registry(unit_registry, registry_kwargs):
return unit_registry


def _decide_units(units, registry, attrs):
if units is None:
def _decide_units(units, registry, unit_attribute):
if units is None and unit_attribute is None:
# or warn and return None?
raise ValueError("no units given")
elif units is None:
# TODO option to read and decode units according to CF conventions (see MetPy)?
attr_units = attrs["units"]
units = registry.parse_expression(attr_units)
units = registry.parse_expression(unit_attribute).units
elif isinstance(units, Unit):
# TODO do we have to check what happens if someone passes a Unit instance
# without creating a unit registry?
Expand Down Expand Up @@ -144,7 +182,9 @@ class PintDataArrayAccessor:
def __init__(self, da):
self.da = da

def quantify(self, units=None, unit_registry=None, registry_kwargs=None):
def quantify(
self, units=None, unit_registry=None, registry_kwargs=None, **unit_kwargs
):
"""
Attaches units to the DataArray.

Expand All @@ -156,75 +196,87 @@ def quantify(self, units=None, unit_registry=None, registry_kwargs=None):

Parameters
----------
units : pint.Unit or str, optional
Physical units to use for this DataArray. If not provided, will try
to read them from `DataArray.attrs['units']` using pint's parser.
unit_registry : `pint.UnitRegistry`, optional
units : pint.Unit or str or mapping of hashable to pint.Unit or str, optional
Physical units to use for this DataArray. If not provided, will try
to read them from ``DataArray.attrs['units']`` using pint's parser.
unit_registry : pint.UnitRegistry, optional
Unit registry to be used for the units attached to this DataArray.
If not given then a default registry will be created.
registry_kwargs : dict, optional
Keyword arguments to be passed to `pint.UnitRegistry`.
**unit_kwargs
Keyword argument form of units.

Returns
-------
quantified - DataArray whose wrapped array data will now be a Quantity
array with the specified units.
quantified : DataArray
DataArray whose wrapped array data will now be a Quantity
array with the specified units.

Examples
--------
>>> da = xr.DataArray(
... data=[0.4, 0.9, 1.7, 4.8, 3.2, 9.1],
... dims="frequency",
... coords={"wavelength": [1e-4, 2e-4, 4e-4, 6e-4, 1e-3, 2e-3]},
... )
>>> da.pint.quantify(units='Hz')
<xarray.DataArray (frequency: 6)>
Quantity([ 0.4, 0.9, 1.7, 4.8, 3.2, 9.1], 'Hz')
Coordinates:
* wavelength (wavelength) np.array 1e-4, 2e-4, 4e-4, 6e-4, 1e-3, 2e-3
"""

# TODO should also quantify coordinates (once explicit indexes ready)

if isinstance(self.da.data, Quantity):
raise ValueError(
f"Cannot attach unit {units} to quantity: data "
f"already has units {self.da.data.units}"
)

registry = _get_registry(unit_registry, registry_kwargs)
if isinstance(units, (str, pint.Unit)):
if self.da.name in unit_kwargs:
raise ValueError(
f"ambiguous values given for {repr(self.da.name)}:"
f" {repr(units)} and {repr(unit_kwargs[self.da.name])}"
)
unit_kwargs[self.da.name] = units
units = None

units = _decide_units(units, registry, self.da.attrs)
units = either_dict_or_kwargs(units, unit_kwargs, ".quantify")

quantity = _array_attach_units(self.da.data, units, convert_from=None)
registry = _get_registry(unit_registry, registry_kwargs)

# TODO should we (temporarily) remove the attrs here so that they don't become inconsistent?
return DataArray(
dims=self.da.dims, data=quantity, coords=self.da.coords, attrs=self.da.attrs
)
unit_attrs = conversion.extract_unit_attributes(self.da, delete=False)

units = {
name: _decide_units(unit, registry, unit_attribute)
for name, (unit, unit_attribute) in zip_mappings(units, unit_attrs).items()
if unit is not None or unit_attribute is not None
}

return conversion.attach_units(self.da, units)

def dequantify(self):
"""
Removes units from the DataArray and it's coordinates.
Removes units from the DataArray and its coordinates.

Will replace `.attrs['units']` on each variable with a string
Will replace ``.attrs['units']`` on each variable with a string
representation of the `pint.Unit` instance.

Returns
-------
dequantified - DataArray whose array data is unitless, and of the type
that was previously wrapped by `pint.Quantity`.
dequantified : DataArray
DataArray whose array data is unitless, and of the type
that was previously wrapped by `pint.Quantity`.
"""

if not isinstance(self.da.data, Quantity):
raise ValueError(
"Cannot remove units from data that does not have" " units"
)

# TODO also dequantify coords (once explicit indexes ready)
da = DataArray(
dims=self.da.dims,
data=self.da.pint.magnitude,
coords=self.da.coords,
attrs=self.da.attrs,
units = units_to_str_or_none(conversion.extract_units(self.da))
new_obj = conversion.attach_unit_attributes(
conversion.strip_units(self.da), units,
)
da.attrs["units"] = str(self.da.data.units)
return da

return new_obj

@property
def magnitude(self):
Expand Down Expand Up @@ -400,69 +452,97 @@ class PintDatasetAccessor:
def __init__(self, ds):
self.ds = ds

def quantify(self, units=None, unit_registry=None, registry_kwargs=None):
def quantify(
self, units=None, unit_registry=None, registry_kwargs=None, **unit_kwargs
):
"""
Attaches units to each variable in the Dataset.

Units can be specified as a pint.Unit or as a string, which will
be parsed by the given unit registry. If no units are specified then
the units will be parsed from the `'units'` entry of the DataArray's
`.attrs`. Will raise a ValueError if any of the DataArrays already
contain a unit-aware array.
Units can be specified as a :py:class:`pint.Unit` or as a
string, which will be parsed by the given unit registry. If no
units are specified then the units will be parsed from the
``"units"`` entry of the Dataset variable's ``.attrs``. Will
raise a ValueError if any of the variables already contain a
unit-aware array.

Parameters
----------
units : mapping from variable names to pint.Unit or str, optional
Physical units to use for particular DataArrays in this Dataset. If
not provided, will try to read them from
`Dataset[var].attrs['units']` using pint's parser.
units : mapping of hashable to pint.Unit or str, optional
dcherian marked this conversation as resolved.
Show resolved Hide resolved
Physical units to use for particular DataArrays in this
Dataset. If not provided, will try to read them from
``Dataset[var].attrs['units']`` using pint's parser.
unit_registry : `pint.UnitRegistry`, optional
Unit registry to be used for the units attached to each DataArray
in this Dataset. If not given then a default registry will be
created.
registry_kwargs : dict, optional
Keyword arguments to be passed to `pint.UnitRegistry`.
**unit_kwargs
Keyword argument form of ``units``.

Returns
-------
quantified - Dataset whose variables will now contain Quantity
arrays with units.
"""
quantified : Dataset
Dataset whose variables will now contain Quantity arrays
with units.

for var in self.ds.data_vars:
if isinstance(self.ds[var].data, Quantity):
raise ValueError(
f"Cannot attach unit to quantity: data "
f"variable {var} already has units "
f"{self.ds[var].data.units}"
)
Examples
--------
>>> ds = xr.Dataset(
... {"a": ("x", [0, 3, 2], {"units": "m"}), "b": ("x", [5, -2, 1])},
... coords={"x": [0, 1, 2], "u": ("x", [-1, 0, 1], {"units": "s"})},
... )

>>> ds.pint.quantify()
<xarray.Dataset>
Dimensions: (x: 3)
Coordinates:
* x (x) int64 0 1 2
u (x) int64 <Quantity([-1 0 1], 'second')>
Data variables:
a (x) int64 <Quantity([0 3 2], 'meter')>
b (x) int64 5 -2 1
>>> ds.pint.quantify({"b": "dm"})
<xarray.Dataset>
Dimensions: (x: 3)
Coordinates:
* x (x) int64 0 1 2
u (x) int64 <Quantity([-1 0 1], 'second')>
Data variables:
a (x) int64 <Quantity([0 3 2], 'meter')>
b (x) int64 <Quantity([ 5 -2 1], 'decimeter')>
"""
units = either_dict_or_kwargs(units, unit_kwargs, ".quantify")
registry = _get_registry(unit_registry, registry_kwargs)

if units is None:
units = {name: None for name in self.ds}

# TODO should we (temporarily) remove the attrs here so that they don't become inconsistent?
unit_attrs = conversion.extract_unit_attributes(self.ds, delete=False)
units = {
name: _decide_units(units.get(name, None), registry, var.attrs)
for name, var in self.ds.data_vars.items()
name: _decide_units(unit, registry, attr)
for name, (unit, attr) in zip_mappings(units, unit_attrs).items()
if unit is not None or attr is not None
}

quantified_vars = {
name: _quantify_variable(var, units[name])
for name, var in self.ds.data_vars.items()
}

# TODO should also quantify coordinates (once explicit indexes ready)
# TODO should we (temporarily) remove the attrs here so that they don't become inconsistent?
return Dataset(
data_vars=quantified_vars, coords=self.ds.coords, attrs=self.ds.attrs
)
return conversion.attach_units(self.ds, units)

def dequantify(self):
dequantified_vars = {
name: da.pint.to_base_units() for name, da in self.ds.items()
}
return Dataset(dequantified_vars, coords=self.ds.coords, attrs=self.ds.attrs)
"""
Removes units from the Dataset and its coordinates.

Will replace ``.attrs['units']`` on each variable with a string
representation of the :py:class:`pint.Unit` instance.

Returns
-------
dequantified : Dataset
Dataset whose data variables are unitless, and of the type
that was previously wrapped by :py:class:`pint.Quantity`.
"""
units = units_to_str_or_none(conversion.extract_units(self.ds))
new_obj = conversion.attach_unit_attributes(
conversion.strip_units(self.ds), units
)
return new_obj

def to(self, units=None, **unit_kwargs):
""" convert the quantities in a DataArray
Expand Down
45 changes: 45 additions & 0 deletions pint_xarray/conversion.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import itertools

import pint
from xarray import DataArray, Dataset, Variable

Expand Down Expand Up @@ -130,6 +132,30 @@ def attach_units(obj, units, registry=None):
return new_obj


def attach_unit_attributes(obj, units, attr="units"):
    """Store units as attributes on a copy of an xarray object.

    Parameters
    ----------
    obj : DataArray, Dataset or Variable
        The object to attach the unit attributes to. It is not modified;
        the attributes are set on the result of ``obj.copy()``.
    units : mapping of hashable to object
        Maps variable names to the value to store. For a DataArray, the
        array itself is looked up by ``obj.name`` and the coordinates by
        their names. For a Variable, the key ``None`` is used. Names that
        are missing or map to ``None`` are left untouched.
    attr : str, default: "units"
        The name of the attribute to write.

    Returns
    -------
    new_obj : DataArray, Dataset or Variable
        The copy of ``obj`` with the attributes set.

    Raises
    ------
    ValueError
        If ``obj`` is not a DataArray, Dataset or Variable.
    """
    # validate before copying, so unsupported objects always fail with the
    # documented ValueError instead of whatever ``obj.copy`` would raise
    if not isinstance(obj, (DataArray, Dataset, Variable)):
        raise ValueError(f"cannot attach unit attributes to {obj!r}: unknown type")

    new_obj = obj.copy()
    if isinstance(obj, DataArray):
        targets = itertools.chain([(obj.name, new_obj)], new_obj.coords.items())
    elif isinstance(obj, Dataset):
        targets = new_obj.variables.items()
    else:
        targets = [(None, new_obj)]

    for name, var in targets:
        unit = units.get(name)
        # never write a ``None`` attribute: all three object types skip
        # entries without a unit
        if unit is not None:
            var.attrs[attr] = unit

    return new_obj


def convert_units(obj, units):
if not isinstance(units, dict):
units = {None: units}
Expand Down Expand Up @@ -196,6 +222,25 @@ def extract_units(obj):
return units


def extract_unit_attributes(obj, delete=False, attr="units"):
    """Collect the unit attributes of an xarray object.

    Parameters
    ----------
    obj : DataArray, Dataset or Variable
        The object to read the attributes from.
    delete : bool, default: False
        If true, the attribute is popped from each ``attrs`` dict instead
        of only being read.
    attr : str, default: "units"
        The name of the attribute to look up.

    Returns
    -------
    units : dict
        Maps names to the attribute value, or to ``None`` where the
        attribute is missing. A DataArray contributes itself under
        ``obj.name`` plus its coordinates; a Dataset contributes all of
        its variables; a Variable contributes a single ``None`` key.

    Raises
    ------
    ValueError
        If ``obj`` is not a DataArray, Dataset or Variable.
    """
    if delete:
        fetch = dict.pop
    else:
        fetch = dict.get

    if isinstance(obj, DataArray):
        named_vars = itertools.chain([(obj.name, obj)], obj.coords.items())
    elif isinstance(obj, Dataset):
        named_vars = obj.variables.items()
    elif isinstance(obj, Variable):
        named_vars = [(None, obj)]
    else:
        raise ValueError(
            f"cannot retrieve unit attributes from unknown type: {type(obj)}"
        )

    units = {}
    for name, var in named_vars:
        units[name] = fetch(var.attrs, attr, None)

    return units


def strip_units(obj):
if isinstance(obj, Variable):
data = array_strip_units(obj.data)
Expand Down
Loading