Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clean-up and implement the conversion methods #11

Merged
merged 33 commits into from
Jul 10, 2020
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e9f4c91
add a function to attach units to xarray objects
keewis Jun 16, 2020
d047f0d
remove the old and commented out assert_equal_with_units function
keewis Jun 16, 2020
d2c4a04
add utility functions to attach, convert, extract, and strip units
keewis Jun 27, 2020
f8567c2
add the missing assert functions
keewis Jun 27, 2020
33ddd39
reorder the array functions
keewis Jun 27, 2020
b1d9268
implement an extract_units function
keewis Jun 28, 2020
00ea3fd
implement convert_units
keewis Jun 28, 2020
54f385b
implement attach_units
keewis Jun 28, 2020
3101db2
implement strip_units
keewis Jun 28, 2020
b37bdd4
isort and black
keewis Jun 28, 2020
013b056
implement DataArray.pint.to
keewis Jun 28, 2020
860aad2
implement Dataset.pint.to
keewis Jun 28, 2020
0f076a6
set units as optional
keewis Jun 28, 2020
5b1d334
remove a leftover comment about mypy
keewis Jun 29, 2020
e3d3e72
update the minimum required pint version
keewis Jul 1, 2020
6855b31
Merge branch 'master' into to
keewis Jul 8, 2020
3fa00aa
Merge branch 'master' into to
keewis Jul 8, 2020
5052f76
document Dataset.pint.to
keewis Jul 8, 2020
9a17b22
add docstrings to the conversion methods
keewis Jul 8, 2020
50a7c8e
don't require the variable names to be str
keewis Jul 8, 2020
cc999e4
don't try to always convert the DataArray's unit
keewis Jul 8, 2020
cf47518
add the example output
keewis Jul 8, 2020
e56aa30
make sure conversion of only the coords works
keewis Jul 8, 2020
e970515
don't raise if the default value for units is passed
keewis Jul 8, 2020
30b75bc
undo the test changes
keewis Jul 8, 2020
f2c17c3
properly separate the conversions
keewis Jul 8, 2020
2e07ad8
try separating the example descriptions
keewis Jul 8, 2020
1e20fda
properly refer to pint.Unit
keewis Jul 8, 2020
3077b60
show how to use a dict-like to convert the data
keewis Jul 8, 2020
c875299
fix the docstrings
keewis Jul 8, 2020
682ef84
add a comment on the source of the utility function
keewis Jul 9, 2020
426c607
link to the source and add xarray's license
keewis Jul 10, 2020
7ac1c56
add a copyright notice to the xarray license
keewis Jul 10, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 45 additions & 17 deletions pintxarray/accessors.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
# TODO is it possible to import pint-xarray from within xarray if pint is present?
import numpy as np
import pint
from pint.quantity import Quantity
from pint.unit import Unit
from xarray import (
register_dataarray_accessor,
register_dataset_accessor,
DataArray,
Dataset,
Variable,
register_dataarray_accessor,
register_dataset_accessor,
)
from xarray.core.npcompat import IS_NEP18_ACTIVE

import numpy as np

import pint
from pint.quantity import Quantity
from pint.unit import Unit

from . import conversion

if not hasattr(Quantity, "__array_function__"):
raise ImportError(
Expand All @@ -32,6 +31,26 @@
# TODO type hints


def is_dict_like(obj):
    """Report whether ``obj`` looks like a mapping.

    An object qualifies when it exposes both a ``keys`` attribute and
    ``__getitem__``; nothing else about it is inspected.

    Returns
    -------
    bool
    """
    required_attributes = ("keys", "__getitem__")
    return all(hasattr(obj, attribute) for attribute in required_attributes)


def either_dict_or_kwargs(positional, keywords, method_name):
    """Pick between a positional mapping and keyword arguments.

    Parameters
    ----------
    positional : dict-like or None
        The mapping passed positionally, or ``None`` if it was omitted.
    keywords : dict
        The keyword arguments collected by the calling method.
    method_name : str
        Name of the calling method, used only in error messages.

    Returns
    -------
    ``keywords`` if ``positional`` is ``None``, otherwise ``positional``.

    Raises
    ------
    ValueError
        If ``positional`` is given but is not dict-like, or if both
        ``positional`` and a non-empty ``keywords`` are given.
    """
    # nothing passed positionally: the keyword form is the only candidate
    if positional is None:
        return keywords

    if not is_dict_like(positional):
        raise ValueError(
            f"the first argument to .{method_name} must be a dictionary"
        )

    # mixing both forms is ambiguous, so refuse it
    if keywords:
        raise ValueError(
            "cannot specify both keyword and positional "
            f"arguments to .{method_name}"
        )

    return positional


def _array_attach_units(data, unit, convert_from=None):
"""
Internal utility function for attaching units to a numpy-like array,
Expand Down Expand Up @@ -233,15 +252,19 @@ def registry(self):
def registry(self, _):
    # Always refuses the assigned value (it is ignored, hence ``_``).
    # NOTE(review): presumably decorated as the ``registry`` property setter;
    # the decorator is not visible in this hunk, so confirm.
    raise AttributeError("Don't try to change the registry once created")

def to(self, units):
    # NOTE(review): a second ``to`` definition follows directly after this
    # one; if both end up in the same class body the later one replaces this
    # one, so confirm whether this version is still needed.

    # convert by calling ``.to`` on the wrapped data itself
    quantity = self.da.data.to(units)
    # NOTE(review): the keyword below is spelled ``dim`` while xarray's
    # DataArray constructor documents ``dims``; confirm the spelling (and
    # that ``encoding`` is an accepted constructor argument).
    return DataArray(
        dim=self.da.dims,
        data=quantity,
        coords=self.da.coords,
        attrs=self.da.attrs,
        encoding=self.da.encoding,
    )
def to(self, units=None, *, registry=None, **unit_kwargs):
    """Convert the units of the wrapped DataArray.

    Parameters
    ----------
    units : str or pint.Unit or dict-like, optional
        A single unit (applied to the array itself, keyed by its name) or a
        dict-like mapping variable names to units. May be omitted when the
        units are passed as keyword arguments instead.
    registry : optional
        Accepted for interface compatibility; this method does not use it.
    **unit_kwargs
        Keyword form of ``units``. Cannot be combined with a dict-like
        ``units``.

    Returns
    -------
    The result of ``conversion.convert_units`` applied to the wrapped
    DataArray.

    Raises
    ------
    ValueError
        If ``units`` is neither ``None``, a string, a ``pint.Unit`` nor
        dict-like, or if both a dict-like ``units`` and keyword arguments
        are given.
    """
    if isinstance(units, (str, pint.Unit)):
        # a bare unit refers to the array itself
        unit_kwargs[self.da.name] = units
        units = None
    elif units is not None and not is_dict_like(units):
        # ``None`` is the default and means "use the keyword arguments", so
        # it must not be rejected here
        raise ValueError(
            "units must be either a string, a pint.Unit object or a dict-like,"
            f" but got {units!r}"
        )

    units = either_dict_or_kwargs(units, unit_kwargs, "to")

    return conversion.convert_units(self.da, units)

def to_base_units(self):
quantity = self.da.data.to_base_units()
Expand Down Expand Up @@ -349,6 +372,11 @@ def dequantify(self):
}
return Dataset(dequantified_vars, coords=self.ds.coords, attrs=self.ds.attrs)

def to(self, units=None, **unit_kwargs):
    """Convert the units of the wrapped Dataset.

    Parameters
    ----------
    units : dict-like, optional
        Mapping of variable names to units.
    **unit_kwargs
        Keyword form of ``units``; cannot be combined with it.

    Returns
    -------
    The result of ``conversion.convert_units`` applied to the wrapped
    Dataset.
    """
    target_units = either_dict_or_kwargs(units, unit_kwargs, "to")
    return conversion.convert_units(self.ds, target_units)

def to_base_units(self):
    """Build a new Dataset with every data variable in base units.

    Each variable is converted through its own ``.pint.to_base_units()``;
    the coordinates and attributes of the wrapped Dataset are passed on
    as they are.
    """
    converted = {}
    for var_name, array in self.ds.items():
        converted[var_name] = array.pint.to_base_units()

    return Dataset(converted, coords=self.ds.coords, attrs=self.ds.attrs)
Expand Down
221 changes: 221 additions & 0 deletions pintxarray/conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import pint
from xarray import DataArray, Dataset, Variable


def array_attach_units(data, unit, registry=None):
    """Wrap ``data`` in a quantity carrying ``unit``.

    Parameters
    ----------
    data : array-like
        The data to attach units to. Must not already be a quantity.
    unit : str or pint.Unit or None
        The desired unit. ``None`` leaves ``data`` untouched.
    registry : pint.UnitRegistry, optional
        The registry used to build the quantity. Required when ``unit`` is
        a string; if omitted, the registry of ``unit`` is used.

    Returns
    -------
    quantity : pint.Quantity
        The data with units attached, or ``data`` itself if ``unit`` is
        ``None``.

    Raises
    ------
    ValueError
        If ``unit`` has an unsupported type, if ``data`` already is a
        quantity, or if ``unit`` is a string and no registry was given.
    """
    # no unit requested: hand the data back unchanged
    if unit is None:
        return data

    if not isinstance(unit, (pint.Unit, str)):
        raise ValueError(f"cannot use {unit!r} as a unit")

    if isinstance(data, pint.Quantity):
        raise ValueError(
            f"Cannot attach unit {unit!r} to quantity: data "
            f"already has units {data.units}"
        )

    if registry is not None:
        return registry.Quantity(data, unit)

    # without an explicit registry only a unit object can supply one
    if isinstance(unit, str):
        raise ValueError(
            "cannot use a string as unit without specifying a registry"
        )

    return unit._REGISTRY.Quantity(data, unit)


def array_convert_units(data, unit):
    """Convert an array to ``unit``.

    This is roughly the same as ``data.to(unit)``.

    Parameters
    ----------
    data : quantity or array-like
        The data to convert. Anything that is not a quantity is treated as
        dimensionless.
    unit : str or pint.Unit or None
        The unit to convert to. ``None`` leaves ``data`` untouched. A
        string is only accepted if ``data`` is a quantity.

    Returns
    -------
    result : pint.Quantity
        The converted data, or ``data`` itself if ``unit`` is ``None``.

    Raises
    ------
    ValueError
        If ``unit`` has an unsupported type, or if ``unit`` is a string
        while ``data`` is not a quantity.
    """
    if unit is None:
        return data

    if not isinstance(unit, (str, pint.Unit)):
        raise ValueError(f"cannot use {unit!r} as a unit")

    # quantities know how to convert themselves
    if isinstance(data, pint.Quantity):
        return data.to(unit)

    # a plain array has no registry, so a string unit cannot be resolved
    if isinstance(unit, str):
        raise ValueError(f"cannot convert a non-quantity using {unit!r} as unit")

    # treat the plain array as dimensionless in the registry of the unit
    dimensionless = unit._REGISTRY.Quantity(data, "dimensionless")
    return dimensionless.to(unit)


def array_extract_units(data):
    """Return the ``units`` attribute of ``data``.

    Objects without a ``units`` attribute (i.e. non-quantities) yield
    ``None``.
    """
    return getattr(data, "units", None)


def array_strip_units(data):
    """Return the ``magnitude`` attribute of ``data``.

    Objects without a ``magnitude`` attribute (i.e. non-quantities) are
    returned unchanged.
    """
    return getattr(data, "magnitude", data)


def attach_units(obj, units, registry=None):
    """Attach units to an xarray object.

    Parameters
    ----------
    obj : DataArray or Dataset or Variable
        The object to attach units to.
    units : dict
        Mapping of variable names to units. For a ``Variable`` the unit is
        looked up under the key ``None``; for a ``DataArray`` the unit of
        the array itself is looked up under the array's name.
    registry : optional
        Forwarded to ``array_attach_units``.

    Returns
    -------
    A new object of the same kind as ``obj``.

    Raises
    ------
    ValueError
        If ``obj`` is not a DataArray, Dataset or Variable.
    """
    if not isinstance(obj, (DataArray, Dataset, Variable)):
        raise ValueError(f"cannot attach units to {obj!r}: unknown type")

    if isinstance(obj, DataArray):
        # route through a temporary dataset; unnamed arrays get a
        # placeholder name for the round trip
        original_name = obj.name
        temporary_name = (
            original_name if original_name is not None else "<this-array>"
        )
        as_dataset = obj.rename(temporary_name).to_dataset()

        units = units.copy()
        units[temporary_name] = units.get(original_name)

        with_units = attach_units(as_dataset, units, registry=registry)
        return with_units.get(temporary_name).rename(original_name)

    if isinstance(obj, Dataset):

        def attach_to_each(arrays):
            # every variable is handled on its own, keyed by ``None``
            return {
                name: attach_units(
                    array.variable, {None: units.get(name)}, registry=registry
                )
                for name, array in arrays.items()
            }

        data_vars = attach_to_each(obj.data_vars)
        coords = attach_to_each(obj.coords)

        return Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs)

    # only Variable is left
    quantity = array_attach_units(obj.data, units.get(None), registry=registry)
    return obj.copy(data=quantity)


def convert_units(obj, units):
    """Convert the units of an xarray object.

    Parameters
    ----------
    obj : Variable or DataArray or Dataset
        The object to convert.
    units : str or pint.Unit or None or dict-like
        Either a single unit or a dict-like mapping variable names to
        units. A single unit is treated as ``{None: unit}``. Names that
        are missing from the mapping (or mapped to ``None``) are left
        unconverted.

    Returns
    -------
    A new object of the same kind as ``obj``.

    Raises
    ------
    ValueError
        If ``obj`` is not a Variable, DataArray or Dataset.
    """
    # Accept any dict-like (not only ``dict``), which is what the accessors
    # promise their callers. Copying into a plain dict also guarantees that
    # ``.get`` is available and that the caller's mapping is never modified.
    if hasattr(units, "keys") and hasattr(units, "__getitem__"):
        units = dict(units)
    else:
        units = {None: units}

    if isinstance(obj, Variable):
        new_data = array_convert_units(obj.data, units.get(None))
        new_obj = obj.copy(data=new_data)
    elif isinstance(obj, DataArray):
        original_name = obj.name
        # unnamed arrays get a placeholder name for the dataset round trip
        name = obj.name if obj.name is not None else "<this-array>"

        units_ = units.copy()
        # the array's own unit is optional: converting only the
        # coordinates must not fail with a KeyError
        if obj.name in units_:
            units_[name] = units_[obj.name]

        ds = obj.rename(name).to_dataset()
        converted = convert_units(ds, units_)

        new_obj = converted[name].rename(original_name)
    elif isinstance(obj, Dataset):
        # coordinates named after a dimension are passed through unconverted
        coords = {
            name: convert_units(data.variable, units.get(name))
            if name not in obj.dims
            else data
            for name, data in obj.coords.items()
        }
        data_vars = {
            name: convert_units(data.variable, units.get(name))
            for name, data in obj.items()
        }

        new_obj = Dataset(coords=coords, data_vars=data_vars, attrs=obj.attrs)
    else:
        raise ValueError("cannot convert non-xarray objects")

    return new_obj


def extract_units(obj):
    """Collect the units of an xarray object into a dict.

    Parameters
    ----------
    obj : Dataset or DataArray or Variable
        The object to inspect.

    Returns
    -------
    units : dict
        Mapping of names to units (``None`` for non-quantities). A
        ``Variable`` is reported under the key ``None``, a ``DataArray``
        under its own name plus one entry per coordinate, and a
        ``Dataset`` with one entry per data variable and coordinate.

    Raises
    ------
    ValueError
        If ``obj`` is not a Dataset, DataArray or Variable.
    """
    if isinstance(obj, Dataset):
        units = {}
        for name, value in obj.data_vars.items():
            units[name] = array_extract_units(value.data)
    elif isinstance(obj, DataArray):
        units = {obj.name: array_extract_units(obj.data)}
    elif isinstance(obj, Variable):
        # variables have neither a name nor coordinates
        return {None: array_extract_units(obj.data)}
    else:
        raise ValueError(f"unknown type: {type(obj)}")

    # coordinate entries are merged last, so they win on a name clash
    for name, value in obj.coords.items():
        units[name] = array_extract_units(value.data)

    return units


def strip_units(obj):
    """Remove the units from an xarray object.

    Parameters
    ----------
    obj : Variable or DataArray or Dataset
        The object to strip the units from.

    Returns
    -------
    A new object of the same kind as ``obj`` whose data (and, for
    DataArray and Dataset, coordinates) went through
    ``array_strip_units``.

    Raises
    ------
    ValueError
        If ``obj`` is not a Variable, DataArray or Dataset.
    """
    if isinstance(obj, Variable):
        data = array_strip_units(obj.data)
        new_obj = obj.copy(data=data)
    elif isinstance(obj, DataArray):
        original_name = obj.name
        # unnamed arrays get a placeholder name for the dataset round trip
        name = obj.name if obj.name is not None else "<this-array>"
        ds = obj.rename(name).to_dataset()
        stripped = strip_units(ds)

        new_obj = stripped[name].rename(original_name)
    elif isinstance(obj, Dataset):
        data_vars = {
            name: strip_units(array.variable) for name, array in obj.data_vars.items()
        }
        coords = {
            name: strip_units(array.variable) for name, array in obj.coords.items()
        }

        new_obj = Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs)
    else:
        # f-string so that the offending object actually shows up in the message
        raise ValueError(f"cannot strip units from {obj!r}: unknown type")

    return new_obj
15 changes: 4 additions & 11 deletions pintxarray/tests/test_accessors.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
import numpy as np
import pytest

import xarray as xr
from xarray.testing import assert_equal

import numpy as np
from numpy.testing import assert_array_equal

from pint import UnitRegistry
from pint.errors import UndefinedUnitError
from xarray.testing import assert_equal

# from pint.unit import Unit
from pint.errors import UndefinedUnitError # , DimensionalityError

# from pintxarray.accessors import PintDataArrayAccessor, PintDatasetAccessor
from .utils import raises_regex # extract_units

from .utils import raises_regex

# make sure scalars are converted to 0d arrays so quantities can
# always be treated like ndarrays
Expand Down
Loading