Skip to content

Commit

Permalink
Normalise units of coordinate bounds (#5746)
Browse files Browse the repository at this point in the history
* Normalise coordinate bounds units

* fix mock tests

* add normalise unit tests

* extend build aux/dim coords unit tests with normalisation

* support build aux coord with rollaxis

* add whatsnew entry

* review actions
  • Loading branch information
bjlittle authored Feb 16, 2024
1 parent fd9b8cb commit 63b44f0
Show file tree
Hide file tree
Showing 5 changed files with 256 additions and 66 deletions.
4 changes: 4 additions & 0 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ This document explains the changes made to Iris for this release
:mod:`iris.fileformats._nc_load_rules.helpers` to lessen warning duplication.
(:issue:`5536`, :pull:`5685`)

#. `@bjlittle`_ fixed coordinate construction in the NetCDF loading pipeline to
ensure that bounds have the same units as the associated points.
(:issue:`1801`, :pull:`5746`)


💣 Incompatible Changes
=======================
Expand Down
70 changes: 67 additions & 3 deletions lib/iris/fileformats/_nc_load_rules/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
build routines, and which it does not use.
"""
from __future__ import annotations

import re
from typing import List
from typing import TYPE_CHECKING, List, Optional
import warnings

import cf_units
Expand All @@ -35,6 +37,12 @@
import iris.std_names
import iris.util

if TYPE_CHECKING:
from numpy.ma import MaskedArray
from numpy.typing import ArrayLike

from iris.fileformats.cf import CFBoundaryVariable

# TODO: should un-addable coords / cell measures / etcetera be skipped? iris#5068.

#
Expand Down Expand Up @@ -896,8 +904,9 @@ def get_attr_units(cf_var, attributes):
cf_units.as_unit(attr_units)
except ValueError:
# Using converted unicode message. Can be reverted with Python 3.
msg = "Ignoring netCDF variable {!r} invalid units {!r}".format(
cf_var.cf_name, attr_units
msg = (
f"Ignoring invalid units {attr_units!r} on netCDF variable "
f"{cf_var.cf_name!r}."
)
warnings.warn(
msg,
Expand Down Expand Up @@ -1024,6 +1033,57 @@ def reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var):
return bounds_data


################################################################################
def _normalise_bounds_units(
points_units: str, cf_bounds_var: CFBoundaryVariable, bounds_data: ArrayLike
) -> Optional[MaskedArray]:
"""Ensure bounds have units compatible with points.
If required, the `bounds_data` will be converted to the `points_units`.
If the bounds units are not convertible, a warning will be issued and
the `bounds_data` will be ignored.
Bounds with invalid units will be gracefully left unconverted and passed through.
Parameters
----------
points_units : str
The units of the coordinate points.
cf_bounds_var : CFBoundaryVariable
The serialized NetCDF bounds variable.
bounds_data : MaskedArray
The pre-processed data of the bounds variable.
Returns
-------
MaskedArray or None
The bounds data with the same units as the points, or ``None``
if the bounds units are not convertible to the points units.
"""
bounds_units = get_attr_units(cf_bounds_var, {})

if bounds_units != UNKNOWN_UNIT_STRING:
points_units = cf_units.Unit(points_units)
bounds_units = cf_units.Unit(bounds_units)

if bounds_units != points_units:
if bounds_units.is_convertible(points_units):
bounds_data = bounds_units.convert(bounds_data, points_units)
else:
wmsg = (
f"Ignoring bounds on NetCDF variable {cf_bounds_var.cf_name!r}. "
f"Expected units compatible with {points_units.origin!r}, got "
f"{bounds_units.origin!r}."
)
warnings.warn(
wmsg, category=iris.exceptions.IrisCfLoadWarning, stacklevel=2
)
bounds_data = None

return bounds_data


################################################################################
def build_dimension_coordinate(
engine, cf_coord_var, coord_name=None, coord_system=None
Expand Down Expand Up @@ -1061,6 +1121,8 @@ def build_dimension_coordinate(
# dimension names.
if cf_bounds_var.shape[:-1] != cf_coord_var.shape:
bounds_data = reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var)

bounds_data = _normalise_bounds_units(attr_units, cf_bounds_var, bounds_data)
else:
bounds_data = None

Expand Down Expand Up @@ -1186,6 +1248,8 @@ def build_auxiliary_coordinate(
# compatibility with array creators (i.e. dask)
bounds_data = np.asarray(bounds_data)
bounds_data = reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var)

bounds_data = _normalise_bounds_units(attr_units, cf_bounds_var, bounds_data)
else:
bounds_data = None

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Test function :func:`iris.fileformats._nc_load_rules.helpers._normalise_bounds_units`."""

# import iris tests first so that some things can be initialised before
# importing anything else
from typing import Optional
from unittest import mock

import numpy as np
import pytest

from iris.exceptions import IrisCfLoadWarning
from iris.fileformats._nc_load_rules.helpers import (
_normalise_bounds_units,
_WarnComboIgnoringCfLoad,
)

BOUNDS = mock.sentinel.bounds
CF_NAME = "dummy_bnds"


def _make_cf_bounds_var(
units: Optional[str] = None,
unitless: bool = False,
) -> mock.MagicMock:
"""Construct a mock CF bounds variable."""
if units is None:
units = "days since 1970-01-01"

cf_data = mock.Mock(spec=[])
# we want to mock the absence of flag attributes to helpers.get_attr_units
# see https://docs.python.org/3/library/unittest.mock.html#deleting-attributes
del cf_data.flag_values
del cf_data.flag_masks
del cf_data.flag_meanings

cf_var = mock.MagicMock(
cf_name=CF_NAME,
cf_data=cf_data,
units=units,
calendar=None,
dtype=float,
)

if unitless:
del cf_var.units

return cf_var


def test_unitless() -> None:
"""Test bounds variable with no units."""
cf_bounds_var = _make_cf_bounds_var(unitless=True)
result = _normalise_bounds_units(None, cf_bounds_var, BOUNDS)
assert result == BOUNDS


def test_invalid_units__pass_through() -> None:
"""Test bounds variable with invalid units."""
units = "invalid"
cf_bounds_var = _make_cf_bounds_var(units=units)
wmsg = f"Ignoring invalid units {units!r} on netCDF variable {CF_NAME!r}"
with pytest.warns(_WarnComboIgnoringCfLoad, match=wmsg):
result = _normalise_bounds_units(None, cf_bounds_var, BOUNDS)
assert result == BOUNDS


@pytest.mark.parametrize("units", ["unknown", "no_unit", "1", "kelvin"])
def test_ignore_bounds(units) -> None:
"""Test bounds variable with incompatible units compared to points."""
points_units = "km"
cf_bounds_var = _make_cf_bounds_var(units=units)
wmsg = (
f"Ignoring bounds on NetCDF variable {CF_NAME!r}. "
f"Expected units compatible with {points_units!r}"
)
with pytest.warns(IrisCfLoadWarning, match=wmsg):
result = _normalise_bounds_units(points_units, cf_bounds_var, BOUNDS)
assert result is None


def test_compatible() -> None:
"""Test bounds variable with compatible units requiring conversion."""
points_units, bounds_units = "days since 1970-01-01", "hours since 1970-01-01"
cf_bounds_var = _make_cf_bounds_var(units=bounds_units)
bounds = np.arange(10, dtype=float) * 24
result = _normalise_bounds_units(points_units, cf_bounds_var, bounds)
expected = bounds / 24
np.testing.assert_array_equal(result, expected)


def test_same_units() -> None:
"""Test bounds variable with same units as points."""
units = "days since 1970-01-01"
cf_bounds_var = _make_cf_bounds_var(units=units)
bounds = np.arange(10, dtype=float)
result = _normalise_bounds_units(units, cf_bounds_var, bounds)
np.testing.assert_array_equal(result, bounds)
assert result is bounds
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def setUp(self):
cf_data=cf_data,
standard_name=None,
long_name="wibble",
units="m",
units="km",
shape=points.shape,
size=np.prod(points.shape),
dtype=points.dtype,
Expand Down Expand Up @@ -96,31 +96,42 @@ def _get_per_test_bounds_var(_coord_unused):
)

@classmethod
def _make_array_and_cf_data(cls, dimension_names):
def _make_array_and_cf_data(cls, dimension_names, rollaxis=False):
shape = tuple(cls.dim_names_lens[name] for name in dimension_names)
cf_data = mock.MagicMock(_FillValue=None, spec=[])
cf_data.chunking = mock.MagicMock(return_value=shape)
return np.zeros(shape), cf_data

def _make_cf_bounds_var(self, dimension_names):
data = np.arange(np.prod(shape), dtype=float)
if rollaxis:
shape = shape[1:] + (shape[0],)
data = data.reshape(shape)
data = np.rollaxis(data, -1)
else:
data = data.reshape(shape)
return data, cf_data

def _make_cf_bounds_var(self, dimension_names, rollaxis=False):
# Create the bounds cf variable.
bounds, cf_data = self._make_array_and_cf_data(dimension_names)
bounds, cf_data = self._make_array_and_cf_data(
dimension_names, rollaxis=rollaxis
)
bounds *= 1000 # Convert to metres.
cf_bounds_var = mock.Mock(
spec=CFVariable,
dimensions=dimension_names,
cf_name="wibble_bnds",
cf_data=cf_data,
units="m",
shape=bounds.shape,
size=np.prod(bounds.shape),
dtype=bounds.dtype,
__getitem__=lambda self, key: bounds[key],
)

return bounds, cf_bounds_var
return cf_bounds_var

def _check_case(self, dimension_names):
bounds, self.cf_bounds_var = self._make_cf_bounds_var(
dimension_names=dimension_names
def _check_case(self, dimension_names, rollaxis=False):
self.cf_bounds_var = self._make_cf_bounds_var(
dimension_names, rollaxis=rollaxis
)

# Asserts must lie within context manager because of deferred loading.
Expand All @@ -133,15 +144,15 @@ def _check_case(self, dimension_names):
expected_list = [(self.expected_coord, self.cf_coord_var.cf_name)]
self.assertEqual(self.engine.cube_parts["coordinates"], expected_list)

def test_fastest_varying_vertex_dim(self):
def test_fastest_varying_vertex_dim__normalise_bounds(self):
# The usual order.
self._check_case(dimension_names=("foo", "bar", "nv"))

def test_slowest_varying_vertex_dim(self):
def test_slowest_varying_vertex_dim__normalise_bounds(self):
# Bounds in the first (slowest varying) dimension.
self._check_case(dimension_names=("nv", "foo", "bar"))
self._check_case(dimension_names=("nv", "foo", "bar"), rollaxis=True)

def test_fastest_with_different_dim_names(self):
def test_fastest_with_different_dim_names__normalise_bounds(self):
# Despite the dimension names ('x', and 'y') differing from the coord's
# which are 'foo' and 'bar' (as permitted by the cf spec),
# this should still work because the vertex dim is the fastest varying.
Expand Down Expand Up @@ -232,6 +243,7 @@ def setUp(self):
)

points = np.arange(6)
units = "days since 1970-01-01"
self.cf_coord_var = mock.Mock(
spec=threadsafe_nc.VariableWrapper,
dimensions=("foo",),
Expand All @@ -241,7 +253,7 @@ def setUp(self):
cf_data=mock.MagicMock(chunking=mock.Mock(return_value=None), spec=[]),
standard_name=None,
long_name="wibble",
units="days since 1970-01-01",
units=units,
calendar=None,
shape=points.shape,
size=np.prod(points.shape),
Expand All @@ -250,13 +262,20 @@ def setUp(self):
)

bounds = np.arange(12).reshape(6, 2)
cf_data = mock.MagicMock(chunking=mock.Mock(return_value=None))
# we want to mock the absence of flag attributes to helpers.get_attr_units
# see https://docs.python.org/3/library/unittest.mock.html#deleting-attributes
del cf_data.flag_values
del cf_data.flag_masks
del cf_data.flag_meanings
self.cf_bounds_var = mock.Mock(
spec=threadsafe_nc.VariableWrapper,
dimensions=("x", "nv"),
scale_factor=1,
add_offset=0,
cf_name="wibble_bnds",
cf_data=mock.MagicMock(chunking=mock.Mock(return_value=None)),
cf_data=cf_data,
units=units,
shape=bounds.shape,
size=np.prod(bounds.shape),
dtype=bounds.dtype,
Expand Down
Loading

0 comments on commit 63b44f0

Please sign in to comment.