Skip to content

Commit

Permalink
date_microseconds FUTURE flag (#6260)
Browse files Browse the repository at this point in the history
* PoC monkeypatch precision.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add FUTURE flag.

* FutureWarning.

* Corrected behaviour and added tests.

* Corrected behaviour and added tests.

* What's New entry.

* Make sensitive to cf-units version.

* Further test improvements.

* Clearer FutureWarning text.

* Use a cf-units subclass instead.

* Rename _IrisUnit to Unit.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
trexfeathers and pre-commit-ci[bot] authored Dec 17, 2024
1 parent 01bbdf6 commit df4c52b
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 5 deletions.
13 changes: 11 additions & 2 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ This document explains the changes made to Iris for this release
✨ Features
===========

#. N/A
#. `@trexfeathers`_ added a new :class:`~iris.Future` flag -
``date_microseconds`` - which controls whether Iris time units use
microsecond precision (supported by :class:`cf_units.Unit` since cf-units
version 3.3) rather than the previous maximum precision of seconds. If you
activate this flag, check your code for new floating point problems (e.g.
when using the :class:`~iris.Constraint` API). (:pull:`6260`)


🐛 Bugs Fixed
Expand All @@ -50,7 +56,10 @@ This document explains the changes made to Iris for this release
🚀 Performance Enhancements
===========================

#. N/A
#. Note that due to the new ``date_microseconds`` :class:`~iris.Future` flag,
the time coordinate categorisation speedup introduced in
:doc:`/whatsnew/3.11` will only be available when
``iris.FUTURE.date_microseconds == True``.


🔥 Deprecations
Expand Down
23 changes: 21 additions & 2 deletions lib/iris/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,13 @@ def callback(cube, field, filename):
class Future(threading.local):
"""Run-time configuration controller."""

def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=False):
def __init__(
self,
datum_support=False,
pandas_ndim=False,
save_split_attrs=False,
date_microseconds=False,
):
"""Container for run-time options controls.
To adjust the values simply update the relevant attribute from
Expand All @@ -169,6 +175,13 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
different ways : "global" ones are saved as dataset attributes, where
possible, while "local" ones are saved as data-variable attributes.
See :func:`iris.fileformats.netcdf.saver.save`.
date_microseconds : bool, default=False
Newer versions of cftime and cf-units support microsecond precision
for dates, compared to the legacy behaviour that only works with
seconds. Enabling microsecond precision will alter core Iris
behaviour, such as when using :class:`~iris.Constraint`, and you
may need to defend against floating point precision issues where
you didn't need to before.
"""
# The flag 'example_future_flag' is provided as a reference for the
Expand All @@ -181,6 +194,7 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
self.__dict__["datum_support"] = datum_support
self.__dict__["pandas_ndim"] = pandas_ndim
self.__dict__["save_split_attrs"] = save_split_attrs
self.__dict__["date_microseconds"] = date_microseconds

# TODO: next major release: set IrisDeprecation to subclass
# DeprecationWarning instead of UserWarning.
Expand All @@ -189,7 +203,12 @@ def __repr__(self):
# msg = ('Future(example_future_flag={})')
# return msg.format(self.example_future_flag)
msg = "Future(datum_support={}, pandas_ndim={}, save_split_attrs={})"
return msg.format(self.datum_support, self.pandas_ndim, self.save_split_attrs)
return msg.format(
self.datum_support,
self.pandas_ndim,
self.save_split_attrs,
self.date_microseconds,
)

# deprecated_options = {'example_future_flag': 'warning',}
deprecated_options: dict[str, Literal["error", "warning"]] = {}
Expand Down
67 changes: 66 additions & 1 deletion lib/iris/common/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from __future__ import annotations

from collections.abc import Mapping
from datetime import timedelta
from functools import wraps
from typing import Any
import warnings

import cf_units
import numpy as np
Expand Down Expand Up @@ -139,6 +141,68 @@ def update(self, other, **kwargs):
dict.update(self, other, **kwargs)


class Unit(cf_units.Unit):
    """A :class:`cf_units.Unit` whose ``num2date`` honours ``iris.FUTURE.date_microseconds``."""

    # TODO: remove this subclass once FUTURE.date_microseconds is removed.

    @classmethod
    def from_unit(cls, unit: cf_units.Unit):
        """Cast a :class:`cf_units.Unit` to a :class:`Unit`.

        Parameters
        ----------
        unit : cf_units.Unit
            The unit to cast. An existing :class:`Unit` is returned unchanged.

        Returns
        -------
        Unit
            ``unit`` itself if it is already a :class:`Unit`, otherwise a new
            instance of ``cls`` carrying a copy of ``unit``'s instance
            dictionary.

        Raises
        ------
        TypeError
            If ``unit`` is not a :class:`cf_units.Unit`.
        """
        if isinstance(unit, Unit):
            result = unit
        elif isinstance(unit, cf_units.Unit):
            # Bypass __init__ and copy the existing state across directly.
            result = cls.__new__(cls)
            result.__dict__.update(unit.__dict__)
        else:
            message = f"Expected a cf_units.Unit, got {type(unit)}"
            raise TypeError(message)
        return result

    def num2date(
        self,
        time_value,
        only_use_cftime_datetimes=True,
        only_use_python_datetimes=False,
    ):
        """Convert numeric time value(s) to date(s), honouring the FUTURE flag.

        Wraps :meth:`cf_units.Unit.num2date`. While
        ``iris.FUTURE.date_microseconds`` is ``False`` a ``FutureWarning`` is
        issued and every returned date is rounded to the nearest second, which
        was the legacy behaviour. Users will need to adapt to microsecond
        precision eventually, which may involve floating point issues.

        Parameters
        ----------
        time_value :
            Passed through to :meth:`cf_units.Unit.num2date`.
        only_use_cftime_datetimes : bool, default=True
            Passed through to :meth:`cf_units.Unit.num2date`.
        only_use_python_datetimes : bool, default=False
            Passed through to :meth:`cf_units.Unit.num2date`.

        Returns
        -------
        The result of :meth:`cf_units.Unit.num2date`, rounded to whole seconds
        when the legacy behaviour is active.
        """
        # NOTE(review): imported locally, presumably to avoid a circular import
        # with the top-level iris package - confirm before moving.
        from iris import FUTURE

        def _round(date):
            # Round a single date object to the nearest whole second
            # (half a second and above rounds up).
            if date.microsecond == 0:
                return date
            elif date.microsecond < 500000:
                return date - timedelta(microseconds=date.microsecond)
            else:
                return (
                    date
                    + timedelta(seconds=1)
                    - timedelta(microseconds=date.microsecond)
                )

        result = super().num2date(
            time_value, only_use_cftime_datetimes, only_use_python_datetimes
        )
        if FUTURE.date_microseconds is False:
            message = (
                "You are using legacy date precision for Iris units - max "
                "precision is seconds. In future, Iris will use microsecond "
                "precision - available since cf-units version 3.3 - which may "
                "affect core behaviour. To opt-in to the "
                "new behaviour, set `iris.FUTURE.date_microseconds = True`."
            )
            warnings.warn(message, category=FutureWarning)

            if hasattr(result, "shape"):
                # Explicit otypes: without it np.vectorize must call _round on
                # the first element to infer the output type, which raises a
                # ValueError for size-0 arrays.
                vfunc = np.vectorize(_round, otypes=[object])
                result = vfunc(result)
            else:
                result = _round(result)

        return result


class CFVariableMixin:
_metadata_manager: Any

Expand Down Expand Up @@ -207,7 +271,8 @@ def units(self) -> cf_units.Unit:

@units.setter
def units(self, unit: cf_units.Unit | str | None) -> None:
    """Set the units, stored as an Iris :class:`Unit`.

    The value is first normalised with :func:`cf_units.as_unit`, then cast
    with :meth:`Unit.from_unit` so that the stored object is the Iris
    subclass rather than a plain :class:`cf_units.Unit`.
    """
    unit = cf_units.as_unit(unit)
    self._metadata_manager.units = Unit.from_unit(unit)

@property
def attributes(self) -> LimitedAttributeDict:
Expand Down
98 changes: 98 additions & 0 deletions lib/iris/tests/unit/common/metadata/test_microsecond_future.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for the opt-in FUTURE.date_microseconds behaviour."""

import warnings

import cf_units
import numpy as np
from packaging.version import Version
import pytest

from iris import FUTURE
from iris.coords import DimCoord
from iris.tests._shared_utils import assert_array_equal

# True when the installed cf-units predates 3.3.0. The tests below then expect
# whole-second results even when FUTURE.date_microseconds is enabled.
cf_units_legacy = Version(cf_units.__version__) < Version("3.3.0")


@pytest.fixture(
    params=[0, 1000, 500000],
    ids=["no_microseconds", "1_millisecond", "half_second"],
)
def time_coord(request) -> tuple[int, DimCoord]:
    """Provide a microsecond offset and a time coordinate shifted by it.

    Returns
    -------
    tuple of (int, DimCoord)
        The offset in microseconds (the fixture parameter) and a "time"
        coordinate whose points are 0, 1 and 2 seconds plus that offset.
    """
    points = np.array([0.0, 1.0, 2.0])
    # The parameter is in microseconds; the coordinate is in seconds.
    points += request.param / 1e6
    return request.param, DimCoord(
        points,
        "time",
        units="seconds since 1970-01-01 00:00:00",
    )


@pytest.fixture(
    params=[False, True],
    ids=["without_future", "with_future"],
)
def future_date_microseconds(request):
    """Run the test with ``FUTURE.date_microseconds`` both off and on.

    Yields the flag value in force for the test. On teardown the flag is put
    back to whatever it was before the fixture ran, so a value configured
    elsewhere is not clobbered.
    """
    original = FUTURE.date_microseconds
    FUTURE.date_microseconds = request.param
    yield request.param
    FUTURE.date_microseconds = original


def test_warning(time_coord, future_date_microseconds):
    """Check the FutureWarning is tied to the flag, not to the data.

    The warning should be raised whether the coordinate has microseconds or
    not - users should be made aware, and opt in, as early as possible.
    """
    _, coord = time_coord

    def convert_points():
        coord.units.num2date(coord.points)

    if not future_date_microseconds:
        with pytest.warns(FutureWarning):
            convert_points()
        return

    # Flag enabled: escalate any FutureWarning to an error to prove none is raised.
    with warnings.catch_warnings():
        warnings.simplefilter("error", FutureWarning)
        convert_points()


@pytest.mark.parametrize(
    "indexing",
    (np.s_[0], np.s_[:], np.s_[:, np.newaxis]),
    ids=("single", "array", "array_2d"),
)
def test_num2date(time_coord, future_date_microseconds, indexing):
    """Check the microseconds returned for scalar, 1D and 2D inputs."""
    n_microseconds, coord = time_coord
    dates = coord.units.num2date(coord.points[indexing])

    if indexing == np.s_[0]:
        # Scalar input gives a single date; wrap it so both branches end up
        # with an iterable of dates.
        assert hasattr(dates, "microsecond")
        dates = [dates]
    else:
        assert hasattr(dates, "shape")
        dates = dates.flatten()
        assert hasattr(dates[0], "microsecond")

    # Microseconds only survive with the flag enabled on a new enough cf-units.
    legacy_precision = cf_units_legacy or not future_date_microseconds
    expected_microseconds = 0 if legacy_precision else n_microseconds

    result_microseconds = np.array([date.microsecond for date in dates])
    assert_array_equal(result_microseconds, expected_microseconds)


def test_roundup(time_coord, future_date_microseconds):
    """Check that half a second rounds up to the next second in legacy mode."""
    n_microseconds, coord = time_coord
    dates = coord.units.num2date(coord.points)

    # Legacy cf-units versions round microseconds and ignore the future flag.
    rounds_to_seconds = cf_units_legacy or not future_date_microseconds
    expected_seconds = np.floor(coord.points)
    if rounds_to_seconds and n_microseconds >= 500000:
        expected_seconds = expected_seconds + 1

    result_seconds = np.array([date.second for date in dates])
    assert_array_equal(result_seconds, expected_seconds)

0 comments on commit df4c52b

Please sign in to comment.