Skip to content

Commit

Permalink
add at_date_annually knowledge horizon function (#172)
Browse files Browse the repository at this point in the history
* add at_date_annually

Signed-off-by: Victor Garcia Reolid <[email protected]>

* allow shifting more than 1 year

Signed-off-by: Victor Garcia Reolid <[email protected]>

* add timezone

Signed-off-by: Victor Garcia Reolid <[email protected]>

* PR change suggestions

Signed-off-by: Victor Garcia Reolid <[email protected]>

* apply black

Signed-off-by: Victor Garcia Reolid <[email protected]>

* get bounds

Signed-off-by: Victor Garcia Reolid <[email protected]>

* apply black

Signed-off-by: Victor Garcia Reolid <[email protected]>

* fix sign

Signed-off-by: Victor Garcia Reolid <[email protected]>

* add comments

Signed-off-by: Victor Garcia Reolid <[email protected]>

* fix constant

Signed-off-by: Victor Garcia Reolid <[email protected]>

* vectorize x_years_ago_at_date

Signed-off-by: Victor Garcia Reolid <[email protected]>

* fix bound and vectorized implementations

Signed-off-by: Victor Garcia Reolid <[email protected]>

* docs: fix typos

Signed-off-by: F.N. Claessen <[email protected]>

* style: black

Signed-off-by: F.N. Claessen <[email protected]>

* refactor: clarify test parameter

Signed-off-by: F.N. Claessen <[email protected]>

* fix: move +1

Signed-off-by: F.N. Claessen <[email protected]>

* refactor: align use of util module with x_days_ago_at_y_o_clock

Signed-off-by: F.N. Claessen <[email protected]>

* fix: DST transitions

Signed-off-by: F.N. Claessen <[email protected]>

* feature: also test DatetimeIndex against DST transitions

Signed-off-by: F.N. Claessen <[email protected]>

* style: black and isort

Signed-off-by: F.N. Claessen <[email protected]>

* style: black

Signed-off-by: F.N. Claessen <[email protected]>

---------

Signed-off-by: Victor Garcia Reolid <[email protected]>
Signed-off-by: F.N. Claessen <[email protected]>
Co-authored-by: F.N. Claessen <[email protected]>
  • Loading branch information
victorgarcia98 and Flix6x authored Mar 23, 2024
1 parent 8fc6cc2 commit 8868139
Show file tree
Hide file tree
Showing 3 changed files with 230 additions and 3 deletions.
43 changes: 42 additions & 1 deletion timely_beliefs/sensors/func_store/knowledge_horizons.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
"""Function store for computing knowledge horizons given a certain event start and resolution.
When passed get_bounds=True, these functions return bounds on the knowledge horizon,
i.e. a duration window in which the knowledge horizon must lie (e.g. between 0 and 2 days before the event start)."""

from __future__ import annotations

from datetime import datetime, timedelta

import pandas as pd

from timely_beliefs.sensors.func_store.utils import datetime_x_days_ago_at_y_oclock
from timely_beliefs.sensors.func_store.utils import (
datetime_x_days_ago_at_y_oclock,
datetime_x_years_ago_at_date,
)


def at_date(
Expand All @@ -31,6 +35,43 @@ def at_date(
return event_start - knowledge_time.astimezone(event_start.tzinfo)


def x_years_ago_at_date(
    event_start: datetime | pd.DatetimeIndex,
    x: int,
    day: int,
    month: int,
    z: str,
    get_bounds: bool = False,
) -> timedelta | pd.TimedeltaIndex | tuple[timedelta, timedelta]:
    """Knowledge horizon for events that become known on a fixed annual date, `x` years earlier.

    A typical use case is a tax rate that changes every year and has a known publication date.

    :param event_start: Start of the event(s); the anchor from which the horizon is measured.
    :param x: Number of years by which the reference date is shifted back. Must be positive.
    :param day: Day of the month of the annual reference date.
    :param month: Month of the annual reference date.
    :param z: Timezone string.
    :param get_bounds: If True, return a (lower, upper) tuple of bounds on the possible horizon
                       instead of the horizon itself. Such bounds are normally useful for creating
                       more efficient database queries when filtering by belief time.
    :raises ValueError: If `x` is zero or negative.
    """
    if x <= 0:
        raise ValueError("Only positive values for `x` are supported.")

    if not get_bounds:
        return event_start - datetime_x_years_ago_at_date(event_start, x, day, month, z)

    shortest_year_in_days = 365
    longest_year_in_days = 366

    # Shortest horizon: event on the 1st of January, publication on the 31st of December, `x` years ago.
    lower_bound = timedelta(days=(x - 1) * shortest_year_in_days + 1)
    # Longest horizon: event just before new year's midnight, publication on the 1st of January, `x` years ago.
    upper_bound = timedelta(days=(x + 1) * longest_year_in_days)
    return lower_bound, upper_bound


def ex_post(
event_resolution: timedelta,
ex_post_horizon: timedelta,
Expand Down
153 changes: 152 additions & 1 deletion timely_beliefs/sensors/func_store/test_knowledge_horizons.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from datetime import datetime, timedelta

import pandas as pd
import pytest
from pandas.testing import assert_index_equal
from pytz import utc
from pytz import timezone, utc

from timely_beliefs.sensors.func_store.knowledge_horizons import (
at_date,
ex_ante,
ex_post,
x_days_ago_at_y_oclock,
x_years_ago_at_date,
)


Expand Down Expand Up @@ -54,6 +56,155 @@ def test_fixed_knowledge_time():
)


def test_x_years_ago_at_date():
    """Check definition of knowledge horizon for events known at a fixed date annually.

    The reference date is 20 November; with x=1 the knowledge time is 20 November
    of the year before the event's year.
    """

    knowledge_func_params = dict(x=1, month=11, day=20, z="UTC")

    # (event start, expected horizon) pairs, shared by the scalar and the DatetimeIndex checks
    cases = [
        # Events on the day before the annual date
        # 2023-11-20 -> 2024-11-20 spans the leap day 2024-02-29: 366 days - 1
        (datetime(2024, 11, 19, 1, tzinfo=utc), timedelta(days=365, hours=1)),
        # 2024-11-20 -> 2025-11-20 spans no leap day: 365 - 1
        (datetime(2025, 11, 19, 2, tzinfo=utc), timedelta(days=364, hours=2)),
        # neither 2021 nor 2022 is a leap year: 365 - 1
        (datetime(2022, 11, 19, 2, tzinfo=utc), timedelta(days=364, hours=2)),
        # Events on the day after the annual date
        # 2020-11-20 -> 2021-11-20 spans no leap day: 365 + 1
        (datetime(2021, 11, 21, 3, tzinfo=utc), timedelta(days=366, hours=3)),
        (datetime(2021, 11, 21, 4, tzinfo=utc), timedelta(days=366, hours=4)),
        # 2019-11-20 -> 2020-11-20 spans the leap day 2020-02-29: 366 + 1
        (datetime(2020, 11, 21, 4, tzinfo=utc), timedelta(days=367, hours=4)),
    ]

    # Scalar datetimes
    for event_start, expected_horizon in cases:
        assert (
            x_years_ago_at_date(event_start=event_start, **knowledge_func_params)
            == expected_horizon
        )

    # Repeat test with pd.DatetimeIndex instead, using the very same cases
    event_starts = pd.DatetimeIndex([event_start for event_start, _ in cases])
    assert_index_equal(
        x_years_ago_at_date(event_start=event_starts, **knowledge_func_params),
        pd.TimedeltaIndex([expected_horizon for _, expected_horizon in cases]),
    )

    # Check years parameter
    knowledge_func_params_2_years = dict(x=2, month=11, day=20, z="UTC")
    assert x_years_ago_at_date(
        event_start=datetime(2024, 11, 19, 1, tzinfo=utc),
        **knowledge_func_params_2_years,
    ) == timedelta(
        days=2 * 365, hours=1
    )  # 365 days + 366 days - 1 day


def test_x_years_ago_at_date_with_dst():
    """Check x_years_ago_at_date specifically against Daylight Saving Time (DST) transitions.

    The annual date used here is 28 March, in timezone Europe/Amsterdam.
    Note that 2023-03-28 lies after that year's spring transition,
    whereas 2024-03-28 lies before that year's spring transition.
    Relevant transitions in Europe/Amsterdam:
    - 2023-03-26 (spring)
    - 2023-10-29 (fall)
    - 2024-03-31 (spring)
    """
    amsterdam = timezone("Europe/Amsterdam")
    knowledge_func_params = dict(x=1, month=3, day=28, z="Europe/Amsterdam")

    # Event before the first DST transition of 2024
    horizon_before_transition = x_years_ago_at_date(
        event_start=amsterdam.localize(datetime(2024, 3, 28, 0)),
        **knowledge_func_params,
    )
    # 365 + 1 day (because of the leap day on 2024-02-29) + 1 hour (fall transition)
    assert horizon_before_transition == timedelta(days=366, hours=1)

    # Event 4 days later, by which time the spring DST transition has been crossed
    horizon_after_transition = x_years_ago_at_date(
        event_start=amsterdam.localize(datetime(2024, 4, 1, 0)),
        **knowledge_func_params,
    )
    # 0 hours (fall and spring transitions cancel each other out)
    assert horizon_after_transition == timedelta(days=370, hours=0)

    # Repeat test with pd.DatetimeIndex instead
    event_starts = pd.DatetimeIndex(
        ["2024-03-28T00:00:00", "2024-04-01T00:00:00"],
        tz="Europe/Amsterdam",
    )
    expected_horizons = pd.TimedeltaIndex(
        [timedelta(days=366, hours=1), timedelta(days=370, hours=0)]
    )
    assert_index_equal(
        x_years_ago_at_date(event_start=event_starts, **knowledge_func_params),
        expected_horizons,
    )


@pytest.mark.parametrize(
    "event_start",
    [
        timezone("Europe/Amsterdam").localize(datetime(*wall_time))
        for wall_time in (
            (2024, 1, 1, 0),
            (2024, 12, 31, 23, 59, 59),
        )
    ],
)
@pytest.mark.parametrize("years", list(range(1, 6)))
def test_x_years_ago_at_date_bounds(event_start, years):
    """Check that the actual knowledge horizon lies within the bounds returned with get_bounds=True.

    Events at the very start and at the very end of 2024 are checked against
    an annual date of 31 December, for shifts of 1 up to and including 5 years.
    """
    knowledge_func_params = dict(x=years, month=12, day=31, z="Europe/Amsterdam")

    lower_bound, upper_bound = x_years_ago_at_date(
        event_start, get_bounds=True, **knowledge_func_params
    )
    knowledge_horizon = x_years_ago_at_date(event_start, **knowledge_func_params)

    assert lower_bound <= knowledge_horizon <= upper_bound


def test_dst():
"""Check definition of knowledge horizon for events known x days ago at y o'clock in some timezone z,
especially around daylight savings time (DST) transitions."""
Expand Down
37 changes: 36 additions & 1 deletion timely_beliefs/sensors/func_store/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from datetime import datetime
from datetime import datetime, timedelta

import pandas as pd
from pytz import timezone
Expand Down Expand Up @@ -45,3 +45,38 @@ def datetime_x_days_ago_at_y_oclock(
)

return tz_aware_earlier_time


def datetime_x_years_ago_at_date(
    tz_aware_original_time: datetime | pd.DatetimeIndex,
    x: int,
    day: int,
    month: int,
    z: str,
) -> datetime | pd.DatetimeIndex:
    """Returns the datetime x years ago at the midnight start of the given date, from the perspective of timezone z.

    The calendar year of the original time is determined in timezone z, then shifted back by x years,
    and the given day and month are taken within that earlier year.

    :param tz_aware_original_time: Timezone-aware datetime, or timezone-aware DatetimeIndex, to go back from.
    :param x: Number of calendar years to go back.
    :param day: Day of the month of the date to return.
    :param month: Month of the date to return.
    :param z: Name of the timezone in which the year and the midnight are determined.
    :returns: Midnight (in timezone z) at the start of the requested date,
              expressed in the timezone of the original time.
    """
    original_tz = tz_aware_original_time.tzinfo

    if isinstance(tz_aware_original_time, datetime):
        tz = timezone(z)
        # Wall-clock time in timezone z, which decides the calendar year we start from
        tz_naive_original_time = tz_aware_original_time.astimezone(tz).replace(
            tzinfo=None
        )
        # Start of that year, x years back, then moved to midnight on the requested date
        # NOTE(review): day=29, month=2 raises here for a non-leap target year - confirm this is intended
        tz_naive_earlier_time = (
            pd.Timestamp(tz_naive_original_time).to_period("1Y").to_timestamp()
            - pd.DateOffset(years=x)
        ).replace(month=month, day=day, hour=0, minute=0, second=0, microsecond=0)
        return tz.localize(tz_naive_earlier_time).astimezone(original_tz)

    # DatetimeIndex: same steps as above, i.e. compute in timezone z (rather than in the
    # timezone of the index) so that both input types agree on which midnight is meant.
    # Dropping the timezone before calling to_period also avoids converting a tz-aware index to periods.
    tz_naive_original_time = tz_aware_original_time.tz_convert(z).tz_localize(None)
    tz_naive_earlier_time = tz_naive_original_time.to_period(
        "1Y"
    ).to_timestamp() + pd.DateOffset(month=month, day=day, years=-x)
    return tz_naive_earlier_time.tz_localize(z).tz_convert(original_tz)

0 comments on commit 8868139

Please sign in to comment.