Skip to content

Commit

Permalink
Add datetime parsing for executing UDFs at timestamps.
Browse files Browse the repository at this point in the history
This change adds the initial step for executing UDFs at timestamps.
To allow users to specify a timestamp of a registered UDF to use,
we add `@` followed by a timestamp to the name of the UDF.

For example, to execute the version of `my/udf` as it was as of
2023-06-17 at 07:16 UTC, they would execute:

    "my/udf@2023-06-17 07:16"

This is unambiguous and avoids the need to add a new parameter, which
would be difficult to name and could shadow parameters in user code.
  • Loading branch information
thetorpedodog committed Sep 17, 2024
1 parent 4f115b5 commit 004d9be
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 1 deletion.
34 changes: 33 additions & 1 deletion src/tiledb/cloud/udf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import base64
import datetime
import uuid
import warnings
from typing import Any, Callable, Iterable, Optional, Union
from typing import Any, Callable, Iterable, Optional, Tuple, Union

import cloudpickle

Expand Down Expand Up @@ -172,6 +173,37 @@ def exec_async(*args, **kwargs) -> Any:
return sender.wrap_async_base_call(exec_base, *args, **kwargs)


# Most precise accepted layout: date plus time down to fractional seconds.
_FULL_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
# Every accepted timestamp layout, from coarsest to finest precision.
_TIME_FORMATS = (
    "%Y-%m-%d",
    "%Y-%m-%d %H:%M",
    "%Y-%m-%d %H:%M:%S",
    _FULL_FORMAT,
)


def _parse_udf_name_timestamp(
    full_name: str,
) -> Tuple[str, Optional[datetime.datetime]]:
    """Split ``name@timestamp`` into the UDF name and a UTC datetime.

    The string is split at the first ``@``. The part after it must match one
    of ``_TIME_FORMATS``; the date and time may be separated by exactly one
    space or one uppercase ``T``.

    :param full_name: A UDF name, optionally followed by ``@`` and a
        timestamp (for example ``"my/udf@2023-06-17 07:16"``).
    :return: A ``(name, timestamp)`` tuple. ``timestamp`` is ``None`` when
        ``full_name`` contains no ``@``; otherwise it is a timezone-aware
        datetime in UTC.
    :raises ValueError: If text follows ``@`` but is not a valid timestamp.
    """
    name, at, ts_str = full_name.partition("@")
    if not at:
        # This means that "@" was not found in the string,
        # and we're just running a normal UDF.
        return name, None

    # Accept a single ISO-style "T" separator by turning it into a space.
    # Only the first "T" is rewritten so that stray extra ones are rejected.
    normalized = ts_str.replace("T", " ", 1)

    # strptime treats a space in the format as "any run of whitespace", so
    # on its own it would accept doubled spaces, tabs, or "T" next to a
    # space. Require the text to already be in single-space form.
    if " ".join(normalized.split()) == normalized:
        for fmt in _TIME_FORMATS:
            try:
                naive_ts = datetime.datetime.strptime(normalized, fmt)
            except ValueError:
                continue
            return name, naive_ts.replace(tzinfo=datetime.timezone.utc)

    # Report exactly what the caller supplied, not the normalized form.
    raise ValueError(
        f"Could not parse {ts_str!r} as a timestamp. "
        "Timestamp must be formatted as yyyy-MM-dd[ HH:mm[:ss[.SSSSSS]]] "
        "and is interpreted as UTC."
    )


def register_udf(
func,
name,
Expand Down
36 changes: 36 additions & 0 deletions tests/test_generic_udf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import unittest

import numpy as np
Expand Down Expand Up @@ -122,3 +123,38 @@ def test():

with self.assertRaises(tiledb_cloud_error.TileDBCloudError):
udf.exec(test, timeout=1)


class ParserTest(unittest.TestCase):
    """Tests for ``udf._parse_udf_name_timestamp``."""

    def test_parse_udf_name_timestamp(self) -> None:
        """Well-formed inputs yield the expected ``(name, timestamp)``."""
        # Maps each input string to the tuple the parser should return.
        expectations = {
            "just-a-name": ("just-a-name", None),
            "udf/name@2022-03-04": ("udf/name", _utc(2022, 3, 4)),
            "other/name@2022-03-04 05:06": (
                "other/name",
                _utc(2022, 3, 4, 5, 6),
            ),
            "prince@1999-09-09 21:21:21": (
                "prince",
                _utc(1999, 9, 9, 21, 21, 21),
            ),
            "uses-t@2024-09-17T20:59:59.999999": (
                "uses-t",
                _utc(2024, 9, 17, 20, 59, 59, 999999),
            ),
        }
        for full_name, expected in expectations.items():
            with self.subTest(full_name):
                actual = udf._parse_udf_name_timestamp(full_name)
                self.assertEqual(expected, actual)

    def test_parse_udf_name_timestamp_bad(self) -> None:
        """Malformed timestamps are rejected with ``ValueError``."""
        malformed = [
            "name@not a time at all",
            "too-short@2020-01",
            "no-space@2020-01-0203",
            "lowercase-t@2020-01-02t03:04",
            "hour-only@2020-01-02 03",
            "too-precise@2020-01-02 03:04:05.67890123456",
        ]
        for full_name in malformed:
            with self.subTest(full_name), self.assertRaises(ValueError):
                udf._parse_udf_name_timestamp(full_name)


def _utc(*args: int) -> datetime.datetime:
    """Build a UTC-aware datetime from positional date/time components."""
    utc_zone = datetime.timezone.utc
    return datetime.datetime(*args, tzinfo=utc_zone)

0 comments on commit 004d9be

Please sign in to comment.