import datetime
import unittest
from typing import Optional, Tuple

# Most precise accepted layout: date, time, and fractional seconds.
_FULL_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
# Accepted timestamp layouts, tried in order from least to most precise.
_TIME_FORMATS = (
    "%Y-%m-%d",
    "%Y-%m-%d %H:%M",
    "%Y-%m-%d %H:%M:%S",
    _FULL_FORMAT,
)


def _parse_udf_name_timestamp(
    full_name: str,
) -> Tuple[str, Optional[datetime.datetime]]:
    """Split ``name[@timestamp]`` into a UDF name and an optional UTC time.

    Everything before the first ``"@"`` is the name; everything after it is
    parsed against each entry of ``_TIME_FORMATS``. A ``"T"`` between the
    date and time is accepted in place of the space.

    :param full_name: The UDF name, optionally suffixed with ``@<timestamp>``.
    :return: ``(name, timestamp)``. ``timestamp`` is ``None`` when
        ``full_name`` contains no ``"@"``; otherwise it is a timezone-aware
        ``datetime`` in UTC.
    :raises ValueError: If text follows the ``"@"`` (or nothing does) and it
        matches none of the accepted formats.
    """
    name, at, raw_ts = full_name.partition("@")
    if not at:
        # This means that "@" was not found in the string,
        # and we're just running a normal UDF.
        return name, None
    # The formats use a space between date and time; normalize the ISO "T"
    # separator, but keep ``raw_ts`` untouched so that error messages quote
    # exactly what the caller passed in.
    normalized_ts = raw_ts.replace("T", " ")
    for fmt in _TIME_FORMATS:
        try:
            naive_ts = datetime.datetime.strptime(normalized_ts, fmt)
        except ValueError:
            continue
        # The text carries no zone information; it is defined to be UTC.
        return name, naive_ts.replace(tzinfo=datetime.timezone.utc)
    raise ValueError(
        f"Could not parse {raw_ts!r} as a timestamp. "
        "Timestamp must be formatted as yyyy-MM-dd[ HH:mm[:ss[.SSS]]] "
        "and is interpreted as UTC."
    )


# Tests for the parser above (taken from the tests/test_generic_udf.py hunk).
class ParserTest(unittest.TestCase):
    """Checks accepted and rejected inputs of ``_parse_udf_name_timestamp``."""

    def test_parse_udf_name_timestamp(self) -> None:
        inouts = (
            ("just-a-name", ("just-a-name", None)),
            ("udf/name@2022-03-04", ("udf/name", _utc(2022, 3, 4))),
            ("other/name@2022-03-04 05:06", ("other/name", _utc(2022, 3, 4, 5, 6))),
            ("prince@1999-09-09 21:21:21", ("prince", _utc(1999, 9, 9, 21, 21, 21))),
            (
                "uses-t@2024-09-17T20:59:59.999999",
                ("uses-t", _utc(2024, 9, 17, 20, 59, 59, 999999)),
            ),
        )
        for inval, outs in inouts:
            with self.subTest(inval):
                self.assertEqual(outs, _parse_udf_name_timestamp(inval))

    def test_parse_udf_name_timestamp_bad(self) -> None:
        bads = (
            "name@not a time at all",
            "too-short@2020-01",
            "no-space@2020-01-0203",
            "lowercase-t@2020-01-02t03:04",
            "hour-only@2020-01-02 03",
            "too-precise@2020-01-02 03:04:05.67890123456",
        )
        for bad in bads:
            with self.subTest(bad):
                with self.assertRaises(ValueError):
                    _parse_udf_name_timestamp(bad)

    def test_error_message_quotes_original_text(self) -> None:
        # The "T" -> " " normalization must not leak into the error message.
        with self.assertRaises(ValueError) as caught:
            _parse_udf_name_timestamp("name@Tuesday")
        self.assertIn("'Tuesday'", str(caught.exception))


def _utc(*args: int) -> datetime.datetime:
    """Build a UTC-aware ``datetime`` from positional date/time fields."""
    return datetime.datetime(*args, tzinfo=datetime.timezone.utc)