From e2643920bbbc5dbcbff8f36d9d79f79b09742b97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:10:26 -0600 Subject: [PATCH] fix: Safely compare UUID replication keys with state bookmarks (#2756) --- singer_sdk/helpers/_state.py | 4 ++++ singer_sdk/helpers/_typing.py | 3 +++ tests/core/test_state_handling.py | 19 +++++++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/singer_sdk/helpers/_state.py b/singer_sdk/helpers/_state.py index fd7dee377..565dbd51d 100644 --- a/singer_sdk/helpers/_state.py +++ b/singer_sdk/helpers/_state.py @@ -218,6 +218,10 @@ def increment_state( extra={"replication_key": replication_key}, ) progress_dict = stream_or_partition_state[PROGRESS_MARKERS] + # TODO: Instead of forcing all values to be JSON-compatible strings and hope + # we catch all cases, we should allow the stream to define how to + # the values from the state and the record should be pre-processed. + # https://github.com/meltano/sdk/issues/2753 old_rk_value = to_json_compatible(progress_dict.get("replication_key_value")) new_rk_value = to_json_compatible(latest_record[replication_key]) diff --git a/singer_sdk/helpers/_typing.py b/singer_sdk/helpers/_typing.py index b02ecb512..d3b629c11 100644 --- a/singer_sdk/helpers/_typing.py +++ b/singer_sdk/helpers/_typing.py @@ -6,6 +6,7 @@ import datetime import logging import typing as t +import uuid from enum import Enum from functools import lru_cache @@ -42,6 +43,8 @@ def to_json_compatible(val: t.Any) -> t.Any: # noqa: ANN401 if isinstance(val, (datetime.datetime,)): # Make naive datetimes UTC return (val.replace(tzinfo=UTC) if val.tzinfo is None else val).isoformat("T") + if isinstance(val, (uuid.UUID,)): + return str(val) return val diff --git a/tests/core/test_state_handling.py b/tests/core/test_state_handling.py index f58a0128b..85fd4812f 100644 --- a/tests/core/test_state_handling.py +++ b/tests/core/test_state_handling.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import uuid import pytest @@ -155,3 +156,21 @@ def test_null_replication_value(caplog): ), "State should not be updated." assert caplog.records[0].levelname == "WARNING" assert "is null" in caplog.records[0].message + + +def test_uuidv7_replication_value(): + stream_state = { + "replication_key": "id", + "replication_key_value": "01931c63-b14e-7ff3-8621-e577ed392dc8", + } + new_string_val = "01931c63-b14e-7ff3-8621-e578edbca9a3" + + _state.increment_state( + stream_state, + latest_record={"id": uuid.UUID(new_string_val)}, + replication_key="id", + is_sorted=True, + check_sorted=True, + ) + + assert stream_state["replication_key_value"] == new_string_val