From e4df7104d936e74f6a82a443de9d051f4e0c23fa Mon Sep 17 00:00:00 2001 From: Dominik Hoffmann Date: Sun, 1 Dec 2024 10:58:04 +0100 Subject: [PATCH] #74: Applied feedback to interval filtering Signed-off-by: Dominik Hoffmann --- .../spark/interval_filtering.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/interval_filtering.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/interval_filtering.py index dbecb5464..165a747f3 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/interval_filtering.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/interval_filtering.py @@ -104,7 +104,7 @@ def filter(self) -> DataFrame: current_row = rows[i] current_time_stamp = current_row[self.time_stamp_column_name] - if self.check_if_outside_of_interval( + if self.check_outside_of_interval( current_time_stamp, last_time_stamp, time_delta_in_ms, tolerance_in_ms ): current_row_dict = current_row.asDict() @@ -167,21 +167,17 @@ def get_time_delta(self, value: int) -> timedelta: "interval_unit must be either 'days', 'hours', 'minutes', 'seconds' or 'milliseconds'" ) - def check_if_outside_of_interval( + def check_outside_of_interval( self, current_time_stamp: pd.Timestamp, last_time_stamp: pd.Timestamp, time_delta_in_ms: float, tolerance_in_ms: float, ) -> bool: - if tolerance_in_ms is None: - return ( - (current_time_stamp - last_time_stamp).total_seconds() * 1000 - ) >= time_delta_in_ms - else: - return ( - (current_time_stamp - last_time_stamp).total_seconds() * 1000 - ) + tolerance_in_ms >= time_delta_in_ms + time_difference = (current_time_stamp - last_time_stamp).total_seconds() * 1000 + if not tolerance_in_ms is None: + time_difference += tolerance_in_ms + return time_difference >= time_delta_in_ms def format_date_time_to_string(self, time_stamp: pd.Timestamp) -> str: try: