diff --git a/rdt/transformers/datetime.py b/rdt/transformers/datetime.py index 5b8ef9b5..242d2e09 100644 --- a/rdt/transformers/datetime.py +++ b/rdt/transformers/datetime.py @@ -23,7 +23,7 @@ class UnixTimestampEncoder(BaseTransformer): Indicate what to replace the null values with. If the strings ``'mean'`` or ``'mode'`` are given, replace them with the corresponding aggregation, if ``'random'``, use random values from the dataset to fill the nan values. - Defaults to ``mean``. + Defaults to ``random``. model_missing_values (bool): **DEPRECATED** Whether to create a new column to indicate which values were null or not. The column will be created only if there are null values. If ``True``, create @@ -53,14 +53,14 @@ class UnixTimestampEncoder(BaseTransformer): def __init__( self, - missing_value_replacement='mean', + missing_value_replacement='random', model_missing_values=None, datetime_format=None, missing_value_generation='random', enforce_min_max_values=False, ): super().__init__() - self._set_missing_value_replacement('mean', missing_value_replacement) + self._set_missing_value_replacement('random', missing_value_replacement) self._set_missing_value_generation(missing_value_generation) self.enforce_min_max_values = enforce_min_max_values if model_missing_values is not None: @@ -248,7 +248,7 @@ class OptimizedTimestampEncoder(UnixTimestampEncoder): Indicate what to replace the null values with. If the strings ``'mean'`` or ``'mode'`` are given, replace them with the corresponding aggregation, if ``'random'``, use random values from the dataset to fill the nan values. - Defaults to ``mean``. + Defaults to ``random``. model_missing_values (bool): **DEPRECATED** Whether to create a new column to indicate which values were null or not. The column will be created only if there are null values. 
If ``True``, create @@ -275,7 +275,7 @@ class OptimizedTimestampEncoder(UnixTimestampEncoder): def __init__( self, - missing_value_replacement=None, + missing_value_replacement='random', model_missing_values=None, datetime_format=None, missing_value_generation='random', diff --git a/tests/unit/transformers/test_datetime.py b/tests/unit/transformers/test_datetime.py index 8d67e85b..dba24286 100644 --- a/tests/unit/transformers/test_datetime.py +++ b/tests/unit/transformers/test_datetime.py @@ -45,6 +45,14 @@ def test___init__with_model_missing_values(self): assert transformer.missing_value_generation == 'random' assert transformer.datetime_format == '%M-%d-%Y' + def test_default_missing_value_replacement(self): + """Test that the default value of ``missing_value_replacement`` is ``'random'``.""" + # Run + transformer = UnixTimestampEncoder() + + # Assert + assert transformer.missing_value_replacement == 'random' + def test__convert_to_datetime(self): """Test the ``_convert_to_datetime`` method. @@ -270,7 +278,7 @@ def test__fit(self, null_transformer_mock): """ # Setup data = pd.to_datetime(['2020-01-01', '2020-02-01', '2020-03-01']) - transformer = UnixTimestampEncoder() + transformer = UnixTimestampEncoder(missing_value_replacement='mean') # Run transformer._fit(data) @@ -604,6 +612,14 @@ def test___init__(self): assert transformer.divider is None assert transformer.null_transformer is None + def test_default_missing_value_replacement(self): + """Test that the default value of ``missing_value_replacement`` is ``'random'``.""" + # Run + transformer = OptimizedTimestampEncoder() + + # Assert + assert transformer.missing_value_replacement == 'random' + def test__find_divider(self): """Test the ``_find_divider`` method.