From 69c3c87f399e7916fc007700be30e1025f59dfa0 Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Fri, 1 Oct 2021 11:05:09 -0400 Subject: [PATCH 01/18] Add initial file from `DataFrame.rst` as template --- .../cudf/source/api_docs/subword_tokenize.rst | 254 ++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 docs/cudf/source/api_docs/subword_tokenize.rst diff --git a/docs/cudf/source/api_docs/subword_tokenize.rst b/docs/cudf/source/api_docs/subword_tokenize.rst new file mode 100644 index 00000000000..12ff1f13bc4 --- /dev/null +++ b/docs/cudf/source/api_docs/subword_tokenize.rst @@ -0,0 +1,254 @@ +========= +DataFrame +========= +.. currentmodule:: cudf + +Constructor +~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + :template: autosummary/class_with_autosummary.rst + + DataFrame + +Attributes and underlying data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +**Axes** + +.. autosummary:: + :toctree: api/ + + DataFrame.index + DataFrame.columns + +.. autosummary:: + :toctree: api/ + + DataFrame.dtypes + DataFrame.info + DataFrame.select_dtypes + DataFrame.values + DataFrame.ndim + DataFrame.size + DataFrame.shape + DataFrame.memory_usage + DataFrame.empty + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.astype + DataFrame.copy + +Indexing, iteration +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.head + DataFrame.at + DataFrame.iat + DataFrame.loc + DataFrame.iloc + DataFrame.insert + DataFrame.__iter__ + DataFrame.iteritems + DataFrame.keys + DataFrame.iterrows + DataFrame.itertuples + DataFrame.pop + DataFrame.tail + DataFrame.isin + DataFrame.where + DataFrame.mask + DataFrame.query + +For more information on ``.at``, ``.iat``, ``.loc``, and +``.iloc``, see the :ref:`indexing documentation `. + +Binary operator functions +~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.add + DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow + DataFrame.radd + DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + +Function application, GroupBy & window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.apply + DataFrame.apply_chunks + DataFrame.apply_rows + DataFrame.pipe + DataFrame.agg + DataFrame.groupby + DataFrame.rolling + +.. _api.dataframe.stats: + +Computations / descriptive stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.all + DataFrame.any + DataFrame.clip + DataFrame.corr + DataFrame.count + DataFrame.cov + DataFrame.cummax + DataFrame.cummin + DataFrame.cumprod + DataFrame.cumsum + DataFrame.describe + DataFrame.kurt + DataFrame.kurtosis + DataFrame.max + DataFrame.mean + DataFrame.min + DataFrame.mode + DataFrame.prod + DataFrame.product + DataFrame.quantile + DataFrame.quantiles + DataFrame.rank + DataFrame.round + DataFrame.skew + DataFrame.sum + DataFrame.std + DataFrame.var + +Reindexing / selection / label manipulation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.drop + DataFrame.drop_duplicates + DataFrame.equals + DataFrame.head + DataFrame.reindex + DataFrame.rename + DataFrame.reset_index + DataFrame.sample + DataFrame.searchsorted + DataFrame.set_index + DataFrame.repeat + DataFrame.tail + DataFrame.take + DataFrame.tile + +.. _api.dataframe.missing: + +Missing data handling +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.dropna + DataFrame.fillna + DataFrame.isna + DataFrame.isnull + DataFrame.nans_to_nulls + DataFrame.notna + DataFrame.notnull + DataFrame.replace + +Reshaping, sorting, transposing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.argsort + DataFrame.interleave_columns + DataFrame.partition_by_hash + DataFrame.pivot + DataFrame.scatter_by_map + DataFrame.sort_values + DataFrame.sort_index + DataFrame.nlargest + DataFrame.nsmallest + DataFrame.stack + DataFrame.unstack + DataFrame.melt + DataFrame.explode + DataFrame.T + DataFrame.transpose + +Combining / comparing / joining / merging / encoding +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.append + DataFrame.assign + DataFrame.join + DataFrame.merge + DataFrame.update + DataFrame.label_encoding + DataFrame.one_hot_encoding + +Numerical operations +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.acos + DataFrame.asin + DataFrame.atan + DataFrame.cos + DataFrame.exp + DataFrame.log + DataFrame.sin + DataFrame.sqrt + DataFrame.tan + +Time Series-related +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.shift + +Serialization / IO / conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.as_gpu_matrix + DataFrame.as_matrix + DataFrame.from_arrow + DataFrame.from_pandas + DataFrame.from_records + DataFrame.hash_columns + DataFrame.to_arrow + DataFrame.to_dlpack + DataFrame.to_parquet + DataFrame.to_csv + DataFrame.to_hdf + DataFrame.to_dict + DataFrame.to_json + DataFrame.to_pandas + DataFrame.to_feather + DataFrame.to_records + DataFrame.to_string From 9a8e65f2e623c4d74668d12cf61477c5b7eb0c7a Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Fri, 1 Oct 2021 11:12:47 -0400 Subject: [PATCH 02/18] Added docs for `SubwordTokenizer` --- .../cudf/source/api_docs/subword_tokenize.rst | 248 +----------------- 1 file changed, 2 insertions(+), 246 deletions(-) diff --git a/docs/cudf/source/api_docs/subword_tokenize.rst b/docs/cudf/source/api_docs/subword_tokenize.rst index 12ff1f13bc4..20a5e344e42 100644 --- a/docs/cudf/source/api_docs/subword_tokenize.rst +++ b/docs/cudf/source/api_docs/subword_tokenize.rst @@ -1,254 +1,10 @@ ========= -DataFrame +SubwordTokenizer ========= -.. currentmodule:: cudf +.. currentmodule:: cudf.core.subword_tokenizer Constructor ~~~~~~~~~~~ .. autosummary:: :toctree: api/ :template: autosummary/class_with_autosummary.rst - - DataFrame - -Attributes and underlying data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -**Axes** - -.. autosummary:: - :toctree: api/ - - DataFrame.index - DataFrame.columns - -.. autosummary:: - :toctree: api/ - - DataFrame.dtypes - DataFrame.info - DataFrame.select_dtypes - DataFrame.values - DataFrame.ndim - DataFrame.size - DataFrame.shape - DataFrame.memory_usage - DataFrame.empty - -Conversion -~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.astype - DataFrame.copy - -Indexing, iteration -~~~~~~~~~~~~~~~~~~~ -.. 
autosummary:: - :toctree: api/ - - DataFrame.head - DataFrame.at - DataFrame.iat - DataFrame.loc - DataFrame.iloc - DataFrame.insert - DataFrame.__iter__ - DataFrame.iteritems - DataFrame.keys - DataFrame.iterrows - DataFrame.itertuples - DataFrame.pop - DataFrame.tail - DataFrame.isin - DataFrame.where - DataFrame.mask - DataFrame.query - -For more information on ``.at``, ``.iat``, ``.loc``, and -``.iloc``, see the :ref:`indexing documentation `. - -Binary operator functions -~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.add - DataFrame.sub - DataFrame.mul - DataFrame.div - DataFrame.truediv - DataFrame.floordiv - DataFrame.mod - DataFrame.pow - DataFrame.radd - DataFrame.rsub - DataFrame.rmul - DataFrame.rdiv - DataFrame.rtruediv - DataFrame.rfloordiv - DataFrame.rmod - DataFrame.rpow - -Function application, GroupBy & window -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.apply - DataFrame.apply_chunks - DataFrame.apply_rows - DataFrame.pipe - DataFrame.agg - DataFrame.groupby - DataFrame.rolling - -.. _api.dataframe.stats: - -Computations / descriptive stats -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.all - DataFrame.any - DataFrame.clip - DataFrame.corr - DataFrame.count - DataFrame.cov - DataFrame.cummax - DataFrame.cummin - DataFrame.cumprod - DataFrame.cumsum - DataFrame.describe - DataFrame.kurt - DataFrame.kurtosis - DataFrame.max - DataFrame.mean - DataFrame.min - DataFrame.mode - DataFrame.prod - DataFrame.product - DataFrame.quantile - DataFrame.quantiles - DataFrame.rank - DataFrame.round - DataFrame.skew - DataFrame.sum - DataFrame.std - DataFrame.var - -Reindexing / selection / label manipulation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. 
autosummary:: - :toctree: api/ - - DataFrame.drop - DataFrame.drop_duplicates - DataFrame.equals - DataFrame.head - DataFrame.reindex - DataFrame.rename - DataFrame.reset_index - DataFrame.sample - DataFrame.searchsorted - DataFrame.set_index - DataFrame.repeat - DataFrame.tail - DataFrame.take - DataFrame.tile - -.. _api.dataframe.missing: - -Missing data handling -~~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.dropna - DataFrame.fillna - DataFrame.isna - DataFrame.isnull - DataFrame.nans_to_nulls - DataFrame.notna - DataFrame.notnull - DataFrame.replace - -Reshaping, sorting, transposing -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.argsort - DataFrame.interleave_columns - DataFrame.partition_by_hash - DataFrame.pivot - DataFrame.scatter_by_map - DataFrame.sort_values - DataFrame.sort_index - DataFrame.nlargest - DataFrame.nsmallest - DataFrame.stack - DataFrame.unstack - DataFrame.melt - DataFrame.explode - DataFrame.T - DataFrame.transpose - -Combining / comparing / joining / merging / encoding -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.append - DataFrame.assign - DataFrame.join - DataFrame.merge - DataFrame.update - DataFrame.label_encoding - DataFrame.one_hot_encoding - -Numerical operations -~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.acos - DataFrame.asin - DataFrame.atan - DataFrame.cos - DataFrame.exp - DataFrame.log - DataFrame.sin - DataFrame.sqrt - DataFrame.tan - -Time Series-related -~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.shift - -Serialization / IO / conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. 
autosummary:: - :toctree: api/ - - DataFrame.as_gpu_matrix - DataFrame.as_matrix - DataFrame.from_arrow - DataFrame.from_pandas - DataFrame.from_records - DataFrame.hash_columns - DataFrame.to_arrow - DataFrame.to_dlpack - DataFrame.to_parquet - DataFrame.to_csv - DataFrame.to_hdf - DataFrame.to_dict - DataFrame.to_json - DataFrame.to_pandas - DataFrame.to_feather - DataFrame.to_records - DataFrame.to_string From 0a7472a8978547b27b442d35bb79a730c25f0195 Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Fri, 1 Oct 2021 12:15:08 -0400 Subject: [PATCH 03/18] Update subword_tokenize.rst --- docs/cudf/source/api_docs/subword_tokenize.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/cudf/source/api_docs/subword_tokenize.rst b/docs/cudf/source/api_docs/subword_tokenize.rst index 20a5e344e42..64634cdf412 100644 --- a/docs/cudf/source/api_docs/subword_tokenize.rst +++ b/docs/cudf/source/api_docs/subword_tokenize.rst @@ -8,3 +8,6 @@ Constructor .. autosummary:: :toctree: api/ :template: autosummary/class_with_autosummary.rst + + SubwordTokenizer + From 415dbd3e6896ae45c2a3e713169b4ca68ffd733a Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Fri, 1 Oct 2021 12:24:37 -0400 Subject: [PATCH 04/18] Update subword_tokenize.rst --- docs/cudf/source/api_docs/subword_tokenize.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/cudf/source/api_docs/subword_tokenize.rst b/docs/cudf/source/api_docs/subword_tokenize.rst index 64634cdf412..e8737a9ee0a 100644 --- a/docs/cudf/source/api_docs/subword_tokenize.rst +++ b/docs/cudf/source/api_docs/subword_tokenize.rst @@ -1,6 +1,6 @@ -========= +================ SubwordTokenizer -========= +================ .. 
currentmodule:: cudf.core.subword_tokenizer Constructor @@ -10,4 +10,3 @@ Constructor :template: autosummary/class_with_autosummary.rst SubwordTokenizer - From b6de7a72b8984a0f74d18f75cc73417542c1d9b6 Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Fri, 1 Oct 2021 12:51:15 -0400 Subject: [PATCH 05/18] Added `subword_tokenize` entry in the index --- docs/cudf/source/api_docs/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst index 960608d8f3c..0bf1d11bff4 100644 --- a/docs/cudf/source/api_docs/index.rst +++ b/docs/cudf/source/api_docs/index.rst @@ -17,4 +17,5 @@ This page provides a list of all publicly accessible modules, methods and classe general_utilities window io + subword_tokenize From 93bc413dbdbda652b7e3f02e5eaf8fc1449a81bd Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Thu, 28 Oct 2021 15:52:48 -0400 Subject: [PATCH 06/18] added initial changes --- python/cudf/cudf/core/index.py | 78 ++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index c003454fb59..98c90dbb3f4 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1857,6 +1857,84 @@ def _get_dt_field(self, field): def is_boolean(self): return False + + + def ceil(self, field): + """ + Perform ceil operation on the data to the specified freq. + + Parameters + ---------- + field : str + One of ["D", "H", "T", "S", "L", "U", "N"] + See `frequency aliases `_ + for more details on these aliases. + + Returns + ------- + DatetimeIndex or Series + Index of the same type for a DatetimeIndex, + or a Series with the same index for a Series. + + Examples + -------- + >>> import cudf + >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... 
"2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> t.dt.ceil("T") + 0 2001-01-01 00:05:00 + 1 2001-01-01 00:05:00 + 2 2001-01-01 00:06:00 + dtype: datetime64[ns] + """ + out_column = self._values.ceil(field) + + out_column = column.build_column( + data=out_column.base_data, + dtype=out_column.dtype, + mask=out_column.base_mask, + offset=out_column.offset, + ) + + return as_index( + out_column, index=self.series._index, name=self.series.name + ) + + def floor(self, field): + """ + Perform floor operation on the data to the specified freq. + + Parameters + ---------- + field : str + One of ["D", "H", "T", "S", "L", "U", "N"] + See `frequency aliases `_ + for more details on these aliases. + + Returns + ------- + DatetimeIndex or Series + Index of the same type for a DatetimeIndex, + or a Series with the same index for a Series. + + Examples + -------- + >>> import cudf + >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> t.dt.floor("T") + 0 2001-01-01 00:04:00 + 1 2001-01-01 00:04:00 + 2 2001-01-01 00:05:00 + dtype: datetime64[ns] + """ + out_column = self.series._column.floor(field) + + return Series( + data=out_column, index=self.series._index, name=self.series.name + ) class TimedeltaIndex(GenericIndex): From 1f77dd48dbaa3874a60ac3a97136bbe9b7978ea8 Mon Sep 17 00:00:00 2001 From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com> Date: Thu, 28 Oct 2021 16:57:45 -0400 Subject: [PATCH 07/18] added floor/ceil with DatetimeIndex --- python/cudf/cudf/core/index.py | 48 ++++++++++++++++------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 98c90dbb3f4..7aebaef1702 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1873,20 +1873,15 @@ def ceil(self, field): Returns ------- - DatetimeIndex or Series - Index of the same type for a DatetimeIndex, - or a 
Series with the same index for a Series. + DatetimeIndex + Index of the same type for a DatetimeIndex Examples -------- >>> import cudf - >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", - ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") - >>> t.dt.ceil("T") - 0 2001-01-01 00:05:00 - 1 2001-01-01 00:05:00 - 2 2001-01-01 00:06:00 - dtype: datetime64[ns] + >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00","1999-12-31 18:40:00"]) + >>> gIndex.ceil("T") + DatetimeIndex(['2020-05-31 08:00:00', '1999-12-31 18:40:00'], dtype='datetime64[ns]', freq=None) """ out_column = self._values.ceil(field) @@ -1898,7 +1893,7 @@ def ceil(self, field): ) return as_index( - out_column, index=self.series._index, name=self.series.name + out_column, name=self.series.name ) def floor(self, field): @@ -1915,27 +1910,28 @@ def floor(self, field): Returns ------- - DatetimeIndex or Series - Index of the same type for a DatetimeIndex, - or a Series with the same index for a Series. + DatetimeIndex + Index of the same type for a DatetimeIndex Examples -------- >>> import cudf - >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", - ... 
"2001-01-01 00:05:04"], dtype="datetime64[ns]") - >>> t.dt.floor("T") - 0 2001-01-01 00:04:00 - 1 2001-01-01 00:04:00 - 2 2001-01-01 00:05:00 - dtype: datetime64[ns] - """ - out_column = self.series._column.floor(field) - - return Series( - data=out_column, index=self.series._index, name=self.series.name + >>> gIndex = pd.DatetimeIndex(["2020-05-31 08:59:59","1999-12-31 18:44:59"]) + >>> gIndex.floor("T") + DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'], dtype='datetime64[ns]', freq=None) + """ + out_column = self._values.floor(field) + + out_column = column.build_column( + data=out_column.base_data, + dtype=out_column.dtype, + mask=out_column.base_mask, + offset=out_column.offset, ) + return as_index( + out_column, name=self.series.name + ) class TimedeltaIndex(GenericIndex): """ From bcddcdeb53a9ffe4918ee331ce632c2926a32c89 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Thu, 28 Oct 2021 21:07:00 +0000 Subject: [PATCH 08/18] added docs for floor/ceil --- docs/cudf/source/api_docs/index_objects.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 30269bb2a72..2a4dd5ff9c8 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -280,6 +280,8 @@ Time-specific operations :toctree: api/ DatetimeIndex.round + DatetimeIndex.ceil + DatetimeIndex.floor Conversion ~~~~~~~~~~ From 937f7b42c2de88104a66295e588a4880255e349c Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Thu, 28 Oct 2021 21:13:11 +0000 Subject: [PATCH 09/18] fixed style issues --- python/cudf/cudf/core/index.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 7aebaef1702..72640a43694 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1857,8 +1857,7 @@ def _get_dt_field(self, field): def 
is_boolean(self): return False - - + def ceil(self, field): """ Perform ceil operation on the data to the specified freq. @@ -1879,9 +1878,11 @@ def ceil(self, field): Examples -------- >>> import cudf - >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00","1999-12-31 18:40:00"]) + >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00", + ... "1999-12-31 18:40:00"]) >>> gIndex.ceil("T") - DatetimeIndex(['2020-05-31 08:00:00', '1999-12-31 18:40:00'], dtype='datetime64[ns]', freq=None) + DatetimeIndex(['2020-05-31 08:00:00', '1999-12-31 18:40:00'], + dtype='datetime64[ns]', freq=None) """ out_column = self._values.ceil(field) @@ -1892,9 +1893,7 @@ def ceil(self, field): offset=out_column.offset, ) - return as_index( - out_column, name=self.series.name - ) + return as_index(out_column, name=self.series.name) def floor(self, field): """ @@ -1916,9 +1915,11 @@ def floor(self, field): Examples -------- >>> import cudf - >>> gIndex = pd.DatetimeIndex(["2020-05-31 08:59:59","1999-12-31 18:44:59"]) + >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:59:59" + ... 
,"1999-12-31 18:44:59"]) >>> gIndex.floor("T") - DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'], dtype='datetime64[ns]', freq=None) + DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'], + dtype='datetime64[ns]', freq=None) """ out_column = self._values.floor(field) @@ -1929,9 +1930,8 @@ def floor(self, field): offset=out_column.offset, ) - return as_index( - out_column, name=self.series.name - ) + return as_index(out_column, name=self.series.name) + class TimedeltaIndex(GenericIndex): """ From 74e2b0eef3e1df3e03d3e0ca46e3eb72d6efbaf6 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 29 Oct 2021 01:02:55 +0000 Subject: [PATCH 10/18] added python tests --- python/cudf/cudf/tests/test_index.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index c6cf7c4e6f5..0424fe576a6 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2470,3 +2470,25 @@ def test_index_type_methods(data, func): assert_eq(False, actual) else: assert_eq(expected, actual) + + +@pytest.mark.parametrize("data", [1000000, 2000000, 3000000, 4000000, 5000000]) +def test_index_datetime_ceil(data): + cuidx = cudf.DatetimeIndex(data) + pidx = cuidx.to_pandas() + + pidx_ceil = pidx.ceil("T") + cuidx_ceil = cuidx.ceil("T") + + assert_eq(pidx_ceil, cuidx_ceil) + + +@pytest.mark.parametrize("data", [1000000, 2000000, 3000000, 4000000, 5000000]) +def test_index_datetime_floor(data): + cuidx = cudf.DatetimeIndex(data) + pidx = cuidx.to_pandas() + + pidx_floor = pidx.floor("T") + cuidx_floor = cuidx.floor("T") + + assert_eq(pidx_floor, cuidx_floor) From 0eb4f1cf5e9ee0ac51949b0c3e6eb1b1b94cd899 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 29 Oct 2021 03:13:25 +0000 Subject: [PATCH 11/18] added python tests --- python/cudf/cudf/tests/test_index.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git 
a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 0424fe576a6..4b36807ea52 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2472,9 +2472,8 @@ def test_index_type_methods(data, func): assert_eq(expected, actual) -@pytest.mark.parametrize("data", [1000000, 2000000, 3000000, 4000000, 5000000]) -def test_index_datetime_ceil(data): - cuidx = cudf.DatetimeIndex(data) +def test_index_datetime_ceil(): + cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) pidx = cuidx.to_pandas() pidx_ceil = pidx.ceil("T") @@ -2483,9 +2482,8 @@ def test_index_datetime_ceil(data): assert_eq(pidx_ceil, cuidx_ceil) -@pytest.mark.parametrize("data", [1000000, 2000000, 3000000, 4000000, 5000000]) -def test_index_datetime_floor(data): - cuidx = cudf.DatetimeIndex(data) +def test_index_datetime_floor(): + cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) pidx = cuidx.to_pandas() pidx_floor = pidx.floor("T") From e7bcc7b51c6e2292af31758746293777ef70b414 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Fri, 29 Oct 2021 14:59:49 +0000 Subject: [PATCH 12/18] fixed return arg --- python/cudf/cudf/core/index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 72640a43694..ae667ec14d4 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1893,7 +1893,7 @@ def ceil(self, field): offset=out_column.offset, ) - return as_index(out_column, name=self.series.name) + return as_index(out_column, name=self.name) def floor(self, field): """ @@ -1930,7 +1930,7 @@ def floor(self, field): offset=out_column.offset, ) - return as_index(out_column, name=self.series.name) + return as_index(out_column, name=self.name) class TimedeltaIndex(GenericIndex): From 2c97aa5fba28ce39a8b0b7752846303dee990fc4 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Mon, 1 Nov 2021 16:34:22 
+0000 Subject: [PATCH 13/18] added depr. warnings --- python/cudf/cudf/core/frame.py | 12 ++++++++++++ python/cudf/cudf/tests/test_index.py | 12 ++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 0b895460410..edb931f1440 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3517,6 +3517,12 @@ def ceil(self): 3 5.0 dtype: float64 """ + + warnings.warn( + "Series.ceil is deprecated and will be removed in the future", + DeprecationWarning, + ) + return self._unaryop("ceil") def floor(self): @@ -3549,6 +3555,12 @@ def floor(self): 5 3.0 dtype: float64 """ + + warnings.warn( + "Series.floor is deprecated and will be removed in the future", + DeprecationWarning, + ) + return self._unaryop("floor") def scale(self): diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 4b36807ea52..ab88cf553d4 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2482,11 +2482,11 @@ def test_index_datetime_ceil(): assert_eq(pidx_ceil, cuidx_ceil) -def test_index_datetime_floor(): - cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) - pidx = cuidx.to_pandas() +# def test_index_datetime_floor(): +# cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) +# pidx = cuidx.to_pandas() - pidx_floor = pidx.floor("T") - cuidx_floor = cuidx.floor("T") +# pidx_floor = pidx.floor("T") +# cuidx_floor = cuidx.floor("T") - assert_eq(pidx_floor, cuidx_floor) +# assert_eq(pidx_floor, cuidx_floor) From b790eac159c1db563b56632d29cd0e9426adefcc Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Tue, 2 Nov 2021 00:25:53 +0000 Subject: [PATCH 14/18] added floor test --- python/cudf/cudf/tests/test_index.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 
ab88cf553d4..4b36807ea52 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2482,11 +2482,11 @@ def test_index_datetime_ceil(): assert_eq(pidx_ceil, cuidx_ceil) -# def test_index_datetime_floor(): -# cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) -# pidx = cuidx.to_pandas() +def test_index_datetime_floor(): + cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) + pidx = cuidx.to_pandas() -# pidx_floor = pidx.floor("T") -# cuidx_floor = cuidx.floor("T") + pidx_floor = pidx.floor("T") + cuidx_floor = cuidx.floor("T") -# assert_eq(pidx_floor, cuidx_floor) + assert_eq(pidx_floor, cuidx_floor) From f2c3d1fd4ba9daa5a8fead1574fc5e1b8fa7f23d Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Wed, 10 Nov 2021 20:39:12 +0000 Subject: [PATCH 15/18] addressing some reviews --- python/cudf/cudf/core/index.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 00146e248ee..6609ae0dcf0 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1924,14 +1924,7 @@ def ceil(self, field): """ out_column = self._values.ceil(field) - out_column = column.build_column( - data=out_column.base_data, - dtype=out_column.dtype, - mask=out_column.base_mask, - offset=out_column.offset, - ) - - return as_index(out_column, name=self.name) + return self.__class__._from_data({self.name: out_column}) def floor(self, field): """ @@ -1961,14 +1954,7 @@ def floor(self, field): """ out_column = self._values.floor(field) - out_column = column.build_column( - data=out_column.base_data, - dtype=out_column.dtype, - mask=out_column.base_mask, - offset=out_column.offset, - ) - - return as_index(out_column, name=self.name) + return self.__class__._from_data({self.name: out_column}) class TimedeltaIndex(GenericIndex): From b3b8684250e687186bf0885ab8f99e70d1db9501 Mon Sep 17 00:00:00 2001 
From: Mayank Anand Date: Wed, 10 Nov 2021 20:44:36 +0000 Subject: [PATCH 16/18] parametrizing test cases --- python/cudf/cudf/tests/test_index.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 4b36807ea52..ab211616a02 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -2472,21 +2472,27 @@ def test_index_type_methods(data, func): assert_eq(expected, actual) -def test_index_datetime_ceil(): +@pytest.mark.parametrize( + "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] +) +def test_index_datetime_ceil(resolution): cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) pidx = cuidx.to_pandas() - pidx_ceil = pidx.ceil("T") - cuidx_ceil = cuidx.ceil("T") + pidx_ceil = pidx.ceil(resolution) + cuidx_ceil = cuidx.ceil(resolution) assert_eq(pidx_ceil, cuidx_ceil) -def test_index_datetime_floor(): +@pytest.mark.parametrize( + "resolution", ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"] +) +def test_index_datetime_floor(resolution): cuidx = cudf.DatetimeIndex([1000000, 2000000, 3000000, 4000000, 5000000]) pidx = cuidx.to_pandas() - pidx_floor = pidx.floor("T") - cuidx_floor = cuidx.floor("T") + pidx_floor = pidx.floor(resolution) + cuidx_floor = cuidx.floor(resolution) assert_eq(pidx_floor, cuidx_floor) From 2f56bebc6d8513d22ffc4389c167d8ecad5ea369 Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Wed, 10 Nov 2021 20:50:35 +0000 Subject: [PATCH 17/18] fixed docs issues --- python/cudf/cudf/core/index.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 6609ae0dcf0..295a3cb1569 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1903,9 +1903,10 @@ def ceil(self, field): Parameters ---------- field : str - One of ["D", "H", "T", "S", "L", "U", 
"N"] + One of ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]. + Must be a fixed frequency like 'S' (second) not 'ME' (month end). See `frequency aliases `_ + user_guide/timeseries.html#timeseries-offset-aliases>`__ for more details on these aliases. Returns @@ -1933,9 +1934,10 @@ def floor(self, field): Parameters ---------- field : str - One of ["D", "H", "T", "S", "L", "U", "N"] + One of ["D", "H", "T", "min", "S", "L", "ms", "U", "us", "N"]. + Must be a fixed frequency like 'S' (second) not 'ME' (month end). See `frequency aliases `_ + user_guide/timeseries.html#timeseries-offset-aliases>`__ for more details on these aliases. Returns From b8ff1b4dd8809c848fa9deef91f382a6ff8f499b Mon Sep 17 00:00:00 2001 From: Mayank Anand Date: Wed, 17 Nov 2021 19:01:22 +0000 Subject: [PATCH 18/18] modify warning message --- python/cudf/cudf/core/frame.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 4f8ff55032d..d6b6a19a610 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3675,7 +3675,8 @@ def ceil(self): """ warnings.warn( - "Series.ceil is deprecated and will be removed in the future", + "Series.ceil and DataFrame.ceil are deprecated and will be " + "removed in the future", DeprecationWarning, ) @@ -3713,7 +3714,8 @@ def floor(self): """ warnings.warn( - "Series.floor is deprecated and will be removed in the future", + "Series.floor and DataFrame.floor are deprecated and will be " + "removed in the future", DeprecationWarning, )