From cfcf5a6463745c49687f0891b0384cfad4c92855 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Sep 2021 07:21:44 -0700 Subject: [PATCH 1/3] Minor improvements to deprecation warnings. --- python/cudf/cudf/core/column/string.py | 13 ++++++------- python/dask_cudf/dask_cudf/core.py | 6 +++--- python/dask_cudf/dask_cudf/io/parquet.py | 3 ++- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index c59081e4b59..2728e8181ee 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -4595,7 +4595,7 @@ def subword_tokenize( This function requires about 21x the number of character bytes in the input strings column as working memory. - ``ser.str.subword_tokenize`` will be depreciated in future versions. + ``Series.str.subword_tokenize`` will be deprecated in future versions. Use ``cudf.core.subword_tokenizer.SubwordTokenizer`` instead. Parameters @@ -4669,14 +4669,13 @@ def subword_tokenize( array([[0, 0, 2], [1, 0, 1]], dtype=uint32) """ - warning_message = ( - "`ser.str.subword_tokenize` API will be depreciated" - " in future versions of cudf.\n" - "Use `cudf.core.subword_tokenizer.SubwordTokenizer` " - "instead" + warnings.warn( + "`Series.str.subword_tokenize` will be deprecated " + "in future versions of cudf.\n" + "Use `cudf.core.subword_tokenizer.SubwordTokenizer` instead.", + FutureWarning, ) - warnings.warn(warning_message, FutureWarning) tokens, masks, metadata = libstrings.subword_tokenize_vocab_file( self._column, hash_file, diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index e604e5511da..0aa3cb239aa 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -298,15 +298,15 @@ def var( return _parallel_var(self, meta, skipna, split_every, out) def repartition(self, *args, **kwargs): - """ Wraps dask.dataframe DataFrame.repartition method. + """Wraps dask.dataframe DataFrame.repartition method. Uses DataFrame.shuffle if `columns=` is specified. """ # TODO: Remove this function in future(0.17 release) columns = kwargs.pop("columns", None) if columns: warnings.warn( - "The column argument will be removed from repartition in " - " future versions of dask_cudf. Use DataFrame.shuffle().", + "The columns argument will be removed from repartition in " + "future versions of dask_cudf. Use DataFrame.shuffle().", DeprecationWarning, ) warnings.warn( diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 850cc0843cc..6a03c26a210 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -370,7 +370,8 @@ def read_parquet( if row_groups_per_part: warnings.warn( "row_groups_per_part is deprecated. " - "Pass an integer value to split_row_groups instead." + "Pass an integer value to split_row_groups instead.", + DeprecationWarning, ) if split_row_groups is None: split_row_groups = row_groups_per_part From 842d78e8bab527e4b99d049c74c3c75012f90aff Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Sep 2021 12:18:58 -0700 Subject: [PATCH 2/3] Use FutureWarning instead of DeprecationWarning. --- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/multiindex.py | 4 ++-- python/cudf/cudf/core/series.py | 12 ++++++------ python/dask_cudf/dask_cudf/core.py | 2 +- python/dask_cudf/dask_cudf/io/parquet.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 574da689e79..3d2cd810417 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -610,7 +610,7 @@ def deserialize(cls, header, frames): "21.10 or older will no longer be deserializable " "after version 21.12. Please load and resave any " "pickles before upgrading to version 22.02.", - DeprecationWarning, + FutureWarning, ) header["columns"] = [header.pop("index_column")] header["column_names"] = pickle.dumps( diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 744c5cdc25d..9f999462044 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -867,7 +867,7 @@ def deserialize(cls, header, frames): "21.10 or older will no longer be deserializable " "after version 21.12. Please load and resave any " "pickles before upgrading to version 22.02.", - DeprecationWarning, + FutureWarning, ) header["column_names"] = header["names"] column_names = pickle.loads(header["column_names"]) @@ -877,7 +877,7 @@ def deserialize(cls, header, frames): "21.08 or older will no longer be deserializable " "after version 21.10. Please load and resave any " "pickles before upgrading to version 21.12.", - DeprecationWarning, + FutureWarning, ) df = cudf.DataFrame.deserialize(header["source_data"], frames) return cls.from_frame(df)._set_names(column_names) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 9439de5b23b..156ae4636b6 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -378,7 +378,7 @@ def deserialize(cls, header, frames): "21.10 or older will no longer be deserializable " "after version 21.12. Please load and resave any " "pickles before upgrading to version 22.02.", - DeprecationWarning, + FutureWarning, ) header["columns"] = [header.pop("column")] header["column_names"] = pickle.dumps( @@ -838,7 +838,7 @@ def set_mask(self, mask, null_count=None): """ warnings.warn( "Series.set_mask is deprecated and will be removed in the future.", - DeprecationWarning, + FutureWarning, ) return self._from_data( {self.name: self._column.set_mask(mask)}, self._index @@ -3249,7 +3249,7 @@ def update(self, other): def reverse(self): warnings.warn( "Series.reverse is deprecated and will be removed in the future.", - DeprecationWarning, + FutureWarning, ) rinds = column.arange((self._column.size - 1), -1, -1, dtype=np.int32) return self._from_data( @@ -3373,9 +3373,9 @@ def label_encoding(self, cats, dtype=None, na_sentinel=-1): """ warnings.warn( - "Series.label_encoding is deprecated and will be removed in the future.\ - Consider using cuML's LabelEncoder instead", - DeprecationWarning, + "Series.label_encoding is deprecated and will be removed in the " + "future. Consider using cuML's LabelEncoder instead.", + FutureWarning, ) def _return_sentinel_series(): diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index 0aa3cb239aa..328a26ff81c 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -307,7 +307,7 @@ def repartition(self, *args, **kwargs): warnings.warn( "The columns argument will be removed from repartition in " "future versions of dask_cudf. Use DataFrame.shuffle().", - DeprecationWarning, + FutureWarning, ) warnings.warn( "Rearranging data by column hash. Divisions will lost. " diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 6a03c26a210..cb9268c41f9 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -371,7 +371,7 @@ def read_parquet( warnings.warn( "row_groups_per_part is deprecated. " "Pass an integer value to split_row_groups instead.", - DeprecationWarning, + FutureWarning, ) if split_row_groups is None: split_row_groups = row_groups_per_part From 8aff51ed37d732911fd55e165c559b658b796552 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 30 Sep 2021 12:22:48 -0700 Subject: [PATCH 3/3] Clarify deprecation notice text. --- python/cudf/cudf/core/column/string.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 2728e8181ee..81a4b50f00f 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -4595,7 +4595,7 @@ def subword_tokenize( This function requires about 21x the number of character bytes in the input strings column as working memory. - ``Series.str.subword_tokenize`` will be deprecated in future versions. + ``Series.str.subword_tokenize`` is deprecated and will be removed. Use ``cudf.core.subword_tokenizer.SubwordTokenizer`` instead. Parameters @@ -4670,9 +4670,9 @@ def subword_tokenize( [1, 0, 1]], dtype=uint32) """ warnings.warn( - "`Series.str.subword_tokenize` will be deprecated " - "in future versions of cudf.\n" - "Use `cudf.core.subword_tokenizer.SubwordTokenizer` instead.", + "`Series.str.subword_tokenize` is deprecated and will be removed " + "in future versions of cudf. Use " + "`cudf.core.subword_tokenizer.SubwordTokenizer` instead.", FutureWarning, )