From 866d112b221dce6856a426f1432c5793abb54a2d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 26 Aug 2021 14:26:32 -0500 Subject: [PATCH 01/33] fix test --- python/cudf/cudf/tests/test_dataframe.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a337660b5b0..1cb393c27c1 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1259,7 +1259,8 @@ def test_dataframe_concat_different_numerical_columns(dtype1, dtype2): else: pres = pd.concat([df1, df2]) gres = cudf.concat([cudf.from_pandas(df1), cudf.from_pandas(df2)]) - assert_eq(cudf.from_pandas(pres), gres) + # Pandas 1.3.2+ returns mixed `object` dtype result. + assert_eq(cudf.from_pandas(pres.astype(gres.dtypes)), gres) def test_dataframe_concat_different_column_types(): @@ -1767,12 +1768,16 @@ def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): for i in range(num_cols): colname = string.ascii_lowercase[i] - data = pd.Series(np.random.randint(0, 26, num_rows).astype(dtype)) + data = pd.Series( + np.random.randint(0, 26, num_rows).astype(dtype), + dtype=pd.BooleanDtype() if dtype == "bool" else None, + ) if nulls == "some": idx = np.random.choice( num_rows, size=int(num_rows / 2), replace=False ) - data[idx] = null_rep + if len(idx): + data[idx] = null_rep elif nulls == "all": data[:] = null_rep pdf[colname] = data From b8be4916c98f78294a3c76fc2720772250731966 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 30 Aug 2021 12:13:56 -0500 Subject: [PATCH 02/33] fix initial pass of issues related to pandas 1.3 upgrade. --- conda/environments/cudf_dev_cuda11.0.yml | 2 +- conda/environments/cudf_dev_cuda11.2.yml | 2 +- python/cudf/cudf/core/dtypes.py | 8 ++--- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/multiindex.py | 2 +- python/cudf/cudf/tests/test_categorical.py | 39 +++++++++++++++++++-- python/cudf/cudf/tests/test_concat.py | 40 +++++++++++++++++++--- python/cudf/cudf/tests/test_dataframe.py | 21 +++++++----- python/cudf/cudf/tests/test_groupby.py | 3 +- python/cudf/cudf/tests/test_index.py | 19 ++++++---- python/cudf/cudf/tests/test_multiindex.py | 18 +++++----- 11 files changed, 116 insertions(+), 40 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 2c0984569db..bbe1ae70499 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -16,7 +16,7 @@ dependencies: - python>=3.7,<3.9 - numba>=0.53.1 - numpy - - pandas>=1.0,<1.3.0dev0 + - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda - fastavro>=0.22.9 - notebook>=0.5.0 diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 766d85e957b..ed4c3ee2efc 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -16,7 +16,7 @@ dependencies: - python>=3.7,<3.9 - numba>=0.53.1 - numpy - - pandas>=1.0,<1.3.0dev0 + - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda - fastavro>=0.22.9 - notebook>=0.5.0 diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 1b504310e99..5f21e883a4d 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -112,15 +112,15 @@ def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype": ) def to_pandas(self) -> pd.CategoricalDtype: - if self.categories is None: + if self._categories is None: categories = None else: if isinstance( - self.categories, (cudf.Float32Index, cudf.Float64Index) + self._categories, (cudf.Float32Index, cudf.Float64Index) ): - categories = self.categories.dropna().to_pandas() + categories = self._categories.dropna().to_pandas() else: - categories = self.categories.to_pandas() + categories = self._categories.to_pandas() return pd.CategoricalDtype(categories=categories, ordered=self.ordered) def _init_categories(self, categories: Any): diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 6be21ce74d2..f18c6fafd89 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -665,7 +665,7 @@ def difference(self, other, sort=None): if self.dtype != other.dtype: difference = difference.astype(self.dtype) - if sort is None: + if sort is None and len(other): return difference.sort_values() return difference diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 079a6d902b6..f17f8181c4e 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1476,7 +1476,7 @@ def to_pandas(self, nullable=False, **kwargs): if hasattr(self, "_source_data"): result = self._source_data.to_pandas(nullable=nullable) result.columns = self.names - return pd.MultiIndex.from_frame(result) + return pd.MultiIndex.from_frame(result, names=self.names) pandas_codes = [] for code in self.codes.columns: diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 51327038c39..8d6c551761d 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -396,7 +396,18 @@ def test_categorical_as_unordered(pd_str_cat, inplace): @pytest.mark.parametrize("from_ordered", [True, False]) @pytest.mark.parametrize("to_ordered", [True, False]) -@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize( + "inplace", + [ + pytest.param( + True, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/43232" + ), + ), + False, + ], +) def test_categorical_reorder_categories( pd_str_cat, from_ordered, to_ordered, inplace ): @@ -420,7 +431,18 @@ def test_categorical_reorder_categories( assert str(cd_sr_1) == str(pd_sr_1) -@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize( + "inplace", + [ + pytest.param( + True, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/43232" + ), + ), + False, + ], +) def test_categorical_add_categories(pd_str_cat, inplace): pd_sr = pd.Series(pd_str_cat.copy()) @@ -441,7 +463,18 @@ def test_categorical_add_categories(pd_str_cat, inplace): assert_eq(pd_sr_1, cd_sr_1) -@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize( + "inplace", + [ + pytest.param( + True, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/43232" + ), + ), + False, + ], +) def test_categorical_remove_categories(pd_str_cat, inplace): pd_sr = pd.Series(pd_str_cat.copy()) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index b1b71bd882e..3983e8a5f4a 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -525,9 +525,22 @@ def test_concat_empty_dataframes(df, other, ignore_index): if expected.shape != df.shape: for key, col in actual[actual.columns].iteritems(): if is_categorical_dtype(col.dtype): - expected[key] = expected[key].fillna("-1") + if expected[key].dtype != "category": + # TODO: Pandas bug: + # https://github.com/pandas-dev/pandas/issues/42840 + expected[key] = expected[key].fillna("-1").astype("str") + else: + expected[key] = ( + expected[key] + .cat.add_categories(["-1"]) + .fillna("-1") + .astype("str") + ) actual[key] = col.astype("str").fillna("-1") - assert_eq(expected.fillna(-1), actual.fillna(-1), check_dtype=False) + else: + expected[key] = expected[key].fillna(-1) + actual[key] = col.fillna(-1) + assert_eq(expected, actual, check_dtype=False) else: assert_eq( expected, actual, check_index_type=False if gdf.empty else True @@ -1079,8 +1092,23 @@ def test_concat_join_empty_dataframes( if axis == 0: for key, col in actual[actual.columns].iteritems(): if is_categorical_dtype(col.dtype): - expected[key] = expected[key].fillna("-1") + if expected[key].dtype != "category": + # TODO: Pandas bug: + # https://github.com/pandas-dev/pandas/issues/42840 + expected[key] = ( + expected[key].fillna("-1").astype("str") + ) + else: + expected[key] = ( + expected[key] + .cat.add_categories(["-1"]) + .fillna("-1") + .astype("str") + ) actual[key] = col.astype("str").fillna("-1") + else: + expected[key] = expected[key].fillna(-1) + actual[key] = col.fillna(-1) assert_eq( expected.fillna(-1), @@ -1100,7 +1128,11 @@ def test_concat_join_empty_dataframes( check_column_type=False, ) assert_eq( - expected, actual, check_index_type=False, check_column_type=False + expected, + actual, + check_dtype=False, + check_index_type=False, + check_column_type=False, ) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 822613b599e..d5a690dc8a1 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1765,12 +1765,13 @@ def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): pdf = pd.DataFrame() null_rep = np.nan if dtype in ["float32", "float64"] else None - + np_dtype = dtype + dtype = np.dtype(dtype) + dtype = cudf.utils.dtypes.np_dtypes_to_pandas_dtypes.get(dtype, dtype) for i in range(num_cols): colname = string.ascii_lowercase[i] data = pd.Series( - np.random.randint(0, 26, num_rows).astype(dtype), - dtype=pd.BooleanDtype() if dtype == "bool" else None, + np.random.randint(0, 26, num_rows).astype(np_dtype), dtype=dtype, ) if nulls == "some": idx = np.random.choice( @@ -1789,8 +1790,8 @@ def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): expect = pdf.transpose() - assert_eq(expect, got_function) - assert_eq(expect, got_property) + assert_eq(expect, got_function.to_pandas(nullable=True)) + assert_eq(expect, got_property.to_pandas(nullable=True)) @pytest.mark.parametrize("num_cols", [1, 2, 10]) @@ -7849,8 +7850,13 @@ def test_describe_misc_exclude(df, exclude): cudf.DataFrame( { "a": ["hello", "world", "rapids", "ai", "nvidia"], - "b": cudf.Series([1, 21, 21, 11, 11], dtype="timedelta64[s]"), - } + "b": cudf.Series( + [1, 21, 21, 11, 11], + dtype="timedelta64[s]", + index=["a", "b", "c", "d", " e"], + ), + }, + index=["a", "b", "c", "d", " e"], ), cudf.DataFrame( { @@ -7866,7 +7872,6 @@ def test_describe_misc_exclude(df, exclude): @pytest.mark.parametrize("dropna", [True, False]) def test_dataframe_mode(df, numeric_only, dropna): pdf = df.to_pandas() - expected = pdf.mode(numeric_only=numeric_only, dropna=dropna) actual = df.mode(numeric_only=numeric_only, dropna=dropna) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index df6a9336e97..80f5155e330 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -370,6 +370,7 @@ def test_groupby_2keys_agg(nelem, func): # https://github.com/pandas-dev/pandas/issues/40685 is resolved. # "func", ["min", "max", "idxmin", "idxmax", "count", "sum"], ) +@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/43209") def test_groupby_agg_decimal(num_groups, nelem_per_group, func): # The number of digits after the decimal to use. decimal_digits = 2 @@ -1948,7 +1949,7 @@ def test_groupby_fillna_multi_value(nelem): # In this specific case, Pandas returns the rows in grouped order. # Cudf returns columns in orginal order. - expect.index = expect.index.get_level_values(1) + expect.index = expect.index.get_level_values(0) assert_groupby_results_equal(expect[value_cols], got[value_cols]) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index f80bdec0ab5..e20328c296a 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -550,7 +550,15 @@ def test_empty_df_head_tail_index(n): None, ), (pd.Index(range(5)), pd.Index(range(4)) > 0, None, ValueError), - (pd.Index(range(5)), pd.Index(range(5)) > 1, 10, None), + pytest.param( + pd.Index(range(5)), + pd.Index(range(5)) > 1, + 10, + None, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/43240" + ), + ), ( pd.Index(np.arange(10)), (pd.Index(np.arange(10)) % 3) == 0, @@ -671,13 +679,12 @@ def test_index_where(data, condition, other, error): assert_eq(expect.categories, got.categories) else: assert_eq( - ps.where(ps_condition, other=ps_other) - .fillna(gs._columns[0].default_na_value()) - .values, + ps.where(ps_condition, other=ps_other).fillna( + gs._columns[0].default_na_value() + ), gs.where(gs_condition, other=gs_other) .to_pandas() - .fillna(gs._columns[0].default_na_value()) - .values, + .fillna(gs._columns[0].default_na_value()), ) else: assert_exceptions_equal( diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 18a82b58670..c7d6f4f0456 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -1061,42 +1061,42 @@ def test_multiindex_values_host(): @pytest.mark.parametrize( - "pdi, fill_value, expected", + "gdi, fill_value, expected", [ ( - pd.MultiIndex( + cudf.MultiIndex( levels=[[1, 3, 4, None], [1, 2, 5]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), 5, - pd.MultiIndex( + cudf.MultiIndex( levels=[[1, 3, 4, 5], [1, 2, 5]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), ), ( - pd.MultiIndex( + cudf.MultiIndex( levels=[[1, 3, 4, None], [1, None, 5]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), 100, - pd.MultiIndex( + cudf.MultiIndex( levels=[[1, 3, 4, 100], [1, 100, 5]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), ), ( - pd.MultiIndex( + cudf.MultiIndex( levels=[["a", "b", "c", None], ["1", None, "5"]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], ), "100", - pd.MultiIndex( + cudf.MultiIndex( levels=[["a", "b", "c", "100"], ["1", "100", "5"]], codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]], names=["x", "y"], @@ -1104,9 +1104,7 @@ def test_multiindex_values_host(): ), ], ) -def test_multiIndex_fillna(pdi, fill_value, expected): - gdi = cudf.from_pandas(pdi) - +def test_multiIndex_fillna(gdi, fill_value, expected): assert_eq(expected, gdi.fillna(fill_value)) From 2890baca01047d5c8217e99c34728d110e858c26 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 30 Aug 2021 21:56:22 -0500 Subject: [PATCH 03/33] fix failures. --- python/cudf/cudf/core/column/datetime.py | 2 +- python/cudf/cudf/core/column/timedelta.py | 2 +- python/cudf/cudf/core/window/rolling.py | 4 +- python/cudf/cudf/tests/test_dataframe.py | 9 +- python/cudf/cudf/tests/test_rank.py | 21 ++-- python/cudf/cudf/tests/test_replace.py | 144 +++++++++++----------- python/cudf/cudf/tests/test_timedelta.py | 17 ++- 7 files changed, 106 insertions(+), 93 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 46ff1990ac2..3278cb0be63 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -258,7 +258,7 @@ def as_timedelta_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.TimeDeltaColumn": raise TypeError( - f"cannot astype a datetimelike from [{self.dtype}] to [{dtype}]" + f"cannot astype a datetimelike from {self.dtype} to {dtype}" ) def as_numerical_column( diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 1c4ed4c7f98..7e03e87ac0a 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -343,7 +343,7 @@ def as_datetime_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.DatetimeColumn": raise TypeError( - f"cannot astype a timedelta from [{self.dtype}] to [{dtype}]" + f"cannot astype a timedelta from {self.dtype} to {dtype}" ) def as_string_column( diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 317ce29d00e..8724cd47d00 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -393,7 +393,9 @@ def __init__(self, groupby, window, min_periods=None, center=False): # of `groupby.grouping.keys` and `groupby.obj`. # As an optimization, avoid gathering those twice. self._group_keys = groupby.grouping.keys.take(sort_order) - obj = groupby.obj.take(sort_order) + obj = groupby.obj.drop( + columns=groupby.grouping._key_column_names_from_obj + ).take(sort_order) gb_size = groupby.size().sort_index() self._group_starts = ( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index d5a690dc8a1..2100bca2606 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -8612,7 +8612,14 @@ def test_explode(data, labels, ignore_index, p_index, label_to_explode): pdf = pd.DataFrame(data, index=p_index, columns=labels) gdf = cudf.from_pandas(pdf) - expect = pdf.explode(label_to_explode, ignore_index) + # TODO: Remove this workaround after + # following issue is fixed: + # https://github.com/pandas-dev/pandas/issues/43314 + if isinstance(label_to_explode, int): + pdlabel_to_explode = [label_to_explode] + else: + pdlabel_to_explode = label_to_explode + expect = pdf.explode(pdlabel_to_explode, ignore_index) got = gdf.explode(label_to_explode, ignore_index) assert_eq(expect, got, check_dtype=False) diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index 563278e3a8f..7b89d6f667a 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -58,19 +58,16 @@ def test_rank_all_arguments( expect = pdf["str"].rank(**kwargs) got = gdf["str"].rank(**kwargs) assert expect.empty == got.empty - - # TODO: https://github.com/pandas-dev/pandas/issues/32593 - # Dataframe (bug in pandas) - if ( - na_option == "top" - and method == "first" - and not dtype == "O" - and ascending - ): - assert_eq(gdf.rank(**kwargs), pdf.rank(**kwargs)) + expected = pdf.select_dtypes(include=np.number) else: - with pytest.raises(AssertionError, match="values are different"): - assert_eq(gdf.rank(**kwargs), pdf.rank(**kwargs)) + expected = pdf.copy(deep=True) + + # TODO: Remove per column iteration once the + # following issue is fixedhttps://github.com/pandas-dev/pandas/issues/43310 + for col in expected.columns: + expected[col] = pdf[col].rank(**kwargs) + actual = gdf.rank(**kwargs) + assert_eq(expected, actual) def test_rank_error_arguments(pdf): diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index 33bc56a2522..43d477190ae 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -958,88 +958,82 @@ def test_numeric_series_replace_dtype(series_dtype, replacement): assert_eq(expect, got) -def test_replace_inplace(): - data = np.array([5, 1, 2, 3, 4]) - sr = cudf.Series(data) - psr = pd.Series(data) - - sr_copy = sr.copy() - psr_copy = psr.copy() - - assert_eq(sr, psr) - assert_eq(sr_copy, psr_copy) - sr.replace(5, 0, inplace=True) - psr.replace(5, 0, inplace=True) - assert_eq(sr, psr) - assert_eq(sr_copy, psr_copy) - - sr = cudf.Series(data) - psr = pd.Series(data) - - sr_copy = sr.copy() - psr_copy = psr.copy() - - assert_eq(sr, psr) - assert_eq(sr_copy, psr_copy) - sr.replace({5: 0, 3: -5}) - psr.replace({5: 0, 3: -5}) - assert_eq(sr, psr) - assert_eq(sr_copy, psr_copy) - srr = sr.replace() - psrr = psr.replace() - assert_eq(srr, psrr) - - psr = pd.Series(["one", "two", "three"], dtype="category") - sr = cudf.from_pandas(psr) - - sr_copy = sr.copy() - psr_copy = psr.copy() - - assert_eq(sr, psr) - assert_eq(sr_copy, psr_copy) - sr.replace("one", "two", inplace=True) - psr.replace("one", "two", inplace=True) - assert_eq(sr, psr) - assert_eq(sr_copy, psr_copy) - - pdf = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]}) - gdf = cudf.from_pandas(pdf) - - pdf_copy = pdf.copy() - gdf_copy = gdf.copy() - assert_eq(pdf, gdf) - assert_eq(pdf_copy, gdf_copy) - pdf.replace(5, 0, inplace=True) - gdf.replace(5, 0, inplace=True) - assert_eq(pdf, gdf) - assert_eq(pdf_copy, gdf_copy) +@pytest.mark.parametrize( + "pframe, replace_args", + [ + ( + pd.Series([5, 1, 2, 3, 4]), + {"to_replace": 5, "value": 0, "inplace": True}, + ), + ( + pd.Series([5, 1, 2, 3, 4]), + {"to_replace": {5: 0, 3: -5}, "inplace": True}, + ), + (pd.Series([5, 1, 2, 3, 4]), {}), + pytest.param( + pd.Series(["one", "two", "three"], dtype="category"), + {"to_replace": "one", "value": "two", "inplace": True}, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/43232" + ), + ), + ( + pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]}), + {"to_replace": 5, "value": 0, "inplace": True}, + ), + ( + pd.Series([1, 2, 3, 45]), + { + "to_replace": np.array([]).astype(int), + "value": 77, + "inplace": True, + }, + ), + ( + pd.Series([1, 2, 3, 45]), + { + "to_replace": np.array([]).astype(int), + "value": 77, + "inplace": False, + }, + ), + ( + pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), + {"to_replace": {"a": 2}, "value": {"a": -33}, "inplace": True}, + ), + ( + pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), + { + "to_replace": {"a": [2, 5]}, + "value": {"a": [9, 10]}, + "inplace": True, + }, + ), + ( + pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}), + {"to_replace": [], "value": [], "inplace": True}, + ), + ], +) +def test_replace_inplace(pframe, replace_args): + gpu_frame = cudf.from_pandas(pframe) + pandas_frame = pframe.copy() - pds = pd.Series([1, 2, 3, 45]) - gds = cudf.from_pandas(pds) - vals = np.array([]).astype(int) + gpu_copy = gpu_frame.copy() + cpu_copy = pandas_frame.copy() - assert_eq(pds.replace(vals, -1), gds.replace(vals, -1)) + assert_eq(gpu_frame, pandas_frame) + assert_eq(gpu_copy, cpu_copy) + gpu_frame.replace(**replace_args) + pandas_frame.replace(**replace_args) + assert_eq(gpu_frame, pandas_frame) + assert_eq(gpu_copy, cpu_copy) - pds.replace(vals, 77, inplace=True) - gds.replace(vals, 77, inplace=True) - assert_eq(pds, gds) +def test_replace_df_error(): pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]}) gdf = cudf.from_pandas(pdf) - assert_eq( - pdf.replace({"a": 2}, {"a": -33}), gdf.replace({"a": 2}, {"a": -33}) - ) - - assert_eq( - pdf.replace({"a": [2, 5]}, {"a": [9, 10]}), - gdf.replace({"a": [2, 5]}, {"a": [9, 10]}), - ) - - assert_eq( - pdf.replace([], []), gdf.replace([], []), - ) - assert_exceptions_equal( lfunc=pdf.replace, rfunc=gdf.replace, diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 75923a0b284..773bec56634 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -1289,14 +1289,27 @@ def test_timedelta_datetime_cast_invalid(): psr = sr.to_pandas() assert_exceptions_equal( - psr.astype, sr.astype, (["datetime64[ns]"],), (["datetime64[ns]"],) + psr.astype, + sr.astype, + (["datetime64[ns]"],), + (["datetime64[ns]"],), + expected_error_message=re.escape( + "cannot astype a timedelta from timedelta64[ns] to datetime64[ns]" + ), ) sr = cudf.Series([1, 2, 3], dtype="datetime64[ns]") psr = sr.to_pandas() assert_exceptions_equal( - psr.astype, sr.astype, (["timedelta64[ns]"],), (["timedelta64[ns]"],) + psr.astype, + sr.astype, + (["timedelta64[ns]"],), + (["timedelta64[ns]"],), + expected_error_message=re.escape( + "cannot astype a datetimelike from " + "datetime64[ns] to timedelta64[ns]" + ), ) From 3c720bd5bf70f03fcc0a0e9b53f39af8a6731202 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 31 Aug 2021 14:37:04 -0700 Subject: [PATCH 04/33] more fixes --- python/cudf/cudf/core/_compat.py | 2 ++ python/cudf/cudf/tests/test_dataframe.py | 1 + python/cudf/cudf/tests/test_groupby.py | 11 +++++++++-- python/cudf/cudf/tests/test_index.py | 7 ++++++- python/cudf/cudf/tests/test_series.py | 7 +------ 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index 24b25b6eec0..2cf579ce3f1 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -8,3 +8,5 @@ PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1") PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2") PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2") +PANDAS_GE_130 = PANDAS_VERSION >= version.parse("1.3.0") +PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0") diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 2100bca2606..0b32cd053d8 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -7872,6 +7872,7 @@ def test_describe_misc_exclude(df, exclude): @pytest.mark.parametrize("dropna", [True, False]) def test_dataframe_mode(df, numeric_only, dropna): pdf = df.to_pandas() + expected = pdf.mode(numeric_only=numeric_only, dropna=dropna) actual = df.mode(numeric_only=numeric_only, dropna=dropna) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 80f5155e330..f65d08a38f1 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -14,7 +14,7 @@ import cudf from cudf import DataFrame, Series -from cudf.core._compat import PANDAS_GE_110 +from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_130, PANDAS_LT_140 from cudf.testing._utils import ( DATETIME_TYPES, SIGNED_TYPES, @@ -370,7 +370,10 @@ def test_groupby_2keys_agg(nelem, func): # https://github.com/pandas-dev/pandas/issues/40685 is resolved. # "func", ["min", "max", "idxmin", "idxmax", "count", "sum"], ) -@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/43209") +@pytest.mark.xfail( + condition=PANDAS_GE_130 and PANDAS_LT_140, + reason="https://github.com/pandas-dev/pandas/issues/43209", +) def test_groupby_agg_decimal(num_groups, nelem_per_group, func): # The number of digits after the decimal to use. decimal_digits = 2 @@ -1304,6 +1307,10 @@ def test_groupby_nth(n, by): assert_groupby_results_equal(expect, got, check_dtype=False) +@pytest.mark.xfail( + condition=PANDAS_GE_130 and PANDAS_LT_140, + reason="https://github.com/pandas-dev/pandas/issues/43209", +) def test_raise_data_error(): pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index a8c27a92a04..d64267b149d 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -1873,7 +1873,12 @@ def test_index_fillna(data, fill_value): pdi = pd.Index(data) gdi = cudf.Index(data) - assert_eq(pdi.fillna(fill_value), gdi.fillna(fill_value)) + if isinstance(gdi, cudf.Int64Index) and isinstance(pdi, pd.Float64Index): + assert_eq( + pdi.fillna(fill_value).astype(gdi.dtype), gdi.fillna(fill_value) + ) + else: + assert_eq(pdi.fillna(fill_value), gdi.fillna(fill_value)) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index bbc7ecc0c28..4c7640d0217 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -1199,12 +1199,7 @@ def test_explode(data, ignore_index, p_index): expect = pdf.explode(ignore_index) got = gdf.explode(ignore_index) - if data == [1, 2, 3, 4, 5] and ignore_index and p_index is not None: - # https://github.com/pandas-dev/pandas/issues/40487 - with pytest.raises(AssertionError, match="different"): - assert_eq(expect, got, check_dtype=False) - else: - assert_eq(expect, got, check_dtype=False) + assert_eq(expect, got, check_dtype=False) @pytest.mark.parametrize( From 626685b9ea85c87932888a205c12f6277a350b41 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 31 Aug 2021 15:02:14 -0700 Subject: [PATCH 05/33] remove pandas bug workarounds --- python/cudf/cudf/tests/test_dataframe.py | 27 +++--------------------- python/cudf/cudf/tests/test_rank.py | 9 ++++---- python/cudf/cudf/tests/test_repr.py | 17 +-------------- python/cudf/cudf/tests/test_series.py | 24 +++------------------ python/cudf/cudf/tests/test_string.py | 7 +----- 5 files changed, 12 insertions(+), 72 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 0b32cd053d8..95d5727e2fd 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -423,27 +423,9 @@ def test_dataframe_drop_index(pdf, index, inplace): ("speed", 1), ("weight", 1), ("length", 1), - pytest.param( - "cow", - None, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/36293" - ), - ), - pytest.param( - "lama", - None, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/36293" - ), - ), - pytest.param( - "falcon", - None, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/36293" - ), - ), + ("cow", None), + ("lama", None,), + ("falcon", None,), ], ) @pytest.mark.parametrize("inplace", [True, False]) @@ -1944,9 +1926,6 @@ def test_dataframe_min_count_ops(data, ops, skipna, min_count): psr = pd.DataFrame(data) gsr = cudf.DataFrame(data) - if PANDAS_GE_120 and psr.shape[0] * psr.shape[1] < min_count: - pytest.xfail("https://github.com/pandas-dev/pandas/issues/39738") - assert_eq( getattr(psr, ops)(skipna=skipna, min_count=min_count), getattr(gsr, ops)(skipna=skipna, min_count=min_count), diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index 7b89d6f667a..e1ca006e0ac 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. from itertools import chain, combinations_with_replacement, product @@ -63,7 +63,8 @@ def test_rank_all_arguments( expected = pdf.copy(deep=True) # TODO: Remove per column iteration once the - # following issue is fixedhttps://github.com/pandas-dev/pandas/issues/43310 + # following issue is fixed : + # https://github.com/pandas-dev/pandas/issues/43310 for col in expected.columns: expected[col] = pdf[col].rank(**kwargs) actual = gdf.rank(**kwargs) @@ -126,9 +127,7 @@ def test_rank_error_arguments(pdf): np.full((3,), np.inf), np.full((3,), -np.inf), ] -sort_dtype_args = [np.int32, np.float32, np.float64] -# TODO: np.int64, disabled because of bug -# https://github.com/pandas-dev/pandas/issues/32859 +sort_dtype_args = [np.int32, np.int64, np.float32, np.float64] @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index fa6c4d9bf24..6a8e9bbc527 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1152,22 +1152,7 @@ def test_timedelta_index_repr(index, expected_repr): ), ], ) -@pytest.mark.parametrize( - "max_seq_items", - [ - None, - pytest.param( - 1, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/38415" - ), - ), - 2, - 5, - 10, - 100, - ], -) +@pytest.mark.parametrize("max_seq_items", [None, 1, 2, 5, 10, 100]) def test_mulitIndex_repr(pmi, max_seq_items): pd.set_option("display.max_seq_items", max_seq_items) gmi = cudf.from_pandas(pmi) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 4c7640d0217..d29ba08a848 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -1080,27 +1080,9 @@ def test_series_drop_index(ps, index, inplace): ("speed", 1), ("weight", 1), ("length", 1), - pytest.param( - "cow", - None, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/36293" - ), - ), - pytest.param( - "lama", - None, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/36293" - ), - ), - pytest.param( - "falcon", - None, - marks=pytest.mark.xfail( - reason="https://github.com/pandas-dev/pandas/issues/36293" - ), - ), + ("cow", None,), + ("lama", None,), + ("falcon", None,), ], ) @pytest.mark.parametrize("inplace", [True, False]) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 9a7ef4e2099..a4ceed258db 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -775,13 +775,8 @@ def test_string_index_duplicate_str_cat(data, others, sep, na_rep, name): # in `.str.cat` # https://github.com/rapidsai/cudf/issues/5862 - # TODO: Replace ``pd.Index(expect.to_series().sort_values())`` with - # ``expect.sort_values()`` once the below issue is fixed - # https://github.com/pandas-dev/pandas/issues/35584 assert_eq( - pd.Index(expect.to_series().sort_values()) - if not isinstance(expect, str) - else expect, + expect.sort_values() if not isinstance(expect, str) else expect, got.sort_values() if not isinstance(got, str) else got, exact=False, ) From d1db17757bf8a529966f241880fd2580f3e387dd Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 31 Aug 2021 15:43:52 -0700 Subject: [PATCH 06/33] more cleanup --- python/cudf/cudf/tests/test_csv.py | 13 +------------ python/cudf/cudf/tests/test_dataframe.py | 5 +---- python/cudf/cudf/tests/test_numerical.py | 9 --------- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index f04a5e6dca0..dca18207e54 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -1762,18 +1762,7 @@ def test_csv_write_empty_column_name(df, index, columns): cudf.DataFrame(index=cudf.Index([], name="index name")), ], ) -@pytest.mark.parametrize( - "index", - [ - True, - pytest.param( - False, - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/6691" - ), - ), - ], -) +@pytest.mark.parametrize("index", [True, False]) def test_csv_write_empty_dataframe(df, index): pdf = df.to_pandas() diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 95d5727e2fd..c065d7c2e18 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -4016,10 +4016,7 @@ def test_series_values_property(data): reason="Nulls not supported by as_gpu_matrix" ), ), - pytest.param( - {"A": [], "B": []}, - marks=pytest.mark.xfail(reason="Requires at least 1 row"), - ), + {"A": [], "B": []}, pytest.param( {"A": [1, 2, 3], "B": ["a", "b", "c"]}, marks=pytest.mark.xfail( diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index cb4757d1ab7..cf329afa8ab 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -5,7 +5,6 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_100 from cudf.testing._utils import NUMERIC_TYPES, assert_eq from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes @@ -92,10 +91,6 @@ def test_can_cast_safely_mixed_kind(): assert not data.can_cast_safely(to_dtype) -@pytest.mark.xfail( - condition=not PANDAS_GE_100, - reason="cuDF null <-> pd.NA compatibility not yet supported", -) def test_to_pandas_nullable_integer(): gsr_not_null = cudf.Series([1, 2, 3]) gsr_has_null = cudf.Series([1, 2, None]) @@ -107,10 +102,6 @@ def test_to_pandas_nullable_integer(): assert_eq(gsr_has_null.to_pandas(nullable=True), psr_has_null) -@pytest.mark.xfail( - condition=not PANDAS_GE_100, - reason="cuDF null <-> pd.NA compatibility not yet supported", -) def test_to_pandas_nullable_bool(): gsr_not_null = cudf.Series([True, False, True]) gsr_has_null = cudf.Series([True, False, None]) From 94d46aa9c8bf5adea39c6994d77f13f78dd4b095 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 31 Aug 2021 16:06:08 -0700 Subject: [PATCH 07/33] more fixes --- python/cudf/cudf/core/groupby/groupby.py | 2 +- python/cudf/cudf/tests/test_groupby.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index fd425d9de76..2e7c79baaad 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -464,7 +464,7 @@ def mult(df): chunk_results = [function(chk) for chk in chunks] if not len(chunk_results): - return self.obj.__class__() + return self.obj.head(0) if cudf.utils.dtypes.is_scalar(chunk_results[0]): result = cudf.Series(chunk_results, index=group_names) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index f65d08a38f1..762a775cff0 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1658,6 +1658,7 @@ def test_groupby_apply_no_keys(pdf): assert_groupby_results_equal( pdf.groupby([]).apply(lambda x: x.max()), gdf.groupby([]).apply(lambda x: x.max()), + check_index_type=False, # Int64Index v/s Float64Index ) From 33855424f6408dcf082b4ff71fe4f40cb741de30 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 1 Sep 2021 11:18:57 -0700 Subject: [PATCH 08/33] match pandas behavior --- python/cudf/cudf/core/dataframe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index aac0b027c0b..bfa604ec688 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -6561,8 +6561,7 @@ def mode(self, axis=0, numeric_only=False, dropna=True): ] if len(mode_results) == 0: - df = DataFrame(index=self.index) - return df + return DataFrame() df = cudf.concat(mode_results, axis=1) if isinstance(df, Series): From f69a9976f376622bd8ef19dc741f99450a632783 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 1 Sep 2021 13:54:21 -0700 Subject: [PATCH 09/33] add conditional xfail --- python/cudf/cudf/tests/test_multiindex.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index c7d6f4f0456..dfb951ff8d0 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -13,6 +13,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_130, PANDAS_LT_140 from cudf.core.column import as_column from cudf.core.index import as_index from cudf.testing._utils import assert_eq, assert_exceptions_equal, assert_neq @@ -1007,6 +1008,10 @@ def test_multicolumn_loc(pdf, pdfIndex): assert_eq(pdf.loc[:, ["a", "b"]], gdf.loc[:, ["a", "b"]]) +@pytest.mark.xfail( + condition=PANDAS_GE_130 and PANDAS_LT_140, + reason="https://github.com/pandas-dev/pandas/issues/43351", +) def test_multicolumn_set_item(pdf, pdfIndex): pdf = pdf.T pdf.columns = pdfIndex From f843245d6a57e4a14e0134c68a01c808d2965973 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 1 Sep 2021 18:33:43 -0700 Subject: [PATCH 10/33] temp commit --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 8e5b4d80115..a2bb02804b4 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ # gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -# gpuci_mamba_retry install -y "your-pkg=1.0.0" +gpuci_mamba_retry install -y "pandas=1.3.2" gpuci_logger "Check compiler versions" From 7362ba244319b2c78b9b4833184c19d35a7fefb9 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 1 Sep 2021 20:40:17 -0700 Subject: [PATCH 11/33] tmp --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index a2bb02804b4..94b85c7a3f9 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -83,7 +83,7 @@ gpuci_mamba_retry install -y \ "ucx-py=0.22.*" # https://docs.rapids.ai/maintainers/depmgmt/ -# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env +gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env gpuci_mamba_retry install -y "pandas=1.3.2" From a58f88e371ddab9bb6d8217528b43a94e7006ded Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 2 Sep 2021 11:09:56 -0700 Subject: [PATCH 12/33] tmp --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 94b85c7a3f9..47ca80d89b4 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y "pandas=1.3.2" +gpuci_mamba_retry install -y "pandas=1.3.2" "arrow-cpp=5.0.0" "arrow-cpp-proc * cuda" "pyarrow=5.0.0=*cuda" gpuci_logger "Check compiler versions" From 2793df49a7b075ec80f145fa6d6d1fadb387a06d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 2 Sep 2021 15:23:34 -0700 Subject: [PATCH 13/33] tmp --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 47ca80d89b4..511fc7d9532 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y "pandas=1.3.2" "arrow-cpp=5.0.0" "arrow-cpp-proc * cuda" "pyarrow=5.0.0=*cuda" +gpuci_mamba_retry install -y "pandas=1.3.2 arrow-cpp=5.0.0 arrow-cpp-proc * cuda pyarrow=5.0.0=*cuda" gpuci_logger "Check compiler versions" From bdb31aebce58755af385cfbb42434cc1c5ecaafc Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Thu, 2 Sep 2021 18:20:11 -0500 Subject: [PATCH 14/33] Update ci/gpu/build.sh --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 511fc7d9532..99927946798 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y "pandas=1.3.2 arrow-cpp=5.0.0 arrow-cpp-proc * cuda pyarrow=5.0.0=*cuda" +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gpuci_logger "Check compiler versions" From fa851319407d1656b032d4f95d72e4ad683413d0 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 2 Sep 2021 17:21:56 -0700 Subject: [PATCH 15/33] tmp --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 99927946798..a62040319b4 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda +gpuci_mamba_retry install -y pandas=1.3.2 --force-reinstall gpuci_logger "Check compiler versions" From f8cd6c4e24bf615dae8697b9d4d0ebe890f824c9 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 2 Sep 2021 17:22:30 -0700 Subject: [PATCH 16/33] tmp --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index a62040319b4..bfc379d32e4 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -83,7 +83,7 @@ gpuci_mamba_retry install -y \ "ucx-py=0.22.*" # https://docs.rapids.ai/maintainers/depmgmt/ -gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env +# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env gpuci_mamba_retry install -y pandas=1.3.2 --force-reinstall From ac1cf22eb162a6c943555b44f8b67152685b34a4 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 3 Sep 2021 08:21:46 -0500 Subject: [PATCH 17/33] tmp --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index bfc379d32e4..2024c4c91af 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ # gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 --force-reinstall +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock gpuci_logger "Check compiler versions" From 061fa6cc7fd18d280e844ed051695f79d55394fb Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 3 Sep 2021 09:10:08 -0500 Subject: [PATCH 18/33] Update ci/gpu/build.sh --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 2024c4c91af..1f3780c27f7 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -83,7 +83,7 @@ gpuci_mamba_retry install -y \ "ucx-py=0.22.*" # https://docs.rapids.ai/maintainers/depmgmt/ -# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env +gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock From cde3ba70a347c6ffcc806d853e4569260ee70256 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 3 Sep 2021 10:11:40 -0500 Subject: [PATCH 19/33] Update ci/gpu/build.sh --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 1f3780c27f7..96c8d78bca1 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 gpuci_logger "Check compiler versions" From 82465f1864e9d939cd1aff3edbf5eefcfb705b21 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 3 Sep 2021 11:22:52 -0500 Subject: [PATCH 20/33] Update ci/gpu/build.sh --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 96c8d78bca1..f2a6beac5a7 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist gpuci_logger "Check compiler versions" From 21c110c2e19a6fcd9b893ce889aab5763f8cb45d Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 3 Sep 2021 13:02:14 -0500 Subject: [PATCH 21/33] Update ci/gpu/build.sh --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index f2a6beac5a7..4bbbe72fa9f 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout gpuci_logger "Check compiler versions" From 16455a3379005a351b4d84859a4a7d7fc939ad02 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 3 Sep 2021 14:21:27 -0500 Subject: [PATCH 22/33] Update build.sh --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 4bbbe72fa9f..02f6eb246ab 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout cupy==9.4.0 gpuci_logger "Check compiler versions" From 35b1c5d33563857fa0bd1466a5f53b9c42c7faae Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 3 Sep 2021 15:41:37 -0500 Subject: [PATCH 23/33] tmp --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 02f6eb246ab..d21b1056458 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout cupy==9.4.0 +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout cupy==9.4.0 cachetools transformers nvtx=0.2.3 protobuf packaging mimesis=4.0.0 hypothesis rapidjson double-conversion dlpack=0.5 streamz typing_extensions mypy=0.782 pandoc=1.19.2 fsspec=2021.8.1 fastavro=1.4.4 numba=0.53.1 gpuci_logger "Check compiler versions" From e0cfcbf5b4bbc5d8dd690f27ae73e202aa0ab693 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 3 Sep 2021 17:20:54 -0500 Subject: [PATCH 24/33] Update build.sh --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index d21b1056458..d05c2733d5f 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout cupy==9.4.0 cachetools transformers nvtx=0.2.3 protobuf packaging mimesis=4.0.0 hypothesis rapidjson double-conversion dlpack=0.5 streamz typing_extensions mypy=0.782 pandoc=1.19.2 fsspec=2021.8.1 fastavro=1.4.4 numba=0.53.1 +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout cupy==9.4.0 cachetools transformers nvtx=0.2.3 protobuf packaging mimesis=4.0.0 hypothesis rapidjson double-conversion dlpack=0.5 streamz typing_extensions mypy=0.782 pandoc=1.19.2 fsspec=2021.8.1 fastavro=1.4.4 numba=0.53.1 pyorc gpuci_logger "Check compiler versions" From 078d9d950a31fab8321b72e4ab9f6d3a3c8a4c21 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Sat, 4 Sep 2021 09:39:33 -0500 Subject: [PATCH 25/33] Update build.sh --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index d05c2733d5f..17ee11f3ece 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -84,7 +84,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout cupy==9.4.0 cachetools transformers nvtx=0.2.3 protobuf packaging mimesis=4.0.0 hypothesis rapidjson double-conversion dlpack=0.5 streamz typing_extensions mypy=0.782 pandoc=1.19.2 fsspec=2021.8.1 fastavro=1.4.4 numba=0.53.1 pyorc +gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout cupy==9.4.0 cachetools transformers nvtx=0.2.3 protobuf packaging mimesis=4.0.0 hypothesis rapidjson double-conversion dlpack=0.5 streamz typing_extensions mypy=0.782 pandoc=1.19.2 fsspec=2021.8.1 fastavro=1.4.4 numba=0.53.1 pyorc python-confluent-kafka=1.6.0 gpuci_logger "Check compiler versions" From b1a7cf0e8fe923aac600f71acd61e2bc1170201f Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Sat, 4 Sep 2021 10:39:59 -0700 Subject: [PATCH 26/33] remove unnecessary code --- python/cudf/cudf/tests/test_groupby.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 48e0eabe587..b58078818dd 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1955,9 +1955,6 @@ def test_groupby_fillna_multi_value(nelem): got = gdf.groupby(key_col).fillna(value=fill_values) - # In this specific case, Pandas returns the rows in grouped order. - # Cudf returns columns in orginal order. - expect.index = expect.index.get_level_values(0) assert_groupby_results_equal(expect[value_cols], got[value_cols]) From 0aef02123215b6aa81bb43fb112fcb56b188b15f Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Sat, 4 Sep 2021 10:52:45 -0700 Subject: [PATCH 27/33] misc doc fixes --- python/cudf/cudf/core/groupby/groupby.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 7704c92fe04..7a4b221bf6f 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. + import collections import pickle import warnings @@ -630,7 +631,7 @@ def rolling_avg(val, avg): .. code-block:: python Results: - cat val avg + cat val avg 0 1 16 1 1 45 2 1 62 41.0 @@ -713,8 +714,8 @@ def describe(self, include=None, exclude=None): 2 24.0 90 3 26.0 80 >>> gdf.groupby('Score').describe() - Speed - count mean std min 25% 50% 75% max + Speed + count mean std min 25% 50% 75% max Score 30 1 370.0 370.0 370.0 370.0 370.0 370.0 50 1 380.0 380.0 380.0 380.0 380.0 380.0 @@ -946,13 +947,13 @@ def fillna( >>> df = pd.DataFrame({'k': [1, 1, 2], 'v': [2, None, 4]}) >>> gdf = cudf.from_pandas(df) >>> df.groupby('k').fillna({'v': 4}) # pandas - v + v k 1 0 2.0 - 1 4.0 + 1 4.0 2 2 4.0 >>> gdf.groupby('k').fillna({'v': 4}) # cudf - v + v 0 2.0 1 4.0 2 4.0 @@ -1127,9 +1128,9 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin): Max Speed Animal Type Falcon Captive 390.0 - Wild 350.0 + Wild 350.0 Parrot Captive 30.0 - Wild 20.0 + Wild 20.0 >>> df.groupby(level=0).mean() Max Speed Animal From 11bd27a3b3495bd22b12bb036b2b845edea75f15 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Sat, 4 Sep 2021 10:56:16 -0700 Subject: [PATCH 28/33] copyright --- python/cudf/cudf/core/multiindex.py | 3 ++- python/cudf/cudf/core/window/rolling.py | 2 +- python/cudf/cudf/tests/test_multiindex.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 37f27c77196..8d2b05ef4ec 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. + from __future__ import annotations import itertools diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 8724cd47d00..aa377f81735 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION +# Copyright (c) 2020-2021, NVIDIA CORPORATION import itertools diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index dfb951ff8d0..465cf36e1f3 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. """ Test related to MultiIndex From bd1c35e3cd9c9d09ee54e4dc9bdf3c79746e1b71 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 7 Sep 2021 08:54:40 -0500 Subject: [PATCH 29/33] Update build.sh --- ci/gpu/build.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 17ee11f3ece..0434d8ac937 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -83,9 +83,8 @@ gpuci_mamba_retry install -y \ "ucx-py=0.22.*" # https://docs.rapids.ai/maintainers/depmgmt/ -gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y pandas=1.3.2 arrow-cpp=5.0.0 pyarrow=5.0.0=*cuda gmock cython=0.29.24 pytest pytest-benchmark pytest-xdist pytest-cov pytest-timeout cupy==9.4.0 cachetools transformers nvtx=0.2.3 protobuf packaging mimesis=4.0.0 hypothesis rapidjson double-conversion dlpack=0.5 streamz typing_extensions mypy=0.782 pandoc=1.19.2 fsspec=2021.8.1 fastavro=1.4.4 numba=0.53.1 pyorc python-confluent-kafka=1.6.0 - +# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env +# gpuci_mamba_retry install -y "your-pkg=1.0.0" gpuci_logger "Check compiler versions" python --version From c0435e1a3fc64924f1a62090a46da64ab2e604d8 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 7 Sep 2021 08:55:05 -0500 Subject: [PATCH 30/33] Update build.sh --- ci/gpu/build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 0434d8ac937..8e5b4d80115 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -86,6 +86,7 @@ gpuci_mamba_retry install -y \ # gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env # gpuci_mamba_retry install -y "your-pkg=1.0.0" + gpuci_logger "Check compiler versions" python --version $CC --version From cfac5b91522b8de1937c9e733719a599f8c75e83 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 7 Sep 2021 11:30:47 -0500 Subject: [PATCH 31/33] address reviews --- python/cudf/cudf/tests/test_dataframe.py | 3 +-- python/cudf/cudf/tests/test_index.py | 9 +++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index c065d7c2e18..62026287d83 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1241,8 +1241,7 @@ def test_dataframe_concat_different_numerical_columns(dtype1, dtype2): else: pres = pd.concat([df1, df2]) gres = cudf.concat([cudf.from_pandas(df1), cudf.from_pandas(df2)]) - # Pandas 1.3.2+ returns mixed `object` dtype result. - assert_eq(cudf.from_pandas(pres.astype(gres.dtypes)), gres) + assert_eq(pres, gres, check_dtype=False) def test_dataframe_concat_different_column_types(): diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index d64267b149d..a3de92ba9e5 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -1873,12 +1873,9 @@ def test_index_fillna(data, fill_value): pdi = pd.Index(data) gdi = cudf.Index(data) - if isinstance(gdi, cudf.Int64Index) and isinstance(pdi, pd.Float64Index): - assert_eq( - pdi.fillna(fill_value).astype(gdi.dtype), gdi.fillna(fill_value) - ) - else: - assert_eq(pdi.fillna(fill_value), gdi.fillna(fill_value)) + assert_eq( + pdi.fillna(fill_value), gdi.fillna(fill_value), exact=False + ) # Int64Index v/s Float64Index @pytest.mark.parametrize( From 1f7c111e83317e051c83adb08a3050302453ea8e Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 7 Sep 2021 11:36:29 -0500 Subject: [PATCH 32/33] add comment --- python/cudf/cudf/tests/test_dataframe.py | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 62026287d83..3b74fe91e05 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -1743,6 +1743,43 @@ def test_dataframe_shape_empty(): @pytest.mark.parametrize("dtype", dtypes) @pytest.mark.parametrize("nulls", ["none", "some", "all"]) def test_dataframe_transpose(nulls, num_cols, num_rows, dtype): + # In case of `bool` dtype: pandas <= 1.2.5 type-casts + # a boolean series to `float64` series if a `np.nan` is assigned to it: + # >>> s = pd.Series([True, False, True]) + # >>> s + # 0 True + # 1 False + # 2 True + # dtype: bool + # >>> s[[2]] = np.nan + # >>> s + # 0 1.0 + # 1 0.0 + # 2 NaN + # dtype: float64 + # In pandas >= 1.3.2 this behavior is fixed: + # >>> s = pd.Series([True, False, True]) + # >>> s + # 0 + # True + # 1 + # False + # 2 + # True + # dtype: bool + # >>> s[[2]] = np.nan + # >>> s + # 0 + # True + # 1 + # False + # 2 + # NaN + # dtype: object + # In cudf we change `object` dtype to `str` type - for which there + # is no transpose implemented yet. Hence we need to test transpose + # against pandas nullable types as they are the ones that closely + # resemble `cudf` dtypes behavior. pdf = pd.DataFrame() null_rep = np.nan if dtype in ["float32", "float64"] else None From 1ded3a3ca99bd59d38b749f043a16e8879b08c9a Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 7 Sep 2021 12:06:37 -0500 Subject: [PATCH 33/33] make `kind` an optional param. --- python/cudf/cudf/core/_base_index.py | 2 +- python/cudf/cudf/core/column/column.py | 9 +++++++-- python/cudf/cudf/core/index.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 8637f96f34a..163af62677e 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -821,7 +821,7 @@ def to_series(self, index=None, name=None): name=self.name if name is None else name, ) - def get_slice_bound(self, label, side, kind): + def get_slice_bound(self, label, side, kind=None): """ Calculate slice bound that corresponds to given label. Returns leftmost (one-past-the-rightmost if ``side=='right'``) position diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 2e9a0c6c79d..8f18d83eb31 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -852,8 +852,13 @@ def get_slice_bound( side : {'left', 'right'} kind : {'ix', 'loc', 'getitem'} """ - assert kind in ["ix", "loc", "getitem", None] - if side not in ("left", "right"): + if kind not in {"ix", "loc", "getitem", None}: + raise ValueError( + f"Invalid value for ``kind`` parameter," + f" must be either one of the following: " + f"{'ix', 'loc', 'getitem', None}, but found: {kind}" + ) + if side not in {"left", "right"}: raise ValueError( "Invalid value for side kwarg," " must be either 'left' or 'right': %s" % (side,) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 6b4b77fabc5..b009d12262f 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -990,7 +990,7 @@ def find_label_range(self, first, last): end += 1 return begin, end - def get_slice_bound(self, label, side, kind): + def get_slice_bound(self, label, side, kind=None): return self._values.get_slice_bound(label, side, kind)