xorbitsai · aresnow1 · Sep 21, 2023 · Sep 19, 2023 · Sep 19, 2023 · Sep 19, 2023
diff --git a/python/xorbits/_mars/dataframe/base/tests/test_base_execution.py b/python/xorbits/_mars/dataframe/base/tests/test_base_execution.py
@@ -1729,12 +1729,10 @@ def test_value_counts_execution(setup):
     r = series.value_counts()
     pd.testing.assert_series_equal(r.execute().fetch(), s.value_counts())
 
-    # pandas issue: https://github.com/pandas-dev/pandas/issues/54857
-    if pd.__version__ != "2.1.0":
-        r = series.value_counts(bins=5, normalize=True)
-        pd.testing.assert_series_equal(
-            r.execute().fetch(), s.value_counts(bins=5, normalize=True)
-        )
+    r = series.value_counts(bins=5, normalize=True)
+    pd.testing.assert_series_equal(
+        r.execute().fetch(), s.value_counts(bins=5, normalize=True)
+    )
 
     # test multi chunks
     series = from_pandas_series(s, chunk_size=30)
@@ -1746,11 +1744,10 @@ def test_value_counts_execution(setup):
     pd.testing.assert_series_equal(r.execute().fetch(), s.value_counts(normalize=True))
 
     # test bins and normalize
-    if pd.__version__ != "2.1.0":
-        r = series.value_counts(method="tree", bins=5, normalize=True)
-        pd.testing.assert_series_equal(
-            r.execute().fetch(), s.value_counts(bins=5, normalize=True)
-        )
+    r = series.value_counts(method="tree", bins=5, normalize=True)
+    pd.testing.assert_series_equal(
+        r.execute().fetch(), s.value_counts(bins=5, normalize=True)
+    )
 
 
 def test_astype(setup):

diff --git a/python/xorbits/_mars/dataframe/base/value_counts.py b/python/xorbits/_mars/dataframe/base/value_counts.py
@@ -193,6 +193,9 @@ def execute(cls, ctx, op: "DataFrameValueCounts"):
             # convert CategoricalDtype which generated in `cut`
             # to IntervalDtype
             result.index = result.index.astype("interval")
+            # the index name changed in pandas 2.1.1; clear it on 2.1.1 and later
+            if pd_release_version >= (2, 1, 1):
+                result.index.name = None
         if op.nrows:
             result = result.head(op.nrows)
         result.name = op.outputs[0].name

diff --git a/python/xorbits/_mars/dataframe/groupby/fill.py b/python/xorbits/_mars/dataframe/groupby/fill.py
@@ -18,7 +18,7 @@
 
 from ... import opcodes
 from ...core import OutputType
-from ...serialization.serializables import AnyField, DictField, Int64Field, StringField
+from ...serialization.serializables import AnyField, Int64Field, StringField
 from ..operands import DataFrameOperand, DataFrameOperandMixin
 from ..utils import build_empty_df, build_empty_series, parse_index
 
@@ -29,7 +29,6 @@ class GroupByFillOperand(DataFrameOperand, DataFrameOperandMixin):
     value = AnyField("value", default=None)
     method = StringField("method", default=None)
     limit = Int64Field("limit", default=None)
-    downcast = DictField("downcast", default=None)
 
     def _calc_out_dtypes(self, in_groupby):
         mock_groupby = in_groupby.op.build_mock_groupby()
@@ -40,7 +39,6 @@ def _calc_out_dtypes(self, in_groupby):
                 value=self.value,
                 method=self.method,
                 limit=self.limit,
-                downcast=self.downcast,
             )
         else:
             result_df = getattr(mock_groupby, func_name)(limit=self.limit)
@@ -133,7 +131,6 @@ def execute(cls, ctx, op: "GroupByFillOperand"):
                 value=op.value,
                 method=op.method,
                 limit=op.limit,
-                downcast=op.downcast,
             )
         else:
             result = getattr(in_data, func_name)(limit=op.limit)
@@ -184,7 +181,7 @@ def bfill(groupby, limit=None):
     return op(groupby)
 
 
-def fillna(groupby, value=None, method=None, limit=None, downcast=None):
+def fillna(groupby, value=None, method=None, limit=None):
     """
     Fill NA/NaN values using the specified method
 
@@ -197,11 +194,8 @@ def fillna(groupby, value=None, method=None, limit=None, downcast=None):
     limit:  int, default None
             If method is specified, this is the maximum number of consecutive
             NaN values to forward/backward fill
-    downcast:   dict, default None
-                A dict of item->dtype of what to downcast if possible,
-                or the string ‘infer’ which will try to downcast to an appropriate equal type
 
     return: DataFrame or None
     """
-    op = GroupByFillNa(value=value, method=method, limit=limit, downcast=downcast)
+    op = GroupByFillNa(value=value, method=method, limit=limit)
     return op(groupby)
diff --git a/python/xorbits/_mars/dataframe/hash_utils.py b/python/xorbits/_mars/dataframe/hash_utils.py
@@ -9,10 +9,11 @@
 from typing import TYPE_CHECKING, Hashable, Iterable, Iterator, cast
 
 import numpy as np
+import pandas as pd
 from pandas._libs import lib
 from pandas._libs.hashing import hash_object_array
 from pandas._typing import ArrayLike, npt
-from pandas.core.dtypes.common import is_categorical_dtype, is_list_like
+from pandas.core.dtypes.common import is_list_like
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCExtensionArray,
@@ -272,7 +273,7 @@ def hash_array(
     # For categoricals, we hash the categories, then remap the codes to the
     # hash values. (This check is above the complex check so that we don't ask
     # numpy if categorical is a subdtype of complex, as it will choke).
-    if is_categorical_dtype(dtype):
+    if isinstance(dtype, pd.CategoricalDtype):
         vals = cast("Categorical", vals)
         return _hash_categorical(vals, encoding, hash_key)
 

diff --git a/python/xorbits/_mars/dataframe/missing/fillna.py b/python/xorbits/_mars/dataframe/missing/fillna.py
@@ -41,7 +41,6 @@ class FillNA(DataFrameOperand, DataFrameOperandMixin):
     _method = StringField("method")
     _axis = AnyField("axis")
     _limit = Int64Field("limit")
-    _downcast = AnyField("downcast")
 
     _output_limit = Int64Field("output_limit")
 
@@ -51,7 +50,6 @@ def __init__(
         method=None,
         axis=None,
         limit=None,
-        downcast=None,
         output_types=None,
         output_limit=None,
         **kw
@@ -61,7 +59,6 @@ def __init__(
             _method=method,
             _axis=axis,
             _limit=limit,
-            _downcast=downcast,
             _output_types=output_types,
             _output_limit=output_limit,
             **kw
@@ -83,10 +80,6 @@ def axis(self):
     def limit(self):
         return self._limit
 
-    @property
-    def downcast(self):
-        return self._downcast
-
     def _set_inputs(self, inputs):
         super()._set_inputs(inputs)
         if self._method is None and len(inputs) > 1:
@@ -123,7 +116,9 @@ def _execute_map(cls, ctx, op):
         method = op.method
 
         filled = input_data.fillna(
-            method=method, axis=axis, limit=limit, downcast=op.downcast
+            method=method,
+            axis=axis,
+            limit=limit,
         )
         ctx[op.outputs[0].key] = cls._get_first_slice(op, filled, 1)
         del filled
@@ -143,7 +138,9 @@ def _execute_combine(cls, ctx, op):
 
         if not summaries:
             ctx[op.outputs[0].key] = input_data.fillna(
-                method=method, axis=axis, limit=limit, downcast=op.downcast
+                method=method,
+                axis=axis,
+                limit=limit,
             )
             return
 
@@ -158,15 +155,16 @@ def _execute_combine(cls, ctx, op):
 
         if is_pandas_2():
             concat_df = concat_df.fillna(
-                method=method, axis=axis, limit=limit, downcast=op.downcast
+                method=method,
+                axis=axis,
+                limit=limit,
             )
         else:
             concat_df.fillna(
                 method=method,
                 axis=axis,
                 inplace=True,
                 limit=limit,
-                downcast=op.downcast,
             )
         ctx[op.outputs[0].key] = cls._get_first_slice(op, concat_df, -1)
 
@@ -187,12 +185,9 @@ def execute(cls, ctx, op):
                     method=op.method,
                     axis=op.axis,
                     limit=op.limit,
-                    downcast=op.downcast,
                 )
             else:
-                ctx[op.outputs[0].key] = input_data.fillna(
-                    value=value, downcast=op.downcast
-                )
+                ctx[op.outputs[0].key] = input_data.fillna(value=value)
 
     @classmethod
     def _tile_one_by_one(cls, op):
@@ -478,9 +473,7 @@ def __call__(self, a, value_df=None):
             )
 
 
-def fillna(
-    df, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None
-):
+def fillna(df, value=None, method=None, axis=None, inplace=False, limit=None):
     """
     Fill NA/NaN values using the specified method.
 
@@ -509,10 +502,6 @@ def fillna(
         be partially filled. If method is not specified, this is the
         maximum number of entries along the entire axis where NaNs will be
         filled. Must be greater than 0 if not None.
-    downcast : dict, default is None
-        A dict of item->dtype of what to downcast if possible,
-        or the string 'infer' which will try to downcast to an appropriate
-        equal type (e.g. float64 to int64 if possible).
 
     Returns
     -------
@@ -583,10 +572,6 @@ def fillna(
             % type(value).__name__
         )
 
-    if downcast is not None:
-        raise NotImplementedError(
-            'Currently argument "downcast" is not implemented yet'
-        )
     if limit is not None:
         raise NotImplementedError('Currently argument "limit" is not implemented yet')
 
@@ -600,7 +585,6 @@ def fillna(
         method=method,
         axis=axis,
         limit=limit,
-        downcast=downcast,
         output_types=get_output_types(df),
     )
     out_df = op(df, value_df=value_df)
@@ -610,7 +594,7 @@ def fillna(
         return out_df
 
 
-def ffill(df, axis=None, inplace=False, limit=None, downcast=None):
+def ffill(df, axis=None, inplace=False, limit=None):
     """
     Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
 
@@ -619,12 +603,10 @@ def ffill(df, axis=None, inplace=False, limit=None, downcast=None):
     {klass} or None
         Object with missing values filled or None if ``inplace=True``.
     """
-    return fillna(
-        df, method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
-    )
+    return fillna(df, method="ffill", axis=axis, inplace=inplace, limit=limit)
 
 
-def bfill(df, axis=None, inplace=False, limit=None, downcast=None):
+def bfill(df, axis=None, inplace=False, limit=None):
     """
     Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
 
@@ -633,12 +615,10 @@ def bfill(df, axis=None, inplace=False, limit=None, downcast=None):
     {klass} or None
         Object with missing values filled or None if ``inplace=True``.
     """
-    return fillna(
-        df, method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
-    )
+    return fillna(df, method="bfill", axis=axis, inplace=inplace, limit=limit)
 
 
-def index_fillna(index, value=None, downcast=None):
+def index_fillna(index, value=None):
     """
     Fill NA/NaN values with the specified value.
 
@@ -647,10 +627,6 @@ def index_fillna(index, value=None, downcast=None):
     value : scalar
         Scalar value to use to fill holes (e.g. 0).
         This value cannot be a list-likes.
-    downcast : dict, default is None
-        A dict of item->dtype of what to downcast if possible,
-        or the string 'infer' which will try to downcast to an appropriate
-        equal type (e.g. float64 to int64 if possible).
 
     Returns
     -------
@@ -666,7 +642,6 @@ def index_fillna(index, value=None, downcast=None):
 
     op = FillNA(
         value=value,
-        downcast=downcast,
         output_types=get_output_types(index),
     )
     return op(index)
diff --git a/python/xorbits/_mars/dataframe/missing/tests/test_missing.py b/python/xorbits/_mars/dataframe/missing/tests/test_missing.py
@@ -56,8 +56,6 @@ def test_fill_na():
         series.fillna(value=df)
     with pytest.raises(ValueError):
         series.fillna(value=df_raw)
-    with pytest.raises(NotImplementedError):
-        series.fillna(value=series_raw, downcast="infer")
     with pytest.raises(NotImplementedError):
         series.ffill(limit=1)