From b3e0f0890977807691dc1a14aa07a0e17c9bc921 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 22 Nov 2024 11:21:29 -0600 Subject: [PATCH 1/6] Update data.py --- python-package/xgboost/data.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 82ea3400492f..cd0e686d168f 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -845,6 +845,12 @@ def _arrow_transform(data: DataType) -> Any: def _is_cudf_df(data: DataType) -> bool: return lazy_isinstance(data, "cudf.core.dataframe", "DataFrame") +def _is_cudf_pandas_df(data: DataType) -> bool: + return str(type(data)) == "<class 'pandas.core.frame.DataFrame'>" and str(type(type(data))) == "<class 'cudf.pandas.fast_slow_proxy._FastSlowProxyMeta'>" + +def _is_cudf_pandas_ser(data: DataType) -> bool: + return str(type(data)) == "<class 'pandas.core.series.Series'>" and str(type(type(data))) == "<class 'cudf.pandas.fast_slow_proxy._FastSlowProxyMeta'>" + def _get_cudf_cat_predicate() -> Callable[[Any], bool]: try: @@ -1480,6 +1486,8 @@ def _proxy_transform( feature_types: Optional[FeatureTypes], enable_categorical: bool, ) -> TransformedData: + if _is_cudf_pandas_df(data) or _is_cudf_pandas_ser(data): + data = data._fsproxy_fast if _is_cudf_df(data) or _is_cudf_ser(data): return _transform_cudf_df( data, feature_names, feature_types, enable_categorical From 24e304b68b3d3d0651ee35ab50f41fe5b9baa57c Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 22 Nov 2024 11:23:47 -0600 Subject: [PATCH 2/6] Update core.py --- python-package/xgboost/core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 8ed826ed0812..1b73dac8da53 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -2669,6 +2669,8 @@ def inplace_predict( _arrow_transform, _is_arrow, _is_cudf_df, + _is_cudf_pandas_df, + _is_cudf_pandas_ser, _is_cupy_alike, _is_list, _is_np_array_like, @@ -2677,6 +2679,8 @@ def inplace_predict( _is_tuple, _transform_pandas_df, ) + if _is_cudf_pandas_df(data) or _is_cudf_pandas_ser(data): + data 
= data._fsproxy_fast enable_categorical = True if _is_arrow(data): From cfc62cb31df74d6590b0880b83f4d731d54f436a Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 22 Nov 2024 18:54:16 +0000 Subject: [PATCH 3/6] black --- python-package/xgboost/data.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index cd0e686d168f..53383171e54e 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -845,11 +845,21 @@ def _arrow_transform(data: DataType) -> Any: def _is_cudf_df(data: DataType) -> bool: return lazy_isinstance(data, "cudf.core.dataframe", "DataFrame") + def _is_cudf_pandas_df(data: DataType) -> bool: - return str(type(data)) == "<class 'pandas.core.frame.DataFrame'>" and str(type(type(data))) == "<class 'cudf.pandas.fast_slow_proxy._FastSlowProxyMeta'>" + return ( + str(type(data)) == "<class 'pandas.core.frame.DataFrame'>" + and str(type(type(data))) + == "<class 'cudf.pandas.fast_slow_proxy._FastSlowProxyMeta'>" + ) + def _is_cudf_pandas_ser(data: DataType) -> bool: - return str(type(data)) == "<class 'pandas.core.series.Series'>" and str(type(type(data))) == "<class 'cudf.pandas.fast_slow_proxy._FastSlowProxyMeta'>" + return ( + str(type(data)) == "<class 'pandas.core.series.Series'>" + and str(type(type(data))) + == "<class 'cudf.pandas.fast_slow_proxy._FastSlowProxyMeta'>" + ) def _get_cudf_cat_predicate() -> Callable[[Any], bool]: From 45bbbcad2cfa7660104f739a9df7739c575f6212 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 22 Nov 2024 20:02:38 +0000 Subject: [PATCH 4/6] black --- python-package/xgboost/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 1b73dac8da53..14d36d7809d6 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -2679,6 +2679,7 @@ def inplace_predict( _is_tuple, _transform_pandas_df, ) + if _is_cudf_pandas_df(data) or _is_cudf_pandas_ser(data): data = data._fsproxy_fast From 0c59b53a2c3bf08e15d902d57696c457fe22cb5e Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Fri, 22 Nov 2024 12:10:30 -0800 Subject: [PATCH 5/6] Suppress pylint warning --- python-package/xgboost/core.py | 2 +- python-package/xgboost/data.py | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 14d36d7809d6..e01faee9ca54 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -2681,7 +2681,7 @@ def inplace_predict( ) if _is_cudf_pandas_df(data) or _is_cudf_pandas_ser(data): - data = data._fsproxy_fast + data = data._fsproxy_fast # pylint: disable=protected-access enable_categorical = True if _is_arrow(data): diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 53383171e54e..721f8976b3e2 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -1497,7 +1497,7 @@ def _proxy_transform( enable_categorical: bool, ) -> TransformedData: if _is_cudf_pandas_df(data) or _is_cudf_pandas_ser(data): - data = data._fsproxy_fast + data = data._fsproxy_fast # pylint: disable=protected-access if _is_cudf_df(data) or _is_cudf_ser(data): return _transform_cudf_df( data, feature_names, feature_types, enable_categorical From e644adae6100b9667492d6e8581dd0eb85651e99 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 24 Nov 2024 03:23:02 +0800 Subject: [PATCH 6/6] Fix for DMatrix --- python-package/xgboost/core.py | 5 ++--- python-package/xgboost/data.py | 24 +++++++++++------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index e01faee9ca54..05c0cc30fa82 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -2669,8 +2669,7 @@ def inplace_predict( _arrow_transform, _is_arrow, _is_cudf_df, - _is_cudf_pandas_df, - _is_cudf_pandas_ser, + _is_cudf_pandas, _is_cupy_alike, _is_list, _is_np_array_like, @@ -2680,7 +2679,7 @@ def inplace_predict( _transform_pandas_df, ) - if _is_cudf_pandas_df(data) or _is_cudf_pandas_ser(data): + if _is_cudf_pandas(data): data = data._fsproxy_fast # pylint: disable=protected-access enable_categorical = True diff --git a/python-package/xgboost/data.py 
b/python-package/xgboost/data.py index 721f8976b3e2..29647f88a893 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -846,19 +846,13 @@ def _is_cudf_df(data: DataType) -> bool: return lazy_isinstance(data, "cudf.core.dataframe", "DataFrame") -def _is_cudf_pandas_df(data: DataType) -> bool: +def _is_cudf_pandas(data: DataType) -> bool: + """Must go before both pandas and cudf checks.""" return ( - str(type(data)) == "<class 'pandas.core.frame.DataFrame'>" - and str(type(type(data))) - == "<class 'cudf.pandas.fast_slow_proxy._FastSlowProxyMeta'>" - ) - - -def _is_cudf_pandas_ser(data: DataType) -> bool: - return ( - str(type(data)) == "<class 'pandas.core.series.Series'>" - and str(type(type(data))) - == "<class 'cudf.pandas.fast_slow_proxy._FastSlowProxyMeta'>" + lazy_isinstance(data, "pandas.core.frame", "DataFrame") + or lazy_isinstance(data, "pandas.core.series", "Series") + ) and lazy_isinstance( + type(data), "cudf.pandas.fast_slow_proxy", "_FastSlowProxyMeta" ) @@ -1253,6 +1247,8 @@ def dispatch_data_backend( ) if _is_arrow(data): data = _arrow_transform(data) + if _is_cudf_pandas(data): + data = data._fsproxy_fast # pylint: disable=protected-access if _is_pandas_series(data): import pandas as pd @@ -1425,6 +1421,8 @@ def dispatch_meta_backend( return if _is_arrow(data): data = _arrow_transform(data) + if _is_cudf_pandas(data): + data = data._fsproxy_fast # pylint: disable=protected-access if _is_pandas_df(data): _meta_from_pandas_df(data, name, dtype=dtype, handle=handle) return @@ -1496,7 +1494,7 @@ def _proxy_transform( feature_types: Optional[FeatureTypes], enable_categorical: bool, ) -> TransformedData: - if _is_cudf_pandas_df(data) or _is_cudf_pandas_ser(data): + if _is_cudf_pandas(data): data = data._fsproxy_fast # pylint: disable=protected-access if _is_cudf_df(data) or _is_cudf_ser(data): return _transform_cudf_df(