From 9b57a907fb1df7b4439b7091ea8dce256b086e83 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Mon, 17 Jul 2023 18:50:08 +0800
Subject: [PATCH] restore `use_gpu`.

---
 python-package/xgboost/sklearn.py             | 10 ++++--
 python-package/xgboost/spark/core.py          | 32 +++++++++----------
 python-package/xgboost/spark/estimator.py     | 29 ++++++++++++++++-
 .../test_gpu_with_spark/test_gpu_spark.py     | 13 ++++++++
 4 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index d69cb3a014d7..46a3ffa4aec1 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -1004,13 +1004,17 @@ def fit(
             Validation metrics will help us track the performance of the model.
         eval_metric : str, list of str, or callable, optional
+
             .. deprecated:: 1.6.0
-                Use `eval_metric` in :py:meth:`__init__` or :py:meth:`set_params` instead.
+
+                Use `eval_metric` in :py:meth:`__init__` or :py:meth:`set_params` instead.
         early_stopping_rounds : int
+
             .. deprecated:: 1.6.0
-                Use `early_stopping_rounds` in :py:meth:`__init__` or
-                :py:meth:`set_params` instead.
+
+                Use `early_stopping_rounds` in :py:meth:`__init__` or :py:meth:`set_params`
+                instead.
         verbose :
             If `verbose` is True and an evaluation set is used, the evaluation metric
             measured on the validation set is printed to stdout at each boosting stage.
diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py
index 4be4ccdfcddc..871b5ef88db6 100644
--- a/python-package/xgboost/spark/core.py
+++ b/python-package/xgboost/spark/core.py
@@ -340,8 +340,18 @@ def _validate_params(self) -> None:
                 f"It cannot be less than 1 [Default is 1]"
             )
 
+        tree_method = self.getOrDefault(self.getParam("tree_method"))
+        if (
+            self.getOrDefault(self.use_gpu) or use_cuda(self.getOrDefault(self.device))
+        ) and not _can_use_qdm(tree_method):
+            raise ValueError(
+                f"The `{tree_method}` tree method is not supported on GPU."
+            )
+
         if self.getOrDefault(self.features_cols):
-            if not use_cuda(self.getOrDefault(self.device)):
+            if not use_cuda(self.getOrDefault(self.device)) and not self.getOrDefault(
+                self.use_gpu
+            ):
                 raise ValueError(
                     "features_col param with list value requires `device=cuda`."
                 )
@@ -396,7 +406,7 @@ def _validate_params(self) -> None:
                 "`pyspark.ml.linalg.Vector` type."
             )
 
-        if use_cuda(self.getOrDefault(self.device)):
+        if use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(self.use_gpu):
             gpu_per_task = (
                 _get_spark_session()
                 .sparkContext.getConf()
@@ -553,6 +563,7 @@ def __init__(self) -> None:
         self._setDefault(
             num_workers=1,
             device="cpu",
+            use_gpu=False,
             force_repartition=False,
             repartition_random_shuffle=False,
             feature_names=None,
@@ -874,20 +885,9 @@ def _fit(self, dataset: DataFrame) -> "_SparkXGBModel":
             dmatrix_kwargs,
         ) = self._get_xgb_parameters(dataset)
 
-        run_on_gpu = use_cuda(self.getOrDefault(self.device))
-        tree_method = self.getParam("tree_method")
-        # Validation before submitting function to worker.
-        if (
-            run_on_gpu
-            and self.getOrDefault(tree_method)
-            and self.getOrDefault(tree_method) != "hist"
-        ):
-            raise ValueError(
-                f"The `{self.getOrDefault(tree_method)}` tree method is"
-                " not supported"
-                " on GPU."
-            )
-
+        run_on_gpu = use_cuda(self.getOrDefault(self.device)) or self.getOrDefault(
+            self.use_gpu
+        )
         is_local = _is_local(_get_spark_session().sparkContext)
 
         num_workers = self.getOrDefault(self.num_workers)
diff --git a/python-package/xgboost/spark/estimator.py b/python-package/xgboost/spark/estimator.py
index 68263140fa7c..cd7179edacf7 100644
--- a/python-package/xgboost/spark/estimator.py
+++ b/python-package/xgboost/spark/estimator.py
@@ -3,7 +3,7 @@
 # pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
 # pylint: disable=unused-argument, too-many-locals
 
-
+import warnings
 from typing import Any, List, Optional, Type, Union
 
 import numpy as np
@@ -134,6 +134,10 @@ class SparkXGBRegressor(_SparkXGBEstimator):
     num_workers:
         How many XGBoost workers to be used to train.
         Each XGBoost worker corresponds to one spark task.
+    use_gpu:
+        .. deprecated:: 2.0.0
+
+        Use `device` instead.
     device:
         Device for XGBoost workers, available options are `cpu`, `cuda`, and `gpu`.
     force_repartition:
@@ -194,6 +198,7 @@ def __init__(
         weight_col: Optional[str] = None,
         base_margin_col: Optional[str] = None,
         num_workers: int = 1,
+        use_gpu: Optional[bool] = None,
         device: Optional[str] = None,
         force_repartition: bool = False,
         repartition_random_shuffle: bool = False,
@@ -202,6 +207,10 @@
     ) -> None:
         super().__init__()
         input_kwargs = self._input_kwargs
+        if use_gpu:
+            warnings.warn(
+                "`use_gpu` is deprecated, use `device` instead", FutureWarning
+            )
         self.setParams(**input_kwargs)
 
     @classmethod
@@ -302,6 +311,10 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
     num_workers:
         How many XGBoost workers to be used to train.
         Each XGBoost worker corresponds to one spark task.
+    use_gpu:
+        .. deprecated:: 2.0.0
+
+        Use `device` instead.
     device:
         Device for XGBoost workers, available options are `cpu`, `cuda`, and `gpu`.
     force_repartition:
@@ -362,6 +375,7 @@ def __init__(
         weight_col: Optional[str] = None,
         base_margin_col: Optional[str] = None,
         num_workers: int = 1,
+        use_gpu: Optional[bool] = None,
         device: Optional[str] = None,
         force_repartition: bool = False,
         repartition_random_shuffle: bool = False,
@@ -374,6 +388,10 @@
         # binary or multinomial input dataset, and we need to remove the fixed default
         # param value as well to avoid causing ambiguity.
         input_kwargs = self._input_kwargs
+        if use_gpu:
+            warnings.warn(
+                "`use_gpu` is deprecated, use `device` instead", FutureWarning
+            )
         self.setParams(**input_kwargs)
         self._setDefault(objective=None)
@@ -473,6 +491,10 @@ class SparkXGBRanker(_SparkXGBEstimator):
     num_workers:
         How many XGBoost workers to be used to train.
         Each XGBoost worker corresponds to one spark task.
+    use_gpu:
+        .. deprecated:: 2.0.0
+
+        Use `device` instead.
     device:
         Device for XGBoost workers, available options are `cpu`, `cuda`, and `gpu`.
     force_repartition:
@@ -539,6 +561,7 @@ def __init__(
         base_margin_col: Optional[str] = None,
         qid_col: Optional[str] = None,
         num_workers: int = 1,
+        use_gpu: Optional[bool] = None,
         device: Optional[str] = None,
         force_repartition: bool = False,
         repartition_random_shuffle: bool = False,
@@ -547,6 +570,10 @@
     ) -> None:
         super().__init__()
         input_kwargs = self._input_kwargs
+        if use_gpu:
+            warnings.warn(
+                "`use_gpu` is deprecated, use `device` instead", FutureWarning
+            )
         self.setParams(**input_kwargs)
 
     @classmethod
diff --git a/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py b/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py
index f6ca4663acda..a962f778e888 100644
--- a/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py
+++ b/tests/test_distributed/test_gpu_with_spark/test_gpu_spark.py
@@ -197,6 +197,19 @@ def test_cv_sparkxgb_classifier_feature_cols_with_gpu(spark_iris_dataset_feature
     f1 = evaluator.evaluate(pred_result_df)
     assert f1 >= 0.97
 
+    clf = SparkXGBClassifier(
+        features_col=feature_names, use_gpu=True, num_workers=num_workers
+    )
+    grid = ParamGridBuilder().addGrid(clf.max_depth, [6, 8]).build()
+    evaluator = MulticlassClassificationEvaluator(metricName="f1")
+    cv = CrossValidator(
+        estimator=clf, evaluator=evaluator, estimatorParamMaps=grid, numFolds=3
+    )
+    cvModel = cv.fit(train_df)
+    pred_result_df = cvModel.transform(test_df)
+    f1 = evaluator.evaluate(pred_result_df)
+    assert f1 >= 0.97
+
 
 def test_sparkxgb_regressor_with_gpu(spark_diabetes_dataset):
     from pyspark.ml.evaluation import RegressionEvaluator
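
Usage note: with `use_gpu` restored, GPU training can be requested either way;
the deprecated spelling still works but now warns. A minimal caller-side
sketch, assuming an active SparkSession and a DataFrame `train_df` with
"features"/"label" columns (`train_df`, the column names, and `num_workers=2`
are illustrative, not part of the patch):

    import warnings

    from xgboost.spark import SparkXGBClassifier

    # Deprecated spelling: the constructor emits
    # FutureWarning("`use_gpu` is deprecated, use `device` instead").
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        clf_old = SparkXGBClassifier(use_gpu=True, num_workers=2)
    assert any(issubclass(w.category, FutureWarning) for w in caught)

    # Preferred spelling since 2.0.0.
    clf_new = SparkXGBClassifier(device="cuda", num_workers=2)
    model = clf_new.fit(train_df)  # `train_df` is an assumed fixture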
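
Validation note: the patch moves the GPU/tree-method check out of `_fit` and
into `_validate_params`, routing both `use_gpu=True` and `device="cuda"`
through the same `_can_use_qdm(tree_method)` guard. A hedged sketch of the
expected behavior; the `approx` example follows from that guard rather than
being taken verbatim from the patch:

    from xgboost.spark import SparkXGBRegressor

    reg = SparkXGBRegressor(device="cuda", tree_method="approx")
    # Expected to fail during parameter validation when fit() is called:
    #   ValueError: The `approx` tree method is not supported on GPU.

    # `hist` (or leaving tree_method unset) satisfies the QuantileDMatrix
    # guard and proceeds to GPU training.
    reg_ok = SparkXGBRegressor(device="cuda", tree_method="hist")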