From c03ddce374dcb023df090b2c2e7dde28ddf24c58 Mon Sep 17 00:00:00 2001 From: jiamingy Date: Wed, 18 Jan 2023 06:36:52 +0800 Subject: [PATCH 1/3] [doc] Add missing document for pyspark ranker. [skip ci] --- doc/python/python_api.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index 03b431c77d41..b27542a8becb 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -173,3 +173,13 @@ PySpark API :members: :inherited-members: :show-inheritance: + +.. autoclass:: xgboost.spark.SparkXGBRanker + :members: + :inherited-members: + :show-inheritance: + +.. autoclass:: xgboost.spark.SparkXGBRankerModel + :members: + :inherited-members: + :show-inheritance: From 3c2cdb62936c170274c63660ababa19ae56b8dec Mon Sep 17 00:00:00 2001 From: jiamingy Date: Wed, 18 Jan 2023 06:54:14 +0800 Subject: [PATCH 2/3] fix import. --- python-package/xgboost/spark/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python-package/xgboost/spark/__init__.py b/python-package/xgboost/spark/__init__.py index 7c18eeba46b5..224f87d0399a 100644 --- a/python-package/xgboost/spark/__init__.py +++ b/python-package/xgboost/spark/__init__.py @@ -1,5 +1,4 @@ -"""PySpark XGBoost integration interface -""" +"""PySpark XGBoost integration interface""" try: import pyspark @@ -10,6 +9,7 @@ SparkXGBClassifier, SparkXGBClassifierModel, SparkXGBRanker, + SparkXGBRankerModel, SparkXGBRegressor, SparkXGBRegressorModel, ) @@ -20,4 +20,5 @@ "SparkXGBRegressor", "SparkXGBRegressorModel", "SparkXGBRanker", + "SparkXGBRankerModel", ] From dab1bd35039e9152dde65e00f089ec05b2d272dd Mon Sep 17 00:00:00 2001 From: jiamingy Date: Wed, 18 Jan 2023 06:56:09 +0800 Subject: [PATCH 3/3] Fix spark tutorial formatting. --- doc/tutorials/spark_estimator.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/tutorials/spark_estimator.rst b/doc/tutorials/spark_estimator.rst index aae9f9ef61c7..02ddb60ea008 100644 --- a/doc/tutorials/spark_estimator.rst +++ b/doc/tutorials/spark_estimator.rst @@ -45,7 +45,7 @@ such as ``weight_col``, ``validation_indicator_col``, ``use_gpu``, for details p The following code snippet shows how to train a spark xgboost regressor model, first we need to prepare a training dataset as a spark dataframe contains -"label" column and "features" column(s), the "features" column(s) must be ``pyspark.ml.linalg.Vector` +"label" column and "features" column(s), the "features" column(s) must be ``pyspark.ml.linalg.Vector`` type or spark array type or a list of feature column names. @@ -56,7 +56,7 @@ type or spark array type or a list of feature column names. The following code snippet shows how to predict test data using a spark xgboost regressor model, first we need to prepare a test dataset as a spark dataframe contains -"features" and "label" column, the "features" column must be ``pyspark.ml.linalg.Vector` +"features" and "label" column, the "features" column must be ``pyspark.ml.linalg.Vector`` type or spark array type. .. code-block:: python @@ -97,7 +97,7 @@ Aside from the PySpark and XGBoost modules, we also need the `cuDF `_ package for handling Spark dataframe. We recommend using either Conda or Virtualenv to manage python dependencies for PySpark jobs. Please refer to `How to Manage Python Dependencies in PySpark -`_ +`_ for more details on PySpark dependency management. In short, to create a Python environment that can be sent to a remote cluster using