
Commit 7127abd

Merge remote-tracking branch 'upstream/master' into fix-mgpu-quantile-tests

hcho3 committed Feb 12, 2023
2 parents 05739ab + 225b315
Showing 16 changed files with 188 additions and 96 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -20,7 +20,7 @@
XGBoost is an optimized distributed gradient boosting library designed to be highly ***efficient***, ***flexible*** and ***portable***.
It implements machine learning algorithms under the [Gradient Boosting](https://en.wikipedia.org/wiki/Gradient_boosting) framework.
XGBoost provides a parallel tree boosting (also known as GBDT, GBM) that solve many data science problems in a fast and accurate way.
- The same code runs on major distributed environment (Kubernetes, Hadoop, SGE, MPI, Dask) and can solve problems beyond billions of examples.
+ The same code runs on major distributed environment (Kubernetes, Hadoop, SGE, Dask, Spark, PySpark) and can solve problems beyond billions of examples.

License
-------
4 changes: 1 addition & 3 deletions demo/guide-python/categorical.py
@@ -2,9 +2,7 @@
Getting started with categorical data
=====================================
- Experimental support for categorical data. After 1.5 XGBoost `gpu_hist` tree method has
- experimental support for one-hot encoding based tree split, and in 1.6 `approx` support
- was added.
+ Experimental support for categorical data.
In before, users need to run an encoder themselves before passing the data into XGBoost,
which creates a sparse matrix and potentially increase memory usage. This demo
4 changes: 2 additions & 2 deletions doc/conf.py
@@ -211,8 +211,8 @@

intersphinx_mapping = {
"python": ("https://docs.python.org/3.8", None),
"numpy": ("https://docs.scipy.org/doc/numpy/", None),
"scipy": ("https://docs.scipy.org/doc/scipy/reference/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"scipy": ("https://docs.scipy.org/doc/scipy/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
"sklearn": ("https://scikit-learn.org/stable", None),
"dask": ("https://docs.dask.org/en/stable/", None),
24 changes: 11 additions & 13 deletions doc/faq.rst
@@ -19,15 +19,14 @@ I have a big dataset
********************
XGBoost is designed to be memory efficient. Usually it can handle problems as long as the data fit into your memory.
This usually means millions of instances.
- If you are running out of memory, checkout :doc:`external memory version </tutorials/external_memory>` or
- :doc:`distributed version </tutorials/aws_yarn>` of XGBoost.
-
- **************************************************
- Running XGBoost on platform X (Hadoop/Yarn, Mesos)
- **************************************************
- The distributed version of XGBoost is designed to be portable to various environment.
- Distributed XGBoost can be ported to any platform that supports `rabit <https://github.com/dmlc/rabit>`_.
- You can directly run XGBoost on Yarn. In theory Mesos and other resource allocation engines can be easily supported as well.
+ If you are running out of memory, checkout the tutorial page for using :doc:`distributed training </tutorials/index>` with one of the many frameworks, or the :doc:`external memory version </tutorials/external_memory>` for using external memory.
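For concreteness, a minimal sketch of distributed training through the Dask interface that the tutorial index covers (an editor's illustration, not part of this commit; it assumes a local `dask.distributed` cluster and synthetic data):

```python
import dask.array as da
import xgboost as xgb
from dask.distributed import Client

client = Client()  # local cluster for the sketch; point at a real scheduler in practice

# Synthetic data, chunked so Dask can spread the partitions across workers.
X = da.random.random((100_000, 10), chunks=(10_000, 10))
y = da.random.random(100_000, chunks=(10_000,))

dtrain = xgb.dask.DaskDMatrix(client, X, y)
output = xgb.dask.train(client, {"tree_method": "hist"}, dtrain, num_boost_round=10)
print(output["booster"].num_boosted_rounds())
```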


+ **********************************
+ How to handle categorical feature?
+ **********************************
+ Visit :doc:`this tutorial </tutorials/categorical>` for a walk through of categorical data handling and some worked examples.
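A minimal sketch of the workflow that tutorial walks through (an editor's illustration, not part of this commit; it assumes a recent release where the `hist` tree method accepts `enable_categorical` and a pandas DataFrame with `category` dtype):

```python
import pandas as pd
import xgboost as xgb

# Categorical columns are declared through the pandas "category" dtype; XGBoost
# encodes them internally when enable_categorical=True.
X = pd.DataFrame(
    {
        "color": pd.Categorical(["red", "green", "blue", "green", "red"]),
        "size": [1.0, 2.0, 3.0, 2.5, 1.5],
    }
)
y = [0, 1, 1, 1, 0]

clf = xgb.XGBClassifier(tree_method="hist", enable_categorical=True, n_estimators=10)
clf.fit(X, y)
print(clf.predict(X))
```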

******************************************************************
Why not implement distributed XGBoost on top of X (Spark, Hadoop)?
@@ -50,7 +49,7 @@ which means the model trained by one language can be loaded in another.
This means you can train the model using R, while running prediction using
Java or C++, which are more common in production systems.
You can also train the model using distributed versions,
- and load them in from Python to do some interactive analysis.
+ and load them in from Python to do some interactive analysis. See :doc:`Model IO </tutorials/saving_model>` for more information.
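A small illustration of that round trip (an editor's sketch, not part of this commit; the file name is arbitrary):

```python
import numpy as np
import xgboost as xgb

X = np.random.rand(20, 3)
y = np.random.rand(20)
dtrain = xgb.DMatrix(X, label=y)

bst = xgb.train({"objective": "reg:squarederror"}, dtrain, num_boost_round=10)
bst.save_model("model.json")  # the JSON format is readable from any language binding

bst2 = xgb.Booster()
bst2.load_model("model.json")
print(bst2.predict(xgb.DMatrix(X))[:3])
```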

**************************
Do you support LambdaMART?
@@ -70,11 +69,10 @@ When the ``missing`` parameter is specifed, values in the input predictor that i
**************************************
Slightly different result between runs
**************************************
- This could happen, due to non-determinism in floating point summation order and multi-threading.
- Though the general accuracy will usually remain the same.
+ This could happen, due to non-determinism in floating point summation order and multi-threading. Also, data partitioning changes by distributed framework can be an issue as well. Though the general accuracy will usually remain the same.
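A tiny illustration of the floating point effect (an editor's sketch, not part of this commit): the same numbers summed in a different order can round to a slightly different total.

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal(1_000_000).astype(np.float32)

forward = float(np.sum(x))        # one summation order
reverse = float(np.sum(x[::-1]))  # the same values, summed the other way around
print(forward, reverse, forward == reverse)  # the comparison may be False
```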

**********************************************************
Why do I see different results with sparse and dense data?
**********************************************************
"Sparse" elements are treated as if they were "missing" by the tree booster, and as zeros by the linear booster.
For tree models, it is important to use consistent data formats during training and scoring.

"Sparse" elements are treated as if they were "missing" by the tree booster, and as zeros by the linear booster. However, if we convert the sparse matrix back to dense matrix, the sparse matrix might fill the missing entries with 0, which is a valid value for xgboost.
3 changes: 2 additions & 1 deletion doc/python/model.rst
@@ -35,4 +35,5 @@ list of trees and can be sliced into multiple sub-models.
The sliced model is a copy of selected trees, that means the model itself is immutable
during slicing. This feature is the basis of `save_best` option in early stopping
- callback.
+ callback. See :ref:`sphx_glr_python_examples_individual_trees.py` for a worked example on
+ how to combine prediction with sliced trees.
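A minimal sketch of the slicing behaviour described above (an editor's illustration; the referenced example file is not reproduced in this diff):

```python
import numpy as np
import xgboost as xgb

X = np.random.rand(100, 4)
y = np.random.rand(100)
dtrain = xgb.DMatrix(X, label=y)

bst = xgb.train({"objective": "reg:squarederror"}, dtrain, num_boost_round=20)

first_ten = bst[0:10]  # a copy of trees 0-9; `bst` itself is left untouched
print(first_ten.predict(xgb.DMatrix(X))[:3])
print(bst.predict(xgb.DMatrix(X))[:3])
```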
8 changes: 0 additions & 8 deletions doc/tutorials/aws_yarn.rst

This file was deleted.

2 changes: 1 addition & 1 deletion doc/tutorials/categorical.rst
@@ -149,7 +149,7 @@ performance reasons.
References
**********

- [1] Walter D. Fisher. "`On Grouping for Maximum Homogeneity`_." Journal of the American Statistical Association. Vol. 53, No. 284 (Dec., 1958), pp. 789-798.
+ [1] Walter D. Fisher. "`On Grouping for Maximum Homogeneity`_". Journal of the American Statistical Association. Vol. 53, No. 284 (Dec., 1958), pp. 789-798.

[2] Trevor Hastie, Robert Tibshirani, Jerome Friedman. "`The Elements of Statistical Learning`_". Springer Series in Statistics Springer New York Inc. (2001).

3 changes: 1 addition & 2 deletions doc/tutorials/index.rst
@@ -3,15 +3,14 @@ XGBoost Tutorials
#################

This section contains official tutorials inside XGBoost package.
- See `Awesome XGBoost <https://github.com/dmlc/xgboost/tree/master/demo>`_ for more resources.
+ See `Awesome XGBoost <https://github.com/dmlc/xgboost/tree/master/demo>`_ for more resources. Also, don't miss the feature introductions in each package.

.. toctree::
:maxdepth: 1
:caption: Contents:

model
saving_model
- Distributed XGBoost with AWS YARN <aws_yarn>
kubernetes
Distributed XGBoost with XGBoost4J-Spark <https://xgboost.readthedocs.io/en/latest/jvm/xgboost4j_spark_tutorial.html>
Distributed XGBoost with XGBoost4J-Spark-GPU <https://xgboost.readthedocs.io/en/latest/jvm/xgboost4j_spark_gpu_tutorial.html>
53 changes: 48 additions & 5 deletions python-package/xgboost/sklearn.py
@@ -4,6 +4,7 @@
import json
import os
import warnings
from concurrent.futures import ThreadPoolExecutor
from typing import (
Any,
Callable,
@@ -127,6 +128,49 @@ def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
return inner


def ltr_metric_decorator(func: Callable, n_jobs: Optional[int]) -> Metric:
    """Decorate a learning to rank metric."""

    def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
        y_true = dmatrix.get_label()
        group_ptr = dmatrix.get_uint_info("group_ptr")
        if group_ptr.size < 2:
            raise ValueError(
                "Invalid `group_ptr`. Likely caused by invalid qid or group."
            )
        scores = np.empty(group_ptr.size - 1)
        futures = []
        weight = dmatrix.get_group()
        no_weight = weight.size == 0

        def task(i: int) -> float:
            begin = group_ptr[i - 1]
            end = group_ptr[i]
            gy = y_true[begin:end]
            gp = y_score[begin:end]
            if gy.size == 1:
                # Maybe there's a better default? 1.0 because many ranking score
                # functions have output in range [0, 1].
                return 1.0
            return func(gy, gp)

        workers = n_jobs if n_jobs is not None else os.cpu_count()
        with ThreadPoolExecutor(max_workers=workers) as executor:
            for i in range(1, group_ptr.size):
                f = executor.submit(task, i)
                futures.append(f)

            for i, f in enumerate(futures):
                scores[i] = f.result()

        if no_weight:
            return func.__name__, scores.mean()

        return func.__name__, np.average(scores, weights=weight)

    return inner
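A hypothetical usage sketch of what the decorator enables (an editor's illustration, not part of the diff): a per-group callable passed as `eval_metric` to `XGBRanker`, evaluated group by group as above. The metric name `precision_at_1` and the random data are made up for illustration.

```python
import numpy as np
import xgboost as xgb

def precision_at_1(y_true: np.ndarray, y_score: np.ndarray) -> float:
    # Called once per query group with that group's labels and predicted scores.
    return float(y_true[np.argmax(y_score)] > 0)

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = rng.integers(0, 2, size=200)
qid = np.sort(rng.integers(0, 20, size=200))

ranker = xgb.XGBRanker(n_estimators=10, tree_method="hist", eval_metric=precision_at_1)
ranker.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
print(ranker.evals_result())
```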


__estimator_doc = """
n_estimators : int
Number of gradient boosted trees. Equivalent to number of boosting
@@ -868,7 +912,10 @@ def _duplicated(parameter: str) -> None:
metric = eval_metric
elif callable(eval_metric):
# Parameter from constructor or set_params
- metric = _metric_decorator(eval_metric)
+ if self._get_type() == "ranker":
+     metric = ltr_metric_decorator(eval_metric, self.n_jobs)
+ else:
+     metric = _metric_decorator(eval_metric)
else:
params.update({"eval_metric": eval_metric})

@@ -1979,10 +2026,6 @@ def fit(
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
)
- if callable(metric):
-     raise ValueError(
-         "Custom evaluation metric is not yet supported for XGBRanker."
-     )

self._Booster = train(
params,
46 changes: 46 additions & 0 deletions src/common/deterministic.cuh
@@ -0,0 +1,46 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_DETERMINISTIC_CUH_
#define XGBOOST_COMMON_DETERMINISTIC_CUH_

#include <cmath>
#include <limits> // std::numeric_limits

#include "xgboost/base.h" // XGBOOST_DEVICE

namespace xgboost {
namespace common {
// Following 2 functions are slightly modified version of fbcuda.

/**
* \brief Constructs a rounding factor used to truncate elements in a sum such that the
* sum of the truncated elements is the same no matter what the order of the sum
* is.
*
* Algorithm 5: Reproducible Sequential Sum in 'Fast Reproducible Floating-Point
* Summation' by Demmel and Nguyen.
*/
template <typename T>
XGBOOST_DEVICE T CreateRoundingFactor(T max_abs, int n) {
  T delta = max_abs / (static_cast<T>(1.0) -
                       static_cast<T>(2.0) * static_cast<T>(n) * std::numeric_limits<T>::epsilon());

  // Calculate ceil(log_2(delta)).
  // frexpf() calculates exp and returns `x` such that
  // delta = x * 2^exp, where `x` in (-1.0, -0.5] U [0.5, 1).
  // Because |x| < 1, exp is exactly ceil(log_2(delta)).
  int exp;
  std::frexp(delta, &exp);

  // return M = 2 ^ ceil(log_2(delta))
  return std::ldexp(static_cast<T>(1.0), exp);
}

template <typename T>
XGBOOST_DEVICE T TruncateWithRounding(T const rounding_factor, T const x) {
  return (rounding_factor + x) - rounding_factor;
}
} // namespace common
} // namespace xgboost
#endif // XGBOOST_COMMON_DETERMINISTIC_CUH_
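A Python transcription of the idea (an editor's sketch, not part of the commit): truncate each addend against a power-of-two rounding factor so the subsequent sum is exact and therefore independent of summation order. Here the factor is built from a bound on the absolute sum, mirroring how the histogram code applies it.

```python
import math
import random

def create_rounding_factor(max_abs: float, n: int) -> float:
    # max_abs must bound the running sum of the truncated values (here the sum of
    # absolute values), so every partial sum stays exactly representable.
    eps = 2.0 ** -52  # machine epsilon for IEEE-754 double precision
    delta = max_abs / (1.0 - 2.0 * n * eps)
    _, exp = math.frexp(delta)   # delta = x * 2**exp with 0.5 <= |x| < 1
    return math.ldexp(1.0, exp)  # M = 2 ** ceil(log2(delta))

def truncate_with_rounding(factor: float, x: float) -> float:
    return (factor + x) - factor

random.seed(0)
values = [random.uniform(-1.0, 1.0) for _ in range(100_000)]
factor = create_rounding_factor(sum(abs(v) for v in values), len(values))
truncated = [truncate_with_rounding(factor, v) for v in values]

shuffled = list(truncated)
random.shuffle(shuffled)
# The truncated values sum to the same double regardless of order.
print(sum(truncated) == sum(shuffled), sum(values), sum(truncated))
```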
40 changes: 23 additions & 17 deletions src/common/ranking_utils.cuh → src/common/threading_utils.cuh
@@ -1,24 +1,28 @@
- /*!
-  * Copyright 2021 by XGBoost Contributors
+ /**
+  * Copyright 2021-2023 by XGBoost Contributors
  */
- #ifndef XGBOOST_COMMON_RANKING_UTILS_H_
- #define XGBOOST_COMMON_RANKING_UTILS_H_
+ #ifndef XGBOOST_COMMON_THREADING_UTILS_CUH_
+ #define XGBOOST_COMMON_THREADING_UTILS_CUH_

+ #include <algorithm> // std::min
+ #include <cstddef> // std::size_t

#include <cub/cub.cuh>
- #include "xgboost/base.h"
- #include "device_helpers.cuh"
- #include "./math.h"
+ #include "./math.h" // Sqr
+ #include "common.h"
+ #include "device_helpers.cuh" // LaunchN
+ #include "xgboost/base.h" // XGBOOST_DEVICE
+ #include "xgboost/span.h" // Span

namespace xgboost {
namespace common {
/**
* \param n Number of items (length of the base)
* \param h hight
*/
- XGBOOST_DEVICE inline size_t DiscreteTrapezoidArea(size_t n, size_t h) {
-   n -= 1;              // without diagonal entries
-   h = std::min(n, h);  // Specific for ranking.
-   size_t total = ((n - (h - 1)) + n) * h / 2;
+ XGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size_t h) {
+   n -= 1;              // without diagonal entries
+   h = std::min(n, h);  // Used for ranking, h <= n
+   std::size_t total = ((n - (h - 1)) + n) * h / 2;
return total;
}

@@ -29,12 +33,14 @@ XGBOOST_DEVICE inline size_t DiscreteTrapezoidArea(size_t n, size_t h) {
* Equivalent to loops like:
*
* \code
-  *   for (size i = 0; i < h; ++i) {
-  *     for (size_t j = i + 1; j < n; ++j) {
+  *   for (std::size_t i = 0; i < h; ++i) {
+  *     for (std::size_t j = i + 1; j < n; ++j) {
* do_something();
* }
* }
* \endcode
+  *
+  * with h <= n
*/
template <typename U>
inline size_t
@@ -79,6 +85,6 @@ XGBOOST_DEVICE inline void UnravelTrapeziodIdx(size_t i_idx, size_t n,

j = idx - n_elems + i + 1;
}
- } // namespace common
- } // namespace xgboost
- #endif // XGBOOST_COMMON_RANKING_UTILS_H_
+ } // namespace common
+ } // namespace xgboost
+ #endif // XGBOOST_COMMON_THREADING_UTILS_CUH_
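A small Python check of the closed form (an editor's sketch, not part of the commit): `DiscreteTrapezoidArea(n, h)` counts exactly the pairs visited by the nested loops shown in the comment above.

```python
def discrete_trapezoid_area(n: int, h: int) -> int:
    n -= 1         # without diagonal entries
    h = min(n, h)  # used for ranking, h <= n
    return ((n - (h - 1)) + n) * h // 2

def brute_force(n: int, h: int) -> int:
    # The loop structure from the header comment.
    return sum(1 for i in range(min(h, n - 1)) for j in range(i + 1, n))

for n in range(2, 30):
    for h in range(1, n):
        assert discrete_trapezoid_area(n, h) == brute_force(n, h)
print("closed form matches the nested loops")
```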
4 changes: 2 additions & 2 deletions src/metric/auc.cu
@@ -12,8 +12,8 @@
#include <utility>

#include "../collective/device_communicator.cuh"
#include "../common/optional_weight.h" // OptionalWeights
#include "../common/ranking_utils.cuh"
#include "../common/optional_weight.h" // OptionalWeights
#include "../common/threading_utils.cuh" // UnravelTrapeziodIdx,SegmentedTrapezoidThreads
#include "auc.h"
#include "xgboost/data.h"
#include "xgboost/span.h"
(Diffs for the remaining changed files did not load.)
