Skip to content

Commit

Permalink
Extract CPU sampling routines. (#8697)
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis authored Jan 19, 2023
1 parent 7a068af commit e49e099
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 120 deletions.
24 changes: 20 additions & 4 deletions include/xgboost/linalg.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022 by XGBoost Contributors
/**
* Copyright 2021-2023 by XGBoost Contributors
* \file linalg.h
* \brief Linear algebra related utilities.
*/
Expand All @@ -8,7 +8,7 @@

#include <dmlc/endian.h>
#include <xgboost/base.h>
#include <xgboost/context.h> // fixme(jiamingy): Remove the dependency on this header.
#include <xgboost/context.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/json.h>
#include <xgboost/span.h>
Expand Down Expand Up @@ -834,9 +834,26 @@ class Tensor {
int32_t DeviceIdx() const { return data_.DeviceIdx(); }
};

/**
 * \brief Alias for a rank-2 tensor (rows x columns).
 */
template <typename T>
using Matrix = Tensor<T, 2>;

/**
 * \brief Alias for a rank-1 tensor.
 */
template <typename T>
using Vector = Tensor<T, 1>;

/**
 * \brief Create an array without initialization.
 *
 * \tparam T     Element type of the tensor.
 * \tparam Index Integer pack, one extent per dimension; the rank of the
 *               result equals sizeof...(Index).
 *
 * \param ctx   Context whose gpu_id selects the device the data lives on.
 * \param index Shape of the tensor.
 *
 * \return A Tensor reshaped to the requested extents with its buffer left
 *         uninitialized (unlike \ref Constant / \ref Zeros).
 */
template <typename T, typename... Index>
auto Empty(Context const *ctx, Index &&...index) {
  Tensor<T, sizeof...(Index)> t;
  t.SetDevice(ctx->gpu_id);
  // NOTE(review): `index` is declared as forwarding references but is not
  // std::forward-ed here; harmless for the integral extents this is used with.
  t.Reshape(index...);
  return t;
}

/**
* \brief Create an array with value v.
*/
template <typename T, typename... Index>
auto Constant(Context const *ctx, T v, Index &&...index) {
Tensor<T, sizeof...(Index)> t;
Expand All @@ -846,7 +863,6 @@ auto Constant(Context const *ctx, T v, Index &&...index) {
return t;
}


/**
* \brief Like `np.zeros`, return a new array of given shape and type, filled with zeros.
*/
Expand Down
109 changes: 109 additions & 0 deletions src/tree/hist/sampler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_TREE_HIST_SAMPLER_H_
#define XGBOOST_TREE_HIST_SAMPLER_H_

#include <cstddef> // std::size_t
#include <cstdint> // std::uint64_t
#include <random> // bernoulli_distribution, linear_congruential_engine

#include "../../common/random.h" // GlobalRandom
#include "../param.h" // TrainParam
#include "xgboost/base.h" // GradientPair
#include "xgboost/context.h" // Context
#include "xgboost/data.h" // MetaInfo
#include "xgboost/linalg.h" // TensorView

namespace xgboost {
namespace tree {
// Helper for deterministically fast-forwarding a linear congruential engine,
// so independent threads can each pick up the stream at their own offset.
struct RandomReplace {
  // Multiplier chosen like std::minstd_rand.
  static constexpr std::uint64_t kBase = 16807;
  static constexpr std::uint64_t kMod = static_cast<std::uint64_t>(1) << 63;

  using EngineT = std::linear_congruential_engine<uint64_t, kBase, 0, kMod>;

  /*
    Compute base^exponent * initial_seed (mod `mod`) using the right-to-left
    binary method: https://en.wikipedia.org/wiki/Modular_exponentiation

    For the kMod used here (a power of two dividing 2^64) the wrap-around of
    unsigned 64-bit multiplication does not change the value taken modulo kMod,
    so the plain `*` followed by `%` is exact.
  */
  static std::uint64_t SimpleSkip(std::uint64_t exponent, std::uint64_t initial_seed,
                                  std::uint64_t base, std::uint64_t mod) {
    CHECK_LE(exponent, mod);
    std::uint64_t acc = 1;  // accumulates base^exponent (mod `mod`)
    for (; exponent != 0; exponent >>= 1) {
      if ((exponent & 1) != 0) {
        acc = (acc * base) % mod;
      }
      base = (base * base) % mod;
    }
    // Displace the seed by the accumulated power of the multiplier.
    return (acc * initial_seed) % mod;
  }
};

// Only uniform sampling, no gradient-based yet.
//
// Applies row subsampling in place: every row that is not selected has its
// gradient pair(s) in `out` overwritten with zero.  Rows are never reordered
// or removed, so callers may keep indexing by the original row id.
//
// \param ctx   Supplies the number of threads for the parallel path.
// \param param Supplies `subsample` (selection probability) and
//              `sampling_method`, which must be kUniform.
// \param out   Contiguous [n_samples, n_targets] view of gradient pairs,
//              modified in place.
inline void SampleGradient(Context const* ctx, TrainParam param,
                           linalg::MatrixView<GradientPair> out) {
  CHECK(out.Contiguous());
  CHECK_EQ(param.sampling_method, TrainParam::kUniform)
      << "Only uniform sampling is supported, gradient-based sampling is only support by GPU Hist.";

  // subsample >= 1 means "keep every row"; nothing to do.
  if (param.subsample >= 1.0) {
    return;
  }
  bst_row_t n_samples = out.Shape(0);
  auto& rnd = common::GlobalRandom();

#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
  // Single-threaded path that consumes the (customized) global PRNG directly,
  // keeping results reproducible under an external RNG such as R's.
  std::bernoulli_distribution coin_flip(param.subsample);
  CHECK_EQ(out.Shape(1), 1) << "Multi-target with sampling for R is not yet supported.";
  for (size_t i = 0; i < n_samples; ++i) {
    // Zero the row when it loses the coin flip, has a negative hessian, or a
    // zero gradient.
    if (!(out(i, 0).GetHess() >= 0.0f && coin_flip(rnd)) || out(i, 0).GetGrad() == 0.0f) {
      out(i, 0) = GradientPair(0);
    }
  }
#else
  // Parallel path: draw one seed from the global PRNG, then give each thread
  // its own engine fast-forwarded to the start of its row range.
  std::uint64_t initial_seed = rnd();

  auto n_threads = static_cast<size_t>(ctx->Threads());
  std::size_t const discard_size = n_samples / n_threads;
  std::bernoulli_distribution coin_flip(param.subsample);

  dmlc::OMPException exc;
#pragma omp parallel num_threads(n_threads)
  {
    exc.Run([&]() {
      const size_t tid = omp_get_thread_num();
      const size_t ibegin = tid * discard_size;
      // The last thread also covers the remainder rows.
      const size_t iend = (tid == (n_threads - 1)) ? n_samples : ibegin + discard_size;

      // Jump the engine ahead by `ibegin` steps so thread t resumes the stream
      // exactly where thread t-1's range ends.  NOTE(review): this presumes
      // std::bernoulli_distribution consumes exactly one engine value per
      // sample -- confirm for the targeted standard libraries.
      const uint64_t displaced_seed = RandomReplace::SimpleSkip(
          ibegin, initial_seed, RandomReplace::kBase, RandomReplace::kMod);
      RandomReplace::EngineT eng(displaced_seed);
      std::size_t n_targets = out.Shape(1);
      if (n_targets > 1) {
        // One coin flip per row; zero every target of an unselected row.
        for (std::size_t i = ibegin; i < iend; ++i) {
          if (!coin_flip(eng)) {
            for (std::size_t j = 0; j < n_targets; ++j) {
              out(i, j) = GradientPair{};
            }
          }
        }
      } else {
        // NOTE(review): unlike the customized-PRNG branch above, this path
        // does not zero rows with negative hessian or zero gradient -- confirm
        // the difference is intentional.
        for (std::size_t i = ibegin; i < iend; ++i) {
          if (!coin_flip(eng)) {
            out(i, 0) = GradientPair{};
          }
        }
      }
    });
  }
  exc.Rethrow();
#endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
}
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_HIST_SAMPLER_H_
44 changes: 17 additions & 27 deletions src/tree/updater_approx.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*!
* Copyright 2021-2022 XGBoost contributors
/**
* Copyright 2021-2023 by XGBoost contributors
*
* \brief Implementation for the approx tree method.
*/
Expand All @@ -14,9 +14,12 @@
#include "driver.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/sampler.h" // SampleGradient
#include "param.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "xgboost/linalg.h"
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"

Expand Down Expand Up @@ -256,8 +259,7 @@ class GlobalApproxUpdater : public TreeUpdater {
ObjInfo task_;

public:
explicit GlobalApproxUpdater(Context const *ctx, ObjInfo task)
: TreeUpdater(ctx), task_{task} {
explicit GlobalApproxUpdater(Context const *ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {
monitor_.Init(__func__);
}

Expand All @@ -272,24 +274,11 @@ class GlobalApproxUpdater : public TreeUpdater {
}

void InitData(TrainParam const &param, HostDeviceVector<GradientPair> const *gpair,
std::vector<GradientPair> *sampled) {
auto const &h_gpair = gpair->ConstHostVector();
sampled->resize(h_gpair.size());
std::copy(h_gpair.cbegin(), h_gpair.cend(), sampled->begin());
auto &rnd = common::GlobalRandom();

if (param.subsample != 1.0) {
CHECK(param.sampling_method != TrainParam::kGradientBased)
<< "Gradient based sampling is not supported for approx tree method.";
std::bernoulli_distribution coin_flip(param.subsample);
std::transform(sampled->begin(), sampled->end(), sampled->begin(), [&](GradientPair &g) {
if (coin_flip(rnd)) {
return g;
} else {
return GradientPair{};
}
});
}
linalg::Matrix<GradientPair> *sampled) {
*sampled = linalg::Empty<GradientPair>(ctx_, gpair->Size(), 1);
sampled->Data()->Copy(*gpair);

SampleGradient(ctx_, param, sampled->HostView());
}

char const *Name() const override { return "grow_histmaker"; }
Expand All @@ -303,18 +292,19 @@ class GlobalApproxUpdater : public TreeUpdater {
pimpl_ = std::make_unique<GloablApproxBuilder>(param_, m->Info(), ctx_, column_sampler_, task_,
&monitor_);

std::vector<GradientPair> h_gpair;
InitData(param_, gpair, &h_gpair);
linalg::Matrix<GradientPair> h_gpair;
// Obtain the hessian values for weighted sketching
std::vector<float> hess(h_gpair.size());
std::transform(h_gpair.begin(), h_gpair.end(), hess.begin(),
InitData(param_, gpair, &h_gpair);
std::vector<float> hess(h_gpair.Size());
auto const &s_gpair = h_gpair.Data()->ConstHostVector();
std::transform(s_gpair.begin(), s_gpair.end(), hess.begin(),
[](auto g) { return g.GetHess(); });

cached_ = m;

size_t t_idx = 0;
for (auto p_tree : trees) {
this->pimpl_->UpdateTree(m, h_gpair, hess, p_tree, &out_position[t_idx]);
this->pimpl_->UpdateTree(m, s_gpair, hess, p_tree, &out_position[t_idx]);
++t_idx;
}
param_.learning_rate = lr;
Expand Down
55 changes: 9 additions & 46 deletions src/tree/updater_quantile_hist.cc
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
/*!
* Copyright 2017-2022 by XGBoost Contributors
/**
* Copyright 2017-2023 by XGBoost Contributors
* \file updater_quantile_hist.cc
* \brief use quantized feature values to construct a tree
* \author Philip Cho, Tianqi Chen, Egor Smirnov
*/
#include "./updater_quantile_hist.h"

#include <algorithm>
#include <cstddef>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "common_row_partitioner.h"
#include "constraints.h"
#include "hist/histogram.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/sampler.h"
#include "param.h"
#include "xgboost/linalg.h"
#include "xgboost/logging.h"
#include "xgboost/tree_updater.h"

Expand Down Expand Up @@ -257,43 +260,6 @@ bool QuantileHistMaker::Builder::UpdatePredictionCache(DMatrix const *data,
return true;
}

// Uniform row subsampling for the hist builder: zeroes out the gradient pair
// of every row that is not selected, leaving the vector's size and row order
// untouched.  Callers are expected to gate on param_.subsample < 1 before
// calling (see InitData).
void QuantileHistMaker::Builder::InitSampling(const DMatrix &fmat,
                                              std::vector<GradientPair> *gpair) {
  monitor_->Start(__func__);
  const auto &info = fmat.Info();
  auto& rnd = common::GlobalRandom();
  std::vector<GradientPair>& gpair_ref = *gpair;

#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
  // Single-threaded path that consumes the (customized) global PRNG directly,
  // keeping results reproducible under an external RNG.
  std::bernoulli_distribution coin_flip(param_.subsample);
  for (size_t i = 0; i < info.num_row_; ++i) {
    // Zero the row when it loses the coin flip, has a negative hessian, or a
    // zero gradient.
    if (!(gpair_ref[i].GetHess() >= 0.0f && coin_flip(rnd)) || gpair_ref[i].GetGrad() == 0.0f) {
      gpair_ref[i] = GradientPair(0);
    }
  }
#else
  // Parallel path: one seed from the global PRNG; each thread gets its own
  // engine fast-forwarded (via MakeIf -> SimpleSkip) to the first row of its
  // range.
  uint64_t initial_seed = rnd();

  auto n_threads = static_cast<size_t>(ctx_->Threads());
  const size_t discard_size = info.num_row_ / n_threads;
  std::bernoulli_distribution coin_flip(param_.subsample);

  dmlc::OMPException exc;
#pragma omp parallel num_threads(n_threads)
  {
    exc.Run([&]() {
      const size_t tid = omp_get_thread_num();
      const size_t ibegin = tid * discard_size;
      // The last thread also covers the remainder rows.
      const size_t iend = (tid == (n_threads - 1)) ? info.num_row_ : ibegin + discard_size;
      // Replace the pair with zero when the row loses the coin flip or has a
      // negative hessian.
      RandomReplace::MakeIf([&](size_t i, RandomReplace::EngineT& eng) {
        return !(gpair_ref[i].GetHess() >= 0.0f && coin_flip(eng));
      }, GradientPair(0), initial_seed, ibegin, iend, &gpair_ref);
    });
  }
  exc.Rethrow();
#endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
  monitor_->Stop(__func__);
}
size_t QuantileHistMaker::Builder::GetNumberOfTrees() { return n_trees_; }

void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
Expand All @@ -317,12 +283,9 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed());

if (param_.subsample < 1.0f) {
CHECK_EQ(param_.sampling_method, TrainParam::kUniform)
<< "Only uniform sampling is supported, "
<< "gradient-based sampling is only support by GPU Hist.";
InitSampling(*fmat, gpair);
}
auto m_gpair =
linalg::MakeTensorView(*gpair, {gpair->size(), static_cast<std::size_t>(1)}, ctx_->gpu_id);
SampleGradient(ctx_, param_, m_gpair);
}

// store a pointer to the tree
Expand Down
43 changes: 0 additions & 43 deletions src/tree/updater_quantile_hist.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,47 +36,6 @@
#include "../common/column_matrix.h"

namespace xgboost {
// Helper for deterministically fast-forwarding a linear congruential engine so
// that independent threads can each pick up the random stream at their own
// row offset.
struct RandomReplace {
 public:
  // similar value as for minstd_rand
  static constexpr uint64_t kBase = 16807;
  static constexpr uint64_t kMod = static_cast<uint64_t>(1) << 63;

  using EngineT = std::linear_congruential_engine<uint64_t, kBase, 0, kMod>;

  /*
    Right-to-left binary method: https://en.wikipedia.org/wiki/Modular_exponentiation

    Computes base^exponent * initial_seed (mod `mod`).  For the kMod used here
    (a power of two dividing 2^64) the wrap-around of unsigned 64-bit
    multiplication does not change the value taken modulo kMod.
  */
  static uint64_t SimpleSkip(uint64_t exponent, uint64_t initial_seed,
                             uint64_t base, uint64_t mod) {
    CHECK_LE(exponent, mod);
    uint64_t result = 1;
    while (exponent > 0) {
      if (exponent % 2 == 1) {
        result = (result * base) % mod;
      }
      base = (base * base) % mod;
      exponent = exponent >> 1;
    }
    // with result we can now find the new seed
    return (result * initial_seed) % mod;
  }

  // Assigns `replace_value` to every gpair element in [ibegin, iend) for which
  // `condition(i, eng)` holds, using an engine seeded as if `ibegin` values
  // had already been drawn from a stream started at `initial_seed`.
  template<typename Condition, typename ContainerData>
  static void MakeIf(Condition condition, const typename ContainerData::value_type replace_value,
                     const uint64_t initial_seed, const size_t ibegin,
                     const size_t iend, ContainerData* gpair) {
    ContainerData& gpair_ref = *gpair;
    const uint64_t displaced_seed = SimpleSkip(ibegin, initial_seed, kBase, kMod);
    EngineT eng(displaced_seed);
    for (size_t i = ibegin; i < iend; ++i) {
      if (condition(i, eng)) {
        gpair_ref[i] = replace_value;
      }
    }
  }
};

namespace tree {
inline BatchParam HistBatch(TrainParam const& param) {
return {param.max_bin, param.sparse_threshold};
Expand Down Expand Up @@ -141,8 +100,6 @@ class QuantileHistMaker: public TreeUpdater {

size_t GetNumberOfTrees();

void InitSampling(const DMatrix& fmat, std::vector<GradientPair>* gpair);

CPUExpandEntry InitRoot(DMatrix* p_fmat, RegTree* p_tree,
const std::vector<GradientPair>& gpair_h);

Expand Down
Loading

0 comments on commit e49e099

Please sign in to comment.