Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More support for column split in cpu predictor #9244

Merged
merged 10 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions include/xgboost/predictor.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,18 @@ class Predictor {
* usually more efficient than online prediction This function is NOT
* threadsafe, make sure you only call from one thread.
*
* \param inst The instance to predict.
* \param [in,out] out_preds The output preds.
* \param model The model to predict from
* \param tree_end (Optional) The tree end index.
* \param inst The instance to predict.
* \param [in,out] out_preds The output preds.
* \param model The model to predict from
* \param tree_end (Optional) The tree end index.
* \param is_column_split (Optional) If the data is split column-wise.
*/

virtual void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model,
unsigned tree_end = 0) const = 0;
unsigned tree_end = 0,
bool is_column_split = false) const = 0;

/**
* \brief predict the leaf index of each tree, the output will be nsample *
Expand Down
85 changes: 70 additions & 15 deletions src/predictor/cpu_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,18 @@ struct SparsePageView {
size_t Size() const { return view.Size(); }
};

struct SingleInstanceView {
bst_row_t base_rowid{};
SparsePage::Inst const &inst;

explicit SingleInstanceView(SparsePage::Inst const &instance) : inst{instance} {}
SparsePage::Inst operator[](size_t i) {
CHECK_EQ(i, 0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a heavy check. Can we remove it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

return inst;
}
static size_t Size() { return 1; }
};

struct GHistIndexMatrixView {
private:
GHistIndexMatrix const &page_;
Expand Down Expand Up @@ -409,6 +421,24 @@ class ColumnSplitHelper {
}
}

void PredictInstance(SparsePage::Inst const &inst, std::vector<bst_float> *out_preds) {
CHECK(xgboost::collective::IsDistributed())
<< "column-split prediction is only supported for distributed training";

PredictBatchKernel<SingleInstanceView, 1>(SingleInstanceView{inst}, out_preds);
}

void PredictLeaf(DMatrix *p_fmat, std::vector<bst_float> *out_preds) {
CHECK(xgboost::collective::IsDistributed())
<< "column-split prediction is only supported for distributed training";

for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
CHECK_EQ(out_preds->size(),
p_fmat->Info().num_row_ * model_.learner_model_param->num_output_group);
PredictBatchKernel<SparsePageView, kBlockOfRowsSize, true>(SparsePageView{&batch}, out_preds);
}
}

private:
using BitVector = RBitField8;

Expand Down Expand Up @@ -498,24 +528,31 @@ class ColumnSplitHelper {
return nid;
}

template <bool predict_leaf = false>
bst_float PredictOneTree(std::size_t tree_id, std::size_t row_id) {
auto const &tree = *model_.trees[tree_id];
auto const leaf = GetLeafIndex(tree, tree_id, row_id);
return tree[leaf].LeafValue();
if constexpr (predict_leaf) {
return static_cast<bst_float>(leaf);
} else {
return tree[leaf].LeafValue();
}
}

template <bool predict_leaf = false>
void PredictAllTrees(std::vector<bst_float> *out_preds, std::size_t batch_offset,
std::size_t predict_offset, std::size_t num_group, std::size_t block_size) {
auto &preds = *out_preds;
for (size_t tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {
auto const gid = model_.tree_info[tree_id];
for (size_t i = 0; i < block_size; ++i) {
preds[(predict_offset + i) * num_group + gid] += PredictOneTree(tree_id, batch_offset + i);
preds[(predict_offset + i) * num_group + gid] +=
PredictOneTree<predict_leaf>(tree_id, batch_offset + i);
}
}
}

template <typename DataView, size_t block_of_rows_size>
template <typename DataView, size_t block_of_rows_size, bool predict_leaf = false>
void PredictBatchKernel(DataView batch, std::vector<bst_float> *out_preds) {
auto const num_group = model_.learner_model_param->num_output_group;

Expand Down Expand Up @@ -544,8 +581,8 @@ class ColumnSplitHelper {
auto const batch_offset = block_id * block_of_rows_size;
auto const block_size = std::min(static_cast<std::size_t>(nsize - batch_offset),
static_cast<std::size_t>(block_of_rows_size));
PredictAllTrees(out_preds, batch_offset, batch_offset + batch.base_rowid, num_group,
block_size);
PredictAllTrees<predict_leaf>(out_preds, batch_offset, batch_offset + batch.base_rowid,
num_group, block_size);
});

ClearBitVectors();
Expand Down Expand Up @@ -728,18 +765,25 @@ class CPUPredictor : public Predictor {
return true;
}

void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit) const override {
void PredictInstance(const SparsePage::Inst &inst, std::vector<bst_float> *out_preds,
const gbm::GBTreeModel &model, unsigned ntree_limit,
bool is_column_split) const override {
CHECK(!model.learner_model_param->IsVectorLeaf()) << "predict instance" << MTNotImplemented();
std::vector<RegTree::FVec> feat_vecs;
feat_vecs.resize(1, RegTree::FVec());
feat_vecs[0].Init(model.learner_model_param->num_feature);
ntree_limit *= model.learner_model_param->num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
out_preds->resize(model.learner_model_param->num_output_group);

if (is_column_split) {
ColumnSplitHelper helper(this->ctx_->Threads(), model, 0, ntree_limit);
helper.PredictInstance(inst, out_preds);
return;
}

std::vector<RegTree::FVec> feat_vecs;
feat_vecs.resize(1, RegTree::FVec());
feat_vecs[0].Init(model.learner_model_param->num_feature);
auto base_score = model.learner_model_param->BaseScore(ctx_)(0);
// loop over output groups
for (uint32_t gid = 0; gid < model.learner_model_param->num_output_group; ++gid) {
Expand All @@ -752,16 +796,23 @@ class CPUPredictor : public Predictor {
void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_preds,
const gbm::GBTreeModel &model, unsigned ntree_limit) const override {
auto const n_threads = this->ctx_->Threads();
std::vector<RegTree::FVec> feat_vecs;
const int num_feature = model.learner_model_param->num_feature;
InitThreadTemp(n_threads, &feat_vecs);
const MetaInfo &info = p_fmat->Info();
// number of valid trees
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
const MetaInfo &info = p_fmat->Info();
std::vector<bst_float> &preds = out_preds->HostVector();
preds.resize(info.num_row_ * ntree_limit);

if (p_fmat->Info().IsColumnSplit()) {
ColumnSplitHelper helper(n_threads, model, 0, ntree_limit);
helper.PredictLeaf(p_fmat, &preds);
return;
}

std::vector<RegTree::FVec> feat_vecs;
const int num_feature = model.learner_model_param->num_feature;
InitThreadTemp(n_threads, &feat_vecs);
// start collecting the prediction
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// parallel over local batch
Expand Down Expand Up @@ -796,6 +847,8 @@ class CPUPredictor : public Predictor {
int condition, unsigned condition_feature) const override {
CHECK(!model.learner_model_param->IsVectorLeaf())
<< "Predict contribution" << MTNotImplemented();
CHECK(!p_fmat->Info().IsColumnSplit())
<< "Predict contribution support for column-wise data split is not yet implemented.";
auto const n_threads = this->ctx_->Threads();
const int num_feature = model.learner_model_param->num_feature;
std::vector<RegTree::FVec> feat_vecs;
Expand Down Expand Up @@ -877,6 +930,8 @@ class CPUPredictor : public Predictor {
bool approximate) const override {
CHECK(!model.learner_model_param->IsVectorLeaf())
<< "Predict interaction contribution" << MTNotImplemented();
CHECK(!p_fmat->Info().IsColumnSplit()) << "Predict interaction contribution support for "
"column-wise data split is not yet implemented.";
const MetaInfo& info = p_fmat->Info();
const int ngroup = model.learner_model_param->num_output_group;
size_t const ncolumns = model.learner_model_param->num_feature;
Expand Down
2 changes: 1 addition & 1 deletion src/predictor/gpu_predictor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ class GPUPredictor : public xgboost::Predictor {

void PredictInstance(const SparsePage::Inst&,
std::vector<bst_float>*,
const gbm::GBTreeModel&, unsigned) const override {
const gbm::GBTreeModel&, unsigned, bool) const override {
LOG(FATAL) << "[Internal error]: " << __func__
<< " is not implemented in GPU Predictor.";
}
Expand Down
Loading