diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index c9b50792d073..1d950e70a3a5 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -8,8 +8,8 @@ #include -#include "../common/common.h" -#include "column_matrix.h" +#include "../data/adapter.h" // for SparsePageAdapterBatch +#include "../data/gradient_index.h" // for GHistIndexMatrix #include "quantile.h" #include "xgboost/base.h" #include "xgboost/context.h" // Context @@ -24,9 +24,7 @@ #define PREFETCH_READ_T0(addr) do {} while (0) #endif // defined(XGBOOST_MM_PREFETCH_PRESENT) -namespace xgboost { -namespace common { - +namespace xgboost::common { HistogramCuts::HistogramCuts() { cut_ptrs_.HostVector().emplace_back(0); } @@ -350,9 +348,8 @@ void BuildHistDispatch(Span gpair, const RowSetCollection::E } template -void GHistBuilder::BuildHist(Span gpair, - const RowSetCollection::Elem row_indices, const GHistIndexMatrix &gmat, - GHistRow hist, bool force_read_by_column) const { +void BuildHist(Span gpair, const RowSetCollection::Elem row_indices, + const GHistIndexMatrix &gmat, GHistRow hist, bool force_read_by_column) { /* force_read_by_column is used for testing the columnwise building of histograms. * default force_read_by_column = false */ @@ -369,14 +366,13 @@ void GHistBuilder::BuildHist(Span gpair, }); } -template void GHistBuilder::BuildHist(Span gpair, - const RowSetCollection::Elem row_indices, - const GHistIndexMatrix &gmat, GHistRow hist, - bool force_read_by_column) const; - -template void GHistBuilder::BuildHist(Span gpair, - const RowSetCollection::Elem row_indices, - const GHistIndexMatrix &gmat, GHistRow hist, - bool force_read_by_column) const; -} // namespace common -} // namespace xgboost +template void BuildHist(Span gpair, + const RowSetCollection::Elem row_indices, + const GHistIndexMatrix &gmat, GHistRow hist, + bool force_read_by_column); + +template void BuildHist(Span gpair, + const RowSetCollection::Elem row_indices, + const GHistIndexMatrix &gmat, GHistRow hist, + bool force_read_by_column); +} // namespace xgboost::common diff --git a/src/common/hist_util.h b/src/common/hist_util.h index 2781da8e0cff..c0fe5b44f60d 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -16,11 +16,9 @@ #include #include "categorical.h" -#include "common.h" #include "quantile.h" #include "row_set.h" #include "threading_utils.h" -#include "timer.h" #include "xgboost/base.h" // for bst_feature_t, bst_bin_t #include "xgboost/data.h" @@ -598,6 +596,8 @@ class ParallelGHistBuilder { } } + [[nodiscard]] bst_bin_t TotalBins() const { return nbins_; } + private: void MatchNodeNidPairToHist() { size_t hist_allocated_additionally = 0; @@ -643,27 +643,10 @@ class ParallelGHistBuilder { std::map, int> tid_nid_to_hist_; }; -/*! - * \brief builder for histograms of gradient statistics - */ -class GHistBuilder { - public: - GHistBuilder() = default; - explicit GHistBuilder(uint32_t nbins): nbins_{nbins} {} - - // construct a histogram via histogram aggregation - template - void BuildHist(Span gpair, const RowSetCollection::Elem row_indices, - const GHistIndexMatrix& gmat, GHistRow hist, - bool force_read_by_column = false) const; - uint32_t GetNumBins() const { - return nbins_; - } - - private: - /*! \brief number of all bins over all features */ - uint32_t nbins_ { 0 }; -}; +// construct a histogram via histogram aggregation +template +void BuildHist(Span gpair, const RowSetCollection::Elem row_indices, + const GHistIndexMatrix& gmat, GHistRow hist, bool force_read_by_column = false); } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_HIST_UTIL_H_ diff --git a/src/common/threading_utils.h b/src/common/threading_utils.h index d80008cc0809..0247e4dccbf5 100644 --- a/src/common/threading_utils.h +++ b/src/common/threading_utils.h @@ -30,9 +30,7 @@ inline int32_t omp_get_thread_limit() { return std::numeric_limits::max } #endif // defined(_MSC_VER) -namespace xgboost { -namespace common { - +namespace xgboost::common { // Represent simple range of indexes [begin, end) // Inspired by tbb::blocked_range class Range1d { @@ -69,7 +67,7 @@ class Range1d { // [1,2], [3,4], [5,6], [7,8], [9] // The class helps to process data in several tree nodes (non-balanced usually) in parallel // Using nested parallelism (by nodes and by data in each node) -// it helps to improve CPU resources utilization +// it helps to improve CPU resources utilization class BlockedSpace2d { public: // Example of space: @@ -86,39 +84,47 @@ class BlockedSpace2d { // dim1 - size of the first dimension in the space // getter_size_dim2 - functor to get the second dimensions for each 'row' by row-index // grain_size - max size of produced blocks - template - BlockedSpace2d(size_t dim1, Func getter_size_dim2, size_t grain_size) { - for (size_t i = 0; i < dim1; ++i) { - const size_t size = getter_size_dim2(i); - const size_t n_blocks = size/grain_size + !!(size % grain_size); - for (size_t iblock = 0; iblock < n_blocks; ++iblock) { - const size_t begin = iblock * grain_size; - const size_t end = std::min(begin + grain_size, size); + template + BlockedSpace2d(std::size_t dim1, Func getter_size_dim2, std::size_t grain_size) { + for (std::size_t i = 0; i < dim1; ++i) { + std::size_t size = getter_size_dim2(i); + // Each row (second dim) is divided into n_blocks + std::size_t n_blocks = size / grain_size + !!(size % grain_size); + for (std::size_t iblock = 0; iblock < n_blocks; ++iblock) { + std::size_t begin = iblock * grain_size; + std::size_t end = std::min(begin + grain_size, size); AddBlock(i, begin, end); } } } // Amount of blocks(tasks) in a space - size_t Size() const { + [[nodiscard]] std::size_t Size() const { return ranges_.size(); } // get index of the first dimension of i-th block(task) - size_t GetFirstDimension(size_t i) const { + [[nodiscard]] std::size_t GetFirstDimension(size_t i) const { CHECK_LT(i, first_dimension_.size()); return first_dimension_[i]; } // get a range of indexes for the second dimension of i-th block(task) - Range1d GetRange(size_t i) const { + [[nodiscard]] Range1d GetRange(size_t i) const { CHECK_LT(i, ranges_.size()); return ranges_[i]; } private: - void AddBlock(size_t first_dimension, size_t begin, size_t end) { - first_dimension_.push_back(first_dimension); + /** + * @brief Add a parallel block. + * + * @param first_dim The row index. + * @param begin The begin of the second dimension. + * @param end The end of the second dimension. + */ + void AddBlock(std::size_t first_dim, std::size_t begin, std::size_t end) { + first_dimension_.push_back(first_dim); ranges_.emplace_back(begin, end); } @@ -303,7 +309,6 @@ class MemStackAllocator { * \brief Constant that can be used for initializing static thread local memory. */ std::int32_t constexpr DefaultMaxThreads() { return 128; } -} // namespace common -} // namespace xgboost +} // namespace xgboost::common #endif // XGBOOST_COMMON_THREADING_UTILS_H_ diff --git a/src/tree/hist/histogram.h b/src/tree/hist/histogram.h index 562a0b2d44dc..b7f5f5da65fe 100644 --- a/src/tree/hist/histogram.h +++ b/src/tree/hist/histogram.h @@ -22,7 +22,6 @@ class HistogramBuilder { common::HistCollection hist_; /*! \brief culmulative local parent histogram of gradients. */ common::HistCollection hist_local_worker_; - common::GHistBuilder builder_; common::ParallelGHistBuilder buffer_; BatchParam param_; int32_t n_threads_{-1}; @@ -49,7 +48,6 @@ class HistogramBuilder { hist_.Init(total_bins); hist_local_worker_.Init(total_bins); buffer_.Init(total_bins); - builder_ = common::GHistBuilder(total_bins); is_distributed_ = is_distributed; is_col_split_ = is_col_split; // Workaround s390x gcc 7.5.0 @@ -88,8 +86,7 @@ class HistogramBuilder { elem.begin + end_of_row_set, nid); auto hist = buffer_.GetInitializedHist(tid, nid_in_set); if (rid_set.Size() != 0) { - builder_.template BuildHist(gpair_h, rid_set, gidx, hist, - force_read_by_column); + common::BuildHist(gpair_h, rid_set, gidx, hist, force_read_by_column); } }); } @@ -163,9 +160,9 @@ class HistogramBuilder { std::vector const &nodes_for_explicit_hist_build, std::vector const &nodes_for_subtraction_trick, int starting_index, int sync_count) { - const size_t nbins = builder_.GetNumBins(); + auto n_bins = buffer_.TotalBins(); common::BlockedSpace2d space( - nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, 1024); + nodes_for_explicit_hist_build.size(), [&](size_t) { return n_bins; }, 1024); common::ParallelFor2d(space, n_threads_, [&](size_t node, common::Range1d r) { const auto &entry = nodes_for_explicit_hist_build[node]; auto this_hist = this->hist_[entry.nid]; @@ -188,14 +185,13 @@ class HistogramBuilder { }); collective::Allreduce( - reinterpret_cast(this->hist_[starting_index].data()), - builder_.GetNumBins() * sync_count * 2); + reinterpret_cast(this->hist_[starting_index].data()), n_bins * sync_count * 2); ParallelSubtractionHist(space, nodes_for_explicit_hist_build, nodes_for_subtraction_trick, p_tree); common::BlockedSpace2d space2( - nodes_for_subtraction_trick.size(), [&](size_t) { return nbins; }, 1024); + nodes_for_subtraction_trick.size(), [&](size_t) { return n_bins; }, 1024); ParallelSubtractionHist(space2, nodes_for_subtraction_trick, nodes_for_explicit_hist_build, p_tree); } @@ -203,7 +199,7 @@ class HistogramBuilder { void SyncHistogramLocal(RegTree const *p_tree, std::vector const &nodes_for_explicit_hist_build, std::vector const &nodes_for_subtraction_trick) { - const size_t nbins = this->builder_.GetNumBins(); + const size_t nbins = this->buffer_.TotalBins(); common::BlockedSpace2d space( nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, 1024); diff --git a/tests/cpp/tree/hist/test_evaluate_splits.cc b/tests/cpp/tree/hist/test_evaluate_splits.cc index 677687255a5e..7bde3aca2ae0 100644 --- a/tests/cpp/tree/hist/test_evaluate_splits.cc +++ b/tests/cpp/tree/hist/test_evaluate_splits.cc @@ -48,12 +48,10 @@ void TestEvaluateSplits(bool force_read_by_column) { std::iota(row_indices.begin(), row_indices.end(), 0); row_set_collection.Init(); - auto hist_builder = common::GHistBuilder(gmat.cut.Ptrs().back()); hist.Init(gmat.cut.Ptrs().back()); hist.AddHistRow(0); hist.AllocateAllData(); - hist_builder.template BuildHist(row_gpairs, row_set_collection[0], - gmat, hist[0], force_read_by_column); + common::BuildHist(row_gpairs, row_set_collection[0], gmat, hist[0], force_read_by_column); // Compute total gradient for all data points GradientPairPrecise total_gpair; diff --git a/tests/cpp/tree/hist/test_histogram.cc b/tests/cpp/tree/hist/test_histogram.cc index 8eb043ceca20..0198c6c80279 100644 --- a/tests/cpp/tree/hist/test_histogram.cc +++ b/tests/cpp/tree/hist/test_histogram.cc @@ -13,8 +13,7 @@ #include "../../categorical_helpers.h" #include "../../helpers.h" -namespace xgboost { -namespace tree { +namespace xgboost::tree { namespace { void InitRowPartitionForTest(common::RowSetCollection *row_set, size_t n_samples, size_t base_rowid = 0) { auto &row_indices = *row_set->Data(); @@ -487,5 +486,5 @@ TEST(CPUHistogram, ExternalMemory) { TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false); TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true); } -} // namespace tree -} // namespace xgboost +} // namespace xgboost::tree +