Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test categorical features with column-split gpu quantile #9595

Merged
merged 7 commits into from
Sep 23, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions src/common/quantile.cu
Original file line number Diff line number Diff line change
Expand Up @@ -634,12 +634,25 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts, bool is_column_split) {
});
CHECK_EQ(num_columns_, d_in_columns_ptr.size() - 1);
max_values.resize(d_in_columns_ptr.size() - 1);

// In some cases (e.g. column-wise data split), we may have empty columns, so we need to keep
// track of the unique keys (feature indices) after the `thrust::reduce_by_key` call.
dh::caching_device_vector<size_t> d_max_keys(d_in_columns_ptr.size() - 1);
dh::caching_device_vector<SketchEntry> d_max_values(d_in_columns_ptr.size() - 1);
thrust::reduce_by_key(thrust::cuda::par(alloc), key_it, key_it + in_cut_values.size(), val_it,
thrust::make_discard_iterator(), d_max_values.begin(),
thrust::equal_to<bst_feature_t>{},
[] __device__(auto l, auto r) { return l.value > r.value ? l : r; });
dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_values));
auto new_end = thrust::reduce_by_key(
thrust::cuda::par(alloc), key_it, key_it + in_cut_values.size(), val_it, d_max_keys.begin(),
d_max_values.begin(), thrust::equal_to<bst_feature_t>{},
[] __device__(auto l, auto r) { return l.value > r.value ? l : r; });
d_max_keys.erase(new_end.first, d_max_keys.end());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused by these two erases, what are they doing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shrink the two vectors to actual size. If we have missing columns, they won't be fully populated.

d_max_values.erase(new_end.second, d_max_values.end());

// The device vector needs to be initialized explicitly since we may have some missing columns.
SketchEntry default_entry{};
dh::caching_device_vector<SketchEntry> d_max_results(d_in_columns_ptr.size() - 1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure the caching device vector actually initializes the values (i.e., calls the constructor)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. I think we ran into trouble with this before, as noted in the comment on XGBCachingDeviceAllocatorImpl. But you are correct.

default_entry);
thrust::scatter(d_max_values.begin(), d_max_values.end(), d_max_keys.begin(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exec policy?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

d_max_results.begin());
dh::CopyDeviceSpanToVector(&max_values, dh::ToSpan(d_max_results));
auto max_it = MakeIndexTransformIter([&](auto i) {
if (IsCat(h_feature_types, i)) {
return max_values[i].value;
Expand Down
8 changes: 4 additions & 4 deletions src/common/quantile.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ struct WQSummary {
/*! \brief an entry in the sketch summary */
struct Entry {
/*! \brief minimum rank */
RType rmin;
RType rmin{};
/*! \brief maximum rank */
RType rmax;
RType rmax{};
/*! \brief maximum weight */
RType wmin;
RType wmin{};
/*! \brief the value of data */
DType value;
DType value{};
// constructor
XGBOOST_DEVICE Entry() {} // NOLINT
// constructor
Expand Down
56 changes: 48 additions & 8 deletions tests/cpp/common/test_quantile.cu
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,31 @@ TEST(GPUQuantile, MultiMerge) {
});
}

// Sketches a column slice of a matrix with alternating numerical/categorical features and
// verifies that the resulting cuts still report categorical features.
TEST(GPUQuantile, MissingColumns) {
  auto build_sliced = []() {
    std::size_t constexpr kRows = 1000, kCols = 100;
    float sparsity = 0.5f;

    // Odd feature indices are categorical, even ones numerical.
    std::vector<FeatureType> feature_types(kCols);
    for (std::size_t fidx = 0; fidx < feature_types.size(); ++fidx) {
      bool const is_odd = (fidx % 2 != 0);
      feature_types[fidx] = is_odd ? FeatureType::kCategorical : FeatureType::kNumerical;
    }

    auto full = RandomDataGenerator{kRows, kCols, sparsity}
                    .Seed(0)
                    .Lower(.0f)
                    .Upper(1.0f)
                    .Type(feature_types)
                    .MaxCategory(13)
                    .GenerateDMatrix();
    // NOTE(review): presumably slice index 1 out of 2 column partitions -- confirm against
    // DMatrix::SliceCol.
    return full->SliceCol(2, 1);
  };
  std::unique_ptr<DMatrix> p_fmat{build_sliced()};
  // Sketch the slice in row-split mode.
  p_fmat->Info().data_split_mode = DataSplitMode::kRow;

  std::size_t constexpr kBins = 64;
  auto ctx = MakeCUDACtx(0);
  HistogramCuts sketched_cuts = common::DeviceSketch(&ctx, p_fmat.get(), kBins);
  ASSERT_TRUE(sketched_cuts.HasCategorical());
}

namespace {
void TestAllReduceBasic() {
auto const world = collective::GetWorldSize();
Expand Down Expand Up @@ -422,18 +447,14 @@ TEST_F(MGPUQuantileTest, AllReduceBasic) {
}

namespace {
void TestColumnSplitBasic() {
void TestColumnSplit(DMatrix* dmat) {
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
std::size_t constexpr kRows = 1000, kCols = 100, kBins = 64;

auto m = std::unique_ptr<DMatrix>{[=]() {
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
return dmat->SliceCol(world, rank);
}()};
auto m = std::unique_ptr<DMatrix>{dmat->SliceCol(world, rank)};

// Generate cuts for distributed environment.
auto ctx = MakeCUDACtx(GPUIDX);
std::size_t constexpr kBins = 64;
HistogramCuts distributed_cuts = common::DeviceSketch(&ctx, m.get(), kBins);

// Generate cuts for single node environment
Expand Down Expand Up @@ -466,7 +487,26 @@ void TestColumnSplitBasic() {
} // anonymous namespace

// Runs the column-split check on a random matrix generated with sparsity 0.
TEST_F(MGPUQuantileTest, ColumnSplitBasic) {
// NOTE(review): this page renders a diff without +/- markers; the call below to
// TestColumnSplitBasic looks like the removed pre-change line that the final DoTest call
// replaces -- confirm against the repository before relying on it.
DoTest(TestColumnSplitBasic);
std::size_t constexpr kRows = 1000, kCols = 100;
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix();
// Hand the full matrix to the test body via DoTest.
DoTest(TestColumnSplit, dmat.get());
}

// Runs the column-split check on a matrix generated with sparsity 0.5 whose features alternate
// between numerical and categorical.
TEST_F(MGPUQuantileTest, ColumnSplitCategorical) {
  std::size_t constexpr kRows = 1000, kCols = 100;
  float sparsity = 0.5f;

  // Odd feature indices are categorical, even ones numerical.
  std::vector<FeatureType> feature_types(kCols);
  for (std::size_t fidx = 0; fidx < feature_types.size(); ++fidx) {
    bool const is_odd = (fidx % 2 != 0);
    feature_types[fidx] = is_odd ? FeatureType::kCategorical : FeatureType::kNumerical;
  }

  auto p_fmat = RandomDataGenerator{kRows, kCols, sparsity}
                    .Seed(0)
                    .Lower(.0f)
                    .Upper(1.0f)
                    .Type(feature_types)
                    .MaxCategory(13)
                    .GenerateDMatrix();
  DoTest(TestColumnSplit, p_fmat.get());
}

namespace {
Expand Down
Loading