From 334fb394a2af725c2a97a91ea862f7df7c345ce6 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 29 Mar 2023 10:11:17 -0700 Subject: [PATCH 1/6] add some tests for colsplit objectives --- src/objective/adaptive.cc | 4 +- src/objective/adaptive.cu | 4 +- src/objective/adaptive.h | 14 ++++-- src/objective/regression_obj.cu | 6 ++- tests/cpp/test_learner.cc | 75 ++++++++++++++++++++++----------- 5 files changed, 68 insertions(+), 35 deletions(-) diff --git a/src/objective/adaptive.cc b/src/objective/adaptive.cc index 4a67e848bb63..7187e47e7a4a 100644 --- a/src/objective/adaptive.cc +++ b/src/objective/adaptive.cc @@ -85,7 +85,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit size_t n_leaf = nidx.size(); if (nptr.empty()) { std::vector quantiles; - UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree); + UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree); return; } @@ -131,7 +131,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit quantiles.at(k) = q; }); - UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree); + UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree); } #if !defined(XGBOOST_USE_CUDA) diff --git a/src/objective/adaptive.cu b/src/objective/adaptive.cu index 662b0330beb7..bba8b85ad837 100644 --- a/src/objective/adaptive.cu +++ b/src/objective/adaptive.cu @@ -151,7 +151,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span pos if (nptr.Empty()) { std::vector quantiles; - UpdateLeafValues(&quantiles, nidx.ConstHostVector(), learning_rate, p_tree); + UpdateLeafValues(&quantiles, nidx.ConstHostVector(), info, learning_rate, p_tree); } HostDeviceVector quantiles; @@ -186,7 +186,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span pos w_it + d_weights.size(), &quantiles); } - UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), learning_rate, p_tree); + UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), info, learning_rate, p_tree); } } // 
namespace detail } // namespace obj diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h index fef920ec9848..7494bceb1989 100644 --- a/src/objective/adaptive.h +++ b/src/objective/adaptive.h @@ -36,13 +36,15 @@ inline void FillMissingLeaf(std::vector const& maybe_missing, } inline void UpdateLeafValues(std::vector* p_quantiles, std::vector const& nidx, - float learning_rate, RegTree* p_tree) { + MetaInfo const& info, float learning_rate, RegTree* p_tree) { auto& tree = *p_tree; auto& quantiles = *p_quantiles; auto const& h_node_idx = nidx; size_t n_leaf{h_node_idx.size()}; - collective::Allreduce(&n_leaf, 1); + if (info.IsRowSplit()) { + collective::Allreduce(&n_leaf, 1); + } CHECK(quantiles.empty() || quantiles.size() == n_leaf); if (quantiles.empty()) { quantiles.resize(n_leaf, std::numeric_limits::quiet_NaN()); @@ -52,12 +54,16 @@ inline void UpdateLeafValues(std::vector* p_quantiles, std::vector n_valids(quantiles.size()); std::transform(quantiles.cbegin(), quantiles.cend(), n_valids.begin(), [](float q) { return static_cast(!std::isnan(q)); }); - collective::Allreduce(n_valids.data(), n_valids.size()); + if (info.IsRowSplit()) { + collective::Allreduce(n_valids.data(), n_valids.size()); + } // convert to 0 for all reduce std::replace_if( quantiles.begin(), quantiles.end(), [](float q) { return std::isnan(q); }, 0.f); // use the mean value - collective::Allreduce(quantiles.data(), quantiles.size()); + if (info.IsRowSplit()) { + collective::Allreduce(quantiles.data(), quantiles.size()); + } for (size_t i = 0; i < n_leaf; ++i) { if (n_valids[i] > 0) { quantiles[i] /= static_cast(n_valids[i]); diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index d7999f8c129b..e0dbb2edc817 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -728,8 +728,10 @@ class MeanAbsoluteError : public ObjFunction { std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out), [w](float v) { return v * 
w; }); - collective::Allreduce(out.Values().data(), out.Values().size()); - collective::Allreduce(&w, 1); + if (info.IsRowSplit()) { + collective::Allreduce(out.Values().data(), out.Values().size()); + collective::Allreduce(&w, 1); + } if (common::CloseTo(w, 0.0)) { // Mostly for handling empty dataset test. diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index e4313125d3c4..10566589b8e0 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -608,31 +608,56 @@ TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); } TEST_F(InitBaseScore, UpdateProcess) { this->TestUpdateProcess(); } -void TestColumnSplitBaseScore(std::shared_ptr Xy_, float expected_base_score) { - auto const world_size = collective::GetWorldSize(); - auto const rank = collective::GetRank(); - std::shared_ptr sliced{Xy_->SliceCol(world_size, rank)}; - std::unique_ptr learner{Learner::Create({sliced})}; - learner->SetParam("tree_method", "approx"); - learner->SetParam("objective", "binary:logistic"); - learner->UpdateOneIter(0, sliced); - Json config{Object{}}; - learner->SaveConfig(&config); - auto base_score = GetBaseScore(config); - ASSERT_EQ(base_score, expected_base_score); -} +class ColumnSplit : public ::testing::Test { + protected: + static void TestColumnSplit(std::shared_ptr dmat, std::string const& objective, + float expected_base_score, Json const& expected_model) { + auto const world_size = collective::GetWorldSize(); + auto const rank = collective::GetRank(); + std::shared_ptr sliced{dmat->SliceCol(world_size, rank)}; + std::unique_ptr learner{Learner::Create({sliced})}; + learner->SetParam("tree_method", "approx"); + learner->SetParam("objective", objective); + learner->UpdateOneIter(0, sliced); + Json config{Object{}}; + learner->SaveConfig(&config); + auto base_score = GetBaseScore(config); + ASSERT_EQ(base_score, expected_base_score); -TEST_F(InitBaseScore, ColumnSplit) { - std::unique_ptr learner{Learner::Create({Xy_})}; - 
learner->SetParam("tree_method", "approx"); - learner->SetParam("objective", "binary:logistic"); - learner->UpdateOneIter(0, Xy_); - Json config{Object{}}; - learner->SaveConfig(&config); - auto base_score = GetBaseScore(config); - ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + Json model{Object{}}; + learner->SaveModel(&model); + ASSERT_EQ(model, expected_model); + } - auto constexpr kWorldSize{3}; - RunWithInMemoryCommunicator(kWorldSize, &TestColumnSplitBaseScore, Xy_, base_score); -} + void TestBaseScoreAndModel(std::string const& objective) { + std::shared_ptr dmat{RandomDataGenerator{10, 10, 0}.GenerateDMatrix(true)}; + std::unique_ptr learner{Learner::Create({dmat})}; + learner->SetParam("tree_method", "approx"); + learner->SetParam("objective", objective); + learner->UpdateOneIter(0, dmat); + + Json config{Object{}}; + learner->SaveConfig(&config); + auto base_score = GetBaseScore(config); + ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); + + Json model{Object{}}; + learner->SaveModel(&model); + + auto constexpr kWorldSize{3}; + RunWithInMemoryCommunicator(kWorldSize, &TestColumnSplit, dmat, objective, base_score, model); + } +}; + +TEST_F(ColumnSplit, RegSquaredError) { this->TestBaseScoreAndModel("reg:squarederror"); } + +TEST_F(ColumnSplit, RegSquaredLogError) { this->TestBaseScoreAndModel("reg:squaredlogerror"); } + +TEST_F(ColumnSplit, RegLogistic) { this->TestBaseScoreAndModel("reg:logistic"); } + +TEST_F(ColumnSplit, RegPseudoHuberError) { this->TestBaseScoreAndModel("reg:pseudohubererror"); } + +TEST_F(ColumnSplit, RegAsoluteError) { this->TestBaseScoreAndModel("reg:absoluteerror"); } + +TEST_F(ColumnSplit, BinaryLogistic) { this->TestBaseScoreAndModel("binary:logistic"); } } // namespace xgboost From a1f84d114793ff81da60c02b28aa9a42d0ba4437 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 29 Mar 2023 12:32:51 -0700 Subject: [PATCH 2/6] test all objectives for colsplit --- src/objective/quantile_obj.cu | 6 ++-- 
tests/cpp/test_learner.cc | 52 +++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu index 0a40758bc86d..92c08840636d 100644 --- a/src/objective/quantile_obj.cu +++ b/src/objective/quantile_obj.cu @@ -167,8 +167,10 @@ class QuantileRegression : public ObjFunction { common::Mean(ctx_, *base_score, &temp); double meanq = temp(0) * sw; - collective::Allreduce(&meanq, 1); - collective::Allreduce(&sw, 1); + if (info.IsRowSplit()) { + collective::Allreduce(&meanq, 1); + collective::Allreduce(&sw, 1); + } meanq /= (sw + kRtEps); base_score->Reshape(1); base_score->Data()->Fill(meanq); diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 10566589b8e0..0b4945c3c65a 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -618,6 +618,12 @@ class ColumnSplit : public ::testing::Test { std::unique_ptr learner{Learner::Create({sliced})}; learner->SetParam("tree_method", "approx"); learner->SetParam("objective", objective); + if (objective.find("quantile") != std::string::npos) { + learner->SetParam("quantile_alpha", "0.5"); + } + if (objective.find("multi") != std::string::npos) { + learner->SetParam("num_class", "3"); + } learner->UpdateOneIter(0, sliced); Json config{Object{}}; learner->SaveConfig(&config); @@ -630,16 +636,32 @@ class ColumnSplit : public ::testing::Test { } void TestBaseScoreAndModel(std::string const& objective) { - std::shared_ptr dmat{RandomDataGenerator{10, 10, 0}.GenerateDMatrix(true)}; + auto constexpr kRows = 10, kCols = 10; + std::shared_ptr dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)}; + + auto &h_upper = dmat->Info().labels_upper_bound_.HostVector(); + auto &h_lower = dmat->Info().labels_lower_bound_.HostVector(); + h_lower.resize(kRows); + h_upper.resize(kRows); + for (size_t i = 0; i < kRows; ++i) { + h_lower[i] = 1; + h_upper[i] = 10; + } + std::unique_ptr 
learner{Learner::Create({dmat})}; learner->SetParam("tree_method", "approx"); learner->SetParam("objective", objective); + if (objective.find("quantile") != std::string::npos) { + learner->SetParam("quantile_alpha", "0.5"); + } + if (objective.find("multi") != std::string::npos) { + learner->SetParam("num_class", "3"); + } learner->UpdateOneIter(0, dmat); Json config{Object{}}; learner->SaveConfig(&config); auto base_score = GetBaseScore(config); - ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); Json model{Object{}}; learner->SaveModel(&model); @@ -659,5 +681,31 @@ TEST_F(ColumnSplit, RegPseudoHuberError) { this->TestBaseScoreAndModel("reg:pseu TEST_F(ColumnSplit, RegAsoluteError) { this->TestBaseScoreAndModel("reg:absoluteerror"); } +TEST_F(ColumnSplit, RegQuantileError) { this->TestBaseScoreAndModel("reg:quantileerror"); } + TEST_F(ColumnSplit, BinaryLogistic) { this->TestBaseScoreAndModel("binary:logistic"); } + +TEST_F(ColumnSplit, BinaryLogitRaw) { this->TestBaseScoreAndModel("binary:logitraw"); } + +TEST_F(ColumnSplit, BinaryHinge) { this->TestBaseScoreAndModel("binary:hinge"); } + +TEST_F(ColumnSplit, CountPoisson) { this->TestBaseScoreAndModel("count:poisson"); } + +TEST_F(ColumnSplit, SurvivalCox) { this->TestBaseScoreAndModel("survival:cox"); } + +TEST_F(ColumnSplit, SurvivalAft) { this->TestBaseScoreAndModel("survival:aft"); } + +TEST_F(ColumnSplit, MultiSoftmax) { this->TestBaseScoreAndModel("multi:softmax"); } + +TEST_F(ColumnSplit, MultiSoftprob) { this->TestBaseScoreAndModel("multi:softprob"); } + +TEST_F(ColumnSplit, RankPairwise) { this->TestBaseScoreAndModel("rank:pairwise"); } + +TEST_F(ColumnSplit, RankNdcg) { this->TestBaseScoreAndModel("rank:ndcg"); } + +TEST_F(ColumnSplit, RankMap) { this->TestBaseScoreAndModel("rank:map"); } + +TEST_F(ColumnSplit, RegGamma) { this->TestBaseScoreAndModel("reg:gamma"); } + +TEST_F(ColumnSplit, RegTweedie) { this->TestBaseScoreAndModel("reg:tweedie"); } } // namespace xgboost From 
332a2cbc9950175abb1c038f7acb6a00c867a26a Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 30 Mar 2023 11:57:18 -0700 Subject: [PATCH 3/6] test objectives for vertical federated learning --- src/objective/adaptive.h | 35 ++++- src/objective/quantile_obj.cu | 2 +- tests/cpp/plugin/test_federated_learner.cc | 141 ++++++++++++++------- 3 files changed, 131 insertions(+), 47 deletions(-) diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h index 7494bceb1989..f6eecb85076b 100644 --- a/src/objective/adaptive.h +++ b/src/objective/adaptive.h @@ -103,8 +103,39 @@ inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector cons std::int32_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree) { if (ctx->IsCPU()) { - detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate, - predt, alpha, p_tree); + // When doing vertical federated learning, we assume only worker 0 has access to the labels, + // so update the leaf values there and broadcast them to other workers. 
+ if (info.IsVerticalFederated()) { + if (collective::GetRank() == 0) { + detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate, + predt, alpha, p_tree); + std::vector leaf_values(p_tree->GetNumLeaves()); + auto i = 0; + for (auto const& node : p_tree->GetNodes()) { + if (node.IsLeaf()) { + leaf_values[i] = node.LeafValue(); + i++; + } + } + collective::Broadcast(static_cast(leaf_values.data()), + leaf_values.size() * sizeof(bst_float), 0); + } else { + std::vector leaf_values(p_tree->GetNumLeaves()); + collective::Broadcast(static_cast(leaf_values.data()), + leaf_values.size() * sizeof(bst_float), 0); + auto i = 0; + auto& tree = *p_tree; + for (auto nid = 0; nid < tree.NumNodes(); nid++) { + if (tree[nid].IsLeaf()) { + tree[nid].SetLeaf(leaf_values[i]); + i++; + } + } + } + } else { + detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate, + predt, alpha, p_tree); + } } else { position.SetDevice(ctx->gpu_id); detail::UpdateTreeLeafDevice(ctx, position.ConstDeviceSpan(), group_idx, info, learning_rate, diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu index 92c08840636d..cfd2f62a8a85 100644 --- a/src/objective/quantile_obj.cu +++ b/src/objective/quantile_obj.cu @@ -35,7 +35,7 @@ class QuantileRegression : public ObjFunction { bst_target_t Targets(MetaInfo const& info) const override { auto const& alpha = param_.quantile_alpha.Get(); CHECK_EQ(alpha.size(), alpha_.Size()) << "The objective is not yet configured."; - CHECK_EQ(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss."; + CHECK_LE(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss."; CHECK(!alpha.empty()); // We have some placeholders for multi-target in the quantile loss. 
But it's not // supported as the gbtree doesn't know how to slice the gradient and there's no 3-dim diff --git a/tests/cpp/plugin/test_federated_learner.cc b/tests/cpp/plugin/test_federated_learner.cc index 67e322323052..40accb512d67 100644 --- a/tests/cpp/plugin/test_federated_learner.cc +++ b/tests/cpp/plugin/test_federated_learner.cc @@ -13,66 +13,119 @@ namespace xgboost { -class FederatedLearnerTest : public BaseFederatedTest { - protected: - static auto constexpr kRows{16}; - static auto constexpr kCols{16}; -}; - -void VerifyBaseScore(size_t rows, size_t cols, float expected_base_score) { +void VerifyBaseScoreAndModel(size_t rows, size_t cols, std::string const& objective, + float expected_base_score, Json const& expected_model) { auto const world_size = collective::GetWorldSize(); auto const rank = collective::GetRank(); - std::shared_ptr Xy_{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)}; - std::shared_ptr sliced{Xy_->SliceCol(world_size, rank)}; + std::shared_ptr dmat{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)}; + + if (rank == 0) { + auto &h_upper = dmat->Info().labels_upper_bound_.HostVector(); + auto &h_lower = dmat->Info().labels_lower_bound_.HostVector(); + h_lower.resize(rows); + h_upper.resize(rows); + for (size_t i = 0; i < rows; ++i) { + h_lower[i] = 1; + h_upper[i] = 10; + } + } + + std::shared_ptr sliced{dmat->SliceCol(world_size, rank)}; std::unique_ptr learner{Learner::Create({sliced})}; learner->SetParam("tree_method", "approx"); - learner->SetParam("objective", "binary:logistic"); + learner->SetParam("objective", objective); + if (objective.find("quantile") != std::string::npos) { + learner->SetParam("quantile_alpha", "0.5"); + } + if (objective.find("multi") != std::string::npos) { + learner->SetParam("num_class", "3"); + } learner->UpdateOneIter(0, sliced); + Json config{Object{}}; learner->SaveConfig(&config); auto base_score = GetBaseScore(config); ASSERT_EQ(base_score, expected_base_score); -} -void 
VerifyModel(size_t rows, size_t cols, Json const& expected_model) { - auto const world_size = collective::GetWorldSize(); - auto const rank = collective::GetRank(); - std::shared_ptr Xy_{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)}; - std::shared_ptr sliced{Xy_->SliceCol(world_size, rank)}; - std::unique_ptr learner{Learner::Create({sliced})}; - learner->SetParam("tree_method", "approx"); - learner->SetParam("objective", "binary:logistic"); - learner->UpdateOneIter(0, sliced); Json model{Object{}}; learner->SaveModel(&model); ASSERT_EQ(model, expected_model); } -TEST_F(FederatedLearnerTest, BaseScore) { - std::shared_ptr Xy_{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)}; - std::unique_ptr learner{Learner::Create({Xy_})}; - learner->SetParam("tree_method", "approx"); - learner->SetParam("objective", "binary:logistic"); - learner->UpdateOneIter(0, Xy_); - Json config{Object{}}; - learner->SaveConfig(&config); - auto base_score = GetBaseScore(config); - ASSERT_NE(base_score, ObjFunction::DefaultBaseScore()); +class FederatedLearnerTest : public BaseFederatedTest { + protected: + static auto constexpr kRows{16}; + static auto constexpr kCols{16}; - RunWithFederatedCommunicator(kWorldSize, server_address_, &VerifyBaseScore, kRows, kCols, - base_score); -} + void TestObjective(std::string const& objective) { + std::shared_ptr dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)}; -TEST_F(FederatedLearnerTest, Model) { - std::shared_ptr Xy_{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)}; - std::unique_ptr learner{Learner::Create({Xy_})}; - learner->SetParam("tree_method", "approx"); - learner->SetParam("objective", "binary:logistic"); - learner->UpdateOneIter(0, Xy_); - Json model{Object{}}; - learner->SaveModel(&model); + auto &h_upper = dmat->Info().labels_upper_bound_.HostVector(); + auto &h_lower = dmat->Info().labels_lower_bound_.HostVector(); + h_lower.resize(kRows); + h_upper.resize(kRows); + for (size_t i = 
0; i < kRows; ++i) { + h_lower[i] = 1; + h_upper[i] = 10; + } - RunWithFederatedCommunicator(kWorldSize, server_address_, &VerifyModel, kRows, kCols, - std::cref(model)); -} + std::unique_ptr learner{Learner::Create({dmat})}; + learner->SetParam("tree_method", "approx"); + learner->SetParam("objective", objective); + if (objective.find("quantile") != std::string::npos) { + learner->SetParam("quantile_alpha", "0.5"); + } + if (objective.find("multi") != std::string::npos) { + learner->SetParam("num_class", "3"); + } + learner->UpdateOneIter(0, dmat); + Json config{Object{}}; + learner->SaveConfig(&config); + auto base_score = GetBaseScore(config); + + Json model{Object{}}; + learner->SaveModel(&model); + + RunWithFederatedCommunicator(kWorldSize, server_address_, &VerifyBaseScoreAndModel, kRows, + kCols, objective, base_score, model); + } +}; + +TEST_F(FederatedLearnerTest, RegSquaredError) { TestObjective("reg:squarederror"); } + +TEST_F(FederatedLearnerTest, RegSquaredLogError) { TestObjective("reg:squaredlogerror"); } + +TEST_F(FederatedLearnerTest, RegLogistic) { TestObjective("reg:logistic"); } + +TEST_F(FederatedLearnerTest, RegPseudoHuberError) { TestObjective("reg:pseudohubererror"); } + +TEST_F(FederatedLearnerTest, RegAsoluteError) { TestObjective("reg:absoluteerror"); } + +TEST_F(FederatedLearnerTest, RegQuantileError) { TestObjective("reg:quantileerror"); } + +TEST_F(FederatedLearnerTest, BinaryLogistic) { TestObjective("binary:logistic"); } + +TEST_F(FederatedLearnerTest, BinaryLogitRaw) { TestObjective("binary:logitraw"); } + +TEST_F(FederatedLearnerTest, BinaryHinge) { TestObjective("binary:hinge"); } + +TEST_F(FederatedLearnerTest, CountPoisson) { TestObjective("count:poisson"); } + +TEST_F(FederatedLearnerTest, SurvivalCox) { TestObjective("survival:cox"); } + +TEST_F(FederatedLearnerTest, SurvivalAft) { TestObjective("survival:aft"); } + +TEST_F(FederatedLearnerTest, MultiSoftmax) { TestObjective("multi:softmax"); } + +TEST_F(FederatedLearnerTest, 
MultiSoftprob) { TestObjective("multi:softprob"); } + +TEST_F(FederatedLearnerTest, RankPairwise) { TestObjective("rank:pairwise"); } + +TEST_F(FederatedLearnerTest, RankNdcg) { TestObjective("rank:ndcg"); } + +TEST_F(FederatedLearnerTest, RankMap) { TestObjective("rank:map"); } + +TEST_F(FederatedLearnerTest, RegGamma) { TestObjective("reg:gamma"); } + +TEST_F(FederatedLearnerTest, RegTweedie) { TestObjective("reg:tweedie"); } } // namespace xgboost From 9b547078c3c54801f59f5a9b0c82db03c5a9d13f Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 31 Mar 2023 09:57:17 -0700 Subject: [PATCH 4/6] more strict label shapes --- src/objective/quantile_obj.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu index cfd2f62a8a85..7dcc85ac0f22 100644 --- a/src/objective/quantile_obj.cu +++ b/src/objective/quantile_obj.cu @@ -35,7 +35,9 @@ class QuantileRegression : public ObjFunction { bst_target_t Targets(MetaInfo const& info) const override { auto const& alpha = param_.quantile_alpha.Get(); CHECK_EQ(alpha.size(), alpha_.Size()) << "The objective is not yet configured."; - CHECK_LE(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss."; + if (!info.IsVerticalFederated() || collective::GetRank() == 0) { + CHECK_EQ(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss."; + } CHECK(!alpha.empty()); // We have some placeholders for multi-target in the quantile loss. 
But it's not // supported as the gbtree doesn't know how to slice the gradient and there's no 3-dim From a7d2622a89c835074002d4540d652b1cdb213ff3 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 31 Mar 2023 10:09:23 -0700 Subject: [PATCH 5/6] fix long line --- src/objective/quantile_obj.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu index 7dcc85ac0f22..b6e540b2401e 100644 --- a/src/objective/quantile_obj.cu +++ b/src/objective/quantile_obj.cu @@ -36,7 +36,8 @@ class QuantileRegression : public ObjFunction { auto const& alpha = param_.quantile_alpha.Get(); CHECK_EQ(alpha.size(), alpha_.Size()) << "The objective is not yet configured."; if (!info.IsVerticalFederated() || collective::GetRank() == 0) { - CHECK_EQ(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss."; + CHECK_EQ(info.labels.Shape(1), 1) + << "Multi-target is not yet supported by the quantile loss."; } CHECK(!alpha.empty()); // We have some placeholders for multi-target in the quantile loss. 
But it's not From fc53748d1230dabf624b49736e6e80b157ddeae8 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 31 Mar 2023 18:11:22 -0700 Subject: [PATCH 6/6] address review feedback --- src/objective/adaptive.cc | 65 ++++++----- src/objective/adaptive.h | 35 +----- tests/cpp/plugin/test_federated_learner.cc | 122 ++++++++------------- tests/cpp/test_learner.cc | 106 +++++++----------- 4 files changed, 123 insertions(+), 205 deletions(-) diff --git a/src/objective/adaptive.cc b/src/objective/adaptive.cc index 7187e47e7a4a..32fda9ef17b2 100644 --- a/src/objective/adaptive.cc +++ b/src/objective/adaptive.cc @@ -99,37 +99,44 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_, predt.Size() / info.num_row_); - // loop over each leaf - common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) { - auto nidx = h_node_idx[k]; - CHECK(tree[nidx].IsLeaf()); - CHECK_LT(k + 1, h_node_ptr.size()); - size_t n = h_node_ptr[k + 1] - h_node_ptr[k]; - auto h_row_set = common::Span{ridx}.subspan(h_node_ptr[k], n); - - auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx)); - auto h_weights = linalg::MakeVec(&info.weights_); - - auto iter = common::MakeIndexTransformIter([&](size_t i) -> float { - auto row_idx = h_row_set[i]; - return h_labels(row_idx) - h_predt(row_idx, group_idx); - }); - auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float { - auto row_idx = h_row_set[i]; - return h_weights(row_idx); + if (!info.IsVerticalFederated() || collective::GetRank() == 0) { + // loop over each leaf + common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) { + auto nidx = h_node_idx[k]; + CHECK(tree[nidx].IsLeaf()); + CHECK_LT(k + 1, h_node_ptr.size()); + size_t n = h_node_ptr[k + 1] - h_node_ptr[k]; + auto h_row_set = common::Span{ridx}.subspan(h_node_ptr[k], n); + + auto h_labels = info.labels.HostView().Slice(linalg::All(), 
IdxY(info, group_idx)); + auto h_weights = linalg::MakeVec(&info.weights_); + + auto iter = common::MakeIndexTransformIter([&](size_t i) -> float { + auto row_idx = h_row_set[i]; + return h_labels(row_idx) - h_predt(row_idx, group_idx); + }); + auto w_it = common::MakeIndexTransformIter([&](size_t i) -> float { + auto row_idx = h_row_set[i]; + return h_weights(row_idx); + }); + + float q{0}; + if (info.weights_.Empty()) { + q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size()); + } else { + q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it); + } + if (std::isnan(q)) { + CHECK(h_row_set.empty()); + } + quantiles.at(k) = q; }); + } - float q{0}; - if (info.weights_.Empty()) { - q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size()); - } else { - q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it); - } - if (std::isnan(q)) { - CHECK(h_row_set.empty()); - } - quantiles.at(k) = q; - }); + if (info.IsVerticalFederated()) { + collective::Broadcast(static_cast(quantiles.data()), quantiles.size() * sizeof(float), + 0); + } UpdateLeafValues(&quantiles, nidx, info, learning_rate, p_tree); } diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h index f6eecb85076b..7494bceb1989 100644 --- a/src/objective/adaptive.h +++ b/src/objective/adaptive.h @@ -103,39 +103,8 @@ inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector cons std::int32_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree) { if (ctx->IsCPU()) { - // When doing vertical federated learning, we assume only worker 0 has access to the labels, - // so update the leaf values there and broadcast them to other workers. 
- if (info.IsVerticalFederated()) { - if (collective::GetRank() == 0) { - detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate, - predt, alpha, p_tree); - std::vector leaf_values(p_tree->GetNumLeaves()); - auto i = 0; - for (auto const& node : p_tree->GetNodes()) { - if (node.IsLeaf()) { - leaf_values[i] = node.LeafValue(); - i++; - } - } - collective::Broadcast(static_cast(leaf_values.data()), - leaf_values.size() * sizeof(bst_float), 0); - } else { - std::vector leaf_values(p_tree->GetNumLeaves()); - collective::Broadcast(static_cast(leaf_values.data()), - leaf_values.size() * sizeof(bst_float), 0); - auto i = 0; - auto& tree = *p_tree; - for (auto nid = 0; nid < tree.NumNodes(); nid++) { - if (tree[nid].IsLeaf()) { - tree[nid].SetLeaf(leaf_values[i]); - i++; - } - } - } - } else { - detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate, - predt, alpha, p_tree); - } + detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate, + predt, alpha, p_tree); } else { position.SetDevice(ctx->gpu_id); detail::UpdateTreeLeafDevice(ctx, position.ConstDeviceSpan(), group_idx, info, learning_rate, diff --git a/tests/cpp/plugin/test_federated_learner.cc b/tests/cpp/plugin/test_federated_learner.cc index 40accb512d67..fe7fe6854c1a 100644 --- a/tests/cpp/plugin/test_federated_learner.cc +++ b/tests/cpp/plugin/test_federated_learner.cc @@ -13,8 +13,8 @@ namespace xgboost { -void VerifyBaseScoreAndModel(size_t rows, size_t cols, std::string const& objective, - float expected_base_score, Json const& expected_model) { +void VerifyObjectives(size_t rows, size_t cols, std::vector const &expected_base_scores, + std::vector const &expected_models) { auto const world_size = collective::GetWorldSize(); auto const rank = collective::GetRank(); std::shared_ptr dmat{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)}; @@ -29,103 +29,75 @@ void VerifyBaseScoreAndModel(size_t 
rows, size_t cols, std::string const& object h_upper[i] = 10; } } - std::shared_ptr sliced{dmat->SliceCol(world_size, rank)}; - std::unique_ptr learner{Learner::Create({sliced})}; - learner->SetParam("tree_method", "approx"); - learner->SetParam("objective", objective); - if (objective.find("quantile") != std::string::npos) { - learner->SetParam("quantile_alpha", "0.5"); - } - if (objective.find("multi") != std::string::npos) { - learner->SetParam("num_class", "3"); - } - learner->UpdateOneIter(0, sliced); - Json config{Object{}}; - learner->SaveConfig(&config); - auto base_score = GetBaseScore(config); - ASSERT_EQ(base_score, expected_base_score); + auto i = 0; + for (auto const *entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) { + std::unique_ptr learner{Learner::Create({sliced})}; + learner->SetParam("tree_method", "approx"); + learner->SetParam("objective", entry->name); + if (entry->name.find("quantile") != std::string::npos) { + learner->SetParam("quantile_alpha", "0.5"); + } + if (entry->name.find("multi") != std::string::npos) { + learner->SetParam("num_class", "3"); + } + learner->UpdateOneIter(0, sliced); + + Json config{Object{}}; + learner->SaveConfig(&config); + auto base_score = GetBaseScore(config); + ASSERT_EQ(base_score, expected_base_scores[i]); - Json model{Object{}}; - learner->SaveModel(&model); - ASSERT_EQ(model, expected_model); + Json model{Object{}}; + learner->SaveModel(&model); + ASSERT_EQ(model, expected_models[i]); + + i++; + } } class FederatedLearnerTest : public BaseFederatedTest { protected: static auto constexpr kRows{16}; static auto constexpr kCols{16}; +}; - void TestObjective(std::string const& objective) { - std::shared_ptr dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)}; +TEST_F(FederatedLearnerTest, Objectives) { + std::shared_ptr dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)}; - auto &h_upper = dmat->Info().labels_upper_bound_.HostVector(); - auto &h_lower = 
dmat->Info().labels_lower_bound_.HostVector(); - h_lower.resize(kRows); - h_upper.resize(kRows); - for (size_t i = 0; i < kRows; ++i) { - h_lower[i] = 1; - h_upper[i] = 10; - } + auto &h_upper = dmat->Info().labels_upper_bound_.HostVector(); + auto &h_lower = dmat->Info().labels_lower_bound_.HostVector(); + h_lower.resize(kRows); + h_upper.resize(kRows); + for (size_t i = 0; i < kRows; ++i) { + h_lower[i] = 1; + h_upper[i] = 10; + } + std::vector base_scores; + std::vector models; + for (auto const *entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) { std::unique_ptr learner{Learner::Create({dmat})}; learner->SetParam("tree_method", "approx"); - learner->SetParam("objective", objective); - if (objective.find("quantile") != std::string::npos) { + learner->SetParam("objective", entry->name); + if (entry->name.find("quantile") != std::string::npos) { learner->SetParam("quantile_alpha", "0.5"); } - if (objective.find("multi") != std::string::npos) { + if (entry->name.find("multi") != std::string::npos) { learner->SetParam("num_class", "3"); } learner->UpdateOneIter(0, dmat); Json config{Object{}}; learner->SaveConfig(&config); - auto base_score = GetBaseScore(config); + base_scores.emplace_back(GetBaseScore(config)); Json model{Object{}}; learner->SaveModel(&model); - - RunWithFederatedCommunicator(kWorldSize, server_address_, &VerifyBaseScoreAndModel, kRows, - kCols, objective, base_score, model); + models.emplace_back(model); } -}; - -TEST_F(FederatedLearnerTest, RegSquaredError) { TestObjective("reg:squarederror"); } - -TEST_F(FederatedLearnerTest, RegSquaredLogError) { TestObjective("reg:squaredlogerror"); } - -TEST_F(FederatedLearnerTest, RegLogistic) { TestObjective("reg:logistic"); } - -TEST_F(FederatedLearnerTest, RegPseudoHuberError) { TestObjective("reg:pseudohubererror"); } - -TEST_F(FederatedLearnerTest, RegAsoluteError) { TestObjective("reg:absoluteerror"); } - -TEST_F(FederatedLearnerTest, RegQuantileError) { TestObjective("reg:quantileerror"); 
 }
-
-TEST_F(FederatedLearnerTest, BinaryLogistic) { TestObjective("binary:logistic"); }
-
-TEST_F(FederatedLearnerTest, BinaryLogitRaw) { TestObjective("binary:logitraw"); }
-TEST_F(FederatedLearnerTest, BinaryHinge) { TestObjective("binary:hinge"); }
-
-TEST_F(FederatedLearnerTest, CountPoisson) { TestObjective("count:poisson"); }
-
-TEST_F(FederatedLearnerTest, SurvivalCox) { TestObjective("survival:cox"); }
-
-TEST_F(FederatedLearnerTest, SurvivalAft) { TestObjective("survival:aft"); }
-
-TEST_F(FederatedLearnerTest, MultiSoftmax) { TestObjective("multi:softmax"); }
-
-TEST_F(FederatedLearnerTest, MultiSoftprob) { TestObjective("multi:softprob"); }
-
-TEST_F(FederatedLearnerTest, RankPairwise) { TestObjective("rank:pairwise"); }
-
-TEST_F(FederatedLearnerTest, RankNdcg) { TestObjective("rank:ndcg"); }
-
-TEST_F(FederatedLearnerTest, RankMap) { TestObjective("rank:map"); }
-
-TEST_F(FederatedLearnerTest, RegGamma) { TestObjective("reg:gamma"); }
-
-TEST_F(FederatedLearnerTest, RegTweedie) { TestObjective("reg:tweedie"); }
+  RunWithFederatedCommunicator(kWorldSize, server_address_, &VerifyObjectives, kRows, kCols,
+                               base_scores, models);
+}
 }  // namespace xgboost
diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc
index 0b4945c3c65a..537820e40c7e 100644
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@@ -608,104 +608,74 @@ TEST_F(InitBaseScore, InitWithPredict) { this->TestInitWithPredt(); }
 
 TEST_F(InitBaseScore, UpdateProcess) { this->TestUpdateProcess(); }
 
-class ColumnSplit : public ::testing::Test {
- protected:
-  static void TestColumnSplit(std::shared_ptr<DMatrix> dmat, std::string const& objective,
-                              float expected_base_score, Json const& expected_model) {
-    auto const world_size = collective::GetWorldSize();
-    auto const rank = collective::GetRank();
-    std::shared_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
+void TestColumnSplit(std::shared_ptr<DMatrix> dmat, std::vector<float> const& expected_base_scores,
+                     std::vector<Json> const& expected_models) {
+  auto const world_size = collective::GetWorldSize();
+  auto const rank = collective::GetRank();
+  std::shared_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
+
+  auto i = 0;
+  for (auto const* entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
     std::unique_ptr<Learner> learner{Learner::Create({sliced})};
     learner->SetParam("tree_method", "approx");
-    learner->SetParam("objective", objective);
-    if (objective.find("quantile") != std::string::npos) {
+    learner->SetParam("objective", entry->name);
+    if (entry->name.find("quantile") != std::string::npos) {
       learner->SetParam("quantile_alpha", "0.5");
     }
-    if (objective.find("multi") != std::string::npos) {
+    if (entry->name.find("multi") != std::string::npos) {
       learner->SetParam("num_class", "3");
     }
     learner->UpdateOneIter(0, sliced);
     Json config{Object{}};
     learner->SaveConfig(&config);
     auto base_score = GetBaseScore(config);
-    ASSERT_EQ(base_score, expected_base_score);
+    ASSERT_EQ(base_score, expected_base_scores[i]);
     Json model{Object{}};
     learner->SaveModel(&model);
-    ASSERT_EQ(model, expected_model);
+    ASSERT_EQ(model, expected_models[i]);
+
+    i++;
   }
+}
 
-  void TestBaseScoreAndModel(std::string const& objective) {
-    auto constexpr kRows = 10, kCols = 10;
-    std::shared_ptr<DMatrix> dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};
+TEST(ColumnSplit, Objectives) {
+  auto constexpr kRows = 10, kCols = 10;
+  std::shared_ptr<DMatrix> dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};
 
-    auto &h_upper = dmat->Info().labels_upper_bound_.HostVector();
-    auto &h_lower = dmat->Info().labels_lower_bound_.HostVector();
-    h_lower.resize(kRows);
-    h_upper.resize(kRows);
-    for (size_t i = 0; i < kRows; ++i) {
-      h_lower[i] = 1;
-      h_upper[i] = 10;
-    }
+  auto& h_upper = dmat->Info().labels_upper_bound_.HostVector();
+  auto& h_lower = dmat->Info().labels_lower_bound_.HostVector();
+  h_lower.resize(kRows);
+  h_upper.resize(kRows);
+  for (size_t i = 0; i < kRows; ++i) {
+    h_lower[i] = 1;
+    h_upper[i] = 10;
+  }
+  std::vector<float> base_scores;
+  std::vector<Json> models;
+  for (auto const* entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
     std::unique_ptr<Learner> learner{Learner::Create({dmat})};
     learner->SetParam("tree_method", "approx");
-    learner->SetParam("objective", objective);
-    if (objective.find("quantile") != std::string::npos) {
+    learner->SetParam("objective", entry->name);
+    if (entry->name.find("quantile") != std::string::npos) {
       learner->SetParam("quantile_alpha", "0.5");
     }
-    if (objective.find("multi") != std::string::npos) {
+    if (entry->name.find("multi") != std::string::npos) {
      learner->SetParam("num_class", "3");
     }
     learner->UpdateOneIter(0, dmat);
     Json config{Object{}};
     learner->SaveConfig(&config);
-    auto base_score = GetBaseScore(config);
+    base_scores.emplace_back(GetBaseScore(config));
     Json model{Object{}};
     learner->SaveModel(&model);
-
-    auto constexpr kWorldSize{3};
-    RunWithInMemoryCommunicator(kWorldSize, &TestColumnSplit, dmat, objective, base_score, model);
+    models.emplace_back(model);
   }
-};
-
-TEST_F(ColumnSplit, RegSquaredError) { this->TestBaseScoreAndModel("reg:squarederror"); }
-
-TEST_F(ColumnSplit, RegSquaredLogError) { this->TestBaseScoreAndModel("reg:squaredlogerror"); }
-
-TEST_F(ColumnSplit, RegLogistic) { this->TestBaseScoreAndModel("reg:logistic"); }
-
-TEST_F(ColumnSplit, RegPseudoHuberError) { this->TestBaseScoreAndModel("reg:pseudohubererror"); }
-
-TEST_F(ColumnSplit, RegAsoluteError) { this->TestBaseScoreAndModel("reg:absoluteerror"); }
-
-TEST_F(ColumnSplit, RegQuantileError) { this->TestBaseScoreAndModel("reg:quantileerror"); }
-TEST_F(ColumnSplit, BinaryLogistic) { this->TestBaseScoreAndModel("binary:logistic"); }
-
-TEST_F(ColumnSplit, BinaryLogitRaw) { this->TestBaseScoreAndModel("binary:logitraw"); }
-
-TEST_F(ColumnSplit, BinaryHinge) { this->TestBaseScoreAndModel("binary:hinge"); }
-
-TEST_F(ColumnSplit, CountPoisson) { this->TestBaseScoreAndModel("count:poisson"); }
-
-TEST_F(ColumnSplit, SurvivalCox) { this->TestBaseScoreAndModel("survival:cox"); }
-
-TEST_F(ColumnSplit, SurvivalAft) { this->TestBaseScoreAndModel("survival:aft"); }
-
-TEST_F(ColumnSplit, MultiSoftmax) { this->TestBaseScoreAndModel("multi:softmax"); }
-
-TEST_F(ColumnSplit, MultiSoftprob) { this->TestBaseScoreAndModel("multi:softprob"); }
-
-TEST_F(ColumnSplit, RankPairwise) { this->TestBaseScoreAndModel("rank:pairwise"); }
-
-TEST_F(ColumnSplit, RankNdcg) { this->TestBaseScoreAndModel("rank:ndcg"); }
-
-TEST_F(ColumnSplit, RankMap) { this->TestBaseScoreAndModel("rank:map"); }
-
-TEST_F(ColumnSplit, RegGamma) { this->TestBaseScoreAndModel("reg:gamma"); }
-
-TEST_F(ColumnSplit, RegTweedie) { this->TestBaseScoreAndModel("reg:tweedie"); }
+
+  auto constexpr kWorldSize{3};
+  RunWithInMemoryCommunicator(kWorldSize, &TestColumnSplit, dmat, base_scores, models);
+}
 }  // namespace xgboost