diff --git a/include/LightGBM/cuda/cuda_tree.hpp b/include/LightGBM/cuda/cuda_tree.hpp index 9d89dc3b7465..d557798270e0 100644 --- a/include/LightGBM/cuda/cuda_tree.hpp +++ b/include/LightGBM/cuda/cuda_tree.hpp @@ -77,6 +77,8 @@ class CUDATree : public Tree { const data_size_t* used_data_indices, data_size_t num_data, double* score) const override; + inline void AsConstantTree(double val) override; + const int* cuda_leaf_parent() const { return cuda_leaf_parent_; } const int* cuda_left_child() const { return cuda_left_child_; } diff --git a/include/LightGBM/cuda/cuda_utils.h b/include/LightGBM/cuda/cuda_utils.h index 797e9f1b44d5..c74016a72ea1 100644 --- a/include/LightGBM/cuda/cuda_utils.h +++ b/include/LightGBM/cuda/cuda_utils.h @@ -119,6 +119,9 @@ class CUDAVector { } void Resize(size_t size) { + if (size == size_) { + return; + } if (size == 0) { Clear(); } diff --git a/include/LightGBM/objective_function.h b/include/LightGBM/objective_function.h index 0d28bc57eb4a..376a6f1a071d 100644 --- a/include/LightGBM/objective_function.h +++ b/include/LightGBM/objective_function.h @@ -101,9 +101,12 @@ class ObjectiveFunction { /*! * \brief Convert output for CUDA version */ - const double* ConvertOutputCUDA(data_size_t /*num_data*/, const double* input, double* /*output*/) const { + virtual const double* ConvertOutputCUDA(data_size_t /*num_data*/, const double* input, double* /*output*/) const { return input; } + + virtual bool NeedConvertOutputCUDA () const { return false; } + #endif // USE_CUDA_EXP }; diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h index 6ff0370e2ea6..3e403b16e89b 100644 --- a/include/LightGBM/tree.h +++ b/include/LightGBM/tree.h @@ -228,7 +228,7 @@ class Tree { shrinkage_ = 1.0f; } - inline void AsConstantTree(double val) { + virtual inline void AsConstantTree(double val) { num_leaves_ = 1; shrinkage_ = 1.0f; leaf_value_[0] = val; diff --git a/src/io/cuda/cuda_tree.cpp b/src/io/cuda/cuda_tree.cpp index b7ecee6e6167..196563340ae5 100644 --- a/src/io/cuda/cuda_tree.cpp +++ b/src/io/cuda/cuda_tree.cpp @@ -330,6 +330,10 @@ void CUDATree::SyncLeafOutputFromCUDAToHost() { CopyFromCUDADeviceToHost(leaf_value_.data(), cuda_leaf_value_, leaf_value_.size(), __FILE__, __LINE__); } +void CUDATree::AsConstantTree(double val) { + Tree::AsConstantTree(val); + CopyFromHostToCUDADevice(cuda_leaf_value_, &val, 1, __FILE__, __LINE__); +} } // namespace LightGBM diff --git a/src/metric/cuda/cuda_regression_metric.cpp b/src/metric/cuda/cuda_regression_metric.cpp index 5d8ae39fd3e4..f8232b9d1f2e 100644 --- a/src/metric/cuda/cuda_regression_metric.cpp +++ b/src/metric/cuda/cuda_regression_metric.cpp @@ -31,13 +31,19 @@ void CUDARegressionMetricInterface::Init(const Metadat template std::vector CUDARegressionMetricInterface::Eval(const double* score, const ObjectiveFunction* objective) const { - const double* score_convert = objective->ConvertOutputCUDA(this->num_data_, score, score_convert_buffer_.RawData()); + const double* score_convert = score; + if (objective != nullptr && objective->NeedConvertOutputCUDA()) { + score_convert_buffer_.Resize(static_cast(this->num_data_) * static_cast(this->num_class_)); + score_convert = objective->ConvertOutputCUDA(this->num_data_, score, score_convert_buffer_.RawData()); + } const double eval_score = LaunchEvalKernel(score_convert); return std::vector{eval_score}; } CUDARMSEMetric::CUDARMSEMetric(const Config& config): CUDARegressionMetricInterface(config) {} +CUDAL2Metric::CUDAL2Metric(const Config& config): CUDARegressionMetricInterface(config) {} + } // namespace LightGBM #endif // USE_CUDA_EXP diff --git a/src/metric/cuda/cuda_regression_metric.cu b/src/metric/cuda/cuda_regression_metric.cu index 0442416459c5..e6f37d5cb131 100644 --- a/src/metric/cuda/cuda_regression_metric.cu +++ b/src/metric/cuda/cuda_regression_metric.cu @@ -19,16 +19,22 @@ __global__ void EvalKernel(const data_size_t num_data, const label_t* labels, co const data_size_t index = static_cast(threadIdx.x + blockIdx.x * blockDim.x); double point_metric = 0.0; if (index < num_data) { - point_metric = CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]); + point_metric = USE_WEIGHTS ? + CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]) * weights[index] : + CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]); } const double block_sum_point_metric = ShuffleReduceSum(point_metric, shared_mem_buffer, NUM_DATA_PER_EVAL_THREAD); - reduce_block_buffer[blockIdx.x] = block_sum_point_metric; + if (threadIdx.x == 0) { + reduce_block_buffer[blockIdx.x] = block_sum_point_metric; + } if (USE_WEIGHTS) { double weight = 0.0; if (index < num_data) { weight = static_cast(weights[index]); const double block_sum_weight = ShuffleReduceSum(weight, shared_mem_buffer, NUM_DATA_PER_EVAL_THREAD); - reduce_block_buffer[blockIdx.x + blockDim.x] = block_sum_weight; + if (threadIdx.x == 0) { + reduce_block_buffer[blockIdx.x + gridDim.x] = block_sum_weight; + } } } } @@ -55,6 +61,7 @@ double CUDARegressionMetricInterface::LaunchEvalKernel } template double CUDARegressionMetricInterface::LaunchEvalKernel(const double* score) const; +template double CUDARegressionMetricInterface::LaunchEvalKernel(const double* score) const; } // namespace LightGBM diff --git a/src/metric/cuda/cuda_regression_metric.hpp b/src/metric/cuda/cuda_regression_metric.hpp index fe49bd0d729d..aece087dc448 100644 --- a/src/metric/cuda/cuda_regression_metric.hpp +++ b/src/metric/cuda/cuda_regression_metric.hpp @@ -23,7 +23,7 @@ namespace LightGBM { template class CUDARegressionMetricInterface: public CUDAMetricInterface { public: - explicit CUDARegressionMetricInterface(const Config& config): CUDAMetricInterface(config) {} + explicit CUDARegressionMetricInterface(const Config& config): CUDAMetricInterface(config), num_class_(config.num_class) {} virtual ~CUDARegressionMetricInterface() {} @@ -34,9 +34,10 @@ class CUDARegressionMetricInterface: public CUDAMetricInterface { protected: double LaunchEvalKernel(const double* score_convert) const; - CUDAVector score_convert_buffer_; + mutable CUDAVector score_convert_buffer_; CUDAVector reduce_block_buffer_; CUDAVector reduce_block_buffer_inner_; + const int num_class_; }; class CUDARMSEMetric: public CUDARegressionMetricInterface { @@ -45,8 +46,19 @@ class CUDARMSEMetric: public CUDARegressionMetricInterface(label)); + __device__ inline static double MetricOnPointCUDA(label_t label, double score) { + return (score - label) * (score - label); + } +}; + +class CUDAL2Metric : public CUDARegressionMetricInterface { + public: + explicit CUDAL2Metric(const Config& config); + + virtual ~CUDAL2Metric() {} + + __device__ inline static double MetricOnPointCUDA(label_t label, double score) { + return (score - label) * (score - label); } }; diff --git a/src/metric/metric.cpp b/src/metric/metric.cpp index 32241656cc4c..9cbd72c76188 100644 --- a/src/metric/metric.cpp +++ b/src/metric/metric.cpp @@ -19,8 +19,7 @@ Metric* Metric::CreateMetric(const std::string& type, const Config& config) { #ifdef USE_CUDA_EXP if (config.device_type == std::string("cuda_exp")) { if (type == std::string("l2")) { - Log::Warning("Metric l2 is not implemented in cuda_exp version. Fall back to evaluation on CPU."); - return new L2Metric(config); + return new CUDAL2Metric(config); } else if (type == std::string("rmse")) { return new CUDARMSEMetric(config); } else if (type == std::string("l1")) { diff --git a/src/objective/cuda/cuda_regression_objective.cu b/src/objective/cuda/cuda_regression_objective.cu index d231cb890624..99feec132508 100644 --- a/src/objective/cuda/cuda_regression_objective.cu +++ b/src/objective/cuda/cuda_regression_objective.cu @@ -70,8 +70,12 @@ __global__ void ConvertOutputCUDAKernel_Regression(const bool sqrt, const data_s const double* CUDARegressionL2loss::LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const { const int num_blocks = (num_data + GET_GRADIENTS_BLOCK_SIZE_REGRESSION - 1) / GET_GRADIENTS_BLOCK_SIZE_REGRESSION; - ConvertOutputCUDAKernel_Regression<<>>(sqrt_, num_data, input, output); - return output; + if (sqrt_) { + ConvertOutputCUDAKernel_Regression<<>>(sqrt_, num_data, input, output); + return output; + } else { + return input; + } } template diff --git a/src/objective/cuda/cuda_regression_objective.hpp b/src/objective/cuda/cuda_regression_objective.hpp index 2e9a747e3a25..593fcf1cfcb6 100644 --- a/src/objective/cuda/cuda_regression_objective.hpp +++ b/src/objective/cuda/cuda_regression_objective.hpp @@ -50,6 +50,8 @@ class CUDARegressionL2loss : public CUDARegressionObjectiveInterface(config.poisson_max_delta_step); if (sqrt_) { Log::Warning("Cannot use sqrt transform in %s Regression, will auto disable it", GetName());