Skip to content

Commit

Permalink
decouple bagging with num_threads (#2804)
Browse files Browse the repository at this point in the history
* fix bagging

* fixed cpplint issues

* updated docs

Co-authored-by: Nikita Titov <[email protected]>
  • Loading branch information
guolinke and StrikerRUS authored Feb 26, 2020
1 parent 84fb5e5 commit d20ceac
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 232 deletions.
8 changes: 8 additions & 0 deletions docs/FAQ.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
.. role:: raw-html(raw)
:format: html

LightGBM FAQ
############

Expand Down Expand Up @@ -82,8 +85,13 @@ You may also use the CPU version.
6. Bagging is not reproducible when changing the number of threads.
-------------------------------------------------------------------

:raw-html:`<strike>`
LightGBM bagging is multithreaded, so its output depends on the number of threads used.
There is `no workaround currently <https://github.com/microsoft/LightGBM/issues/632>`__.
:raw-html:`</strike>`

Starting from `#2804 <https://github.com/microsoft/LightGBM/pull/2804>`__ the bagging result no longer depends on the number of threads,
so this issue is resolved in the latest version.

7. I tried to use Random Forest mode, and LightGBM crashes!
-----------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion docs/Parameters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ Dataset Parameters

- ``data_random_seed`` :raw-html:`<a id="data_random_seed" title="Permalink to this parameter" href="#data_random_seed">&#x1F517;&#xFE0E;</a>`, default = ``1``, type = int, aliases: ``data_seed``

- random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
- random seed for sampling data to construct histogram bins

- ``is_enable_sparse`` :raw-html:`<a id="is_enable_sparse" title="Permalink to this parameter" href="#is_enable_sparse">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool, aliases: ``is_sparse``, ``enable_sparse``, ``sparse``

Expand Down
2 changes: 1 addition & 1 deletion include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ struct Config {
int bin_construct_sample_cnt = 200000;

// alias = data_seed
// desc = random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
// desc = random seed for sampling data to construct histogram bins
int data_random_seed = 1;

// alias = is_sparse, enable_sparse, sparse
Expand Down
137 changes: 134 additions & 3 deletions include/LightGBM/utils/threading.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
/*!
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
#ifndef LIGHTGBM_UTILS_THREADING_H_
#define LIGHTGBM_UTILS_THREADING_H_

#include <LightGBM/meta.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/openmp_wrapper.h>

#include <algorithm>
#include <functional>
#include <vector>

Expand Down Expand Up @@ -37,6 +41,23 @@ class Threading {
*block_size = cnt;
}
}
template <typename INDEX_T>
static inline void BlockInfoForceSize(int num_threads, INDEX_T cnt,
                                      INDEX_T min_cnt_per_block,
                                      int* out_nblock, INDEX_T* block_size) {
  // Number of blocks: bounded by the thread count and by how many
  // min-sized blocks fit into cnt (rounding up).
  const int max_blocks =
      static_cast<int>((cnt + min_cnt_per_block - 1) / min_cnt_per_block);
  *out_nblock = std::min<int>(num_threads, max_blocks);
  if (*out_nblock <= 1) {
    *block_size = cnt;
    return;
  }
  // Even split across blocks, rounded up so all data is covered.
  const INDEX_T even_size = (cnt + (*out_nblock) - 1) / (*out_nblock);
  // Force the block size up to a multiple of min_cnt_per_block, so chunk
  // boundaries stay fixed regardless of how many threads are available.
  *block_size =
      (even_size + min_cnt_per_block - 1) / min_cnt_per_block * min_cnt_per_block;
}

template <typename INDEX_T>
static inline int For(
INDEX_T start, INDEX_T end, INDEX_T min_block_size,
Expand All @@ -58,6 +79,116 @@ class Threading {
}
};

} // namespace LightGBM
/*!
 * \brief Multi-threaded data partitioner whose result is independent of the
 *        number of threads.
 *
 * The input range is cut into fixed-position chunks; each chunk is
 * partitioned into thread-local buffers by a user callback, then the
 * per-chunk results are concatenated in chunk order. Because chunk
 * boundaries depend only on the data size (see FORCE_SIZE in Run), the
 * output ordering does not change with the thread count.
 *
 * \tparam INDEX_T index type used for counts and index buffers
 * \tparam TWO_BUFFER if true, the callback writes "right" indices into a
 *         separate buffer; if false, it writes them reversed at the tail of
 *         the single left buffer and Run re-reverses them.
 */
template <typename INDEX_T, bool TWO_BUFFER>
class ParallelPartitionRunner {
 public:
  /*!
  * \brief Construct buffers sized for num_data indices.
  * \param num_data total number of indices that may be partitioned
  * \param min_block_size minimal (and, with FORCE_SIZE, alignment) chunk size
  */
  ParallelPartitionRunner(INDEX_T num_data, INDEX_T min_block_size)
      : min_block_size_(min_block_size) {
    num_threads_ = 1;
#pragma omp parallel
#pragma omp master
    { num_threads_ = omp_get_num_threads(); }
    left_.resize(num_data);
    if (TWO_BUFFER) {
      right_.resize(num_data);
    }
    offsets_.resize(num_threads_);
    left_cnts_.resize(num_threads_);
    right_cnts_.resize(num_threads_);
    left_write_pos_.resize(num_threads_);
    right_write_pos_.resize(num_threads_);
  }

  ~ParallelPartitionRunner() {}

  /*! \brief Re-allocate the per-index buffers for a new data size. */
  void ReSize(INDEX_T num_data) {
    left_.resize(num_data);
    if (TWO_BUFFER) {
      right_.resize(num_data);
    }
  }

  /*!
  * \brief Partition cnt indices into a left group followed by a right group.
  * \tparam FORCE_SIZE if true, chunk sizes are forced to multiples of
  *         min_block_size_, making the result thread-count independent
  * \param cnt number of indices to partition
  * \param func callback (block_id, start, cnt, left_buf, right_buf) that
  *        writes indices and returns the number placed in the left buffer;
  *        right_buf is nullptr when TWO_BUFFER is false
  * \param out output buffer of size cnt: left indices first, then right
  * \return total number of indices in the left group
  */
  template<bool FORCE_SIZE>
  INDEX_T Run(
      INDEX_T cnt,
      const std::function<INDEX_T(int, INDEX_T, INDEX_T, INDEX_T*, INDEX_T*)>& func,
      INDEX_T* out) {
    // Guard empty input: downstream code indexes left_write_pos_[nblock - 1],
    // which would be out of bounds if nblock were computed as 0.
    if (cnt <= 0) {
      return 0;
    }
    int nblock = 1;
    INDEX_T inner_size = cnt;
    if (FORCE_SIZE) {
      Threading::BlockInfoForceSize<INDEX_T>(num_threads_, cnt, min_block_size_,
                                             &nblock, &inner_size);
    } else {
      Threading::BlockInfo<INDEX_T>(num_threads_, cnt, min_block_size_, &nblock,
                                    &inner_size);
    }

    OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
    for (int i = 0; i < nblock; ++i) {
      OMP_LOOP_EX_BEGIN();
      INDEX_T cur_start = i * inner_size;
      INDEX_T cur_cnt = std::min(inner_size, cnt - cur_start);
      offsets_[i] = cur_start;
      if (cur_cnt <= 0) {
        left_cnts_[i] = 0;
        right_cnts_[i] = 0;
        continue;
      }
      auto left_ptr = left_.data() + cur_start;
      INDEX_T* right_ptr = nullptr;
      if (TWO_BUFFER) {
        right_ptr = right_.data() + cur_start;
      }
      // Partition the whole chunk in one callback invocation to reduce the
      // number of function calls.
      INDEX_T cur_left_count =
          func(i, cur_start, cur_cnt, left_ptr, right_ptr);
      if (!TWO_BUFFER) {
        // Single-buffer mode: the callback wrote right indices backwards at
        // the tail; restore their original order.
        std::reverse(left_ptr + cur_left_count, left_ptr + cur_cnt);
      }
      left_cnts_[i] = cur_left_count;
      right_cnts_[i] = cur_cnt - cur_left_count;
      OMP_LOOP_EX_END();
    }
    OMP_THROW_EX();

    // Prefix sums give each chunk its write position in the output.
    left_write_pos_[0] = 0;
    right_write_pos_[0] = 0;
    for (int i = 1; i < nblock; ++i) {
      left_write_pos_[i] = left_write_pos_[i - 1] + left_cnts_[i - 1];
      right_write_pos_[i] = right_write_pos_[i - 1] + right_cnts_[i - 1];
    }
    // NOTE(fix): was `data_size_t left_cnt`, which hard-coded a project type
    // inside a template parameterized on INDEX_T and would truncate counts
    // for a 64-bit INDEX_T.
    INDEX_T left_cnt = left_write_pos_[nblock - 1] + left_cnts_[nblock - 1];

    auto right_start = out + left_cnt;
#pragma omp parallel for schedule(static)
    for (int i = 0; i < nblock; ++i) {
      std::copy_n(left_.data() + offsets_[i], left_cnts_[i],
                  out + left_write_pos_[i]);
      if (TWO_BUFFER) {
        std::copy_n(right_.data() + offsets_[i], right_cnts_[i],
                    right_start + right_write_pos_[i]);
      } else {
        std::copy_n(left_.data() + offsets_[i] + left_cnts_[i], right_cnts_[i],
                    right_start + right_write_pos_[i]);
      }
    }
    return left_cnt;
  }

 private:
  /*! \brief Number of OpenMP threads available at construction time */
  int num_threads_;
  /*! \brief Minimal (and alignment) chunk size for partitioning */
  INDEX_T min_block_size_;
  /*! \brief Per-chunk left-index scratch buffer */
  std::vector<INDEX_T> left_;
  /*! \brief Per-chunk right-index scratch buffer (TWO_BUFFER mode only) */
  std::vector<INDEX_T> right_;
  /*! \brief Start offset of each chunk */
  std::vector<INDEX_T> offsets_;
  /*! \brief Left-group size produced by each chunk */
  std::vector<INDEX_T> left_cnts_;
  /*! \brief Right-group size produced by each chunk */
  std::vector<INDEX_T> right_cnts_;
  /*! \brief Output write position of each chunk's left group */
  std::vector<INDEX_T> left_write_pos_;
  /*! \brief Output write position of each chunk's right group */
  std::vector<INDEX_T> right_write_pos_;
};

} // namespace LightGBM

#endif // LightGBM_UTILS_THREADING_H_
#endif // LightGBM_UTILS_THREADING_H_
Loading

0 comments on commit d20ceac

Please sign in to comment.