decouple bagging with num_threads #2804

Merged 3 commits on Feb 26, 2020
8 changes: 8 additions & 0 deletions docs/FAQ.rst
@@ -1,3 +1,6 @@
.. role:: raw-html(raw)
   :format: html

LightGBM FAQ
############

@@ -82,8 +85,13 @@ You may also use the CPU version.
6. Bagging is not reproducible when changing the number of threads.
-------------------------------------------------------------------

:raw-html:`<strike>`
LightGBM bagging is multithreaded, so its output depends on the number of threads used.
There is `no workaround currently <https://github.com/microsoft/LightGBM/issues/632>`__.
:raw-html:`</strike>`

Starting from `#2804 <https://github.com/microsoft/LightGBM/pull/2804>`__, the bagging result no longer depends on the number of threads,
so this issue is fixed in the latest version.
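
The sketch below illustrates the idea behind the fix; it is not LightGBM's actual implementation, and the function name ``BagIndices`` and its parameters are illustrative only.
Bagging decisions are drawn per fixed-size block of rows, each block with its own deterministically seeded RNG, so the selected rows depend only on the data size, the block size, the seed and the bagging fraction, never on how many threads process the blocks.

.. code-block:: cpp

    #include <algorithm>
    #include <cstdint>
    #include <random>
    #include <vector>

    // Illustrative sketch, not LightGBM code.
    // Pick bagging indices block by block with one RNG per block.
    // Block boundaries depend only on num_data and block_size, so any
    // number of threads processing the blocks yields the same selection.
    std::vector<int32_t> BagIndices(int32_t num_data, int32_t block_size,
                                    double fraction, uint64_t seed) {
      std::vector<int32_t> out;
      for (int32_t start = 0; start < num_data; start += block_size) {
        std::mt19937_64 rng(seed + static_cast<uint64_t>(start / block_size));
        std::uniform_real_distribution<double> unif(0.0, 1.0);
        const int32_t end = std::min(start + block_size, num_data);
        for (int32_t i = start; i < end; ++i) {
          if (unif(rng) < fraction) {
            out.push_back(i);  // row i goes into the bag
          }
        }
      }
      return out;
    }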

7. I tried to use Random Forest mode, and LightGBM crashes!
-----------------------------------------------------------
2 changes: 1 addition & 1 deletion docs/Parameters.rst
@@ -559,7 +559,7 @@ IO Parameters

- ``data_random_seed`` :raw-html:`<a id="data_random_seed" title="Permalink to this parameter" href="#data_random_seed">&#x1F517;&#xFE0E;</a>`, default = ``1``, type = int, aliases: ``data_seed``

- random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
- random seed for sampling data to construct histogram bins

- ``output_model`` :raw-html:`<a id="output_model" title="Permalink to this parameter" href="#output_model">&#x1F517;&#xFE0E;</a>`, default = ``LightGBM_model.txt``, type = string, aliases: ``model_output``, ``model_out``

2 changes: 1 addition & 1 deletion include/LightGBM/config.h
@@ -523,7 +523,7 @@ struct Config {
  double histogram_pool_size = -1.0;

  // alias = data_seed
  // desc = random seed for data partition in parallel learning (excluding the ``feature_parallel`` mode)
  // desc = random seed for sampling data to construct histogram bins
  int data_random_seed = 1;

  // alias = model_output, model_out
137 changes: 134 additions & 3 deletions include/LightGBM/utils/threading.h
@@ -1,12 +1,16 @@
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for
 * license information.
 */
#ifndef LIGHTGBM_UTILS_THREADING_H_
#define LIGHTGBM_UTILS_THREADING_H_

#include <LightGBM/meta.h>
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/openmp_wrapper.h>

#include <algorithm>
#include <functional>
#include <vector>

@@ -37,6 +41,23 @@ class Threading {
      *block_size = cnt;
    }
  }
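  // Example: with num_threads = 8, cnt = 10000 and min_cnt_per_block = 1024,
  // out_nblock = min(8, ceil(10000 / 1024)) = 8 and block_size is first
  // ceil(10000 / 8) = 1250, then rounded up to a multiple of 1024, giving 2048,
  // so block boundaries fall on multiples of min_cnt_per_block and only
  // ceil(10000 / 2048) = 5 blocks actually receive data.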
  template <typename INDEX_T>
  static inline void BlockInfoForceSize(int num_threads, INDEX_T cnt,
                                        INDEX_T min_cnt_per_block,
                                        int* out_nblock, INDEX_T* block_size) {
    *out_nblock = std::min<int>(
        num_threads,
        static_cast<int>((cnt + min_cnt_per_block - 1) / min_cnt_per_block));
    if (*out_nblock > 1) {
      *block_size = (cnt + (*out_nblock) - 1) / (*out_nblock);
      // round the block size up to a multiple of min_cnt_per_block
      *block_size = (*block_size + min_cnt_per_block - 1) / min_cnt_per_block *
                    min_cnt_per_block;
    } else {
      *block_size = cnt;
    }
  }

  template <typename INDEX_T>
  static inline int For(
      INDEX_T start, INDEX_T end, INDEX_T min_block_size,
@@ -58,6 +79,116 @@
  }
};

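// Partitions an index range in parallel: [0, cnt) is split into blocks,
// `func` is called once per block to split it into a "left" and a "right"
// part, and the per-block results are then concatenated (all left parts
// first, then all right parts) into the output buffer. Output positions are
// derived from the per-block counts alone, so the result does not depend on
// which thread processed which block.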
template <typename INDEX_T, bool TWO_BUFFER>
class ParallelPartitionRunner {
 public:
  ParallelPartitionRunner(INDEX_T num_data, INDEX_T min_block_size)
      : min_block_size_(min_block_size) {
    num_threads_ = 1;
#pragma omp parallel
#pragma omp master
    { num_threads_ = omp_get_num_threads(); }
    left_.resize(num_data);
    if (TWO_BUFFER) {
      right_.resize(num_data);
    }
    offsets_.resize(num_threads_);
    left_cnts_.resize(num_threads_);
    right_cnts_.resize(num_threads_);
    left_write_pos_.resize(num_threads_);
    right_write_pos_.resize(num_threads_);
  }

  ~ParallelPartitionRunner() {}

  void ReSize(INDEX_T num_data) {
    left_.resize(num_data);
    if (TWO_BUFFER) {
      right_.resize(num_data);
    }
  }

  template<bool FORCE_SIZE>
  INDEX_T Run(
      INDEX_T cnt,
      const std::function<INDEX_T(int, INDEX_T, INDEX_T, INDEX_T*, INDEX_T*)>& func,
      INDEX_T* out) {
    int nblock = 1;
    INDEX_T inner_size = cnt;
    if (FORCE_SIZE) {
      Threading::BlockInfoForceSize<INDEX_T>(num_threads_, cnt, min_block_size_,
                                             &nblock, &inner_size);
    } else {
      Threading::BlockInfo<INDEX_T>(num_threads_, cnt, min_block_size_, &nblock,
                                    &inner_size);
    }

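    // phase 1: every block is partitioned independently; only the per-block
    // counts and the block-local buffers are written here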
    OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
    for (int i = 0; i < nblock; ++i) {
      OMP_LOOP_EX_BEGIN();
      INDEX_T cur_start = i * inner_size;
      INDEX_T cur_cnt = std::min(inner_size, cnt - cur_start);
      offsets_[i] = cur_start;
      if (cur_cnt <= 0) {
        left_cnts_[i] = 0;
        right_cnts_[i] = 0;
        continue;
      }
      auto left_ptr = left_.data() + cur_start;
      INDEX_T* right_ptr = nullptr;
      if (TWO_BUFFER) {
        right_ptr = right_.data() + cur_start;
      }
      // partition the whole block with a single call to func to reduce the
      // number of function calls
      INDEX_T cur_left_count =
          func(i, cur_start, cur_cnt, left_ptr, right_ptr);
      if (!TWO_BUFFER) {
        // single-buffer mode: the right part was written back to front, so
        // restore its original order
        std::reverse(left_ptr + cur_left_count, left_ptr + cur_cnt);
      }
      left_cnts_[i] = cur_left_count;
      right_cnts_[i] = cur_cnt - cur_left_count;
      OMP_LOOP_EX_END();
    }
    OMP_THROW_EX();

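    // phase 2: exclusive prefix sums over the per-block counts give each
    // block its write offset in the final output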
    left_write_pos_[0] = 0;
    right_write_pos_[0] = 0;
    for (int i = 1; i < nblock; ++i) {
      left_write_pos_[i] = left_write_pos_[i - 1] + left_cnts_[i - 1];
      right_write_pos_[i] = right_write_pos_[i - 1] + right_cnts_[i - 1];
    }
    data_size_t left_cnt = left_write_pos_[nblock - 1] + left_cnts_[nblock - 1];

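    // phase 3: copy every block's left part, then its right part, to its
    // final position; the offsets depend only on the counts, so the layout
    // is the same for any number of threads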
    auto right_start = out + left_cnt;
#pragma omp parallel for schedule(static)
    for (int i = 0; i < nblock; ++i) {
      std::copy_n(left_.data() + offsets_[i], left_cnts_[i],
                  out + left_write_pos_[i]);
      if (TWO_BUFFER) {
        std::copy_n(right_.data() + offsets_[i], right_cnts_[i],
                    right_start + right_write_pos_[i]);
      } else {
        std::copy_n(left_.data() + offsets_[i] + left_cnts_[i], right_cnts_[i],
                    right_start + right_write_pos_[i]);
      }
    }
    return left_cnt;
  }

 private:
  int num_threads_;
  INDEX_T min_block_size_;
  std::vector<INDEX_T> left_;
  std::vector<INDEX_T> right_;
  std::vector<INDEX_T> offsets_;
  std::vector<INDEX_T> left_cnts_;
  std::vector<INDEX_T> right_cnts_;
  std::vector<INDEX_T> left_write_pos_;
  std::vector<INDEX_T> right_write_pos_;
};

} // namespace LightGBM

#endif  // LightGBM_UTILS_THREADING_H_