From 44f8cb52a05b81edcdf05079d1b5b59552963b25 Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Mon, 5 Nov 2018 12:58:41 -0800 Subject: [PATCH 1/3] Make C++ unit tests run and pass on Windows * Fix logic for external memory. The letter ':' is part of drive letter, so remove the drive letter before splitting on ':'. * Cosmetic syntax changes to keep MSVC happy. --- src/data/sparse_page_source.cc | 33 +++++++++++-- tests/cpp/common/test_transform_range.cc | 4 +- tests/cpp/helpers.cc | 6 +-- tests/cpp/helpers.h | 4 +- tests/cpp/objective/test_multiclass_obj.cc | 8 ++-- tests/cpp/tree/test_gpu_hist.cu | 54 +++++++++++----------- 6 files changed, 68 insertions(+), 41 deletions(-) diff --git a/src/data/sparse_page_source.cc b/src/data/sparse_page_source.cc index 517978446774..443e2bef8137 100644 --- a/src/data/sparse_page_source.cc +++ b/src/data/sparse_page_source.cc @@ -6,11 +6,36 @@ #include #include #include +#include +#include +#include #if DMLC_ENABLE_STD_THREAD #include "./sparse_page_source.h" #include "../common/common.h" +namespace { + +// Split a cache info string with delimiter ':' +// If cache info string contains drive letter (e.g. C:), exclude it before splitting +inline std::vector +GetCacheShards(const std::string& cache_info) { + if (cache_info.length() < 2) { + return {cache_info}; + } + if (std::isalpha(cache_info[0], std::locale::classic()) + && cache_info[1] == ':') { + std::vector cache_shards + = xgboost::common::Split(cache_info.substr(2), ':'); + cache_shards[0] = cache_info.substr(0, 2) + cache_shards[0]; + return cache_shards; + } else { + return xgboost::common::Split(cache_info, ':'); + } +} + +} // namespace anonymous + namespace xgboost { namespace data { @@ -18,7 +43,7 @@ SparsePageSource::SparsePageSource(const std::string& cache_info, const std::string& page_type) : base_rowid_(0), page_(nullptr), clock_ptr_(0) { // read in the info files - std::vector cache_shards = common::Split(cache_info, ':'); + std::vector cache_shards = GetCacheShards(cache_info); CHECK_NE(cache_shards.size(), 0U); { std::string name_info = cache_shards[0]; @@ -86,7 +111,7 @@ const SparsePage& SparsePageSource::Value() const { bool SparsePageSource::CacheExist(const std::string& cache_info, const std::string& page_type) { - std::vector cache_shards = common::Split(cache_info, ':'); + std::vector cache_shards = GetCacheShards(cache_info); CHECK_NE(cache_shards.size(), 0U); { std::string name_info = cache_shards[0]; @@ -104,7 +129,7 @@ bool SparsePageSource::CacheExist(const std::string& cache_info, void SparsePageSource::CreateRowPage(dmlc::Parser* src, const std::string& cache_info) { const std::string page_type = ".row.page"; - std::vector cache_shards = common::Split(cache_info, ':'); + std::vector cache_shards = GetCacheShards(cache_info); CHECK_NE(cache_shards.size(), 0U); // read in the info files. std::string name_info = cache_shards[0]; @@ -198,7 +223,7 @@ void SparsePageSource::CreateRowPage(dmlc::Parser* src, void SparsePageSource::CreatePageFromDMatrix(DMatrix* src, const std::string& cache_info, const std::string& page_type) { - std::vector cache_shards = common::Split(cache_info, ':'); + std::vector cache_shards = GetCacheShards(cache_info); CHECK_NE(cache_shards.size(), 0U); // read in the info files. std::string name_info = cache_shards[0]; diff --git a/tests/cpp/common/test_transform_range.cc b/tests/cpp/common/test_transform_range.cc index b09e4cd0d28e..902c282bca4d 100644 --- a/tests/cpp/common/test_transform_range.cc +++ b/tests/cpp/common/test_transform_range.cc @@ -50,7 +50,9 @@ TEST(Transform, DeclareUnifiedTest(Basic)) { HostDeviceVector out_vec{h_out, TRANSFORM_GPU_DIST}; out_vec.Fill(0); - Transform<>::Init(TestTransformRange{}, Range{0, size}, TRANSFORM_GPU_RANGE) + Transform<>::Init(TestTransformRange{}, + Range{0, static_cast(size)}, + TRANSFORM_GPU_RANGE) .Eval(&out_vec, &in_vec); std::vector res = out_vec.HostVector(); diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc index 7585dc929872..04bb48198dae 100644 --- a/tests/cpp/helpers.cc +++ b/tests/cpp/helpers.cc @@ -5,12 +5,12 @@ #include "xgboost/c_api.h" #include -bool FileExists(const std::string name) { +bool FileExists(const std::string& filename) { struct stat st; - return stat(name.c_str(), &st) == 0; + return stat(filename.c_str(), &st) == 0; } -long GetFileSize(const std::string filename) { +long GetFileSize(const std::string& filename) { struct stat st; stat(filename.c_str(), &st); return st.st_size; diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 4d990884effb..641eec3bed22 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -24,9 +24,9 @@ #define DeclareUnifiedTest(name) name #endif -bool FileExists(const std::string name); +bool FileExists(const std::string& filename); -long GetFileSize(const std::string filename); +long GetFileSize(const std::string& filename); void CreateSimpleTestData(const std::string& filename); diff --git a/tests/cpp/objective/test_multiclass_obj.cc b/tests/cpp/objective/test_multiclass_obj.cc index 3ff229bdd5a1..0591d21832ef 100644 --- a/tests/cpp/objective/test_multiclass_obj.cc +++ b/tests/cpp/objective/test_multiclass_obj.cc @@ -10,11 +10,11 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) { std::vector> args {{"num_class", "3"}}; obj->Configure(args); CheckObjFunction(obj, - {1, 0, 2, 2, 0, 1}, // preds - {1.0, 0.0}, // labels - {1.0, 1.0}, // weights + {1.0f, 0.0f, 2.0f, 2.0f, 0.0f, 1.0f}, // preds + {1.0f, 0.0f}, // labels + {1.0f, 1.0f}, // weights {0.24f, -0.91f, 0.66f, -0.33f, 0.09f, 0.24f}, // grad - {0.36, 0.16, 0.44, 0.45, 0.16, 0.37}); // hess + {0.36f, 0.16f, 0.44f, 0.45f, 0.16f, 0.37f}); // hess ASSERT_NO_THROW(obj->DefaultEvalMetric()); diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index 40228b8c1201..bcea85cdaf6a 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -24,16 +24,16 @@ void BuildGidx(DeviceShard* shard, int n_rows, int n_cols, common::HistCutMatrix cmat; cmat.row_ptr = {0, 3, 6, 9, 12, 15, 18, 21, 24}; - cmat.min_val = {0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.2, 0.2}; + cmat.min_val = {0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f}; // 24 cut fields, 3 cut fields for each feature (column). - cmat.cut = {0.30, 0.67, 1.64, - 0.32, 0.77, 1.95, - 0.29, 0.70, 1.80, - 0.32, 0.75, 1.85, - 0.18, 0.59, 1.69, - 0.25, 0.74, 2.00, - 0.26, 0.74, 1.98, - 0.26, 0.71, 1.83}; + cmat.cut = {0.30f, 0.67f, 1.64f, + 0.32f, 0.77f, 1.95f, + 0.29f, 0.70f, 1.80f, + 0.32f, 0.75f, 1.85f, + 0.18f, 0.59f, 1.69f, + 0.25f, 0.74f, 2.00f, + 0.26f, 0.74f, 1.98f, + 0.26f, 0.71f, 1.83f}; shard->InitRowPtrs(batch); shard->InitCompressedData(cmat, batch); @@ -110,14 +110,14 @@ TEST(GpuHist, BuildGidxSparse) { std::vector GetHostHistGpair() { // 24 bins, 3 bins for each feature (column). std::vector hist_gpair = { - {0.8314, 0.7147}, {1.7989, 3.7312}, {3.3846, 3.4598}, - {2.9277, 3.5886}, {1.8429, 2.4152}, {1.2443, 1.9019}, - {1.6380, 2.9174}, {1.5657, 2.5107}, {2.8111, 2.4776}, - {2.1322, 3.0651}, {3.2927, 3.8540}, {0.5899, 0.9866}, - {1.5185, 1.6263}, {2.0686, 3.1844}, {2.4278, 3.0950}, - {1.5105, 2.1403}, {2.6922, 4.2217}, {1.8122, 1.5437}, - {0.0000, 0.0000}, {4.3245, 5.7955}, {1.6903, 2.1103}, - {2.4012, 4.4754}, {3.6136, 3.4303}, {0.0000, 0.0000} + {0.8314f, 0.7147f}, {1.7989f, 3.7312f}, {3.3846f, 3.4598f}, + {2.9277f, 3.5886f}, {1.8429f, 2.4152f}, {1.2443f, 1.9019f}, + {1.6380f, 2.9174f}, {1.5657f, 2.5107f}, {2.8111f, 2.4776f}, + {2.1322f, 3.0651f}, {3.2927f, 3.8540f}, {0.5899f, 0.9866f}, + {1.5185f, 1.6263f}, {2.0686f, 3.1844f}, {2.4278f, 3.0950f}, + {1.5105f, 2.1403f}, {2.6922f, 4.2217f}, {1.8122f, 1.5437f}, + {0.0000f, 0.0000f}, {4.3245f, 5.7955f}, {1.6903f, 2.1103f}, + {2.4012f, 4.4754f}, {3.6136f, 3.4303f}, {0.0000f, 0.0000f} }; return hist_gpair; } @@ -198,17 +198,17 @@ TEST(GpuHist, BuildHistSharedMem) { common::HistCutMatrix GetHostCutMatrix () { common::HistCutMatrix cmat; cmat.row_ptr = {0, 3, 6, 9, 12, 15, 18, 21, 24}; - cmat.min_val = {0.1, 0.2, 0.3, 0.1, 0.2, 0.3, 0.2, 0.2}; + cmat.min_val = {0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f}; // 24 cut fields, 3 cut fields for each feature (column). // Each row of the cut represents the cuts for a data column. - cmat.cut = {0.30, 0.67, 1.64, - 0.32, 0.77, 1.95, - 0.29, 0.70, 1.80, - 0.32, 0.75, 1.85, - 0.18, 0.59, 1.69, - 0.25, 0.74, 2.00, - 0.26, 0.74, 1.98, - 0.26, 0.71, 1.83}; + cmat.cut = {0.30f, 0.67f, 1.64f, + 0.32f, 0.77f, 1.95f, + 0.29f, 0.70f, 1.80f, + 0.32f, 0.75f, 1.85f, + 0.18f, 0.59f, 1.69f, + 0.25f, 0.74f, 2.00f, + 0.26f, 0.74f, 1.98f, + 0.26f, 0.71f, 1.83f}; return cmat; } @@ -238,7 +238,7 @@ TEST(GpuHist, EvaluateSplits) { // Initialize DeviceShard std::unique_ptr shard {new DeviceShard(0, 0, 0, n_rows, param)}; // Initialize DeviceShard::node_sum_gradients - shard->node_sum_gradients = {{6.4, 12.8}}; + shard->node_sum_gradients = {{6.4f, 12.8f}}; // Initialize DeviceShard::cut common::HistCutMatrix cmat = GetHostCutMatrix(); From d425e7edfdfaeb7d51fa3ea8ee1bd13ff444dced Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Mon, 5 Nov 2018 17:48:55 -0800 Subject: [PATCH 2/3] Fix lint --- src/data/sparse_page_source.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/sparse_page_source.cc b/src/data/sparse_page_source.cc index 443e2bef8137..5c6383e3efea 100644 --- a/src/data/sparse_page_source.cc +++ b/src/data/sparse_page_source.cc @@ -34,7 +34,7 @@ GetCacheShards(const std::string& cache_info) { } } -} // namespace anonymous +} // anonymous namespace namespace xgboost { namespace data { From 5865e52676a6394866a984538e684d1419c259cb Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Tue, 6 Nov 2018 01:11:36 -0800 Subject: [PATCH 3/3] Add Windows guard --- src/data/sparse_page_source.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/data/sparse_page_source.cc b/src/data/sparse_page_source.cc index 5c6383e3efea..9c15219dcd78 100644 --- a/src/data/sparse_page_source.cc +++ b/src/data/sparse_page_source.cc @@ -20,18 +20,17 @@ namespace { // If cache info string contains drive letter (e.g. C:), exclude it before splitting inline std::vector GetCacheShards(const std::string& cache_info) { - if (cache_info.length() < 2) { - return {cache_info}; - } - if (std::isalpha(cache_info[0], std::locale::classic()) +#if (defined _WIN32) || (defined __CYGWIN__) + if (cache_info.length() >= 2 + && std::isalpha(cache_info[0], std::locale::classic()) && cache_info[1] == ':') { std::vector cache_shards = xgboost::common::Split(cache_info.substr(2), ':'); cache_shards[0] = cache_info.substr(0, 2) + cache_shards[0]; return cache_shards; - } else { - return xgboost::common::Split(cache_info, ':'); } +#endif + return xgboost::common::Split(cache_info, ':'); } } // anonymous namespace