From 7a93e0706411c470d40f9d3ee8550efa3adca22a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 17 Feb 2021 21:06:26 -0700 Subject: [PATCH 01/19] Add a benchmark for string <=> floats conversion. --- cpp/benchmarks/CMakeLists.txt | 101 +++++++------- .../string/convert_floats_benchmark.cpp | 132 ++++++++++++++++++ 2 files changed, 183 insertions(+), 50 deletions(-) create mode 100644 cpp/benchmarks/string/convert_floats_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index e78f25a360b..810107a08c7 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -15,8 +15,8 @@ #============================================================================= add_library(cudf_datagen STATIC - "${CMAKE_SOURCE_DIR}/benchmarks/common/generate_benchmark_input.cpp" -) + "${CMAKE_SOURCE_DIR}/benchmarks/common/generate_benchmark_input.cpp" + ) ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -24,16 +24,16 @@ add_library(cudf_datagen STATIC set(BENCHMARK_LIST CACHE INTERNAL "BENCHMARK_LIST") function(ConfigureBench CMAKE_BENCH_NAME CMAKE_BENCH_SRC) - add_executable(${CMAKE_BENCH_NAME} - ${CMAKE_BENCH_SRC} - "${CMAKE_CURRENT_SOURCE_DIR}/synchronization/synchronization.cpp" - "${CMAKE_SOURCE_DIR}/tests/utilities/base_fixture.cpp" - "${CMAKE_SOURCE_DIR}/benchmarks/io/cuio_benchmark_common.cpp") - set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_link_libraries(${CMAKE_BENCH_NAME} benchmark benchmark_main pthread cudf_datagen cudf) - set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks") - set(BENCHMARK_LIST ${BENCHMARK_LIST} ${CMAKE_BENCH_NAME} CACHE INTERNAL "BENCHMARK_LIST") + add_executable(${CMAKE_BENCH_NAME} + ${CMAKE_BENCH_SRC} + 
"${CMAKE_CURRENT_SOURCE_DIR}/synchronization/synchronization.cpp" + "${CMAKE_SOURCE_DIR}/tests/utilities/base_fixture.cpp" + "${CMAKE_SOURCE_DIR}/benchmarks/io/cuio_benchmark_common.cpp") + set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_link_libraries(${CMAKE_BENCH_NAME} benchmark benchmark_main pthread cudf_datagen cudf) + set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks") + set(BENCHMARK_LIST ${BENCHMARK_LIST} ${CMAKE_BENCH_NAME} CACHE INTERNAL "BENCHMARK_LIST") endfunction(ConfigureBench) ################################################################################################### @@ -44,7 +44,7 @@ include_directories("${THRUST_INCLUDE_DIR}" "${LIBCUDACXX_INCLUDE_DIR}") if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") + include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") endif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) include_directories("${CMAKE_BINARY_DIR}/include" @@ -76,7 +76,7 @@ link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT # - column benchmarks ----------------------------------------------------------------------------- set(COLUMN_CONCAT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/column/concatenate_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/column/concatenate_benchmark.cpp") ConfigureBench(COLUMN_CONCAT_BENCH "${COLUMN_CONCAT_BENCH_SRC}") @@ -84,7 +84,7 @@ ConfigureBench(COLUMN_CONCAT_BENCH "${COLUMN_CONCAT_BENCH_SRC}") # - gather benchmark ------------------------------------------------------------------------------ set(GATHER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/gather_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/gather_benchmark.cu") ConfigureBench(GATHER_BENCH "${GATHER_BENCH_SRC}") @@ -92,7 +92,7 @@ ConfigureBench(GATHER_BENCH "${GATHER_BENCH_SRC}") # - scatter benchmark 
----------------------------------------------------------------------------- set(SCATTER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/scatter_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/scatter_benchmark.cu") ConfigureBench(SCATTER_BENCH "${SCATTER_BENCH_SRC}") @@ -100,7 +100,7 @@ ConfigureBench(SCATTER_BENCH "${SCATTER_BENCH_SRC}") # - contiguous_split benchmark ------------------------------------------------------------------- set(CONTIGUOUS_SPLIT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/contiguous_split_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/contiguous_split_benchmark.cu") ConfigureBench(CONTIGUOUS_SPLIT_BENCH "${CONTIGUOUS_SPLIT_BENCH_SRC}") @@ -108,7 +108,7 @@ ConfigureBench(CONTIGUOUS_SPLIT_BENCH "${CONTIGUOUS_SPLIT_BENCH_SRC}") # - shift benchmark ------------------------------------------------------------------------------- set(SHIFT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/shift_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/shift_benchmark.cu") ConfigureBench(SHIFT_BENCH "${SHIFT_BENCH_SRC}") @@ -116,7 +116,7 @@ ConfigureBench(SHIFT_BENCH "${SHIFT_BENCH_SRC}") # - transpose benchmark --------------------------------------------------------------------------- set(TRANSPOSE_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/transpose/transpose_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/transpose/transpose_benchmark.cu") ConfigureBench(TRANSPOSE_BENCH "${TRANSPOSE_BENCH_SRC}") @@ -124,7 +124,7 @@ ConfigureBench(TRANSPOSE_BENCH "${TRANSPOSE_BENCH_SRC}") # - apply_boolean_mask benchmark ------------------------------------------------------------------ set(APPLY_BOOLEAN_MASK_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/apply_boolean_mask_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/apply_boolean_mask_benchmark.cpp") ConfigureBench(APPLY_BOOLEAN_MASK_BENCH "${APPLY_BOOLEAN_MASK_BENCH_SRC}") @@ -132,7 +132,7 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH 
"${APPLY_BOOLEAN_MASK_BENCH_SRC}") # - stream_compaction benchmark ------------------------------------------------------------------- set(STREAM_COMPACTION_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/drop_duplicates_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/drop_duplicates_benchmark.cpp") ConfigureBench(STREAM_COMPACTION_BENCH "${STREAM_COMPACTION_BENCH_SRC}") @@ -140,7 +140,7 @@ ConfigureBench(STREAM_COMPACTION_BENCH "${STREAM_COMPACTION_BENCH_SRC}") # - join benchmark -------------------------------------------------------------------------------- set(JOIN_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/join/join_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/join/join_benchmark.cu") ConfigureBench(JOIN_BENCH "${JOIN_BENCH_SRC}") @@ -148,7 +148,7 @@ ConfigureBench(JOIN_BENCH "${JOIN_BENCH_SRC}") # - iterator benchmark ---------------------------------------------------------------------------- set(ITERATOR_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/iterator/iterator_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/iterator/iterator_benchmark.cu") ConfigureBench(ITERATOR_BENCH "${ITERATOR_BENCH_SRC}") @@ -156,7 +156,7 @@ ConfigureBench(ITERATOR_BENCH "${ITERATOR_BENCH_SRC}") # - search benchmark ------------------------------------------------------------------------------ set(SEARCH_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/search/search_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/search/search_benchmark.cu") ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}") @@ -164,8 +164,8 @@ ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}") # - sort benchmark -------------------------------------------------------------------------------- set(SORT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cpp") ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}") @@ 
-173,7 +173,7 @@ ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}") # - type_dispatcher benchmark --------------------------------------------------------------------- set(TD_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/type_dispatcher/type_dispatcher_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/type_dispatcher/type_dispatcher_benchmark.cu") ConfigureBench(TYPE_DISPATCHER_BENCH "${TD_BENCH_SRC}") @@ -181,10 +181,10 @@ ConfigureBench(TYPE_DISPATCHER_BENCH "${TD_BENCH_SRC}") # - reduction benchmark --------------------------------------------------------------------------- set(REDUCTION_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/reduction/anyall_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/reduction/dictionary_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/reduction/reduce_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/reduction/minmax_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/reduction/anyall_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduction/dictionary_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduction/reduce_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduction/minmax_benchmark.cpp") ConfigureBench(REDUCTION_BENCH "${REDUCTION_BENCH_SRC}") @@ -192,8 +192,8 @@ ConfigureBench(REDUCTION_BENCH "${REDUCTION_BENCH_SRC}") # - groupby benchmark ----------------------------------------------------------------------------- set(GROUPBY_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_benchmark.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_nth_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_benchmark.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_nth_benchmark.cu") ConfigureBench(GROUPBY_BENCH "${GROUPBY_BENCH_SRC}") @@ -201,7 +201,7 @@ ConfigureBench(GROUPBY_BENCH "${GROUPBY_BENCH_SRC}") # - hashing benchmark ----------------------------------------------------------------------------- set(HASHING_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/hashing/hashing_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/hashing/hashing_benchmark.cpp") 
ConfigureBench(HASHING_BENCH "${HASHING_BENCH_SRC}") @@ -209,7 +209,7 @@ ConfigureBench(HASHING_BENCH "${HASHING_BENCH_SRC}") # - merge benchmark ------------------------------------------------------------------------------- set(MERGE_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/merge/merge_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/merge/merge_benchmark.cpp") ConfigureBench(MERGE_BENCH "${MERGE_BENCH_SRC}") @@ -217,7 +217,7 @@ ConfigureBench(MERGE_BENCH "${MERGE_BENCH_SRC}") # - null_mask benchmark --------------------------------------------------------------------------- set(NULLMASK_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/null_mask/set_null_mask_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/null_mask/set_null_mask_benchmark.cpp") ConfigureBench(NULLMASK_BENCH "${NULLMASK_BENCH_SRC}") @@ -225,7 +225,7 @@ ConfigureBench(NULLMASK_BENCH "${NULLMASK_BENCH_SRC}") # - parquet writer chunks benchmark --------------------------------------------------------------- set(PARQUET_WRITER_CHUNKS_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_writer_chunks_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_writer_chunks_benchmark.cpp") ConfigureBench(PARQUET_WRITER_CHUNKS_BENCH "${PARQUET_WRITER_CHUNKS_BENCH_SRC}") @@ -233,7 +233,7 @@ ConfigureBench(PARQUET_WRITER_CHUNKS_BENCH "${PARQUET_WRITER_CHUNKS_BENCH_SRC}") # - parquet reader benchmark ---------------------------------------------------------------------- set(PARQUET_READER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_reader_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_reader_benchmark.cpp") ConfigureBench(PARQUET_READER_BENCH "${PARQUET_READER_BENCH_SRC}") @@ -241,7 +241,7 @@ ConfigureBench(PARQUET_READER_BENCH "${PARQUET_READER_BENCH_SRC}") # - orc reader benchmark -------------------------------------------------------------------------- set(ORC_READER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/orc/orc_reader_benchmark.cpp") + 
"${CMAKE_CURRENT_SOURCE_DIR}/io/orc/orc_reader_benchmark.cpp") ConfigureBench(ORC_READER_BENCH "${ORC_READER_BENCH_SRC}") @@ -249,7 +249,7 @@ ConfigureBench(ORC_READER_BENCH "${ORC_READER_BENCH_SRC}") # - csv reader benchmark -------------------------------------------------------------------------- set(CSV_READER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/csv/csv_reader_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/csv/csv_reader_benchmark.cpp") ConfigureBench(CSV_READER_BENCH "${CSV_READER_BENCH_SRC}") @@ -257,7 +257,7 @@ ConfigureBench(CSV_READER_BENCH "${CSV_READER_BENCH_SRC}") # - parquet writer benchmark ---------------------------------------------------------------------- set(PARQUET_WRITER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_writer_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_writer_benchmark.cpp") ConfigureBench(PARQUET_WRITER_BENCH "${PARQUET_WRITER_BENCH_SRC}") @@ -265,7 +265,7 @@ ConfigureBench(PARQUET_WRITER_BENCH "${PARQUET_WRITER_BENCH_SRC}") # - orc writer benchmark -------------------------------------------------------------------------- set(ORC_WRITER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/orc/orc_writer_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/orc/orc_writer_benchmark.cpp") ConfigureBench(ORC_WRITER_BENCH "${ORC_WRITER_BENCH_SRC}") @@ -273,7 +273,7 @@ ConfigureBench(ORC_WRITER_BENCH "${ORC_WRITER_BENCH_SRC}") # - csv writer benchmark -------------------------------------------------------------------------- set(CSV_WRITER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/csv/csv_writer_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/csv/csv_writer_benchmark.cpp") ConfigureBench(CSV_WRITER_BENCH "${CSV_WRITER_BENCH_SRC}") @@ -281,7 +281,7 @@ ConfigureBench(CSV_WRITER_BENCH "${CSV_WRITER_BENCH_SRC}") # - ast benchmark --------------------------------------------------------------------------------- set(AST_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/ast/transform_benchmark.cpp") + 
"${CMAKE_CURRENT_SOURCE_DIR}/ast/transform_benchmark.cpp") ConfigureBench(AST_BENCH "${AST_BENCH_SRC}") @@ -289,7 +289,7 @@ ConfigureBench(AST_BENCH "${AST_BENCH_SRC}") # - binaryop benchmark ---------------------------------------------------------------------------- set(BINARYOP_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/binaryop/binaryop_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/binaryop/binaryop_benchmark.cu") ConfigureBench(BINARYOP_BENCH "${BINARYOP_BENCH_SRC}") @@ -297,7 +297,7 @@ ConfigureBench(BINARYOP_BENCH "${BINARYOP_BENCH_SRC}") # - subword tokenizer benchmark ------------------------------------------------------------------- set(SUBWORD_TOKENIZER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/text/subword_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/text/subword_benchmark.cpp") ConfigureBench(SUBWORD_TOKENIZER_BENCH "${SUBWORD_TOKENIZER_BENCH_SRC}") @@ -305,9 +305,10 @@ ConfigureBench(SUBWORD_TOKENIZER_BENCH "${SUBWORD_TOKENIZER_BENCH_SRC}") # - strings benchmark ------------------------------------------------------------------- set(STRINGS_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/string/case_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/string/replace_scalar_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/string/url_decode_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/string/case_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_floats_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/replace_scalar_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/url_decode_benchmark.cpp") ConfigureBench(STRINGS_BENCH "${STRINGS_BENCH_SRC}") diff --git a/cpp/benchmarks/string/convert_floats_benchmark.cpp b/cpp/benchmarks/string/convert_floats_benchmark.cpp new file mode 100644 index 00000000000..38183fd941f --- /dev/null +++ b/cpp/benchmarks/string/convert_floats_benchmark.cpp @@ -0,0 +1,132 @@ +/* + * 
Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../fixture/benchmark_fixture.hpp" +#include "../synchronization/synchronization.hpp" + +#include + +#include +#include +#include + +#include +#include + +#include +#include + +namespace { +static constexpr cudf::size_type array_size{1 << 10}; + +template +static const std::vector& get_float_numbers() +{ + static std::vector numbers; + if (numbers.size() == 0) { + numbers.reserve(array_size); + cudf::test::UniformRandomGenerator rand_gen(std::numeric_limits::min(), + std::numeric_limits::max()); + std::generate_n( + std::back_inserter(numbers), array_size, [&rand_gen]() { return rand_gen.generate(); }); + } + return numbers; +} + +template +static const std::vector& get_floats_numbers_as_string() +{ + static std::vector numbers_str; + if (numbers_str.size() == 0) { + numbers_str.reserve(array_size); + const auto& numbers = get_float_numbers(); + std::transform(numbers.begin(), numbers.end(), std::back_inserter(numbers_str), [](auto x) { + return std::to_string(x); + }); + } + return numbers_str; +} + +} // anonymous namespace + +class StringToFloatNumber : public cudf::benchmark { +}; +template +void convert_to_float_number(benchmark::State& state) +{ + const auto& h_strings = get_floats_numbers_as_string(); + const auto strings_size = std::accumulate( + h_strings.begin(), h_strings.end(), std::size_t{0}, [](std::size_t size, const auto& str) { + return 
size + str.length(); + }); + + cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end()); + const auto strings_view = cudf::strings_column_view(strings); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + volatile auto results = cudf::strings::to_floats(strings_view, cudf::data_type{float_type}); + } + + state.SetBytesProcessed(state.iterations() * state.range(0) * strings_size); +} + +class StringFromFloatNumber : public cudf::benchmark { +}; +template +void convert_from_float_number(benchmark::State& state) +{ + const auto& h_floats = get_float_numbers(); + const auto floats_size = h_floats.size() * sizeof(FloatType); + + cudf::test::fixed_width_column_wrapper floats(h_floats.begin(), h_floats.end()); + const auto floats_view = cudf::column_view(floats); + + for (auto _ : state) { + cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + volatile auto results = cudf::strings::from_floats(floats_view); + } + + state.SetBytesProcessed(state.iterations() * state.range(0) * floats_size); +} + +#define CV_TO_FLOATS_BENCHMARK_DEFINE(name, float_type_id) \ + BENCHMARK_DEFINE_F(StringToFloatNumber, name)(::benchmark::State & state) \ + { \ + convert_to_float_number(state); \ + } \ + BENCHMARK_REGISTER_F(StringToFloatNumber, name) \ + ->RangeMultiplier(1 << 5) \ + ->Range(1 << 10, 1 << 25) \ + ->UseManualTime() \ + ->Unit(benchmark::kMicrosecond); + +#define CV_FROM_FLOATS_BENCHMARK_DEFINE(name, float_type) \ + BENCHMARK_DEFINE_F(StringFromFloatNumber, name)(::benchmark::State & state) \ + { \ + convert_from_float_number(state); \ + } \ + BENCHMARK_REGISTER_F(StringFromFloatNumber, name) \ + ->RangeMultiplier(1 << 5) \ + ->Range(1 << 10, 1 << 25) \ + ->UseManualTime() \ + ->Unit(benchmark::kMicrosecond); + +CV_TO_FLOATS_BENCHMARK_DEFINE(string_to_float32, cudf::type_id::FLOAT32); +CV_TO_FLOATS_BENCHMARK_DEFINE(string_to_float64, cudf::type_id::FLOAT64); + 
+CV_FROM_FLOATS_BENCHMARK_DEFINE(string_from_float32, float); +CV_FROM_FLOATS_BENCHMARK_DEFINE(string_from_float64, double); From 3ea06fb39b13cac687938625c843aff55ba705f4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 18 Feb 2021 09:01:48 -0700 Subject: [PATCH 02/19] Fix error and improve the function converting from string to float types --- cpp/src/strings/convert/convert_floats.cu | 41 +++++++++++------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 6871b1dd564..97f16e1f1b0 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -49,24 +49,25 @@ namespace { * * This function will also handle scientific notation format. */ -__device__ inline double stod(string_view const& d_str) +template +__device__ inline FloatType stof(string_view const& d_str) { const char* in_ptr = d_str.data(); const char* end = in_ptr + d_str.size_bytes(); if (end == in_ptr) return 0.0; // special strings - if (d_str.compare("NaN", 3) == 0) return std::numeric_limits::quiet_NaN(); - if (d_str.compare("Inf", 3) == 0) return std::numeric_limits::infinity(); - if (d_str.compare("-Inf", 4) == 0) return -std::numeric_limits::infinity(); - double sign = 1.0; + if (d_str.compare("NaN", 3) == 0) return std::numeric_limits::quiet_NaN(); + if (d_str.compare("Inf", 3) == 0) return std::numeric_limits::infinity(); + if (d_str.compare("-Inf", 4) == 0) return -std::numeric_limits::infinity(); + FloatType sign{1.0}; if (*in_ptr == '-' || *in_ptr == '+') { sign = (*in_ptr == '-' ? 
-1 : 1); ++in_ptr; } - unsigned long max_mantissa = 0x0FFFFFFFFFFFFF; - unsigned long digits = 0; - int exp_off = 0; - bool decimal = false; + constexpr uint64_t max_holding = (std::numeric_limits::max() - 9L) / 10L; + uint64_t digits = 0; + int exp_off = 0; + bool decimal = false; while (in_ptr < end) { char ch = *in_ptr; if (ch == '.') { @@ -75,11 +76,11 @@ __device__ inline double stod(string_view const& d_str) continue; } if (ch < '0' || ch > '9') break; - if (digits > max_mantissa) + if (digits > max_holding) exp_off += (int)!decimal; else { - digits = (digits * 10L) + (unsigned long)(ch - '0'); - if (digits > max_mantissa) { + digits = (digits * 10L) + static_cast(ch - '0'); + if (digits > max_holding) { digits = digits / 10L; exp_off += (int)!decimal; } else @@ -109,14 +110,14 @@ __device__ inline double stod(string_view const& d_str) } exp_ten *= exp_sign; exp_ten += exp_off; - if (exp_ten > 308) - return sign > 0 ? std::numeric_limits::infinity() - : -std::numeric_limits::infinity(); - else if (exp_ten < -308) - return 0.0; + if (exp_ten > std::numeric_limits::max_exponent10) + return sign > 0 ? 
std::numeric_limits::infinity() + : -std::numeric_limits::infinity(); + else if (exp_ten < std::numeric_limits::min_exponent10) + return FloatType{0}; // using exp10() since the pow(10.0,exp_ten) function is // very inaccurate in 10.2: http://nvbugs/2971187 - double value = static_cast(digits) * exp10(static_cast(exp_ten)); + const FloatType value = static_cast(digits) * exp10(static_cast(exp_ten)); return (value * sign); } @@ -132,9 +133,7 @@ struct string_to_float_fn { __device__ FloatType operator()(size_type idx) { if (strings_column.is_null(idx)) return static_cast(0); - // the cast to FloatType will create predictable results - // for floats that are larger than the FloatType can hold - return static_cast(stod(strings_column.element(idx))); + return stof(strings_column.element(idx)); } }; From 4bfd9068af8d0c3d36e5278d3d45ec9344964a44 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 18 Feb 2021 10:36:56 -0700 Subject: [PATCH 03/19] Fix the equivalent check function for floating point numbers that incorrectly handles inf and nan. 
--- cpp/tests/utilities/column_utilities.cu | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 091919463a8..9585cae5ffb 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -127,11 +127,20 @@ class corresponding_rows_not_equivalent { column_device_view const& lhs, column_device_view const& rhs, size_type index) { if (lhs.is_valid(index) and rhs.is_valid(index)) { - int ulp = 4; // value taken from google test - T x = lhs.element(index); - T y = rhs.element(index); - return std::abs(x - y) > std::numeric_limits::epsilon() * std::abs(x + y) * ulp && - std::abs(x - y) >= std::numeric_limits::min(); + const T x = lhs.element(index); + const T y = rhs.element(index); + + // Must handle inf and nan separately + if (std::isinf(x) || std::isinf(y)) { + return x != y; // comparison of (inf==inf) returns true + } else if (std::isnan(x) || std::isnan(y)) { + return std::isnan(x) ^ std::isnan(y); // comparison of (nan==nan) returns false + } else { + constexpr int ulp = 4; // ulp = unit of least precision, value taken from google test + const T abs_x_minus_y = std::abs(x - y); + return abs_x_minus_y >= std::numeric_limits::min() && + abs_x_minus_y > std::numeric_limits::epsilon() * std::abs(x + y) * ulp; + } } else { // if either is null, then the inequality was checked already return true; From 0cfdbcdba7c239a08710aab825615570a884531b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 18 Feb 2021 10:41:22 -0700 Subject: [PATCH 04/19] Add a test for converting string to double number --- cpp/tests/strings/floats_tests.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index 40775382e16..7738d2347e2 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -128,7 +128,8 @@ 
TEST_F(StringsConvertTest, ToFloats64) "456e", "-1.78e+5", "-122.33644782", - "12e+309"}; + "12e+309", + "1.7976931348623159E308"}; cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), @@ -152,6 +153,7 @@ TEST_F(StringsConvertTest, ToFloats64) 456.0, -178000.0, -122.33644781999999, + infval, infval}; auto strings_view = cudf::strings_column_view(strings); From a5ede5dd6524ab77b5535979f0a212cf2f644efe Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 18 Feb 2021 11:06:35 -0700 Subject: [PATCH 05/19] Update copyright header --- cpp/src/strings/convert/convert_floats.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 97f16e1f1b0..10b6895a328 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From 25848403beb1c5a12ca202e8be81d47505f873b0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 18 Feb 2021 12:43:02 -0700 Subject: [PATCH 06/19] Fix const qualifier position and change CMakeLists.txt --- cpp/benchmarks/CMakeLists.txt | 104 ++++++++++++------------ cpp/tests/utilities/column_utilities.cu | 6 +- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 4198dd81ee2..64a32f3ea90 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -15,8 +15,8 @@ #============================================================================= add_library(cudf_datagen STATIC - "${CMAKE_SOURCE_DIR}/benchmarks/common/generate_benchmark_input.cpp" - ) + "${CMAKE_SOURCE_DIR}/benchmarks/common/generate_benchmark_input.cpp" +) ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -24,16 +24,16 @@ add_library(cudf_datagen STATIC set(BENCHMARK_LIST CACHE INTERNAL "BENCHMARK_LIST") function(ConfigureBench CMAKE_BENCH_NAME CMAKE_BENCH_SRC) - add_executable(${CMAKE_BENCH_NAME} - ${CMAKE_BENCH_SRC} - "${CMAKE_CURRENT_SOURCE_DIR}/synchronization/synchronization.cpp" - "${CMAKE_SOURCE_DIR}/tests/utilities/base_fixture.cpp" - "${CMAKE_SOURCE_DIR}/benchmarks/io/cuio_benchmark_common.cpp") - set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_link_libraries(${CMAKE_BENCH_NAME} benchmark benchmark_main pthread cudf_datagen cudf) - set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks") - set(BENCHMARK_LIST ${BENCHMARK_LIST} ${CMAKE_BENCH_NAME} CACHE INTERNAL "BENCHMARK_LIST") + add_executable(${CMAKE_BENCH_NAME} + ${CMAKE_BENCH_SRC} + "${CMAKE_CURRENT_SOURCE_DIR}/synchronization/synchronization.cpp" + 
"${CMAKE_SOURCE_DIR}/tests/utilities/base_fixture.cpp" + "${CMAKE_SOURCE_DIR}/benchmarks/io/cuio_benchmark_common.cpp") + set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_link_libraries(${CMAKE_BENCH_NAME} benchmark benchmark_main pthread cudf_datagen cudf) + set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks") + set(BENCHMARK_LIST ${BENCHMARK_LIST} ${CMAKE_BENCH_NAME} CACHE INTERNAL "BENCHMARK_LIST") endfunction(ConfigureBench) ################################################################################################### @@ -44,7 +44,7 @@ include_directories("${THRUST_INCLUDE_DIR}" "${LIBCUDACXX_INCLUDE_DIR}") if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") + include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") endif(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) include_directories("${CMAKE_BINARY_DIR}/include" @@ -76,7 +76,7 @@ link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT # - column benchmarks ----------------------------------------------------------------------------- set(COLUMN_CONCAT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/column/concatenate_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/column/concatenate_benchmark.cpp") ConfigureBench(COLUMN_CONCAT_BENCH "${COLUMN_CONCAT_BENCH_SRC}") @@ -84,7 +84,7 @@ ConfigureBench(COLUMN_CONCAT_BENCH "${COLUMN_CONCAT_BENCH_SRC}") # - gather benchmark ------------------------------------------------------------------------------ set(GATHER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/gather_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/gather_benchmark.cu") ConfigureBench(GATHER_BENCH "${GATHER_BENCH_SRC}") @@ -92,7 +92,7 @@ ConfigureBench(GATHER_BENCH "${GATHER_BENCH_SRC}") # - scatter benchmark ----------------------------------------------------------------------------- set(SCATTER_BENCH_SRC - 
"${CMAKE_CURRENT_SOURCE_DIR}/copying/scatter_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/scatter_benchmark.cu") ConfigureBench(SCATTER_BENCH "${SCATTER_BENCH_SRC}") @@ -100,7 +100,7 @@ ConfigureBench(SCATTER_BENCH "${SCATTER_BENCH_SRC}") # - contiguous_split benchmark ------------------------------------------------------------------- set(CONTIGUOUS_SPLIT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/contiguous_split_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/contiguous_split_benchmark.cu") ConfigureBench(CONTIGUOUS_SPLIT_BENCH "${CONTIGUOUS_SPLIT_BENCH_SRC}") @@ -108,7 +108,7 @@ ConfigureBench(CONTIGUOUS_SPLIT_BENCH "${CONTIGUOUS_SPLIT_BENCH_SRC}") # - shift benchmark ------------------------------------------------------------------------------- set(SHIFT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/shift_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/shift_benchmark.cu") ConfigureBench(SHIFT_BENCH "${SHIFT_BENCH_SRC}") @@ -116,7 +116,7 @@ ConfigureBench(SHIFT_BENCH "${SHIFT_BENCH_SRC}") # - transpose benchmark --------------------------------------------------------------------------- set(TRANSPOSE_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/transpose/transpose_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/transpose/transpose_benchmark.cu") ConfigureBench(TRANSPOSE_BENCH "${TRANSPOSE_BENCH_SRC}") @@ -124,7 +124,7 @@ ConfigureBench(TRANSPOSE_BENCH "${TRANSPOSE_BENCH_SRC}") # - apply_boolean_mask benchmark ------------------------------------------------------------------ set(APPLY_BOOLEAN_MASK_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/apply_boolean_mask_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/apply_boolean_mask_benchmark.cpp") ConfigureBench(APPLY_BOOLEAN_MASK_BENCH "${APPLY_BOOLEAN_MASK_BENCH_SRC}") @@ -132,7 +132,7 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH "${APPLY_BOOLEAN_MASK_BENCH_SRC}") # - stream_compaction benchmark 
------------------------------------------------------------------- set(STREAM_COMPACTION_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/drop_duplicates_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/drop_duplicates_benchmark.cpp") ConfigureBench(STREAM_COMPACTION_BENCH "${STREAM_COMPACTION_BENCH_SRC}") @@ -140,7 +140,7 @@ ConfigureBench(STREAM_COMPACTION_BENCH "${STREAM_COMPACTION_BENCH_SRC}") # - join benchmark -------------------------------------------------------------------------------- set(JOIN_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/join/join_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/join/join_benchmark.cu") ConfigureBench(JOIN_BENCH "${JOIN_BENCH_SRC}") @@ -148,7 +148,7 @@ ConfigureBench(JOIN_BENCH "${JOIN_BENCH_SRC}") # - iterator benchmark ---------------------------------------------------------------------------- set(ITERATOR_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/iterator/iterator_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/iterator/iterator_benchmark.cu") ConfigureBench(ITERATOR_BENCH "${ITERATOR_BENCH_SRC}") @@ -156,7 +156,7 @@ ConfigureBench(ITERATOR_BENCH "${ITERATOR_BENCH_SRC}") # - search benchmark ------------------------------------------------------------------------------ set(SEARCH_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/search/search_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/search/search_benchmark.cu") ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}") @@ -164,8 +164,8 @@ ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}") # - sort benchmark -------------------------------------------------------------------------------- set(SORT_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cpp") ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}") @@ -173,7 +173,7 @@ ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}") # - 
type_dispatcher benchmark --------------------------------------------------------------------- set(TD_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/type_dispatcher/type_dispatcher_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/type_dispatcher/type_dispatcher_benchmark.cu") ConfigureBench(TYPE_DISPATCHER_BENCH "${TD_BENCH_SRC}") @@ -181,10 +181,10 @@ ConfigureBench(TYPE_DISPATCHER_BENCH "${TD_BENCH_SRC}") # - reduction benchmark --------------------------------------------------------------------------- set(REDUCTION_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/reduction/anyall_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/reduction/dictionary_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/reduction/reduce_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/reduction/minmax_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/reduction/anyall_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduction/dictionary_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduction/reduce_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduction/minmax_benchmark.cpp") ConfigureBench(REDUCTION_BENCH "${REDUCTION_BENCH_SRC}") @@ -192,8 +192,8 @@ ConfigureBench(REDUCTION_BENCH "${REDUCTION_BENCH_SRC}") # - groupby benchmark ----------------------------------------------------------------------------- set(GROUPBY_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_benchmark.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_nth_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_sum_benchmark.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/groupby/group_nth_benchmark.cu") ConfigureBench(GROUPBY_BENCH "${GROUPBY_BENCH_SRC}") @@ -201,7 +201,7 @@ ConfigureBench(GROUPBY_BENCH "${GROUPBY_BENCH_SRC}") # - hashing benchmark ----------------------------------------------------------------------------- set(HASHING_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/hashing/hashing_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/hashing/hashing_benchmark.cpp") ConfigureBench(HASHING_BENCH "${HASHING_BENCH_SRC}") @@ -209,7 +209,7 @@ 
ConfigureBench(HASHING_BENCH "${HASHING_BENCH_SRC}") # - merge benchmark ------------------------------------------------------------------------------- set(MERGE_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/merge/merge_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/merge/merge_benchmark.cpp") ConfigureBench(MERGE_BENCH "${MERGE_BENCH_SRC}") @@ -217,7 +217,7 @@ ConfigureBench(MERGE_BENCH "${MERGE_BENCH_SRC}") # - null_mask benchmark --------------------------------------------------------------------------- set(NULLMASK_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/null_mask/set_null_mask_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/null_mask/set_null_mask_benchmark.cpp") ConfigureBench(NULLMASK_BENCH "${NULLMASK_BENCH_SRC}") @@ -225,7 +225,7 @@ ConfigureBench(NULLMASK_BENCH "${NULLMASK_BENCH_SRC}") # - parquet writer chunks benchmark --------------------------------------------------------------- set(PARQUET_WRITER_CHUNKS_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_writer_chunks_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_writer_chunks_benchmark.cpp") ConfigureBench(PARQUET_WRITER_CHUNKS_BENCH "${PARQUET_WRITER_CHUNKS_BENCH_SRC}") @@ -233,7 +233,7 @@ ConfigureBench(PARQUET_WRITER_CHUNKS_BENCH "${PARQUET_WRITER_CHUNKS_BENCH_SRC}") # - parquet reader benchmark ---------------------------------------------------------------------- set(PARQUET_READER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_reader_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_reader_benchmark.cpp") ConfigureBench(PARQUET_READER_BENCH "${PARQUET_READER_BENCH_SRC}") @@ -241,7 +241,7 @@ ConfigureBench(PARQUET_READER_BENCH "${PARQUET_READER_BENCH_SRC}") # - orc reader benchmark -------------------------------------------------------------------------- set(ORC_READER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/orc/orc_reader_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/orc/orc_reader_benchmark.cpp") ConfigureBench(ORC_READER_BENCH 
"${ORC_READER_BENCH_SRC}") @@ -249,7 +249,7 @@ ConfigureBench(ORC_READER_BENCH "${ORC_READER_BENCH_SRC}") # - csv reader benchmark -------------------------------------------------------------------------- set(CSV_READER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/csv/csv_reader_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/csv/csv_reader_benchmark.cpp") ConfigureBench(CSV_READER_BENCH "${CSV_READER_BENCH_SRC}") @@ -257,7 +257,7 @@ ConfigureBench(CSV_READER_BENCH "${CSV_READER_BENCH_SRC}") # - parquet writer benchmark ---------------------------------------------------------------------- set(PARQUET_WRITER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_writer_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/parquet/parquet_writer_benchmark.cpp") ConfigureBench(PARQUET_WRITER_BENCH "${PARQUET_WRITER_BENCH_SRC}") @@ -265,7 +265,7 @@ ConfigureBench(PARQUET_WRITER_BENCH "${PARQUET_WRITER_BENCH_SRC}") # - orc writer benchmark -------------------------------------------------------------------------- set(ORC_WRITER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/orc/orc_writer_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/orc/orc_writer_benchmark.cpp") ConfigureBench(ORC_WRITER_BENCH "${ORC_WRITER_BENCH_SRC}") @@ -273,7 +273,7 @@ ConfigureBench(ORC_WRITER_BENCH "${ORC_WRITER_BENCH_SRC}") # - csv writer benchmark -------------------------------------------------------------------------- set(CSV_WRITER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/io/csv/csv_writer_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/io/csv/csv_writer_benchmark.cpp") ConfigureBench(CSV_WRITER_BENCH "${CSV_WRITER_BENCH_SRC}") @@ -281,7 +281,7 @@ ConfigureBench(CSV_WRITER_BENCH "${CSV_WRITER_BENCH_SRC}") # - ast benchmark --------------------------------------------------------------------------------- set(AST_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/ast/transform_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/ast/transform_benchmark.cpp") ConfigureBench(AST_BENCH "${AST_BENCH_SRC}") 
@@ -289,7 +289,7 @@ ConfigureBench(AST_BENCH "${AST_BENCH_SRC}") # - binaryop benchmark ---------------------------------------------------------------------------- set(BINARYOP_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/binaryop/binaryop_benchmark.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/binaryop/binaryop_benchmark.cu") ConfigureBench(BINARYOP_BENCH "${BINARYOP_BENCH_SRC}") @@ -297,7 +297,7 @@ ConfigureBench(BINARYOP_BENCH "${BINARYOP_BENCH_SRC}") # - subword tokenizer benchmark ------------------------------------------------------------------- set(SUBWORD_TOKENIZER_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/text/subword_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/text/subword_benchmark.cpp") ConfigureBench(SUBWORD_TOKENIZER_BENCH "${SUBWORD_TOKENIZER_BENCH_SRC}") @@ -305,11 +305,11 @@ ConfigureBench(SUBWORD_TOKENIZER_BENCH "${SUBWORD_TOKENIZER_BENCH_SRC}") # - strings benchmark ------------------------------------------------------------------- set(STRINGS_BENCH_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/string/case_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/string/contains_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_floats_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/string/replace_scalar_benchmark.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/string/url_decode_benchmark.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/string/case_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/contains_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_floats_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/replace_scalar_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/url_decode_benchmark.cpp") ConfigureBench(STRINGS_BENCH "${STRINGS_BENCH_SRC}") diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 9585cae5ffb..c4c8e44c336 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ 
b/cpp/tests/utilities/column_utilities.cu @@ -127,8 +127,8 @@ class corresponding_rows_not_equivalent { column_device_view const& lhs, column_device_view const& rhs, size_type index) { if (lhs.is_valid(index) and rhs.is_valid(index)) { - const T x = lhs.element(index); - const T y = rhs.element(index); + T const x = lhs.element(index); + T const y = rhs.element(index); // Must handle inf and nan separately if (std::isinf(x) || std::isinf(y)) { @@ -137,7 +137,7 @@ class corresponding_rows_not_equivalent { return std::isnan(x) ^ std::isnan(y); // comparison of (nan==nan) returns false } else { constexpr int ulp = 4; // ulp = unit of least precision, value taken from google test - const T abs_x_minus_y = std::abs(x - y); + T const abs_x_minus_y = std::abs(x - y); return abs_x_minus_y >= std::numeric_limits::min() && abs_x_minus_y > std::numeric_limits::epsilon() * std::abs(x + y) * ulp; } From 7fadcd0aa4b5f62051bde899b1af6e8d4abec97d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 18 Feb 2021 18:38:10 -0700 Subject: [PATCH 07/19] Some improvement to atof --- cpp/src/strings/convert/convert_floats.cu | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 10b6895a328..14f382f832b 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -88,6 +88,8 @@ __device__ inline FloatType stof(string_view const& d_str) } ++in_ptr; } + if (digits == 0) return sign * static_cast(0); + // check for exponent char int exp_ten = 0; int exp_sign = 1; @@ -108,17 +110,23 @@ __device__ inline FloatType stof(string_view const& d_str) } } } + + int const num_digits = static_cast(log10(digits)) + 1; exp_ten *= exp_sign; exp_ten += exp_off; + exp_ten += num_digits - 1; if (exp_ten > std::numeric_limits::max_exponent10) return sign > 0 ? 
std::numeric_limits::infinity() : -std::numeric_limits::infinity(); else if (exp_ten < std::numeric_limits::min_exponent10) return FloatType{0}; + // using exp10() since the pow(10.0,exp_ten) function is // very inaccurate in 10.2: http://nvbugs/2971187 - const FloatType value = static_cast(digits) * exp10(static_cast(exp_ten)); - return (value * sign); + FloatType const base = + sign * static_cast(digits) * exp10(static_cast(1 - num_digits)); + FloatType const exponent = exp10(static_cast(exp_ten)); + return base * exponent; } /** From f3ead3d8eaa842232f40c25494c49a7ff58dba28 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 18 Feb 2021 20:17:58 -0700 Subject: [PATCH 08/19] Use the results generated from std::atof to test our atof implementation --- cpp/tests/strings/floats_tests.cpp | 55 +++++++----------------------- 1 file changed, 12 insertions(+), 43 deletions(-) diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index 7738d2347e2..3901ce6c099 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -51,25 +51,10 @@ TEST_F(StringsConvertTest, ToFloats32) h_strings.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - float nanval = std::numeric_limits::quiet_NaN(); - float infval = std::numeric_limits::infinity(); - std::vector h_expected{1234.0, - 0, - -876.0, - 543.2, - -0.12, - 0.25, - -0.002, - 0, - -0.0, - 12000, - nanval, - 0, - 123.0, - 456.0, - -178000.0, - -122.3364486694336, - infval}; + std::vector h_expected; + std::for_each(h_strings.begin(), h_strings.end(), [&](const char* str) { + h_expected.push_back(str ? 
std::atof(str) : 0); + }); auto strings_view = cudf::strings_column_view(strings); auto results = cudf::strings::to_floats(strings_view, cudf::data_type{cudf::type_id::FLOAT32}); @@ -78,7 +63,7 @@ TEST_F(StringsConvertTest, ToFloats32) h_expected.begin(), h_expected.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, true); } TEST_F(StringsConvertTest, FromFloats32) @@ -107,7 +92,7 @@ TEST_F(StringsConvertTest, FromFloats32) h_expected.end(), thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, true); } TEST_F(StringsConvertTest, ToFloats64) @@ -135,26 +120,10 @@ TEST_F(StringsConvertTest, ToFloats64) h_strings.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - double nanval = std::numeric_limits::quiet_NaN(); - double infval = std::numeric_limits::infinity(); - std::vector h_expected{1234.0, - 0, - -876.0, - 543.2, - -0.12, - 0.25, - -0.002, - 0, - -0.0, - 1.28e256, - nanval, - 0, - 123.0, - 456.0, - -178000.0, - -122.33644781999999, - infval, - infval}; + std::vector h_expected; + std::for_each(h_strings.begin(), h_strings.end(), [&](const char* str) { + h_expected.push_back(str ? 
std::atof(str) : 0); + }); auto strings_view = cudf::strings_column_view(strings); auto results = cudf::strings::to_floats(strings_view, cudf::data_type{cudf::type_id::FLOAT64}); @@ -163,7 +132,7 @@ TEST_F(StringsConvertTest, ToFloats64) h_expected.begin(), h_expected.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, true); } TEST_F(StringsConvertTest, FromFloats64) @@ -192,7 +161,7 @@ TEST_F(StringsConvertTest, FromFloats64) h_expected.end(), thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected, true); } TEST_F(StringsConvertTest, ZeroSizeStringsColumnFloat) From 30e98b353a66849d22824e4a69a02ba2ecd2b64a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 18 Feb 2021 20:31:29 -0700 Subject: [PATCH 09/19] Add a simple test case --- cpp/tests/strings/floats_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index 3901ce6c099..b98416d9edd 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -45,7 +45,8 @@ TEST_F(StringsConvertTest, ToFloats32) "456e", "-1.78e+5", "-122.33644782123456789", - "12e+309"}; + "12e+309", + "3.4028236E38"}; cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), From 354e65ea902abdbac627971586dce72896d4ea93 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 19 Feb 2021 09:41:19 -0700 Subject: [PATCH 10/19] Fix convert_floats_benchmark --- .../string/convert_floats_benchmark.cpp | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/cpp/benchmarks/string/convert_floats_benchmark.cpp 
b/cpp/benchmarks/string/convert_floats_benchmark.cpp index 38183fd941f..5cdd9ffee27 100644 --- a/cpp/benchmarks/string/convert_floats_benchmark.cpp +++ b/cpp/benchmarks/string/convert_floats_benchmark.cpp @@ -28,14 +28,15 @@ #include #include +#include namespace { -static constexpr cudf::size_type array_size{1 << 10}; template -static const std::vector& get_float_numbers() +static const std::vector& get_float_numbers(int64_t array_size) { - static std::vector numbers; + static std::unordered_map> number_arrays; + auto& numbers = number_arrays[array_size]; if (numbers.size() == 0) { numbers.reserve(array_size); cudf::test::UniformRandomGenerator rand_gen(std::numeric_limits::min(), @@ -47,12 +48,13 @@ static const std::vector& get_float_numbers() } template -static const std::vector& get_floats_numbers_as_string() +static const std::vector& get_floats_numbers_as_string(int64_t array_size) { - static std::vector numbers_str; + static std::unordered_map> str_arrays; + auto& numbers_str = str_arrays[array_size]; if (numbers_str.size() == 0) { numbers_str.reserve(array_size); - const auto& numbers = get_float_numbers(); + const auto& numbers = get_float_numbers(array_size); std::transform(numbers.begin(), numbers.end(), std::back_inserter(numbers_str), [](auto x) { return std::to_string(x); }); @@ -67,7 +69,7 @@ class StringToFloatNumber : public cudf::benchmark { template void convert_to_float_number(benchmark::State& state) { - const auto& h_strings = get_floats_numbers_as_string(); + const auto& h_strings = get_floats_numbers_as_string(state.range(0)); const auto strings_size = std::accumulate( h_strings.begin(), h_strings.end(), std::size_t{0}, [](std::size_t size, const auto& str) { return size + str.length(); @@ -81,7 +83,7 @@ void convert_to_float_number(benchmark::State& state) volatile auto results = cudf::strings::to_floats(strings_view, cudf::data_type{float_type}); } - state.SetBytesProcessed(state.iterations() * state.range(0) * strings_size); + 
state.SetBytesProcessed(state.iterations() * strings_size); } class StringFromFloatNumber : public cudf::benchmark { @@ -89,7 +91,7 @@ class StringFromFloatNumber : public cudf::benchmark { template void convert_from_float_number(benchmark::State& state) { - const auto& h_floats = get_float_numbers(); + const auto& h_floats = get_float_numbers(state.range(0)); const auto floats_size = h_floats.size() * sizeof(FloatType); cudf::test::fixed_width_column_wrapper floats(h_floats.begin(), h_floats.end()); @@ -100,7 +102,7 @@ void convert_from_float_number(benchmark::State& state) volatile auto results = cudf::strings::from_floats(floats_view); } - state.SetBytesProcessed(state.iterations() * state.range(0) * floats_size); + state.SetBytesProcessed(state.iterations() * floats_size); } #define CV_TO_FLOATS_BENCHMARK_DEFINE(name, float_type_id) \ @@ -110,7 +112,7 @@ void convert_from_float_number(benchmark::State& state) } \ BENCHMARK_REGISTER_F(StringToFloatNumber, name) \ ->RangeMultiplier(1 << 5) \ - ->Range(1 << 10, 1 << 25) \ + ->Range(1 << 10, 1 << 20) \ ->UseManualTime() \ ->Unit(benchmark::kMicrosecond); @@ -121,7 +123,7 @@ void convert_from_float_number(benchmark::State& state) } \ BENCHMARK_REGISTER_F(StringFromFloatNumber, name) \ ->RangeMultiplier(1 << 5) \ - ->Range(1 << 10, 1 << 25) \ + ->Range(1 << 10, 1 << 20) \ ->UseManualTime() \ ->Unit(benchmark::kMicrosecond); From 8f5a580429e89a09619ff56325c980ae3c9f470b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 19 Feb 2021 19:42:48 -0700 Subject: [PATCH 11/19] Improve accuracy by casting from string to double then to float --- cpp/src/strings/convert/convert_floats.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 14f382f832b..74ad51e930c 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -141,7 +141,7 @@ struct string_to_float_fn { 
__device__ FloatType operator()(size_type idx) { if (strings_column.is_null(idx)) return static_cast(0); - return stof(strings_column.element(idx)); + return static_cast(stof(strings_column.element(idx))); } }; From a112a1be2e3ed72f39299f492a8498246202b86a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 22 Feb 2021 08:04:11 -0700 Subject: [PATCH 12/19] Change the template function `stof` to a regular function `stod`. --- cpp/src/strings/convert/convert_floats.cu | 31 +++++++++++------------ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 74ad51e930c..10ee46af985 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -49,17 +49,16 @@ namespace { * * This function will also handle scientific notation format. */ -template -__device__ inline FloatType stof(string_view const& d_str) +__device__ inline double stod(string_view const& d_str) { const char* in_ptr = d_str.data(); const char* end = in_ptr + d_str.size_bytes(); if (end == in_ptr) return 0.0; // special strings - if (d_str.compare("NaN", 3) == 0) return std::numeric_limits::quiet_NaN(); - if (d_str.compare("Inf", 3) == 0) return std::numeric_limits::infinity(); - if (d_str.compare("-Inf", 4) == 0) return -std::numeric_limits::infinity(); - FloatType sign{1.0}; + if (d_str.compare("NaN", 3) == 0) return std::numeric_limits::quiet_NaN(); + if (d_str.compare("Inf", 3) == 0) return std::numeric_limits::infinity(); + if (d_str.compare("-Inf", 4) == 0) return -std::numeric_limits::infinity(); + double sign{1.0}; if (*in_ptr == '-' || *in_ptr == '+') { sign = (*in_ptr == '-' ? 
-1 : 1); ++in_ptr; @@ -88,7 +87,7 @@ __device__ inline FloatType stof(string_view const& d_str) } ++in_ptr; } - if (digits == 0) return sign * static_cast(0); + if (digits == 0) return sign * static_cast(0); // check for exponent char int exp_ten = 0; @@ -115,17 +114,17 @@ __device__ inline FloatType stof(string_view const& d_str) exp_ten *= exp_sign; exp_ten += exp_off; exp_ten += num_digits - 1; - if (exp_ten > std::numeric_limits::max_exponent10) - return sign > 0 ? std::numeric_limits::infinity() - : -std::numeric_limits::infinity(); - else if (exp_ten < std::numeric_limits::min_exponent10) - return FloatType{0}; + if (exp_ten > std::numeric_limits::max_exponent10) + return sign > 0 ? std::numeric_limits::infinity() + : -std::numeric_limits::infinity(); + else if (exp_ten < std::numeric_limits::min_exponent10) + return double{0}; // using exp10() since the pow(10.0,exp_ten) function is // very inaccurate in 10.2: http://nvbugs/2971187 - FloatType const base = - sign * static_cast(digits) * exp10(static_cast(1 - num_digits)); - FloatType const exponent = exp10(static_cast(exp_ten)); + double const base = + sign * static_cast(digits) * exp10(static_cast(1 - num_digits)); + double const exponent = exp10(static_cast(exp_ten)); return base * exponent; } @@ -141,7 +140,7 @@ struct string_to_float_fn { __device__ FloatType operator()(size_type idx) { if (strings_column.is_null(idx)) return static_cast(0); - return static_cast(stof(strings_column.element(idx))); + return static_cast(stod(strings_column.element(idx))); } }; From f8077a7aaa08950db5cbde5553a4c4af641e2728 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 22 Feb 2021 08:14:14 -0700 Subject: [PATCH 13/19] Re-instating important comment --- cpp/src/strings/convert/convert_floats.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 10ee46af985..6d45de2e090 100644 --- a/cpp/src/strings/convert/convert_floats.cu 
+++ b/cpp/src/strings/convert/convert_floats.cu @@ -140,6 +140,8 @@ struct string_to_float_fn { __device__ FloatType operator()(size_type idx) { if (strings_column.is_null(idx)) return static_cast(0); + // The cast to FloatType will create predictable results for floats that are larger than the + // FloatType can hold return static_cast(stod(strings_column.element(idx))); } }; From 1238dba661d736fa17a93cac829edf6bd2ac8d9e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 22 Feb 2021 08:37:21 -0700 Subject: [PATCH 14/19] Minor change in handling nan in typed_element_not_equivalent struct --- cpp/tests/utilities/column_utilities.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index c4c8e44c336..7a775056b3f 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -134,7 +134,7 @@ class corresponding_rows_not_equivalent { if (std::isinf(x) || std::isinf(y)) { return x != y; // comparison of (inf==inf) returns true } else if (std::isnan(x) || std::isnan(y)) { - return std::isnan(x) ^ std::isnan(y); // comparison of (nan==nan) returns false + return std::isnan(x) != std::isnan(y); // comparison of (nan==nan) returns false } else { constexpr int ulp = 4; // ulp = unit of least precision, value taken from google test T const abs_x_minus_y = std::abs(x - y); From 4aaa83f1428d601d8dea8bc3c27ebcd5a9450260 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 22 Feb 2021 08:49:27 -0700 Subject: [PATCH 15/19] Simplify data generation and change ranges for the benchmarks --- .../string/convert_floats_benchmark.cpp | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/cpp/benchmarks/string/convert_floats_benchmark.cpp b/cpp/benchmarks/string/convert_floats_benchmark.cpp index 5cdd9ffee27..870dba81dd4 100644 --- a/cpp/benchmarks/string/convert_floats_benchmark.cpp +++ 
b/cpp/benchmarks/string/convert_floats_benchmark.cpp @@ -32,6 +32,8 @@ namespace { +// For each array_size, this function is called twice from both StringToFloatNumber and +// StringFromFloatNumber classes. Thus, the results are cached for reuse. template static const std::vector& get_float_numbers(int64_t array_size) { @@ -48,17 +50,12 @@ static const std::vector& get_float_numbers(int64_t array_size) } template -static const std::vector& get_floats_numbers_as_string(int64_t array_size) +static std::vector get_floats_numbers_as_string(int64_t array_size) { - static std::unordered_map> str_arrays; - auto& numbers_str = str_arrays[array_size]; - if (numbers_str.size() == 0) { - numbers_str.reserve(array_size); - const auto& numbers = get_float_numbers(array_size); - std::transform(numbers.begin(), numbers.end(), std::back_inserter(numbers_str), [](auto x) { - return std::to_string(x); - }); - } + std::vector numbers_str(array_size); + const auto& numbers = get_float_numbers(array_size); + std::transform( + numbers.begin(), numbers.end(), numbers_str.begin(), [](auto x) { return std::to_string(x); }); return numbers_str; } @@ -111,8 +108,8 @@ void convert_from_float_number(benchmark::State& state) convert_to_float_number(state); \ } \ BENCHMARK_REGISTER_F(StringToFloatNumber, name) \ - ->RangeMultiplier(1 << 5) \ - ->Range(1 << 10, 1 << 20) \ + ->RangeMultiplier(1 << 2) \ + ->Range(1 << 10, 1 << 17) \ ->UseManualTime() \ ->Unit(benchmark::kMicrosecond); @@ -122,8 +119,8 @@ void convert_from_float_number(benchmark::State& state) convert_from_float_number(state); \ } \ BENCHMARK_REGISTER_F(StringFromFloatNumber, name) \ - ->RangeMultiplier(1 << 5) \ - ->Range(1 << 10, 1 << 20) \ + ->RangeMultiplier(1 << 2) \ + ->Range(1 << 10, 1 << 17) \ ->UseManualTime() \ ->Unit(benchmark::kMicrosecond); From 3a7c52eccafab7a7250682aeeb0c714c3629d5ac Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 23 Feb 2021 07:20:33 -0700 Subject: [PATCH 16/19] Update header format for 
cpp/benchmarks/string/convert_floats_benchmark.cpp Co-authored-by: David <45795991+davidwendt@users.noreply.github.com> --- cpp/benchmarks/string/convert_floats_benchmark.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/string/convert_floats_benchmark.cpp b/cpp/benchmarks/string/convert_floats_benchmark.cpp index 870dba81dd4..861aa584754 100644 --- a/cpp/benchmarks/string/convert_floats_benchmark.cpp +++ b/cpp/benchmarks/string/convert_floats_benchmark.cpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" +#include +#include #include From 4fbd4f61a575fc3ade017cfe617c6b563367a316 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Mar 2021 11:09:13 -0700 Subject: [PATCH 17/19] Add a comment to the stod function --- cpp/src/strings/convert/convert_floats.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 6d45de2e090..90dd0f4f6fc 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -63,6 +63,8 @@ __device__ inline double stod(string_view const& d_str) sign = (*in_ptr == '-' ? 
-1 : 1); ++in_ptr; } + + // Parse and store the mantissa as much as we can, until we are about to exceed the limit of uint64_t constexpr uint64_t max_holding = (std::numeric_limits::max() - 9L) / 10L; uint64_t digits = 0; int exp_off = 0; From 0b8633bd2295957cd31dbc29936e1d14ed5fcbed Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Mar 2021 11:18:36 -0700 Subject: [PATCH 18/19] Generate random float numbers by calling create_random_table, and generate columns of strings by calling cudf::strings::from_floats --- .../string/convert_floats_benchmark.cpp | 79 ++++++++----------- 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/cpp/benchmarks/string/convert_floats_benchmark.cpp b/cpp/benchmarks/string/convert_floats_benchmark.cpp index 861aa584754..03d3d4a9439 100644 --- a/cpp/benchmarks/string/convert_floats_benchmark.cpp +++ b/cpp/benchmarks/string/convert_floats_benchmark.cpp @@ -18,88 +18,77 @@ #include #include +#include -#include #include #include #include #include -#include -#include -#include - namespace { - -// For each array_size, this function is called twice from both StringToFloatNumber and -// StringFromFloatNumber classes. Thus, the results are cached for reuse. 
template -static const std::vector& get_float_numbers(int64_t array_size) +std::unique_ptr get_floats_column(int64_t array_size) { - static std::unordered_map> number_arrays; - auto& numbers = number_arrays[array_size]; - if (numbers.size() == 0) { - numbers.reserve(array_size); - cudf::test::UniformRandomGenerator rand_gen(std::numeric_limits::min(), - std::numeric_limits::max()); - std::generate_n( - std::back_inserter(numbers), array_size, [&rand_gen]() { return rand_gen.generate(); }); + std::unique_ptr tbl; + if (sizeof(FloatType) == sizeof(float)) { + tbl = create_random_table( + {cudf::type_id::FLOAT32}, 1, row_count{static_cast(array_size)}); + } else { + tbl = create_random_table( + {cudf::type_id::FLOAT64}, 1, row_count{static_cast(array_size)}); } - return numbers; + return std::move(tbl->release().front()); } -template -static std::vector get_floats_numbers_as_string(int64_t array_size) +std::unique_ptr get_floats_string_column(int64_t array_size) { - std::vector numbers_str(array_size); - const auto& numbers = get_float_numbers(array_size); - std::transform( - numbers.begin(), numbers.end(), numbers_str.begin(), [](auto x) { return std::to_string(x); }); - return numbers_str; + const auto floats = get_floats_column(array_size); + return cudf::strings::from_floats(floats->view()); } - } // anonymous namespace class StringToFloatNumber : public cudf::benchmark { }; + template void convert_to_float_number(benchmark::State& state) { - const auto& h_strings = get_floats_numbers_as_string(state.range(0)); - const auto strings_size = std::accumulate( - h_strings.begin(), h_strings.end(), std::size_t{0}, [](std::size_t size, const auto& str) { - return size + str.length(); - }); - - cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end()); - const auto strings_view = cudf::strings_column_view(strings); + const auto array_size = state.range(0); + const auto strings_col = get_floats_string_column(array_size); + const auto strings_view = 
cudf::strings_column_view(strings_col->view()); for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + cuda_event_timer raii(state, true); volatile auto results = cudf::strings::to_floats(strings_view, cudf::data_type{float_type}); } - state.SetBytesProcessed(state.iterations() * strings_size); + // bytes_processed = bytes_input + bytes_output + state.SetBytesProcessed( + state.iterations() * + (strings_view.chars_size() + array_size * cudf::size_of(cudf::data_type{float_type}))); } class StringFromFloatNumber : public cudf::benchmark { }; + template void convert_from_float_number(benchmark::State& state) { - const auto& h_floats = get_float_numbers(state.range(0)); - const auto floats_size = h_floats.size() * sizeof(FloatType); - - cudf::test::fixed_width_column_wrapper floats(h_floats.begin(), h_floats.end()); - const auto floats_view = cudf::column_view(floats); + const auto array_size = state.range(0); + const auto floats = get_floats_column(array_size); + const auto floats_view = floats->view(); + std::unique_ptr results = nullptr; for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - volatile auto results = cudf::strings::from_floats(floats_view); + results = cudf::strings::from_floats(floats_view); } - state.SetBytesProcessed(state.iterations() * floats_size); + // bytes_processed = bytes_input + bytes_output + state.SetBytesProcessed( + state.iterations() * + (cudf::strings_column_view(results->view()).chars_size() + array_size * sizeof(FloatType))); } #define CV_TO_FLOATS_BENCHMARK_DEFINE(name, float_type_id) \ @@ -108,7 +97,7 @@ void convert_from_float_number(benchmark::State& state) convert_to_float_number(state); \ } \ BENCHMARK_REGISTER_F(StringToFloatNumber, name) \ - ->RangeMultiplier(1 << 2) \ + ->RangeMultiplier(4) \ ->Range(1 << 10, 1 << 17) \ ->UseManualTime() \ ->Unit(benchmark::kMicrosecond); @@ -119,7 +108,7 @@ void 
convert_from_float_number(benchmark::State& state) convert_from_float_number(state); \ } \ BENCHMARK_REGISTER_F(StringFromFloatNumber, name) \ - ->RangeMultiplier(1 << 2) \ + ->RangeMultiplier(4) \ ->Range(1 << 10, 1 << 17) \ ->UseManualTime() \ ->Unit(benchmark::kMicrosecond); From 886fc57f539d1486ced1287d88e30720a7d92357 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Mar 2021 15:15:34 -0700 Subject: [PATCH 19/19] Fix format check --- cpp/src/strings/convert/convert_floats.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 90dd0f4f6fc..2bf65976986 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -64,7 +64,8 @@ __device__ inline double stod(string_view const& d_str) ++in_ptr; } - // Parse and store the mantissa as much as we can, until we are about to exceed the limit of uint64_t + // Parse and store the mantissa as much as we can, + // until we are about to exceed the limit of uint64_t constexpr uint64_t max_holding = (std::numeric_limits::max() - 9L) / 10L; uint64_t digits = 0; int exp_off = 0;