-
Notifications
You must be signed in to change notification settings - Fork 429
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add micro benchmarking project, setup number parsing benchmark
Relating to previous work in #422. Results match what was observed at the time that work was being done.

std::from_chars is included as the target to beat, but since only MSVC has it for floating point it's not hugely useful yet.

A uniform real distribution is probably a horrible choice, and it might be good to randomise the number of significant figures in each string as well (currently they all end up at max length).

Run on (4 X 3500 MHz CPU s)
CPU Caches:
  L1 Data 32K (x4)
  L1 Instruction 32K (x4)
  L2 Unified 262K (x4)
  L3 Unified 6291K (x1)

| Benchmark | Time | CPU | Iterations |
|---|---|---|---|
| RandFloats/double_from_string_sstream | 804 ns | 820 ns | 896000 |
| RandFloats/double_from_string_strtod | 163 ns | 162 ns | 5973333 |
| RandFloats/double_from_string_strtod_fixed | 175 ns | 172 ns | 5352107 |
| RandFloats/double_from_string_strtod_fixed_const_ref | 150 ns | 152 ns | 5352107 |
| RandFloats/double_from_string_std_from_chars | 87.1 ns | 88.3 ns | 9557333 |
| RandFloatsComma/double_from_string_strtod_fixed_comma_ref | 172 ns | 173 ns | 5146257 |
| RandFloatsComma/double_from_string_strtod_fixed_comma_const_ref | 180 ns | 175 ns | 5352107 |
- Loading branch information
Showing
4 changed files
with
262 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# FetchContent added in cmake v3.11
# https://cmake.org/cmake/help/v3.11/module/FetchContent.html
# this file is behind a feature flag (XLNT_MICROBENCH_ENABLED) so the primary build is not affected
cmake_minimum_required(VERSION 3.11)
project(xlnt_ubench)

# acquire google benchmark dependency
# The switches are written to the cache (FORCE) rather than set as normal variables:
# google benchmark declares them with option(), and unless policy CMP0077 is NEW in
# that subproject a normal variable of the same name is discarded by option().
# disable generation of the various test projects
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable google benchmark's own test projects" FORCE)
# gtest not required
set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "Disable google benchmark's gtest based tests" FORCE)

include(FetchContent)
FetchContent_Declare(
  googlebenchmark
  GIT_REPOSITORY https://github.com/google/benchmark
  GIT_TAG v1.5.0
)
# download if not already present, then add it to the build.
# This is the CMake 3.11 compatible spelling of FetchContent_MakeAvailable(), which
# only exists from CMake 3.14 onwards and therefore must not be called here.
FetchContent_GetProperties(googlebenchmark)
if(NOT googlebenchmark_POPULATED)
  FetchContent_Populate(googlebenchmark)
  add_subdirectory(${googlebenchmark_SOURCE_DIR} ${googlebenchmark_BINARY_DIR})
endif()

add_executable(xlnt_ubench)
target_sources(xlnt_ubench
  PRIVATE
    string_to_double.cpp
    double_to_string.cpp
)
# benchmark_main supplies main(); nothing links against this executable, so PRIVATE is sufficient
target_link_libraries(xlnt_ubench PRIVATE benchmark_main xlnt)
target_compile_features(xlnt_ubench PRIVATE cxx_std_17)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#include "benchmark/benchmark.h" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
// A core part of the xlsx parsing routine is taking strings from the xml parser and parsing these to a double
// this has a few requirements
// - expect numbers in the form 1234.56 (i.e. no thousands separator, '.' used for the decimal separator)
// - handles at least 15 significant figures (excel only serialises numbers up to 15sf)
|
||
#include <benchmark/benchmark.h>

#include <algorithm>
#include <cassert>
#include <clocale>
#include <cstddef>
#include <cstdio>
#include <locale>
#include <random>
#include <string>
#include <vector>
|
||
// setup a large quantity of random doubles as strings | ||
template <bool Decimal_Locale = true> | ||
class RandomFloats : public benchmark::Fixture | ||
{ | ||
static constexpr size_t Number_of_Elements = 1 << 20; | ||
static_assert(Number_of_Elements > 1'000'000, "ensure a decent set of random values is generated"); | ||
|
||
std::vector<std::string> inputs; | ||
|
||
size_t index = 0; | ||
const char *locale_str; | ||
|
||
public: | ||
void SetUp(const ::benchmark::State &state) | ||
{ | ||
if (Decimal_Locale) | ||
{ | ||
locale_str = setlocale(LC_ALL, "C"); | ||
} | ||
else | ||
{ | ||
locale_str = setlocale(LC_ALL, "de-DE"); | ||
} | ||
std::random_device rd; // obtain a seed for the random number engine | ||
std::mt19937 gen(rd()); | ||
// doing full range is stupid (<double>::min/max()...), it just ends up generating very large numbers | ||
// uniform is probably not the best distribution to use here, but it will do for now | ||
std::uniform_real_distribution<double> dis(-1'000, 1'000); | ||
// generate a large quantity of doubles to deserialise | ||
inputs.reserve(Number_of_Elements); | ||
for (int i = 0; i < Number_of_Elements; ++i) | ||
{ | ||
double d = dis(gen); | ||
char buf[16]; | ||
snprintf(buf, 16, "%.15f", d); | ||
inputs.push_back(std::string(buf)); | ||
} | ||
} | ||
|
||
void TearDown(const ::benchmark::State &state) | ||
{ | ||
// restore locale | ||
setlocale(LC_ALL, locale_str); | ||
// gbench is keeping the fixtures alive somewhere, need to clear the data... | ||
inputs = std::vector<std::string>{}; | ||
} | ||
|
||
std::string &get_rand() | ||
{ | ||
return inputs[++index & Number_of_Elements]; | ||
} | ||
}; | ||
|
||
// method used by xlsx_consumer.cpp in commit - ba01de47a7d430764c20ec9ac9600eec0eb38bcf | ||
// std::istringstream with the locale set to "C" | ||
#include <sstream> | ||
// Stream based string -> double parser.
// Owns a single std::istringstream that is imbued with the "C" locale once and then
// reused for every conversion.
struct number_converter
{
    number_converter()
    {
        const std::locale c_locale("C");
        stream.imbue(c_locale);
    }

    // Parses s and returns the extracted value; the value is also left in `result`.
    double stold(const std::string &s)
    {
        // drop any eof/fail state left behind by the previous extraction and
        // point the stream at the new text before reading from it
        stream.clear();
        stream.str(s);
        stream >> result;
        return result;
    }

    // reused parsing stream
    std::istringstream stream;
    // value produced by the most recent stold() call
    double result;
};
|
||
using RandFloats = RandomFloats<true>; | ||
|
||
BENCHMARK_F(RandFloats, double_from_string_sstream) | ||
(benchmark::State &state) | ||
{ | ||
number_converter converter; | ||
while (state.KeepRunning()) | ||
{ | ||
benchmark::DoNotOptimize( | ||
converter.stold(get_rand())); | ||
} | ||
} | ||
|
||
// using strtod
// https://en.cppreference.com/w/cpp/string/byte/strtof | ||
// this naive usage is broken in the face of locales (fails condition 1) | ||
#include <cstdlib> | ||
// Plain strtod on the raw input, with no handling of the locale's decimal point.
BENCHMARK_F(RandFloats, double_from_string_strtod)
(benchmark::State &state)
{
    // range-for is the lower-overhead way to drive a benchmark loop compared to KeepRunning()
    for (auto _ : state)
    {
        // DoNotOptimize keeps the compiler from discarding the unused result
        benchmark::DoNotOptimize(strtod(get_rand().c_str(), nullptr));
    }
}
|
||
// to resolve the locale issue with strtod, a little preprocessing of the input is required | ||
// strtod based parser for inputs that always use '.' as the decimal separator.
// strtod expects the decimal point of the active C locale, so when that locale uses ','
// the first '.' of the input is swapped for ',' before parsing.
struct number_converter_mk2
{
    // Records, once at construction, whether the active C locale uses ',' as its decimal point.
    // The C locale (setlocale) is queried rather than the global std::locale because that is
    // the locale strtod obeys; changing the locale afterwards requires a new converter.
    explicit number_converter_mk2()
        : should_convert_to_comma(c_locale_uses_comma())
    {
    }

    // In-place variant: may rewrite the first '.' of s to ',' (s stays modified afterwards).
    // s must not be empty. Returns the parsed value (0.0 if nothing could be parsed, as per strtod).
    double stold(std::string &s) const noexcept
    {
        assert(!s.empty());
        if (should_convert_to_comma)
        {
            swap_first_point_for_comma(s);
        }
        return std::strtod(s.c_str(), nullptr);
    }

    // Non-mutating variant: works on a copy of s, and only when a substitution is needed.
    // s must not be empty. Returns the parsed value (0.0 if nothing could be parsed, as per strtod).
    double stold(const std::string &s) const
    {
        assert(!s.empty());
        if (!should_convert_to_comma)
        {
            // fast path: the input is already in the form strtod expects
            return std::strtod(s.c_str(), nullptr);
        }
        std::string copy(s);
        swap_first_point_for_comma(copy);
        return std::strtod(copy.c_str(), nullptr);
    }

private:
    // True when the decimal point reported by the active C locale starts with ','.
    static bool c_locale_uses_comma() noexcept
    {
        const std::lconv *conv = std::localeconv();
        return conv != nullptr && conv->decimal_point != nullptr && conv->decimal_point[0] == ',';
    }

    // Replaces the first '.' in text (if any) with ','.
    static void swap_first_point_for_comma(std::string &text) noexcept
    {
        const auto decimal_pt = std::find(text.begin(), text.end(), '.');
        if (decimal_pt != text.end())
        {
            *decimal_pt = ',';
        }
    }

    bool should_convert_to_comma = false;
};
|
||
// Locale-corrected strtod. get_rand() yields a non-const reference, so this selects the
// in-place stold(std::string &) overload of number_converter_mk2.
BENCHMARK_F(RandFloats, double_from_string_strtod_fixed)
(benchmark::State &state)
{
    number_converter_mk2 converter;
    // range-for is the lower-overhead way to drive a benchmark loop compared to KeepRunning()
    for (auto _ : state)
    {
        // DoNotOptimize keeps the compiler from discarding the unused result
        benchmark::DoNotOptimize(converter.stold(get_rand()));
    }
}
|
||
// Locale-corrected strtod through a const reference, which selects the non-mutating
// stold(const std::string &) overload of number_converter_mk2.
BENCHMARK_F(RandFloats, double_from_string_strtod_fixed_const_ref)
(benchmark::State &state)
{
    number_converter_mk2 converter;
    // range-for is the lower-overhead way to drive a benchmark loop compared to KeepRunning()
    for (auto _ : state)
    {
        const std::string &inp = get_rand();
        // DoNotOptimize keeps the compiler from discarding the unused result
        benchmark::DoNotOptimize(converter.stold(inp));
    }
}
|
||
// locale names are different between OS's, and std::from_chars is only complete in MSVC | ||
#ifdef _MSC_VER | ||
|
||
#include <charconv> | ||
// std::from_chars: the locale-independent "target to beat".
BENCHMARK_F(RandFloats, double_from_string_std_from_chars)
(benchmark::State &state)
{
    // range-for is the lower-overhead way to drive a benchmark loop compared to KeepRunning()
    for (auto _ : state)
    {
        const std::string &input = get_rand();
        // initialised so the variable never holds an indeterminate value if parsing fails
        double output = 0.0;
        // DoNotOptimize on the returned result keeps the call from being optimised away
        benchmark::DoNotOptimize(
            std::from_chars(input.data(), input.data() + input.size(), output));
    }
}
|
||
// not using the standard "C" locale with '.' separator
// german locale uses ',' as the separator
using RandFloatsComma = RandomFloats<false>; | ||
BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_ref) | ||
(benchmark::State &state) | ||
{ | ||
number_converter_mk2 converter; | ||
while (state.KeepRunning()) | ||
{ | ||
benchmark::DoNotOptimize( | ||
converter.stold(get_rand())); | ||
} | ||
} | ||
|
||
// Locale-corrected strtod under the comma locale through a const reference, which selects
// the non-mutating stold(const std::string &) overload of number_converter_mk2.
BENCHMARK_F(RandFloatsComma, double_from_string_strtod_fixed_comma_const_ref)
(benchmark::State &state)
{
    number_converter_mk2 converter;
    // range-for is the lower-overhead way to drive a benchmark loop compared to KeepRunning()
    for (auto _ : state)
    {
        const std::string &inp = get_rand();
        // DoNotOptimize keeps the compiler from discarding the unused result
        benchmark::DoNotOptimize(converter.stold(inp));
    }
}
|
||
#endif |