From c55ada9659c140104e3366e67024a39f4376ce7b Mon Sep 17 00:00:00 2001 From: Billy O'Neal Date: Wed, 1 Aug 2018 16:39:58 -0700 Subject: [PATCH] Add branchless is_alnum borrowed from MSVC++'s std::regex' _Is_word; should be about 5x faster. (#823) The _Is_word change resulted in the following results in microbenchmarks; the previous is_alnum looks like branching_ranges. .\word_character_test.exe 08/01/18 16:33:03 Running .\word_character_test.exe Run on (12 X 2904 MHz CPU s) CPU Caches: L1 Data 32K (x6) L1 Instruction 32K (x6) L2 Unified 262K (x6) L3 Unified 12582K (x1) -------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- strchr_search 19426572900 ns 19421875000 ns 1 branching_ranges 7582129000 ns 7578125000 ns 1 branching_search 6592977800 ns 6593750000 ns 1 table_index 1091321300 ns 1078125000 ns 1 --- Release/include/cpprest/asyncrt_utils.h | 43 ++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/Release/include/cpprest/asyncrt_utils.h b/Release/include/cpprest/asyncrt_utils.h index 68aa738be0..6abcc9fc34 100644 --- a/Release/include/cpprest/asyncrt_utils.h +++ b/Release/include/cpprest/asyncrt_utils.h @@ -19,6 +19,7 @@ #include #include #include +#include #include "pplx/pplxtasks.h" #include "cpprest/details/basic_types.h" @@ -356,11 +357,45 @@ namespace details /// Our own implementation of alpha numeric instead of std::isalnum to avoid /// taking global lock for performance reasons. /// - inline bool __cdecl is_alnum(char ch) + inline bool __cdecl is_alnum(const unsigned char uch) noexcept + { // test if uch is an alnum character + // special casing char to avoid branches + static constexpr bool is_alnum_table[UCHAR_MAX + 1] = + { + /* X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + /* 0X */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 1X */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 2X */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 3X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0-9 */ + /* 4X */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A-Z */ + /* 5X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + /* 6X */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* a-z */ + /* 7X */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 + /* non-ASCII values initialized to 0 */ + }; + return (is_alnum_table[uch]); + } + + /// + /// Our own implementation of alpha numeric instead of std::isalnum to avoid + /// taking global lock for performance reasons. + /// + inline bool __cdecl is_alnum(const char ch) noexcept + { + return (is_alnum(static_cast(ch))); + } + + /// + /// Our own implementation of alpha numeric instead of std::isalnum to avoid + /// taking global lock for performance reasons. + /// + template + inline bool __cdecl is_alnum(Elem ch) noexcept { - return (ch >= '0' && ch <= '9') - || (ch >= 'A' && ch <= 'Z') - || (ch >= 'a' && ch <= 'z'); + // assumes 'x' == L'x' for the ASCII range + typedef typename std::make_unsigned::type UElem; + const auto uch = static_cast(ch); + return (uch <= static_cast('z') && is_alnum(static_cast(uch))); } ///