Skip to content

Commit

Permalink
ARROW-10328: [C++] Vendor fast_float number parsing library
Browse files Browse the repository at this point in the history
This library is 2x to 3x faster for parsing strings to binary floating-point numbers.

Closes #8494 from pitrou/ARROW-10328-fast-float

Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
pitrou committed Oct 21, 2020
1 parent bb4f2a0 commit 16412a1
Show file tree
Hide file tree
Showing 10 changed files with 1,996 additions and 56 deletions.
8 changes: 8 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2223,3 +2223,11 @@ exception of some code pulled in from other repositories (such as
public domain, released using the CC0 1.0 Universal dedication (*).

(*) https://creativecommons.org/publicdomain/zero/1.0/legalcode

--------------------------------------------------------------------------------

The files in cpp/src/arrow/vendored/fast_float/ contain code from

https://github.com/lemire/fast_float

which is made available under the Apache License 2.0.
61 changes: 5 additions & 56 deletions cpp/src/arrow/util/value_parsing.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,70 +20,19 @@
#include <string>
#include <utility>

#include "arrow/util/double_conversion.h"
#include "arrow/vendored/fast_float/fast_float.h"

namespace arrow {
namespace internal {

namespace {

// Bundles the two double-conversion parsers used by the double-conversion based
// StringToFloat code in this file.
//
// Both converters are constructed with the same flags and the same "inf" /
// "nan" symbols; they differ only in the junk value handed to the constructor.
// The StringToFloat overloads in this file call main_converter_ first and retry
// with fallback_converter_ only when the result compares equal to
// main_junk_value_; parsing is reported as failed only if the retry also yields
// fallback_junk_value_. This lets an input whose real value happens to equal
// one of the junk values still be accepted.
//
// NOTE(review): this file also includes the vendored fast_float header and the
// StringToFloat overloads contain fast_float::from_chars statements -- confirm
// whether this double-conversion helper is still needed.
struct StringToFloatConverterImpl {
// Builds both converters. Each junk value is passed twice; presumably these are
// the "empty string" and "junk string" results of StringToDoubleConverter --
// TODO confirm against the double-conversion header.
StringToFloatConverterImpl()
: main_converter_(flags_, main_junk_value_, main_junk_value_, "inf", "nan"),
fallback_converter_(flags_, fallback_junk_value_, fallback_junk_value_, "inf",
"nan") {}

// Flags shared by both converters.
// NOTE: This is only supported in double-conversion 3.1+
static constexpr int flags_ =
util::double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY;

// Two unlikely values to signal a parsing error
// (two distinct values are needed so that the fallback converter can
// disambiguate an input that really parses to the main junk value).
static constexpr double main_junk_value_ = 0.7066424364107089;
static constexpr double fallback_junk_value_ = 0.40088499148279166;

// Declaration order matters: members are initialized in this order by the
// constructor's initializer list above.
util::double_conversion::StringToDoubleConverter main_converter_;
util::double_conversion::StringToDoubleConverter fallback_converter_;
};

// Single const instance of the converter bundle; the double-conversion based
// statements in the StringToFloat overloads below read its converters and junk
// values.
static const StringToFloatConverterImpl g_string_to_float;

// Older clang versions need an explicit implementation definition.
// (These are the out-of-class definitions of the static constexpr members
// declared inside StringToFloatConverterImpl.)
constexpr double StringToFloatConverterImpl::main_junk_value_;
constexpr double StringToFloatConverterImpl::fallback_junk_value_;

} // namespace

// Parse the `length` bytes starting at `s` as a single-precision float.
//
// \param s       pointer to the first character of the input (need not be
//                NUL-terminated; only [s, s + length) is passed to the parser)
// \param length  number of characters to parse
// \param out     destination for the parsed value; must not be null because it
//                is dereferenced unconditionally
// \return true only if the vendored fast_float parser reports no error
//         (`ec == std::errc()`) AND its end pointer equals `s + length`, i.e.
//         the whole input was consumed. Any trailing unparsed characters make
//         the call return false.
//
// The previous body merged two implementations: a double-conversion path that
// ended in `return true;`, followed by the fast_float statements, which were
// therefore unreachable. Only the fast_float implementation is kept. This also
// removes the `static_cast<int>(length)` narrowing and the junk-value sentinel
// comparisons.
//
// NOTE(review): whether `*out` is left untouched on failure depends on
// fast_float::from_chars -- callers should not rely on it without checking.
bool StringToFloat(const char* s, size_t length, float* out) {
  const char* const end = s + length;
  const auto res = ::arrow_vendored::fast_float::from_chars(s, end, *out);
  return res.ec == std::errc() && res.ptr == end;
}

// Parse the `length` bytes starting at `s` as a double-precision float.
//
// \param s       pointer to the first character of the input (need not be
//                NUL-terminated; only [s, s + length) is passed to the parser)
// \param length  number of characters to parse
// \param out     destination for the parsed value; must not be null because it
//                is dereferenced unconditionally
// \return true only if the vendored fast_float parser reports no error
//         (`ec == std::errc()`) AND its end pointer equals `s + length`, i.e.
//         the whole input was consumed. Any trailing unparsed characters make
//         the call return false.
//
// The previous body merged two implementations: a double-conversion path that
// ended in `return true;`, followed by the fast_float statements, which were
// therefore unreachable. Only the fast_float implementation is kept. This also
// removes the `static_cast<int>(length)` narrowing and the junk-value sentinel
// comparisons.
//
// NOTE(review): whether `*out` is left untouched on failure depends on
// fast_float::from_chars -- callers should not rely on it without checking.
bool StringToFloat(const char* s, size_t length, double* out) {
  const char* const end = s + length;
  const auto res = ::arrow_vendored::fast_float::from_chars(s, end, *out);
  return res.ec == std::errc() && res.ptr == end;
}

// ----------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/arrow/vendored/fast_float/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
The files in this directory are vendored from the fast_float library,
at git changeset `dc46ad4c606dc35cb63c947496a18ef8ab1e0f44`.

See https://github.com/lemire/fast_float

Changes:
- fixed include paths
- disabled unused `print()` function
- enclosed in `arrow_vendored` namespace
Loading

0 comments on commit 16412a1

Please sign in to comment.