Make conversion of Decimal128 to/from string work for whole range (#4548)
realm · Apr 6, 2021 · 8cda803 · 8cda803
1 parent 0f532f2
commit 8cda803
Show file tree

Hide file tree

Showing 6 changed files with 37 additions and 238 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@
   ([#4166](https://github.com/realm/realm-core/pull/4166))
 * UUID allowed as partition value ([#4500](https://github.com/realm/realm-core/issues/4500))
 * The error message when the intial steps of opening a Realm file fails is now more descriptive.
+* Make conversion of Decimal128 to/from string work for numbers with more than 19 significant digits. ([#4548](https://github.com/realm/realm-core/issues/4548))
 
 ### Fixed
 * <How to hit and notice issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)

diff --git a/Package.swift b/Package.swift
@@ -93,6 +93,8 @@ let package = Package(
                 "bid128_div.c",
                 "bid128_add.c",
                 "bid128_fma.c",
+                "bid128_string.c",
+                "bid128_2_str_tables.c",
                 "bid64_to_bid128.c",
                 "bid_convert_data.c",
                 "bid_decimal_data.c",

diff --git a/src/external/IntelRDFPMathLib20U2/CMakeLists.txt b/src/external/IntelRDFPMathLib20U2/CMakeLists.txt
@@ -5,6 +5,8 @@ LIBRARY/src/bid128_mul.c
 LIBRARY/src/bid128_div.c
 LIBRARY/src/bid128_add.c
 LIBRARY/src/bid128_fma.c
+LIBRARY/src/bid128_string.c
+LIBRARY/src/bid128_2_str_tables.c
 LIBRARY/src/bid64_to_bid128.c
 LIBRARY/src/bid_convert_data.c
 LIBRARY/src/bid_decimal_data.c

diff --git a/src/realm/decimal128.cpp b/src/realm/decimal128.cpp
@@ -28,210 +28,10 @@
 
 namespace {
 constexpr int DECIMAL_EXPONENT_BIAS_128 = 6176;
-constexpr int MAX_STRING_DIGITS = 19;
 } // namespace
 
 namespace realm {
 
-// This is a cut down version of bid128_from_string() from the IntelRDFPMathLib20U2 library.
-// If we can live with only 19 significant digits, we can avoid a lot of complex code
-// as the significant can be stored in w[0] only.
-Decimal128::ParseError Decimal128::from_string(const char* ps) noexcept
-{
-    m_value.w[0] = 0;
-    // if null string, return NaN
-    if (!ps) {
-        m_value.w[1] = 0x7c00000000000000ull;
-        return ParseError::Invalid;
-    }
-    // eliminate leading white space
-    while ((*ps == ' ') || (*ps == '\t'))
-        ps++;
-
-    // c gets first character
-    char c = *ps;
-
-    // set up sign_x to be OR'ed with the upper word later
-    uint64_t sign_x = (c == '-') ? 0x8000000000000000ull : 0;
-
-    // go to next character if leading sign
-    if (c == '-' || c == '+')
-        ps++;
-
-    c = *ps;
-
-    if (tolower(c) == 'i') {
-        // Check for infinity
-        std::string inf = ps;
-        for (auto& chr : inf)
-            chr = tolower(chr);
-        if (inf == "inf" || inf == "infinity") {
-            m_value.w[1] = 0x7800000000000000ull | sign_x;
-            return ParseError::None;
-        }
-    }
-
-    // if c isn't a decimal point or a decimal digit, return NaN
-    if (!(c == '.' || (c >= '0' && c <= '9'))) {
-        m_value.w[1] = 0x7c00000000000000ull | sign_x;
-        return ParseError::Invalid;
-    }
-
-    bool rdx_pt_enc = false;
-    if (c == '.') {
-        rdx_pt_enc = true;
-        ps++;
-    }
-
-    // detect zero (and eliminate/ignore leading zeros)
-    unsigned right_radix_leading_zeros = 0;
-    if (*(ps) == '0') {
-
-        // if all numbers are zeros (with possibly 1 radix point, the number is zero
-        // should catch cases such as: 000.0
-        while (*ps == '0') {
-
-            ps++;
-
-            // for numbers such as 0.0000000000000000000000000000000000001001,
-            // we want to count the leading zeros
-            if (rdx_pt_enc) {
-                right_radix_leading_zeros++;
-            }
-            // if this character is a radix point, make sure we haven't already
-            // encountered one
-            if (*(ps) == '.') {
-                if (!rdx_pt_enc) {
-                    rdx_pt_enc = true;
-                    // if this is the first radix point, and the next character is NULL,
-                    // we have a zero
-                    if (!*(ps + 1)) {
-                        uint64_t tmp = right_radix_leading_zeros;
-                        m_value.w[1] = (0x3040000000000000ull - (tmp << 49)) | sign_x;
-                        return ParseError::None;
-                    }
-                    ps = ps + 1;
-                }
-                else {
-                    // if 2 radix points, return NaN
-                    m_value.w[1] = 0x7c00000000000000ull | sign_x;
-                    return ParseError::Invalid;
-                }
-            }
-            else if (!*(ps)) {
-                if (right_radix_leading_zeros > 6176)
-                    right_radix_leading_zeros = 6176;
-                uint64_t tmp = right_radix_leading_zeros;
-                m_value.w[1] = (0x3040000000000000ull - (tmp << 49)) | sign_x;
-                return ParseError::None;
-            }
-        }
-    }
-
-    c = *ps;
-
-    // initialize local variables
-    char buffer[MAX_STRING_DIGITS];
-    int ndigits_before = 0;
-    int ndigits_total = 0;
-    int sgn_exp = 0;
-    // pstart_coefficient = ps;
-
-    if (!rdx_pt_enc) {
-        // investigate string (before radix point)
-        while (c >= '0' && c <= '9') {
-            if (ndigits_before == MAX_STRING_DIGITS) {
-                return ParseError::TooLongBeforeRadix;
-            }
-            buffer[ndigits_before] = c;
-            ps++;
-            c = *ps;
-            ndigits_before++;
-        }
-
-        ndigits_total = ndigits_before;
-        if (c == '.') {
-            ps++;
-        }
-    }
-
-    if ((c = *ps)) {
-        // investigate string (after radix point)
-        while (c >= '0' && c <= '9') {
-            if (ndigits_total == MAX_STRING_DIGITS) {
-                return ParseError::TooLong;
-            }
-            buffer[ndigits_total] = c;
-            ps++;
-            c = *ps;
-            ndigits_total++;
-        }
-    }
-    int ndigits_after = ndigits_total - ndigits_before;
-
-    // get exponent
-    int dec_expon = 0;
-    if (c) {
-        if (c != 'e' && c != 'E') {
-            // return NaN
-            m_value.w[1] = 0x7c00000000000000ull;
-            return ParseError::Invalid;
-        }
-        ps++;
-        c = *ps;
-        auto c1 = ps[1];
-
-        // Either the next character must be a digit OR it must be either '-' or '+' AND the following
-        // character must be a digit.
-        if (!((c >= '0' && c <= '9') || ((c == '+' || c == '-') && c1 >= '0' && c1 <= '9'))) {
-            // return NaN
-            m_value.w[1] = 0x7c00000000000000ull;
-            return ParseError::Invalid;
-        }
-
-        if (c == '-') {
-            sgn_exp = -1;
-            ps++;
-            c = *ps;
-        }
-        else if (c == '+') {
-            ps++;
-            c = (*ps);
-        }
-
-        dec_expon = c - '0';
-        int i = 1;
-        ps++;
-
-        if (!dec_expon) {
-            while ((*ps) == '0')
-                ps++;
-        }
-        c = *ps;
-
-        while ((c >= '0' && c <= '9') && i < 7) {
-            dec_expon = 10 * dec_expon + (c - '0');
-            ps++;
-            c = *ps;
-            i++;
-        }
-    }
-
-    dec_expon = (dec_expon + sgn_exp) ^ sgn_exp;
-    dec_expon += DECIMAL_EXPONENT_BIAS_128 - ndigits_after - right_radix_leading_zeros;
-    uint64_t coeff = 0;
-    if (ndigits_total > 0) {
-        coeff = buffer[0] - '0';
-        for (int i = 1; i < ndigits_total; i++) {
-            coeff = 10 * coeff + (buffer[i] - '0');
-        }
-    }
-    m_value.w[0] = coeff;
-    uint64_t tmp = dec_expon;
-    m_value.w[1] = sign_x | (tmp << 49);
-    return ParseError::None;
-}
-
 Decimal128 to_decimal128(const BID_UINT128& val)
 {
     Decimal128 tmp;
@@ -253,7 +53,10 @@ Decimal128::Decimal128()
 
 Decimal128::Decimal128(double val)
 {
-    from_string(util::to_string(val).c_str());
+    unsigned flags = 0;
+    BID_UINT128 tmp;
+    bid128_from_string(&tmp, util::to_string(val).data(), &flags);
+    memcpy(this, &tmp, sizeof(*this));
 }
 
 void Decimal128::from_int64_t(int64_t val)
@@ -306,13 +109,10 @@ Decimal128::Decimal128(Bid128 coefficient, int exponent, bool sign)
 
 Decimal128::Decimal128(StringData init)
 {
-    auto ret = from_string(init.data());
-    if (ret == ParseError::TooLongBeforeRadix) {
-        throw std::overflow_error("Too many digits before radix point");
-    }
-    if (ret == ParseError::TooLong) {
-        throw std::overflow_error("Too many digits");
-    }
+    unsigned flags = 0;
+    BID_UINT128 tmp;
+    bid128_from_string(&tmp, const_cast<char*>(init.data()), &flags);
+    memcpy(this, &tmp, sizeof(*this));
 }
 
 Decimal128::Decimal128(null) noexcept
@@ -506,32 +306,34 @@ Decimal128& Decimal128::operator-=(Decimal128 rhs)
 
 bool Decimal128::is_valid_str(StringData str) noexcept
 {
-    return Decimal128().from_string(str.data()) == ParseError::None;
+    unsigned flags = 0;
+    BID_UINT128 tmp;
+    bid128_from_string(&tmp, const_cast<char*>(str.data()), &flags);
+
+    return (tmp.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull;
 }
 
 std::string Decimal128::to_string() const
 {
-    /*
-    char buffer[64];
-    unsigned flags = 0;
-    BID_UINT128 x;
-    memcpy(&x, this, sizeof(Decimal128));
-    bid128_to_string(buffer, &x, &flags);
-    return std::string(buffer);
-    // Reduce precision. Ensures that the result can be stored in a Mixed.
-    // FIXME: Should be seen as a temporary solution
-    BID_UINT64 res1;
-    bid128_to_bid64(&res1, &res, &flags);
-    bid64_to_bid128(&res, &res1, &flags);
-    */
-    // Primitive implementation.
-    // Assumes that the significant is stored in w[0] only.
-
     if (is_null()) {
         return "NULL";
     }
 
-    bool sign = (m_value.w[1] & 0x8000000000000000ull) != 0;
+    Bid128 coefficient;
+    int exponen;
+    bool sign;
+    unpack(coefficient, exponen, sign);
+    if (coefficient.w[1]) {
+        char buffer[64];
+        unsigned flags = 0;
+        BID_UINT128 x;
+        memcpy(&x, this, sizeof(Decimal128));
+        bid128_to_string(buffer, &x, &flags);
+        return std::string(buffer);
+    }
+
+    // The significant is stored in w[0] only. We can get a nicer printout by using this
+    // algorithm here.
     std::string ret;
     if (sign)
         ret = "-";
@@ -547,10 +349,7 @@ std::string Decimal128::to_string() const
         return ret;
     }
 
-    auto digits = util::to_string(m_value.w[0]);
-    int64_t exponen = m_value.w[1] & 0x7fffffffffffffffull;
-    exponen >>= 49;
-    exponen -= DECIMAL_EXPONENT_BIAS_128;
+    auto digits = util::to_string(coefficient.w[0]);
     size_t digits_before = digits.length();
     while (digits_before > 1 && exponen != 0) {
         digits_before--;

diff --git a/src/realm/decimal128.hpp b/src/realm/decimal128.hpp
@@ -114,9 +114,6 @@ class Decimal128 {
 private:
     Bid128 m_value;
 
-    enum class ParseError { None, Invalid, TooLongBeforeRadix, TooLong };
-
-    ParseError from_string(const char* ps) noexcept;
     void from_int64_t(int64_t val);
 };
 

diff --git a/test/test_decimal128.cpp b/test/test_decimal128.cpp
@@ -33,9 +33,6 @@ TEST(Decimal_Basics)
     auto test_str = [&](const std::string& str, const std::string& ref) {
         Decimal128 d = Decimal128(str);
         CHECK_EQUAL(d.to_string(), ref);
-        auto x = d.to_bid64();
-        Decimal128 d1(x);
-        CHECK_EQUAL(d, d1);
     };
     test_str("0", "0");
     test_str("0.000", "0E-3");
@@ -55,8 +52,9 @@ TEST(Decimal_Basics)
     test_str("  0", "0");
     test_str_nan(":");
     test_str_nan("0.0.0");
-    CHECK_THROW(Decimal128("10000000000000000000000000000000000000000000000000.0"), std::overflow_error);
-    CHECK_THROW(Decimal128("1.00000000000000000000000000000000000000000000000001"), std::overflow_error);
+    test_str("9.99e6144", "+9990000000000000000000000000000000E+6111"); // largest decimal128
+    test_str("1.701e38", "1.701E38");                                   // largest float
+    test_str("1.797e308", "1.797E308");                                 // largest double
     test_str_nan("0.0Q1");
     test_str_nan("0.0Eq");
     Decimal128 pi = Decimal128("3.141592653589793238"); // 19 significant digits