Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

locale aware double->string conversions #467

Merged
merged 12 commits into from
Jun 8, 2020
34 changes: 25 additions & 9 deletions include/xlnt/utils/numeric.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,12 @@ class number_serialiser

public:
explicit number_serialiser()
: should_convert_comma(std::use_facet<std::numpunct<char>>(std::locale{}).decimal_point() == ',')
: should_convert_comma(localeconv()->decimal_point[0] == ',')
{
}

// for printing to file.
// This matches the output format of excel irrespective of current locale
std::string serialise(double d) const
{
char buf[30];
Expand All @@ -144,29 +146,43 @@ class number_serialiser
return std::string(buf, static_cast<size_t>(len));
}

double deserialise(std::string &s) const noexcept
// replacement for std::to_string / s*printf("%f", ...)
// behaves same irrespective of locale
std::string serialise_short(double d) const
{
assert(!s.empty());
char buf[30];
int len = snprintf(buf, sizeof(buf), "%f", d);
if (should_convert_comma)
{
// s.data() doesn't have a non-const overload until c++17, hence this little dance
convert_pt_to_comma(&s[0], s.size());
convert_comma_to_pt(buf, len);
}
return strtod(s.c_str(), nullptr);
return std::string(buf, static_cast<size_t>(len));
}

double deserialise(const std::string &s) const
double deserialise(const std::string &s, ptrdiff_t *len_converted) const
Copy link
Collaborator Author

@Crzyrndm Crzyrndm Apr 26, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change here allows deserialise to be used anywhere std::strtod was used. Matching the char** param is however not possible due to the temporary buffer used when locale conversion is required. In the one case this was required for, the number of characters converted was what was being checked anyway.

{
assert(!s.empty());
assert(len_converted != nullptr);
char *end_of_convert;
if (!should_convert_comma)
{
return strtod(s.c_str(), nullptr);
double d = strtod(s.c_str(), &end_of_convert);
*len_converted = end_of_convert - s.c_str();
return d;
}
char buf[30];
assert(s.size() < sizeof(buf));
auto copy_end = std::copy(s.begin(), s.end(), buf);
convert_pt_to_comma(buf, static_cast<size_t>(copy_end - buf));
return strtod(buf, nullptr);
double d = strtod(buf, &end_of_convert);
*len_converted = end_of_convert - buf;
return d;
}

double deserialise(const std::string &s) const
{
ptrdiff_t ignore;
return deserialise(s, &ignore);
}
};

Expand Down
15 changes: 7 additions & 8 deletions source/cell/cell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,16 @@
#include <detail/implementations/hyperlink_impl.hpp>
#include <detail/implementations/stylesheet.hpp>
#include <detail/implementations/worksheet_impl.hpp>
#include <xlnt/utils/numeric.hpp>

namespace {

std::pair<bool, double> cast_numeric(const std::string &s)
{
auto str_end = static_cast<char *>(nullptr);
auto result = std::strtod(s.c_str(), &str_end);

return (str_end != s.c_str() + s.size())
xlnt::detail::number_serialiser ser;
ptrdiff_t len_convert;
double result = ser.deserialise(s, &len_convert);
return (len_convert != static_cast<ptrdiff_t>(s.size()))
? std::make_pair(false, 0.0)
: std::make_pair(true, result);
}
Expand Down Expand Up @@ -108,7 +109,7 @@ std::pair<bool, xlnt::time> cast_time(const std::string &s)
}

std::vector<double> numeric_components;

xlnt::detail::number_serialiser ser;
for (auto component : time_components)
{
if (component.empty() || (component.substr(0, component.find('.')).size() > 2))
Expand All @@ -123,9 +124,7 @@ std::pair<bool, xlnt::time> cast_time(const std::string &s)
return {false, result};
}
}

auto without_leading_zero = component.front() == '0' ? component.substr(1) : component;
auto numeric = std::stod(without_leading_zero);
auto numeric = ser.deserialise(component);

numeric_components.push_back(numeric);
}
Expand Down
38 changes: 16 additions & 22 deletions source/detail/number_format/number_formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <cmath>

#include <xlnt/utils/exceptions.hpp>
#include <xlnt/utils/numeric.hpp>
#include <detail/default_case.hpp>
#include <detail/number_format/number_formatter.hpp>

Expand Down Expand Up @@ -622,7 +623,8 @@ void number_format_parser::parse()
value = token.string.substr(1);
}

section.condition.value = std::stod(value);
detail::number_serialiser ser;
section.condition.value = ser.deserialise(value);
break;
}

Expand Down Expand Up @@ -1565,19 +1567,16 @@ std::string number_formatter::fill_placeholders(const format_placeholders &p, do
if (p.type == format_placeholders::placeholders_type::general
|| p.type == format_placeholders::placeholders_type::text)
{
result = std::to_string(number);

while (result.back() == '0')
auto s = serialiser_.serialise_short(number);
while (s.size() > 1 && s.back() == '0')
{
result.pop_back();
s.pop_back();
}

if (result.back() == '.')
if (s.back() == '.')
{
result.pop_back();
s.pop_back();
}

return result;
return s;
}

if (p.percentage)
Expand Down Expand Up @@ -1636,21 +1635,22 @@ std::string number_formatter::fill_placeholders(const format_placeholders &p, do
auto fractional_part = number - integer_part;
result = std::fabs(fractional_part) < std::numeric_limits<double>::min()
? std::string(".")
: std::to_string(fractional_part).substr(1);
: serialiser_.serialise_short(fractional_part).substr(1);

while (result.back() == '0' || result.size() > (p.num_zeros + p.num_optionals + p.num_spaces + 1))
{
result.pop_back();
}

while (result.size() < p.num_zeros + 1)

if (result.size() < p.num_zeros + 1)
{
result.push_back('0');
result.resize(p.num_zeros + 1, '0');
}

while (result.size() < p.num_zeros + p.num_optionals + p.num_spaces + 1)
if (result.size() < p.num_zeros + p.num_optionals + p.num_spaces + 1)
{
result.push_back(' ');
result.resize(p.num_zeros + p.num_optionals + p.num_spaces + 1, ' ');
}

if (p.percentage)
Expand Down Expand Up @@ -1689,13 +1689,7 @@ std::string number_formatter::fill_scientific_placeholders(const format_placehol
integer_string = std::string(integer_part.num_zeros + integer_part.num_optionals, '0');
}

std::string fractional_string = std::to_string(fraction).substr(1);

while (fractional_string.size() > fractional_part.num_zeros + fractional_part.num_optionals + 1)
{
fractional_string.pop_back();
}

std::string fractional_string = serialiser_.serialise_short(fraction).substr(1, fractional_part.num_zeros + fractional_part.num_optionals + 1);
std::string exponent_string = std::to_string(logarithm);

while (exponent_string.size() < fractional_part.num_zeros)
Expand Down
2 changes: 2 additions & 0 deletions source/detail/number_format/number_formatter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <vector>

#include <xlnt/utils/datetime.hpp>
#include <xlnt/utils/numeric.hpp>

namespace xlnt {
namespace detail {
Expand Down Expand Up @@ -691,6 +692,7 @@ class XLNT_API number_formatter
number_format_parser parser_;
std::vector<format_code> format_;
xlnt::calendar calendar_;
xlnt::detail::number_serialiser serialiser_;
};

} // namespace detail
Expand Down
96 changes: 96 additions & 0 deletions source/detail/serialization/serialisation_helpers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#ifndef XLNT_DETAIL_SERIALISATION_HELPERS_HPP
#define XLNT_DETAIL_SERIALISATION_HELPERS_HPP
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was moved out of xlsx_consumer.cpp as a bit of cleanup from #421
I was orginally thinking about making similar changes to the serialiser but decided against it. This remains as a separate file as it probably always should have been (in general, consumers is too large already)


#include <xlnt/cell/cell_type.hpp>
#include <xlnt/cell/index_types.hpp>
#include <string>

namespace xlnt {
namespace detail {

/// parsing assumptions used by the following functions
/// - on entry, the start element for the element has been consumed by parser->next
/// - on exit, the closing element has been consumed by parser->next
/// using these assumptions, the following functions DO NOT use parser->peek (SLOW!!!)
/// probable further gains from not building an attribute map and using the attribute events instead as the impl just iterates the map

/// 'r' == cell reference e.g. 'A1'
/// https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/db11a912-b1cb-4dff-b46d-9bedfd10cef0
///
/// a lightweight version of xlnt::cell_reference with no extre functionality (absolute/relative, ...)
/// many thousands are created during (de)serialisation, so even minor overhead is noticable
struct Cell_Reference
{
// the obvious ctor
explicit Cell_Reference(xlnt::row_t row_arg, xlnt::column_t::index_t column_arg) noexcept
: row(row_arg), column(column_arg)
{
}

// the common case. row # is already known during parsing (from parent <row> element)
// just need to evaluate the column
explicit Cell_Reference(xlnt::row_t row_arg, const std::string &reference) noexcept
: row(row_arg)
{
// only three characters allowed for the column
// assumption:
// - regex pattern match: [A-Z]{1,3}\d{1,7}
const char *iter = reference.c_str();
int temp = *iter - 'A' + 1; // 'A' == 1
++iter;
if (*iter >= 'A') // second char
{
temp *= 26; // LHS values are more significant
temp += *iter - 'A' + 1; // 'A' == 1
++iter;
if (*iter >= 'A') // third char
{
temp *= 26; // LHS values are more significant
temp += *iter - 'A' + 1; // 'A' == 1
}
}
column = static_cast<xlnt::column_t::index_t>(temp);
}

// for sorting purposes
bool operator<(const Cell_Reference &rhs)
{
// row first, serialisation is done by row then column
if (row < rhs.row)
{
return true;
}
else if (rhs.row < row)
{
return false;
}
// same row, column comparison
return column < rhs.column;
}

xlnt::row_t row; // range:[1, 1048576]
xlnt::column_t::index_t column; // range:["A", "ZZZ"] -> [1, 26^3] -> [1, 17576]
};

// <c> inside <row> element
// https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.cell?view=openxml-2.8.1
struct Cell
{
// sort cells by location, row first
bool operator<(const Cell &rhs)
{
return ref < rhs.ref;
}

bool is_phonetic = false; // 'ph'
xlnt::cell_type type = xlnt::cell_type::number; // 't'
int cell_metatdata_idx = -1; // 'cm'
int style_index = -1; // 's'
Cell_Reference ref{0, 0}; // 'r'
std::string value; // <v> OR <is>
std::string formula_string; // <f>
};

} // namespace detail
} // namespace xlnt
#endif
Loading