From 2a12af3c3a476cfd7a961525bc4047136be405e8 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 16 May 2023 14:54:36 +0100 Subject: [PATCH] GH-35594: [R][C++] Bump vendored date library (#35612) There was an issue with tzdb 0.4.0 and the shipped arrow `tz.cpp`. This PR bumps the vendored version of the date library to commit `cc4685a21e4a4fdae707ad1233c61bbaff241f93`. * Closes: #35594 Authored-by: Nic Crane Signed-off-by: Antoine Pitrou --- cpp/src/arrow/vendored/datetime/README.md | 2 +- cpp/src/arrow/vendored/datetime/date.h | 6 +- cpp/src/arrow/vendored/datetime/tz.cpp | 186 ++++++++++++++++++---- cpp/src/arrow/vendored/datetime/tz.h | 4 +- 4 files changed, 163 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md index cff53e7e307bd..0dd663c5e5acc 100644 --- a/cpp/src/arrow/vendored/datetime/README.md +++ b/cpp/src/arrow/vendored/datetime/README.md @@ -17,7 +17,7 @@ copies or substantial portions of the Software. Sources for datetime are adapted from Howard Hinnant's date library (https://github.com/HowardHinnant/date). -Sources are taken from changeset 2e19c006e2218447ee31f864191859517603f59f +Sources are taken from changeset cc4685a21e4a4fdae707ad1233c61bbaff241f93 of the above project. The following changes are made: diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h index 3b38b263a8f27..fd2569c6de0f6 100644 --- a/cpp/src/arrow/vendored/datetime/date.h +++ b/cpp/src/arrow/vendored/datetime/date.h @@ -1318,7 +1318,7 @@ CONSTCD11 std::chrono::duration abs(std::chrono::duration d) { - return d >= d.zero() ? d : -d; + return d >= d.zero() ? d : static_cast(-d); } // round down @@ -4208,8 +4208,8 @@ template inline typename std::enable_if < - std::ratio_less::value - , std::basic_ostream& + !std::is_convertible::value, + std::basic_ostream& >::type operator<<(std::basic_ostream& os, const sys_time& tp) { diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp index 9047a31c79a59..6962a8b3c3572 100644 --- a/cpp/src/arrow/vendored/datetime/tz.cpp +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -202,6 +202,35 @@ namespace using co_task_mem_ptr = std::unique_ptr; } +static +std::wstring +convert_utf8_to_utf16(const std::string& s) +{ + std::wstring out; + const int size = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, NULL, 0); + + if (size == 0) + { + std::string msg = "Failed to determine required size when converting \""; + msg += s; + msg += "\" to UTF-16."; + throw std::runtime_error(msg); + } + + out.resize(size); + const int check = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, &out[0], size); + + if (size != check) + { + std::string msg = "Failed to convert \""; + msg += s; + msg += "\" to UTF-16."; + throw std::runtime_error(msg); + } + + return out; +} + // We might need to know certain locations even if not using the remote API, // so keep these routines out of that block for now. static @@ -271,8 +300,90 @@ get_download_folder() # endif // !_WIN32 -#endif // !USE_OS_TZDB +/* + * This class is provided to mimic the following usage of `ifstream`: + * + * std::ifstream is(filename); + * + * file_streambuf ibuf(filename); + * std::istream is(&ibuf); + * + * This is required because `ifstream` does not support opening files + * containing wide characters on Windows. On Windows, `file_streambuf` uses + * `file_open()` to convert the file name to UTF-16 before opening it with + * `_wfopen()`. + * + * Note that this is not an exact re-implementation of `ifstream`, + * but is enough for usage here. + * + * It is partially based on these two implementations: + * - fdinbuf from http://www.josuttis.com/cppcode/fdstream.html + * - stdiobuf https://stackoverflow.com/questions/12342542/convert-file-to-ifstream-c-android-ndk + * + * Apparently MSVC provides non-standard overloads of `ifstream` that support + * a `const wchar_t*` file name, but MinGW does not https://stackoverflow.com/a/822032 + */ +class file_streambuf + : public std::streambuf +{ +private: + FILE* file_; + static const int buffer_size_ = 1024; + char buffer_[buffer_size_]; + +public: + ~file_streambuf() + { + if (file_) + { + ::fclose(file_); + } + } + file_streambuf(const file_streambuf&) = delete; + file_streambuf& operator=(const file_streambuf&) = delete; + + file_streambuf(const std::string& filename) + : file_(file_open(filename)) + { + } + +protected: + virtual + int_type + underflow() + { + if (gptr() == egptr() && file_) + { + const size_t size = ::fread(buffer_, 1, buffer_size_, file_); + setg(buffer_, buffer_, buffer_ + size); + } + return (gptr() == egptr()) + ? traits_type::eof() + : traits_type::to_int_type(*gptr()); + } + +private: + FILE* + file_open(const std::string& filename) + { +# ifdef _WIN32 + std::wstring wfilename = convert_utf8_to_utf16(filename); + FILE* file = ::_wfopen(wfilename.c_str(), L"rb"); +# else // !_WIN32 + FILE* file = ::fopen(filename.c_str(), "rb"); +# endif // _WIN32 + if (file == NULL) + { + std::string msg = "Error opening file \""; + msg += filename; + msg += "\"."; + throw std::runtime_error(msg); + } + return file; + } +}; +#endif // !USE_OS_TZDB namespace arrow_vendored { namespace date @@ -309,9 +420,9 @@ access_install() } void -set_install(const std::string& s) +set_install(const std::string& install) { - access_install() = s; + access_install() = install; } static @@ -565,15 +676,8 @@ load_timezone_mappings_from_xml_file(const std::string& input_path) std::vector mappings; std::string line; - std::ifstream is(input_path); - if (!is.is_open()) - { - // We don't emit file exceptions because that's an implementation detail. - std::string msg = "Error opening time zone mapping file \""; - msg += input_path; - msg += "\"."; - throw std::runtime_error(msg); - } + file_streambuf ibuf(input_path); + std::istream is(&ibuf); auto error = [&input_path, &line_num](const char* info) { @@ -703,7 +807,6 @@ load_timezone_mappings_from_xml_file(const std::string& input_path) } } - is.close(); return mappings; } @@ -2675,16 +2778,16 @@ find_read_and_leap_seconds() std::getline(in, line); if (!line.empty() && line[0] != '#') { - std::istringstream in(line); - in.exceptions(std::ios::failbit | std::ios::badbit); + std::istringstream iss(line); + iss.exceptions(std::ios::failbit | std::ios::badbit); std::string word; - in >> word; + iss >> word; if (word == "Leap") { int y, m, d; - in >> y; - m = static_cast(parse_month(in)); - in >> d; + iss >> y; + m = static_cast(parse_month(iss)); + iss >> d; leap_seconds.push_back(leap_second(sys_days{year{y}/m/d} + days{1}, detail::undocumented{})); } @@ -2709,11 +2812,11 @@ find_read_and_leap_seconds() std::getline(in, line); if (!line.empty() && line[0] != '#') { - std::istringstream in(line); - in.exceptions(std::ios::failbit | std::ios::badbit); + std::istringstream iss(line); + iss.exceptions(std::ios::failbit | std::ios::badbit); using seconds = std::chrono::seconds; seconds::rep s; - in >> s; + iss >> s; if (s == 2272060800) continue; leap_seconds.push_back(leap_second(sys_seconds{seconds{s}} - offset, @@ -2722,6 +2825,7 @@ find_read_and_leap_seconds() } return leap_seconds; } +#if !MISSING_LEAP_SECONDS in.clear(); in.open(get_tz_dir() + std::string(1, folder_delimiter) + "right/UTC", std::ios_base::binary); @@ -2736,6 +2840,7 @@ find_read_and_leap_seconds() { return load_just_leaps(in); } +#endif return {}; } @@ -2842,7 +2947,8 @@ bool file_exists(const std::string& filename) { #ifdef _WIN32 - return ::_access(filename.c_str(), 0) == 0; + std::wstring wfilename = convert_utf8_to_utf16(filename); + return ::_waccess(wfilename.c_str(), 0) == 0; #else return ::access(filename.c_str(), F_OK) == 0; #endif @@ -3419,16 +3525,27 @@ std::string get_version(const std::string& path) { std::string version; - std::ifstream infile(path + "version"); - if (infile.is_open()) + + std::string path_version = path + "version"; + + if (file_exists(path_version)) { + file_streambuf inbuf(path_version); + std::istream infile(&inbuf); + infile >> version; + if (!infile.fail()) return version; } - else + + std::string path_news = path + "NEWS"; + + if (file_exists(path_news)) { - infile.open(path + "NEWS"); + file_streambuf inbuf(path_news); + std::istream infile(&inbuf); + while (infile) { infile >> version; @@ -3439,6 +3556,7 @@ get_version(const std::string& path) } } } + throw std::runtime_error("Unable to get Timezone database version from " + path); } @@ -3510,7 +3628,13 @@ init_tzdb() for (const auto& filename : files) { - std::ifstream infile(path + filename); + std::string file_path = path + filename; + if (!file_exists(file_path)) + { + continue; + } + file_streambuf inbuf(file_path); + std::istream infile(&inbuf); while (infile) { std::getline(infile, line); @@ -3543,6 +3667,10 @@ init_tzdb() { db->zones.back().add(line); } + else if (word.size() > 0 && word[0] == '#') + { + continue; + } else { std::cerr << line << '\n'; @@ -3925,7 +4053,7 @@ tzdb::current_zone() const auto p = result.find("ZONE=\""); if (p != std::string::npos) { - result.erase(p, p+6); + result.erase(0, p+6); result.erase(result.rfind('"')); return locate_zone(result); } diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h index 6d54e49ea22a1..467db6d199793 100644 --- a/cpp/src/arrow/vendored/datetime/tz.h +++ b/cpp/src/arrow/vendored/datetime/tz.h @@ -60,7 +60,7 @@ # else # define HAS_REMOTE_API 1 # endif -# else // HAS_REMOTE_API makes no since when using the OS timezone database +# else // HAS_REMOTE_API makes no sense when using the OS timezone database # define HAS_REMOTE_API 0 # endif #endif @@ -853,7 +853,7 @@ class time_zone load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt, std::int32_t tzh_typecnt, std::int32_t tzh_charcnt); #else // !USE_OS_TZDB - DATE_API sys_info get_info_impl(sys_seconds tp, int timezone) const; + DATE_API sys_info get_info_impl(sys_seconds tp, int tz_int) const; DATE_API void adjust_infos(const std::vector& rules); DATE_API void parse_info(std::istream& in); #endif // !USE_OS_TZDB