Skip to content

Commit

Permalink
apacheGH-35594: [R][C++] Bump vendored date library (apache#35612)
Browse files Browse the repository at this point in the history
There was an issue with tzdb 0.4.0 and the shipped arrow `tz.cpp`.

This PR bumps the vendored version of the date library to commit `cc4685a21e4a4fdae707ad1233c61bbaff241f93`.
* Closes: apache#35594

Authored-by: Nic Crane <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
thisisnic authored and rtpsw committed May 16, 2023
1 parent 530f7fe commit 2a12af3
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 35 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/vendored/datetime/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ copies or substantial portions of the Software.
Sources for datetime are adapted from Howard Hinnant's date library
(https://github.com/HowardHinnant/date).

Sources are taken from changeset 2e19c006e2218447ee31f864191859517603f59f
Sources are taken from changeset cc4685a21e4a4fdae707ad1233c61bbaff241f93
of the above project.

The following changes are made:
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/vendored/datetime/date.h
Original file line number Diff line number Diff line change
Expand Up @@ -1318,7 +1318,7 @@ CONSTCD11
std::chrono::duration<Rep, Period>
abs(std::chrono::duration<Rep, Period> d)
{
return d >= d.zero() ? d : -d;
return d >= d.zero() ? d : static_cast<decltype(d)>(-d);
}

// round down
Expand Down Expand Up @@ -4208,8 +4208,8 @@ template <class CharT, class Traits, class Duration>
inline
typename std::enable_if
<
std::ratio_less<typename Duration::period, days::period>::value
, std::basic_ostream<CharT, Traits>&
!std::is_convertible<Duration, days>::value,
std::basic_ostream<CharT, Traits>&
>::type
operator<<(std::basic_ostream<CharT, Traits>& os, const sys_time<Duration>& tp)
{
Expand Down
186 changes: 157 additions & 29 deletions cpp/src/arrow/vendored/datetime/tz.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,35 @@ namespace
using co_task_mem_ptr = std::unique_ptr<wchar_t[], task_mem_deleter>;
}

static
std::wstring
convert_utf8_to_utf16(const std::string& s)
{
std::wstring out;
const int size = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, NULL, 0);

if (size == 0)
{
std::string msg = "Failed to determine required size when converting \"";
msg += s;
msg += "\" to UTF-16.";
throw std::runtime_error(msg);
}

out.resize(size);
const int check = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, &out[0], size);

if (size != check)
{
std::string msg = "Failed to convert \"";
msg += s;
msg += "\" to UTF-16.";
throw std::runtime_error(msg);
}

return out;
}

// We might need to know certain locations even if not using the remote API,
// so keep these routines out of that block for now.
static
Expand Down Expand Up @@ -271,8 +300,90 @@ get_download_folder()

# endif // !_WIN32

#endif // !USE_OS_TZDB
/*
* This class is provided to mimic the following usage of `ifstream`:
*
* std::ifstream is(filename);
*
* file_streambuf ibuf(filename);
* std::istream is(&ibuf);
*
* This is required because `ifstream` does not support opening files
* containing wide characters on Windows. On Windows, `file_streambuf` uses
* `file_open()` to convert the file name to UTF-16 before opening it with
* `_wfopen()`.
*
* Note that this is not an exact re-implementation of `ifstream`,
* but is enough for usage here.
*
* It is partially based on these two implementations:
* - fdinbuf from http://www.josuttis.com/cppcode/fdstream.html
* - stdiobuf https://stackoverflow.com/questions/12342542/convert-file-to-ifstream-c-android-ndk
*
* Apparently MSVC provides non-standard overloads of `ifstream` that support
* a `const wchar_t*` file name, but MinGW does not https://stackoverflow.com/a/822032
*/
class file_streambuf
: public std::streambuf
{
private:
FILE* file_;
static const int buffer_size_ = 1024;
char buffer_[buffer_size_];

public:
~file_streambuf()
{
if (file_)
{
::fclose(file_);
}
}
file_streambuf(const file_streambuf&) = delete;
file_streambuf& operator=(const file_streambuf&) = delete;

file_streambuf(const std::string& filename)
: file_(file_open(filename))
{
}

protected:
virtual
int_type
underflow()
{
if (gptr() == egptr() && file_)
{
const size_t size = ::fread(buffer_, 1, buffer_size_, file_);
setg(buffer_, buffer_, buffer_ + size);
}
return (gptr() == egptr())
? traits_type::eof()
: traits_type::to_int_type(*gptr());
}

private:
FILE*
file_open(const std::string& filename)
{
# ifdef _WIN32
std::wstring wfilename = convert_utf8_to_utf16(filename);
FILE* file = ::_wfopen(wfilename.c_str(), L"rb");
# else // !_WIN32
FILE* file = ::fopen(filename.c_str(), "rb");
# endif // _WIN32
if (file == NULL)
{
std::string msg = "Error opening file \"";
msg += filename;
msg += "\".";
throw std::runtime_error(msg);
}
return file;
}
};

#endif // !USE_OS_TZDB
namespace arrow_vendored
{
namespace date
Expand Down Expand Up @@ -309,9 +420,9 @@ access_install()
}

void
set_install(const std::string& s)
set_install(const std::string& install)
{
access_install() = s;
access_install() = install;
}

static
Expand Down Expand Up @@ -565,15 +676,8 @@ load_timezone_mappings_from_xml_file(const std::string& input_path)
std::vector<detail::timezone_mapping> mappings;
std::string line;

std::ifstream is(input_path);
if (!is.is_open())
{
// We don't emit file exceptions because that's an implementation detail.
std::string msg = "Error opening time zone mapping file \"";
msg += input_path;
msg += "\".";
throw std::runtime_error(msg);
}
file_streambuf ibuf(input_path);
std::istream is(&ibuf);

auto error = [&input_path, &line_num](const char* info)
{
Expand Down Expand Up @@ -703,7 +807,6 @@ load_timezone_mappings_from_xml_file(const std::string& input_path)
}
}

is.close();
return mappings;
}

Expand Down Expand Up @@ -2675,16 +2778,16 @@ find_read_and_leap_seconds()
std::getline(in, line);
if (!line.empty() && line[0] != '#')
{
std::istringstream in(line);
in.exceptions(std::ios::failbit | std::ios::badbit);
std::istringstream iss(line);
iss.exceptions(std::ios::failbit | std::ios::badbit);
std::string word;
in >> word;
iss >> word;
if (word == "Leap")
{
int y, m, d;
in >> y;
m = static_cast<int>(parse_month(in));
in >> d;
iss >> y;
m = static_cast<int>(parse_month(iss));
iss >> d;
leap_seconds.push_back(leap_second(sys_days{year{y}/m/d} + days{1},
detail::undocumented{}));
}
Expand All @@ -2709,11 +2812,11 @@ find_read_and_leap_seconds()
std::getline(in, line);
if (!line.empty() && line[0] != '#')
{
std::istringstream in(line);
in.exceptions(std::ios::failbit | std::ios::badbit);
std::istringstream iss(line);
iss.exceptions(std::ios::failbit | std::ios::badbit);
using seconds = std::chrono::seconds;
seconds::rep s;
in >> s;
iss >> s;
if (s == 2272060800)
continue;
leap_seconds.push_back(leap_second(sys_seconds{seconds{s}} - offset,
Expand All @@ -2722,6 +2825,7 @@ find_read_and_leap_seconds()
}
return leap_seconds;
}
#if !MISSING_LEAP_SECONDS
in.clear();
in.open(get_tz_dir() + std::string(1, folder_delimiter) + "right/UTC",
std::ios_base::binary);
Expand All @@ -2736,6 +2840,7 @@ find_read_and_leap_seconds()
{
return load_just_leaps(in);
}
#endif
return {};
}

Expand Down Expand Up @@ -2842,7 +2947,8 @@ bool
file_exists(const std::string& filename)
{
#ifdef _WIN32
return ::_access(filename.c_str(), 0) == 0;
std::wstring wfilename = convert_utf8_to_utf16(filename);
return ::_waccess(wfilename.c_str(), 0) == 0;
#else
return ::access(filename.c_str(), F_OK) == 0;
#endif
Expand Down Expand Up @@ -3419,16 +3525,27 @@ std::string
get_version(const std::string& path)
{
std::string version;
std::ifstream infile(path + "version");
if (infile.is_open())

std::string path_version = path + "version";

if (file_exists(path_version))
{
file_streambuf inbuf(path_version);
std::istream infile(&inbuf);

infile >> version;

if (!infile.fail())
return version;
}
else

std::string path_news = path + "NEWS";

if (file_exists(path_news))
{
infile.open(path + "NEWS");
file_streambuf inbuf(path_news);
std::istream infile(&inbuf);

while (infile)
{
infile >> version;
Expand All @@ -3439,6 +3556,7 @@ get_version(const std::string& path)
}
}
}

throw std::runtime_error("Unable to get Timezone database version from " + path);
}

Expand Down Expand Up @@ -3510,7 +3628,13 @@ init_tzdb()

for (const auto& filename : files)
{
std::ifstream infile(path + filename);
std::string file_path = path + filename;
if (!file_exists(file_path))
{
continue;
}
file_streambuf inbuf(file_path);
std::istream infile(&inbuf);
while (infile)
{
std::getline(infile, line);
Expand Down Expand Up @@ -3543,6 +3667,10 @@ init_tzdb()
{
db->zones.back().add(line);
}
else if (word.size() > 0 && word[0] == '#')
{
continue;
}
else
{
std::cerr << line << '\n';
Expand Down Expand Up @@ -3925,7 +4053,7 @@ tzdb::current_zone() const
auto p = result.find("ZONE=\"");
if (p != std::string::npos)
{
result.erase(p, p+6);
result.erase(0, p+6);
result.erase(result.rfind('"'));
return locate_zone(result);
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/vendored/datetime/tz.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
# else
# define HAS_REMOTE_API 1
# endif
# else // HAS_REMOTE_API makes no since when using the OS timezone database
# else // HAS_REMOTE_API makes no sense when using the OS timezone database
# define HAS_REMOTE_API 0
# endif
#endif
Expand Down Expand Up @@ -853,7 +853,7 @@ class time_zone
load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
std::int32_t tzh_typecnt, std::int32_t tzh_charcnt);
#else // !USE_OS_TZDB
DATE_API sys_info get_info_impl(sys_seconds tp, int timezone) const;
DATE_API sys_info get_info_impl(sys_seconds tp, int tz_int) const;
DATE_API void adjust_infos(const std::vector<detail::Rule>& rules);
DATE_API void parse_info(std::istream& in);
#endif // !USE_OS_TZDB
Expand Down

0 comments on commit 2a12af3

Please sign in to comment.