From 5a7a5bcb681fefcf006ed7657b67ac343dc9ffe9 Mon Sep 17 00:00:00 2001 From: Elias Kosunen Date: Sun, 3 Nov 2024 15:24:45 +0200 Subject: [PATCH] Fix unnecessary blocking in `scn::input` --- CHANGELOG.md | 1 + include/scn/scan.h | 319 ++++++++++++++++++++++- src/scn/impl.cpp | 371 +-------------------------- src/scn/impl.h | 107 ++++++++ tests/unittests/stdin_test.cpp | 19 +- tests/unittests/stdin_test_input.txt | 2 + 6 files changed, 446 insertions(+), 373 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ded35363..8ba5bd87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ _Released 2024-11-xx_ * Fix formatting options of user-defined types sometimes being ignored + * Fix unnecessary blocking in `scn::input` * Fix usage of `std::regex_constants::multiline` on libstdc++ v11 (#130, thanks [@jiayuehua (Jia Yue Hua)](https://github.com/jiayuehua)) * Fix build failures on Emscripten * Update documentation to have a version-dropdown diff --git a/include/scn/scan.h b/include/scn/scan.h index 9b408a29..87270482 100644 --- a/include/scn/scan.h +++ b/include/scn/scan.h @@ -4074,14 +4074,15 @@ class basic_scan_buffer { virtual bool fill() = 0; - virtual void sync(std::ptrdiff_t position) + virtual bool sync(std::ptrdiff_t position) { SCN_UNUSED(position); + return true; } - void sync_all() + bool sync_all() { - sync(0); + return sync(0); } SCN_NODISCARD std::ptrdiff_t chars_available() const @@ -4450,21 +4451,323 @@ class basic_scan_forward_buffer_impl template basic_scan_forward_buffer_impl(const R&) -> basic_scan_forward_buffer_impl; -class scan_file_buffer : public basic_scan_buffer { +struct default_file_tag {}; +struct gnu_file_tag {}; +struct bsd_file_tag {}; +struct musl_file_tag {}; +struct win32_file_tag {}; + +// Non-pretty workaround for MSVC silliness +template +inline constexpr bool is_gnu_file = false; +template +inline constexpr bool + is_gnu_file> = true; + +template +inline constexpr bool is_bsd_file = false; +template +inline constexpr bool is_bsd_file< + F, + std::void_t> = + true; + +template +inline constexpr bool is_musl_file = false; +template +inline constexpr bool is_musl_file< + F, + std::void_t> = + true; + +template +inline constexpr bool is_win32_file = + std::is_same_v && SCN_WINDOWS && !SCN_MINGW; + +constexpr auto get_file_tag() +{ + if constexpr (is_gnu_file) { + return detail::tag_type{}; + } + else if constexpr (is_bsd_file) { + return detail::tag_type{}; + } + else if constexpr (is_musl_file) { + return detail::tag_type{}; + } + else if constexpr (is_win32_file) { + return detail::tag_type{}; + } + else { + return detail::tag_type{}; + } +} + +template +struct stdio_file_interface_base { + stdio_file_interface_base(File* f) : file(f) {} + ~stdio_file_interface_base() = default; + + stdio_file_interface_base(const stdio_file_interface_base&) = delete; + stdio_file_interface_base& operator=(const stdio_file_interface_base&) = + delete; + + stdio_file_interface_base(stdio_file_interface_base&& other) + : file(other.file) + { + other.file = nullptr; + } + stdio_file_interface_base& operator=(stdio_file_interface_base&& other) + { + file = other.file; + other.file = nullptr; + return *this; + } + + File* file; +}; + +template +struct stdio_file_interface_impl; + +template +struct stdio_file_interface_impl + : stdio_file_interface_base { + void lock() {} + void unlock() {} + + bool has_buffering() const + { + return false; + } + + std::string_view buffer() const + { + return {}; + } + void unsafe_advance_n(std::ptrdiff_t) + { + SCN_EXPECT(false); + SCN_UNREACHABLE; + } + void fill_buffer() + { + SCN_EXPECT(false); + SCN_UNREACHABLE; + } + + std::optional read_one() + { + auto res = std::fgetc(this->file); + if (res == EOF) { + return std::nullopt; + } + return static_cast(res); + } + + void prepare_putback() {} + void finalize_putback() {} + + bool putback(char ch) + { + return std::ungetc(static_cast(ch), this->file) != EOF; + } +}; + +template +struct posix_stdio_file_interface : stdio_file_interface_base { + void lock() + { + flockfile(this->file); + } + void unlock() + { + funlockfile(this->file); + } + + static bool has_buffering() + { + return true; + } + + std::optional read_one() + { + auto res = getc_unlocked(this->file); + if (res == EOF) { + return std::nullopt; + } + return static_cast(res); + } + + void prepare_putback() + { + unlock(); + } + void finalize_putback() + { + lock(); + } + + bool putback(char ch) + { + return std::ungetc(static_cast(ch), this->file) != EOF; + } +}; + +template +struct stdio_file_interface_impl + : posix_stdio_file_interface { + std::string_view buffer() const + { + return make_string_view_from_pointers(this->file->_IO_read_ptr, + this->file->_IO_read_end); + } + void unsafe_advance_n(std::ptrdiff_t n) + { + SCN_EXPECT(this->file->_IO_read_ptr != nullptr); + SCN_EXPECT(this->file->_IO_read_end - this->file->_IO_read_ptr >= n); + this->file->_IO_read_ptr += n; + } + void fill_buffer() + { + if (__uflow(this->file) != EOF) { + --this->file->_IO_read_ptr; + } + } +}; + +template +struct stdio_file_interface_impl + : posix_stdio_file_interface { + std::string_view buffer() const + { + return {reinterpret_cast(this->file->_p), + static_cast(this->file_->_r)}; + } + void unsafe_advance_n(std::ptrdiff_t n) + { + SCN_EXPECT(this->file->_p != nullptr); + SCN_EXPECT(this->file->_r >= n); + this->file->_p += n; + this->file->_r -= n; + } + void fill_buffer() + { + __srefill(this->file); + } +}; + +template +struct stdio_file_interface_impl + : posix_stdio_file_interface { + std::string_view buffer() const + { + return make_string_view_from_pointers( + reinterpret_cast(this->file->rpos), + reinterpret_cast(this->file->rend)); + } + void unsafe_advance_n(std::ptrdiff_t n) + { + SCN_EXPECT(this->file->rpos != nullptr); + SCN_EXPECT(this->file->rend - this->file->rpos >= n); + this->file->rpos += n; + } + void fill_buffer() + { + if (__uflow(this->file) != EOF) { + --this->file->rpos; + } + } +}; + +template +struct stdio_file_interface_impl + : stdio_file_interface_base { + void lock() + { + _lock_file(this->file); + } + void unlock() + { + _unlock_file(this->file); + } + + static bool has_buffering() + { + return false; + } + + std::string_view buffer() const + { + return {}; + } + void unsafe_advance_n(std::ptrdiff_t n) + { + SCN_EXPECT(false); + SCN_UNREACHABLE; + } + void fill_buffer() + { + SCN_EXPECT(false); + SCN_UNREACHABLE; + } + + std::optional read_one() + { + auto res = _fgetc_nolock(this->file); + if (res == EOF) { + return std::nullopt; + } + return static_cast(res); + } + + void prepare_putback() {} + void finalize_putback() {} + + bool putback(char ch) + { + return _ungetc_nolock(static_cast(ch), this->file) != + EOF; + } +}; + +using stdio_file_interface = + stdio_file_interface_impl; + +template +class basic_scan_file_buffer : public basic_scan_buffer { using base = basic_scan_buffer; public: - scan_file_buffer(std::FILE* file); - ~scan_file_buffer(); + explicit basic_scan_file_buffer(FileInterface file); + ~basic_scan_file_buffer(); bool fill() override; - void sync(std::ptrdiff_t position) override; + + bool sync(std::ptrdiff_t position) override; private: - std::FILE* m_file; + FileInterface m_file; std::optional m_latest{std::nullopt}; }; +struct scan_file_buffer : public basic_scan_file_buffer { + explicit scan_file_buffer(std::FILE* file) + : basic_scan_file_buffer(stdio_file_interface{file}) + { + } +}; + +extern template basic_scan_file_buffer< + stdio_file_interface>::basic_scan_file_buffer(stdio_file_interface); +extern template basic_scan_file_buffer< + stdio_file_interface>::~basic_scan_file_buffer(); +extern template bool basic_scan_file_buffer::fill(); +extern template bool basic_scan_file_buffer::sync( + std::ptrdiff_t); + template class basic_scan_ref_buffer : public basic_scan_buffer { using base = basic_scan_buffer; diff --git a/src/scn/impl.cpp b/src/scn/impl.cpp index 1e59fa87..4a111343 100644 --- a/src/scn/impl.cpp +++ b/src/scn/impl.cpp @@ -274,371 +274,14 @@ SCN_DEFINE_SCANNER_SCAN_FOR_CTX(wscan_context) // scan_buffer implementations ///////////////////////////////////////////////////////////////// -namespace { -struct file_wrapper_impl_base { -private: - static auto fgetc_impl(std::FILE* file) - { -#if SCN_POSIX - return getc_unlocked(file); -#elif SCN_WINDOWS - return _fgetc_nolock(file); -#else - return std::fgetc(file); -#endif - } - - static auto ungetc_impl(std::FILE* file, int ch) - { -#if SCN_WINDOWS && !SCN_MINGW - return _ungetc_nolock(ch, file); -#else - return std::ungetc(ch, file); -#endif - } - -public: - static void lock(std::FILE* file) - { -#if SCN_POSIX - ::flockfile(file); -#elif SCN_WINDOWS - ::_lock_file(file); -#else - SCN_UNUSED(file); -#endif - } - static void unlock(std::FILE* file) - { -#if SCN_POSIX - ::funlockfile(file); -#elif SCN_WINDOWS - ::_unlock_file(file); -#else - SCN_UNUSED(file); -#endif - } - - static void lock_for_unget(std::FILE* file) - { -#if SCN_WINDOWS && !SCN_MINGW - SCN_UNUSED(file); -#else - lock(file); -#endif - } - - static void unlock_for_unget(std::FILE* file) - { -#if SCN_WINDOWS && !SCN_MINGW - SCN_UNUSED(file); -#else - unlock(file); -#endif - } - - static std::optional read(std::FILE* file) - { - auto res = fgetc_impl(file); - if (res == EOF) { - return std::nullopt; - } - return static_cast(res); - } - - static void unget(std::FILE* file, char ch) - { - auto res = ungetc_impl(file, static_cast(ch)); - SCN_ENSURE(res != EOF); - } - - static std::optional peek(std::FILE* file) - { - if (auto res = read(file); res) { - unget(file, *res); - return res; - } - return std::nullopt; - } -}; - -struct default_file_tag {}; -struct gnu_file_tag {}; -struct bsd_file_tag {}; - -template -struct file_wrapper_impl; - -template -struct file_wrapper_impl : file_wrapper_impl_base { - constexpr static std::string_view get_current_buffer(F*) - { - return {}; - } - - constexpr static bool has_buffering() - { - return false; - } - - static bool fill_buffer(F*) - { - SCN_EXPECT(false); - SCN_UNREACHABLE; - } - - constexpr static void unsafe_advance_to_buffer_end(F*) {} - - static void unsafe_advance_n(F*, std::ptrdiff_t) - { - SCN_EXPECT(false); - SCN_UNREACHABLE; - } -}; - -// GNU libc (Linux) -template -struct file_wrapper_impl : file_wrapper_impl_base { - static std::string_view get_current_buffer(F* file) - { - return make_string_view_from_pointers(file->_IO_read_ptr, - file->_IO_read_end); - } - - constexpr static bool has_buffering() - { - return true; - } - - static bool fill_buffer(F* file) - { - return peek(file).has_value(); - } - - static void unsafe_advance_to_buffer_end(F* file) - { - SCN_EXPECT(file->_IO_read_ptr && file->_IO_read_end); - file->_IO_read_ptr = file->_IO_read_end; - } - - static void unsafe_advance_n(F* file, std::ptrdiff_t n) - { - SCN_EXPECT(file->_IO_read_ptr); - SCN_EXPECT(file->_IO_read_end - file->_IO_read_ptr >= n); - file->_IO_read_ptr += n; - } - - static std::optional peek(F* file) - { - if (file->_IO_read_ptr != file->_IO_read_end) { - return file->_IO_read_ptr[0]; - } - if (auto res = read(file); res) { - --file->_IO_read_ptr; - return res; - } - return std::nullopt; - } -}; - -// BSD libc (Apple) -template -struct file_wrapper_impl : file_wrapper_impl_base { - static std::string_view get_current_buffer(F* file) - { - return {reinterpret_cast(file->_p), - static_cast(file->_r)}; - } - - constexpr static bool has_buffering() - { - return true; - } - - static bool fill_buffer(F* file) - { - return peek(file).has_value(); - } - - static void unsafe_advance_to_buffer_end(F* file) - { - SCN_EXPECT(file->_p != nullptr); - file->_p += file->_r; - file->_r = 0; - } - - static void unsafe_advance_n(F* file, std::ptrdiff_t n) - { - SCN_EXPECT(file->_p != nullptr); - SCN_EXPECT(file->_r >= n); - file->_p += n; - file->_r -= n; - } - - static std::optional peek(F* file) - { - if (file->_p != nullptr && file->_r != 0) { - return static_cast(*file->_p); - } - if (auto res = read(file); res) { - --file->_p; - ++file->_r; - return res; - } - return std::nullopt; - } -}; - -// Non-pretty workaround for MSVC silliness -template -inline constexpr bool is_gnu_file = false; -template -inline constexpr bool - is_gnu_file> = true; - -template -inline constexpr bool is_bsd_file = false; -template -inline constexpr bool is_bsd_file< - F, - std::void_t> = - true; - -SCN_CLANG_PUSH -SCN_CLANG_IGNORE("-Wunneeded-internal-declaration") - -constexpr auto get_file_tag() -{ - if constexpr (is_gnu_file) { - return detail::tag_type{}; - } - else if constexpr (is_bsd_file) { - return detail::tag_type{}; - } - else { - return detail::tag_type{}; - } -} - -using file_wrapper = - file_wrapper_impl; - -SCN_CLANG_POP - -bool fill_with_buffering(std::FILE* file, std::string_view& current_view) -{ - SCN_EXPECT(file_wrapper::has_buffering()); - - if (!current_view.empty()) { - file_wrapper::unsafe_advance_to_buffer_end(file); - } - - if (!file_wrapper::fill_buffer(file)) { - current_view = {}; - return false; - } +template basic_scan_file_buffer::basic_scan_file_buffer( + stdio_file_interface); +template basic_scan_file_buffer< + stdio_file_interface>::~basic_scan_file_buffer(); +template bool basic_scan_file_buffer::fill(); +template bool basic_scan_file_buffer::sync( + std::ptrdiff_t); - current_view = file_wrapper::get_current_buffer(file); - return true; -} - -bool fill_without_buffering(std::FILE* file, - std::string_view& current_view, - std::optional& latest) -{ - latest = file_wrapper::read(file); - if (!latest) { - current_view = {}; - return false; - } - current_view = {&*latest, 1}; - return true; -} -} // namespace - -scan_file_buffer::scan_file_buffer(std::FILE* file) - : base(base::non_contiguous_tag{}), m_file(file) -{ - file_wrapper::lock(file); -} - -scan_file_buffer::~scan_file_buffer() -{ - file_wrapper::unlock(m_file); -} - -bool scan_file_buffer::fill() -{ - SCN_EXPECT(m_file); - - if (!this->m_current_view.empty()) { - this->m_putback_buffer.insert(this->m_putback_buffer.end(), - this->m_current_view.begin(), - this->m_current_view.end()); - } - - if (file_wrapper::has_buffering()) { - return fill_with_buffering(m_file, this->m_current_view); - } - - return fill_without_buffering(m_file, this->m_current_view, this->m_latest); -} - -namespace { -struct file_unlocker_for_unget { - file_unlocker_for_unget(std::FILE* f) : file(f) - { - file_wrapper::unlock_for_unget(file); - } - ~file_unlocker_for_unget() - { - file_wrapper::lock_for_unget(file); - } - - std::FILE* file; -}; -} // namespace - -void scan_file_buffer::sync(std::ptrdiff_t position) -{ - SCN_EXPECT(m_file); - - if (file_wrapper::has_buffering()) { - if (position < - static_cast(this->putback_buffer().size())) { - file_unlocker_for_unget unlocker{m_file}; - auto putback_segment = this->get_segment_starting_at(position); - for (auto rit = putback_segment.rbegin(); - rit != putback_segment.rend(); ++rit) { - file_wrapper::unget(m_file, *rit); - } - return; - } - - file_wrapper::unsafe_advance_n( - m_file, position - static_cast( - this->putback_buffer().size())); - return; - } - - const auto chars_avail = this->chars_available(); - if (position == chars_avail) { - return; - } - - file_unlocker_for_unget unlocker{m_file}; - SCN_EXPECT(m_current_view.size() == 1); - file_wrapper::unget(m_file, m_current_view.front()); - - auto putback_segment = - std::string_view{this->putback_buffer()}.substr(position); - for (auto rit = putback_segment.rbegin(); rit != putback_segment.rend(); - ++rit) { - file_wrapper::unget(m_file, *rit); - } -} } // namespace detail ///////////////////////////////////////////////////////////////// diff --git a/src/scn/impl.h b/src/scn/impl.h index 1f2cea50..2e9d38f4 100644 --- a/src/scn/impl.h +++ b/src/scn/impl.h @@ -1122,10 +1122,117 @@ struct iterator_value_result { SCN_NO_UNIQUE_ADDRESS T value; }; +} // namespace impl + +///////////////////////////////////////////////////////////////// +// File support +///////////////////////////////////////////////////////////////// + +namespace detail { + +template +basic_scan_file_buffer::basic_scan_file_buffer( + FileInterface file) + : base(base::non_contiguous_tag{}), m_file(SCN_MOVE(file)) +{ + m_file.lock(); +} + +template +basic_scan_file_buffer::~basic_scan_file_buffer() +{ + m_file.unlock(); +} + +template +bool basic_scan_file_buffer::fill() +{ + if (!this->m_current_view.empty()) { + this->m_putback_buffer.insert(this->m_putback_buffer.end(), + this->m_current_view.begin(), + this->m_current_view.end()); + } + + if (m_file.has_buffering()) { + if (!this->m_current_view.empty()) { + m_file.unsafe_advance_n(this->m_current_view.size()); + } + + m_file.fill_buffer(); + m_current_view = m_file.buffer(); + return !this->m_current_view.empty(); + } + + this->m_latest = m_file.read_one(); + if (!this->m_latest) { + this->m_current_view = {}; + return false; + } + + this->m_current_view = {&*this->m_latest, 1}; + return true; +} + +template +bool basic_scan_file_buffer::sync(std::ptrdiff_t position) +{ + struct putback_wrapper { + putback_wrapper(FileInterface& i) : i(i) + { + i.prepare_putback(); + } + ~putback_wrapper() + { + i.finalize_putback(); + } + + FileInterface& i; + }; + + if (m_file.has_buffering()) { + if (position < + static_cast(this->putback_buffer().size())) { + putback_wrapper wrapper{m_file}; + auto segment = this->get_segment_starting_at(position); + for (auto it = segment.rbegin(); it != segment.rend(); ++it) { + if (!m_file.putback(*it)) { + return false; + } + } + return true; + } + + m_file.unsafe_advance_n(position - static_cast( + this->putback_buffer().size())); + return true; + } + + const auto chars_avail = this->chars_available(); + if (position == chars_avail) { + return true; + } + + putback_wrapper wrapper{m_file}; + SCN_EXPECT(m_current_view.size() == 1); + m_file.putback(m_current_view.front()); + + auto segment = std::string_view{this->putback_buffer()}.substr(position); + for (auto it = segment.rbegin(); it != segment.rend(); ++it) { + if (!m_file.putback(*it)) { + return false; + } + } + return true; +} + +} // namespace detail + ///////////////////////////////////////////////////////////////// // Unicode ///////////////////////////////////////////////////////////////// +namespace impl { + template constexpr bool validate_unicode(std::basic_string_view src) { diff --git a/tests/unittests/stdin_test.cpp b/tests/unittests/stdin_test.cpp index ab529abd..068fd079 100644 --- a/tests/unittests/stdin_test.cpp +++ b/tests/unittests/stdin_test.cpp @@ -30,6 +30,7 @@ static std::optional read_scn() } return r->value(); } + template static std::optional read_scanf() { @@ -40,7 +41,7 @@ static std::optional read_scanf() } return i; } - else { + else if constexpr (std::is_same_v) { std::string val{}; val.resize(3); if (std::scanf(" %3c", &val[0]) != 1) { @@ -48,7 +49,18 @@ static std::optional read_scanf() } return val; } + else if constexpr (std::is_same_v) { + char val{}; + if (std::scanf("%c", &val) != 1) { + return std::nullopt; + } + return val; + } + else { + static_assert(scn::detail::dependent_false::value, ""); + } } + template static std::optional read_cin() { @@ -80,4 +92,9 @@ TEST(Stdin, Test) EXPECT_EQ(read_scn(), std::nullopt); EXPECT_THAT(read_cin(), Optional("ccc"s)); + + EXPECT_THAT(read_scn(), Optional('\n')); + EXPECT_THAT(read_scn(), Optional('d')); + EXPECT_THAT(read_scn(), Optional('\n')); + EXPECT_THAT(read_cin(), Optional('e')); } diff --git a/tests/unittests/stdin_test_input.txt b/tests/unittests/stdin_test_input.txt index 81cee76c..29d952a1 100644 --- a/tests/unittests/stdin_test_input.txt +++ b/tests/unittests/stdin_test_input.txt @@ -1 +1,3 @@ 100 101 102 103 104 105 aaa bbb ccc +d +e