diff --git a/include/cse/uri.hpp b/include/cse/uri.hpp index 5109d5ce9..a3a455fb9 100644 --- a/include/cse/uri.hpp +++ b/include/cse/uri.hpp @@ -7,6 +7,7 @@ #include +#include #include #include #include @@ -15,6 +16,15 @@ namespace cse { +namespace detail +{ +struct subrange +{ + std::size_t offset = 0; + std::size_t size = 0; +}; +} // namespace detail + /** * A URI reference. @@ -78,10 +88,7 @@ class uri * The returned `std::string_view` is only valid as long as the `uri` * object remains alive and unmodified. */ - std::string_view view() const noexcept - { - return std::string_view(data_); - } + std::string_view view() const noexcept; /** * Returns the scheme component, or null if there is none. @@ -89,10 +96,7 @@ class uri * The returned `std::string_view` is only valid as long as the `uri` * object remains alive and unmodified. */ - std::optional scheme() const noexcept - { - return scheme_; - } + std::optional scheme() const noexcept; /** * Returns the authority component, or null if there is none. @@ -100,10 +104,7 @@ class uri * The returned `std::string_view` is only valid as long as the `uri` * object remains alive and unmodified. */ - std::optional authority() const noexcept - { - return authority_; - } + std::optional authority() const noexcept; /** * Returns the path component. @@ -111,10 +112,7 @@ class uri * The returned `std::string_view` is only valid as long as the `uri` * object remains alive and unmodified. */ - std::string_view path() const noexcept - { - return path_; - } + std::string_view path() const noexcept; /** * Returns the query component, or null if there is none. @@ -122,10 +120,7 @@ class uri * The returned `std::string_view` is only valid as long as the `uri` * object remains alive and unmodified. */ - std::optional query() const noexcept - { - return query_; - } + std::optional query() const noexcept; /** * Returns the fragment component, or null if there is none. @@ -133,25 +128,19 @@ class uri * The returned `std::string_view` is only valid as long as the `uri` * object remains alive and unmodified. */ - std::optional fragment() const noexcept - { - return fragment_; - } + std::optional fragment() const noexcept; /// Returns whether the `uri` object is empty. - bool empty() const noexcept - { - return data_.empty(); - } + bool empty() const noexcept; private: std::string data_; - std::optional scheme_; - std::optional authority_; - std::string_view path_; - std::optional query_; - std::optional fragment_; + std::optional scheme_; + std::optional authority_; + detail::subrange path_; + std::optional query_; + std::optional fragment_; }; diff --git a/src/cpp/uri.cpp b/src/cpp/uri.cpp index 0646a1f53..ae38cb4ff 100644 --- a/src/cpp/uri.cpp +++ b/src/cpp/uri.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -112,6 +113,43 @@ bool all_chars_satisfy(bool (*is_valid_char)(char), std::string_view string) return true; } +detail::subrange to_subrange(const std::string& str, std::string_view substr) +{ + assert( // Check that substr is in fact a substring of str + reinterpret_cast(str.data()) <= + reinterpret_cast(substr.data()) && + reinterpret_cast(substr.data()) + substr.size() <= + reinterpret_cast(str.data()) + str.size()); + return {static_cast(substr.data() - str.data()), substr.size()}; +} + +std::optional to_subrange( + const std::string& str, + std::optional substr) +{ + if (substr) { + return to_subrange(str, *substr); + } else { + return std::nullopt; + } +} + +std::string_view to_substring(const std::string& str, detail::subrange subrange) +{ + return std::string_view(str).substr(subrange.offset, subrange.size); +} + +std::optional to_substring( + const std::string& str, + std::optional subrange) +{ + if (subrange) { + return to_substring(str, *subrange); + } else { + return std::nullopt; + } +} + } // namespace @@ -122,11 +160,11 @@ uri::uri() noexcept = default; : data_(std::move(string)) { auto view = std::string_view(data_); - scheme_ = consume_scheme(view); - authority_ = consume_authority(view); - path_ = consume_path(view); - query_ = consume_query(view); - fragment_ = consume_fragment(view); + scheme_ = to_subrange(data_, consume_scheme(view)); + authority_ = to_subrange(data_, consume_authority(view)); + path_ = to_subrange(data_, consume_path(view)); + query_ = to_subrange(data_, consume_query(view)); + fragment_ = to_subrange(data_, consume_fragment(view)); assert(view.empty()); } @@ -154,33 +192,75 @@ uri::uri( if (scheme) { data_ += *scheme; data_ += ':'; - scheme_ = view.substr(0, scheme->size()); + scheme_ = to_subrange(data_, view.substr(0, scheme->size())); view.remove_prefix(scheme->size() + 1); } if (authority) { data_ += "//"; data_ += *authority; - authority_ = view.substr(2, authority->size()); + authority_ = to_subrange(data_, view.substr(2, authority->size())); view.remove_prefix(2 + authority->size()); } data_ += path; - path_ = view.substr(0, path.size()); + path_ = to_subrange(data_, view.substr(0, path.size())); view.remove_prefix(path.size()); if (query) { data_ += '?'; data_ += *query; - query_ = view.substr(1, query->size()); + query_ = to_subrange(data_, view.substr(1, query->size())); view.remove_prefix(1 + query->size()); } if (fragment) { data_ += '#'; data_ += *fragment; - fragment_ = view.substr(1, fragment->size()); + fragment_ = to_subrange(data_, view.substr(1, fragment->size())); view.remove_prefix(1 + fragment->size()); } } +std::string_view uri::view() const noexcept +{ + return std::string_view(data_); +} + + +std::optional uri::scheme() const noexcept +{ + return to_substring(data_, scheme_); +} + + +std::optional uri::authority() const noexcept +{ + return to_substring(data_, authority_); +} + + +std::string_view uri::path() const noexcept +{ + return to_substring(data_, path_); +} + + +std::optional uri::query() const noexcept +{ + return to_substring(data_, query_); +} + + +std::optional uri::fragment() const noexcept +{ + return to_substring(data_, fragment_); +} + + +bool uri::empty() const noexcept +{ + return data_.empty(); +} + + // ============================================================================= // resolve_reference() // ============================================================================= diff --git a/test/cpp/uri_unittest.cpp b/test/cpp/uri_unittest.cpp index 7137b454d..7aaf9d546 100644 --- a/test/cpp/uri_unittest.cpp +++ b/test/cpp/uri_unittest.cpp @@ -81,6 +81,45 @@ BOOST_AUTO_TEST_CASE(uri_parser) } +BOOST_AUTO_TEST_CASE(uri_copy_and_move) +{ + auto orig = uri("http://user@example.com:1234/foo/bar?q=uux#frag"); + const auto copy = orig; + const auto move = std::move(orig); + orig = uri(); + + BOOST_REQUIRE(copy.scheme().has_value()); + BOOST_TEST(*copy.scheme() == "http"); + BOOST_REQUIRE(copy.authority().has_value()); + BOOST_TEST(*copy.authority() == "user@example.com:1234"); + BOOST_TEST(copy.path() == "/foo/bar"); + BOOST_REQUIRE(copy.query().has_value()); + BOOST_TEST(*copy.query() == "q=uux"); + BOOST_REQUIRE(copy.fragment().has_value()); + BOOST_TEST(*copy.fragment() == "frag"); + + BOOST_REQUIRE(move.scheme().has_value()); + BOOST_TEST(*move.scheme() == "http"); + BOOST_REQUIRE(move.authority().has_value()); + BOOST_TEST(*move.authority() == "user@example.com:1234"); + BOOST_TEST(move.path() == "/foo/bar"); + BOOST_REQUIRE(move.query().has_value()); + BOOST_TEST(*move.query() == "q=uux"); + BOOST_REQUIRE(move.fragment().has_value()); + BOOST_TEST(*move.fragment() == "frag"); + + // Special case: Short strings which may be affected by the small-string + // optimisation (see issue #361) + auto small = uri("x"); + const auto smallCopy = small; + const auto smallMove = std::move(small); + small = uri(); + + BOOST_TEST(smallCopy.path() == "x"); + BOOST_TEST(smallMove.path() == "x"); +} + + BOOST_AUTO_TEST_CASE(uri_comparison) { const auto httpURI = uri("http://user@example.com:1234/foo/bar?q=uux#frag");