From 3cdcef3382168b27e78ed1b2943110aa69be5b08 Mon Sep 17 00:00:00 2001
From: alandefreitas <alandefreitas@gmail.com>
Date: Fri, 4 Mar 2022 01:37:50 -0300
Subject: [PATCH] syntax-based normalization

fix #8, fix #65, close #136
---
 include/boost/url/detail/impl/normalize.ipp   | 259 +++++++++
 .../boost/url/detail/impl/pct_encoding.ipp    |   1 +
 .../url/detail/impl/remove_dot_segments.ipp   | 540 ++++++++++++++++++
 include/boost/url/detail/normalize.hpp        |  64 +++
 include/boost/url/detail/pct_encoding.hpp     |  12 +-
 .../boost/url/detail/remove_dot_segments.hpp  |  38 ++
 include/boost/url/grammar/ascii.hpp           |  32 +-
 include/boost/url/impl/url.hpp                |  28 -
 include/boost/url/impl/url.ipp                | 330 ++++++++---
 include/boost/url/impl/url_view.ipp           |  61 ++
 include/boost/url/ipv4_address.hpp            |   2 +-
 include/boost/url/ipv6_address.hpp            |   2 +-
 include/boost/url/pct_encoding.hpp            |   3 +-
 include/boost/url/rfc/detail/charsets.hpp     | 112 ++++
 include/boost/url/rfc/reg_name_rule.hpp       |   1 -
 include/boost/url/src.hpp                     |   3 +
 include/boost/url/url.hpp                     | 157 ++++-
 include/boost/url/url_view.hpp                |  26 +
 test/unit/url.cpp                             | 117 ++++
 19 files changed, 1648 insertions(+), 140 deletions(-)
 create mode 100644 include/boost/url/detail/impl/normalize.ipp
 create mode 100644 include/boost/url/detail/impl/remove_dot_segments.ipp
 create mode 100644 include/boost/url/detail/normalize.hpp
 create mode 100644 include/boost/url/detail/remove_dot_segments.hpp
 delete mode 100644 include/boost/url/impl/url.hpp
 create mode 100644 include/boost/url/rfc/detail/charsets.hpp
diff --git a/include/boost/url/detail/impl/normalize.ipp b/include/boost/url/detail/impl/normalize.ipp
new file mode 100644
index 00000000..5ae8a971
--- /dev/null
+++ b/include/boost/url/detail/impl/normalize.ipp
@@ -0,0 +1,259 @@
+//
+// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/CPPAlliance/url
+//
+
+#ifndef BOOST_URL_DETAIL_IMPL_NORMALIZE_IPP
+#define BOOST_URL_DETAIL_IMPL_NORMALIZE_IPP
+
+#include <boost/url/detail/normalize.hpp>
+#include <boost/url/detail/pct_encoding.hpp>
+#include <boost/assert.hpp>
+#include <cstring>
+
+namespace boost {
+namespace urls {
+namespace detail {
+
+// Three-way comparison of two percent-encoded
+// strings by their decoded bytes. Decoding is
+// unchecked, so the inputs are presumably
+// validated by the caller.
+int
+compare_encoded(
+    string_view lhs,
+    string_view rhs) noexcept
+{
+    // Decode one character at `pos` into `out`,
+    // advancing `pos` past the 1 or 3 encoded
+    // chars and counting one decoded byte.
+    auto decode_next = [](
+        string_view::iterator& pos,
+        char& out,
+        std::size_t& count)
+    {
+        if(*pos == '%')
+        {
+            pct_decode_unchecked(
+                &out, &out + 1, string_view(pos, 3));
+            pos += 3;
+        }
+        else
+        {
+            out = *pos;
+            ++pos;
+        }
+        ++count;
+    };
+
+    auto p0 = lhs.begin();
+    auto p1 = rhs.begin();
+    auto const last0 = lhs.end();
+    auto const last1 = rhs.end();
+    std::size_t len0 = 0;
+    std::size_t len1 = 0;
+    char b0 = 0;
+    char b1 = 0;
+    while(p0 < last0 && p1 < last1)
+    {
+        decode_next(p0, b0, len0);
+        decode_next(p1, b1, len1);
+        if (b0 != b1)
+            return b0 < b1 ? -1 : 1;
+    }
+    // one side ran out: decoded lengths decide
+    len0 += pct_decode_bytes_unchecked(
+        string_view(p0, last0 - p0));
+    len1 += pct_decode_bytes_unchecked(
+        string_view(p1, last1 - p1));
+    if (len0 == len1)
+        return 0;
+    return len0 < len1 ? -1 : 1;
+}
+
+// Three-way comparison of two percent-encoded
+// strings by their decoded bytes, ignoring
+// ASCII case. Decoding is unchecked, so inputs
+// are presumably validated by the caller.
+int
+ci_compare_encoded(
+    string_view lhs,
+    string_view rhs) noexcept
+{
+    // Decode one character at `pos` into `out`
+    // as lowercase, advancing `pos` past it and
+    // counting one decoded byte.
+    auto decode_next_lower = [](
+        string_view::iterator& pos,
+        char& out,
+        std::size_t& count)
+    {
+        if(*pos == '%')
+        {
+            pct_decode_unchecked(
+                &out, &out + 1, string_view(pos, 3));
+            pos += 3;
+        }
+        else
+        {
+            out = *pos;
+            ++pos;
+        }
+        out = grammar::ascii_tolower(out);
+        ++count;
+    };
+
+    auto p0 = lhs.begin();
+    auto p1 = rhs.begin();
+    auto const last0 = lhs.end();
+    auto const last1 = rhs.end();
+    std::size_t len0 = 0;
+    std::size_t len1 = 0;
+    char b0 = 0;
+    char b1 = 0;
+    while (p0 < last0 && p1 < last1)
+    {
+        decode_next_lower(p0, b0, len0);
+        decode_next_lower(p1, b1, len1);
+        if (b0 != b1)
+            return b0 < b1 ? -1 : 1;
+    }
+    // one side ran out: decoded lengths decide
+    len0 += pct_decode_bytes_unchecked(
+        string_view(p0, last0 - p0));
+    len1 += pct_decode_bytes_unchecked(
+        string_view(p1, last1 - p1));
+    if (len0 == len1)
+        return 0;
+    return len0 < len1 ? -1 : 1;
+}
+
+// Three-way, ASCII case-insensitive comparison
+// of the common prefix; ties broken by length.
+int
+ci_compare(
+    string_view lhs,
+    string_view rhs) noexcept
+{
+    auto const n0 = lhs.size();
+    auto const n1 = rhs.size();
+    auto const common = n0 < n1 ? n0 : n1;
+    for (std::size_t pos = 0; pos != common; ++pos)
+    {
+        char const a = grammar::ascii_tolower(lhs[pos]);
+        char const b = grammar::ascii_tolower(rhs[pos]);
+        if (a != b)
+            return a < b ? -1 : 1;
+    }
+    if (n0 == n1)
+        return 0;
+    return n0 < n1 ? -1 : 1;
+}
+
+// Check whether `lhs` begins with `rhs` when
+// both are compared percent-decoded. An encoded
+// "/" is not decoded: its "%" is matched as a
+// literal char. Returns the number of encoded
+// chars of `lhs` that matched, or 0 otherwise.
+std::size_t
+path_starts_with(
+    string_view lhs,
+    string_view rhs) noexcept
+{
+    auto decode_next = [](
+        string_view::iterator& pos,
+        char& out)
+    {
+        if(*pos == '%')
+        {
+            pct_decode_unchecked(
+                &out, &out + 1, string_view(pos, 3));
+            if (out != '/')
+            {
+                pos += 3;
+                return;
+            }
+        }
+        // plain char, or the "%" of an encoded "/"
+        out = *pos;
+        ++pos;
+    };
+
+    auto p0 = lhs.begin();
+    auto p1 = rhs.begin();
+    auto const last0 = lhs.end();
+    auto const last1 = rhs.end();
+    char b0 = 0;
+    char b1 = 0;
+    while (p0 < last0 && p1 < last1)
+    {
+        decode_next(p0, b0);
+        decode_next(p1, b1);
+        if (b0 != b1)
+            return 0;
+    }
+    // a match requires `rhs` to be fully consumed
+    if (p1 != last1)
+        return 0;
+    return p0 - lhs.begin();
+}
+
+// Check whether `lhs` ends with `rhs` when both
+// are compared percent-decoded. An encoded "/"
+// is not decoded but matched literally. Returns
+// the number of encoded `lhs` chars matched, or 0.
+std::size_t
+path_ends_with(
+    string_view lhs,
+    string_view rhs) noexcept
+{
+    auto decode_prev = [](
+        string_view::iterator& first,
+        string_view::iterator& last,
+        char& out)
+    {
+        if ((last - first) >= 3 &&
+            *(std::prev(last, 3)) == '%')
+        {
+            pct_decode_unchecked(
+                &out, &out + 1,
+                string_view(std::prev(last, 3), 3));
+            if (out != '/')
+            {
+                last -= 3;
+                return;
+            }
+        }
+        // plain char, or the tail of an encoded "/"
+        out = *--last;
+    };
+
+    auto first0 = lhs.begin();
+    auto first1 = rhs.begin();
+    auto last0 = lhs.end();
+    auto last1 = rhs.end();
+    char b0 = 0;
+    char b1 = 0;
+    while(first0 < last0 && first1 < last1)
+    {
+        decode_prev(first0, last0, b0);
+        decode_prev(first1, last1, b1);
+        if (b0 != b1)
+            return 0;
+    }
+    // a match requires `rhs` to be fully consumed
+    if (first1 != last1)
+        return 0;
+    return lhs.end() - last0;
+}
+
+} // detail
+} // urls
+} // boost
+
+#endif
diff --git a/include/boost/url/detail/impl/pct_encoding.ipp b/include/boost/url/detail/impl/pct_encoding.ipp
index d2565fdb..d10014f1 100644
--- a/include/boost/url/detail/impl/pct_encoding.ipp
+++ b/include/boost/url/detail/impl/pct_encoding.ipp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
diff --git a/include/boost/url/detail/impl/remove_dot_segments.ipp b/include/boost/url/detail/impl/remove_dot_segments.ipp
new file mode 100644
index 00000000..f977b849
--- /dev/null
+++ b/include/boost/url/detail/impl/remove_dot_segments.ipp
@@ -0,0 +1,540 @@
+//
+// Copyright (c) 2022 alandefreitas (alandefreitas@gmail.com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/CPPAlliance/url
+//
+
+#ifndef BOOST_URL_DETAIL_IMPL_REMOVE_DOT_SEGMENTS_IPP
+#define BOOST_URL_DETAIL_IMPL_REMOVE_DOT_SEGMENTS_IPP
+
+#include <boost/url/detail/remove_dot_segments.hpp>
+#include <boost/assert.hpp>
+#include <cstring>
+
+namespace boost {
+namespace urls {
+namespace detail {
+
+std::size_t
+remove_dot_segments(
+    char* dest0,
+    char const* end,
+    string_view s,
+    bool remove_unmatched) noexcept
+{
+    // 1. The input buffer is `s` and the output
+    // buffer is [dest0, end), initially empty;
+    // `dest` is the output write position. The
+    // return value is the count of chars written.
+    char* dest = dest0;
+    auto append =
+        [&dest, &end]
+        (string_view in)
+    {
+        BOOST_ASSERT(in.size() <= std::size_t(end - dest));
+        std::memcpy(dest, in.data(), in.size());
+        dest += in.size();
+    };
+
+    auto find_last_slash =
+        [&dest0, &dest]() -> std::size_t
+    {
+        char* const first = dest0;
+        char* last = dest;
+        while (last != first)
+        {
+            --last;
+            if (*last == '/')
+                return last - first;
+        }
+        return string_view::npos;
+    };
+
+    // Step 2 is a loop through 5 production rules.
+    // Not every rule can follow every other rule,
+    // which enables some optimizations.
+    // A.  If the input buffer begins with a
+    // prefix of "../" or "./", then remove
+    // that prefix from the input buffer;
+    // otherwise,
+    // Rule A can only happen at the beginning:
+    // - B and C write "/" to the output
+    // - D can only happen at the end
+    // - E leaves "/" or happens at the end
+    while (!s.empty())
+    {
+        if (s.starts_with("../"))
+        {
+            if (!remove_unmatched)
+                append(s.substr(0, 3));
+            s.remove_prefix(3);
+            continue;
+        }
+        if (!s.starts_with("./"))
+            break;
+        s.remove_prefix(2);
+    }
+
+    // D.  if the input buffer consists only
+    // of "." or "..", then remove that from
+    // the input buffer; otherwise,
+    // Rule D can only happen after A is consumed:
+    // - B and C write "/" to the output
+    // - D can only happen at the end
+    // - E leaves "/" or happens at the end
+    if( s == "." ||
+        s == "..")
+    {
+        if( ! remove_unmatched &&
+                s == "..")
+            append(s);
+        s = {};
+    }
+
+    // 2. While the input buffer is not empty,
+    // loop as follows:
+    while (!s.empty())
+    {
+        // B.  if the input buffer begins with a
+        // prefix of "/./" or "/.", where "." is
+        // a complete path segment, then replace
+        // that prefix with "/" in the input
+        // buffer; otherwise,
+        if (s.starts_with("/./"))
+        {
+            s.remove_prefix(2);
+            continue;
+        }
+        if (s == "/.")
+        {
+            // equivalent to replacing s with '/'
+            // and executing the next iteration
+            append(s.substr(0, 1));
+            s.remove_prefix(2);
+            continue;
+        }
+
+        // C. if the input buffer begins with a
+        // prefix of "/../" or "/..", where ".."
+        // is a complete path segment, then
+        // replace that prefix with "/" in the
+        // input buffer and remove the last
+        // segment and its preceding "/"
+        // (if any) from the output buffer;
+        // otherwise,
+        if (s.starts_with("/../"))
+        {
+            std::size_t p = find_last_slash();
+            if (p != string_view::npos)
+                // "erase" the output from p on
+                dest = dest0 + p;
+            else if (!remove_unmatched)
+                append(s.substr(0, 3));
+            s.remove_prefix(3);
+            continue;
+        }
+        if (s == "/..")
+        {
+            std::size_t p = find_last_slash();
+            if (p != string_view::npos)
+            {
+                // "erase" the output from p on
+                dest = dest0 + p;
+                // equivalent to replacing s with '/'
+                // and executing the next iteration.
+                // this is the only point that would
+                // require input memory allocations
+                // in remove_dot_segments
+                append(s.substr(0, 1));
+            }
+            else if (remove_unmatched)
+                append(s.substr(0, 1));
+            else
+                append(s.substr(0, 3));
+            s.remove_prefix(3);
+            continue;
+        }
+
+        // E.  move the first path segment in the
+        // input buffer to the end of the output
+        // buffer, including the initial "/"
+        // character (if any) and any subsequent
+        // characters up to, but not including,
+        // the next "/" character or the end of
+        // the input buffer.
+        std::size_t p = s.find_first_of('/', 1);
+        if (p != string_view::npos)
+        {
+            append(s.substr(0, p));
+            s.remove_prefix(p);
+        }
+        else
+        {
+            append(s);
+            s = {};
+        }
+    }
+
+    // 3. Finally, the output buffer is returned
+    // as the result of remove_dot_segments.
+    return dest - dest0;
+}
+
+int
+normalized_path_compare(
+    string_view s0_init,
+    string_view s1_init,
+    bool r0,
+    bool r1) noexcept
+{
+    // Compares two paths as if dot segments had
+    // been removed from both, without allocating.
+    // Pseudocode:
+    // - run remove_dot_segments rules in reverse to
+    //   compute each path's normalized size
+    // - iterate both paths in reverse again, using
+    //   the normalized sizes to align positions
+
+    // r0 / r1 play the role of remove_unmatched
+    // in remove_dot_segments for s0 / s1. Returns
+    // -1, 0, or 1. Step 1 of the algorithm (the
+    // input and output buffers) needs no memory
+    // here: those allocations are logically removed.
+
+    // Step 2 is a loop through 5 production rules.
+    // Not every rule can follow every other rule,
+    // which enables some optimizations, such as
+    // ignoring the prefix rules before applying
+    // other rules.
+    auto remove_prefix =
+        []( string_view& s )
+    {
+        // A.  If the input buffer begins with a
+        // prefix of "../" or "./", then remove
+        // that prefix from the input buffer,
+        // otherwise,
+        // Rule A can only happen at the beginning:
+        // - B and C write "/" to the output
+        // - D can only happen at the end
+        // - E leaves "/" or happens at the end
+        std::size_t out = 0;
+        std::size_t n = 0;
+        while (!s.empty())
+        {
+            n = detail::path_starts_with(s, "../");
+            if (n)
+            {
+                out += 3;
+                s.remove_prefix(n);
+                continue;
+            }
+            n = detail::path_starts_with(s, "./");
+            if (n)
+            {
+                s.remove_prefix(n);
+                continue;
+            }
+            break;
+        }
+
+        // D.  if the input buffer consists only
+        // of "." or "..", then remove that from
+        // the input buffer; otherwise, ...
+        // Rule D can only happen after A is consumed:
+        // - B and C write "/" to the output
+        // - D can only happen at the end
+        // - E leaves "/" or happens at the end
+        if (detail::compare_encoded(s, ".") == 0)
+            s = {};
+        else if (detail::compare_encoded(s, "..") == 0)
+        {
+            out += 2;
+            s = {};
+        }
+        return out;
+    };
+    string_view s0 = s0_init;
+    string_view s1 = s1_init;
+    std::size_t s0_prefix_n = remove_prefix(s0);
+    std::size_t s1_prefix_n = remove_prefix(s1);
+
+    auto pop_last = [](
+        string_view& s,
+        string_view& c,
+        std::size_t& level,
+        bool r)
+    {
+        c = {};
+        std::size_t n = 0;
+        while (!s.empty())
+        {
+            // B.  if the input buffer begins with a
+            // prefix of "/./" or "/.", where "." is
+            // a complete path segment, then replace
+            // that prefix with "/" in the input
+            // buffer; otherwise,
+            n = detail::path_ends_with(s, "/./");
+            if (n)
+            {
+                c = s.substr(s.size() - n);
+                s.remove_suffix(n);
+                continue;
+            }
+            n = detail::path_ends_with(s, "/.");
+            if (n)
+            {
+                c = s.substr(s.size() - n, 1);
+                s.remove_suffix(n);
+                continue;
+            }
+
+            // C. if the input buffer begins with a
+            // prefix of "/../" or "/..", where ".."
+            // is a complete path segment, then
+            // replace that prefix with "/" in the
+            // input buffer and remove the last
+            // segment and its preceding "/"
+            // (if any) from the output buffer
+            // otherwise,
+            n = detail::path_ends_with(s, "/../");
+            if (n)
+            {
+                c = s.substr(s.size() - n);
+                s.remove_suffix(n);
+                ++level;
+                continue;
+            }
+            n = detail::path_ends_with(s, "/..");
+            if (n)
+            {
+                c = s.substr(s.size() - n);
+                s.remove_suffix(n);
+                ++level;
+                continue;
+            }
+
+            // E.  move the first path segment in the
+            // input buffer to the end of the output
+            // buffer, including the initial "/"
+            // character (if any) and any subsequent
+            // characters up to, but not including,
+            // the next "/" character or the end of
+            // the input buffer.
+            std::size_t p = s.size() > 1
+                ? s.find_last_of('/', s.size() - 2)
+                : string_view::npos;
+            if (p != string_view::npos)
+            {
+                c = s.substr(p + 1);
+                s.remove_suffix(c.size());
+            }
+            else
+            {
+                c = s;
+                s = {};
+            }
+
+            if (level == 0)
+                return;
+            if (!s.empty())
+                --level;
+        }
+        // `level` segments still need to be skipped
+        // but the string is empty
+        if (r && level)
+        {
+            c = "/";
+            level = 0;
+            return;
+        }
+        else if (level)
+        {
+            if (c.empty())
+                c = "/..";
+            else
+                c = "/../";
+            --level;
+            return;
+        }
+        c = {};
+    };
+
+    // number of decoded bytes in a path segment
+    auto path_decoded_bytes =
+        []( string_view s )
+    {
+        auto it = s.data();
+        auto const end =
+            it + s.size();
+        std::size_t n = 0;
+        while(it < end)
+        {
+            if(*it != '%')
+            {
+                // unescaped
+                ++it;
+                ++n;
+                continue;
+            }
+            if(end - it < 3)
+                return n;
+            char c = 0;
+            pct_decode_unchecked(
+                &c,
+                &c + 1,
+                string_view(it, 3));
+            if (c != '/')
+                it += 3;
+            else
+                ++it;
+            ++n;
+        }
+        return n;
+    };
+
+    // Calculate the normalized size
+    auto norm_bytes =
+        [&pop_last, &path_decoded_bytes]
+        ( string_view p,
+          bool r)
+    {
+        string_view c;
+        std::size_t s{0};
+        std::size_t n{0};
+        do
+        {
+            pop_last(p, c, s, r);
+            n += path_decoded_bytes(c);
+        }
+        while (!c.empty());
+        return n;
+    };
+    std::size_t s0n = norm_bytes(
+        s0,
+        r0);
+    if (!r0)
+        s0n += s0_prefix_n;
+
+    std::size_t s1n = norm_bytes(
+        s1,
+        r1);
+    if (!r1)
+        s1n += s1_prefix_n;
+
+    // Remove child segments until last intersection
+    s0 = s0_init;
+    s1 = s1_init;
+    string_view s0c;
+    string_view s1c;
+    std::size_t s0l = 0;
+    std::size_t s1l = 0;
+    std::size_t s0i = s0n;
+    std::size_t s1i = s1n;
+    pop_last(
+        s0, s0c, s0l,
+        r0);
+    pop_last(
+        s1, s1c, s1l,
+        r1);
+
+    // Consume incomparable segments
+    auto pop_decoded_back =
+        []( string_view& s )
+    {
+        if (s.size() < 3 ||
+            *std::prev(s.end(), 3) != '%')
+        {
+            char c = s.back();
+            s.remove_suffix(1);
+            return c;
+        }
+        char c = 0;
+        pct_decode_unchecked(
+            &c, &c + 1, s.substr(s.size() - 3));
+        if (c != '/')
+        {
+            s.remove_suffix(3);
+            return c;
+        }
+        c = s.back();
+        s.remove_suffix(1);
+        return c;
+    };
+
+    while (s0i != s1i)
+    {
+        // Consume more child segments
+        if (s0c.empty())
+            pop_last(
+                s0, s0c, s0l, r0);
+        if (s1c.empty())
+            pop_last(
+                s1, s1c, s1l, r1);
+
+        // Remove incomparable suffix
+        while (
+            !s0c.empty() &&
+            !s1c.empty())
+        {
+            if (s1i > s0i)
+            {
+                pop_decoded_back(s1c);
+                --s1i;
+                continue;
+            }
+            else if (s0i > s1i)
+            {
+                pop_decoded_back(s0c);
+                --s0i;
+                continue;
+            }
+            break;
+        }
+    }
+
+    int cmp = 0;
+    BOOST_ASSERT(s0i == s1i);
+    while (s0i > 0)
+    {
+        // Consume more child segments
+        if (s0c.empty())
+            pop_last(
+                s0, s0c, s0l, r0);
+        if (s1c.empty())
+            pop_last(
+                s1, s1c, s1l, r1);
+
+        // Compare intersection
+        while (
+            !s0c.empty() &&
+            !s1c.empty())
+        {
+            BOOST_ASSERT(s0i == s1i);
+            char c0 = pop_decoded_back(s0c);
+            char c1 = pop_decoded_back(s1c);
+            if (c0 < c1)
+                cmp = -1;
+            else if (c1 < c0)
+                cmp = 1;
+            --s0i;
+            --s1i;
+        }
+    }
+
+    if (cmp != 0)
+        return cmp;
+    if (s0n == s1n )
+        return 0;
+    if (s0n < s1n )
+        return -1;
+    return 1;
+}
+
+} // detail
+} // urls
+} // boost
+
+#endif
diff --git a/include/boost/url/detail/normalize.hpp b/include/boost/url/detail/normalize.hpp
new file mode 100644
index 00000000..d3161455
--- /dev/null
+++ b/include/boost/url/detail/normalize.hpp
@@ -0,0 +1,64 @@
+//
+// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/CPPAlliance/url
+//
+
+#ifndef BOOST_URL_DETAIL_NORMALIZED_HPP
+#define BOOST_URL_DETAIL_NORMALIZED_HPP
+
+#include <boost/url/pct_encoding_types.hpp>
+#include <boost/url/string_view.hpp>
+
+namespace boost {
+namespace urls {
+namespace detail {
+
+// three-way compare (-1, 0, or 1) of two
+// string_views as if both were percent-decoded
+int
+compare_encoded(
+    string_view lhs,
+    string_view rhs) noexcept;
+
+// check if string_view lhs starts with string_view
+// rhs as if they are both percent-decoded. Return
+// the number of chars matched in the encoded lhs
+// if lhs starts with rhs, and 0 otherwise
+std::size_t
+path_starts_with(
+    string_view lhs,
+    string_view rhs) noexcept;
+
+// check if string_view lhs ends with string_view
+// rhs as if they are both percent-decoded. Return
+// the number of chars matched in the encoded lhs
+// if lhs ends with rhs, and 0 otherwise
+std::size_t
+path_ends_with(
+    string_view lhs,
+    string_view rhs) noexcept;
+
+// three-way compare (-1, 0, or 1) as if both were
+// percent-decoded and converted to lowercase
+int
+ci_compare_encoded(
+    string_view lhs,
+    string_view rhs) noexcept;
+
+// three-way compare (-1, 0, or 1) as if both were
+// converted to ASCII lowercase
+int
+ci_compare(
+    string_view lhs,
+    string_view rhs) noexcept;
+
+} // detail
+} // urls
+} // boost
+
+#endif
diff --git a/include/boost/url/detail/pct_encoding.hpp b/include/boost/url/detail/pct_encoding.hpp
index 4d051d50..f08485a2 100644
--- a/include/boost/url/detail/pct_encoding.hpp
+++ b/include/boost/url/detail/pct_encoding.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -21,17 +22,6 @@ namespace boost {
 namespace urls {
 namespace detail {
 
-/** Return true if plain equals a decoded percent-encoded string
-
-    This function compares a plain key to a
-    percent-encoded string. The comparison is
-    made as if the key were percent-encoded.
-
-    @param plain_key The key to use for comparison.
-
-    @param encoded The percent-encoded string to
-        compare to.
-*/
 BOOST_URL_DECL
 bool
 key_equal_encoded(
diff --git a/include/boost/url/detail/remove_dot_segments.hpp b/include/boost/url/detail/remove_dot_segments.hpp
new file mode 100644
index 00000000..f8dfddd5
--- /dev/null
+++ b/include/boost/url/detail/remove_dot_segments.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright (c) 2022 alandefreitas (alandefreitas@gmail.com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/CPPAlliance/url
+//
+
+#ifndef BOOST_URL_DETAIL_REMOVE_DOT_SEGMENTS_HPP
+#define BOOST_URL_DETAIL_REMOVE_DOT_SEGMENTS_HPP
+
+#include <boost/url/string_view.hpp>
+#include <cstdint>
+
+namespace boost {
+namespace urls {
+namespace detail {
+
+std::size_t // number of chars written to dest
+remove_dot_segments(
+    char* dest, // start of the output buffer
+    char const* end, // end of the output buffer
+    string_view s, // path to remove dot segments from
+    bool remove_unmatched) noexcept; // false: keep unmatched ".."
+
+int // three-way result: -1, 0, or 1
+normalized_path_compare(
+    string_view lhs,
+    string_view rhs,
+    bool remove_unmatched_lhs, // as remove_unmatched, for lhs
+    bool remove_unmatched_rhs) noexcept; // same, for rhs
+
+} // detail
+} // urls
+} // boost
+
+#endif
diff --git a/include/boost/url/grammar/ascii.hpp b/include/boost/url/grammar/ascii.hpp
index fba82546..a4831c33 100644
--- a/include/boost/url/grammar/ascii.hpp
+++ b/include/boost/url/grammar/ascii.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -40,8 +41,35 @@ constexpr
 char
 ascii_tolower(char c) noexcept
 {
-    return (c >= 'A' && c <= 'Z')
-        ? c + 'a' - 'A' : c;
+    return
+      (c >= 'A' &&
+       c <= 'Z')
+        ? c + 'a' - 'A'
+        : c;
+}
+
+/** Return c converted to upper case
+
+    This function returns the uppercase equivalent
+    if a character is a lower-case letter, otherwise
+    returns the same character.
+
+    @par Exception Safety
+    Throws nothing.
+
+    @return The converted character
+
+    @param c The ascii character to convert
+*/
+constexpr
+char
+ascii_toupper(char c) noexcept
+{
+    return
+      (c >= 'a' &&
+       c <= 'z')
+        ? c - ('a' - 'A')
+        : c;
 }
 
 } // grammar
diff --git a/include/boost/url/impl/url.hpp b/include/boost/url/impl/url.hpp
deleted file mode 100644
index d4d73f43..00000000
--- a/include/boost/url/impl/url.hpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//
-// Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
-//
-// Distributed under the Boost Software License, Version 1.0. (See accompanying
-// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-//
-// Official repository: https://github.com/CPPAlliance/url
-//
-
-#ifndef BOOST_URL_IMPL_URL_HPP
-#define BOOST_URL_IMPL_URL_HPP
-
-namespace boost {
-namespace urls {
-
-template<class Allocator>
-urls::segments
-url::
-segments(
-    Allocator const& a) noexcept
-{
-    return urls::segments(*this, a);
-}
-
-} // urls
-} // boost
-
-#endif
\ No newline at end of file
diff --git a/include/boost/url/impl/url.ipp b/include/boost/url/impl/url.ipp
index 465e112c..98f03613 100644
--- a/include/boost/url/impl/url.ipp
+++ b/include/boost/url/impl/url.ipp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -14,6 +15,7 @@
 #include <boost/url/error.hpp>
 #include <boost/url/scheme.hpp>
 #include <boost/url/url_view.hpp>
+#include <boost/url/host_type.hpp>
 #include <boost/url/grammar/parse.hpp>
 #include <boost/url/detail/except.hpp>
 #include <boost/url/detail/pct_encoding.hpp>
@@ -27,6 +29,7 @@
 #include <boost/url/rfc/query_rule.hpp>
 #include <boost/url/rfc/scheme_rule.hpp>
 #include <boost/url/rfc/userinfo_rule.hpp>
+#include <boost/url/rfc/detail/charsets.hpp>
 #include <boost/align/align_up.hpp>
 #include <cstring>
 #include <iostream>
@@ -428,14 +431,11 @@ set_user(string_view s)
         this->string());
     s = buf.maybe_copy(s);
     check_invariants();
-    static constexpr auto cs =
-        unreserved_chars +
-        subdelim_chars;
-    auto const n =
-        pct_encode_bytes(s, {}, cs);
+    auto const n = pct_encode_bytes(
+        s, {}, detail::user_chars);
     auto dest = set_user_impl(n);
     pct_encode(dest, get(id_pass).data(),
-        s, {}, cs);
+        s, {}, detail::user_chars);
     decoded_[id_user] = s.size();
     check_invariants();
     return *this;
@@ -451,11 +451,8 @@ set_encoded_user(
     s = buf.maybe_copy(s);
     check_invariants();
     error_code ec;
-    static constexpr auto cs =
-        unreserved_chars +
-        subdelim_chars;
     auto const n =
-        validate_pct_encoding(s, ec, {}, cs);
+        validate_pct_encoding(s, ec, {}, detail::user_chars);
     if(ec.failed())
         detail::throw_invalid_argument(
             BOOST_CURRENT_LOCATION);
@@ -528,18 +525,15 @@ set_password(string_view s)
         this->string());
     s = buf.maybe_copy(s);
     check_invariants();
-    static constexpr auto cs =
-        unreserved_chars +
-        subdelim_chars + ':';
-    auto const n =
-        pct_encode_bytes(s, {}, cs);
+    auto const n = pct_encode_bytes(
+        s, {}, detail::password_chars);
     auto dest = set_password_impl(n);
     pct_encode(
         dest,
         get(id_host).data() - 1,
         s,
         {},
-        cs);
+        detail::password_chars);
     decoded_[id_pass] = s.size();
     check_invariants();
     return *this;
@@ -554,12 +548,9 @@ set_encoded_password(
         this->string());
     s = buf.maybe_copy(s);
     check_invariants();
-    static constexpr auto cs =
-        unreserved_chars +
-        subdelim_chars + ':';
     error_code ec;
     auto const n =
-        validate_pct_encoding(s, ec, {}, cs);
+        validate_pct_encoding(s, ec, {}, detail::password_chars);
     if(ec.failed())
         detail::throw_invalid_argument(
             BOOST_CURRENT_LOCATION);
@@ -623,18 +614,15 @@ set_userinfo(
         this->string());
     s = buf.maybe_copy(s);
     check_invariants();
-    static constexpr auto cs =
-        unreserved_chars +
-        subdelim_chars;
-    auto const n =
-        pct_encode_bytes(s, {}, cs);
+    auto const n = pct_encode_bytes(
+        s, {}, detail::userinfo_chars);
     auto dest = set_userinfo_impl(n);
     pct_encode(
         dest,
         get(id_host).data() - 1,
         s,
         {},
-        cs);
+        detail::userinfo_chars);
     decoded_[id_user] = s.size();
     check_invariants();
     return *this;
@@ -760,18 +748,15 @@ set_host(
             return set_host(r.value());
     }
     check_invariants();
-    static constexpr auto cs =
-        unreserved_chars +
-        subdelim_chars;
-    auto const n =
-        pct_encode_bytes(s, {}, cs);
+    auto const n = pct_encode_bytes(
+        s, {}, detail::host_chars);
     auto dest = set_host_impl(n);
     pct_encode(
         dest,
         get(id_path).data(),
         s,
         {},
-        cs);
+        detail::host_chars);
     decoded_[id_host] = s.size();
     host_type_ =
         urls::host_type::name;
@@ -1801,17 +1786,15 @@ set_fragment(
         this->string());
     s = buf.maybe_copy(s);
     check_invariants();
-    static constexpr auto cs =
-        pchars + '/' + '?';
-    auto const n =
-        pct_encode_bytes(s, {}, cs);
+    auto const n = pct_encode_bytes(
+        s, {}, fragment_chars);
     auto dest = set_fragment_impl(n);
     pct_encode(
         dest,
         get(id_end).data(),
         s,
         {},
-        cs);
+        fragment_chars);
     decoded_[id_frag] = s.size();
     check_invariants();
     return *this;
@@ -2042,26 +2025,167 @@ resolve(
 //
 //------------------------------------------------
 
-url&
+// Rewrites the percent-escapes of component `id` in place: an
+// escape whose octet is in `cs` is decoded, any other escape
+// has its hex digits uppercased.
+void
 url::
-normalize()
+normalize_octets_impl(
+    int id,
+    grammar::lut_chars const& cs) noexcept
 {
-#if 0
-    // scheme
+    char* it = s_ + offset(id);
+    char* end = s_ + offset(id + 1);
+    char buf = 0;
+    // write cursor; trails `it` once an escape was decoded
+    char* dest = it;
+    while (it < end)
     {
-        char* it = s_
-            + offset(id_scheme);
-        auto last = it + offset(id_user);
-        if(it < last)
+        if (*it != '%')
         {
-            --last;
-            while(it != last)
-            {
-                // to upper
-            }
+            // literal character: copy it down
+            *dest++ = *it++;
+            continue;
         }
+        if (end - it < 3)
+            break;
+
+        // decode the octet; keep it decoded if `cs` accepts it
+        pct_decode_unchecked(
+            &buf, &buf + 1, string_view(it, 3));
+        if (cs(buf))
+        {
+            *dest = buf;
+            it += 3;
+            ++dest;
+            continue;
+        }
+
+        // uppercase percent-encoding triplets, storing them
+        // through dest so they follow any decoded octets
+        *dest++ = *it++;
+        *dest++ = grammar::ascii_toupper(*it++);
+        *dest++ = grammar::ascii_toupper(*it++);
     }
-#endif
+    if (it != dest)
+    {
+        // decoding shortened the component; drop the slack
+        std::size_t diff = it - dest;
+        std::size_t n = len(id) - diff;
+        shrink_impl(id, n);
+        s_[size()] = '\0';
+    }
+}
+
+// Lowercases `id` in place, stepping over each %XX escape.
+void
+url::
+decoded_to_lower_impl(int id) noexcept
+{
+    char* cur = s_ + offset(id);
+    char const* const stop = s_ + offset(id + 1);
+    while(cur < stop)
+    {
+        if (*cur == '%')
+        {
+            cur += 3;
+            continue;
+        }
+        *cur = grammar::ascii_tolower(*cur);
+        ++cur;
+    }
+}
+
+// Lowercases every character of component `id` in place.
+void
+url::
+to_lower_impl(int id) noexcept
+{
+    char* const first = s_ + offset(id);
+    char const* const last = s_ + offset(id + 1);
+    for(char* p = first; p < last; ++p)
+    {
+        char& c = *p;
+        c = grammar::ascii_tolower(c);
+    }
+}
+
+url&
+url::
+normalize_scheme()
+{
+    to_lower_impl(id_scheme);
+    return *this;
+}
+
+url&
+url::
+normalize_authority()
+{
+    // host: only registered names have their escapes normalized
+    if (host_type() == urls::host_type::name)
+    {
+        normalize_octets_impl(
+            id_host,
+            detail::reg_name_chars);
+    }
+    decoded_to_lower_impl(id_host); // applied to every host type
+
+    // password: decode/uppercase escapes against password_chars
+    normalize_octets_impl(id_pass, detail::password_chars);
+
+    // user: decode/uppercase escapes against user_chars
+    normalize_octets_impl(id_user, detail::user_chars);
+    return *this;
+}
+
+url&
+url::
+normalize_path()
+{
+    // not path_chars: decoding %2F would create a new separator
+    normalize_octets_impl(id_path, detail::segment_chars);
+    string_view p = encoded_path();
+    std::size_t const n = detail::remove_dot_segments(
+        s_ + offset(id_path), s_ + offset(id_path + 1),
+        p, is_path_absolute());
+    if (n != p.size())
+    {
+        BOOST_ASSERT(n < p.size());
+        shrink_impl(id_path, n);
+        // p still spans the old length; re-read before counting
+        p = encoded_path();
+        nseg_ = p.empty() ? 0 : std::count(
+            p.begin() + 1, p.end(), '/') + 1;
+    }
+    return *this;
+}
+
+url&
+url::
+normalize_query()
+{
+    normalize_octets_impl(id_query, query_chars);
+    return *this;
+}
+
+url&
+url::
+normalize_fragment()
+{
+    normalize_octets_impl(id_frag, fragment_chars);
+    return *this;
+}
+
+url&
+url::
+normalize()
+{
+    normalize_fragment();
+    normalize_query();
+    normalize_path();
+    normalize_authority();
+    normalize_scheme();
     return *this;
 }
 
@@ -2276,47 +2400,7 @@ resize_impl(
         return s_ + offset(first);
     if(new_len <= n0)
     {
-        // shrinking
-        std::size_t n = n0 - new_len;
-        auto const pos =
-            offset(last);
-        // adjust chars
-        std::memmove(
-            s_ + pos - n,
-            s_ + pos,
-            offset(
-                id_end) - pos + 1);
-        // collapse (first, last)
-        collapse(first,  last, 
-            offset(last) - n);
-        // shift (last, end) left
-        adjust(
-            last, id_end, 0 - n);
-#if 0
-        // update table
-        if( nseg > 1 &&
-            first <= id_path)
-        {
-            // adjust segments
-            auto const tab =
-                tab_end() - 1;
-            for(std::size_t i = 0;
-                i < nseg - 1; ++i)
-                tab[0-2*i] += 0 - n;
-        }
-        if( nparam > 1 &&
-            first <= id_query)
-        {
-            // adjust params
-            auto const tab =
-                tab_end() - 2;
-            for(std::size_t i = 0;
-                i < nparam - 1; ++i)
-                tab[0-2*i] += 0 - n;
-        }
-#endif
-        s_[size()] = '\0';
-        return s_ + offset(first);
+        return shrink_impl(first, last, new_len);
     }
 
     // growing
@@ -2365,6 +2449,68 @@ resize_impl(
     return s_ + offset(first);
 }
 
+char*
+url::
+shrink_impl(
+    int id,
+    std::size_t new_size)
+{
+    return shrink_impl(
+        id, id + 1, new_size);
+}
+
+char*
+url::
+shrink_impl(
+    int first,
+    int last,
+    std::size_t new_len)
+{
+    // shrink [first, last) to new_len characters; never grows
+    auto const n0 = len(first, last);
+    BOOST_ASSERT(new_len <= n0);
+    std::size_t n = n0 - new_len;
+    auto const pos =
+        offset(last);
+    // slide everything from `last` on left by n (the +1 presumably covers the null terminator)
+    std::memmove(
+        s_ + pos - n,
+        s_ + pos,
+        offset(
+            id_end) - pos + 1);
+    // collapse (first, last)
+    collapse(first,  last,
+        offset(last) - n);
+    // shift (last, end) left
+    adjust(
+        last, id_end, 0 - n);
+#if 0
+    // update table
+    if( nseg > 1 &&
+        first <= id_path)
+    {
+        // adjust segments
+        auto const tab =
+            tab_end() - 1;
+        for(std::size_t i = 0;
+            i < nseg - 1; ++i)
+            tab[0-2*i] += 0 - n;
+    }
+    if( nparam > 1 &&
+        first <= id_query)
+    {
+        // adjust params
+        auto const tab =
+            tab_end() - 2;
+        for(std::size_t i = 0;
+            i < nparam - 1; ++i)
+            tab[0-2*i] += 0 - n;
+    }
+#endif
+    s_[size()] = '\0';
+    return s_ + offset(first);
+}
+
 //------------------------------------------------
 
 std::ostream&
diff --git a/include/boost/url/impl/url_view.ipp b/include/boost/url/impl/url_view.ipp
index 7ba571e7..1b7742e5 100644
--- a/include/boost/url/impl/url_view.ipp
+++ b/include/boost/url/impl/url_view.ipp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -13,7 +14,9 @@
 #include <boost/url/url_view.hpp>
 #include <boost/url/error.hpp>
 #include <boost/url/detail/over_allocator.hpp>
+#include <boost/url/detail/normalize.hpp>
 #include <boost/url/grammar/parse.hpp>
+#include <boost/url/grammar/ascii.hpp>
 #include <boost/url/rfc/authority_rule.hpp>
 #include <boost/url/rfc/fragment_rule.hpp>
 #include <boost/url/rfc/host_rule.hpp>
@@ -442,6 +445,64 @@ encoded_fragment() const noexcept
     return s.substr(1);
 }
 
+//------------------------------------------------
+//
+// Comparisons
+//
+//------------------------------------------------
+
+// Orders two URLs by comparing their components in turn:
+// scheme, user, password, host, path, query, fragment.
+// The first component that differs decides the result.
+int
+url_view::
+compare(const url_view& other) const noexcept
+{
+    int r = detail::ci_compare(
+        scheme(),
+        other.scheme());
+    if(r != 0)
+        return r;
+
+    r = detail::compare_encoded(
+        encoded_user(),
+        other.encoded_user());
+    if(r != 0)
+        return r;
+
+    r = detail::compare_encoded(
+        encoded_password(),
+        other.encoded_password());
+    if(r != 0)
+        return r;
+
+    r = detail::ci_compare_encoded(
+        encoded_host(),
+        other.encoded_host());
+    if(r != 0)
+        return r;
+
+    r = detail::normalized_path_compare(
+        encoded_path(),
+        other.encoded_path(),
+        is_path_absolute(),
+        other.is_path_absolute());
+    if(r != 0)
+        return r;
+
+    r = detail::compare_encoded(
+        encoded_query(),
+        other.encoded_query());
+    if(r != 0)
+        return r;
+
+    // the fragment is the last component, so it settles the order
+    return detail::compare_encoded(
+        encoded_fragment(),
+        other.encoded_fragment());
+}
+
+
 //------------------------------------------------
 //
 // Parsing
diff --git a/include/boost/url/ipv4_address.hpp b/include/boost/url/ipv4_address.hpp
index 985878ba..b906af15 100644
--- a/include/boost/url/ipv4_address.hpp
+++ b/include/boost/url/ipv4_address.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -262,7 +263,6 @@ class ipv4_address
 
     /** Customization point for parsing an IPv4 address.
     */
-    BOOST_URL_DECL
     friend
     void
     tag_invoke(
diff --git a/include/boost/url/ipv6_address.hpp b/include/boost/url/ipv6_address.hpp
index 11a01877..0c1d627a 100644
--- a/include/boost/url/ipv6_address.hpp
+++ b/include/boost/url/ipv6_address.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -317,7 +318,6 @@ class ipv6_address
 
         @param t Set to the result upon success.
     */
-    BOOST_URL_DECL
     friend
     void
     tag_invoke(
diff --git a/include/boost/url/pct_encoding.hpp b/include/boost/url/pct_encoding.hpp
index 6cf56cfb..2f03a213 100644
--- a/include/boost/url/pct_encoding.hpp
+++ b/include/boost/url/pct_encoding.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -245,7 +246,7 @@ pct_decode(
     this parameter is omitted, the default
     options will be used.
 
-    @param cs An opitionally specified
+    @param cs An optionally specified
     character set to use. If this parameter
     is omitted, all characters are considered
     unreserved.
diff --git a/include/boost/url/rfc/detail/charsets.hpp b/include/boost/url/rfc/detail/charsets.hpp
new file mode 100644
index 00000000..d3db9aee
--- /dev/null
+++ b/include/boost/url/rfc/detail/charsets.hpp
@@ -0,0 +1,112 @@
+//
+// Copyright (c) 2022 alandefreitas (alandefreitas@gmail.com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Official repository: https://github.com/CPPAlliance/url
+//
+
+#ifndef BOOST_URL_RFC_DETAIL_CHARSETS_HPP
+#define BOOST_URL_RFC_DETAIL_CHARSETS_HPP
+
+#include <boost/url/rfc/charsets.hpp>
+
+namespace boost {
+namespace urls {
+namespace detail {
+
+struct user_chars_t : grammar::lut_chars
+{
+    constexpr
+    user_chars_t() noexcept
+        : grammar::lut_chars(
+            unreserved_chars +
+            subdelim_chars)
+    {
+    }
+};
+
+constexpr user_chars_t user_chars{};
+
+struct password_chars_t : grammar::lut_chars
+{
+    constexpr
+    password_chars_t() noexcept
+        : grammar::lut_chars(
+            unreserved_chars +
+            subdelim_chars + ':')
+    {
+    }
+};
+
+constexpr password_chars_t password_chars{};
+
+struct userinfo_chars_t : grammar::lut_chars
+{
+    constexpr
+        userinfo_chars_t() noexcept
+        : grammar::lut_chars(
+            unreserved_chars +
+            subdelim_chars)
+    {
+    }
+};
+
+constexpr userinfo_chars_t userinfo_chars{};
+
+struct host_chars_t : grammar::lut_chars
+{
+    constexpr
+        host_chars_t() noexcept
+        : grammar::lut_chars(
+            unreserved_chars +
+            subdelim_chars)
+    {
+    }
+};
+
+constexpr host_chars_t host_chars{};
+
+struct reg_name_chars_t : grammar::lut_chars
+{
+    constexpr
+        reg_name_chars_t() noexcept
+        : grammar::lut_chars(
+            unreserved_chars
+            + '-' + '.')
+    {
+    }
+};
+
+constexpr reg_name_chars_t reg_name_chars{};
+
+struct segment_chars_t : grammar::lut_chars
+{
+    constexpr
+        segment_chars_t() noexcept
+        : grammar::lut_chars(
+            pchars)
+    {
+    }
+};
+
+constexpr segment_chars_t segment_chars{};
+
+struct path_chars_t : grammar::lut_chars
+{
+    constexpr
+        path_chars_t() noexcept
+        : grammar::lut_chars(
+            segment_chars + '/')
+    {
+    }
+};
+
+constexpr path_chars_t path_chars{};
+
+} // detail
+} // urls
+} // boost
+
+#endif
diff --git a/include/boost/url/rfc/reg_name_rule.hpp b/include/boost/url/rfc/reg_name_rule.hpp
index bf9b4cd0..7fa93c14 100644
--- a/include/boost/url/rfc/reg_name_rule.hpp
+++ b/include/boost/url/rfc/reg_name_rule.hpp
@@ -36,7 +36,6 @@ struct reg_name_rule
 {
     pct_encoded_str v;
 
-    BOOST_URL_DECL
     friend
     void
     tag_invoke(
diff --git a/include/boost/url/src.hpp b/include/boost/url/src.hpp
index cd259801..8149afbd 100644
--- a/include/boost/url/src.hpp
+++ b/include/boost/url/src.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -30,8 +31,10 @@ in a translation unit of the program.
 #include <boost/url/detail/impl/any_query_iter.ipp>
 #include <boost/url/detail/impl/copied_strings.ipp>
 #include <boost/url/detail/impl/except.ipp>
+#include <boost/url/detail/impl/normalize.ipp>
 #include <boost/url/detail/impl/path.ipp>
 #include <boost/url/detail/impl/pct_encoding.ipp>
+#include <boost/url/detail/impl/remove_dot_segments.ipp>
 
 #include <boost/url/impl/authority_view.ipp>
 #include <boost/url/impl/const_string.ipp>
diff --git a/include/boost/url/url.hpp b/include/boost/url/url.hpp
index d80b936c..4703c03c 100644
--- a/include/boost/url/url.hpp
+++ b/include/boost/url/url.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -1073,7 +1074,10 @@ class BOOST_SYMBOL_VISIBLE url
     template<class Allocator =
         std::allocator<char>>
     urls::segments
-    segments(Allocator const& = {}) noexcept;
+    segments(Allocator const& a = {}) noexcept
+    {
+        return urls::segments(*this, a);
+    }
 
     //--------------------------------------------
     //
@@ -1291,13 +1295,150 @@ class BOOST_SYMBOL_VISIBLE url
     // Normalization
     //
     //--------------------------------------------
+private:
+    void
+    normalize_octets_impl(
+        int id,
+        grammar::lut_chars const& cs) noexcept;
+
+    void
+    decoded_to_lower_impl(int id) noexcept;
+
+    void
+    to_lower_impl(int id) noexcept;
+public:
+
+    /** Normalize the URL components
+
+        Applies Syntax-based normalization to
+        all components of the URL.
+
+        @par Exception Safety
+        Strong guarantee.
+        Calls to allocate may throw.
+
+        @par Specification
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2"
+            >6.2.2 Syntax-Based Normalization (rfc3986)</a>
 
-    /** Normalize everything.
     */
     BOOST_URL_DECL
     url&
     normalize();
 
+    /** Normalize the URL scheme
+
+        Applies Syntax-based normalization to the
+        URL scheme.
+
+        The scheme is normalized to lowercase.
+
+        @par Exception Safety
+        Strong guarantee.
+        Calls to allocate may throw.
+
+        @par Specification
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2"
+            >6.2.2 Syntax-Based Normalization (rfc3986)</a>
+
+    */
+    BOOST_URL_DECL
+    url&
+    normalize_scheme();
+
+    /** Normalize the URL authority
+
+        Applies Syntax-based normalization to the
+        URL authority.
+
+        Percent-encoding triplets are uppercased.
+        Percent-encoded octets that correspond to
+        unreserved characters are decoded, and
+        the host is converted to lowercase.
+
+        @par Exception Safety
+        Strong guarantee.
+        Calls to allocate may throw.
+
+        @par Specification
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2"
+            >6.2.2 Syntax-Based Normalization (rfc3986)</a>
+
+    */
+    BOOST_URL_DECL
+    url&
+    normalize_authority();
+
+    /** Normalize the URL path
+
+        Applies Syntax-based normalization to the
+        URL path.
+
+        Percent-encoding triplets are normalized
+        to uppercase letters. Percent-encoded
+        octets that correspond to unreserved
+        characters are decoded. Redundant
+        path-segments are removed.
+
+        @par Exception Safety
+        Strong guarantee.
+        Calls to allocate may throw.
+
+        @par Specification
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2"
+            >6.2.2 Syntax-Based Normalization (rfc3986)</a>
+
+    */
+    BOOST_URL_DECL
+    url&
+    normalize_path();
+
+    /** Normalize the URL query
+
+        Applies Syntax-based normalization to the
+        URL query.
+
+        Percent-encoding triplets are normalized
+        to uppercase letters. Percent-encoded
+        octets that correspond to unreserved
+        characters are decoded.
+
+        @par Exception Safety
+        Strong guarantee.
+        Calls to allocate may throw.
+
+        @par Specification
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2"
+            >6.2.2 Syntax-Based Normalization (rfc3986)</a>
+
+    */
+    BOOST_URL_DECL
+    url&
+    normalize_query();
+
+    /** Normalize the URL fragment
+
+        Applies Syntax-based normalization to the
+        URL fragment.
+
+        Percent-encoding triplets are normalized
+        to uppercase letters. Percent-encoded
+        octets that correspond to unreserved
+        characters are decoded.
+
+        @par Exception Safety
+        Strong guarantee.
+        Calls to allocate may throw.
+
+        @par Specification
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2"
+            >6.2.2 Syntax-Based Normalization (rfc3986)</a>
+
+    */
+    BOOST_URL_DECL
+    url&
+    normalize_fragment();
+
     //--------------------------------------------
     //
     // Resolution
@@ -1342,6 +1483,17 @@ class BOOST_SYMBOL_VISIBLE url
         int last,
         std::size_t new_size);
 
+    char*
+    shrink_impl(
+        int id,
+        std::size_t new_size);
+
+    char*
+    shrink_impl(
+        int first,
+        int last,
+        std::size_t new_size);
+
     BOOST_URL_DECL
     bool
     resolve(
@@ -1417,6 +1569,5 @@ operator<<(std::ostream& os, url const& u);
 #include <boost/url/impl/params_encoded.hpp>
 #include <boost/url/impl/segments.hpp>
 #include <boost/url/impl/segments_encoded.hpp>
-#include <boost/url/impl/url.hpp>
 
 #endif
diff --git a/include/boost/url/url_view.hpp b/include/boost/url/url_view.hpp
index 3f7a1280..41283fa2 100644
--- a/include/boost/url/url_view.hpp
+++ b/include/boost/url/url_view.hpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -1700,6 +1701,30 @@ class BOOST_SYMBOL_VISIBLE url_view
             opt, a, decoded_[id_frag]);
     }
 
+    //--------------------------------------------
+    //
+    // Comparison
+    //
+    //--------------------------------------------
+
+    /** Compare two URLs.
+
+        This function compares two URLs
+        according to the Syntax-Based
+        comparison algorithm.
+
+        @par Exception Safety
+        Throws nothing.
+
+        @par Specification
+        @li <a href="https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2"
+            >6.2.2 Syntax-Based Normalization (rfc3986)</a>
+    */
+    BOOST_URL_DECL
+    int
+    compare(const url_view& other) const noexcept;
+
+
     //--------------------------------------------
     //
     // Parsing
@@ -1715,6 +1740,7 @@ class BOOST_SYMBOL_VISIBLE url_view
     BOOST_URL_DECL friend result<url_view>
         parse_uri_reference(string_view s);
 
+
 private:
     void apply(scheme_part_rule const& t) noexcept;
     void apply(host_rule const& h) noexcept;
diff --git a/test/unit/url.cpp b/test/unit/url.cpp
index cb00b412..1130b40c 100644
--- a/test/unit/url.cpp
+++ b/test/unit/url.cpp
@@ -1,5 +1,6 @@
 //
 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
+// Copyright (c) 2022 Alan Freitas (alandefreitas@gmail.com)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -1809,6 +1810,121 @@ class url_test
 
     //--------------------------------------------
 
+    void
+    testNormalize()
+    {
+        // normalize
+        {
+            auto check = [](string_view before,
+                            string_view after)
+            {
+                url u1 = parse_uri(before).value();
+                url_view u2 = parse_uri(after).value();
+                BOOST_TEST(u1.compare(u2) == 0);
+                u1.normalize();
+                BOOST_TEST(u1.string() == after);
+            };
+
+            check("HtTp://cPpAlLiAnCe.oRG/",
+                  "http://cppalliance.org/");
+            check("http://%2a%2b%2C%2f%3A.org/",
+                  "http://%2A%2B%2C%2F%3A.org/");
+            check("http://%63%70%70%61%6c%6Ci%61n%63e.org/",
+                  "http://cppalliance.org/");
+            check("http://%43%70%50%61%6c%6Ci%61n%43e.org/",
+                  "http://cppalliance.org/");
+            check("http://cppalliance.org/a/b/c/./../../g",
+                  "http://cppalliance.org/a/g");
+            check("http://cppalliance.org/aa/bb/cc/./../../gg",
+                  "http://cppalliance.org/aa/gg");
+            check("http://cppalliance.org/a/b/../../g",
+                  "http://cppalliance.org/g");
+            check("http://cppalliance.org/a/b/../../../g",
+                  "http://cppalliance.org/g");
+            check("http://cppalliance.org/..",
+                  "http://cppalliance.org/");
+        }
+
+        // remove_dot_segments
+        {
+            auto check = [](string_view p,
+                            string_view e) {
+                url u1 = parse_relative_ref(p).value();
+                u1.normalize_path();
+                BOOST_TEST(u1.encoded_path() == e);
+                url u2 = parse_relative_ref(e).value();
+                BOOST_TEST(u1.compare(u2) == 0);
+            };
+
+
+            check("/a/b/c/./../../g", "/a/g");
+            check("/aa/bb/cc/./../../gg", "/aa/gg");
+            check("../a/b/c/./../../g", "../a/g");
+            check("./a/b/c/./../../g", "a/g");
+            check(".././a/b/c/./../../g", "../a/g");
+            check("%2E%2E/./a/b/c/./../../g", "../a/g");
+            check("/a/b/../../g", "/g");
+            check("/a/b/../../../g", "/g");
+            check("mid/content=5/../6", "mid/6");
+            check("mid/content=5/../6/.", "mid/6/");
+            check("mid/content=5/../6/..", "mid/");
+            check("/..", "/");
+            check(".", "");
+            check("..", "..");
+            check("", "");
+        }
+
+        // inequality
+        {
+            auto check = [](string_view e1,
+                            string_view e2,
+                            int cmp) {
+                url_view u1 = parse_uri(e1).value();
+                url_view u2 = parse_uri(e2).value();
+                BOOST_TEST(u1.compare(u2) == cmp);
+                BOOST_TEST(u2.compare(u1) == -cmp);
+            };
+
+            check("http://cppalliance.org", "https://cppalliance.org", -1);
+            check("https://cppalliance.org", "httpz://cppalliance.org", -1);
+            check("http://boost.org", "http://cppalliance.org", -1);
+            check("http://boost.orgg", "http://boost.org", +1);
+            check("http://cppalliance.org/%2E%2E/./b/b/c/./../../g", "http://cppalliance.org/../a/g", +1);
+            check("http://cppalliance.org?l=v", "http://cppalliance.org?k=v", 1);
+            check("http://cppalliance.org?%6C=v", "http://cppalliance.org?k=v", 1);
+            check("http://cppalliance.org#frag", "http://cppalliance.org#glob", -1);
+            check("http://cppalliance.org#fra", "http://cppalliance.org#frag", -1);
+            check("http://cppalliance.org#frag", "http://cppalliance.org#fra", 1);
+        }
+
+        // path inequality
+        {
+            auto check = [](string_view e1,
+                            string_view e2,
+                            int cmp) {
+                url_view u1 = parse_relative_ref(e1).value();
+                url_view u2 = parse_relative_ref(e2).value();
+                BOOST_TEST(u1.compare(u2) == cmp);
+                BOOST_TEST(u2.compare(u1) == -cmp);
+            };
+
+            check("a/g", "/../g", 1);
+            check("./a/b/c/./../../g", "/a/b/../../../g", 1);
+            check("%2E/a/b/c/./../../g", "/a/b/../../../g", 1);
+            check("/../g", "a/g", -1);
+            check("/a/b/../../../g", "./a/b/c/./../../g", -1);
+            check("../g", "a/g", -1);
+            check("a/b/../../../g", "./a/b/c/./../../g", -1);
+            check("a/b/../../../%67", "./a/b/c/./../../g", -1);
+            check("/aa/g", "/aa/gg", -1);
+            check("../a/b", "..%2Fa/b", 1);
+            check("../a/b", "%2E%2E%2Fa/b", 1);
+            check("../a/b", "%2E%2E/a/b", 0);
+        }
+    }
+
+    //--------------------------------------------
+
     void
     run()
     {
@@ -1828,6 +1944,7 @@ class url_test
         testSegments();
         testResolution();
         testOstream();
+        testNormalize();
     }
 };