From 5623f49fd082aa4eb25eb78b511d5f5160a256b4 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Sun, 14 Mar 2021 17:59:12 -0600 Subject: [PATCH 1/6] Add the resolvereference function. Adds a new function, `resolvereference`, that resolves references between a base URI and a reference URI. This function attempts to comply with RFC 3986 Section 5.2 (https://tools.ietf.org/html/rfc3986#section-5.2). Closes #18. --- src/URIs.jl | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/URIs.jl b/src/URIs.jl index 6b3e476..3a554cf 100644 --- a/src/URIs.jl +++ b/src/URIs.jl @@ -2,7 +2,8 @@ module URIs export URI, queryparams, absuri, - escapeuri, unescapeuri, escapepath + escapeuri, unescapeuri, escapepath, + resolvereference import Base.== @@ -518,6 +519,73 @@ function Base.joinpath(uri::URI, parts::String...) return URI(uri; path=normpath(path)) end +""" + resolvereference(base, ref) + +Resolve a URI reference `ref` relative to the absolute base URI `base`, +complying with RFC 3986 Section 5.2. +""" +function resolvereference(base::URI, ref::URI) + # In the case where the second URI is absolute, we just return the + # reference URI. Refer to https://tools.ietf.org/html/rfc3986#section-5.2.2 + # + # We also default to just returning the reference when the base URI is + # non-absolute. + if base.scheme == "" || ref.scheme != "" + return ref + end + + host, port, path, query = if ref.host != "" + ref.host, ref.port, ref.path, ref.query + else + path, query = if ref.path == "" + base.path, (ref.query == "") ? base.query : ref.query + else + path = startswith(ref.path, "/") ? ref.path : resolveref_merge(base, ref) + #path = remove_dot_segments(path) + path, ref.query + end + base.host, base.port, path, query + end + + path = normpath(path) + scheme = base.scheme + fragment = ref.fragment + userinfo = (ref.userinfo == "") ? 
base.userinfo : ref.userinfo + + URI(; + scheme=scheme, + userinfo=userinfo, + host=host, + port=port, + path=path, + query=query, + fragment=fragment + ) +end + +resolvereference(base, ref) = resolvereference(URI(base), ref) +resolvereference(base::URI, ref) = resolvereference(base, URI(ref)) + +""" + resolveref_merge(base, ref) + +Implementation of the "merge" routine described in RFC 3986 Sec. 5.2.3 for merging +a relative-path reference with the path of the base URI. +""" +function resolveref_merge(base, ref) + if base.host != "" && base.path == "" + "/" * ref.path + else + last_slash = findprev('/', base.path, lastindex(base.path)) + if last_slash === nothing + ref.path + else + base.path[1:last_slash] * ref.path + end + end +end + function __init__() Threads.resize_nthreads!(uri_reference_regex) foreach(x -> Base.compile(x.re), uri_reference_regex) From 18ef1dd41368d0ab82a64a72380b331d3c6e324b Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Sun, 14 Mar 2021 18:02:00 -0600 Subject: [PATCH 2/6] Add tests for resolvereference. 
--- test/uri.jl | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/test/uri.jl b/test/uri.jl index b863c0a..055cdcb 100644 --- a/test/uri.jl +++ b/test/uri.jl @@ -547,4 +547,55 @@ urltests = URLTest[ @test joinpath(URIs.URI("http://a.b.c/"), "b", "c") == URI("http://a.b.c/b/c") @test joinpath(URIs.URI("http://a.b.c"), "b", "c") == URI("http://a.b.c/b/c") end + + @testset "resolvereference" begin + # Tests for resolving URI references, as defined in RFC 3986 Section 5.4 + + # Perform some basic tests resolving absolute and relative references to a base URI + uri = URI("http://example.org/foo/bar/") + @test resolvereference(uri, "/baz") == URI("http://example.org/baz") + @test resolvereference(uri, "baz/") == URI("http://example.org/foo/bar/baz/") + @test resolvereference(uri, "../baz/") == URI("http://example.org/foo/baz/") + + # If the base URI's path doesn't end with a /, we handle relative URIs a little differently + uri = URI("http://example.org/foo/bar") + @test resolvereference(uri, "baz") == URI("http://example.org/foo/baz") + @test resolvereference(uri, "../baz") == URI("http://example.org/baz") + + # If the second URI is absolute, or the first URI isn't, we should just return the + # second URI.
+ @test resolvereference("http://www.example.org", "http://example.com") == URI("http://example.com") + @test resolvereference("http://example.org/foo", "http://example.org/bar") == URI("http://example.org/bar") + @test resolvereference("/foo", "/bar/baz") == URI("/bar/baz") + + # "Normal examples" specified in RFC 3986 Section 5.4.1 + base = URI("http://a/b/c/d;p?q") + @test resolvereference(base, "g:h") == URI("g:h") + @test resolvereference(base, "g") == URI("http://a/b/c/g") + @test resolvereference(base, "./g") == URI("http://a/b/c/g") + @test resolvereference(base, "g/") == URI("http://a/b/c/g/") + @test resolvereference(base, "/g") == URI("http://a/g") + @test resolvereference(base, "//g") == URI("http://g") + @test resolvereference(base, "?y") == URI("http://a/b/c/d;p?y") + @test resolvereference(base, "g?y") == URI("http://a/b/c/g?y") + @test resolvereference(base, "#s") == URI("http://a/b/c/d;p?q#s") + @test resolvereference(base, "g#s") == URI("http://a/b/c/g#s") + @test resolvereference(base, "g?y#s") == URI("http://a/b/c/g?y#s") + @test resolvereference(base, ";x") == URI("http://a/b/c/;x") + @test resolvereference(base, "g;x") == URI("http://a/b/c/g;x") + @test resolvereference(base, "g;x?y#s") == URI("http://a/b/c/g;x?y#s") + @test resolvereference(base, "") == URI("http://a/b/c/d;p?q") + @test resolvereference(base, ".") == URI("http://a/b/c/") + @test resolvereference(base, "./") == URI("http://a/b/c/") + @test resolvereference(base, "..") == URI("http://a/b/") + @test resolvereference(base, "../") == URI("http://a/b/") + @test resolvereference(base, "../g") == URI("http://a/b/g") + @test resolvereference(base, "../..") == URI("http://a/") + @test resolvereference(base, "../../") == URI("http://a/") + @test resolvereference(base, "../../g") == URI("http://a/g") + + # "Abnormal examples" specified in RFC 3986 Section 5.4.2 + @test resolvereference(base, "../../../g") == URI("http://a/g") + @test resolvereference(base, "../../../../g") == URI("http://a/g") + end end
From 4762fed2fad0789afe30f232e3ff9dad221f3af2 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Sun, 14 Mar 2021 18:19:01 -0600 Subject: [PATCH 3/6] Improve resolvereference documentation. - Expand the documentation for resolvereference and add some examples for it. - Add resolvereference to the public documentation. --- docs/src/index.md | 1 + src/URIs.jl | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index 3322c89..228ceff 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -57,6 +57,7 @@ absuri escapeuri unescapeuri escapepath +resolvereference URIs.splitpath Base.isvalid(::URI) ``` diff --git a/src/URIs.jl b/src/URIs.jl index 3a554cf..d3e4215 100644 --- a/src/URIs.jl +++ b/src/URIs.jl @@ -523,7 +523,24 @@ end resolvereference(base, ref) Resolve a URI reference `ref` relative to the absolute base URI `base`, -complying with RFC 3986 Section 5.2. +complying with RFC 3986 Section 5.2. `base` and `ref` should both be +of type `Union{URI,AbstractString}`. + +If `ref` is an absolute URI, then this function just returns a copy +of `ref`. + +# Examples + +```jldoctest; setup = :(using URIs) +julia> u = resolvereference("http://example.org/foo/bar/", "/baz/") +URI("http://example.org/baz/") + +julia> resolvereference(u, "./hello/world") +URI("http://example.org/baz/hello/world") + +julia> resolvereference(u, "http://localhost:8000") +URI("http://localhost:8000") +``` """ function resolvereference(base::URI, ref::URI) # In the case where the second URI is absolute, we just return the From 18a2565acfd7820ce18dee80b7199f5d6b34f3eb Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Sun, 14 Mar 2021 18:23:11 -0600 Subject: [PATCH 4/6] Fix compatibility with Julia 1.0. 
Change the version of `findprev` that `resolvereference` uses from `findprev(::AbstractChar, ::AbstractString, ::Integer)` to `findprev(::AbstractString, ::AbstractString, ::Integer)` (since the former has only been available since Julia 1.3). --- src/URIs.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/URIs.jl b/src/URIs.jl index d3e4215..6b480b7 100644 --- a/src/URIs.jl +++ b/src/URIs.jl @@ -594,10 +594,11 @@ function resolveref_merge(base, ref) if base.host != "" && base.path == "" "/" * ref.path else - last_slash = findprev('/', base.path, lastindex(base.path)) + last_slash = findprev("/", base.path, lastindex(base.path)) if last_slash === nothing ref.path else + last_slash = first(last_slash) base.path[1:last_slash] * ref.path end end From aa0b120e962608a51c3ccb433452778a9f742374 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Sun, 14 Mar 2021 18:56:07 -0600 Subject: [PATCH 5/6] Minor improvements to resolvereference. --- src/URIs.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/URIs.jl b/src/URIs.jl index 6b480b7..b53807d 100644 --- a/src/URIs.jl +++ b/src/URIs.jl @@ -548,18 +548,17 @@ function resolvereference(base::URI, ref::URI) # # We also default to just returning the reference when the base URI is # non-absolute. - if base.scheme == "" || ref.scheme != "" + if isempty(base.scheme) || !isempty(ref.scheme) return ref end - host, port, path, query = if ref.host != "" + host, port, path, query = if !isempty(ref.host) ref.host, ref.port, ref.path, ref.query else - path, query = if ref.path == "" - base.path, (ref.query == "") ? base.query : ref.query + path, query = if isempty(ref.path) + base.path, isempty(ref.query) ? base.query : ref.query else path = startswith(ref.path, "/") ? 
ref.path : resolveref_merge(base, ref) - #path = remove_dot_segments(path) path, ref.query end base.host, base.port, path, query @@ -568,7 +567,7 @@ function resolvereference(base::URI, ref::URI) path = normpath(path) scheme = base.scheme fragment = ref.fragment - userinfo = (ref.userinfo == "") ? base.userinfo : ref.userinfo + userinfo = isempty(ref.userinfo) ? base.userinfo : ref.userinfo URI(; scheme=scheme, @@ -591,7 +590,7 @@ Implementation of the "merge" routine described in RFC 3986 Sec. 5.2.3 for mergi a relative-path reference with the path of the base URI. """ function resolveref_merge(base, ref) - if base.host != "" && base.path == "" + if !isempty(base.host) && isempty(base.path) "/" * ref.path else last_slash = findprev("/", base.path, lastindex(base.path)) From 558c6d04aef4eb383fe2e1cdd3d76c97d6650eef Mon Sep 17 00:00:00 2001 From: Will Shand <17100608+kernelmethod@users.noreply.github.com> Date: Thu, 29 Apr 2021 21:47:15 -0600 Subject: [PATCH 6/6] Apply suggestions from code review for `resolvereference` (PR #19) Co-authored-by: Fredrik Ekre --- src/URIs.jl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/URIs.jl b/src/URIs.jl index b53807d..8a61569 100644 --- a/src/URIs.jl +++ b/src/URIs.jl @@ -520,14 +520,12 @@ function Base.joinpath(uri::URI, parts::String...) end """ - resolvereference(base, ref) + resolvereference(base::Union{URI,AbstractString}, ref::Union{URI,AbstractString}) -> URI Resolve a URI reference `ref` relative to the absolute base URI `base`, -complying with RFC 3986 Section 5.2. `base` and `ref` should both be -of type `Union{URI,AbstractString}`. +complying with [RFC 3986 Section 5.2](https://tools.ietf.org/html/rfc3986#section-5.2). -If `ref` is an absolute URI, then this function just returns a copy -of `ref`. +If `ref` is an absolute URI, return `ref` unchanged. 
# Examples @@ -580,8 +578,7 @@ function resolvereference(base::URI, ref::URI) ) end -resolvereference(base, ref) = resolvereference(URI(base), ref) -resolvereference(base::URI, ref) = resolvereference(base, URI(ref)) +resolvereference(base, ref) = resolvereference(URI(base), URI(ref)) """ resolveref_merge(base, ref)