diff --git a/base/path.jl b/base/path.jl
index 3b8124f34f174a..526a4bb2cd5e54 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -613,3 +613,61 @@ relpath(path::AbstractString, startpath::AbstractString) =
 for f in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath)
     @eval $f(path::AbstractString) = $f(String(path))
 end
+
+"""
+    uripath(path::AbstractString)
+
+Encode `path` as a URI as per [RFC8089: The "file" URI
+Scheme](https://www.rfc-editor.org/rfc/rfc8089), [RFC3986: Uniform Resource
+Identifier (URI): Generic Syntax](https://www.rfc-editor.org/rfc/rfc3986), and
+the [Freedesktop File URI spec](https://www.freedesktop.org/wiki/Specifications/file-uri-spec/).
+
+`path` is first made absolute with `abspath`. Every byte that is not an RFC3986
+unreserved character or `/` is percent-encoded as `%` followed by two uppercase
+hexadecimal digits (the `:` or `|` of a Windows drive specifier is kept as is).
+On Windows the authority is empty for drive paths and is the server name for UNC
+paths; on other systems the authority is the local hostname.
+
+## Examples
+
+```julia-repl
+julia> uripath("/home/user/example file.jl") # On a unix machine
+"file://<hostname>/home/user/example%20file.jl"
+
+julia> uripath("C:\\\\Users\\\\user\\\\example file.jl") # On a windows machine
+"file:///C:/Users/user/example%20file.jl"
+```
+"""
+function uripath end
+
+@static if Sys.iswindows()
+    function uripath(path::String)
+        # pad=2: a percent-escape is exactly two hex digits per byte (0x09 => "%09", not "%9")
+        percent_escape(s) = # RFC3986 Section 2.1
+            '%' * join(map(b -> uppercase(string(b, base=16, pad=2)), codeunits(s)), '%')
+        encode_uri_component(s) = # RFC3986 Section 2.3
+            replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
+        path = abspath(path)
+        if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3
+            unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/')
+            string("file://", encode_uri_component(unixpath))
+        else
+            drive, localpath = splitdrive(path) # Assuming that non-UNC absolute paths on Windows always have a drive component
+            unixpath = join(eachsplit(localpath, path_separator_re, keepempty=false), '/')
+            encdrive = replace(encode_uri_component(drive), "%3A" => ':', "%7C" => '|') # RFC8089 Appendices D.2, E.2.1, and E.2.2
+            string("file:///", encdrive, '/', encode_uri_component(unixpath))
+        end
+    end
+else
+    function uripath(path::String)
+        # pad=2: a percent-escape is exactly two hex digits per byte (0x09 => "%09", not "%9")
+        percent_escape(s) = # RFC3986 Section 2.1
+            '%' * join(map(b -> uppercase(string(b, base=16, pad=2)), codeunits(s)), '%')
+        encode_uri_component(s) = # RFC3986 Section 2.3
+            replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
+        localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/')
+        string("file://", encode_uri_component(gethostname()), '/', encode_uri_component(localpath))
+    end
+end
+
+uripath(path::AbstractString) = uripath(String(path))
diff --git a/test/path.jl b/test/path.jl
index 2f4f2d0983a58e..405334e8398580 100644
--- a/test/path.jl
+++ b/test/path.jl
@@ -311,6 +311,20 @@
         test_relpath()
     end
 
+    @testset "uripath" begin
+        host = if Sys.iswindows() "" else gethostname() end
+        sysdrive, uridrive = if Sys.iswindows() "C:\\", "C:/" else "/", "" end
+        @test Base.Filesystem.uripath("$(sysdrive)some$(sep)file.txt") == "file://$host/$(uridrive)some/file.txt"
+        @test Base.Filesystem.uripath("$(sysdrive)another$(sep)$(sep)folder$(sep)file.md") == "file://$host/$(uridrive)another/folder/file.md"
+        @test Base.Filesystem.uripath("$(sysdrive)some file with ^odd% chars") == "file://$host/$(uridrive)some%20file%20with%20%5Eodd%25%20chars"
+        @test Base.Filesystem.uripath("$(sysdrive)weird chars like @#&()[]{}") == "file://$host/$(uridrive)weird%20chars%20like%20%40%23%26%28%29%5B%5D%7B%7D"
+        @test Base.Filesystem.uripath("$(sysdrive)tab\tseparated") == "file://$host/$(uridrive)tab%09separated"
+        @test Base.Filesystem.uripath("$sysdrive") == "file://$host/$uridrive"
+        @test Base.Filesystem.uripath(".") == Base.Filesystem.uripath(pwd())
+        @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)Δεδομένα") == "file://$host/$(uridrive)unicode/%CE%94%CE%B5%CE%B4%CE%BF%CE%BC%CE%AD%CE%BD%CE%B1"
+        @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)🧮🐛🔨") == "file://$host/$(uridrive)unicode/%F0%9F%A7%AE%F0%9F%90%9B%F0%9F%94%A8"
+    end
+
     if Sys.iswindows()
         @testset "issue #23646" begin
             @test lowercase(relpath("E:\\a\\b", "C:\\c")) == "e:\\a\\b"