Skip to content

Commit

Permalink
Merge pull request #146 from JuliaIO/sk/check-windows-paths
Browse files Browse the repository at this point in the history
Check for bad Windows paths prior to extraction; allow check on creation
  • Loading branch information
StefanKarpinski authored Oct 25, 2022
2 parents 951955b + d7f8b3d commit 6bfc114
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 148 deletions.
70 changes: 61 additions & 9 deletions src/Tar.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,16 @@ include("extract.jl")
## official API: create, list, extract, rewrite, tree_hash

"""
create([ predicate, ] dir, [ tarball ]; [ skeleton ]) -> tarball
create(
[ predicate, ] dir, [ tarball ];
[ skeleton, ] [ portable = false ]
) -> tarball
predicate :: String --> Bool
dir :: AbstractString
tarball :: Union{AbstractString, AbstractCmd, IO}
skeleton :: Union{AbstractString, AbstractCmd, IO}
portable :: Bool
Create a tar archive ("tarball") of the directory `dir`. The resulting archive
is written to the path `tarball` or if no path is specified, a temporary path is
Expand All @@ -77,25 +81,30 @@ a "skeleton" to generate the tarball. You create a skeleton file by passing the
`skeleton` keyword to the `extract` command. If `create` is called with that
skeleton file and the extracted files haven't changed, an identical tarball is
recreated. The `skeleton` and `predicate` arguments cannot be used together.
If the `portable` flag is true then path names are checked for validity on
Windows, which ensures that they don't contain illegal characters or have names
that are reserved. See https://stackoverflow.com/a/31976060/659248 for details.
"""
function create(
predicate::Function,
dir::AbstractString,
tarball::Union{ArgWrite, Nothing} = nothing;
skeleton::Union{ArgRead, Nothing} = nothing,
portable::Bool = false,
)
check_create_dir(dir)
if skeleton === nothing
arg_write(tarball) do tar
create_tarball(predicate, tar, dir)
create_tarball(predicate, tar, dir, portable=portable)
end
else
predicate === true_predicate ||
error("create: predicate and skeleton cannot be used together")
check_create_skeleton(skeleton)
arg_read(skeleton) do skeleton
arg_write(tarball) do tar
recreate_tarball(tar, dir, skeleton)
recreate_tarball(tar, dir, skeleton, portable=portable)
end
end
end
Expand All @@ -105,8 +114,9 @@ function create(
dir::AbstractString,
tarball::Union{ArgWrite, Nothing} = nothing;
skeleton::Union{ArgRead, Nothing} = nothing,
portable::Bool = false,
)
create(true_predicate, dir, tarball, skeleton=skeleton)
create(true_predicate, dir, tarball, skeleton=skeleton, portable=portable)
end

"""
Expand Down Expand Up @@ -261,11 +271,15 @@ function extract(
end

"""
rewrite([ predicate, ], old_tarball, [ new_tarball ]) -> new_tarball
rewrite(
[ predicate, ] old_tarball, [ new_tarball ];
[ portable = false, ]
) -> new_tarball
predicate :: Header --> Bool
old_tarball :: Union{AbstractString, AbstractCmd, IO}
new_tarball :: Union{AbstractString, AbstractCmd, IO}
portable :: Bool
Rewrite `old_tarball` to the standard format that `create` generates, while also
checking that it doesn't contain anything that would cause `extract` to raise an
Expand All @@ -289,25 +303,31 @@ remove `.` entries and replace multiple consecutive slashes with a single slash.
If the entry has type `:hardlink`, the link target path is normalized the same
way so that it will match the path of the target entry; the size field is set to
the size of the target path (which must be an already-seen file).
If the `portable` flag is true then path names are checked for validity on
Windows, which ensures that they don't contain illegal characters or have names
that are reserved. See https://stackoverflow.com/a/31976060/659248 for details.
"""
function rewrite(
    predicate::Function,
    old_tarball::ArgRead,
    new_tarball::Union{ArgWrite, Nothing} = nothing;
    portable::Bool = false,
)
    # `portable` is a keyword argument (note the `;` after the last positional
    # argument) and is forwarded unchanged to `rewrite_tarball`.
    old_tarball = check_rewrite_old_tarball(old_tarball)
    # `arg_read` / `arg_write` supply the handles (`old_tar`, `new_tar`) that
    # `rewrite_tarball` reads from and writes to.
    arg_read(old_tarball) do old_tar
        arg_write(new_tarball) do new_tar
            rewrite_tarball(predicate, old_tar, new_tar, portable=portable)
        end
    end
end

# Convenience method: rewrite every entry by delegating to the predicate method
# with `true_predicate`, passing the `portable` keyword through.
function rewrite(
    old_tarball::ArgRead,
    new_tarball::Union{ArgWrite, Nothing} = nothing;
    portable::Bool = false,
)
    rewrite(true_predicate, old_tarball, new_tarball, portable=portable)
end

"""
Expand Down Expand Up @@ -461,4 +481,36 @@ check_tree_hash_tarball(tarball::AbstractString) =

check_tree_hash_tarball(tarball::ArgRead) = nothing

const Str = Union{String, SubString{String}}

# Special names on Windows: CON PRN AUX NUL COM1-9 LPT1-9
# we spell out uppercase/lowercase because of locales
# The `x` flag makes the regex ignore the layout whitespace below. The trailing
# group means a reserved name is also rejected when followed by an extension
# (e.g. `NUL.txt`), but not when it is merely a prefix (e.g. `CONSOLE`).
const WIN_SPECIAL_NAMES = r"^(
    [Cc][Oo][Nn] |
    [Pp][Rr][Nn] |
    [Aa][Uu][Xx] |
    [Nn][Uu][Ll] |
    ( [Cc][Oo][Mm] |
      [Ll][Pp][Tt] )[1-9]
)(\.|$)"x

"""
    check_windows_path(path, [ parts ])

Check that every component of `path` is acceptable as a Windows file name and
raise an error (via `error`) describing the first problem found otherwise.

`parts` are the components to check; by default they are obtained by splitting
`path` on runs of forward slashes. Empty components are skipped. A component is
rejected if it

- is not valid Unicode,
- contains a control character (anything below the space character) or one of
  the characters double quote, asterisk, colon, less-than, greater-than,
  question mark, backslash or vertical bar,
- matches `WIN_SPECIAL_NAMES` (a reserved device name, optionally followed by
  an extension).

Returns `nothing` when all components pass.
"""
function check_windows_path(
    path  :: AbstractString,
    parts :: AbstractVector{<:Str} = split(path, r"/+"),
)
    for part in parts
        isempty(part) && continue
        if !isvalid(part)
            error("invalid Unicode: $(repr(part)) in $(repr(path))")
        end
        for ch in part
            # Membership test written with the ASCII `in` keyword: the
            # operator between `ch` and the string literal was missing,
            # which left this line unparseable.
            if ch < ' ' || ch in "\"*:<>?\\|"
                error("illegal Windows char: $(repr(ch)) in $(repr(path))")
            end
        end
        if occursin(WIN_SPECIAL_NAMES, part)
            error("reserved Windows name: $(repr(part)) in $(repr(path))")
        end
    end
    return nothing
end

end # module
12 changes: 12 additions & 0 deletions src/create.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ function create_tarball(
tar::IO,
root::String;
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
portable::Bool = false,
)
write_tarball(tar, root, buf=buf) do sys_path, tar_path
portable && check_windows_path(tar_path)
hdr = path_header(sys_path, tar_path)
hdr.type != :directory && return hdr, sys_path
paths = Dict{String,String}()
Expand All @@ -22,13 +24,15 @@ function recreate_tarball(
root::String,
skeleton::IO;
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
portable::Bool = false,
)
check_skeleton_header(skeleton, buf=buf)
globals = Dict{String,String}()
while !eof(skeleton)
hdr = read_header(skeleton, globals=globals, buf=buf, tee=tar)
hdr === nothing && break
check_header(hdr)
portable && check_windows_path(hdr.path)
sys_path = joinpath(root, hdr.path)
if hdr.type == :file
write_data(tar, sys_path, size=hdr.size, buf=buf)
Expand All @@ -41,9 +45,11 @@ function rewrite_tarball(
old_tar::IO,
new_tar::IO;
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
portable::Bool = false,
)
tree = Dict{String,Any}()
read_tarball(predicate, old_tar; buf=buf) do hdr, parts
portable && check_windows_path(hdr.path, parts)
isempty(parts) && return
node = tree
name = pop!(parts)
Expand Down Expand Up @@ -138,6 +144,12 @@ function write_header(
size = hdr.size
link = hdr.link

# check for NULs
0x0 in codeunits(path) &&
    throw(ArgumentError("path contains NUL bytes: $(repr(path))"))
# report the offending link target itself rather than the entry's path
0x0 in codeunits(link) &&
    throw(ArgumentError("link contains NUL bytes: $(repr(link))"))

# determine if an extended header is needed
extended = Pair{String,String}[]
# WARNING: don't change the order of these insertions
Expand Down
1 change: 1 addition & 0 deletions src/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ function extract_tarball(
)
root = normpath(root)
paths = read_tarball(predicate, tar; buf=buf, skeleton=skeleton) do hdr, parts
Sys.iswindows() && check_windows_path(hdr.path, parts)
# get the file system version of the path
sys_path = isempty(parts) ? "." : reduce(joinpath, parts)
isabspath(sys_path) &&
Expand Down
Loading

0 comments on commit 6bfc114

Please sign in to comment.