Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase default buffer size #34

Merged
merged 3 commits into from
May 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/Tar.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ function Base.skip(io::Union{Base.Process, Base.ProcessChain}, n::Integer)
end
end

# 2 MiB to take advantage of transparent huge pages (THP) if enabled
const DEFAULT_BUFFER_SIZE = 2 * 1024 * 1024

include("header.jl")
include("create.jl")
include("extract.jl")
Expand Down
37 changes: 23 additions & 14 deletions src/create.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ function write_tarball(
out::IO,
sys_path::String, # path in the filesystem
tar_path::String = ""; # path in the tarball
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
w = 0
st = lstat(sys_path)
Expand Down Expand Up @@ -55,15 +55,15 @@ function write_tarball(
out::IO,
sys_path::String,
tar_path::String = "";
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
write_tarball(p->true, out, sys_path, tar_path, buf=buf)
end

function write_header(
out::IO,
hdr::Header;
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
# extract values
path = hdr.path
Expand Down Expand Up @@ -111,7 +111,7 @@ function write_extended_header(
out::IO,
metadata::Vector{Pair{String,String}};
type::Symbol = :x, # default: non-global extended header
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
type in (:x, :g) ||
throw(ArgumentError("invalid type flag for extended header: $(repr(type))"))
Expand Down Expand Up @@ -140,7 +140,7 @@ function write_standard_header(
hdr::Header;
name::AbstractString = "",
prefix::AbstractString = "",
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
name = String(name)
prefix = String(prefix)
Expand Down Expand Up @@ -169,8 +169,8 @@ function write_standard_header(
throw(ArgumentError("non-ASCII type flag value: $(repr(type))"))

# construct header block
resize!(buf, 512)
h = IOBuffer(fill!(buf, 0x00), write=true, truncate=false)
header_view = view(buf, 1:512)
h = IOBuffer(fill!(header_view, 0x00), write=true, truncate=false)
write(h, name) # name
seek(h, 100)
write(h, "$m \0") # mode
Expand Down Expand Up @@ -204,14 +204,14 @@ function write_standard_header(
write(h, prefix) # prefix

# fix the checksum
c = string(sum(buf), base=8, pad=6)
c = string(sum(header_view), base=8, pad=6)
@assert ncodeunits(c) ≤ 6
seek(h, 148)
write(h, "$c\0 ")
@assert position(h) == 156

# write header
w = write(out, buf)
w = write(out, header_view)
@assert w == 512
return w
end
Expand All @@ -220,14 +220,23 @@ function write_data(
tar::IO,
file::IO;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
resize!(buf, 512)
w = s = 0
@assert sizeof(buf) % 512 == 0
while !eof(file)
s += n = readbytes!(file, buf)
n < 512 && (buf[n+1:512] .= 0)
w += write(tar, buf)
if n < sizeof(buf)
r = n % 512
if r != 0
pad = n - r + 512
buf[n+1:pad] .= 0
n = pad
end
w += write(tar, view(buf, 1:n))
else
w += write(tar, buf)
end
end
s == size || error("""
data did not have the expected size:
Expand All @@ -242,7 +251,7 @@ function write_data(
tar::IO,
file::String;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
open(file) do file′
write_data(tar, file′, size=size, buf=buf)
Expand Down
80 changes: 40 additions & 40 deletions src/extract.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Fill the byte view `buf` completely from `io` and return `buf`.
# Throws `EOFError` if `io` ends before `length(buf)` bytes were read.
#
# Two definitions are selected at load time by the `VERSION` check below:
# one goes through `readbytes!`, the other through `read!`.
@static if VERSION < v"1.4.0-DEV"
    function view_read!(io, buf::SubArray{UInt8})
        # `readbytes!` returns the number of bytes actually read and does not
        # raise on a short read, so check the count explicitly; otherwise a
        # truncated stream would leave stale bytes in `buf` unnoticed.
        # `buf` holds `UInt8`s, so its length is its size in bytes.
        nread = readbytes!(io, buf, length(buf))
        nread < length(buf) && throw(EOFError())
        return buf
    end
else
    # `read!` fills the whole array or throws `EOFError`, and returns `buf`.
    view_read!(io, buf::SubArray{UInt8}) = read!(io, buf)
end

function list_tarball(
tar::IO;
raw::Bool = false,
strict::Bool = !raw,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
raw && strict &&
error("`raw=true` and `strict=true` options are incompatible")
Expand All @@ -22,7 +28,7 @@ function extract_tarball(
predicate::Function,
tarball::AbstractString,
root::String;
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
open(tarball) do tar
extract_tarball(predicate, tar, root, buf=buf)
Expand All @@ -33,7 +39,7 @@ function extract_tarball(
predicate::Function,
tar::IO,
root::String;
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
links = Set{String}()
while !eof(tar)
Expand Down Expand Up @@ -117,7 +123,7 @@ const IGNORED_EXTENDED_LOCAL_HEADERS = [
"uname",
]

function read_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, 512))
function read_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE))
hdr = read_standard_header(io, buf=buf)
hdr === nothing && return nothing
size = path = link = nothing
Expand Down Expand Up @@ -167,7 +173,7 @@ using Base.Checked: mul_with_overflow, add_with_overflow
function read_extended_metadata(
io::IO,
size::Integer;
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
n = readbytes!(io, buf, size)
n < size && "premature end of tar file"
Expand Down Expand Up @@ -207,30 +213,26 @@ function read_extended_metadata(
return metadata
end

function read_standard_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, 512))
resize!(buf, 512)
read!(io, buf)
all(iszero, buf) && return nothing
n = length(buf)
n == 0 && error("premature end of tar file")
n < 512 && error("incomplete trailing block with length $n < 512")
@assert n == 512
name = read_header_str(buf, 0, 100)
mode = read_header_int(buf, 100, 8)
size = buf[124+1] & 0x80 == 0 ?
read_header_int(buf, 124, 12) :
read_header_bin(buf, 124, 12)
chksum = read_header_int(buf, 148, 8)
type = read_header_chr(buf, 156)
link = read_header_str(buf, 157, 100)
magic = read_header_str(buf, 257, 6)
version = read_header_str(buf, 263, 2)
prefix = read_header_str(buf, 345, 155)
function read_standard_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE))
header_view = view(buf, 1:512)
view_read!(io, header_view)
all(iszero, header_view) && return nothing
name = read_header_str(header_view, 0, 100)
mode = read_header_int(header_view, 100, 8)
size = header_view[124+1] & 0x80 == 0 ?
read_header_int(header_view, 124, 12) :
read_header_bin(header_view, 124, 12)
chksum = read_header_int(header_view, 148, 8)
type = read_header_chr(header_view, 156)
link = read_header_str(header_view, 157, 100)
magic = read_header_str(header_view, 257, 6)
version = read_header_str(header_view, 263, 2)
prefix = read_header_str(header_view, 345, 155)
# check various fields
buf[index_range(148, 8)] .= ' ' # fill checksum field with spaces
buf_sum = sum(buf)
header_view[index_range(148, 8)] .= ' ' # fill checksum field with spaces
buf_sum = sum(header_view)
chksum == buf_sum ||
error("incorrect header checksum = $chksum; should be $buf_sum\n$(repr(String(buf)))")
error("incorrect header checksum = $chksum; should be $buf_sum\n$(repr(String(header_view)))")
occursin(r"^ustar\s*$", magic) ||
error("unknown magic string for tar file: $(repr(magic))")
occursin(r"^0* *$", version) ||
Expand All @@ -239,15 +241,16 @@ function read_standard_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef,
return Header(path, to_symbolic_type(type), mode, size, link)
end

# Size of `size` bytes once padded out to whole 512-byte blocks:
# for a non-negative integer `size`, the smallest multiple of 512 that is
# greater than or equal to it.
function round_up(size)
    blocks = div(size + 511, 512)
    return 512 * blocks
end
function skip_data(tar::IO, size::Integer)
skip(tar, 512 * ((size + 511) ÷ 512))
skip(tar, round_up(size))
end

# Convert a zero-based byte `offset` and a field `length` into the range of
# one-based indices covering that field, i.e. `offset+1` through
# `offset+length`.
function index_range(offset::Int, length::Int)
    first_index = offset + 1
    last_index = offset + length
    return first_index:last_index
end

read_header_chr(buf::Vector{UInt8}, offset::Int) = Char(buf[offset+1])
read_header_chr(buf::AbstractVector{UInt8}, offset::Int) = Char(buf[offset+1])

function read_header_str(buf::Vector{UInt8}, offset::Int, length::Int)
function read_header_str(buf::AbstractVector{UInt8}, offset::Int, length::Int)
r = index_range(offset, length)
for i in r
byte = buf[i]
Expand All @@ -256,7 +259,7 @@ function read_header_str(buf::Vector{UInt8}, offset::Int, length::Int)
return String(buf[r])
end

function read_header_int(buf::Vector{UInt8}, offset::Int, length::Int)
function read_header_int(buf::AbstractVector{UInt8}, offset::Int, length::Int)
n = UInt64(0)
for i in index_range(offset, length)
byte = buf[i]
Expand All @@ -269,7 +272,7 @@ function read_header_int(buf::Vector{UInt8}, offset::Int, length::Int)
return n
end

function read_header_bin(buf::Vector{UInt8}, offset::Int, length::Int)
function read_header_bin(buf::AbstractVector{UInt8}, offset::Int, length::Int)
n = UInt64(0)
for i in index_range(offset, length)
n <<= 8
Expand All @@ -282,16 +285,13 @@ function read_data(
tar::IO,
file::IO;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)::Nothing
resize!(buf, 512)
while size > 0
r = readbytes!(tar, buf)
r = readbytes!(tar, buf, size < sizeof(buf) ? round_up(size) : sizeof(buf))
r < 512 && eof(io) && error("premature end of tar file")
size < 512 && resize!(buf, size)
size -= write(file, buf)
size -= write(file, view(buf, 1:min(r, size)))
end
resize!(buf, 512)
@assert size == 0
return
end
Expand All @@ -300,7 +300,7 @@ function read_data(
tar::IO,
file::String;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)::Nothing
open(file, write=true) do file′
read_data(tar, file′, size=size, buf=buf)
Expand All @@ -310,7 +310,7 @@ end
function read_data(
tar::IO;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)::String
io = IOBuffer(sizehint=size)
read_data(tar, io, size=size, buf=buf)
Expand Down