Skip to content

Commit

Permalink
Increase default buffer size (#34)
Browse files Browse the repository at this point in the history
* Increase default buffer size

Makes creating a tarball and compressing it with `zstdmt` about 6x
faster (5s, down from 30s). Raw `tar` is still about 20% faster, and
closing that remaining gap would probably need #33.

* Buffer for extract also

* 1.3 compat
  • Loading branch information
Keno authored May 3, 2020
1 parent b8bd833 commit 0676c4e
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 54 deletions.
3 changes: 3 additions & 0 deletions src/Tar.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ function Base.skip(io::Union{Base.Process, Base.ProcessChain}, n::Integer)
end
end

# Default length, in bytes, of the scratch buffers used when reading and
# writing tarballs: 2 MiB, to take advantage of THP (transparent huge pages)
# if enabled.
const DEFAULT_BUFFER_SIZE = 2 * 1024^2

include("header.jl")
include("create.jl")
include("extract.jl")
Expand Down
37 changes: 23 additions & 14 deletions src/create.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ function write_tarball(
out::IO,
sys_path::String, # path in the filesystem
tar_path::String = ""; # path in the tarball
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
w = 0
st = lstat(sys_path)
Expand Down Expand Up @@ -55,15 +55,15 @@ function write_tarball(
out::IO,
sys_path::String,
tar_path::String = "";
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
write_tarball(p->true, out, sys_path, tar_path, buf=buf)
end

function write_header(
out::IO,
hdr::Header;
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
# extract values
path = hdr.path
Expand Down Expand Up @@ -111,7 +111,7 @@ function write_extended_header(
out::IO,
metadata::Vector{Pair{String,String}};
type::Symbol = :x, # default: non-global extended header
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
type in (:x, :g) ||
throw(ArgumentError("invalid type flag for extended header: $(repr(type))"))
Expand Down Expand Up @@ -140,7 +140,7 @@ function write_standard_header(
hdr::Header;
name::AbstractString = "",
prefix::AbstractString = "",
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
name = String(name)
prefix = String(prefix)
Expand Down Expand Up @@ -169,8 +169,8 @@ function write_standard_header(
throw(ArgumentError("non-ASCII type flag value: $(repr(type))"))

# construct header block
resize!(buf, 512)
h = IOBuffer(fill!(buf, 0x00), write=true, truncate=false)
header_view = view(buf, 1:512)
h = IOBuffer(fill!(header_view, 0x00), write=true, truncate=false)
write(h, name) # name
seek(h, 100)
write(h, "$m \0") # mode
Expand Down Expand Up @@ -204,14 +204,14 @@ function write_standard_header(
write(h, prefix) # prefix

# fix the checksum
c = string(sum(buf), base=8, pad=6)
c = string(sum(header_view), base=8, pad=6)
@assert ncodeunits(c) ≤ 6
seek(h, 148)
write(h, "$c\0 ")
@assert position(h) == 156

# write header
w = write(out, buf)
w = write(out, header_view)
@assert w == 512
return w
end
Expand All @@ -220,14 +220,23 @@ function write_data(
tar::IO,
file::IO;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
resize!(buf, 512)
w = s = 0
@assert sizeof(buf) % 512 == 0
while !eof(file)
s += n = readbytes!(file, buf)
n < 512 && (buf[n+1:512] .= 0)
w += write(tar, buf)
if n < sizeof(buf)
r = n % 512
if r != 0
pad = n - r + 512
buf[n+1:pad] .= 0
n = pad
end
w += write(tar, view(buf, 1:n))
else
w += write(tar, buf)
end
end
s == size || error("""
data did not have the expected size:
Expand All @@ -242,7 +251,7 @@ function write_data(
tar::IO,
file::String;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
open(file) do file′
write_data(tar, file′, size=size, buf=buf)
Expand Down
80 changes: 40 additions & 40 deletions src/extract.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Fill the view `buf` with bytes read from `io`. The method body is selected
# once, at load time, from the running Julia version: releases before 1.4 go
# through `readbytes!` with an explicit byte count, later ones call `read!`
# on the view directly.
@static if VERSION >= v"1.4.0-DEV"
    function view_read!(io, buf::SubArray{UInt8})
        return read!(io, buf)
    end
else
    function view_read!(io, buf::SubArray{UInt8})
        return readbytes!(io, buf, sizeof(buf))
    end
end

function list_tarball(
tar::IO;
raw::Bool = false,
strict::Bool = !raw,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
raw && strict &&
error("`raw=true` and `strict=true` options are incompatible")
Expand All @@ -22,7 +28,7 @@ function extract_tarball(
predicate::Function,
tarball::AbstractString,
root::String;
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
open(tarball) do tar
extract_tarball(predicate, tar, root, buf=buf)
Expand All @@ -33,7 +39,7 @@ function extract_tarball(
predicate::Function,
tar::IO,
root::String;
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
links = Set{String}()
while !eof(tar)
Expand Down Expand Up @@ -117,7 +123,7 @@ const IGNORED_EXTENDED_LOCAL_HEADERS = [
"uname",
]

function read_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, 512))
function read_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE))
hdr = read_standard_header(io, buf=buf)
hdr === nothing && return nothing
size = path = link = nothing
Expand Down Expand Up @@ -167,7 +173,7 @@ using Base.Checked: mul_with_overflow, add_with_overflow
function read_extended_metadata(
io::IO,
size::Integer;
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)
n = readbytes!(io, buf, size)
n < size && "premature end of tar file"
Expand Down Expand Up @@ -207,30 +213,26 @@ function read_extended_metadata(
return metadata
end

function read_standard_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, 512))
resize!(buf, 512)
read!(io, buf)
all(iszero, buf) && return nothing
n = length(buf)
n == 0 && error("premature end of tar file")
n < 512 && error("incomplete trailing block with length $n < 512")
@assert n == 512
name = read_header_str(buf, 0, 100)
mode = read_header_int(buf, 100, 8)
size = buf[124+1] & 0x80 == 0 ?
read_header_int(buf, 124, 12) :
read_header_bin(buf, 124, 12)
chksum = read_header_int(buf, 148, 8)
type = read_header_chr(buf, 156)
link = read_header_str(buf, 157, 100)
magic = read_header_str(buf, 257, 6)
version = read_header_str(buf, 263, 2)
prefix = read_header_str(buf, 345, 155)
function read_standard_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE))
header_view = view(buf, 1:512)
view_read!(io, header_view)
all(iszero, header_view) && return nothing
name = read_header_str(header_view, 0, 100)
mode = read_header_int(header_view, 100, 8)
size = header_view[124+1] & 0x80 == 0 ?
read_header_int(header_view, 124, 12) :
read_header_bin(header_view, 124, 12)
chksum = read_header_int(header_view, 148, 8)
type = read_header_chr(header_view, 156)
link = read_header_str(header_view, 157, 100)
magic = read_header_str(header_view, 257, 6)
version = read_header_str(header_view, 263, 2)
prefix = read_header_str(header_view, 345, 155)
# check various fields
buf[index_range(148, 8)] .= ' ' # fill checksum field with spaces
buf_sum = sum(buf)
header_view[index_range(148, 8)] .= ' ' # fill checksum field with spaces
buf_sum = sum(header_view)
chksum == buf_sum ||
error("incorrect header checksum = $chksum; should be $buf_sum\n$(repr(String(buf)))")
error("incorrect header checksum = $chksum; should be $buf_sum\n$(repr(String(header_view)))")
occursin(r"^ustar\s*$", magic) ||
error("unknown magic string for tar file: $(repr(magic))")
occursin(r"^0* *$", version) ||
Expand All @@ -239,15 +241,16 @@ function read_standard_header(io::IO; buf::Vector{UInt8} = Vector{UInt8}(undef,
return Header(path, to_symbolic_type(type), mode, size, link)
end

# Round a non-negative `size` up to the nearest multiple of 512; a value that
# is already a multiple of 512 comes back unchanged.
function round_up(size)
    blocks = (size + 511) ÷ 512
    return 512 * blocks
end
function skip_data(tar::IO, size::Integer)
skip(tar, 512 * ((size + 511) ÷ 512))
skip(tar, round_up(size))
end

# Turn a 0-based `offset` and a `length` into the matching 1-based index
# range, i.e. `offset + 1` through `offset + length`.
function index_range(offset::Int, length::Int)
    return (1:length) .+ offset
end

read_header_chr(buf::Vector{UInt8}, offset::Int) = Char(buf[offset+1])
read_header_chr(buf::AbstractVector{UInt8}, offset::Int) = Char(buf[offset+1])

function read_header_str(buf::Vector{UInt8}, offset::Int, length::Int)
function read_header_str(buf::AbstractVector{UInt8}, offset::Int, length::Int)
r = index_range(offset, length)
for i in r
byte = buf[i]
Expand All @@ -256,7 +259,7 @@ function read_header_str(buf::Vector{UInt8}, offset::Int, length::Int)
return String(buf[r])
end

function read_header_int(buf::Vector{UInt8}, offset::Int, length::Int)
function read_header_int(buf::AbstractVector{UInt8}, offset::Int, length::Int)
n = UInt64(0)
for i in index_range(offset, length)
byte = buf[i]
Expand All @@ -269,7 +272,7 @@ function read_header_int(buf::Vector{UInt8}, offset::Int, length::Int)
return n
end

function read_header_bin(buf::Vector{UInt8}, offset::Int, length::Int)
function read_header_bin(buf::AbstractVector{UInt8}, offset::Int, length::Int)
n = UInt64(0)
for i in index_range(offset, length)
n <<= 8
Expand All @@ -282,16 +285,13 @@ function read_data(
tar::IO,
file::IO;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)::Nothing
resize!(buf, 512)
while size > 0
r = readbytes!(tar, buf)
r = readbytes!(tar, buf, size < sizeof(buf) ? round_up(size) : sizeof(buf))
r < 512 && eof(io) && error("premature end of tar file")
size < 512 && resize!(buf, size)
size -= write(file, buf)
size -= write(file, view(buf, 1:min(r, size)))
end
resize!(buf, 512)
@assert size == 0
return
end
Expand All @@ -300,7 +300,7 @@ function read_data(
tar::IO,
file::String;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)::Nothing
open(file, write=true) do file′
read_data(tar, file′, size=size, buf=buf)
Expand All @@ -310,7 +310,7 @@ end
function read_data(
tar::IO;
size::Integer,
buf::Vector{UInt8} = Vector{UInt8}(undef, 512),
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
)::String
io = IOBuffer(sizehint=size)
read_data(tar, io, size=size, buf=buf)
Expand Down

0 comments on commit 0676c4e

Please sign in to comment.