Skip to content

Commit

Permalink
Reduce cache miss latency
Browse files Browse the repository at this point in the history
Stream data both to disk and, through a decompressor, into `Tar.tree_hash()`.
Reduces an 8MB cold-cache download from 3.5s to 2.5s when
testing locally.
  • Loading branch information
staticfloat committed Jun 11, 2020
1 parent 33ff089 commit 3aa8ea3
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 21 deletions.
3 changes: 3 additions & 0 deletions src/PkgServer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@ module PkgServer
using Pkg
using HTTP
using Base.Threads: Event, @spawn
import Base: fetch
using Random
using LibGit2
using FilesystemDatastructures
using JSON3, StructTypes
using Sockets
using Sockets: InetAddr
using Dates
using Tar
using TranscodingStreams, CodecZlib

include("resource.jl")
include("meta.jl")
Expand Down
58 changes: 37 additions & 21 deletions src/resource.jl
Original file line number Diff line number Diff line change
Expand Up @@ -276,42 +276,58 @@ function forget_failures()
end
end

"""
    tarball_git_hash(tarball::String)

Return the git tree hash of the contents of the gzip-compressed `tarball`, as a
hexadecimal string.

The tarball is unpacked with the system `tar` into a temporary directory, the
directory is hashed with `Pkg.GitTools.tree_hash`, and the directory is removed
again when `mktempdir` returns.

# Throws
- Any error raised by `run` when `tar` exits with a non-zero status (for example
  when `tarball` does not exist or is not a gzip-compressed tar archive).
"""
function tarball_git_hash(tarball::String)
    # Return the hash straight out of the `do` block instead of assigning to a
    # captured local from inside the closure.
    return mktempdir() do tmp_dir
        try
            run(`tar -C $tmp_dir -zxf $tarball`)
            bytes2hex(Pkg.GitTools.tree_hash(tmp_dir))
        finally
            # Loosen permissions even when extraction or hashing failed; presumably
            # this is what lets `mktempdir` delete read-only entries that were
            # unpacked from the tarball.
            chmod(tmp_dir, 0o777, recursive=true)
        end
    end
end

function download(server::String, resource::String)
@info "downloading resource" server=server resource=resource
hash = let m = match(hash_part_re, resource)
m !== nothing ? m.captures[1] : nothing
end

write_atomic_lru(resource) do temp_file, io
response = HTTP.get(
write_atomic_lru(resource) do temp_file, file_io
buffio = Base.BufferStream()
tar_check_io = Base.BufferStream()
tar_extract_task = @async begin
# Only do this work if hash !== nothing
if hash === nothing
return nothing
end
@info("tar_extract_task", resource, hash)
Tar.tree_hash(TranscodingStream(GzipDecompressor(), tar_check_io))
end
tee_task = @async begin
while !eof(buffio)
chunk = readavailable(buffio)
write(file_io, chunk)

if hash !== nothing
write(tar_check_io, chunk)
end
end
close(file_io)
close(tar_check_io)
end

# Get the response
response = HTTP.get(server * resource,
status_exception = false,
response_stream = io,
server * resource,
response_stream = buffio,
)

# Raise warnings about bad HTTP response codes
if response.status != 200
@warn "response status $(response.status)"
return false
end

# Wait for the tee task to finish
wait(tee_task)

# Fetch the result of the tarball hash check
calc_hash = fetch(tar_extract_task)

# If we're given a hash, then check tarball git hash
if hash !== nothing
tree_hash = tarball_git_hash(temp_file)
# Raise warnings about resource hash mismatches
if hash != tree_hash
@warn "resource hash mismatch" server=server resource=resource hash=tree_hash
return false
end
if hash != calc_hash
@warn "resource hash mismatch" server resource hash calc_hash
return false
end

return true
Expand Down

0 comments on commit 3aa8ea3

Please sign in to comment.