-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
use Tar.jl to create and extract tarballs #29
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,26 +10,11 @@ import Pkg | |
import Pkg.TOML | ||
import Pkg.Artifacts: download_artifact, artifact_path | ||
import LibGit2 | ||
|
||
# TODO: ensure all registries are git clones | ||
Pkg.update() | ||
import Tar | ||
|
||
mkpath(clones_dir) | ||
mkpath(static_dir) | ||
|
||
const tar_opts = ``` | ||
--format=posix | ||
--numeric-owner | ||
--owner=0 | ||
--group=0 | ||
--mode=go-w,+X | ||
--mtime=1970-01-01 | ||
--pax-option=exthdr.name=%d/PaxHeaders/%f,delete=atime,delete=ctime,delete=mtime | ||
--no-recursion | ||
``` | ||
# reproducible tarball options based on | ||
# http://h2.jaguarpaw.co.uk/posts/reproducible-tar/ | ||
|
||
const compress = `gzip -9` | ||
const decompress = `gzcat` | ||
|
||
|
@@ -43,25 +28,11 @@ function make_tarball( | |
tarball::AbstractString, | ||
tree_path::AbstractString, | ||
) | ||
paths = String[] | ||
for (root, dirs, files) in walkdir(tree_path) | ||
path = root != tree_path ? relpath(root, tree_path) : "" | ||
for file in [dirs; files] | ||
push!(paths, joinpath(path, file)) | ||
open(tarball, write=true) do io | ||
open(pipeline(compress, io), write=true) do io | ||
Tar.create(tree_path, io) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've observed a performance regression after forking this PR and #32, by building tarballs for the General registry and
The overall changes can be found at johnnychen94/StorageMirrorServer.jl@f77fb30
bash-3.2$ gtar --version
tar (GNU tar) 1.30
Copyright (C) 2017 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Written by John Gilmore and Jay Fenlason.
julia> versioninfo()
Julia Version 1.4.1
Commit 381693d3df* (2020-04-14 17:20 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin18.7.0)
CPU: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-8.0.1 (ORCJIT, skylake)
Environment:
JULIA_NUM_THREADS = 8
(StorageServer) pkg> st Tar
Project StorageServer v0.1.0
Status `~/Documents/Julia/StorageServer/Project.toml`
[a4e569a6] Tar v1.3.0
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Being 33% slower than the hyper-optimized GNU tar is quite good. I've created an issue about using sendfile to optimize tarball creation and extraction: JuliaIO/Tar.jl#33. However, matching the performance of GNU tar isn't really a high priority. It's also possible since we're not sending data directly to a real file descriptor but to a |
||
end | ||
end | ||
sort!(paths) | ||
mktemp() do paths_file, io | ||
for path in paths | ||
print(io, "$path\0") | ||
end | ||
close(io) | ||
open(tarball, write=true) do io | ||
tar_cmd = `gtar $tar_opts -cf - -C $tree_path --null -T $paths_file` | ||
run(pipeline(tar_cmd, compress, io)) | ||
end | ||
end | ||
return | ||
end | ||
|
||
function create_git_tarball( | ||
|
@@ -94,7 +65,9 @@ function verify_tarball_hash( | |
) | ||
local hash | ||
mktempdir() do tmp_dir | ||
run(pipeline(`$decompress $tarball`, `tar -C $tmp_dir -x`)) | ||
open(pipeline(tarball, decompress)) do io | ||
Tar.extract(io, tmp_dir) | ||
end | ||
hash = bytes2hex(Pkg.GitTools.tree_hash(tmp_dir)) | ||
chmod(tmp_dir, 0o777, recursive=true) | ||
end | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
:)