Skip to content

Commit

Permalink
use symlink to hashes to avoid mirror failures
Browse files Browse the repository at this point in the history
Mirrors may run rsync without --checksum and rely on its mtime-based
skipping, which skips files that differ in content but have the same
mtime. This results in an inconsistent mirror.

Avoid this by storing the real content in files named after their content
hash and creating the original file names as symlinks to those files.

When the rpm macro %clamp_mtime_to_source_date_epoch is set to Y to
enable reproducible builds, the mtime of files in the rpm will be set to
the date of the last changes entry, even though build dependencies that
affect the content may be newer. This is relevant when such an rpm is
extracted into a repo that is used by the installer: some mirrors may
fail to sync the newest content because they skipped the changed files,
which would make an installer using such a mirror fail.

Fixes: https://bugzilla.opensuse.org/show_bug.cgi?id=1148824
  • Loading branch information
JanZerebecki committed Aug 8, 2023
1 parent a15501e commit 3fd5a87
Showing 1 changed file with 90 additions and 9 deletions.
99 changes: 90 additions & 9 deletions modules/KIWIUtil.pm
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@ package KIWIUtil;
#------------------------------------------
use strict;
use warnings;
use Digest::SHA;
use File::Basename;
use File::Copy;
use File::Glob ':glob';
use File::Find;
use File::Path;
use POSIX qw(strftime);
use Time::HiRes qw(utime);

#==========================================
# KIWI Modules
Expand Down Expand Up @@ -475,9 +480,14 @@ sub unify {
#------------------------------------------
sub unpac_package {
# ...
# implementation of the pac_unpack script
# of the SuSE autobuild team
# original: /mounts/work/src/bin/tools/pac_unpack
# To ensure rsync works even without --checksum, every file name in the
# archive is created as a symlink to a file that holds the real content and
# is named after its content hash. This is needed because, due to how
# SOURCE_DATE_EPOCH works for reproducible builds, newer builds can produce
# files with the same mtime as previous builds but with changed content.
#
# Based on a script from the SuSE autobuild team:
# /mounts/work/src/bin/tools/pac_unpack
#--------------------------------------
# params
# $this - class name; always call as member
Expand All @@ -500,26 +510,97 @@ sub unpac_package {
goto up_failed;
}
}

my $tmpdir = KIWIQX::qxx ("mktemp -qdt kiwiunpac.XXXXXX"); chomp $tmpdir;

if($p_uri =~ m{(.*\.tgz|.*\.tar\.gz|.*\.taz|.*\.tar\.Z)}) {
my $out = qx(cd $dir && tar -zxvfp $p_uri);
my $out = qx(cd $tmpdir && tar -zxvfp $p_uri);
my $status = $?>>8;
if($status != 0) {
my $msg = "[E] command cp $dir && tar xvzfp $p_uri failed!\n";
my $msg = "[E] command cd $tmpdir && tar xvzfp $p_uri failed!\n";
$this->{m_collect}->logMsg("E", $msg);
$this->{m_collect}->logMsg("E", "\t$out\n");
$retval = 5;
goto up_failed;
} else {
my $msg = "[I] unpacked $p_uri in directory $dir\n";
$this->{m_collect}->logMsg("I", $msg);
}
} elsif($p_uri =~ m{.*\.rpm}i) {
my $out = qx(cd $dir && unrpm -q $p_uri);
my $out = qx(cd $tmpdir && unrpm -q $p_uri);
if($? != 0) {
$this->{m_collect}->logMsg("E", "[E] command cd $tmpdir && unrpm -q $p_uri failed, output follows!\n");
$this->{m_collect}->logMsg("E", "\t$out\n");
$retval = 5;
goto up_failed;
}
} else {
$this->{m_collect}->logMsg("E", "[E] cannot process file $p_uri\n");
$retval = 4;
goto up_failed;
}

# Symlink the filename to a file with its content named after its content hash.
if(!$ENV{SOURCE_DATE_EPOCH}) {
$this->{m_collect}->logMsg("W", "[W] Failed to read environment variable SOURCE_DATE_EPOCH, setting to 0.");
$ENV{SOURCE_DATE_EPOCH} = "0";
}
my $date = strftime("%Y%m%d%H%M.%S", gmtime($ENV{SOURCE_DATE_EPOCH}));
my @links;
find({ wanted => sub {
my $postfix = substr($_, length($tmpdir));
if (length($postfix) == 0) {
return;
}
if (-d $_) {
if(!mkdir("$dir/$postfix")) {
$this->{m_collect}->logMsg("E", "[E] failure from mkdir($dir/$postfix): $!\n");
$retval = 2;
goto up_failed;
}
# index 9 is mtime
my $time = (stat($_))[9];
# restore mtime from the archive which is supposed to be reproducible
if(1 != utime($time, $time, "$dir/$postfix")) {
$this->{m_collect}->logMsg("E", "[E] failure from utime($time, $time, $dir/$postfix): $!\n");
$retval = 2;
goto up_failed;
}
} else {
my $hashdir = dirname("$dir/$postfix")."/.hashed";
if (!-d $hashdir) {
if(!mkdir($hashdir)) {
$this->{m_collect}->logMsg("E", "[E] failure from mkdir($hashdir): $!\n");
$retval = 2;
goto up_failed;
}
}

my $hash = Digest::SHA->new(512224)->addfile($_)->hexdigest();
if(!move($_, "$hashdir/$hash")) {
$this->{m_collect}->logMsg("E", "[E] failure from move($_, $hashdir/$hash): $!\n");
$retval = 2;
goto up_failed;
}
if(!symlink(".hashed/$hash", "$dir/$postfix")) {
$this->{m_collect}->logMsg("E", "[E] failure from symlink(.hashed/$hash, $dir/$postfix)\n");
$retval = 2;
goto up_failed;
}
push(@links, "$dir/$postfix");
}
} , no_chdir => 1 }, $tmpdir);
if (@links) {
# Perl has no built-in way to set the mtime of a symlink. Calling the libc
# function lutimes through FFI::Platypus would be useful, but that module is
# not packaged yet, so for speed all links are batched into one call to touch.
unshift(@links, ("/usr/bin/touch", "-h", "-t", $date));
system(@links);
if (0 != $?) {
$this->{m_collect}->logMsg("E", "[E] error return value $? from calling ".join(" ", @links)."\n");
$retval = 2;
}
}

File::Path::remove_tree($tmpdir);
my $msg = "[I] unpacked $p_uri in directory $dir\n";
$this->{m_collect}->logMsg("I", $msg);

up_failed:
return $retval;
}
Expand Down

0 comments on commit 3fd5a87

Please sign in to comment.