From 2ed80aa4f38a2cd9c14f0fc7f6dc242f476564e4 Mon Sep 17 00:00:00 2001 From: "Paul \"TBBle\" Hampson" Date: Sat, 12 Dec 2020 23:33:11 +1100 Subject: [PATCH] Include repeated files as hard-links in the tarstream Signed-off-by: Paul "TBBle" Hampson --- internal/ociwclayer/export.go | 23 +++++++++++++++++++++++ internal/wclayer/baselayerreader.go | 5 +++++ internal/wclayer/exportlayer.go | 2 ++ internal/wclayer/legacy.go | 5 +++++ 4 files changed, 35 insertions(+) diff --git a/internal/ociwclayer/export.go b/internal/ociwclayer/export.go index a582f0018f..b4af000944 100644 --- a/internal/ociwclayer/export.go +++ b/internal/ociwclayer/export.go @@ -50,6 +50,8 @@ func ExportLayer(w io.Writer, path string, parentLayerPaths []string) error { } func writeTarFromLayer(r hcsshim.LayerReader, w io.Writer) error { + linkRecords := make(map[[16]byte]string) + t := tar.NewWriter(w) for { name, size, fileInfo, err := r.Next() @@ -69,6 +71,27 @@ func writeTarFromLayer(r hcsshim.LayerReader, w io.Writer) error { return err } } else { + fileIDInfo, err := r.LinkInfo() + if err != nil { + return err + } + if linkName, ok := linkRecords[fileIDInfo.FileID]; ok { + // We've seen this file before, by another name, so put a hardlink in the tar stream. + hdr := backuptar.BasicInfoHeader(name, 0, fileInfo) + hdr.Mode = 0644 + hdr.Typeflag = tar.TypeLink + hdr.Linkname = linkName + if err := t.WriteHeader(hdr); err != nil { + return err + } + continue + } + + // All subsequent names for this file will be hard-linked to this name + // Optimisation opportunity: Only store records for files with multiple links. + // e.g., go-winio could expose FileStandardInfo to enable this. + linkRecords[fileIDInfo.FileID] = filepath.ToSlash(name) + err = backuptar.WriteTarFileFromBackupStream(t, r, name, size, fileInfo) if err != nil { return err diff --git a/internal/wclayer/baselayerreader.go b/internal/wclayer/baselayerreader.go index 7a13c98949..f9b66f3d3a 100644 --- a/internal/wclayer/baselayerreader.go +++ b/internal/wclayer/baselayerreader.go @@ -186,6 +186,11 @@ func (r *baseLayerReader) Next() (path string, size int64, fileInfo *winio.FileB return } +func (r *baseLayerReader) LinkInfo() (fileIDInfo *winio.FileIDInfo, err error) { + fileIDInfo, err = winio.GetFileID(r.currentFile) + return +} + func (r *baseLayerReader) Read(b []byte) (int, error) { if r.backupReader == nil { if r.currentFile == nil { diff --git a/internal/wclayer/exportlayer.go b/internal/wclayer/exportlayer.go index 990a67062b..6bc21b27ea 100644 --- a/internal/wclayer/exportlayer.go +++ b/internal/wclayer/exportlayer.go @@ -44,6 +44,8 @@ func ExportLayer(ctx context.Context, path string, exportFolderPath string, pare type LayerReader interface { // Next advances to the next file and returns the name, size, and file info Next() (string, int64, *winio.FileBasicInfo, error) + // LinkInfo returns the file identifier for the current file. + LinkInfo() (*winio.FileIDInfo, error) // Read reads data from the current file, in the format of a Win32 backup stream, and // returns the number of bytes read. Read(b []byte) (int, error) diff --git a/internal/wclayer/legacy.go b/internal/wclayer/legacy.go index 8f79dbf4d2..e3c63e3ad4 100644 --- a/internal/wclayer/legacy.go +++ b/internal/wclayer/legacy.go @@ -294,6 +294,11 @@ func (r *legacyLayerReader) Next() (path string, size int64, fileInfo *winio.Fil return } +func (r *legacyLayerReader) LinkInfo() (fileIDInfo *winio.FileIDInfo, err error) { + fileIDInfo, err = winio.GetFileID(r.currentFile) + return +} + func (r *legacyLayerReader) Read(b []byte) (int, error) { if r.backupReader == nil { if r.currentFile == nil {