Skip to content

Commit

Permalink
overlay: try harder with more layers
Browse files Browse the repository at this point in the history
If we have enough layers that even using relative paths for symlinks to
the diff directories won't make the list of lower directories and
options fit in a single page,
* open the various lowers for reading
* use the descriptor names in the lowerdir= list

This is a bit slower, so don't take these extra steps unless we know we
have to.

Signed-off-by: Nalin Dahyabhai <[email protected]>
  • Loading branch information
nalind committed Sep 30, 2022
1 parent fb9a8ea commit 59d3fb8
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 20 deletions.
113 changes: 107 additions & 6 deletions drivers/overlay/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@ import (
"flag"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/containers/storage/pkg/reexec"
"golang.org/x/sys/unix"
)

func init() {
reexec.Register("storage-mountfrom", mountFromMain)
reexec.Register("storage-mountfrom", mountOverlayFromMain)
}

func fatal(err error) {
Expand All @@ -31,7 +33,7 @@ type mountOptions struct {
Flag uint32
}

func mountFrom(dir, device, target, mType string, flags uintptr, label string) error {
func mountOverlayFrom(dir, device, target, mType string, flags uintptr, label string) error {
options := &mountOptions{
Device: device,
Target: target,
Expand Down Expand Up @@ -67,7 +69,7 @@ func mountFrom(dir, device, target, mType string, flags uintptr, label string) e
}

// mountfromMain is the entry-point for storage-mountfrom on re-exec.
func mountFromMain() {
func mountOverlayFromMain() {
runtime.LockOSThread()
flag.Parse()

Expand All @@ -77,12 +79,111 @@ func mountFromMain() {
fatal(err)
}

if err := os.Chdir(flag.Arg(0)); err != nil {
// Mount the arguments passed from the specified directory. Some of the
// paths mentioned in the values we pass to the kernel are relative to
// the specified directory.
homedir := flag.Arg(0)
if err := os.Chdir(homedir); err != nil {
fatal(err)
}

if err := unix.Mount(options.Device, options.Target, options.Type, uintptr(options.Flag), options.Label); err != nil {
fatal(err)
pageSize := unix.Getpagesize()
if len(options.Label) < pageSize {
if err := unix.Mount(options.Device, options.Target, options.Type, uintptr(options.Flag), options.Label); err != nil {
fatal(err)
}
os.Exit(0)
}

// Those arguments still took up too much space. Open the diff
// directories and use their descriptor numbers as lowers, using
// /proc/self/fd as the current directory.

// Split out the various options, since we need to manipulate the
// paths, but we don't want to mess with other options.
var upper, work, lower, label, other string
for _, arg := range strings.Split(options.Label, ",") {
if strings.HasPrefix(arg, "upperdir=") {
upper = strings.TrimPrefix(arg, "upperdir=")
} else if strings.HasPrefix(arg, "workdir=") {
work = strings.TrimPrefix(arg, "workdir=")
} else if strings.HasPrefix(arg, "lowerdir=") {
lower = strings.TrimPrefix(arg, "lowerdir=")
} else if strings.HasPrefix(arg, "label=") {
label = arg
} else {
if other == "" {
other = arg
} else {
other = other + "," + arg
}
}
}

// Don't get tricked if there are no lowers.
lowers := strings.Split(lower, ":")
if len(lowers) == 1 && lowers[0] == "" {
lowers = []string{}
}

// Make sure upperdir, workdir, and the target are absolute paths.
if upper != "" {
if !filepath.IsAbs(upper) {
upper = filepath.Join(homedir, upper)
}
upper = "upperdir=" + upper
}
if work != "" {
if !filepath.IsAbs(work) {
work = filepath.Join(homedir, work)
}
work = "workdir=" + work
}
if !filepath.IsAbs(options.Target) {
options.Target = filepath.Join(homedir, options.Target)
}

// Get a descriptor for each lower, and use that descriptor's name as
// the new value for the list of lowers, because it's shorter.
if lower != "" {
for i := range lowers {
lowerFd, err := unix.Open(lowers[i], unix.O_RDONLY, 0)
if err != nil {
fatal(err)
}
lowers[i] = fmt.Sprintf("%d", lowerFd)
}
lower = "lowerdir=" + strings.Join(lowers, ":")
}

// Reconstruct the Label field.
options.Label = ""
for _, arg := range []string{upper, work, lower, label, other} {
if arg == "" {
continue
}
if options.Label == "" {
options.Label = arg
} else {
options.Label = options.Label + "," + arg
}
}

// Okay, try this, if we managed to make the arguments fit.
var err error
if len(options.Label) < pageSize {
if err := os.Chdir("/proc/self/fd"); err != nil {
fatal(err)
}
err = unix.Mount(options.Device, options.Target, options.Type, uintptr(options.Flag), options.Label)
} else {
err = fmt.Errorf("cannot mount layer, mount data %q too large %d >= page size %d", options.Label, len(options.Label), pageSize)
}

// Clean up.
if err != nil {
fmt.Fprintf(os.Stderr, "creating overlay mount to %s, mount_data=%q\n", options.Target, options.Label)
panic(err)
}

os.Exit(0)
Expand Down
24 changes: 12 additions & 12 deletions drivers/overlay/overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,23 @@ const (
// or root directory. Mounts are always done relative to root and
// referencing the symbolic links in order to ensure the number of
// lower directories can fit in a single page for making the mount
// syscall. A hard upper limit of 128 lower layers is enforced to ensure
// syscall. A hard upper limit of 500 lower layers is enforced to ensure
// that mounts do not fail due to length.

const (
linkDir = "l"
lowerFile = "lower"
maxDepth = 128
maxDepth = 500

// idLength represents the number of random characters
// which can be used to create the unique link identifier
// for every layer. If this value is too long then the
// page size limit for the mount command may be exceeded.
// The idLength should be selected such that following equation
// is true (512 is a buffer for label metadata).
// ((idLength + len(linkDir) + 1) * maxDepth) <= (pageSize - 512)
// is true (512 is a buffer for label metadata, 128 is the
// number of lowers we want to be able to use without having
// to use bind mounts to get all the way to the kernel limit).
// ((idLength + len(linkDir) + 1) * maxFastDepth) <= (pageSize - 512)
idLength = 26
)

Expand Down Expand Up @@ -1378,7 +1380,8 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO

// absLowers is the list of lowers as absolute paths, which works well with additional stores.
absLowers := []string{}
// relLowers is the list of lowers as paths relative to the driver's home directory.
// relLowers is the list of lowers as paths relative to the driver's home directory. If
// additional stores are being used, some of these will still be absolute paths.
relLowers := []string{}

// Check if $link/../diff{1-*} exist. If they do, add them, in order, as the front of the lowers
Expand Down Expand Up @@ -1435,6 +1438,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
perms = os.FileMode(st2.Mode())
permsKnown = true
}
l = lower
break
}
lower = ""
Expand Down Expand Up @@ -1606,13 +1610,12 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
return nil
}
} else if len(mountData) >= pageSize {
// Use relative paths and mountFrom when the mount data has exceeded
// Use (possibly) relative paths and mountFrom when the mount data has exceeded
// the page size. The mount syscall fails if the mount data cannot
// fit within a page and relative links make the mount data much
// smaller at the expense of requiring a fork exec to chroot.
// smaller at the expense of requiring a fork exec to chdir().

workdir = path.Join(id, "work")
//FIXME: We need to figure out to get this to work with additional stores
if readWrite {
diffDir := path.Join(id, "diff")
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(relLowers, ":"), diffDir, workdir)
Expand All @@ -1623,11 +1626,8 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
opts = fmt.Sprintf("%s,%s", opts, strings.Join(optsList, ","))
}
mountData = label.FormatMountLabel(opts, options.MountLabel)
if len(mountData) >= pageSize {
return "", fmt.Errorf("cannot mount layer, mount label %q too large %d >= page size %d", options.MountLabel, len(mountData), pageSize)
}
mountFunc = func(source string, target string, mType string, flags uintptr, label string) error {
return mountFrom(d.home, source, target, mType, flags, label)
return mountOverlayFrom(d.home, source, target, mType, flags, label)
}
mountTarget = path.Join(id, "merged")
}
Expand Down
40 changes: 38 additions & 2 deletions tests/layers.bats
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,54 @@ load helpers

@test "allow storing images with more than 127 layers" {
LAYER=""
for i in {0..150}; do
LIMIT=300
for i in $(seq 0 ${LIMIT}); do
echo "Layer: $i"

# Create a layer.
run storage --debug=false create-layer "$LAYER"
echo "$output"
[ "$status" -eq 0 ]
[ "$output" != "" ]
LAYER="$output"
run storage --debug=false mount "$LAYER"
echo "$output"
[ "$status" -eq 0 ]
[ "$output" != "" ]
ROOTFS="$output"
touch "${ROOTFS}"/$i
run storage --debug=false unmount "$LAYER"
echo "$output"
[ "$status" -eq 0 ]
done

# Create the image
run storage --debug=false create-image --name test-image "$LAYER"
run storage --debug=false create-image "$LAYER"
echo "$output"
[ "$status" -eq 0 ]
[ "$output" != "" ]
IMAGE="$output"

# Make sure the image has all of the content.
run storage --debug=false create-container "$IMAGE"
echo "$output"
[ "$status" -eq 0 ]
[ "$output" != "" ]
CONTAINER="$output"

run storage --debug=false mount "$CONTAINER"
echo "$output"
[ "$status" -eq 0 ]
[ "$output" != "" ]
ROOTFS="$output"
for i in $(seq 0 ${LIMIT}); do
if ! test -r "${ROOTFS}"/$i ; then
echo File from layer $i of ${LIMIT} was not visible after mounting
false
fi
done

run storage --debug=false unmount "$CONTAINER"
echo "$output"
[ "$status" -eq 0 ]
}

0 comments on commit 59d3fb8

Please sign in to comment.