diff --git a/pkg/machine/config.go b/pkg/machine/config.go index ebcb1495ca..c773de2b24 100644 --- a/pkg/machine/config.go +++ b/pkg/machine/config.go @@ -199,6 +199,17 @@ func GetDataDir(vmType VMType) (string, error) { return dataDir, mkdirErr } +// GetGLobalDataDir returns the root of all backends +// for shared machine data. +func GetGlobalDataDir() (string, error) { + dataDir, err := DataDirPrefix() + if err != nil { + return "", err + } + + return dataDir, os.MkdirAll(dataDir, 0755) +} + // DataDirPrefix returns the path prefix for all machine data files func DataDirPrefix() (string, error) { data, err := homedir.GetDataHome() diff --git a/pkg/machine/connection.go b/pkg/machine/connection.go index 5633cd5cce..ebecda83e3 100644 --- a/pkg/machine/connection.go +++ b/pkg/machine/connection.go @@ -58,6 +58,21 @@ func AnyConnectionDefault(name ...string) (bool, error) { return false, nil } +func ChangeConnectionURI(name string, uri fmt.Stringer) error { + cfg, err := config.ReadCustomConfig() + if err != nil { + return err + } + dst, ok := cfg.Engine.ServiceDestinations[name] + if !ok { + return errors.New("connection not found") + } + dst.URI = uri.String() + cfg.Engine.ServiceDestinations[name] = dst + + return cfg.Write() +} + func ChangeDefault(name string) error { cfg, err := config.ReadCustomConfig() if err != nil { diff --git a/pkg/machine/ports.go b/pkg/machine/ports.go new file mode 100644 index 0000000000..2837d492f6 --- /dev/null +++ b/pkg/machine/ports.go @@ -0,0 +1,213 @@ +package machine + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "os" + "path/filepath" + "strconv" + + "github.com/containers/storage/pkg/ioutils" + "github.com/containers/storage/pkg/lockfile" + "github.com/sirupsen/logrus" +) + +const ( + portAllocFileName = "port-alloc.dat" + portLockFileName = "port-alloc.lck" +) + +// Reserves a unique port for a machine instance in a global (user) scope across +// all machines and backend types. On success the port is guaranteed to not be +// allocated until released with a call to ReleaseMachinePort(). +// +// The purpose of this method is to prevent collisions between machine +// instances when ran at the same time. Note, that dynamic port reassignment +// on its own is insufficient to resolve conflicts, since there is a narrow +// window between port detection and actual service binding, allowing for the +// possibility of a second racing machine to fail if its check is unlucky to +// fall within that window. Additionally, there is the potential for a long +// running reassignment dance over start/stop until all machine instances +// eventually arrive at total conflict free state. By reserving ports using +// mechanism these scenarios are prevented. +func AllocateMachinePort() (int, error) { + const maxRetries = 10000 + + handles := []io.Closer{} + defer func() { + for _, handle := range handles { + handle.Close() + } + }() + + lock, err := acquirePortLock() + if err != nil { + return 0, err + } + defer lock.Unlock() + + ports, err := loadPortAllocations() + if err != nil { + return 0, err + } + + var port int + for i := 0; ; i++ { + var handle io.Closer + + // Ports must be held temporarily to prevent repeat search results + handle, port, err = getRandomPortHold() + if err != nil { + return 0, err + } + handles = append(handles, handle) + + if _, exists := ports[port]; !exists { + break + } + + if i > maxRetries { + return 0, errors.New("maximum number of retries exceeded searching for available port") + } + } + + ports[port] = struct{}{} + if err := storePortAllocations(ports); err != nil { + return 0, err + } + + return port, nil +} + +// Releases a reserved port for a machine when no longer required. Care should +// be taken to ensure there are no conditions (e.g. failure paths) where the +// port might unintentionally remain in use after releasing +func ReleaseMachinePort(port int) error { + lock, err := acquirePortLock() + if err != nil { + return err + } + defer lock.Unlock() + ports, err := loadPortAllocations() + if err != nil { + return err + } + + delete(ports, port) + return storePortAllocations(ports) +} + +func IsLocalPortAvailable(port int) bool { + // Used to mark invalid / unassigned port + if port <= 0 { + return false + } + + lc := getPortCheckListenConfig() + l, err := lc.Listen(context.Background(), "tcp", fmt.Sprintf("127.0.0.1:%d", port)) + if err != nil { + return false + } + l.Close() + return true +} + +func getRandomPortHold() (io.Closer, int, error) { + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return nil, 0, fmt.Errorf("unable to get free machine port: %w", err) + } + _, portString, err := net.SplitHostPort(l.Addr().String()) + if err != nil { + l.Close() + return nil, 0, fmt.Errorf("unable to determine free machine port: %w", err) + } + port, err := strconv.Atoi(portString) + if err != nil { + l.Close() + return nil, 0, fmt.Errorf("unable to convert port to int: %w", err) + } + return l, port, err +} + +func acquirePortLock() (*lockfile.LockFile, error) { + lockDir, err := GetGlobalDataDir() + if err != nil { + return nil, err + } + + lock, err := lockfile.GetLockFile(filepath.Join(lockDir, portLockFileName)) + if err != nil { + return nil, err + } + + lock.Lock() + return lock, nil +} + +func loadPortAllocations() (map[int]struct{}, error) { + portDir, err := GetGlobalDataDir() + if err != nil { + return nil, err + } + + var portData []int + exists := true + file, err := os.OpenFile(filepath.Join(portDir, portAllocFileName), 0, 0) + if errors.Is(err, os.ErrNotExist) { + exists = false + } else if err != nil { + return nil, err + } + defer file.Close() + + // Non-existence of the file, or a corrupt file are not treated as hard + // failures, since dynamic reassignment and continued use will eventually + // rebuild the dataset. This also makes migration cases simpler, since + // the state doesn't have to exist + if exists { + decoder := json.NewDecoder(file) + if err := decoder.Decode(&portData); err != nil { + logrus.Warnf("corrupt port allocation file, could not use state") + } + } + + ports := make(map[int]struct{}) + placeholder := struct{}{} + for _, port := range portData { + ports[port] = placeholder + } + + return ports, nil +} + +func storePortAllocations(ports map[int]struct{}) error { + portDir, err := GetGlobalDataDir() + if err != nil { + return err + } + + portData := make([]int, 0, len(ports)) + for port := range ports { + portData = append(portData, port) + } + + opts := &ioutils.AtomicFileWriterOptions{ExplicitCommit: true} + w, err := ioutils.NewAtomicFileWriterWithOpts(filepath.Join(portDir, portAllocFileName), 0644, opts) + if err != nil { + return err + } + defer w.Close() + + enc := json.NewEncoder(w) + if err := enc.Encode(portData); err != nil { + return err + } + + // Commit the changes to disk if no errors + return w.Commit() +} diff --git a/pkg/machine/ports_unix.go b/pkg/machine/ports_unix.go new file mode 100644 index 0000000000..e1960c3df4 --- /dev/null +++ b/pkg/machine/ports_unix.go @@ -0,0 +1,29 @@ +//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd +// +build darwin dragonfly freebsd linux netbsd openbsd + +package machine + +import ( + "net" + "syscall" +) + +func getPortCheckListenConfig() *net.ListenConfig { + return &net.ListenConfig{ + Control: func(network, address string, c syscall.RawConn) (cerr error) { + if err := c.Control(func(fd uintptr) { + // Prevent listening socket from holding over in TIME_WAIT in the rare case a connection + // attempt occurs in the short window the socket is listening. This ensures the registration + // will be gone when close() completes, freeing it up for the real subsequent listen by another + // process + cerr = syscall.SetsockoptLinger(int(fd), syscall.SOL_SOCKET, syscall.SO_LINGER, &syscall.Linger{ + Onoff: 1, + Linger: 0, + }) + }); err != nil { + cerr = err + } + return + }, + } +} diff --git a/pkg/machine/ports_windows.go b/pkg/machine/ports_windows.go new file mode 100644 index 0000000000..730d4be0b3 --- /dev/null +++ b/pkg/machine/ports_windows.go @@ -0,0 +1,28 @@ +package machine + +import ( + "net" + "syscall" +) + +// NOTE the reason for the code duplication between win and unix is that the syscall +// implementations require a different cast (Handle on Windows, int on Unixes) +func getPortCheckListenConfig() *net.ListenConfig { + return &net.ListenConfig{ + Control: func(network, address string, c syscall.RawConn) (cerr error) { + if err := c.Control(func(fd uintptr) { + // Prevent listening socket from holding over in TIME_WAIT in the rare case a connection + // attempt occurs in the short window the socket is listening. This ensures the registration + // will be gone when close() completes, freeing it up for the real subsequent listen by another + // process + cerr = syscall.SetsockoptLinger(syscall.Handle(fd), syscall.SOL_SOCKET, syscall.SO_LINGER, &syscall.Linger{ + Onoff: 1, + Linger: 0, + }) + }); err != nil { + cerr = err + } + return + }, + } +} diff --git a/pkg/machine/wsl/config.go b/pkg/machine/wsl/config.go index e48fc03b06..57e6580d69 100644 --- a/pkg/machine/wsl/config.go +++ b/pkg/machine/wsl/config.go @@ -10,7 +10,6 @@ import ( "time" "github.com/containers/podman/v4/pkg/machine" - "github.com/containers/podman/v4/utils" "github.com/sirupsen/logrus" ) @@ -55,7 +54,7 @@ func (p *WSLVirtualization) NewMachine(opts machine.InitOptions) (machine.VM, er } // Add a random port for ssh - port, err := utils.GetRandomPort() + port, err := machine.AllocateMachinePort() if err != nil { return nil, err } diff --git a/pkg/machine/wsl/machine.go b/pkg/machine/wsl/machine.go index 27745ce274..1cd41721c2 100644 --- a/pkg/machine/wsl/machine.go +++ b/pkg/machine/wsl/machine.go @@ -49,6 +49,8 @@ const registriesConf = `unqualified-search-registries=["docker.io"] const appendPort = `grep -q Port\ %d /etc/ssh/sshd_config || echo Port %d >> /etc/ssh/sshd_config` +const changePort = `sed -E -i 's/^Port[[:space:]]+[0-9]+/Port %d/' /etc/ssh/sshd_config` + const configServices = `ln -fs /usr/lib/systemd/system/sshd.service /etc/systemd/system/multi-user.target.wants/sshd.service ln -fs /usr/lib/systemd/system/podman.socket /etc/systemd/system/sockets.target.wants/podman.socket rm -f /etc/systemd/system/getty.target.wants/console-getty.service @@ -408,19 +410,33 @@ func (v *MachineVM) writeConfig() error { return machine.WriteConfig(v.ConfigPath, v) } -func setupConnections(v *MachineVM, opts machine.InitOptions) error { +func constructSSHUris(v *MachineVM) ([]url.URL, []string) { uri := machine.SSHRemoteConnection.MakeSSHURL(machine.LocalhostIP, rootlessSock, strconv.Itoa(v.Port), v.RemoteUsername) uriRoot := machine.SSHRemoteConnection.MakeSSHURL(machine.LocalhostIP, rootfulSock, strconv.Itoa(v.Port), "root") uris := []url.URL{uri, uriRoot} names := []string{v.Name, v.Name + "-root"} + return uris, names +} + +func setupConnections(v *MachineVM, opts machine.InitOptions) error { + uris, names := constructSSHUris(v) + // The first connection defined when connections is empty will become the default // regardless of IsDefault, so order according to rootful if opts.Rootful { uris[0], names[0], uris[1], names[1] = uris[1], names[1], uris[0], names[0] } + // We need to prevent racing connection updates to containers.conf globally + // across all backends to prevent connection overwrites + flock, err := obtainGlobalConfigLock() + if err != nil { + return fmt.Errorf("could not obtain global lock: %w", err) + } + defer flock.unlock() + for i := 0; i < 2; i++ { if err := machine.AddConnection(&uris[i], names[i], v.IdentityPath, opts.IsDefault && i == 0); err != nil { return err @@ -992,6 +1008,13 @@ func (v *MachineVM) Start(name string, opts machine.StartOptions) error { return err } + if !machine.IsLocalPortAvailable(v.Port) { + logrus.Warnf("SSH port conflict detected, reassigning a new port") + if err := v.reassignSshPort(); err != nil { + return err + } + } + // Startup user-mode networking if enabled if err := v.startUserModeNetworking(); err != nil { return err @@ -1040,6 +1063,74 @@ func (v *MachineVM) Start(name string, opts machine.StartOptions) error { return err } +func obtainGlobalConfigLock() (*fileLock, error) { + lockDir, err := machine.GetGlobalDataDir() + if err != nil { + return nil, err + } + + // Lock file needs to be above all backends + // TODO: This should be changed to a common.Config lock mechanism when available + return lockFile(filepath.Join(lockDir, "podman-config.lck")) +} + +func (v *MachineVM) reassignSshPort() error { + dist := toDist(v.Name) + newPort, err := machine.AllocateMachinePort() + if err != nil { + return err + } + + success := false + defer func() { + if !success { + if err := machine.ReleaseMachinePort(newPort); err != nil { + logrus.Warnf("could not release port allocation as part of failure rollback (%d): %w", newPort, err) + } + } + }() + + // We need to prevent racing connection updates to containers.conf globally + // across all backends to prevent connection overwrites + flock, err := obtainGlobalConfigLock() + if err != nil { + return fmt.Errorf("could not obtain global lock: %w", err) + } + defer flock.unlock() + + // Write a transient invalid port, to force a retry on failure + oldPort := v.Port + v.Port = 0 + if err := v.writeConfig(); err != nil { + return err + } + + if err := machine.ReleaseMachinePort(oldPort); err != nil { + logrus.Warnf("could not release current ssh port allocation (%d): %w", oldPort, err) + } + + if err := wslInvoke(dist, "sh", "-c", fmt.Sprintf(changePort, newPort)); err != nil { + return fmt.Errorf("could not change SSH port for guest OS: %w", err) + } + + v.Port = newPort + uris, names := constructSSHUris(v) + for i := 0; i < 2; i++ { + if err := machine.ChangeConnectionURI(names[i], &uris[i]); err != nil { + return err + } + } + + // Write updated port back + if err := v.writeConfig(); err != nil { + return err + } + + success = true + + return nil +} + func findExecutablePeer(name string) (string, error) { exe, err := os.Executable() if err != nil { @@ -1360,6 +1451,10 @@ func (v *MachineVM) Remove(name string, opts machine.RemoveOptions) (string, fun logrus.Error(err) } } + if err := machine.ReleaseMachinePort(v.Port); err != nil { + logrus.Warnf("could not release port allocation as part of removal (%d): %w", v.Port, err) + } + return nil }, nil }