diff --git a/pkg/specgen/generate/config_unsupported.go b/pkg/specgen/generate/config_unsupported.go deleted file mode 100644 index a97ae0709d..0000000000 --- a/pkg/specgen/generate/config_unsupported.go +++ /dev/null @@ -1,29 +0,0 @@ -//go:build !linux -// +build !linux - -package generate - -import ( - "errors" - - "github.com/containers/common/libimage" - "github.com/containers/podman/v4/pkg/specgen" - spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/runtime-tools/generate" -) - -// DevicesFromPath computes a list of devices -func DevicesFromPath(g *generate.Generator, devicePath string) error { - return errors.New("unsupported DevicesFromPath") -} - -func BlockAccessToKernelFilesystems(privileged, pidModeIsHost bool, mask, unmask []string, g *generate.Generator) { -} - -func supportAmbientCapabilities() bool { - return false -} - -func getSeccompConfig(s *specgen.SpecGenerator, configSpec *spec.Spec, img *libimage.Image) (*spec.LinuxSeccomp, error) { - return nil, errors.New("not implemented getSeccompConfig") -} diff --git a/pkg/specgen/generate/namespaces.go b/pkg/specgen/generate/namespaces.go index e27a3abace..f57b6c23c5 100644 --- a/pkg/specgen/generate/namespaces.go +++ b/pkg/specgen/generate/namespaces.go @@ -3,7 +3,6 @@ package generate import ( "errors" "fmt" - "os" "strings" "github.com/containers/common/libimage" @@ -16,7 +15,6 @@ import ( "github.com/containers/podman/v4/pkg/specgen" "github.com/containers/podman/v4/pkg/util" spec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/runtime-tools/generate" "github.com/sirupsen/logrus" ) @@ -364,153 +362,6 @@ func namespaceOptions(s *specgen.SpecGenerator, rt *libpod.Runtime, pod *libpod. 
return toReturn, nil } -func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt *libpod.Runtime, pod *libpod.Pod) error { - // PID - switch s.PidNS.NSMode { - case specgen.Path: - if _, err := os.Stat(s.PidNS.Value); err != nil { - return fmt.Errorf("cannot find specified PID namespace path: %w", err) - } - if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), s.PidNS.Value); err != nil { - return err - } - case specgen.Host: - if err := g.RemoveLinuxNamespace(string(spec.PIDNamespace)); err != nil { - return err - } - case specgen.Private: - if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), ""); err != nil { - return err - } - } - - // IPC - switch s.IpcNS.NSMode { - case specgen.Path: - if _, err := os.Stat(s.IpcNS.Value); err != nil { - return fmt.Errorf("cannot find specified IPC namespace path: %w", err) - } - if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), s.IpcNS.Value); err != nil { - return err - } - case specgen.Host: - if err := g.RemoveLinuxNamespace(string(spec.IPCNamespace)); err != nil { - return err - } - case specgen.Private: - if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), ""); err != nil { - return err - } - } - - // UTS - switch s.UtsNS.NSMode { - case specgen.Path: - if _, err := os.Stat(s.UtsNS.Value); err != nil { - return fmt.Errorf("cannot find specified UTS namespace path: %w", err) - } - if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), s.UtsNS.Value); err != nil { - return err - } - case specgen.Host: - if err := g.RemoveLinuxNamespace(string(spec.UTSNamespace)); err != nil { - return err - } - case specgen.Private: - if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), ""); err != nil { - return err - } - } - - hostname := s.Hostname - if hostname == "" { - switch { - case s.UtsNS.NSMode == specgen.FromPod: - hostname = pod.Hostname() - case s.UtsNS.NSMode == specgen.FromContainer: - utsCtr, err := 
rt.LookupContainer(s.UtsNS.Value) - if err != nil { - return fmt.Errorf("error looking up container to share uts namespace with: %w", err) - } - hostname = utsCtr.Hostname() - case (s.NetNS.NSMode == specgen.Host && hostname == "") || s.UtsNS.NSMode == specgen.Host: - tmpHostname, err := os.Hostname() - if err != nil { - return fmt.Errorf("unable to retrieve hostname of the host: %w", err) - } - hostname = tmpHostname - default: - logrus.Debug("No hostname set; container's hostname will default to runtime default") - } - } - - g.RemoveHostname() - if s.Hostname != "" || s.UtsNS.NSMode != specgen.Host { - // Set the hostname in the OCI configuration only if specified by - // the user or if we are creating a new UTS namespace. - // TODO: Should we be doing this for pod or container shared - // namespaces? - g.SetHostname(hostname) - } - if _, ok := s.Env["HOSTNAME"]; !ok && s.Hostname != "" { - g.AddProcessEnv("HOSTNAME", hostname) - } - - // User - if _, err := specgen.SetupUserNS(s.IDMappings, s.UserNS, g); err != nil { - return err - } - - // Cgroup - switch s.CgroupNS.NSMode { - case specgen.Path: - if _, err := os.Stat(s.CgroupNS.Value); err != nil { - return fmt.Errorf("cannot find specified cgroup namespace path: %w", err) - } - if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), s.CgroupNS.Value); err != nil { - return err - } - case specgen.Host: - if err := g.RemoveLinuxNamespace(string(spec.CgroupNamespace)); err != nil { - return err - } - case specgen.Private: - if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), ""); err != nil { - return err - } - } - - // Net - switch s.NetNS.NSMode { - case specgen.Path: - if _, err := os.Stat(s.NetNS.Value); err != nil { - return fmt.Errorf("cannot find specified network namespace path: %w", err) - } - if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), s.NetNS.Value); err != nil { - return err - } - case specgen.Host: - if err := 
g.RemoveLinuxNamespace(string(spec.NetworkNamespace)); err != nil { - return err - } - case specgen.Private, specgen.NoNetwork: - if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil { - return err - } - } - - if g.Config.Annotations == nil { - g.Config.Annotations = make(map[string]string) - } - if s.PublishExposedPorts { - g.Config.Annotations[define.InspectAnnotationPublishAll] = define.InspectResponseTrue - } else { - g.Config.Annotations[define.InspectAnnotationPublishAll] = define.InspectResponseFalse - } - - return nil -} - // GetNamespaceOptions transforms a slice of kernel namespaces // into a slice of pod create options. Currently, not all // kernel namespaces are supported, and they will be returned in an error diff --git a/pkg/specgen/generate/namespaces_freebsd.go b/pkg/specgen/generate/namespaces_freebsd.go new file mode 100644 index 0000000000..d821d9daa2 --- /dev/null +++ b/pkg/specgen/generate/namespaces_freebsd.go @@ -0,0 +1,51 @@ +package generate + +import ( + "fmt" + "os" + + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/pkg/specgen" + "github.com/opencontainers/runtime-tools/generate" + "github.com/sirupsen/logrus" +) + +func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt *libpod.Runtime, pod *libpod.Pod) error { + // UTS + + hostname := s.Hostname + if hostname == "" { + switch { + case s.UtsNS.NSMode == specgen.FromPod: + hostname = pod.Hostname() + case s.UtsNS.NSMode == specgen.FromContainer: + utsCtr, err := rt.LookupContainer(s.UtsNS.Value) + if err != nil { + return fmt.Errorf("error looking up container to share uts namespace with: %w", err) + } + hostname = utsCtr.Hostname() + case (s.NetNS.NSMode == specgen.Host && hostname == "") || s.UtsNS.NSMode == specgen.Host: + tmpHostname, err := os.Hostname() + if err != nil { + return fmt.Errorf("unable to retrieve hostname of the host: %w", err) + } + hostname = tmpHostname + default: + 
logrus.Debug("No hostname set; container's hostname will default to runtime default") + } + } + + g.RemoveHostname() + if s.Hostname != "" || s.UtsNS.NSMode != specgen.Host { + // Set the hostname in the OCI configuration only if specified by + // the user or if we are creating a new UTS namespace. + // TODO: Should we be doing this for pod or container shared + // namespaces? + g.SetHostname(hostname) + } + if _, ok := s.Env["HOSTNAME"]; !ok && s.Hostname != "" { + g.AddProcessEnv("HOSTNAME", hostname) + } + + return nil +} diff --git a/pkg/specgen/generate/namespaces_linux.go b/pkg/specgen/generate/namespaces_linux.go new file mode 100644 index 0000000000..5c056e52c6 --- /dev/null +++ b/pkg/specgen/generate/namespaces_linux.go @@ -0,0 +1,160 @@ +package generate + +import ( + "fmt" + "os" + + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/pkg/specgen" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "github.com/sirupsen/logrus" +) + +func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt *libpod.Runtime, pod *libpod.Pod) error { + // PID + switch s.PidNS.NSMode { + case specgen.Path: + if _, err := os.Stat(s.PidNS.Value); err != nil { + return fmt.Errorf("cannot find specified PID namespace path: %w", err) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), s.PidNS.Value); err != nil { + return err + } + case specgen.Host: + if err := g.RemoveLinuxNamespace(string(spec.PIDNamespace)); err != nil { + return err + } + case specgen.Private: + if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), ""); err != nil { + return err + } + } + + // IPC + switch s.IpcNS.NSMode { + case specgen.Path: + if _, err := os.Stat(s.IpcNS.Value); err != nil { + return fmt.Errorf("cannot find specified IPC namespace path: %w", err) + } + if err := 
g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), s.IpcNS.Value); err != nil { + return err + } + case specgen.Host: + if err := g.RemoveLinuxNamespace(string(spec.IPCNamespace)); err != nil { + return err + } + case specgen.Private: + if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), ""); err != nil { + return err + } + } + + // UTS + switch s.UtsNS.NSMode { + case specgen.Path: + if _, err := os.Stat(s.UtsNS.Value); err != nil { + return fmt.Errorf("cannot find specified UTS namespace path: %w", err) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), s.UtsNS.Value); err != nil { + return err + } + case specgen.Host: + if err := g.RemoveLinuxNamespace(string(spec.UTSNamespace)); err != nil { + return err + } + case specgen.Private: + if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), ""); err != nil { + return err + } + } + + hostname := s.Hostname + if hostname == "" { + switch { + case s.UtsNS.NSMode == specgen.FromPod: + hostname = pod.Hostname() + case s.UtsNS.NSMode == specgen.FromContainer: + utsCtr, err := rt.LookupContainer(s.UtsNS.Value) + if err != nil { + return fmt.Errorf("error looking up container to share uts namespace with: %w", err) + } + hostname = utsCtr.Hostname() + case (s.NetNS.NSMode == specgen.Host && hostname == "") || s.UtsNS.NSMode == specgen.Host: + tmpHostname, err := os.Hostname() + if err != nil { + return fmt.Errorf("unable to retrieve hostname of the host: %w", err) + } + hostname = tmpHostname + default: + logrus.Debug("No hostname set; container's hostname will default to runtime default") + } + } + + g.RemoveHostname() + if s.Hostname != "" || s.UtsNS.NSMode != specgen.Host { + // Set the hostname in the OCI configuration only if specified by + // the user or if we are creating a new UTS namespace. + // TODO: Should we be doing this for pod or container shared + // namespaces? 
+ g.SetHostname(hostname) + } + if _, ok := s.Env["HOSTNAME"]; !ok && s.Hostname != "" { + g.AddProcessEnv("HOSTNAME", hostname) + } + + // User + if _, err := specgen.SetupUserNS(s.IDMappings, s.UserNS, g); err != nil { + return err + } + + // Cgroup + switch s.CgroupNS.NSMode { + case specgen.Path: + if _, err := os.Stat(s.CgroupNS.Value); err != nil { + return fmt.Errorf("cannot find specified cgroup namespace path: %w", err) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), s.CgroupNS.Value); err != nil { + return err + } + case specgen.Host: + if err := g.RemoveLinuxNamespace(string(spec.CgroupNamespace)); err != nil { + return err + } + case specgen.Private: + if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), ""); err != nil { + return err + } + } + + // Net + switch s.NetNS.NSMode { + case specgen.Path: + if _, err := os.Stat(s.NetNS.Value); err != nil { + return fmt.Errorf("cannot find specified network namespace path: %w", err) + } + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), s.NetNS.Value); err != nil { + return err + } + case specgen.Host: + if err := g.RemoveLinuxNamespace(string(spec.NetworkNamespace)); err != nil { + return err + } + case specgen.Private, specgen.NoNetwork: + if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), ""); err != nil { + return err + } + } + + if g.Config.Annotations == nil { + g.Config.Annotations = make(map[string]string) + } + if s.PublishExposedPorts { + g.Config.Annotations[define.InspectAnnotationPublishAll] = define.InspectResponseTrue + } else { + g.Config.Annotations[define.InspectAnnotationPublishAll] = define.InspectResponseFalse + } + + return nil +} diff --git a/pkg/specgen/generate/namespaces_unsupported.go b/pkg/specgen/generate/namespaces_unsupported.go new file mode 100644 index 0000000000..c4a9c22d8f --- /dev/null +++ b/pkg/specgen/generate/namespaces_unsupported.go @@ -0,0 +1,16 @@ +//go:build !linux && !freebsd +// 
+build !linux,!freebsd + +package generate + +import ( + "errors" + + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/pkg/specgen" + "github.com/opencontainers/runtime-tools/generate" +) + +func specConfigureNamespaces(s *specgen.SpecGenerator, g *generate.Generator, rt *libpod.Runtime, pod *libpod.Pod) error { + return errors.New("unsupported specConfigureNamespaces") +} diff --git a/pkg/specgen/generate/oci.go b/pkg/specgen/generate/oci.go index a531494c99..3ac1a9b3fe 100644 --- a/pkg/specgen/generate/oci.go +++ b/pkg/specgen/generate/oci.go @@ -1,37 +1,19 @@ package generate import ( - "context" - "encoding/json" "fmt" - "path" "strings" "github.com/containers/common/libimage" - "github.com/containers/common/pkg/cgroups" "github.com/containers/common/pkg/config" - "github.com/containers/podman/v4/libpod" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/pkg/specgen" - spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) -func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) { - if s.ProcOpts == nil { - return - } - for i := range g.Config.Mounts { - if g.Config.Mounts[i].Destination == "/proc" { - g.Config.Mounts[i].Options = s.ProcOpts - return - } - } -} - func addRlimits(s *specgen.SpecGenerator, g *generate.Generator) { var ( isRootless = rootless.IsRootless() @@ -133,302 +115,3 @@ func makeCommand(s *specgen.SpecGenerator, imageData *libimage.ImageData, rtc *c return finalCommand, nil } - -// canMountSys is a best-effort heuristic to detect whether mounting a new sysfs is permitted in the container -func canMountSys(isRootless, isNewUserns bool, s *specgen.SpecGenerator) bool { - if s.NetNS.IsHost() && (isRootless || isNewUserns) { - return false - } - if isNewUserns { - switch s.NetNS.NSMode { - case specgen.Slirp, specgen.Private, 
specgen.NoNetwork, specgen.Bridge: - return true - default: - return false - } - } - return true -} - -func getCgroupPermissons(unmask []string) string { - ro := "ro" - rw := "rw" - cgroup := "/sys/fs/cgroup" - - cgroupv2, _ := cgroups.IsCgroup2UnifiedMode() - if !cgroupv2 { - return ro - } - - if unmask != nil && unmask[0] == "ALL" { - return rw - } - - for _, p := range unmask { - if path.Clean(p) == cgroup { - return rw - } - } - return ro -} - -// SpecGenToOCI returns the base configuration for the container. -func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) { - cgroupPerm := getCgroupPermissons(s.Unmask) - - g, err := generate.New("linux") - if err != nil { - return nil, err - } - // Remove the default /dev/shm mount to ensure we overwrite it - g.RemoveMount("/dev/shm") - g.HostSpecific = true - addCgroup := true - - isRootless := rootless.IsRootless() - isNewUserns := s.UserNS.IsContainer() || s.UserNS.IsPath() || s.UserNS.IsPrivate() - - canMountSys := canMountSys(isRootless, isNewUserns, s) - - if s.Privileged && canMountSys { - cgroupPerm = "rw" - g.RemoveMount("/sys") - sysMnt := spec.Mount{ - Destination: "/sys", - Type: "sysfs", - Source: "sysfs", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"}, - } - g.AddMount(sysMnt) - } - if !canMountSys { - addCgroup = false - g.RemoveMount("/sys") - r := "ro" - if s.Privileged { - r = "rw" - } - sysMnt := spec.Mount{ - Destination: "/sys", - Type: "bind", // should we use a constant for this, like createconfig? 
- Source: "/sys", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, - } - g.AddMount(sysMnt) - if !s.Privileged && isRootless { - g.AddLinuxMaskedPaths("/sys/kernel") - } - } - gid5Available := true - if isRootless { - nGids, err := rootless.GetAvailableGids() - if err != nil { - return nil, err - } - gid5Available = nGids >= 5 - } - // When using a different user namespace, check that the GID 5 is mapped inside - // the container. - if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) { - mappingFound := false - for _, r := range s.IDMappings.GIDMap { - if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size { - mappingFound = true - break - } - } - if !mappingFound { - gid5Available = false - } - } - if !gid5Available { - // If we have no GID mappings, the gid=5 default option would fail, so drop it. - g.RemoveMount("/dev/pts") - devPts := spec.Mount{ - Destination: "/dev/pts", - Type: "devpts", - Source: "devpts", - Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"}, - } - g.AddMount(devPts) - } - - inUserNS := isRootless || isNewUserns - - if inUserNS && s.IpcNS.IsHost() { - g.RemoveMount("/dev/mqueue") - devMqueue := spec.Mount{ - Destination: "/dev/mqueue", - Type: "bind", // constant ? 
- Source: "/dev/mqueue", - Options: []string{"bind", "nosuid", "noexec", "nodev"}, - } - g.AddMount(devMqueue) - } - if inUserNS && s.PidNS.IsHost() { - g.RemoveMount("/proc") - procMount := spec.Mount{ - Destination: "/proc", - Type: define.TypeBind, - Source: "/proc", - Options: []string{"rbind", "nosuid", "noexec", "nodev"}, - } - g.AddMount(procMount) - } - - if addCgroup { - cgroupMnt := spec.Mount{ - Destination: "/sys/fs/cgroup", - Type: "cgroup", - Source: "cgroup", - Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm}, - } - g.AddMount(cgroupMnt) - } - - g.Config.Linux.Personality = s.Personality - - g.SetProcessCwd(s.WorkDir) - - g.SetProcessArgs(finalCmd) - - g.SetProcessTerminal(s.Terminal) - - for key, val := range s.Annotations { - g.AddAnnotation(key, val) - } - - if s.ResourceLimits != nil { - out, err := json.Marshal(s.ResourceLimits) - if err != nil { - return nil, err - } - err = json.Unmarshal(out, g.Config.Linux.Resources) - if err != nil { - return nil, err - } - g.Config.Linux.Resources = s.ResourceLimits - } - - weightDevices, err := WeightDevices(s.WeightDevice) - if err != nil { - return nil, err - } - if len(weightDevices) > 0 { - for _, dev := range weightDevices { - g.AddLinuxResourcesBlockIOWeightDevice(dev.Major, dev.Minor, *dev.Weight) - } - } - - // Devices - // set the default rule at the beginning of device configuration - if !inUserNS && !s.Privileged { - g.AddLinuxResourcesDevice(false, "", nil, nil, "rwm") - } - - var userDevices []spec.LinuxDevice - - if !s.Privileged { - // add default devices from containers.conf - for _, device := range rtc.Containers.Devices { - if err = DevicesFromPath(&g, device); err != nil { - return nil, err - } - } - if len(compatibleOptions.HostDeviceList) > 0 && len(s.Devices) == 0 { - userDevices = compatibleOptions.HostDeviceList - } else { - userDevices = s.Devices - } - // add default devices specified by caller - for _, device := range userDevices { - if err = 
DevicesFromPath(&g, device.Path); err != nil { - return nil, err - } - } - } - s.HostDeviceList = userDevices - - // set the devices cgroup when not running in a user namespace - if !inUserNS && !s.Privileged { - for _, dev := range s.DeviceCgroupRule { - g.AddLinuxResourcesDevice(true, dev.Type, dev.Major, dev.Minor, dev.Access) - } - } - - BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g) - - g.ClearProcessEnv() - for name, val := range s.Env { - g.AddProcessEnv(name, val) - } - - addRlimits(s, &g) - - // NAMESPACES - if err := specConfigureNamespaces(s, &g, rt, pod); err != nil { - return nil, err - } - configSpec := g.Config - - if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil { - return nil, err - } - - // BIND MOUNTS - configSpec.Mounts = SupersedeUserMounts(mounts, configSpec.Mounts) - // Process mounts to ensure correct options - if err := InitFSMounts(configSpec.Mounts); err != nil { - return nil, err - } - - // Add annotations - if configSpec.Annotations == nil { - configSpec.Annotations = make(map[string]string) - } - - if s.Remove { - configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse - } - - if len(s.VolumesFrom) > 0 { - configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",") - } - - if s.Privileged { - configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse - } - - if s.Init { - configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue - } else { - configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse - } - - if s.OOMScoreAdj != nil { - g.SetProcessOOMScoreAdj(*s.OOMScoreAdj) - } - setProcOpts(s, &g) - - return configSpec, nil -} - -func 
WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) { - devs := []spec.LinuxWeightDevice{} - for k, v := range wtDevices { - statT := unix.Stat_t{} - if err := unix.Stat(k, &statT); err != nil { - return nil, fmt.Errorf("failed to inspect '%s' in --blkio-weight-device: %w", k, err) - } - dev := new(spec.LinuxWeightDevice) - dev.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert - dev.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert - dev.Weight = v.Weight - devs = append(devs, *dev) - } - return devs, nil -} diff --git a/pkg/specgen/generate/oci_freebsd.go b/pkg/specgen/generate/oci_freebsd.go new file mode 100644 index 0000000000..71c926fd2b --- /dev/null +++ b/pkg/specgen/generate/oci_freebsd.go @@ -0,0 +1,96 @@ +//go:build freebsd + +package generate + +import ( + "context" + "strings" + + "github.com/containers/common/libimage" + "github.com/containers/common/pkg/config" + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/pkg/specgen" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" +) + +// SpecGenToOCI returns the base configuration for the container. 
+func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) { + g, err := generate.New("freebsd") + if err != nil { + return nil, err + } + + g.SetProcessCwd(s.WorkDir) + + g.SetProcessArgs(finalCmd) + + g.SetProcessTerminal(s.Terminal) + + for key, val := range s.Annotations { + g.AddAnnotation(key, val) + } + + g.ClearProcessEnv() + for name, val := range s.Env { + g.AddProcessEnv(name, val) + } + + addRlimits(s, &g) + + // NAMESPACES + if err := specConfigureNamespaces(s, &g, rt, pod); err != nil { + return nil, err + } + configSpec := g.Config + + if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil { + return nil, err + } + + // BIND MOUNTS + configSpec.Mounts = SupersedeUserMounts(mounts, configSpec.Mounts) + // Process mounts to ensure correct options + if err := InitFSMounts(configSpec.Mounts); err != nil { + return nil, err + } + + // Add annotations + if configSpec.Annotations == nil { + configSpec.Annotations = make(map[string]string) + } + + if s.Remove { + configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse + } + + if len(s.VolumesFrom) > 0 { + configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",") + } + + if s.Privileged { + configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse + } + + if s.Init { + configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse + } + + if s.OOMScoreAdj != nil { + 
g.SetProcessOOMScoreAdj(*s.OOMScoreAdj) + } + + return configSpec, nil +} + +func WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) { + devs := []spec.LinuxWeightDevice{} + return devs, nil +} diff --git a/pkg/specgen/generate/oci_linux.go b/pkg/specgen/generate/oci_linux.go new file mode 100644 index 0000000000..341853de5f --- /dev/null +++ b/pkg/specgen/generate/oci_linux.go @@ -0,0 +1,331 @@ +package generate + +import ( + "context" + "encoding/json" + "fmt" + "path" + "strings" + + "github.com/containers/common/libimage" + "github.com/containers/common/pkg/cgroups" + "github.com/containers/common/pkg/config" + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/libpod/define" + "github.com/containers/podman/v4/pkg/rootless" + "github.com/containers/podman/v4/pkg/specgen" + spec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-tools/generate" + "golang.org/x/sys/unix" +) + +func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) { + if s.ProcOpts == nil { + return + } + for i := range g.Config.Mounts { + if g.Config.Mounts[i].Destination == "/proc" { + g.Config.Mounts[i].Options = s.ProcOpts + return + } + } +} + +// canMountSys is a best-effort heuristic to detect whether mounting a new sysfs is permitted in the container +func canMountSys(isRootless, isNewUserns bool, s *specgen.SpecGenerator) bool { + if s.NetNS.IsHost() && (isRootless || isNewUserns) { + return false + } + if isNewUserns { + switch s.NetNS.NSMode { + case specgen.Slirp, specgen.Private, specgen.NoNetwork, specgen.Bridge: + return true + default: + return false + } + } + return true +} + +func getCgroupPermissons(unmask []string) string { + ro := "ro" + rw := "rw" + cgroup := "/sys/fs/cgroup" + + cgroupv2, _ := cgroups.IsCgroup2UnifiedMode() + if !cgroupv2 { + return ro + } + + if unmask != nil && unmask[0] == "ALL" { + return rw + } + + for _, p := range unmask { + if 
path.Clean(p) == cgroup { + return rw + } + } + return ro +} + +// SpecGenToOCI returns the base configuration for the container. +func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) { + cgroupPerm := getCgroupPermissons(s.Unmask) + + g, err := generate.New("linux") + if err != nil { + return nil, err + } + // Remove the default /dev/shm mount to ensure we overwrite it + g.RemoveMount("/dev/shm") + g.HostSpecific = true + addCgroup := true + + isRootless := rootless.IsRootless() + isNewUserns := s.UserNS.IsContainer() || s.UserNS.IsPath() || s.UserNS.IsPrivate() + + canMountSys := canMountSys(isRootless, isNewUserns, s) + + if s.Privileged && canMountSys { + cgroupPerm = "rw" + g.RemoveMount("/sys") + sysMnt := spec.Mount{ + Destination: "/sys", + Type: "sysfs", + Source: "sysfs", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"}, + } + g.AddMount(sysMnt) + } + if !canMountSys { + addCgroup = false + g.RemoveMount("/sys") + r := "ro" + if s.Privileged { + r = "rw" + } + sysMnt := spec.Mount{ + Destination: "/sys", + Type: "bind", // should we use a constant for this, like createconfig? + Source: "/sys", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, + } + g.AddMount(sysMnt) + if !s.Privileged && isRootless { + g.AddLinuxMaskedPaths("/sys/kernel") + } + } + gid5Available := true + if isRootless { + nGids, err := rootless.GetAvailableGids() + if err != nil { + return nil, err + } + gid5Available = nGids >= 5 + } + // When using a different user namespace, check that the GID 5 is mapped inside + // the container. 
+ if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) { + mappingFound := false + for _, r := range s.IDMappings.GIDMap { + if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size { + mappingFound = true + break + } + } + if !mappingFound { + gid5Available = false + } + } + if !gid5Available { + // If we have no GID mappings, the gid=5 default option would fail, so drop it. + g.RemoveMount("/dev/pts") + devPts := spec.Mount{ + Destination: "/dev/pts", + Type: "devpts", + Source: "devpts", + Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"}, + } + g.AddMount(devPts) + } + + inUserNS := isRootless || isNewUserns + + if inUserNS && s.IpcNS.IsHost() { + g.RemoveMount("/dev/mqueue") + devMqueue := spec.Mount{ + Destination: "/dev/mqueue", + Type: "bind", // constant ? + Source: "/dev/mqueue", + Options: []string{"bind", "nosuid", "noexec", "nodev"}, + } + g.AddMount(devMqueue) + } + if inUserNS && s.PidNS.IsHost() { + g.RemoveMount("/proc") + procMount := spec.Mount{ + Destination: "/proc", + Type: define.TypeBind, + Source: "/proc", + Options: []string{"rbind", "nosuid", "noexec", "nodev"}, + } + g.AddMount(procMount) + } + + if addCgroup { + cgroupMnt := spec.Mount{ + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Source: "cgroup", + Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm}, + } + g.AddMount(cgroupMnt) + } + + g.Config.Linux.Personality = s.Personality + + g.SetProcessCwd(s.WorkDir) + + g.SetProcessArgs(finalCmd) + + g.SetProcessTerminal(s.Terminal) + + for key, val := range s.Annotations { + g.AddAnnotation(key, val) + } + + if s.ResourceLimits != nil { + out, err := json.Marshal(s.ResourceLimits) + if err != nil { + return nil, err + } + err = json.Unmarshal(out, g.Config.Linux.Resources) + if err != nil { + return nil, err + } + g.Config.Linux.Resources = s.ResourceLimits + } + + weightDevices, err := WeightDevices(s.WeightDevice) + if err != nil { + return 
nil, err + } + if len(weightDevices) > 0 { + for _, dev := range weightDevices { + g.AddLinuxResourcesBlockIOWeightDevice(dev.Major, dev.Minor, *dev.Weight) + } + } + + // Devices + // set the default rule at the beginning of device configuration + if !inUserNS && !s.Privileged { + g.AddLinuxResourcesDevice(false, "", nil, nil, "rwm") + } + + var userDevices []spec.LinuxDevice + + if !s.Privileged { + // add default devices from containers.conf + for _, device := range rtc.Containers.Devices { + if err = DevicesFromPath(&g, device); err != nil { + return nil, err + } + } + if len(compatibleOptions.HostDeviceList) > 0 && len(s.Devices) == 0 { + userDevices = compatibleOptions.HostDeviceList + } else { + userDevices = s.Devices + } + // add default devices specified by caller + for _, device := range userDevices { + if err = DevicesFromPath(&g, device.Path); err != nil { + return nil, err + } + } + } + s.HostDeviceList = userDevices + + // set the devices cgroup when not running in a user namespace + if !inUserNS && !s.Privileged { + for _, dev := range s.DeviceCgroupRule { + g.AddLinuxResourcesDevice(true, dev.Type, dev.Major, dev.Minor, dev.Access) + } + } + + BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g) + + g.ClearProcessEnv() + for name, val := range s.Env { + g.AddProcessEnv(name, val) + } + + addRlimits(s, &g) + + // NAMESPACES + if err := specConfigureNamespaces(s, &g, rt, pod); err != nil { + return nil, err + } + configSpec := g.Config + + if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil { + return nil, err + } + + // BIND MOUNTS + configSpec.Mounts = SupersedeUserMounts(mounts, configSpec.Mounts) + // Process mounts to ensure correct options + if err := InitFSMounts(configSpec.Mounts); err != nil { + return nil, err + } + + // Add annotations + if configSpec.Annotations == nil { + configSpec.Annotations = make(map[string]string) + } + + if s.Remove { + 
configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse + } + + if len(s.VolumesFrom) > 0 { + configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",") + } + + if s.Privileged { + configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse + } + + if s.Init { + configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue + } else { + configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse + } + + if s.OOMScoreAdj != nil { + g.SetProcessOOMScoreAdj(*s.OOMScoreAdj) + } + setProcOpts(s, &g) + + return configSpec, nil +} + +func WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) { + devs := []spec.LinuxWeightDevice{} + for k, v := range wtDevices { + statT := unix.Stat_t{} + if err := unix.Stat(k, &statT); err != nil { + return nil, fmt.Errorf("failed to inspect '%s' in --blkio-weight-device: %w", k, err) + } + dev := new(spec.LinuxWeightDevice) + dev.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert + dev.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert + dev.Weight = v.Weight + devs = append(devs, *dev) + } + return devs, nil +} diff --git a/pkg/specgen/generate/oci_unsupported.go b/pkg/specgen/generate/oci_unsupported.go new file mode 100644 index 0000000000..7e1b8c42ca --- /dev/null +++ b/pkg/specgen/generate/oci_unsupported.go @@ -0,0 +1,24 @@ +//go:build !linux && !freebsd +// +build !linux,!freebsd + +package generate + +import ( + "context" + "errors" + + "github.com/containers/common/libimage" + "github.com/containers/common/pkg/config" + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/pkg/specgen" + spec 
"github.com/opencontainers/runtime-spec/specs-go" +) + +// SpecGenToOCI returns the base configuration for the container. +func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) { + return nil, errors.New("unsupported SpecGenToOCI") +} + +func WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) { + return []spec.LinuxWeightDevice{}, errors.New("unsupported WeightDevices") +} diff --git a/pkg/specgen/generate/security_freebsd.go b/pkg/specgen/generate/security_freebsd.go new file mode 100644 index 0000000000..5fd66c7695 --- /dev/null +++ b/pkg/specgen/generate/security_freebsd.go @@ -0,0 +1,19 @@ +package generate + +import ( + "github.com/containers/common/libimage" + "github.com/containers/common/pkg/config" + "github.com/containers/podman/v4/libpod" + "github.com/containers/podman/v4/pkg/specgen" + "github.com/opencontainers/runtime-tools/generate" +) + +// setLabelOpts sets the label options of the SecurityConfig according to the +// input. 
func setLabelOpts(s *specgen.SpecGenerator, runtime *libpod.Runtime, pidConfig specgen.Namespace, ipcConfig specgen.Namespace) error {
	// Nothing is configured in this variant: none of the arguments are read
	// and the call always succeeds.
	return nil
}

// securityConfigureGenerator is a no-op in this variant: it leaves s and g
// untouched, ignores newImage and rtc, and always returns nil.
func securityConfigureGenerator(s *specgen.SpecGenerator, g *generate.Generator, newImage *libimage.Image, rtc *config.Config) error {
	return nil
}