Skip to content

Commit

Permalink
Pod Volumes Support
Browse files Browse the repository at this point in the history
added support for the --volume flag in pods using the new infra container design.
users can specify all volume options they can with regular containers

resolves containers#10379

Signed-off-by: cdoern <[email protected]>
  • Loading branch information
cdoern committed Sep 3, 2021
1 parent f5cfb0d commit 52bcf71
Show file tree
Hide file tree
Showing 11 changed files with 254 additions and 25 deletions.
24 changes: 12 additions & 12 deletions cmd/podman/common/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -660,18 +660,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(mountFlagName, AutocompleteMountFlag)

volumeDesciption := "Bind mount a volume into the container"
if registry.IsRemote() {
volumeDesciption = "Bind mount a volume into the container. Volume src will be on the server machine, not the client"
}
volumeFlagName := "volume"
createFlags.StringArrayVarP(
&cf.Volume,
volumeFlagName, "v", volumes(),
volumeDesciption,
)
_ = cmd.RegisterFlagCompletionFunc(volumeFlagName, AutocompleteVolumeFlag)

volumesFromFlagName := "volumes-from"
createFlags.StringArrayVar(
&cf.VolumesFrom,
Expand Down Expand Up @@ -865,4 +853,16 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
"PID namespace to use",
)
_ = cmd.RegisterFlagCompletionFunc(pidFlagName, AutocompleteNamespace)

volumeDesciption := "Bind mount a volume into the container"
if registry.IsRemote() {
volumeDesciption = "Bind mount a volume into the container. Volume source will be on the server machine, not the client"
}
volumeFlagName := "volume"
createFlags.StringArrayVarP(
&cf.Volume,
volumeFlagName, "v", volumes(),
volumeDesciption,
)
_ = cmd.RegisterFlagCompletionFunc(volumeFlagName, AutocompleteVolumeFlag)
}
17 changes: 11 additions & 6 deletions cmd/podman/pods/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,7 @@ func create(cmd *cobra.Command, args []string) error {
createOptions.Share = nil
} else {
// reassign certain optios for lbpod api, these need to be populated in spec
createOptions.InfraConmonPidFile = infraOptions.ConmonPIDFile
createOptions.InfraName = infraOptions.Name
createOptions.Hostname = infraOptions.Hostname
createOptions.Cpus = infraOptions.CPUS
createOptions.CpusetCpus = infraOptions.CPUSetCPUs
createOptions.Pid = infraOptions.PID
MapOptions()
flags := cmd.Flags()
infraOptions.Net, err = common.NetFlagsToNetOptions(nil, *flags, false)
if err != nil {
Expand Down Expand Up @@ -291,3 +286,13 @@ func replacePod(name string) error {
}
return removePods([]string{name}, rmOptions, false)
}

// MapOptions copies the settings that the pod spec needs from infraOptions
// (the infra container's create options) into createOptions (the pod's create
// options). It takes no arguments and returns nothing; it only assigns the
// fields listed below.
func MapOptions() {
	// Naming and identity of the infra container.
	createOptions.InfraName = infraOptions.Name
	createOptions.InfraConmonPidFile = infraOptions.ConmonPIDFile
	createOptions.Hostname = infraOptions.Hostname

	// CPU and PID settings.
	createOptions.Cpus = infraOptions.CPUS
	createOptions.CpusetCpus = infraOptions.CPUSetCPUs
	createOptions.Pid = infraOptions.PID

	// Volumes requested for the pod.
	createOptions.Volume = infraOptions.Volume
}
162 changes: 162 additions & 0 deletions docs/source/markdown/podman-pod-create.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,168 @@ Valid _mode_ values are:
- *host*: run in the user namespace of the caller. The processes running in the container will have the same privileges on the host as any other process launched by the calling user (default).
- *keep-id*: creates a user namespace where the current rootless user's UID:GID are mapped to the same values in the container. This option is ignored for containers created by the root user.

#### **--volume**, **-v**[=*[[SOURCE-VOLUME|HOST-DIR:]CONTAINER-DIR[:OPTIONS]]*]

Create a bind mount. If you specify `-v /HOST-DIR:/CONTAINER-DIR`, Podman
bind mounts `/HOST-DIR` in the host to `/CONTAINER-DIR` in the Podman
container. Similarly, `-v SOURCE-VOLUME:/CONTAINER-DIR` will mount the volume
in the host to the container. If no such named volume exists, Podman will
create one. The `OPTIONS` are a comma-separated list; the valid values are described below. <sup>[[1]](#Footnote1)</sup> (Note: when using the remote client, the volumes will be mounted from the remote server, not necessarily the client machine.)

The _options_ are given as a comma-separated list and can be:

* **rw**|**ro**
* **z**|**Z**
* [**r**]**shared**|[**r**]**slave**|[**r**]**private**|[**r**]**unbindable**
* [**r**]**bind**
* [**no**]**exec**
* [**no**]**dev**
* [**no**]**suid**
* [**O**]
* [**U**]

The `CONTAINER-DIR` must be an absolute path such as `/src/docs`. The volume
will be mounted into the container at this directory.

Volumes may specify a source as well, as either a directory on the host
or the name of a named volume. If no source is given, the volume will be created as an
anonymously named volume with a randomly generated name, and will be removed when
the pod is removed via the `--rm` flag or `podman rm --volumes` commands.

If a volume source is specified, it must be a path on the host or the name of a
named volume. Host paths are allowed to be absolute or relative; relative paths
are resolved relative to the directory Podman is run in. If the source does not
exist, Podman will return an error. Users must pre-create the source files or
directories.

Any source that does not begin with a `.` or `/` will be treated as the name of
a named volume. If a volume with that name does not exist, it will be created.
Volumes created with names are not anonymous, and they are not removed by the `--rm`
option and the `podman rm --volumes` command.

You can specify multiple **-v** options to mount one or more volumes into a
pod.

`Write Protected Volume Mounts`

You can add a `:ro` or `:rw` suffix to a volume to mount it in read-only or
read-write mode, respectively. By default, the volumes are mounted read-write.
See examples.

`Chowning Volume Mounts`

By default, Podman does not change the owner and group of source volume
directories mounted into containers. If a pod is created in a new user
namespace, the UID and GID in the container may correspond to another UID and
GID on the host.

The `:U` suffix tells Podman to use the correct host UID and GID based on the
UID and GID within the pod, to change recursively the owner and group of
the source volume.

**Warning** use with caution since this will modify the host filesystem.

`Labeling Volume Mounts`

Labeling systems like SELinux require that proper labels are placed on volume
content mounted into a pod. Without a label, the security system might
prevent the processes running inside the pod from using the content. By
default, Podman does not change the labels set by the OS.

To change a label in the pod context, you can add either of two suffixes
`:z` or `:Z` to the volume mount. These suffixes tell Podman to relabel file
objects on the shared volumes. The `z` option tells Podman that two pods
share the volume content. As a result, Podman labels the content with a shared
content label. Shared volume labels allow all containers to read/write content.
The `Z` option tells Podman to label the content with a private unshared label.
Only the current pod can use a private volume.

`Overlay Volume Mounts`

The `:O` flag tells Podman to mount the directory from the host as a
temporary storage using the `overlay file system`. The pod processes
can modify content within the mountpoint which is stored in the
container storage in a separate directory. In overlay terms, the source
directory will be the lower, and the container storage directory will be the
upper. Modifications to the mount point are destroyed when the pod
finishes executing, similar to a tmpfs mount point being unmounted.

Subsequent executions of the container will see the original source directory
content; any changes from previous pod executions no longer exist.

One use case of the overlay mount is sharing the package cache from the
host into the container to allow speeding up builds.

Note:

- The `O` flag conflicts with other options listed above.
Content mounted into the container is labeled with the private label.
On SELinux systems, labels in the source directory must be readable
by the infra container label. Usually containers can read/execute `container_share_t`
and can read/write `container_file_t`. If you cannot change the labels on a
source volume, SELinux container separation must be disabled for the infra container/pod
to work.
- The source directory mounted into the pod with an overlay mount
should not be modified, as doing so can cause unexpected failures. It is recommended
that you do not modify the directory until the container finishes running.

`Mounts propagation`

By default, bind mounted volumes are `private`. That means any mounts done
inside the pod will not be visible on the host and vice versa. One can change
this behavior by specifying a volume mount propagation property. Making a
volume `shared` means that mounts done under that volume inside the pod will be
visible on the host and vice versa. Making a volume `slave` enables only one-way
mount propagation: mounts done on the host under that volume
will be visible inside the container but not the other way around. <sup>[[1]](#Footnote1)</sup>

To control mount propagation property of a volume one can use the [**r**]**shared**,
[**r**]**slave**, [**r**]**private** or the [**r**]**unbindable** propagation flag.
Propagation property can be specified only for bind mounted volumes and not for
internal volumes or named volumes. For mount propagation to work the source mount
point (the mount point where source dir is mounted on) has to have the right propagation
properties. For shared volumes, the source mount point has to be shared. And for
slave volumes, the source mount point has to be either shared or slave.
<sup>[[1]](#Footnote1)</sup>

If you want to recursively mount a volume and all of its submounts into a
pod, then you can use the `rbind` option. By default the bind option is
used, and submounts of the source directory will not be mounted into the
pod.

Mounting the volume with the `nosuid` option means that SUID applications on
the volume will not be able to change their privilege. By default volumes
are mounted with `nosuid`.

Mounting the volume with the `noexec` option means that no executables on the
volume will be able to be executed within the pod.

Mounting the volume with the nodev option means that no devices on the volume
will be able to be used by processes within the pod. By default volumes
are mounted with `nodev`.

If the `<source-dir>` is a mount point, then "dev", "suid", and "exec" options are
ignored by the kernel.

Use `df <source-dir>` to figure out the source mount and then use
`findmnt -o TARGET,PROPAGATION <source-mount-dir>` to figure out propagation
properties of source mount. If `findmnt` utility is not available, then one
can look at the mount entry for the source mount point in `/proc/self/mountinfo`. Look
at `optional fields` and see if any propagation properties are specified.
`shared:X` means mount is `shared`, `master:X` means mount is `slave` and if
nothing is there that means mount is `private`. <sup>[[1]](#Footnote1)</sup>

To change propagation properties of a mount point use `mount` command. For
example, if one wants to bind mount source directory `/foo` one can do
`mount --bind /foo /foo` and `mount --make-private --make-shared /foo`. This
will convert /foo into a `shared` mount point. Alternatively one can directly
change propagation properties of source mount. Say `/` is source mount for
`/foo`, then use `mount --make-shared /` to convert `/` into a `shared` mount.

Note: if the user only has access rights via a group, accessing the volume
from inside a rootless pod will fail.


## EXAMPLES

```
Expand Down
4 changes: 2 additions & 2 deletions libpod/container_inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
}

namedVolumes, mounts := c.sortUserVolumes(ctrSpec)
inspectMounts, err := c.getInspectMounts(namedVolumes, c.config.ImageVolumes, mounts)
inspectMounts, err := c.GetInspectMounts(namedVolumes, c.config.ImageVolumes, mounts)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -193,7 +193,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
// Get inspect-formatted mounts list.
// Only includes user-specified mounts. Only includes bind mounts and named
// volumes, not tmpfs volumes.
func (c *Container) getInspectMounts(namedVolumes []*ContainerNamedVolume, imageVolumes []*ContainerImageVolume, mounts []spec.Mount) ([]define.InspectMount, error) {
func (c *Container) GetInspectMounts(namedVolumes []*ContainerNamedVolume, imageVolumes []*ContainerImageVolume, mounts []spec.Mount) ([]define.InspectMount, error) {
inspectMounts := []define.InspectMount{}

// No mounts, return early
Expand Down
2 changes: 2 additions & 0 deletions libpod/define/pod_inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ type InspectPodData struct {
CPUQuota int64 `json:"cpu_quota,omitempty"`
// CPUSetCPUs contains linux specific CPU data for the pod
CPUSetCPUs string `json:"cpuset_cpus,omitempty"`
// Mounts contains volume related information for the pod
Mounts []InspectMount `json:"mounts,omitempty"`
}

// InspectPodInfraConfig contains the configuration of the pod's infra
Expand Down
7 changes: 7 additions & 0 deletions libpod/pod_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
// Infra config contains detailed information on the pod's infra
// container.
var infraConfig *define.InspectPodInfraConfig
var inspectMounts []define.InspectMount
if p.state.InfraContainerID != "" {
infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
if err != nil {
Expand All @@ -597,6 +598,11 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
infraConfig.CPUSetCPUs = p.ResourceLim().CPU.Cpus
infraConfig.PidNS = p.PidMode()
infraConfig.UserNS = p.UserNSMode()
namedVolumes, mounts := infra.sortUserVolumes(infra.Config().Spec)
inspectMounts, err = infra.GetInspectMounts(namedVolumes, infra.config.ImageVolumes, mounts)
if err != nil {
return nil, err
}

if len(infra.Config().ContainerNetworkConfig.DNSServer) > 0 {
infraConfig.DNSServer = make([]string, 0, len(infra.Config().ContainerNetworkConfig.DNSServer))
Expand Down Expand Up @@ -645,6 +651,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
CPUSetCPUs: p.ResourceLim().CPU.Cpus,
CPUPeriod: p.CPUPeriod(),
CPUQuota: p.CPUQuota(),
Mounts: inspectMounts,
}

return &inspectData, nil
Expand Down
9 changes: 5 additions & 4 deletions pkg/api/handlers/libpod/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ func PodCreate(w http.ResponseWriter, r *http.Request) {
}
if !psg.NoInfra {
infraOptions := &entities.ContainerCreateOptions{ImageVolume: "bind", IsInfra: true, Net: &entities.NetOptions{}} // options for pulling the image and FillOutSpec
err = specgenutil.FillOutSpecGen(psg.InfraContainerSpec, infraOptions, []string{}) // necessary for default values in many cases (userns, idmappings)
if len(psg.Volume) > 0 {
infraOptions.Volume = psg.Volume
}
err = specgenutil.FillOutSpecGen(psg.InfraContainerSpec, infraOptions, []string{}) // necessary for default values in many cases (userns, idmappings)
if err != nil {
utils.Error(w, "Something went wrong.", http.StatusInternalServerError, errors.Wrap(err, "error filling out specgen"))
return
Expand All @@ -51,13 +54,11 @@ func PodCreate(w http.ResponseWriter, r *http.Request) {
utils.Error(w, "Something went wrong.", http.StatusInternalServerError, errors.Wrap(err, "failed to decode specgen"))
return
}
tempSpec := &specgen.SpecGenerator{} // temporary spec since infra cannot be decoded into
err = json.Unmarshal(out, tempSpec) // unmarhal matching options
err = json.Unmarshal(out, psg.InfraContainerSpec) // unmarhal matching options
if err != nil {
utils.Error(w, "Something went wrong.", http.StatusInternalServerError, errors.Wrap(err, "failed to decode specgen"))
return
}
psg.InfraContainerSpec = tempSpec // set infra spec equal to temp
// a few extra that do not have the same json tags
psg.InfraContainerSpec.Name = psg.InfraName
psg.InfraContainerSpec.ConmonPidFile = psg.InfraConmonPidFile
Expand Down
4 changes: 4 additions & 0 deletions pkg/domain/entities/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ type PodCreateOptions struct {
Cpus float64
CpusetCpus string
Userns specgen.Namespace
Volume []string
}

type ContainerCreateOptions struct {
Expand Down Expand Up @@ -282,6 +283,9 @@ func ToPodSpecGen(s specgen.PodSpecGenerator, p *PodCreateOptions) (*specgen.Pod
if err != nil {
return nil, err
}
if len(p.Volume) > 0 {
s.Volume = p.Volume
}
s.Pid = out
s.Hostname = p.Hostname
s.Labels = p.Labels
Expand Down
14 changes: 13 additions & 1 deletion pkg/specgen/generate/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,27 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener
return nil, nil, nil, err
}

// If joining a pod, retrieve the pod for use.
// If joining a pod, retrieve the pod for use, and its infra container
var pod *libpod.Pod
var cont *libpod.Container
var config *libpod.ContainerConfig
if s.Pod != "" {
pod, err = rt.LookupPod(s.Pod)
if err != nil {
return nil, nil, nil, errors.Wrapf(err, "error retrieving pod %s", s.Pod)
}
if pod.HasInfraContainer() {
cont, err = pod.InfraContainer()
if err != nil {
return nil, nil, nil, err
}
config = cont.Config()
}
}

if config != nil && (len(config.NamedVolumes) > 0 || len(config.UserVolumes) > 0 || len(config.ImageVolumes) > 0 || len(config.OverlayVolumes) > 0) {
s.VolumesFrom = append(s.VolumesFrom, config.ID)
}
// Set defaults for unset namespaces
if s.PidNS.IsDefault() {
defaultNS, err := GetDefaultNamespaceMode("pid", rtc, pod)
Expand Down
2 changes: 2 additions & 0 deletions pkg/specgen/podspecgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ type PodNetworkConfig struct {
// NetworkOptions are additional options for each network
// Optional.
NetworkOptions map[string][]string `json:"network_options,omitempty"`
// Volume contains the user specified volumes to be bind mounted into the container
Volume []string `json:"pod_volumes,omitempty"`
}

// PodCgroupConfig contains configuration options about a pod's cgroups.
Expand Down
Loading

0 comments on commit 52bcf71

Please sign in to comment.