k8systemd: run k8s workloads in systemd
Support running `podman play kube` in systemd by exploiting the
previously added "service containers".  During `play kube`, a service
container is started before all the pods and containers, and is stopped
last.  The service container communicates its conmon PID via sdnotify.
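
For reference, the service container can later be found by name: as
implemented in `serviceContainerName()` in pkg/domain/infra/abi/play.go, the
name is the first 12 characters of the YAML file's SHA-256 digest followed by
a `-service` suffix.  A small sketch, using the `top.yaml` example below:

```
$ yaml_sha=$(sha256sum ~/top.yaml | cut -d' ' -f1)
$ podman container inspect "${yaml_sha:0:12}-service" --format "{{.State.Running}}"
```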

Add a new systemd template to dispatch such k8s workloads.  The argument
of the template is the path to the k8s file.  Note that the path must be
escaped for systemd not to bark:

Let's assume we have a `top.yaml` file in the home directory:
```
$ escaped=$(systemd-escape ~/top.yaml)
$ systemctl --user start podman-play-kube@$escaped.service
```
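
Stopping the unit tears the workload down again; per the template's
`ExecStop=`, systemd then runs `podman play kube --down` on the same file:

```
$ systemctl --user stop podman-play-kube@$escaped.service
```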

Closes: https://issues.redhat.com/browse/RUN-1287
Signed-off-by: Valentin Rothberg <[email protected]>
vrothberg committed May 17, 2022
1 parent eb26fa4 commit 8684d41
Showing 14 changed files with 248 additions and 30 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -4,6 +4,7 @@
/build/
/conmon/
contrib/spec/podman.spec
contrib/systemd/*/*.service
*.coverprofile
coverprofile
/.coverage
5 changes: 4 additions & 1 deletion Makefile
@@ -832,7 +832,8 @@ install.docker-full: install.docker install.docker-docs
ifneq (,$(findstring systemd,$(BUILDTAGS)))
PODMAN_UNIT_FILES = contrib/systemd/auto-update/podman-auto-update.service \
contrib/systemd/system/podman.service \
contrib/systemd/system/podman-restart.service
contrib/systemd/system/podman-restart.service \
contrib/systemd/system/[email protected]

%.service: %.service.in
sed -e 's;@@PODMAN@@;$(BINDIR)/podman;g' $< >$@.tmp.$$ \
@@ -846,12 +847,14 @@ install.systemd: $(PODMAN_UNIT_FILES)
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman.socket ${DESTDIR}${USERSYSTEMDDIR}/podman.socket
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman.service ${DESTDIR}${USERSYSTEMDDIR}/podman.service
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman-restart.service ${DESTDIR}${USERSYSTEMDDIR}/podman-restart.service
install ${SELINUXOPT} -m 644 contrib/systemd/system/[email protected] ${DESTDIR}${USERSYSTEMDDIR}/[email protected]
# System services
install ${SELINUXOPT} -m 644 contrib/systemd/auto-update/podman-auto-update.service ${DESTDIR}${SYSTEMDDIR}/podman-auto-update.service
install ${SELINUXOPT} -m 644 contrib/systemd/auto-update/podman-auto-update.timer ${DESTDIR}${SYSTEMDDIR}/podman-auto-update.timer
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman.socket ${DESTDIR}${SYSTEMDDIR}/podman.socket
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman.service ${DESTDIR}${SYSTEMDDIR}/podman.service
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman-restart.service ${DESTDIR}${SYSTEMDDIR}/podman-restart.service
install ${SELINUXOPT} -m 644 contrib/systemd/system/[email protected] ${DESTDIR}${SYSTEMDDIR}/[email protected]
rm -f $(PODMAN_UNIT_FILES)
else
install.systemd:
18 changes: 18 additions & 0 deletions contrib/systemd/system/[email protected]
@@ -0,0 +1,18 @@
[Unit]
Description=A template for running K8s workloads via podman-play-kube
Documentation=man:podman-play-kube(1)
Wants=network-online.target
After=network-online.target
RequiresMountsFor=%t/containers

[Service]
Environment=PODMAN_SYSTEMD_UNIT=%n
Restart=no
TimeoutStopSec=70
ExecStart=@@PODMAN@@ play kube --replace --service-container=true %I
ExecStop=@@PODMAN@@ play kube --down %I
Type=notify
NotifyAccess=all

[Install]
WantedBy=default.target
2 changes: 1 addition & 1 deletion libpod/container_inspect.go
@@ -171,7 +171,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
Mounts: inspectMounts,
Dependencies: c.Dependencies(),
IsInfra: c.IsInfra(),
IsService: c.isService(),
IsService: c.IsService(),
}

if c.state.ConfigPath != "" {
2 changes: 1 addition & 1 deletion libpod/container_validate.go
@@ -31,7 +31,7 @@ func (c *Container) validate() error {

// A container cannot be marked as an infra and service container at
// the same time.
if c.IsInfra() && c.isService() {
if c.IsInfra() && c.IsService() {
return fmt.Errorf("cannot be infra and service container at the same time: %w", define.ErrInvalidArg)
}

20 changes: 20 additions & 0 deletions libpod/runtime_ctr.go
@@ -644,6 +644,16 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
return err
}

if c.IsService() {
canStop, err := c.canStopServiceContainer()
if err != nil {
return err
}
if !canStop {
return fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
}
}

// If we're not force-removing, we need to check if we're in a good
// state to remove.
if !force {
@@ -903,6 +913,16 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
}
}

if c.IsService() {
canStop, err := c.canStopServiceContainer()
if err != nil {
return id, err
}
if !canStop {
return id, fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
}
}

var cleanupErr error
// Remove the container from the state
if c.config.Pod != "" {
15 changes: 11 additions & 4 deletions libpod/service.go
@@ -54,11 +54,12 @@ func (c *Container) addServicePodLocked(id string) error {
return c.save()
}

func (c *Container) isService() bool {
// IsService returns true when the container is a "service container".
func (c *Container) IsService() bool {
return c.config.IsService
}

// canStopServiceContainer returns true if all pods of the service are stopped.
// canStopServiceContainerLocked returns true if all pods of the service are stopped.
// Note that the method acquires the container lock.
func (c *Container) canStopServiceContainerLocked() (bool, error) {
c.lock.Lock()
@@ -67,10 +68,16 @@ func (c *Container) canStopServiceContainerLocked() (bool, error) {
return false, err
}

if !c.isService() {
if !c.IsService() {
return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
}

return c.canStopServiceContainer()
}

// canStopServiceContainer returns true if all pods of the service are stopped.
// Note that the method expects the container to be locked.
func (c *Container) canStopServiceContainer() (bool, error) {
for _, id := range c.state.Service.Pods {
pod, err := c.runtime.LookupPod(id)
if err != nil {
@@ -163,7 +170,7 @@ func (c *Container) canRemoveServiceContainerLocked() (bool, error) {
return false, err
}

if !c.isService() {
if !c.IsService() {
return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
}

8 changes: 7 additions & 1 deletion pkg/domain/infra/abi/containers.go
@@ -292,7 +292,13 @@ func (ic *ContainerEngine) removeContainer(ctx context.Context, ctr *libpod.Cont
logrus.Debugf("Failed to remove container %s: %s", ctr.ID(), err.Error())
switch errors.Cause(err) {
case define.ErrNoSuchCtr:
if options.Ignore {
// Ignore if the container does not exist (anymore) when either
// the user requested that or when the container is a service
// container.  Service containers are removed along with their
// pods, which in turn are removed along with their infra
// container.  Hence, there is an inherent race when removing
// infra containers and service containers in parallel.
if options.Ignore || ctr.IsService() {
logrus.Debugf("Ignoring error (--allow-missing): %v", err)
return nil
}
34 changes: 27 additions & 7 deletions pkg/domain/infra/abi/play.go
@@ -37,7 +37,15 @@ import (
// createServiceContainer creates a container that can later on
// be associated with the pods of a K8s yaml. It will be started along with
// the first pod.
func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name string) (*libpod.Container, error) {
func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name string, options entities.PlayKubeOptions) (*libpod.Container, error) {
// Make sure to replace the service container as well if requested by
// the user.
if options.Replace {
if _, err := ic.ContainerRm(ctx, []string{name}, entities.RmOptions{Force: true, Ignore: true}); err != nil {
return nil, fmt.Errorf("replacing service container: %w", err)
}
}

// Similar to infra containers, a service container uses the pause image.
image, err := generate.PullOrBuildInfraImage(ic.Libpod, "")
if err != nil {
@@ -65,6 +73,7 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri
return nil, fmt.Errorf("creating runtime spec for service container: %w", err)
}
opts = append(opts, libpod.WithIsService())
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeConmon))

// Create a new libpod container based on the spec.
ctr, err := ic.Libpod.NewContainer(ctx, runtimeSpec, spec, false, opts...)
Expand All @@ -75,6 +84,17 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri
return ctr, nil
}

// serviceContainerName returns the name for a service container based on the
// provided content of a K8s yaml file.
func serviceContainerName(content []byte) string {
// The name of the service container is the first 12
// characters of the yaml file's hash followed by the
// '-service' suffix to guarantee a predictable and
// discoverable name.
hash := digest.FromBytes(content).Encoded()
return hash[0:12] + "-service"
}

func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options entities.PlayKubeOptions) (_ *entities.PlayKubeReport, finalErr error) {
report := &entities.PlayKubeReport{}
validKinds := 0
@@ -112,12 +132,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
// TODO: create constants for the various "kinds" of yaml files.
var serviceContainer *libpod.Container
if options.ServiceContainer && (kind == "Pod" || kind == "Deployment") {
// The name of the service container is the first 12
// characters of the yaml file's hash followed by the
// '-service' suffix to guarantee a predictable and
// discoverable name.
hash := digest.FromBytes(content).Encoded()
ctr, err := ic.createServiceContainer(ctx, hash[0:12]+"-service")
ctr, err := ic.createServiceContainer(ctx, serviceContainerName(content), options)
if err != nil {
return nil, err
}
@@ -433,6 +448,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
podSpec.PodSpecGen.NoInfra = false
podSpec.PodSpecGen.InfraContainerSpec = specgen.NewSpecGenerator(infraImage, false)
podSpec.PodSpecGen.InfraContainerSpec.NetworkOptions = p.NetworkOptions
podSpec.PodSpecGen.InfraContainerSpec.SdNotifyMode = define.SdNotifyModeIgnore

err = specgenutil.FillOutSpecGen(podSpec.PodSpecGen.InfraContainerSpec, &infraOptions, []string{})
if err != nil {
@@ -516,10 +532,12 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
if err != nil {
return nil, err
}
specGen.SdNotifyMode = define.SdNotifyModeIgnore
rtSpec, spec, opts, err := generate.MakeContainer(ctx, ic.Libpod, specGen, false, nil)
if err != nil {
return nil, err
}
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeIgnore))
ctr, err := generate.ExecuteCreate(ctx, ic.Libpod, rtSpec, spec, false, opts...)
if err != nil {
return nil, err
@@ -570,6 +588,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
if err != nil {
return nil, err
}
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeIgnore))
ctr, err := generate.ExecuteCreate(ctx, ic.Libpod, rtSpec, spec, false, opts...)
if err != nil {
return nil, err
@@ -942,5 +961,6 @@ func (ic *ContainerEngine) PlayKubeDown(ctx context.Context, body io.Reader, _ e
if err != nil {
return nil, err
}

return reports, nil
}
2 changes: 2 additions & 0 deletions podman.spec.rpkg
@@ -242,11 +242,13 @@ done
%{_unitdir}/%{name}.service
%{_unitdir}/%{name}.socket
%{_unitdir}/%{name}-restart.service
%{_unitdir}/%{name}[email protected]
%{_userunitdir}/%{name}-auto-update.service
%{_userunitdir}/%{name}-auto-update.timer
%{_userunitdir}/%{name}.service
%{_userunitdir}/%{name}.socket
%{_userunitdir}/%{name}-restart.service
%{_userunitdir}/%{name}[email protected]
%{_tmpfilesdir}/%{name}.conf
%if 0%{?fedora} >= 36
%{_modulesloaddir}/%{name}-iptables.conf
76 changes: 76 additions & 0 deletions test/system/250-systemd.bats
@@ -292,4 +292,80 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
run_podman network rm -f $netname
}

@test "[email protected] template" {
skip_if_remote "systemd units do not work with remote clients"

# If running from a podman source directory, build and use the source
# version of the [email protected] unit file
unit_name="[email protected]"
unit_file="contrib/systemd/system/${unit_name}"
if [[ -e ${unit_file}.in ]]; then
echo "# [Building & using $unit_name from source]" >&3
BINDIR=$(dirname $PODMAN) make $unit_file
cp $unit_file $UNIT_DIR/$unit_name
fi

# Create the YAML file
yaml_source="$PODMAN_TMPDIR/test.yaml"
cat >$yaml_source <<EOF
apiVersion: v1
kind: Pod
metadata:
labels:
app: test
name: test_pod
spec:
containers:
- command:
- top
image: $IMAGE
name: test
resources: {}
EOF

# Dispatch the YAML file
service_name="podman-play-kube@$(systemd-escape $yaml_source).service"
systemctl start $service_name
systemctl is-active $service_name

# The name of the service container is predictable: the first 12 characters
# of the hash of the YAML file followed by the "-service" suffix
yaml_sha=$(sha256sum $yaml_source)
service_container="${yaml_sha:0:12}-service"

# Make sure that the service container exists and runs.
run_podman container inspect $service_container --format "{{.State.Running}}"
is "$output" "true"

# Check for an error when trying to remove the service container
run_podman 125 container rm $service_container
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"

# Kill the pod and make sure the service is not running.
# The restart policy is set to "no" since there is no
# design yet for propagating exit codes up to the service
# container.
run_podman pod kill test_pod
for i in {0..5}; do
run systemctl is-failed $service_name
if [[ $output == "failed" ]]; then
break
fi
sleep 0.5
done
is "$output" "failed" "systemd service transitioned to 'failed' state"

# Now stop and start the service again.
systemctl stop $service_name
systemctl start $service_name
systemctl is-active $service_name
run_podman container inspect $service_container --format "{{.State.Running}}"
is "$output" "true"

# Clean up
systemctl stop $service_name
run_podman 1 container exists $service_container
run_podman 1 pod exists test_pod
}

# vim: filetype=sh