Skip to content
This repository has been archived by the owner on Dec 13, 2018. It is now read-only.

Commit

Permalink
Introduce support for syscall filtering in containers #237
Browse files Browse the repository at this point in the history
This PR introduces the ability to filter system calls on a per-container basis on Linux, using libseccomp to support multiple architectures.

This adds another layer of security between containers and the kernel. System calls which are unnecessary in a container or problematic from a security perspective can be restricted to prevent their use. Most of the truly problematic syscalls are already restricted by dropping capabilities; this adds an additional, finer-grained layer of protection.

There's a similar feature present in LXC already, with the significant difference that LXC uses a whitelist of system calls, whereas these patches use a blacklist. The blacklist approach ensures no difference in functionality to clients not explicitly aware of seccomp support (the restricted syscalls list in the container config is left empty, and the seccomp init function exits without taking action).

This PR adds a vendored library dependency (Go bindings for libseccomp) and a build dependency on libseccomp >= v2.1. The actual changes to libcontainer are fairly minimal; most of the delta is in the libseccomp bindings.

Presently missing: integration tests, documentation

Docker-DCO-1.1-Signed-off-by: Matt Heon <[email protected]> (github: mheon)
Docker-DCO-1.1-Signed-off-by: Dan Walsh <[email protected]> (github: rhatdan)

Conflicts:
	config.go
  • Loading branch information
rhatdan committed Dec 2, 2014
1 parent 51aa43f commit 6d1be31
Show file tree
Hide file tree
Showing 16 changed files with 2,367 additions and 5 deletions.
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM crosbymichael/golang

RUN apt-get update && apt-get install -y gcc make
RUN apt-get update && apt-get install -y gcc make libseccomp2 libseccomp-dev
RUN go get golang.org/x/tools/cmd/cover

ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor
Expand All @@ -18,7 +18,7 @@ WORKDIR /go/src/github.com/docker/libcontainer
RUN cp sample_configs/minimal.json /busybox/container.json

RUN go get -d -v ./...
RUN make direct-install
# The go tool's build-constraint flag is spelled -tags (plural).
RUN TEST_TAGS="-tags seccomp" make direct-install

ENTRYPOINT ["/dind"]
CMD ["make", "direct-test"]
# Exec form performs no shell unquoting, so the value is passed without
# embedded quotes; the go tool's build-constraint flag is -tags (plural).
CMD ["make", "TEST_TAGS=-tags seccomp", "direct-test"]
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ all:

test:
# we need NET_ADMIN for the netlink tests and SYS_ADMIN for mounting
docker run --rm -it --privileged docker/libcontainer
docker run --rm -it --privileged docker/libcontainer make TEST_TAGS="${TEST_TAGS}" direct-test

sh:
docker run --rm -it --privileged -w /busybox docker/libcontainer nsinit exec sh
Expand Down
4 changes: 4 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/mount"
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/security/seccomp"
)

type MountConfig mount.MountConfig
Expand Down Expand Up @@ -72,6 +73,9 @@ type Config struct {
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits,omitempty"`

// Syscalls which will be restricted on container start
Seccomps []seccomp.Seccomp `json:"seccomps,omitempty"`
}

// Routes can be specified to create entries in the route table as the container is started
Expand Down
70 changes: 70 additions & 0 deletions integration/seccomp_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// +build seccomp,linux,cgo

package integration

import (
"strings"
"testing"

"github.com/docker/libcontainer/security/seccomp"
)

// TestSeccompDenyGetcwd adds getcwd to the container's seccomp list, runs
// `pwd` in the container, and expects exit code 1 together with the
// "Operation not permitted" message on stderr. Skipped in -short mode.
func TestSeccompDenyGetcwd(t *testing.T) {
	if testing.Short() {
		return
	}

	root, err := newRootFs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(root)

	cfg := newTemplateConfig(root)
	denied := seccomp.Seccomp{Syscall: "getcwd"}
	cfg.Seccomps = append(cfg.Seccomps, denied)

	out, status, err := runContainer(cfg, "", "pwd")
	if err != nil {
		t.Fatal(err)
	}
	if status != 1 {
		t.Fatalf("Getcwd should fail with exit code 1, instead got %d!", status)
	}

	// Only surrounding newlines are stripped before comparing stderr.
	const want = "pwd: getcwd: Operation not permitted"
	if got := strings.Trim(out.Stderr.String(), "\n"); got != want {
		t.Fatalf("Expected output %s but got %s\n", want, got)
	}
}

// TestSeccompDenyMmap adds mmap to the container's seccomp list, runs
// `echo hello world` in the container, and expects the command to exit with
// code 20 and the "mmap of a spare page failed!" message on stderr.
// Skipped in -short mode.
func TestSeccompDenyMmap(t *testing.T) {
	if testing.Short() {
		return
	}

	rootfs, err := newRootFs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	config := newTemplateConfig(rootfs)
	// The entry type is seccomp.Seccomp (the original spelled it "Secomp",
	// which does not exist in the package).
	config.Seccomps = append(config.Seccomps, seccomp.Seccomp{Syscall: "mmap"})

	buffers, exitCode, err := runContainer(config, "", "echo", "hello world")
	if err != nil {
		t.Fatal(err)
	}

	if exitCode != 20 {
		t.Fatalf("Busybox should fail to start with exit code 20, instead got %d!", exitCode)
	}

	// Only surrounding newlines are stripped before comparing stderr.
	expected := "mmap of a spare page failed!"
	actual := strings.Trim(buffers.Stderr.String(), "\n")
	if actual != expected {
		t.Fatalf("Expected output %s but got %s\n", expected, actual)
	}
}
2 changes: 1 addition & 1 deletion label/label_selinux_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build selinux,linux
// +build selinux,linux,cgo

package label

Expand Down
5 changes: 5 additions & 0 deletions namespaces/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/security/capabilities"
"github.com/docker/libcontainer/security/restrict"
"github.com/docker/libcontainer/security/seccomp"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user"
"github.com/docker/libcontainer/utils"
Expand Down Expand Up @@ -128,6 +129,10 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pip
return fmt.Errorf("get parent death signal %s", err)
}

if err := seccomp.InitSeccomp(container.Seccomps); err != nil {
return fmt.Errorf("initializing seccomp %s", err)
}

if err := FinalizeNamespace(container); err != nil {
return fmt.Errorf("finalize namespace %s", err)
}
Expand Down
88 changes: 88 additions & 0 deletions security/seccomp/seccomp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// +build linux,cgo,seccomp

package seccomp

import (
"fmt"
"syscall"

"sourceforge.net/seccomp"
)

// Seccomp is one entry of a container's seccomp configuration as consumed by
// InitSeccomp. An entry names either an architecture or a syscall.
type Seccomp struct {
// Architecture, when non-empty, is resolved by name and added to the
// filter; InitSeccomp then moves on to the next entry without looking at
// Syscall or Args.
Architecture string
// Syscall is the name of the syscall to deny. It must be non-empty for
// entries that do not set Architecture.
Syscall string
// Args is carried in the configuration, but InitSeccomp only adds a deny
// rule for entries whose Args is empty.
Args []string
}

var (
// actDeny is the match action attached to every restricted syscall: the
// call is not executed and the caller gets an EPERM error code instead.
actDeny seccomp.ScmpAction = seccomp.ActErrno.SetReturnCode(int16(syscall.EPERM))
)

// InitSeccomp filters the given syscalls in a container, preventing them from
// being used. It is started in the container init process, and the filter is
// carried over to all child processes.
//
// The filter is a blacklist: its default action is seccomp.ActAllow and each
// listed syscall is answered with actDeny. An empty list returns nil without
// creating or loading any filter.
//
// Every entry of secomps is treated as exactly one of:
//   - an architecture entry (Architecture non-empty): the named architecture
//     is added to the filter and the rest of the entry is not inspected;
//   - a syscall entry (Syscall non-empty, Args empty): the syscall is denied.
//
// An entry with neither Architecture nor Syscall, an unknown architecture or
// syscall name, or a syscall entry carrying Args yields an error and no filter
// is loaded. Args are rejected rather than skipped because argument filtering
// is not implemented here, and silently dropping the entry would start the
// container without a restriction its configuration asked for.
func InitSeccomp(secomps []Seccomp) error {
	if len(secomps) == 0 {
		return nil
	}

	archNative, err := seccomp.GetNativeArch()
	if err != nil {
		return fmt.Errorf("Error getting native architecture: %s", err)
	}

	filter, err := seccomp.NewFilter(seccomp.ActAllow)
	if err != nil {
		return fmt.Errorf("Error creating filter: %s", err)
	}

	// Unset no new privs bit
	if err = filter.SetNoNewPrivsBit(false); err != nil {
		return fmt.Errorf("Error setting no new privileges: %s", err)
	}

	// If native arch is AMD64, add X86 to filter
	if archNative == seccomp.ArchAMD64 {
		if err = filter.AddArch(seccomp.ArchX86); err != nil {
			return fmt.Errorf("Error adding x86 arch to filter: %s", err)
		}
	}

	for _, call := range secomps {
		// Architecture entries only extend the filter's architecture set.
		if len(call.Architecture) > 0 {
			archNum, err := seccomp.GetArchFromName(call.Architecture)
			if err != nil {
				return fmt.Errorf("Could not resolve Archietecture name %q: %s", call.Architecture, err)
			}
			if err = filter.AddArch(archNum); err != nil {
				return fmt.Errorf("Error adding %q arch to filter: %s", call.Architecture, err)
			}
			continue
		}

		if len(call.Syscall) == 0 {
			return fmt.Errorf("Empty string is not a valid syscall!")
		}

		callNum, err := seccomp.GetSyscallFromName(call.Syscall)
		if err != nil {
			return fmt.Errorf("Could not resolve syscall name %s: %s", call.Syscall, err)
		}

		// Fail closed: a rule with argument conditions cannot be expressed
		// here, and ignoring it would leave the syscall unrestricted.
		if len(call.Args) > 0 {
			return fmt.Errorf("Argument filtering is not supported, cannot restrict syscall %s with args %q", call.Syscall, call.Args)
		}

		if err = filter.AddRule(callNum, actDeny); err != nil {
			return fmt.Errorf("Error adding rule to filter for syscall %s: %s", call.Syscall, err)
		}
	}

	if err = filter.Load(); err != nil {
		return fmt.Errorf("Error loading seccomp filter into kernel: %s", err)
	}

	return nil
}
27 changes: 27 additions & 0 deletions security/seccomp/seccomp_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// +build seccomp,linux,cgo

package seccomp

import (
"strings"
"testing"

"sourceforge.net/seccomp"
)

func TestInitSeccomp(t *testing.T) {
var seccomps []Seccomps
RestrictSyscalls := []string{"kexec_load", "open_by_handle_at", "init_module", "finit_module", "delete_module", "iopl", "ioperm", "swapon", "swapoff", "sysfs", "sysctl", "adjtimex", "clock_adjtime", "lookup_dcookie", "perf_event_open", "fanotify_init", "kcmp"}
for s := range RestrictSyscalls {
seccomps = append(seccomps, Seccomp{Syscall: s})
}
if err := InitSeccomp(nil); err != nil {
t.Log("InitLabels Failed")
t.Fatal(err)
}

if err := InitSeccomp(seccomps); err != nil {
t.Log("InitLabels Failed")
t.Fatal(err)
}
}
13 changes: 13 additions & 0 deletions security/seccomp/unsupported.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// +build !linux !cgo !seccomp

package seccomp

// Seccomp is one entry of a container's seccomp configuration. This copy of
// the type belongs to the build without seccomp support (see the build
// constraint at the top of the file), where InitSeccomp reads none of its
// fields.
type Seccomp struct {
// Architecture is an architecture name; unused in this build.
Architecture string
// Syscall is a syscall name; unused in this build.
Syscall string
// Args is unused in this build.
Args []string
}

// InitSeccomp is the stand-in used when the package is built without linux,
// cgo or the seccomp build tag. It installs no filter and always returns nil,
// whatever secomps contains.
// NOTE(review): a non-empty secomps is therefore silently ignored on such
// builds — confirm that callers do not rely on the restrictions being applied.
func InitSeccomp(secomps []Seccomp) error {
return nil
}
1 change: 1 addition & 0 deletions update-vendor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,6 @@ clone git github.com/codegangsta/cli 1.1.0
clone git github.com/coreos/go-systemd v2
clone git github.com/godbus/dbus v1
clone git github.com/syndtr/gocapability 3c85049eae
clone git sourceforge.net/seccomp 9c912b45e1

# intentionally not vendoring Docker itself... that'd be a circle :)
Loading

0 comments on commit 6d1be31

Please sign in to comment.