From e28b636b7b5c4ebc8532cfd351dd4ee0a72d95fa Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 22 Oct 2014 17:35:29 +0000 Subject: [PATCH 001/101] Add ContainerInfo instead of using a readonly state This modifies Load in the factory to return a ContainerInfo interface that is the read only view of the container when it is loaded. Signed-off-by: Michael Crosby --- container.go | 50 +++++++++++++++++++++++++++----------------------- factory.go | 8 +++----- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/container.go b/container.go index 307e8cbcb..82d1ace26 100644 --- a/container.go +++ b/container.go @@ -3,12 +3,7 @@ NOTE: The API is in flux and mainly not implemented. Proceed with caution until */ package libcontainer -// A libcontainer container object. -// -// Each container is thread-safe within the same process. Since a container can -// be destroyed by a separate process, any function may return that the container -// was not found. -type Container interface { +type ContainerInfo interface { // Returns the ID of the container ID() string @@ -22,40 +17,49 @@ type Container interface { // Returns the current config of the container. Config() *Config - // Start a process inside the container. Returns the PID of the new process (in the caller process's namespace) and a channel that will return the exit status of the process whenever it dies. + // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. // // Errors: // ContainerDestroyed - Container no longer exists, - // ConfigInvalid - config is invalid, - // ContainerPaused - Container is paused, // SystemError - System error. - Start(config *ProcessConfig) (pid int, exitChan chan int, err Error) - - // Destroys the container after killing all running processes. // - // Any event registrations are removed before the container is destroyed. - // No error is returned if the container is already destroyed. 
+ // Some of the returned PIDs may no longer refer to processes in the Container, unless + // the Container state is PAUSED in which case every PID in the slice is valid. + Processes() ([]int, Error) + + // Returns statistics for the container. // // Errors: + // ContainerDestroyed - Container no longer exists, // SystemError - System error. - Destroy() Error + Stats() (*ContainerStats, Error) +} - // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. +// A libcontainer container object. +// +// Each container is thread-safe within the same process. Since a container can +// be destroyed by a separate process, any function may return that the container +// was not found. +type Container interface { + ContainerInfo + + // Start a process inside the container. Returns the PID of the new process (in the caller process's namespace) and a channel that will return the exit status of the process whenever it dies. // // Errors: // ContainerDestroyed - Container no longer exists, + // ConfigInvalid - config is invalid, + // ContainerPaused - Container is paused, // SystemError - System error. - // - // Some of the returned PIDs may no longer refer to processes in the Container, unless - // the Container state is PAUSED in which case every PID in the slice is valid. - Processes() ([]int, Error) + Start(config *ProcessConfig) (pid int, exitChan chan int, err Error) - // Returns statistics for the container. + // Destroys the container after killing all running processes. + // + // Any event registrations are removed before the container is destroyed. + // No error is returned if the container is already destroyed. // // Errors: - // ContainerDestroyed - Container no longer exists, // SystemError - System error. - Stats() (*ContainerStats, Error) + Destroy() Error // If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses // the execution of any user processes. 
Asynchronously, when the container finished being paused the diff --git a/factory.go b/factory.go index e37773b2b..ccf0cf0fc 100644 --- a/factory.go +++ b/factory.go @@ -1,7 +1,6 @@ package libcontainer type Factory interface { - // Creates a new container with the given id and starts the initial process inside it. // id must be a string containing only letters, digits and underscores and must contain // between 1 and 1024 characters, inclusive. @@ -20,13 +19,12 @@ type Factory interface { // On error, any partially created container parts are cleaned up (the operation is atomic). Create(id string, config *Config) (Container, Error) - // Load takes an ID for an existing container and reconstructs the container - // from the state. + // Load takes an ID for an existing container and returns the container information + // from the state. This presents a read only view of the container. // // Errors: // Path does not exist // Container is stopped // System error - // TODO: fix description - Load(id string) (Container, Error) + Load(id string) (ContainerInfo, Error) } From 6bf1e4ddfcfca039c079ae8e8204fce027679c9f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 22 Oct 2014 19:06:35 +0000 Subject: [PATCH 002/101] Update container interface with process operations Signed-off-by: Michael Crosby --- container.go | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/container.go b/container.go index 82d1ace26..c1b2a95a9 100644 --- a/container.go +++ b/container.go @@ -50,7 +50,7 @@ type Container interface { // ConfigInvalid - config is invalid, // ContainerPaused - Container is paused, // SystemError - System error. - Start(config *ProcessConfig) (pid int, exitChan chan int, err Error) + StartProcess(config *ProcessConfig) (pid int, err Error) // Destroys the container after killing all running processes. 
// @@ -79,4 +79,26 @@ type Container interface { // ContainerDestroyed - Container no longer exists, // SystemError - System error. Resume() Error + + // Signal sends the specified signal to a process owned by the container. + // + // Errors: + // ContainerDestroyed - Container no longer exists, + // ContainerPaused - Container is paused, + // SystemError - System error. + Signal(pid, signal int) Error + + // Wait waits for the init process of the conatiner to die and returns it's exit status. + // + // Errors: + // ContainerDestroyed - Container no longer exists, + // SystemError - System error. + Wait() (exitStatus int, err Error) + + // WaitProcess waits on a process owned by the container. + // + // Errors: + // ContainerDestroyed - Container no longer exists, + // SystemError - System error. + WaitProcess(pid int) (exitStatus int, err Error) } From 6310a958e61914ebccd6d7f64c429f40343f0716 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 22 Oct 2014 20:45:23 +0000 Subject: [PATCH 003/101] Implement linux factory and container with readonly interface Signed-off-by: Michael Crosby --- api_temp.go | 21 ----------- error.go | 24 ++++++++++--- generic_error.go | 46 ++++++++++++++++++++++++ linux_container.go | 66 ++++++++++++++++++++++++++++++++++ linux_factory.go | 89 ++++++++++++++++++++++++++++++++++++++++++++++ nsinit/main.go | 1 + nsinit/stats.go | 10 +++--- 7 files changed, 226 insertions(+), 31 deletions(-) delete mode 100644 api_temp.go create mode 100644 generic_error.go create mode 100644 linux_container.go create mode 100644 linux_factory.go diff --git a/api_temp.go b/api_temp.go deleted file mode 100644 index 5c682ee34..000000000 --- a/api_temp.go +++ /dev/null @@ -1,21 +0,0 @@ -/* -Temporary API endpoint for libcontainer while the full API is finalized (api.go). 
-*/ -package libcontainer - -import ( - "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/network" -) - -// TODO(vmarmol): Complete Stats() in final libcontainer API and move users to that. -// DEPRECATED: The below portions are only to be used during the transition to the official API. -// Returns all available stats for the given container. -func GetStats(container *Config, state *State) (stats *ContainerStats, err error) { - stats = &ContainerStats{} - if stats.CgroupStats, err = fs.GetStats(state.CgroupPaths); err != nil { - return stats, err - } - stats.NetworkStats, err = network.GetStats(&state.NetworkState) - return stats, err -} diff --git a/error.go b/error.go index 5ff56d80b..5b96a3d2a 100644 --- a/error.go +++ b/error.go @@ -8,7 +8,6 @@ const ( // Factory errors IdInUse ErrorCode = iota InvalidIdFormat - // TODO: add Load errors // Container errors ContainerDestroyed @@ -19,14 +18,29 @@ const ( SystemError ) +func (c ErrorCode) String() string { + switch c { + case IdInUse: + return "Id already in use" + case InvalidIdFormat: + return "Invalid format" + case ContainerDestroyed: + return "Container destroyed" + case ContainerPaused: + return "Container paused" + case ConfigInvalid: + return "Invalid configuration" + case SystemError: + return "System Error" + default: + return "Unknown error" + } +} + // API Error type. type Error interface { error - // Returns the stack trace, if any, which identifies the - // point at which the error occurred. - Stack() []byte - // Returns a verbose string including the error message // and a representation of the stack trace suitable for // printing. 
diff --git a/generic_error.go b/generic_error.go new file mode 100644 index 000000000..7e97af7c7 --- /dev/null +++ b/generic_error.go @@ -0,0 +1,46 @@ +package libcontainer + +import ( + "bytes" + "fmt" + "runtime" + "time" +) + +var newLine = []byte("\n") + +func newGenericError(err error, c ErrorCode) Error { + return &GenericError{ + timestamp: time.Now(), + err: err, + code: c, + stack: captureStackTrace(2), + } +} + +func captureStackTrace(skip int) string { + buf := make([]byte, 4096) + buf = buf[:runtime.Stack(buf, true)] + + lines := bytes.Split(buf, newLine) + return string(bytes.Join(lines[skip:], newLine)) +} + +type GenericError struct { + timestamp time.Time + code ErrorCode + err error + stack string +} + +func (e *GenericError) Error() string { + return fmt.Sprintf("[%d] %s: %s", e.code, e.code, e.err) +} + +func (e *GenericError) Code() ErrorCode { + return e.code +} + +func (e *GenericError) Detail() string { + return fmt.Sprintf("[%d] %s\n%s", e.code, e.err, e.stack) +} diff --git a/linux_container.go b/linux_container.go new file mode 100644 index 000000000..9c16c4898 --- /dev/null +++ b/linux_container.go @@ -0,0 +1,66 @@ +// +build linux + +package libcontainer + +import ( + "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/cgroups/systemd" + "github.com/docker/libcontainer/network" +) + +type linuxContainer struct { + id string + root string + config *Config + state *State +} + +func (c *linuxContainer) ID() string { + return c.id +} + +func (c *linuxContainer) Config() *Config { + return c.config +} + +func (c *linuxContainer) RunState() (*RunState, Error) { + panic("not implemented") +} + +func (c *linuxContainer) Processes() ([]int, Error) { + var ( + err error + pids []int + ) + + if systemd.UseSystemd() { + pids, err = systemd.GetPids(c.config.Cgroups) + } else { + pids, err = fs.GetPids(c.config.Cgroups) + } + if err != nil { + return nil, newGenericError(err, SystemError) + } + return pids, nil +} + +func (c 
*linuxContainer) Stats() (*ContainerStats, Error) { + var ( + err error + stats = &ContainerStats{} + ) + + if systemd.UseSystemd() { + stats.CgroupStats, err = systemd.GetStats(c.config.Cgroups) + } else { + stats.CgroupStats, err = fs.GetStats(c.config.Cgroups) + } + if err != nil { + return stats, newGenericError(err, SystemError) + } + + if stats.NetworkStats, err = network.GetStats(&c.state.NetworkState); err != nil { + return stats, newGenericError(err, SystemError) + } + return stats, nil +} diff --git a/linux_factory.go b/linux_factory.go new file mode 100644 index 000000000..73782e3a8 --- /dev/null +++ b/linux_factory.go @@ -0,0 +1,89 @@ +// +build linux + +package libcontainer + +import ( + "encoding/json" + "os" + "path/filepath" +) + +const ( + configFilename = "config.json" + stateFilename = "state.json" +) + +// New returns a linux based container factory based in the root directory. +func New(root string) (Factory, Error) { + if err := os.MkdirAll(root, 0700); err != nil { + return nil, newGenericError(err, SystemError) + } + + return &linuxFactory{ + root: root, + }, nil +} + +// linuxFactory implements the default factory interface for linux based systems. 
+type linuxFactory struct { + // root is the root directory + root string +} + +func (l *linuxFactory) Create(id string, config *Config) (Container, Error) { + panic("not implemented") +} + +func (l *linuxFactory) Load(id string) (ContainerInfo, Error) { + containerRoot := filepath.Join(l.root, id) + config, err := l.loadContainerConfig(containerRoot) + if err != nil { + return nil, err + } + + state, err := l.loadContainerState(containerRoot) + if err != nil { + return nil, err + } + + return &linuxContainer{ + id: id, + root: containerRoot, + config: config, + state: state, + }, nil +} + +func (l *linuxFactory) loadContainerConfig(root string) (*Config, Error) { + f, err := os.Open(filepath.Join(root, configFilename)) + if err != nil { + if os.IsNotExist(err) { + return nil, newGenericError(err, ContainerDestroyed) + } + return nil, newGenericError(err, SystemError) + } + defer f.Close() + + var config *Config + if err := json.NewDecoder(f).Decode(&config); err != nil { + return nil, newGenericError(err, ConfigInvalid) + } + return config, nil +} + +func (l *linuxFactory) loadContainerState(root string) (*State, Error) { + f, err := os.Open(filepath.Join(root, stateFilename)) + if err != nil { + if os.IsNotExist(err) { + return nil, newGenericError(err, ContainerDestroyed) + } + return nil, newGenericError(err, SystemError) + } + defer f.Close() + + var state *State + if err := json.NewDecoder(f).Decode(&state); err != nil { + return nil, newGenericError(err, SystemError) + } + return state, nil +} diff --git a/nsinit/main.go b/nsinit/main.go index d65c0140e..561ce3a99 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -48,6 +48,7 @@ func main() { app.Flags = []cli.Flag{ cli.StringFlag{Name: "nspid"}, cli.StringFlag{Name: "console"}, + cli.StringFlag{Name: "root", Value: ".", Usage: "root directory for containers"}, } app.Before = preload diff --git a/nsinit/stats.go b/nsinit/stats.go index 612b4a4ba..62fc1d4b2 100644 --- a/nsinit/stats.go +++ b/nsinit/stats.go 
@@ -16,23 +16,23 @@ var statsCommand = cli.Command{ } func statsAction(context *cli.Context) { - container, err := loadConfig() + factory, err := libcontainer.New(context.GlobalString("root")) if err != nil { log.Fatal(err) } - state, err := libcontainer.GetState(dataPath) + container, err := factory.Load(context.Args().First()) if err != nil { log.Fatal(err) } - stats, err := libcontainer.GetStats(container, state) + stats, err := container.Stats() if err != nil { log.Fatal(err) } - data, err := json.MarshalIndent(stats, "", "\t") + data, jerr := json.MarshalIndent(stats, "", "\t") if err != nil { - log.Fatal(err) + log.Fatal(jerr) } fmt.Printf("%s", data) From 7760faaab4d48a73870f32f12571acd8e61c10e6 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 22 Oct 2014 23:10:51 +0000 Subject: [PATCH 004/101] Add stacktrace package for collection of stacktraces This helps aid our effort of returning useful errors. Signed-off-by: Michael Crosby --- stacktrace/capture.go | 25 +++++++++++++++++++++++++ stacktrace/capture_test.go | 27 +++++++++++++++++++++++++++ stacktrace/frame.go | 35 +++++++++++++++++++++++++++++++++++ stacktrace/frame_test.go | 20 ++++++++++++++++++++ stacktrace/stacktrace.go | 5 +++++ 5 files changed, 112 insertions(+) create mode 100644 stacktrace/capture.go create mode 100644 stacktrace/capture_test.go create mode 100644 stacktrace/frame.go create mode 100644 stacktrace/frame_test.go create mode 100644 stacktrace/stacktrace.go diff --git a/stacktrace/capture.go b/stacktrace/capture.go new file mode 100644 index 000000000..9fc75f8a0 --- /dev/null +++ b/stacktrace/capture.go @@ -0,0 +1,25 @@ +package stacktrace + +import "runtime" + +// Caputure captures a stacktrace for the current calling go program +// +// skip is the number of frames to skip +func Capture(userSkip int) Stacktrace { + var ( + skip = userSkip + 1 // add one for our own function + frames []Frame + ) + + for i := skip; ; i++ { + pc, file, line, ok := runtime.Caller(i) + if !ok { + 
break + } + frames = append(frames, NewFrame(pc, file, line)) + } + + return Stacktrace{ + Frames: frames, + } +} diff --git a/stacktrace/capture_test.go b/stacktrace/capture_test.go new file mode 100644 index 000000000..3f435d51a --- /dev/null +++ b/stacktrace/capture_test.go @@ -0,0 +1,27 @@ +package stacktrace + +import "testing" + +func captureFunc() Stacktrace { + return Capture(0) +} + +func TestCaptureTestFunc(t *testing.T) { + stack := captureFunc() + + if len(stack.Frames) == 0 { + t.Fatal("expected stack frames to be returned") + } + + // the first frame is the caller + frame := stack.Frames[0] + if expected := "captureFunc"; frame.Function != expected { + t.Fatalf("expteced function %q but recevied %q", expected, frame.Function) + } + if expected := "github.com/docker/libcontainer/stacktrace"; frame.Package != expected { + t.Fatalf("expected package %q but received %q", expected, frame.Package) + } + if expected := "capture_test.go"; frame.File != expected { + t.Fatalf("expected file %q but received %q", expected, frame.File) + } +} diff --git a/stacktrace/frame.go b/stacktrace/frame.go new file mode 100644 index 000000000..5edea1b75 --- /dev/null +++ b/stacktrace/frame.go @@ -0,0 +1,35 @@ +package stacktrace + +import ( + "path/filepath" + "runtime" + "strings" +) + +// NewFrame returns a new stack frame for the provided information +func NewFrame(pc uintptr, file string, line int) Frame { + fn := runtime.FuncForPC(pc) + pack, name := parseFunctionName(fn.Name()) + return Frame{ + Line: line, + File: filepath.Base(file), + Package: pack, + Function: name, + } +} + +func parseFunctionName(name string) (string, string) { + i := strings.LastIndex(name, ".") + if i == -1 { + return "", name + } + return name[:i], name[i+1:] +} + +// Frame contains all the information for a stack frame within a go program +type Frame struct { + File string + Function string + Package string + Line int +} diff --git a/stacktrace/frame_test.go b/stacktrace/frame_test.go new 
file mode 100644 index 000000000..ae95ec484 --- /dev/null +++ b/stacktrace/frame_test.go @@ -0,0 +1,20 @@ +package stacktrace + +import "testing" + +func TestParsePackageName(t *testing.T) { + var ( + name = "github.com/docker/libcontainer/stacktrace.captureFunc" + expectedPackage = "github.com/docker/libcontainer/stacktrace" + expectedFunction = "captureFunc" + ) + + pack, funcName := parseFunctionName(name) + if pack != expectedPackage { + t.Fatalf("expected package %q but received %q", expectedPackage, pack) + } + + if funcName != expectedFunction { + t.Fatalf("expected function %q but received %q", expectedFunction, funcName) + } +} diff --git a/stacktrace/stacktrace.go b/stacktrace/stacktrace.go new file mode 100644 index 000000000..5e8b58d2d --- /dev/null +++ b/stacktrace/stacktrace.go @@ -0,0 +1,5 @@ +package stacktrace + +type Stacktrace struct { + Frames []Frame +} From 926ab56ea860f0a1f955547b10f7f85c508682c0 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 22 Oct 2014 23:27:06 +0000 Subject: [PATCH 005/101] Add testing for linux factory Load Signed-off-by: Michael Crosby --- container.go | 60 +++++++++--------- error.go | 12 ++-- error_test.go | 20 ++++++ factory.go | 10 +-- generic_error.go | 48 +++++++------- generic_error_test.go | 14 ++++ linux_container.go | 6 +- linux_factory.go | 14 ++-- linux_factory_test.go | 144 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 255 insertions(+), 73 deletions(-) create mode 100644 error_test.go create mode 100644 generic_error_test.go create mode 100644 linux_factory_test.go diff --git a/container.go b/container.go index c1b2a95a9..42887767a 100644 --- a/container.go +++ b/container.go @@ -9,30 +9,30 @@ type ContainerInfo interface { // Returns the current run state of the container. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, - // SystemError - System error. - RunState() (*RunState, Error) + // Systemerror - System error. 
+ RunState() (*RunState, error) // Returns the current config of the container. Config() *Config // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, - // SystemError - System error. + // Systemerror - System error. // // Some of the returned PIDs may no longer refer to processes in the Container, unless // the Container state is PAUSED in which case every PID in the slice is valid. - Processes() ([]int, Error) + Processes() ([]int, error) // Returns statistics for the container. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, - // SystemError - System error. - Stats() (*ContainerStats, Error) + // Systemerror - System error. + Stats() (*ContainerStats, error) } // A libcontainer container object. @@ -45,60 +45,60 @@ type Container interface { // Start a process inside the container. Returns the PID of the new process (in the caller process's namespace) and a channel that will return the exit status of the process whenever it dies. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, // ConfigInvalid - config is invalid, // ContainerPaused - Container is paused, - // SystemError - System error. - StartProcess(config *ProcessConfig) (pid int, err Error) + // Systemerror - System error. + StartProcess(config *ProcessConfig) (pid int, err error) // Destroys the container after killing all running processes. // // Any event registrations are removed before the container is destroyed. // No error is returned if the container is already destroyed. // - // Errors: - // SystemError - System error. - Destroy() Error + // errors: + // Systemerror - System error. + Destroy() error // If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses // the execution of any user processes. 
Asynchronously, when the container finished being paused the // state is changed to PAUSED. // If the Container state is PAUSED, do nothing. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, - // SystemError - System error. - Pause() Error + // Systemerror - System error. + Pause() error // If the Container state is PAUSED, resumes the execution of any user processes in the // Container before setting the Container state to RUNNING. // If the Container state is RUNNING, do nothing. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, - // SystemError - System error. - Resume() Error + // Systemerror - System error. + Resume() error // Signal sends the specified signal to a process owned by the container. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, // ContainerPaused - Container is paused, - // SystemError - System error. - Signal(pid, signal int) Error + // Systemerror - System error. + Signal(pid, signal int) error // Wait waits for the init process of the conatiner to die and returns it's exit status. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, - // SystemError - System error. - Wait() (exitStatus int, err Error) + // Systemerror - System error. + Wait() (exitStatus int, err error) // WaitProcess waits on a process owned by the container. // - // Errors: + // errors: // ContainerDestroyed - Container no longer exists, - // SystemError - System error. - WaitProcess(pid int) (exitStatus int, err Error) + // Systemerror - System error. + WaitProcess(pid int) (exitStatus int, err error) } diff --git a/error.go b/error.go index 5b96a3d2a..e86267009 100644 --- a/error.go +++ b/error.go @@ -1,5 +1,7 @@ package libcontainer +import "io" + // API error code type. 
type ErrorCode int @@ -10,7 +12,7 @@ const ( InvalidIdFormat // Container errors - ContainerDestroyed + ContainerNotExists ContainerPaused // Common errors @@ -24,14 +26,14 @@ func (c ErrorCode) String() string { return "Id already in use" case InvalidIdFormat: return "Invalid format" - case ContainerDestroyed: - return "Container destroyed" case ContainerPaused: return "Container paused" case ConfigInvalid: return "Invalid configuration" case SystemError: - return "System Error" + return "System error" + case ContainerNotExists: + return "Container does not exist" default: return "Unknown error" } @@ -44,7 +46,7 @@ type Error interface { // Returns a verbose string including the error message // and a representation of the stack trace suitable for // printing. - Detail() string + Detail(w io.Writer) error // Returns the error code for this error. Code() ErrorCode diff --git a/error_test.go b/error_test.go new file mode 100644 index 000000000..4bf4c9f5d --- /dev/null +++ b/error_test.go @@ -0,0 +1,20 @@ +package libcontainer + +import "testing" + +func TestErrorCode(t *testing.T) { + codes := map[ErrorCode]string{ + IdInUse: "Id already in use", + InvalidIdFormat: "Invalid format", + ContainerPaused: "Container paused", + ConfigInvalid: "Invalid configuration", + SystemError: "System error", + ContainerNotExists: "Container does not exist", + } + + for code, expected := range codes { + if actual := code.String(); actual != expected { + t.Fatalf("expected string %q but received %q", expected, actual) + } + } +} diff --git a/factory.go b/factory.go index ccf0cf0fc..c14832e72 100644 --- a/factory.go +++ b/factory.go @@ -10,21 +10,21 @@ type Factory interface { // // Returns the new container with a running process. 
// - // Errors: + // errors: // IdInUse - id is already in use by a container // InvalidIdFormat - id has incorrect format // ConfigInvalid - config is invalid - // SystemError - System error + // Systemerror - System error // // On error, any partially created container parts are cleaned up (the operation is atomic). - Create(id string, config *Config) (Container, Error) + Create(id string, config *Config) (Container, error) // Load takes an ID for an existing container and returns the container information // from the state. This presents a read only view of the container. // - // Errors: + // errors: // Path does not exist // Container is stopped // System error - Load(id string) (ContainerInfo, Error) + Load(id string) (ContainerInfo, error) } diff --git a/generic_error.go b/generic_error.go index 7e97af7c7..08a47f61e 100644 --- a/generic_error.go +++ b/generic_error.go @@ -1,46 +1,48 @@ package libcontainer import ( - "bytes" "fmt" - "runtime" + "io" + "text/template" "time" + + "github.com/docker/libcontainer/stacktrace" ) -var newLine = []byte("\n") +var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} +Code: {{.ECode}} +Message: {{.Err.Error}} +Frames:{{range $i, $frame := .Stack.Frames}} +--- +{{$i}}: {{$frame.Function}} +Package: {{$frame.Package}} +File: {{$frame.File}}{{end}} +`)) func newGenericError(err error, c ErrorCode) Error { return &GenericError{ - timestamp: time.Now(), - err: err, - code: c, - stack: captureStackTrace(2), + Timestamp: time.Now(), + Err: err, + ECode: c, + Stack: stacktrace.Capture(2), } } -func captureStackTrace(skip int) string { - buf := make([]byte, 4096) - buf = buf[:runtime.Stack(buf, true)] - - lines := bytes.Split(buf, newLine) - return string(bytes.Join(lines[skip:], newLine)) -} - type GenericError struct { - timestamp time.Time - code ErrorCode - err error - stack string + Timestamp time.Time + ECode ErrorCode + Err error + Stack stacktrace.Stacktrace } func (e *GenericError) 
Error() string { - return fmt.Sprintf("[%d] %s: %s", e.code, e.code, e.err) + return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Err) } func (e *GenericError) Code() ErrorCode { - return e.code + return e.ECode } -func (e *GenericError) Detail() string { - return fmt.Sprintf("[%d] %s\n%s", e.code, e.err, e.stack) +func (e *GenericError) Detail(w io.Writer) error { + return errorTemplate.Execute(w, e) } diff --git a/generic_error_test.go b/generic_error_test.go new file mode 100644 index 000000000..292d2a36b --- /dev/null +++ b/generic_error_test.go @@ -0,0 +1,14 @@ +package libcontainer + +import ( + "fmt" + "io/ioutil" + "testing" +) + +func TestErrorDetail(t *testing.T) { + err := newGenericError(fmt.Errorf("test error"), SystemError) + if derr := err.Detail(ioutil.Discard); derr != nil { + t.Fatal(derr) + } +} diff --git a/linux_container.go b/linux_container.go index 9c16c4898..112e3e579 100644 --- a/linux_container.go +++ b/linux_container.go @@ -23,11 +23,11 @@ func (c *linuxContainer) Config() *Config { return c.config } -func (c *linuxContainer) RunState() (*RunState, Error) { +func (c *linuxContainer) RunState() (*RunState, error) { panic("not implemented") } -func (c *linuxContainer) Processes() ([]int, Error) { +func (c *linuxContainer) Processes() ([]int, error) { var ( err error pids []int @@ -44,7 +44,7 @@ func (c *linuxContainer) Processes() ([]int, Error) { return pids, nil } -func (c *linuxContainer) Stats() (*ContainerStats, Error) { +func (c *linuxContainer) Stats() (*ContainerStats, error) { var ( err error stats = &ContainerStats{} diff --git a/linux_factory.go b/linux_factory.go index 73782e3a8..9741aa655 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -14,7 +14,7 @@ const ( ) // New returns a linux based container factory based in the root directory. 
-func New(root string) (Factory, Error) { +func New(root string) (Factory, error) { if err := os.MkdirAll(root, 0700); err != nil { return nil, newGenericError(err, SystemError) } @@ -30,11 +30,11 @@ type linuxFactory struct { root string } -func (l *linuxFactory) Create(id string, config *Config) (Container, Error) { +func (l *linuxFactory) Create(id string, config *Config) (Container, error) { panic("not implemented") } -func (l *linuxFactory) Load(id string) (ContainerInfo, Error) { +func (l *linuxFactory) Load(id string) (ContainerInfo, error) { containerRoot := filepath.Join(l.root, id) config, err := l.loadContainerConfig(containerRoot) if err != nil { @@ -54,11 +54,11 @@ func (l *linuxFactory) Load(id string) (ContainerInfo, Error) { }, nil } -func (l *linuxFactory) loadContainerConfig(root string) (*Config, Error) { +func (l *linuxFactory) loadContainerConfig(root string) (*Config, error) { f, err := os.Open(filepath.Join(root, configFilename)) if err != nil { if os.IsNotExist(err) { - return nil, newGenericError(err, ContainerDestroyed) + return nil, newGenericError(err, ContainerNotExists) } return nil, newGenericError(err, SystemError) } @@ -71,11 +71,11 @@ func (l *linuxFactory) loadContainerConfig(root string) (*Config, Error) { return config, nil } -func (l *linuxFactory) loadContainerState(root string) (*State, Error) { +func (l *linuxFactory) loadContainerState(root string) (*State, error) { f, err := os.Open(filepath.Join(root, stateFilename)) if err != nil { if os.IsNotExist(err) { - return nil, newGenericError(err, ContainerDestroyed) + return nil, newGenericError(err, ContainerNotExists) } return nil, newGenericError(err, SystemError) } diff --git a/linux_factory_test.go b/linux_factory_test.go new file mode 100644 index 000000000..aa9a88482 --- /dev/null +++ b/linux_factory_test.go @@ -0,0 +1,144 @@ +// +build linux + +package libcontainer + +import ( + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +func newTestRoot() 
(string, error) { + dir, err := ioutil.TempDir("", "libcontainer") + if err != nil { + return "", err + } + if err := os.MkdirAll(dir, 0700); err != nil { + return "", err + } + return dir, nil +} + +func TestFactoryNew(t *testing.T) { + root, rerr := newTestRoot() + if rerr != nil { + t.Fatal(rerr) + } + defer os.RemoveAll(root) + + factory, err := New(root) + if err != nil { + t.Fatal(err) + } + + if factory == nil { + t.Fatal("factory should not be nil") + } + + lfactory, ok := factory.(*linuxFactory) + if !ok { + t.Fatal("expected linux factory returned on linux based systems") + } + + if lfactory.root != root { + t.Fatalf("expected factory root to be %q but received %q", root, lfactory.root) + } +} + +func TestFactoryLoadNotExists(t *testing.T) { + root, rerr := newTestRoot() + if rerr != nil { + t.Fatal(rerr) + } + defer os.RemoveAll(root) + + factory, err := New(root) + if err != nil { + t.Fatal(err) + } + + _, err = factory.Load("nocontainer") + if err == nil { + t.Fatal("expected nil error loading non-existing container") + } + + lerr, ok := err.(Error) + if !ok { + t.Fatal("expected libcontainer error type") + } + if lerr.Code() != ContainerNotExists { + t.Fatalf("expected error code %s but received %s", ContainerNotExists, lerr.Code()) + } +} + +func TestFactoryLoadContainer(t *testing.T) { + root, err := newTestRoot() + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(root) + + // setup default container config and state for mocking + var ( + id = "1" + expectedConfig = &Config{ + RootFs: "/mycontainer/root", + } + expectedState = &State{ + InitPid: 1024, + } + ) + + if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil { + t.Fatal(err) + } + if err := marshal(filepath.Join(root, id, configFilename), expectedConfig); err != nil { + t.Fatal(err) + } + if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil { + t.Fatal(err) + } + + factory, err := New(root) + if err != nil { + t.Fatal(err) + } + + container, err 
:= factory.Load(id) + if err != nil { + t.Fatal(err) + } + + if container.ID() != id { + t.Fatalf("expected container id %q but received %q", id, container.ID()) + } + + config := container.Config() + if config == nil { + t.Fatal("expected non nil container config") + } + + if config.RootFs != expectedConfig.RootFs { + t.Fatalf("expected rootfs %q but received %q", expectedConfig.RootFs, config.RootFs) + } + + lcontainer, ok := container.(*linuxContainer) + if !ok { + t.Fatal("expected linux container on linux based systems") + } + + if lcontainer.state.InitPid != expectedState.InitPid { + t.Fatalf("expected init pid %d but received %d", expectedState.InitPid, lcontainer.state.InitPid) + } +} + +func marshal(path string, v interface{}) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + return json.NewEncoder(f).Encode(v) +} From ccb83a1e8375cc8b658eaac3d396d15b816ffc3c Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 22 Oct 2014 23:53:28 +0000 Subject: [PATCH 006/101] Add cgroup manager for mocking of package resources Signed-off-by: Michael Crosby --- cgroups.go | 41 +++++++++++++++++++++++++ container.go | 2 +- linux_container.go | 36 ++++++---------------- linux_container_test.go | 68 +++++++++++++++++++++++++++++++++++++++++ linux_factory.go | 9 +++--- state.go | 2 +- 6 files changed, 125 insertions(+), 33 deletions(-) create mode 100644 cgroups.go create mode 100644 linux_container_test.go diff --git a/cgroups.go b/cgroups.go new file mode 100644 index 000000000..9345f2e30 --- /dev/null +++ b/cgroups.go @@ -0,0 +1,41 @@ +package libcontainer + +import ( + "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/cgroups/systemd" +) + +type CgroupManager interface { + GetPids(*cgroups.Cgroup) ([]int, error) + GetStats(*cgroups.Cgroup) (*cgroups.Stats, error) +} + +func newCgroupsManager() CgroupManager { + if systemd.UseSystemd() { + return
&systemdCgroupManager{} + } + return &fsCgroupsManager{} +} + +type systemdCgroupManager struct { +} + +func (m *systemdCgroupManager) GetPids(config *cgroups.Cgroup) ([]int, error) { + return systemd.GetPids(config) +} + +func (m *systemdCgroupManager) GetStats(config *cgroups.Cgroup) (*cgroups.Stats, error) { + return systemd.GetStats(config) +} + +type fsCgroupsManager struct { +} + +func (m *fsCgroupsManager) GetPids(config *cgroups.Cgroup) ([]int, error) { + return fs.GetPids(config) +} + +func (m *fsCgroupsManager) GetStats(config *cgroups.Cgroup) (*cgroups.Stats, error) { + return fs.GetStats(config) +} diff --git a/container.go b/container.go index 42887767a..232f42c45 100644 --- a/container.go +++ b/container.go @@ -12,7 +12,7 @@ type ContainerInfo interface { // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - RunState() (*RunState, error) + RunState() (RunState, error) // Returns the current config of the container. Config() *Config diff --git a/linux_container.go b/linux_container.go index 112e3e579..1f5e090df 100644 --- a/linux_container.go +++ b/linux_container.go @@ -2,17 +2,14 @@ package libcontainer -import ( - "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/cgroups/systemd" - "github.com/docker/libcontainer/network" -) +import "github.com/docker/libcontainer/network" type linuxContainer struct { - id string - root string - config *Config - state *State + id string + root string + config *Config + state *State + cgroupManager CgroupManager } func (c *linuxContainer) ID() string { @@ -23,21 +20,12 @@ func (c *linuxContainer) Config() *Config { return c.config } -func (c *linuxContainer) RunState() (*RunState, error) { +func (c *linuxContainer) RunState() (RunState, error) { panic("not implemented") } func (c *linuxContainer) Processes() ([]int, error) { - var ( - err error - pids []int - ) - - if systemd.UseSystemd() { - pids, err = systemd.GetPids(c.config.Cgroups) - } else { 
- pids, err = fs.GetPids(c.config.Cgroups) - } + pids, err := c.cgroupManager.GetPids(c.config.Cgroups) if err != nil { return nil, newGenericError(err, SystemError) } @@ -50,15 +38,9 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { stats = &ContainerStats{} ) - if systemd.UseSystemd() { - stats.CgroupStats, err = systemd.GetStats(c.config.Cgroups) - } else { - stats.CgroupStats, err = fs.GetStats(c.config.Cgroups) - } - if err != nil { + if stats.CgroupStats, err = c.cgroupManager.GetStats(c.config.Cgroups); err != nil { return stats, newGenericError(err, SystemError) } - if stats.NetworkStats, err = network.GetStats(&c.state.NetworkState); err != nil { return stats, newGenericError(err, SystemError) } diff --git a/linux_container_test.go b/linux_container_test.go new file mode 100644 index 000000000..85a01cd33 --- /dev/null +++ b/linux_container_test.go @@ -0,0 +1,68 @@ +// +build linux + +package libcontainer + +import ( + "testing" + + "github.com/docker/libcontainer/cgroups" +) + +type mockCgroupManager struct { + pids []int + stats *cgroups.Stats +} + +func (m *mockCgroupManager) GetPids(config *cgroups.Cgroup) ([]int, error) { + return m.pids, nil +} + +func (m *mockCgroupManager) GetStats(config *cgroups.Cgroup) (*cgroups.Stats, error) { + return m.stats, nil +} + +func TestGetContainerPids(t *testing.T) { + container := &linuxContainer{ + id: "myid", + config: &Config{}, + cgroupManager: &mockCgroupManager{pids: []int{1, 2, 3}}, + } + + pids, err := container.Processes() + if err != nil { + t.Fatal(err) + } + + for i, expected := range []int{1, 2, 3} { + if pids[i] != expected { + t.Fatalf("expected pid %d but received %d", expected, pids[i]) + } + } +} + +func TestGetContainerStats(t *testing.T) { + container := &linuxContainer{ + id: "myid", + config: &Config{}, + cgroupManager: &mockCgroupManager{ + pids: []int{1, 2, 3}, + stats: &cgroups.Stats{ + MemoryStats: cgroups.MemoryStats{ + Usage: 1024, + }, + }, + }, + state: &State{}, + } + + 
stats, err := container.Stats() + if err != nil { + t.Fatal(err) + } + if stats.CgroupStats == nil { + t.Fatal("cgroup stats are nil") + } + if stats.CgroupStats.MemoryStats.Usage != 1024 { + t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage) + } +} diff --git a/linux_factory.go b/linux_factory.go index 9741aa655..989c55285 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -47,10 +47,11 @@ func (l *linuxFactory) Load(id string) (ContainerInfo, error) { } return &linuxContainer{ - id: id, - root: containerRoot, - config: config, - state: state, + id: id, + root: containerRoot, + config: config, + state: state, + cgroupManager: newCgroupsManager(), }, nil } diff --git a/state.go b/state.go index 208b4c627..4ab47ad75 100644 --- a/state.go +++ b/state.go @@ -31,7 +31,7 @@ const ( stateFile = "state.json" // The container exists and is running. - Running RunState = iota + Running RunState = iota + 1 // The container exists, it is in the process of being paused. Pausing From d5b8418f750643b1936c24460cd93ac59ba3ae15 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 28 Oct 2014 00:51:14 +0000 Subject: [PATCH 007/101] Remove ContainerInfo interface Signed-off-by: Michael Crosby --- container.go | 16 ++++++---------- factory.go | 2 +- linux_container.go | 28 ++++++++++++++++++++++++++++ linux_factory.go | 2 +- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/container.go b/container.go index 232f42c45..e53769025 100644 --- a/container.go +++ b/container.go @@ -3,7 +3,12 @@ NOTE: The API is in flux and mainly not implemented. Proceed with caution until */ package libcontainer -type ContainerInfo interface { +// A libcontainer container object. +// +// Each container is thread-safe within the same process. Since a container can +// be destroyed by a separate process, any function may return that the container +// was not found. 
+type Container interface { // Returns the ID of the container ID() string @@ -33,15 +38,6 @@ type ContainerInfo interface { // ContainerDestroyed - Container no longer exists, // Systemerror - System error. Stats() (*ContainerStats, error) -} - -// A libcontainer container object. -// -// Each container is thread-safe within the same process. Since a container can -// be destroyed by a separate process, any function may return that the container -// was not found. -type Container interface { - ContainerInfo // Start a process inside the container. Returns the PID of the new process (in the caller process's namespace) and a channel that will return the exit status of the process whenever it dies. // diff --git a/factory.go b/factory.go index c14832e72..389c138a6 100644 --- a/factory.go +++ b/factory.go @@ -26,5 +26,5 @@ type Factory interface { // Path does not exist // Container is stopped // System error - Load(id string) (ContainerInfo, error) + Load(id string) (Container, error) } diff --git a/linux_container.go b/linux_container.go index 1f5e090df..938250ca9 100644 --- a/linux_container.go +++ b/linux_container.go @@ -46,3 +46,31 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { } return stats, nil } + +func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { + panic("not implemented") +} + +func (c *linuxContainer) Destroy() error { + panic("not implemented") +} + +func (c *linuxContainer) Pause() error { + panic("not implemented") +} + +func (c *linuxContainer) Resume() error { + panic("not implemented") +} + +func (c *linuxContainer) Signal(pid, signal int) error { + panic("not implemented") +} + +func (c *linuxContainer) Wait() (int, error) { + panic("not implemented") +} + +func (c *linuxContainer) WaitProcess(pid int) (int, error) { + panic("not implemented") +} diff --git a/linux_factory.go b/linux_factory.go index 989c55285..3299dc8fa 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -34,7 +34,7 @@ func (l 
*linuxFactory) Create(id string, config *Config) (Container, error) { panic("not implemented") } -func (l *linuxFactory) Load(id string) (ContainerInfo, error) { +func (l *linuxFactory) Load(id string) (Container, error) { containerRoot := filepath.Join(l.root, id) config, err := l.loadContainerConfig(containerRoot) if err != nil { From 47b41a6f5d801395f89ebdb7ff83f8ca8150e6de Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 30 Oct 2014 22:08:28 +0000 Subject: [PATCH 008/101] Add logger to container and factory Signed-off-by: Michael Crosby --- cgroups.go | 9 +++++++++ linux_container.go | 15 ++++++++++++++- linux_factory.go | 17 ++++++++++++++--- nsinit/main.go | 2 ++ nsinit/stats.go | 2 +- 5 files changed, 40 insertions(+), 5 deletions(-) diff --git a/cgroups.go b/cgroups.go index 9345f2e30..68754b062 100644 --- a/cgroups.go +++ b/cgroups.go @@ -7,6 +7,7 @@ import ( ) type CgroupManager interface { + String() string GetPids(*cgroups.Cgroup) ([]int, error) GetStats(*cgroups.Cgroup) (*cgroups.Stats, error) } @@ -29,6 +30,10 @@ func (m *systemdCgroupManager) GetStats(config *cgroups.Cgroup) (*cgroups.Stats, return systemd.GetStats(config) } +func (m *systemdCgroupManager) String() string { + return "systemd" +} + type fsCgroupsManager struct { } @@ -39,3 +44,7 @@ func (m *fsCgroupsManager) GetPids(config *cgroups.Cgroup) ([]int, error) { func (m *fsCgroupsManager) GetStats(config *cgroups.Cgroup) (*cgroups.Stats, error) { return fs.GetStats(config) } + +func (m *fsCgroupsManager) String() string { + return "fs" +} diff --git a/linux_container.go b/linux_container.go index 938250ca9..09a0414fc 100644 --- a/linux_container.go +++ b/linux_container.go @@ -2,7 +2,10 @@ package libcontainer -import "github.com/docker/libcontainer/network" +import ( + "github.com/Sirupsen/logrus" + "github.com/docker/libcontainer/network" +) type linuxContainer struct { id string @@ -10,6 +13,7 @@ type linuxContainer struct { config *Config state *State cgroupManager CgroupManager 
+ logger *logrus.Logger } func (c *linuxContainer) ID() string { @@ -25,6 +29,7 @@ func (c *linuxContainer) RunState() (RunState, error) { } func (c *linuxContainer) Processes() ([]int, error) { + c.logger.Debug("fetch container processes") pids, err := c.cgroupManager.GetPids(c.config.Cgroups) if err != nil { return nil, newGenericError(err, SystemError) @@ -33,6 +38,7 @@ func (c *linuxContainer) Processes() ([]int, error) { } func (c *linuxContainer) Stats() (*ContainerStats, error) { + c.logger.Debug("fetch container stats") var ( err error stats = &ContainerStats{} @@ -48,29 +54,36 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { } func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { + c.logger.Debug("start new container process") panic("not implemented") } func (c *linuxContainer) Destroy() error { + c.logger.Debug("destroy container") panic("not implemented") } func (c *linuxContainer) Pause() error { + c.logger.Debug("pause container") panic("not implemented") } func (c *linuxContainer) Resume() error { + c.logger.Debug("resume container") panic("not implemented") } func (c *linuxContainer) Signal(pid, signal int) error { + c.logger.Debugf("sending signal %d to pid %d", signal, pid) panic("not implemented") } func (c *linuxContainer) Wait() (int, error) { + c.logger.Debug("wait container") panic("not implemented") } func (c *linuxContainer) WaitProcess(pid int) (int, error) { + c.logger.Debugf("wait process %d", pid) panic("not implemented") } diff --git a/linux_factory.go b/linux_factory.go index 3299dc8fa..dc998c343 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -6,6 +6,8 @@ import ( "encoding/json" "os" "path/filepath" + + "github.com/Sirupsen/logrus" ) const ( @@ -14,13 +16,14 @@ const ( ) // New returns a linux based container factory based in the root directory. 
-func New(root string) (Factory, error) { +func New(root string, logger *logrus.Logger) (Factory, error) { if err := os.MkdirAll(root, 0700); err != nil { return nil, newGenericError(err, SystemError) } return &linuxFactory{ - root: root, + root: root, + logger: logger, }, nil } @@ -28,6 +31,9 @@ func New(root string) (Factory, error) { type linuxFactory struct { // root is the root directory root string + + // standard logger for all packages + logger *logrus.Logger } func (l *linuxFactory) Create(id string, config *Config) (Container, error) { @@ -36,22 +42,27 @@ func (l *linuxFactory) Create(id string, config *Config) (Container, error) { func (l *linuxFactory) Load(id string) (Container, error) { containerRoot := filepath.Join(l.root, id) + l.logger.Debugf("loading container config from %s", containerRoot) config, err := l.loadContainerConfig(containerRoot) if err != nil { return nil, err } + l.logger.Debugf("loading container state from %s", containerRoot) state, err := l.loadContainerState(containerRoot) if err != nil { return nil, err } + cgroupManager := newCgroupsManager() + l.logger.Debugf("using %s as cgroup manager", cgroupManager) return &linuxContainer{ id: id, root: containerRoot, config: config, state: state, - cgroupManager: newCgroupsManager(), + cgroupManager: cgroupManager, + logger: l.logger, }, nil } diff --git a/nsinit/main.go b/nsinit/main.go index 561ce3a99..9ce75e94e 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -5,11 +5,13 @@ import ( "os" "strings" + "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" ) var ( logPath = os.Getenv("log") + logger = logrus.New() argvs = make(map[string]*rFunc) ) diff --git a/nsinit/stats.go b/nsinit/stats.go index 62fc1d4b2..a195231cd 100644 --- a/nsinit/stats.go +++ b/nsinit/stats.go @@ -16,7 +16,7 @@ var statsCommand = cli.Command{ } func statsAction(context *cli.Context) { - factory, err := libcontainer.New(context.GlobalString("root")) + factory, err := 
libcontainer.New(context.GlobalString("root"), logger) if err != nil { log.Fatal(err) } From 66e6806fd23f6c793f9478374278ed221dea3347 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Fri, 31 Oct 2014 13:56:53 -0700 Subject: [PATCH 009/101] Adds ID validation. Docker-DCO-1.1-Signed-off-by: Mrunal Patel (github: mrunalp) --- linux_factory.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/linux_factory.go b/linux_factory.go index dc998c343..4adb1bfbc 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -4,8 +4,10 @@ package libcontainer import ( "encoding/json" + "fmt" "os" "path/filepath" + "regexp" "github.com/Sirupsen/logrus" ) @@ -15,6 +17,10 @@ const ( stateFilename = "state.json" ) +var ( + idRegex = regexp.MustCompile(`^[\w_]{1,1024}$`) +) + // New returns a linux based container factory based in the root directory. func New(root string, logger *logrus.Logger) (Factory, error) { if err := os.MkdirAll(root, 0700); err != nil { @@ -37,6 +43,10 @@ type linuxFactory struct { } func (l *linuxFactory) Create(id string, config *Config) (Container, error) { + if !idRegex.MatchString(id) { + return nil, newGenericError(fmt.Errorf("Invalid id format: %s ", id), InvalidIdFormat) + } + panic("not implemented") } From 3f26e9a891365e4213332be3d0238b57739fd359 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Fri, 31 Oct 2014 15:34:50 -0700 Subject: [PATCH 010/101] Add check for ID already in use. 
Signed-off-by: Mrunal Patel --- linux_factory.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/linux_factory.go b/linux_factory.go index 4adb1bfbc..c6214d5ec 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -44,7 +44,12 @@ type linuxFactory struct { func (l *linuxFactory) Create(id string, config *Config) (Container, error) { if !idRegex.MatchString(id) { - return nil, newGenericError(fmt.Errorf("Invalid id format: %s ", id), InvalidIdFormat) + return nil, newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) + } + + containerRoot := filepath.Join(l.root, id) + if _, err := os.Stat(containerRoot); err == nil { + return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse) } panic("not implemented") From de57f78590cefdf08249118e2d710908cc81b16a Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Mon, 3 Nov 2014 09:42:20 -0800 Subject: [PATCH 011/101] More error handling and fixup Signed-off-by: Mrunal Patel --- linux_factory.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/linux_factory.go b/linux_factory.go index c6214d5ec..f89552ecd 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -18,7 +18,8 @@ const ( ) var ( - idRegex = regexp.MustCompile(`^[\w_]{1,1024}$`) + idRegex = regexp.MustCompile(`^[\w_]+$`) + maxIdLen = 1024 ) // New returns a linux based container factory based in the root directory. 
@@ -47,9 +48,16 @@ func (l *linuxFactory) Create(id string, config *Config) (Container, error) { return nil, newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) } + if len(id) > maxIdLen { + return nil, newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) + } + containerRoot := filepath.Join(l.root, id) - if _, err := os.Stat(containerRoot); err == nil { + _, err := os.Stat(containerRoot) + if err == nil { return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse) + } else if !os.IsNotExist(err) { + return nil, newGenericError(err, SystemError) } panic("not implemented") From e5636543cc616547be0842627484a5ae35acd4c2 Mon Sep 17 00:00:00 2001 From: Victor Marmol Date: Fri, 5 Dec 2014 17:06:58 -0800 Subject: [PATCH 012/101] Switch from logrus to glog. Signed-off-by: Victor Marmol --- linux_container.go | 19 +++++++++---------- linux_factory.go | 17 ++++++----------- nsinit/main.go | 2 -- nsinit/stats.go | 2 +- 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/linux_container.go b/linux_container.go index 09a0414fc..f69db34c6 100644 --- a/linux_container.go +++ b/linux_container.go @@ -13,7 +13,6 @@ type linuxContainer struct { config *Config state *State cgroupManager CgroupManager - logger *logrus.Logger } func (c *linuxContainer) ID() string { @@ -29,7 +28,7 @@ func (c *linuxContainer) RunState() (RunState, error) { } func (c *linuxContainer) Processes() ([]int, error) { - c.logger.Debug("fetch container processes") + glog.Info("fetch container processes") pids, err := c.cgroupManager.GetPids(c.config.Cgroups) if err != nil { return nil, newGenericError(err, SystemError) @@ -38,7 +37,7 @@ func (c *linuxContainer) Processes() ([]int, error) { } func (c *linuxContainer) Stats() (*ContainerStats, error) { - c.logger.Debug("fetch container stats") + glog.Info("fetch container stats") var ( err error stats = &ContainerStats{} @@ -54,36 +53,36 @@ func (c *linuxContainer) Stats() 
(*ContainerStats, error) { } func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { - c.logger.Debug("start new container process") + glog.Info("start new container process") panic("not implemented") } func (c *linuxContainer) Destroy() error { - c.logger.Debug("destroy container") + glog.Info("destroy container") panic("not implemented") } func (c *linuxContainer) Pause() error { - c.logger.Debug("pause container") + glog.Info("pause container") panic("not implemented") } func (c *linuxContainer) Resume() error { - c.logger.Debug("resume container") + glog.Info("resume container") panic("not implemented") } func (c *linuxContainer) Signal(pid, signal int) error { - c.logger.Debugf("sending signal %d to pid %d", signal, pid) + glog.Infof("sending signal %d to pid %d", signal, pid) panic("not implemented") } func (c *linuxContainer) Wait() (int, error) { - c.logger.Debug("wait container") + glog.Info("wait container") panic("not implemented") } func (c *linuxContainer) WaitProcess(pid int) (int, error) { - c.logger.Debugf("wait process %d", pid) + glog.Infof("wait process %d", pid) panic("not implemented") } diff --git a/linux_factory.go b/linux_factory.go index f89552ecd..c7c359dbb 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -9,7 +9,7 @@ import ( "path/filepath" "regexp" - "github.com/Sirupsen/logrus" + "github.com/golang/glog" ) const ( @@ -23,14 +23,13 @@ var ( ) // New returns a linux based container factory based in the root directory. 
-func New(root string, logger *logrus.Logger) (Factory, error) { +func New(root string) (Factory, error) { if err := os.MkdirAll(root, 0700); err != nil { return nil, newGenericError(err, SystemError) } return &linuxFactory{ - root: root, - logger: logger, + root: root, }, nil } @@ -38,9 +37,6 @@ func New(root string, logger *logrus.Logger) (Factory, error) { type linuxFactory struct { // root is the root directory root string - - // standard logger for all packages - logger *logrus.Logger } func (l *linuxFactory) Create(id string, config *Config) (Container, error) { @@ -65,27 +61,26 @@ func (l *linuxFactory) Create(id string, config *Config) (Container, error) { func (l *linuxFactory) Load(id string) (Container, error) { containerRoot := filepath.Join(l.root, id) - l.logger.Debugf("loading container config from %s", containerRoot) + glog.Infof("loading container config from %s", containerRoot) config, err := l.loadContainerConfig(containerRoot) if err != nil { return nil, err } - l.logger.Debugf("loading container state from %s", containerRoot) + glog.Infof("loading container state from %s", containerRoot) state, err := l.loadContainerState(containerRoot) if err != nil { return nil, err } cgroupManager := newCgroupsManager() - l.logger.Debugf("using %s as cgroup manager", cgroupManager) + glog.Infof("using %s as cgroup manager", cgroupManager) return &linuxContainer{ id: id, root: containerRoot, config: config, state: state, cgroupManager: cgroupManager, - logger: l.logger, }, nil } diff --git a/nsinit/main.go b/nsinit/main.go index 9ce75e94e..561ce3a99 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -5,13 +5,11 @@ import ( "os" "strings" - "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" ) var ( logPath = os.Getenv("log") - logger = logrus.New() argvs = make(map[string]*rFunc) ) diff --git a/nsinit/stats.go b/nsinit/stats.go index a195231cd..62fc1d4b2 100644 --- a/nsinit/stats.go +++ b/nsinit/stats.go @@ -16,7 +16,7 @@ var statsCommand = 
cli.Command{ } func statsAction(context *cli.Context) { - factory, err := libcontainer.New(context.GlobalString("root"), logger) + factory, err := libcontainer.New(context.GlobalString("root")) if err != nil { log.Fatal(err) } From 646ca2ba1184d4830a045ca9fba2e50469539e5a Mon Sep 17 00:00:00 2001 From: Victor Marmol Date: Fri, 5 Dec 2014 17:02:49 -0800 Subject: [PATCH 013/101] Minor refactorings to fix build. Signed-off-by: Victor Marmol --- cgroup_manager.go | 30 +++++++++++++++++++++++++ cgroups.go | 50 ----------------------------------------- linux_container.go | 6 ++--- linux_container_test.go | 4 ++-- linux_factory.go | 2 +- 5 files changed, 36 insertions(+), 56 deletions(-) create mode 100644 cgroup_manager.go delete mode 100644 cgroups.go diff --git a/cgroup_manager.go b/cgroup_manager.go new file mode 100644 index 000000000..1bcb1bc4f --- /dev/null +++ b/cgroup_manager.go @@ -0,0 +1,30 @@ +package libcontainer + +import ( + "github.com/docker/libcontainer/cgroups" +) + +// TODO(vmarmol): Move this to cgroups and rename to Manager. 
+type CgroupManager interface { + GetPids() ([]int, error) + GetStats() (*cgroups.Stats, error) +} + +func NewCgroupManager() CgroupManager { + return &fsManager{} +} + +type fsManager struct { +} + +func (m *fsManager) GetPids() ([]int, error) { + // TODO(vmarmol): Implement + //return fs.GetPids(config) + panic("not implemented") +} + +func (m *fsManager) GetStats() (*cgroups.Stats, error) { + // TODO(vmarmol): Implement + //return fs.GetStats(config) + panic("not implemented") +} diff --git a/cgroups.go b/cgroups.go deleted file mode 100644 index 68754b062..000000000 --- a/cgroups.go +++ /dev/null @@ -1,50 +0,0 @@ -package libcontainer - -import ( - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/cgroups/systemd" -) - -type CgroupManager interface { - String() string - GetPids(*cgroups.Cgroup) ([]int, error) - GetStats(*cgroups.Cgroup) (*cgroups.Stats, error) -} - -func newCgroupsManager() CgroupManager { - if systemd.UseSystemd() { - return &systemdCgroupManager{} - } - return &fsCgroupsManager{} -} - -type systemdCgroupManager struct { -} - -func (m *systemdCgroupManager) GetPids(config *cgroups.Cgroup) ([]int, error) { - return systemd.GetPids(config) -} - -func (m *systemdCgroupManager) GetStats(config *cgroups.Cgroup) (*cgroups.Stats, error) { - return systemd.GetStats(config) -} - -func (m *systemdCgroupManager) String() string { - return "systemd" -} - -type fsCgroupsManager struct { -} - -func (m *fsCgroupsManager) GetPids(config *cgroups.Cgroup) ([]int, error) { - return fs.GetPids(config) -} - -func (m *fsCgroupsManager) GetStats(config *cgroups.Cgroup) (*cgroups.Stats, error) { - return fs.GetStats(config) -} - -func (m *fsCgroupsManager) String() string { - return "fs" -} diff --git a/linux_container.go b/linux_container.go index f69db34c6..0ba92a601 100644 --- a/linux_container.go +++ b/linux_container.go @@ -3,8 +3,8 @@ package libcontainer import ( - 
"github.com/Sirupsen/logrus" "github.com/docker/libcontainer/network" + "github.com/golang/glog" ) type linuxContainer struct { @@ -29,7 +29,7 @@ func (c *linuxContainer) RunState() (RunState, error) { func (c *linuxContainer) Processes() ([]int, error) { glog.Info("fetch container processes") - pids, err := c.cgroupManager.GetPids(c.config.Cgroups) + pids, err := c.cgroupManager.GetPids() if err != nil { return nil, newGenericError(err, SystemError) } @@ -43,7 +43,7 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { stats = &ContainerStats{} ) - if stats.CgroupStats, err = c.cgroupManager.GetStats(c.config.Cgroups); err != nil { + if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newGenericError(err, SystemError) } if stats.NetworkStats, err = network.GetStats(&c.state.NetworkState); err != nil { diff --git a/linux_container_test.go b/linux_container_test.go index 85a01cd33..cd8d33d03 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -13,11 +13,11 @@ type mockCgroupManager struct { stats *cgroups.Stats } -func (m *mockCgroupManager) GetPids(config *cgroups.Cgroup) ([]int, error) { +func (m *mockCgroupManager) GetPids() ([]int, error) { return m.pids, nil } -func (m *mockCgroupManager) GetStats(config *cgroups.Cgroup) (*cgroups.Stats, error) { +func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) { return m.stats, nil } diff --git a/linux_factory.go b/linux_factory.go index c7c359dbb..772c89ef5 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -73,7 +73,7 @@ func (l *linuxFactory) Load(id string) (Container, error) { return nil, err } - cgroupManager := newCgroupsManager() + cgroupManager := NewCgroupManager() glog.Infof("using %s as cgroup manager", cgroupManager) return &linuxContainer{ id: id, From b608f5df107d4160c94843dc61dde4cd6a0dafc8 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 24 Nov 2014 12:13:34 +0400 Subject: [PATCH 014/101] nsinit: Add Makefile Signed-off-by: 
Andrew Vagin --- nsinit/Makefile | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 nsinit/Makefile diff --git a/nsinit/Makefile b/nsinit/Makefile new file mode 100644 index 000000000..57adf154d --- /dev/null +++ b/nsinit/Makefile @@ -0,0 +1,2 @@ +all: + go build -o nsinit . From 159db89c1f302840f1dea7046bf87e16ae858ea7 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 15 Dec 2014 18:08:56 +0300 Subject: [PATCH 015/101] nsinit: use the new API for executing processes Signed-off-by: Andrew Vagin --- nsinit/exec.go | 56 ++++++++++++++++++++++++++++++++++++++++++------- nsinit/init.go | 33 +++++++++++++---------------- nsinit/stats.go | 2 +- 3 files changed, 64 insertions(+), 27 deletions(-) diff --git a/nsinit/exec.go b/nsinit/exec.go index 6fc553b8f..4c7d6316c 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -1,6 +1,7 @@ package main import ( + "crypto/md5" "fmt" "io" "log" @@ -17,6 +18,12 @@ import ( "github.com/docker/libcontainer/namespaces" ) +var ( + dataPath = os.Getenv("data_path") + console = os.Getenv("console") + rawPipeFd = os.Getenv("pipe") +) + var execCommand = cli.Command{ Name: "exec", Usage: "execute a new command inside a container", @@ -43,26 +50,59 @@ func execAction(context *cli.Context) { var exitCode int - container, err := loadConfig() + process := &libcontainer.ProcessConfig{ + Args: context.Args(), + Env: context.StringSlice("env"), + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, + } + + factory, err := libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) if err != nil { log.Fatal(err) } - state, err := libcontainer.GetState(dataPath) + id := fmt.Sprintf("%x", md5.Sum([]byte(dataPath))) + container, err := factory.Load(id) if err != nil && !os.IsNotExist(err) { - log.Fatalf("unable to read state.json: %s", err) - } + var config *libcontainer.Config - if state != nil { - exitCode, err = startInExistingContainer(container, state, context.String("func"), context) - } else { 
- exitCode, err = startContainer(container, dataPath, []string(context.Args())) + config, err = loadConfig() + if err != nil { + log.Fatal(err) + } + container, err = factory.Create(id, config) + } + if err != nil { + log.Fatal(err) } + pid, err := container.StartProcess(process) if err != nil { log.Fatalf("failed to exec: %s", err) } + p, err := os.FindProcess(pid) + if err != nil { + log.Fatalf("Unable to find the %d process: %s", pid, err) + } + + ps, err := p.Wait() + if err != nil { + log.Fatalf("Unable to wait the %d process: %s", pid, err) + } + container.Destroy() + + status := ps.Sys().(syscall.WaitStatus) + if status.Exited() { + exitCode = status.ExitStatus() + } else if status.Signaled() { + exitCode = -int(status.Signal()) + } else { + log.Fatalf("Unexpected status") + } + os.Exit(exitCode) } diff --git a/nsinit/init.go b/nsinit/init.go index 6df9b1d89..853c9e8e9 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -1,47 +1,44 @@ package main import ( + "github.com/docker/libcontainer/system" "log" "os" - "runtime" - "strconv" "github.com/codegangsta/cli" - "github.com/docker/libcontainer/namespaces" + "github.com/docker/libcontainer" ) var ( - dataPath = os.Getenv("data_path") - console = os.Getenv("console") - rawPipeFd = os.Getenv("pipe") - initCommand = cli.Command{ Name: "init", Usage: "runs the init process inside the namespace", Action: initAction, + Flags: []cli.Flag{ + cli.IntFlag{"fd", 0, "internal pipe fd"}, + }, } ) func initAction(context *cli.Context) { - runtime.LockOSThread() - - container, err := loadConfig() + factory, err := libcontainer.New("", []string{}) if err != nil { log.Fatal(err) } - rootfs, err := os.Getwd() - if err != nil { - log.Fatal(err) + if context.Int("fd") == 0 { + log.Fatal("--fd must be specified for init process") } - pipeFd, err := strconv.Atoi(rawPipeFd) - if err != nil { + fd := uintptr(context.Int("fd")) + + if err := factory.StartInitialization(fd); err != nil { log.Fatal(err) } - pipe := 
os.NewFile(uintptr(pipeFd), "pipe") - if err := namespaces.Init(container, rootfs, console, pipe, []string(context.Args())); err != nil { - log.Fatalf("unable to initialize for container: %s", err) + args := []string(context.Args()) + + if err := system.Execv(args[0], args[0:], os.Environ()); err != nil { + log.Fatal(err) } } diff --git a/nsinit/stats.go b/nsinit/stats.go index 62fc1d4b2..6d8f75855 100644 --- a/nsinit/stats.go +++ b/nsinit/stats.go @@ -16,7 +16,7 @@ var statsCommand = cli.Command{ } func statsAction(context *cli.Context) { - factory, err := libcontainer.New(context.GlobalString("root")) + factory, err := libcontainer.New(context.GlobalString("root"), nil) if err != nil { log.Fatal(err) } From bce773a8c45841f05e3421fc87fb59f3ffd9ea4d Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 15 Dec 2014 18:00:04 +0300 Subject: [PATCH 016/101] linux_container: ct.Destroy() returns error if CT isn't stopped Signed-off-by: Andrew Vagin --- error.go | 3 +++ linux_container.go | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/error.go b/error.go index e86267009..062943a11 100644 --- a/error.go +++ b/error.go @@ -14,6 +14,7 @@ const ( // Container errors ContainerNotExists ContainerPaused + ContainerNotStopped // Common errors ConfigInvalid @@ -34,6 +35,8 @@ func (c ErrorCode) String() string { return "System error" case ContainerNotExists: return "Container does not exist" + case ContainerNotStopped: + return "Container isn't stopped" default: return "Unknown error" } diff --git a/linux_container.go b/linux_container.go index 0ba92a601..63135cb8e 100644 --- a/linux_container.go +++ b/linux_container.go @@ -58,6 +58,15 @@ func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { } func (c *linuxContainer) Destroy() error { + state, err := c.RunState() + if err != nil { + return err + } + + if state != Destroyed { + return newGenericError(nil, ContainerNotStopped) + } + glog.Info("destroy container") panic("not implemented") } From 
2441babc48ba943a058ead02b1de32960cc83d71 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 15 Dec 2014 18:00:57 +0300 Subject: [PATCH 017/101] linux_factory: add StartInitialization() After forking a new process in a new container, we need to call exec() and tune namespaces. Signed-off-by: Andrew Vagin --- factory.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/factory.go b/factory.go index 389c138a6..69014c14c 100644 --- a/factory.go +++ b/factory.go @@ -27,4 +27,14 @@ type Factory interface { // Container is stopped // System error Load(id string) (Container, error) + + // StartInitialization is an internal API to libcontainer used during the rexec of the + // container. pipefd is the fd to the child end of the pipe used to syncronize the + // parent and child process providing state and configuration to the child process and + // returning any errors during the init of the container + // + // Errors: + // pipe connection error + // system error + StartInitialization(pipefd uintptr) error } From 8e9a6d28d2504256f1939c4301ad23c5274be881 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 15 Dec 2014 18:05:11 +0300 Subject: [PATCH 018/101] linux_container: execute an init process in a new CT Here is a first step to implement a new API. Currently the init process is executed without namespaces and cgroups. Signed-off-by: Andrew Vagin --- factory.go | 18 ++++----- linux_container.go | 87 ++++++++++++++++++++++++++++++++++++++++--- linux_factory.go | 61 +++++++++++++++++++++++++++--- linux_factory_test.go | 6 +-- 4 files changed, 149 insertions(+), 23 deletions(-) diff --git a/factory.go b/factory.go index 69014c14c..4959ff1e1 100644 --- a/factory.go +++ b/factory.go @@ -28,13 +28,13 @@ type Factory interface { // System error Load(id string) (Container, error) - // StartInitialization is an internal API to libcontainer used during the rexec of the - // container. 
pipefd is the fd to the child end of the pipe used to syncronize the - // parent and child process providing state and configuration to the child process and - // returning any errors during the init of the container - // - // Errors: - // pipe connection error - // system error - StartInitialization(pipefd uintptr) error + // StartInitialization is an internal API to libcontainer used during the rexec of the + // container. pipefd is the fd to the child end of the pipe used to syncronize the + // parent and child process providing state and configuration to the child process and + // returning any errors during the init of the container + // + // Errors: + // pipe connection error + // system error + StartInitialization(pipefd uintptr) error } diff --git a/linux_container.go b/linux_container.go index 63135cb8e..10d66bfe1 100644 --- a/linux_container.go +++ b/linux_container.go @@ -3,6 +3,13 @@ package libcontainer import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "syscall" + "github.com/docker/libcontainer/network" "github.com/golang/glog" ) @@ -13,6 +20,7 @@ type linuxContainer struct { config *Config state *State cgroupManager CgroupManager + initArgs []string } func (c *linuxContainer) ID() string { @@ -24,7 +32,7 @@ func (c *linuxContainer) Config() *Config { } func (c *linuxContainer) RunState() (RunState, error) { - panic("not implemented") + return Destroyed, nil // FIXME return a real state } func (c *linuxContainer) Processes() ([]int, error) { @@ -53,8 +61,77 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { } func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { - glog.Info("start new container process") - panic("not implemented") + state, err := c.RunState() + if err != nil { + return -1, err + } + + if state != Destroyed { + glog.Info("start new container process") + panic("not implemented") + } + + if err := c.startInitProcess(config); err != nil { + return -1, err + } + + return 
c.state.InitPid, nil +} + +func (c *linuxContainer) updateStateFile() error { + data, err := json.MarshalIndent(c.state, "", "\t") + if err != nil { + return newGenericError(err, SystemError) + } + + fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename)) + f, err := os.Create(fnew) + if err != nil { + return newGenericError(err, SystemError) + } + + _, err = f.Write(data) + if err != nil { + f.Close() + return newGenericError(err, SystemError) + } + f.Close() + + fname := filepath.Join(c.root, stateFilename) + if err := os.Rename(fnew, fname); err != nil { + return newGenericError(err, SystemError) + } + + return nil +} + +func (c *linuxContainer) startInitProcess(config *ProcessConfig) error { + cmd := exec.Command(c.initArgs[0], append(c.initArgs[1:], config.Args...)...) + cmd.Stdin = config.Stdin + cmd.Stdout = config.Stdout + cmd.Stderr = config.Stderr + + cmd.Env = config.Env + cmd.Dir = c.config.RootFs + + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + + //FIXME call namespaces.Exec() + if err := cmd.Start(); err != nil { + return err + } + + c.state.InitPid = cmd.Process.Pid + err := c.updateStateFile() + if err != nil { + return err + } + + return nil } func (c *linuxContainer) Destroy() error { @@ -67,8 +144,8 @@ func (c *linuxContainer) Destroy() error { return newGenericError(nil, ContainerNotStopped) } - glog.Info("destroy container") - panic("not implemented") + os.RemoveAll(c.root) + return nil } func (c *linuxContainer) Pause() error { diff --git a/linux_factory.go b/linux_factory.go index 772c89ef5..30c63566a 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -23,23 +23,30 @@ var ( ) // New returns a linux based container factory based in the root directory. 
-func New(root string) (Factory, error) { - if err := os.MkdirAll(root, 0700); err != nil { - return nil, newGenericError(err, SystemError) +func New(root string, initArgs []string) (Factory, error) { + if root != "" { + if err := os.MkdirAll(root, 0700); err != nil { + return nil, newGenericError(err, SystemError) + } } return &linuxFactory{ - root: root, + root: root, + initArgs: initArgs, }, nil } // linuxFactory implements the default factory interface for linux based systems. type linuxFactory struct { // root is the root directory - root string + root string + initArgs []string } func (l *linuxFactory) Create(id string, config *Config) (Container, error) { + if l.root == "" { + return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) + } if !idRegex.MatchString(id) { return nil, newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) } @@ -56,10 +63,43 @@ func (l *linuxFactory) Create(id string, config *Config) (Container, error) { return nil, newGenericError(err, SystemError) } - panic("not implemented") + data, err := json.MarshalIndent(config, "", "\t") + if err != nil { + return nil, newGenericError(err, SystemError) + } + + if err := os.MkdirAll(containerRoot, 0700); err != nil { + return nil, newGenericError(err, SystemError) + } + + f, err := os.Create(filepath.Join(containerRoot, configFilename)) + if err != nil { + os.RemoveAll(containerRoot) + return nil, newGenericError(err, SystemError) + } + defer f.Close() + + _, err = f.Write(data) + if err != nil { + os.RemoveAll(containerRoot) + return nil, newGenericError(err, SystemError) + } + + cgroupManager := NewCgroupManager() + return &linuxContainer{ + id: id, + root: containerRoot, + config: config, + initArgs: l.initArgs, + state: &State{}, + cgroupManager: cgroupManager, + }, nil } func (l *linuxFactory) Load(id string) (Container, error) { + if l.root == "" { + return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) + } containerRoot := 
filepath.Join(l.root, id) glog.Infof("loading container config from %s", containerRoot) config, err := l.loadContainerConfig(containerRoot) @@ -81,6 +121,7 @@ func (l *linuxFactory) Load(id string) (Container, error) { config: config, state: state, cgroupManager: cgroupManager, + initArgs: l.initArgs, }, nil } @@ -117,3 +158,11 @@ func (l *linuxFactory) loadContainerState(root string) (*State, error) { } return state, nil } + +// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state +// This is a low level implementation detail of the reexec and should not be consumed externally +func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { + + /* FIXME call namespaces.Init() */ + return nil +} diff --git a/linux_factory_test.go b/linux_factory_test.go index aa9a88482..168be1be7 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -28,7 +28,7 @@ func TestFactoryNew(t *testing.T) { } defer os.RemoveAll(root) - factory, err := New(root) + factory, err := New(root, nil) if err != nil { t.Fatal(err) } @@ -54,7 +54,7 @@ func TestFactoryLoadNotExists(t *testing.T) { } defer os.RemoveAll(root) - factory, err := New(root) + factory, err := New(root, nil) if err != nil { t.Fatal(err) } @@ -101,7 +101,7 @@ func TestFactoryLoadContainer(t *testing.T) { t.Fatal(err) } - factory, err := New(root) + factory, err := New(root, nil) if err != nil { t.Fatal(err) } From 44024d0c4748668184cd3c9d899eca617177ba15 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 17 Dec 2014 07:34:55 +0300 Subject: [PATCH 019/101] Makefile: get the glog package Signed-off-by: Andrey Vagin --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 0c4dda7c9..bc7f17851 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ sh: GO_PACKAGES = $(shell find . 
-not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u) direct-test: + go get github.com/golang/glog && \ go test $(TEST_TAGS) -cover -v $(GO_PACKAGES) direct-test-short: From 7038ddbc8c8c21491d1a88a25b120505997c4a36 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 17 Dec 2014 12:12:23 +0300 Subject: [PATCH 020/101] libcontainer: move Config in a separate package We are going to import the namespaces package into libcontainer, so libcontainer should not be imported into namespaces. Signed-off-by: Andrey Vagin --- config.go => configs/config.go | 2 +- config_test.go => configs/config_test.go | 4 ++-- container.go | 6 +++++- factory.go | 6 +++++- integration/exec_test.go | 4 ++-- integration/template_test.go | 14 +++++++------- integration/utils_test.go | 10 +++++----- linux_container.go | 5 +++-- linux_container_test.go | 5 +++-- linux_factory.go | 8 +++++--- linux_factory_test.go | 4 +++- namespaces/create.go | 4 ++-- namespaces/exec.go | 9 +++++---- namespaces/execin.go | 5 +++-- namespaces/init.go | 16 ++++++++-------- namespaces/utils.go | 4 ++-- nsinit/exec.go | 9 +++++---- nsinit/nsenter.go | 8 ++++---- nsinit/utils.go | 12 ++++++------ 19 files changed, 76 insertions(+), 59 deletions(-) rename config.go => configs/config.go (99%) rename config_test.go => configs/config_test.go (97%) diff --git a/config.go b/configs/config.go similarity index 99% rename from config.go rename to configs/config.go index 94c2bd981..ab40b2b4b 100644 --- a/config.go +++ b/configs/config.go @@ -1,4 +1,4 @@ -package libcontainer +package configs import ( "github.com/docker/libcontainer/cgroups" diff --git a/config_test.go b/configs/config_test.go similarity index 97% rename from config_test.go rename to configs/config_test.go index b4e16bf01..f698e3d9a 100644 --- a/config_test.go +++ b/configs/config_test.go @@ -1,4 +1,4 @@ -package libcontainer +package configs import ( "encoding/json" @@ -34,7 +34,7 @@ func 
containsDevice(expected *devices.Device, values []*devices.Device) bool { } func loadConfig(name string) (*Config, error) { - f, err := os.Open(filepath.Join("sample_configs", name)) + f, err := os.Open(filepath.Join("../sample_configs", name)) if err != nil { return nil, err } diff --git a/container.go b/container.go index e53769025..c0bafa5d7 100644 --- a/container.go +++ b/container.go @@ -3,6 +3,10 @@ NOTE: The API is in flux and mainly not implemented. Proceed with caution until */ package libcontainer +import ( + "github.com/docker/libcontainer/configs" +) + // A libcontainer container object. // // Each container is thread-safe within the same process. Since a container can @@ -20,7 +24,7 @@ type Container interface { RunState() (RunState, error) // Returns the current config of the container. - Config() *Config + Config() *configs.Config // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. // diff --git a/factory.go b/factory.go index 4959ff1e1..37e629645 100644 --- a/factory.go +++ b/factory.go @@ -1,5 +1,9 @@ package libcontainer +import ( + "github.com/docker/libcontainer/configs" +) + type Factory interface { // Creates a new container with the given id and starts the initial process inside it. // id must be a string containing only letters, digits and underscores and must contain @@ -17,7 +21,7 @@ type Factory interface { // Systemerror - System error // // On error, any partially created container parts are cleaned up (the operation is atomic). - Create(id string, config *Config) (Container, error) + Create(id string, config *configs.Config) (Container, error) // Load takes an ID for an existing container and returns the container information // from the state. This presents a read only view of the container. 
diff --git a/integration/exec_test.go b/integration/exec_test.go index cf749efbf..993ca25ce 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -5,7 +5,7 @@ import ( "strings" "testing" - "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" ) func TestExecPS(t *testing.T) { @@ -180,7 +180,7 @@ func TestRlimit(t *testing.T) { } } -func getNamespaceIndex(config *libcontainer.Config, name string) int { +func getNamespaceIndex(config *configs.Config, name string) int { for i, v := range config.Namespaces { if v.Name == name { return i diff --git a/integration/template_test.go b/integration/template_test.go index f37070ffb..d58bb6133 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -3,8 +3,8 @@ package integration import ( "syscall" - "github.com/docker/libcontainer" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/devices" ) @@ -12,8 +12,8 @@ import ( // // it uses a network strategy of just setting a loopback interface // and the default setup for devices -func newTemplateConfig(rootfs string) *libcontainer.Config { - return &libcontainer.Config{ +func newTemplateConfig(rootfs string) *configs.Config { + return &configs.Config{ RootFs: rootfs, Tty: false, Capabilities: []string{ @@ -32,7 +32,7 @@ func newTemplateConfig(rootfs string) *libcontainer.Config { "KILL", "AUDIT_WRITE", }, - Namespaces: []libcontainer.Namespace{ + Namespaces: []configs.Namespace{ {Name: "NEWNS"}, {Name: "NEWUTS"}, {Name: "NEWIPC"}, @@ -45,7 +45,7 @@ func newTemplateConfig(rootfs string) *libcontainer.Config { AllowedDevices: devices.DefaultAllowedDevices, }, - MountConfig: &libcontainer.MountConfig{ + MountConfig: &configs.MountConfig{ DeviceNodes: devices.DefaultAutoCreatedDevices, }, Hostname: "integration", @@ -55,14 +55,14 @@ func newTemplateConfig(rootfs string) *libcontainer.Config { "HOSTNAME=integration", "TERM=xterm", }, - Networks: 
[]*libcontainer.Network{ + Networks: []*configs.Network{ { Type: "loopback", Address: "127.0.0.1/0", Gateway: "localhost", }, }, - Rlimits: []libcontainer.Rlimit{ + Rlimits: []configs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, Hard: uint64(1024), diff --git a/integration/utils_test.go b/integration/utils_test.go index 6393fb998..051252fc6 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -9,7 +9,7 @@ import ( "os/exec" "path/filepath" - "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/namespaces" ) @@ -27,7 +27,7 @@ type stdBuffers struct { Stderr *bytes.Buffer } -func writeConfig(config *libcontainer.Config) error { +func writeConfig(config *configs.Config) error { f, err := os.OpenFile(filepath.Join(config.RootFs, "container.json"), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0700) if err != nil { return err @@ -36,14 +36,14 @@ func writeConfig(config *libcontainer.Config) error { return json.NewEncoder(f).Encode(config) } -func loadConfig() (*libcontainer.Config, error) { +func loadConfig() (*configs.Config, error) { f, err := os.Open(filepath.Join(os.Getenv("data_path"), "container.json")) if err != nil { return nil, err } defer f.Close() - var container *libcontainer.Config + var container *configs.Config if err := json.NewDecoder(f).Decode(&container); err != nil { return nil, err } @@ -83,7 +83,7 @@ func copyBusybox(dest string) error { // // buffers are returned containing the STDOUT and STDERR output for the run // along with the exit code and any go error -func runContainer(config *libcontainer.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) { +func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) { if err := writeConfig(config); err != nil { return nil, -1, err } diff --git a/linux_container.go b/linux_container.go index 10d66bfe1..d733cb79d 100644 --- a/linux_container.go +++ 
b/linux_container.go @@ -10,6 +10,7 @@ import ( "path/filepath" "syscall" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/network" "github.com/golang/glog" ) @@ -17,7 +18,7 @@ import ( type linuxContainer struct { id string root string - config *Config + config *configs.Config state *State cgroupManager CgroupManager initArgs []string @@ -27,7 +28,7 @@ func (c *linuxContainer) ID() string { return c.id } -func (c *linuxContainer) Config() *Config { +func (c *linuxContainer) Config() *configs.Config { return c.config } diff --git a/linux_container_test.go b/linux_container_test.go index cd8d33d03..6771a824d 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) type mockCgroupManager struct { @@ -24,7 +25,7 @@ func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) { func TestGetContainerPids(t *testing.T) { container := &linuxContainer{ id: "myid", - config: &Config{}, + config: &configs.Config{}, cgroupManager: &mockCgroupManager{pids: []int{1, 2, 3}}, } @@ -43,7 +44,7 @@ func TestGetContainerPids(t *testing.T) { func TestGetContainerStats(t *testing.T) { container := &linuxContainer{ id: "myid", - config: &Config{}, + config: &configs.Config{}, cgroupManager: &mockCgroupManager{ pids: []int{1, 2, 3}, stats: &cgroups.Stats{ diff --git a/linux_factory.go b/linux_factory.go index 30c63566a..5d5d097ce 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -10,6 +10,8 @@ import ( "regexp" "github.com/golang/glog" + + "github.com/docker/libcontainer/configs" ) const ( @@ -43,7 +45,7 @@ type linuxFactory struct { initArgs []string } -func (l *linuxFactory) Create(id string, config *Config) (Container, error) { +func (l *linuxFactory) Create(id string, config *configs.Config) (Container, error) { if l.root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } @@ -125,7 +127,7 @@ 
func (l *linuxFactory) Load(id string) (Container, error) { }, nil } -func (l *linuxFactory) loadContainerConfig(root string) (*Config, error) { +func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) { f, err := os.Open(filepath.Join(root, configFilename)) if err != nil { if os.IsNotExist(err) { @@ -135,7 +137,7 @@ func (l *linuxFactory) loadContainerConfig(root string) (*Config, error) { } defer f.Close() - var config *Config + var config *configs.Config if err := json.NewDecoder(f).Decode(&config); err != nil { return nil, newGenericError(err, ConfigInvalid) } diff --git a/linux_factory_test.go b/linux_factory_test.go index 168be1be7..51986d394 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -8,6 +8,8 @@ import ( "os" "path/filepath" "testing" + + "github.com/docker/libcontainer/configs" ) func newTestRoot() (string, error) { @@ -83,7 +85,7 @@ func TestFactoryLoadContainer(t *testing.T) { // setup default container config and state for mocking var ( id = "1" - expectedConfig = &Config{ + expectedConfig = &configs.Config{ RootFs: "/mycontainer/root", } expectedState = &State{ diff --git a/namespaces/create.go b/namespaces/create.go index b6418b6e9..30de84cee 100644 --- a/namespaces/create.go +++ b/namespaces/create.go @@ -4,7 +4,7 @@ import ( "os" "os/exec" - "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" ) -type CreateCommand func(container *libcontainer.Config, console, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd +type CreateCommand func(container *configs.Config, console, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd diff --git a/namespaces/exec.go b/namespaces/exec.go index b7873edd0..63ade9167 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -13,6 +13,7 @@ import ( "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" + 
"github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/system" ) @@ -21,7 +22,7 @@ import ( // Move this to libcontainer package. // Exec performs setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) { +func Exec(container *configs.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) { var err error // create a pipe so that we can syncronize with the namespaced process and @@ -122,7 +123,7 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri // root: the path to the container json file and information // pipe: sync pipe to synchronize the parent and child processes // args: the arguments to pass to the container to run as the user's program -func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, init string, pipe *os.File, args []string) *exec.Cmd { +func DefaultCreateCommand(container *configs.Config, console, dataPath, init string, pipe *os.File, args []string) *exec.Cmd { // get our binary name from arg0 so we can always reexec ourself env := []string{ "console=" + console, @@ -148,7 +149,7 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini // SetupCgroups applies the cgroup restrictions to the process running in the container based // on the container's configuration -func SetupCgroups(container *libcontainer.Config, nspid int) (map[string]string, error) { +func SetupCgroups(container *configs.Config, nspid int) (map[string]string, error) { if container.Cgroups != nil { c := container.Cgroups if systemd.UseSystemd() { @@ -161,7 +162,7 @@ func 
SetupCgroups(container *libcontainer.Config, nspid int) (map[string]string, // InitializeNetworking creates the container's network stack outside of the namespace and moves // interfaces into the container's net namespaces if necessary -func InitializeNetworking(container *libcontainer.Config, nspid int, networkState *network.NetworkState) error { +func InitializeNetworking(container *configs.Config, nspid int, networkState *network.NetworkState) error { for _, config := range container.Networks { strategy, err := network.GetStrategy(config.Type) if err != nil { diff --git a/namespaces/execin.go b/namespaces/execin.go index 430dc72fe..8b642fd3a 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -15,13 +15,14 @@ import ( "github.com/docker/libcontainer" "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/system" ) // ExecIn reexec's the initPath with the argv 0 rewrite to "nsenter" so that it is able to run the // setns code in a single threaded environment joining the existing containers' namespaces. 
-func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs []string, initPath, action string, +func ExecIn(container *configs.Config, state *libcontainer.State, userArgs []string, initPath, action string, stdin io.Reader, stdout, stderr io.Writer, console string, startCallback func(*exec.Cmd)) (int, error) { args := []string{fmt.Sprintf("nsenter-%s", action), "--nspid", strconv.Itoa(state.InitPid)} @@ -91,7 +92,7 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs // Finalize expects that the setns calls have been setup and that is has joined an // existing namespace -func FinalizeSetns(container *libcontainer.Config, args []string) error { +func FinalizeSetns(container *configs.Config, args []string) error { // clear the current processes env and replace it with the environment defined on the container if err := LoadContainerEnvironment(container); err != nil { return err diff --git a/namespaces/init.go b/namespaces/init.go index 5c7e1a71d..a6d49f61a 100644 --- a/namespaces/init.go +++ b/namespaces/init.go @@ -10,8 +10,8 @@ import ( "strings" "syscall" - "github.com/docker/libcontainer" "github.com/docker/libcontainer/apparmor" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/mount" @@ -30,7 +30,7 @@ import ( // and other options required for the new container. // The caller of Init function has to ensure that the go runtime is locked to an OS thread // (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. 
-func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) { +func Init(container *configs.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) { defer func() { // if we have an error during the initialization of the container's init then send it back to the // parent process in the form of an initError. @@ -218,7 +218,7 @@ func SetupUser(u string) error { // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway -func setupNetwork(container *libcontainer.Config, networkState *network.NetworkState) error { +func setupNetwork(container *configs.Config, networkState *network.NetworkState) error { for _, config := range container.Networks { strategy, err := network.GetStrategy(config.Type) if err != nil { @@ -233,7 +233,7 @@ func setupNetwork(container *libcontainer.Config, networkState *network.NetworkS return nil } -func setupRoute(container *libcontainer.Config) error { +func setupRoute(container *configs.Config) error { for _, config := range container.Routes { if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { return err @@ -242,7 +242,7 @@ func setupRoute(container *libcontainer.Config) error { return nil } -func setupRlimits(container *libcontainer.Config) error { +func setupRlimits(container *configs.Config) error { for _, rlimit := range container.Rlimits { l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} if err := syscall.Setrlimit(rlimit.Type, l); err != nil { @@ -255,7 +255,7 @@ func setupRlimits(container *libcontainer.Config) error { // FinalizeNamespace drops the caps, sets the correct user // and working dir, and closes any leaky file descriptors // before execing the command inside the namespace -func 
FinalizeNamespace(container *libcontainer.Config) error { +func FinalizeNamespace(container *configs.Config) error { // Ensure that all non-standard fds we may have accidentally // inherited are marked close-on-exec so they stay out of the // container @@ -295,7 +295,7 @@ func FinalizeNamespace(container *libcontainer.Config) error { return nil } -func LoadContainerEnvironment(container *libcontainer.Config) error { +func LoadContainerEnvironment(container *configs.Config) error { os.Clearenv() for _, pair := range container.Env { p := strings.SplitN(pair, "=", 2) @@ -311,7 +311,7 @@ func LoadContainerEnvironment(container *libcontainer.Config) error { // joinExistingNamespaces gets all the namespace paths specified for the container and // does a setns on the namespace fd so that the current process joins the namespace. -func joinExistingNamespaces(namespaces []libcontainer.Namespace) error { +func joinExistingNamespaces(namespaces []configs.Namespace) error { for _, ns := range namespaces { if ns.Path != "" { f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) diff --git a/namespaces/utils.go b/namespaces/utils.go index 556ea6699..4aa590fd7 100644 --- a/namespaces/utils.go +++ b/namespaces/utils.go @@ -6,7 +6,7 @@ import ( "os" "syscall" - "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" ) type initError struct { @@ -37,7 +37,7 @@ func newInitPipe() (parent *os.File, child *os.File, err error) { // GetNamespaceFlags parses the container's Namespaces options to set the correct // flags on clone, unshare, and setns -func GetNamespaceFlags(namespaces []libcontainer.Namespace) (flag int) { +func GetNamespaceFlags(namespaces []configs.Namespace) (flag int) { for _, v := range namespaces { flag |= namespaceInfo[v.Name] } diff --git a/nsinit/exec.go b/nsinit/exec.go index 4c7d6316c..a5fd29cbb 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -14,6 +14,7 @@ import ( "github.com/codegangsta/cli" "github.com/docker/docker/pkg/term" 
"github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" consolepkg "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/namespaces" ) @@ -66,7 +67,7 @@ func execAction(context *cli.Context) { id := fmt.Sprintf("%x", md5.Sum([]byte(dataPath))) container, err := factory.Load(id) if err != nil && !os.IsNotExist(err) { - var config *libcontainer.Config + var config *configs.Config config, err = loadConfig() if err != nil { @@ -110,7 +111,7 @@ func execAction(context *cli.Context) { // with the nsenter argument so that the C code can setns an the namespaces that we require. Then that // code path will drop us into the path that we can do the final setup of the namespace and exec the users // application. -func startInExistingContainer(config *libcontainer.Config, state *libcontainer.State, action string, context *cli.Context) (int, error) { +func startInExistingContainer(config *configs.Config, state *libcontainer.State, action string, context *cli.Context) (int, error) { var ( master *os.File console string @@ -167,7 +168,7 @@ func startInExistingContainer(config *libcontainer.Config, state *libcontainer.S // error. // // Signals sent to the current process will be forwarded to container. 
-func startContainer(container *libcontainer.Config, dataPath string, args []string) (int, error) { +func startContainer(container *configs.Config, dataPath string, args []string) (int, error) { var ( cmd *exec.Cmd sigc = make(chan os.Signal, 10) @@ -175,7 +176,7 @@ func startContainer(container *libcontainer.Config, dataPath string, args []stri signal.Notify(sigc) - createCommand := func(container *libcontainer.Config, console, dataPath, init string, pipe *os.File, args []string) *exec.Cmd { + createCommand := func(container *configs.Config, console, dataPath, init string, pipe *os.File, args []string) *exec.Cmd { cmd = namespaces.DefaultCreateCommand(container, console, dataPath, init, pipe, args) if logPath != "" { cmd.Env = append(cmd.Env, fmt.Sprintf("log=%s", logPath)) diff --git a/nsinit/nsenter.go b/nsinit/nsenter.go index 8dc149f4f..8365215e4 100644 --- a/nsinit/nsenter.go +++ b/nsinit/nsenter.go @@ -9,7 +9,7 @@ import ( "strings" "text/tabwriter" - "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/mount/nodes" "github.com/docker/libcontainer/namespaces" @@ -17,7 +17,7 @@ import ( ) // nsenterExec exec's a process inside an existing container -func nsenterExec(config *libcontainer.Config, args []string) { +func nsenterExec(config *configs.Config, args []string) { if err := namespaces.FinalizeSetns(config, args); err != nil { log.Fatalf("failed to nsenter: %s", err) } @@ -26,7 +26,7 @@ func nsenterExec(config *libcontainer.Config, args []string) { // nsenterMknod runs mknod inside an existing container // // mknod -func nsenterMknod(config *libcontainer.Config, args []string) { +func nsenterMknod(config *configs.Config, args []string) { if len(args) != 4 { log.Fatalf("expected mknod to have 4 arguments not %d", len(args)) } @@ -56,7 +56,7 @@ func nsenterMknod(config *libcontainer.Config, args []string) { } // nsenterIp displays the network interfaces inside a 
container's net namespace -func nsenterIp(config *libcontainer.Config, args []string) { +func nsenterIp(config *configs.Config, args []string) { interfaces, err := net.Interfaces() if err != nil { log.Fatal(err) diff --git a/nsinit/utils.go b/nsinit/utils.go index 6a8aafbf1..e02a1b3a3 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -7,23 +7,23 @@ import ( "path/filepath" "github.com/codegangsta/cli" - "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" ) // rFunc is a function registration for calling after an execin type rFunc struct { Usage string - Action func(*libcontainer.Config, []string) + Action func(*configs.Config, []string) } -func loadConfig() (*libcontainer.Config, error) { +func loadConfig() (*configs.Config, error) { f, err := os.Open(filepath.Join(dataPath, "container.json")) if err != nil { return nil, err } defer f.Close() - var container *libcontainer.Config + var container *configs.Config if err := json.NewDecoder(f).Decode(&container); err != nil { return nil, err } @@ -57,11 +57,11 @@ func findUserArgs() []string { // loadConfigFromFd loads a container's config from the sync pipe that is provided by // fd 3 when running a process -func loadConfigFromFd() (*libcontainer.Config, error) { +func loadConfigFromFd() (*configs.Config, error) { pipe := os.NewFile(3, "pipe") defer pipe.Close() - var config *libcontainer.Config + var config *configs.Config if err := json.NewDecoder(pipe).Decode(&config); err != nil { return nil, err } From ce9d63376fea897f98c127f0cd8da528be4c6c23 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 17 Dec 2014 12:30:52 +0300 Subject: [PATCH 021/101] libcontainer: move State in the configs package We are going to import the namespaces package into libcontainer, so libcontainer should not be imported into namespaces. 
Signed-off-by: Andrey Vagin --- state.go => configs/state.go | 2 +- container.go | 2 +- linux_container.go | 14 +++++++------- linux_container_test.go | 2 +- linux_factory.go | 6 +++--- linux_factory_test.go | 2 +- namespaces/exec.go | 7 +++---- namespaces/execin.go | 5 ++--- nsinit/exec.go | 2 +- 9 files changed, 20 insertions(+), 22 deletions(-) rename state.go => configs/state.go (98%) diff --git a/state.go b/configs/state.go similarity index 98% rename from state.go rename to configs/state.go index 4ab47ad75..9dc770067 100644 --- a/state.go +++ b/configs/state.go @@ -1,4 +1,4 @@ -package libcontainer +package configs import ( "encoding/json" diff --git a/container.go b/container.go index c0bafa5d7..e04a43df4 100644 --- a/container.go +++ b/container.go @@ -21,7 +21,7 @@ type Container interface { // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - RunState() (RunState, error) + RunState() (configs.RunState, error) // Returns the current config of the container. 
Config() *configs.Config diff --git a/linux_container.go b/linux_container.go index d733cb79d..99cea6335 100644 --- a/linux_container.go +++ b/linux_container.go @@ -19,7 +19,7 @@ type linuxContainer struct { id string root string config *configs.Config - state *State + state *configs.State cgroupManager CgroupManager initArgs []string } @@ -32,8 +32,8 @@ func (c *linuxContainer) Config() *configs.Config { return c.config } -func (c *linuxContainer) RunState() (RunState, error) { - return Destroyed, nil // FIXME return a real state +func (c *linuxContainer) RunState() (configs.RunState, error) { + return configs.Destroyed, nil // FIXME return a real state } func (c *linuxContainer) Processes() ([]int, error) { @@ -61,18 +61,18 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { return stats, nil } -func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { +func (c *linuxContainer) StartProcess(pconfig *ProcessConfig) (int, error) { state, err := c.RunState() if err != nil { return -1, err } - if state != Destroyed { + if state != configs.Destroyed { glog.Info("start new container process") panic("not implemented") } - if err := c.startInitProcess(config); err != nil { + if err := c.startInitProcess(pconfig); err != nil { return -1, err } @@ -141,7 +141,7 @@ func (c *linuxContainer) Destroy() error { return err } - if state != Destroyed { + if state != configs.Destroyed { return newGenericError(nil, ContainerNotStopped) } diff --git a/linux_container_test.go b/linux_container_test.go index 6771a824d..64d4fb8bc 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -53,7 +53,7 @@ func TestGetContainerStats(t *testing.T) { }, }, }, - state: &State{}, + state: &configs.State{}, } stats, err := container.Stats() diff --git a/linux_factory.go b/linux_factory.go index 5d5d097ce..5dc679bdc 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -93,7 +93,7 @@ func (l *linuxFactory) Create(id string, config *configs.Config) 
(Container, err root: containerRoot, config: config, initArgs: l.initArgs, - state: &State{}, + state: &configs.State{}, cgroupManager: cgroupManager, }, nil } @@ -144,7 +144,7 @@ func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) return config, nil } -func (l *linuxFactory) loadContainerState(root string) (*State, error) { +func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { f, err := os.Open(filepath.Join(root, stateFilename)) if err != nil { if os.IsNotExist(err) { @@ -154,7 +154,7 @@ func (l *linuxFactory) loadContainerState(root string) (*State, error) { } defer f.Close() - var state *State + var state *configs.State if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, newGenericError(err, SystemError) } diff --git a/linux_factory_test.go b/linux_factory_test.go index 51986d394..3c1e275c0 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -88,7 +88,7 @@ func TestFactoryLoadContainer(t *testing.T) { expectedConfig = &configs.Config{ RootFs: "/mycontainer/root", } - expectedState = &State{ + expectedState = &configs.State{ InitPid: 1024, } ) diff --git a/namespaces/exec.go b/namespaces/exec.go index 63ade9167..32b9ab126 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -9,7 +9,6 @@ import ( "os/exec" "syscall" - "github.com/docker/libcontainer" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" @@ -80,17 +79,17 @@ func Exec(container *configs.Config, stdin io.Reader, stdout, stderr io.Writer, return terminate(err) } - state := &libcontainer.State{ + state := &configs.State{ InitPid: command.Process.Pid, InitStartTime: started, NetworkState: networkState, CgroupPaths: cgroupPaths, } - if err := libcontainer.SaveState(dataPath, state); err != nil { + if err := configs.SaveState(dataPath, state); err != nil { return terminate(err) } - defer libcontainer.DeleteState(dataPath) + defer 
configs.DeleteState(dataPath) // wait for the child process to fully complete and receive an error message // if one was encoutered diff --git a/namespaces/execin.go b/namespaces/execin.go index 8b642fd3a..2b63b8c6f 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -12,7 +12,6 @@ import ( "strconv" "syscall" - "github.com/docker/libcontainer" "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" @@ -22,7 +21,7 @@ import ( // ExecIn reexec's the initPath with the argv 0 rewrite to "nsenter" so that it is able to run the // setns code in a single threaded environment joining the existing containers' namespaces. -func ExecIn(container *configs.Config, state *libcontainer.State, userArgs []string, initPath, action string, +func ExecIn(container *configs.Config, state *configs.State, userArgs []string, initPath, action string, stdin io.Reader, stdout, stderr io.Writer, console string, startCallback func(*exec.Cmd)) (int, error) { args := []string{fmt.Sprintf("nsenter-%s", action), "--nspid", strconv.Itoa(state.InitPid)} @@ -119,6 +118,6 @@ func FinalizeSetns(container *configs.Config, args []string) error { panic("unreachable") } -func EnterCgroups(state *libcontainer.State, pid int) error { +func EnterCgroups(state *configs.State, pid int) error { return cgroups.EnterPid(state.CgroupPaths, pid) } diff --git a/nsinit/exec.go b/nsinit/exec.go index a5fd29cbb..8e0e734f2 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -111,7 +111,7 @@ func execAction(context *cli.Context) { // with the nsenter argument so that the C code can setns an the namespaces that we require. Then that // code path will drop us into the path that we can do the final setup of the namespace and exec the users // application. 
-func startInExistingContainer(config *configs.Config, state *libcontainer.State, action string, context *cli.Context) (int, error) { +func startInExistingContainer(config *configs.Config, state *configs.State, action string, context *cli.Context) (int, error) { var ( master *os.File console string From c406a6b6e00b996d5d5f67aa61e58189f8650e79 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 17 Dec 2014 16:16:29 +0300 Subject: [PATCH 022/101] nsinit: clean up Signed-off-by: Andrey Vagin --- nsinit/exec.go | 147 ------------------------------------------------- 1 file changed, 147 deletions(-) diff --git a/nsinit/exec.go b/nsinit/exec.go index 8e0e734f2..266f59356 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -3,20 +3,14 @@ package main import ( "crypto/md5" "fmt" - "io" "log" "os" - "os/exec" - "os/signal" "syscall" "text/tabwriter" "github.com/codegangsta/cli" - "github.com/docker/docker/pkg/term" "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" - consolepkg "github.com/docker/libcontainer/console" - "github.com/docker/libcontainer/namespaces" ) var ( @@ -106,144 +100,3 @@ func execAction(context *cli.Context) { os.Exit(exitCode) } - -// the process for execing a new process inside an existing container is that we have to exec ourself -// with the nsenter argument so that the C code can setns an the namespaces that we require. Then that -// code path will drop us into the path that we can do the final setup of the namespace and exec the users -// application. 
-func startInExistingContainer(config *configs.Config, state *configs.State, action string, context *cli.Context) (int, error) { - var ( - master *os.File - console string - err error - - sigc = make(chan os.Signal, 10) - - stdin = os.Stdin - stdout = os.Stdout - stderr = os.Stderr - ) - signal.Notify(sigc) - - if config.Tty { - stdin = nil - stdout = nil - stderr = nil - - master, console, err = consolepkg.CreateMasterAndConsole() - if err != nil { - return -1, err - } - - go io.Copy(master, os.Stdin) - go io.Copy(os.Stdout, master) - - state, err := term.SetRawTerminal(os.Stdin.Fd()) - if err != nil { - return -1, err - } - - defer term.RestoreTerminal(os.Stdin.Fd(), state) - } - - startCallback := func(cmd *exec.Cmd) { - go func() { - resizeTty(master) - - for sig := range sigc { - switch sig { - case syscall.SIGWINCH: - resizeTty(master) - default: - cmd.Process.Signal(sig) - } - } - }() - } - - return namespaces.ExecIn(config, state, context.Args(), os.Args[0], action, stdin, stdout, stderr, console, startCallback) -} - -// startContainer starts the container. Returns the exit status or -1 and an -// error. -// -// Signals sent to the current process will be forwarded to container. 
-func startContainer(container *configs.Config, dataPath string, args []string) (int, error) { - var ( - cmd *exec.Cmd - sigc = make(chan os.Signal, 10) - ) - - signal.Notify(sigc) - - createCommand := func(container *configs.Config, console, dataPath, init string, pipe *os.File, args []string) *exec.Cmd { - cmd = namespaces.DefaultCreateCommand(container, console, dataPath, init, pipe, args) - if logPath != "" { - cmd.Env = append(cmd.Env, fmt.Sprintf("log=%s", logPath)) - } - return cmd - } - - var ( - master *os.File - console string - err error - - stdin = os.Stdin - stdout = os.Stdout - stderr = os.Stderr - ) - - if container.Tty { - stdin = nil - stdout = nil - stderr = nil - - master, console, err = consolepkg.CreateMasterAndConsole() - if err != nil { - return -1, err - } - - go io.Copy(master, os.Stdin) - go io.Copy(os.Stdout, master) - - state, err := term.SetRawTerminal(os.Stdin.Fd()) - if err != nil { - return -1, err - } - - defer term.RestoreTerminal(os.Stdin.Fd(), state) - } - - startCallback := func() { - go func() { - resizeTty(master) - - for sig := range sigc { - switch sig { - case syscall.SIGWINCH: - resizeTty(master) - default: - cmd.Process.Signal(sig) - } - } - }() - } - - return namespaces.Exec(container, stdin, stdout, stderr, console, dataPath, args, createCommand, startCallback) -} - -func resizeTty(master *os.File) { - if master == nil { - return - } - - ws, err := term.GetWinsize(os.Stdin.Fd()) - if err != nil { - return - } - - if err := term.SetWinsize(master.Fd(), ws); err != nil { - return - } -} From 5ecd29c1f20fec04eece5fdb9c47ae0a736645dd Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 19 Dec 2014 12:40:03 +0300 Subject: [PATCH 023/101] linux_container: fork an init process in a new set of namespaces Use namespace.Exec() and namespace.Init() to execute processes in CT. Now an init process is actually executed in a new container. 
This series doesn't change code about creating containers, it only reworks code according with new API. Signed-off-by: Andrey Vagin --- linux_container.go | 10 +++++--- linux_factory.go | 5 ++-- namespaces/exec.go | 61 ++++++++++++++++++++-------------------------- namespaces/init.go | 38 +++++++++++++++++++++++------ nsinit/init.go | 8 +----- 5 files changed, 68 insertions(+), 54 deletions(-) diff --git a/linux_container.go b/linux_container.go index 99cea6335..0a3945b0d 100644 --- a/linux_container.go +++ b/linux_container.go @@ -11,6 +11,7 @@ import ( "syscall" "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/namespaces" "github.com/docker/libcontainer/network" "github.com/golang/glog" ) @@ -119,16 +120,17 @@ func (c *linuxContainer) startInitProcess(config *ProcessConfig) error { cmd.SysProcAttr = &syscall.SysProcAttr{} } + cmd.SysProcAttr.Cloneflags = uintptr(namespaces.GetNamespaceFlags(c.config.Namespaces)) cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - //FIXME call namespaces.Exec() - if err := cmd.Start(); err != nil { + err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.state) + if err != nil { return err } - c.state.InitPid = cmd.Process.Pid - err := c.updateStateFile() + err = c.updateStateFile() if err != nil { + // FIXME c.Kill() return err } diff --git a/linux_factory.go b/linux_factory.go index 5dc679bdc..ecef9dcd8 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -12,6 +12,7 @@ import ( "github.com/golang/glog" "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/namespaces" ) const ( @@ -164,7 +165,7 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { + pipe := 
os.NewFile(uintptr(pipefd), "pipe") - /* FIXME call namespaces.Init() */ - return nil + return namespaces.Init(pipe) } diff --git a/namespaces/exec.go b/namespaces/exec.go index 32b9ab126..337d54163 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -21,36 +21,45 @@ import ( // Move this to libcontainer package. // Exec performs setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(container *configs.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) { +func Exec(args []string, env []string, command *exec.Cmd, container *configs.Config, state *configs.State) error { var err error // create a pipe so that we can syncronize with the namespaced process and // pass the state and configuration to the child process parent, child, err := newInitPipe() if err != nil { - return -1, err + return err } defer parent.Close() - command := createCommand(container, console, dataPath, os.Args[0], child, args) - // Note: these are only used in non-tty mode - // if there is a tty for the container it will be opened within the namespace and the - // fds will be duped to stdin, stdiout, and stderr - command.Stdin = stdin - command.Stdout = stdout - command.Stderr = stderr + command.ExtraFiles = []*os.File{child} + command.Dir = container.RootFs if err := command.Start(); err != nil { child.Close() - return -1, err + return err } child.Close() - terminate := func(terr error) (int, error) { + terminate := func(terr error) error { // TODO: log the errors for kill and wait command.Process.Kill() command.Wait() - return -1, terr + return terr + } + + encoder := json.NewEncoder(parent) + + if err := encoder.Encode(container); err != nil { + return terminate(err) + } + + process := processArgs{ + Env: env, + Args: args, + } + if err := encoder.Encode(process); err != nil { + return 
terminate(err) } started, err := system.GetProcessStartTime(command.Process.Pid) @@ -71,7 +80,7 @@ func Exec(container *configs.Config, stdin io.Reader, stdout, stderr io.Writer, return terminate(err) } // send the state to the container's init process then shutdown writes for the parent - if err := json.NewEncoder(parent).Encode(networkState); err != nil { + if err := encoder.Encode(networkState); err != nil { return terminate(err) } // shutdown writes for the parent side of the pipe @@ -79,18 +88,6 @@ func Exec(container *configs.Config, stdin io.Reader, stdout, stderr io.Writer, return terminate(err) } - state := &configs.State{ - InitPid: command.Process.Pid, - InitStartTime: started, - NetworkState: networkState, - CgroupPaths: cgroupPaths, - } - - if err := configs.SaveState(dataPath, state); err != nil { - return terminate(err) - } - defer configs.DeleteState(dataPath) - // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *initError @@ -101,16 +98,12 @@ func Exec(container *configs.Config, stdin io.Reader, stdout, stderr io.Writer, return terminate(ierr) } - if startCallback != nil { - startCallback() - } + state.InitPid = command.Process.Pid + state.InitStartTime = started + state.NetworkState = networkState + state.CgroupPaths = cgroupPaths - if err := command.Wait(); err != nil { - if _, ok := err.(*exec.ExitError); !ok { - return -1, err - } - } - return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil + return nil } // DefaultCreateCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces diff --git a/namespaces/init.go b/namespaces/init.go index a6d49f61a..441b3c340 100644 --- a/namespaces/init.go +++ b/namespaces/init.go @@ -24,13 +24,20 @@ import ( "github.com/docker/libcontainer/utils" ) +// Process is used for transferring parameters from Exec() to Init() +type processArgs struct { + Args []string `json:"args,omitempty"` + Env []string 
`json:"environment,omitempty"` + ConsolePath string `json:"console_path,omitempty"` +} + // TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. // Move this to libcontainer package. // Init is the init process that first runs inside a new namespace to setup mounts, users, networking, // and other options required for the new container. // The caller of Init function has to ensure that the go runtime is locked to an OS thread // (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. -func Init(container *configs.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) { +func Init(pipe *os.File) (err error) { defer func() { // if we have an error during the initialization of the container's init then send it back to the // parent process in the form of an initError. @@ -48,6 +55,23 @@ func Init(container *configs.Config, uncleanRootfs, consolePath string, pipe *os pipe.Close() }() + decoder := json.NewDecoder(pipe) + + var container *configs.Config + if err := decoder.Decode(&container); err != nil { + return err + } + + var process *processArgs + if err := decoder.Decode(&process); err != nil { + return err + } + + uncleanRootfs, err := os.Getwd() + if err != nil { + return err + } + rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err @@ -61,22 +85,22 @@ func Init(container *configs.Config, uncleanRootfs, consolePath string, pipe *os // We always read this as it is a way to sync with the parent as well var networkState *network.NetworkState - if err := json.NewDecoder(pipe).Decode(&networkState); err != nil { + if err := decoder.Decode(&networkState); err != nil { return err } // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(container.Namespaces); err != nil { return err } - if consolePath != "" { - if err := console.OpenAndDup(consolePath); err != nil { + if 
process.ConsolePath != "" { + if err := console.OpenAndDup(process.ConsolePath); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return fmt.Errorf("setsid %s", err) } - if consolePath != "" { + if process.ConsolePath != "" { if err := system.Setctty(); err != nil { return fmt.Errorf("setctty %s", err) } @@ -96,7 +120,7 @@ func Init(container *configs.Config, uncleanRootfs, consolePath string, pipe *os label.Init() if err := mount.InitializeMountNamespace(rootfs, - consolePath, + process.ConsolePath, container.RestrictSys, (*mount.MountConfig)(container.MountConfig)); err != nil { return fmt.Errorf("setup mount namespace %s", err) @@ -138,7 +162,7 @@ func Init(container *configs.Config, uncleanRootfs, consolePath string, pipe *os return fmt.Errorf("restore parent death signal %s", err) } - return system.Execv(args[0], args[0:], os.Environ()) + return system.Execv(process.Args[0], process.Args[0:], process.Env) } // RestoreParentDeathSignal sets the parent death signal to old. 
diff --git a/nsinit/init.go b/nsinit/init.go index 853c9e8e9..088361390 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -1,9 +1,7 @@ package main import ( - "github.com/docker/libcontainer/system" "log" - "os" "github.com/codegangsta/cli" "github.com/docker/libcontainer" @@ -36,9 +34,5 @@ func initAction(context *cli.Context) { log.Fatal(err) } - args := []string(context.Args()) - - if err := system.Execv(args[0], args[0:], os.Environ()); err != nil { - log.Fatal(err) - } + panic("This line should never been executed") } From 7996829914e8877392a106574b00d86047530cd1 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 17 Dec 2014 18:05:39 +0300 Subject: [PATCH 024/101] libcontainer: optimize updateStateFile() Signed-off-by: Andrey Vagin --- linux_container.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/linux_container.go b/linux_container.go index 0a3945b0d..bbd0cb1de 100644 --- a/linux_container.go +++ b/linux_container.go @@ -81,20 +81,16 @@ func (c *linuxContainer) StartProcess(pconfig *ProcessConfig) (int, error) { } func (c *linuxContainer) updateStateFile() error { - data, err := json.MarshalIndent(c.state, "", "\t") - if err != nil { - return newGenericError(err, SystemError) - } - fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename)) f, err := os.Create(fnew) if err != nil { return newGenericError(err, SystemError) } - _, err = f.Write(data) + err = json.NewEncoder(f).Encode(c.state) if err != nil { f.Close() + os.Remove(fnew) return newGenericError(err, SystemError) } f.Close() From 540f44d3b2d57d9a4ff5dcd64a686e64a90cf683 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 18 Dec 2014 00:14:49 +0300 Subject: [PATCH 025/101] process: use io.Reader instead of io.WriteCloser for standard fds Could someone explain why we should close these fds? Usually users care about closing them or not. For example exec.Cmd declares them as io.Reader.
Signed-off-by: Andrey Vagin --- process.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/process.go b/process.go index 489666a58..924de2ecf 100644 --- a/process.go +++ b/process.go @@ -17,11 +17,8 @@ type ProcessConfig struct { // If a reader or writer is nil, the input stream is assumed to be empty and the output is // discarded. // - // The readers and writers, if supplied, are closed when the process terminates. Their Close - // methods should be idempotent. - // // Stdout and Stderr may refer to the same writer in which case the output is interspersed. - Stdin io.ReadCloser - Stdout io.WriteCloser - Stderr io.WriteCloser + Stdin io.Reader + Stdout io.Writer + Stderr io.Writer } From 86653c66a33053e73c38baa9b34bf09f6b059e6b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 18 Dec 2014 00:10:41 +0300 Subject: [PATCH 026/101] libcontainer: use new API in integration tests Signed-off-by: Andrey Vagin --- integration/init_test.go | 12 +--------- integration/utils_test.go | 49 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/integration/init_test.go b/integration/init_test.go index 9954c0f8e..095263761 100644 --- a/integration/init_test.go +++ b/integration/init_test.go @@ -16,17 +16,7 @@ func init() { } runtime.LockOSThread() - container, err := loadConfig() - if err != nil { - log.Fatal(err) - } - - rootfs, err := os.Getwd() - if err != nil { - log.Fatal(err) - } - - if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil { + if err := namespaces.Init(os.NewFile(3, "pipe")); err != nil { log.Fatalf("unable to initialize for container: %s", err) } os.Exit(1) diff --git a/integration/utils_test.go b/integration/utils_test.go index 051252fc6..93fe3b4b7 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -8,9 +8,10 @@ import ( "os" "os/exec" "path/filepath" + "syscall" + "github.com/docker/libcontainer"
"github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/namespaces" ) func newStdBuffers() *stdBuffers { @@ -89,7 +90,49 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe } buffers = newStdBuffers() - exitCode, err = namespaces.Exec(config, buffers.Stdin, buffers.Stdout, buffers.Stderr, - console, config.RootFs, args, namespaces.DefaultCreateCommand, nil) + + process := &libcontainer.ProcessConfig{ + Args: args, + Env: make([]string, 0), + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + } + + factory, err := libcontainer.New(".", []string{os.Args[0], "init", "--"}) + if err != nil { + return nil, -1, err + } + + container, err := factory.Create("testCT", config) + if err != nil { + return nil, -1, err + } + defer container.Destroy() + + pid, err := container.StartProcess(process) + if err != nil { + return nil, -1, err + } + + p, err := os.FindProcess(pid) + if err != nil { + return nil, -1, err + } + + ps, err := p.Wait() + if err != nil { + return nil, -1, err + } + + status := ps.Sys().(syscall.WaitStatus) + if status.Exited() { + exitCode = status.ExitStatus() + } else if status.Signaled() { + exitCode = -int(status.Signal()) + } else { + return nil, -1, err + } + return } From c9122076364979861e9d9c5e0bcde53336aa2cf5 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 18 Dec 2014 14:59:29 +0300 Subject: [PATCH 027/101] namespaces: combine Process.Env and Container.Env Signed-off-by: Andrey Vagin --- namespaces/exec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/namespaces/exec.go b/namespaces/exec.go index 337d54163..0822154de 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -55,7 +55,7 @@ func Exec(args []string, env []string, command *exec.Cmd, container *configs.Con } process := processArgs{ - Env: env, + Env: append(env[0:], container.Env...), Args: args, } if err := encoder.Encode(process); err != nil { From 
1a380ac436d47020819bf2398705f7528515844a Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 23 Dec 2014 01:05:56 +0300 Subject: [PATCH 028/101] nsinit: remove ticks around nsenter If we really need these commands, we need to expand the API. Signed-off-by: Andrey Vagin --- nsinit/exec.go | 14 -------- nsinit/main.go | 28 ---------------- nsinit/nsenter.go | 84 ----------------------------------------------- 3 files changed, 126 deletions(-) delete mode 100644 nsinit/nsenter.go diff --git a/nsinit/exec.go b/nsinit/exec.go index 266f59356..6c98c0f3a 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -6,7 +6,6 @@ import ( "log" "os" "syscall" - "text/tabwriter" "github.com/codegangsta/cli" "github.com/docker/libcontainer" @@ -30,19 +29,6 @@ var execCommand = cli.Command{ } func execAction(context *cli.Context) { - if context.Bool("list") { - w := tabwriter.NewWriter(os.Stdout, 10, 1, 3, ' ', 0) - fmt.Fprint(w, "NAME\tUSAGE\n") - - for k, f := range argvs { - fmt.Fprintf(w, "%s\t%s\n", k, f.Usage) - } - - w.Flush() - - return - } - var exitCode int process := &libcontainer.ProcessConfig{ diff --git a/nsinit/main.go b/nsinit/main.go index 561ce3a99..d1e4bf1e7 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -3,43 +3,15 @@ package main import ( "log" "os" - "strings" "github.com/codegangsta/cli" ) var ( logPath = os.Getenv("log") - argvs = make(map[string]*rFunc) ) -func init() { - argvs["exec"] = &rFunc{ - Usage: "execute a process inside an existing container", - Action: nsenterExec, - } - - argvs["mknod"] = &rFunc{ - Usage: "mknod a device inside an existing container", - Action: nsenterMknod, - } - - argvs["ip"] = &rFunc{ - Usage: "display the container's network interfaces", - Action: nsenterIp, - } -} - func main() { - // we need to check our argv 0 for any registred functions to run instead of the - // normal cli code path - f, exists := argvs[strings.TrimPrefix(os.Args[0], "nsenter-")] - if exists { - runFunc(f) - - return - } - app := cli.NewApp() app.Name =
"nsinit" diff --git a/nsinit/nsenter.go b/nsinit/nsenter.go deleted file mode 100644 index 8365215e4..000000000 --- a/nsinit/nsenter.go +++ /dev/null @@ -1,84 +0,0 @@ -package main - -import ( - "fmt" - "log" - "net" - "os" - "strconv" - "strings" - "text/tabwriter" - - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/devices" - "github.com/docker/libcontainer/mount/nodes" - "github.com/docker/libcontainer/namespaces" - _ "github.com/docker/libcontainer/namespaces/nsenter" -) - -// nsenterExec exec's a process inside an existing container -func nsenterExec(config *configs.Config, args []string) { - if err := namespaces.FinalizeSetns(config, args); err != nil { - log.Fatalf("failed to nsenter: %s", err) - } -} - -// nsenterMknod runs mknod inside an existing container -// -// mknod -func nsenterMknod(config *configs.Config, args []string) { - if len(args) != 4 { - log.Fatalf("expected mknod to have 4 arguments not %d", len(args)) - } - - t := rune(args[1][0]) - - major, err := strconv.Atoi(args[2]) - if err != nil { - log.Fatal(err) - } - - minor, err := strconv.Atoi(args[3]) - if err != nil { - log.Fatal(err) - } - - n := &devices.Device{ - Path: args[0], - Type: t, - MajorNumber: int64(major), - MinorNumber: int64(minor), - } - - if err := nodes.CreateDeviceNode("/", n); err != nil { - log.Fatal(err) - } -} - -// nsenterIp displays the network interfaces inside a container's net namespace -func nsenterIp(config *configs.Config, args []string) { - interfaces, err := net.Interfaces() - if err != nil { - log.Fatal(err) - } - - w := tabwriter.NewWriter(os.Stdout, 10, 1, 3, ' ', 0) - fmt.Fprint(w, "NAME\tMTU\tMAC\tFLAG\tADDRS\n") - - for _, iface := range interfaces { - addrs, err := iface.Addrs() - if err != nil { - log.Fatal(err) - } - - o := []string{} - - for _, a := range addrs { - o = append(o, a.String()) - } - - fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n", iface.Name, iface.MTU, iface.HardwareAddr, iface.Flags, strings.Join(o, ",")) - } - - 
w.Flush() -} From 13841ef37da97bfaaed4e14a6c18638a53a62d01 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 23 Dec 2014 16:09:35 +0300 Subject: [PATCH 029/101] new-api: return the Running state only if the init process is alive Signed-off-by: Andrey Vagin --- linux_container.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/linux_container.go b/linux_container.go index bbd0cb1de..ea202df42 100644 --- a/linux_container.go +++ b/linux_container.go @@ -34,7 +34,27 @@ func (c *linuxContainer) Config() *configs.Config { } func (c *linuxContainer) RunState() (configs.RunState, error) { - return configs.Destroyed, nil // FIXME return a real state + if c.state.InitPid <= 0 { + return configs.Destroyed, nil + } + + // return Running if the init process is alive + err := syscall.Kill(c.state.InitPid, 0) + if err != nil { + errn, y := err.(syscall.Errno) + if !y { + return 0, err + } + + if errn == syscall.ESRCH { + return configs.Destroyed, nil + } + return 0, err + } + + //FIXME get a cgroup state to check other states + + return configs.Running, nil } func (c *linuxContainer) Processes() ([]int, error) { From 11ce56a9e07e88f41f0455d0a1e4fdec7d779850 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 23 Dec 2014 16:10:22 +0300 Subject: [PATCH 030/101] new-api: clean up startInitProcess() Signed-off-by: Andrey Vagin --- linux_container.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux_container.go b/linux_container.go index ea202df42..555e93b84 100644 --- a/linux_container.go +++ b/linux_container.go @@ -124,7 +124,7 @@ func (c *linuxContainer) updateStateFile() error { } func (c *linuxContainer) startInitProcess(config *ProcessConfig) error { - cmd := exec.Command(c.initArgs[0], append(c.initArgs[1:], config.Args...)...) + cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) 
cmd.Stdin = config.Stdin cmd.Stdout = config.Stdout cmd.Stderr = config.Stderr From d572094b75e169e7ca9cbd2ca11d0d8336d72b04 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 23 Dec 2014 01:06:22 +0300 Subject: [PATCH 031/101] new-api: execute a process inside an existing container A new constructor function (like nsenter) is added in this patch. This function gets arguments from environment variables and its behaviour doesn't depend on a command line arguments. A program which calls factory.StartInitialization() must import the nsenter package. It looks ugly, but I don't know another way how to enter into CT from a go code. Signed-off-by: Andrey Vagin --- linux_container.go | 42 ++++++------- linux_factory.go | 5 ++ namespaces/execin.go | 104 ++++++++++++++++++------------- namespaces/nsenter/nsenter.go | 1 + namespaces/nsenter/nsexec.c | 114 ++++++++++++++++++++++++++++++++++ nsinit/init.go | 1 + 6 files changed, 201 insertions(+), 66 deletions(-) create mode 100644 namespaces/nsenter/nsexec.c diff --git a/linux_container.go b/linux_container.go index 555e93b84..9189be2e2 100644 --- a/linux_container.go +++ b/linux_container.go @@ -41,12 +41,7 @@ func (c *linuxContainer) RunState() (configs.RunState, error) { // return Running if the init process is alive err := syscall.Kill(c.state.InitPid, 0) if err != nil { - errn, y := err.(syscall.Errno) - if !y { - return 0, err - } - - if errn == syscall.ESRCH { + if err == syscall.ESRCH { return configs.Destroyed, nil } return 0, err @@ -82,18 +77,32 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { return stats, nil } -func (c *linuxContainer) StartProcess(pconfig *ProcessConfig) (int, error) { +func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { state, err := c.RunState() if err != nil { return -1, err } + cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) 
+ cmd.Stdin = config.Stdin + cmd.Stdout = config.Stdout + cmd.Stderr = config.Stderr + + cmd.Env = config.Env + cmd.Dir = c.config.RootFs + + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + if state != configs.Destroyed { glog.Info("start new container process") - panic("not implemented") + return namespaces.ExecIn(config.Args, config.Env, cmd, c.config, c.state) } - if err := c.startInitProcess(pconfig); err != nil { + if err := c.startInitProcess(cmd, config); err != nil { return -1, err } @@ -123,21 +132,8 @@ func (c *linuxContainer) updateStateFile() error { return nil } -func (c *linuxContainer) startInitProcess(config *ProcessConfig) error { - cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) - cmd.Stdin = config.Stdin - cmd.Stdout = config.Stdout - cmd.Stderr = config.Stderr - - cmd.Env = config.Env - cmd.Dir = c.config.RootFs - - if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{} - } - +func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error { cmd.SysProcAttr.Cloneflags = uintptr(namespaces.GetNamespaceFlags(c.config.Namespaces)) - cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.state) if err != nil { diff --git a/linux_factory.go b/linux_factory.go index ecef9dcd8..10e464ec2 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -167,5 +167,10 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { pipe := os.NewFile(uintptr(pipefd), "pipe") + pid := os.Getenv("_LIBCONTAINER_INITPID") + if pid != "" { + return namespaces.InitIn(pipe) + } + return namespaces.Init(pipe) } diff --git a/namespaces/execin.go b/namespaces/execin.go index 2b63b8c6f..5d2708ace 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -5,12 +5,9 @@ package namespaces import ( 
"encoding/json" "fmt" - "io" + "io/ioutil" "os" "os/exec" - "path/filepath" - "strconv" - "syscall" "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" @@ -19,27 +16,10 @@ import ( "github.com/docker/libcontainer/system" ) -// ExecIn reexec's the initPath with the argv 0 rewrite to "nsenter" so that it is able to run the +// ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the // setns code in a single threaded environment joining the existing containers' namespaces. -func ExecIn(container *configs.Config, state *configs.State, userArgs []string, initPath, action string, - stdin io.Reader, stdout, stderr io.Writer, console string, startCallback func(*exec.Cmd)) (int, error) { - - args := []string{fmt.Sprintf("nsenter-%s", action), "--nspid", strconv.Itoa(state.InitPid)} - - if console != "" { - args = append(args, "--console", console) - } - - cmd := &exec.Cmd{ - Path: initPath, - Args: append(args, append([]string{"--"}, userArgs...)...), - } - - if filepath.Base(initPath) == initPath { - if lp, err := exec.LookPath(initPath); err == nil { - cmd.Path = lp - } - } +func ExecIn(args []string, env []string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) { + var err error parent, child, err := newInitPipe() if err != nil { @@ -47,13 +27,8 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string, } defer parent.Close() - // Note: these are only used in non-tty mode - // if there is a tty for the container it will be opened within the namespace and the - // fds will be duped to stdin, stdiout, and stderr - cmd.Stdin = stdin - cmd.Stdout = stdout - cmd.Stderr = stderr cmd.ExtraFiles = []*os.File{child} + cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", state.InitPid)) if err := cmd.Start(); err != nil { child.Close() @@ -68,6 +43,20 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string, return -1, terr } + encoder 
:= json.NewEncoder(parent) + + if err := encoder.Encode(container); err != nil { + return terminate(err) + } + + process := processArgs{ + Env: append(env[0:], container.Env...), + Args: args, + } + if err := encoder.Encode(process); err != nil { + return terminate(err) + } + // Enter cgroups. if err := EnterCgroups(state, cmd.Process.Pid); err != nil { return terminate(err) @@ -77,21 +66,54 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string, return terminate(err) } - if startCallback != nil { - startCallback(cmd) - } + return cmd.Process.Pid, nil +} - if err := cmd.Wait(); err != nil { - if _, ok := err.(*exec.ExitError); !ok { - return -1, err +// Finalize entering into a container and execute a specified command +func InitIn(pipe *os.File) (err error) { + defer func() { + // if we have an error during the initialization of the container's init then send it back to the + // parent process in the form of an initError. + if err != nil { + // ensure that any data sent from the parent is consumed so it doesn't + // receive ECONNRESET when the child writes to the pipe. 
+ ioutil.ReadAll(pipe) + if err := json.NewEncoder(pipe).Encode(initError{ + Message: err.Error(), + }); err != nil { + panic(err) + } } + // ensure that this pipe is always closed + pipe.Close() + }() + + decoder := json.NewDecoder(pipe) + + var container *configs.Config + if err := decoder.Decode(&container); err != nil { + return err + } + + var process *processArgs + if err := decoder.Decode(&process); err != nil { + return err + } + + if err := FinalizeSetns(container); err != nil { + return err } - return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil + + if err := system.Execv(process.Args[0], process.Args[0:], process.Env); err != nil { + return err + } + + panic("unreachable") } // Finalize expects that the setns calls have been setup and that is has joined an // existing namespace -func FinalizeSetns(container *configs.Config, args []string) error { +func FinalizeSetns(container *configs.Config) error { // clear the current processes env and replace it with the environment defined on the container if err := LoadContainerEnvironment(container); err != nil { return err @@ -111,11 +133,7 @@ func FinalizeSetns(container *configs.Config, args []string) error { } } - if err := system.Execv(args[0], args[0:], os.Environ()); err != nil { - return err - } - - panic("unreachable") + return nil } func EnterCgroups(state *configs.State, pid int) error { diff --git a/namespaces/nsenter/nsenter.go b/namespaces/nsenter/nsenter.go index 7d21e8e59..394716145 100644 --- a/namespaces/nsenter/nsenter.go +++ b/namespaces/nsenter/nsenter.go @@ -5,6 +5,7 @@ package nsenter /* __attribute__((constructor)) init() { nsenter(); + nsexec(); } */ import "C" diff --git a/namespaces/nsenter/nsexec.c b/namespaces/nsenter/nsexec.c new file mode 100644 index 000000000..95498bf07 --- /dev/null +++ b/namespaces/nsenter/nsexec.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +// Use 
raw setns syscall for versions of glibc that don't include it (namely glibc-2.12) +#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 +#define _GNU_SOURCE +#include +#include "syscall.h" +#ifdef SYS_setns +int setns(int fd, int nstype) +{ + return syscall(SYS_setns, fd, nstype); +} +#endif +#endif + +void nsexec() +{ + char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" }; + const int num = sizeof(namespaces) / sizeof(char *); + char buf[PATH_MAX], *val; + int child, i, tfd; + pid_t pid; + + val = getenv("_LIBCONTAINER_INITPID"); + if (val == NULL) + return; + + pid = atoi(val); + snprintf(buf, sizeof(buf), "%d", pid); + if (strcmp(val, buf)) { + fprintf(stderr, "Unable to parse _LIBCONTAINER_INITPID"); + exit(1); + } + + /* Check that the specified process exists */ + snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid); + tfd = open(buf, O_DIRECTORY | O_RDONLY); + if (tfd == -1) { + fprintf(stderr, + "nsenter: Failed to open \"%s\" with error: \"%s\"\n", + buf, strerror(errno)); + exit(1); + } + + for (i = 0; i < num; i++) { + struct stat st; + int fd; + + /* Symlinks on all namespaces exist for dead processes, but they can't be opened */ + if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) { + // Ignore nonexistent namespaces. + if (errno == ENOENT) + continue; + } + + fd = openat(tfd, namespaces[i], O_RDONLY); + if (fd == -1) { + fprintf(stderr, + "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", + buf, namespaces[i], strerror(errno)); + exit(1); + } + // Set the namespace. + if (setns(fd, 0) == -1) { + fprintf(stderr, + "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", + namespaces[i], strerror(errno)); + exit(1); + } + close(fd); + } + + child = fork(); + if (child < 0) { + fprintf(stderr, "Unable to fork: %s", strerror(errno)); + exit(1); + } + // We must fork to actually enter the PID namespace. + if (child == 0) { + // Finish executing, let the Go runtime take over. 
+ return; + } else { + // Parent, wait for the child. + int status = 0; + if (waitpid(child, &status, 0) == -1) { + fprintf(stderr, + "nsenter: Failed to waitpid with error: \"%s\"\n", + strerror(errno)); + exit(1); + } + // Forward the child's exit code or re-send its death signal. + if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + kill(getpid(), WTERMSIG(status)); + } + + exit(1); + } + + return; +} diff --git a/nsinit/init.go b/nsinit/init.go index 088361390..bf59345a2 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -5,6 +5,7 @@ import ( "github.com/codegangsta/cli" "github.com/docker/libcontainer" + _ "github.com/docker/libcontainer/namespaces/nsenter" ) var ( From 195a08efbce00f1a121df96945281fa1d2a115ab Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 24 Dec 2014 11:25:00 +0300 Subject: [PATCH 032/101] new-api: set Cloneflags in namespace.Exec() This place looks more suitable. Signed-off-by: Andrey Vagin --- linux_container.go | 2 -- namespaces/exec.go | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/linux_container.go b/linux_container.go index 9189be2e2..7845f8dbe 100644 --- a/linux_container.go +++ b/linux_container.go @@ -133,8 +133,6 @@ func (c *linuxContainer) updateStateFile() error { } func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error { - cmd.SysProcAttr.Cloneflags = uintptr(namespaces.GetNamespaceFlags(c.config.Namespaces)) - err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.state) if err != nil { return err diff --git a/namespaces/exec.go b/namespaces/exec.go index 0822154de..1d7914a0d 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -31,9 +31,10 @@ func Exec(args []string, env []string, command *exec.Cmd, container *configs.Con return err } defer parent.Close() - command.ExtraFiles = []*os.File{child} + command.Dir = container.RootFs + command.SysProcAttr.Cloneflags = 
uintptr(GetNamespaceFlags(container.Namespaces)) if err := command.Start(); err != nil { child.Close() From 6dd7552537728a120a47f3575d1de342f1be8070 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 13 Jan 2015 00:54:00 +0300 Subject: [PATCH 033/101] new-api: implement fs and systemd cgroup managers Signed-off-by: Andrey Vagin --- cgroup_manager.go | 30 ------------------ cgroups/cgroups.go | 11 +++++++ cgroups/fs/apply_raw.go | 43 +++++++++++++++++++------ cgroups/manager/manager.go | 19 ++++++++++++ cgroups/systemd/apply_nosystemd.go | 23 ++++++++++++-- cgroups/systemd/apply_systemd.go | 50 ++++++++++++++++++++---------- linux_container.go | 5 +-- linux_container_test.go | 15 +++++++++ linux_factory.go | 6 ++-- namespaces/exec.go | 23 +++----------- 10 files changed, 144 insertions(+), 81 deletions(-) delete mode 100644 cgroup_manager.go create mode 100644 cgroups/manager/manager.go diff --git a/cgroup_manager.go b/cgroup_manager.go deleted file mode 100644 index 1bcb1bc4f..000000000 --- a/cgroup_manager.go +++ /dev/null @@ -1,30 +0,0 @@ -package libcontainer - -import ( - "github.com/docker/libcontainer/cgroups" -) - -// TODO(vmarmol): Move this to cgroups and rename to Manager. 
-type CgroupManager interface { - GetPids() ([]int, error) - GetStats() (*cgroups.Stats, error) -} - -func NewCgroupManager() CgroupManager { - return &fsManager{} -} - -type fsManager struct { -} - -func (m *fsManager) GetPids() ([]int, error) { - // TODO(vmarmol): Implement - //return fs.GetPids(config) - panic("not implemented") -} - -func (m *fsManager) GetStats() (*cgroups.Stats, error) { - // TODO(vmarmol): Implement - //return fs.GetStats(config) - panic("not implemented") -} diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index fe3600597..0ce7cc977 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -6,6 +6,17 @@ import ( "github.com/docker/libcontainer/devices" ) +type Manager interface { + Apply(pid int) error + + GetPids() ([]int, error) + GetStats() (*Stats, error) + + RemovePaths() error + GetPaths() map[string]string + SetPaths(map[string]string) +} + type FreezerState string const ( diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 6f85793dd..687892866 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -24,6 +24,11 @@ var ( CgroupProcesses = "cgroup.procs" ) +type Manager struct { + Cgroups *cgroups.Cgroup + paths map[string]string +} + // The absolute path to the root of the cgroup hierarchies. 
var cgroupRoot string @@ -57,10 +62,14 @@ type data struct { pid int } -func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { - d, err := getCgroupData(c, pid) +func (m *Manager) Apply(pid int) error { + if m.Cgroups == nil { + return nil + } + + d, err := getCgroupData(m.Cgroups, pid) if err != nil { - return nil, err + return err } paths := make(map[string]string) @@ -71,7 +80,7 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { }() for name, sys := range subsystems { if err := sys.Set(d); err != nil { - return nil, err + return err } // FIXME: Apply should, ideally, be reentrant or be broken up into a separate // create and join phase so that the cgroup hierarchy for a container can be @@ -81,11 +90,25 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { if cgroups.IsNotFound(err) { continue } - return nil, err + return err } paths[name] = p } - return paths, nil + m.paths = paths + + return nil +} + +func (m *Manager) RemovePaths() error { + return cgroups.RemovePaths(m.paths) +} + +func (m *Manager) GetPaths() map[string]string { + return m.paths +} + +func (m *Manager) SetPaths(paths map[string]string) { + m.paths = paths } // Symmetrical public function to update device based cgroups. 
Also available @@ -101,9 +124,9 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error { return devices.Set(d) } -func GetStats(systemPaths map[string]string) (*cgroups.Stats, error) { +func (m *Manager) GetStats() (*cgroups.Stats, error) { stats := cgroups.NewStats() - for name, path := range systemPaths { + for name, path := range m.paths { sys, ok := subsystems[name] if !ok { continue @@ -131,8 +154,8 @@ func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { return freezer.Set(d) } -func GetPids(c *cgroups.Cgroup) ([]int, error) { - d, err := getCgroupData(c, 0) +func (m *Manager) GetPids() ([]int, error) { + d, err := getCgroupData(m.Cgroups, 0) if err != nil { return nil, err } diff --git a/cgroups/manager/manager.go b/cgroups/manager/manager.go new file mode 100644 index 000000000..3ee625b48 --- /dev/null +++ b/cgroups/manager/manager.go @@ -0,0 +1,19 @@ +package manager + +import ( + "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/cgroups/systemd" +) + +func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { + if systemd.UseSystemd() { + return &systemd.Manager{ + Cgroups: cgroups, + } + } + + return &fs.Manager{ + Cgroups: cgroups, + } +} diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go index 4b9a2f5b7..f35eb5947 100644 --- a/cgroups/systemd/apply_nosystemd.go +++ b/cgroups/systemd/apply_nosystemd.go @@ -8,15 +8,34 @@ import ( "github.com/docker/libcontainer/cgroups" ) +type Manager struct { + Cgroups *cgroups.Cgroup +} + func UseSystemd() bool { return false } -func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { +func (m *Manager) Apply(pid int) error { + return fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPids() ([]int, error) { return nil, fmt.Errorf("Systemd not supported") } -func GetPids(c *cgroups.Cgroup) ([]int, error) { +func (m *Manager) RemovePaths() error { + return 
fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPaths() map[string]string { + return nil +} + +func (m *Manager) SetPaths(paths map[string]string) { +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, fmt.Errorf("Systemd not supported") } diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 3d8981143..96ffdebec 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -19,8 +19,9 @@ import ( "github.com/godbus/dbus" ) -type systemdCgroup struct { - cgroup *cgroups.Cgroup +type Manager struct { + Cgroups *cgroups.Cgroup + paths map[string]string } type subsystem interface { @@ -81,16 +82,14 @@ func getIfaceForUnit(unitName string) string { return "Unit" } -func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { +func (m *Manager) Apply(pid int) error { var ( + c = m.Cgroups unitName = getUnitName(c) slice = "system.slice" properties []systemd.Property - res = &systemdCgroup{} ) - res.cgroup = c - if c.Slice != "" { slice = c.Slice } @@ -120,19 +119,19 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { } if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { - return nil, err + return err } if !c.AllowAllDevices { if err := joinDevices(c, pid); err != nil { - return nil, err + return err } } // -1 disables memorySwap if c.MemorySwap >= 0 && (c.Memory != 0 || c.MemorySwap > 0) { if err := joinMemory(c, pid); err != nil { - return nil, err + return err } } @@ -140,11 +139,11 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { // we need to manually join the freezer and cpuset cgroup in systemd // because it does not currently support it via the dbus api. 
if err := joinFreezer(c, pid); err != nil { - return nil, err + return err } if err := joinCpuset(c, pid); err != nil { - return nil, err + return err } paths := make(map[string]string) @@ -158,17 +157,32 @@ func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { "perf_event", "freezer", } { - subsystemPath, err := getSubsystemPath(res.cgroup, sysname) + subsystemPath, err := getSubsystemPath(m.Cgroups, sysname) if err != nil { // Don't fail if a cgroup hierarchy was not found, just skip this subsystem if cgroups.IsNotFound(err) { continue } - return nil, err + return err } paths[sysname] = subsystemPath } - return paths, nil + + m.paths = paths + + return nil +} + +func (m *Manager) RemovePaths() error { + return cgroups.RemovePaths(m.paths) +} + +func (m *Manager) GetPaths() map[string]string { + return m.paths +} + +func (m *Manager) SetPaths(paths map[string]string) { + m.paths = paths } func writeFile(dir, file, data string) error { @@ -229,8 +243,8 @@ func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { return nil } -func GetPids(c *cgroups.Cgroup) ([]int, error) { - path, err := getSubsystemPath(c, "cpu") +func (m *Manager) GetPids() ([]int, error) { + path, err := getSubsystemPath(m.Cgroups, "cpu") if err != nil { return nil, err } @@ -238,6 +252,10 @@ func GetPids(c *cgroups.Cgroup) ([]int, error) { return cgroups.ReadProcsFile(path) } +func (m *Manager) GetStats() (*cgroups.Stats, error) { + panic("not implemented") +} + func getUnitName(c *cgroups.Cgroup) string { return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) } diff --git a/linux_container.go b/linux_container.go index 7845f8dbe..fffd7c67e 100644 --- a/linux_container.go +++ b/linux_container.go @@ -10,6 +10,7 @@ import ( "path/filepath" "syscall" + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/namespaces" "github.com/docker/libcontainer/network" @@ -21,7 +22,7 @@ type linuxContainer struct { root string 
config *configs.Config state *configs.State - cgroupManager CgroupManager + cgroupManager cgroups.Manager initArgs []string } @@ -133,7 +134,7 @@ func (c *linuxContainer) updateStateFile() error { } func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error { - err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.state) + err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.cgroupManager, c.state) if err != nil { return err } diff --git a/linux_container_test.go b/linux_container_test.go index 64d4fb8bc..a3c6e3069 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -22,6 +22,21 @@ func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) { return m.stats, nil } +func (m *mockCgroupManager) Apply(pid int) error { + return nil +} + +func (m *mockCgroupManager) RemovePaths() error { + return nil +} + +func (m *mockCgroupManager) GetPaths() map[string]string { + return nil +} + +func (m *mockCgroupManager) SetPaths(map[string]string) { +} + func TestGetContainerPids(t *testing.T) { container := &linuxContainer{ id: "myid", diff --git a/linux_factory.go b/linux_factory.go index 10e464ec2..a83116686 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -11,6 +11,7 @@ import ( "github.com/golang/glog" + cgroups "github.com/docker/libcontainer/cgroups/manager" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/namespaces" ) @@ -88,7 +89,7 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err return nil, newGenericError(err, SystemError) } - cgroupManager := NewCgroupManager() + cgroupManager := cgroups.NewCgroupManager(config.Cgroups) return &linuxContainer{ id: id, root: containerRoot, @@ -116,7 +117,8 @@ func (l *linuxFactory) Load(id string) (Container, error) { return nil, err } - cgroupManager := NewCgroupManager() + cgroupManager := cgroups.NewCgroupManager(config.Cgroups) + cgroupManager.SetPaths(state.CgroupPaths) glog.Infof("using %s as 
cgroup manager", cgroupManager) return &linuxContainer{ id: id, diff --git a/namespaces/exec.go b/namespaces/exec.go index 1d7914a0d..ff20c414b 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -10,8 +10,6 @@ import ( "syscall" "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/cgroups/systemd" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/system" @@ -21,7 +19,7 @@ import ( // Move this to libcontainer package. // Exec performs setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(args []string, env []string, command *exec.Cmd, container *configs.Config, state *configs.State) error { +func Exec(args []string, env []string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) error { var err error // create a pipe so that we can syncronize with the namespaced process and @@ -70,11 +68,11 @@ func Exec(args []string, env []string, command *exec.Cmd, container *configs.Con // Do this before syncing with child so that no children // can escape the cgroup - cgroupPaths, err := SetupCgroups(container, command.Process.Pid) + err = cgroupManager.Apply(command.Process.Pid) if err != nil { return terminate(err) } - defer cgroups.RemovePaths(cgroupPaths) + defer cgroupManager.RemovePaths() var networkState network.NetworkState if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { @@ -102,7 +100,7 @@ func Exec(args []string, env []string, command *exec.Cmd, container *configs.Con state.InitPid = command.Process.Pid state.InitStartTime = started state.NetworkState = networkState - state.CgroupPaths = cgroupPaths + state.CgroupPaths = cgroupManager.GetPaths() return nil } @@ -140,19 +138,6 @@ func DefaultCreateCommand(container *configs.Config, console, 
dataPath, init str return command } -// SetupCgroups applies the cgroup restrictions to the process running in the container based -// on the container's configuration -func SetupCgroups(container *configs.Config, nspid int) (map[string]string, error) { - if container.Cgroups != nil { - c := container.Cgroups - if systemd.UseSystemd() { - return systemd.Apply(c, nspid) - } - return fs.Apply(c, nspid) - } - return map[string]string{}, nil -} - // InitializeNetworking creates the container's network stack outside of the namespace and moves // interfaces into the container's net namespaces if necessary func InitializeNetworking(container *configs.Config, nspid int, networkState *network.NetworkState) error { From 59e66b818db8695204e576d1962ab07806e8b2ba Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 13 Jan 2015 14:49:46 +0300 Subject: [PATCH 034/101] nsinit: add getContainer() Signed-off-by: Andrey Vagin --- nsinit/exec.go | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/nsinit/exec.go b/nsinit/exec.go index 6c98c0f3a..dc7b21082 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -28,17 +28,7 @@ var execCommand = cli.Command{ }, } -func execAction(context *cli.Context) { - var exitCode int - - process := &libcontainer.ProcessConfig{ - Args: context.Args(), - Env: context.StringSlice("env"), - Stdin: os.Stdin, - Stdout: os.Stdout, - Stderr: os.Stderr, - } - +func getContainer(context *cli.Context) (libcontainer.Container, error) { factory, err := libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) if err != nil { log.Fatal(err) @@ -55,6 +45,22 @@ func execAction(context *cli.Context) { } container, err = factory.Create(id, config) } + + return container, err +} + +func execAction(context *cli.Context) { + var exitCode int + + process := &libcontainer.ProcessConfig{ + Args: context.Args(), + Env: context.StringSlice("env"), + Stdin: os.Stdin, + Stdout: os.Stdout, + 
Stderr: os.Stderr, + } + + container, err := getContainer(context) if err != nil { log.Fatal(err) } From ba4257a146e9416c04f8dd09e57024e77393ae52 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 13 Jan 2015 14:52:14 +0300 Subject: [PATCH 035/101] new-api: add the Freezer method to cgroup.Manager Signed-off-by: Andrey Vagin --- cgroups/cgroups.go | 2 ++ cgroups/fs/apply_raw.go | 6 +++--- cgroups/systemd/apply_nosystemd.go | 4 ++++ cgroups/systemd/apply_systemd.go | 4 ++-- linux_container_test.go | 4 ++++ nsinit/pause.go | 24 ++++++++---------------- 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index 0ce7cc977..e23eb146f 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -12,6 +12,8 @@ type Manager interface { GetPids() ([]int, error) GetStats() (*Stats, error) + Freeze(state FreezerState) error + RemovePaths() error GetPaths() map[string]string SetPaths(map[string]string) diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 687892866..57246c71e 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -141,13 +141,13 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { // Freeze toggles the container's freezer cgroup depending on the state // provided -func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { - d, err := getCgroupData(c, 0) +func (m *Manager) Freeze(state cgroups.FreezerState) error { + d, err := getCgroupData(m.Cgroups, 0) if err != nil { return err } - c.Freezer = state + m.Cgroups.Freezer = state freezer := subsystems["freezer"] diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go index f35eb5947..ef7856e94 100644 --- a/cgroups/systemd/apply_nosystemd.go +++ b/cgroups/systemd/apply_nosystemd.go @@ -39,6 +39,10 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, fmt.Errorf("Systemd not supported") } +func (m *Manager) Freeze(state cgroups.FreezerState) error { + return 
fmt.Errorf("Systemd not supported") +} + func ApplyDevices(c *cgroups.Cgroup, pid int) error { return fmt.Errorf("Systemd not supported") } diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 96ffdebec..3d1fc73b4 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -221,8 +221,8 @@ func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil } -func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { - path, err := getSubsystemPath(c, "freezer") +func (m *Manager) Freeze(state cgroups.FreezerState) error { + path, err := getSubsystemPath(m.Cgroups, "freezer") if err != nil { return err } diff --git a/linux_container_test.go b/linux_container_test.go index a3c6e3069..eb14f13b4 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -37,6 +37,10 @@ func (m *mockCgroupManager) GetPaths() map[string]string { func (m *mockCgroupManager) SetPaths(map[string]string) { } +func (m *mockCgroupManager) Freeze(state cgroups.FreezerState) error { + return nil +} + func TestGetContainerPids(t *testing.T) { container := &linuxContainer{ id: "myid", diff --git a/nsinit/pause.go b/nsinit/pause.go index ada24250c..6ba95cd1b 100644 --- a/nsinit/pause.go +++ b/nsinit/pause.go @@ -4,9 +4,6 @@ import ( "log" "github.com/codegangsta/cli" - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/cgroups/systemd" ) var pauseCommand = cli.Command{ @@ -22,28 +19,23 @@ var unpauseCommand = cli.Command{ } func pauseAction(context *cli.Context) { - if err := toggle(cgroups.Frozen); err != nil { + container, err := getContainer(context) + if err != nil { log.Fatal(err) } -} -func unpauseAction(context *cli.Context) { - if err := toggle(cgroups.Thawed); err != nil { + if err = container.Pause(); err != nil { log.Fatal(err) } } -func toggle(state 
cgroups.FreezerState) error { - container, err := loadConfig() +func unpauseAction(context *cli.Context) { + container, err := getContainer(context) if err != nil { - return err + log.Fatal(err) } - if systemd.UseSystemd() { - err = systemd.Freeze(container.Cgroups, state) - } else { - err = fs.Freeze(container.Cgroups, state) + if err = container.Resume(); err != nil { + log.Fatal(err) } - - return err } From ee6e585e219d813c8acaec833ef04ea50fb1f733 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 14 Jan 2015 18:47:26 +0300 Subject: [PATCH 036/101] cgroups: replace SetPaths on LoadCgroupManager Signed-off-by: Andrey Vagin --- cgroups/cgroups.go | 1 - cgroups/fs/apply_raw.go | 14 +++++--------- cgroups/manager/manager.go | 14 ++++++++++++++ cgroups/systemd/apply_nosystemd.go | 4 +--- cgroups/systemd/apply_systemd.go | 12 ++++-------- linux_container_test.go | 3 --- linux_factory.go | 3 +-- 7 files changed, 25 insertions(+), 26 deletions(-) diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index e23eb146f..973f436a5 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -16,7 +16,6 @@ type Manager interface { RemovePaths() error GetPaths() map[string]string - SetPaths(map[string]string) } type FreezerState string diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 57246c71e..79b241705 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -26,7 +26,7 @@ var ( type Manager struct { Cgroups *cgroups.Cgroup - paths map[string]string + Paths map[string]string } // The absolute path to the root of the cgroup hierarchies. 
@@ -94,21 +94,17 @@ func (m *Manager) Apply(pid int) error { } paths[name] = p } - m.paths = paths + m.Paths = paths return nil } func (m *Manager) RemovePaths() error { - return cgroups.RemovePaths(m.paths) + return cgroups.RemovePaths(m.Paths) } func (m *Manager) GetPaths() map[string]string { - return m.paths -} - -func (m *Manager) SetPaths(paths map[string]string) { - m.paths = paths + return m.Paths } // Symmetrical public function to update device based cgroups. Also available @@ -126,7 +122,7 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error { func (m *Manager) GetStats() (*cgroups.Stats, error) { stats := cgroups.NewStats() - for name, path := range m.paths { + for name, path := range m.Paths { sys, ok := subsystems[name] if !ok { continue diff --git a/cgroups/manager/manager.go b/cgroups/manager/manager.go index 3ee625b48..2ab04c39f 100644 --- a/cgroups/manager/manager.go +++ b/cgroups/manager/manager.go @@ -17,3 +17,17 @@ func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { Cgroups: cgroups, } } + +func LoadCgroupManager(cgroups *cgroups.Cgroup, paths map[string]string) cgroups.Manager { + if systemd.UseSystemd() { + return &systemd.Manager{ + Cgroups: cgroups, + Paths: paths, + } + } + + return &fs.Manager{ + Cgroups: cgroups, + Paths: paths, + } +} diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go index ef7856e94..6bd30d484 100644 --- a/cgroups/systemd/apply_nosystemd.go +++ b/cgroups/systemd/apply_nosystemd.go @@ -10,6 +10,7 @@ import ( type Manager struct { Cgroups *cgroups.Cgroup + Paths map[string]string } func UseSystemd() bool { @@ -32,9 +33,6 @@ func (m *Manager) GetPaths() map[string]string { return nil } -func (m *Manager) SetPaths(paths map[string]string) { -} - func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, fmt.Errorf("Systemd not supported") } diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 3d1fc73b4..a8aa02489 100644 --- 
a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -21,7 +21,7 @@ import ( type Manager struct { Cgroups *cgroups.Cgroup - paths map[string]string + Paths map[string]string } type subsystem interface { @@ -168,21 +168,17 @@ func (m *Manager) Apply(pid int) error { paths[sysname] = subsystemPath } - m.paths = paths + m.Paths = paths return nil } func (m *Manager) RemovePaths() error { - return cgroups.RemovePaths(m.paths) + return cgroups.RemovePaths(m.Paths) } func (m *Manager) GetPaths() map[string]string { - return m.paths -} - -func (m *Manager) SetPaths(paths map[string]string) { - m.paths = paths + return m.Paths } func writeFile(dir, file, data string) error { diff --git a/linux_container_test.go b/linux_container_test.go index eb14f13b4..e2f0fb80f 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -34,9 +34,6 @@ func (m *mockCgroupManager) GetPaths() map[string]string { return nil } -func (m *mockCgroupManager) SetPaths(map[string]string) { -} - func (m *mockCgroupManager) Freeze(state cgroups.FreezerState) error { return nil } diff --git a/linux_factory.go b/linux_factory.go index a83116686..542331d44 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -117,8 +117,7 @@ func (l *linuxFactory) Load(id string) (Container, error) { return nil, err } - cgroupManager := cgroups.NewCgroupManager(config.Cgroups) - cgroupManager.SetPaths(state.CgroupPaths) + cgroupManager := cgroups.LoadCgroupManager(config.Cgroups, state.CgroupPaths) glog.Infof("using %s as cgroup manager", cgroupManager) return &linuxContainer{ id: id, From 083d91f8c361a6804bac3d74e01fd74bea517260 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 14 Jan 2015 18:23:42 +0300 Subject: [PATCH 037/101] cgroups: Add comments for methods of cgroup managers Signed-off-by: Andrey Vagin --- cgroups/cgroups.go | 17 ++++++++++++++++- cgroups/fs/apply_raw.go | 2 +- cgroups/manager/manager.go | 2 ++ cgroups/systemd/apply_nosystemd.go | 2 +- 
cgroups/systemd/apply_systemd.go | 2 +- linux_container_test.go | 2 +- namespaces/exec.go | 2 +- 7 files changed, 23 insertions(+), 6 deletions(-) diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index 973f436a5..894c8125f 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -7,14 +7,29 @@ import ( ) type Manager interface { + // Apply cgroup configuration to the process with the specified pid Apply(pid int) error + // Returns the PIDs inside the cgroup set GetPids() ([]int, error) + + // Returns statistics for the cgroup set GetStats() (*Stats, error) + // Toggles the freezer cgroup according to the specified state Freeze(state FreezerState) error - RemovePaths() error + // Destroys the cgroup set + Destroy() error + + // NewCgroupManager() and LoadCgroupManager() require the following attributes: + // Paths map[string]string + // Cgroups *cgroups.Cgroup + // Paths maps each cgroup subsystem to the path at which it is mounted. + // Cgroups specifies specific cgroup settings for the various subsystems + + // Returns cgroup paths to save in a state file and to be able to + // restore the object later. 
GetPaths() map[string]string } diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 79b241705..59beb7ed8 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -99,7 +99,7 @@ func (m *Manager) Apply(pid int) error { return nil } -func (m *Manager) RemovePaths() error { +func (m *Manager) Destroy() error { return cgroups.RemovePaths(m.Paths) } diff --git a/cgroups/manager/manager.go b/cgroups/manager/manager.go index 2ab04c39f..873968136 100644 --- a/cgroups/manager/manager.go +++ b/cgroups/manager/manager.go @@ -6,6 +6,7 @@ import ( "github.com/docker/libcontainer/cgroups/systemd" ) +// Create a new cgroup manager with specified configuration func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ @@ -18,6 +19,7 @@ func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { } } +// Restore a cgroup manager with specified configuration and state func LoadCgroupManager(cgroups *cgroups.Cgroup, paths map[string]string) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go index 6bd30d484..62928b822 100644 --- a/cgroups/systemd/apply_nosystemd.go +++ b/cgroups/systemd/apply_nosystemd.go @@ -25,7 +25,7 @@ func (m *Manager) GetPids() ([]int, error) { return nil, fmt.Errorf("Systemd not supported") } -func (m *Manager) RemovePaths() error { +func (m *Manager) Destroy() error { return fmt.Errorf("Systemd not supported") } diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index a8aa02489..854d7f3b7 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -173,7 +173,7 @@ func (m *Manager) Apply(pid int) error { return nil } -func (m *Manager) RemovePaths() error { +func (m *Manager) Destroy() error { return cgroups.RemovePaths(m.Paths) } diff --git a/linux_container_test.go b/linux_container_test.go index 
e2f0fb80f..17bd8b9ab 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -26,7 +26,7 @@ func (m *mockCgroupManager) Apply(pid int) error { return nil } -func (m *mockCgroupManager) RemovePaths() error { +func (m *mockCgroupManager) Destroy() error { return nil } diff --git a/namespaces/exec.go b/namespaces/exec.go index ff20c414b..f9526c225 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -72,7 +72,7 @@ func Exec(args []string, env []string, command *exec.Cmd, container *configs.Con if err != nil { return terminate(err) } - defer cgroupManager.RemovePaths() + defer cgroupManager.Destroy() var networkState network.NetworkState if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { From 4eaff5e14ea4c29bc9d37c63195c234e1b3bb407 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 14 Jan 2015 19:39:29 +0300 Subject: [PATCH 038/101] cgroups: don't change a freezer state if an operation failed Signed-off-by: Andrey Vagin --- cgroups/fs/apply_raw.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 59beb7ed8..1fe5faf3b 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -143,11 +143,15 @@ func (m *Manager) Freeze(state cgroups.FreezerState) error { return err } - m.Cgroups.Freezer = state - freezer := subsystems["freezer"] + err = freezer.Set(d) + if err != nil { + return err + } + + m.Cgroups.Freezer = state - return freezer.Set(d) + return nil } func (m *Manager) GetPids() ([]int, error) { From 6334be0ac56b2a38456d65b29326aad8dd0bcc60 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 14 Jan 2015 19:48:25 +0300 Subject: [PATCH 039/101] cgroups: add TODO before NewCgroupManager() Signed-off-by: Andrey Vagin --- cgroups/manager/manager.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cgroups/manager/manager.go b/cgroups/manager/manager.go index 873968136..bd5fd48ab 100644 --- 
a/cgroups/manager/manager.go +++ b/cgroups/manager/manager.go @@ -7,6 +7,9 @@ import ( ) // Create a new cgroup manager with specified configuration +// TODO this object is not really initialized until Apply() is called. +// Maybe make this do the equivalent of Apply() at some point? +// @vmarmol func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ From 085a8fbff7bcc8076595799b926b7cb382f4e891 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Wed, 14 Jan 2015 20:01:05 -0500 Subject: [PATCH 040/101] Vendors glog dependency for the api branch. Signed-off-by: Mrunal Patel (github: mrunalp) --- update-vendor.sh | 1 + vendor/src/github.com/golang/glog/LICENSE | 191 +++ vendor/src/github.com/golang/glog/README | 44 + vendor/src/github.com/golang/glog/glog.go | 1177 +++++++++++++++++ .../src/github.com/golang/glog/glog_file.go | 124 ++ .../src/github.com/golang/glog/glog_test.go | 415 ++++++ 6 files changed, 1952 insertions(+) create mode 100644 vendor/src/github.com/golang/glog/LICENSE create mode 100644 vendor/src/github.com/golang/glog/README create mode 100644 vendor/src/github.com/golang/glog/glog.go create mode 100644 vendor/src/github.com/golang/glog/glog_file.go create mode 100644 vendor/src/github.com/golang/glog/glog_test.go diff --git a/update-vendor.sh b/update-vendor.sh index df66a0a8d..b38493303 100755 --- a/update-vendor.sh +++ b/update-vendor.sh @@ -44,5 +44,6 @@ clone git github.com/codegangsta/cli 1.1.0 clone git github.com/coreos/go-systemd v2 clone git github.com/godbus/dbus v1 clone git github.com/syndtr/gocapability 3c85049eae +clone git github.com/golang/glog 44145f04b68c # intentionally not vendoring Docker itself... 
that'd be a circle :) diff --git a/vendor/src/github.com/golang/glog/LICENSE b/vendor/src/github.com/golang/glog/LICENSE new file mode 100644 index 000000000..37ec93a14 --- /dev/null +++ b/vendor/src/github.com/golang/glog/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file 
distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
+ +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/src/github.com/golang/glog/README b/vendor/src/github.com/golang/glog/README new file mode 100644 index 000000000..5f9c11485 --- /dev/null +++ b/vendor/src/github.com/golang/glog/README @@ -0,0 +1,44 @@ +glog +==== + +Leveled execution logs for Go. + +This is an efficient pure Go implementation of leveled logs in the +manner of the open source C++ package + http://code.google.com/p/google-glog + +By binding methods to booleans it is possible to use the log package +without paying the expense of evaluating the arguments to the log. +Through the -vmodule flag, the package also provides fine-grained +control over logging at the file level. + +The comment from glog.go introduces the ideas: + + Package glog implements logging analogous to the Google-internal + C++ INFO/ERROR/V setup. 
It provides functions Info, Warning, + Error, Fatal, plus formatting variants such as Infof. It + also provides V-style logging controlled by the -v and + -vmodule=file=2 flags. + + Basic examples: + + glog.Info("Prepare to repel boarders") + + glog.Fatalf("Initialization failed: %s", err) + + See the documentation for the V function for an explanation + of these examples: + + if glog.V(2) { + glog.Info("Starting transaction...") + } + + glog.V(2).Infoln("Processed", nItems, "elements") + + +The repository contains an open source version of the log package +used inside Google. The master copy of the source lives inside +Google, not here. The code in this repo is for export only and is not itself +under development. Feature requests will be ignored. + +Send bug reports to golang-nuts@googlegroups.com. diff --git a/vendor/src/github.com/golang/glog/glog.go b/vendor/src/github.com/golang/glog/glog.go new file mode 100644 index 000000000..3e63fffd5 --- /dev/null +++ b/vendor/src/github.com/golang/glog/glog.go @@ -0,0 +1,1177 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package glog implements logging analogous to the Google-internal C++ INFO/ERROR/V setup. +// It provides functions Info, Warning, Error, Fatal, plus formatting variants such as +// Infof. 
It also provides V-style logging controlled by the -v and -vmodule=file=2 flags. +// +// Basic examples: +// +// glog.Info("Prepare to repel boarders") +// +// glog.Fatalf("Initialization failed: %s", err) +// +// See the documentation for the V function for an explanation of these examples: +// +// if glog.V(2) { +// glog.Info("Starting transaction...") +// } +// +// glog.V(2).Infoln("Processed", nItems, "elements") +// +// Log output is buffered and written periodically using Flush. Programs +// should call Flush before exiting to guarantee all log output is written. +// +// By default, all log statements write to files in a temporary directory. +// This package provides several flags that modify this behavior. +// As a result, flag.Parse must be called before any logging is done. +// +// -logtostderr=false +// Logs are written to standard error instead of to files. +// -alsologtostderr=false +// Logs are written to standard error as well as to files. +// -stderrthreshold=ERROR +// Log events at or above this severity are logged to standard +// error as well as to files. +// -log_dir="" +// Log files will be written to this directory instead of the +// default temporary directory. +// +// Other flags provide aids to debugging. +// +// -log_backtrace_at="" +// When set to a file and line number holding a logging statement, +// such as +// -log_backtrace_at=gopherflakes.go:234 +// a stack trace will be written to the Info log whenever execution +// hits that statement. (Unlike with -vmodule, the ".go" must be +// present.) +// -v=0 +// Enable V-leveled logging at the specified level. +// -vmodule="" +// The syntax of the argument is a comma-separated list of pattern=N, +// where pattern is a literal file name (minus the ".go" suffix) or +// "glob" pattern and N is a V level. For instance, +// -vmodule=gopher*=3 +// sets the V level to 3 in all Go files whose names begin "gopher". 
+// +package glog + +import ( + "bufio" + "bytes" + "errors" + "flag" + "fmt" + "io" + stdLog "log" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" +) + +// severity identifies the sort of log: info, warning etc. It also implements +// the flag.Value interface. The -stderrthreshold flag is of type severity and +// should be modified only through the flag.Value interface. The values match +// the corresponding constants in C++. +type severity int32 // sync/atomic int32 + +// These constants identify the log levels in order of increasing severity. +// A message written to a high-severity log file is also written to each +// lower-severity log file. +const ( + infoLog severity = iota + warningLog + errorLog + fatalLog + numSeverity = 4 +) + +const severityChar = "IWEF" + +var severityName = []string{ + infoLog: "INFO", + warningLog: "WARNING", + errorLog: "ERROR", + fatalLog: "FATAL", +} + +// get returns the value of the severity. +func (s *severity) get() severity { + return severity(atomic.LoadInt32((*int32)(s))) +} + +// set sets the value of the severity. +func (s *severity) set(val severity) { + atomic.StoreInt32((*int32)(s), int32(val)) +} + +// String is part of the flag.Value interface. +func (s *severity) String() string { + return strconv.FormatInt(int64(*s), 10) +} + +// Get is part of the flag.Value interface. +func (s *severity) Get() interface{} { + return *s +} + +// Set is part of the flag.Value interface. +func (s *severity) Set(value string) error { + var threshold severity + // Is it a known name? 
+ if v, ok := severityByName(value); ok { + threshold = v + } else { + v, err := strconv.Atoi(value) + if err != nil { + return err + } + threshold = severity(v) + } + logging.stderrThreshold.set(threshold) + return nil +} + +func severityByName(s string) (severity, bool) { + s = strings.ToUpper(s) + for i, name := range severityName { + if name == s { + return severity(i), true + } + } + return 0, false +} + +// OutputStats tracks the number of output lines and bytes written. +type OutputStats struct { + lines int64 + bytes int64 +} + +// Lines returns the number of lines written. +func (s *OutputStats) Lines() int64 { + return atomic.LoadInt64(&s.lines) +} + +// Bytes returns the number of bytes written. +func (s *OutputStats) Bytes() int64 { + return atomic.LoadInt64(&s.bytes) +} + +// Stats tracks the number of lines of output and number of bytes +// per severity level. Values must be read with atomic.LoadInt64. +var Stats struct { + Info, Warning, Error OutputStats +} + +var severityStats = [numSeverity]*OutputStats{ + infoLog: &Stats.Info, + warningLog: &Stats.Warning, + errorLog: &Stats.Error, +} + +// Level is exported because it appears in the arguments to V and is +// the type of the v flag, which can be set programmatically. +// It's a distinct type because we want to discriminate it from logType. +// Variables of type level are only changed under logging.mu. +// The -v flag is read only with atomic ops, so the state of the logging +// module is consistent. + +// Level is treated as a sync/atomic int32. + +// Level specifies a level of verbosity for V logs. *Level implements +// flag.Value; the -v flag is of type Level and should be modified +// only through the flag.Value interface. +type Level int32 + +// get returns the value of the Level. +func (l *Level) get() Level { + return Level(atomic.LoadInt32((*int32)(l))) +} + +// set sets the value of the Level. 
+func (l *Level) set(val Level) { + atomic.StoreInt32((*int32)(l), int32(val)) +} + +// String is part of the flag.Value interface. +func (l *Level) String() string { + return strconv.FormatInt(int64(*l), 10) +} + +// Get is part of the flag.Value interface. +func (l *Level) Get() interface{} { + return *l +} + +// Set is part of the flag.Value interface. +func (l *Level) Set(value string) error { + v, err := strconv.Atoi(value) + if err != nil { + return err + } + logging.mu.Lock() + defer logging.mu.Unlock() + logging.setVState(Level(v), logging.vmodule.filter, false) + return nil +} + +// moduleSpec represents the setting of the -vmodule flag. +type moduleSpec struct { + filter []modulePat +} + +// modulePat contains a filter for the -vmodule flag. +// It holds a verbosity level and a file pattern to match. +type modulePat struct { + pattern string + literal bool // The pattern is a literal string + level Level +} + +// match reports whether the file matches the pattern. It uses a string +// comparison if the pattern contains no metacharacters. +func (m *modulePat) match(file string) bool { + if m.literal { + return file == m.pattern + } + match, _ := filepath.Match(m.pattern, file) + return match +} + +func (m *moduleSpec) String() string { + // Lock because the type is not atomic. TODO: clean this up. + logging.mu.Lock() + defer logging.mu.Unlock() + var b bytes.Buffer + for i, f := range m.filter { + if i > 0 { + b.WriteRune(',') + } + fmt.Fprintf(&b, "%s=%d", f.pattern, f.level) + } + return b.String() +} + +// Get is part of the (Go 1.2) flag.Getter interface. It always returns nil for this flag type since the +// struct is not exported. 
+func (m *moduleSpec) Get() interface{} { + return nil +} + +var errVmoduleSyntax = errors.New("syntax error: expect comma-separated list of filename=N") + +// Syntax: -vmodule=recordio=2,file=1,gfs*=3 +func (m *moduleSpec) Set(value string) error { + var filter []modulePat + for _, pat := range strings.Split(value, ",") { + if len(pat) == 0 { + // Empty strings such as from a trailing comma can be ignored. + continue + } + patLev := strings.Split(pat, "=") + if len(patLev) != 2 || len(patLev[0]) == 0 || len(patLev[1]) == 0 { + return errVmoduleSyntax + } + pattern := patLev[0] + v, err := strconv.Atoi(patLev[1]) + if err != nil { + return errors.New("syntax error: expect comma-separated list of filename=N") + } + if v < 0 { + return errors.New("negative value for vmodule level") + } + if v == 0 { + continue // Ignore. It's harmless but no point in paying the overhead. + } + // TODO: check syntax of filter? + filter = append(filter, modulePat{pattern, isLiteral(pattern), Level(v)}) + } + logging.mu.Lock() + defer logging.mu.Unlock() + logging.setVState(logging.verbosity, filter, true) + return nil +} + +// isLiteral reports whether the pattern is a literal string, that is, has no metacharacters +// that require filepath.Match to be called to match the pattern. +func isLiteral(pattern string) bool { + return !strings.ContainsAny(pattern, `\*?[]`) +} + +// traceLocation represents the setting of the -log_backtrace_at flag. +type traceLocation struct { + file string + line int +} + +// isSet reports whether the trace location has been specified. +// logging.mu is held. +func (t *traceLocation) isSet() bool { + return t.line > 0 +} + +// match reports whether the specified file and line matches the trace location. +// The argument file name is the full path, not the basename specified in the flag. +// logging.mu is held. 
+func (t *traceLocation) match(file string, line int) bool { + if t.line != line { + return false + } + if i := strings.LastIndex(file, "/"); i >= 0 { + file = file[i+1:] + } + return t.file == file +} + +func (t *traceLocation) String() string { + // Lock because the type is not atomic. TODO: clean this up. + logging.mu.Lock() + defer logging.mu.Unlock() + return fmt.Sprintf("%s:%d", t.file, t.line) +} + +// Get is part of the (Go 1.2) flag.Getter interface. It always returns nil for this flag type since the +// struct is not exported +func (t *traceLocation) Get() interface{} { + return nil +} + +var errTraceSyntax = errors.New("syntax error: expect file.go:234") + +// Syntax: -log_backtrace_at=gopherflakes.go:234 +// Note that unlike vmodule the file extension is included here. +func (t *traceLocation) Set(value string) error { + if value == "" { + // Unset. + t.line = 0 + t.file = "" + } + fields := strings.Split(value, ":") + if len(fields) != 2 { + return errTraceSyntax + } + file, line := fields[0], fields[1] + if !strings.Contains(file, ".") { + return errTraceSyntax + } + v, err := strconv.Atoi(line) + if err != nil { + return errTraceSyntax + } + if v <= 0 { + return errors.New("negative or zero value for level") + } + logging.mu.Lock() + defer logging.mu.Unlock() + t.line = v + t.file = file + return nil +} + +// flushSyncWriter is the interface satisfied by logging destinations. 
+type flushSyncWriter interface { + Flush() error + Sync() error + io.Writer +} + +func init() { + flag.BoolVar(&logging.toStderr, "logtostderr", false, "log to standard error instead of files") + flag.BoolVar(&logging.alsoToStderr, "alsologtostderr", false, "log to standard error as well as files") + flag.Var(&logging.verbosity, "v", "log level for V logs") + flag.Var(&logging.stderrThreshold, "stderrthreshold", "logs at or above this threshold go to stderr") + flag.Var(&logging.vmodule, "vmodule", "comma-separated list of pattern=N settings for file-filtered logging") + flag.Var(&logging.traceLocation, "log_backtrace_at", "when logging hits line file:N, emit a stack trace") + + // Default stderrThreshold is ERROR. + logging.stderrThreshold = errorLog + + logging.setVState(0, nil, false) + go logging.flushDaemon() +} + +// Flush flushes all pending log I/O. +func Flush() { + logging.lockAndFlushAll() +} + +// loggingT collects all the global state of the logging setup. +type loggingT struct { + // Boolean flags. Not handled atomically because the flag.Value interface + // does not let us avoid the =true, and that shorthand is necessary for + // compatibility. TODO: does this matter enough to fix? Seems unlikely. + toStderr bool // The -logtostderr flag. + alsoToStderr bool // The -alsologtostderr flag. + + // Level flag. Handled atomically. + stderrThreshold severity // The -stderrthreshold flag. + + // freeList is a list of byte buffers, maintained under freeListMu. + freeList *buffer + // freeListMu maintains the free list. It is separate from the main mutex + // so buffers can be grabbed and printed to without holding the main lock, + // for better parallelization. + freeListMu sync.Mutex + + // mu protects the remaining elements of this structure and is + // used to synchronize logging. + mu sync.Mutex + // file holds writer for each of the log types. 
+ file [numSeverity]flushSyncWriter + // pcs is used in V to avoid an allocation when computing the caller's PC. + pcs [1]uintptr + // vmap is a cache of the V Level for each V() call site, identified by PC. + // It is wiped whenever the vmodule flag changes state. + vmap map[uintptr]Level + // filterLength stores the length of the vmodule filter chain. If greater + // than zero, it means vmodule is enabled. It may be read safely + // using sync.LoadInt32, but is only modified under mu. + filterLength int32 + // traceLocation is the state of the -log_backtrace_at flag. + traceLocation traceLocation + // These flags are modified only under lock, although verbosity may be fetched + // safely using atomic.LoadInt32. + vmodule moduleSpec // The state of the -vmodule flag. + verbosity Level // V logging level, the value of the -v flag/ +} + +// buffer holds a byte Buffer for reuse. The zero value is ready for use. +type buffer struct { + bytes.Buffer + tmp [64]byte // temporary byte array for creating headers. + next *buffer +} + +var logging loggingT + +// setVState sets a consistent state for V logging. +// l.mu is held. +func (l *loggingT) setVState(verbosity Level, filter []modulePat, setFilter bool) { + // Turn verbosity off so V will not fire while we are in transition. + logging.verbosity.set(0) + // Ditto for filter length. + atomic.StoreInt32(&logging.filterLength, 0) + + // Set the new filters and wipe the pc->Level map if the filter has changed. + if setFilter { + logging.vmodule.filter = filter + logging.vmap = make(map[uintptr]Level) + } + + // Things are consistent now, so enable filtering and verbosity. + // They are enabled in order opposite to that in V. + atomic.StoreInt32(&logging.filterLength, int32(len(filter))) + logging.verbosity.set(verbosity) +} + +// getBuffer returns a new, ready-to-use buffer. 
+func (l *loggingT) getBuffer() *buffer { + l.freeListMu.Lock() + b := l.freeList + if b != nil { + l.freeList = b.next + } + l.freeListMu.Unlock() + if b == nil { + b = new(buffer) + } else { + b.next = nil + b.Reset() + } + return b +} + +// putBuffer returns a buffer to the free list. +func (l *loggingT) putBuffer(b *buffer) { + if b.Len() >= 256 { + // Let big buffers die a natural death. + return + } + l.freeListMu.Lock() + b.next = l.freeList + l.freeList = b + l.freeListMu.Unlock() +} + +var timeNow = time.Now // Stubbed out for testing. + +/* +header formats a log header as defined by the C++ implementation. +It returns a buffer containing the formatted header and the user's file and line number. +The depth specifies how many stack frames above lives the source line to be identified in the log message. + +Log lines have this form: + Lmmdd hh:mm:ss.uuuuuu threadid file:line] msg... +where the fields are defined as follows: + L A single character, representing the log level (eg 'I' for INFO) + mm The month (zero padded; ie May is '05') + dd The day (zero padded) + hh:mm:ss.uuuuuu Time in hours, minutes and fractional seconds + threadid The space-padded thread ID as returned by GetTID() + file The file name + line The line number + msg The user-supplied message +*/ +func (l *loggingT) header(s severity, depth int) (*buffer, string, int) { + _, file, line, ok := runtime.Caller(3 + depth) + if !ok { + file = "???" + line = 1 + } else { + slash := strings.LastIndex(file, "/") + if slash >= 0 { + file = file[slash+1:] + } + } + return l.formatHeader(s, file, line), file, line +} + +// formatHeader formats a log header using the provided file name and line number. +func (l *loggingT) formatHeader(s severity, file string, line int) *buffer { + now := timeNow() + if line < 0 { + line = 0 // not a real line number, but acceptable to someDigits + } + if s > fatalLog { + s = infoLog // for safety. + } + buf := l.getBuffer() + + // Avoid Fprintf, for speed. 
The format is so simple that we can do it quickly by hand. + // It's worth about 3X. Fprintf is hard. + _, month, day := now.Date() + hour, minute, second := now.Clock() + // Lmmdd hh:mm:ss.uuuuuu threadid file:line] + buf.tmp[0] = severityChar[s] + buf.twoDigits(1, int(month)) + buf.twoDigits(3, day) + buf.tmp[5] = ' ' + buf.twoDigits(6, hour) + buf.tmp[8] = ':' + buf.twoDigits(9, minute) + buf.tmp[11] = ':' + buf.twoDigits(12, second) + buf.tmp[14] = '.' + buf.nDigits(6, 15, now.Nanosecond()/1000, '0') + buf.tmp[21] = ' ' + buf.nDigits(7, 22, pid, ' ') // TODO: should be TID + buf.tmp[29] = ' ' + buf.Write(buf.tmp[:30]) + buf.WriteString(file) + buf.tmp[0] = ':' + n := buf.someDigits(1, line) + buf.tmp[n+1] = ']' + buf.tmp[n+2] = ' ' + buf.Write(buf.tmp[:n+3]) + return buf +} + +// Some custom tiny helper functions to print the log header efficiently. + +const digits = "0123456789" + +// twoDigits formats a zero-prefixed two-digit integer at buf.tmp[i]. +func (buf *buffer) twoDigits(i, d int) { + buf.tmp[i+1] = digits[d%10] + d /= 10 + buf.tmp[i] = digits[d%10] +} + +// nDigits formats an n-digit integer at buf.tmp[i], +// padding with pad on the left. +// It assumes d >= 0. +func (buf *buffer) nDigits(n, i, d int, pad byte) { + j := n - 1 + for ; j >= 0 && d > 0; j-- { + buf.tmp[i+j] = digits[d%10] + d /= 10 + } + for ; j >= 0; j-- { + buf.tmp[i+j] = pad + } +} + +// someDigits formats a zero-prefixed variable-width integer at buf.tmp[i]. +func (buf *buffer) someDigits(i, d int) int { + // Print into the top, then copy down. We know there's space for at least + // a 10-digit number. + j := len(buf.tmp) + for { + j-- + buf.tmp[j] = digits[d%10] + d /= 10 + if d == 0 { + break + } + } + return copy(buf.tmp[i:], buf.tmp[j:]) +} + +func (l *loggingT) println(s severity, args ...interface{}) { + buf, file, line := l.header(s, 0) + fmt.Fprintln(buf, args...) 
+ l.output(s, buf, file, line, false) +} + +func (l *loggingT) print(s severity, args ...interface{}) { + l.printDepth(s, 1, args...) +} + +func (l *loggingT) printDepth(s severity, depth int, args ...interface{}) { + buf, file, line := l.header(s, depth) + fmt.Fprint(buf, args...) + if buf.Bytes()[buf.Len()-1] != '\n' { + buf.WriteByte('\n') + } + l.output(s, buf, file, line, false) +} + +func (l *loggingT) printf(s severity, format string, args ...interface{}) { + buf, file, line := l.header(s, 0) + fmt.Fprintf(buf, format, args...) + if buf.Bytes()[buf.Len()-1] != '\n' { + buf.WriteByte('\n') + } + l.output(s, buf, file, line, false) +} + +// printWithFileLine behaves like print but uses the provided file and line number. If +// alsoLogToStderr is true, the log message always appears on standard error; it +// will also appear in the log file unless --logtostderr is set. +func (l *loggingT) printWithFileLine(s severity, file string, line int, alsoToStderr bool, args ...interface{}) { + buf := l.formatHeader(s, file, line) + fmt.Fprint(buf, args...) + if buf.Bytes()[buf.Len()-1] != '\n' { + buf.WriteByte('\n') + } + l.output(s, buf, file, line, alsoToStderr) +} + +// output writes the data to the log files and releases the buffer. +func (l *loggingT) output(s severity, buf *buffer, file string, line int, alsoToStderr bool) { + l.mu.Lock() + if l.traceLocation.isSet() { + if l.traceLocation.match(file, line) { + buf.Write(stacks(false)) + } + } + data := buf.Bytes() + if l.toStderr { + os.Stderr.Write(data) + } else { + if alsoToStderr || l.alsoToStderr || s >= l.stderrThreshold.get() { + os.Stderr.Write(data) + } + if l.file[s] == nil { + if err := l.createFiles(s); err != nil { + os.Stderr.Write(data) // Make sure the message appears somewhere. 
+ l.exit(err) + } + } + switch s { + case fatalLog: + l.file[fatalLog].Write(data) + fallthrough + case errorLog: + l.file[errorLog].Write(data) + fallthrough + case warningLog: + l.file[warningLog].Write(data) + fallthrough + case infoLog: + l.file[infoLog].Write(data) + } + } + if s == fatalLog { + // If we got here via Exit rather than Fatal, print no stacks. + if atomic.LoadUint32(&fatalNoStacks) > 0 { + l.mu.Unlock() + timeoutFlush(10 * time.Second) + os.Exit(1) + } + // Dump all goroutine stacks before exiting. + // First, make sure we see the trace for the current goroutine on standard error. + // If -logtostderr has been specified, the loop below will do that anyway + // as the first stack in the full dump. + if !l.toStderr { + os.Stderr.Write(stacks(false)) + } + // Write the stack trace for all goroutines to the files. + trace := stacks(true) + logExitFunc = func(error) {} // If we get a write error, we'll still exit below. + for log := fatalLog; log >= infoLog; log-- { + if f := l.file[log]; f != nil { // Can be nil if -logtostderr is set. + f.Write(trace) + } + } + l.mu.Unlock() + timeoutFlush(10 * time.Second) + os.Exit(255) // C++ uses -1, which is silly because it's anded with 255 anyway. + } + l.putBuffer(buf) + l.mu.Unlock() + if stats := severityStats[s]; stats != nil { + atomic.AddInt64(&stats.lines, 1) + atomic.AddInt64(&stats.bytes, int64(len(data))) + } +} + +// timeoutFlush calls Flush and returns when it completes or after timeout +// elapses, whichever happens first. This is needed because the hooks invoked +// by Flush may deadlock when glog.Fatal is called from a hook that holds +// a lock. 
+func timeoutFlush(timeout time.Duration) { + done := make(chan bool, 1) + go func() { + Flush() // calls logging.lockAndFlushAll() + done <- true + }() + select { + case <-done: + case <-time.After(timeout): + fmt.Fprintln(os.Stderr, "glog: Flush took longer than", timeout) + } +} + +// stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines. +func stacks(all bool) []byte { + // We don't know how big the traces are, so grow a few times if they don't fit. Start large, though. + n := 10000 + if all { + n = 100000 + } + var trace []byte + for i := 0; i < 5; i++ { + trace = make([]byte, n) + nbytes := runtime.Stack(trace, all) + if nbytes < len(trace) { + return trace[:nbytes] + } + n *= 2 + } + return trace +} + +// logExitFunc provides a simple mechanism to override the default behavior +// of exiting on error. Used in testing and to guarantee we reach a required exit +// for fatal logs. Instead, exit could be a function rather than a method but that +// would make its use clumsier. +var logExitFunc func(error) + +// exit is called if there is trouble creating or writing log files. +// It flushes the logs and exits the program; there's no point in hanging around. +// l.mu is held. +func (l *loggingT) exit(err error) { + fmt.Fprintf(os.Stderr, "log: exiting because of error: %s\n", err) + // If logExitFunc is set, we do that instead of exiting. + if logExitFunc != nil { + logExitFunc(err) + return + } + l.flushAll() + os.Exit(2) +} + +// syncBuffer joins a bufio.Writer to its underlying file, providing access to the +// file's Sync method and providing a wrapper for the Write method that provides log +// file rotation. There are conflicting methods, so the file cannot be embedded. +// l.mu is held for all its methods. 
+type syncBuffer struct { + logger *loggingT + *bufio.Writer + file *os.File + sev severity + nbytes uint64 // The number of bytes written to this file +} + +func (sb *syncBuffer) Sync() error { + return sb.file.Sync() +} + +func (sb *syncBuffer) Write(p []byte) (n int, err error) { + if sb.nbytes+uint64(len(p)) >= MaxSize { + if err := sb.rotateFile(time.Now()); err != nil { + sb.logger.exit(err) + } + } + n, err = sb.Writer.Write(p) + sb.nbytes += uint64(n) + if err != nil { + sb.logger.exit(err) + } + return +} + +// rotateFile closes the syncBuffer's file and starts a new one. +func (sb *syncBuffer) rotateFile(now time.Time) error { + if sb.file != nil { + sb.Flush() + sb.file.Close() + } + var err error + sb.file, _, err = create(severityName[sb.sev], now) + sb.nbytes = 0 + if err != nil { + return err + } + + sb.Writer = bufio.NewWriterSize(sb.file, bufferSize) + + // Write header. + var buf bytes.Buffer + fmt.Fprintf(&buf, "Log file created at: %s\n", now.Format("2006/01/02 15:04:05")) + fmt.Fprintf(&buf, "Running on machine: %s\n", host) + fmt.Fprintf(&buf, "Binary: Built with %s %s for %s/%s\n", runtime.Compiler, runtime.Version(), runtime.GOOS, runtime.GOARCH) + fmt.Fprintf(&buf, "Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg\n") + n, err := sb.file.Write(buf.Bytes()) + sb.nbytes += uint64(n) + return err +} + +// bufferSize sizes the buffer associated with each log file. It's large +// so that log records can accumulate without the logging thread blocking +// on disk I/O. The flushDaemon will block instead. +const bufferSize = 256 * 1024 + +// createFiles creates all the log files for severity from sev down to infoLog. +// l.mu is held. +func (l *loggingT) createFiles(sev severity) error { + now := time.Now() + // Files are created in decreasing severity order, so as soon as we find one + // has already been created, we can stop. 
+ for s := sev; s >= infoLog && l.file[s] == nil; s-- { + sb := &syncBuffer{ + logger: l, + sev: s, + } + if err := sb.rotateFile(now); err != nil { + return err + } + l.file[s] = sb + } + return nil +} + +const flushInterval = 30 * time.Second + +// flushDaemon periodically flushes the log file buffers. +func (l *loggingT) flushDaemon() { + for _ = range time.NewTicker(flushInterval).C { + l.lockAndFlushAll() + } +} + +// lockAndFlushAll is like flushAll but locks l.mu first. +func (l *loggingT) lockAndFlushAll() { + l.mu.Lock() + l.flushAll() + l.mu.Unlock() +} + +// flushAll flushes all the logs and attempts to "sync" their data to disk. +// l.mu is held. +func (l *loggingT) flushAll() { + // Flush from fatal down, in case there's trouble flushing. + for s := fatalLog; s >= infoLog; s-- { + file := l.file[s] + if file != nil { + file.Flush() // ignore error + file.Sync() // ignore error + } + } +} + +// CopyStandardLogTo arranges for messages written to the Go "log" package's +// default logs to also appear in the Google logs for the named and lower +// severities. Subsequent changes to the standard log's default output location +// or format may break this behavior. +// +// Valid names are "INFO", "WARNING", "ERROR", and "FATAL". If the name is not +// recognized, CopyStandardLogTo panics. +func CopyStandardLogTo(name string) { + sev, ok := severityByName(name) + if !ok { + panic(fmt.Sprintf("log.CopyStandardLogTo(%q): unrecognized severity name", name)) + } + // Set a log format that captures the user's file and line: + // d.go:23: message + stdLog.SetFlags(stdLog.Lshortfile) + stdLog.SetOutput(logBridge(sev)) +} + +// logBridge provides the Write method that enables CopyStandardLogTo to connect +// Go's standard logs to the logs provided by this package. +type logBridge severity + +// Write parses the standard logging line and passes its components to the +// logger for severity(lb). 
+func (lb logBridge) Write(b []byte) (n int, err error) { + var ( + file = "???" + line = 1 + text string + ) + // Split "d.go:23: message" into "d.go", "23", and "message". + if parts := bytes.SplitN(b, []byte{':'}, 3); len(parts) != 3 || len(parts[0]) < 1 || len(parts[2]) < 1 { + text = fmt.Sprintf("bad log format: %s", b) + } else { + file = string(parts[0]) + text = string(parts[2][1:]) // skip leading space + line, err = strconv.Atoi(string(parts[1])) + if err != nil { + text = fmt.Sprintf("bad line number: %s", b) + line = 1 + } + } + // printWithFileLine with alsoToStderr=true, so standard log messages + // always appear on standard error. + logging.printWithFileLine(severity(lb), file, line, true, text) + return len(b), nil +} + +// setV computes and remembers the V level for a given PC +// when vmodule is enabled. +// File pattern matching takes the basename of the file, stripped +// of its .go suffix, and uses filepath.Match, which is a little more +// general than the *? matching used in C++. +// l.mu is held. +func (l *loggingT) setV(pc uintptr) Level { + fn := runtime.FuncForPC(pc) + file, _ := fn.FileLine(pc) + // The file is something like /a/b/c/d.go. We want just the d. + if strings.HasSuffix(file, ".go") { + file = file[:len(file)-3] + } + if slash := strings.LastIndex(file, "/"); slash >= 0 { + file = file[slash+1:] + } + for _, filter := range l.vmodule.filter { + if filter.match(file) { + l.vmap[pc] = filter.level + return filter.level + } + } + l.vmap[pc] = 0 + return 0 +} + +// Verbose is a boolean type that implements Infof (like Printf) etc. +// See the documentation of V for more information. +type Verbose bool + +// V reports whether verbosity at the call site is at least the requested level. +// The returned value is a boolean of type Verbose, which implements Info, Infoln +// and Infof. These methods will write to the Info log if called. 
+// Thus, one may write either +// if glog.V(2) { glog.Info("log this") } +// or +// glog.V(2).Info("log this") +// The second form is shorter but the first is cheaper if logging is off because it does +// not evaluate its arguments. +// +// Whether an individual call to V generates a log record depends on the setting of +// the -v and --vmodule flags; both are off by default. If the level in the call to +// V is at least the value of -v, or of -vmodule for the source file containing the +// call, the V call will log. +func V(level Level) Verbose { + // This function tries hard to be cheap unless there's work to do. + // The fast path is two atomic loads and compares. + + // Here is a cheap but safe test to see if V logging is enabled globally. + if logging.verbosity.get() >= level { + return Verbose(true) + } + + // It's off globally but it vmodule may still be set. + // Here is another cheap but safe test to see if vmodule is enabled. + if atomic.LoadInt32(&logging.filterLength) > 0 { + // Now we need a proper lock to use the logging structure. The pcs field + // is shared so we must lock before accessing it. This is fairly expensive, + // but if V logging is enabled we're slow anyway. + logging.mu.Lock() + defer logging.mu.Unlock() + if runtime.Callers(2, logging.pcs[:]) == 0 { + return Verbose(false) + } + v, ok := logging.vmap[logging.pcs[0]] + if !ok { + v = logging.setV(logging.pcs[0]) + } + return Verbose(v >= level) + } + return Verbose(false) +} + +// Info is equivalent to the global Info function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Info(args ...interface{}) { + if v { + logging.print(infoLog, args...) + } +} + +// Infoln is equivalent to the global Infoln function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Infoln(args ...interface{}) { + if v { + logging.println(infoLog, args...) 
+ } +} + +// Infof is equivalent to the global Infof function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Infof(format string, args ...interface{}) { + if v { + logging.printf(infoLog, format, args...) + } +} + +// Info logs to the INFO log. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Info(args ...interface{}) { + logging.print(infoLog, args...) +} + +// InfoDepth acts as Info but uses depth to determine which call frame to log. +// InfoDepth(0, "msg") is the same as Info("msg"). +func InfoDepth(depth int, args ...interface{}) { + logging.printDepth(infoLog, depth, args...) +} + +// Infoln logs to the INFO log. +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Infoln(args ...interface{}) { + logging.println(infoLog, args...) +} + +// Infof logs to the INFO log. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Infof(format string, args ...interface{}) { + logging.printf(infoLog, format, args...) +} + +// Warning logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Warning(args ...interface{}) { + logging.print(warningLog, args...) +} + +// WarningDepth acts as Warning but uses depth to determine which call frame to log. +// WarningDepth(0, "msg") is the same as Warning("msg"). +func WarningDepth(depth int, args ...interface{}) { + logging.printDepth(warningLog, depth, args...) +} + +// Warningln logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Warningln(args ...interface{}) { + logging.println(warningLog, args...) +} + +// Warningf logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. 
+func Warningf(format string, args ...interface{}) { + logging.printf(warningLog, format, args...) +} + +// Error logs to the ERROR, WARNING, and INFO logs. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Error(args ...interface{}) { + logging.print(errorLog, args...) +} + +// ErrorDepth acts as Error but uses depth to determine which call frame to log. +// ErrorDepth(0, "msg") is the same as Error("msg"). +func ErrorDepth(depth int, args ...interface{}) { + logging.printDepth(errorLog, depth, args...) +} + +// Errorln logs to the ERROR, WARNING, and INFO logs. +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Errorln(args ...interface{}) { + logging.println(errorLog, args...) +} + +// Errorf logs to the ERROR, WARNING, and INFO logs. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Errorf(format string, args ...interface{}) { + logging.printf(errorLog, format, args...) +} + +// Fatal logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Fatal(args ...interface{}) { + logging.print(fatalLog, args...) +} + +// FatalDepth acts as Fatal but uses depth to determine which call frame to log. +// FatalDepth(0, "msg") is the same as Fatal("msg"). +func FatalDepth(depth int, args ...interface{}) { + logging.printDepth(fatalLog, depth, args...) +} + +// Fatalln logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Fatalln(args ...interface{}) { + logging.println(fatalLog, args...) 
+} + +// Fatalf logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Fatalf(format string, args ...interface{}) { + logging.printf(fatalLog, format, args...) +} + +// fatalNoStacks is non-zero if we are to exit without dumping goroutine stacks. +// It allows Exit and relatives to use the Fatal logs. +var fatalNoStacks uint32 + +// Exit logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Exit(args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.print(fatalLog, args...) +} + +// ExitDepth acts as Exit but uses depth to determine which call frame to log. +// ExitDepth(0, "msg") is the same as Exit("msg"). +func ExitDepth(depth int, args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.printDepth(fatalLog, depth, args...) +} + +// Exitln logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +func Exitln(args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.println(fatalLog, args...) +} + +// Exitf logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Exitf(format string, args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.printf(fatalLog, format, args...) +} diff --git a/vendor/src/github.com/golang/glog/glog_file.go b/vendor/src/github.com/golang/glog/glog_file.go new file mode 100644 index 000000000..65075d281 --- /dev/null +++ b/vendor/src/github.com/golang/glog/glog_file.go @@ -0,0 +1,124 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// File I/O for logs. + +package glog + +import ( + "errors" + "flag" + "fmt" + "os" + "os/user" + "path/filepath" + "strings" + "sync" + "time" +) + +// MaxSize is the maximum size of a log file in bytes. +var MaxSize uint64 = 1024 * 1024 * 1800 + +// logDirs lists the candidate directories for new log files. +var logDirs []string + +// If non-empty, overrides the choice of directory in which to write logs. +// See createLogDirs for the full list of possible destinations. +var logDir = flag.String("log_dir", "", "If non-empty, write log files in this directory") + +func createLogDirs() { + if *logDir != "" { + logDirs = append(logDirs, *logDir) + } + logDirs = append(logDirs, os.TempDir()) +} + +var ( + pid = os.Getpid() + program = filepath.Base(os.Args[0]) + host = "unknownhost" + userName = "unknownuser" +) + +func init() { + h, err := os.Hostname() + if err == nil { + host = shortHostname(h) + } + + current, err := user.Current() + if err == nil { + userName = current.Username + } + + // Sanitize userName since it may contain filepath separators on Windows. + userName = strings.Replace(userName, `\`, "_", -1) +} + +// shortHostname returns its argument, truncating at the first period. +// For instance, given "www.google.com" it returns "www". 
+func shortHostname(hostname string) string { + if i := strings.Index(hostname, "."); i >= 0 { + return hostname[:i] + } + return hostname +} + +// logName returns a new log file name containing tag, with start time t, and +// the name for the symlink for tag. +func logName(tag string, t time.Time) (name, link string) { + name = fmt.Sprintf("%s.%s.%s.log.%s.%04d%02d%02d-%02d%02d%02d.%d", + program, + host, + userName, + tag, + t.Year(), + t.Month(), + t.Day(), + t.Hour(), + t.Minute(), + t.Second(), + pid) + return name, program + "." + tag +} + +var onceLogDirs sync.Once + +// create creates a new log file and returns the file and its filename, which +// contains tag ("INFO", "FATAL", etc.) and t. If the file is created +// successfully, create also attempts to update the symlink for that tag, ignoring +// errors. +func create(tag string, t time.Time) (f *os.File, filename string, err error) { + onceLogDirs.Do(createLogDirs) + if len(logDirs) == 0 { + return nil, "", errors.New("log: no log dirs") + } + name, link := logName(tag, t) + var lastErr error + for _, dir := range logDirs { + fname := filepath.Join(dir, name) + f, err := os.Create(fname) + if err == nil { + symlink := filepath.Join(dir, link) + os.Remove(symlink) // ignore err + os.Symlink(name, symlink) // ignore err + return f, fname, nil + } + lastErr = err + } + return nil, "", fmt.Errorf("log: cannot create log: %v", lastErr) +} diff --git a/vendor/src/github.com/golang/glog/glog_test.go b/vendor/src/github.com/golang/glog/glog_test.go new file mode 100644 index 000000000..0fb376e1f --- /dev/null +++ b/vendor/src/github.com/golang/glog/glog_test.go @@ -0,0 +1,415 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package glog + +import ( + "bytes" + "fmt" + stdLog "log" + "path/filepath" + "runtime" + "strconv" + "strings" + "testing" + "time" +) + +// Test that shortHostname works as advertised. +func TestShortHostname(t *testing.T) { + for hostname, expect := range map[string]string{ + "": "", + "host": "host", + "host.google.com": "host", + } { + if got := shortHostname(hostname); expect != got { + t.Errorf("shortHostname(%q): expected %q, got %q", hostname, expect, got) + } + } +} + +// flushBuffer wraps a bytes.Buffer to satisfy flushSyncWriter. +type flushBuffer struct { + bytes.Buffer +} + +func (f *flushBuffer) Flush() error { + return nil +} + +func (f *flushBuffer) Sync() error { + return nil +} + +// swap sets the log writers and returns the old array. +func (l *loggingT) swap(writers [numSeverity]flushSyncWriter) (old [numSeverity]flushSyncWriter) { + l.mu.Lock() + defer l.mu.Unlock() + old = l.file + for i, w := range writers { + logging.file[i] = w + } + return +} + +// newBuffers sets the log writers to all new byte buffers and returns the old array. +func (l *loggingT) newBuffers() [numSeverity]flushSyncWriter { + return l.swap([numSeverity]flushSyncWriter{new(flushBuffer), new(flushBuffer), new(flushBuffer), new(flushBuffer)}) +} + +// contents returns the specified log value as a string. +func contents(s severity) string { + return logging.file[s].(*flushBuffer).String() +} + +// contains reports whether the string is contained in the log. 
+func contains(s severity, str string, t *testing.T) bool { + return strings.Contains(contents(s), str) +} + +// setFlags configures the logging flags how the test expects them. +func setFlags() { + logging.toStderr = false +} + +// Test that Info works as advertised. +func TestInfo(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + Info("test") + if !contains(infoLog, "I", t) { + t.Errorf("Info has wrong character: %q", contents(infoLog)) + } + if !contains(infoLog, "test", t) { + t.Error("Info failed") + } +} + +func TestInfoDepth(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + + f := func() { InfoDepth(1, "depth-test1") } + + // The next three lines must stay together + _, _, wantLine, _ := runtime.Caller(0) + InfoDepth(0, "depth-test0") + f() + + msgs := strings.Split(strings.TrimSuffix(contents(infoLog), "\n"), "\n") + if len(msgs) != 2 { + t.Fatalf("Got %d lines, expected 2", len(msgs)) + } + + for i, m := range msgs { + if !strings.HasPrefix(m, "I") { + t.Errorf("InfoDepth[%d] has wrong character: %q", i, m) + } + w := fmt.Sprintf("depth-test%d", i) + if !strings.Contains(m, w) { + t.Errorf("InfoDepth[%d] missing %q: %q", i, w, m) + } + + // pull out the line number (between : and ]) + msg := m[strings.LastIndex(m, ":")+1:] + x := strings.Index(msg, "]") + if x < 0 { + t.Errorf("InfoDepth[%d]: missing ']': %q", i, m) + continue + } + line, err := strconv.Atoi(msg[:x]) + if err != nil { + t.Errorf("InfoDepth[%d]: bad line number: %q", i, m) + continue + } + wantLine++ + if wantLine != line { + t.Errorf("InfoDepth[%d]: got line %d, want %d", i, line, wantLine) + } + } +} + +func init() { + CopyStandardLogTo("INFO") +} + +// Test that CopyStandardLogTo panics on bad input. 
+func TestCopyStandardLogToPanic(t *testing.T) { + defer func() { + if s, ok := recover().(string); !ok || !strings.Contains(s, "LOG") { + t.Errorf(`CopyStandardLogTo("LOG") should have panicked: %v`, s) + } + }() + CopyStandardLogTo("LOG") +} + +// Test that using the standard log package logs to INFO. +func TestStandardLog(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + stdLog.Print("test") + if !contains(infoLog, "I", t) { + t.Errorf("Info has wrong character: %q", contents(infoLog)) + } + if !contains(infoLog, "test", t) { + t.Error("Info failed") + } +} + +// Test that the header has the correct format. +func TestHeader(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + defer func(previous func() time.Time) { timeNow = previous }(timeNow) + timeNow = func() time.Time { + return time.Date(2006, 1, 2, 15, 4, 5, .067890e9, time.Local) + } + pid = 1234 + Info("test") + var line int + format := "I0102 15:04:05.067890 1234 glog_test.go:%d] test\n" + n, err := fmt.Sscanf(contents(infoLog), format, &line) + if n != 1 || err != nil { + t.Errorf("log format error: %d elements, error %s:\n%s", n, err, contents(infoLog)) + } + // Scanf treats multiple spaces as equivalent to a single space, + // so check for correct space-padding also. + want := fmt.Sprintf(format, line) + if contents(infoLog) != want { + t.Errorf("log format error: got:\n\t%q\nwant:\t%q", contents(infoLog), want) + } +} + +// Test that an Error log goes to Warning and Info. +// Even in the Info log, the source character will be E, so the data should +// all be identical. 
+func TestError(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + Error("test") + if !contains(errorLog, "E", t) { + t.Errorf("Error has wrong character: %q", contents(errorLog)) + } + if !contains(errorLog, "test", t) { + t.Error("Error failed") + } + str := contents(errorLog) + if !contains(warningLog, str, t) { + t.Error("Warning failed") + } + if !contains(infoLog, str, t) { + t.Error("Info failed") + } +} + +// Test that a Warning log goes to Info. +// Even in the Info log, the source character will be W, so the data should +// all be identical. +func TestWarning(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + Warning("test") + if !contains(warningLog, "W", t) { + t.Errorf("Warning has wrong character: %q", contents(warningLog)) + } + if !contains(warningLog, "test", t) { + t.Error("Warning failed") + } + str := contents(warningLog) + if !contains(infoLog, str, t) { + t.Error("Info failed") + } +} + +// Test that a V log goes to Info. +func TestV(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + logging.verbosity.Set("2") + defer logging.verbosity.Set("0") + V(2).Info("test") + if !contains(infoLog, "I", t) { + t.Errorf("Info has wrong character: %q", contents(infoLog)) + } + if !contains(infoLog, "test", t) { + t.Error("Info failed") + } +} + +// Test that a vmodule enables a log in this file. +func TestVmoduleOn(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + logging.vmodule.Set("glog_test=2") + defer logging.vmodule.Set("") + if !V(1) { + t.Error("V not enabled for 1") + } + if !V(2) { + t.Error("V not enabled for 2") + } + if V(3) { + t.Error("V enabled for 3") + } + V(2).Info("test") + if !contains(infoLog, "I", t) { + t.Errorf("Info has wrong character: %q", contents(infoLog)) + } + if !contains(infoLog, "test", t) { + t.Error("Info failed") + } +} + +// Test that a vmodule of another file does not enable a log in this file. 
+func TestVmoduleOff(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + logging.vmodule.Set("notthisfile=2") + defer logging.vmodule.Set("") + for i := 1; i <= 3; i++ { + if V(Level(i)) { + t.Errorf("V enabled for %d", i) + } + } + V(2).Info("test") + if contents(infoLog) != "" { + t.Error("V logged incorrectly") + } +} + +// vGlobs are patterns that match/don't match this file at V=2. +var vGlobs = map[string]bool{ + // Easy to test the numeric match here. + "glog_test=1": false, // If -vmodule sets V to 1, V(2) will fail. + "glog_test=2": true, + "glog_test=3": true, // If -vmodule sets V to 1, V(3) will succeed. + // These all use 2 and check the patterns. All are true. + "*=2": true, + "?l*=2": true, + "????_*=2": true, + "??[mno]?_*t=2": true, + // These all use 2 and check the patterns. All are false. + "*x=2": false, + "m*=2": false, + "??_*=2": false, + "?[abc]?_*t=2": false, +} + +// Test that vmodule globbing works as advertised. +func testVmoduleGlob(pat string, match bool, t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + defer logging.vmodule.Set("") + logging.vmodule.Set(pat) + if V(2) != Verbose(match) { + t.Errorf("incorrect match for %q: got %t expected %t", pat, V(2), match) + } +} + +// Test that a vmodule globbing works as advertised. +func TestVmoduleGlob(t *testing.T) { + for glob, match := range vGlobs { + testVmoduleGlob(glob, match, t) + } +} + +func TestRollover(t *testing.T) { + setFlags() + var err error + defer func(previous func(error)) { logExitFunc = previous }(logExitFunc) + logExitFunc = func(e error) { + err = e + } + defer func(previous uint64) { MaxSize = previous }(MaxSize) + MaxSize = 512 + + Info("x") // Be sure we have a file. 
+ info, ok := logging.file[infoLog].(*syncBuffer) + if !ok { + t.Fatal("info wasn't created") + } + if err != nil { + t.Fatalf("info has initial error: %v", err) + } + fname0 := info.file.Name() + Info(strings.Repeat("x", int(MaxSize))) // force a rollover + if err != nil { + t.Fatalf("info has error after big write: %v", err) + } + + // Make sure the next log file gets a file name with a different + // time stamp. + // + // TODO: determine whether we need to support subsecond log + // rotation. C++ does not appear to handle this case (nor does it + // handle Daylight Savings Time properly). + time.Sleep(1 * time.Second) + + Info("x") // create a new file + if err != nil { + t.Fatalf("error after rotation: %v", err) + } + fname1 := info.file.Name() + if fname0 == fname1 { + t.Errorf("info.f.Name did not change: %v", fname0) + } + if info.nbytes >= MaxSize { + t.Errorf("file size was not reset: %d", info.nbytes) + } +} + +func TestLogBacktraceAt(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + // The peculiar style of this code simplifies line counting and maintenance of the + // tracing block below. + var infoLine string + setTraceLocation := func(file string, line int, ok bool, delta int) { + if !ok { + t.Fatal("could not get file:line") + } + _, file = filepath.Split(file) + infoLine = fmt.Sprintf("%s:%d", file, line+delta) + err := logging.traceLocation.Set(infoLine) + if err != nil { + t.Fatal("error setting log_backtrace_at: ", err) + } + } + { + // Start of tracing block. These lines know about each other's relative position. + _, file, line, ok := runtime.Caller(0) + setTraceLocation(file, line, ok, +2) // Two lines between Caller and Info calls. 
+ Info("we want a stack trace here") + } + numAppearances := strings.Count(contents(infoLog), infoLine) + if numAppearances < 2 { + // Need 2 appearances, one in the log header and one in the trace: + // log_test.go:281: I0511 16:36:06.952398 02238 log_test.go:280] we want a stack trace here + // ... + // github.com/glog/glog_test.go:280 (0x41ba91) + // ... + // We could be more precise but that would require knowing the details + // of the traceback format, which may not be dependable. + t.Fatal("got no trace back; log is ", contents(infoLog)) + } +} + +func BenchmarkHeader(b *testing.B) { + for i := 0; i < b.N; i++ { + buf, _, _ := logging.header(infoLog, 0) + logging.putBuffer(buf) + } +} From 76d395eff2c69074a3d47389bbde8a20dd2725d1 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 26 Dec 2014 15:42:02 +0300 Subject: [PATCH 041/101] new-api: add Console to ProcessConfig Add ability to execute a process with a specified terminal. Signed-off-by: Andrey Vagin --- linux_container.go | 4 ++-- namespaces/exec.go | 7 ++++--- namespaces/execin.go | 7 ++++--- process.go | 3 +++ 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/linux_container.go b/linux_container.go index fffd7c67e..a5eb9a988 100644 --- a/linux_container.go +++ b/linux_container.go @@ -100,7 +100,7 @@ func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { if state != configs.Destroyed { glog.Info("start new container process") - return namespaces.ExecIn(config.Args, config.Env, cmd, c.config, c.state) + return namespaces.ExecIn(config.Args, config.Env, config.Console, cmd, c.config, c.state) } if err := c.startInitProcess(cmd, config); err != nil { @@ -134,7 +134,7 @@ func (c *linuxContainer) updateStateFile() error { } func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error { - err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.cgroupManager, c.state) + err := namespaces.Exec(config.Args, config.Env, config.Console, cmd, 
c.config, c.cgroupManager, c.state) if err != nil { return err } diff --git a/namespaces/exec.go b/namespaces/exec.go index f9526c225..0fd31be03 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -19,7 +19,7 @@ import ( // Move this to libcontainer package. // Exec performs setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. -func Exec(args []string, env []string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) error { +func Exec(args []string, env []string, console string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) error { var err error // create a pipe so that we can syncronize with the namespaced process and @@ -54,8 +54,9 @@ func Exec(args []string, env []string, command *exec.Cmd, container *configs.Con } process := processArgs{ - Env: append(env[0:], container.Env...), - Args: args, + Env: append(env[0:], container.Env...), + Args: args, + ConsolePath: console, } if err := encoder.Encode(process); err != nil { return terminate(err) diff --git a/namespaces/execin.go b/namespaces/execin.go index 5d2708ace..9b5b42c0f 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -18,7 +18,7 @@ import ( // ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the // setns code in a single threaded environment joining the existing containers' namespaces. 
-func ExecIn(args []string, env []string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) { +func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) { var err error parent, child, err := newInitPipe() @@ -50,8 +50,9 @@ func ExecIn(args []string, env []string, cmd *exec.Cmd, container *configs.Confi } process := processArgs{ - Env: append(env[0:], container.Env...), - Args: args, + Env: append(env[0:], container.Env...), + Args: args, + ConsolePath: console, } if err := encoder.Encode(process); err != nil { return terminate(err) diff --git a/process.go b/process.go index 924de2ecf..cd72b129c 100644 --- a/process.go +++ b/process.go @@ -21,4 +21,7 @@ type ProcessConfig struct { Stdin io.Reader Stdout io.Writer Stderr io.Writer + + // Console is the path to the pty slave for use by the master + Console string } From 46e62c92041e133cd2278d5312f9245c026ca47e Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 26 Dec 2014 16:13:10 +0300 Subject: [PATCH 042/101] nsinit: return console Signed-off-by: Andrey Vagin --- nsinit/exec.go | 81 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/nsinit/exec.go b/nsinit/exec.go index dc7b21082..525991d3c 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -3,13 +3,16 @@ package main import ( "crypto/md5" "fmt" + "io" "log" "os" "syscall" "github.com/codegangsta/cli" + "github.com/docker/docker/pkg/term" "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" + consolepkg "github.com/docker/libcontainer/console" ) var ( @@ -50,21 +53,55 @@ func getContainer(context *cli.Context) (libcontainer.Container, error) { } func execAction(context *cli.Context) { - var exitCode int + var ( + master *os.File + console string + err error - process := &libcontainer.ProcessConfig{ - Args: context.Args(), - Env: context.StringSlice("env"), - Stdin: os.Stdin, - 
Stdout: os.Stdout, - Stderr: os.Stderr, - } + sigc = make(chan os.Signal, 10) + + stdin = os.Stdin + stdout = os.Stdout + stderr = os.Stderr + + exitCode int + ) container, err := getContainer(context) if err != nil { log.Fatal(err) } + if container.Config().Tty { + stdin = nil + stdout = nil + stderr = nil + + master, console, err = consolepkg.CreateMasterAndConsole() + if err != nil { + log.Fatal(err) + } + + go io.Copy(master, os.Stdin) + go io.Copy(os.Stdout, master) + + state, err := term.SetRawTerminal(os.Stdin.Fd()) + if err != nil { + log.Fatal(err) + } + + defer term.RestoreTerminal(os.Stdin.Fd(), state) + } + + process := &libcontainer.ProcessConfig{ + Args: context.Args(), + Env: context.StringSlice("env"), + Stdin: stdin, + Stdout: stdout, + Stderr: stderr, + Console: console, + } + pid, err := container.StartProcess(process) if err != nil { log.Fatalf("failed to exec: %s", err) @@ -75,6 +112,19 @@ func execAction(context *cli.Context) { log.Fatalf("Unable to find the %d process: %s", pid, err) } + go func() { + resizeTty(master) + + for sig := range sigc { + switch sig { + case syscall.SIGWINCH: + resizeTty(master) + default: + p.Signal(sig) + } + } + }() + ps, err := p.Wait() if err != nil { log.Fatalf("Unable to wait the %d process: %s", pid, err) @@ -92,3 +142,18 @@ func execAction(context *cli.Context) { os.Exit(exitCode) } + +func resizeTty(master *os.File) { + if master == nil { + return + } + + ws, err := term.GetWinsize(os.Stdin.Fd()) + if err != nil { + return + } + + if err := term.SetWinsize(master.Fd(), ws); err != nil { + return + } +} From 7b33e53e4ade5c39d8962dd72ce2426eb04ec310 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Thu, 25 Dec 2014 18:43:05 +0300 Subject: [PATCH 043/101] integration: check that a process can be executed in an existing CT Signed-off-by: Andrey Vagin --- integration/exec_test.go | 112 +++++++++++++++++++++++++++++++++++++++ integration/init_test.go | 9 +++- 2 files changed, 119 insertions(+), 2 deletions(-) 
diff --git a/integration/exec_test.go b/integration/exec_test.go index 993ca25ce..36213bcaf 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -1,10 +1,13 @@ package integration import ( + "bytes" + "io/ioutil" "os" "strings" "testing" + "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" ) @@ -188,3 +191,112 @@ func getNamespaceIndex(config *configs.Config, name string) int { } return -1 } + +func newTestRoot() (string, error) { + dir, err := ioutil.TempDir("", "libcontainer") + if err != nil { + return "", err + } + if err := os.MkdirAll(dir, 0700); err != nil { + return "", err + } + return dir, nil +} + +func TestEnter(t *testing.T) { + root, err := newTestRoot() + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(root) + + rootfs, err := newRootFs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + factory, err := libcontainer.New(root, []string{os.Args[0], "init", "--"}) + if err != nil { + t.Fatal(err) + } + + container, err := factory.Create("test", config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + + var stdout, stdout2 bytes.Buffer + + pconfig := libcontainer.ProcessConfig{ + Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}, + Stdin: stdinR, + Stdout: &stdout, + } + pid, err := container.StartProcess(&pconfig) + stdinR.Close() + defer stdinW.Close() + if err != nil { + t.Fatal(err) + } + + process, err := os.FindProcess(pid) + if err != nil { + t.Fatal(err) + } + + pconfig.Args = []string{"readlink", "/proc/self/ns/pid"} + pconfig.Stdin = nil + pconfig.Stdout = &stdout2 + + pid2, err := container.StartProcess(&pconfig) + if err != nil { + t.Fatal(err) + } + + process2, err := os.FindProcess(pid2) + if err != nil { + t.Fatal(err) + } + + s, err := process2.Wait() + if err != nil { + t.Fatal(err) + } 
+ if !s.Success() { + t.Fatal(s.String()) + } + + stdinW.Close() + s, err = process.Wait() + if err != nil { + t.Fatal(err) + } + if !s.Success() { + t.Fatal(s.String()) + } + + // Check that both processes live in the same pidns + pidns := string(stdout.Bytes()) + if err != nil { + t.Fatal(err) + } + + pidns2 := string(stdout2.Bytes()) + if err != nil { + t.Fatal(err) + } + + if pidns != pidns2 { + t.Fatal("The second process isn't in the required pid namespace", pidns, pidns2) + } +} diff --git a/integration/init_test.go b/integration/init_test.go index 095263761..f9c1e3cf5 100644 --- a/integration/init_test.go +++ b/integration/init_test.go @@ -5,7 +5,8 @@ import ( "os" "runtime" - "github.com/docker/libcontainer/namespaces" + "github.com/docker/libcontainer" + _ "github.com/docker/libcontainer/namespaces/nsenter" ) // init runs the libcontainer initialization code because of the busybox style needs @@ -16,8 +17,12 @@ func init() { } runtime.LockOSThread() - if err := namespaces.Init(os.NewFile(3, "pipe")); err != nil { + factory, err := libcontainer.New("", nil) + if err != nil { log.Fatalf("unable to initialize for container: %s", err) } + + factory.StartInitialization(3) + os.Exit(1) } From 9c50d819aebcad25ac853362bd41f2f856f8aa19 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 19 Jan 2015 16:19:28 +0300 Subject: [PATCH 044/101] new-api: implement Pause() and Resume() Signed-off-by: Andrey Vagin --- linux_container.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/linux_container.go b/linux_container.go index a5eb9a988..971ebd3ef 100644 --- a/linux_container.go +++ b/linux_container.go @@ -48,6 +48,11 @@ func (c *linuxContainer) RunState() (configs.RunState, error) { return 0, err } + if c.config.Cgroups != nil && + c.config.Cgroups.Freezer == cgroups.Frozen { + return configs.Paused, nil + } + //FIXME get a cgroup state to check other states return configs.Running, nil @@ -163,13 +168,11 @@ func (c *linuxContainer) 
Destroy() error { } func (c *linuxContainer) Pause() error { - glog.Info("pause container") - panic("not implemented") + return c.cgroupManager.Freeze(cgroups.Frozen) } func (c *linuxContainer) Resume() error { - glog.Info("resume container") - panic("not implemented") + return c.cgroupManager.Freeze(cgroups.Thawed) } func (c *linuxContainer) Signal(pid, signal int) error { From dcb3bca32ca62ed8ed5360f7d3cef5775910b5ae Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 19 Jan 2015 16:21:21 +0300 Subject: [PATCH 045/101] namespaces: destroy cgroups only on error paths Signed-off-by: Andrey Vagin --- namespaces/exec.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/namespaces/exec.go b/namespaces/exec.go index 0fd31be03..a4e1d383e 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -19,9 +19,7 @@ import ( // Move this to libcontainer package. // Exec performs setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. 
-func Exec(args []string, env []string, console string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) error { - var err error - +func Exec(args []string, env []string, console string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) (err error) { // create a pipe so that we can syncronize with the namespaced process and // pass the state and configuration to the child process parent, child, err := newInitPipe() @@ -73,7 +71,11 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai if err != nil { return terminate(err) } - defer cgroupManager.Destroy() + defer func() { + if err != nil { + cgroupManager.Destroy() + } + }() var networkState network.NetworkState if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { From 02c1de6f1126a75b04938376003a71b9ce1c5345 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 19 Jan 2015 17:04:14 +0300 Subject: [PATCH 046/101] cgroups: set a freezer state before calling FreezerGroup.Set() My previous patch moved the setting of the freezer state after the Set() command. It's wrong, because this command uses it, so we need to set the freezer state before the command and rollback it in an error case. 
Fixes: 13a5703 ("cgroups: don't change a freezer state if an operation failed") Signed-off-by: Andrey Vagin --- cgroups/fs/apply_raw.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 1fe5faf3b..930738bf3 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -143,14 +143,16 @@ func (m *Manager) Freeze(state cgroups.FreezerState) error { return err } + prevState := m.Cgroups.Freezer + m.Cgroups.Freezer = state + freezer := subsystems["freezer"] err = freezer.Set(d) if err != nil { + m.Cgroups.Freezer = prevState return err } - m.Cgroups.Freezer = state - return nil } From 5138417f80e1f302ddf58853526ca158a394b095 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 19 Jan 2015 17:12:00 +0300 Subject: [PATCH 047/101] integration: add test to check Pause and Resume operations Signed-off-by: Andrey Vagin --- integration/exec_test.go | 77 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/integration/exec_test.go b/integration/exec_test.go index 36213bcaf..1ea7992b3 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -204,6 +204,9 @@ func newTestRoot() (string, error) { } func TestEnter(t *testing.T) { + if testing.Short() { + return + } root, err := newTestRoot() if err != nil { t.Fatal(err) @@ -300,3 +303,77 @@ func TestEnter(t *testing.T) { t.Fatal("The second process isn't in the required pid namespace", pidns, pidns2) } } + +func TestFreeze(t *testing.T) { + if testing.Short() { + return + } + root, err := newTestRoot() + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(root) + + rootfs, err := newRootFs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + factory, err := libcontainer.New(root, []string{os.Args[0], "init", "--"}) + if err != nil { + t.Fatal(err) + } + + container, err := factory.Create("test", config) + if err != nil { + t.Fatal(err) + } + 
defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + + pconfig := libcontainer.ProcessConfig{ + Args: []string{"cat"}, + Stdin: stdinR, + } + pid, err := container.StartProcess(&pconfig) + stdinR.Close() + defer stdinW.Close() + if err != nil { + t.Fatal(err) + } + + process, err := os.FindProcess(pid) + if err != nil { + t.Fatal(err) + } + + if err := container.Pause(); err != nil { + t.Fatal(err) + } + state, err := container.RunState() + if err != nil { + t.Fatal(err) + } + if state != configs.Paused { + t.Fatal("Unexpected state: ", state) + } + if err := container.Resume(); err != nil { + t.Fatal(err) + } + + stdinW.Close() + s, err := process.Wait() + if err != nil { + t.Fatal(err) + } + if !s.Success() { + t.Fatal(s.String()) + } +} From f0c20b560122c63eb84f4b0dd87634f02034f378 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 26 Dec 2014 14:01:58 +0300 Subject: [PATCH 048/101] new-api: remove DefaultCreateCommand() Signed-off-by: Andrew Vagin --- namespaces/exec.go | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/namespaces/exec.go b/namespaces/exec.go index a4e1d383e..68c3a2be3 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -108,39 +108,6 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai return nil } -// DefaultCreateCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces -// defined on the container's configuration and use the current binary as the init with the -// args provided -// -// console: the /dev/console to setup inside the container -// init: the program executed inside the namespaces -// root: the path to the container json file and information -// pipe: sync pipe to synchronize the parent and child processes -// args: the arguments to pass to the container to run as the user's program -func DefaultCreateCommand(container *configs.Config, console, dataPath, init string, pipe *os.File, 
args []string) *exec.Cmd { - // get our binary name from arg0 so we can always reexec ourself - env := []string{ - "console=" + console, - "pipe=3", - "data_path=" + dataPath, - } - - command := exec.Command(init, append([]string{"init", "--"}, args...)...) - // make sure the process is executed inside the context of the rootfs - command.Dir = container.RootFs - command.Env = append(os.Environ(), env...) - - if command.SysProcAttr == nil { - command.SysProcAttr = &syscall.SysProcAttr{} - } - command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces)) - - command.SysProcAttr.Pdeathsig = syscall.SIGKILL - command.ExtraFiles = []*os.File{pipe} - - return command -} - // InitializeNetworking creates the container's network stack outside of the namespace and moves // interfaces into the container's net namespaces if necessary func InitializeNetworking(container *configs.Config, nspid int, networkState *network.NetworkState) error { From 10f0ac292127ba3cde23ef091b47152364b9268b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 15 Jan 2015 18:11:24 +0300 Subject: [PATCH 049/101] new-api: remove nsenter.c Signed-off-by: Andrey Vagin --- namespaces/nsenter/nsenter.c | 224 ---------------------------------- namespaces/nsenter/nsenter.go | 1 - 2 files changed, 225 deletions(-) delete mode 100644 namespaces/nsenter/nsenter.c diff --git a/namespaces/nsenter/nsenter.c b/namespaces/nsenter/nsenter.c deleted file mode 100644 index f060f63b1..000000000 --- a/namespaces/nsenter/nsenter.c +++ /dev/null @@ -1,224 +0,0 @@ -// +build cgo -// -// formated with indent -linux nsenter.c - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const kBufSize = 256; -static const char *kNsEnter = "nsenter"; - -void get_args(int *argc, char ***argv) -{ - // Read argv - int fd = open("/proc/self/cmdline", O_RDONLY); - - // Read the whole commandline. 
- ssize_t contents_size = 0; - ssize_t contents_offset = 0; - char *contents = NULL; - ssize_t bytes_read = 0; - do { - contents_size += kBufSize; - contents = (char *)realloc(contents, contents_size); - bytes_read = - read(fd, contents + contents_offset, - contents_size - contents_offset); - contents_offset += bytes_read; - } - while (bytes_read > 0); - close(fd); - - // Parse the commandline into an argv. /proc/self/cmdline has \0 delimited args. - ssize_t i; - *argc = 0; - for (i = 0; i < contents_offset; i++) { - if (contents[i] == '\0') { - (*argc)++; - } - } - *argv = (char **)malloc(sizeof(char *) * ((*argc) + 1)); - int idx; - for (idx = 0; idx < (*argc); idx++) { - (*argv)[idx] = contents; - contents += strlen(contents) + 1; - } - (*argv)[*argc] = NULL; -} - -// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12) -#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 -#define _GNU_SOURCE -#include -#include "syscall.h" -#ifdef SYS_setns -int setns(int fd, int nstype) -{ - return syscall(SYS_setns, fd, nstype); -} -#endif -#endif - -void print_usage() -{ - fprintf(stderr, - "nsenter --nspid --console -- cmd1 arg1 arg2...\n"); -} - -void nsenter() -{ - int argc, c; - char **argv; - get_args(&argc, &argv); - - // check argv 0 to ensure that we are supposed to setns - // we use strncmp to test for a value of "nsenter" but also allows alternate implmentations - // after the setns code path to continue to use the argv 0 to determine actions to be run - // resulting in the ability to specify "nsenter-mknod", "nsenter-exec", etc... 
- if (strncmp(argv[0], kNsEnter, strlen(kNsEnter)) != 0) { - return; - } - - if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) { - fprintf(stderr, "nsenter: failed to set child subreaper: %s", strerror(errno)); - exit(1); - } - - static const struct option longopts[] = { - {"nspid", required_argument, NULL, 'n'}, - {"console", required_argument, NULL, 't'}, - {NULL, 0, NULL, 0} - }; - - pid_t init_pid = -1; - char *init_pid_str = NULL; - char *console = NULL; - while ((c = getopt_long_only(argc, argv, "n:c:", longopts, NULL)) != -1) { - switch (c) { - case 'n': - init_pid_str = optarg; - break; - case 't': - console = optarg; - break; - } - } - - if (init_pid_str == NULL) { - print_usage(); - exit(1); - } - - init_pid = strtol(init_pid_str, NULL, 10); - if ((init_pid == 0 && errno == EINVAL) || errno == ERANGE) { - fprintf(stderr, - "nsenter: Failed to parse PID from \"%s\" with output \"%d\" and error: \"%s\"\n", - init_pid_str, init_pid, strerror(errno)); - print_usage(); - exit(1); - } - - argc -= 3; - argv += 3; - - if (setsid() == -1) { - fprintf(stderr, "setsid failed. Error: %s\n", strerror(errno)); - exit(1); - } - // before we setns we need to dup the console - int consolefd = -1; - if (console != NULL) { - consolefd = open(console, O_RDWR); - if (consolefd < 0) { - fprintf(stderr, - "nsenter: failed to open console %s %s\n", - console, strerror(errno)); - exit(1); - } - } - // Setns on all supported namespaces. - char ns_dir[PATH_MAX]; - memset(ns_dir, 0, PATH_MAX); - snprintf(ns_dir, PATH_MAX - 1, "/proc/%d/ns/", init_pid); - - char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" }; - const int num = sizeof(namespaces) / sizeof(char *); - int i; - for (i = 0; i < num; i++) { - char buf[PATH_MAX]; - memset(buf, 0, PATH_MAX); - snprintf(buf, PATH_MAX - 1, "%s%s", ns_dir, namespaces[i]); - int fd = open(buf, O_RDONLY); - if (fd == -1) { - // Ignore nonexistent namespaces. 
- if (errno == ENOENT) - continue; - - fprintf(stderr, - "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", - buf, namespaces[i], strerror(errno)); - exit(1); - } - // Set the namespace. - if (setns(fd, 0) == -1) { - fprintf(stderr, - "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", - namespaces[i], strerror(errno)); - exit(1); - } - close(fd); - } - - // We must fork to actually enter the PID namespace. - int child = fork(); - if (child == 0) { - if (consolefd != -1) { - if (dup2(consolefd, STDIN_FILENO) != 0) { - fprintf(stderr, "nsenter: failed to dup 0 %s\n", - strerror(errno)); - exit(1); - } - if (dup2(consolefd, STDOUT_FILENO) != STDOUT_FILENO) { - fprintf(stderr, "nsenter: failed to dup 1 %s\n", - strerror(errno)); - exit(1); - } - if (dup2(consolefd, STDERR_FILENO) != STDERR_FILENO) { - fprintf(stderr, "nsenter: failed to dup 2 %s\n", - strerror(errno)); - exit(1); - } - } - // Finish executing, let the Go runtime take over. - return; - } else { - // Parent, wait for the child. - int status = 0; - if (waitpid(child, &status, 0) == -1) { - fprintf(stderr, - "nsenter: Failed to waitpid with error: \"%s\"\n", - strerror(errno)); - exit(1); - } - // Forward the child's exit code or re-send its death signal. 
- if (WIFEXITED(status)) { - exit(WEXITSTATUS(status)); - } else if (WIFSIGNALED(status)) { - kill(getpid(), WTERMSIG(status)); - } - - exit(1); - } - - return; -} diff --git a/namespaces/nsenter/nsenter.go b/namespaces/nsenter/nsenter.go index 394716145..e0ade6ebe 100644 --- a/namespaces/nsenter/nsenter.go +++ b/namespaces/nsenter/nsenter.go @@ -4,7 +4,6 @@ package nsenter /* __attribute__((constructor)) init() { - nsenter(); nsexec(); } */ From 5162e5a81ca8e4381c6773d74d23bc7f64591516 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 21 Jan 2015 12:29:53 +0300 Subject: [PATCH 050/101] integration: check container.Processes() Signed-off-by: Andrey Vagin --- integration/exec_test.go | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/integration/exec_test.go b/integration/exec_test.go index 1ea7992b3..8c0c2b685 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -257,11 +257,18 @@ func TestEnter(t *testing.T) { t.Fatal(err) } - pconfig.Args = []string{"readlink", "/proc/self/ns/pid"} - pconfig.Stdin = nil + // Execute a first process in the container + stdinR2, stdinW2, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + pconfig.Args = []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"} + pconfig.Stdin = stdinR2 pconfig.Stdout = &stdout2 pid2, err := container.StartProcess(&pconfig) + stdinR2.Close() + defer stdinW2.Close() if err != nil { t.Fatal(err) } @@ -271,6 +278,22 @@ func TestEnter(t *testing.T) { t.Fatal(err) } + processes, err := container.Processes() + if err != nil { + t.Fatal(err) + } + + n := 0 + for i := range processes { + if processes[i] == pid || processes[i] == pid2 { + n++ + } + } + if n != 2 { + t.Fatal("unexpected number of processes", processes, pid, pid2) + } + + stdinW2.Close() s, err := process2.Wait() if err != nil { t.Fatal(err) From 6fc1dd5f253cba133d6b232cd3186816fc402c5b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 21 Jan 2015 15:24:18 +0300 
Subject: [PATCH 051/101] integration: check a container state after resuming the CT Otherwise CT will be left in a frozen state in case of failure Signed-off-by: Andrey Vagin --- integration/exec_test.go | 6 +++--- integration/template_test.go | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/integration/exec_test.go b/integration/exec_test.go index 8c0c2b685..faa3fc15a 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -384,12 +384,12 @@ func TestFreeze(t *testing.T) { if err != nil { t.Fatal(err) } - if state != configs.Paused { - t.Fatal("Unexpected state: ", state) - } if err := container.Resume(); err != nil { t.Fatal(err) } + if state != configs.Paused { + t.Fatal("Unexpected state: ", state) + } stdinW.Close() s, err := process.Wait() diff --git a/integration/template_test.go b/integration/template_test.go index d58bb6133..834099d27 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -40,6 +40,7 @@ func newTemplateConfig(rootfs string) *configs.Config { {Name: "NEWNET"}, }, Cgroups: &cgroups.Cgroup{ + Name: "test", Parent: "integration", AllowAllDevices: false, AllowedDevices: devices.DefaultAllowedDevices, From e79e87e426153992e9324d67f081b01f306d7a96 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 21 Jan 2015 15:36:26 +0300 Subject: [PATCH 052/101] cgroup/systemd: set config.Cgroups.Freezer Signed-off-by: Andrey Vagin --- cgroups/systemd/apply_systemd.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 854d7f3b7..05b97444e 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -236,6 +236,9 @@ func (m *Manager) Freeze(state cgroups.FreezerState) error { } time.Sleep(1 * time.Millisecond) } + + m.Cgroups.Freezer = state + return nil } From 61fef16f4aa331459f8e7e35b219a12e8f670080 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Wed, 21 Jan 2015 18:41:30 +0300 Subject: [PATCH
053/101] new-api: implement Wait, WaitProcess Signed-off-by: Andrew Vagin --- integration/exec_test.go | 28 ++++++++++++---------------- linux_container.go | 13 +++++++++---- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/integration/exec_test.go b/integration/exec_test.go index faa3fc15a..8132e25b8 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "os" "strings" + "syscall" "testing" "github.com/docker/libcontainer" @@ -252,11 +253,6 @@ func TestEnter(t *testing.T) { t.Fatal(err) } - process, err := os.FindProcess(pid) - if err != nil { - t.Fatal(err) - } - // Execute a first process in the container stdinR2, stdinW2, err := os.Pipe() if err != nil { @@ -273,11 +269,6 @@ func TestEnter(t *testing.T) { t.Fatal(err) } - process2, err := os.FindProcess(pid2) - if err != nil { - t.Fatal(err) - } - processes, err := container.Processes() if err != nil { t.Fatal(err) @@ -293,22 +284,27 @@ func TestEnter(t *testing.T) { t.Fatal("unexpected number of processes", processes, pid, pid2) } + // Wait processes + var status syscall.WaitStatus + stdinW2.Close() - s, err := process2.Wait() + exitCode, err := container.WaitProcess(pid2) if err != nil { t.Fatal(err) } - if !s.Success() { - t.Fatal(s.String()) + status = syscall.WaitStatus(exitCode) + if status.ExitStatus() != 0 { + t.Fatal(exitCode) } stdinW.Close() - s, err = process.Wait() + exitCode, err = container.WaitProcess(pid) if err != nil { t.Fatal(err) } - if !s.Success() { - t.Fatal(s.String()) + status = syscall.WaitStatus(exitCode) + if status.ExitStatus() != 0 { + t.Fatal(exitCode) } // Check that both processes live in the same pidns diff --git a/linux_container.go b/linux_container.go index 971ebd3ef..e5c6826a2 100644 --- a/linux_container.go +++ b/linux_container.go @@ -181,11 +181,16 @@ func (c *linuxContainer) Signal(pid, signal int) error { } func (c *linuxContainer) Wait() (int, error) { - glog.Info("wait container") - panic("not 
implemented") + return c.WaitProcess(c.state.InitPid) } func (c *linuxContainer) WaitProcess(pid int) (int, error) { - glog.Infof("wait process %d", pid) - panic("not implemented") + var status syscall.WaitStatus + + _, err := syscall.Wait4(pid, &status, 0, nil) + if err != nil { + return -1, newGenericError(err, SystemError) + } + + return int(status), err } From e77b238a83350f45252a99741165b2db1d31eeda Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 26 Jan 2015 14:07:29 +0300 Subject: [PATCH 054/101] namespaces: don't send a container config twice Signed-off-by: Andrey Vagin --- namespaces/execin.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/namespaces/execin.go b/namespaces/execin.go index 9b5b42c0f..2424f2de0 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -63,10 +63,6 @@ func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, containe return terminate(err) } - if err := json.NewEncoder(parent).Encode(container); err != nil { - return terminate(err) - } - return cmd.Process.Pid, nil } From 11b2dab1c50201e745f667d94efc69682670070b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 26 Jan 2015 13:56:13 +0300 Subject: [PATCH 055/101] nsenter: add a macros to print errors Signed-off-by: Andrey Vagin --- namespaces/nsenter/nsexec.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/namespaces/nsenter/nsexec.c b/namespaces/nsenter/nsexec.c index 95498bf07..90c293d0d 100644 --- a/namespaces/nsenter/nsexec.c +++ b/namespaces/nsenter/nsexec.c @@ -12,6 +12,8 @@ #include #include +#define pr_perror(fmt, ...) 
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__) + // Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12) #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 #define _GNU_SOURCE @@ -40,7 +42,7 @@ void nsexec() pid = atoi(val); snprintf(buf, sizeof(buf), "%d", pid); if (strcmp(val, buf)) { - fprintf(stderr, "Unable to parse _LIBCONTAINER_INITPID"); + pr_perror("Unable to parse _LIBCONTAINER_INITPID"); exit(1); } @@ -48,9 +50,7 @@ void nsexec() snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid); tfd = open(buf, O_DIRECTORY | O_RDONLY); if (tfd == -1) { - fprintf(stderr, - "nsenter: Failed to open \"%s\" with error: \"%s\"\n", - buf, strerror(errno)); + pr_perror("Failed to open \"%s\"", buf); exit(1); } @@ -67,16 +67,12 @@ void nsexec() fd = openat(tfd, namespaces[i], O_RDONLY); if (fd == -1) { - fprintf(stderr, - "nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n", - buf, namespaces[i], strerror(errno)); + pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]); exit(1); } // Set the namespace. if (setns(fd, 0) == -1) { - fprintf(stderr, - "nsenter: Failed to setns for \"%s\" with error: \"%s\"\n", - namespaces[i], strerror(errno)); + pr_perror("Failed to setns for %s", namespaces[i]); exit(1); } close(fd); @@ -84,7 +80,7 @@ void nsexec() child = fork(); if (child < 0) { - fprintf(stderr, "Unable to fork: %s", strerror(errno)); + pr_perror("Unable to fork"); exit(1); } // We must fork to actually enter the PID namespace. @@ -95,9 +91,7 @@ void nsexec() // Parent, wait for the child. int status = 0; if (waitpid(child, &status, 0) == -1) { - fprintf(stderr, - "nsenter: Failed to waitpid with error: \"%s\"\n", - strerror(errno)); + pr_perror("Failed to waitpid"); exit(1); } // Forward the child's exit code or re-send its death signal. 
From 82367938b778abbcb739c261b8381dc99e28ab80 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 26 Jan 2015 11:33:56 +0300 Subject: [PATCH 056/101] nsenter: remove a proxy process Currently nsexec() creates a proxy process to enter into a pid namespace. It isn't good, because we need to proxy an exit code and signals. We can use CLONE_PARENT to fork a process with the right parent. Signed-off-by: Andrey Vagin --- namespaces/execin.go | 40 ++++++++++++++++----- namespaces/nsenter/nsenter.go | 4 ++- namespaces/nsenter/nsexec.c | 68 ++++++++++++++++++++++++----------- 3 files changed, 82 insertions(+), 30 deletions(-) diff --git a/namespaces/execin.go b/namespaces/execin.go index 2424f2de0..cff1cca7d 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -16,6 +16,10 @@ import ( "github.com/docker/libcontainer/system" ) +type pid struct { + Pid int `json:"Pid"` +} + // ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the // setns code in a single threaded environment joining the existing containers' namespaces. func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) { @@ -36,13 +40,38 @@ func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, containe } child.Close() + s, err := cmd.Process.Wait() + if err != nil { + return -1, err + } + if !s.Success() { + return -1, &exec.ExitError{s} + } + + decoder := json.NewDecoder(parent) + var pid *pid + + if err := decoder.Decode(&pid); err != nil { + return -1, err + } + + p, err := os.FindProcess(pid.Pid) + if err != nil { + return -1, err + } + terminate := func(terr error) (int, error) { // TODO: log the errors for kill and wait - cmd.Process.Kill() - cmd.Wait() + p.Kill() + p.Wait() return -1, terr } + // Enter cgroups. 
+ if err := EnterCgroups(state, pid.Pid); err != nil { + return terminate(err) + } + encoder := json.NewEncoder(parent) if err := encoder.Encode(container); err != nil { @@ -58,12 +87,7 @@ func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, containe return terminate(err) } - // Enter cgroups. - if err := EnterCgroups(state, cmd.Process.Pid); err != nil { - return terminate(err) - } - - return cmd.Process.Pid, nil + return pid.Pid, nil } // Finalize entering into a container and execute a specified command diff --git a/namespaces/nsenter/nsenter.go b/namespaces/nsenter/nsenter.go index e0ade6ebe..880dca835 100644 --- a/namespaces/nsenter/nsenter.go +++ b/namespaces/nsenter/nsenter.go @@ -3,7 +3,9 @@ package nsenter /* -__attribute__((constructor)) init() { +#cgo CFLAGS: -Wall +extern void nsexec(); +void __attribute__((constructor)) init() { nsexec(); } */ diff --git a/namespaces/nsenter/nsexec.c b/namespaces/nsenter/nsexec.c index 90c293d0d..426dfc575 100644 --- a/namespaces/nsenter/nsexec.c +++ b/namespaces/nsenter/nsexec.c @@ -1,3 +1,4 @@ +#define _GNU_SOURCE #include #include #include @@ -11,13 +12,32 @@ #include #include #include +#include +#include +#include + +/* All arguments should be above stack, because it grows down */ +struct clone_arg { + /* + * Reserve some space for clone() to locate arguments + * and retcode in this place + */ + char stack[4096] __attribute__((aligned (8))); + char stack_ptr[0]; + jmp_buf *env; +}; + +static int child_func(void *_arg) +{ + struct clone_arg *arg = (struct clone_arg *) _arg; + longjmp(*arg->env, 1); +} #define pr_perror(fmt, ...) 
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__) // Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12) #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 #define _GNU_SOURCE -#include #include "syscall.h" #ifdef SYS_setns int setns(int fd, int nstype) @@ -27,12 +47,25 @@ int setns(int fd, int nstype) #endif #endif +static int clone_parent(jmp_buf *env) __attribute__ ((noinline)); +static int clone_parent(jmp_buf *env) +{ + struct clone_arg ca; + int child; + + ca.env = env; + child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca); + + return child; +} + void nsexec() { char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" }; const int num = sizeof(namespaces) / sizeof(char *); + jmp_buf env; char buf[PATH_MAX], *val; - int child, i, tfd; + int i, tfd, child, len; pid_t pid; val = getenv("_LIBCONTAINER_INITPID"); @@ -78,31 +111,24 @@ void nsexec() close(fd); } - child = fork(); + if (setjmp(env) == 1) { + // Finish executing, let the Go runtime take over. + return; + } + + child = clone_parent(&env); if (child < 0) { pr_perror("Unable to fork"); exit(1); } - // We must fork to actually enter the PID namespace. - if (child == 0) { - // Finish executing, let the Go runtime take over. - return; - } else { - // Parent, wait for the child. - int status = 0; - if (waitpid(child, &status, 0) == -1) { - pr_perror("Failed to waitpid"); - exit(1); - } - // Forward the child's exit code or re-send its death signal. 
- if (WIFEXITED(status)) { - exit(WEXITSTATUS(status)); - } else if (WIFSIGNALED(status)) { - kill(getpid(), WTERMSIG(status)); - } + len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child); + + if (write(3, buf, len) != len) { + pr_perror("Unable to send a child pid"); + kill(child, SIGKILL); exit(1); } - return; + exit(0); } From 8d8242aa8a3e5869ceab2b2240e4d7001ca8d33e Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 26 Jan 2015 14:05:29 +0300 Subject: [PATCH 057/101] nsenter: add tests Signed-off-by: Andrey Vagin --- namespaces/nsenter/nsenter_test.go | 99 ++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 namespaces/nsenter/nsenter_test.go diff --git a/namespaces/nsenter/nsenter_test.go b/namespaces/nsenter/nsenter_test.go new file mode 100644 index 000000000..274d67d04 --- /dev/null +++ b/namespaces/nsenter/nsenter_test.go @@ -0,0 +1,99 @@ +package nsenter + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "os/signal" + "strings" + "syscall" + "testing" +) + +type pid struct { + Pid int `json:"Pid"` +} + +func TestNsenterAlivePid(t *testing.T) { + args := []string{"nsenter-exec"} + r, w, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create pipe %v", err) + } + + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + ExtraFiles: []*os.File{w}, + Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid())}, + } + + if err := cmd.Start(); err != nil { + t.Fatalf("nsenter failed to start %v", err) + } + w.Close() + + decoder := json.NewDecoder(r) + var pid *pid + + if err := decoder.Decode(&pid); err != nil { + t.Fatalf("%v", err) + } + + if err := cmd.Wait(); err != nil { + t.Fatalf("nsenter exits with a non-zero exit status") + } + p, err := os.FindProcess(pid.Pid) + if err != nil { + t.Fatalf("%v", err) + } + p.Wait() +} + +func TestNsenterInvalidPid(t *testing.T) { + args := []string{"nsenter-exec"} + + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + Env: 
[]string{"_LIBCONTAINER_INITPID=-1"}, + } + + err := cmd.Run() + if err == nil { + t.Fatal("nsenter exits with a zero exit status") + } +} + +func TestNsenterDeadPid(t *testing.T) { + + c := make(chan os.Signal) + signal.Notify(c, syscall.SIGCHLD) + dead_cmd := exec.Command("true") + if err := dead_cmd.Start(); err != nil { + t.Fatal(err) + } + defer dead_cmd.Wait() + <-c // dead_cmd is zombie + + args := []string{"nsenter-exec"} + + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", dead_cmd.Process.Pid)}, + } + + err := cmd.Run() + if err == nil { + t.Fatal("nsenter exits with a zero exit status") + } +} + +func init() { + if strings.HasPrefix(os.Args[0], "nsenter-") { + os.Exit(0) + } + return +} From ca633b2f29bc948237b93738e2d602ad5bf0b64f Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 27 Jan 2015 15:54:19 +0300 Subject: [PATCH 058/101] Merge remote-tracking branch 'origin/master' into api Signed-off-by: Andrey Vagin --- Dockerfile | 3 +- MAINTAINERS | 1 + Makefile | 8 +- ROADMAP.md | 6 +- SPEC.md | 25 ++ cgroups/cgroups.go | 2 + cgroups/fs/apply_raw.go | 2 +- cgroups/fs/blkio.go | 10 +- cgroups/fs/cpuset.go | 17 +- cgroups/fs/memory.go | 9 +- cgroups/fs/stats_util_test.go | 6 +- cgroups/systemd/apply_systemd.go | 7 +- cgroups/utils.go | 34 ++- configs/config.go | 66 ++++- configs/config_test.go | 19 +- console/console.go | 4 +- integration/exec_test.go | 22 +- integration/template_test.go | 14 +- linux_factory.go | 5 +- mount/init.go | 13 +- mount/mount_config.go | 5 + mount/pivotroot.go | 13 +- mount/ptmx.go | 4 +- namespaces/create.go | 10 - namespaces/exec.go | 218 ++++++++++++++- namespaces/execin.go | 70 +++++ namespaces/init.go | 158 +++++++++-- namespaces/utils.go | 23 +- netlink/netlink_linux.go | 5 +- network/network.go | 12 + cgroups/fs/notify_linux.go => notify_linux.go | 51 ++-- ...tify_linux_test.go => notify_linux_test.go | 44 +-- nsinit/main.go | 5 +- nsinit/oom.go | 29 ++ 
sample_configs/apparmor.json | 10 +- sample_configs/attach_to_bridge.json | 10 +- sample_configs/host-pid.json | 200 ++++++++++++++ sample_configs/minimal.json | 10 +- .../route_source_address_selection.json | 10 +- sample_configs/selinux.json | 10 +- sample_configs/userns.json | 251 ++++++++++++++++++ user/MAINTAINERS | 1 + user/lookup_unix.go | 16 +- user/lookup_unsupported.go | 4 +- user/user.go | 4 +- 45 files changed, 1238 insertions(+), 208 deletions(-) delete mode 100644 namespaces/create.go rename cgroups/fs/notify_linux.go => notify_linux.go (54%) rename cgroups/fs/notify_linux_test.go => notify_linux_test.go (66%) create mode 100644 nsinit/oom.go create mode 100644 sample_configs/host-pid.json create mode 100644 sample_configs/userns.json diff --git a/Dockerfile b/Dockerfile index 614e5979b..0771c808e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,5 @@ -FROM crosbymichael/golang +FROM golang:1.4 -RUN apt-get update && apt-get install -y gcc make RUN go get golang.org/x/tools/cmd/cover ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor diff --git a/MAINTAINERS b/MAINTAINERS index 7295c6038..523513172 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2,4 +2,5 @@ Michael Crosby (@crosbymichael) Rohit Jnagal (@rjnagal) Victor Marmol (@vmarmol) Mrunal Patel (@mrunalp) +Alexandr Morozov (@LK4D4) update-vendor.sh: Tianon Gravi (@tianon) diff --git a/Makefile b/Makefile index bc7f17851..c7cec0b9b 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,13 @@ all: - docker build -t docker/libcontainer . + docker build -t dockercore/libcontainer . test: # we need NET_ADMIN for the netlink tests and SYS_ADMIN for mounting - docker run --rm -it --privileged docker/libcontainer + docker run --rm -it --privileged dockercore/libcontainer sh: - docker run --rm -it --privileged -w /busybox docker/libcontainer nsinit exec sh + docker run --rm -it --privileged -w /busybox dockercore/libcontainer nsinit exec sh GO_PACKAGES = $(shell find . 
-not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u) @@ -23,3 +23,5 @@ direct-build: direct-install: go install -v $(GO_PACKAGES) +local: + go test -v diff --git a/ROADMAP.md b/ROADMAP.md index 08deb9ada..f59035351 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -13,4 +13,8 @@ Our goal is to make libcontainer run everywhere, but currently libcontainer requ ## Cross-architecture support -Our goal is to make libcontainer run everywhere. However currently libcontainer only runs on x86_64 systems. We plan on expanding architecture support, so that libcontainer containers can be created and used on more architectures. +Our goal is to make libcontainer run everywhere. Recently libcontainer has +expanded from its initial support for x86_64 systems to include POWER (ppc64 +little and big endian variants), IBM System z (s390x 64-bit), and ARM. We plan +to continue expanding architecture support such that libcontainer containers +can be created and used on more architectures. diff --git a/SPEC.md b/SPEC.md index f5afaadc5..d83d758dd 100644 --- a/SPEC.md +++ b/SPEC.md @@ -318,4 +318,29 @@ a container. | Resume | Resume all processes inside the container if paused | | Exec | Execute a new process inside of the container ( requires setns ) | +### Execute a new process inside of a running container. +A user can execute a new process inside of a running container. Any binaries to be +executed must be accessible within the container's rootfs. + +The started process will run inside the container's rootfs. Any changes +made by the process to the container's filesystem will persist after the +process has finished executing. + +The started process will join all the container's existing namespaces. When the +container is paused, the process will also be paused and will resume when +the container is unpaused.
The started process will only run when the container's +primary process (PID 1) is running, and will not be restarted when the container +is restarted. + +#### Planned additions + +The started process will have its own cgroups nested inside the container's +cgroups. This is used for process tracking and optionally resource allocation +handling for the new process. Freezer cgroup is required, the rest of the cgroups +are optional. The process executor must place its pid inside the correct +cgroups before starting the process. This is done so that no child processes or +threads can escape the cgroups. + +When the process is stopped, the process executor will try (in a best-effort way) +to stop all its children and remove the sub-cgroups. diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index 894c8125f..60b1135a3 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -77,6 +77,8 @@ type Cgroup struct { CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use + CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use + BlkioWeight int64 `json:"blkio_weight,omitempty"` // Specifies per cgroup weight, range is from 10 to 1000. 
Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process Slice string `json:"slice,omitempty"` // Parent slice to use for systemd } diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 930738bf3..11d35d7a4 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -124,7 +124,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { stats := cgroups.NewStats() for name, path := range m.Paths { sys, ok := subsystems[name] - if !ok { + if !ok || !cgroups.PathExists(path) { continue } if err := sys.GetStats(path, stats); err != nil { diff --git a/cgroups/fs/blkio.go b/cgroups/fs/blkio.go index ce824d56c..b64e46843 100644 --- a/cgroups/fs/blkio.go +++ b/cgroups/fs/blkio.go @@ -15,11 +15,17 @@ type BlkioGroup struct { } func (s *BlkioGroup) Set(d *data) error { - // we just want to join this group even though we don't set anything - if _, err := d.join("blkio"); err != nil && !cgroups.IsNotFound(err) { + dir, err := d.join("blkio") + if err != nil && !cgroups.IsNotFound(err) { return err } + if d.c.BlkioWeight != 0 { + if err := writeFile(dir, "blkio.weight", strconv.FormatInt(d.c.BlkioWeight, 10)); err != nil { + return err + } + } + return nil } diff --git a/cgroups/fs/cpuset.go b/cgroups/fs/cpuset.go index 54d2ed572..ff67a53e8 100644 --- a/cgroups/fs/cpuset.go +++ b/cgroups/fs/cpuset.go @@ -18,7 +18,7 @@ func (s *CpusetGroup) Set(d *data) error { if err != nil { return err } - return s.SetDir(dir, d.c.CpusetCpus, d.pid) + return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid) } func (s *CpusetGroup) Remove(d *data) error { @@ -29,7 +29,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } -func (s *CpusetGroup) SetDir(dir, value string, pid int) error { +func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error { if err := s.ensureParent(dir); err != nil { return err } @@ -40,10 +40,15 @@ func (s *CpusetGroup) SetDir(dir, value string, pid int) 
error { return err } - // If we don't use --cpuset, the default cpuset.cpus is set in - // s.ensureParent, otherwise, use the value we set - if value != "" { - if err := writeFile(dir, "cpuset.cpus", value); err != nil { + // If we don't use --cpuset-xxx, the default value inherit from parent cgroup + // is set in s.ensureParent, otherwise, use the value we set + if cpus != "" { + if err := writeFile(dir, "cpuset.cpus", cpus); err != nil { + return err + } + } + if mems != "" { + if err := writeFile(dir, "cpuset.mems", mems); err != nil { return err } } diff --git a/cgroups/fs/memory.go b/cgroups/fs/memory.go index 3f9647c2f..01713fd79 100644 --- a/cgroups/fs/memory.go +++ b/cgroups/fs/memory.go @@ -38,12 +38,17 @@ func (s *MemoryGroup) Set(d *data) error { } } // By default, MemorySwap is set to twice the size of RAM. - // If you want to omit MemorySwap, set it to `-1'. - if d.c.MemorySwap != -1 { + // If you want to omit MemorySwap, set it to '-1'. + if d.c.MemorySwap == 0 { if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil { return err } } + if d.c.MemorySwap > 0 { + if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.MemorySwap, 10)); err != nil { + return err + } + } } return nil } diff --git a/cgroups/fs/stats_util_test.go b/cgroups/fs/stats_util_test.go index 1a9e590f5..c55ba938c 100644 --- a/cgroups/fs/stats_util_test.go +++ b/cgroups/fs/stats_util_test.go @@ -53,7 +53,7 @@ func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) { } if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil { - log.Printf("blkio IoMergedRecursive do not match - %s vs %s\n", expected.IoMergedRecursive, actual.IoMergedRecursive) + log.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive) t.Fail() } @@ -90,4 +90,8 @@ func expectMemoryStatEquals(t *testing.T, expected, actual 
cgroups.MemoryStats) t.Fail() } } + if expected.Failcnt != actual.Failcnt { + log.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt) + t.Fail() + } } diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 05b97444e..7143a5951 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -118,6 +118,11 @@ func (m *Manager) Apply(pid int) error { newProp("CPUShares", uint64(c.CpuShares))) } + if c.BlkioWeight != 0 { + properties = append(properties, + newProp("BlockIOWeight", uint64(c.BlkioWeight))) + } + if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { return err } @@ -330,5 +335,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error { s := &fs.CpusetGroup{} - return s.SetDir(path, c.CpusetCpus, pid) + return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid) } diff --git a/cgroups/utils.go b/cgroups/utils.go index 224a20b9b..a360904cc 100644 --- a/cgroups/utils.go +++ b/cgroups/utils.go @@ -9,6 +9,7 @@ import ( "path/filepath" "strconv" "strings" + "time" "github.com/docker/docker/pkg/mount" ) @@ -173,7 +174,7 @@ func ParseCgroupFile(subsystem string, r io.Reader) (string, error) { return "", NewNotFoundError(subsystem) } -func pathExists(path string) bool { +func PathExists(path string) bool { if _, err := os.Stat(path); err != nil { return false } @@ -182,7 +183,7 @@ func pathExists(path string) bool { func EnterPid(cgroupPaths map[string]string, pid int) error { for _, path := range cgroupPaths { - if pathExists(path) { + if PathExists(path) { if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700); err != nil { return err @@ -193,13 +194,30 @@ func EnterPid(cgroupPaths map[string]string, pid int) error { } // RemovePaths iterates over the provided paths removing them. 
-// If an error is encountered the removal proceeds and the first error is -// returned to ensure a partial removal is not possible. +// We try to remove all paths up to five times, with an increasing delay between tries. +// If some cgroups still have not been removed after that, an appropriate error is +// returned. func RemovePaths(paths map[string]string) (err error) { - for _, path := range paths { - if rerr := os.RemoveAll(path); err == nil { - err = rerr + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + for s, p := range paths { + os.RemoveAll(p) + // TODO: here probably should be logging + _, err := os.Stat(p) + // We need this strange way of checking cgroups existence because + // RemoveAll almost always returns error, even on already removed + // cgroups + if os.IsNotExist(err) { + delete(paths, s) + } + } + if len(paths) == 0 { + return nil } } - return err + return fmt.Errorf("Failed to remove paths: %s", paths) } diff --git a/configs/config.go b/configs/config.go index ab40b2b4b..d1e03f61c 100644 --- a/configs/config.go +++ b/configs/config.go @@ -10,11 +10,55 @@ type MountConfig mount.MountConfig type Network network.Network +type NamespaceType string + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" +) + // Namespace defines configuration for each namespace. It specifies an // alternate path that is able to be joined via setns. type Namespace struct { - Name string `json:"name"` - Path string `json:"path,omitempty"` + Type NamespaceType `json:"type"` + Path string `json:"path,omitempty"` +} + +type Namespaces []Namespace + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...)
+ return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 } // Config defines configuration options for executing a process inside a contained environment. @@ -45,7 +89,7 @@ type Config struct { // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process - Namespaces []Namespace `json:"namespaces,omitempty"` + Namespaces Namespaces `json:"namespaces,omitempty"` // Capabilities specify the capabilities to keep when executing the process inside the container // All capbilities not specified will be dropped from the processes capability mask @@ -76,6 +120,15 @@ type Config struct { // Rlimits specifies the resource limits, such as max open files, to set in the container // If Rlimits are not set, the container will inherit rlimits from the parent process Rlimits []Rlimit `json:"rlimits,omitempty"` + + // AdditionalGroups specifies the gids that should be added to supplementary groups + // in addition to those that the user belongs to. 
+ AdditionalGroups []int `json:"additional_groups,omitempty"` + // UidMappings is an array of User ID mappings for User Namespaces + UidMappings []IDMap `json:"uid_mappings,omitempty"` + + // GidMappings is an array of Group ID mappings for User Namespaces + GidMappings []IDMap `json:"gid_mappings,omitempty"` } // Routes can be specified to create entries in the route table as the container is started @@ -104,3 +157,10 @@ type Rlimit struct { Hard uint64 `json:"hard,omitempty"` Soft uint64 `json:"soft,omitempty"` } + +// IDMap represents UID/GID Mappings for User Namespaces. +type IDMap struct { + ContainerID int `json:"container_id,omitempty"` + HostID int `json:"host_id,omitempty"` + Size int `json:"size,omitempty"` +} diff --git a/configs/config_test.go b/configs/config_test.go index f698e3d9a..d64066c42 100644 --- a/configs/config_test.go +++ b/configs/config_test.go @@ -64,12 +64,12 @@ func TestConfigJsonFormat(t *testing.T) { t.Fail() } - if getNamespaceIndex(container, "NEWNET") == -1 { + if !container.Namespaces.Contains(NEWNET) { t.Log("namespaces should contain NEWNET") t.Fail() } - if getNamespaceIndex(container, "NEWUSER") != -1 { + if container.Namespaces.Contains(NEWUSER) { t.Log("namespaces should not contain NEWUSER") t.Fail() } @@ -159,11 +159,14 @@ func TestSelinuxLabels(t *testing.T) { } } -func getNamespaceIndex(config *Config, name string) int { - for i, v := range config.Namespaces { - if v.Name == name { - return i - } +func TestRemoveNamespace(t *testing.T) { + ns := Namespaces{ + {Type: NEWNET}, + } + if !ns.Remove(NEWNET) { + t.Fatal("NEWNET was not removed") + } + if len(ns) != 0 { + t.Fatalf("namespaces should have 0 items but reports %d", len(ns)) } - return -1 } diff --git a/console/console.go b/console/console.go index 438e67042..69af70c19 100644 --- a/console/console.go +++ b/console/console.go @@ -13,7 +13,7 @@ import ( ) // Setup initializes the proper /dev/console inside the rootfs path -func Setup(rootfs, consolePath, mountLabel 
string) error { +func Setup(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error { oldMask := syscall.Umask(0000) defer syscall.Umask(oldMask) @@ -21,7 +21,7 @@ func Setup(rootfs, consolePath, mountLabel string) error { return err } - if err := os.Chown(consolePath, 0, 0); err != nil { + if err := os.Chown(consolePath, hostRootUid, hostRootGid); err != nil { return err } diff --git a/integration/exec_test.go b/integration/exec_test.go index 8132e25b8..9ec617d13 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -71,7 +71,7 @@ func TestIPCPrivate(t *testing.T) { } if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l { - t.Fatalf("ipc link should be private to the conatiner but equals host %q %q", actual, l) + t.Fatalf("ipc link should be private to the container but equals host %q %q", actual, l) } } @@ -92,8 +92,7 @@ func TestIPCHost(t *testing.T) { } config := newTemplateConfig(rootfs) - i := getNamespaceIndex(config, "NEWIPC") - config.Namespaces = append(config.Namespaces[:i], config.Namespaces[i+1:]...) 
+ config.Namespaces.Remove(configs.NEWIPC) buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") if err != nil { t.Fatal(err) @@ -125,8 +124,7 @@ func TestIPCJoinPath(t *testing.T) { } config := newTemplateConfig(rootfs) - i := getNamespaceIndex(config, "NEWIPC") - config.Namespaces[i].Path = "/proc/1/ns/ipc" + config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipc") buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") if err != nil { @@ -154,12 +152,11 @@ func TestIPCBadPath(t *testing.T) { defer remove(rootfs) config := newTemplateConfig(rootfs) - i := getNamespaceIndex(config, "NEWIPC") - config.Namespaces[i].Path = "/proc/1/ns/ipcc" + config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipcc") _, _, err = runContainer(config, "", "true") if err == nil { - t.Fatal("container succeded with bad ipc path") + t.Fatal("container succeeded with bad ipc path") } } @@ -184,15 +181,6 @@ func TestRlimit(t *testing.T) { } } -func getNamespaceIndex(config *configs.Config, name string) int { - for i, v := range config.Namespaces { - if v.Name == name { - return i - } - } - return -1 -} - func newTestRoot() (string, error) { dir, err := ioutil.TempDir("", "libcontainer") if err != nil { diff --git a/integration/template_test.go b/integration/template_test.go index 834099d27..372cc6953 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -32,13 +32,13 @@ func newTemplateConfig(rootfs string) *configs.Config { "KILL", "AUDIT_WRITE", }, - Namespaces: []configs.Namespace{ - {Name: "NEWNS"}, - {Name: "NEWUTS"}, - {Name: "NEWIPC"}, - {Name: "NEWPID"}, - {Name: "NEWNET"}, - }, + Namespaces: configs.Namespaces([]configs.Namespace{ + {Type: configs.NEWNS}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + {Type: configs.NEWPID}, + {Type: configs.NEWNET}, + }), Cgroups: &cgroups.Cgroup{ Name: "test", Parent: "integration", diff --git a/linux_factory.go b/linux_factory.go index 542331d44..b88a66def 
100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -168,10 +168,11 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { pipe := os.NewFile(uintptr(pipefd), "pipe") + setupUserns := os.Getenv("_LIBCONTAINER_USERNS") pid := os.Getenv("_LIBCONTAINER_INITPID") - if pid != "" { + if pid != "" && setupUserns == "" { return namespaces.InitIn(pipe) } - return namespaces.Init(pipe) + return namespaces.Init(pipe, setupUserns != "") } diff --git a/mount/init.go b/mount/init.go index a2c3d5202..91a27294a 100644 --- a/mount/init.go +++ b/mount/init.go @@ -25,7 +25,7 @@ type mount struct { // InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a // new mount namespace. -func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountConfig *MountConfig) error { +func InitializeMountNamespace(rootfs, console string, sysReadonly bool, hostRootUid, hostRootGid int, mountConfig *MountConfig) error { var ( err error flag = syscall.MS_PRIVATE @@ -58,14 +58,17 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon return fmt.Errorf("create device nodes %s", err) } - if err := SetupPtmx(rootfs, console, mountConfig.MountLabel); err != nil { + if err := SetupPtmx(rootfs, console, mountConfig.MountLabel, hostRootUid, hostRootGid); err != nil { return err } // stdin, stdout and stderr could be pointing to /dev/null from parent namespace. // Re-open them inside this namespace. - if err := reOpenDevNull(rootfs); err != nil { - return fmt.Errorf("Failed to reopen /dev/null %s", err) + // FIXME: Need to fix this for user namespaces. 
+ if hostRootUid == 0 { + if err := reOpenDevNull(rootfs); err != nil { + return fmt.Errorf("Failed to reopen /dev/null %s", err) + } } if err := setupDevSymlinks(rootfs); err != nil { @@ -79,7 +82,7 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon if mountConfig.NoPivotRoot { err = MsMoveRoot(rootfs) } else { - err = PivotRoot(rootfs) + err = PivotRoot(rootfs, mountConfig.PivotDir) } if err != nil { diff --git a/mount/mount_config.go b/mount/mount_config.go index eef9b8ce4..f19465e60 100644 --- a/mount/mount_config.go +++ b/mount/mount_config.go @@ -13,6 +13,11 @@ type MountConfig struct { // This is a common option when the container is running in ramdisk NoPivotRoot bool `json:"no_pivot_root,omitempty"` + // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. + // When a custom PivotDir is not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. + // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
+ PivotDir string `json:"pivot_dir,omitempty"` + // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted // bind mounts are writtable ReadonlyFs bool `json:"readonly_fs,omitempty"` diff --git a/mount/pivotroot.go b/mount/pivotroot.go index a88ed4a84..acc3be24c 100644 --- a/mount/pivotroot.go +++ b/mount/pivotroot.go @@ -10,8 +10,15 @@ import ( "syscall" ) -func PivotRoot(rootfs string) error { - pivotDir, err := ioutil.TempDir(rootfs, ".pivot_root") +func PivotRoot(rootfs, pivotBaseDir string) error { + if pivotBaseDir == "" { + pivotBaseDir = "/" + } + tmpDir := filepath.Join(rootfs, pivotBaseDir) + if err := os.MkdirAll(tmpDir, 0755); err != nil { + return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err) + } + pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root") if err != nil { return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err) } @@ -25,7 +32,7 @@ func PivotRoot(rootfs string) error { } // path to pivot dir now changed, update - pivotDir = filepath.Join("/", filepath.Base(pivotDir)) + pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { return fmt.Errorf("unmount pivot_root dir %s", err) } diff --git a/mount/ptmx.go b/mount/ptmx.go index c316481ad..5b558775b 100644 --- a/mount/ptmx.go +++ b/mount/ptmx.go @@ -10,7 +10,7 @@ import ( "github.com/docker/libcontainer/console" ) -func SetupPtmx(rootfs, consolePath, mountLabel string) error { +func SetupPtmx(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error { ptmx := filepath.Join(rootfs, "dev/ptmx") if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err @@ -21,7 +21,7 @@ func SetupPtmx(rootfs, consolePath, mountLabel string) error { } if consolePath != "" { - if err := console.Setup(rootfs, consolePath, mountLabel); err != nil { + if err := console.Setup(rootfs, consolePath, mountLabel, hostRootUid, hostRootGid); err != 
nil { return err } } diff --git a/namespaces/create.go b/namespaces/create.go deleted file mode 100644 index 30de84cee..000000000 --- a/namespaces/create.go +++ /dev/null @@ -1,10 +0,0 @@ -package namespaces - -import ( - "os" - "os/exec" - - "github.com/docker/libcontainer/configs" -) - -type CreateCommand func(container *configs.Config, console, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd diff --git a/namespaces/exec.go b/namespaces/exec.go index 68c3a2be3..0c0f6cf6c 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -4,6 +4,7 @@ package namespaces import ( "encoding/json" + "fmt" "io" "os" "os/exec" @@ -15,6 +16,99 @@ import ( "github.com/docker/libcontainer/system" ) +const ( + EXIT_SIGNAL_OFFSET = 128 +) + +func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *network.NetworkState) error { + command := exec.Command(args[0], args[1:]...) + + parent, child, err := newInitPipe() + if err != nil { + return err + } + defer parent.Close() + command.ExtraFiles = []*os.File{child} + + command.Dir = container.RootFs + command.Env = append(command.Env, + fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), + fmt.Sprintf("_LIBCONTAINER_USERNS=1")) + + err = command.Start() + child.Close() + if err != nil { + return err + } + + s, err := command.Process.Wait() + if err != nil { + return err + } + if !s.Success() { + return &exec.ExitError{s} + } + + decoder := json.NewDecoder(parent) + var pid *pid + + if err := decoder.Decode(&pid); err != nil { + return err + } + + p, err := os.FindProcess(pid.Pid) + if err != nil { + return err + } + + terminate := func(terr error) error { + // TODO: log the errors for kill and wait + p.Kill() + p.Wait() + return terr + } + + encoder := json.NewEncoder(parent) + + if err := encoder.Encode(container); err != nil { + return terminate(err) + } + + if err := encoder.Encode(process); err != nil { + return terminate(err) + } + + // send the state to the 
container's init process then shutdown writes for the parent + if err := encoder.Encode(networkState); err != nil { + return terminate(err) + } + + // shutdown writes for the parent side of the pipe + if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { + return terminate(err) + } + + // wait for the child process to fully complete and receive an error message + // if one was encountered + var ierr *initError + if err := decoder.Decode(&ierr); err != nil && err != io.EOF { + return terminate(err) + } + if ierr != nil { + return ierr + } + + s, err = p.Wait() + if err != nil { + return err + } + if !s.Success() { + return &exec.ExitError{s} + } + + return nil +} + // TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. // Move this to libcontainer package. // Exec performs setup outside of a namespace so that a container can be @@ -32,16 +126,35 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai command.Dir = container.RootFs command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces)) + if container.Namespaces.Contains(configs.NEWUSER) { + AddUidGidMappings(command.SysProcAttr, container) + + // Default to root user when user namespaces are enabled.
+ if command.SysProcAttr.Credential == nil { + command.SysProcAttr.Credential = &syscall.Credential{} + } + } + if err := command.Start(); err != nil { child.Close() return err } child.Close() + wait := func() (*os.ProcessState, error) { + ps, err := command.Process.Wait() + // we should kill all processes in cgroup when init is died if we use + // host PID namespace + if !container.Namespaces.Contains(configs.NEWPID) { + killAllPids(cgroupManager) + } + return ps, err + } + terminate := func(terr error) error { // TODO: log the errors for kill and wait command.Process.Kill() - command.Wait() + wait() return terr } @@ -81,6 +194,14 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { return terminate(err) } + + // Start the setup process to setup the init process + if container.Namespaces.Contains(configs.NEWUSER) { + if err = executeSetupCmd(command.Args, command.Process.Pid, container, &process, &networkState); err != nil { + return terminate(err) + } + } + // send the state to the container's init process then shutdown writes for the parent if err := encoder.Encode(networkState); err != nil { return terminate(err) @@ -108,6 +229,101 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai return nil } +// killAllPids iterates over all of the container's processes +// sending a SIGKILL to each process. 
+func killAllPids(m cgroups.Manager) error { + var ( + procs []*os.Process + ) + m.Freeze(cgroups.Frozen) + pids, err := m.GetPids() + if err != nil { + return err + } + for _, pid := range pids { + // TODO: log err without aborting if we are unable to find + // a single PID + if p, err := os.FindProcess(pid); err == nil { + procs = append(procs, p) + p.Kill() + } + } + m.Freeze(cgroups.Thawed) + for _, p := range procs { + p.Wait() + } + return err +} + +// Utility function that gets a host ID for a container ID from user namespace map +// if that ID is present in the map. +func hostIDFromMapping(containerID int, uMap []configs.IDMap) (int, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false +} + +// Gets the root gid for the process on host which could be non-zero +// when user namespaces are enabled. +func GetHostRootGid(container *configs.Config) (int, error) { + if container.Namespaces.Contains(configs.NEWUSER) { + if container.GidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + } + hostRootGid, found := hostIDFromMapping(0, container.GidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return hostRootGid, nil + } + + // Return default root gid 0 + return 0, nil +} + +// Gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled.
+func GetHostRootUid(container *configs.Config) (int, error) { + if container.Namespaces.Contains(configs.NEWUSER) { + if container.UidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") + } + hostRootUid, found := hostIDFromMapping(0, container.UidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return hostRootUid, nil + } + + // Return default root uid 0 + return 0, nil +} + +// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. +func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { + if container.UidMappings != nil { + sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings)) + for i, um := range container.UidMappings { + sys.UidMappings[i].ContainerID = um.ContainerID + sys.UidMappings[i].HostID = um.HostID + sys.UidMappings[i].Size = um.Size + } + } + + if container.GidMappings != nil { + sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings)) + for i, gm := range container.GidMappings { + sys.GidMappings[i].ContainerID = gm.ContainerID + sys.GidMappings[i].HostID = gm.HostID + sys.GidMappings[i].Size = gm.Size + } + } +} + // InitializeNetworking creates the container's network stack outside of the namespace and moves // interfaces into the container's net namespaces if necessary func InitializeNetworking(container *configs.Config, nspid int, networkState *network.NetworkState) error { diff --git a/namespaces/execin.go b/namespaces/execin.go index cff1cca7d..514ba9992 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -8,12 +8,16 @@ import ( "io/ioutil" "os" "os/exec" + "syscall" "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/mount" + "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/system" + 
"github.com/docker/libcontainer/utils" ) type pid struct { @@ -140,6 +144,10 @@ func FinalizeSetns(container *configs.Config) error { return err } + if err := setupRlimits(container); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + if err := FinalizeNamespace(container); err != nil { return err } @@ -157,6 +165,68 @@ func FinalizeSetns(container *configs.Config) error { return nil } +// SetupContainer is run to setup mounts and networking related operations +// for a user namespace enabled process as a user namespace root doesn't +// have permissions to perform these operations. +// The setup process joins all the namespaces of user namespace enabled init +// except the user namespace, so it run as root in the root user namespace +// to perform these operations. +func SetupContainer(container *configs.Config, networkState *network.NetworkState, consolePath string) error { + rootfs, err := utils.ResolveRootfs(container.RootFs) + if err != nil { + return err + } + + // clear the current processes env and replace it with the environment + // defined on the container + if err := LoadContainerEnvironment(container); err != nil { + return err + } + + cloneFlags := GetNamespaceFlags(container.Namespaces) + + if (cloneFlags & syscall.CLONE_NEWNET) == 0 { + if len(container.Networks) != 0 || len(container.Routes) != 0 { + return fmt.Errorf("unable to apply network parameters without network namespace") + } + } else { + if err := setupNetwork(container, networkState); err != nil { + return fmt.Errorf("setup networking %s", err) + } + if err := setupRoute(container); err != nil { + return fmt.Errorf("setup route %s", err) + } + } + + label.Init() + + hostRootUid, err := GetHostRootUid(container) + if err != nil { + return fmt.Errorf("failed to get hostRootUid %s", err) + } + + hostRootGid, err := GetHostRootGid(container) + if err != nil { + return fmt.Errorf("failed to get hostRootGid %s", err) + } + + // InitializeMountNamespace() can be executed only for a 
new mount namespace + if (cloneFlags & syscall.CLONE_NEWNS) == 0 { + if container.MountConfig != nil { + return fmt.Errorf("mount config is set without mount namespace") + } + } else if err := mount.InitializeMountNamespace(rootfs, + consolePath, + container.RestrictSys, + hostRootUid, + hostRootGid, + (*mount.MountConfig)(container.MountConfig)); err != nil { + return fmt.Errorf("setup mount namespace %s", err) + } + + return nil +} + func EnterCgroups(state *configs.State, pid int) error { return cgroups.EnterPid(state.CgroupPaths, pid) } diff --git a/namespaces/init.go b/namespaces/init.go index 441b3c340..01f721141 100644 --- a/namespaces/init.go +++ b/namespaces/init.go @@ -37,7 +37,7 @@ type processArgs struct { // and other options required for the new container. // The caller of Init function has to ensure that the go runtime is locked to an OS thread // (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. -func Init(pipe *os.File) (err error) { +func Init(pipe *os.File, setupUserns bool) (err error) { defer func() { // if we have an error during the initialization of the container's init then send it back to the // parent process in the form of an initError. 
@@ -72,6 +72,29 @@ func Init(pipe *os.File) (err error) { return err } + // We always read this as it is a way to sync with the parent as well + var networkState *network.NetworkState + if err := decoder.Decode(&networkState); err != nil { + return err + } + + if setupUserns { + err = SetupContainer(container, networkState, process.ConsolePath) + if err == nil { + os.Exit(0) + } else { + os.Exit(1) + } + } + + if container.Namespaces.Contains(configs.NEWUSER) { + return initUserNs(container, uncleanRootfs, process, networkState) + } else { + return initDefault(container, uncleanRootfs, process, networkState) + } +} + +func initDefault(container *configs.Config, uncleanRootfs string, process *processArgs, networkState *network.NetworkState) (err error) { rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err @@ -83,11 +106,6 @@ func Init(pipe *os.File) (err error) { return err } - // We always read this as it is a way to sync with the parent as well - var networkState *network.NetworkState - if err := decoder.Decode(&networkState); err != nil { - return err - } // join any namespaces via a path to the namespace fd if provided if err := joinExistingNamespaces(container.Namespaces); err != nil { return err @@ -106,11 +124,19 @@ func Init(pipe *os.File) (err error) { } } - if err := setupNetwork(container, networkState); err != nil { - return fmt.Errorf("setup networking %s", err) - } - if err := setupRoute(container); err != nil { - return fmt.Errorf("setup route %s", err) + cloneFlags := GetNamespaceFlags(container.Namespaces) + + if (cloneFlags & syscall.CLONE_NEWNET) == 0 { + if len(container.Networks) != 0 || len(container.Routes) != 0 { + return fmt.Errorf("unable to apply network parameters without network namespace") + } + } else { + if err := setupNetwork(container, networkState); err != nil { + return fmt.Errorf("setup networking %s", err) + } + if err := setupRoute(container); err != nil { + return fmt.Errorf("setup route %s", err) + } 
} if err := setupRlimits(container); err != nil { @@ -119,14 +145,24 @@ func Init(pipe *os.File) (err error) { label.Init() - if err := mount.InitializeMountNamespace(rootfs, + // InitializeMountNamespace() can be executed only for a new mount namespace + if (cloneFlags & syscall.CLONE_NEWNS) == 0 { + if container.MountConfig != nil { + return fmt.Errorf("mount config is set without mount namespace") + } + } else if err := mount.InitializeMountNamespace(rootfs, process.ConsolePath, container.RestrictSys, + 0, // Default Root Uid + 0, // Default Root Gid (*mount.MountConfig)(container.MountConfig)); err != nil { return fmt.Errorf("setup mount namespace %s", err) } if container.Hostname != "" { + if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { + return fmt.Errorf("unable to set the hostname without UTS namespace") + } if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) } @@ -142,6 +178,88 @@ func Init(pipe *os.File) (err error) { // TODO: (crosbymichael) make this configurable at the Config level if container.RestrictSys { + if (cloneFlags & syscall.CLONE_NEWNS) == 0 { + return fmt.Errorf("unable to restrict access to kernel files without mount namespace") + } + if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + return err + } + } + + pdeathSignal, err := system.GetParentDeathSignal() + if err != nil { + return fmt.Errorf("get parent death signal %s", err) + } + + if err := FinalizeNamespace(container); err != nil { + return fmt.Errorf("finalize namespace %s", err) + } + + // FinalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. 
+ if err := RestoreParentDeathSignal(pdeathSignal); err != nil { + return fmt.Errorf("restore parent death signal %s", err) + } + + return system.Execv(process.Args[0], process.Args[0:], process.Env) +} + +func initUserNs(container *configs.Config, uncleanRootfs string, process *processArgs, networkState *network.NetworkState) (err error) { + // clear the current processes env and replace it with the environment + // defined on the container + if err := LoadContainerEnvironment(container); err != nil { + return err + } + + // join any namespaces via a path to the namespace fd if provided + if err := joinExistingNamespaces(container.Namespaces); err != nil { + return err + } + if process.ConsolePath != "" { + if err := console.OpenAndDup("/dev/console"); err != nil { + return err + } + } + if _, err := syscall.Setsid(); err != nil { + return fmt.Errorf("setsid %s", err) + } + if process.ConsolePath != "" { + if err := system.Setctty(); err != nil { + return fmt.Errorf("setctty %s", err) + } + } + + if container.WorkingDir == "" { + container.WorkingDir = "/" + } + + if err := setupRlimits(container); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + + cloneFlags := GetNamespaceFlags(container.Namespaces) + + if container.Hostname != "" { + if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { + return fmt.Errorf("unable to set the hostname without UTS namespace") + } + if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { + return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) + } + } + + if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) + } + + if err := label.SetProcessLabel(container.ProcessLabel); err != nil { + return fmt.Errorf("set process label %s", err) + } + + if container.RestrictSys { + if (cloneFlags & syscall.CLONE_NEWNS) == 0 { + return fmt.Errorf("unable to restrict access to kernel files without mount 
namespace") + } if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { return err } @@ -194,7 +312,7 @@ func RestoreParentDeathSignal(old int) error { } // SetupUser changes the groups, gid, and uid for the user inside the container -func SetupUser(u string) error { +func SetupUser(container *configs.Config) error { // Set up defaults. defaultExecUser := user.ExecUser{ Uid: syscall.Getuid(), @@ -202,22 +320,24 @@ func SetupUser(u string) error { Home: "/", } - passwdFile, err := user.GetPasswdFile() + passwdPath, err := user.GetPasswdPath() if err != nil { return err } - groupFile, err := user.GetGroupFile() + groupPath, err := user.GetGroupPath() if err != nil { return err } - execUser, err := user.GetExecUserFile(u, &defaultExecUser, passwdFile, groupFile) + execUser, err := user.GetExecUserPath(container.User, &defaultExecUser, passwdPath, groupPath) if err != nil { return fmt.Errorf("get supplementary groups %s", err) } - if err := syscall.Setgroups(execUser.Sgids); err != nil { + suppGroups := append(execUser.Sgids, container.AdditionalGroups...) 
+ + if err := syscall.Setgroups(suppGroups); err != nil { return fmt.Errorf("setgroups %s", err) } @@ -297,7 +417,7 @@ func FinalizeNamespace(container *configs.Config) error { return fmt.Errorf("set keep caps %s", err) } - if err := SetupUser(container.User); err != nil { + if err := SetupUser(container); err != nil { return fmt.Errorf("setup user %s", err) } @@ -342,7 +462,7 @@ func joinExistingNamespaces(namespaces []configs.Namespace) error { if err != nil { return err } - err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Name])) + err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Type])) f.Close() if err != nil { return err diff --git a/namespaces/utils.go b/namespaces/utils.go index 4aa590fd7..978a02d89 100644 --- a/namespaces/utils.go +++ b/namespaces/utils.go @@ -17,13 +17,13 @@ func (i initError) Error() string { return i.Message } -var namespaceInfo = map[string]int{ - "NEWNET": syscall.CLONE_NEWNET, - "NEWNS": syscall.CLONE_NEWNS, - "NEWUSER": syscall.CLONE_NEWUSER, - "NEWIPC": syscall.CLONE_NEWIPC, - "NEWUTS": syscall.CLONE_NEWUTS, - "NEWPID": syscall.CLONE_NEWPID, +var namespaceInfo = map[configs.NamespaceType]int{ + configs.NEWNET: syscall.CLONE_NEWNET, + configs.NEWNS: syscall.CLONE_NEWNS, + configs.NEWUSER: syscall.CLONE_NEWUSER, + configs.NEWIPC: syscall.CLONE_NEWIPC, + configs.NEWUTS: syscall.CLONE_NEWUTS, + configs.NEWPID: syscall.CLONE_NEWPID, } // New returns a newly initialized Pipe for communication between processes @@ -36,10 +36,13 @@ func newInitPipe() (parent *os.File, child *os.File, err error) { } // GetNamespaceFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare, and setns -func GetNamespaceFlags(namespaces []configs.Namespace) (flag int) { +// flags on clone, unshare. This functions returns flags only for new namespaces. 
+func GetNamespaceFlags(namespaces configs.Namespaces) (flag int) { for _, v := range namespaces { - flag |= namespaceInfo[v.Name] + if v.Path != "" { + continue + } + flag |= namespaceInfo[v.Type] } return flag } diff --git a/netlink/netlink_linux.go b/netlink/netlink_linux.go index 1bf70430f..3cc3cc94f 100644 --- a/netlink/netlink_linux.go +++ b/netlink/netlink_linux.go @@ -522,11 +522,10 @@ func NetworkSetMacAddress(iface *net.Interface, macaddr string) error { var ( MULTICAST byte = 0x1 - LOCALOUI byte = 0x2 ) - if hwaddr[0]&0x1 == MULTICAST || hwaddr[0]&0x2 != LOCALOUI { - return fmt.Errorf("Incorrect Local MAC Address specified: %s", macaddr) + if hwaddr[0]&0x1 == MULTICAST { + return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr) } wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) diff --git a/network/network.go b/network/network.go index ba8f6f74e..40b25b135 100644 --- a/network/network.go +++ b/network/network.go @@ -88,6 +88,18 @@ func SetInterfaceIp(name string, rawIp string) error { return netlink.NetworkLinkAddIp(iface, ip, ipNet) } +func DeleteInterfaceIp(name string, rawIp string) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + ip, ipNet, err := net.ParseCIDR(rawIp) + if err != nil { + return err + } + return netlink.NetworkLinkDelIp(iface, ip, ipNet) +} + func SetMtu(name string, mtu int) error { iface, err := net.InterfaceByName(name) if err != nil { diff --git a/cgroups/fs/notify_linux.go b/notify_linux.go similarity index 54% rename from cgroups/fs/notify_linux.go rename to notify_linux.go index d92063bad..059ce5131 100644 --- a/cgroups/fs/notify_linux.go +++ b/notify_linux.go @@ -1,33 +1,30 @@ // +build linux -package fs +package libcontainer import ( "fmt" + "github.com/docker/libcontainer/configs" + "io/ioutil" "os" "path/filepath" "syscall" - - "github.com/docker/libcontainer/cgroups" ) -// NotifyOnOOM sends signals on the returned channel when the cgroup reaches -// its 
memory limit. The channel is closed when the cgroup is removed. -func NotifyOnOOM(c *cgroups.Cgroup) (<-chan struct{}, error) { - d, err := getCgroupData(c, 0) - if err != nil { - return nil, err - } - - return notifyOnOOM(d) -} +const oomCgroupName = "memory" -func notifyOnOOM(d *data) (<-chan struct{}, error) { - dir, err := d.path("memory") +// NotifyOnOOM returns channel on which you can expect event about OOM, +// if process died without OOM this channel will be closed. +// s is current *libcontainer.State for container. +func NotifyOnOOM(s *configs.State) (<-chan struct{}, error) { + dir := s.CgroupPaths[oomCgroupName] + if dir == "" { + return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName) + } + oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control")) if err != nil { return nil, err } - fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0) if syserr != 0 { return nil, syserr @@ -35,48 +32,32 @@ func notifyOnOOM(d *data) (<-chan struct{}, error) { eventfd := os.NewFile(fd, "eventfd") - oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control")) - if err != nil { - eventfd.Close() - return nil, err - } - - var ( - eventControlPath = filepath.Join(dir, "cgroup.event_control") - data = fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd()) - ) - - if err := writeFile(dir, "cgroup.event_control", data); err != nil { + eventControlPath := filepath.Join(dir, "cgroup.event_control") + data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd()) + if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil { eventfd.Close() oomControl.Close() return nil, err } - ch := make(chan struct{}) - go func() { defer func() { close(ch) eventfd.Close() oomControl.Close() }() - buf := make([]byte, 8) - for { if _, err := eventfd.Read(buf); err != nil { return } - // When a cgroup is destroyed, an event is sent to eventfd. // So if the control path is gone, return instead of notifying. 
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { return } - ch <- struct{}{} } }() - return ch, nil } diff --git a/cgroups/fs/notify_linux_test.go b/notify_linux_test.go similarity index 66% rename from cgroups/fs/notify_linux_test.go rename to notify_linux_test.go index a11880cb6..8a3026eda 100644 --- a/cgroups/fs/notify_linux_test.go +++ b/notify_linux_test.go @@ -1,38 +1,50 @@ // +build linux -package fs +package libcontainer import ( "encoding/binary" "fmt" + "io/ioutil" + "os" + "path/filepath" "syscall" "testing" "time" + + "github.com/docker/libcontainer/configs" ) func TestNotifyOnOOM(t *testing.T) { - helper := NewCgroupTestUtil("memory", t) - defer helper.cleanup() - - helper.writeFileContents(map[string]string{ - "memory.oom_control": "", - "cgroup.event_control": "", - }) - + memoryPath, err := ioutil.TempDir("", "testnotifyoom-") + if err != nil { + t.Fatal(err) + } + oomPath := filepath.Join(memoryPath, "memory.oom_control") + eventPath := filepath.Join(memoryPath, "cgroup.event_control") + if err := ioutil.WriteFile(oomPath, []byte{}, 0700); err != nil { + t.Fatal(err) + } + if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil { + t.Fatal(err) + } var eventFd, oomControlFd int - - ooms, err := notifyOnOOM(helper.CgroupData) + st := &configs.State{ + CgroupPaths: map[string]string{ + "memory": memoryPath, + }, + } + ooms, err := NotifyOnOOM(st) if err != nil { t.Fatal("expected no error, got:", err) } - memoryPath, _ := helper.CgroupData.path("memory") - data, err := readFile(memoryPath, "cgroup.event_control") + data, err := ioutil.ReadFile(eventPath) if err != nil { t.Fatal("couldn't read event control file:", err) } - if _, err := fmt.Sscanf(data, "%d %d", &eventFd, &oomControlFd); err != nil { + if _, err := fmt.Sscanf(string(data), "%d %d", &eventFd, &oomControlFd); err != nil { t.Fatalf("invalid control data %q: %s", data, err) } @@ -62,7 +74,9 @@ func TestNotifyOnOOM(t *testing.T) { // simulate what happens when a 
cgroup is destroyed by cleaning up and then // writing to the eventfd. - helper.cleanup() + if err := os.RemoveAll(memoryPath); err != nil { + t.Fatal(err) + } if _, err := syscall.Write(efd, buf); err != nil { t.Fatal("unable to write to eventfd:", err) } diff --git a/nsinit/main.go b/nsinit/main.go index d1e4bf1e7..2de7bc3ee 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -26,11 +26,12 @@ func main() { app.Before = preload app.Commands = []cli.Command{ + configCommand, execCommand, initCommand, - statsCommand, - configCommand, + oomCommand, pauseCommand, + statsCommand, unpauseCommand, } diff --git a/nsinit/oom.go b/nsinit/oom.go new file mode 100644 index 000000000..f7a333d45 --- /dev/null +++ b/nsinit/oom.go @@ -0,0 +1,29 @@ +package main + +import ( + "log" + + "github.com/codegangsta/cli" + "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" +) + +var oomCommand = cli.Command{ + Name: "oom", + Usage: "display oom notifications for a container", + Action: oomAction, +} + +func oomAction(context *cli.Context) { + state, err := configs.GetState(dataPath) + if err != nil { + log.Fatal(err) + } + n, err := libcontainer.NotifyOnOOM(state) + if err != nil { + log.Fatal(err) + } + for range n { + log.Printf("OOM notification received") + } +} diff --git a/sample_configs/apparmor.json b/sample_configs/apparmor.json index 50421ec88..96f73cb79 100644 --- a/sample_configs/apparmor.json +++ b/sample_configs/apparmor.json @@ -177,11 +177,11 @@ ], "hostname": "koye", "namespaces": [ - {"name":"NEWIPC"}, - {"name": "NEWNET"}, - {"name": "NEWNS"}, - {"name": "NEWPID"}, - {"name": "NEWUTS"} + {"type":"NEWIPC"}, + {"type": "NEWNET"}, + {"type": "NEWNS"}, + {"type": "NEWPID"}, + {"type": "NEWUTS"} ], "networks": [ { diff --git a/sample_configs/attach_to_bridge.json b/sample_configs/attach_to_bridge.json index 9b190293a..e5c03a7ef 100644 --- a/sample_configs/attach_to_bridge.json +++ b/sample_configs/attach_to_bridge.json @@ -176,11 +176,11 @@ ], 
"hostname": "koye", "namespaces": [ - {"name": "NEWIPC"}, - {"name": "NEWNET"}, - {"name": "NEWNS"}, - {"name": "NEWPID"}, - {"name": "NEWUTS"} + {"type": "NEWIPC"}, + {"type": "NEWNET"}, + {"type": "NEWNS"}, + {"type": "NEWPID"}, + {"type": "NEWUTS"} ], "networks": [ { diff --git a/sample_configs/host-pid.json b/sample_configs/host-pid.json new file mode 100644 index 000000000..f47af930e --- /dev/null +++ b/sample_configs/host-pid.json @@ -0,0 +1,200 @@ +{ + "capabilities": [ + "CHOWN", + "DAC_OVERRIDE", + "FOWNER", + "MKNOD", + "NET_RAW", + "SETGID", + "SETUID", + "SETFCAP", + "SETPCAP", + "NET_BIND_SERVICE", + "SYS_CHROOT", + "KILL" + ], + "cgroups": { + "allowed_devices": [ + { + "cgroup_permissions": "m", + "major_number": -1, + "minor_number": -1, + "type": 99 + }, + { + "cgroup_permissions": "m", + "major_number": -1, + "minor_number": -1, + "type": 98 + }, + { + "cgroup_permissions": "rwm", + "major_number": 5, + "minor_number": 1, + "path": "/dev/console", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 4, + "path": "/dev/tty0", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 4, + "minor_number": 1, + "path": "/dev/tty1", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 136, + "minor_number": -1, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 5, + "minor_number": 2, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 10, + "minor_number": 200, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 3, + "path": "/dev/null", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 5, + "path": "/dev/zero", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 7, + "path": "/dev/full", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 5, + 
"path": "/dev/tty", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 9, + "path": "/dev/urandom", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 8, + "path": "/dev/random", + "type": 99 + } + ], + "name": "docker-koye", + "parent": "docker" + }, + "restrict_sys": true, + "mount_config": { + "device_nodes": [ + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 3, + "path": "/dev/null", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 5, + "path": "/dev/zero", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 7, + "path": "/dev/full", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 5, + "path": "/dev/tty", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 9, + "path": "/dev/urandom", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 8, + "path": "/dev/random", + "type": 99 + } + ], + "mounts": [ + { + "type": "tmpfs", + "destination": "/tmp" + } + ] + }, + "environment": [ + "HOME=/", + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOSTNAME=koye", + "TERM=xterm" + ], + "hostname": "koye", + "namespaces": [ + {"type": "NEWIPC"}, + {"type": "NEWNET"}, + {"type": "NEWNS"}, + {"type": "NEWUTS"} + ], + "networks": [ + { + "address": "127.0.0.1/0", + "gateway": "localhost", + "mtu": 1500, + "type": "loopback" + } + ], + "tty": true, + "user": "daemon" +} diff --git a/sample_configs/minimal.json b/sample_configs/minimal.json index 720be64f9..01de46746 100644 --- a/sample_configs/minimal.json +++ b/sample_configs/minimal.json @@ -182,11 +182,11 @@ ], "hostname": "koye", "namespaces": [ - {"name": 
"NEWIPC"}, - {"name": "NEWNET"}, - {"name": "NEWNS"}, - {"name": "NEWPID"}, - {"name": "NEWUTS"} + {"type": "NEWIPC"}, + {"type": "NEWNET"}, + {"type": "NEWNS"}, + {"type": "NEWPID"}, + {"type": "NEWUTS"} ], "networks": [ { diff --git a/sample_configs/route_source_address_selection.json b/sample_configs/route_source_address_selection.json index f403996dc..9c62045a4 100644 --- a/sample_configs/route_source_address_selection.json +++ b/sample_configs/route_source_address_selection.json @@ -176,11 +176,11 @@ ], "hostname": "koye", "namespaces": [ - {"name": "NEWIPC"}, - {"name": "NEWNET"}, - {"name": "NEWNS"}, - {"name": "NEWPID"}, - {"name": "NEWUTS"} + {"type": "NEWIPC"}, + {"type": "NEWNET"}, + {"type": "NEWNS"}, + {"type": "NEWPID"}, + {"type": "NEWUTS"} ], "networks": [ { diff --git a/sample_configs/selinux.json b/sample_configs/selinux.json index cfb83e09f..15556488a 100644 --- a/sample_configs/selinux.json +++ b/sample_configs/selinux.json @@ -178,11 +178,11 @@ ], "hostname": "koye", "namespaces": [ - {"name": "NEWIPC"}, - {"name": "NEWNET"}, - {"name": "NEWNS"}, - {"name": "NEWPID"}, - {"name": "NEWUTS"} + {"type": "NEWIPC"}, + {"type": "NEWNET"}, + {"type": "NEWNS"}, + {"type": "NEWPID"}, + {"type": "NEWUTS"} ], "networks": [ { diff --git a/sample_configs/userns.json b/sample_configs/userns.json new file mode 100644 index 000000000..8c9c841f1 --- /dev/null +++ b/sample_configs/userns.json @@ -0,0 +1,251 @@ +{ + "capabilities": [ + "CHOWN", + "DAC_OVERRIDE", + "FOWNER", + "MKNOD", + "NET_RAW", + "SETGID", + "SETUID", + "SETFCAP", + "SETPCAP", + "NET_BIND_SERVICE", + "SYS_CHROOT", + "KILL" + ], + "cgroups": { + "allowed_devices": [ + { + "cgroup_permissions": "m", + "major_number": -1, + "minor_number": -1, + "type": 99 + }, + { + "cgroup_permissions": "m", + "major_number": -1, + "minor_number": -1, + "type": 98 + }, + { + "cgroup_permissions": "rwm", + "major_number": 5, + "minor_number": 1, + "path": "/dev/console", + "type": 99 + }, + { + 
"cgroup_permissions": "rwm", + "major_number": 4, + "path": "/dev/tty0", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 4, + "minor_number": 1, + "path": "/dev/tty1", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 136, + "minor_number": -1, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 5, + "minor_number": 2, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 10, + "minor_number": 200, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 3, + "path": "/dev/null", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 5, + "path": "/dev/zero", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 7, + "path": "/dev/full", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 5, + "path": "/dev/tty", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 9, + "path": "/dev/urandom", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 8, + "path": "/dev/random", + "type": 99 + } + ], + "name": "docker-koye", + "parent": "docker" + }, + "restrict_sys": true, + "mount_config": { + "device_nodes": [ + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 3, + "path": "/dev/null", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 5, + "path": "/dev/zero", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 7, + "path": "/dev/full", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 5, + "path": "/dev/tty", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + 
"file_mode": 438, + "major_number": 1, + "minor_number": 9, + "path": "/dev/urandom", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 8, + "path": "/dev/random", + "type": 99 + } + ], + "mounts": [ + { + "type": "tmpfs", + "destination": "/tmp" + } + ] + }, + "environment": [ + "HOME=/", + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOSTNAME=koye", + "TERM=xterm" + ], + "hostname": "koye", + "namespaces": [ + {"type": "NEWIPC"}, + {"type": "NEWNET"}, + {"type": "NEWNS"}, + {"type": "NEWPID"}, + {"type": "NEWUTS"}, + {"type": "NEWUSER"} + ], + "networks": [ + { + "address": "127.0.0.1/0", + "gateway": "localhost", + "mtu": 1500, + "type": "loopback" + }, + { + "address": "172.17.0.9/16", + "gateway": "172.17.42.1", + "bridge": "docker0", + "veth_prefix": "veth", + "mtu": 1500, + "type": "veth" + } + ], + "tty": true, + "user": "root", + "uid_mappings": [ + { + "container_id": 0, + "host_id": 1000, + "size": 1 + }, + { + "container_id": 1, + "host_id": 1, + "size": 999 + }, + { + "container_id": 1001, + "host_id": 1001, + "size": 9000 + } + ], + "gid_mappings": [ + { + "container_id": 0, + "host_id": 1000, + "size": 1 + }, + { + "container_id": 1, + "host_id": 1, + "size": 999 + }, + { + "container_id": 1001, + "host_id": 1001, + "size": 9000 + } + ], + "rlimits": [ + { + "type": 7, + "hard": 999, + "soft": 999 + } + ] +} diff --git a/user/MAINTAINERS b/user/MAINTAINERS index 18e05a307..edbe20066 100644 --- a/user/MAINTAINERS +++ b/user/MAINTAINERS @@ -1 +1,2 @@ Tianon Gravi (@tianon) +Aleksa Sarai (@cyphar) diff --git a/user/lookup_unix.go b/user/lookup_unix.go index 409c114e2..758b734c2 100644 --- a/user/lookup_unix.go +++ b/user/lookup_unix.go @@ -9,22 +9,22 @@ import ( // Unix-specific path to the passwd and group formatted files. 
const ( - unixPasswdFile = "/etc/passwd" - unixGroupFile = "/etc/group" + unixPasswdPath = "/etc/passwd" + unixGroupPath = "/etc/group" ) -func GetPasswdFile() (string, error) { - return unixPasswdFile, nil +func GetPasswdPath() (string, error) { + return unixPasswdPath, nil } func GetPasswd() (io.ReadCloser, error) { - return os.Open(unixPasswdFile) + return os.Open(unixPasswdPath) } -func GetGroupFile() (string, error) { - return unixGroupFile, nil +func GetGroupPath() (string, error) { + return unixGroupPath, nil } func GetGroup() (io.ReadCloser, error) { - return os.Open(unixGroupFile) + return os.Open(unixGroupPath) } diff --git a/user/lookup_unsupported.go b/user/lookup_unsupported.go index 0f15c57d8..721794887 100644 --- a/user/lookup_unsupported.go +++ b/user/lookup_unsupported.go @@ -4,7 +4,7 @@ package user import "io" -func GetPasswdFile() (string, error) { +func GetPasswdPath() (string, error) { return "", ErrUnsupported } @@ -12,7 +12,7 @@ func GetPasswd() (io.ReadCloser, error) { return nil, ErrUnsupported } -func GetGroupFile() (string, error) { +func GetGroupPath() (string, error) { return "", ErrUnsupported } diff --git a/user/user.go b/user/user.go index 69387f2ef..d7439f12e 100644 --- a/user/user.go +++ b/user/user.go @@ -197,11 +197,11 @@ type ExecUser struct { Home string } -// GetExecUserFile is a wrapper for GetExecUser. It reads data from each of the +// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the // given file paths and uses that data as the arguments to GetExecUser. If the // files cannot be opened for any reason, the error is ignored and a nil // io.Reader is passed instead. 
-func GetExecUserFile(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { +func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { passwd, err := os.Open(passwdPath) if err != nil { passwd = nil From df52d638541d8199cd79b4be42647976447fecbc Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 28 Jan 2015 12:11:19 +0300 Subject: [PATCH 059/101] namespaces: send config, network state and other arguments in one packet Signed-off-by: Andrey Vagin --- namespaces/exec.go | 37 ++++++++++--------------------------- namespaces/init.go | 27 ++++++++++----------------- 2 files changed, 20 insertions(+), 44 deletions(-) diff --git a/namespaces/exec.go b/namespaces/exec.go index 0c0f6cf6c..1c157e3a2 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -68,18 +68,8 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process return terr } - encoder := json.NewEncoder(parent) - - if err := encoder.Encode(container); err != nil { - return terminate(err) - } - - if err := encoder.Encode(process); err != nil { - return terminate(err) - } - // send the state to the container's init process then shutdown writes for the parent - if err := encoder.Encode(networkState); err != nil { + if err := json.NewEncoder(parent).Encode(process); err != nil { return terminate(err) } @@ -158,21 +148,6 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai return terr } - encoder := json.NewEncoder(parent) - - if err := encoder.Encode(container); err != nil { - return terminate(err) - } - - process := processArgs{ - Env: append(env[0:], container.Env...), - Args: args, - ConsolePath: console, - } - if err := encoder.Encode(process); err != nil { - return terminate(err) - } - started, err := system.GetProcessStartTime(command.Process.Pid) if err != nil { return terminate(err) @@ -195,6 +170,14 @@ func Exec(args []string, env []string, console string, 
command *exec.Cmd, contai return terminate(err) } + process := processArgs{ + Env: append(env[0:], container.Env...), + Args: args, + ConsolePath: console, + Config: container, + NetworkState: &networkState, + } + // Start the setup process to setup the init process if container.Namespaces.Contains(configs.NEWUSER) { if err = executeSetupCmd(command.Args, command.Process.Pid, container, &process, &networkState); err != nil { @@ -203,7 +186,7 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai } // send the state to the container's init process then shutdown writes for the parent - if err := encoder.Encode(networkState); err != nil { + if err := json.NewEncoder(parent).Encode(process); err != nil { return terminate(err) } // shutdown writes for the parent side of the pipe diff --git a/namespaces/init.go b/namespaces/init.go index 01f721141..5af898e18 100644 --- a/namespaces/init.go +++ b/namespaces/init.go @@ -26,9 +26,11 @@ import ( // Process is used for transferring parameters from Exec() to Init() type processArgs struct { - Args []string `json:"args,omitempty"` - Env []string `json:"environment,omitempty"` - ConsolePath string `json:"console_path,omitempty"` + Args []string `json:"args,omitempty"` + Env []string `json:"environment,omitempty"` + ConsolePath string `json:"console_path,omitempty"` + Config *configs.Config `json:"config,omitempty"` + NetworkState *network.NetworkState `json:"network_state,omitempty"` } // TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. 
@@ -55,29 +57,20 @@ func Init(pipe *os.File, setupUserns bool) (err error) { pipe.Close() }() - decoder := json.NewDecoder(pipe) - - var container *configs.Config - if err := decoder.Decode(&container); err != nil { - return err - } - - var process *processArgs - if err := decoder.Decode(&process); err != nil { - return err - } - uncleanRootfs, err := os.Getwd() if err != nil { return err } + var process *processArgs // We always read this as it is a way to sync with the parent as well - var networkState *network.NetworkState - if err := decoder.Decode(&networkState); err != nil { + if err := json.NewDecoder(pipe).Decode(&process); err != nil { return err } + container := process.Config + networkState := process.NetworkState + if setupUserns { err = SetupContainer(container, networkState, process.ConsolePath) if err == nil { From c3f3db724a441eb54591d7c71d4dfa85b314f7d2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 28 Jan 2015 14:03:57 +0300 Subject: [PATCH 060/101] namespaces: don't unroll process arguments It looks better. Signed-off-by: Andrey Vagin --- namespaces/execin.go | 7 +++++-- namespaces/init.go | 20 +++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/namespaces/execin.go b/namespaces/execin.go index 514ba9992..3b51a5872 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -15,7 +15,6 @@ import ( "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/system" "github.com/docker/libcontainer/utils" ) @@ -171,7 +170,11 @@ func FinalizeSetns(container *configs.Config) error { // The setup process joins all the namespaces of user namespace enabled init // except the user namespace, so it run as root in the root user namespace // to perform these operations. 
-func SetupContainer(container *configs.Config, networkState *network.NetworkState, consolePath string) error { +func SetupContainer(process *processArgs) error { + container := process.Config + networkState := process.NetworkState + consolePath := process.ConsolePath + rootfs, err := utils.ResolveRootfs(container.RootFs) if err != nil { return err diff --git a/namespaces/init.go b/namespaces/init.go index 5af898e18..582545149 100644 --- a/namespaces/init.go +++ b/namespaces/init.go @@ -68,11 +68,8 @@ func Init(pipe *os.File, setupUserns bool) (err error) { return err } - container := process.Config - networkState := process.NetworkState - if setupUserns { - err = SetupContainer(container, networkState, process.ConsolePath) + err = SetupContainer(process) if err == nil { os.Exit(0) } else { @@ -80,14 +77,17 @@ func Init(pipe *os.File, setupUserns bool) (err error) { } } - if container.Namespaces.Contains(configs.NEWUSER) { - return initUserNs(container, uncleanRootfs, process, networkState) + if process.Config.Namespaces.Contains(configs.NEWUSER) { + return initUserNs(uncleanRootfs, process) } else { - return initDefault(container, uncleanRootfs, process, networkState) + return initDefault(uncleanRootfs, process) } } -func initDefault(container *configs.Config, uncleanRootfs string, process *processArgs, networkState *network.NetworkState) (err error) { +func initDefault(uncleanRootfs string, process *processArgs) (err error) { + container := process.Config + networkState := process.NetworkState + rootfs, err := utils.ResolveRootfs(uncleanRootfs) if err != nil { return err @@ -197,7 +197,9 @@ func initDefault(container *configs.Config, uncleanRootfs string, process *proce return system.Execv(process.Args[0], process.Args[0:], process.Env) } -func initUserNs(container *configs.Config, uncleanRootfs string, process *processArgs, networkState *network.NetworkState) (err error) { +func initUserNs(uncleanRootfs string, process *processArgs) (err error) { + container := 
process.Config + // clear the current processes env and replace it with the environment // defined on the container if err := LoadContainerEnvironment(container); err != nil { From 77f255a544f1a74f91c8c9699a5444b12010bec9 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 31 Jan 2015 14:05:53 -0800 Subject: [PATCH 061/101] Add missing initializers Signed-off-by: Michael Crosby --- nsinit/init.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nsinit/init.go b/nsinit/init.go index bf59345a2..21128302a 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -14,7 +14,7 @@ var ( Usage: "runs the init process inside the namespace", Action: initAction, Flags: []cli.Flag{ - cli.IntFlag{"fd", 0, "internal pipe fd"}, + cli.IntFlag{Name: "fd", Value: 0, Usage: "internal pipe fd"}, }, } ) From 935d81f23d8dc3802ee3565c7f5a30ea44a11278 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 31 Jan 2015 19:56:27 -0800 Subject: [PATCH 062/101] Flatten configuration structs Change the various config structs into one package and have a flatter structure for easier use. 
Signed-off-by: Michael Crosby --- cgroups/cgroups.go | 32 +---- cgroups/fs/apply_raw.go | 29 ++--- cgroups/fs/devices.go | 2 +- cgroups/fs/freezer.go | 3 +- cgroups/manager/manager.go | 5 +- cgroups/systemd/apply_nosystemd.go | 9 +- cgroups/systemd/apply_systemd.go | 21 ++-- configs/cgroup.go | 54 +++++++++ configs/config.go | 165 ++++++++++++------------- configs/device.go | 42 +++++++ {mount => configs}/mount.go | 28 ++++- configs/namespaces.go | 52 ++++++++ configs/network.go | 62 ++++++++++ configs/state.go | 64 +++------- container.go | 20 ++- devices/defaults.go | 188 +++++++++++++---------------- devices/devices.go | 68 +++-------- devices/number.go | 4 - linux_container.go | 37 +++--- linux_factory.go | 1 - mount/init.go | 149 ++++++++++++----------- mount/mount_config.go | 33 ----- mount/msmoveroot.go | 13 +- mount/nodes/nodes.go | 57 --------- mount/nodes/nodes_unsupported.go | 13 -- mount/pivotroot.go | 6 +- mount/ptmx.go | 19 +-- mount/readonly.go | 2 +- namespaces/exec.go | 73 +---------- namespaces/execin.go | 37 ++---- namespaces/init.go | 31 +++-- network/loopback.go | 6 +- network/stats.go | 4 +- network/strategy.go | 6 +- network/types.go | 49 -------- network/veth.go | 5 +- nsinit/exec.go | 8 +- nsinit/main.go | 11 +- nsinit/oom.go | 10 +- nsinit/utils.go | 55 +-------- types.go | 11 -- 41 files changed, 650 insertions(+), 834 deletions(-) create mode 100644 configs/cgroup.go create mode 100644 configs/device.go rename {mount => configs}/mount.go (82%) create mode 100644 configs/namespaces.go create mode 100644 configs/network.go delete mode 100644 mount/mount_config.go delete mode 100644 mount/nodes/nodes.go delete mode 100644 mount/nodes/nodes_unsupported.go delete mode 100644 types.go diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index 60b1135a3..7ed9be81e 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -3,7 +3,7 @@ package cgroups import ( "fmt" - "github.com/docker/libcontainer/devices" + 
"github.com/docker/libcontainer/configs" ) type Manager interface { @@ -17,7 +17,7 @@ type Manager interface { GetStats() (*Stats, error) // Toggles the freezer cgroup according with specified state - Freeze(state FreezerState) error + Freeze(state configs.FreezerState) error // Destroys the cgroup set Destroy() error @@ -33,14 +33,6 @@ type Manager interface { GetPaths() map[string]string } -type FreezerState string - -const ( - Undefined FreezerState = "" - Frozen FreezerState = "FROZEN" - Thawed FreezerState = "THAWED" -) - type NotFoundError struct { Subsystem string } @@ -59,26 +51,6 @@ func IsNotFound(err error) bool { if err == nil { return false } - _, ok := err.(*NotFoundError) return ok } - -type Cgroup struct { - Name string `json:"name,omitempty"` - Parent string `json:"parent,omitempty"` // name of parent cgroup or slice - - AllowAllDevices bool `json:"allow_all_devices,omitempty"` // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. - AllowedDevices []*devices.Device `json:"allowed_devices,omitempty"` - Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) - MemoryReservation int64 `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes) - MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap - CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) - CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. - CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. 
- CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use - CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use - BlkioWeight int64 `json:"blkio_weight,omitempty"` // Specifies per cgroup weight, range is from 10 to 1000. - Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process - Slice string `json:"slice,omitempty"` // Parent slice to use for systemd -} diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 11d35d7a4..4a3a88645 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -8,6 +8,7 @@ import ( "strconv" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) var ( @@ -24,8 +25,17 @@ var ( CgroupProcesses = "cgroup.procs" ) +type subsystem interface { + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'data'. + Remove(*data) error + // Creates and joins the cgroup represented by data. + Set(*data) error +} + type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -46,19 +56,10 @@ func init() { } } -type subsystem interface { - // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. - GetStats(path string, stats *cgroups.Stats) error - // Removes the cgroup represented by 'data'. - Remove(*data) error - // Creates and joins the cgroup represented by data. - Set(*data) error -} - type data struct { root string cgroup string - c *cgroups.Cgroup + c *configs.Cgroup pid int } @@ -109,7 +110,7 @@ func (m *Manager) GetPaths() map[string]string { // Symmetrical public function to update device based cgroups. Also available // in the systemd implementation. 
-func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { d, err := getCgroupData(c, pid) if err != nil { return err @@ -137,7 +138,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { // Freeze toggles the container's freezer cgroup depending on the state // provided -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { d, err := getCgroupData(m.Cgroups, 0) if err != nil { return err @@ -170,7 +171,7 @@ func (m *Manager) GetPids() ([]int, error) { return cgroups.ReadProcsFile(dir) } -func getCgroupData(c *cgroups.Cgroup, pid int) (*data, error) { +func getCgroupData(c *configs.Cgroup, pid int) (*data, error) { if cgroupRoot == "" { return nil, fmt.Errorf("failed to find the cgroup root") } diff --git a/cgroups/fs/devices.go b/cgroups/fs/devices.go index 98d5d2d7d..e904e10c8 100644 --- a/cgroups/fs/devices.go +++ b/cgroups/fs/devices.go @@ -17,7 +17,7 @@ func (s *DevicesGroup) Set(d *data) error { } for _, dev := range d.c.AllowedDevices { - if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil { + if err := writeFile(dir, "devices.allow", dev.CgroupString()); err != nil { return err } } diff --git a/cgroups/fs/freezer.go b/cgroups/fs/freezer.go index c6b677fa9..b881d0d47 100644 --- a/cgroups/fs/freezer.go +++ b/cgroups/fs/freezer.go @@ -5,6 +5,7 @@ import ( "time" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) type FreezerGroup struct { @@ -12,7 +13,7 @@ type FreezerGroup struct { func (s *FreezerGroup) Set(d *data) error { switch d.c.Freezer { - case cgroups.Frozen, cgroups.Thawed: + case configs.Frozen, configs.Thawed: dir, err := d.path("freezer") if err != nil { return err diff --git a/cgroups/manager/manager.go b/cgroups/manager/manager.go index bd5fd48ab..b8e2010ed 100644 --- a/cgroups/manager/manager.go +++ b/cgroups/manager/manager.go @@ -4,13 +4,14 @@ import ( 
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" + "github.com/docker/libcontainer/configs" ) // Create a new cgroup manager with specified configuration // TODO this object is not really initialized until Apply() is called. // Maybe make this to the equivalent of Apply() at some point? // @vmarmol -func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { +func NewCgroupManager(cgroups *configs.Cgroup) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ Cgroups: cgroups, @@ -23,7 +24,7 @@ func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { } // Restore a cgroup manager with specified configuration and state -func LoadCgroupManager(cgroups *cgroups.Cgroup, paths map[string]string) cgroups.Manager { +func LoadCgroupManager(cgroups *configs.Cgroup, paths map[string]string) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ Cgroups: cgroups, diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go index 62928b822..8a46ea8bb 100644 --- a/cgroups/systemd/apply_nosystemd.go +++ b/cgroups/systemd/apply_nosystemd.go @@ -6,10 +6,11 @@ import ( "fmt" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -37,14 +38,14 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, fmt.Errorf("Systemd not supported") } -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { return fmt.Errorf("Systemd not supported") } -func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { return fmt.Errorf("Systemd not supported") } -func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { +func Freeze(c *configs.Cgroup, state configs.FreezerState) error { 
return fmt.Errorf("Systemd not supported") } diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 7143a5951..f46067b48 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -16,11 +16,12 @@ import ( systemd "github.com/coreos/go-systemd/dbus" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/configs" "github.com/godbus/dbus" ) type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -190,7 +191,7 @@ func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } -func joinFreezer(c *cgroups.Cgroup, pid int) error { +func joinFreezer(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "freezer") if err != nil { return err @@ -203,7 +204,7 @@ func joinFreezer(c *cgroups.Cgroup, pid int) error { return ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700) } -func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { +func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { mountpoint, err := cgroups.FindCgroupMountpoint(subsystem) if err != nil { return "", err @@ -222,7 +223,7 @@ func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil } -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { path, err := getSubsystemPath(m.Cgroups, "freezer") if err != nil { return err @@ -260,7 +261,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { panic("not implemented") } -func getUnitName(c *cgroups.Cgroup) string { +func getUnitName(c *configs.Cgroup) string { return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) } @@ -275,7 +276,7 @@ func getUnitName(c *cgroups.Cgroup) string { // Note: we 
can't use systemd to set up the initial limits, and then change the cgroup // because systemd will re-write the device settings if it needs to re-apply the cgroup context. // This happens at least for v208 when any sibling unit is started. -func joinDevices(c *cgroups.Cgroup, pid int) error { +func joinDevices(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "devices") if err != nil { return err @@ -294,7 +295,7 @@ func joinDevices(c *cgroups.Cgroup, pid int) error { } for _, dev := range c.AllowedDevices { - if err := writeFile(path, "devices.allow", dev.GetCgroupAllowString()); err != nil { + if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { return err } } @@ -304,11 +305,11 @@ func joinDevices(c *cgroups.Cgroup, pid int) error { // Symmetrical public function to update device based cgroups. Also available // in the fs implementation. -func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { return joinDevices(c, pid) } -func joinMemory(c *cgroups.Cgroup, pid int) error { +func joinMemory(c *configs.Cgroup, pid int) error { memorySwap := c.MemorySwap if memorySwap == 0 { @@ -327,7 +328,7 @@ func joinMemory(c *cgroups.Cgroup, pid int) error { // systemd does not atm set up the cpuset controller, so we must manually // join it. 
Additionally that is a very finicky controller where each // level must have a full setup as the default for a new directory is "no cpus" -func joinCpuset(c *cgroups.Cgroup, pid int) error { +func joinCpuset(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "cpuset") if err != nil { return err diff --git a/configs/cgroup.go b/configs/cgroup.go new file mode 100644 index 000000000..0dffc6401 --- /dev/null +++ b/configs/cgroup.go @@ -0,0 +1,54 @@ +package configs + +type FreezerState string + +const ( + Undefined FreezerState = "" + Frozen FreezerState = "FROZEN" + Thawed FreezerState = "THAWED" +) + +type Cgroup struct { + Name string `json:"name,omitempty"` + + // name of parent cgroup or slice + Parent string `json:"parent,omitempty"` + + // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. + AllowAllDevices bool `json:"allow_all_devices,omitempty"` + + AllowedDevices []*Device `json:"allowed_devices,omitempty"` + + // Memory limit (in bytes) + Memory int64 `json:"memory,omitempty"` + + // Memory reservation or soft_limit (in bytes) + MemoryReservation int64 `json:"memory_reservation,omitempty"` + + // Total memory usage (memory + swap); set `-1' to disable swap + MemorySwap int64 `json:"memory_swap,omitempty"` + + // CPU shares (relative weight vs. other containers) + CpuShares int64 `json:"cpu_shares,omitempty"` + + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + CpuQuota int64 `json:"cpu_quota,omitempty"` + + // CPU period to be used for hardcapping (in usecs). 0 to use system default. + CpuPeriod int64 `json:"cpu_period,omitempty"` + + // CPU to use + CpusetCpus string `json:"cpuset_cpus,omitempty"` + + // MEM to use + CpusetMems string `json:"cpuset_mems,omitempty"` + + // Specifies per cgroup weight, range is from 10 to 1000. 
+ BlkioWeight int64 `json:"blkio_weight,omitempty"` + + // set the freeze value for the process + Freezer FreezerState `json:"freezer,omitempty"` + + // Parent slice to use for systemd TODO: remove in favor or parent + Slice string `json:"slice,omitempty"` +} diff --git a/configs/config.go b/configs/config.go index d1e03f61c..844a9cad4 100644 --- a/configs/config.go +++ b/configs/config.go @@ -1,70 +1,43 @@ package configs -import ( - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/network" -) - -type MountConfig mount.MountConfig - -type Network network.Network - -type NamespaceType string - -const ( - NEWNET NamespaceType = "NEWNET" - NEWPID NamespaceType = "NEWPID" - NEWNS NamespaceType = "NEWNS" - NEWUTS NamespaceType = "NEWUTS" - NEWIPC NamespaceType = "NEWIPC" - NEWUSER NamespaceType = "NEWUSER" -) - -// Namespace defines configuration for each namespace. It specifies an -// alternate path that is able to be joined via setns. -type Namespace struct { - Type NamespaceType `json:"type"` - Path string `json:"path,omitempty"` -} - -type Namespaces []Namespace - -func (n *Namespaces) Remove(t NamespaceType) bool { - i := n.index(t) - if i == -1 { - return false - } - *n = append((*n)[:i], (*n)[i+1:]...) - return true -} +import "fmt" -func (n *Namespaces) Add(t NamespaceType, path string) { - i := n.index(t) - if i == -1 { - *n = append(*n, Namespace{Type: t, Path: path}) - return - } - (*n)[i].Path = path -} - -func (n *Namespaces) index(t NamespaceType) int { - for i, ns := range *n { - if ns.Type == t { - return i - } - } - return -1 +type Rlimit struct { + Type int `json:"type,omitempty"` + Hard uint64 `json:"hard,omitempty"` + Soft uint64 `json:"soft,omitempty"` } -func (n *Namespaces) Contains(t NamespaceType) bool { - return n.index(t) != -1 +// IDMap represents UID/GID Mappings for User Namespaces. 
+type IDMap struct { + ContainerID int `json:"container_id,omitempty"` + HostID int `json:"host_id,omitempty"` + Size int `json:"size,omitempty"` } // Config defines configuration options for executing a process inside a contained environment. type Config struct { - // Mount specific options. - MountConfig *MountConfig `json:"mount_config,omitempty"` + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root,omitempty"` + + // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. + // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. + // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. + PivotDir string `json:"pivot_dir,omitempty"` + + // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writtable + ReadonlyFs bool `json:"readonly_fs,omitempty"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts []*Mount `json:"mounts,omitempty"` + + // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! 
+ DeviceNodes []*Device `json:"device_nodes,omitempty"` + + MountLabel string `json:"mount_label,omitempty"` // Pathname to container's root filesystem RootFs string `json:"root_fs,omitempty"` @@ -83,9 +56,8 @@ type Config struct { // provided in Env are provided to the process Env []string `json:"environment,omitempty"` - // Tty when true will allocate a pty slave on the host for access by the container's process - // and ensure that it is mounted inside the container's rootfs - Tty bool `json:"tty,omitempty"` + // Console is the path to the console allocated to the container. + Console string `json:"console,omitempty"` // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process @@ -103,7 +75,7 @@ type Config struct { // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available - Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` + Cgroups *Cgroup `json:"cgroups,omitempty"` // AppArmorProfile specifies the profile to apply to the process running in the container and is // change at the time the process is execed @@ -124,6 +96,7 @@ type Config struct { // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. AdditionalGroups []int `json:"additional_groups,omitempty"` + // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings,omitempty"` @@ -131,36 +104,48 @@ type Config struct { GidMappings []IDMap `json:"gid_mappings,omitempty"` } -// Routes can be specified to create entries in the route table as the container is started -// -// All of destination, source, and gateway should be either IPv4 or IPv6. 
-// One of the three options must be present, and ommitted entries will use their -// IP family default for the route table. For IPv4 for example, setting the -// gateway to 1.2.3.4 and the interface to eth0 will set up a standard -// destination of 0.0.0.0(or *) when viewed in the route table. -type Route struct { - // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 - Destination string `json:"destination,omitempty"` - - // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 - Source string `json:"source,omitempty"` - - // Sets the gateway. Accepts IPv4 and IPv6 - Gateway string `json:"gateway,omitempty"` - - // The device to set this route up for, for example: eth0 - InterfaceName string `json:"interface_name,omitempty"` +// Gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c *Config) HostUID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.UidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") + } + id, found := c.hostIDFromMapping(0, c.UidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root uid 0 + return 0, nil } -type Rlimit struct { - Type int `json:"type,omitempty"` - Hard uint64 `json:"hard,omitempty"` - Soft uint64 `json:"soft,omitempty"` +// Gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. 
+func (c *Config) HostGID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.GidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + } + id, found := c.hostIDFromMapping(0, c.GidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root uid 0 + return 0, nil } -// IDMap represents UID/GID Mappings for User Namespaces. -type IDMap struct { - ContainerID int `json:"container_id,omitempty"` - HostID int `json:"host_id,omitempty"` - Size int `json:"size,omitempty"` +// Utility function that gets a host ID for a container ID from user namespace map +// if that ID is present in the map. +func (c *Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false } diff --git a/configs/device.go b/configs/device.go new file mode 100644 index 000000000..18d732325 --- /dev/null +++ b/configs/device.go @@ -0,0 +1,42 @@ +package configs + +import ( + "fmt" + "os" +) + +const ( + Wildcard = -1 +) + +type Device struct { + Type rune `json:"type,omitempty"` + // It is fine if this is an empty string in the case that you are using Wildcards + Path string `json:"path,omitempty"` + // Use the wildcard constant for wildcards. + Major int64 `json:"major,omitempty"` + // Use the wildcard constant for wildcards. 
+ Minor int64 `json:"minor,omitempty"` + // Typically just "rwm" + Permissions string `json:"permissions,omitempty"` + // The permission bits of the file's mode + FileMode os.FileMode `json:"file_mode,omitempty"` + Uid uint32 `json:"uid,omitempty"` + Gid uint32 `json:"gid,omitempty"` +} + +func (d *Device) CgroupString() string { + return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) +} + +func (d *Device) Mkdev() int { + return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) +} + +// deviceNumberString converts the device number to a string return result. +func deviceNumberString(number int64) string { + if number == Wildcard { + return "*" + } + return fmt.Sprint(number) +} diff --git a/mount/mount.go b/configs/mount.go similarity index 82% rename from mount/mount.go rename to configs/mount.go index c1b424214..f6f39992c 100644 --- a/mount/mount.go +++ b/configs/mount.go @@ -1,4 +1,4 @@ -package mount +package configs import ( "fmt" @@ -10,6 +10,8 @@ import ( "github.com/docker/libcontainer/label" ) +const DefaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV + type Mount struct { Type string `json:"type,omitempty"` Source string `json:"source,omitempty"` // Source path, in the host namespace @@ -101,9 +103,31 @@ func (m *Mount) tmpfsMount(rootfs, mountLabel string) error { return fmt.Errorf("creating new tmpfs mount target %s", err) } - if err := syscall.Mount("tmpfs", dest, "tmpfs", uintptr(defaultMountFlags), l); err != nil { + if err := syscall.Mount("tmpfs", dest, "tmpfs", uintptr(DefaultMountFlags), l); err != nil { return fmt.Errorf("%s mounting %s in tmpfs", err, dest) } return nil } + +func createIfNotExists(path string, isDir bool) error { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + if isDir { + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Dir(path), 0755); err 
!= nil { + return err + } + f, err := os.OpenFile(path, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + } + return nil +} diff --git a/configs/namespaces.go b/configs/namespaces.go new file mode 100644 index 000000000..5e891eab2 --- /dev/null +++ b/configs/namespaces.go @@ -0,0 +1,52 @@ +package configs + +type NamespaceType string + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" +) + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Type NamespaceType `json:"type"` + Path string `json:"path,omitempty"` +} + +type Namespaces []Namespace + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...) + return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 +} diff --git a/configs/network.go b/configs/network.go new file mode 100644 index 000000000..542183632 --- /dev/null +++ b/configs/network.go @@ -0,0 +1,62 @@ +package configs + +// Network defines configuration for a container's networking stack +// +// The network configuration can be omited from a container causing the +// container to be setup with the host's networking stack +type Network struct { + // Type sets the networks type, commonly veth and loopback + Type string `json:"type,omitempty"` + + // The bridge to use. 
+ Bridge string `json:"bridge,omitempty"` + + // Prefix for the veth interfaces. + VethPrefix string `json:"veth_prefix,omitempty"` + + // MacAddress contains the MAC address to set on the network interface + MacAddress string `json:"mac_address,omitempty"` + + // Address contains the IPv4 and mask to set on the network interface + Address string `json:"address,omitempty"` + + // IPv6Address contains the IPv6 and mask to set on the network interface + IPv6Address string `json:"ipv6_address,omitempty"` + + // Gateway sets the gateway address that is used as the default for the interface + Gateway string `json:"gateway,omitempty"` + + // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface + IPv6Gateway string `json:"ipv6_gateway,omitempty"` + + // Mtu sets the mtu value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + Mtu int `json:"mtu,omitempty"` + + // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + TxQueueLen int `json:"txqueuelen,omitempty"` +} + +// Routes can be specified to create entries in the route table as the container is started +// +// All of destination, source, and gateway should be either IPv4 or IPv6. +// One of the three options must be present, and ommitted entries will use their +// IP family default for the route table. For IPv4 for example, setting the +// gateway to 1.2.3.4 and the interface to eth0 will set up a standard +// destination of 0.0.0.0(or *) when viewed in the route table. +type Route struct { + // Sets the destination and mask, should be a CIDR. 
Accepts IPv4 and IPv6 + Destination string `json:"destination,omitempty"` + + // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 + Source string `json:"source,omitempty"` + + // Sets the gateway. Accepts IPv4 and IPv6 + Gateway string `json:"gateway,omitempty"` + + // The device to set this route up for, for example: eth0 + InterfaceName string `json:"interface_name,omitempty"` +} diff --git a/configs/state.go b/configs/state.go index 9dc770067..27122c441 100644 --- a/configs/state.go +++ b/configs/state.go @@ -1,13 +1,5 @@ package configs -import ( - "encoding/json" - "os" - "path/filepath" - - "github.com/docker/libcontainer/network" -) - // State represents a running container's state type State struct { // InitPid is the init process id in the parent namespace @@ -17,21 +9,30 @@ type State struct { InitStartTime string `json:"init_start_time,omitempty"` // Network runtime state. - NetworkState network.NetworkState `json:"network_state,omitempty"` + NetworkState NetworkState `json:"network_state,omitempty"` // Path to all the cgroups setup for a container. Key is cgroup subsystem name. CgroupPaths map[string]string `json:"cgroup_paths,omitempty"` + + Status Status `json:"status,omitempty"` } -// The running state of the container. -type RunState int +// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers +// Do not depend on it outside of libcontainer. +// TODO: move veth names to config time +type NetworkState struct { + // The name of the veth interface on the Host. + VethHost string `json:"veth_host,omitempty"` + // The name of the veth interface created inside the container for the child. + VethChild string `json:"veth_child,omitempty"` +} -const ( - // The name of the runtime state file - stateFile = "state.json" +// The status of a container. +type Status int +const ( // The container exists and is running. 
- Running RunState = iota + 1 + Running Status = iota + 1 // The container exists, it is in the process of being paused. Pausing @@ -42,36 +43,3 @@ const ( // The container does not exist. Destroyed ) - -// SaveState writes the container's runtime state to a state.json file -// in the specified path -func SaveState(basePath string, state *State) error { - f, err := os.Create(filepath.Join(basePath, stateFile)) - if err != nil { - return err - } - defer f.Close() - - return json.NewEncoder(f).Encode(state) -} - -// GetState reads the state.json file for a running container -func GetState(basePath string) (*State, error) { - f, err := os.Open(filepath.Join(basePath, stateFile)) - if err != nil { - return nil, err - } - defer f.Close() - - var state *State - if err := json.NewDecoder(f).Decode(&state); err != nil { - return nil, err - } - - return state, nil -} - -// DeleteState deletes the state.json file -func DeleteState(basePath string) error { - return os.Remove(filepath.Join(basePath, stateFile)) -} diff --git a/container.go b/container.go index e04a43df4..4348e9625 100644 --- a/container.go +++ b/container.go @@ -4,9 +4,16 @@ NOTE: The API is in flux and mainly not implemented. Proceed with caution until package libcontainer import ( + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/network" ) +type Stats struct { + NetworkStats *network.NetworkStats `json:"network_stats,omitempty"` + CgroupStats *cgroups.Stats `json:"cgroup_stats,omitempty"` +} + // A libcontainer container object. // // Each container is thread-safe within the same process. Since a container can @@ -16,12 +23,11 @@ type Container interface { // Returns the ID of the container ID() string - // Returns the current run state of the container. + // Returns the current statusof the container. // // errors: - // ContainerDestroyed - Container no longer exists, // Systemerror - System error. 
- RunState() (configs.RunState, error) + Status() (configs.Status, error) // Returns the current config of the container. Config() *configs.Config @@ -41,7 +47,7 @@ type Container interface { // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - Stats() (*ContainerStats, error) + Stats() (*Stats, error) // Start a process inside the container. Returns the PID of the new process (in the caller process's namespace) and a channel that will return the exit status of the process whenever it dies. // @@ -101,4 +107,10 @@ type Container interface { // ContainerDestroyed - Container no longer exists, // Systemerror - System error. WaitProcess(pid int) (exitStatus int, err error) + + // OOM returns a read-only channel signaling when the container receives an OOM notification. + // + // errors: + // Systemerror - System error. + OOM() (<-chan struct{}, error) } diff --git a/devices/defaults.go b/devices/defaults.go index e0ad0b08f..3923ccd50 100644 --- a/devices/defaults.go +++ b/devices/defaults.go @@ -1,147 +1,127 @@ package devices +import "github.com/docker/libcontainer/configs" + var ( // These are devices that are to be both allowed and created. 
- - DefaultSimpleDevices = []*Device{ + DefaultSimpleDevices = []*configs.Device{ // /dev/null and zero { - Path: "/dev/null", - Type: 'c', - MajorNumber: 1, - MinorNumber: 3, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 0666, }, { - Path: "/dev/zero", - Type: 'c', - MajorNumber: 1, - MinorNumber: 5, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, }, { - Path: "/dev/full", - Type: 'c', - MajorNumber: 1, - MinorNumber: 7, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/full", + Type: 'c', + Major: 1, + Minor: 7, + Permissions: "rwm", + FileMode: 0666, }, // consoles and ttys { - Path: "/dev/tty", - Type: 'c', - MajorNumber: 5, - MinorNumber: 0, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/tty", + Type: 'c', + Major: 5, + Minor: 0, + Permissions: "rwm", + FileMode: 0666, }, // /dev/urandom,/dev/random { - Path: "/dev/urandom", - Type: 'c', - MajorNumber: 1, - MinorNumber: 9, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/urandom", + Type: 'c', + Major: 1, + Minor: 9, + Permissions: "rwm", + FileMode: 0666, }, { - Path: "/dev/random", - Type: 'c', - MajorNumber: 1, - MinorNumber: 8, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/random", + Type: 'c', + Major: 1, + Minor: 8, + Permissions: "rwm", + FileMode: 0666, }, } - - DefaultAllowedDevices = append([]*Device{ + DefaultAllowedDevices = append([]*configs.Device{ // allow mknod for any device { - Type: 'c', - MajorNumber: Wildcard, - MinorNumber: Wildcard, - CgroupPermissions: "m", + Type: 'c', + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: "m", }, { - Type: 'b', - MajorNumber: Wildcard, - MinorNumber: Wildcard, - CgroupPermissions: "m", + Type: 'b', + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: "m", }, { - Path: "/dev/console", - 
Type: 'c', - MajorNumber: 5, - MinorNumber: 1, - CgroupPermissions: "rwm", + Path: "/dev/console", + Type: 'c', + Major: 5, + Minor: 1, + Permissions: "rwm", }, { - Path: "/dev/tty0", - Type: 'c', - MajorNumber: 4, - MinorNumber: 0, - CgroupPermissions: "rwm", + Path: "/dev/tty0", + Type: 'c', + Major: 4, + Minor: 0, + Permissions: "rwm", }, { - Path: "/dev/tty1", - Type: 'c', - MajorNumber: 4, - MinorNumber: 1, - CgroupPermissions: "rwm", + Path: "/dev/tty1", + Type: 'c', + Major: 4, + Minor: 1, + Permissions: "rwm", }, // /dev/pts/ - pts namespaces are "coming soon" { - Path: "", - Type: 'c', - MajorNumber: 136, - MinorNumber: Wildcard, - CgroupPermissions: "rwm", + Path: "", + Type: 'c', + Major: 136, + Minor: configs.Wildcard, + Permissions: "rwm", }, { - Path: "", - Type: 'c', - MajorNumber: 5, - MinorNumber: 2, - CgroupPermissions: "rwm", + Path: "", + Type: 'c', + Major: 5, + Minor: 2, + Permissions: "rwm", }, // tuntap { - Path: "", - Type: 'c', - MajorNumber: 10, - MinorNumber: 200, - CgroupPermissions: "rwm", + Path: "", + Type: 'c', + Major: 10, + Minor: 200, + Permissions: "rwm", }, - - /*// fuse - { - Path: "", - Type: 'c', - MajorNumber: 10, - MinorNumber: 229, - CgroupPermissions: "rwm", - }, - - // rtc - { - Path: "", - Type: 'c', - MajorNumber: 254, - MinorNumber: 0, - CgroupPermissions: "rwm", - }, - */ }, DefaultSimpleDevices...) - - DefaultAutoCreatedDevices = append([]*Device{ + DefaultAutoCreatedDevices = append([]*configs.Device{ { // /dev/fuse is created but not allowed. // This is to allow java to work. Because java @@ -149,11 +129,11 @@ var ( // https://github.com/docker/docker/issues/514 // https://github.com/docker/docker/issues/2393 // - Path: "/dev/fuse", - Type: 'c', - MajorNumber: 10, - MinorNumber: 229, - CgroupPermissions: "rwm", + Path: "/dev/fuse", + Type: 'c', + Major: 10, + Minor: 229, + Permissions: "rwm", }, }, DefaultSimpleDevices...) 
) diff --git a/devices/devices.go b/devices/devices.go index 8e86d9529..b3f67aa3d 100644 --- a/devices/devices.go +++ b/devices/devices.go @@ -7,14 +7,12 @@ import ( "os" "path/filepath" "syscall" -) -const ( - Wildcard = -1 + "github.com/docker/libcontainer/configs" ) var ( - ErrNotADeviceNode = errors.New("not a device node") + ErrNotADevice = errors.New("not a device node") ) // Testing dependencies @@ -23,45 +21,20 @@ var ( ioutilReadDir = ioutil.ReadDir ) -type Device struct { - Type rune `json:"type,omitempty"` - Path string `json:"path,omitempty"` // It is fine if this is an empty string in the case that you are using Wildcards - MajorNumber int64 `json:"major_number,omitempty"` // Use the wildcard constant for wildcards. - MinorNumber int64 `json:"minor_number,omitempty"` // Use the wildcard constant for wildcards. - CgroupPermissions string `json:"cgroup_permissions,omitempty"` // Typically just "rwm" - FileMode os.FileMode `json:"file_mode,omitempty"` // The permission bits of the file's mode - Uid uint32 `json:"uid,omitempty"` - Gid uint32 `json:"gid,omitempty"` -} - -func GetDeviceNumberString(deviceNumber int64) string { - if deviceNumber == Wildcard { - return "*" - } else { - return fmt.Sprintf("%d", deviceNumber) - } -} - -func (device *Device) GetCgroupAllowString() string { - return fmt.Sprintf("%c %s:%s %s", device.Type, GetDeviceNumberString(device.MajorNumber), GetDeviceNumberString(device.MinorNumber), device.CgroupPermissions) -} - // Given the path to a device and it's cgroup_permissions(which cannot be easilly queried) look up the information about a linux device and return that information as a Device struct. 
-func GetDevice(path, cgroupPermissions string) (*Device, error) { +func DeviceFromPath(path, permissions string) (*configs.Device, error) { fileInfo, err := osLstat(path) if err != nil { return nil, err } - var ( devType rune mode = fileInfo.Mode() fileModePermissionBits = os.FileMode.Perm(mode) ) - switch { case mode&os.ModeDevice == 0: - return nil, ErrNotADeviceNode + return nil, ErrNotADevice case mode&os.ModeCharDevice != 0: fileModePermissionBits |= syscall.S_IFCHR devType = 'c' @@ -69,36 +42,33 @@ func GetDevice(path, cgroupPermissions string) (*Device, error) { fileModePermissionBits |= syscall.S_IFBLK devType = 'b' } - stat_t, ok := fileInfo.Sys().(*syscall.Stat_t) if !ok { return nil, fmt.Errorf("cannot determine the device number for device %s", path) } devNumber := int(stat_t.Rdev) - - return &Device{ - Type: devType, - Path: path, - MajorNumber: Major(devNumber), - MinorNumber: Minor(devNumber), - CgroupPermissions: cgroupPermissions, - FileMode: fileModePermissionBits, - Uid: stat_t.Uid, - Gid: stat_t.Gid, + return &configs.Device{ + Type: devType, + Path: path, + Major: Major(devNumber), + Minor: Minor(devNumber), + Permissions: permissions, + FileMode: fileModePermissionBits, + Uid: stat_t.Uid, + Gid: stat_t.Gid, }, nil } -func GetHostDeviceNodes() ([]*Device, error) { +func HostDevices() ([]*configs.Device, error) { return getDeviceNodes("/dev") } -func getDeviceNodes(path string) ([]*Device, error) { +func getDeviceNodes(path string) ([]*configs.Device, error) { files, err := ioutilReadDir(path) if err != nil { return nil, err } - - out := []*Device{} + out := []*configs.Device{} for _, f := range files { switch { case f.IsDir(): @@ -117,16 +87,14 @@ func getDeviceNodes(path string) ([]*Device, error) { case f.Name() == "console": continue } - - device, err := GetDevice(filepath.Join(path, f.Name()), "rwm") + device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") if err != nil { - if err == ErrNotADeviceNode { + if err == 
ErrNotADevice { continue } return nil, err } out = append(out, device) } - return out, nil } diff --git a/devices/number.go b/devices/number.go index 3aae380bb..9e8feb83b 100644 --- a/devices/number.go +++ b/devices/number.go @@ -20,7 +20,3 @@ func Major(devNumber int) int64 { func Minor(devNumber int) int64 { return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00)) } - -func Mkdev(majorNumber int64, minorNumber int64) int { - return int((majorNumber << 8) | (minorNumber & 0xff) | ((minorNumber & 0xfff00) << 12)) -} diff --git a/linux_container.go b/linux_container.go index e5c6826a2..c3b43c6cb 100644 --- a/linux_container.go +++ b/linux_container.go @@ -34,11 +34,10 @@ func (c *linuxContainer) Config() *configs.Config { return c.config } -func (c *linuxContainer) RunState() (configs.RunState, error) { +func (c *linuxContainer) Status() (configs.Status, error) { if c.state.InitPid <= 0 { return configs.Destroyed, nil } - // return Running if the init process is alive err := syscall.Kill(c.state.InitPid, 0) if err != nil { @@ -47,14 +46,10 @@ func (c *linuxContainer) RunState() (configs.RunState, error) { } return 0, err } - if c.config.Cgroups != nil && - c.config.Cgroups.Freezer == cgroups.Frozen { + c.config.Cgroups.Freezer == configs.Frozen { return configs.Paused, nil } - - //FIXME get a cgroup state to check other states - return configs.Running, nil } @@ -67,13 +62,12 @@ func (c *linuxContainer) Processes() ([]int, error) { return pids, nil } -func (c *linuxContainer) Stats() (*ContainerStats, error) { +func (c *linuxContainer) Stats() (*Stats, error) { glog.Info("fetch container stats") var ( err error - stats = &ContainerStats{} + stats = &Stats{} ) - if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newGenericError(err, SystemError) } @@ -84,7 +78,7 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { } func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { - state, err := c.RunState() 
+ status, err := c.Status() if err != nil { return -1, err } @@ -103,15 +97,13 @@ func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - if state != configs.Destroyed { + if status != configs.Destroyed { glog.Info("start new container process") return namespaces.ExecIn(config.Args, config.Env, config.Console, cmd, c.config, c.state) } - if err := c.startInitProcess(cmd, config); err != nil { return -1, err } - return c.state.InitPid, nil } @@ -154,25 +146,22 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) } func (c *linuxContainer) Destroy() error { - state, err := c.RunState() + status, err := c.Status() if err != nil { return err } - - if state != configs.Destroyed { + if status != configs.Destroyed { return newGenericError(nil, ContainerNotStopped) } - - os.RemoveAll(c.root) - return nil + return os.RemoveAll(c.root) } func (c *linuxContainer) Pause() error { - return c.cgroupManager.Freeze(cgroups.Frozen) + return c.cgroupManager.Freeze(configs.Frozen) } func (c *linuxContainer) Resume() error { - return c.cgroupManager.Freeze(cgroups.Thawed) + return c.cgroupManager.Freeze(configs.Thawed) } func (c *linuxContainer) Signal(pid, signal int) error { @@ -194,3 +183,7 @@ func (c *linuxContainer) WaitProcess(pid int) (int, error) { return int(status), err } + +func (c *linuxContainer) OOM() (<-chan struct{}, error) { + return NotifyOnOOM(c.state) +} diff --git a/linux_factory.go b/linux_factory.go index b88a66def..9f23b154b 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -33,7 +33,6 @@ func New(root string, initArgs []string) (Factory, error) { return nil, newGenericError(err, SystemError) } } - return &linuxFactory{ root: root, initArgs: initArgs, diff --git a/mount/init.go b/mount/init.go index 91a27294a..4f9943332 100644 --- a/mount/init.go +++ b/mount/init.go @@ -8,11 +8,10 @@ import ( "path/filepath" "syscall" + "github.com/docker/libcontainer/configs" 
"github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount/nodes" ) -// default mount point flags const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV type mount struct { @@ -25,85 +24,60 @@ type mount struct { // InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a // new mount namespace. -func InitializeMountNamespace(rootfs, console string, sysReadonly bool, hostRootUid, hostRootGid int, mountConfig *MountConfig) error { - var ( - err error - flag = syscall.MS_PRIVATE - ) - - if mountConfig.NoPivotRoot { - flag = syscall.MS_SLAVE - } - - if err := syscall.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil { - return fmt.Errorf("mounting / with flags %X %s", (flag | syscall.MS_REC), err) - } - - if err := syscall.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { - return fmt.Errorf("mouting %s as bind %s", rootfs, err) +func InitializeMountNamespace(config *configs.Config) (err error) { + if err := prepareRoot(config); err != nil { + return err } - - if err := mountSystem(rootfs, sysReadonly, mountConfig); err != nil { - return fmt.Errorf("mount system %s", err) + if err := mountSystem(config); err != nil { + return err } - // apply any user specified mounts within the new mount namespace - for _, m := range mountConfig.Mounts { - if err := m.Mount(rootfs, mountConfig.MountLabel); err != nil { + for _, m := range config.Mounts { + if err := m.Mount(config.RootFs, config.MountLabel); err != nil { return err } } - - if err := nodes.CreateDeviceNodes(rootfs, mountConfig.DeviceNodes); err != nil { - return fmt.Errorf("create device nodes %s", err) + if err := createDeviceNodes(config); err != nil { + return err } - - if err := SetupPtmx(rootfs, console, mountConfig.MountLabel, hostRootUid, hostRootGid); err != nil { + if err := setupPtmx(config); err != nil { return err } - // stdin, stdout and stderr could be pointing to /dev/null 
from parent namespace. // Re-open them inside this namespace. // FIXME: Need to fix this for user namespaces. - if hostRootUid == 0 { - if err := reOpenDevNull(rootfs); err != nil { - return fmt.Errorf("Failed to reopen /dev/null %s", err) + if 0 == 0 { + if err := reOpenDevNull(config.RootFs); err != nil { + return err } } - - if err := setupDevSymlinks(rootfs); err != nil { - return fmt.Errorf("dev symlinks %s", err) + if err := setupDevSymlinks(config.RootFs); err != nil { + return err } - - if err := syscall.Chdir(rootfs); err != nil { - return fmt.Errorf("chdir into %s %s", rootfs, err) + if err := syscall.Chdir(config.RootFs); err != nil { + return err } - - if mountConfig.NoPivotRoot { - err = MsMoveRoot(rootfs) + if config.NoPivotRoot { + err = msMoveRoot(config.RootFs) } else { - err = PivotRoot(rootfs, mountConfig.PivotDir) + err = pivotRoot(config.RootFs, config.PivotDir) } - if err != nil { return err } - - if mountConfig.ReadonlyFs { - if err := SetReadonly(); err != nil { + if config.ReadonlyFs { + if err := setReadonly(); err != nil { return fmt.Errorf("set readonly %s", err) } } - syscall.Umask(0022) - return nil } // mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts // inside the mount namespace -func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error { - for _, m := range newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly) { +func mountSystem(config *configs.Config) error { + for _, m := range newSystemMounts(config.RootFs, config.MountLabel, config.RestrictSys) { if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { return fmt.Errorf("mkdirall %s %s", m.path, err) } @@ -114,28 +88,6 @@ func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) erro return nil } -func createIfNotExists(path string, isDir bool) error { - if _, err := os.Stat(path); err != nil { - if os.IsNotExist(err) { - if isDir { - if err := os.MkdirAll(path, 0755); err != 
nil { - return err - } - } else { - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { - return err - } - f, err := os.OpenFile(path, os.O_CREATE, 0755) - if err != nil { - return err - } - f.Close() - } - } - } - return nil -} - func setupDevSymlinks(rootfs string) error { var links = [][2]string{ {"/proc/self/fd", "/dev/fd"}, @@ -210,3 +162,54 @@ func reOpenDevNull(rootfs string) error { } return nil } + +// Create the device nodes in the container. +func createDeviceNodes(config *configs.Config) error { + oldMask := syscall.Umask(0000) + for _, node := range config.DeviceNodes { + if err := createDeviceNode(config.RootFs, node); err != nil { + syscall.Umask(oldMask) + return err + } + } + syscall.Umask(oldMask) + return nil +} + +// Creates the device node in the rootfs of the container. +func createDeviceNode(rootfs string, node *configs.Device) error { + var ( + dest = filepath.Join(rootfs, node.Path) + parent = filepath.Dir(dest) + ) + if err := os.MkdirAll(parent, 0755); err != nil { + return err + } + fileMode := node.FileMode + switch node.Type { + case 'c': + fileMode |= syscall.S_IFCHR + case 'b': + fileMode |= syscall.S_IFBLK + default: + return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) + } + if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) { + return fmt.Errorf("mknod %s %s", node.Path, err) + } + if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { + return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) + } + return nil +} + +func prepareRoot(config *configs.Config) error { + flag := syscall.MS_PRIVATE | syscall.MS_REC + if config.NoPivotRoot { + flag = syscall.MS_SLAVE | syscall.MS_REC + } + if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil { + return err + } + return syscall.Mount(config.RootFs, config.RootFs, "bind", syscall.MS_BIND|syscall.MS_REC, "") +} diff --git a/mount/mount_config.go 
b/mount/mount_config.go deleted file mode 100644 index f19465e60..000000000 --- a/mount/mount_config.go +++ /dev/null @@ -1,33 +0,0 @@ -package mount - -import ( - "errors" - - "github.com/docker/libcontainer/devices" -) - -var ErrUnsupported = errors.New("Unsupported method") - -type MountConfig struct { - // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs - // This is a common option when the container is running in ramdisk - NoPivotRoot bool `json:"no_pivot_root,omitempty"` - - // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. - // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. - // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. - PivotDir string `json:"pivot_dir,omitempty"` - - // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted - // bind mounts are writtable - ReadonlyFs bool `json:"readonly_fs,omitempty"` - - // Mounts specify additional source and destination paths that will be mounted inside the container's - // rootfs and mount namespace if specified - Mounts []*Mount `json:"mounts,omitempty"` - - // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! 
- DeviceNodes []*devices.Device `json:"device_nodes,omitempty"` - - MountLabel string `json:"mount_label,omitempty"` -} diff --git a/mount/msmoveroot.go b/mount/msmoveroot.go index 94afd3a99..17b732932 100644 --- a/mount/msmoveroot.go +++ b/mount/msmoveroot.go @@ -2,19 +2,14 @@ package mount -import ( - "fmt" - "syscall" -) +import "syscall" -func MsMoveRoot(rootfs string) error { +func msMoveRoot(rootfs string) error { if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { - return fmt.Errorf("mount move %s into / %s", rootfs, err) + return err } - if err := syscall.Chroot("."); err != nil { - return fmt.Errorf("chroot . %s", err) + return err } - return syscall.Chdir("/") } diff --git a/mount/nodes/nodes.go b/mount/nodes/nodes.go deleted file mode 100644 index 322c0c0ee..000000000 --- a/mount/nodes/nodes.go +++ /dev/null @@ -1,57 +0,0 @@ -// +build linux - -package nodes - -import ( - "fmt" - "os" - "path/filepath" - "syscall" - - "github.com/docker/libcontainer/devices" -) - -// Create the device nodes in the container. -func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error { - oldMask := syscall.Umask(0000) - defer syscall.Umask(oldMask) - - for _, node := range nodesToCreate { - if err := CreateDeviceNode(rootfs, node); err != nil { - return err - } - } - return nil -} - -// Creates the device node in the rootfs of the container. 
-func CreateDeviceNode(rootfs string, node *devices.Device) error { - var ( - dest = filepath.Join(rootfs, node.Path) - parent = filepath.Dir(dest) - ) - - if err := os.MkdirAll(parent, 0755); err != nil { - return err - } - - fileMode := node.FileMode - switch node.Type { - case 'c': - fileMode |= syscall.S_IFCHR - case 'b': - fileMode |= syscall.S_IFBLK - default: - return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) - } - - if err := syscall.Mknod(dest, uint32(fileMode), devices.Mkdev(node.MajorNumber, node.MinorNumber)); err != nil && !os.IsExist(err) { - return fmt.Errorf("mknod %s %s", node.Path, err) - } - - if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { - return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) - } - - return nil -} diff --git a/mount/nodes/nodes_unsupported.go b/mount/nodes/nodes_unsupported.go deleted file mode 100644 index 83660715d..000000000 --- a/mount/nodes/nodes_unsupported.go +++ /dev/null @@ -1,13 +0,0 @@ -// +build !linux - -package nodes - -import ( - "errors" - - "github.com/docker/libcontainer/devices" -) - -func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error { - return errors.New("Unsupported method") -} diff --git a/mount/pivotroot.go b/mount/pivotroot.go index acc3be24c..3d422774e 100644 --- a/mount/pivotroot.go +++ b/mount/pivotroot.go @@ -10,7 +10,7 @@ import ( "syscall" ) -func PivotRoot(rootfs, pivotBaseDir string) error { +func pivotRoot(rootfs, pivotBaseDir string) error { if pivotBaseDir == "" { pivotBaseDir = "/" } @@ -22,20 +22,16 @@ func PivotRoot(rootfs, pivotBaseDir string) error { if err != nil { return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err) } - if err := syscall.PivotRoot(rootfs, pivotDir); err != nil { return fmt.Errorf("pivot_root %s", err) } - if err := syscall.Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } - // path to pivot dir now changed, update pivotDir = 
filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { return fmt.Errorf("unmount pivot_root dir %s", err) } - return os.Remove(pivotDir) } diff --git a/mount/ptmx.go b/mount/ptmx.go index 5b558775b..25de75b18 100644 --- a/mount/ptmx.go +++ b/mount/ptmx.go @@ -7,24 +7,29 @@ import ( "os" "path/filepath" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/console" ) -func SetupPtmx(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error { - ptmx := filepath.Join(rootfs, "dev/ptmx") +func setupPtmx(config *configs.Config) error { + ptmx := filepath.Join(config.RootFs, "dev/ptmx") if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err } - if err := os.Symlink("pts/ptmx", ptmx); err != nil { return fmt.Errorf("symlink dev ptmx %s", err) } - - if consolePath != "" { - if err := console.Setup(rootfs, consolePath, mountLabel, hostRootUid, hostRootGid); err != nil { + if config.Console != "" { + uid, err := config.HostUID() + if err != nil { return err } + gid, err := config.HostGID() + if err != nil { + return err + } + // TODO: (crosbymichael) get uid/gid + return console.Setup(config.RootFs, config.Console, config.MountLabel, uid, gid) } - return nil } diff --git a/mount/readonly.go b/mount/readonly.go index 9b4a6f704..855c9f995 100644 --- a/mount/readonly.go +++ b/mount/readonly.go @@ -6,6 +6,6 @@ import ( "syscall" ) -func SetReadonly() error { +func setReadonly() error { return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") } diff --git a/namespaces/exec.go b/namespaces/exec.go index 1c157e3a2..ec462cbcb 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -20,27 +20,23 @@ const ( EXIT_SIGNAL_OFFSET = 128 ) -func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *network.NetworkState) error { +func executeSetupCmd(args 
[]string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error { command := exec.Command(args[0], args[1:]...) - parent, child, err := newInitPipe() if err != nil { return err } defer parent.Close() command.ExtraFiles = []*os.File{child} - command.Dir = container.RootFs command.Env = append(command.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), fmt.Sprintf("_LIBCONTAINER_USERNS=1")) - err = command.Start() child.Close() if err != nil { return err } - s, err := command.Process.Wait() if err != nil { return err @@ -48,36 +44,29 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process if !s.Success() { return &exec.ExitError{s} } - decoder := json.NewDecoder(parent) var pid *pid - if err := decoder.Decode(&pid); err != nil { return err } - p, err := os.FindProcess(pid.Pid) if err != nil { return err } - terminate := func(terr error) error { // TODO: log the errors for kill and wait p.Kill() p.Wait() return terr } - // send the state to the container's init process then shutdown writes for the parent if err := json.NewEncoder(parent).Encode(process); err != nil { return terminate(err) } - // shutdown writes for the parent side of the pipe if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { return terminate(err) } - // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *initError @@ -87,7 +76,6 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process if ierr != nil { return ierr } - s, err = p.Wait() if err != nil { return err @@ -95,7 +83,6 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process if !s.Success() { return &exec.ExitError{s} } - return nil } @@ -165,7 +152,7 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai } }() - var networkState network.NetworkState + var networkState configs.NetworkState if err := 
InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { return terminate(err) } @@ -218,7 +205,7 @@ func killAllPids(m cgroups.Manager) error { var ( procs []*os.Process ) - m.Freeze(cgroups.Frozen) + m.Freeze(configs.Frozen) pids, err := m.GetPids() if err != nil { return err @@ -231,61 +218,13 @@ func killAllPids(m cgroups.Manager) error { p.Kill() } } - m.Freeze(cgroups.Thawed) + m.Freeze(configs.Thawed) for _, p := range procs { p.Wait() } return err } -// Utility function that gets a host ID for a container ID from user namespace map -// if that ID is present in the map. -func hostIDFromMapping(containerID int, uMap []configs.IDMap) (int, bool) { - for _, m := range uMap { - if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { - hostID := m.HostID + (containerID - m.ContainerID) - return hostID, true - } - } - return -1, false -} - -// Gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. -func GetHostRootGid(container *configs.Config) (int, error) { - if container.Namespaces.Contains(configs.NEWUSER) { - if container.GidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") - } - hostRootGid, found := hostIDFromMapping(0, container.GidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") - } - return hostRootGid, nil - } - - // Return default root uid 0 - return 0, nil -} - -// Gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. 
-func GetHostRootUid(container *configs.Config) (int, error) { - if container.Namespaces.Contains(configs.NEWUSER) { - if container.UidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") - } - hostRootUid, found := hostIDFromMapping(0, container.UidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") - } - return hostRootUid, nil - } - - // Return default root uid 0 - return 0, nil -} - // Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { if container.UidMappings != nil { @@ -309,13 +248,13 @@ func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { // InitializeNetworking creates the container's network stack outside of the namespace and moves // interfaces into the container's net namespaces if necessary -func InitializeNetworking(container *configs.Config, nspid int, networkState *network.NetworkState) error { +func InitializeNetworking(container *configs.Config, nspid int, networkState *configs.NetworkState) error { for _, config := range container.Networks { strategy, err := network.GetStrategy(config.Type) if err != nil { return err } - if err := strategy.Create((*network.Network)(config), nspid, networkState); err != nil { + if err := strategy.Create(config, nspid, networkState); err != nil { return err } } diff --git a/namespaces/execin.go b/namespaces/execin.go index 3b51a5872..75e70a06f 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -16,7 +16,6 @@ import ( "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/mount" "github.com/docker/libcontainer/system" - "github.com/docker/libcontainer/utils" ) type pid struct { @@ -173,12 +172,14 @@ func FinalizeSetns(container *configs.Config) error { func SetupContainer(process *processArgs) error { container := process.Config networkState := process.NetworkState - 
consolePath := process.ConsolePath - rootfs, err := utils.ResolveRootfs(container.RootFs) - if err != nil { - return err - } + // TODO : move to validation + /* + rootfs, err := utils.ResolveRootfs(container.RootFs) + if err != nil { + return err + } + */ // clear the current processes env and replace it with the environment // defined on the container @@ -203,30 +204,12 @@ func SetupContainer(process *processArgs) error { label.Init() - hostRootUid, err := GetHostRootUid(container) - if err != nil { - return fmt.Errorf("failed to get hostRootUid %s", err) - } - - hostRootGid, err := GetHostRootGid(container) - if err != nil { - return fmt.Errorf("failed to get hostRootGid %s", err) - } - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - if container.MountConfig != nil { - return fmt.Errorf("mount config is set without mount namespace") + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := mount.InitializeMountNamespace(container); err != nil { + return fmt.Errorf("setup mount namespace %s", err) } - } else if err := mount.InitializeMountNamespace(rootfs, - consolePath, - container.RestrictSys, - hostRootUid, - hostRootGid, - (*mount.MountConfig)(container.MountConfig)); err != nil { - return fmt.Errorf("setup mount namespace %s", err) } - return nil } diff --git a/namespaces/init.go b/namespaces/init.go index 582545149..d651352ac 100644 --- a/namespaces/init.go +++ b/namespaces/init.go @@ -30,7 +30,7 @@ type processArgs struct { Env []string `json:"environment,omitempty"` ConsolePath string `json:"console_path,omitempty"` Config *configs.Config `json:"config,omitempty"` - NetworkState *network.NetworkState `json:"network_state,omitempty"` + NetworkState *configs.NetworkState `json:"network_state,omitempty"` } // TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. 
@@ -88,10 +88,13 @@ func initDefault(uncleanRootfs string, process *processArgs) (err error) { container := process.Config networkState := process.NetworkState - rootfs, err := utils.ResolveRootfs(uncleanRootfs) - if err != nil { - return err - } + // TODO: move to validation + /* + rootfs, err := utils.ResolveRootfs(uncleanRootfs) + if err != nil { + return err + } + */ // clear the current processes env and replace it with the environment // defined on the container @@ -139,20 +142,14 @@ func initDefault(uncleanRootfs string, process *processArgs) (err error) { label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - if container.MountConfig != nil { - return fmt.Errorf("mount config is set without mount namespace") + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := mount.InitializeMountNamespace(container); err != nil { + return err } - } else if err := mount.InitializeMountNamespace(rootfs, - process.ConsolePath, - container.RestrictSys, - 0, // Default Root Uid - 0, // Default Root Gid - (*mount.MountConfig)(container.MountConfig)); err != nil { - return fmt.Errorf("setup mount namespace %s", err) } if container.Hostname != "" { + // TODO: (crosbymichael) move this to pre spawn validation if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { return fmt.Errorf("unable to set the hostname without UTS namespace") } @@ -357,14 +354,14 @@ func SetupUser(container *configs.Config) error { // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway -func setupNetwork(container *configs.Config, networkState *network.NetworkState) error { +func setupNetwork(container *configs.Config, networkState *configs.NetworkState) error { for _, config := range container.Networks { strategy, err := network.GetStrategy(config.Type) if err 
!= nil { return err } - err1 := strategy.Initialize((*network.Network)(config), networkState) + err1 := strategy.Initialize(config, networkState) if err1 != nil { return err1 } diff --git a/network/loopback.go b/network/loopback.go index 1667b4d82..b208dfc69 100644 --- a/network/loopback.go +++ b/network/loopback.go @@ -4,17 +4,19 @@ package network import ( "fmt" + + "github.com/docker/libcontainer/configs" ) // Loopback is a network strategy that provides a basic loopback device type Loopback struct { } -func (l *Loopback) Create(n *Network, nspid int, networkState *NetworkState) error { +func (l *Loopback) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error { return nil } -func (l *Loopback) Initialize(config *Network, networkState *NetworkState) error { +func (l *Loopback) Initialize(config *configs.Network, networkState *configs.NetworkState) error { // Do not set the MTU on the loopback interface - use the default. if err := InterfaceUp("lo"); err != nil { return fmt.Errorf("lo up %s", err) diff --git a/network/stats.go b/network/stats.go index e2156c74d..329a16bf3 100644 --- a/network/stats.go +++ b/network/stats.go @@ -5,6 +5,8 @@ import ( "path/filepath" "strconv" "strings" + + "github.com/docker/libcontainer/configs" ) type NetworkStats struct { @@ -19,7 +21,7 @@ type NetworkStats struct { } // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. -func GetStats(networkState *NetworkState) (*NetworkStats, error) { +func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) { // This can happen if the network runtime information is missing - possible if the container was created by an old version of libcontainer. 
if networkState.VethHost == "" { return &NetworkStats{}, nil diff --git a/network/strategy.go b/network/strategy.go index 019fe62f4..bc4a023b0 100644 --- a/network/strategy.go +++ b/network/strategy.go @@ -4,6 +4,8 @@ package network import ( "errors" + + "github.com/docker/libcontainer/configs" ) var ( @@ -18,8 +20,8 @@ var strategies = map[string]NetworkStrategy{ // NetworkStrategy represents a specific network configuration for // a container's networking stack type NetworkStrategy interface { - Create(*Network, int, *NetworkState) error - Initialize(*Network, *NetworkState) error + Create(*configs.Network, int, *configs.NetworkState) error + Initialize(*configs.Network, *configs.NetworkState) error } // GetStrategy returns the specific network strategy for the diff --git a/network/types.go b/network/types.go index dcf00420f..1ae2e9d50 100644 --- a/network/types.go +++ b/network/types.go @@ -1,50 +1 @@ package network - -// Network defines configuration for a container's networking stack -// -// The network configuration can be omited from a container causing the -// container to be setup with the host's networking stack -type Network struct { - // Type sets the networks type, commonly veth and loopback - Type string `json:"type,omitempty"` - - // The bridge to use. - Bridge string `json:"bridge,omitempty"` - - // Prefix for the veth interfaces. 
- VethPrefix string `json:"veth_prefix,omitempty"` - - // MacAddress contains the MAC address to set on the network interface - MacAddress string `json:"mac_address,omitempty"` - - // Address contains the IPv4 and mask to set on the network interface - Address string `json:"address,omitempty"` - - // IPv6Address contains the IPv6 and mask to set on the network interface - IPv6Address string `json:"ipv6_address,omitempty"` - - // Gateway sets the gateway address that is used as the default for the interface - Gateway string `json:"gateway,omitempty"` - - // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface - IPv6Gateway string `json:"ipv6_gateway,omitempty"` - - // Mtu sets the mtu value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - Mtu int `json:"mtu,omitempty"` - - // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - TxQueueLen int `json:"txqueuelen,omitempty"` -} - -// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers -// Do not depend on it outside of libcontainer. -type NetworkState struct { - // The name of the veth interface on the Host. - VethHost string `json:"veth_host,omitempty"` - // The name of the veth interface created inside the container for the child. 
- VethChild string `json:"veth_child,omitempty"` -} diff --git a/network/veth.go b/network/veth.go index 3d7dc8729..7bcc3910c 100644 --- a/network/veth.go +++ b/network/veth.go @@ -5,6 +5,7 @@ package network import ( "fmt" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" "github.com/docker/libcontainer/utils" ) @@ -17,7 +18,7 @@ type Veth struct { const defaultDevice = "eth0" -func (v *Veth) Create(n *Network, nspid int, networkState *NetworkState) error { +func (v *Veth) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error { var ( bridge = n.Bridge prefix = n.VethPrefix @@ -51,7 +52,7 @@ func (v *Veth) Create(n *Network, nspid int, networkState *NetworkState) error { return nil } -func (v *Veth) Initialize(config *Network, networkState *NetworkState) error { +func (v *Veth) Initialize(config *configs.Network, networkState *configs.NetworkState) error { var vethChild = networkState.VethChild if vethChild == "" { return fmt.Errorf("vethChild is not specified") diff --git a/nsinit/exec.go b/nsinit/exec.go index 525991d3c..ef7762edf 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -26,17 +26,15 @@ var execCommand = cli.Command{ Usage: "execute a new command inside a container", Action: execAction, Flags: []cli.Flag{ - cli.BoolFlag{Name: "list", Usage: "list all registered exec functions"}, - cli.StringFlag{Name: "func", Value: "exec", Usage: "function name to exec inside a container"}, + cli.BoolFlag{Name: "tty", Usage: "allocate a TTY to the container"}, }, } func getContainer(context *cli.Context) (libcontainer.Container, error) { - factory, err := libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) + factory, err := loadFactory(context) if err != nil { log.Fatal(err) } - id := fmt.Sprintf("%x", md5.Sum([]byte(dataPath))) container, err := factory.Load(id) if err != nil && !os.IsNotExist(err) { @@ -72,7 +70,7 @@ func execAction(context *cli.Context) { 
log.Fatal(err) } - if container.Config().Tty { + if context.Bool("tty") { stdin = nil stdout = nil stderr = nil diff --git a/nsinit/main.go b/nsinit/main.go index 2de7bc3ee..034afb6f7 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -7,24 +7,16 @@ import ( "github.com/codegangsta/cli" ) -var ( - logPath = os.Getenv("log") -) - func main() { app := cli.NewApp() - app.Name = "nsinit" - app.Version = "0.1" + app.Version = "1" app.Author = "libcontainer maintainers" app.Flags = []cli.Flag{ cli.StringFlag{Name: "nspid"}, cli.StringFlag{Name: "console"}, cli.StringFlag{Name: "root", Value: ".", Usage: "root directory for containers"}, } - - app.Before = preload - app.Commands = []cli.Command{ configCommand, execCommand, @@ -34,7 +26,6 @@ func main() { statsCommand, unpauseCommand, } - if err := app.Run(os.Args); err != nil { log.Fatal(err) } diff --git a/nsinit/oom.go b/nsinit/oom.go index f7a333d45..194c45cb0 100644 --- a/nsinit/oom.go +++ b/nsinit/oom.go @@ -4,8 +4,6 @@ import ( "log" "github.com/codegangsta/cli" - "github.com/docker/libcontainer" - "github.com/docker/libcontainer/configs" ) var oomCommand = cli.Command{ @@ -15,11 +13,15 @@ var oomCommand = cli.Command{ } func oomAction(context *cli.Context) { - state, err := configs.GetState(dataPath) + factory, err := loadFactory(context) if err != nil { log.Fatal(err) } - n, err := libcontainer.NotifyOnOOM(state) + container, err := factory.Load("nsinit") + if err != nil { + log.Fatal(err) + } + n, err := container.OOM() if err != nil { log.Fatal(err) } diff --git a/nsinit/utils.go b/nsinit/utils.go index e02a1b3a3..c46d98b15 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -2,59 +2,27 @@ package main import ( "encoding/json" - "log" "os" "path/filepath" "github.com/codegangsta/cli" + "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" ) -// rFunc is a function registration for calling after an execin -type rFunc struct { - Usage string - Action func(*configs.Config, []string) -} - func 
loadConfig() (*configs.Config, error) { f, err := os.Open(filepath.Join(dataPath, "container.json")) if err != nil { return nil, err } defer f.Close() - var container *configs.Config if err := json.NewDecoder(f).Decode(&container); err != nil { return nil, err } - return container, nil } -func openLog(name string) error { - f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755) - if err != nil { - return err - } - - log.SetOutput(f) - - return nil -} - -func findUserArgs() []string { - i := 0 - for _, a := range os.Args { - i++ - - if a == "--" { - break - } - } - - return os.Args[i:] -} - // loadConfigFromFd loads a container's config from the sync pipe that is provided by // fd 3 when running a process func loadConfigFromFd() (*configs.Config, error) { @@ -68,23 +36,6 @@ func loadConfigFromFd() (*configs.Config, error) { return config, nil } -func preload(context *cli.Context) error { - if logPath != "" { - if err := openLog(logPath); err != nil { - return err - } - } - - return nil -} - -func runFunc(f *rFunc) { - userArgs := findUserArgs() - - config, err := loadConfigFromFd() - if err != nil { - log.Fatalf("unable to receive config from sync pipe: %s", err) - } - - f.Action(config, userArgs) +func loadFactory(context *cli.Context) (libcontainer.Factory, error) { + return libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) } diff --git a/types.go b/types.go deleted file mode 100644 index c341137ec..000000000 --- a/types.go +++ /dev/null @@ -1,11 +0,0 @@ -package libcontainer - -import ( - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/network" -) - -type ContainerStats struct { - NetworkStats *network.NetworkStats `json:"network_stats,omitempty"` - CgroupStats *cgroups.Stats `json:"cgroup_stats,omitempty"` -} From 8191d4d60f109df5e803a2e8918ff57342124421 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 31 Jan 2015 20:51:12 -0800 Subject: [PATCH 063/101] Refactory 
container interface This removes a new unused methods from the container interface and types parameters such as os.Signal and WaitStatus Signed-off-by: Michael Crosby --- container.go | 18 +++---- linux_container.go | 44 +++++++--------- linux_factory.go | 54 +++++++++---------- mount/ptmx.go | 1 - mount/remount.go | 31 ----------- nsinit/config.go | 29 ----------- nsinit/exec.go | 126 ++++++++++++++++----------------------------- nsinit/init.go | 48 +++++++---------- nsinit/main.go | 1 - nsinit/oom.go | 39 +++++++------- nsinit/pause.go | 56 ++++++++++---------- nsinit/stats.go | 47 +++++++---------- nsinit/utils.go | 44 ++++++++++------ process.go | 9 +--- 14 files changed, 209 insertions(+), 338 deletions(-) delete mode 100644 mount/remount.go delete mode 100644 nsinit/config.go diff --git a/container.go b/container.go index 4348e9625..72d284f87 100644 --- a/container.go +++ b/container.go @@ -4,6 +4,9 @@ NOTE: The API is in flux and mainly not implemented. Proceed with caution until package libcontainer import ( + "os" + "syscall" + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/network" @@ -56,7 +59,7 @@ type Container interface { // ConfigInvalid - config is invalid, // ContainerPaused - Container is paused, // Systemerror - System error. - StartProcess(config *ProcessConfig) (pid int, err error) + Start(process *Process) (pid int, err error) // Destroys the container after killing all running processes. // @@ -86,27 +89,20 @@ type Container interface { // Systemerror - System error. Resume() error - // Signal sends the specified signal to a process owned by the container. + // Signal sends the specified signal to the init process of the container. // // errors: // ContainerDestroyed - Container no longer exists, // ContainerPaused - Container is paused, // Systemerror - System error. 
- Signal(pid, signal int) error + Signal(signal os.Signal) error // Wait waits for the init process of the conatiner to die and returns it's exit status. // // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - Wait() (exitStatus int, err error) - - // WaitProcess waits on a process owned by the container. - // - // errors: - // ContainerDestroyed - Container no longer exists, - // Systemerror - System error. - WaitProcess(pid int) (exitStatus int, err error) + Wait() (exitStatus syscall.WaitStatus, err error) // OOM returns a read-only channel signaling when the container receives an OOM notification. // diff --git a/linux_container.go b/linux_container.go index c3b43c6cb..1d7ab3094 100644 --- a/linux_container.go +++ b/linux_container.go @@ -77,31 +77,28 @@ func (c *linuxContainer) Stats() (*Stats, error) { return stats, nil } -func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { +func (c *linuxContainer) Start(process *Process) (int, error) { status, err := c.Status() if err != nil { return -1, err } - cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) 
- cmd.Stdin = config.Stdin - cmd.Stdout = config.Stdout - cmd.Stderr = config.Stderr - - cmd.Env = config.Env + cmd.Stdin = process.Stdin + cmd.Stdout = process.Stdout + cmd.Stderr = process.Stderr + cmd.Env = c.config.Env cmd.Dir = c.config.RootFs - if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} } - + // TODO: add pdeath to config for a container cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - if status != configs.Destroyed { glog.Info("start new container process") - return namespaces.ExecIn(config.Args, config.Env, config.Console, cmd, c.config, c.state) + // TODO: (crosbymichael) check out console use for execin + return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) } - if err := c.startInitProcess(cmd, config); err != nil { + if err := c.startInitProcess(cmd, process); err != nil { return -1, err } return c.state.InitPid, nil @@ -130,8 +127,8 @@ func (c *linuxContainer) updateStateFile() error { return nil } -func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error { - err := namespaces.Exec(config.Args, config.Env, config.Console, cmd, c.config, c.cgroupManager, c.state) +func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *Process) error { + err := namespaces.Exec(config.Args, c.config.Env, c.config.Console, cmd, c.config, c.cgroupManager, c.state) if err != nil { return err } @@ -164,24 +161,19 @@ func (c *linuxContainer) Resume() error { return c.cgroupManager.Freeze(configs.Thawed) } -func (c *linuxContainer) Signal(pid, signal int) error { - glog.Infof("sending signal %d to pid %d", signal, pid) +func (c *linuxContainer) Signal(signal os.Signal) error { + glog.Infof("sending signal %d to pid %d", signal, c.state.InitPid) panic("not implemented") } -func (c *linuxContainer) Wait() (int, error) { - return c.WaitProcess(c.state.InitPid) -} - -func (c *linuxContainer) WaitProcess(pid int) (int, error) { +func (c *linuxContainer) Wait() (syscall.WaitStatus, error) { var 
status syscall.WaitStatus - - _, err := syscall.Wait4(pid, &status, 0, nil) + // TODO : close exec.Cmd pipes, fix in master + _, err := syscall.Wait4(c.state.InitPid, &status, 0, nil) if err != nil { - return -1, newGenericError(err, SystemError) + return 0, newGenericError(err, SystemError) } - - return int(status), err + return status, err } func (c *linuxContainer) OOM() (<-chan struct{}, error) { diff --git a/linux_factory.go b/linux_factory.go index 9f23b154b..3583bf728 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -50,44 +50,32 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err if l.root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } - if !idRegex.MatchString(id) { - return nil, newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) - } - - if len(id) > maxIdLen { - return nil, newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) + if err := l.validateID(id); err != nil { + return nil, err } - containerRoot := filepath.Join(l.root, id) - _, err := os.Stat(containerRoot) - if err == nil { + if _, err := os.Stat(containerRoot); err == nil { return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse) } else if !os.IsNotExist(err) { return nil, newGenericError(err, SystemError) } - data, err := json.MarshalIndent(config, "", "\t") if err != nil { return nil, newGenericError(err, SystemError) } - if err := os.MkdirAll(containerRoot, 0700); err != nil { return nil, newGenericError(err, SystemError) } - f, err := os.Create(filepath.Join(containerRoot, configFilename)) if err != nil { os.RemoveAll(containerRoot) return nil, newGenericError(err, SystemError) } defer f.Close() - - _, err = f.Write(data) - if err != nil { + if _, err := f.Write(data); err != nil { os.RemoveAll(containerRoot) return nil, newGenericError(err, SystemError) } - cgroupManager := cgroups.NewCgroupManager(config.Cgroups) return &linuxContainer{ id: id, 
@@ -109,13 +97,11 @@ func (l *linuxFactory) Load(id string) (Container, error) { if err != nil { return nil, err } - glog.Infof("loading container state from %s", containerRoot) state, err := l.loadContainerState(containerRoot) if err != nil { return nil, err } - cgroupManager := cgroups.LoadCgroupManager(config.Cgroups, state.CgroupPaths) glog.Infof("using %s as cgroup manager", cgroupManager) return &linuxContainer{ @@ -128,6 +114,20 @@ func (l *linuxFactory) Load(id string) (Container, error) { }, nil } +// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state +// This is a low level implementation detail of the reexec and should not be consumed externally +func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { + pipe := os.NewFile(uintptr(pipefd), "pipe") + + setupUserns := os.Getenv("_LIBCONTAINER_USERNS") + pid := os.Getenv("_LIBCONTAINER_INITPID") + if pid != "" && setupUserns == "" { + return namespaces.InitIn(pipe) + } + + return namespaces.Init(pipe, setupUserns != "") +} + func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) { f, err := os.Open(filepath.Join(root, configFilename)) if err != nil { @@ -162,16 +162,12 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { return state, nil } -// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state -// This is a low level implementation detail of the reexec and should not be consumed externally -func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { - pipe := os.NewFile(uintptr(pipefd), "pipe") - - setupUserns := os.Getenv("_LIBCONTAINER_USERNS") - pid := os.Getenv("_LIBCONTAINER_INITPID") - if pid != "" && setupUserns == "" { - return namespaces.InitIn(pipe) +func (l *linuxFactory) validateID(id string) error { + if !idRegex.MatchString(id) { + return newGenericError(fmt.Errorf("Invalid id format: 
%v", id), InvalidIdFormat) } - - return namespaces.Init(pipe, setupUserns != "") + if len(id) > maxIdLen { + return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) + } + return nil } diff --git a/mount/ptmx.go b/mount/ptmx.go index 25de75b18..657318dc4 100644 --- a/mount/ptmx.go +++ b/mount/ptmx.go @@ -28,7 +28,6 @@ func setupPtmx(config *configs.Config) error { if err != nil { return err } - // TODO: (crosbymichael) get uid/gid return console.Setup(config.RootFs, config.Console, config.MountLabel, uid, gid) } return nil diff --git a/mount/remount.go b/mount/remount.go deleted file mode 100644 index 99a01209d..000000000 --- a/mount/remount.go +++ /dev/null @@ -1,31 +0,0 @@ -// +build linux - -package mount - -import "syscall" - -func RemountProc() error { - if err := syscall.Unmount("/proc", syscall.MNT_DETACH); err != nil { - return err - } - - if err := syscall.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil { - return err - } - - return nil -} - -func RemountSys() error { - if err := syscall.Unmount("/sys", syscall.MNT_DETACH); err != nil { - if err != syscall.EINVAL { - return err - } - } else { - if err := syscall.Mount("sysfs", "/sys", "sysfs", uintptr(defaultMountFlags), ""); err != nil { - return err - } - } - - return nil -} diff --git a/nsinit/config.go b/nsinit/config.go deleted file mode 100644 index 74c7b3c09..000000000 --- a/nsinit/config.go +++ /dev/null @@ -1,29 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "log" - - "github.com/codegangsta/cli" -) - -var configCommand = cli.Command{ - Name: "config", - Usage: "display the container configuration", - Action: configAction, -} - -func configAction(context *cli.Context) { - container, err := loadConfig() - if err != nil { - log.Fatal(err) - } - - data, err := json.MarshalIndent(container, "", "\t") - if err != nil { - log.Fatal(err) - } - - fmt.Printf("%s", data) -} diff --git a/nsinit/exec.go b/nsinit/exec.go index 
ef7762edf..3a91fa4e2 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -1,55 +1,27 @@ package main import ( - "crypto/md5" - "fmt" "io" - "log" "os" "syscall" "github.com/codegangsta/cli" "github.com/docker/docker/pkg/term" "github.com/docker/libcontainer" - "github.com/docker/libcontainer/configs" consolepkg "github.com/docker/libcontainer/console" ) -var ( - dataPath = os.Getenv("data_path") - console = os.Getenv("console") - rawPipeFd = os.Getenv("pipe") -) - var execCommand = cli.Command{ Name: "exec", Usage: "execute a new command inside a container", Action: execAction, Flags: []cli.Flag{ cli.BoolFlag{Name: "tty", Usage: "allocate a TTY to the container"}, + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"}, }, } -func getContainer(context *cli.Context) (libcontainer.Container, error) { - factory, err := loadFactory(context) - if err != nil { - log.Fatal(err) - } - id := fmt.Sprintf("%x", md5.Sum([]byte(dataPath))) - container, err := factory.Load(id) - if err != nil && !os.IsNotExist(err) { - var config *configs.Config - - config, err = loadConfig() - if err != nil { - log.Fatal(err) - } - container, err = factory.Create(id, config) - } - - return container, err -} - func execAction(context *cli.Context) { var ( master *os.File @@ -65,79 +37,73 @@ func execAction(context *cli.Context) { exitCode int ) - container, err := getContainer(context) + factory, err := loadFactory(context) if err != nil { - log.Fatal(err) + fatal(err) } - - if context.Bool("tty") { - stdin = nil - stdout = nil - stderr = nil - - master, console, err = consolepkg.CreateMasterAndConsole() - if err != nil { - log.Fatal(err) + container, err := factory.Load(context.String("id")) + if err != nil { + if lerr, ok := err.(libcontainer.Error); !ok || lerr.Code() != libcontainer.ContainerNotExists { + fatal(err) } - - go io.Copy(master, os.Stdin) - go 
io.Copy(os.Stdout, master) - - state, err := term.SetRawTerminal(os.Stdin.Fd()) + config, err := loadConfig(context) if err != nil { - log.Fatal(err) + fatal(err) + } + if context.Bool("tty") { + stdin = nil + stdout = nil + stderr = nil + if master, console, err = consolepkg.CreateMasterAndConsole(); err != nil { + fatal(err) + } + go io.Copy(master, os.Stdin) + go io.Copy(os.Stdout, master) + state, err := term.SetRawTerminal(os.Stdin.Fd()) + if err != nil { + fatal(err) + } + defer term.RestoreTerminal(os.Stdin.Fd(), state) + config.Console = console + } + if container, err = factory.Create(context.String("id"), config); err != nil { + fatal(err) } - - defer term.RestoreTerminal(os.Stdin.Fd(), state) - } - - process := &libcontainer.ProcessConfig{ - Args: context.Args(), - Env: context.StringSlice("env"), - Stdin: stdin, - Stdout: stdout, - Stderr: stderr, - Console: console, } - - pid, err := container.StartProcess(process) - if err != nil { - log.Fatalf("failed to exec: %s", err) + process := &libcontainer.Process{ + Args: context.Args(), + Stdin: stdin, + Stdout: stdout, + Stderr: stderr, } - - p, err := os.FindProcess(pid) - if err != nil { - log.Fatalf("Unable to find the %d process: %s", pid, err) + if _, err := container.Start(process); err != nil { + fatal(err) } - go func() { resizeTty(master) - for sig := range sigc { switch sig { case syscall.SIGWINCH: resizeTty(master) default: - p.Signal(sig) + container.Signal(sig) } } }() - - ps, err := p.Wait() + status, err := container.Wait() if err != nil { - log.Fatalf("Unable to wait the %d process: %s", pid, err) + fatal(err) + } + if err := container.Destroy(); err != nil { + fatal(err) } - container.Destroy() - - status := ps.Sys().(syscall.WaitStatus) if status.Exited() { exitCode = status.ExitStatus() } else if status.Signaled() { exitCode = -int(status.Signal()) } else { - log.Fatalf("Unexpected status") + fatalf("Unexpected status") } - os.Exit(exitCode) } @@ -145,13 +111,9 @@ func resizeTty(master 
*os.File) { if master == nil { return } - ws, err := term.GetWinsize(os.Stdin.Fd()) if err != nil { return } - - if err := term.SetWinsize(master.Fd(), ws); err != nil { - return - } + term.SetWinsize(master.Fd(), ws) } diff --git a/nsinit/init.go b/nsinit/init.go index 21128302a..2b8784ccd 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -8,32 +8,24 @@ import ( _ "github.com/docker/libcontainer/namespaces/nsenter" ) -var ( - initCommand = cli.Command{ - Name: "init", - Usage: "runs the init process inside the namespace", - Action: initAction, - Flags: []cli.Flag{ - cli.IntFlag{Name: "fd", Value: 0, Usage: "internal pipe fd"}, - }, - } -) - -func initAction(context *cli.Context) { - factory, err := libcontainer.New("", []string{}) - if err != nil { - log.Fatal(err) - } - - if context.Int("fd") == 0 { - log.Fatal("--fd must be specified for init process") - } - - fd := uintptr(context.Int("fd")) - - if err := factory.StartInitialization(fd); err != nil { - log.Fatal(err) - } - - panic("This line should never been executed") +var initCommand = cli.Command{ + Name: "init", + Usage: "runs the init process inside the namespace", + Flags: []cli.Flag{ + cli.IntFlag{Name: "fd", Value: 0, Usage: "internal pipe fd"}, + }, + Action: func(context *cli.Context) { + factory, err := libcontainer.New("", nil) + if err != nil { + log.Fatal(err) + } + if context.Int("fd") == 0 { + log.Fatal("--fd must be specified for init process") + } + fd := uintptr(context.Int("fd")) + if err := factory.StartInitialization(fd); err != nil { + log.Fatal(err) + } + panic("This line should never been executed") + }, } diff --git a/nsinit/main.go b/nsinit/main.go index 034afb6f7..e0dcf460f 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -18,7 +18,6 @@ func main() { cli.StringFlag{Name: "root", Value: ".", Usage: "root directory for containers"}, } app.Commands = []cli.Command{ - configCommand, execCommand, initCommand, oomCommand, diff --git a/nsinit/oom.go b/nsinit/oom.go index 
194c45cb0..eabe0b2bd 100644 --- a/nsinit/oom.go +++ b/nsinit/oom.go @@ -7,25 +7,22 @@ import ( ) var oomCommand = cli.Command{ - Name: "oom", - Usage: "display oom notifications for a container", - Action: oomAction, -} - -func oomAction(context *cli.Context) { - factory, err := loadFactory(context) - if err != nil { - log.Fatal(err) - } - container, err := factory.Load("nsinit") - if err != nil { - log.Fatal(err) - } - n, err := container.OOM() - if err != nil { - log.Fatal(err) - } - for range n { - log.Printf("OOM notification received") - } + Name: "oom", + Usage: "display oom notifications for a container", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + log.Fatal(err) + } + n, err := container.OOM() + if err != nil { + log.Fatal(err) + } + for range n { + log.Printf("OOM notification received") + } + }, } diff --git a/nsinit/pause.go b/nsinit/pause.go index 6ba95cd1b..89af0b6f7 100644 --- a/nsinit/pause.go +++ b/nsinit/pause.go @@ -7,35 +7,35 @@ import ( ) var pauseCommand = cli.Command{ - Name: "pause", - Usage: "pause the container's processes", - Action: pauseAction, + Name: "pause", + Usage: "pause the container's processes", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + log.Fatal(err) + } + if err = container.Pause(); err != nil { + log.Fatal(err) + } + }, } var unpauseCommand = cli.Command{ - Name: "unpause", - Usage: "unpause the container's processes", - Action: unpauseAction, -} - -func pauseAction(context *cli.Context) { - container, err := getContainer(context) - if err != nil { - log.Fatal(err) - } - - if err = container.Pause(); err != nil { - log.Fatal(err) - } -} - -func unpauseAction(context *cli.Context) { - 
container, err := getContainer(context) - if err != nil { - log.Fatal(err) - } - - if err = container.Resume(); err != nil { - log.Fatal(err) - } + Name: "unpause", + Usage: "unpause the container's processes", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + log.Fatal(err) + } + if err = container.Resume(); err != nil { + log.Fatal(err) + } + }, } diff --git a/nsinit/stats.go b/nsinit/stats.go index 6d8f75855..8320fed4f 100644 --- a/nsinit/stats.go +++ b/nsinit/stats.go @@ -6,34 +6,27 @@ import ( "log" "github.com/codegangsta/cli" - "github.com/docker/libcontainer" ) var statsCommand = cli.Command{ - Name: "stats", - Usage: "display statistics for the container", - Action: statsAction, -} - -func statsAction(context *cli.Context) { - factory, err := libcontainer.New(context.GlobalString("root"), nil) - if err != nil { - log.Fatal(err) - } - - container, err := factory.Load(context.Args().First()) - if err != nil { - log.Fatal(err) - } - - stats, err := container.Stats() - if err != nil { - log.Fatal(err) - } - data, jerr := json.MarshalIndent(stats, "", "\t") - if err != nil { - log.Fatal(jerr) - } - - fmt.Printf("%s", data) + Name: "stats", + Usage: "display statistics for the container", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + log.Fatal(err) + } + stats, err := container.Stats() + if err != nil { + log.Fatal(err) + } + data, jerr := json.MarshalIndent(stats, "", "\t") + if err != nil { + log.Fatal(jerr) + } + fmt.Printf("%s", data) + }, } diff --git a/nsinit/utils.go b/nsinit/utils.go index c46d98b15..901972e81 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -2,40 +2,52 @@ package main import ( "encoding/json" + "fmt" 
"os" - "path/filepath" "github.com/codegangsta/cli" "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" ) -func loadConfig() (*configs.Config, error) { - f, err := os.Open(filepath.Join(dataPath, "container.json")) +func loadConfig(context *cli.Context) (*configs.Config, error) { + f, err := os.Open(context.String("config")) if err != nil { return nil, err } defer f.Close() - var container *configs.Config - if err := json.NewDecoder(f).Decode(&container); err != nil { + var config *configs.Config + if err := json.NewDecoder(f).Decode(&config); err != nil { return nil, err } - return container, nil + return config, nil } -// loadConfigFromFd loads a container's config from the sync pipe that is provided by -// fd 3 when running a process -func loadConfigFromFd() (*configs.Config, error) { - pipe := os.NewFile(3, "pipe") - defer pipe.Close() +func loadFactory(context *cli.Context) (libcontainer.Factory, error) { + return libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) +} - var config *configs.Config - if err := json.NewDecoder(pipe).Decode(&config); err != nil { +func getContainer(context *cli.Context) (libcontainer.Container, error) { + factory, err := loadFactory(context) + if err != nil { return nil, err } - return config, nil + container, err := factory.Load(context.String("id")) + if err != nil { + return nil, err + } + return container, nil } -func loadFactory(context *cli.Context) (libcontainer.Factory, error) { - return libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) +func fatal(err error) { + if lerr, ok := err.(libcontainer.Error); ok { + lerr.Detail(os.Stderr) + os.Exit(1) + } + fmt.Fprintln(os.Stderr, err) +} + +func fatalf(t string, v ...interface{}) { + fmt.Fprintf(os.Stderr, t, v...) 
+ os.Exit(1) } diff --git a/process.go b/process.go index cd72b129c..ed228f791 100644 --- a/process.go +++ b/process.go @@ -3,13 +3,9 @@ package libcontainer import "io" // Configuration for a process to be run inside a container. -type ProcessConfig struct { +type Process struct { // The command to be run followed by any arguments. Args []string - - // Map of environment variables to their values. - Env []string - // Stdin is a pointer to a reader which provides the standard input stream. // Stdout is a pointer to a writer which receives the standard output stream. // Stderr is a pointer to a writer which receives the standard error stream. @@ -21,7 +17,4 @@ type ProcessConfig struct { Stdin io.Reader Stdout io.Writer Stderr io.Writer - - // Console is the path to the pty slave for use by the master - Console string } From bbeae7445a904592c19e571f9de05bb4500c3cc5 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Sat, 31 Jan 2015 21:21:06 -0800 Subject: [PATCH 064/101] Remove namespaces package Signed-off-by: Michael Crosby --- configs/namespaces.go | 30 ++ container.go | 2 +- linux_container.go | 462 ++++++++++++++++- linux_factory.go | 406 ++++++++++++++- namespaces/exec.go | 262 ---------- namespaces/execin.go | 218 -------- namespaces/init.go | 465 ------------------ namespaces/utils.go | 48 -- {namespaces/nsenter => nsenter}/README.md | 0 {namespaces/nsenter => nsenter}/nsenter.go | 0 .../nsenter => nsenter}/nsenter_test.go | 0 .../nsenter_unsupported.go | 0 {namespaces/nsenter => nsenter}/nsexec.c | 0 nsinit/exec.go | 138 ++++-- nsinit/init.go | 2 +- 15 files changed, 950 insertions(+), 1083 deletions(-) delete mode 100644 namespaces/exec.go delete mode 100644 namespaces/execin.go delete mode 100644 namespaces/init.go delete mode 100644 namespaces/utils.go rename {namespaces/nsenter => nsenter}/README.md (100%) rename {namespaces/nsenter => nsenter}/nsenter.go (100%) rename {namespaces/nsenter => nsenter}/nsenter_test.go (100%) rename {namespaces/nsenter => 
nsenter}/nsenter_unsupported.go (100%) rename {namespaces/nsenter => nsenter}/nsexec.c (100%) diff --git a/configs/namespaces.go b/configs/namespaces.go index 5e891eab2..a227f1ba9 100644 --- a/configs/namespaces.go +++ b/configs/namespaces.go @@ -1,5 +1,9 @@ package configs +import ( + "syscall" +) + type NamespaceType string const ( @@ -18,6 +22,10 @@ type Namespace struct { Path string `json:"path,omitempty"` } +func (n *Namespace) Syscall() int { + return namespaceInfo[n.Type] +} + type Namespaces []Namespace func (n *Namespaces) Remove(t NamespaceType) bool { @@ -50,3 +58,25 @@ func (n *Namespaces) index(t NamespaceType) int { func (n *Namespaces) Contains(t NamespaceType) bool { return n.index(t) != -1 } + +var namespaceInfo = map[NamespaceType]int{ + NEWNET: syscall.CLONE_NEWNET, + NEWNS: syscall.CLONE_NEWNS, + NEWUSER: syscall.CLONE_NEWUSER, + NEWIPC: syscall.CLONE_NEWIPC, + NEWUTS: syscall.CLONE_NEWUTS, + NEWPID: syscall.CLONE_NEWPID, +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This functions returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + var flag int + for _, v := range *n { + if v.Path != "" { + continue + } + flag |= namespaceInfo[v.Type] + } + return uintptr(flag) +} diff --git a/container.go b/container.go index 72d284f87..bb6bce89f 100644 --- a/container.go +++ b/container.go @@ -33,7 +33,7 @@ type Container interface { Status() (configs.Status, error) // Returns the current config of the container. - Config() *configs.Config + Config() configs.Config // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. 
// diff --git a/linux_container.go b/linux_container.go index 1d7ab3094..f684188c5 100644 --- a/linux_container.go +++ b/linux_container.go @@ -5,18 +5,35 @@ package libcontainer import ( "encoding/json" "fmt" + "io" + "io/ioutil" "os" "os/exec" "path/filepath" "syscall" + "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/namespaces" + "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/mount" "github.com/docker/libcontainer/network" + "github.com/docker/libcontainer/system" "github.com/golang/glog" ) +const ( + EXIT_SIGNAL_OFFSET = 128 +) + +type initError struct { + Message string `json:"message,omitempty"` +} + +func (i initError) Error() string { + return i.Message +} + type linuxContainer struct { id string root string @@ -26,12 +43,14 @@ type linuxContainer struct { initArgs []string } +// ID returns the container's unique ID func (c *linuxContainer) ID() string { return c.id } -func (c *linuxContainer) Config() *configs.Config { - return c.config +// Config returns the container's configuration +func (c *linuxContainer) Config() configs.Config { + return *c.config } func (c *linuxContainer) Status() (configs.Status, error) { @@ -96,48 +115,158 @@ func (c *linuxContainer) Start(process *Process) (int, error) { if status != configs.Destroyed { glog.Info("start new container process") // TODO: (crosbymichael) check out console use for execin - return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) + //return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) + return c.startNewProcess(cmd, process.Args) } - if err := c.startInitProcess(cmd, process); err != nil { + if err := c.startInitProcess(cmd, process.Args); err != nil { return -1, err } return c.state.InitPid, nil } -func (c *linuxContainer) updateStateFile() error { - fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", 
stateFilename)) - f, err := os.Create(fnew) +func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) { + var err error + parent, child, err := newInitPipe() if err != nil { - return newGenericError(err, SystemError) + return -1, err } - - err = json.NewEncoder(f).Encode(c.state) + defer parent.Close() + cmd.ExtraFiles = []*os.File{child} + cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid)) + if err := cmd.Start(); err != nil { + child.Close() + return -1, err + } + child.Close() + s, err := cmd.Process.Wait() if err != nil { - f.Close() - os.Remove(fnew) - return newGenericError(err, SystemError) + return -1, err } - f.Close() - - fname := filepath.Join(c.root, stateFilename) - if err := os.Rename(fnew, fname); err != nil { - return newGenericError(err, SystemError) + if !s.Success() { + return -1, &exec.ExitError{s} } - - return nil + decoder := json.NewDecoder(parent) + var pid *pid + if err := decoder.Decode(&pid); err != nil { + return -1, err + } + p, err := os.FindProcess(pid.Pid) + if err != nil { + return -1, err + } + terminate := func(terr error) (int, error) { + // TODO: log the errors for kill and wait + p.Kill() + p.Wait() + return -1, terr + } + // Enter cgroups. 
+ if err := enterCgroups(c.state, pid.Pid); err != nil { + return terminate(err) + } + encoder := json.NewEncoder(parent) + if err := encoder.Encode(c.config); err != nil { + return terminate(err) + } + process := processArgs{ + Config: c.config, + Args: args, + } + if err := encoder.Encode(process); err != nil { + return terminate(err) + } + return pid.Pid, nil } -func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *Process) error { - err := namespaces.Exec(config.Args, c.config.Env, c.config.Console, cmd, c.config, c.cgroupManager, c.state) +func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { + // create a pipe so that we can syncronize with the namespaced process and + // pass the state and configuration to the child process + parent, child, err := newInitPipe() if err != nil { return err } - - err = c.updateStateFile() + defer parent.Close() + cmd.ExtraFiles = []*os.File{child} + cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags() + if c.config.Namespaces.Contains(configs.NEWUSER) { + addUidGidMappings(cmd.SysProcAttr, c.config) + // Default to root user when user namespaces are enabled. 
+ if cmd.SysProcAttr.Credential == nil { + cmd.SysProcAttr.Credential = &syscall.Credential{} + } + } + glog.Info("starting container init process") + err = cmd.Start() + child.Close() if err != nil { - // FIXME c.Kill() - return err + return newGenericError(err, SystemError) + } + wait := func() (*os.ProcessState, error) { + ps, err := cmd.Process.Wait() + // we should kill all processes in cgroup when init is died if we use + // host PID namespace + if !c.config.Namespaces.Contains(configs.NEWPID) { + c.killAllPids() + } + return ps, newGenericError(err, SystemError) + } + terminate := func(terr error) error { + // TODO: log the errors for kill and wait + cmd.Process.Kill() + wait() + return terr + } + started, err := system.GetProcessStartTime(cmd.Process.Pid) + if err != nil { + return terminate(err) } + // Do this before syncing with child so that no children + // can escape the cgroup + if err := c.cgroupManager.Apply(cmd.Process.Pid); err != nil { + return terminate(err) + } + defer func() { + if err != nil { + c.cgroupManager.Destroy() + } + }() + var networkState configs.NetworkState + if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil { + return terminate(err) + } + process := processArgs{ + Args: args, + Config: c.config, + NetworkState: &networkState, + } + // Start the setup process to setup the init process + if c.config.Namespaces.Contains(configs.NEWUSER) { + if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, &process, &networkState); err != nil { + return terminate(err) + } + } + // send the state to the container's init process then shutdown writes for the parent + if err := json.NewEncoder(parent).Encode(process); err != nil { + return terminate(err) + } + // shutdown writes for the parent side of the pipe + if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { + return terminate(err) + } + // wait for the child process to fully complete and receive an error message + // if one was 
encoutered + var ierr *initError + if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF { + return terminate(err) + } + if ierr != nil { + return terminate(ierr) + } + + c.state.InitPid = cmd.Process.Pid + c.state.InitStartTime = started + c.state.NetworkState = networkState + c.state.CgroupPaths = c.cgroupManager.GetPaths() return nil } @@ -179,3 +308,282 @@ func (c *linuxContainer) Wait() (syscall.WaitStatus, error) { func (c *linuxContainer) OOM() (<-chan struct{}, error) { return NotifyOnOOM(c.state) } + +func (c *linuxContainer) updateStateFile() error { + fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename)) + f, err := os.Create(fnew) + if err != nil { + return newGenericError(err, SystemError) + } + defer f.Close() + + if err := json.NewEncoder(f).Encode(c.state); err != nil { + f.Close() + os.Remove(fnew) + return newGenericError(err, SystemError) + } + fname := filepath.Join(c.root, stateFilename) + if err := os.Rename(fnew, fname); err != nil { + return newGenericError(err, SystemError) + } + return nil +} + +// New returns a newly initialized Pipe for communication between processes +func newInitPipe() (parent *os.File, child *os.File, err error) { + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} + +// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. 
+func addUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { + if container.UidMappings != nil { + sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings)) + for i, um := range container.UidMappings { + sys.UidMappings[i].ContainerID = um.ContainerID + sys.UidMappings[i].HostID = um.HostID + sys.UidMappings[i].Size = um.Size + } + } + + if container.GidMappings != nil { + sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings)) + for i, gm := range container.GidMappings { + sys.GidMappings[i].ContainerID = gm.ContainerID + sys.GidMappings[i].HostID = gm.HostID + sys.GidMappings[i].Size = gm.Size + } + } +} + +// killAllPids iterates over all of the container's processes +// sending a SIGKILL to each process. +func (c *linuxContainer) killAllPids() error { + glog.Info("killing all processes in container") + var procs []*os.Process + c.cgroupManager.Freeze(configs.Frozen) + pids, err := c.cgroupManager.GetPids() + if err != nil { + return err + } + for _, pid := range pids { + // TODO: log err without aborting if we are unable to find + // a single PID + if p, err := os.FindProcess(pid); err == nil { + procs = append(procs, p) + p.Kill() + } + } + c.cgroupManager.Freeze(configs.Thawed) + for _, p := range procs { + p.Wait() + } + return err +} + +// initializeNetworking creates the container's network stack outside of the namespace and moves +// interfaces into the container's net namespaces if necessary +func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.NetworkState) error { + glog.Info("initailzing container's network stack") + for _, config := range c.config.Networks { + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err + } + if err := strategy.Create(config, nspid, networkState); err != nil { + return err + } + } + return nil +} + +func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState 
*configs.NetworkState) error { + command := exec.Command(args[0], args[1:]...) + parent, child, err := newInitPipe() + if err != nil { + return err + } + defer parent.Close() + command.ExtraFiles = []*os.File{child} + command.Dir = container.RootFs + command.Env = append(command.Env, + fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), + fmt.Sprintf("_LIBCONTAINER_USERNS=1")) + err = command.Start() + child.Close() + if err != nil { + return err + } + s, err := command.Process.Wait() + if err != nil { + return err + } + if !s.Success() { + return &exec.ExitError{s} + } + decoder := json.NewDecoder(parent) + var pid *pid + if err := decoder.Decode(&pid); err != nil { + return err + } + p, err := os.FindProcess(pid.Pid) + if err != nil { + return err + } + terminate := func(terr error) error { + // TODO: log the errors for kill and wait + p.Kill() + p.Wait() + return terr + } + // send the state to the container's init process then shutdown writes for the parent + if err := json.NewEncoder(parent).Encode(process); err != nil { + return terminate(err) + } + // shutdown writes for the parent side of the pipe + if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { + return terminate(err) + } + // wait for the child process to fully complete and receive an error message + // if one was encoutered + var ierr *initError + if err := decoder.Decode(&ierr); err != nil && err != io.EOF { + return terminate(err) + } + if ierr != nil { + return ierr + } + s, err = p.Wait() + if err != nil { + return err + } + if !s.Success() { + return &exec.ExitError{s} + } + return nil +} + +type pid struct { + Pid int `json:"Pid"` +} + +// Finalize entering into a container and execute a specified command +func InitIn(pipe *os.File) (err error) { + defer func() { + // if we have an error during the initialization of the container's init then send it back to the + // parent process in the form of an initError. 
+ if err != nil { + // ensure that any data sent from the parent is consumed so it doesn't + // receive ECONNRESET when the child writes to the pipe. + ioutil.ReadAll(pipe) + if err := json.NewEncoder(pipe).Encode(initError{ + Message: err.Error(), + }); err != nil { + panic(err) + } + } + // ensure that this pipe is always closed + pipe.Close() + }() + decoder := json.NewDecoder(pipe) + var config *configs.Config + if err := decoder.Decode(&config); err != nil { + return err + } + var process *processArgs + if err := decoder.Decode(&process); err != nil { + return err + } + if err := finalizeSetns(config); err != nil { + return err + } + if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil { + return err + } + panic("unreachable") +} + +// finalize expects that the setns calls have been setup and that is has joined an +// existing namespace +func finalizeSetns(container *configs.Config) error { + // clear the current processes env and replace it with the environment defined on the container + if err := loadContainerEnvironment(container); err != nil { + return err + } + + if err := setupRlimits(container); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + + if err := finalizeNamespace(container); err != nil { + return err + } + + if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) + } + + if container.ProcessLabel != "" { + if err := label.SetProcessLabel(container.ProcessLabel); err != nil { + return err + } + } + + return nil +} + +// SetupContainer is run to setup mounts and networking related operations +// for a user namespace enabled process as a user namespace root doesn't +// have permissions to perform these operations. +// The setup process joins all the namespaces of user namespace enabled init +// except the user namespace, so it run as root in the root user namespace +// to perform these operations. 
+func SetupContainer(process *processArgs) error { + container := process.Config + networkState := process.NetworkState + + // TODO : move to validation + /* + rootfs, err := utils.ResolveRootfs(container.RootFs) + if err != nil { + return err + } + */ + + // clear the current processes env and replace it with the environment + // defined on the container + if err := loadContainerEnvironment(container); err != nil { + return err + } + + cloneFlags := container.Namespaces.CloneFlags() + if (cloneFlags & syscall.CLONE_NEWNET) == 0 { + if len(container.Networks) != 0 || len(container.Routes) != 0 { + return fmt.Errorf("unable to apply network parameters without network namespace") + } + } else { + if err := setupNetwork(container, networkState); err != nil { + return fmt.Errorf("setup networking %s", err) + } + if err := setupRoute(container); err != nil { + return fmt.Errorf("setup route %s", err) + } + } + + label.Init() + + // InitializeMountNamespace() can be executed only for a new mount namespace + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := mount.InitializeMountNamespace(container); err != nil { + return fmt.Errorf("setup mount namespace %s", err) + } + } + return nil +} + +func enterCgroups(state *configs.State, pid int) error { + return cgroups.EnterPid(state.CgroupPaths, pid) +} diff --git a/linux_factory.go b/linux_factory.go index 3583bf728..d7bf8a50c 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -5,15 +5,28 @@ package libcontainer import ( "encoding/json" "fmt" + "io/ioutil" "os" "path/filepath" "regexp" + "strings" + "syscall" "github.com/golang/glog" + "github.com/docker/libcontainer/apparmor" cgroups "github.com/docker/libcontainer/cgroups/manager" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/namespaces" + "github.com/docker/libcontainer/console" + "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/mount" + "github.com/docker/libcontainer/netlink" + 
"github.com/docker/libcontainer/network" + "github.com/docker/libcontainer/security/capabilities" + "github.com/docker/libcontainer/security/restrict" + "github.com/docker/libcontainer/system" + "github.com/docker/libcontainer/user" + "github.com/docker/libcontainer/utils" ) const ( @@ -26,6 +39,13 @@ var ( maxIdLen = 1024 ) +// Process is used for transferring parameters from Exec() to Init() +type processArgs struct { + Args []string `json:"args,omitempty"` + Config *configs.Config `json:"config,omitempty"` + NetworkState *configs.NetworkState `json:"network_state,omitempty"` +} + // New returns a linux based container factory based in the root directory. func New(root string, initArgs []string) (Factory, error) { if root != "" { @@ -116,16 +136,50 @@ func (l *linuxFactory) Load(id string) (Container, error) { // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally -func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) { +func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { pipe := os.NewFile(uintptr(pipefd), "pipe") - - setupUserns := os.Getenv("_LIBCONTAINER_USERNS") + setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != "" pid := os.Getenv("_LIBCONTAINER_INITPID") - if pid != "" && setupUserns == "" { - return namespaces.InitIn(pipe) + if pid != "" && !setupUserns { + return InitIn(pipe) } - - return namespaces.Init(pipe, setupUserns != "") + defer func() { + // if we have an error during the initialization of the container's init then send it back to the + // parent process in the form of an initError. + if err != nil { + // ensure that any data sent from the parent is consumed so it doesn't + // receive ECONNRESET when the child writes to the pipe. 
+ ioutil.ReadAll(pipe) + if err := json.NewEncoder(pipe).Encode(initError{ + Message: err.Error(), + }); err != nil { + panic(err) + } + } + // ensure that this pipe is always closed + pipe.Close() + }() + uncleanRootfs, err := os.Getwd() + if err != nil { + return err + } + var process *processArgs + // We always read this as it is a way to sync with the parent as well + if err := json.NewDecoder(pipe).Decode(&process); err != nil { + return err + } + if setupUserns { + err = SetupContainer(process) + if err == nil { + os.Exit(0) + } else { + os.Exit(1) + } + } + if process.Config.Namespaces.Contains(configs.NEWUSER) { + return l.initUserNs(uncleanRootfs, process) + } + return l.initDefault(uncleanRootfs, process) } func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) { @@ -137,7 +191,6 @@ func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) return nil, newGenericError(err, SystemError) } defer f.Close() - var config *configs.Config if err := json.NewDecoder(f).Decode(&config); err != nil { return nil, newGenericError(err, ConfigInvalid) @@ -154,7 +207,6 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { return nil, newGenericError(err, SystemError) } defer f.Close() - var state *configs.State if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, newGenericError(err, SystemError) @@ -171,3 +223,337 @@ func (l *linuxFactory) validateID(id string) error { } return nil } + +func (l *linuxFactory) initDefault(uncleanRootfs string, process *processArgs) (err error) { + config := process.Config + networkState := process.NetworkState + + // TODO: move to validation + /* + rootfs, err := utils.ResolveRootfs(uncleanRootfs) + if err != nil { + return err + } + */ + + // clear the current processes env and replace it with the environment + // defined on the container + if err := loadContainerEnvironment(config); err != nil { + return err + } + // join any namespaces via a 
path to the namespace fd if provided + if err := joinExistingNamespaces(config.Namespaces); err != nil { + return err + } + if config.Console != "" { + if err := console.OpenAndDup(config.Console); err != nil { + return err + } + } + if _, err := syscall.Setsid(); err != nil { + return fmt.Errorf("setsid %s", err) + } + if config.Console != "" { + if err := system.Setctty(); err != nil { + return fmt.Errorf("setctty %s", err) + } + } + + cloneFlags := config.Namespaces.CloneFlags() + if (cloneFlags & syscall.CLONE_NEWNET) == 0 { + if len(config.Networks) != 0 || len(config.Routes) != 0 { + return fmt.Errorf("unable to apply network parameters without network namespace") + } + } else { + if err := setupNetwork(config, networkState); err != nil { + return fmt.Errorf("setup networking %s", err) + } + if err := setupRoute(config); err != nil { + return fmt.Errorf("setup route %s", err) + } + } + if err := setupRlimits(config); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + label.Init() + // InitializeMountNamespace() can be executed only for a new mount namespace + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := mount.InitializeMountNamespace(config); err != nil { + return err + } + } + if config.Hostname != "" { + // TODO: (crosbymichael) move this to pre spawn validation + if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { + return fmt.Errorf("unable to set the hostname without UTS namespace") + } + if err := syscall.Sethostname([]byte(config.Hostname)); err != nil { + return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err) + } + } + if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err) + } + if err := label.SetProcessLabel(config.ProcessLabel); err != nil { + return fmt.Errorf("set process label %s", err) + } + // TODO: (crosbymichael) make this configurable at the Config level + if config.RestrictSys { + if (cloneFlags & 
syscall.CLONE_NEWNS) == 0 { + return fmt.Errorf("unable to restrict access to kernel files without mount namespace") + } + if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + return err + } + } + pdeathSignal, err := system.GetParentDeathSignal() + if err != nil { + return fmt.Errorf("get parent death signal %s", err) + } + if err := finalizeNamespace(config); err != nil { + return fmt.Errorf("finalize namespace %s", err) + } + // finalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. + if err := restoreParentDeathSignal(pdeathSignal); err != nil { + return fmt.Errorf("restore parent death signal %s", err) + } + return system.Execv(process.Args[0], process.Args[0:], config.Env) +} + +func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (err error) { + config := process.Config + // clear the current processes env and replace it with the environment + // defined on the config + if err := loadContainerEnvironment(config); err != nil { + return err + } + // join any namespaces via a path to the namespace fd if provided + if err := joinExistingNamespaces(config.Namespaces); err != nil { + return err + } + if config.Console != "" { + if err := console.OpenAndDup("/dev/console"); err != nil { + return err + } + } + if _, err := syscall.Setsid(); err != nil { + return fmt.Errorf("setsid %s", err) + } + if config.Console != "" { + if err := system.Setctty(); err != nil { + return fmt.Errorf("setctty %s", err) + } + } + if config.WorkingDir == "" { + config.WorkingDir = "/" + } + + if err := setupRlimits(config); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + cloneFlags := config.Namespaces.CloneFlags() + if config.Hostname != "" { + // TODO: move validation + if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { + return fmt.Errorf("unable to set the hostname without UTS namespace") + } + if err := 
syscall.Sethostname([]byte(config.Hostname)); err != nil { + return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err) + } + } + if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err) + } + if err := label.SetProcessLabel(config.ProcessLabel); err != nil { + return fmt.Errorf("set process label %s", err) + } + if config.RestrictSys { + if (cloneFlags & syscall.CLONE_NEWNS) == 0 { + return fmt.Errorf("unable to restrict access to kernel files without mount namespace") + } + if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + return err + } + } + pdeathSignal, err := system.GetParentDeathSignal() + if err != nil { + return fmt.Errorf("get parent death signal %s", err) + } + if err := finalizeNamespace(config); err != nil { + return fmt.Errorf("finalize namespace %s", err) + } + // finalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. + if err := restoreParentDeathSignal(pdeathSignal); err != nil { + return fmt.Errorf("restore parent death signal %s", err) + } + return system.Execv(process.Args[0], process.Args[0:], config.Env) +} + +// restoreParentDeathSignal sets the parent death signal to old. +func restoreParentDeathSignal(old int) error { + if old == 0 { + return nil + } + current, err := system.GetParentDeathSignal() + if err != nil { + return fmt.Errorf("get parent death signal %s", err) + } + if old == current { + return nil + } + if err := system.ParentDeathSignal(uintptr(old)); err != nil { + return fmt.Errorf("set parent death signal %s", err) + } + // Signal self if parent is already dead. Does nothing if running in a new + // PID namespace, as Getppid will always return 0. 
+ if syscall.Getppid() == 1 { + return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) + } + return nil +} + +// setupUser changes the groups, gid, and uid for the user inside the container +func setupUser(config *configs.Config) error { + // Set up defaults. + defaultExecUser := user.ExecUser{ + Uid: syscall.Getuid(), + Gid: syscall.Getgid(), + Home: "/", + } + passwdPath, err := user.GetPasswdPath() + if err != nil { + return err + } + groupPath, err := user.GetGroupPath() + if err != nil { + return err + } + execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) + if err != nil { + return fmt.Errorf("get supplementary groups %s", err) + } + suppGroups := append(execUser.Sgids, config.AdditionalGroups...) + if err := syscall.Setgroups(suppGroups); err != nil { + return fmt.Errorf("setgroups %s", err) + } + if err := system.Setgid(execUser.Gid); err != nil { + return fmt.Errorf("setgid %s", err) + } + if err := system.Setuid(execUser.Uid); err != nil { + return fmt.Errorf("setuid %s", err) + } + // if we didn't get HOME already, set it based on the user's HOME + if envHome := os.Getenv("HOME"); envHome == "" { + if err := os.Setenv("HOME", execUser.Home); err != nil { + return fmt.Errorf("set HOME %s", err) + } + } + return nil +} + +// setupVethNetwork uses the Network config if it is not nil to initialize +// the new veth interface inside the container for use by changing the name to eth0 +// setting the MTU and IP address along with the default gateway +func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error { + for _, config := range config.Networks { + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err + } + err1 := strategy.Initialize(config, networkState) + if err1 != nil { + return err1 + } + } + return nil +} + +func setupRoute(config *configs.Config) error { + for _, config := range config.Routes { + if err := netlink.AddRoute(config.Destination, config.Source, 
config.Gateway, config.InterfaceName); err != nil { + return err + } + } + return nil +} + +func setupRlimits(config *configs.Config) error { + for _, rlimit := range config.Rlimits { + l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} + if err := syscall.Setrlimit(rlimit.Type, l); err != nil { + return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) + } + } + return nil +} + +// finalizeNamespace drops the caps, sets the correct user +// and working dir, and closes any leaky file descriptors +// before execing the command inside the namespace +func finalizeNamespace(config *configs.Config) error { + // Ensure that all non-standard fds we may have accidentally + // inherited are marked close-on-exec so they stay out of the + // container + if err := utils.CloseExecFrom(3); err != nil { + return fmt.Errorf("close open file descriptors %s", err) + } + // drop capabilities in bounding set before changing user + if err := capabilities.DropBoundingSet(config.Capabilities); err != nil { + return fmt.Errorf("drop bounding set %s", err) + } + // preserve existing capabilities while we change users + if err := system.SetKeepCaps(); err != nil { + return fmt.Errorf("set keep caps %s", err) + } + if err := setupUser(config); err != nil { + return fmt.Errorf("setup user %s", err) + } + if err := system.ClearKeepCaps(); err != nil { + return fmt.Errorf("clear keep caps %s", err) + } + // drop all other capabilities + if err := capabilities.DropCapabilities(config.Capabilities); err != nil { + return fmt.Errorf("drop capabilities %s", err) + } + if config.WorkingDir != "" { + if err := syscall.Chdir(config.WorkingDir); err != nil { + return fmt.Errorf("chdir to %s %s", config.WorkingDir, err) + } + } + return nil +} + +func loadContainerEnvironment(config *configs.Config) error { + os.Clearenv() + for _, pair := range config.Env { + p := strings.SplitN(pair, "=", 2) + if len(p) < 2 { + return fmt.Errorf("invalid environment '%v'", pair) + } + if err := 
os.Setenv(p[0], p[1]); err != nil { + return err + } + } + return nil +} + +// joinExistingNamespaces gets all the namespace paths specified for the container and +// does a setns on the namespace fd so that the current process joins the namespace. +func joinExistingNamespaces(namespaces []configs.Namespace) error { + for _, ns := range namespaces { + if ns.Path != "" { + f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) + if err != nil { + return err + } + err = system.Setns(f.Fd(), uintptr(ns.Syscall())) + f.Close() + if err != nil { + return err + } + } + } + return nil +} diff --git a/namespaces/exec.go b/namespaces/exec.go deleted file mode 100644 index ec462cbcb..000000000 --- a/namespaces/exec.go +++ /dev/null @@ -1,262 +0,0 @@ -// +build linux - -package namespaces - -import ( - "encoding/json" - "fmt" - "io" - "os" - "os/exec" - "syscall" - - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/network" - "github.com/docker/libcontainer/system" -) - -const ( - EXIT_SIGNAL_OFFSET = 128 -) - -func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error { - command := exec.Command(args[0], args[1:]...) 
- parent, child, err := newInitPipe() - if err != nil { - return err - } - defer parent.Close() - command.ExtraFiles = []*os.File{child} - command.Dir = container.RootFs - command.Env = append(command.Env, - fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), - fmt.Sprintf("_LIBCONTAINER_USERNS=1")) - err = command.Start() - child.Close() - if err != nil { - return err - } - s, err := command.Process.Wait() - if err != nil { - return err - } - if !s.Success() { - return &exec.ExitError{s} - } - decoder := json.NewDecoder(parent) - var pid *pid - if err := decoder.Decode(&pid); err != nil { - return err - } - p, err := os.FindProcess(pid.Pid) - if err != nil { - return err - } - terminate := func(terr error) error { - // TODO: log the errors for kill and wait - p.Kill() - p.Wait() - return terr - } - // send the state to the container's init process then shutdown writes for the parent - if err := json.NewEncoder(parent).Encode(process); err != nil { - return terminate(err) - } - // shutdown writes for the parent side of the pipe - if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { - return terminate(err) - } - // wait for the child process to fully complete and receive an error message - // if one was encoutered - var ierr *initError - if err := decoder.Decode(&ierr); err != nil && err != io.EOF { - return terminate(err) - } - if ierr != nil { - return ierr - } - s, err = p.Wait() - if err != nil { - return err - } - if !s.Success() { - return &exec.ExitError{s} - } - return nil -} - -// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. -// Move this to libcontainer package. -// Exec performs setup outside of a namespace so that a container can be -// executed. Exec is a high level function for working with container namespaces. 
-func Exec(args []string, env []string, console string, command *exec.Cmd, container *configs.Config, cgroupManager cgroups.Manager, state *configs.State) (err error) { - // create a pipe so that we can syncronize with the namespaced process and - // pass the state and configuration to the child process - parent, child, err := newInitPipe() - if err != nil { - return err - } - defer parent.Close() - command.ExtraFiles = []*os.File{child} - - command.Dir = container.RootFs - command.SysProcAttr.Cloneflags = uintptr(GetNamespaceFlags(container.Namespaces)) - - if container.Namespaces.Contains(configs.NEWUSER) { - AddUidGidMappings(command.SysProcAttr, container) - - // Default to root user when user namespaces are enabled. - if command.SysProcAttr.Credential == nil { - command.SysProcAttr.Credential = &syscall.Credential{} - } - } - - if err := command.Start(); err != nil { - child.Close() - return err - } - child.Close() - - wait := func() (*os.ProcessState, error) { - ps, err := command.Process.Wait() - // we should kill all processes in cgroup when init is died if we use - // host PID namespace - if !container.Namespaces.Contains(configs.NEWPID) { - killAllPids(cgroupManager) - } - return ps, err - } - - terminate := func(terr error) error { - // TODO: log the errors for kill and wait - command.Process.Kill() - wait() - return terr - } - - started, err := system.GetProcessStartTime(command.Process.Pid) - if err != nil { - return terminate(err) - } - - // Do this before syncing with child so that no children - // can escape the cgroup - err = cgroupManager.Apply(command.Process.Pid) - if err != nil { - return terminate(err) - } - defer func() { - if err != nil { - cgroupManager.Destroy() - } - }() - - var networkState configs.NetworkState - if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { - return terminate(err) - } - - process := processArgs{ - Env: append(env[0:], container.Env...), - Args: args, - ConsolePath: console, 
- Config: container, - NetworkState: &networkState, - } - - // Start the setup process to setup the init process - if container.Namespaces.Contains(configs.NEWUSER) { - if err = executeSetupCmd(command.Args, command.Process.Pid, container, &process, &networkState); err != nil { - return terminate(err) - } - } - - // send the state to the container's init process then shutdown writes for the parent - if err := json.NewEncoder(parent).Encode(process); err != nil { - return terminate(err) - } - // shutdown writes for the parent side of the pipe - if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { - return terminate(err) - } - - // wait for the child process to fully complete and receive an error message - // if one was encoutered - var ierr *initError - if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF { - return terminate(err) - } - if ierr != nil { - return terminate(ierr) - } - - state.InitPid = command.Process.Pid - state.InitStartTime = started - state.NetworkState = networkState - state.CgroupPaths = cgroupManager.GetPaths() - - return nil -} - -// killAllPids iterates over all of the container's processes -// sending a SIGKILL to each process. -func killAllPids(m cgroups.Manager) error { - var ( - procs []*os.Process - ) - m.Freeze(configs.Frozen) - pids, err := m.GetPids() - if err != nil { - return err - } - for _, pid := range pids { - // TODO: log err without aborting if we are unable to find - // a single PID - if p, err := os.FindProcess(pid); err == nil { - procs = append(procs, p) - p.Kill() - } - } - m.Freeze(configs.Thawed) - for _, p := range procs { - p.Wait() - } - return err -} - -// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. 
-func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { - if container.UidMappings != nil { - sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings)) - for i, um := range container.UidMappings { - sys.UidMappings[i].ContainerID = um.ContainerID - sys.UidMappings[i].HostID = um.HostID - sys.UidMappings[i].Size = um.Size - } - } - - if container.GidMappings != nil { - sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings)) - for i, gm := range container.GidMappings { - sys.GidMappings[i].ContainerID = gm.ContainerID - sys.GidMappings[i].HostID = gm.HostID - sys.GidMappings[i].Size = gm.Size - } - } -} - -// InitializeNetworking creates the container's network stack outside of the namespace and moves -// interfaces into the container's net namespaces if necessary -func InitializeNetworking(container *configs.Config, nspid int, networkState *configs.NetworkState) error { - for _, config := range container.Networks { - strategy, err := network.GetStrategy(config.Type) - if err != nil { - return err - } - if err := strategy.Create(config, nspid, networkState); err != nil { - return err - } - } - return nil -} diff --git a/namespaces/execin.go b/namespaces/execin.go deleted file mode 100644 index 75e70a06f..000000000 --- a/namespaces/execin.go +++ /dev/null @@ -1,218 +0,0 @@ -// +build linux - -package namespaces - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "os" - "os/exec" - "syscall" - - "github.com/docker/libcontainer/apparmor" - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/system" -) - -type pid struct { - Pid int `json:"Pid"` -} - -// ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the -// setns code in a single threaded environment joining the existing containers' namespaces. 
-func ExecIn(args []string, env []string, console string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) { - var err error - - parent, child, err := newInitPipe() - if err != nil { - return -1, err - } - defer parent.Close() - - cmd.ExtraFiles = []*os.File{child} - cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", state.InitPid)) - - if err := cmd.Start(); err != nil { - child.Close() - return -1, err - } - child.Close() - - s, err := cmd.Process.Wait() - if err != nil { - return -1, err - } - if !s.Success() { - return -1, &exec.ExitError{s} - } - - decoder := json.NewDecoder(parent) - var pid *pid - - if err := decoder.Decode(&pid); err != nil { - return -1, err - } - - p, err := os.FindProcess(pid.Pid) - if err != nil { - return -1, err - } - - terminate := func(terr error) (int, error) { - // TODO: log the errors for kill and wait - p.Kill() - p.Wait() - return -1, terr - } - - // Enter cgroups. - if err := EnterCgroups(state, pid.Pid); err != nil { - return terminate(err) - } - - encoder := json.NewEncoder(parent) - - if err := encoder.Encode(container); err != nil { - return terminate(err) - } - - process := processArgs{ - Env: append(env[0:], container.Env...), - Args: args, - ConsolePath: console, - } - if err := encoder.Encode(process); err != nil { - return terminate(err) - } - - return pid.Pid, nil -} - -// Finalize entering into a container and execute a specified command -func InitIn(pipe *os.File) (err error) { - defer func() { - // if we have an error during the initialization of the container's init then send it back to the - // parent process in the form of an initError. - if err != nil { - // ensure that any data sent from the parent is consumed so it doesn't - // receive ECONNRESET when the child writes to the pipe. 
- ioutil.ReadAll(pipe) - if err := json.NewEncoder(pipe).Encode(initError{ - Message: err.Error(), - }); err != nil { - panic(err) - } - } - // ensure that this pipe is always closed - pipe.Close() - }() - - decoder := json.NewDecoder(pipe) - - var container *configs.Config - if err := decoder.Decode(&container); err != nil { - return err - } - - var process *processArgs - if err := decoder.Decode(&process); err != nil { - return err - } - - if err := FinalizeSetns(container); err != nil { - return err - } - - if err := system.Execv(process.Args[0], process.Args[0:], process.Env); err != nil { - return err - } - - panic("unreachable") -} - -// Finalize expects that the setns calls have been setup and that is has joined an -// existing namespace -func FinalizeSetns(container *configs.Config) error { - // clear the current processes env and replace it with the environment defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - - if err := setupRlimits(container); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - - if err := FinalizeNamespace(container); err != nil { - return err - } - - if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) - } - - if container.ProcessLabel != "" { - if err := label.SetProcessLabel(container.ProcessLabel); err != nil { - return err - } - } - - return nil -} - -// SetupContainer is run to setup mounts and networking related operations -// for a user namespace enabled process as a user namespace root doesn't -// have permissions to perform these operations. -// The setup process joins all the namespaces of user namespace enabled init -// except the user namespace, so it run as root in the root user namespace -// to perform these operations. 
-func SetupContainer(process *processArgs) error { - container := process.Config - networkState := process.NetworkState - - // TODO : move to validation - /* - rootfs, err := utils.ResolveRootfs(container.RootFs) - if err != nil { - return err - } - */ - - // clear the current processes env and replace it with the environment - // defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - - cloneFlags := GetNamespaceFlags(container.Namespaces) - - if (cloneFlags & syscall.CLONE_NEWNET) == 0 { - if len(container.Networks) != 0 || len(container.Routes) != 0 { - return fmt.Errorf("unable to apply network parameters without network namespace") - } - } else { - if err := setupNetwork(container, networkState); err != nil { - return fmt.Errorf("setup networking %s", err) - } - if err := setupRoute(container); err != nil { - return fmt.Errorf("setup route %s", err) - } - } - - label.Init() - - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) != 0 { - if err := mount.InitializeMountNamespace(container); err != nil { - return fmt.Errorf("setup mount namespace %s", err) - } - } - return nil -} - -func EnterCgroups(state *configs.State, pid int) error { - return cgroups.EnterPid(state.CgroupPaths, pid) -} diff --git a/namespaces/init.go b/namespaces/init.go deleted file mode 100644 index d651352ac..000000000 --- a/namespaces/init.go +++ /dev/null @@ -1,465 +0,0 @@ -// +build linux - -package namespaces - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "os" - "strings" - "syscall" - - "github.com/docker/libcontainer/apparmor" - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/console" - "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/netlink" - "github.com/docker/libcontainer/network" - "github.com/docker/libcontainer/security/capabilities" - 
"github.com/docker/libcontainer/security/restrict" - "github.com/docker/libcontainer/system" - "github.com/docker/libcontainer/user" - "github.com/docker/libcontainer/utils" -) - -// Process is used for transferring parameters from Exec() to Init() -type processArgs struct { - Args []string `json:"args,omitempty"` - Env []string `json:"environment,omitempty"` - ConsolePath string `json:"console_path,omitempty"` - Config *configs.Config `json:"config,omitempty"` - NetworkState *configs.NetworkState `json:"network_state,omitempty"` -} - -// TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. -// Move this to libcontainer package. -// Init is the init process that first runs inside a new namespace to setup mounts, users, networking, -// and other options required for the new container. -// The caller of Init function has to ensure that the go runtime is locked to an OS thread -// (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. -func Init(pipe *os.File, setupUserns bool) (err error) { - defer func() { - // if we have an error during the initialization of the container's init then send it back to the - // parent process in the form of an initError. - if err != nil { - // ensure that any data sent from the parent is consumed so it doesn't - // receive ECONNRESET when the child writes to the pipe. 
- ioutil.ReadAll(pipe) - if err := json.NewEncoder(pipe).Encode(initError{ - Message: err.Error(), - }); err != nil { - panic(err) - } - } - // ensure that this pipe is always closed - pipe.Close() - }() - - uncleanRootfs, err := os.Getwd() - if err != nil { - return err - } - - var process *processArgs - // We always read this as it is a way to sync with the parent as well - if err := json.NewDecoder(pipe).Decode(&process); err != nil { - return err - } - - if setupUserns { - err = SetupContainer(process) - if err == nil { - os.Exit(0) - } else { - os.Exit(1) - } - } - - if process.Config.Namespaces.Contains(configs.NEWUSER) { - return initUserNs(uncleanRootfs, process) - } else { - return initDefault(uncleanRootfs, process) - } -} - -func initDefault(uncleanRootfs string, process *processArgs) (err error) { - container := process.Config - networkState := process.NetworkState - - // TODO: move to validation - /* - rootfs, err := utils.ResolveRootfs(uncleanRootfs) - if err != nil { - return err - } - */ - - // clear the current processes env and replace it with the environment - // defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - - // join any namespaces via a path to the namespace fd if provided - if err := joinExistingNamespaces(container.Namespaces); err != nil { - return err - } - if process.ConsolePath != "" { - if err := console.OpenAndDup(process.ConsolePath); err != nil { - return err - } - } - if _, err := syscall.Setsid(); err != nil { - return fmt.Errorf("setsid %s", err) - } - if process.ConsolePath != "" { - if err := system.Setctty(); err != nil { - return fmt.Errorf("setctty %s", err) - } - } - - cloneFlags := GetNamespaceFlags(container.Namespaces) - - if (cloneFlags & syscall.CLONE_NEWNET) == 0 { - if len(container.Networks) != 0 || len(container.Routes) != 0 { - return fmt.Errorf("unable to apply network parameters without network namespace") - } - } else { - if err := 
setupNetwork(container, networkState); err != nil { - return fmt.Errorf("setup networking %s", err) - } - if err := setupRoute(container); err != nil { - return fmt.Errorf("setup route %s", err) - } - } - - if err := setupRlimits(container); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - - label.Init() - - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) != 0 { - if err := mount.InitializeMountNamespace(container); err != nil { - return err - } - } - - if container.Hostname != "" { - // TODO: (crosbymichael) move this to pre spawn validation - if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { - return fmt.Errorf("unable to set the hostname without UTS namespace") - } - if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { - return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) - } - } - - if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) - } - - if err := label.SetProcessLabel(container.ProcessLabel); err != nil { - return fmt.Errorf("set process label %s", err) - } - - // TODO: (crosbymichael) make this configurable at the Config level - if container.RestrictSys { - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - return fmt.Errorf("unable to restrict access to kernel files without mount namespace") - } - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { - return err - } - } - - pdeathSignal, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - - if err := FinalizeNamespace(container); err != nil { - return fmt.Errorf("finalize namespace %s", err) - } - - // FinalizeNamespace can change user/group which clears the parent death - // signal, so we restore it here. 
- if err := RestoreParentDeathSignal(pdeathSignal); err != nil { - return fmt.Errorf("restore parent death signal %s", err) - } - - return system.Execv(process.Args[0], process.Args[0:], process.Env) -} - -func initUserNs(uncleanRootfs string, process *processArgs) (err error) { - container := process.Config - - // clear the current processes env and replace it with the environment - // defined on the container - if err := LoadContainerEnvironment(container); err != nil { - return err - } - - // join any namespaces via a path to the namespace fd if provided - if err := joinExistingNamespaces(container.Namespaces); err != nil { - return err - } - if process.ConsolePath != "" { - if err := console.OpenAndDup("/dev/console"); err != nil { - return err - } - } - if _, err := syscall.Setsid(); err != nil { - return fmt.Errorf("setsid %s", err) - } - if process.ConsolePath != "" { - if err := system.Setctty(); err != nil { - return fmt.Errorf("setctty %s", err) - } - } - - if container.WorkingDir == "" { - container.WorkingDir = "/" - } - - if err := setupRlimits(container); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - - cloneFlags := GetNamespaceFlags(container.Namespaces) - - if container.Hostname != "" { - if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { - return fmt.Errorf("unable to set the hostname without UTS namespace") - } - if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { - return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) - } - } - - if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) - } - - if err := label.SetProcessLabel(container.ProcessLabel); err != nil { - return fmt.Errorf("set process label %s", err) - } - - if container.RestrictSys { - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - return fmt.Errorf("unable to restrict access to kernel files without mount namespace") - } - if err := 
restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { - return err - } - } - - pdeathSignal, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - - if err := FinalizeNamespace(container); err != nil { - return fmt.Errorf("finalize namespace %s", err) - } - - // FinalizeNamespace can change user/group which clears the parent death - // signal, so we restore it here. - if err := RestoreParentDeathSignal(pdeathSignal); err != nil { - return fmt.Errorf("restore parent death signal %s", err) - } - - return system.Execv(process.Args[0], process.Args[0:], process.Env) -} - -// RestoreParentDeathSignal sets the parent death signal to old. -func RestoreParentDeathSignal(old int) error { - if old == 0 { - return nil - } - - current, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - - if old == current { - return nil - } - - if err := system.ParentDeathSignal(uintptr(old)); err != nil { - return fmt.Errorf("set parent death signal %s", err) - } - - // Signal self if parent is already dead. Does nothing if running in a new - // PID namespace, as Getppid will always return 0. - if syscall.Getppid() == 1 { - return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) - } - - return nil -} - -// SetupUser changes the groups, gid, and uid for the user inside the container -func SetupUser(container *configs.Config) error { - // Set up defaults. 
- defaultExecUser := user.ExecUser{ - Uid: syscall.Getuid(), - Gid: syscall.Getgid(), - Home: "/", - } - - passwdPath, err := user.GetPasswdPath() - if err != nil { - return err - } - - groupPath, err := user.GetGroupPath() - if err != nil { - return err - } - - execUser, err := user.GetExecUserPath(container.User, &defaultExecUser, passwdPath, groupPath) - if err != nil { - return fmt.Errorf("get supplementary groups %s", err) - } - - suppGroups := append(execUser.Sgids, container.AdditionalGroups...) - - if err := syscall.Setgroups(suppGroups); err != nil { - return fmt.Errorf("setgroups %s", err) - } - - if err := system.Setgid(execUser.Gid); err != nil { - return fmt.Errorf("setgid %s", err) - } - - if err := system.Setuid(execUser.Uid); err != nil { - return fmt.Errorf("setuid %s", err) - } - - // if we didn't get HOME already, set it based on the user's HOME - if envHome := os.Getenv("HOME"); envHome == "" { - if err := os.Setenv("HOME", execUser.Home); err != nil { - return fmt.Errorf("set HOME %s", err) - } - } - - return nil -} - -// setupVethNetwork uses the Network config if it is not nil to initialize -// the new veth interface inside the container for use by changing the name to eth0 -// setting the MTU and IP address along with the default gateway -func setupNetwork(container *configs.Config, networkState *configs.NetworkState) error { - for _, config := range container.Networks { - strategy, err := network.GetStrategy(config.Type) - if err != nil { - return err - } - - err1 := strategy.Initialize(config, networkState) - if err1 != nil { - return err1 - } - } - return nil -} - -func setupRoute(container *configs.Config) error { - for _, config := range container.Routes { - if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { - return err - } - } - return nil -} - -func setupRlimits(container *configs.Config) error { - for _, rlimit := range container.Rlimits { - l := &syscall.Rlimit{Max: 
rlimit.Hard, Cur: rlimit.Soft} - if err := syscall.Setrlimit(rlimit.Type, l); err != nil { - return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) - } - } - return nil -} - -// FinalizeNamespace drops the caps, sets the correct user -// and working dir, and closes any leaky file descriptors -// before execing the command inside the namespace -func FinalizeNamespace(container *configs.Config) error { - // Ensure that all non-standard fds we may have accidentally - // inherited are marked close-on-exec so they stay out of the - // container - if err := utils.CloseExecFrom(3); err != nil { - return fmt.Errorf("close open file descriptors %s", err) - } - - // drop capabilities in bounding set before changing user - if err := capabilities.DropBoundingSet(container.Capabilities); err != nil { - return fmt.Errorf("drop bounding set %s", err) - } - - // preserve existing capabilities while we change users - if err := system.SetKeepCaps(); err != nil { - return fmt.Errorf("set keep caps %s", err) - } - - if err := SetupUser(container); err != nil { - return fmt.Errorf("setup user %s", err) - } - - if err := system.ClearKeepCaps(); err != nil { - return fmt.Errorf("clear keep caps %s", err) - } - - // drop all other capabilities - if err := capabilities.DropCapabilities(container.Capabilities); err != nil { - return fmt.Errorf("drop capabilities %s", err) - } - - if container.WorkingDir != "" { - if err := syscall.Chdir(container.WorkingDir); err != nil { - return fmt.Errorf("chdir to %s %s", container.WorkingDir, err) - } - } - - return nil -} - -func LoadContainerEnvironment(container *configs.Config) error { - os.Clearenv() - for _, pair := range container.Env { - p := strings.SplitN(pair, "=", 2) - if len(p) < 2 { - return fmt.Errorf("invalid environment '%v'", pair) - } - if err := os.Setenv(p[0], p[1]); err != nil { - return err - } - } - return nil -} - -// joinExistingNamespaces gets all the namespace paths specified for the container and -// does a 
setns on the namespace fd so that the current process joins the namespace. -func joinExistingNamespaces(namespaces []configs.Namespace) error { - for _, ns := range namespaces { - if ns.Path != "" { - f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) - if err != nil { - return err - } - err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Type])) - f.Close() - if err != nil { - return err - } - } - } - return nil -} diff --git a/namespaces/utils.go b/namespaces/utils.go deleted file mode 100644 index 978a02d89..000000000 --- a/namespaces/utils.go +++ /dev/null @@ -1,48 +0,0 @@ -// +build linux - -package namespaces - -import ( - "os" - "syscall" - - "github.com/docker/libcontainer/configs" -) - -type initError struct { - Message string `json:"message,omitempty"` -} - -func (i initError) Error() string { - return i.Message -} - -var namespaceInfo = map[configs.NamespaceType]int{ - configs.NEWNET: syscall.CLONE_NEWNET, - configs.NEWNS: syscall.CLONE_NEWNS, - configs.NEWUSER: syscall.CLONE_NEWUSER, - configs.NEWIPC: syscall.CLONE_NEWIPC, - configs.NEWUTS: syscall.CLONE_NEWUTS, - configs.NEWPID: syscall.CLONE_NEWPID, -} - -// New returns a newly initialized Pipe for communication between processes -func newInitPipe() (parent *os.File, child *os.File, err error) { - fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil -} - -// GetNamespaceFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare. This functions returns flags only for new namespaces. 
-func GetNamespaceFlags(namespaces configs.Namespaces) (flag int) { - for _, v := range namespaces { - if v.Path != "" { - continue - } - flag |= namespaceInfo[v.Type] - } - return flag -} diff --git a/namespaces/nsenter/README.md b/nsenter/README.md similarity index 100% rename from namespaces/nsenter/README.md rename to nsenter/README.md diff --git a/namespaces/nsenter/nsenter.go b/nsenter/nsenter.go similarity index 100% rename from namespaces/nsenter/nsenter.go rename to nsenter/nsenter.go diff --git a/namespaces/nsenter/nsenter_test.go b/nsenter/nsenter_test.go similarity index 100% rename from namespaces/nsenter/nsenter_test.go rename to nsenter/nsenter_test.go diff --git a/namespaces/nsenter/nsenter_unsupported.go b/nsenter/nsenter_unsupported.go similarity index 100% rename from namespaces/nsenter/nsenter_unsupported.go rename to nsenter/nsenter_unsupported.go diff --git a/namespaces/nsenter/nsexec.c b/nsenter/nsexec.c similarity index 100% rename from namespaces/nsenter/nsexec.c rename to nsenter/nsexec.c diff --git a/nsinit/exec.go b/nsinit/exec.go index 3a91fa4e2..f710a9b73 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -3,14 +3,55 @@ package main import ( "io" "os" + "os/signal" "syscall" "github.com/codegangsta/cli" "github.com/docker/docker/pkg/term" "github.com/docker/libcontainer" + "github.com/docker/libcontainer/configs" consolepkg "github.com/docker/libcontainer/console" ) +type tty struct { + master *os.File + console string + state *term.State +} + +func (t *tty) Close() error { + if t.master != nil { + t.master.Close() + } + if t.state != nil { + term.RestoreTerminal(os.Stdin.Fd(), t.state) + } + return nil +} + +func (t *tty) set(config *configs.Config) { + config.Console = t.console +} + +func (t *tty) attach(process *libcontainer.Process) { + if t.master != nil { + process.Stderr = nil + process.Stdout = nil + process.Stdin = nil + } +} + +func (t *tty) resize() error { + if t.master == nil { + return nil + } + ws, err := 
term.GetWinsize(os.Stdin.Fd()) + if err != nil { + return err + } + return term.SetWinsize(t.master.Fd(), ws) +} + var execCommand = cli.Command{ Name: "exec", Usage: "execute a new command inside a container", @@ -23,24 +64,14 @@ var execCommand = cli.Command{ } func execAction(context *cli.Context) { - var ( - master *os.File - console string - err error - - sigc = make(chan os.Signal, 10) - - stdin = os.Stdin - stdout = os.Stdout - stderr = os.Stderr - - exitCode int - ) - factory, err := loadFactory(context) if err != nil { fatal(err) } + tty, err := newTty(context) + if err != nil { + fatal(err) + } container, err := factory.Load(context.String("id")) if err != nil { if lerr, ok := err.(libcontainer.Error); !ok || lerr.Code() != libcontainer.ContainerNotExists { @@ -50,46 +81,22 @@ func execAction(context *cli.Context) { if err != nil { fatal(err) } - if context.Bool("tty") { - stdin = nil - stdout = nil - stderr = nil - if master, console, err = consolepkg.CreateMasterAndConsole(); err != nil { - fatal(err) - } - go io.Copy(master, os.Stdin) - go io.Copy(os.Stdout, master) - state, err := term.SetRawTerminal(os.Stdin.Fd()) - if err != nil { - fatal(err) - } - defer term.RestoreTerminal(os.Stdin.Fd(), state) - config.Console = console - } + tty.set(config) if container, err = factory.Create(context.String("id"), config); err != nil { fatal(err) } } + go handleSignals(container, tty) process := &libcontainer.Process{ Args: context.Args(), - Stdin: stdin, - Stdout: stdout, - Stderr: stderr, + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, } + tty.attach(process) if _, err := container.Start(process); err != nil { fatal(err) } - go func() { - resizeTty(master) - for sig := range sigc { - switch sig { - case syscall.SIGWINCH: - resizeTty(master) - default: - container.Signal(sig) - } - } - }() status, err := container.Wait() if err != nil { fatal(err) @@ -97,6 +104,11 @@ func execAction(context *cli.Context) { if err := container.Destroy(); err != nil 
{ fatal(err) } + exit(status) +} + +func exit(status syscall.WaitStatus) { + var exitCode int if status.Exited() { exitCode = status.ExitStatus() } else if status.Signaled() { @@ -107,13 +119,37 @@ func execAction(context *cli.Context) { os.Exit(exitCode) } -func resizeTty(master *os.File) { - if master == nil { - return +func handleSignals(container libcontainer.Container, tty *tty) { + sigc := make(chan os.Signal, 10) + signal.Notify(sigc) + tty.resize() + for sig := range sigc { + switch sig { + case syscall.SIGWINCH: + tty.resize() + default: + container.Signal(sig) + } } - ws, err := term.GetWinsize(os.Stdin.Fd()) - if err != nil { - return +} + +func newTty(context *cli.Context) (*tty, error) { + if context.Bool("tty") { + master, console, err := consolepkg.CreateMasterAndConsole() + if err != nil { + return nil, err + } + go io.Copy(master, os.Stdin) + go io.Copy(os.Stdout, master) + state, err := term.SetRawTerminal(os.Stdin.Fd()) + if err != nil { + return nil, err + } + return &tty{ + master: master, + console: console, + state: state, + }, nil } - term.SetWinsize(master.Fd(), ws) + return &tty{}, nil } diff --git a/nsinit/init.go b/nsinit/init.go index 2b8784ccd..d45d12871 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -5,7 +5,7 @@ import ( "github.com/codegangsta/cli" "github.com/docker/libcontainer" - _ "github.com/docker/libcontainer/namespaces/nsenter" + _ "github.com/docker/libcontainer/nsenter" ) var initCommand = cli.Command{ From daca745c4ce95f42600f013fe63fb69a07e7a85d Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 3 Feb 2015 13:53:31 +0300 Subject: [PATCH 065/101] api: fix integration tests Signed-off-by: Andrey Vagin --- integration/exec_test.go | 47 +++++++++++++++++------------------- integration/init_test.go | 2 +- integration/template_test.go | 10 +++----- integration/utils_test.go | 5 ++-- 4 files changed, 28 insertions(+), 36 deletions(-) diff --git a/integration/exec_test.go b/integration/exec_test.go index 
9ec617d13..745da1575 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -5,7 +5,6 @@ import ( "io/ioutil" "os" "strings" - "syscall" "testing" "github.com/docker/libcontainer" @@ -192,6 +191,20 @@ func newTestRoot() (string, error) { return dir, nil } +func waitProcess(pid int, t *testing.T) { + p, err := os.FindProcess(pid) + if err != nil { + t.Fatal(err) + } + status, err := p.Wait() + if err != nil { + t.Fatal(err) + } + if !status.Success() { + t.Fatal(status) + } +} + func TestEnter(t *testing.T) { if testing.Short() { return @@ -229,12 +242,12 @@ func TestEnter(t *testing.T) { var stdout, stdout2 bytes.Buffer - pconfig := libcontainer.ProcessConfig{ + pconfig := libcontainer.Process{ Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}, Stdin: stdinR, Stdout: &stdout, } - pid, err := container.StartProcess(&pconfig) + pid, err := container.Start(&pconfig) stdinR.Close() defer stdinW.Close() if err != nil { @@ -250,7 +263,7 @@ func TestEnter(t *testing.T) { pconfig.Stdin = stdinR2 pconfig.Stdout = &stdout2 - pid2, err := container.StartProcess(&pconfig) + pid2, err := container.Start(&pconfig) stdinR2.Close() defer stdinW2.Close() if err != nil { @@ -273,27 +286,11 @@ func TestEnter(t *testing.T) { } // Wait processes - var status syscall.WaitStatus - stdinW2.Close() - exitCode, err := container.WaitProcess(pid2) - if err != nil { - t.Fatal(err) - } - status = syscall.WaitStatus(exitCode) - if status.ExitStatus() != 0 { - t.Fatal(exitCode) - } + waitProcess(pid2, t) stdinW.Close() - exitCode, err = container.WaitProcess(pid) - if err != nil { - t.Fatal(err) - } - status = syscall.WaitStatus(exitCode) - if status.ExitStatus() != 0 { - t.Fatal(exitCode) - } + waitProcess(pid, t) // Check that both processes live in the same pidns pidns := string(stdout.Bytes()) @@ -345,11 +342,11 @@ func TestFreeze(t *testing.T) { t.Fatal(err) } - pconfig := libcontainer.ProcessConfig{ + pconfig := libcontainer.Process{ Args: []string{"cat"}, 
Stdin: stdinR, } - pid, err := container.StartProcess(&pconfig) + pid, err := container.Start(&pconfig) stdinR.Close() defer stdinW.Close() if err != nil { @@ -364,7 +361,7 @@ func TestFreeze(t *testing.T) { if err := container.Pause(); err != nil { t.Fatal(err) } - state, err := container.RunState() + state, err := container.Status() if err != nil { t.Fatal(err) } diff --git a/integration/init_test.go b/integration/init_test.go index f9c1e3cf5..6b4bc32df 100644 --- a/integration/init_test.go +++ b/integration/init_test.go @@ -6,7 +6,7 @@ import ( "runtime" "github.com/docker/libcontainer" - _ "github.com/docker/libcontainer/namespaces/nsenter" + _ "github.com/docker/libcontainer/nsenter" ) // init runs the libcontainer initialization code because of the busybox style needs diff --git a/integration/template_test.go b/integration/template_test.go index 372cc6953..a8a40b089 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -3,7 +3,6 @@ package integration import ( "syscall" - "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/devices" ) @@ -15,7 +14,6 @@ import ( func newTemplateConfig(rootfs string) *configs.Config { return &configs.Config{ RootFs: rootfs, - Tty: false, Capabilities: []string{ "CHOWN", "DAC_OVERRIDE", @@ -39,17 +37,15 @@ func newTemplateConfig(rootfs string) *configs.Config { {Type: configs.NEWPID}, {Type: configs.NEWNET}, }), - Cgroups: &cgroups.Cgroup{ + Cgroups: &configs.Cgroup{ Name: "test", Parent: "integration", AllowAllDevices: false, AllowedDevices: devices.DefaultAllowedDevices, }, - MountConfig: &configs.MountConfig{ - DeviceNodes: devices.DefaultAutoCreatedDevices, - }, - Hostname: "integration", + DeviceNodes: devices.DefaultAutoCreatedDevices, + Hostname: "integration", Env: []string{ "HOME=/root", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/integration/utils_test.go b/integration/utils_test.go index 
93fe3b4b7..39c9a1269 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -91,9 +91,8 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe buffers = newStdBuffers() - process := &libcontainer.ProcessConfig{ + process := &libcontainer.Process{ Args: args, - Env: make([]string, 0), Stdin: buffers.Stdin, Stdout: buffers.Stdout, Stderr: buffers.Stderr, @@ -110,7 +109,7 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe } defer container.Destroy() - pid, err := container.StartProcess(process) + pid, err := container.Start(process) if err != nil { return nil, -1, err } From bcd0222be50de3789a5d99620c51988d6e497155 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 3 Feb 2015 15:27:21 +0300 Subject: [PATCH 066/101] api: fix config tests Signed-off-by: Andrey Vagin --- configs/config_test.go | 23 +-- .../defaults.go => configs/device_defaults.go | 20 +- devices/devices_test.go | 12 +- integration/template_test.go | 5 +- linux_container_test.go | 2 +- linux_factory_test.go | 3 - sample_configs/apparmor.json | 118 ++++++------ sample_configs/attach_to_bridge.json | 180 +++++++++--------- sample_configs/host-pid.json | 118 ++++++------ sample_configs/minimal.json | 118 ++++++------ .../route_source_address_selection.json | 118 ++++++------ sample_configs/selinux.json | 118 ++++++------ sample_configs/userns.json | 118 ++++++------ 13 files changed, 463 insertions(+), 490 deletions(-) rename devices/defaults.go => configs/device_defaults.go (85%) diff --git a/configs/config_test.go b/configs/config_test.go index d64066c42..a74ccd40b 100644 --- a/configs/config_test.go +++ b/configs/config_test.go @@ -5,8 +5,6 @@ import ( "os" "path/filepath" "testing" - - "github.com/docker/libcontainer/devices" ) // Checks whether the expected capability is specified in the capabilities. 
@@ -19,13 +17,13 @@ func contains(expected string, values []string) bool { return false } -func containsDevice(expected *devices.Device, values []*devices.Device) bool { +func containsDevice(expected *Device, values []*Device) bool { for _, d := range values { if d.Path == expected.Path && - d.CgroupPermissions == expected.CgroupPermissions && + d.Permissions == expected.Permissions && d.FileMode == expected.FileMode && - d.MajorNumber == expected.MajorNumber && - d.MinorNumber == expected.MinorNumber && + d.Major == expected.Major && + d.Minor == expected.Minor && d.Type == expected.Type { return true } @@ -59,11 +57,6 @@ func TestConfigJsonFormat(t *testing.T) { t.Fail() } - if !container.Tty { - t.Log("tty should be set to true") - t.Fail() - } - if !container.Namespaces.Contains(NEWNET) { t.Log("namespaces should contain NEWNET") t.Fail() @@ -120,8 +113,8 @@ func TestConfigJsonFormat(t *testing.T) { } } - for _, d := range devices.DefaultSimpleDevices { - if !containsDevice(d, container.MountConfig.DeviceNodes) { + for _, d := range DefaultSimpleDevices { + if !containsDevice(d, container.DeviceNodes) { t.Logf("expected device configuration for %s", d.Path) t.Fail() } @@ -154,8 +147,8 @@ func TestSelinuxLabels(t *testing.T) { if container.ProcessLabel != label { t.Fatalf("expected process label %q but received %q", label, container.ProcessLabel) } - if container.MountConfig.MountLabel != label { - t.Fatalf("expected mount label %q but received %q", label, container.MountConfig.MountLabel) + if container.MountLabel != label { + t.Fatalf("expected mount label %q but received %q", label, container.MountLabel) } } diff --git a/devices/defaults.go b/configs/device_defaults.go similarity index 85% rename from devices/defaults.go rename to configs/device_defaults.go index 3923ccd50..70fa4af04 100644 --- a/devices/defaults.go +++ b/configs/device_defaults.go @@ -1,10 +1,8 @@ -package devices - -import "github.com/docker/libcontainer/configs" +package configs var ( // 
These are devices that are to be both allowed and created. - DefaultSimpleDevices = []*configs.Device{ + DefaultSimpleDevices = []*Device{ // /dev/null and zero { Path: "/dev/null", @@ -60,18 +58,18 @@ var ( FileMode: 0666, }, } - DefaultAllowedDevices = append([]*configs.Device{ + DefaultAllowedDevices = append([]*Device{ // allow mknod for any device { Type: 'c', - Major: configs.Wildcard, - Minor: configs.Wildcard, + Major: Wildcard, + Minor: Wildcard, Permissions: "m", }, { Type: 'b', - Major: configs.Wildcard, - Minor: configs.Wildcard, + Major: Wildcard, + Minor: Wildcard, Permissions: "m", }, @@ -101,7 +99,7 @@ var ( Path: "", Type: 'c', Major: 136, - Minor: configs.Wildcard, + Minor: Wildcard, Permissions: "rwm", }, { @@ -121,7 +119,7 @@ var ( Permissions: "rwm", }, }, DefaultSimpleDevices...) - DefaultAutoCreatedDevices = append([]*configs.Device{ + DefaultAutoCreatedDevices = append([]*Device{ { // /dev/fuse is created but not allowed. // This is to allow java to work. Because java diff --git a/devices/devices_test.go b/devices/devices_test.go index fec400223..9e52fc4e2 100644 --- a/devices/devices_test.go +++ b/devices/devices_test.go @@ -6,7 +6,7 @@ import ( "testing" ) -func TestGetDeviceLstatFailure(t *testing.T) { +func TestDeviceFromPathLstatFailure(t *testing.T) { testError := errors.New("test error") // Override os.Lstat to inject error. @@ -14,13 +14,13 @@ func TestGetDeviceLstatFailure(t *testing.T) { return nil, testError } - _, err := GetDevice("", "") + _, err := DeviceFromPath("", "") if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } -func TestGetHostDeviceNodesIoutilReadDirFailure(t *testing.T) { +func TestHostDevicesIoutilReadDirFailure(t *testing.T) { testError := errors.New("test error") // Override ioutil.ReadDir to inject error. 
@@ -28,13 +28,13 @@ func TestGetHostDeviceNodesIoutilReadDirFailure(t *testing.T) { return nil, testError } - _, err := GetHostDeviceNodes() + _, err := HostDevices() if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } } -func TestGetHostDeviceNodesIoutilReadDirDeepFailure(t *testing.T) { +func TestHostDevicesIoutilReadDirDeepFailure(t *testing.T) { testError := errors.New("test error") called := false @@ -54,7 +54,7 @@ func TestGetHostDeviceNodesIoutilReadDirDeepFailure(t *testing.T) { return []os.FileInfo{fi}, nil } - _, err := GetHostDeviceNodes() + _, err := HostDevices() if err != testError { t.Fatalf("Unexpected error %v, expected %v", err, testError) } diff --git a/integration/template_test.go b/integration/template_test.go index a8a40b089..28c801960 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -4,7 +4,6 @@ import ( "syscall" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/devices" ) // newTemplateConfig returns a base template for running a container @@ -41,10 +40,10 @@ func newTemplateConfig(rootfs string) *configs.Config { Name: "test", Parent: "integration", AllowAllDevices: false, - AllowedDevices: devices.DefaultAllowedDevices, + AllowedDevices: configs.DefaultAllowedDevices, }, - DeviceNodes: devices.DefaultAutoCreatedDevices, + DeviceNodes: configs.DefaultAutoCreatedDevices, Hostname: "integration", Env: []string{ "HOME=/root", diff --git a/linux_container_test.go b/linux_container_test.go index 17bd8b9ab..7d4eae92b 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -34,7 +34,7 @@ func (m *mockCgroupManager) GetPaths() map[string]string { return nil } -func (m *mockCgroupManager) Freeze(state cgroups.FreezerState) error { +func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { return nil } diff --git a/linux_factory_test.go b/linux_factory_test.go index 3c1e275c0..fd1e830a7 100644 --- a/linux_factory_test.go +++ 
b/linux_factory_test.go @@ -118,9 +118,6 @@ func TestFactoryLoadContainer(t *testing.T) { } config := container.Config() - if config == nil { - t.Fatal("expected non nil container config") - } if config.RootFs != expectedConfig.RootFs { t.Fatalf("expected rootfs %q but received %q", expectedConfig.RootFs, config.RootFs) diff --git a/sample_configs/apparmor.json b/sample_configs/apparmor.json index 96f73cb79..d044524fa 100644 --- a/sample_configs/apparmor.json +++ b/sample_configs/apparmor.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, 
- "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -118,57 +118,55 @@ }, "restrict_sys": true, "apparmor_profile": "docker-default", - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } - ] - }, + ], "environment": [ "HOME=/", 
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/attach_to_bridge.json b/sample_configs/attach_to_bridge.json index e5c03a7ef..248a8a799 100644 --- a/sample_configs/attach_to_bridge.json +++ b/sample_configs/attach_to_bridge.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + 
"minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,57 +117,55 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { - "device_nodes": [ - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 3, - "path": "/dev/null", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 5, - "path": "/dev/zero", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 7, - "path": "/dev/full", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 5, - "path": "/dev/tty", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 9, - "path": "/dev/urandom", - "type": 99 - }, - { - "cgroup_permissions": "rwm", - "file_mode": 438, - "major_number": 1, - "minor_number": 8, - "path": "/dev/random", - "type": 99 - } - ] - }, + "device_nodes": [ + { + "permissions": "rwm", + "file_mode": 438, + "major": 1, + "minor": 3, + "path": "/dev/null", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + "major": 1, + "minor": 5, + "path": "/dev/zero", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + "major": 1, + "minor": 7, + "path": "/dev/full", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + "major": 5, + "path": "/dev/tty", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 
438, + "major": 1, + "minor": 9, + "path": "/dev/urandom", + "type": 99 + }, + { + "permissions": "rwm", + "file_mode": 438, + "major": 1, + "minor": 8, + "path": "/dev/random", + "type": 99 + } + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/host-pid.json b/sample_configs/host-pid.json index f47af930e..61c3cf480 100644 --- a/sample_configs/host-pid.json +++ b/sample_configs/host-pid.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + 
"major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,52 +117,51 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -172,8 +171,7 @@ "type": "tmpfs", "destination": "/tmp" } - ] - }, + ], "environment": [ 
"HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/minimal.json b/sample_configs/minimal.json index 01de46746..28b22cce4 100644 --- a/sample_configs/minimal.json +++ b/sample_configs/minimal.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": 
"/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,52 +117,51 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -172,8 +171,7 @@ "type": "tmpfs", "destination": "/tmp" } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/route_source_address_selection.json b/sample_configs/route_source_address_selection.json index 
9c62045a4..b9c9ef7bd 100644 --- a/sample_configs/route_source_address_selection.json +++ b/sample_configs/route_source_address_selection.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 
5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,57 +117,55 @@ "parent": "docker" }, "restrict_sys": true, - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/selinux.json b/sample_configs/selinux.json index 15556488a..a415c2574 100644 --- a/sample_configs/selinux.json +++ b/sample_configs/selinux.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": 
"m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": 
"rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -118,58 +118,56 @@ }, "restrict_sys": true, "process_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475", - "mount_config": { "mount_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475", "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", diff --git a/sample_configs/userns.json b/sample_configs/userns.json index 8c9c841f1..1ebbad610 100644 --- a/sample_configs/userns.json +++ b/sample_configs/userns.json @@ -16,99 +16,99 @@ "cgroups": { "allowed_devices": [ { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "m", - "major_number": -1, - "minor_number": -1, + "permissions": "m", + "major": -1, + "minor": -1, 
"type": 98 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 1, + "permissions": "rwm", + "major": 5, + "minor": 1, "path": "/dev/console", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, + "permissions": "rwm", + "major": 4, "path": "/dev/tty0", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 4, - "minor_number": 1, + "permissions": "rwm", + "major": 4, + "minor": 1, "path": "/dev/tty1", "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 136, - "minor_number": -1, + "permissions": "rwm", + "major": 136, + "minor": -1, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 5, - "minor_number": 2, + "permissions": "rwm", + "major": 5, + "minor": 2, "type": 99 }, { - "cgroup_permissions": "rwm", - "major_number": 10, - "minor_number": 200, + "permissions": "rwm", + "major": 10, + "minor": 200, "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -117,52 +117,51 @@ "parent": "docker" }, 
"restrict_sys": true, - "mount_config": { "device_nodes": [ { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 3, + "major": 1, + "minor": 3, "path": "/dev/null", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 5, + "major": 1, + "minor": 5, "path": "/dev/zero", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 7, + "major": 1, + "minor": 7, "path": "/dev/full", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 5, + "major": 5, "path": "/dev/tty", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 9, + "major": 1, + "minor": 9, "path": "/dev/urandom", "type": 99 }, { - "cgroup_permissions": "rwm", + "permissions": "rwm", "file_mode": 438, - "major_number": 1, - "minor_number": 8, + "major": 1, + "minor": 8, "path": "/dev/random", "type": 99 } @@ -172,8 +171,7 @@ "type": "tmpfs", "destination": "/tmp" } - ] - }, + ], "environment": [ "HOME=/", "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", From ab76a88d6bf94d309f74df0a0be2ad3d648cfe7c Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 3 Feb 2015 10:50:18 -0800 Subject: [PATCH 067/101] Remove Wait() on container interface Since we return the pid for the started process we do not need this method on the interface. 
Signed-off-by: Michael Crosby --- .gitignore | 1 + container.go | 10 +--------- linux_container.go | 10 ---------- nsinit/exec.go | 11 ++++++++--- 4 files changed, 10 insertions(+), 22 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..4c2914fc7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +nsinit/nsinit diff --git a/container.go b/container.go index bb6bce89f..9db1e297b 100644 --- a/container.go +++ b/container.go @@ -5,7 +5,6 @@ package libcontainer import ( "os" - "syscall" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" @@ -26,7 +25,7 @@ type Container interface { // Returns the ID of the container ID() string - // Returns the current statusof the container. + // Returns the current status of the container. // // errors: // Systemerror - System error. @@ -97,13 +96,6 @@ type Container interface { // Systemerror - System error. Signal(signal os.Signal) error - // Wait waits for the init process of the conatiner to die and returns it's exit status. - // - // errors: - // ContainerDestroyed - Container no longer exists, - // Systemerror - System error. - Wait() (exitStatus syscall.WaitStatus, err error) - // OOM returns a read-only channel signaling when the container receives an OOM notification. 
// // errors: diff --git a/linux_container.go b/linux_container.go index f684188c5..bf501c86a 100644 --- a/linux_container.go +++ b/linux_container.go @@ -295,16 +295,6 @@ func (c *linuxContainer) Signal(signal os.Signal) error { panic("not implemented") } -func (c *linuxContainer) Wait() (syscall.WaitStatus, error) { - var status syscall.WaitStatus - // TODO : close exec.Cmd pipes, fix in master - _, err := syscall.Wait4(c.state.InitPid, &status, 0, nil) - if err != nil { - return 0, newGenericError(err, SystemError) - } - return status, err -} - func (c *linuxContainer) OOM() (<-chan struct{}, error) { return NotifyOnOOM(c.state) } diff --git a/nsinit/exec.go b/nsinit/exec.go index f710a9b73..d12f3638b 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -94,17 +94,22 @@ func execAction(context *cli.Context) { Stderr: os.Stderr, } tty.attach(process) - if _, err := container.Start(process); err != nil { + pid, err := container.Start(process) + if err != nil { + fatal(err) + } + proc, err := os.FindProcess(pid) + if err != nil { fatal(err) } - status, err := container.Wait() + status, err := proc.Wait() if err != nil { fatal(err) } if err := container.Destroy(); err != nil { fatal(err) } - exit(status) + exit(status.Sys().(syscall.WaitStatus)) } func exit(status syscall.WaitStatus) { From c6f5420bed7bb1e6802b40cc6ab254b1bbf67854 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 4 Feb 2015 14:21:03 +0300 Subject: [PATCH 068/101] integration: check a container with userns Signed-off-by: Andrey Vagin --- integration/exec_test.go | 17 +++++++++++++++++ mount/init.go | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/integration/exec_test.go b/integration/exec_test.go index 745da1575..855613ded 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -12,6 +12,17 @@ import ( ) func TestExecPS(t *testing.T) { + testExecPS(t, false) +} + +func TestUsernsExecPS(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); 
os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + testExecPS(t, true) +} + +func testExecPS(t *testing.T, userns bool) { if testing.Short() { return } @@ -23,6 +34,12 @@ func TestExecPS(t *testing.T) { defer remove(rootfs) config := newTemplateConfig(rootfs) + if userns { + config.UidMappings = []configs.IDMap{{0, 0, 1000}} + config.GidMappings = []configs.IDMap{{0, 0, 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + buffers, exitCode, err := runContainer(config, "", "ps") if err != nil { t.Fatal(err) diff --git a/mount/init.go b/mount/init.go index 4f9943332..646970d56 100644 --- a/mount/init.go +++ b/mount/init.go @@ -46,7 +46,7 @@ func InitializeMountNamespace(config *configs.Config) (err error) { // stdin, stdout and stderr could be pointing to /dev/null from parent namespace. // Re-open them inside this namespace. // FIXME: Need to fix this for user namespaces. - if 0 == 0 { + if !config.Namespaces.Contains(configs.NEWUSER) { if err := reOpenDevNull(config.RootFs); err != nil { return err } From 5fc19e8db53f0b4c473f7e31bfa77e7e1c5d157b Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 3 Feb 2015 17:44:58 -0800 Subject: [PATCH 069/101] Rename Fs fields to fs Signed-off-by: Michael Crosby --- PRINCIPLES.md | 2 +- cgroups/fs/apply_raw.go | 2 +- configs/config.go | 14 +- configs/config_test.go | 68 +++++++++- configs/device.go | 24 +++- configs/mount.go | 4 +- configs/network.go | 6 +- devices/devices.go | 6 +- integration/exec_test.go | 16 +-- integration/template_test.go | 4 +- integration/utils_test.go | 6 +- linux_container.go | 128 +----------------- linux_factory.go | 119 +++++++++++++++- linux_factory_test.go | 6 +- mount/init.go | 24 ++-- mount/ptmx.go | 4 +- process.go | 15 +- sample_configs/apparmor.json | 2 +- sample_configs/attach_to_bridge.json | 2 +- sample_configs/host-pid.json | 2 +- sample_configs/minimal.json | 2 +- .../route_source_address_selection.json | 2 +- 
sample_configs/selinux.json | 2 +- sample_configs/userns.json | 2 +- 24 files changed, 266 insertions(+), 196 deletions(-) diff --git a/PRINCIPLES.md b/PRINCIPLES.md index 42396c0ee..056064210 100644 --- a/PRINCIPLES.md +++ b/PRINCIPLES.md @@ -8,7 +8,7 @@ In the design and development of libcontainer we try to follow these principles: * Less code is better. * Fewer components are better. Do you really need to add one more class? * 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand. -* Don't do later what you can do now. "//FIXME: refactor" is not acceptable in new code. +* Don't do later what you can do now. "//TODO: refactor" is not acceptable in new code. * When hesitating between two options, choose the one that is easier to reverse. * "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later. * Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable. diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 4a3a88645..a56b59a1c 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -83,7 +83,7 @@ func (m *Manager) Apply(pid int) error { if err := sys.Set(d); err != nil { return err } - // FIXME: Apply should, ideally, be reentrant or be broken up into a separate + // TODO: Apply should, ideally, be reentrant or be broken up into a separate // create and join phase so that the cgroup hierarchy for a container can be // created then join consists of writing the process pids to cgroup.procs p, err := d.path(name) diff --git a/configs/config.go b/configs/config.go index 844a9cad4..0f939ade5 100644 --- a/configs/config.go +++ b/configs/config.go @@ -26,22 +26,22 @@ type Config struct { // This is required when using read only root filesystems. 
In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. PivotDir string `json:"pivot_dir,omitempty"` - // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted - // bind mounts are writtable - ReadonlyFs bool `json:"readonly_fs,omitempty"` + // Path to a directory containing the container's root filesystem. + Rootfs string `json:"rootfs,omitempty"` + + // Readonlyfs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writtable. + Readonlyfs bool `json:"readonlyfs,omitempty"` // Mounts specify additional source and destination paths that will be mounted inside the container's // rootfs and mount namespace if specified Mounts []*Mount `json:"mounts,omitempty"` // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! - DeviceNodes []*Device `json:"device_nodes,omitempty"` + Devices []*Device `json:"devices,omitempty"` MountLabel string `json:"mount_label,omitempty"` - // Pathname to container's root filesystem - RootFs string `json:"root_fs,omitempty"` - // Hostname optionally sets the container's hostname if provided Hostname string `json:"hostname,omitempty"` diff --git a/configs/config_test.go b/configs/config_test.go index a74ccd40b..2128d020b 100644 --- a/configs/config_test.go +++ b/configs/config_test.go @@ -114,7 +114,7 @@ func TestConfigJsonFormat(t *testing.T) { } for _, d := range DefaultSimpleDevices { - if !containsDevice(d, container.DeviceNodes) { + if !containsDevice(d, container.Devices) { t.Logf("expected device configuration for %s", d.Path) t.Fail() } @@ -163,3 +163,69 @@ func TestRemoveNamespace(t *testing.T) { t.Fatalf("namespaces should have 0 items but reports %d", len(ns)) } } + +func TestHostUIDNoUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{}, + } + uid, err := 
config.HostUID() + if err != nil { + t.Fatal(err) + } + if uid != 0 { + t.Fatal("expected uid 0 with no USERNS but received %d", uid) + } +} + +func TestHostUIDWithUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{{Type: NEWUSER}}, + UidMappings: []IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 1, + }, + }, + } + uid, err := config.HostUID() + if err != nil { + t.Fatal(err) + } + if uid != 1000 { + t.Fatal("expected uid 1000 with no USERNS but received %d", uid) + } +} + +func TestHostGIDNoUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{}, + } + uid, err := config.HostGID() + if err != nil { + t.Fatal(err) + } + if uid != 0 { + t.Fatal("expected gid 0 with no USERNS but received %d", uid) + } +} + +func TestHostGIDWithUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{{Type: NEWUSER}}, + GidMappings: []IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 1, + }, + }, + } + uid, err := config.HostGID() + if err != nil { + t.Fatal(err) + } + if uid != 1000 { + t.Fatal("expected gid 1000 with no USERNS but received %d", uid) + } +} diff --git a/configs/device.go b/configs/device.go index 18d732325..a8117068e 100644 --- a/configs/device.go +++ b/configs/device.go @@ -10,19 +10,29 @@ const ( ) type Device struct { + // Device type, block, char, etc. Type rune `json:"type,omitempty"` - // It is fine if this is an empty string in the case that you are using Wildcards + + // Path to the device. Path string `json:"path,omitempty"` - // Use the wildcard constant for wildcards. + + // Major is the device's major number. Major int64 `json:"major,omitempty"` - // Use the wildcard constant for wildcards. + + // Minor is the device's minor number. Minor int64 `json:"minor,omitempty"` - // Typically just "rwm" + + // Cgroup permissions format, rwm. Permissions string `json:"permissions,omitempty"` - // The permission bits of the file's mode + + // FileMode permission bits for the device. 
FileMode os.FileMode `json:"file_mode,omitempty"` - Uid uint32 `json:"uid,omitempty"` - Gid uint32 `json:"gid,omitempty"` + + // Uid of the device. + Uid uint32 `json:"uid,omitempty"` + + // Gid of the device. + Gid uint32 `json:"gid,omitempty"` } func (d *Device) CgroupString() string { diff --git a/configs/mount.go b/configs/mount.go index f6f39992c..5a26a287e 100644 --- a/configs/mount.go +++ b/configs/mount.go @@ -52,7 +52,7 @@ func (m *Mount) bindMount(rootfs, mountLabel string) error { return err } - // FIXME: (crosbymichael) This does not belong here and should be done a layer above + // TODO: (crosbymichael) This does not belong here and should be done a layer above dest, err = symlink.FollowSymlinkInScope(dest, rootfs) if err != nil { return err @@ -94,7 +94,7 @@ func (m *Mount) tmpfsMount(rootfs, mountLabel string) error { dest = filepath.Join(rootfs, m.Destination) ) - // FIXME: (crosbymichael) This does not belong here and should be done a layer above + // TODO: (crosbymichael) This does not belong here and should be done a layer above if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { return err } diff --git a/configs/network.go b/configs/network.go index 542183632..edcc4dd4d 100644 --- a/configs/network.go +++ b/configs/network.go @@ -20,12 +20,12 @@ type Network struct { // Address contains the IPv4 and mask to set on the network interface Address string `json:"address,omitempty"` - // IPv6Address contains the IPv6 and mask to set on the network interface - IPv6Address string `json:"ipv6_address,omitempty"` - // Gateway sets the gateway address that is used as the default for the interface Gateway string `json:"gateway,omitempty"` + // IPv6Address contains the IPv6 and mask to set on the network interface + IPv6Address string `json:"ipv6_address,omitempty"` + // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface IPv6Gateway string `json:"ipv6_gateway,omitempty"` diff --git a/devices/devices.go 
b/devices/devices.go index b3f67aa3d..537f71aff 100644 --- a/devices/devices.go +++ b/devices/devices.go @@ -60,10 +60,10 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) { } func HostDevices() ([]*configs.Device, error) { - return getDeviceNodes("/dev") + return getDevices("/dev") } -func getDeviceNodes(path string) ([]*configs.Device, error) { +func getDevices(path string) ([]*configs.Device, error) { files, err := ioutilReadDir(path) if err != nil { return nil, err @@ -76,7 +76,7 @@ func getDeviceNodes(path string) ([]*configs.Device, error) { case "pts", "shm", "fd", "mqueue": continue default: - sub, err := getDeviceNodes(filepath.Join(path, f.Name())) + sub, err := getDevices(filepath.Join(path, f.Name())) if err != nil { return nil, err } diff --git a/integration/exec_test.go b/integration/exec_test.go index 855613ded..0f93957c7 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -27,7 +27,7 @@ func testExecPS(t *testing.T, userns bool) { return } - rootfs, err := newRootFs() + rootfs, err := newRootfs() if err != nil { t.Fatal(err) } @@ -65,7 +65,7 @@ func TestIPCPrivate(t *testing.T) { return } - rootfs, err := newRootFs() + rootfs, err := newRootfs() if err != nil { t.Fatal(err) } @@ -96,7 +96,7 @@ func TestIPCHost(t *testing.T) { return } - rootfs, err := newRootFs() + rootfs, err := newRootfs() if err != nil { t.Fatal(err) } @@ -128,7 +128,7 @@ func TestIPCJoinPath(t *testing.T) { return } - rootfs, err := newRootFs() + rootfs, err := newRootfs() if err != nil { t.Fatal(err) } @@ -161,7 +161,7 @@ func TestIPCBadPath(t *testing.T) { return } - rootfs, err := newRootFs() + rootfs, err := newRootfs() if err != nil { t.Fatal(err) } @@ -181,7 +181,7 @@ func TestRlimit(t *testing.T) { return } - rootfs, err := newRootFs() + rootfs, err := newRootfs() if err != nil { t.Fatal(err) } @@ -232,7 +232,7 @@ func TestEnter(t *testing.T) { } defer os.RemoveAll(root) - rootfs, err := newRootFs() + rootfs, err := newRootfs() 
if err != nil { t.Fatal(err) } @@ -335,7 +335,7 @@ func TestFreeze(t *testing.T) { } defer os.RemoveAll(root) - rootfs, err := newRootFs() + rootfs, err := newRootfs() if err != nil { t.Fatal(err) } diff --git a/integration/template_test.go b/integration/template_test.go index 28c801960..28160c986 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -12,7 +12,7 @@ import ( // and the default setup for devices func newTemplateConfig(rootfs string) *configs.Config { return &configs.Config{ - RootFs: rootfs, + Rootfs: rootfs, Capabilities: []string{ "CHOWN", "DAC_OVERRIDE", @@ -43,7 +43,7 @@ func newTemplateConfig(rootfs string) *configs.Config { AllowedDevices: configs.DefaultAllowedDevices, }, - DeviceNodes: configs.DefaultAutoCreatedDevices, + Devices: configs.DefaultAutoCreatedDevices, Hostname: "integration", Env: []string{ "HOME=/root", diff --git a/integration/utils_test.go b/integration/utils_test.go index 39c9a1269..75cba52b1 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -29,7 +29,7 @@ type stdBuffers struct { } func writeConfig(config *configs.Config) error { - f, err := os.OpenFile(filepath.Join(config.RootFs, "container.json"), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0700) + f, err := os.OpenFile(filepath.Join(config.Rootfs, "container.json"), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0700) if err != nil { return err } @@ -51,8 +51,8 @@ func loadConfig() (*configs.Config, error) { return container, nil } -// newRootFs creates a new tmp directory and copies the busybox root filesystem -func newRootFs() (string, error) { +// newRootfs creates a new tmp directory and copies the busybox root filesystem +func newRootfs() (string, error) { dir, err := ioutil.TempDir("", "") if err != nil { return "", err diff --git a/linux_container.go b/linux_container.go index bf501c86a..bf6b6a09b 100644 --- a/linux_container.go +++ b/linux_container.go @@ -6,17 +6,13 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "os" "os/exec" 
"path/filepath" "syscall" - "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount" "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/system" "github.com/golang/glog" @@ -106,7 +102,7 @@ func (c *linuxContainer) Start(process *Process) (int, error) { cmd.Stdout = process.Stdout cmd.Stderr = process.Stderr cmd.Env = c.config.Env - cmd.Dir = c.config.RootFs + cmd.Dir = c.config.Rootfs if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} } @@ -161,7 +157,7 @@ func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, err return -1, terr } // Enter cgroups. - if err := enterCgroups(c.state, pid.Pid); err != nil { + if err := c.enterCgroups(pid.Pid); err != nil { return terminate(err) } encoder := json.NewEncoder(parent) @@ -398,7 +394,7 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process } defer parent.Close() command.ExtraFiles = []*os.File{child} - command.Dir = container.RootFs + command.Dir = container.Rootfs command.Env = append(command.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), fmt.Sprintf("_LIBCONTAINER_USERNS=1")) @@ -460,120 +456,6 @@ type pid struct { Pid int `json:"Pid"` } -// Finalize entering into a container and execute a specified command -func InitIn(pipe *os.File) (err error) { - defer func() { - // if we have an error during the initialization of the container's init then send it back to the - // parent process in the form of an initError. - if err != nil { - // ensure that any data sent from the parent is consumed so it doesn't - // receive ECONNRESET when the child writes to the pipe. 
- ioutil.ReadAll(pipe) - if err := json.NewEncoder(pipe).Encode(initError{ - Message: err.Error(), - }); err != nil { - panic(err) - } - } - // ensure that this pipe is always closed - pipe.Close() - }() - decoder := json.NewDecoder(pipe) - var config *configs.Config - if err := decoder.Decode(&config); err != nil { - return err - } - var process *processArgs - if err := decoder.Decode(&process); err != nil { - return err - } - if err := finalizeSetns(config); err != nil { - return err - } - if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil { - return err - } - panic("unreachable") -} - -// finalize expects that the setns calls have been setup and that is has joined an -// existing namespace -func finalizeSetns(container *configs.Config) error { - // clear the current processes env and replace it with the environment defined on the container - if err := loadContainerEnvironment(container); err != nil { - return err - } - - if err := setupRlimits(container); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - - if err := finalizeNamespace(container); err != nil { - return err - } - - if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) - } - - if container.ProcessLabel != "" { - if err := label.SetProcessLabel(container.ProcessLabel); err != nil { - return err - } - } - - return nil -} - -// SetupContainer is run to setup mounts and networking related operations -// for a user namespace enabled process as a user namespace root doesn't -// have permissions to perform these operations. -// The setup process joins all the namespaces of user namespace enabled init -// except the user namespace, so it run as root in the root user namespace -// to perform these operations. 
-func SetupContainer(process *processArgs) error { - container := process.Config - networkState := process.NetworkState - - // TODO : move to validation - /* - rootfs, err := utils.ResolveRootfs(container.RootFs) - if err != nil { - return err - } - */ - - // clear the current processes env and replace it with the environment - // defined on the container - if err := loadContainerEnvironment(container); err != nil { - return err - } - - cloneFlags := container.Namespaces.CloneFlags() - if (cloneFlags & syscall.CLONE_NEWNET) == 0 { - if len(container.Networks) != 0 || len(container.Routes) != 0 { - return fmt.Errorf("unable to apply network parameters without network namespace") - } - } else { - if err := setupNetwork(container, networkState); err != nil { - return fmt.Errorf("setup networking %s", err) - } - if err := setupRoute(container); err != nil { - return fmt.Errorf("setup route %s", err) - } - } - - label.Init() - - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) != 0 { - if err := mount.InitializeMountNamespace(container); err != nil { - return fmt.Errorf("setup mount namespace %s", err) - } - } - return nil -} - -func enterCgroups(state *configs.State, pid int) error { - return cgroups.EnterPid(state.CgroupPaths, pid) +func (c *linuxContainer) enterCgroups(pid int) error { + return cgroups.EnterPid(c.state.CgroupPaths, pid) } diff --git a/linux_factory.go b/linux_factory.go index d7bf8a50c..02456c123 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -141,7 +141,7 @@ func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != "" pid := os.Getenv("_LIBCONTAINER_INITPID") if pid != "" && !setupUserns { - return InitIn(pipe) + return initIn(pipe) } defer func() { // if we have an error during the initialization of the container's init then send it back to the @@ -169,7 +169,7 @@ func (l *linuxFactory) 
StartInitialization(pipefd uintptr) (err error) { return err } if setupUserns { - err = SetupContainer(process) + err = setupContainer(process) if err == nil { os.Exit(0) } else { @@ -348,7 +348,6 @@ func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (e if config.WorkingDir == "" { config.WorkingDir = "/" } - if err := setupRlimits(config); err != nil { return fmt.Errorf("setup rlimits %s", err) } @@ -557,3 +556,117 @@ func joinExistingNamespaces(namespaces []configs.Namespace) error { } return nil } + +// setupContainer is run to setup mounts and networking related operations +// for a user namespace enabled process as a user namespace root doesn't +// have permissions to perform these operations. +// The setup process joins all the namespaces of user namespace enabled init +// except the user namespace, so it run as root in the root user namespace +// to perform these operations. +func setupContainer(process *processArgs) error { + container := process.Config + networkState := process.NetworkState + + // TODO : move to validation + /* + rootfs, err := utils.ResolveRootfs(container.Rootfs) + if err != nil { + return err + } + */ + + // clear the current processes env and replace it with the environment + // defined on the container + if err := loadContainerEnvironment(container); err != nil { + return err + } + + cloneFlags := container.Namespaces.CloneFlags() + if (cloneFlags & syscall.CLONE_NEWNET) == 0 { + if len(container.Networks) != 0 || len(container.Routes) != 0 { + return fmt.Errorf("unable to apply network parameters without network namespace") + } + } else { + if err := setupNetwork(container, networkState); err != nil { + return fmt.Errorf("setup networking %s", err) + } + if err := setupRoute(container); err != nil { + return fmt.Errorf("setup route %s", err) + } + } + + label.Init() + + // InitializeMountNamespace() can be executed only for a new mount namespace + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := 
mount.InitializeMountNamespace(container); err != nil { + return fmt.Errorf("setup mount namespace %s", err) + } + } + return nil +} + +// Finalize entering into a container and execute a specified command +func initIn(pipe *os.File) (err error) { + defer func() { + // if we have an error during the initialization of the container's init then send it back to the + // parent process in the form of an initError. + if err != nil { + // ensure that any data sent from the parent is consumed so it doesn't + // receive ECONNRESET when the child writes to the pipe. + ioutil.ReadAll(pipe) + if err := json.NewEncoder(pipe).Encode(initError{ + Message: err.Error(), + }); err != nil { + panic(err) + } + } + // ensure that this pipe is always closed + pipe.Close() + }() + decoder := json.NewDecoder(pipe) + var config *configs.Config + if err := decoder.Decode(&config); err != nil { + return err + } + var process *processArgs + if err := decoder.Decode(&process); err != nil { + return err + } + if err := finalizeSetns(config); err != nil { + return err + } + if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil { + return err + } + panic("unreachable") +} + +// finalize expects that the setns calls have been setup and that is has joined an +// existing namespace +func finalizeSetns(container *configs.Config) error { + // clear the current processes env and replace it with the environment defined on the container + if err := loadContainerEnvironment(container); err != nil { + return err + } + + if err := setupRlimits(container); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + + if err := finalizeNamespace(container); err != nil { + return err + } + + if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) + } + + if container.ProcessLabel != "" { + if err := label.SetProcessLabel(container.ProcessLabel); err != nil { + return err + } + } 
+ + return nil +} diff --git a/linux_factory_test.go b/linux_factory_test.go index fd1e830a7..a5f0b7e1b 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -86,7 +86,7 @@ func TestFactoryLoadContainer(t *testing.T) { var ( id = "1" expectedConfig = &configs.Config{ - RootFs: "/mycontainer/root", + Rootfs: "/mycontainer/root", } expectedState = &configs.State{ InitPid: 1024, @@ -119,8 +119,8 @@ func TestFactoryLoadContainer(t *testing.T) { config := container.Config() - if config.RootFs != expectedConfig.RootFs { - t.Fatalf("expected rootfs %q but received %q", expectedConfig.RootFs, config.RootFs) + if config.Rootfs != expectedConfig.Rootfs { + t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs) } lcontainer, ok := container.(*linuxContainer) diff --git a/mount/init.go b/mount/init.go index 646970d56..a017ea519 100644 --- a/mount/init.go +++ b/mount/init.go @@ -33,11 +33,11 @@ func InitializeMountNamespace(config *configs.Config) (err error) { } // apply any user specified mounts within the new mount namespace for _, m := range config.Mounts { - if err := m.Mount(config.RootFs, config.MountLabel); err != nil { + if err := m.Mount(config.Rootfs, config.MountLabel); err != nil { return err } } - if err := createDeviceNodes(config); err != nil { + if err := createDevices(config); err != nil { return err } if err := setupPtmx(config); err != nil { @@ -51,21 +51,21 @@ func InitializeMountNamespace(config *configs.Config) (err error) { return err } } - if err := setupDevSymlinks(config.RootFs); err != nil { + if err := setupDevSymlinks(config.Rootfs); err != nil { return err } - if err := syscall.Chdir(config.RootFs); err != nil { + if err := syscall.Chdir(config.Rootfs); err != nil { return err } if config.NoPivotRoot { - err = msMoveRoot(config.RootFs) + err = msMoveRoot(config.Rootfs) } else { - err = pivotRoot(config.RootFs, config.PivotDir) + err = pivotRoot(config.Rootfs, config.PivotDir) } if err != nil { return err } 
- if config.ReadonlyFs { + if config.Readonlyfs { if err := setReadonly(); err != nil { return fmt.Errorf("set readonly %s", err) } @@ -77,7 +77,7 @@ func InitializeMountNamespace(config *configs.Config) (err error) { // mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts // inside the mount namespace func mountSystem(config *configs.Config) error { - for _, m := range newSystemMounts(config.RootFs, config.MountLabel, config.RestrictSys) { + for _, m := range newSystemMounts(config.Rootfs, config.MountLabel, config.RestrictSys) { if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { return fmt.Errorf("mkdirall %s %s", m.path, err) } @@ -164,10 +164,10 @@ func reOpenDevNull(rootfs string) error { } // Create the device nodes in the container. -func createDeviceNodes(config *configs.Config) error { +func createDevices(config *configs.Config) error { oldMask := syscall.Umask(0000) - for _, node := range config.DeviceNodes { - if err := createDeviceNode(config.RootFs, node); err != nil { + for _, node := range config.Devices { + if err := createDeviceNode(config.Rootfs, node); err != nil { syscall.Umask(oldMask) return err } @@ -211,5 +211,5 @@ func prepareRoot(config *configs.Config) error { if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil { return err } - return syscall.Mount(config.RootFs, config.RootFs, "bind", syscall.MS_BIND|syscall.MS_REC, "") + return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "") } diff --git a/mount/ptmx.go b/mount/ptmx.go index 657318dc4..278f72997 100644 --- a/mount/ptmx.go +++ b/mount/ptmx.go @@ -12,7 +12,7 @@ import ( ) func setupPtmx(config *configs.Config) error { - ptmx := filepath.Join(config.RootFs, "dev/ptmx") + ptmx := filepath.Join(config.Rootfs, "dev/ptmx") if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err } @@ -28,7 +28,7 @@ func setupPtmx(config *configs.Config) error { if err != nil { return 
err } - return console.Setup(config.RootFs, config.Console, config.MountLabel, uid, gid) + return console.Setup(config.Rootfs, config.Console, config.MountLabel, uid, gid) } return nil } diff --git a/process.go b/process.go index ed228f791..c26b8bf59 100644 --- a/process.go +++ b/process.go @@ -2,19 +2,18 @@ package libcontainer import "io" -// Configuration for a process to be run inside a container. +// Process specifies the configuration and IO for a process inside +// a container. type Process struct { // The command to be run followed by any arguments. Args []string + // Stdin is a pointer to a reader which provides the standard input stream. + Stdin io.Reader + // Stdout is a pointer to a writer which receives the standard output stream. - // Stderr is a pointer to a writer which receives the standard error stream. - // - // If a reader or writer is nil, the input stream is assumed to be empty and the output is - // discarded. - // - // Stdout and Stderr may refer to the same writer in which case the output is interspersed. - Stdin io.Reader Stdout io.Writer + + // Stderr is a pointer to a writer which receives the standard error stream. 
Stderr io.Writer } diff --git a/sample_configs/apparmor.json b/sample_configs/apparmor.json index d044524fa..bc423f500 100644 --- a/sample_configs/apparmor.json +++ b/sample_configs/apparmor.json @@ -118,7 +118,7 @@ }, "restrict_sys": true, "apparmor_profile": "docker-default", - "device_nodes": [ + "devices": [ { "permissions": "rwm", "file_mode": 438, diff --git a/sample_configs/attach_to_bridge.json b/sample_configs/attach_to_bridge.json index 248a8a799..eb788691e 100644 --- a/sample_configs/attach_to_bridge.json +++ b/sample_configs/attach_to_bridge.json @@ -117,7 +117,7 @@ "parent": "docker" }, "restrict_sys": true, - "device_nodes": [ + "devices": [ { "permissions": "rwm", "file_mode": 438, diff --git a/sample_configs/host-pid.json b/sample_configs/host-pid.json index 61c3cf480..5ef8f78c1 100644 --- a/sample_configs/host-pid.json +++ b/sample_configs/host-pid.json @@ -117,7 +117,7 @@ "parent": "docker" }, "restrict_sys": true, - "device_nodes": [ + "devices": [ { "permissions": "rwm", "file_mode": 438, diff --git a/sample_configs/minimal.json b/sample_configs/minimal.json index 28b22cce4..d6ee044c5 100644 --- a/sample_configs/minimal.json +++ b/sample_configs/minimal.json @@ -117,7 +117,7 @@ "parent": "docker" }, "restrict_sys": true, - "device_nodes": [ + "devices": [ { "permissions": "rwm", "file_mode": 438, diff --git a/sample_configs/route_source_address_selection.json b/sample_configs/route_source_address_selection.json index b9c9ef7bd..2ade6d00d 100644 --- a/sample_configs/route_source_address_selection.json +++ b/sample_configs/route_source_address_selection.json @@ -117,7 +117,7 @@ "parent": "docker" }, "restrict_sys": true, - "device_nodes": [ + "devices": [ { "permissions": "rwm", "file_mode": 438, diff --git a/sample_configs/selinux.json b/sample_configs/selinux.json index a415c2574..4d6971a4d 100644 --- a/sample_configs/selinux.json +++ b/sample_configs/selinux.json @@ -119,7 +119,7 @@ "restrict_sys": true, "process_label": 
"system_u:system_r:svirt_lxc_net_t:s0:c164,c475", "mount_label": "system_u:system_r:svirt_lxc_net_t:s0:c164,c475", - "device_nodes": [ + "devices": [ { "permissions": "rwm", "file_mode": 438, diff --git a/sample_configs/userns.json b/sample_configs/userns.json index 1ebbad610..ad10b249d 100644 --- a/sample_configs/userns.json +++ b/sample_configs/userns.json @@ -117,7 +117,7 @@ "parent": "docker" }, "restrict_sys": true, - "device_nodes": [ + "devices": [ { "permissions": "rwm", "file_mode": 438, From 8850636eb3df747fc174c04fdba2a8bb447cf9be Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 6 Feb 2015 12:48:57 -0800 Subject: [PATCH 070/101] Refactor init actions into separate types Signed-off-by: Michael Crosby --- configs/validate/config.go | 75 +++++ error.go | 8 + linux_container.go | 93 ++++--- linux_factory.go | 517 ++--------------------------------- linux_init.go | 216 +++++++++++++++ linux_setns_init.go | 35 +++ linux_standard_init.go | 90 ++++++ linux_userns_init.go | 80 ++++++ linux_userns_sidecar_init.go | 37 +++ nsenter/nsexec.c | 11 +- stacktrace/capture.go | 2 - system/linux.go | 29 +- 12 files changed, 638 insertions(+), 555 deletions(-) create mode 100644 configs/validate/config.go create mode 100644 linux_init.go create mode 100644 linux_setns_init.go create mode 100644 linux_standard_init.go create mode 100644 linux_userns_init.go create mode 100644 linux_userns_sidecar_init.go diff --git a/configs/validate/config.go b/configs/validate/config.go new file mode 100644 index 000000000..6148e1eb9 --- /dev/null +++ b/configs/validate/config.go @@ -0,0 +1,75 @@ +package validate + +import ( + "fmt" + "path/filepath" + + "github.com/docker/libcontainer/configs" +) + +type Validator interface { + Validate(*configs.Config) error +} + +func New() Validator { + return &ConfigValidator{} +} + +type ConfigValidator struct { +} + +func (v *ConfigValidator) Validate(config *configs.Config) error { + if err := v.rootfs(config); err != nil { + return err + 
} + if err := v.network(config); err != nil { + return err + } + if err := v.hostname(config); err != nil { + return err + } + if err := v.security(config); err != nil { + return err + } + return nil +} + +// rootfs validates the the rootfs is an absolute path and is not a symlink +// to the container's root filesystem. +func (v *ConfigValidator) rootfs(config *configs.Config) error { + cleaned, err := filepath.Abs(config.Rootfs) + if err != nil { + return err + } + if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { + return err + } + if config.Rootfs != cleaned { + return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) + } + return nil +} + +func (v *ConfigValidator) network(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWNET) { + if len(config.Networks) > 0 || len(config.Routes) > 0 { + return fmt.Errorf("unable to apply network settings without a private NET namespace") + } + } + return nil +} + +func (v *ConfigValidator) hostname(config *configs.Config) error { + if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { + return fmt.Errorf("unable to set hostname without a private UTS namespace") + } + return nil +} + +func (v *ConfigValidator) security(config *configs.Config) error { + // restrict sys without mount namespace + if config.RestrictSys && !config.Namespaces.Contains(configs.NEWNS) { + return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") + } + return nil +} diff --git a/error.go b/error.go index 062943a11..31ebb3207 100644 --- a/error.go +++ b/error.go @@ -54,3 +54,11 @@ type Error interface { // Returns the error code for this error. 
Code() ErrorCode } + +type initError struct { + Message string `json:"message,omitempty"` +} + +func (i initError) Error() string { + return i.Message +} diff --git a/linux_container.go b/linux_container.go index bf6b6a09b..5b8e18a69 100644 --- a/linux_container.go +++ b/linux_container.go @@ -22,12 +22,8 @@ const ( EXIT_SIGNAL_OFFSET = 128 ) -type initError struct { - Message string `json:"message,omitempty"` -} - -func (i initError) Error() string { - return i.Message +type pid struct { + Pid int `json:"Pid"` } type linuxContainer struct { @@ -97,6 +93,21 @@ func (c *linuxContainer) Start(process *Process) (int, error) { if err != nil { return -1, err } + cmd := c.commandTemplate(process) + if status != configs.Destroyed { + // TODO: (crosbymichael) check out console use for execin + return c.startNewProcess(cmd, process.Args) + //return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) + } + if err := c.startInitialProcess(cmd, process.Args); err != nil { + return -1, err + } + return c.state.InitPid, nil +} + +// commandTemplate creates a template *exec.Cmd. It uses the init arguments provided +// to the factory and attaches IO to the process. +func (c *linuxContainer) commandTemplate(process *Process) *exec.Cmd { cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) 
cmd.Stdin = process.Stdin cmd.Stdout = process.Stdout @@ -108,32 +119,26 @@ func (c *linuxContainer) Start(process *Process) (int, error) { } // TODO: add pdeath to config for a container cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - if status != configs.Destroyed { - glog.Info("start new container process") - // TODO: (crosbymichael) check out console use for execin - //return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) - return c.startNewProcess(cmd, process.Args) - } - if err := c.startInitProcess(cmd, process.Args); err != nil { - return -1, err - } - return c.state.InitPid, nil + return cmd } +// startNewProcess adds another process to an already running container func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) { - var err error + glog.Info("start new container process") parent, child, err := newInitPipe() if err != nil { return -1, err } defer parent.Close() cmd.ExtraFiles = []*os.File{child} - cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid)) - if err := cmd.Start(); err != nil { - child.Close() + cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid), "_LIBCONTAINER_INITTYPE=setns") + + // start the command + err = cmd.Start() + child.Close() + if err != nil { return -1, err } - child.Close() s, err := cmd.Process.Wait() if err != nil { return -1, err @@ -152,29 +157,28 @@ func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, err } terminate := func(terr error) (int, error) { // TODO: log the errors for kill and wait - p.Kill() - p.Wait() + if err := p.Kill(); err != nil { + glog.Warning(err) + } + if _, err := p.Wait(); err != nil { + glog.Warning(err) + } return -1, terr } - // Enter cgroups. 
if err := c.enterCgroups(pid.Pid); err != nil { return terminate(err) } - encoder := json.NewEncoder(parent) - if err := encoder.Encode(c.config); err != nil { - return terminate(err) - } - process := processArgs{ + if err := json.NewEncoder(parent).Encode(&initConfig{ Config: c.config, Args: args, - } - if err := encoder.Encode(process); err != nil { + }); err != nil { return terminate(err) } return pid.Pid, nil } -func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { +func (c *linuxContainer) startInitialProcess(cmd *exec.Cmd, args []string) error { + glog.Info("starting container initial process") // create a pipe so that we can syncronize with the namespaced process and // pass the state and configuration to the child process parent, child, err := newInitPipe() @@ -184,6 +188,9 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { defer parent.Close() cmd.ExtraFiles = []*os.File{child} cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags() + cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard") + // if the container is configured to use user namespaces we have to setup the + // uid:gid mapping on the command. if c.config.Namespaces.Contains(configs.NEWUSER) { addUidGidMappings(cmd.SysProcAttr, c.config) // Default to root user when user namespaces are enabled. 
@@ -191,7 +198,6 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { cmd.SysProcAttr.Credential = &syscall.Credential{} } } - glog.Info("starting container init process") err = cmd.Start() child.Close() if err != nil { @@ -199,12 +205,15 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { } wait := func() (*os.ProcessState, error) { ps, err := cmd.Process.Wait() + if err != nil { + return nil, newGenericError(err, SystemError) + } // we should kill all processes in cgroup when init is died if we use // host PID namespace if !c.config.Namespaces.Contains(configs.NEWPID) { c.killAllPids() } - return ps, newGenericError(err, SystemError) + return ps, nil } terminate := func(terr error) error { // TODO: log the errors for kill and wait @@ -230,19 +239,19 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil { return terminate(err) } - process := processArgs{ + iconfig := &initConfig{ Args: args, Config: c.config, NetworkState: &networkState, } // Start the setup process to setup the init process if c.config.Namespaces.Contains(configs.NEWUSER) { - if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, &process, &networkState); err != nil { + if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, iconfig, &networkState); err != nil { return terminate(err) } } // send the state to the container's init process then shutdown writes for the parent - if err := json.NewEncoder(parent).Encode(process); err != nil { + if err := json.NewEncoder(parent).Encode(iconfig); err != nil { return terminate(err) } // shutdown writes for the parent side of the pipe @@ -258,12 +267,10 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, args []string) error { if ierr != nil { return terminate(ierr) } - c.state.InitPid = cmd.Process.Pid c.state.InitStartTime = started c.state.NetworkState = networkState 
c.state.CgroupPaths = c.cgroupManager.GetPaths() - return nil } @@ -386,7 +393,7 @@ func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.N return nil } -func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error { +func executeSetupCmd(args []string, ppid int, container *configs.Config, process *initConfig, networkState *configs.NetworkState) error { command := exec.Command(args[0], args[1:]...) parent, child, err := newInitPipe() if err != nil { @@ -397,7 +404,7 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process command.Dir = container.Rootfs command.Env = append(command.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), - fmt.Sprintf("_LIBCONTAINER_USERNS=1")) + fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_sidecar")) err = command.Start() child.Close() if err != nil { @@ -452,10 +459,6 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process return nil } -type pid struct { - Pid int `json:"Pid"` -} - func (c *linuxContainer) enterCgroups(pid int) error { return cgroups.EnterPid(c.state.CgroupPaths, pid) } diff --git a/linux_factory.go b/linux_factory.go index 02456c123..3bb1659a9 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -9,24 +9,12 @@ import ( "os" "path/filepath" "regexp" - "strings" - "syscall" "github.com/golang/glog" - "github.com/docker/libcontainer/apparmor" cgroups "github.com/docker/libcontainer/cgroups/manager" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/console" - "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/netlink" - "github.com/docker/libcontainer/network" - "github.com/docker/libcontainer/security/capabilities" - "github.com/docker/libcontainer/security/restrict" - "github.com/docker/libcontainer/system" - "github.com/docker/libcontainer/user" - 
"github.com/docker/libcontainer/utils" + "github.com/docker/libcontainer/configs/validate" ) const ( @@ -39,13 +27,6 @@ var ( maxIdLen = 1024 ) -// Process is used for transferring parameters from Exec() to Init() -type processArgs struct { - Args []string `json:"args,omitempty"` - Config *configs.Config `json:"config,omitempty"` - NetworkState *configs.NetworkState `json:"network_state,omitempty"` -} - // New returns a linux based container factory based in the root directory. func New(root string, initArgs []string) (Factory, error) { if root != "" { @@ -54,16 +35,18 @@ func New(root string, initArgs []string) (Factory, error) { } } return &linuxFactory{ - root: root, - initArgs: initArgs, + root: root, + initArgs: initArgs, + validator: validate.New(), }, nil } // linuxFactory implements the default factory interface for linux based systems. type linuxFactory struct { // root is the root directory - root string - initArgs []string + root string + initArgs []string + validator validate.Validator } func (l *linuxFactory) Create(id string, config *configs.Config) (Container, error) { @@ -73,6 +56,9 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err if err := l.validateID(id); err != nil { return nil, err } + if err := l.validator.Validate(config); err != nil { + return nil, newGenericError(err, ConfigInvalid) + } containerRoot := filepath.Join(l.root, id) if _, err := os.Stat(containerRoot); err == nil { return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse) @@ -96,14 +82,13 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err os.RemoveAll(containerRoot) return nil, newGenericError(err, SystemError) } - cgroupManager := cgroups.NewCgroupManager(config.Cgroups) return &linuxContainer{ id: id, root: containerRoot, config: config, initArgs: l.initArgs, state: &configs.State{}, - cgroupManager: cgroupManager, + cgroupManager: cgroups.NewCgroupManager(config.Cgroups), }, nil } 
@@ -137,12 +122,13 @@ func (l *linuxFactory) Load(id string) (Container, error) { // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { - pipe := os.NewFile(uintptr(pipefd), "pipe") - setupUserns := os.Getenv("_LIBCONTAINER_USERNS") != "" - pid := os.Getenv("_LIBCONTAINER_INITPID") - if pid != "" && !setupUserns { - return initIn(pipe) - } + var ( + pipe = os.NewFile(uintptr(pipefd), "pipe") + it = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) + ) + // clear the current process's environment to clean any libcontainer + // specific env vars. + os.Clearenv() defer func() { // if we have an error during the initialization of the container's init then send it back to the // parent process in the form of an initError. @@ -159,27 +145,11 @@ func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { // ensure that this pipe is always closed pipe.Close() }() - uncleanRootfs, err := os.Getwd() + i, err := newContainerInit(it, pipe) if err != nil { return err } - var process *processArgs - // We always read this as it is a way to sync with the parent as well - if err := json.NewDecoder(pipe).Decode(&process); err != nil { - return err - } - if setupUserns { - err = setupContainer(process) - if err == nil { - os.Exit(0) - } else { - os.Exit(1) - } - } - if process.Config.Namespaces.Contains(configs.NEWUSER) { - return l.initUserNs(uncleanRootfs, process) - } - return l.initDefault(uncleanRootfs, process) + return i.Init() } func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) { @@ -223,450 +193,3 @@ func (l *linuxFactory) validateID(id string) error { } return nil } - -func (l *linuxFactory) initDefault(uncleanRootfs string, process *processArgs) (err error) { - config := process.Config - networkState := 
process.NetworkState - - // TODO: move to validation - /* - rootfs, err := utils.ResolveRootfs(uncleanRootfs) - if err != nil { - return err - } - */ - - // clear the current processes env and replace it with the environment - // defined on the container - if err := loadContainerEnvironment(config); err != nil { - return err - } - // join any namespaces via a path to the namespace fd if provided - if err := joinExistingNamespaces(config.Namespaces); err != nil { - return err - } - if config.Console != "" { - if err := console.OpenAndDup(config.Console); err != nil { - return err - } - } - if _, err := syscall.Setsid(); err != nil { - return fmt.Errorf("setsid %s", err) - } - if config.Console != "" { - if err := system.Setctty(); err != nil { - return fmt.Errorf("setctty %s", err) - } - } - - cloneFlags := config.Namespaces.CloneFlags() - if (cloneFlags & syscall.CLONE_NEWNET) == 0 { - if len(config.Networks) != 0 || len(config.Routes) != 0 { - return fmt.Errorf("unable to apply network parameters without network namespace") - } - } else { - if err := setupNetwork(config, networkState); err != nil { - return fmt.Errorf("setup networking %s", err) - } - if err := setupRoute(config); err != nil { - return fmt.Errorf("setup route %s", err) - } - } - if err := setupRlimits(config); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - label.Init() - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) != 0 { - if err := mount.InitializeMountNamespace(config); err != nil { - return err - } - } - if config.Hostname != "" { - // TODO: (crosbymichael) move this to pre spawn validation - if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { - return fmt.Errorf("unable to set the hostname without UTS namespace") - } - if err := syscall.Sethostname([]byte(config.Hostname)); err != nil { - return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err) - } - } - if err := 
apparmor.ApplyProfile(config.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err) - } - if err := label.SetProcessLabel(config.ProcessLabel); err != nil { - return fmt.Errorf("set process label %s", err) - } - // TODO: (crosbymichael) make this configurable at the Config level - if config.RestrictSys { - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - return fmt.Errorf("unable to restrict access to kernel files without mount namespace") - } - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { - return err - } - } - pdeathSignal, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - if err := finalizeNamespace(config); err != nil { - return fmt.Errorf("finalize namespace %s", err) - } - // finalizeNamespace can change user/group which clears the parent death - // signal, so we restore it here. - if err := restoreParentDeathSignal(pdeathSignal); err != nil { - return fmt.Errorf("restore parent death signal %s", err) - } - return system.Execv(process.Args[0], process.Args[0:], config.Env) -} - -func (l *linuxFactory) initUserNs(uncleanRootfs string, process *processArgs) (err error) { - config := process.Config - // clear the current processes env and replace it with the environment - // defined on the config - if err := loadContainerEnvironment(config); err != nil { - return err - } - // join any namespaces via a path to the namespace fd if provided - if err := joinExistingNamespaces(config.Namespaces); err != nil { - return err - } - if config.Console != "" { - if err := console.OpenAndDup("/dev/console"); err != nil { - return err - } - } - if _, err := syscall.Setsid(); err != nil { - return fmt.Errorf("setsid %s", err) - } - if config.Console != "" { - if err := system.Setctty(); err != nil { - return fmt.Errorf("setctty %s", err) - } - } - if config.WorkingDir == "" { - config.WorkingDir = "/" - } 
- if err := setupRlimits(config); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - cloneFlags := config.Namespaces.CloneFlags() - if config.Hostname != "" { - // TODO: move validation - if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { - return fmt.Errorf("unable to set the hostname without UTS namespace") - } - if err := syscall.Sethostname([]byte(config.Hostname)); err != nil { - return fmt.Errorf("unable to sethostname %q: %s", config.Hostname, err) - } - } - if err := apparmor.ApplyProfile(config.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", config.AppArmorProfile, err) - } - if err := label.SetProcessLabel(config.ProcessLabel); err != nil { - return fmt.Errorf("set process label %s", err) - } - if config.RestrictSys { - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - return fmt.Errorf("unable to restrict access to kernel files without mount namespace") - } - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { - return err - } - } - pdeathSignal, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - if err := finalizeNamespace(config); err != nil { - return fmt.Errorf("finalize namespace %s", err) - } - // finalizeNamespace can change user/group which clears the parent death - // signal, so we restore it here. - if err := restoreParentDeathSignal(pdeathSignal); err != nil { - return fmt.Errorf("restore parent death signal %s", err) - } - return system.Execv(process.Args[0], process.Args[0:], config.Env) -} - -// restoreParentDeathSignal sets the parent death signal to old. 
-func restoreParentDeathSignal(old int) error { - if old == 0 { - return nil - } - current, err := system.GetParentDeathSignal() - if err != nil { - return fmt.Errorf("get parent death signal %s", err) - } - if old == current { - return nil - } - if err := system.ParentDeathSignal(uintptr(old)); err != nil { - return fmt.Errorf("set parent death signal %s", err) - } - // Signal self if parent is already dead. Does nothing if running in a new - // PID namespace, as Getppid will always return 0. - if syscall.Getppid() == 1 { - return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) - } - return nil -} - -// setupUser changes the groups, gid, and uid for the user inside the container -func setupUser(config *configs.Config) error { - // Set up defaults. - defaultExecUser := user.ExecUser{ - Uid: syscall.Getuid(), - Gid: syscall.Getgid(), - Home: "/", - } - passwdPath, err := user.GetPasswdPath() - if err != nil { - return err - } - groupPath, err := user.GetGroupPath() - if err != nil { - return err - } - execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) - if err != nil { - return fmt.Errorf("get supplementary groups %s", err) - } - suppGroups := append(execUser.Sgids, config.AdditionalGroups...) 
- if err := syscall.Setgroups(suppGroups); err != nil { - return fmt.Errorf("setgroups %s", err) - } - if err := system.Setgid(execUser.Gid); err != nil { - return fmt.Errorf("setgid %s", err) - } - if err := system.Setuid(execUser.Uid); err != nil { - return fmt.Errorf("setuid %s", err) - } - // if we didn't get HOME already, set it based on the user's HOME - if envHome := os.Getenv("HOME"); envHome == "" { - if err := os.Setenv("HOME", execUser.Home); err != nil { - return fmt.Errorf("set HOME %s", err) - } - } - return nil -} - -// setupVethNetwork uses the Network config if it is not nil to initialize -// the new veth interface inside the container for use by changing the name to eth0 -// setting the MTU and IP address along with the default gateway -func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error { - for _, config := range config.Networks { - strategy, err := network.GetStrategy(config.Type) - if err != nil { - return err - } - err1 := strategy.Initialize(config, networkState) - if err1 != nil { - return err1 - } - } - return nil -} - -func setupRoute(config *configs.Config) error { - for _, config := range config.Routes { - if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { - return err - } - } - return nil -} - -func setupRlimits(config *configs.Config) error { - for _, rlimit := range config.Rlimits { - l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} - if err := syscall.Setrlimit(rlimit.Type, l); err != nil { - return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) - } - } - return nil -} - -// finalizeNamespace drops the caps, sets the correct user -// and working dir, and closes any leaky file descriptors -// before execing the command inside the namespace -func finalizeNamespace(config *configs.Config) error { - // Ensure that all non-standard fds we may have accidentally - // inherited are marked close-on-exec so they stay out of 
the - // container - if err := utils.CloseExecFrom(3); err != nil { - return fmt.Errorf("close open file descriptors %s", err) - } - // drop capabilities in bounding set before changing user - if err := capabilities.DropBoundingSet(config.Capabilities); err != nil { - return fmt.Errorf("drop bounding set %s", err) - } - // preserve existing capabilities while we change users - if err := system.SetKeepCaps(); err != nil { - return fmt.Errorf("set keep caps %s", err) - } - if err := setupUser(config); err != nil { - return fmt.Errorf("setup user %s", err) - } - if err := system.ClearKeepCaps(); err != nil { - return fmt.Errorf("clear keep caps %s", err) - } - // drop all other capabilities - if err := capabilities.DropCapabilities(config.Capabilities); err != nil { - return fmt.Errorf("drop capabilities %s", err) - } - if config.WorkingDir != "" { - if err := syscall.Chdir(config.WorkingDir); err != nil { - return fmt.Errorf("chdir to %s %s", config.WorkingDir, err) - } - } - return nil -} - -func loadContainerEnvironment(config *configs.Config) error { - os.Clearenv() - for _, pair := range config.Env { - p := strings.SplitN(pair, "=", 2) - if len(p) < 2 { - return fmt.Errorf("invalid environment '%v'", pair) - } - if err := os.Setenv(p[0], p[1]); err != nil { - return err - } - } - return nil -} - -// joinExistingNamespaces gets all the namespace paths specified for the container and -// does a setns on the namespace fd so that the current process joins the namespace. 
-func joinExistingNamespaces(namespaces []configs.Namespace) error { - for _, ns := range namespaces { - if ns.Path != "" { - f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) - if err != nil { - return err - } - err = system.Setns(f.Fd(), uintptr(ns.Syscall())) - f.Close() - if err != nil { - return err - } - } - } - return nil -} - -// setupContainer is run to setup mounts and networking related operations -// for a user namespace enabled process as a user namespace root doesn't -// have permissions to perform these operations. -// The setup process joins all the namespaces of user namespace enabled init -// except the user namespace, so it run as root in the root user namespace -// to perform these operations. -func setupContainer(process *processArgs) error { - container := process.Config - networkState := process.NetworkState - - // TODO : move to validation - /* - rootfs, err := utils.ResolveRootfs(container.Rootfs) - if err != nil { - return err - } - */ - - // clear the current processes env and replace it with the environment - // defined on the container - if err := loadContainerEnvironment(container); err != nil { - return err - } - - cloneFlags := container.Namespaces.CloneFlags() - if (cloneFlags & syscall.CLONE_NEWNET) == 0 { - if len(container.Networks) != 0 || len(container.Routes) != 0 { - return fmt.Errorf("unable to apply network parameters without network namespace") - } - } else { - if err := setupNetwork(container, networkState); err != nil { - return fmt.Errorf("setup networking %s", err) - } - if err := setupRoute(container); err != nil { - return fmt.Errorf("setup route %s", err) - } - } - - label.Init() - - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) != 0 { - if err := mount.InitializeMountNamespace(container); err != nil { - return fmt.Errorf("setup mount namespace %s", err) - } - } - return nil -} - -// Finalize entering into a container and execute a specified command 
-func initIn(pipe *os.File) (err error) { - defer func() { - // if we have an error during the initialization of the container's init then send it back to the - // parent process in the form of an initError. - if err != nil { - // ensure that any data sent from the parent is consumed so it doesn't - // receive ECONNRESET when the child writes to the pipe. - ioutil.ReadAll(pipe) - if err := json.NewEncoder(pipe).Encode(initError{ - Message: err.Error(), - }); err != nil { - panic(err) - } - } - // ensure that this pipe is always closed - pipe.Close() - }() - decoder := json.NewDecoder(pipe) - var config *configs.Config - if err := decoder.Decode(&config); err != nil { - return err - } - var process *processArgs - if err := decoder.Decode(&process); err != nil { - return err - } - if err := finalizeSetns(config); err != nil { - return err - } - if err := system.Execv(process.Args[0], process.Args[0:], config.Env); err != nil { - return err - } - panic("unreachable") -} - -// finalize expects that the setns calls have been setup and that is has joined an -// existing namespace -func finalizeSetns(container *configs.Config) error { - // clear the current processes env and replace it with the environment defined on the container - if err := loadContainerEnvironment(container); err != nil { - return err - } - - if err := setupRlimits(container); err != nil { - return fmt.Errorf("setup rlimits %s", err) - } - - if err := finalizeNamespace(container); err != nil { - return err - } - - if err := apparmor.ApplyProfile(container.AppArmorProfile); err != nil { - return fmt.Errorf("set apparmor profile %s: %s", container.AppArmorProfile, err) - } - - if container.ProcessLabel != "" { - if err := label.SetProcessLabel(container.ProcessLabel); err != nil { - return err - } - } - - return nil -} diff --git a/linux_init.go b/linux_init.go new file mode 100644 index 000000000..b35257d0d --- /dev/null +++ b/linux_init.go @@ -0,0 +1,216 @@ +// +build linux + +package libcontainer + 
+import ( + "encoding/json" + "fmt" + "os" + "strings" + "syscall" + + "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/netlink" + "github.com/docker/libcontainer/network" + "github.com/docker/libcontainer/security/capabilities" + "github.com/docker/libcontainer/system" + "github.com/docker/libcontainer/user" + "github.com/docker/libcontainer/utils" +) + +type initType string + +const ( + initSetns initType = "setns" + initStandard initType = "standard" + initUserns initType = "userns" + initUsernsSideCar initType = "userns_sidecar" +) + +// initConfig is used for transferring parameters from Exec() to Init() +type initConfig struct { + Args []string `json:"args,omitempty"` + Config *configs.Config `json:"config,omitempty"` + NetworkState *configs.NetworkState `json:"network_state,omitempty"` +} + +type initer interface { + Init() error +} + +func newContainerInit(t initType, pipe *os.File) (initer, error) { + var config *initConfig + if err := json.NewDecoder(pipe).Decode(&config); err != nil { + return nil, err + } + if err := populateProcessEnvironment(config.Config.Env); err != nil { + return nil, err + } + switch t { + case initSetns: + return &linuxSetnsInit{ + args: config.Args, + config: config.Config, + }, nil + case initUserns: + return &linuxUsernsInit{ + args: config.Args, + config: config.Config, + }, nil + case initUsernsSideCar: + return &linuxUsernsSideCar{ + config: config.Config, + network: config.NetworkState, + }, nil + case initStandard: + return &linuxStandardInit{ + config: config, + }, nil + } + return nil, fmt.Errorf("unknown init type %q", t) +} + +// populateProcessEnvironment loads the provided environment variables into the +// current process's environment. 
+func populateProcessEnvironment(env []string) error { + for _, pair := range env { + p := strings.SplitN(pair, "=", 2) + if len(p) < 2 { + return fmt.Errorf("invalid environment '%v'", pair) + } + if err := os.Setenv(p[0], p[1]); err != nil { + return err + } + } + return nil +} + +// finalizeNamespace drops the caps, sets the correct user +// and working dir, and closes any leaky file descriptors +// before execing the command inside the namespace +func finalizeNamespace(config *configs.Config) error { + // Ensure that all non-standard fds we may have accidentally + // inherited are marked close-on-exec so they stay out of the + // container + if err := utils.CloseExecFrom(3); err != nil { + return err + } + // drop capabilities in bounding set before changing user + if err := capabilities.DropBoundingSet(config.Capabilities); err != nil { + return err + } + // preserve existing capabilities while we change users + if err := system.SetKeepCaps(); err != nil { + return err + } + if err := setupUser(config); err != nil { + return err + } + if err := system.ClearKeepCaps(); err != nil { + return err + } + // drop all other capabilities + if err := capabilities.DropCapabilities(config.Capabilities); err != nil { + return err + } + if config.WorkingDir != "" { + if err := syscall.Chdir(config.WorkingDir); err != nil { + return fmt.Errorf("chdir to %s %s", config.WorkingDir, err) + } + } + return nil +} + +// joinExistingNamespaces gets all the namespace paths specified for the container and +// does a setns on the namespace fd so that the current process joins the namespace. 
+func joinExistingNamespaces(namespaces []configs.Namespace) error { + for _, ns := range namespaces { + if ns.Path != "" { + f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) + if err != nil { + return err + } + err = system.Setns(f.Fd(), uintptr(ns.Syscall())) + f.Close() + if err != nil { + return err + } + } + } + return nil +} + +// setupUser changes the groups, gid, and uid for the user inside the container +func setupUser(config *configs.Config) error { + // Set up defaults. + defaultExecUser := user.ExecUser{ + Uid: syscall.Getuid(), + Gid: syscall.Getgid(), + Home: "/", + } + passwdPath, err := user.GetPasswdPath() + if err != nil { + return err + } + groupPath, err := user.GetGroupPath() + if err != nil { + return err + } + execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) + if err != nil { + return fmt.Errorf("get supplementary groups %s", err) + } + suppGroups := append(execUser.Sgids, config.AdditionalGroups...) 
+ if err := syscall.Setgroups(suppGroups); err != nil { + return fmt.Errorf("setgroups %s", err) + } + if err := system.Setgid(execUser.Gid); err != nil { + return fmt.Errorf("setgid %s", err) + } + if err := system.Setuid(execUser.Uid); err != nil { + return fmt.Errorf("setuid %s", err) + } + // if we didn't get HOME already, set it based on the user's HOME + if envHome := os.Getenv("HOME"); envHome == "" { + if err := os.Setenv("HOME", execUser.Home); err != nil { + return fmt.Errorf("set HOME %s", err) + } + } + return nil +} + +// setupNetwork initializes each entry in config.Networks: it looks up the +// strategy for the entry's Type via network.GetStrategy and calls the strategy's +// Initialize with that entry and networkState, returning the first error +func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error { + for _, config := range config.Networks { + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err + } + err1 := strategy.Initialize(config, 
networkState) + if err1 != nil { + return err1 + } + } + return nil +} + +func setupRoute(config *configs.Config) error { + for _, config := range config.Routes { + if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil { + return err + } + } + return nil +} + +func setupRlimits(config *configs.Config) error { + for _, rlimit := range config.Rlimits { + l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} + if err := syscall.Setrlimit(rlimit.Type, l); err != nil { + return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) + } + } + return nil +} diff --git a/linux_setns_init.go b/linux_setns_init.go new file mode 100644 index 000000000..356e41b09 --- /dev/null +++ b/linux_setns_init.go @@ -0,0 +1,35 @@ +// +build linux + +package libcontainer + +import ( + "github.com/docker/libcontainer/apparmor" + "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/system" +) + +// linuxSetnsInit performs the container's initialization for running a new process +// inside an existing container. 
+type linuxSetnsInit struct { + args []string + config *configs.Config +} + +func (l *linuxSetnsInit) Init() error { + if err := setupRlimits(l.config); err != nil { + return err + } + if err := finalizeNamespace(l.config); err != nil { + return err + } + if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + return err + } + if l.config.ProcessLabel != "" { + if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { + return err + } + } + return system.Execv(l.args[0], l.args[0:], l.config.Env) +} diff --git a/linux_standard_init.go b/linux_standard_init.go new file mode 100644 index 000000000..6c0a4661f --- /dev/null +++ b/linux_standard_init.go @@ -0,0 +1,90 @@ +// +build linux + +package libcontainer + +import ( + "syscall" + + "github.com/docker/libcontainer/apparmor" + "github.com/docker/libcontainer/configs" + consolepkg "github.com/docker/libcontainer/console" + "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/mount" + "github.com/docker/libcontainer/security/restrict" + "github.com/docker/libcontainer/system" +) + +type linuxStandardInit struct { + config *initConfig +} + +func (l *linuxStandardInit) Init() error { + // join any namespaces via a path to the namespace fd if provided + if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { + return err + } + console := l.config.Config.Console + if console != "" { + if err := consolepkg.OpenAndDup(console); err != nil { + return err + } + } + if _, err := syscall.Setsid(); err != nil { + return err + } + if console != "" { + if err := system.Setctty(); err != nil { + return err + } + } + if err := setupNetwork(l.config.Config, l.config.NetworkState); err != nil { + return err + } + if err := setupRoute(l.config.Config); err != nil { + return err + } + if err := setupRlimits(l.config.Config); err != nil { + return err + } + label.Init() + // InitializeMountNamespace() can be executed only for a new mount namespace + if 
l.config.Config.Namespaces.Contains(configs.NEWNS) { + if err := mount.InitializeMountNamespace(l.config.Config); err != nil { + return err + } + } + if hostname := l.config.Config.Hostname; hostname != "" { + if err := syscall.Sethostname([]byte(hostname)); err != nil { + return err + } + } + if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { + return err + } + if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { + return err + } + if l.config.Config.RestrictSys { + if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + return err + } + } + pdeath, err := system.GetParentDeathSignal() + if err != nil { + return err + } + if err := finalizeNamespace(l.config.Config); err != nil { + return err + } + // finalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. + if err := pdeath.Restore(); err != nil { + return err + } + // Signal self if parent is already dead. Does nothing if running in a new + // PID namespace, as Getppid will always return 0. 
+ if syscall.Getppid() == 1 { + return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) + } + return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Config.Env) +} diff --git a/linux_userns_init.go b/linux_userns_init.go new file mode 100644 index 000000000..cee2ebb5f --- /dev/null +++ b/linux_userns_init.go @@ -0,0 +1,80 @@ +// +build linux + +package libcontainer + +import ( + "syscall" + + "github.com/docker/libcontainer/apparmor" + "github.com/docker/libcontainer/configs" + consolepkg "github.com/docker/libcontainer/console" + "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/security/restrict" + "github.com/docker/libcontainer/system" +) + +type linuxUsernsInit struct { + args []string + config *configs.Config +} + +func (l *linuxUsernsInit) Init() error { + // join any namespaces via a path to the namespace fd if provided + if err := joinExistingNamespaces(l.config.Namespaces); err != nil { + return err + } + console := l.config.Console + if console != "" { + if err := consolepkg.OpenAndDup("/dev/console"); err != nil { + return err + } + } + if _, err := syscall.Setsid(); err != nil { + return err + } + if console != "" { + if err := system.Setctty(); err != nil { + return err + } + } + if l.config.WorkingDir == "" { + l.config.WorkingDir = "/" + } + if err := setupRlimits(l.config); err != nil { + return err + } + if hostname := l.config.Hostname; hostname != "" { + if err := syscall.Sethostname([]byte(hostname)); err != nil { + return err + } + } + if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + return err + } + if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { + return err + } + if l.config.RestrictSys { + if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + return err + } + } + pdeath, err := system.GetParentDeathSignal() + if err != nil { + return err + } + if err := finalizeNamespace(l.config); err != nil { + return err + } + 
// finalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. + if err := pdeath.Restore(); err != nil { + return err + } + // Signal self if parent is already dead. Does nothing if running in a new + // PID namespace, as Getppid will always return 0. + if syscall.Getppid() == 1 { + return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) + } + return system.Execv(l.args[0], l.args[0:], l.config.Env) +} diff --git a/linux_userns_sidecar_init.go b/linux_userns_sidecar_init.go new file mode 100644 index 000000000..047a5b7f9 --- /dev/null +++ b/linux_userns_sidecar_init.go @@ -0,0 +1,37 @@ +// +build linux + +package libcontainer + +import ( + "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/label" + "github.com/docker/libcontainer/mount" +) + +// linuxUsernsSideCar is run to setup mounts and networking related operations +// for a user namespace enabled process as a user namespace root doesn't +// have permissions to perform these operations. +// The setup process joins all the namespaces of user namespace enabled init +// except the user namespace, so it run as root in the root user namespace +// to perform these operations. 
+type linuxUsernsSideCar struct { + config *configs.Config + network *configs.NetworkState +} + +func (l *linuxUsernsSideCar) Init() error { + if err := setupNetwork(l.config, l.network); err != nil { + return err + } + if err := setupRoute(l.config); err != nil { + return err + } + label.Init() + // InitializeMountNamespace() can be executed only for a new mount namespace + if l.config.Namespaces.Contains(configs.NEWNET) { + if err := mount.InitializeMountNamespace(l.config); err != nil { + return err + } + } + return nil +} diff --git a/nsenter/nsexec.c b/nsenter/nsexec.c index 426dfc575..5b62729a3 100644 --- a/nsenter/nsexec.c +++ b/nsenter/nsexec.c @@ -22,14 +22,14 @@ struct clone_arg { * Reserve some space for clone() to locate arguments * and retcode in this place */ - char stack[4096] __attribute__((aligned (8))); + char stack[4096] __attribute__ ((aligned(8))); char stack_ptr[0]; jmp_buf *env; }; static int child_func(void *_arg) { - struct clone_arg *arg = (struct clone_arg *) _arg; + struct clone_arg *arg = (struct clone_arg *)_arg; longjmp(*arg->env, 1); } @@ -47,8 +47,8 @@ int setns(int fd, int nstype) #endif #endif -static int clone_parent(jmp_buf *env) __attribute__ ((noinline)); -static int clone_parent(jmp_buf *env) +static int clone_parent(jmp_buf * env) __attribute__ ((noinline)); +static int clone_parent(jmp_buf * env) { struct clone_arg ca; int child; @@ -100,7 +100,8 @@ void nsexec() fd = openat(tfd, namespaces[i], O_RDONLY); if (fd == -1) { - pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]); + pr_perror("Failed to open ns file %s for ns %s", buf, + namespaces[i]); exit(1); } // Set the namespace. 
diff --git a/stacktrace/capture.go b/stacktrace/capture.go index 9fc75f8a0..15b3482cc 100644 --- a/stacktrace/capture.go +++ b/stacktrace/capture.go @@ -10,7 +10,6 @@ func Capture(userSkip int) Stacktrace { skip = userSkip + 1 // add one for our own function frames []Frame ) - for i := skip; ; i++ { pc, file, line, ok := runtime.Caller(i) if !ok { @@ -18,7 +17,6 @@ func Capture(userSkip int) Stacktrace { } frames = append(frames, NewFrame(pc, file, line)) } - return Stacktrace{ Frames: frames, } diff --git a/system/linux.go b/system/linux.go index c07ef1532..2cc3ef803 100644 --- a/system/linux.go +++ b/system/linux.go @@ -8,6 +8,26 @@ import ( "unsafe" ) +type ParentDeathSignal int + +func (p ParentDeathSignal) Restore() error { + if p == 0 { + return nil + } + current, err := GetParentDeathSignal() + if err != nil { + return err + } + if p == current { + return nil + } + return p.Set() +} + +func (p ParentDeathSignal) Set() error { + return SetParentDeathSignal(uintptr(p)) +} + func Execv(cmd string, args []string, env []string) error { name, err := exec.LookPath(cmd) if err != nil { @@ -17,23 +37,20 @@ func Execv(cmd string, args []string, env []string) error { return syscall.Exec(name, args, env) } -func ParentDeathSignal(sig uintptr) error { +func SetParentDeathSignal(sig uintptr) error { if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 { return err } return nil } -func GetParentDeathSignal() (int, error) { +func GetParentDeathSignal() (ParentDeathSignal, error) { var sig int - _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0) - if err != 0 { return -1, err } - - return sig, nil + return ParentDeathSignal(sig), nil } func SetKeepCaps() error { From 58023ad32f99366c05b1fb1168b72464bc4f4fe5 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 6 Feb 2015 18:50:11 -0800 Subject: [PATCH 071/101] Add parent death signal to the container config 
Signed-off-by: Michael Crosby --- configs/config.go | 4 ++++ linux_container.go | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/configs/config.go b/configs/config.go index 0f939ade5..bf7daea0b 100644 --- a/configs/config.go +++ b/configs/config.go @@ -21,6 +21,10 @@ type Config struct { // This is a common option when the container is running in ramdisk NoPivotRoot bool `json:"no_pivot_root,omitempty"` + // ParentDeathSignal specifies the signal that is sent to the container's process in the case + // that the parent process dies. + ParentDeathSignal int `json:"parent_death_signal,omitempty"` + // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. 
diff --git a/linux_container.go b/linux_container.go index 5b8e18a69..d30b5ba5b 100644 --- a/linux_container.go +++ b/linux_container.go @@ -117,8 +117,7 @@ func (c *linuxContainer) commandTemplate(process *Process) *exec.Cmd { if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} } - // TODO: add pdeath to config for a container - cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal) return cmd } From 21bb5ccc4f112c486029bbfbf4aa27818684545c Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 6 Feb 2015 19:16:11 -0800 Subject: [PATCH 072/101] Move environment configuration to Process Signed-off-by: Michael Crosby --- configs/config.go | 5 ----- integration/exec_test.go | 2 ++ integration/template_test.go | 17 ++++++++-------- integration/utils_test.go | 1 + linux_container.go | 39 ++++++++++++++++++------------------ linux_init.go | 6 +++++- linux_setns_init.go | 3 ++- linux_standard_init.go | 3 ++- linux_userns_init.go | 3 ++- process.go | 10 ++++++++- utils/utils.go | 13 ++++++++++++ 11 files changed, 64 insertions(+), 38 deletions(-) diff --git a/configs/config.go b/configs/config.go index bf7daea0b..ca433b288 100644 --- a/configs/config.go +++ b/configs/config.go @@ -55,11 +55,6 @@ type Config struct { // WorkingDir will change the processes current working directory inside the container's rootfs WorkingDir string `json:"working_dir,omitempty"` - // Env will populate the processes environment with the provided values - // Any values from the parent processes will be cleared before the values - // provided in Env are provided to the process - Env []string `json:"environment,omitempty"` - // Console is the path to the console allocated to the container. 
Console string `json:"console,omitempty"` diff --git a/integration/exec_test.go b/integration/exec_test.go index 0f93957c7..6d68b4bab 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -261,6 +261,7 @@ func TestEnter(t *testing.T) { pconfig := libcontainer.Process{ Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}, + Env: standardEnvironment, Stdin: stdinR, Stdout: &stdout, } @@ -361,6 +362,7 @@ func TestFreeze(t *testing.T) { pconfig := libcontainer.Process{ Args: []string{"cat"}, + Env: standardEnvironment, Stdin: stdinR, } pid, err := container.Start(&pconfig) diff --git a/integration/template_test.go b/integration/template_test.go index 28160c986..1e7c418f0 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -6,6 +6,13 @@ import ( "github.com/docker/libcontainer/configs" ) +var standardEnvironment = []string{ + "HOME=/root", + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOSTNAME=integration", + "TERM=xterm", +} + // newTemplateConfig returns a base template for running a container // // it uses a network strategy of just setting a loopback interface @@ -43,14 +50,8 @@ func newTemplateConfig(rootfs string) *configs.Config { AllowedDevices: configs.DefaultAllowedDevices, }, - Devices: configs.DefaultAutoCreatedDevices, - Hostname: "integration", - Env: []string{ - "HOME=/root", - "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "HOSTNAME=integration", - "TERM=xterm", - }, + Devices: configs.DefaultAutoCreatedDevices, + Hostname: "integration", Networks: []*configs.Network{ { Type: "loopback", diff --git a/integration/utils_test.go b/integration/utils_test.go index 75cba52b1..e3fdb779a 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -93,6 +93,7 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe process := &libcontainer.Process{ Args: args, + Env: standardEnvironment, Stdin: buffers.Stdin, 
Stdout: buffers.Stdout, Stderr: buffers.Stderr, diff --git a/linux_container.go b/linux_container.go index d30b5ba5b..7e73b0573 100644 --- a/linux_container.go +++ b/linux_container.go @@ -18,10 +18,6 @@ import ( "github.com/golang/glog" ) -const ( - EXIT_SIGNAL_OFFSET = 128 -) - type pid struct { Pid int `json:"Pid"` } @@ -50,8 +46,7 @@ func (c *linuxContainer) Status() (configs.Status, error) { return configs.Destroyed, nil } // return Running if the init process is alive - err := syscall.Kill(c.state.InitPid, 0) - if err != nil { + if err := syscall.Kill(c.state.InitPid, 0); err != nil { if err == syscall.ESRCH { return configs.Destroyed, nil } @@ -96,10 +91,9 @@ func (c *linuxContainer) Start(process *Process) (int, error) { cmd := c.commandTemplate(process) if status != configs.Destroyed { // TODO: (crosbymichael) check out console use for execin - return c.startNewProcess(cmd, process.Args) - //return namespaces.ExecIn(process.Args, c.config.Env, "", cmd, c.config, c.state) + return c.startNewProcess(cmd, process) } - if err := c.startInitialProcess(cmd, process.Args); err != nil { + if err := c.startInitialProcess(cmd, process); err != nil { return -1, err } return c.state.InitPid, nil @@ -112,7 +106,7 @@ func (c *linuxContainer) commandTemplate(process *Process) *exec.Cmd { cmd.Stdin = process.Stdin cmd.Stdout = process.Stdout cmd.Stderr = process.Stderr - cmd.Env = c.config.Env + cmd.Env = process.Env cmd.Dir = c.config.Rootfs if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} @@ -122,9 +116,9 @@ func (c *linuxContainer) commandTemplate(process *Process) *exec.Cmd { } // startNewProcess adds another process to an already running container -func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, error) { +func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, process *Process) (int, error) { glog.Info("start new container process") - parent, child, err := newInitPipe() + parent, child, err := c.newInitPipe() if err != 
nil { return -1, err } @@ -169,18 +163,20 @@ func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, args []string) (int, err } if err := json.NewEncoder(parent).Encode(&initConfig{ Config: c.config, - Args: args, + Args: process.Args, + Env: process.Env, }); err != nil { return terminate(err) } return pid.Pid, nil } -func (c *linuxContainer) startInitialProcess(cmd *exec.Cmd, args []string) error { +// startInitialProcess starts PID 1 for the container. +func (c *linuxContainer) startInitialProcess(cmd *exec.Cmd, process *Process) error { glog.Info("starting container initial process") // create a pipe so that we can syncronize with the namespaced process and // pass the state and configuration to the child process - parent, child, err := newInitPipe() + parent, child, err := c.newInitPipe() if err != nil { return err } @@ -239,13 +235,14 @@ func (c *linuxContainer) startInitialProcess(cmd *exec.Cmd, args []string) error return terminate(err) } iconfig := &initConfig{ - Args: args, + Args: process.Args, Config: c.config, + Env: process.Env, NetworkState: &networkState, } // Start the setup process to setup the init process if c.config.Namespaces.Contains(configs.NEWUSER) { - if err = executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, iconfig, &networkState); err != nil { + if err = c.executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, iconfig, &networkState); err != nil { return terminate(err) } } @@ -281,6 +278,7 @@ func (c *linuxContainer) Destroy() error { if status != configs.Destroyed { return newGenericError(nil, ContainerNotStopped) } + // TODO: remove cgroups return os.RemoveAll(c.root) } @@ -297,6 +295,7 @@ func (c *linuxContainer) Signal(signal os.Signal) error { panic("not implemented") } +// TODO: rename to be more descriptive func (c *linuxContainer) OOM() (<-chan struct{}, error) { return NotifyOnOOM(c.state) } @@ -322,7 +321,7 @@ func (c *linuxContainer) updateStateFile() error { } // New returns a newly initialized Pipe for communication between 
processes -func newInitPipe() (parent *os.File, child *os.File, err error) { +func (c *linuxContainer) newInitPipe() (parent *os.File, child *os.File, err error) { fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) if err != nil { return nil, nil, err @@ -392,9 +391,9 @@ func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.N return nil } -func executeSetupCmd(args []string, ppid int, container *configs.Config, process *initConfig, networkState *configs.NetworkState) error { +func (c *linuxContainer) executeSetupCmd(args []string, ppid int, container *configs.Config, process *initConfig, networkState *configs.NetworkState) error { command := exec.Command(args[0], args[1:]...) - parent, child, err := newInitPipe() + parent, child, err := c.newInitPipe() if err != nil { return err } diff --git a/linux_init.go b/linux_init.go index b35257d0d..daed34e92 100644 --- a/linux_init.go +++ b/linux_init.go @@ -30,6 +30,7 @@ const ( // Process is used for transferring parameters from Exec() to Init() type initConfig struct { Args []string `json:"args,omitempty"` + Env []string `json:"env,omitempty"` Config *configs.Config `json:"config,omitempty"` NetworkState *configs.NetworkState `json:"network_state,omitempty"` } @@ -43,18 +44,20 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) { if err := json.NewDecoder(pipe).Decode(&config); err != nil { return nil, err } - if err := populateProcessEnvironment(config.Config.Env); err != nil { + if err := populateProcessEnvironment(config.Env); err != nil { return nil, err } switch t { case initSetns: return &linuxSetnsInit{ args: config.Args, + env: config.Env, config: config.Config, }, nil case initUserns: return &linuxUsernsInit{ args: config.Args, + env: config.Env, config: config.Config, }, nil case initUsernsSideCar: @@ -65,6 +68,7 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) { case initStandard: return &linuxStandardInit{ 
config: config, + env: config.Env, }, nil } return nil, fmt.Errorf("unknown init type %q", t) diff --git a/linux_setns_init.go b/linux_setns_init.go index 356e41b09..a9a1d27cd 100644 --- a/linux_setns_init.go +++ b/linux_setns_init.go @@ -13,6 +13,7 @@ import ( // inside an existing container. type linuxSetnsInit struct { args []string + env []string config *configs.Config } @@ -31,5 +32,5 @@ func (l *linuxSetnsInit) Init() error { return err } } - return system.Execv(l.args[0], l.args[0:], l.config.Env) + return system.Execv(l.args[0], l.args[0:], l.env) } diff --git a/linux_standard_init.go b/linux_standard_init.go index 6c0a4661f..b497ec80d 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -16,6 +16,7 @@ import ( type linuxStandardInit struct { config *initConfig + env []string } func (l *linuxStandardInit) Init() error { @@ -86,5 +87,5 @@ func (l *linuxStandardInit) Init() error { if syscall.Getppid() == 1 { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } - return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Config.Env) + return system.Execv(l.config.Args[0], l.config.Args[0:], l.env) } diff --git a/linux_userns_init.go b/linux_userns_init.go index cee2ebb5f..b44c191e2 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -15,6 +15,7 @@ import ( type linuxUsernsInit struct { args []string + env []string config *configs.Config } @@ -76,5 +77,5 @@ func (l *linuxUsernsInit) Init() error { if syscall.Getppid() == 1 { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } - return system.Execv(l.args[0], l.args[0:], l.config.Env) + return system.Execv(l.args[0], l.args[0:], l.env) } diff --git a/process.go b/process.go index c26b8bf59..e80dda0c8 100644 --- a/process.go +++ b/process.go @@ -1,6 +1,9 @@ package libcontainer -import "io" +import ( + "io" + "os/exec" +) // Process specifies the configuration and IO for a process inside // a container. 
@@ -8,6 +11,9 @@ type Process struct { // The command to be run followed by any arguments. Args []string + // Env specifies the environment variables for the process. + Env []string + // Stdin is a pointer to a reader which provides the standard input stream. Stdin io.Reader @@ -16,4 +22,6 @@ type Process struct { // Stderr is a pointer to a writer which receives the standard error stream. Stderr io.Writer + + cmd *exec.Cmd } diff --git a/utils/utils.go b/utils/utils.go index 76184ce00..094bce530 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -10,6 +10,10 @@ import ( "syscall" ) +const ( + exitSignalOffset = 128 +) + // GenerateRandomName returns a new name joined with a prefix. This size // specified is used to truncate the randomly generated value func GenerateRandomName(prefix string, size int) (string, error) { @@ -53,3 +57,12 @@ func CloseExecFrom(minFd int) error { } return nil } + +// ExitStatus returns the correct exit status for a process based on if it +// was signaled or existed cleanly. 
+func ExitStatus(status syscall.WaitStatus) int { + if status.Signaled() { + return exitSignalOffset + int(status.Signal()) + } + return status.ExitStatus() +} From 0c1919c4277d4c178af74f00645f508c03d32486 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 6 Feb 2015 21:12:27 -0800 Subject: [PATCH 073/101] Refactor parent processes into types Signed-off-by: Michael Crosby --- configs/config_test.go | 5 - configs/network.go | 9 +- configs/state.go | 39 +--- integration/exec_test.go | 6 +- integration/utils_test.go | 34 +-- linux_container.go | 425 +++++++++-------------------------- linux_container_test.go | 1 - linux_factory.go | 39 +++- linux_factory_test.go | 20 +- linux_init.go | 24 +- linux_process.go | 262 +++++++++++++++++++++ linux_standard_init.go | 2 +- linux_userns_sidecar_init.go | 5 +- network/loopback.go | 4 +- network/stats.go | 12 +- network/strategy.go | 4 +- network/types.go | 1 - network/veth.go | 54 +---- network/veth_test.go | 53 ----- notify_linux.go | 5 +- notify_linux_test.go | 10 +- process.go | 7 +- 22 files changed, 462 insertions(+), 559 deletions(-) create mode 100644 linux_process.go delete mode 100644 network/types.go delete mode 100644 network/veth_test.go diff --git a/configs/config_test.go b/configs/config_test.go index 2128d020b..826aa6c3b 100644 --- a/configs/config_test.go +++ b/configs/config_test.go @@ -94,11 +94,6 @@ func TestConfigJsonFormat(t *testing.T) { t.Fail() } - if n.VethPrefix != "veth" { - t.Logf("veth prefix should be veth but received %q", n.VethPrefix) - t.Fail() - } - if n.Gateway != "172.17.42.1" { t.Logf("veth gateway should be 172.17.42.1 but received %q", n.Gateway) t.Fail() diff --git a/configs/network.go b/configs/network.go index edcc4dd4d..fdccce50c 100644 --- a/configs/network.go +++ b/configs/network.go @@ -11,9 +11,6 @@ type Network struct { // The bridge to use. Bridge string `json:"bridge,omitempty"` - // Prefix for the veth interfaces. 
- VethPrefix string `json:"veth_prefix,omitempty"` - // MacAddress contains the MAC address to set on the network interface MacAddress string `json:"mac_address,omitempty"` @@ -38,6 +35,12 @@ type Network struct { // container's interfaces if a pair is created, specifically in the case of type veth // Note: This does not apply to loopback interfaces. TxQueueLen int `json:"txqueuelen,omitempty"` + + // The name of the veth interface on the Host. + VethHost string `json:"veth_host,omitempty"` + + // The name of the veth interface created inside the container for the child. + VethChild string `json:"veth_child,omitempty"` } // Routes can be specified to create entries in the route table as the container is started diff --git a/configs/state.go b/configs/state.go index 27122c441..c1e8adcf7 100644 --- a/configs/state.go +++ b/configs/state.go @@ -1,32 +1,5 @@ package configs -// State represents a running container's state -type State struct { - // InitPid is the init process id in the parent namespace - InitPid int `json:"init_pid,omitempty"` - - // InitStartTime is the init process start time - InitStartTime string `json:"init_start_time,omitempty"` - - // Network runtime state. - NetworkState NetworkState `json:"network_state,omitempty"` - - // Path to all the cgroups setup for a container. Key is cgroup subsystem name. - CgroupPaths map[string]string `json:"cgroup_paths,omitempty"` - - Status Status `json:"status,omitempty"` -} - -// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers -// Do not depend on it outside of libcontainer. -// TODO: move veth names to config time -type NetworkState struct { - // The name of the veth interface on the Host. - VethHost string `json:"veth_host,omitempty"` - // The name of the veth interface created inside the container for the child. - VethChild string `json:"veth_child,omitempty"` -} - // The status of a container. 
type Status int @@ -43,3 +16,15 @@ const ( // The container does not exist. Destroyed ) + +// State represents a running container's state +type State struct { + // InitPid is the init process id in the parent namespace + InitPid int `json:"init_pid,omitempty"` + + // InitStartTime is the init process start time + InitStartTime string `json:"init_start_time,omitempty"` + + // Path to all the cgroups setup for a container. Key is cgroup subsystem name. + CgroupPaths map[string]string `json:"cgroup_paths,omitempty"` +} diff --git a/integration/exec_test.go b/integration/exec_test.go index 6d68b4bab..5e9fa0e4e 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -26,13 +26,11 @@ func testExecPS(t *testing.T, userns bool) { if testing.Short() { return } - rootfs, err := newRootfs() if err != nil { t.Fatal(err) } defer remove(rootfs) - config := newTemplateConfig(rootfs) if userns { config.UidMappings = []configs.IDMap{{0, 0, 1000}} @@ -42,13 +40,11 @@ func testExecPS(t *testing.T, userns bool) { buffers, exitCode, err := runContainer(config, "", "ps") if err != nil { - t.Fatal(err) + t.Fatalf("%s: %s", buffers, err) } - if exitCode != 0 { t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) } - lines := strings.Split(buffers.Stdout.String(), "\n") if len(lines) < 2 { t.Fatalf("more than one process running for output %q", buffers.Stdout.String()) diff --git a/integration/utils_test.go b/integration/utils_test.go index e3fdb779a..141035e7d 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -2,12 +2,11 @@ package integration import ( "bytes" - "encoding/json" "fmt" "io/ioutil" "os" "os/exec" - "path/filepath" + "strings" "syscall" "github.com/docker/libcontainer" @@ -28,27 +27,15 @@ type stdBuffers struct { Stderr *bytes.Buffer } -func writeConfig(config *configs.Config) error { - f, err := os.OpenFile(filepath.Join(config.Rootfs, "container.json"), os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0700) - if err != nil { - return err - } - defer f.Close() - return json.NewEncoder(f).Encode(config) -} - -func loadConfig() (*configs.Config, error) { - f, err := os.Open(filepath.Join(os.Getenv("data_path"), "container.json")) - if err != nil { - return nil, err +func (b *stdBuffers) String() string { + s := []string{} + if b.Stderr != nil { + s = append(s, b.Stderr.String()) } - defer f.Close() - - var container *configs.Config - if err := json.NewDecoder(f).Decode(&container); err != nil { - return nil, err + if b.Stdout != nil { + s = append(s, b.Stdout.String()) } - return container, nil + return strings.Join(s, "|") } // newRootfs creates a new tmp directory and copies the busybox root filesystem @@ -85,12 +72,7 @@ func copyBusybox(dest string) error { // buffers are returned containing the STDOUT and STDERR output for the run // along with the exit code and any go error func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) { - if err := writeConfig(config); err != nil { - return nil, -1, err - } - buffers = newStdBuffers() - process := &libcontainer.Process{ Args: args, Env: standardEnvironment, diff --git a/linux_container.go 
b/linux_container.go index 7e73b0573..89143260f 100644 --- a/linux_container.go +++ b/linux_container.go @@ -3,18 +3,14 @@ package libcontainer import ( - "encoding/json" "fmt" - "io" "os" "os/exec" - "path/filepath" "syscall" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/network" - "github.com/docker/libcontainer/system" "github.com/golang/glog" ) @@ -26,9 +22,9 @@ type linuxContainer struct { id string root string config *configs.Config - state *configs.State cgroupManager cgroups.Manager initArgs []string + initProcess parentProcess } // ID returns the container's unique ID @@ -42,11 +38,11 @@ func (c *linuxContainer) Config() configs.Config { } func (c *linuxContainer) Status() (configs.Status, error) { - if c.state.InitPid <= 0 { + if c.initProcess == nil { return configs.Destroyed, nil } // return Running if the init process is alive - if err := syscall.Kill(c.state.InitPid, 0); err != nil { + if err := syscall.Kill(c.initProcess.pid(), 0); err != nil { if err == syscall.ESRCH { return configs.Destroyed, nil } @@ -77,8 +73,14 @@ func (c *linuxContainer) Stats() (*Stats, error) { if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newGenericError(err, SystemError) } - if stats.NetworkStats, err = network.GetStats(&c.state.NetworkState); err != nil { - return stats, newGenericError(err, SystemError) + // TODO: handle stats for multiple veth interfaces + for _, iface := range c.config.Networks { + if iface.Type == "veth" { + if stats.NetworkStats, err = network.GetStats(iface.VethHost); err != nil { + return stats, newGenericError(err, SystemError) + } + break + } } return stats, nil } @@ -88,186 +90,122 @@ func (c *linuxContainer) Start(process *Process) (int, error) { if err != nil { return -1, err } - cmd := c.commandTemplate(process) - if status != configs.Destroyed { - // TODO: (crosbymichael) check out console use for execin - return c.startNewProcess(cmd, 
process) + doInit := status == configs.Destroyed + parent, err := c.newParentProcess(process, doInit) + if err != nil { + return -1, err } - if err := c.startInitialProcess(cmd, process); err != nil { + if err := parent.start(); err != nil { + // terminate the process to ensure that it properly is reaped. + if err := parent.terminate(); err != nil { + glog.Warning(err) + } return -1, err } - return c.state.InitPid, nil + if doInit { + c.initProcess = parent + } + return parent.pid(), nil } -// commandTemplate creates a template *exec.Cmd. It uses the init arguments provided -// to the factory and attaches IO to the process. -func (c *linuxContainer) commandTemplate(process *Process) *exec.Cmd { +func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) { + parentPipe, childPipe, err := newPipe() + if err != nil { + return nil, err + } + cmd, err := c.commandTemplate(p, childPipe) + if err != nil { + return nil, err + } + if !doInit { + return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil + } + return c.newInitProcess(p, cmd, parentPipe, childPipe), nil +} + +func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) { cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) 
- cmd.Stdin = process.Stdin - cmd.Stdout = process.Stdout - cmd.Stderr = process.Stderr - cmd.Env = process.Env + cmd.Stdin = p.Stdin + cmd.Stdout = p.Stdout + cmd.Stderr = p.Stderr cmd.Dir = c.config.Rootfs if cmd.SysProcAttr == nil { cmd.SysProcAttr = &syscall.SysProcAttr{} } + cmd.ExtraFiles = []*os.File{childPipe} cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal) - return cmd + return cmd, nil } -// startNewProcess adds another process to an already running container -func (c *linuxContainer) startNewProcess(cmd *exec.Cmd, process *Process) (int, error) { - glog.Info("start new container process") - parent, child, err := c.newInitPipe() - if err != nil { - return -1, err - } - defer parent.Close() - cmd.ExtraFiles = []*os.File{child} - cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.state.InitPid), "_LIBCONTAINER_INITTYPE=setns") - - // start the command - err = cmd.Start() - child.Close() - if err != nil { - return -1, err - } - s, err := cmd.Process.Wait() - if err != nil { - return -1, err - } - if !s.Success() { - return -1, &exec.ExitError{s} - } - decoder := json.NewDecoder(parent) - var pid *pid - if err := decoder.Decode(&pid); err != nil { - return -1, err - } - p, err := os.FindProcess(pid.Pid) - if err != nil { - return -1, err - } - terminate := func(terr error) (int, error) { - // TODO: log the errors for kill and wait - if err := p.Kill(); err != nil { - glog.Warning(err) - } - if _, err := p.Wait(); err != nil { - glog.Warning(err) +func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *initProcess { + cloneFlags := c.config.Namespaces.CloneFlags() + if cloneFlags&syscall.CLONE_NEWUSER != 0 { + c.addUidGidMappings(cmd.SysProcAttr) + // Default to root user when user namespaces are enabled. 
+ if cmd.SysProcAttr.Credential == nil { + cmd.SysProcAttr.Credential = &syscall.Credential{} } - return -1, terr } - if err := c.enterCgroups(pid.Pid); err != nil { - return terminate(err) + cmd.SysProcAttr.Cloneflags = cloneFlags + cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard") + return &initProcess{ + cmd: cmd, + childPipe: childPipe, + parentPipe: parentPipe, + manager: c.cgroupManager, + config: c.newInitConfig(p), + } +} + +func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess { + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()), + "_LIBCONTAINER_INITTYPE=setns", + ) + // TODO: set on container for process management + return &setnsProcess{ + cmd: cmd, + cgroupPaths: c.cgroupManager.GetPaths(), + childPipe: childPipe, + parentPipe: parentPipe, + config: c.newInitConfig(p), } - if err := json.NewEncoder(parent).Encode(&initConfig{ +} + +func (c *linuxContainer) newInitConfig(process *Process) *initConfig { + return &initConfig{ Config: c.config, Args: process.Args, Env: process.Env, - }); err != nil { - return terminate(err) } - return pid.Pid, nil } -// startInitialProcess starts PID 1 for the container. -func (c *linuxContainer) startInitialProcess(cmd *exec.Cmd, process *Process) error { - glog.Info("starting container initial process") - // create a pipe so that we can syncronize with the namespaced process and - // pass the state and configuration to the child process - parent, child, err := c.newInitPipe() - if err != nil { - return err - } - defer parent.Close() - cmd.ExtraFiles = []*os.File{child} - cmd.SysProcAttr.Cloneflags = c.config.Namespaces.CloneFlags() - cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard") - // if the container is configured to use user namespaces we have to setup the - // uid:gid mapping on the command. 
- if c.config.Namespaces.Contains(configs.NEWUSER) { - addUidGidMappings(cmd.SysProcAttr, c.config) - // Default to root user when user namespaces are enabled. - if cmd.SysProcAttr.Credential == nil { - cmd.SysProcAttr.Credential = &syscall.Credential{} +// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. +func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) { + if c.config.UidMappings != nil { + sys.UidMappings = make([]syscall.SysProcIDMap, len(c.config.UidMappings)) + for i, um := range c.config.UidMappings { + sys.UidMappings[i].ContainerID = um.ContainerID + sys.UidMappings[i].HostID = um.HostID + sys.UidMappings[i].Size = um.Size } } - err = cmd.Start() - child.Close() - if err != nil { - return newGenericError(err, SystemError) - } - wait := func() (*os.ProcessState, error) { - ps, err := cmd.Process.Wait() - if err != nil { - return nil, newGenericError(err, SystemError) - } - // we should kill all processes in cgroup when init is died if we use - // host PID namespace - if !c.config.Namespaces.Contains(configs.NEWPID) { - c.killAllPids() + if c.config.GidMappings != nil { + sys.GidMappings = make([]syscall.SysProcIDMap, len(c.config.GidMappings)) + for i, gm := range c.config.GidMappings { + sys.GidMappings[i].ContainerID = gm.ContainerID + sys.GidMappings[i].HostID = gm.HostID + sys.GidMappings[i].Size = gm.Size } - return ps, nil } - terminate := func(terr error) error { - // TODO: log the errors for kill and wait - cmd.Process.Kill() - wait() - return terr - } - started, err := system.GetProcessStartTime(cmd.Process.Pid) +} + +func newPipe() (parent *os.File, child *os.File, err error) { + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) if err != nil { - return terminate(err) - } - // Do this before syncing with child so that no children - // can escape the cgroup - if err := c.cgroupManager.Apply(cmd.Process.Pid); err != nil { - return terminate(err) - } - defer func() { - if 
err != nil { - c.cgroupManager.Destroy() - } - }() - var networkState configs.NetworkState - if err := c.initializeNetworking(cmd.Process.Pid, &networkState); err != nil { - return terminate(err) - } - iconfig := &initConfig{ - Args: process.Args, - Config: c.config, - Env: process.Env, - NetworkState: &networkState, - } - // Start the setup process to setup the init process - if c.config.Namespaces.Contains(configs.NEWUSER) { - if err = c.executeSetupCmd(cmd.Args, cmd.Process.Pid, c.config, iconfig, &networkState); err != nil { - return terminate(err) - } - } - // send the state to the container's init process then shutdown writes for the parent - if err := json.NewEncoder(parent).Encode(iconfig); err != nil { - return terminate(err) - } - // shutdown writes for the parent side of the pipe - if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { - return terminate(err) - } - // wait for the child process to fully complete and receive an error message - // if one was encoutered - var ierr *initError - if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF { - return terminate(err) - } - if ierr != nil { - return terminate(ierr) + return nil, nil, err } - c.state.InitPid = cmd.Process.Pid - c.state.InitStartTime = started - c.state.NetworkState = networkState - c.state.CgroupPaths = c.cgroupManager.GetPaths() - return nil + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil } func (c *linuxContainer) Destroy() error { @@ -291,172 +229,11 @@ func (c *linuxContainer) Resume() error { } func (c *linuxContainer) Signal(signal os.Signal) error { - glog.Infof("sending signal %d to pid %d", signal, c.state.InitPid) + glog.Infof("sending signal %d to pid %d", signal, c.initProcess.pid()) panic("not implemented") } // TODO: rename to be more descriptive func (c *linuxContainer) OOM() (<-chan struct{}, error) { - return NotifyOnOOM(c.state) -} - -func (c *linuxContainer) updateStateFile() error { 
- fnew := filepath.Join(c.root, fmt.Sprintf("%s.new", stateFilename)) - f, err := os.Create(fnew) - if err != nil { - return newGenericError(err, SystemError) - } - defer f.Close() - - if err := json.NewEncoder(f).Encode(c.state); err != nil { - f.Close() - os.Remove(fnew) - return newGenericError(err, SystemError) - } - fname := filepath.Join(c.root, stateFilename) - if err := os.Rename(fnew, fname); err != nil { - return newGenericError(err, SystemError) - } - return nil -} - -// New returns a newly initialized Pipe for communication between processes -func (c *linuxContainer) newInitPipe() (parent *os.File, child *os.File, err error) { - fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil -} - -// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. -func addUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { - if container.UidMappings != nil { - sys.UidMappings = make([]syscall.SysProcIDMap, len(container.UidMappings)) - for i, um := range container.UidMappings { - sys.UidMappings[i].ContainerID = um.ContainerID - sys.UidMappings[i].HostID = um.HostID - sys.UidMappings[i].Size = um.Size - } - } - - if container.GidMappings != nil { - sys.GidMappings = make([]syscall.SysProcIDMap, len(container.GidMappings)) - for i, gm := range container.GidMappings { - sys.GidMappings[i].ContainerID = gm.ContainerID - sys.GidMappings[i].HostID = gm.HostID - sys.GidMappings[i].Size = gm.Size - } - } -} - -// killAllPids iterates over all of the container's processes -// sending a SIGKILL to each process. 
-func (c *linuxContainer) killAllPids() error { - glog.Info("killing all processes in container") - var procs []*os.Process - c.cgroupManager.Freeze(configs.Frozen) - pids, err := c.cgroupManager.GetPids() - if err != nil { - return err - } - for _, pid := range pids { - // TODO: log err without aborting if we are unable to find - // a single PID - if p, err := os.FindProcess(pid); err == nil { - procs = append(procs, p) - p.Kill() - } - } - c.cgroupManager.Freeze(configs.Thawed) - for _, p := range procs { - p.Wait() - } - return err -} - -// initializeNetworking creates the container's network stack outside of the namespace and moves -// interfaces into the container's net namespaces if necessary -func (c *linuxContainer) initializeNetworking(nspid int, networkState *configs.NetworkState) error { - glog.Info("initailzing container's network stack") - for _, config := range c.config.Networks { - strategy, err := network.GetStrategy(config.Type) - if err != nil { - return err - } - if err := strategy.Create(config, nspid, networkState); err != nil { - return err - } - } - return nil -} - -func (c *linuxContainer) executeSetupCmd(args []string, ppid int, container *configs.Config, process *initConfig, networkState *configs.NetworkState) error { - command := exec.Command(args[0], args[1:]...) 
- parent, child, err := c.newInitPipe() - if err != nil { - return err - } - defer parent.Close() - command.ExtraFiles = []*os.File{child} - command.Dir = container.Rootfs - command.Env = append(command.Env, - fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), - fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_sidecar")) - err = command.Start() - child.Close() - if err != nil { - return err - } - s, err := command.Process.Wait() - if err != nil { - return err - } - if !s.Success() { - return &exec.ExitError{s} - } - decoder := json.NewDecoder(parent) - var pid *pid - if err := decoder.Decode(&pid); err != nil { - return err - } - p, err := os.FindProcess(pid.Pid) - if err != nil { - return err - } - terminate := func(terr error) error { - // TODO: log the errors for kill and wait - p.Kill() - p.Wait() - return terr - } - // send the state to the container's init process then shutdown writes for the parent - if err := json.NewEncoder(parent).Encode(process); err != nil { - return terminate(err) - } - // shutdown writes for the parent side of the pipe - if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { - return terminate(err) - } - // wait for the child process to fully complete and receive an error message - // if one was encoutered - var ierr *initError - if err := decoder.Decode(&ierr); err != nil && err != io.EOF { - return terminate(err) - } - if ierr != nil { - return ierr - } - s, err = p.Wait() - if err != nil { - return err - } - if !s.Success() { - return &exec.ExitError{s} - } - return nil -} - -func (c *linuxContainer) enterCgroups(pid int) error { - return cgroups.EnterPid(c.state.CgroupPaths, pid) + return NotifyOnOOM(c.cgroupManager.GetPaths()) } diff --git a/linux_container_test.go b/linux_container_test.go index 7d4eae92b..11ad253e2 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -69,7 +69,6 @@ func TestGetContainerStats(t *testing.T) { }, }, }, - state: &configs.State{}, } stats, err := container.Stats() diff 
--git a/linux_factory.go b/linux_factory.go index 3bb1659a9..f2af2897d 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -87,7 +87,6 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err root: containerRoot, config: config, initArgs: l.initArgs, - state: &configs.State{}, cgroupManager: cgroups.NewCgroupManager(config.Cgroups), }, nil } @@ -107,15 +106,19 @@ func (l *linuxFactory) Load(id string) (Container, error) { if err != nil { return nil, err } + r := &restoredProcess{ + processPid: state.InitPid, + processStartTime: state.InitStartTime, + } cgroupManager := cgroups.LoadCgroupManager(config.Cgroups, state.CgroupPaths) glog.Infof("using %s as cgroup manager", cgroupManager) return &linuxContainer{ + initProcess: r, id: id, - root: containerRoot, config: config, - state: state, - cgroupManager: cgroupManager, initArgs: l.initArgs, + cgroupManager: cgroupManager, + root: containerRoot, }, nil } @@ -193,3 +196,31 @@ func (l *linuxFactory) validateID(id string) error { } return nil } + +// restoredProcess represents a process where the calling process may or may not be +// the parent process. This process is created when a factory loads a container from +// a persisted state. 
+type restoredProcess struct { + processPid int + processStartTime string +} + +func (p *restoredProcess) start() error { + return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) +} + +func (p *restoredProcess) pid() int { + return p.processPid +} + +func (p *restoredProcess) terminate() error { + return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError) +} + +func (p *restoredProcess) wait() (*os.ProcessState, error) { + return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) +} + +func (p *restoredProcess) startTime() (string, error) { + return p.processStartTime, nil +} diff --git a/linux_factory_test.go b/linux_factory_test.go index a5f0b7e1b..028c73e7a 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -29,21 +29,17 @@ func TestFactoryNew(t *testing.T) { t.Fatal(rerr) } defer os.RemoveAll(root) - factory, err := New(root, nil) if err != nil { t.Fatal(err) } - if factory == nil { t.Fatal("factory should not be nil") } - lfactory, ok := factory.(*linuxFactory) if !ok { t.Fatal("expected linux factory returned on linux based systems") } - if lfactory.root != root { t.Fatalf("expected factory root to be %q but received %q", root, lfactory.root) } @@ -55,17 +51,14 @@ func TestFactoryLoadNotExists(t *testing.T) { t.Fatal(rerr) } defer os.RemoveAll(root) - factory, err := New(root, nil) if err != nil { t.Fatal(err) } - _, err = factory.Load("nocontainer") if err == nil { t.Fatal("expected nil error loading non-existing container") } - lerr, ok := err.(Error) if !ok { t.Fatal("expected libcontainer error type") @@ -81,7 +74,6 @@ func TestFactoryLoadContainer(t *testing.T) { t.Fatal(err) } defer os.RemoveAll(root) - // setup default container config and state for mocking var ( id = "1" @@ -92,7 +84,6 @@ func TestFactoryLoadContainer(t *testing.T) { InitPid: 1024, } ) - if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil { t.Fatal(err) } @@ -102,34 
+93,27 @@ func TestFactoryLoadContainer(t *testing.T) { if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil { t.Fatal(err) } - factory, err := New(root, nil) if err != nil { t.Fatal(err) } - container, err := factory.Load(id) if err != nil { t.Fatal(err) } - if container.ID() != id { t.Fatalf("expected container id %q but received %q", id, container.ID()) } - config := container.Config() - if config.Rootfs != expectedConfig.Rootfs { t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs) } - lcontainer, ok := container.(*linuxContainer) if !ok { t.Fatal("expected linux container on linux based systems") } - - if lcontainer.state.InitPid != expectedState.InitPid { - t.Fatalf("expected init pid %d but received %d", expectedState.InitPid, lcontainer.state.InitPid) + if lcontainer.initProcess.pid() != expectedState.InitPid { + t.Fatalf("expected init pid %d but received %d", expectedState.InitPid, lcontainer.initProcess.pid()) } } diff --git a/linux_init.go b/linux_init.go index daed34e92..ecb770b3d 100644 --- a/linux_init.go +++ b/linux_init.go @@ -21,18 +21,17 @@ import ( type initType string const ( - initSetns initType = "setns" - initStandard initType = "standard" - initUserns initType = "userns" - initUsernsSideCar initType = "userns_sidecar" + initSetns initType = "setns" + initStandard initType = "standard" + initUserns initType = "userns" + initUsernsSetup initType = "userns_setup" ) // Process is used for transferring parameters from Exec() to Init() type initConfig struct { - Args []string `json:"args,omitempty"` - Env []string `json:"env,omitempty"` - Config *configs.Config `json:"config,omitempty"` - NetworkState *configs.NetworkState `json:"network_state,omitempty"` + Args []string `json:"args,omitempty"` + Env []string `json:"env,omitempty"` + Config *configs.Config `json:"config,omitempty"` } type initer interface { @@ -60,10 +59,9 @@ func newContainerInit(t initType, pipe *os.File) (initer, 
error) { env: config.Env, config: config.Config, }, nil - case initUsernsSideCar: + case initUsernsSetup: return &linuxUsernsSideCar{ - config: config.Config, - network: config.NetworkState, + config: config.Config, }, nil case initStandard: return &linuxStandardInit{ @@ -186,13 +184,13 @@ func setupUser(config *configs.Config) error { // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway -func setupNetwork(config *configs.Config, networkState *configs.NetworkState) error { +func setupNetwork(config *configs.Config) error { for _, config := range config.Networks { strategy, err := network.GetStrategy(config.Type) if err != nil { return err } - err1 := strategy.Initialize(config, networkState) + err1 := strategy.Initialize(config) if err1 != nil { return err1 } diff --git a/linux_process.go b/linux_process.go new file mode 100644 index 000000000..f6c8a14c4 --- /dev/null +++ b/linux_process.go @@ -0,0 +1,262 @@ +// +build linux + +package libcontainer + +import ( + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "syscall" + + "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/network" + "github.com/docker/libcontainer/system" + "github.com/golang/glog" +) + +type parentProcess interface { + // pid returns the pid for the running process. + pid() int + + // start starts the process execution. + start() error + + // send a SIGKILL to the process and wait for the exit. + terminate() error + + // wait waits on the process returning the process state. + wait() (*os.ProcessState, error) + + // startTime return's the process start time. 
+ startTime() (string, error) +} + +type setnsProcess struct { + cmd *exec.Cmd + parentPipe *os.File + childPipe *os.File + forkedProcess *os.Process + cgroupPaths map[string]string + config *initConfig +} + +func (p *setnsProcess) startTime() (string, error) { + return system.GetProcessStartTime(p.pid()) +} + +func (p *setnsProcess) start() (err error) { + defer p.parentPipe.Close() + if p.forkedProcess, err = p.execSetns(); err != nil { + return err + } + if len(p.cgroupPaths) > 0 { + if err := cgroups.EnterPid(p.cgroupPaths, p.forkedProcess.Pid); err != nil { + return err + } + } + if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil { + return err + } + return nil +} + +// execSetns runs the process that executes C code to perform the setns calls +// because setns support requires the C process to fork off a child and perform the setns +// before the go runtime boots, we wait on the process to die and receive the child's pid +// over the provided pipe. +func (p *setnsProcess) execSetns() (*os.Process, error) { + err := p.cmd.Start() + p.childPipe.Close() + if err != nil { + return nil, err + } + status, err := p.cmd.Process.Wait() + if err != nil { + return nil, err + } + if !status.Success() { + return nil, &exec.ExitError{status} + } + var pid *pid + if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { + return nil, err + } + return os.FindProcess(pid.Pid) +} + +// terminate sends a SIGKILL to the forked process for the setns routine then waits to +// avoid the process becomming a zombie. 
+func (p *setnsProcess) terminate() error { + if p.forkedProcess == nil { + return nil + } + err := p.forkedProcess.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *setnsProcess) wait() (*os.ProcessState, error) { + return p.forkedProcess.Wait() +} + +func (p *setnsProcess) pid() int { + return p.forkedProcess.Pid +} + +type initProcess struct { + cmd *exec.Cmd + parentPipe *os.File + childPipe *os.File + config *initConfig + manager cgroups.Manager +} + +func (p *initProcess) pid() int { + return p.cmd.Process.Pid +} + +func (p *initProcess) start() error { + defer p.parentPipe.Close() + err := p.cmd.Start() + p.childPipe.Close() + if err != nil { + return err + } + // Do this before syncing with child so that no children + // can escape the cgroup + if err := p.manager.Apply(p.pid()); err != nil { + return err + } + defer func() { + if err != nil { + // TODO: should not be the responsibility to call here + p.manager.Destroy() + } + }() + if err := p.createNetworkInterfaces(); err != nil { + return err + } + // Start the setup process to setup the init process + if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWUSER != 0 { + parent, err := p.newUsernsSetupProcess() + if err != nil { + return err + } + if err := parent.start(); err != nil { + if err := parent.terminate(); err != nil { + glog.Warning(err) + } + return err + } + if _, err := parent.wait(); err != nil { + return err + } + } + if err := p.sendConfig(); err != nil { + return err + } + // wait for the child process to fully complete and receive an error message + // if one was encoutered + var ierr *initError + if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { + return err + } + if ierr != nil { + return ierr + } + return nil +} + +func (p *initProcess) wait() (*os.ProcessState, error) { + state, err := p.cmd.Process.Wait() + if err != nil { + return nil, err + } + // we should kill all processes in cgroup when init is died if we 
use host PID namespace + if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 { + // TODO: this will not work for the success path because libcontainer + // does not wait on the process. This needs to be moved to destroy or add a Wait() + // method back onto the container. + var procs []*os.Process + p.manager.Freeze(configs.Frozen) + pids, err := p.manager.GetPids() + if err != nil { + return nil, err + } + for _, pid := range pids { + // TODO: log err without aborting if we are unable to find + // a single PID + if p, err := os.FindProcess(pid); err == nil { + procs = append(procs, p) + p.Kill() + } + } + p.manager.Freeze(configs.Thawed) + for _, p := range procs { + p.Wait() + } + } + return state, nil +} + +func (p *initProcess) terminate() error { + if p.cmd.Process == nil { + return nil + } + err := p.cmd.Process.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *initProcess) startTime() (string, error) { + return system.GetProcessStartTime(p.pid()) +} + +func (p *initProcess) sendConfig() error { + // send the state to the container's init process then shutdown writes for the parent + if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil { + return err + } + // shutdown writes for the parent side of the pipe + return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR) +} + +func (p *initProcess) createNetworkInterfaces() error { + for _, config := range p.config.Config.Networks { + strategy, err := network.GetStrategy(config.Type) + if err != nil { + return err + } + if err := strategy.Create(config, p.pid()); err != nil { + return err + } + } + return nil +} + +func (p *initProcess) newUsernsSetupProcess() (parentProcess, error) { + parentPipe, childPipe, err := newPipe() + if err != nil { + return nil, err + } + cmd := exec.Command(p.cmd.Args[0], p.cmd.Args[1:]...) 
+ cmd.ExtraFiles = []*os.File{childPipe} + cmd.Dir = p.cmd.Dir + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_INITPID=%d", p.pid()), + fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_setup"), + ) + return &setnsProcess{ + cmd: cmd, + childPipe: childPipe, + parentPipe: parentPipe, + config: p.config, + }, nil +} diff --git a/linux_standard_init.go b/linux_standard_init.go index b497ec80d..f69a4bdb6 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -38,7 +38,7 @@ func (l *linuxStandardInit) Init() error { return err } } - if err := setupNetwork(l.config.Config, l.config.NetworkState); err != nil { + if err := setupNetwork(l.config.Config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { diff --git a/linux_userns_sidecar_init.go b/linux_userns_sidecar_init.go index 047a5b7f9..7ac7dc43a 100644 --- a/linux_userns_sidecar_init.go +++ b/linux_userns_sidecar_init.go @@ -15,12 +15,11 @@ import ( // except the user namespace, so it run as root in the root user namespace // to perform these operations. type linuxUsernsSideCar struct { - config *configs.Config - network *configs.NetworkState + config *configs.Config } func (l *linuxUsernsSideCar) Init() error { - if err := setupNetwork(l.config, l.network); err != nil { + if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config); err != nil { diff --git a/network/loopback.go b/network/loopback.go index b208dfc69..11db88851 100644 --- a/network/loopback.go +++ b/network/loopback.go @@ -12,11 +12,11 @@ import ( type Loopback struct { } -func (l *Loopback) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error { +func (l *Loopback) Create(n *configs.Network, nspid int) error { return nil } -func (l *Loopback) Initialize(config *configs.Network, networkState *configs.NetworkState) error { +func (l *Loopback) Initialize(config *configs.Network) error { // Do not set the MTU on the loopback interface - use the default. 
if err := InterfaceUp("lo"); err != nil { return fmt.Errorf("lo up %s", err) diff --git a/network/stats.go b/network/stats.go index 329a16bf3..1d7cfe77c 100644 --- a/network/stats.go +++ b/network/stats.go @@ -5,8 +5,6 @@ import ( "path/filepath" "strconv" "strings" - - "github.com/docker/libcontainer/configs" ) type NetworkStats struct { @@ -21,14 +19,12 @@ type NetworkStats struct { } // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. -func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) { +func GetStats(vethHostInterface string) (*NetworkStats, error) { // This can happen if the network runtime information is missing - possible if the container was created by an old version of libcontainer. - if networkState.VethHost == "" { + if vethHostInterface == "" { return &NetworkStats{}, nil } - out := &NetworkStats{} - type netStatsPair struct { // Where to write the output. Out *uint64 @@ -36,7 +32,6 @@ func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) { // The network stats file to read. File string } - // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. 
netStats := []netStatsPair{ {Out: &out.RxBytes, File: "tx_bytes"}, @@ -50,13 +45,12 @@ func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) { {Out: &out.TxDropped, File: "rx_dropped"}, } for _, netStat := range netStats { - data, err := readSysfsNetworkStats(networkState.VethHost, netStat.File) + data, err := readSysfsNetworkStats(vethHostInterface, netStat.File) if err != nil { return nil, err } *(netStat.Out) = data } - return out, nil } diff --git a/network/strategy.go b/network/strategy.go index bc4a023b0..8631c11d8 100644 --- a/network/strategy.go +++ b/network/strategy.go @@ -20,8 +20,8 @@ var strategies = map[string]NetworkStrategy{ // NetworkStrategy represents a specific network configuration for // a container's networking stack type NetworkStrategy interface { - Create(*configs.Network, int, *configs.NetworkState) error - Initialize(*configs.Network, *configs.NetworkState) error + Create(*configs.Network, int) error + Initialize(*configs.Network) error } // GetStrategy returns the specific network strategy for the diff --git a/network/types.go b/network/types.go deleted file mode 100644 index 1ae2e9d50..000000000 --- a/network/types.go +++ /dev/null @@ -1 +0,0 @@ -package network diff --git a/network/veth.go b/network/veth.go index 7bcc3910c..5d554e8b7 100644 --- a/network/veth.go +++ b/network/veth.go @@ -6,8 +6,6 @@ import ( "fmt" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/netlink" - "github.com/docker/libcontainer/utils" ) // Veth is a network strategy that uses a bridge and creates @@ -18,42 +16,32 @@ type Veth struct { const defaultDevice = "eth0" -func (v *Veth) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error { +func (v *Veth) Create(n *configs.Network, nspid int) error { var ( bridge = n.Bridge - prefix = n.VethPrefix txQueueLen = n.TxQueueLen ) if bridge == "" { return fmt.Errorf("bridge is not specified") } - if prefix == "" { - return fmt.Errorf("veth prefix is 
not specified") - } - name1, name2, err := createVethPair(prefix, txQueueLen) - if err != nil { - return err - } - if err := SetInterfaceMaster(name1, bridge); err != nil { + if err := CreateVethPair(n.VethHost, n.VethChild, txQueueLen); err != nil { return err } - if err := SetMtu(name1, n.Mtu); err != nil { + if err := SetInterfaceMaster(n.VethHost, bridge); err != nil { return err } - if err := InterfaceUp(name1); err != nil { + if err := SetMtu(n.VethHost, n.Mtu); err != nil { return err } - if err := SetInterfaceInNamespacePid(name2, nspid); err != nil { + if err := InterfaceUp(n.VethHost); err != nil { return err } - networkState.VethHost = name1 - networkState.VethChild = name2 - + return SetInterfaceInNamespacePid(n.VethChild, nspid) return nil } -func (v *Veth) Initialize(config *configs.Network, networkState *configs.NetworkState) error { - var vethChild = networkState.VethChild +func (v *Veth) Initialize(config *configs.Network) error { + vethChild := config.VethChild if vethChild == "" { return fmt.Errorf("vethChild is not specified") } @@ -95,29 +83,3 @@ func (v *Veth) Initialize(config *configs.Network, networkState *configs.Network } return nil } - -// createVethPair will automatically generage two random names for -// the veth pair and ensure that they have been created -func createVethPair(prefix string, txQueueLen int) (name1 string, name2 string, err error) { - for i := 0; i < 10; i++ { - if name1, err = utils.GenerateRandomName(prefix, 7); err != nil { - return - } - - if name2, err = utils.GenerateRandomName(prefix, 7); err != nil { - return - } - - if err = CreateVethPair(name1, name2, txQueueLen); err != nil { - if err == netlink.ErrInterfaceExists { - continue - } - - return - } - - break - } - - return -} diff --git a/network/veth_test.go b/network/veth_test.go deleted file mode 100644 index b92b284eb..000000000 --- a/network/veth_test.go +++ /dev/null @@ -1,53 +0,0 @@ -// +build linux - -package network - -import ( - "testing" - - 
"github.com/docker/libcontainer/netlink" -) - -func TestGenerateVethNames(t *testing.T) { - if testing.Short() { - return - } - - prefix := "veth" - - name1, name2, err := createVethPair(prefix, 0) - if err != nil { - t.Fatal(err) - } - - if name1 == "" { - t.Fatal("name1 should not be empty") - } - - if name2 == "" { - t.Fatal("name2 should not be empty") - } -} - -func TestCreateDuplicateVethPair(t *testing.T) { - if testing.Short() { - return - } - - prefix := "veth" - - name1, name2, err := createVethPair(prefix, 0) - if err != nil { - t.Fatal(err) - } - - // retry to create the name interfaces and make sure that we get the correct error - err = CreateVethPair(name1, name2, 0) - if err == nil { - t.Fatal("expected error to not be nil with duplicate interface") - } - - if err != netlink.ErrInterfaceExists { - t.Fatalf("expected error to be ErrInterfaceExists but received %q", err) - } -} diff --git a/notify_linux.go b/notify_linux.go index 059ce5131..062fa11a0 100644 --- a/notify_linux.go +++ b/notify_linux.go @@ -4,7 +4,6 @@ package libcontainer import ( "fmt" - "github.com/docker/libcontainer/configs" "io/ioutil" "os" "path/filepath" @@ -16,8 +15,8 @@ const oomCgroupName = "memory" // NotifyOnOOM returns channel on which you can expect event about OOM, // if process died without OOM this channel will be closed. // s is current *libcontainer.State for container. 
-func NotifyOnOOM(s *configs.State) (<-chan struct{}, error) { - dir := s.CgroupPaths[oomCgroupName] +func NotifyOnOOM(paths map[string]string) (<-chan struct{}, error) { + dir := paths[oomCgroupName] if dir == "" { return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName) } diff --git a/notify_linux_test.go b/notify_linux_test.go index 8a3026eda..65189d362 100644 --- a/notify_linux_test.go +++ b/notify_linux_test.go @@ -11,8 +11,6 @@ import ( "syscall" "testing" "time" - - "github.com/docker/libcontainer/configs" ) func TestNotifyOnOOM(t *testing.T) { @@ -29,12 +27,10 @@ func TestNotifyOnOOM(t *testing.T) { t.Fatal(err) } var eventFd, oomControlFd int - st := &configs.State{ - CgroupPaths: map[string]string{ - "memory": memoryPath, - }, + paths := map[string]string{ + "memory": memoryPath, } - ooms, err := NotifyOnOOM(st) + ooms, err := NotifyOnOOM(paths) if err != nil { t.Fatal("expected no error, got:", err) } diff --git a/process.go b/process.go index e80dda0c8..a1528c558 100644 --- a/process.go +++ b/process.go @@ -1,9 +1,6 @@ package libcontainer -import ( - "io" - "os/exec" -) +import "io" // Process specifies the configuration and IO for a process inside // a container. @@ -22,6 +19,4 @@ type Process struct { // Stderr is a pointer to a writer which receives the standard error stream. 
Stderr io.Writer - - cmd *exec.Cmd } From 9dcbc4f3f89fa7b02a9343b23e1f868ee60f8585 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 6 Feb 2015 22:33:10 -0800 Subject: [PATCH 074/101] Implement container signaling Signed-off-by: Michael Crosby --- linux_container.go | 2 +- linux_factory.go | 4 ++++ linux_process.go | 10 ++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/linux_container.go b/linux_container.go index 89143260f..8f733d475 100644 --- a/linux_container.go +++ b/linux_container.go @@ -230,7 +230,7 @@ func (c *linuxContainer) Resume() error { func (c *linuxContainer) Signal(signal os.Signal) error { glog.Infof("sending signal %d to pid %d", signal, c.initProcess.pid()) - panic("not implemented") + return c.initProcess.signal(signal) } // TODO: rename to be more descriptive diff --git a/linux_factory.go b/linux_factory.go index f2af2897d..161829277 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -224,3 +224,7 @@ func (p *restoredProcess) wait() (*os.ProcessState, error) { func (p *restoredProcess) startTime() (string, error) { return p.processStartTime, nil } + +func (p *restoredProcess) signal(s os.Signal) error { + return newGenericError(fmt.Errorf("restored process cannot be signaled"), SystemError) +} diff --git a/linux_process.go b/linux_process.go index f6c8a14c4..0de894abc 100644 --- a/linux_process.go +++ b/linux_process.go @@ -32,6 +32,8 @@ type parentProcess interface { // startTime return's the process start time. 
startTime() (string, error) + + signal(os.Signal) error } type setnsProcess struct { @@ -47,6 +49,10 @@ func (p *setnsProcess) startTime() (string, error) { return system.GetProcessStartTime(p.pid()) } +func (p *setnsProcess) signal(s os.Signal) error { + return p.forkedProcess.Signal(s) +} + func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() if p.forkedProcess, err = p.execSetns(); err != nil { @@ -260,3 +266,7 @@ func (p *initProcess) newUsernsSetupProcess() (parentProcess, error) { config: p.config, }, nil } + +func (p *initProcess) signal(s os.Signal) error { + return p.cmd.Process.Signal(s) +} From 1edada52fd6f27dce3a37de26626f6700576a11e Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 9 Feb 2015 13:11:57 -0800 Subject: [PATCH 075/101] Move Cwd and User to Process Signed-off-by: Michael Crosby --- configs/config.go | 6 ------ linux_init.go | 39 +++++++++++++++++------------------- linux_setns_init.go | 15 ++++++-------- linux_standard_init.go | 5 ++--- linux_userns_init.go | 25 ++++++++++------------- linux_userns_sidecar_init.go | 10 ++++----- process.go | 7 +++++++ 7 files changed, 49 insertions(+), 58 deletions(-) diff --git a/configs/config.go b/configs/config.go index ca433b288..d8e2c9eb3 100644 --- a/configs/config.go +++ b/configs/config.go @@ -49,12 +49,6 @@ type Config struct { // Hostname optionally sets the container's hostname if provided Hostname string `json:"hostname,omitempty"` - // User will set the uid and gid of the executing process running inside the container - User string `json:"user,omitempty"` - - // WorkingDir will change the processes current working directory inside the container's rootfs - WorkingDir string `json:"working_dir,omitempty"` - // Console is the path to the console allocated to the container. 
Console string `json:"console,omitempty"` diff --git a/linux_init.go b/linux_init.go index ecb770b3d..9b28af639 100644 --- a/linux_init.go +++ b/linux_init.go @@ -31,6 +31,8 @@ const ( type initConfig struct { Args []string `json:"args,omitempty"` Env []string `json:"env,omitempty"` + Cwd string `json:"cwd,omitempty"` + User string `json:"user,omitempty"` Config *configs.Config `json:"config,omitempty"` } @@ -49,24 +51,19 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) { switch t { case initSetns: return &linuxSetnsInit{ - args: config.Args, - env: config.Env, - config: config.Config, + config: config, }, nil case initUserns: return &linuxUsernsInit{ - args: config.Args, - env: config.Env, - config: config.Config, + config: config, }, nil case initUsernsSetup: return &linuxUsernsSideCar{ - config: config.Config, + config: config, }, nil case initStandard: return &linuxStandardInit{ config: config, - env: config.Env, }, nil } return nil, fmt.Errorf("unknown init type %q", t) @@ -90,7 +87,7 @@ func populateProcessEnvironment(env []string) error { // finalizeNamespace drops the caps, sets the correct user // and working dir, and closes any leaky file descriptors // before execing the command inside the namespace -func finalizeNamespace(config *configs.Config) error { +func finalizeNamespace(config *initConfig) error { // Ensure that all non-standard fds we may have accidentally // inherited are marked close-on-exec so they stay out of the // container @@ -98,7 +95,7 @@ func finalizeNamespace(config *configs.Config) error { return err } // drop capabilities in bounding set before changing user - if err := capabilities.DropBoundingSet(config.Capabilities); err != nil { + if err := capabilities.DropBoundingSet(config.Config.Capabilities); err != nil { return err } // preserve existing capabilities while we change users @@ -112,12 +109,12 @@ func finalizeNamespace(config *configs.Config) error { return err } // drop all other capabilities - if err := 
capabilities.DropCapabilities(config.Capabilities); err != nil { + if err := capabilities.DropCapabilities(config.Config.Capabilities); err != nil { return err } - if config.WorkingDir != "" { - if err := syscall.Chdir(config.WorkingDir); err != nil { - return fmt.Errorf("chdir to %s %s", config.WorkingDir, err) + if config.Cwd != "" { + if err := syscall.Chdir(config.Cwd); err != nil { + return err } } return nil @@ -143,7 +140,7 @@ func joinExistingNamespaces(namespaces []configs.Namespace) error { } // setupUser changes the groups, gid, and uid for the user inside the container -func setupUser(config *configs.Config) error { +func setupUser(config *initConfig) error { // Set up defaults. defaultExecUser := user.ExecUser{ Uid: syscall.Getuid(), @@ -160,22 +157,22 @@ func setupUser(config *configs.Config) error { } execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) if err != nil { - return fmt.Errorf("get supplementary groups %s", err) + return err } - suppGroups := append(execUser.Sgids, config.AdditionalGroups...) + suppGroups := append(execUser.Sgids, config.Config.AdditionalGroups...) 
if err := syscall.Setgroups(suppGroups); err != nil { - return fmt.Errorf("setgroups %s", err) + return err } if err := system.Setgid(execUser.Gid); err != nil { - return fmt.Errorf("setgid %s", err) + return err } if err := system.Setuid(execUser.Uid); err != nil { - return fmt.Errorf("setuid %s", err) + return err } // if we didn't get HOME already, set it based on the user's HOME if envHome := os.Getenv("HOME"); envHome == "" { if err := os.Setenv("HOME", execUser.Home); err != nil { - return fmt.Errorf("set HOME %s", err) + return err } } return nil diff --git a/linux_setns_init.go b/linux_setns_init.go index a9a1d27cd..2d91475bf 100644 --- a/linux_setns_init.go +++ b/linux_setns_init.go @@ -4,7 +4,6 @@ package libcontainer import ( "github.com/docker/libcontainer/apparmor" - "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/system" ) @@ -12,25 +11,23 @@ import ( // linuxSetnsInit performs the container's initialization for running a new process // inside an existing container. 
type linuxSetnsInit struct { - args []string - env []string - config *configs.Config + config *initConfig } func (l *linuxSetnsInit) Init() error { - if err := setupRlimits(l.config); err != nil { + if err := setupRlimits(l.config.Config); err != nil { return err } if err := finalizeNamespace(l.config); err != nil { return err } - if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } - if l.config.ProcessLabel != "" { - if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { + if l.config.Config.ProcessLabel != "" { + if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } } - return system.Execv(l.args[0], l.args[0:], l.env) + return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Env) } diff --git a/linux_standard_init.go b/linux_standard_init.go index f69a4bdb6..2fe558e75 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -16,7 +16,6 @@ import ( type linuxStandardInit struct { config *initConfig - env []string } func (l *linuxStandardInit) Init() error { @@ -74,7 +73,7 @@ func (l *linuxStandardInit) Init() error { if err != nil { return err } - if err := finalizeNamespace(l.config.Config); err != nil { + if err := finalizeNamespace(l.config); err != nil { return err } // finalizeNamespace can change user/group which clears the parent death @@ -87,5 +86,5 @@ func (l *linuxStandardInit) Init() error { if syscall.Getppid() == 1 { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } - return system.Execv(l.config.Args[0], l.config.Args[0:], l.env) + return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Env) } diff --git a/linux_userns_init.go b/linux_userns_init.go index b44c191e2..71f2a96e3 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -6,7 +6,6 @@ import ( "syscall" "github.com/docker/libcontainer/apparmor" - 
"github.com/docker/libcontainer/configs" consolepkg "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/security/restrict" @@ -14,17 +13,15 @@ import ( ) type linuxUsernsInit struct { - args []string - env []string - config *configs.Config + config *initConfig } func (l *linuxUsernsInit) Init() error { // join any namespaces via a path to the namespace fd if provided - if err := joinExistingNamespaces(l.config.Namespaces); err != nil { + if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { return err } - console := l.config.Console + console := l.config.Config.Console if console != "" { if err := consolepkg.OpenAndDup("/dev/console"); err != nil { return err @@ -38,24 +35,24 @@ func (l *linuxUsernsInit) Init() error { return err } } - if l.config.WorkingDir == "" { - l.config.WorkingDir = "/" + if l.config.Cwd == "" { + l.config.Cwd = "/" } - if err := setupRlimits(l.config); err != nil { + if err := setupRlimits(l.config.Config); err != nil { return err } - if hostname := l.config.Hostname; hostname != "" { + if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { return err } } - if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { return err } - if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { + if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } - if l.config.RestrictSys { + if l.config.Config.RestrictSys { if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { return err } @@ -77,5 +74,5 @@ func (l *linuxUsernsInit) Init() error { if syscall.Getppid() == 1 { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } - return system.Execv(l.args[0], l.args[0:], l.env) + return system.Execv(l.config.Args[0], 
l.config.Args[0:], l.config.Env) } diff --git a/linux_userns_sidecar_init.go b/linux_userns_sidecar_init.go index 7ac7dc43a..514b9dd4c 100644 --- a/linux_userns_sidecar_init.go +++ b/linux_userns_sidecar_init.go @@ -15,20 +15,20 @@ import ( // except the user namespace, so it run as root in the root user namespace // to perform these operations. type linuxUsernsSideCar struct { - config *configs.Config + config *initConfig } func (l *linuxUsernsSideCar) Init() error { - if err := setupNetwork(l.config); err != nil { + if err := setupNetwork(l.config.Config); err != nil { return err } - if err := setupRoute(l.config); err != nil { + if err := setupRoute(l.config.Config); err != nil { return err } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace - if l.config.Namespaces.Contains(configs.NEWNET) { - if err := mount.InitializeMountNamespace(l.config); err != nil { + if l.config.Config.Namespaces.Contains(configs.NEWNET) { + if err := mount.InitializeMountNamespace(l.config.Config); err != nil { return err } } diff --git a/process.go b/process.go index a1528c558..d361f98fe 100644 --- a/process.go +++ b/process.go @@ -11,6 +11,13 @@ type Process struct { // Env specifies the environment variables for the process. Env []string + // User will set the uid and gid of the executing process running inside the container + // local to the contaienr's user and group configuration. + User string + + // Cwd will change the processes current working directory inside the container's rootfs. + Cwd string + // Stdin is a pointer to a reader which provides the standard input stream. 
Stdin io.Reader From 20daff5e2c03dfda9dad5c26a5807522141bc931 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 9 Feb 2015 13:16:43 -0800 Subject: [PATCH 076/101] Move mount package into libcontainer root Signed-off-by: Michael Crosby --- mount/init.go => linux_rootfs.go | 70 ++++++++++++++++++++++++++++++-- linux_standard_init.go | 3 +- linux_userns_sidecar_init.go | 3 +- mount/msmoveroot.go | 15 ------- mount/pivotroot.go | 37 ----------------- mount/ptmx.go | 34 ---------------- mount/readonly.go | 11 ----- 7 files changed, 69 insertions(+), 104 deletions(-) rename mount/init.go => linux_rootfs.go (76%) delete mode 100644 mount/msmoveroot.go delete mode 100644 mount/pivotroot.go delete mode 100644 mount/ptmx.go delete mode 100644 mount/readonly.go diff --git a/mount/init.go b/linux_rootfs.go similarity index 76% rename from mount/init.go rename to linux_rootfs.go index a017ea519..2542dfe54 100644 --- a/mount/init.go +++ b/linux_rootfs.go @@ -1,14 +1,16 @@ // +build linux -package mount +package libcontainer import ( "fmt" + "io/ioutil" "os" "path/filepath" "syscall" "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/label" ) @@ -22,9 +24,9 @@ type mount struct { data string } -// InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a +// setupRootfs sets up the devices, mount points, and filesystems for use inside a // new mount namespace. 
-func InitializeMountNamespace(config *configs.Config) (err error) { +func setupRootfs(config *configs.Config) (err error) { if err := prepareRoot(config); err != nil { return err } @@ -213,3 +215,65 @@ func prepareRoot(config *configs.Config) error { } return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "") } + +func setReadonly() error { + return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") +} + +func setupPtmx(config *configs.Config) error { + ptmx := filepath.Join(config.Rootfs, "dev/ptmx") + if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink("pts/ptmx", ptmx); err != nil { + return fmt.Errorf("symlink dev ptmx %s", err) + } + if config.Console != "" { + uid, err := config.HostUID() + if err != nil { + return err + } + gid, err := config.HostGID() + if err != nil { + return err + } + return console.Setup(config.Rootfs, config.Console, config.MountLabel, uid, gid) + } + return nil +} + +func pivotRoot(rootfs, pivotBaseDir string) error { + if pivotBaseDir == "" { + pivotBaseDir = "/" + } + tmpDir := filepath.Join(rootfs, pivotBaseDir) + if err := os.MkdirAll(tmpDir, 0755); err != nil { + return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err) + } + pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root") + if err != nil { + return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err) + } + if err := syscall.PivotRoot(rootfs, pivotDir); err != nil { + return fmt.Errorf("pivot_root %s", err) + } + if err := syscall.Chdir("/"); err != nil { + return fmt.Errorf("chdir / %s", err) + } + // path to pivot dir now changed, update + pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) + if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { + return fmt.Errorf("unmount pivot_root dir %s", err) + } + return os.Remove(pivotDir) +} + +func msMoveRoot(rootfs string) error { + 
if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { + return err + } + if err := syscall.Chroot("."); err != nil { + return err + } + return syscall.Chdir("/") +} diff --git a/linux_standard_init.go b/linux_standard_init.go index 2fe558e75..9b0ac76bf 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -9,7 +9,6 @@ import ( "github.com/docker/libcontainer/configs" consolepkg "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount" "github.com/docker/libcontainer/security/restrict" "github.com/docker/libcontainer/system" ) @@ -49,7 +48,7 @@ func (l *linuxStandardInit) Init() error { label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNS) { - if err := mount.InitializeMountNamespace(l.config.Config); err != nil { + if err := setupRootfs(l.config.Config); err != nil { return err } } diff --git a/linux_userns_sidecar_init.go b/linux_userns_sidecar_init.go index 514b9dd4c..24dea9ef0 100644 --- a/linux_userns_sidecar_init.go +++ b/linux_userns_sidecar_init.go @@ -5,7 +5,6 @@ package libcontainer import ( "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount" ) // linuxUsernsSideCar is run to setup mounts and networking related operations @@ -28,7 +27,7 @@ func (l *linuxUsernsSideCar) Init() error { label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace if l.config.Config.Namespaces.Contains(configs.NEWNET) { - if err := mount.InitializeMountNamespace(l.config.Config); err != nil { + if err := setupRootfs(l.config.Config); err != nil { return err } } diff --git a/mount/msmoveroot.go b/mount/msmoveroot.go deleted file mode 100644 index 17b732932..000000000 --- a/mount/msmoveroot.go +++ /dev/null @@ -1,15 +0,0 @@ -// +build linux - -package mount - -import "syscall" - -func 
msMoveRoot(rootfs string) error { - if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { - return err - } - if err := syscall.Chroot("."); err != nil { - return err - } - return syscall.Chdir("/") -} diff --git a/mount/pivotroot.go b/mount/pivotroot.go deleted file mode 100644 index 3d422774e..000000000 --- a/mount/pivotroot.go +++ /dev/null @@ -1,37 +0,0 @@ -// +build linux - -package mount - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "syscall" -) - -func pivotRoot(rootfs, pivotBaseDir string) error { - if pivotBaseDir == "" { - pivotBaseDir = "/" - } - tmpDir := filepath.Join(rootfs, pivotBaseDir) - if err := os.MkdirAll(tmpDir, 0755); err != nil { - return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err) - } - pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root") - if err != nil { - return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err) - } - if err := syscall.PivotRoot(rootfs, pivotDir); err != nil { - return fmt.Errorf("pivot_root %s", err) - } - if err := syscall.Chdir("/"); err != nil { - return fmt.Errorf("chdir / %s", err) - } - // path to pivot dir now changed, update - pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) - if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { - return fmt.Errorf("unmount pivot_root dir %s", err) - } - return os.Remove(pivotDir) -} diff --git a/mount/ptmx.go b/mount/ptmx.go deleted file mode 100644 index 278f72997..000000000 --- a/mount/ptmx.go +++ /dev/null @@ -1,34 +0,0 @@ -// +build linux - -package mount - -import ( - "fmt" - "os" - "path/filepath" - - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/console" -) - -func setupPtmx(config *configs.Config) error { - ptmx := filepath.Join(config.Rootfs, "dev/ptmx") - if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { - return err - } - if err := os.Symlink("pts/ptmx", ptmx); err != nil { - return fmt.Errorf("symlink dev ptmx %s", err) 
- } - if config.Console != "" { - uid, err := config.HostUID() - if err != nil { - return err - } - gid, err := config.HostGID() - if err != nil { - return err - } - return console.Setup(config.Rootfs, config.Console, config.MountLabel, uid, gid) - } - return nil -} diff --git a/mount/readonly.go b/mount/readonly.go deleted file mode 100644 index 855c9f995..000000000 --- a/mount/readonly.go +++ /dev/null @@ -1,11 +0,0 @@ -// +build linux - -package mount - -import ( - "syscall" -) - -func setReadonly() error { - return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") -} From b0e274c0d2d5993f5780e05cdf7377baa635f3fa Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 9 Feb 2015 14:07:18 -0800 Subject: [PATCH 077/101] Remove console package and add Console type Signed-off-by: Michael Crosby --- console.go | 15 +++++ console/console.go | 128 ------------------------------------ linux_console.go | 145 +++++++++++++++++++++++++++++++++++++++++ linux_rootfs.go | 4 +- linux_standard_init.go | 10 +-- linux_userns_init.go | 10 +-- nsinit/exec.go | 87 ++++--------------------- nsinit/tty.go | 64 ++++++++++++++++++ 8 files changed, 247 insertions(+), 216 deletions(-) create mode 100644 console.go delete mode 100644 console/console.go create mode 100644 linux_console.go create mode 100644 nsinit/tty.go diff --git a/console.go b/console.go new file mode 100644 index 000000000..d3392ee3b --- /dev/null +++ b/console.go @@ -0,0 +1,15 @@ +package libcontainer + +import "io" + +// Console is a psuedo TTY. +type Console interface { + io.ReadWriter + io.Closer + + // Path returns the filesystem path to the slave side of the pty. + Path() string + + // Fd returns the fd for the master of the pty. 
+ Fd() uintptr +} diff --git a/console/console.go b/console/console.go deleted file mode 100644 index 69af70c19..000000000 --- a/console/console.go +++ /dev/null @@ -1,128 +0,0 @@ -// +build linux - -package console - -import ( - "fmt" - "os" - "path/filepath" - "syscall" - "unsafe" - - "github.com/docker/libcontainer/label" -) - -// Setup initializes the proper /dev/console inside the rootfs path -func Setup(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error { - oldMask := syscall.Umask(0000) - defer syscall.Umask(oldMask) - - if err := os.Chmod(consolePath, 0600); err != nil { - return err - } - - if err := os.Chown(consolePath, hostRootUid, hostRootGid); err != nil { - return err - } - - if err := label.SetFileLabel(consolePath, mountLabel); err != nil { - return fmt.Errorf("set file label %s %s", consolePath, err) - } - - dest := filepath.Join(rootfs, "dev/console") - - f, err := os.Create(dest) - if err != nil && !os.IsExist(err) { - return fmt.Errorf("create %s %s", dest, err) - } - - if f != nil { - f.Close() - } - - if err := syscall.Mount(consolePath, dest, "bind", syscall.MS_BIND, ""); err != nil { - return fmt.Errorf("bind %s to %s %s", consolePath, dest, err) - } - - return nil -} - -func OpenAndDup(consolePath string) error { - slave, err := OpenTerminal(consolePath, syscall.O_RDWR) - if err != nil { - return fmt.Errorf("open terminal %s", err) - } - - if err := syscall.Dup2(int(slave.Fd()), 0); err != nil { - return err - } - - if err := syscall.Dup2(int(slave.Fd()), 1); err != nil { - return err - } - - return syscall.Dup2(int(slave.Fd()), 2) -} - -// Unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. -// Unlockpt should be called before opening the slave side of a pseudoterminal. 
-func Unlockpt(f *os.File) error { - var u int32 - - return Ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) -} - -// Ptsname retrieves the name of the first available pts for the given master. -func Ptsname(f *os.File) (string, error) { - var n int32 - - if err := Ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { - return "", err - } - - return fmt.Sprintf("/dev/pts/%d", n), nil -} - -// CreateMasterAndConsole will open /dev/ptmx on the host and retreive the -// pts name for use as the pty slave inside the container -func CreateMasterAndConsole() (*os.File, string, error) { - master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) - if err != nil { - return nil, "", err - } - - console, err := Ptsname(master) - if err != nil { - return nil, "", err - } - - if err := Unlockpt(master); err != nil { - return nil, "", err - } - - return master, console, nil -} - -// OpenPtmx opens /dev/ptmx, i.e. the PTY master. -func OpenPtmx() (*os.File, error) { - // O_NOCTTY and O_CLOEXEC are not present in os package so we use the syscall's one for all. 
- return os.OpenFile("/dev/ptmx", syscall.O_RDONLY|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) -} - -// OpenTerminal is a clone of os.OpenFile without the O_CLOEXEC -// used to open the pty slave inside the container namespace -func OpenTerminal(name string, flag int) (*os.File, error) { - r, e := syscall.Open(name, flag, 0) - if e != nil { - return nil, &os.PathError{Op: "open", Path: name, Err: e} - } - return os.NewFile(uintptr(r), name), nil -} - -func Ioctl(fd uintptr, flag, data uintptr) error { - if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 { - return err - } - - return nil -} diff --git a/linux_console.go b/linux_console.go new file mode 100644 index 000000000..f1eeaedfa --- /dev/null +++ b/linux_console.go @@ -0,0 +1,145 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "os" + "path/filepath" + "syscall" + "unsafe" + + "github.com/docker/libcontainer/label" +) + +func NewConsole() (Console, error) { + master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + console, err := ptsname(master) + if err != nil { + return nil, err + } + if err := unlockpt(master); err != nil { + return nil, err + } + return &linuxConsole{ + slavePath: console, + master: master, + }, nil +} + +// newConsoleFromPath is an internal fucntion returning an initialzied console for use inside +// a container's MNT namespace. +func newConsoleFromPath(slavePath string) *linuxConsole { + return &linuxConsole{ + slavePath: slavePath, + } +} + +// linuxConsole is a linux psuedo TTY for use within a container. 
+type linuxConsole struct { + master *os.File + slavePath string +} + +func (c *linuxConsole) Fd() uintptr { + return c.master.Fd() +} + +func (c *linuxConsole) Path() string { + return c.slavePath +} + +func (c *linuxConsole) Read(b []byte) (int, error) { + return c.master.Read(b) +} + +func (c *linuxConsole) Write(b []byte) (int, error) { + return c.master.Write(b) +} + +func (c *linuxConsole) Close() error { + if m := c.master; m != nil { + return m.Close() + } + return nil +} + +// mount initializes the console inside the rootfs mounting with the specified mount label +// and applying the correct ownership of the console. +func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error { + oldMask := syscall.Umask(0000) + defer syscall.Umask(oldMask) + if err := os.Chmod(c.slavePath, 0600); err != nil { + return err + } + if err := os.Chown(c.slavePath, uid, gid); err != nil { + return err + } + if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil { + return err + } + dest := filepath.Join(rootfs, "dev/console") + f, err := os.Create(dest) + if err != nil && !os.IsExist(err) { + return err + } + if f != nil { + f.Close() + } + return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "") +} + +// dupStdio opens the slavePath for the console and dup2s the fds to the current +// processes stdio, fd 0,1,2. +func (c *linuxConsole) dupStdio() error { + slave, err := c.open(syscall.O_RDWR) + if err != nil { + return err + } + fd := int(slave.Fd()) + for _, i := range []int{0, 1, 2} { + if err := syscall.Dup2(fd, i); err != nil { + return err + } + } + return nil +} + +// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave. 
+func (c *linuxConsole) open(flag int) (*os.File, error) { + r, e := syscall.Open(c.slavePath, flag, 0) + if e != nil { + return nil, &os.PathError{ + Op: "open", + Path: c.slavePath, + Err: e, + } + } + return os.NewFile(uintptr(r), c.slavePath), nil +} + +func ioctl(fd uintptr, flag, data uintptr) error { + if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 { + return err + } + return nil +} + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +func unlockpt(f *os.File) error { + var u int32 + return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) +} + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + var n int32 + if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} diff --git a/linux_rootfs.go b/linux_rootfs.go index 2542dfe54..c1bf4572e 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -10,7 +10,6 @@ import ( "syscall" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/label" ) @@ -237,7 +236,8 @@ func setupPtmx(config *configs.Config) error { if err != nil { return err } - return console.Setup(config.Rootfs, config.Console, config.MountLabel, uid, gid) + console := newConsoleFromPath(config.Console) + return console.mount(config.Rootfs, config.MountLabel, uid, gid) } return nil } diff --git a/linux_standard_init.go b/linux_standard_init.go index 9b0ac76bf..c667d0c35 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -7,7 +7,6 @@ import ( "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/configs" - consolepkg "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/label" 
"github.com/docker/libcontainer/security/restrict" "github.com/docker/libcontainer/system" @@ -22,16 +21,17 @@ func (l *linuxStandardInit) Init() error { if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { return err } - console := l.config.Config.Console - if console != "" { - if err := consolepkg.OpenAndDup(console); err != nil { + consolePath := l.config.Config.Console + if consolePath != "" { + console := newConsoleFromPath(consolePath) + if err := console.dupStdio(); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return err } - if console != "" { + if consolePath != "" { if err := system.Setctty(); err != nil { return err } diff --git a/linux_userns_init.go b/linux_userns_init.go index 71f2a96e3..2c32f2741 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -6,7 +6,6 @@ import ( "syscall" "github.com/docker/libcontainer/apparmor" - consolepkg "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/security/restrict" "github.com/docker/libcontainer/system" @@ -21,16 +20,17 @@ func (l *linuxUsernsInit) Init() error { if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { return err } - console := l.config.Config.Console - if console != "" { - if err := consolepkg.OpenAndDup("/dev/console"); err != nil { + consolePath := l.config.Config.Console + if consolePath != "" { + console := newConsoleFromPath(consolePath) + if err := console.dupStdio(); err != nil { return err } } if _, err := syscall.Setsid(); err != nil { return err } - if console != "" { + if consolePath != "" { if err := system.Setctty(); err != nil { return err } diff --git a/nsinit/exec.go b/nsinit/exec.go index d12f3638b..c7635e34b 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -1,55 +1,19 @@ package main import ( - "io" "os" "os/signal" "syscall" "github.com/codegangsta/cli" - "github.com/docker/docker/pkg/term" "github.com/docker/libcontainer" - 
"github.com/docker/libcontainer/configs" - consolepkg "github.com/docker/libcontainer/console" + "github.com/docker/libcontainer/utils" ) -type tty struct { - master *os.File - console string - state *term.State -} - -func (t *tty) Close() error { - if t.master != nil { - t.master.Close() - } - if t.state != nil { - term.RestoreTerminal(os.Stdin.Fd(), t.state) - } - return nil -} - -func (t *tty) set(config *configs.Config) { - config.Console = t.console -} - -func (t *tty) attach(process *libcontainer.Process) { - if t.master != nil { - process.Stderr = nil - process.Stdout = nil - process.Stdin = nil - } -} - -func (t *tty) resize() error { - if t.master == nil { - return nil - } - ws, err := term.GetWinsize(os.Stdin.Fd()) - if err != nil { - return err - } - return term.SetWinsize(t.master.Fd(), ws) +var standardEnvironment = &cli.StringSlice{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOSTNAME=nsinit", + "TERM=xterm", } var execCommand = cli.Command{ @@ -60,6 +24,8 @@ var execCommand = cli.Command{ cli.BoolFlag{Name: "tty", Usage: "allocate a TTY to the container"}, cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"}, + cli.StringFlag{Name: "user,u", Value: "root", Usage: "set the user, uid, and/or gid for the process"}, + cli.StringSliceFlag{Name: "env", Value: standardEnvironment, Usage: "set environment variables for the process"}, }, } @@ -81,7 +47,7 @@ func execAction(context *cli.Context) { if err != nil { fatal(err) } - tty.set(config) + config.Console = tty.console.Path() if container, err = factory.Create(context.String("id"), config); err != nil { fatal(err) } @@ -89,6 +55,8 @@ func execAction(context *cli.Context) { go handleSignals(container, tty) process := &libcontainer.Process{ Args: context.Args(), + Env: context.StringSlice("env"), + User: context.String("user"), Stdin: os.Stdin, 
Stdout: os.Stdout, Stderr: os.Stderr, @@ -109,19 +77,7 @@ func execAction(context *cli.Context) { if err := container.Destroy(); err != nil { fatal(err) } - exit(status.Sys().(syscall.WaitStatus)) -} - -func exit(status syscall.WaitStatus) { - var exitCode int - if status.Exited() { - exitCode = status.ExitStatus() - } else if status.Signaled() { - exitCode = -int(status.Signal()) - } else { - fatalf("Unexpected status") - } - os.Exit(exitCode) + os.Exit(utils.ExitStatus(status.Sys().(syscall.WaitStatus))) } func handleSignals(container libcontainer.Container, tty *tty) { @@ -137,24 +93,3 @@ func handleSignals(container libcontainer.Container, tty *tty) { } } } - -func newTty(context *cli.Context) (*tty, error) { - if context.Bool("tty") { - master, console, err := consolepkg.CreateMasterAndConsole() - if err != nil { - return nil, err - } - go io.Copy(master, os.Stdin) - go io.Copy(os.Stdout, master) - state, err := term.SetRawTerminal(os.Stdin.Fd()) - if err != nil { - return nil, err - } - return &tty{ - master: master, - console: console, - state: state, - }, nil - } - return &tty{}, nil -} diff --git a/nsinit/tty.go b/nsinit/tty.go new file mode 100644 index 000000000..f1e593740 --- /dev/null +++ b/nsinit/tty.go @@ -0,0 +1,64 @@ +package main + +import ( + "io" + "os" + + "github.com/codegangsta/cli" + "github.com/docker/docker/pkg/term" + "github.com/docker/libcontainer" +) + +func newTty(context *cli.Context) (*tty, error) { + if context.Bool("tty") { + console, err := libcontainer.NewConsole() + if err != nil { + return nil, err + } + go io.Copy(console, os.Stdin) + go io.Copy(os.Stdout, console) + state, err := term.SetRawTerminal(os.Stdin.Fd()) + if err != nil { + return nil, err + } + return &tty{ + console: console, + state: state, + }, nil + } + return &tty{}, nil +} + +type tty struct { + console libcontainer.Console + state *term.State +} + +func (t *tty) Close() error { + if t.console != nil { + t.console.Close() + } + if t.state != nil { + 
term.RestoreTerminal(os.Stdin.Fd(), t.state) + } + return nil +} + +func (t *tty) attach(process *libcontainer.Process) { + if t.console != nil { + process.Stderr = nil + process.Stdout = nil + process.Stdin = nil + } +} + +func (t *tty) resize() error { + if t.console == nil { + return nil + } + ws, err := term.GetWinsize(os.Stdin.Fd()) + if err != nil { + return err + } + return term.SetWinsize(t.console.Fd(), ws) +} From 1c895b409acc5c3425963e1a2472c0221dc0c22f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 9 Feb 2015 14:22:52 -0800 Subject: [PATCH 078/101] Move mount logic into root package Signed-off-by: Michael Crosby --- configs/mount.go | 122 ----------------------------------------------- linux_rootfs.go | 102 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 98 insertions(+), 126 deletions(-) diff --git a/configs/mount.go b/configs/mount.go index 5a26a287e..2f20de9ed 100644 --- a/configs/mount.go +++ b/configs/mount.go @@ -1,17 +1,5 @@ package configs -import ( - "fmt" - "os" - "path/filepath" - "syscall" - - "github.com/docker/docker/pkg/symlink" - "github.com/docker/libcontainer/label" -) - -const DefaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV - type Mount struct { Type string `json:"type,omitempty"` Source string `json:"source,omitempty"` // Source path, in the host namespace @@ -21,113 +9,3 @@ type Mount struct { Private bool `json:"private,omitempty"` Slave bool `json:"slave,omitempty"` } - -func (m *Mount) Mount(rootfs, mountLabel string) error { - switch m.Type { - case "bind": - return m.bindMount(rootfs, mountLabel) - case "tmpfs": - return m.tmpfsMount(rootfs, mountLabel) - default: - return fmt.Errorf("unsupported mount type %s for %s", m.Type, m.Destination) - } -} - -func (m *Mount) bindMount(rootfs, mountLabel string) error { - var ( - flags = syscall.MS_BIND | syscall.MS_REC - dest = filepath.Join(rootfs, m.Destination) - ) - - if !m.Writable { - flags = flags | syscall.MS_RDONLY - } - - if 
m.Slave { - flags = flags | syscall.MS_SLAVE - } - - stat, err := os.Stat(m.Source) - if err != nil { - return err - } - - // TODO: (crosbymichael) This does not belong here and should be done a layer above - dest, err = symlink.FollowSymlinkInScope(dest, rootfs) - if err != nil { - return err - } - - if err := createIfNotExists(dest, stat.IsDir()); err != nil { - return fmt.Errorf("creating new bind mount target %s", err) - } - - if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil { - return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err) - } - - if !m.Writable { - if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil { - return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err) - } - } - - if m.Relabel != "" { - if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil { - return fmt.Errorf("relabeling %s to %s %s", m.Source, mountLabel, err) - } - } - - if m.Private { - if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil { - return fmt.Errorf("mounting %s private %s", dest, err) - } - } - - return nil -} - -func (m *Mount) tmpfsMount(rootfs, mountLabel string) error { - var ( - err error - l = label.FormatMountLabel("", mountLabel) - dest = filepath.Join(rootfs, m.Destination) - ) - - // TODO: (crosbymichael) This does not belong here and should be done a layer above - if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { - return err - } - - if err := createIfNotExists(dest, true); err != nil { - return fmt.Errorf("creating new tmpfs mount target %s", err) - } - - if err := syscall.Mount("tmpfs", dest, "tmpfs", uintptr(DefaultMountFlags), l); err != nil { - return fmt.Errorf("%s mounting %s in tmpfs", err, dest) - } - - return nil -} - -func createIfNotExists(path string, isDir bool) error { - if _, err := os.Stat(path); err != nil { - if os.IsNotExist(err) { - if isDir { - if err := os.MkdirAll(path, 0755); 
err != nil { - return err - } - } else { - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { - return err - } - f, err := os.OpenFile(path, os.O_CREATE, 0755) - if err != nil { - return err - } - f.Close() - } - } - } - return nil -} diff --git a/linux_rootfs.go b/linux_rootfs.go index c1bf4572e..1f0259c34 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -9,6 +9,7 @@ import ( "path/filepath" "syscall" + "github.com/docker/docker/pkg/symlink" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" ) @@ -34,7 +35,7 @@ func setupRootfs(config *configs.Config) (err error) { } // apply any user specified mounts within the new mount namespace for _, m := range config.Mounts { - if err := m.Mount(config.Rootfs, config.MountLabel); err != nil { + if err := mountUserMount(m, config.Rootfs, config.MountLabel); err != nil { return err } } @@ -68,7 +69,7 @@ func setupRootfs(config *configs.Config) (err error) { } if config.Readonlyfs { if err := setReadonly(); err != nil { - return fmt.Errorf("set readonly %s", err) + return err } } syscall.Umask(0022) @@ -80,10 +81,10 @@ func setupRootfs(config *configs.Config) (err error) { func mountSystem(config *configs.Config) error { for _, m := range newSystemMounts(config.Rootfs, config.MountLabel, config.RestrictSys) { if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { - return fmt.Errorf("mkdirall %s %s", m.path, err) + return err } if err := syscall.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil { - return fmt.Errorf("mounting %s into %s %s", m.source, m.path, err) + return err } } return nil @@ -277,3 +278,96 @@ func msMoveRoot(rootfs string) error { } return syscall.Chdir("/") } + +func mountUserMount(m *configs.Mount, rootfs, mountLabel string) error { + switch m.Type { + case "bind": + return bindMount(m, rootfs, mountLabel) + case "tmpfs": + return tmpfsMount(m, rootfs, mountLabel) + default: + return fmt.Errorf("unsupported mount type %s for 
%s", m.Type, m.Destination) + } +} + +func bindMount(m *configs.Mount, rootfs, mountLabel string) error { + var ( + flags = syscall.MS_BIND | syscall.MS_REC + dest = filepath.Join(rootfs, m.Destination) + ) + if !m.Writable { + flags = flags | syscall.MS_RDONLY + } + if m.Slave { + flags = flags | syscall.MS_SLAVE + } + stat, err := os.Stat(m.Source) + if err != nil { + return err + } + // TODO: (crosbymichael) This does not belong here and should be done a layer above + dest, err = symlink.FollowSymlinkInScope(dest, rootfs) + if err != nil { + return err + } + if err := createIfNotExists(dest, stat.IsDir()); err != nil { + return fmt.Errorf("creating new bind mount target %s", err) + } + if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil { + return err + } + if !m.Writable { + if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil { + return err + } + } + if m.Relabel != "" { + if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil { + return err + } + } + if m.Private { + if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil { + return err + } + } + return nil +} + +func tmpfsMount(m *configs.Mount, rootfs, mountLabel string) error { + var ( + err error + l = label.FormatMountLabel("", mountLabel) + dest = filepath.Join(rootfs, m.Destination) + ) + // TODO: (crosbymichael) This does not belong here and should be done a layer above + if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { + return err + } + if err := createIfNotExists(dest, true); err != nil { + return err + } + return syscall.Mount("tmpfs", dest, "tmpfs", uintptr(defaultMountFlags), l) +} + +func createIfNotExists(path string, isDir bool) error { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + if isDir { + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Dir(path), 0755); err != 
nil { + return err + } + f, err := os.OpenFile(path, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + } + return nil +} From 6a04779b415109a8bc78764bd00660e32c432f8e Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 9 Feb 2015 14:42:21 -0800 Subject: [PATCH 079/101] Remove restrict package Signed-off-by: Michael Crosby --- linux_rootfs.go | 51 ++++++++++++++++++++++++------ linux_standard_init.go | 8 +++-- linux_userns_init.go | 8 +++-- security/restrict/restrict.go | 53 -------------------------------- security/restrict/unsupported.go | 9 ------ 5 files changed, 53 insertions(+), 76 deletions(-) delete mode 100644 security/restrict/restrict.go delete mode 100644 security/restrict/unsupported.go diff --git a/linux_rootfs.go b/linux_rootfs.go index 1f0259c34..86491480a 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "syscall" + "time" "github.com/docker/docker/pkg/symlink" "github.com/docker/libcontainer/configs" @@ -350,24 +351,54 @@ func tmpfsMount(m *configs.Mount, rootfs, mountLabel string) error { return syscall.Mount("tmpfs", dest, "tmpfs", uintptr(defaultMountFlags), l) } +// createIfNotExists creates a file or a directory only if it does not already exist. func createIfNotExists(path string, isDir bool) error { if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { if isDir { - if err := os.MkdirAll(path, 0755); err != nil { - return err - } - } else { - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { - return err - } - f, err := os.OpenFile(path, os.O_CREATE, 0755) - if err != nil { + return os.MkdirAll(path, 0755) + } + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return err + } + f, err := os.OpenFile(path, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + return nil +} + +// remountReadonly will bind over the top of an existing path and ensure that it is read-only. 
+func remountReadonly(path string) error { + for i := 0; i < 5; i++ { + if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) { + switch err { + case syscall.EINVAL: + // Probably not a mountpoint, use bind-mount + if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil { return err } - f.Close() + return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "") + case syscall.EBUSY: + time.Sleep(100 * time.Millisecond) + continue + default: + return err } } + return nil + } + return fmt.Errorf("unable to mount %s as readonly max retries reached", path) +} + +// maskProckcore bind mounts /dev/null over the top of /proc/kcore inside a container to avoid security +// issues from processes reading memory information. +func maskProckcore() error { + if err := syscall.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err) } return nil } diff --git a/linux_standard_init.go b/linux_standard_init.go index c667d0c35..2cf7a9f23 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -8,7 +8,6 @@ import ( "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/security/restrict" "github.com/docker/libcontainer/system" ) @@ -64,7 +63,12 @@ func (l *linuxStandardInit) Init() error { return err } if l.config.Config.RestrictSys { - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + for _, path := range []string{"proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"} { + if err := remountReadonly(path); err != nil { + return err + } + } + if err := maskProckcore(); err != nil { return err } } diff --git a/linux_userns_init.go b/linux_userns_init.go index 
2c32f2741..a898f2d28 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -7,7 +7,6 @@ import ( "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/security/restrict" "github.com/docker/libcontainer/system" ) @@ -53,7 +52,12 @@ func (l *linuxUsernsInit) Init() error { return err } if l.config.Config.RestrictSys { - if err := restrict.Restrict("proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"); err != nil { + for _, path := range []string{"proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"} { + if err := remountReadonly(path); err != nil { + return err + } + } + if err := maskProckcore(); err != nil { return err } } diff --git a/security/restrict/restrict.go b/security/restrict/restrict.go deleted file mode 100644 index dd765b1f1..000000000 --- a/security/restrict/restrict.go +++ /dev/null @@ -1,53 +0,0 @@ -// +build linux - -package restrict - -import ( - "fmt" - "os" - "syscall" - "time" -) - -const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV - -func mountReadonly(path string) error { - for i := 0; i < 5; i++ { - if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) { - switch err { - case syscall.EINVAL: - // Probably not a mountpoint, use bind-mount - if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil { - return err - } - - return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "") - case syscall.EBUSY: - time.Sleep(100 * time.Millisecond) - continue - default: - return err - } - } - - return nil - } - - return fmt.Errorf("unable to mount %s as readonly max retries reached", path) -} - -// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts). 
-// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes). -func Restrict(mounts ...string) error { - for _, dest := range mounts { - if err := mountReadonly(dest); err != nil { - return fmt.Errorf("unable to remount %s readonly: %s", dest, err) - } - } - - if err := syscall.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { - return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err) - } - - return nil -} diff --git a/security/restrict/unsupported.go b/security/restrict/unsupported.go deleted file mode 100644 index 464e8d498..000000000 --- a/security/restrict/unsupported.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build !linux - -package restrict - -import "fmt" - -func Restrict() error { - return fmt.Errorf("not supported") -} From ad49d71504abdcfb6eb0bf38187c647c2bf30662 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 9 Feb 2015 15:16:27 -0800 Subject: [PATCH 080/101] Remove network package Also add ability to get network stats from multiple interfaces. 
Signed-off-by: Michael Crosby --- container.go | 7 -- linux_container.go | 14 ++- linux_init.go | 17 ++-- linux_network.go | 201 ++++++++++++++++++++++++++++++++++++++++++++ linux_process.go | 3 +- network/loopback.go | 25 ------ network/network.go | 117 -------------------------- network/stats.go | 70 --------------- network/strategy.go | 36 -------- network/veth.go | 85 ------------------- stats.go | 22 +++++ 11 files changed, 239 insertions(+), 358 deletions(-) create mode 100644 linux_network.go delete mode 100644 network/loopback.go delete mode 100644 network/network.go delete mode 100644 network/stats.go delete mode 100644 network/strategy.go delete mode 100644 network/veth.go create mode 100644 stats.go diff --git a/container.go b/container.go index 9db1e297b..7c9a3308e 100644 --- a/container.go +++ b/container.go @@ -6,16 +6,9 @@ package libcontainer import ( "os" - "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/network" ) -type Stats struct { - NetworkStats *network.NetworkStats `json:"network_stats,omitempty"` - CgroupStats *cgroups.Stats `json:"cgroup_stats,omitempty"` -} - // A libcontainer container object. // // Each container is thread-safe within the same process. 
Since a container can diff --git a/linux_container.go b/linux_container.go index 8f733d475..06f90b3c5 100644 --- a/linux_container.go +++ b/linux_container.go @@ -10,14 +10,9 @@ import ( "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/network" "github.com/golang/glog" ) -type pid struct { - Pid int `json:"Pid"` -} - type linuxContainer struct { id string root string @@ -73,13 +68,14 @@ func (c *linuxContainer) Stats() (*Stats, error) { if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newGenericError(err, SystemError) } - // TODO: handle stats for multiple veth interfaces for _, iface := range c.config.Networks { - if iface.Type == "veth" { - if stats.NetworkStats, err = network.GetStats(iface.VethHost); err != nil { + switch iface.Type { + case "veth": + istats, err := getNetworkInterfaceStats(iface.VethHost) + if err != nil { return stats, newGenericError(err, SystemError) } - break + stats.Interfaces = append(stats.Interfaces, istats) } } return stats, nil diff --git a/linux_init.go b/linux_init.go index 9b28af639..663bfa6d5 100644 --- a/linux_init.go +++ b/linux_init.go @@ -11,7 +11,6 @@ import ( "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" - "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/security/capabilities" "github.com/docker/libcontainer/system" "github.com/docker/libcontainer/user" @@ -27,13 +26,17 @@ const ( initUsernsSetup initType = "userns_setup" ) +type pid struct { + Pid int `json:"pid"` +} + // Process is used for transferring parameters from Exec() to Init() type initConfig struct { - Args []string `json:"args,omitempty"` - Env []string `json:"env,omitempty"` - Cwd string `json:"cwd,omitempty"` - User string `json:"user,omitempty"` - Config *configs.Config `json:"config,omitempty"` + Args []string `json:"args"` + Env []string `json:"env"` + Cwd string `json:"cwd"` + User string 
`json:"user"` + Config *configs.Config `json:"config"` } type initer interface { @@ -183,7 +186,7 @@ func setupUser(config *initConfig) error { // setting the MTU and IP address along with the default gateway func setupNetwork(config *configs.Config) error { for _, config := range config.Networks { - strategy, err := network.GetStrategy(config.Type) + strategy, err := getStrategy(config.Type) if err != nil { return err } diff --git a/linux_network.go b/linux_network.go new file mode 100644 index 000000000..cc199be41 --- /dev/null +++ b/linux_network.go @@ -0,0 +1,201 @@ +// +build linux + +package libcontainer + +import ( + "errors" + "fmt" + "io/ioutil" + "net" + "path/filepath" + "strconv" + "strings" + + "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/netlink" +) + +const defaultVethInterfaceName = "eth0" + +var ( + ErrNotValidStrategyType = errors.New("not a valid network strategy type") +) + +var strategies = map[string]networkStrategy{ + "veth": &veth{}, + "loopback": &loopback{}, +} + +// networkStrategy represents a specific network configuration for +// a container's networking stack +type networkStrategy interface { + Create(*configs.Network, int) error + Initialize(*configs.Network) error +} + +// getStrategy returns the specific network strategy for the +// provided type. If no strategy is registered for the type an +// ErrNotValidStrategyType is returned. +func getStrategy(tpe string) (networkStrategy, error) { + s, exists := strategies[tpe] + if !exists { + return nil, ErrNotValidStrategyType + } + return s, nil +} + +// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. +func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) { + out := &NetworkInterface{Name: interfaceName} + // This can happen if the network runtime information is missing - possible if the + // container was created by an old version of libcontainer. 
+ if interfaceName == "" { + return out, nil + } + type netStatsPair struct { + // Where to write the output. + Out *uint64 + // The network stats file to read. + File string + } + // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. + netStats := []netStatsPair{ + {Out: &out.RxBytes, File: "tx_bytes"}, + {Out: &out.RxPackets, File: "tx_packets"}, + {Out: &out.RxErrors, File: "tx_errors"}, + {Out: &out.RxDropped, File: "tx_dropped"}, + + {Out: &out.TxBytes, File: "rx_bytes"}, + {Out: &out.TxPackets, File: "rx_packets"}, + {Out: &out.TxErrors, File: "rx_errors"}, + {Out: &out.TxDropped, File: "rx_dropped"}, + } + for _, netStat := range netStats { + data, err := readSysfsNetworkStats(interfaceName, netStat.File) + if err != nil { + return nil, err + } + *(netStat.Out) = data + } + return out, nil +} + +// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics +func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { + data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)) + if err != nil { + return 0, err + } + return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) +} + +// loopback is a network strategy that provides a basic loopback device +type loopback struct { +} + +func (l *loopback) Create(n *configs.Network, nspid int) error { + return nil +} + +func (l *loopback) Initialize(config *configs.Network) error { + iface, err := net.InterfaceByName("lo") + if err != nil { + return err + } + return netlink.NetworkLinkUp(iface) +} + +// veth is a network strategy that uses a bridge and creates +// a veth pair, one that stays outside on the host and the other +// is placed inside the container's namespace +type veth struct { +} + +func (v *veth) Create(n *configs.Network, nspid int) error { + if n.Bridge == "" { + return fmt.Errorf("bridge is not specified") + } + bridge, err := 
net.InterfaceByName(n.Bridge) + if err != nil { + return err + } + if err := netlink.NetworkCreateVethPair(n.VethHost, n.VethChild, n.TxQueueLen); err != nil { + return err + } + host, err := net.InterfaceByName(n.VethHost) + if err != nil { + return err + } + if err := netlink.AddToBridge(host, bridge); err != nil { + return err + } + if err := netlink.NetworkSetMTU(host, n.Mtu); err != nil { + return err + } + if err := netlink.NetworkLinkUp(host); err != nil { + return err + } + child, err := net.InterfaceByName(n.VethChild) + if err != nil { + return err + } + return netlink.NetworkSetNsPid(child, nspid) +} + +func (v *veth) Initialize(config *configs.Network) error { + vethChild := config.VethChild + if vethChild == "" { + return fmt.Errorf("vethChild is not specified") + } + child, err := net.InterfaceByName(vethChild) + if err != nil { + return err + } + if err := netlink.NetworkLinkDown(child); err != nil { + return err + } + if err := netlink.NetworkChangeName(child, defaultVethInterfaceName); err != nil { + return err + } + // get the interface again after we changed the name as the index also changes. 
+ if child, err = net.InterfaceByName(defaultVethInterfaceName); err != nil { + return err + } + if config.MacAddress != "" { + if err := netlink.NetworkSetMacAddress(child, config.MacAddress); err != nil { + return err + } + } + ip, ipNet, err := net.ParseCIDR(config.Address) + if err != nil { + return err + } + if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil { + return err + } + if config.IPv6Address != "" { + if ip, ipNet, err = net.ParseCIDR(config.IPv6Address); err != nil { + return err + } + if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil { + return err + } + } + if err := netlink.NetworkSetMTU(child, config.Mtu); err != nil { + return err + } + if err := netlink.NetworkLinkUp(child); err != nil { + return err + } + if config.Gateway != "" { + if err := netlink.AddDefaultGw(config.Gateway, defaultVethInterfaceName); err != nil { + return err + } + } + if config.IPv6Gateway != "" { + if err := netlink.AddDefaultGw(config.IPv6Gateway, defaultVethInterfaceName); err != nil { + return err + } + } + return nil +} diff --git a/linux_process.go b/linux_process.go index 0de894abc..04c0e5d65 100644 --- a/linux_process.go +++ b/linux_process.go @@ -12,7 +12,6 @@ import ( "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/system" "github.com/golang/glog" ) @@ -236,7 +235,7 @@ func (p *initProcess) sendConfig() error { func (p *initProcess) createNetworkInterfaces() error { for _, config := range p.config.Config.Networks { - strategy, err := network.GetStrategy(config.Type) + strategy, err := getStrategy(config.Type) if err != nil { return err } diff --git a/network/loopback.go b/network/loopback.go deleted file mode 100644 index 11db88851..000000000 --- a/network/loopback.go +++ /dev/null @@ -1,25 +0,0 @@ -// +build linux - -package network - -import ( - "fmt" - - "github.com/docker/libcontainer/configs" -) - -// Loopback is a 
network strategy that provides a basic loopback device -type Loopback struct { -} - -func (l *Loopback) Create(n *configs.Network, nspid int) error { - return nil -} - -func (l *Loopback) Initialize(config *configs.Network) error { - // Do not set the MTU on the loopback interface - use the default. - if err := InterfaceUp("lo"); err != nil { - return fmt.Errorf("lo up %s", err) - } - return nil -} diff --git a/network/network.go b/network/network.go deleted file mode 100644 index 40b25b135..000000000 --- a/network/network.go +++ /dev/null @@ -1,117 +0,0 @@ -// +build linux - -package network - -import ( - "net" - - "github.com/docker/libcontainer/netlink" -) - -func InterfaceUp(name string) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.NetworkLinkUp(iface) -} - -func InterfaceDown(name string) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.NetworkLinkDown(iface) -} - -func ChangeInterfaceName(old, newName string) error { - iface, err := net.InterfaceByName(old) - if err != nil { - return err - } - return netlink.NetworkChangeName(iface, newName) -} - -func CreateVethPair(name1, name2 string, txQueueLen int) error { - return netlink.NetworkCreateVethPair(name1, name2, txQueueLen) -} - -func SetInterfaceInNamespacePid(name string, nsPid int) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.NetworkSetNsPid(iface, nsPid) -} - -func SetInterfaceInNamespaceFd(name string, fd uintptr) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.NetworkSetNsFd(iface, int(fd)) -} - -func SetInterfaceMaster(name, master string) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - masterIface, err := net.InterfaceByName(master) - if err != nil { - return err - } - return netlink.AddToBridge(iface, masterIface) -} - -func 
SetDefaultGateway(ip, ifaceName string) error { - return netlink.AddDefaultGw(ip, ifaceName) -} - -func SetInterfaceMac(name string, macaddr string) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.NetworkSetMacAddress(iface, macaddr) -} - -func SetInterfaceIp(name string, rawIp string) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - ip, ipNet, err := net.ParseCIDR(rawIp) - if err != nil { - return err - } - return netlink.NetworkLinkAddIp(iface, ip, ipNet) -} - -func DeleteInterfaceIp(name string, rawIp string) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - ip, ipNet, err := net.ParseCIDR(rawIp) - if err != nil { - return err - } - return netlink.NetworkLinkDelIp(iface, ip, ipNet) -} - -func SetMtu(name string, mtu int) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.NetworkSetMTU(iface, mtu) -} - -func SetHairpinMode(name string, enabled bool) error { - iface, err := net.InterfaceByName(name) - if err != nil { - return err - } - return netlink.SetHairpinMode(iface, enabled) -} diff --git a/network/stats.go b/network/stats.go deleted file mode 100644 index 1d7cfe77c..000000000 --- a/network/stats.go +++ /dev/null @@ -1,70 +0,0 @@ -package network - -import ( - "io/ioutil" - "path/filepath" - "strconv" - "strings" -) - -type NetworkStats struct { - RxBytes uint64 `json:"rx_bytes"` - RxPackets uint64 `json:"rx_packets"` - RxErrors uint64 `json:"rx_errors"` - RxDropped uint64 `json:"rx_dropped"` - TxBytes uint64 `json:"tx_bytes"` - TxPackets uint64 `json:"tx_packets"` - TxErrors uint64 `json:"tx_errors"` - TxDropped uint64 `json:"tx_dropped"` -} - -// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. 
-func GetStats(vethHostInterface string) (*NetworkStats, error) { - // This can happen if the network runtime information is missing - possible if the container was created by an old version of libcontainer. - if vethHostInterface == "" { - return &NetworkStats{}, nil - } - out := &NetworkStats{} - type netStatsPair struct { - // Where to write the output. - Out *uint64 - - // The network stats file to read. - File string - } - // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. - netStats := []netStatsPair{ - {Out: &out.RxBytes, File: "tx_bytes"}, - {Out: &out.RxPackets, File: "tx_packets"}, - {Out: &out.RxErrors, File: "tx_errors"}, - {Out: &out.RxDropped, File: "tx_dropped"}, - - {Out: &out.TxBytes, File: "rx_bytes"}, - {Out: &out.TxPackets, File: "rx_packets"}, - {Out: &out.TxErrors, File: "rx_errors"}, - {Out: &out.TxDropped, File: "rx_dropped"}, - } - for _, netStat := range netStats { - data, err := readSysfsNetworkStats(vethHostInterface, netStat.File) - if err != nil { - return nil, err - } - *(netStat.Out) = data - } - return out, nil -} - -// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics -func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { - fullPath := filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile) - data, err := ioutil.ReadFile(fullPath) - if err != nil { - return 0, err - } - value, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) - if err != nil { - return 0, err - } - - return value, err -} diff --git a/network/strategy.go b/network/strategy.go deleted file mode 100644 index 8631c11d8..000000000 --- a/network/strategy.go +++ /dev/null @@ -1,36 +0,0 @@ -// +build linux - -package network - -import ( - "errors" - - "github.com/docker/libcontainer/configs" -) - -var ( - ErrNotValidStrategyType = errors.New("not a valid network strategy type") -) - -var strategies = 
map[string]NetworkStrategy{ - "veth": &Veth{}, - "loopback": &Loopback{}, -} - -// NetworkStrategy represents a specific network configuration for -// a container's networking stack -type NetworkStrategy interface { - Create(*configs.Network, int) error - Initialize(*configs.Network) error -} - -// GetStrategy returns the specific network strategy for the -// provided type. If no strategy is registered for the type an -// ErrNotValidStrategyType is returned. -func GetStrategy(tpe string) (NetworkStrategy, error) { - s, exists := strategies[tpe] - if !exists { - return nil, ErrNotValidStrategyType - } - return s, nil -} diff --git a/network/veth.go b/network/veth.go deleted file mode 100644 index 5d554e8b7..000000000 --- a/network/veth.go +++ /dev/null @@ -1,85 +0,0 @@ -// +build linux - -package network - -import ( - "fmt" - - "github.com/docker/libcontainer/configs" -) - -// Veth is a network strategy that uses a bridge and creates -// a veth pair, one that stays outside on the host and the other -// is placed inside the container's namespace -type Veth struct { -} - -const defaultDevice = "eth0" - -func (v *Veth) Create(n *configs.Network, nspid int) error { - var ( - bridge = n.Bridge - txQueueLen = n.TxQueueLen - ) - if bridge == "" { - return fmt.Errorf("bridge is not specified") - } - if err := CreateVethPair(n.VethHost, n.VethChild, txQueueLen); err != nil { - return err - } - if err := SetInterfaceMaster(n.VethHost, bridge); err != nil { - return err - } - if err := SetMtu(n.VethHost, n.Mtu); err != nil { - return err - } - if err := InterfaceUp(n.VethHost); err != nil { - return err - } - return SetInterfaceInNamespacePid(n.VethChild, nspid) - return nil -} - -func (v *Veth) Initialize(config *configs.Network) error { - vethChild := config.VethChild - if vethChild == "" { - return fmt.Errorf("vethChild is not specified") - } - if err := InterfaceDown(vethChild); err != nil { - return fmt.Errorf("interface down %s %s", vethChild, err) - } - if err := 
ChangeInterfaceName(vethChild, defaultDevice); err != nil { - return fmt.Errorf("change %s to %s %s", vethChild, defaultDevice, err) - } - if config.MacAddress != "" { - if err := SetInterfaceMac(defaultDevice, config.MacAddress); err != nil { - return fmt.Errorf("set %s mac %s", defaultDevice, err) - } - } - if err := SetInterfaceIp(defaultDevice, config.Address); err != nil { - return fmt.Errorf("set %s ip %s", defaultDevice, err) - } - if config.IPv6Address != "" { - if err := SetInterfaceIp(defaultDevice, config.IPv6Address); err != nil { - return fmt.Errorf("set %s ipv6 %s", defaultDevice, err) - } - } - - if err := SetMtu(defaultDevice, config.Mtu); err != nil { - return fmt.Errorf("set %s mtu to %d %s", defaultDevice, config.Mtu, err) - } - if err := InterfaceUp(defaultDevice); err != nil { - return fmt.Errorf("%s up %s", defaultDevice, err) - } - if config.Gateway != "" { - if err := SetDefaultGateway(config.Gateway, defaultDevice); err != nil { - return fmt.Errorf("set gateway to %s on device %s failed with %s", config.Gateway, defaultDevice, err) - } - } - if config.IPv6Gateway != "" { - if err := SetDefaultGateway(config.IPv6Gateway, defaultDevice); err != nil { - return fmt.Errorf("set gateway for ipv6 to %s on device %s failed with %s", config.IPv6Gateway, defaultDevice, err) - } - } - return nil -} diff --git a/stats.go b/stats.go new file mode 100644 index 000000000..198a8bf54 --- /dev/null +++ b/stats.go @@ -0,0 +1,22 @@ +package libcontainer + +import "github.com/docker/libcontainer/cgroups" + +type NetworkInterface struct { + // Name is the name of the network interface. 
+ Name string + + RxBytes uint64 + RxPackets uint64 + RxErrors uint64 + RxDropped uint64 + TxBytes uint64 + TxPackets uint64 + TxErrors uint64 + TxDropped uint64 +} + +type Stats struct { + Interfaces []*NetworkInterface + CgroupStats *cgroups.Stats +} From 758d151e619042841b8a39cf4c0525b09d74887b Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 9 Feb 2015 15:38:28 -0800 Subject: [PATCH 081/101] Fully remove security package This moves the capabilities package into the root package. Signed-off-by: Michael Crosby --- linux_capabilities.go | 90 +++++++++++++++++++++++++++ linux_init.go | 9 ++- security/capabilities/capabilities.go | 56 ----------------- security/capabilities/types.go | 88 -------------------------- security/capabilities/types_test.go | 19 ------ 5 files changed, 96 insertions(+), 166 deletions(-) create mode 100644 linux_capabilities.go delete mode 100644 security/capabilities/capabilities.go delete mode 100644 security/capabilities/types.go delete mode 100644 security/capabilities/types_test.go diff --git a/linux_capabilities.go b/linux_capabilities.go new file mode 100644 index 000000000..b1c5c1760 --- /dev/null +++ b/linux_capabilities.go @@ -0,0 +1,90 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "os" + + "github.com/syndtr/gocapability/capability" +) + +const allCapabilityTypes = capability.CAPS | capability.BOUNDS + +var capabilityList = map[string]capability.Cap{ + "SETPCAP": capability.CAP_SETPCAP, + "SYS_MODULE": capability.CAP_SYS_MODULE, + "SYS_RAWIO": capability.CAP_SYS_RAWIO, + "SYS_PACCT": capability.CAP_SYS_PACCT, + "SYS_ADMIN": capability.CAP_SYS_ADMIN, + "SYS_NICE": capability.CAP_SYS_NICE, + "SYS_RESOURCE": capability.CAP_SYS_RESOURCE, + "SYS_TIME": capability.CAP_SYS_TIME, + "SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG, + "MKNOD": capability.CAP_MKNOD, + "AUDIT_WRITE": capability.CAP_AUDIT_WRITE, + "AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL, + "MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE, + 
"MAC_ADMIN": capability.CAP_MAC_ADMIN, + "NET_ADMIN": capability.CAP_NET_ADMIN, + "SYSLOG": capability.CAP_SYSLOG, + "CHOWN": capability.CAP_CHOWN, + "NET_RAW": capability.CAP_NET_RAW, + "DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE, + "FOWNER": capability.CAP_FOWNER, + "DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH, + "FSETID": capability.CAP_FSETID, + "KILL": capability.CAP_KILL, + "SETGID": capability.CAP_SETGID, + "SETUID": capability.CAP_SETUID, + "LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE, + "NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE, + "NET_BROADCAST": capability.CAP_NET_BROADCAST, + "IPC_LOCK": capability.CAP_IPC_LOCK, + "IPC_OWNER": capability.CAP_IPC_OWNER, + "SYS_CHROOT": capability.CAP_SYS_CHROOT, + "SYS_PTRACE": capability.CAP_SYS_PTRACE, + "SYS_BOOT": capability.CAP_SYS_BOOT, + "LEASE": capability.CAP_LEASE, + "SETFCAP": capability.CAP_SETFCAP, + "WAKE_ALARM": capability.CAP_WAKE_ALARM, + "BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND, +} + +func newCapWhitelist(caps []string) (*whitelist, error) { + l := []capability.Cap{} + for _, c := range caps { + v, ok := capabilityList[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + l = append(l, v) + } + pid, err := capability.NewPid(os.Getpid()) + if err != nil { + return nil, err + } + return &whitelist{ + keep: l, + pid: pid, + }, nil +} + +type whitelist struct { + pid capability.Capabilities + keep []capability.Cap +} + +// dropBoundingSet drops the capability bounding set to those specified in the whitelist. +func (w *whitelist) dropBoundingSet() error { + w.pid.Clear(capability.BOUNDS) + w.pid.Set(capability.BOUNDS, w.keep...) + return w.pid.Apply(capability.BOUNDS) +} + +// drop drops all capabilities for the current process except those specified in the whitelist. +func (w *whitelist) drop() error { + w.pid.Clear(allCapabilityTypes) + w.pid.Set(allCapabilityTypes, w.keep...) 
+ return w.pid.Apply(allCapabilityTypes) +} diff --git a/linux_init.go b/linux_init.go index 663bfa6d5..5ff1afc71 100644 --- a/linux_init.go +++ b/linux_init.go @@ -11,7 +11,6 @@ import ( "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" - "github.com/docker/libcontainer/security/capabilities" "github.com/docker/libcontainer/system" "github.com/docker/libcontainer/user" "github.com/docker/libcontainer/utils" @@ -97,8 +96,12 @@ func finalizeNamespace(config *initConfig) error { if err := utils.CloseExecFrom(3); err != nil { return err } + w, err := newCapWhitelist(config.Config.Capabilities) + if err != nil { + return err + } // drop capabilities in bounding set before changing user - if err := capabilities.DropBoundingSet(config.Config.Capabilities); err != nil { + if err := w.dropBoundingSet(); err != nil { return err } // preserve existing capabilities while we change users @@ -112,7 +115,7 @@ func finalizeNamespace(config *initConfig) error { return err } // drop all other capabilities - if err := capabilities.DropCapabilities(config.Config.Capabilities); err != nil { + if err := w.drop(); err != nil { return err } if config.Cwd != "" { diff --git a/security/capabilities/capabilities.go b/security/capabilities/capabilities.go deleted file mode 100644 index 7aef5fa67..000000000 --- a/security/capabilities/capabilities.go +++ /dev/null @@ -1,56 +0,0 @@ -package capabilities - -import ( - "os" - - "github.com/syndtr/gocapability/capability" -) - -const allCapabilityTypes = capability.CAPS | capability.BOUNDS - -// DropBoundingSet drops the capability bounding set to those specified in the -// container configuration. -func DropBoundingSet(capabilities []string) error { - c, err := capability.NewPid(os.Getpid()) - if err != nil { - return err - } - - keep := getEnabledCapabilities(capabilities) - c.Clear(capability.BOUNDS) - c.Set(capability.BOUNDS, keep...) 
- - if err := c.Apply(capability.BOUNDS); err != nil { - return err - } - - return nil -} - -// DropCapabilities drops all capabilities for the current process except those specified in the container configuration. -func DropCapabilities(capList []string) error { - c, err := capability.NewPid(os.Getpid()) - if err != nil { - return err - } - - keep := getEnabledCapabilities(capList) - c.Clear(allCapabilityTypes) - c.Set(allCapabilityTypes, keep...) - - if err := c.Apply(allCapabilityTypes); err != nil { - return err - } - return nil -} - -// getEnabledCapabilities returns the capabilities that should not be dropped by the container. -func getEnabledCapabilities(capList []string) []capability.Cap { - keep := []capability.Cap{} - for _, capability := range capList { - if c := GetCapability(capability); c != nil { - keep = append(keep, c.Value) - } - } - return keep -} diff --git a/security/capabilities/types.go b/security/capabilities/types.go deleted file mode 100644 index a960b804c..000000000 --- a/security/capabilities/types.go +++ /dev/null @@ -1,88 +0,0 @@ -package capabilities - -import "github.com/syndtr/gocapability/capability" - -type ( - CapabilityMapping struct { - Key string `json:"key,omitempty"` - Value capability.Cap `json:"value,omitempty"` - } - Capabilities []*CapabilityMapping -) - -func (c *CapabilityMapping) String() string { - return c.Key -} - -func GetCapability(key string) *CapabilityMapping { - for _, capp := range capabilityList { - if capp.Key == key { - cpy := *capp - return &cpy - } - } - return nil -} - -func GetAllCapabilities() []string { - output := make([]string, len(capabilityList)) - for i, capability := range capabilityList { - output[i] = capability.String() - } - return output -} - -// Contains returns true if the specified Capability is -// in the slice -func (c Capabilities) contains(capp string) bool { - return c.get(capp) != nil -} - -func (c Capabilities) get(capp string) *CapabilityMapping { - for _, cap := range c { - if 
cap.Key == capp { - return cap - } - } - return nil -} - -var capabilityList = Capabilities{ - {Key: "SETPCAP", Value: capability.CAP_SETPCAP}, - {Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE}, - {Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO}, - {Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT}, - {Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN}, - {Key: "SYS_NICE", Value: capability.CAP_SYS_NICE}, - {Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE}, - {Key: "SYS_TIME", Value: capability.CAP_SYS_TIME}, - {Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG}, - {Key: "MKNOD", Value: capability.CAP_MKNOD}, - {Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE}, - {Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL}, - {Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE}, - {Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN}, - {Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN}, - {Key: "SYSLOG", Value: capability.CAP_SYSLOG}, - {Key: "CHOWN", Value: capability.CAP_CHOWN}, - {Key: "NET_RAW", Value: capability.CAP_NET_RAW}, - {Key: "DAC_OVERRIDE", Value: capability.CAP_DAC_OVERRIDE}, - {Key: "FOWNER", Value: capability.CAP_FOWNER}, - {Key: "DAC_READ_SEARCH", Value: capability.CAP_DAC_READ_SEARCH}, - {Key: "FSETID", Value: capability.CAP_FSETID}, - {Key: "KILL", Value: capability.CAP_KILL}, - {Key: "SETGID", Value: capability.CAP_SETGID}, - {Key: "SETUID", Value: capability.CAP_SETUID}, - {Key: "LINUX_IMMUTABLE", Value: capability.CAP_LINUX_IMMUTABLE}, - {Key: "NET_BIND_SERVICE", Value: capability.CAP_NET_BIND_SERVICE}, - {Key: "NET_BROADCAST", Value: capability.CAP_NET_BROADCAST}, - {Key: "IPC_LOCK", Value: capability.CAP_IPC_LOCK}, - {Key: "IPC_OWNER", Value: capability.CAP_IPC_OWNER}, - {Key: "SYS_CHROOT", Value: capability.CAP_SYS_CHROOT}, - {Key: "SYS_PTRACE", Value: capability.CAP_SYS_PTRACE}, - {Key: "SYS_BOOT", Value: capability.CAP_SYS_BOOT}, - {Key: "LEASE", Value: capability.CAP_LEASE}, - {Key: "SETFCAP", 
Value: capability.CAP_SETFCAP}, - {Key: "WAKE_ALARM", Value: capability.CAP_WAKE_ALARM}, - {Key: "BLOCK_SUSPEND", Value: capability.CAP_BLOCK_SUSPEND}, -} diff --git a/security/capabilities/types_test.go b/security/capabilities/types_test.go deleted file mode 100644 index 06e8a2b01..000000000 --- a/security/capabilities/types_test.go +++ /dev/null @@ -1,19 +0,0 @@ -package capabilities - -import ( - "testing" -) - -func TestCapabilitiesContains(t *testing.T) { - caps := Capabilities{ - GetCapability("MKNOD"), - GetCapability("SETPCAP"), - } - - if caps.contains("SYS_ADMIN") { - t.Fatal("capabilities should not contain SYS_ADMIN") - } - if !caps.contains("MKNOD") { - t.Fatal("capabilities should contain MKNOD but does not") - } -} From 2ec6b585ea50e0a2d663ddccb2ca63a12ed82da6 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 9 Feb 2015 18:12:04 -0800 Subject: [PATCH 082/101] Add new API examples to readme Signed-off-by: Michael Crosby --- README.md | 155 ++++++++++++++++++++++++++++++++++++++++++------ linux_rootfs.go | 4 ++ nsinit/exec.go | 2 +- 3 files changed, 143 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 37047e68c..00984e903 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,169 @@ ## libcontainer - reference implementation for containers [![Build Status](https://ci.dockerproject.com/github.com/docker/libcontainer/status.svg?branch=master)](https://ci.dockerproject.com/github.com/docker/libcontainer) -### Note on API changes: +Libcontainer provides a native Go implementation for creating containers +with namespaces, cgroups, capabilities, and filesystem access controls. +It allows you to manage the lifecycle of the container performing additional operations +after the container is created. -Please bear with us while we work on making the libcontainer API stable and something that we can support long term. 
We are currently discussing the API with the community, therefore, if you currently depend on libcontainer please pin your dependency at a specific tag or commit id. Please join the discussion and help shape the API. -#### Background +#### Container +A container is a self contained execution environment that shares the kernel of the +host system and which is (optionally) isolated from other containers in the system. -libcontainer specifies configuration options for what a container is. It provides a native Go implementation for using Linux namespaces with no external dependencies. libcontainer provides many convenience functions for working with namespaces, networking, and management. +#### Using libcontainer +To create a container you first have to initialize an instance of a factory +that will handle the creation and initialization for a container. -#### Container -A container is a self contained execution environment that shares the kernel of the host system and which is (optionally) isolated from other containers in the system. +Because containers are spawned in a two step process you will need to provide +arguments to a binary that will be executed as the init process for the container. +To use the current binary that is spawning the containers and acting as the parent +you can use `os.Args[0]` and we have a command called `init` setup. + +```go +initArgs := []string{os.Args[0], "init"} + +root, err := libcontainer.New("/var/lib/container", initArgs) +if err != nil { + log.Fatal(err) +} +``` -libcontainer may be used to execute a process in a container. If a user tries to run a new process inside an existing container, the new process is added to the processes executing in the container. +Once you have an instance of the factory created we can create a configuration +struct describing how the container is to be created. 
A sample would look similar to this: + +```go +config := &configs.Config{ + Rootfs: rootfs, + Capabilities: []string{ + "CHOWN", + "DAC_OVERRIDE", + "FSETID", + "FOWNER", + "MKNOD", + "NET_RAW", + "SETGID", + "SETUID", + "SETFCAP", + "SETPCAP", + "NET_BIND_SERVICE", + "SYS_CHROOT", + "KILL", + "AUDIT_WRITE", + }, + Namespaces: configs.Namespaces([]configs.Namespace{ + {Type: configs.NEWNS}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + {Type: configs.NEWPID}, + {Type: configs.NEWNET}, + }), + Cgroups: &configs.Cgroup{ + Name: "test-container", + Parent: "system", + AllowAllDevices: false, + AllowedDevices: configs.DefaultAllowedDevices, + }, + + Devices: configs.DefaultAutoCreatedDevices, + Hostname: "testing", + Networks: []*configs.Network{ + { + Type: "loopback", + Address: "127.0.0.1/0", + Gateway: "localhost", + }, + }, + Rlimits: []configs.Rlimit{ + { + Type: syscall.RLIMIT_NOFILE, + Hard: uint64(1024), + Soft: uint64(1024), + }, + }, +} +``` + +Once you have the configuration populated you can create a container: + +```go +container, err := root.Create("container-id", config) +``` +To spawn bash as the initial process inside the container and have the +process's pid returned in order to wait, signal, or kill the process: -#### Root file system +```go +process := &libcontainer.Process{ + Args: []string{"/bin/bash"}, + Env: []string{"PATH=/bin"}, + User: "daemon", + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, +} -A container runs with a directory known as its *root file system*, or *rootfs*, mounted as the file system root. The rootfs is usually a full system tree. +pid, err := container.Start(process) +if err != nil { + log.Fatal(err) +} -#### Configuration +// wait for the process to finish. +wait(pid) -A container is initially configured by supplying configuration data when the container is created. +// destroy the container. 
+container.Destroy() +``` + +Additional ways to interact with a running container are: + +```go +// return all the pids for all processes running inside the container. +processes, err := container.Processes() + +// get detailed cpu, memory, io, and network statistics for the container and +// its processes. +stats, err := container.Stats() + + +// pause all processes inside the container. +container.Pause() + +// resume all paused processes. +container.Resume() +``` #### nsinit -`nsinit` is a cli application which demonstrates the use of libcontainer. It is able to spawn new containers or join existing containers, based on the current directory. +`nsinit` is a cli application which demonstrates the use of libcontainer. +It is able to spawn new containers or join existing containers. A root +filesystem must be provided for use along with a container configuration file. -To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into the directory with your specified configuration. Environment, networking, and different capabilities for the container are specified in this file. The configuration is used for each process executed inside the container. +To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into +the directory with your specified configuration. Environment, networking, +and different capabilities for the container are specified in this file. +The configuration is used for each process executed inside the container. See the `sample_configs` folder for examples of what the container configuration should look like. To execute `/bin/bash` in the current directory as a container just run the following **as root**: ```bash -nsinit exec /bin/bash +nsinit exec --tty /bin/bash ``` -If you wish to spawn another process inside the container while your current bash session is running, run the same command again to get another bash shell (or change the command). 
If the original process (PID 1) dies, all other processes spawned inside the container will be killed and the namespace will be removed. +If you wish to spawn another process inside the container while your +current bash session is running, run the same command again to +get another bash shell (or change the command). If the original +process (PID 1) dies, all other processes spawned inside the container +will be killed and the namespace will be removed. -You can identify if a process is running in a container by looking to see if `state.json` is in the root of the directory. +You can identify if a process is running in a container by +looking to see if `state.json` is in the root of the directory. -You may also specify an alternate root place where the `container.json` file is read and where the `state.json` file will be saved. +You may also specify an alternate root place where +the `container.json` file is read and where the `state.json` file will be saved. #### Future See the [roadmap](ROADMAP.md). diff --git a/linux_rootfs.go b/linux_rootfs.go index 86491480a..20a1a9db9 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -46,6 +46,10 @@ func setupRootfs(config *configs.Config) (err error) { if err := setupPtmx(config); err != nil { return err } + uid, err := config.HostUID() + if err != nil { + return err + } // stdin, stdout and stderr could be pointing to /dev/null from parent namespace. // Re-open them inside this namespace. // FIXME: Need to fix this for user namespaces. 
diff --git a/nsinit/exec.go b/nsinit/exec.go index c7635e34b..6b90ce28d 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -21,7 +21,7 @@ var execCommand = cli.Command{ Usage: "execute a new command inside a container", Action: execAction, Flags: []cli.Flag{ - cli.BoolFlag{Name: "tty", Usage: "allocate a TTY to the container"}, + cli.BoolFlag{Name: "tty,t", Usage: "allocate a TTY to the container"}, cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"}, cli.StringFlag{Name: "user,u", Value: "root", Usage: "set the user, uid, and/or gid for the process"}, From fde0b7aa0d8b1c3e4605ffe21d78c33d0c6bec7f Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 10 Feb 2015 11:51:45 -0800 Subject: [PATCH 083/101] Refactor network and veth creation Remove veth interfaces on the host if an error occurs. Provide the host interface name, temporary peer interface name and the name of the peer once it is inside the container's namespace in the Network config. Signed-off-by: Michael Crosby --- configs/network.go | 11 ++++---- integration/init_test.go | 4 +-- linux_container.go | 7 +++-- linux_init.go | 31 +++++++++++++-------- linux_network.go | 53 ++++++++++++++++++++++-------------- linux_process.go | 6 +++- linux_rootfs.go | 6 +--- linux_standard_init.go | 2 +- linux_userns_sidecar_init.go | 4 +-- nsinit/init.go | 3 ++ 10 files changed, 76 insertions(+), 51 deletions(-) diff --git a/configs/network.go b/configs/network.go index fdccce50c..890953a0d 100644 --- a/configs/network.go +++ b/configs/network.go @@ -8,6 +8,9 @@ type Network struct { // Type sets the networks type, commonly veth and loopback Type string `json:"type,omitempty"` + // Name of the network interface + Name string `json:"name,omitempty"` + // The bridge to use. 
Bridge string `json:"bridge,omitempty"` @@ -36,11 +39,9 @@ type Network struct { // Note: This does not apply to loopback interfaces. TxQueueLen int `json:"txqueuelen,omitempty"` - // The name of the veth interface on the Host. - VethHost string `json:"veth_host,omitempty"` - - // The name of the veth interface created inside the container for the child. - VethChild string `json:"veth_child,omitempty"` + // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the + // container. + HostInterfaceName string `json:"host_interface_name,omitempty"` } // Routes can be specified to create entries in the route table as the container is started diff --git a/integration/init_test.go b/integration/init_test.go index 6b4bc32df..8af88efd9 100644 --- a/integration/init_test.go +++ b/integration/init_test.go @@ -15,14 +15,12 @@ func init() { if len(os.Args) < 2 || os.Args[1] != "init" { return } + runtime.GOMAXPROCS(1) runtime.LockOSThread() - factory, err := libcontainer.New("", nil) if err != nil { log.Fatalf("unable to initialize for container: %s", err) } - factory.StartInitialization(3) - os.Exit(1) } diff --git a/linux_container.go b/linux_container.go index 06f90b3c5..c92ffa71b 100644 --- a/linux_container.go +++ b/linux_container.go @@ -71,7 +71,7 @@ func (c *linuxContainer) Stats() (*Stats, error) { for _, iface := range c.config.Networks { switch iface.Type { case "veth": - istats, err := getNetworkInterfaceStats(iface.VethHost) + istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) if err != nil { return stats, newGenericError(err, SystemError) } @@ -134,6 +134,8 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec. 
} func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *initProcess { + t := "_LIBCONTAINER_INITTYPE=standard" + cloneFlags := c.config.Namespaces.CloneFlags() if cloneFlags&syscall.CLONE_NEWUSER != 0 { c.addUidGidMappings(cmd.SysProcAttr) @@ -141,9 +143,10 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c if cmd.SysProcAttr.Credential == nil { cmd.SysProcAttr.Credential = &syscall.Credential{} } + t = "_LIBCONTAINER_INITTYPE=userns" } + cmd.Env = append(cmd.Env, t) cmd.SysProcAttr.Cloneflags = cloneFlags - cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=standard") return &initProcess{ cmd: cmd, childPipe: childPipe, diff --git a/linux_init.go b/linux_init.go index 5ff1afc71..7c56afe87 100644 --- a/linux_init.go +++ b/linux_init.go @@ -29,13 +29,23 @@ type pid struct { Pid int `json:"pid"` } +// network is an internal struct used to setup container networks. +type network struct { + configs.Network + + // TempVethPeerName is a unique temporary veth peer name that was placed into + // the container's namespace. 
+ TempVethPeerName string `json:"temp_veth_peer_name"` +} + // Process is used for transferring parameters from Exec() to Init() type initConfig struct { - Args []string `json:"args"` - Env []string `json:"env"` - Cwd string `json:"cwd"` - User string `json:"user"` - Config *configs.Config `json:"config"` + Args []string `json:"args"` + Env []string `json:"env"` + Cwd string `json:"cwd"` + User string `json:"user"` + Config *configs.Config `json:"config"` + Networks []*network `json:"network"` } type initer interface { @@ -184,18 +194,15 @@ func setupUser(config *initConfig) error { return nil } -// setupVethNetwork uses the Network config if it is not nil to initialize -// the new veth interface inside the container for use by changing the name to eth0 -// setting the MTU and IP address along with the default gateway -func setupNetwork(config *configs.Config) error { +// setupNetwork sets up and initializes any network interface inside the container. +func setupNetwork(config *initConfig) error { for _, config := range config.Networks { strategy, err := getStrategy(config.Type) if err != nil { return err } - err1 := strategy.Initialize(config) - if err1 != nil { - return err1 + if err := strategy.initialize(config); err != nil { + return err } } return nil diff --git a/linux_network.go b/linux_network.go index cc199be41..0b5d33949 100644 --- a/linux_network.go +++ b/linux_network.go @@ -11,12 +11,10 @@ import ( "strconv" "strings" - "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" + "github.com/docker/libcontainer/utils" ) -const defaultVethInterfaceName = "eth0" - var ( ErrNotValidStrategyType = errors.New("not a valid network strategy type") ) @@ -29,8 +27,8 @@ var strategies = map[string]networkStrategy{ // networkStrategy represents a specific network configuration for // a container's networking stack type networkStrategy interface { - Create(*configs.Network, int) error - Initialize(*configs.Network) error + create(*network, 
int) error + initialize(*network) error } // getStrategy returns the specific network strategy for the @@ -93,11 +91,11 @@ func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { type loopback struct { } -func (l *loopback) Create(n *configs.Network, nspid int) error { +func (l *loopback) create(n *network, nspid int) error { return nil } -func (l *loopback) Initialize(config *configs.Network) error { +func (l *loopback) initialize(config *network) error { iface, err := net.InterfaceByName("lo") if err != nil { return err @@ -111,7 +109,18 @@ func (l *loopback) Initialize(config *configs.Network) error { type veth struct { } -func (v *veth) Create(n *configs.Network, nspid int) error { +func (v *veth) create(n *network, nspid int) (err error) { + tmpName, err := v.generateTempPeerName() + if err != nil { + return err + } + n.TempVethPeerName = tmpName + defer func() { + if err != nil { + netlink.NetworkLinkDel(n.HostInterfaceName) + netlink.NetworkLinkDel(n.TempVethPeerName) + } + }() if n.Bridge == "" { return fmt.Errorf("bridge is not specified") } @@ -119,10 +128,10 @@ func (v *veth) Create(n *configs.Network, nspid int) error { if err != nil { return err } - if err := netlink.NetworkCreateVethPair(n.VethHost, n.VethChild, n.TxQueueLen); err != nil { + if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil { return err } - host, err := net.InterfaceByName(n.VethHost) + host, err := net.InterfaceByName(n.HostInterfaceName) if err != nil { return err } @@ -135,30 +144,34 @@ func (v *veth) Create(n *configs.Network, nspid int) error { if err := netlink.NetworkLinkUp(host); err != nil { return err } - child, err := net.InterfaceByName(n.VethChild) + child, err := net.InterfaceByName(n.TempVethPeerName) if err != nil { return err } return netlink.NetworkSetNsPid(child, nspid) } -func (v *veth) Initialize(config *configs.Network) error { - vethChild := config.VethChild - if vethChild == "" { - 
return fmt.Errorf("vethChild is not specified") +func (v *veth) generateTempPeerName() (string, error) { + return utils.GenerateRandomName("veth", 7) +} + +func (v *veth) initialize(config *network) error { + peer := config.TempVethPeerName + if peer == "" { + return fmt.Errorf("peer is not specified") } - child, err := net.InterfaceByName(vethChild) + child, err := net.InterfaceByName(peer) if err != nil { return err } if err := netlink.NetworkLinkDown(child); err != nil { return err } - if err := netlink.NetworkChangeName(child, defaultVethInterfaceName); err != nil { + if err := netlink.NetworkChangeName(child, config.Name); err != nil { return err } // get the interface again after we changed the name as the index also changes. - if child, err = net.InterfaceByName(defaultVethInterfaceName); err != nil { + if child, err = net.InterfaceByName(config.Name); err != nil { return err } if config.MacAddress != "" { @@ -188,12 +201,12 @@ func (v *veth) Initialize(config *configs.Network) error { return err } if config.Gateway != "" { - if err := netlink.AddDefaultGw(config.Gateway, defaultVethInterfaceName); err != nil { + if err := netlink.AddDefaultGw(config.Gateway, config.Name); err != nil { return err } } if config.IPv6Gateway != "" { - if err := netlink.AddDefaultGw(config.IPv6Gateway, defaultVethInterfaceName); err != nil { + if err := netlink.AddDefaultGw(config.IPv6Gateway, config.Name); err != nil { return err } } diff --git a/linux_process.go b/linux_process.go index 04c0e5d65..2b434a511 100644 --- a/linux_process.go +++ b/linux_process.go @@ -239,9 +239,13 @@ func (p *initProcess) createNetworkInterfaces() error { if err != nil { return err } - if err := strategy.Create(config, p.pid()); err != nil { + n := &network{ + Network: *config, + } + if err := strategy.create(n, p.pid()); err != nil { return err } + p.config.Networks = append(p.config.Networks, n) } return nil } diff --git a/linux_rootfs.go b/linux_rootfs.go index 20a1a9db9..36fc18989 100644 --- 
a/linux_rootfs.go +++ b/linux_rootfs.go @@ -46,15 +46,11 @@ func setupRootfs(config *configs.Config) (err error) { if err := setupPtmx(config); err != nil { return err } - uid, err := config.HostUID() - if err != nil { - return err - } // stdin, stdout and stderr could be pointing to /dev/null from parent namespace. // Re-open them inside this namespace. // FIXME: Need to fix this for user namespaces. if !config.Namespaces.Contains(configs.NEWUSER) { - if err := reOpenDevNull(config.RootFs); err != nil { + if err := reOpenDevNull(config.Rootfs); err != nil { return err } } diff --git a/linux_standard_init.go b/linux_standard_init.go index 2cf7a9f23..c576bdbd8 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -35,7 +35,7 @@ func (l *linuxStandardInit) Init() error { return err } } - if err := setupNetwork(l.config.Config); err != nil { + if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { diff --git a/linux_userns_sidecar_init.go b/linux_userns_sidecar_init.go index 24dea9ef0..eedc63e0d 100644 --- a/linux_userns_sidecar_init.go +++ b/linux_userns_sidecar_init.go @@ -18,7 +18,7 @@ type linuxUsernsSideCar struct { } func (l *linuxUsernsSideCar) Init() error { - if err := setupNetwork(l.config.Config); err != nil { + if err := setupNetwork(l.config); err != nil { return err } if err := setupRoute(l.config.Config); err != nil { @@ -26,7 +26,7 @@ func (l *linuxUsernsSideCar) Init() error { } label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace - if l.config.Config.Namespaces.Contains(configs.NEWNET) { + if l.config.Config.Namespaces.Contains(configs.NEWNS) { if err := setupRootfs(l.config.Config); err != nil { return err } diff --git a/nsinit/init.go b/nsinit/init.go index d45d12871..9848c42b8 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -2,6 +2,7 @@ package main import ( "log" + "runtime" "github.com/codegangsta/cli" "github.com/docker/libcontainer" 
@@ -15,6 +16,8 @@ var initCommand = cli.Command{ cli.IntFlag{Name: "fd", Value: 0, Usage: "internal pipe fd"}, }, Action: func(context *cli.Context) { + runtime.GOMAXPROCS(1) + runtime.LockOSThread() factory, err := libcontainer.New("", nil) if err != nil { log.Fatal(err) From 7fff13632ef71664a734ac27ff39c928d80c9a26 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 11 Feb 2015 14:45:07 -0800 Subject: [PATCH 084/101] Add state method to return container's runtime state Signed-off-by: Michael Crosby --- configs/state.go | 12 ---- container.go | 24 ++++++++ linux_container.go | 46 ++++++++++++++- linux_container_test.go | 120 ++++++++++++++++++++++++++++++++++++++-- linux_factory.go | 8 +-- linux_factory_test.go | 8 +-- 6 files changed, 192 insertions(+), 26 deletions(-) diff --git a/configs/state.go b/configs/state.go index c1e8adcf7..5e7eef9a7 100644 --- a/configs/state.go +++ b/configs/state.go @@ -16,15 +16,3 @@ const ( // The container does not exist. Destroyed ) - -// State represents a running container's state -type State struct { - // InitPid is the init process id in the parent namespace - InitPid int `json:"init_pid,omitempty"` - - // InitStartTime is the init process start time - InitStartTime string `json:"init_start_time,omitempty"` - - // Path to all the cgroups setup for a container. Key is cgroup subsystem name. - CgroupPaths map[string]string `json:"cgroup_paths,omitempty"` -} diff --git a/container.go b/container.go index 7c9a3308e..a22463780 100644 --- a/container.go +++ b/container.go @@ -9,6 +9,23 @@ import ( "github.com/docker/libcontainer/configs" ) +// State represents a running container's state +type State struct { + // InitProcessPid is the init process id in the parent namespace. + InitProcessPid int + + // InitProcessStartTime is the init process start time. + InitProcessStartTime string + + // Path to all the cgroups setup for a container. Key is cgroup subsystem name + // with the value as the path. 
+ CgroupPaths map[string]string + + // NamespacePaths are filepaths to the container's namespaces. Key is the namespace name + // with the value as the path. + NamespacePaths map[string]string +} + // A libcontainer container object. // // Each container is thread-safe within the same process. Since a container can @@ -21,9 +38,16 @@ type Container interface { // Returns the current status of the container. // // errors: + // ContainerDestroyed - Container no longer exists, // Systemerror - System error. Status() (configs.Status, error) + // State returns the current container's state information. + // + // errors: + // Systemerror - System error. + State() (*State, error) + // Returns the current config of the container. Config() configs.Config diff --git a/linux_container.go b/linux_container.go index c92ffa71b..07f08aee4 100644 --- a/linux_container.go +++ b/linux_container.go @@ -43,13 +43,55 @@ func (c *linuxContainer) Status() (configs.Status, error) { } return 0, err } - if c.config.Cgroups != nil && - c.config.Cgroups.Freezer == configs.Frozen { + if c.config.Cgroups != nil && c.config.Cgroups.Freezer == configs.Frozen { return configs.Paused, nil } return configs.Running, nil } +func (c *linuxContainer) State() (*State, error) { + status, err := c.Status() + if err != nil { + return nil, err + } + if status == configs.Destroyed { + return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists) + } + startTime, err := c.initProcess.startTime() + if err != nil { + return nil, err + } + state := &State{ + InitProcessPid: c.initProcess.pid(), + InitProcessStartTime: startTime, + CgroupPaths: c.cgroupManager.GetPaths(), + NamespacePaths: make(map[string]string), + } + for _, ns := range c.config.Namespaces { + if ns.Path != "" { + state.NamespacePaths[string(ns.Type)] = ns.Path + continue + } + file := "" + switch ns.Type { + case configs.NEWNET: + file = "net" + case configs.NEWNS: + file = "mnt" + case configs.NEWPID: + file = "pid" +
case configs.NEWIPC: + file = "ipc" + case configs.NEWUSER: + file = "user" + case configs.NEWUTS: + file = "uts" + } + state.NamespacePaths[string(ns.Type)] = fmt.Sprintf("/proc/%d/ns/%s", c.initProcess.pid(), file) + } + return state, nil +} + func (c *linuxContainer) Processes() ([]int, error) { glog.Info("fetch container processes") pids, err := c.cgroupManager.GetPids() diff --git a/linux_container_test.go b/linux_container_test.go index 11ad253e2..8dbb39c53 100644 --- a/linux_container_test.go +++ b/linux_container_test.go @@ -3,6 +3,8 @@ package libcontainer import ( + "fmt" + "os" "testing" "github.com/docker/libcontainer/cgroups" @@ -12,6 +14,7 @@ import ( type mockCgroupManager struct { pids []int stats *cgroups.Stats + paths map[string]string } func (m *mockCgroupManager) GetPids() ([]int, error) { @@ -31,25 +34,52 @@ func (m *mockCgroupManager) Destroy() error { } func (m *mockCgroupManager) GetPaths() map[string]string { - return nil + return m.paths } func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { return nil } +type mockProcess struct { + _pid int + started string +} + +func (m *mockProcess) terminate() error { + return nil +} + +func (m *mockProcess) pid() int { + return m._pid +} + +func (m *mockProcess) startTime() (string, error) { + return m.started, nil +} + +func (m *mockProcess) start() error { + return nil +} + +func (m *mockProcess) wait() (*os.ProcessState, error) { + return nil, nil +} + +func (m *mockProcess) signal(_ os.Signal) error { + return nil +} + func TestGetContainerPids(t *testing.T) { container := &linuxContainer{ id: "myid", config: &configs.Config{}, cgroupManager: &mockCgroupManager{pids: []int{1, 2, 3}}, } - pids, err := container.Processes() if err != nil { t.Fatal(err) } - for i, expected := range []int{1, 2, 3} { if pids[i] != expected { t.Fatalf("expected pid %d but received %d", expected, pids[i]) @@ -70,7 +100,6 @@ func TestGetContainerStats(t *testing.T) { }, }, } - stats, err := 
container.Stats() if err != nil { t.Fatal(err) @@ -82,3 +111,86 @@ func TestGetContainerStats(t *testing.T) { t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage) } } + +func TestGetContainerState(t *testing.T) { + var ( + pid = os.Getpid() + expectedMemoryPath = "/sys/fs/cgroup/memory/myid" + expectedNetworkPath = "/networks/fd" + ) + container := &linuxContainer{ + id: "myid", + config: &configs.Config{ + Namespaces: configs.Namespaces{ + {Type: configs.NEWPID}, + {Type: configs.NEWNS}, + {Type: configs.NEWNET, Path: expectedNetworkPath}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + }, + }, + initProcess: &mockProcess{ + _pid: pid, + started: "010", + }, + cgroupManager: &mockCgroupManager{ + pids: []int{1, 2, 3}, + stats: &cgroups.Stats{ + MemoryStats: cgroups.MemoryStats{ + Usage: 1024, + }, + }, + paths: map[string]string{ + "memory": expectedMemoryPath, + }, + }, + } + state, err := container.State() + if err != nil { + t.Fatal(err) + } + if state.InitProcessPid != pid { + t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid) + } + if state.InitProcessStartTime != "010" { + t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime) + } + paths := state.CgroupPaths + if paths == nil { + t.Fatal("cgroup paths should not be nil") + } + if memPath := paths["memory"]; memPath != expectedMemoryPath { + t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath) + } + for _, ns := range container.config.Namespaces { + path := state.NamespacePaths[string(ns.Type)] + if path == "" { + t.Fatalf("expected non nil namespace path for %s", ns.Type) + } + if ns.Type == configs.NEWNET { + if path != expectedNetworkPath { + t.Fatalf("expected path %q but received %q", expectedNetworkPath, path) + } + } else { + file := "" + switch ns.Type { + case configs.NEWNET: + file = "net" + case configs.NEWNS: + file = "mnt" + case configs.NEWPID: + file = "pid" + case 
configs.NEWIPC: + file = "ipc" + case configs.NEWUSER: + file = "user" + case configs.NEWUTS: + file = "uts" + } + expected := fmt.Sprintf("/proc/%d/ns/%s", pid, file) + if expected != path { + t.Fatalf("expected path %q but received %q", expected, path) + } + } + } +} diff --git a/linux_factory.go b/linux_factory.go index 161829277..8c33e0981 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -107,8 +107,8 @@ func (l *linuxFactory) Load(id string) (Container, error) { return nil, err } r := &restoredProcess{ - processPid: state.InitPid, - processStartTime: state.InitStartTime, + processPid: state.InitProcessPid, + processStartTime: state.InitProcessStartTime, } cgroupManager := cgroups.LoadCgroupManager(config.Cgroups, state.CgroupPaths) glog.Infof("using %s as cgroup manager", cgroupManager) @@ -171,7 +171,7 @@ func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) return config, nil } -func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { +func (l *linuxFactory) loadContainerState(root string) (*State, error) { f, err := os.Open(filepath.Join(root, stateFilename)) if err != nil { if os.IsNotExist(err) { @@ -180,7 +180,7 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) { return nil, newGenericError(err, SystemError) } defer f.Close() - var state *configs.State + var state *State if err := json.NewDecoder(f).Decode(&state); err != nil { return nil, newGenericError(err, SystemError) } diff --git a/linux_factory_test.go b/linux_factory_test.go index 028c73e7a..457ec10d6 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -80,8 +80,8 @@ func TestFactoryLoadContainer(t *testing.T) { expectedConfig = &configs.Config{ Rootfs: "/mycontainer/root", } - expectedState = &configs.State{ - InitPid: 1024, + expectedState = &State{ + InitProcessPid: 1024, } ) if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil { @@ -112,8 +112,8 @@ func TestFactoryLoadContainer(t 
*testing.T) { if !ok { t.Fatal("expected linux container on linux based systems") } - if lcontainer.initProcess.pid() != expectedState.InitPid { - t.Fatalf("expected init pid %d but received %d", expectedState.InitPid, lcontainer.initProcess.pid()) + if lcontainer.initProcess.pid() != expectedState.InitProcessPid { + t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, lcontainer.initProcess.pid()) } } From 31327166e5396d8078e33e7eeec2fc2e920fb84a Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 11 Feb 2015 15:09:54 -0800 Subject: [PATCH 085/101] Rename OOM to NotifyOOM Signed-off-by: Michael Crosby --- container.go | 4 ++-- linux_container.go | 2 +- nsinit/oom.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/container.go b/container.go index a22463780..6d681ac12 100644 --- a/container.go +++ b/container.go @@ -113,9 +113,9 @@ type Container interface { // Systemerror - System error. Signal(signal os.Signal) error - // OOM returns a read-only channel signaling when the container receives an OOM notification. + // NotifyOOM returns a read-only channel signaling when the container receives an OOM notification. // // errors: // Systemerror - System error. 
- OOM() (<-chan struct{}, error) + NotifyOOM() (<-chan struct{}, error) } diff --git a/linux_container.go b/linux_container.go index 07f08aee4..50df5ba7c 100644 --- a/linux_container.go +++ b/linux_container.go @@ -275,6 +275,6 @@ func (c *linuxContainer) Signal(signal os.Signal) error { } // TODO: rename to be more descriptive -func (c *linuxContainer) OOM() (<-chan struct{}, error) { +func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { return NotifyOnOOM(c.cgroupManager.GetPaths()) } diff --git a/nsinit/oom.go b/nsinit/oom.go index eabe0b2bd..c1b4c8051 100644 --- a/nsinit/oom.go +++ b/nsinit/oom.go @@ -17,7 +17,7 @@ var oomCommand = cli.Command{ if err != nil { log.Fatal(err) } - n, err := container.OOM() + n, err := container.NotifyOOM() if err != nil { log.Fatal(err) } From 5c246d038fc47b8d57a474e1b212ffe646764ee9 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 11 Feb 2015 16:45:23 -0800 Subject: [PATCH 086/101] Persist container state to disk Signed-off-by: Michael Crosby --- configs/cgroup.go | 30 ++++++++--------- configs/config.go | 56 +++++++++++++++--------------- configs/device.go | 16 ++++----- configs/mount.go | 14 ++++---- configs/namespaces.go | 2 +- configs/network.go | 30 ++++++++--------- configs/state.go | 18 ---------- container.go | 33 +++++++++++++++--- generic_error.go | 15 +++++++++ integration/exec_test.go | 2 +- linux_container.go | 73 +++++++++++++++++++++++++++------------- linux_factory.go | 47 +++----------------------- linux_factory_test.go | 4 +-- linux_init.go | 34 +++++++++++++++++++ linux_process.go | 23 +------------ nsinit/init.go | 9 +---- nsinit/utils.go | 2 +- 17 files changed, 211 insertions(+), 197 deletions(-) delete mode 100644 configs/state.go diff --git a/configs/cgroup.go b/configs/cgroup.go index 0dffc6401..92b9286a9 100644 --- a/configs/cgroup.go +++ b/configs/cgroup.go @@ -9,46 +9,46 @@ const ( ) type Cgroup struct { - Name string `json:"name,omitempty"` + Name string `json:"name"` // name of 
parent cgroup or slice - Parent string `json:"parent,omitempty"` + Parent string `json:"parent"` // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. - AllowAllDevices bool `json:"allow_all_devices,omitempty"` + AllowAllDevices bool `json:"allow_all_devices"` - AllowedDevices []*Device `json:"allowed_devices,omitempty"` + AllowedDevices []*Device `json:"allowed_devices"` // Memory limit (in bytes) - Memory int64 `json:"memory,omitempty"` + Memory int64 `json:"memory"` // Memory reservation or soft_limit (in bytes) - MemoryReservation int64 `json:"memory_reservation,omitempty"` + MemoryReservation int64 `json:"memory_reservation"` // Total memory usage (memory + swap); set `-1' to disable swap - MemorySwap int64 `json:"memory_swap,omitempty"` + MemorySwap int64 `json:"memory_swap"` // CPU shares (relative weight vs. other containers) - CpuShares int64 `json:"cpu_shares,omitempty"` + CpuShares int64 `json:"cpu_shares"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. - CpuQuota int64 `json:"cpu_quota,omitempty"` + CpuQuota int64 `json:"cpu_quota"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. - CpuPeriod int64 `json:"cpu_period,omitempty"` + CpuPeriod int64 `json:"cpu_period"` // CPU to use - CpusetCpus string `json:"cpuset_cpus,omitempty"` + CpusetCpus string `json:"cpuset_cpus"` // MEM to use - CpusetMems string `json:"cpuset_mems,omitempty"` + CpusetMems string `json:"cpuset_mems"` // Specifies per cgroup weight, range is from 10 to 1000. 
- BlkioWeight int64 `json:"blkio_weight,omitempty"` + BlkioWeight int64 `json:"blkio_weight"` // set the freeze value for the process - Freezer FreezerState `json:"freezer,omitempty"` + Freezer FreezerState `json:"freezer"` // Parent slice to use for systemd TODO: remove in favor or parent - Slice string `json:"slice,omitempty"` + Slice string `json:"slice"` } diff --git a/configs/config.go b/configs/config.go index d8e2c9eb3..fc7450d9f 100644 --- a/configs/config.go +++ b/configs/config.go @@ -3,98 +3,98 @@ package configs import "fmt" type Rlimit struct { - Type int `json:"type,omitempty"` - Hard uint64 `json:"hard,omitempty"` - Soft uint64 `json:"soft,omitempty"` + Type int `json:"type"` + Hard uint64 `json:"hard"` + Soft uint64 `json:"soft"` } // IDMap represents UID/GID Mappings for User Namespaces. type IDMap struct { - ContainerID int `json:"container_id,omitempty"` - HostID int `json:"host_id,omitempty"` - Size int `json:"size,omitempty"` + ContainerID int `json:"container_id"` + HostID int `json:"host_id"` + Size int `json:"size"` } // Config defines configuration options for executing a process inside a contained environment. type Config struct { // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs // This is a common option when the container is running in ramdisk - NoPivotRoot bool `json:"no_pivot_root,omitempty"` + NoPivotRoot bool `json:"no_pivot_root"` // ParentDeathSignal specifies the signal that is sent to the container's process in the case // that the parent process dies. - ParentDeathSignal int `json:"parent_death_signal,omitempty"` + ParentDeathSignal int `json:"parent_death_signal"` // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. // This is required when using read only root filesystems. 
In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. - PivotDir string `json:"pivot_dir,omitempty"` + PivotDir string `json:"pivot_dir"` // Path to a directory containing the container's root filesystem. - Rootfs string `json:"rootfs,omitempty"` + Rootfs string `json:"rootfs"` // Readonlyfs will remount the container's rootfs as readonly where only externally mounted // bind mounts are writtable. - Readonlyfs bool `json:"readonlyfs,omitempty"` + Readonlyfs bool `json:"readonlyfs"` // Mounts specify additional source and destination paths that will be mounted inside the container's // rootfs and mount namespace if specified - Mounts []*Mount `json:"mounts,omitempty"` + Mounts []*Mount `json:"mounts"` // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! - Devices []*Device `json:"devices,omitempty"` + Devices []*Device `json:"devices"` - MountLabel string `json:"mount_label,omitempty"` + MountLabel string `json:"mount_label"` // Hostname optionally sets the container's hostname if provided - Hostname string `json:"hostname,omitempty"` + Hostname string `json:"hostname"` // Console is the path to the console allocated to the container. 
- Console string `json:"console,omitempty"` + Console string `json:"console"` // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process - Namespaces Namespaces `json:"namespaces,omitempty"` + Namespaces Namespaces `json:"namespaces"` // Capabilities specify the capabilities to keep when executing the process inside the container // All capbilities not specified will be dropped from the processes capability mask - Capabilities []string `json:"capabilities,omitempty"` + Capabilities []string `json:"capabilities"` // Networks specifies the container's network setup to be created - Networks []*Network `json:"networks,omitempty"` + Networks []*Network `json:"networks"` // Routes can be specified to create entries in the route table as the container is started - Routes []*Route `json:"routes,omitempty"` + Routes []*Route `json:"routes"` // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available - Cgroups *Cgroup `json:"cgroups,omitempty"` + Cgroups *Cgroup `json:"cgroups"` // AppArmorProfile specifies the profile to apply to the process running in the container and is // change at the time the process is execed - AppArmorProfile string `json:"apparmor_profile,omitempty"` + AppArmorProfile string `json:"apparmor_profile"` // ProcessLabel specifies the label to apply to the process running in the container. 
It is // commonly used by selinux - ProcessLabel string `json:"process_label,omitempty"` + ProcessLabel string `json:"process_label"` // RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and // /proc/bus - RestrictSys bool `json:"restrict_sys,omitempty"` + RestrictSys bool `json:"restrict_sys"` // Rlimits specifies the resource limits, such as max open files, to set in the container // If Rlimits are not set, the container will inherit rlimits from the parent process - Rlimits []Rlimit `json:"rlimits,omitempty"` + Rlimits []Rlimit `json:"rlimits"` // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. - AdditionalGroups []int `json:"additional_groups,omitempty"` + AdditionalGroups []int `json:"additional_groups"` // UidMappings is an array of User ID mappings for User Namespaces - UidMappings []IDMap `json:"uid_mappings,omitempty"` + UidMappings []IDMap `json:"uid_mappings"` // GidMappings is an array of Group ID mappings for User Namespaces - GidMappings []IDMap `json:"gid_mappings,omitempty"` + GidMappings []IDMap `json:"gid_mappings"` } // Gets the root uid for the process on host which could be non-zero diff --git a/configs/device.go b/configs/device.go index a8117068e..abff26696 100644 --- a/configs/device.go +++ b/configs/device.go @@ -11,28 +11,28 @@ const ( type Device struct { // Device type, block, char, etc. - Type rune `json:"type,omitempty"` + Type rune `json:"type"` // Path to the device. - Path string `json:"path,omitempty"` + Path string `json:"path"` // Major is the device's major number. - Major int64 `json:"major,omitempty"` + Major int64 `json:"major"` // Minor is the device's minor number. - Minor int64 `json:"minor,omitempty"` + Minor int64 `json:"minor"` // Cgroup permissions format, rwm. - Permissions string `json:"permissions,omitempty"` + Permissions string `json:"permissions"` // FileMode permission bits for the device. 
- FileMode os.FileMode `json:"file_mode,omitempty"` + FileMode os.FileMode `json:"file_mode"` // Uid of the device. - Uid uint32 `json:"uid,omitempty"` + Uid uint32 `json:"uid"` // Gid of the device. - Gid uint32 `json:"gid,omitempty"` + Gid uint32 `json:"gid"` } func (d *Device) CgroupString() string { diff --git a/configs/mount.go b/configs/mount.go index 2f20de9ed..eb26c5c05 100644 --- a/configs/mount.go +++ b/configs/mount.go @@ -1,11 +1,11 @@ package configs type Mount struct { - Type string `json:"type,omitempty"` - Source string `json:"source,omitempty"` // Source path, in the host namespace - Destination string `json:"destination,omitempty"` // Destination path, in the container - Writable bool `json:"writable,omitempty"` - Relabel string `json:"relabel,omitempty"` // Relabel source if set, "z" indicates shared, "Z" indicates unshared - Private bool `json:"private,omitempty"` - Slave bool `json:"slave,omitempty"` + Type string `json:"type"` + Source string `json:"source"` // Source path, in the host namespace + Destination string `json:"destination"` // Destination path, in the container + Writable bool `json:"writable"` + Relabel string `json:"relabel"` // Relabel source if set, "z" indicates shared, "Z" indicates unshared + Private bool `json:"private"` + Slave bool `json:"slave"` } diff --git a/configs/namespaces.go b/configs/namespaces.go index a227f1ba9..fbb56e1d0 100644 --- a/configs/namespaces.go +++ b/configs/namespaces.go @@ -19,7 +19,7 @@ const ( // alternate path that is able to be joined via setns. 
type Namespace struct { Type NamespaceType `json:"type"` - Path string `json:"path,omitempty"` + Path string `json:"path"` } func (n *Namespace) Syscall() int { diff --git a/configs/network.go b/configs/network.go index 890953a0d..554439883 100644 --- a/configs/network.go +++ b/configs/network.go @@ -6,42 +6,42 @@ package configs // container to be setup with the host's networking stack type Network struct { // Type sets the networks type, commonly veth and loopback - Type string `json:"type,omitempty"` + Type string `json:"type"` // Name of the network interface - Name string `json:"name,omitempty"` + Name string `json:"name"` // The bridge to use. - Bridge string `json:"bridge,omitempty"` + Bridge string `json:"bridge"` // MacAddress contains the MAC address to set on the network interface - MacAddress string `json:"mac_address,omitempty"` + MacAddress string `json:"mac_address"` // Address contains the IPv4 and mask to set on the network interface - Address string `json:"address,omitempty"` + Address string `json:"address"` // Gateway sets the gateway address that is used as the default for the interface - Gateway string `json:"gateway,omitempty"` + Gateway string `json:"gateway"` // IPv6Address contains the IPv6 and mask to set on the network interface - IPv6Address string `json:"ipv6_address,omitempty"` + IPv6Address string `json:"ipv6_address"` // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface - IPv6Gateway string `json:"ipv6_gateway,omitempty"` + IPv6Gateway string `json:"ipv6_gateway"` // Mtu sets the mtu value for the interface and will be mirrored on both the host and // container's interfaces if a pair is created, specifically in the case of type veth // Note: This does not apply to loopback interfaces. 
- Mtu int `json:"mtu,omitempty"` + Mtu int `json:"mtu"` // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and // container's interfaces if a pair is created, specifically in the case of type veth // Note: This does not apply to loopback interfaces. - TxQueueLen int `json:"txqueuelen,omitempty"` + TxQueueLen int `json:"txqueuelen"` // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the // container. - HostInterfaceName string `json:"host_interface_name,omitempty"` + HostInterfaceName string `json:"host_interface_name"` } // Routes can be specified to create entries in the route table as the container is started @@ -53,14 +53,14 @@ type Network struct { // destination of 0.0.0.0(or *) when viewed in the route table. type Route struct { // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 - Destination string `json:"destination,omitempty"` + Destination string `json:"destination"` // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 - Source string `json:"source,omitempty"` + Source string `json:"source"` // Sets the gateway. Accepts IPv4 and IPv6 - Gateway string `json:"gateway,omitempty"` + Gateway string `json:"gateway"` // The device to set this route up for, for example: eth0 - InterfaceName string `json:"interface_name,omitempty"` + InterfaceName string `json:"interface_name"` } diff --git a/configs/state.go b/configs/state.go deleted file mode 100644 index 5e7eef9a7..000000000 --- a/configs/state.go +++ /dev/null @@ -1,18 +0,0 @@ -package configs - -// The status of a container. -type Status int - -const ( - // The container exists and is running. - Running Status = iota + 1 - - // The container exists, it is in the process of being paused. - Pausing - - // The container exists, but all its processes are paused. - Paused - - // The container does not exist. 
- Destroyed -) diff --git a/container.go b/container.go index 6d681ac12..1954a994e 100644 --- a/container.go +++ b/container.go @@ -9,21 +9,44 @@ import ( "github.com/docker/libcontainer/configs" ) +// The status of a container. +type Status int + +const ( + // The container exists and is running. + Running Status = iota + 1 + + // The container exists, it is in the process of being paused. + Pausing + + // The container exists, but all its processes are paused. + Paused + + // The container does not exist. + Destroyed +) + // State represents a running container's state type State struct { + // ID is the container ID. + ID string `json:"id"` + // InitProcessPid is the init process id in the parent namespace. - InitProcessPid int + InitProcessPid int `json:"init_process_pid"` // InitProcessStartTime is the init process start time. - InitProcessStartTime string + InitProcessStartTime string `json:"init_process_start"` // Path to all the cgroups setup for a container. Key is cgroup subsystem name // with the value as the path. - CgroupPaths map[string]string + CgroupPaths map[string]string `json:"cgroup_paths"` // NamespacePaths are filepaths to the container's namespaces. Key is the namespace name // with the value as the path. - NamespacePaths map[string]string + NamespacePaths map[string]string `json:"namespace_paths"` + + // Config is the container's configuration. + Config configs.Config `json:"config"` } // A libcontainer container object. @@ -40,7 +63,7 @@ type Container interface { // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - Status() (configs.Status, error) + Status() (Status, error) // State returns the current container's state information. 
// diff --git a/generic_error.go b/generic_error.go index 08a47f61e..5207822a0 100644 --- a/generic_error.go +++ b/generic_error.go @@ -20,6 +20,9 @@ File: {{$frame.File}}{{end}} `)) func newGenericError(err error, c ErrorCode) Error { + if le, ok := err.(Error); ok { + return le + } return &GenericError{ Timestamp: time.Now(), Err: err, @@ -28,6 +31,18 @@ func newGenericError(err error, c ErrorCode) Error { } } +func newSystemError(err error) Error { + if le, ok := err.(Error); ok { + return le + } + return &GenericError{ + Timestamp: time.Now(), + Err: err, + ECode: SystemError, + Stack: stacktrace.Capture(2), + } +} + type GenericError struct { Timestamp time.Time ECode ErrorCode diff --git a/integration/exec_test.go b/integration/exec_test.go index 5e9fa0e4e..4fef06ea7 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -383,7 +383,7 @@ func TestFreeze(t *testing.T) { if err := container.Resume(); err != nil { t.Fatal(err) } - if state != configs.Paused { + if state != libcontainer.Paused { t.Fatal("Unexpected state: ", state) } diff --git a/linux_container.go b/linux_container.go index 50df5ba7c..668cf06ca 100644 --- a/linux_container.go +++ b/linux_container.go @@ -3,9 +3,11 @@ package libcontainer import ( + "encoding/json" "fmt" "os" "os/exec" + "path/filepath" "syscall" "github.com/docker/libcontainer/cgroups" @@ -32,21 +34,21 @@ func (c *linuxContainer) Config() configs.Config { return *c.config } -func (c *linuxContainer) Status() (configs.Status, error) { +func (c *linuxContainer) Status() (Status, error) { if c.initProcess == nil { - return configs.Destroyed, nil + return Destroyed, nil } // return Running if the init process is alive if err := syscall.Kill(c.initProcess.pid(), 0); err != nil { if err == syscall.ESRCH { - return configs.Destroyed, nil + return Destroyed, nil } - return 0, err + return 0, newSystemError(err) } if c.config.Cgroups != nil && c.config.Cgroups.Freezer == configs.Frozen { - return configs.Paused, nil + 
return Paused, nil } - return configs.Running, nil + return Running, nil } func (c *linuxContainer) State() (*State, error) { @@ -54,14 +56,16 @@ func (c *linuxContainer) State() (*State, error) { if err != nil { return nil, err } - if status == configs.Destroyed { + if status == Destroyed { return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists) } startTime, err := c.initProcess.startTime() if err != nil { - return nil, err + return nil, newSystemError(err) } state := &State{ + ID: c.ID(), + Config: *c.config, InitProcessPid: c.initProcess.pid(), InitProcessStartTime: startTime, CgroupPaths: c.cgroupManager.GetPaths(), @@ -96,7 +100,7 @@ func (c *linuxContainer) Processes() ([]int, error) { glog.Info("fetch container processes") pids, err := c.cgroupManager.GetPids() if err != nil { - return nil, newGenericError(err, SystemError) + return nil, newSystemError(err) } return pids, nil } @@ -108,14 +112,14 @@ func (c *linuxContainer) Stats() (*Stats, error) { stats = &Stats{} ) if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { - return stats, newGenericError(err, SystemError) + return stats, newSystemError(err) } for _, iface := range c.config.Networks { switch iface.Type { case "veth": istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) if err != nil { - return stats, newGenericError(err, SystemError) + return stats, newSystemError(err) } stats.Interfaces = append(stats.Interfaces, istats) } @@ -128,20 +132,20 @@ func (c *linuxContainer) Start(process *Process) (int, error) { if err != nil { return -1, err } - doInit := status == configs.Destroyed + doInit := status == Destroyed parent, err := c.newParentProcess(process, doInit) if err != nil { - return -1, err + return -1, newSystemError(err) } if err := parent.start(); err != nil { // terminate the process to ensure that it properly is reaped. 
if err := parent.terminate(); err != nil { glog.Warning(err) } - return -1, err + return -1, newSystemError(err) } if doInit { - c.initProcess = parent + c.updateState(parent) } return parent.pid(), nil } @@ -149,11 +153,11 @@ func (c *linuxContainer) Start(process *Process) (int, error) { func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) { parentPipe, childPipe, err := newPipe() if err != nil { - return nil, err + return nil, newSystemError(err) } cmd, err := c.commandTemplate(p, childPipe) if err != nil { - return nil, err + return nil, newSystemError(err) } if !doInit { return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil @@ -171,7 +175,10 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec. cmd.SysProcAttr = &syscall.SysProcAttr{} } cmd.ExtraFiles = []*os.File{childPipe} - cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal) + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + if c.config.ParentDeathSignal > 0 { + cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal) + } return cmd, nil } @@ -254,11 +261,19 @@ func (c *linuxContainer) Destroy() error { if err != nil { return err } - if status != configs.Destroyed { + if status != Destroyed { return newGenericError(nil, ContainerNotStopped) } - // TODO: remove cgroups - return os.RemoveAll(c.root) + if !c.config.Namespaces.Contains(configs.NEWPID) { + if err := killCgroupProcesses(c.cgroupManager); err != nil { + glog.Warning(err) + } + } + err = c.cgroupManager.Destroy() + if rerr := os.RemoveAll(c.root); err == nil { + err = rerr + } + return err } func (c *linuxContainer) Pause() error { @@ -270,11 +285,23 @@ func (c *linuxContainer) Resume() error { } func (c *linuxContainer) Signal(signal os.Signal) error { - glog.Infof("sending signal %d to pid %d", signal, c.initProcess.pid()) return c.initProcess.signal(signal) } -// TODO: rename to be more descriptive func (c *linuxContainer) NotifyOOM() (<-chan 
struct{}, error) { return NotifyOnOOM(c.cgroupManager.GetPaths()) } + +func (c *linuxContainer) updateState(process parentProcess) error { + c.initProcess = process + state, err := c.State() + if err != nil { + return err + } + f, err := os.Create(filepath.Join(c.root, stateFilename)) + if err != nil { + return err + } + defer f.Close() + return json.NewEncoder(f).Encode(state) +} diff --git a/linux_factory.go b/linux_factory.go index 8c33e0981..66823953e 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -18,8 +18,7 @@ import ( ) const ( - configFilename = "config.json" - stateFilename = "state.json" + stateFilename = "state.json" ) var ( @@ -65,23 +64,9 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err } else if !os.IsNotExist(err) { return nil, newGenericError(err, SystemError) } - data, err := json.MarshalIndent(config, "", "\t") - if err != nil { - return nil, newGenericError(err, SystemError) - } if err := os.MkdirAll(containerRoot, 0700); err != nil { return nil, newGenericError(err, SystemError) } - f, err := os.Create(filepath.Join(containerRoot, configFilename)) - if err != nil { - os.RemoveAll(containerRoot) - return nil, newGenericError(err, SystemError) - } - defer f.Close() - if _, err := f.Write(data); err != nil { - os.RemoveAll(containerRoot) - return nil, newGenericError(err, SystemError) - } return &linuxContainer{ id: id, root: containerRoot, @@ -96,13 +81,7 @@ func (l *linuxFactory) Load(id string) (Container, error) { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } containerRoot := filepath.Join(l.root, id) - glog.Infof("loading container config from %s", containerRoot) - config, err := l.loadContainerConfig(containerRoot) - if err != nil { - return nil, err - } - glog.Infof("loading container state from %s", containerRoot) - state, err := l.loadContainerState(containerRoot) + state, err := l.loadState(containerRoot) if err != nil { return nil, err } @@ -110,12 +89,12 @@ func (l 
*linuxFactory) Load(id string) (Container, error) { processPid: state.InitProcessPid, processStartTime: state.InitProcessStartTime, } - cgroupManager := cgroups.LoadCgroupManager(config.Cgroups, state.CgroupPaths) + cgroupManager := cgroups.LoadCgroupManager(state.Config.Cgroups, state.CgroupPaths) glog.Infof("using %s as cgroup manager", cgroupManager) return &linuxContainer{ initProcess: r, id: id, - config: config, + config: &state.Config, initArgs: l.initArgs, cgroupManager: cgroupManager, root: containerRoot, @@ -155,23 +134,7 @@ func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { return i.Init() } -func (l *linuxFactory) loadContainerConfig(root string) (*configs.Config, error) { - f, err := os.Open(filepath.Join(root, configFilename)) - if err != nil { - if os.IsNotExist(err) { - return nil, newGenericError(err, ContainerNotExists) - } - return nil, newGenericError(err, SystemError) - } - defer f.Close() - var config *configs.Config - if err := json.NewDecoder(f).Decode(&config); err != nil { - return nil, newGenericError(err, ConfigInvalid) - } - return config, nil -} - -func (l *linuxFactory) loadContainerState(root string) (*State, error) { +func (l *linuxFactory) loadState(root string) (*State, error) { f, err := os.Open(filepath.Join(root, stateFilename)) if err != nil { if os.IsNotExist(err) { diff --git a/linux_factory_test.go b/linux_factory_test.go index 457ec10d6..69001669d 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -82,14 +82,12 @@ func TestFactoryLoadContainer(t *testing.T) { } expectedState = &State{ InitProcessPid: 1024, + Config: *expectedConfig, } ) if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil { t.Fatal(err) } - if err := marshal(filepath.Join(root, id, configFilename), expectedConfig); err != nil { - t.Fatal(err) - } if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil { t.Fatal(err) } diff --git a/linux_init.go b/linux_init.go index 
7c56afe87..9ed27e892 100644 --- a/linux_init.go +++ b/linux_init.go @@ -9,11 +9,13 @@ import ( "strings" "syscall" + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" "github.com/docker/libcontainer/system" "github.com/docker/libcontainer/user" "github.com/docker/libcontainer/utils" + "github.com/golang/glog" ) type initType string @@ -226,3 +228,35 @@ func setupRlimits(config *configs.Config) error { } return nil } + +// killCgroupProcesses freezes then itterates over all the processes inside the +// manager's cgroups sending a SIGKILL to each process then waiting for them to +// exit. +func killCgroupProcesses(m cgroups.Manager) error { + var procs []*os.Process + if err := m.Freeze(configs.Frozen); err != nil { + glog.Warning(err) + } + pids, err := m.GetPids() + if err != nil { + m.Freeze(configs.Thawed) + return err + } + for _, pid := range pids { + if p, err := os.FindProcess(pid); err == nil { + procs = append(procs, p) + if err := p.Kill(); err != nil { + glog.Warning(err) + } + } + } + if err := m.Freeze(configs.Thawed); err != nil { + glog.Warning(err) + } + for _, p := range procs { + if _, err := p.Wait(); err != nil { + glog.Warning(err) + } + } + return nil +} diff --git a/linux_process.go b/linux_process.go index 2b434a511..955df8829 100644 --- a/linux_process.go +++ b/linux_process.go @@ -11,7 +11,6 @@ import ( "syscall" "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/system" "github.com/golang/glog" ) @@ -184,27 +183,7 @@ func (p *initProcess) wait() (*os.ProcessState, error) { } // we should kill all processes in cgroup when init is died if we use host PID namespace if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 { - // TODO: this will not work for the success path because libcontainer - // does not wait on the process. 
This needs to be moved to destroy or add a Wait() - // method back onto the container. - var procs []*os.Process - p.manager.Freeze(configs.Frozen) - pids, err := p.manager.GetPids() - if err != nil { - return nil, err - } - for _, pid := range pids { - // TODO: log err without aborting if we are unable to find - // a single PID - if p, err := os.FindProcess(pid); err == nil { - procs = append(procs, p) - p.Kill() - } - } - p.manager.Freeze(configs.Thawed) - for _, p := range procs { - p.Wait() - } + killCgroupProcesses(p.manager) } return state, nil } diff --git a/nsinit/init.go b/nsinit/init.go index 9848c42b8..ea6295e5e 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -12,9 +12,6 @@ import ( var initCommand = cli.Command{ Name: "init", Usage: "runs the init process inside the namespace", - Flags: []cli.Flag{ - cli.IntFlag{Name: "fd", Value: 0, Usage: "internal pipe fd"}, - }, Action: func(context *cli.Context) { runtime.GOMAXPROCS(1) runtime.LockOSThread() @@ -22,11 +19,7 @@ var initCommand = cli.Command{ if err != nil { log.Fatal(err) } - if context.Int("fd") == 0 { - log.Fatal("--fd must be specified for init process") - } - fd := uintptr(context.Int("fd")) - if err := factory.StartInitialization(fd); err != nil { + if err := factory.StartInitialization(3); err != nil { log.Fatal(err) } panic("This line should never been executed") diff --git a/nsinit/utils.go b/nsinit/utils.go index 901972e81..62d8e6fd2 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -24,7 +24,7 @@ func loadConfig(context *cli.Context) (*configs.Config, error) { } func loadFactory(context *cli.Context) (libcontainer.Factory, error) { - return libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) + return libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init"}) } func getContainer(context *cli.Context) (libcontainer.Container, error) { From d909440c48b7b64b016478de1e6ee78e2faa9e13 Mon Sep 17 00:00:00 2001 From: Michael 
Crosby Date: Wed, 11 Feb 2015 17:12:03 -0800 Subject: [PATCH 087/101] Unexport certain internal funcs and types Signed-off-by: Michael Crosby --- console.go | 2 +- container.go | 7 ++++--- generic_error.go | 12 ++++++------ linux_console.go | 2 ++ linux_container.go | 2 +- linux_network.go | 14 ++++---------- notify_linux.go => linux_notify.go | 4 ++-- notify_linux_test.go => linux_notify_test.go | 2 +- stats.go | 12 ++++++------ 9 files changed, 27 insertions(+), 30 deletions(-) rename notify_linux.go => linux_notify.go (91%) rename notify_linux_test.go => linux_notify_test.go (98%) diff --git a/console.go b/console.go index d3392ee3b..1998086fc 100644 --- a/console.go +++ b/console.go @@ -2,7 +2,7 @@ package libcontainer import "io" -// Console is a psuedo TTY. +// Console represents a psuedo TTY. type Console interface { io.ReadWriter io.Closer diff --git a/container.go b/container.go index 1954a994e..2d4cbcad7 100644 --- a/container.go +++ b/container.go @@ -1,6 +1,7 @@ -/* -NOTE: The API is in flux and mainly not implemented. Proceed with caution until further notice. -*/ +// Libcontainer provides a native Go implementation for creating containers +// with namespaces, cgroups, capabilities, and filesystem access controls. +// It allows you to manage the lifecycle of the container performing additional operations +// after the container is created. 
package libcontainer import ( diff --git a/generic_error.go b/generic_error.go index 5207822a0..85010007b 100644 --- a/generic_error.go +++ b/generic_error.go @@ -23,7 +23,7 @@ func newGenericError(err error, c ErrorCode) Error { if le, ok := err.(Error); ok { return le } - return &GenericError{ + return &genericError{ Timestamp: time.Now(), Err: err, ECode: c, @@ -35,7 +35,7 @@ func newSystemError(err error) Error { if le, ok := err.(Error); ok { return le } - return &GenericError{ + return &genericError{ Timestamp: time.Now(), Err: err, ECode: SystemError, @@ -43,21 +43,21 @@ func newSystemError(err error) Error { } } -type GenericError struct { +type genericError struct { Timestamp time.Time ECode ErrorCode Err error Stack stacktrace.Stacktrace } -func (e *GenericError) Error() string { +func (e *genericError) Error() string { return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Err) } -func (e *GenericError) Code() ErrorCode { +func (e *genericError) Code() ErrorCode { return e.ECode } -func (e *GenericError) Detail(w io.Writer) error { +func (e *genericError) Detail(w io.Writer) error { return errorTemplate.Execute(w, e) } diff --git a/linux_console.go b/linux_console.go index f1eeaedfa..5cb5f7132 100644 --- a/linux_console.go +++ b/linux_console.go @@ -12,6 +12,8 @@ import ( "github.com/docker/libcontainer/label" ) +// NewConsole returns an initalized console that can be used within a container by copying bytes +// from the master side to the slave that is attached as the tty for the container's init process. 
func NewConsole() (Console, error) { master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { diff --git a/linux_container.go b/linux_container.go index 668cf06ca..efe9375be 100644 --- a/linux_container.go +++ b/linux_container.go @@ -289,7 +289,7 @@ func (c *linuxContainer) Signal(signal os.Signal) error { } func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { - return NotifyOnOOM(c.cgroupManager.GetPaths()) + return notifyOnOOM(c.cgroupManager.GetPaths()) } func (c *linuxContainer) updateState(process parentProcess) error { diff --git a/linux_network.go b/linux_network.go index 0b5d33949..e720dade4 100644 --- a/linux_network.go +++ b/linux_network.go @@ -3,7 +3,6 @@ package libcontainer import ( - "errors" "fmt" "io/ioutil" "net" @@ -15,10 +14,6 @@ import ( "github.com/docker/libcontainer/utils" ) -var ( - ErrNotValidStrategyType = errors.New("not a valid network strategy type") -) - var strategies = map[string]networkStrategy{ "veth": &veth{}, "loopback": &loopback{}, @@ -32,19 +27,18 @@ type networkStrategy interface { } // getStrategy returns the specific network strategy for the -// provided type. If no strategy is registered for the type an -// ErrNotValidStrategyType is returned. +// provided type. func getStrategy(tpe string) (networkStrategy, error) { s, exists := strategies[tpe] if !exists { - return nil, ErrNotValidStrategyType + return nil, fmt.Errorf("unknown strategy type %q", tpe) } return s, nil } // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. 
-func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) { - out := &NetworkInterface{Name: interfaceName} +func getNetworkInterfaceStats(interfaceName string) (*networkInterface, error) { + out := &networkInterface{Name: interfaceName} // This can happen if the network runtime information is missing - possible if the // container was created by an old version of libcontainer. if interfaceName == "" { diff --git a/notify_linux.go b/linux_notify.go similarity index 91% rename from notify_linux.go rename to linux_notify.go index 062fa11a0..db51d57da 100644 --- a/notify_linux.go +++ b/linux_notify.go @@ -12,10 +12,10 @@ import ( const oomCgroupName = "memory" -// NotifyOnOOM returns channel on which you can expect event about OOM, +// notifyOnOOM returns channel on which you can expect event about OOM, // if process died without OOM this channel will be closed. // s is current *libcontainer.State for container. -func NotifyOnOOM(paths map[string]string) (<-chan struct{}, error) { +func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) { dir := paths[oomCgroupName] if dir == "" { return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName) diff --git a/notify_linux_test.go b/linux_notify_test.go similarity index 98% rename from notify_linux_test.go rename to linux_notify_test.go index 65189d362..09bdf6443 100644 --- a/notify_linux_test.go +++ b/linux_notify_test.go @@ -30,7 +30,7 @@ func TestNotifyOnOOM(t *testing.T) { paths := map[string]string{ "memory": memoryPath, } - ooms, err := NotifyOnOOM(paths) + ooms, err := notifyOnOOM(paths) if err != nil { t.Fatal("expected no error, got:", err) } diff --git a/stats.go b/stats.go index 198a8bf54..926d4d6bd 100644 --- a/stats.go +++ b/stats.go @@ -2,7 +2,12 @@ package libcontainer import "github.com/docker/libcontainer/cgroups" -type NetworkInterface struct { +type Stats struct { + Interfaces []*networkInterface + CgroupStats *cgroups.Stats +} + +type networkInterface struct { 
// Name is the name of the network interface. Name string @@ -15,8 +20,3 @@ type NetworkInterface struct { TxErrors uint64 TxDropped uint64 } - -type Stats struct { - Interfaces []*NetworkInterface - CgroupStats *cgroups.Stats -} From 5df859ad240af502aebef01ca28da3ef24951e05 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 11 Feb 2015 17:26:29 -0800 Subject: [PATCH 088/101] Add config command to nsinit Signed-off-by: Michael Crosby --- nsinit/config.go | 106 +++++++++++++++++++++++++++++++++++++++++++++++ nsinit/main.go | 1 + 2 files changed, 107 insertions(+) create mode 100644 nsinit/config.go diff --git a/nsinit/config.go b/nsinit/config.go new file mode 100644 index 000000000..3760f64c0 --- /dev/null +++ b/nsinit/config.go @@ -0,0 +1,106 @@ +package main + +import ( + "bytes" + "encoding/json" + "io" + "os" + "path/filepath" + "syscall" + + "github.com/codegangsta/cli" + "github.com/docker/libcontainer/configs" +) + +var configCommand = cli.Command{ + Name: "config", + Usage: "generate a standard configuration file for a container", + Flags: []cli.Flag{ + cli.StringFlag{Name: "file,f", Value: "stdout", Usage: "write the configuration to the specified file"}, + }, + Action: func(context *cli.Context) { + template := getTemplate() + data, err := json.MarshalIndent(template, "", "\t") + if err != nil { + fatal(err) + } + var f *os.File + filePath := context.String("file") + switch filePath { + case "stdout", "": + f = os.Stdout + default: + if f, err = os.Create(filePath); err != nil { + fatal(err) + } + defer f.Close() + } + if _, err := io.Copy(f, bytes.NewBuffer(data)); err != nil { + fatal(err) + } + }, +} + +func getTemplate() *configs.Config { + cwd, err := os.Getwd() + if err != nil { + panic(err) + } + return &configs.Config{ + Rootfs: cwd, + ParentDeathSignal: int(syscall.SIGKILL), + Capabilities: []string{ + "CHOWN", + "DAC_OVERRIDE", + "FSETID", + "FOWNER", + "MKNOD", + "NET_RAW", + "SETGID", + "SETUID", + "SETFCAP", + "SETPCAP", + 
"NET_BIND_SERVICE", + "SYS_CHROOT", + "KILL", + "AUDIT_WRITE", + }, + Namespaces: configs.Namespaces([]configs.Namespace{ + {Type: configs.NEWNS}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + {Type: configs.NEWPID}, + {Type: configs.NEWNET}, + }), + Cgroups: &configs.Cgroup{ + Name: filepath.Base(cwd), + Parent: "nsinit", + AllowAllDevices: false, + AllowedDevices: configs.DefaultAllowedDevices, + }, + + Devices: configs.DefaultAutoCreatedDevices, + Hostname: "nsinit", + Networks: []*configs.Network{ + { + Type: "loopback", + Address: "127.0.0.1/0", + Gateway: "localhost", + }, + }, + Rlimits: []configs.Rlimit{ + { + Type: syscall.RLIMIT_NOFILE, + Hard: uint64(1024), + Soft: uint64(1024), + }, + }, + Mounts: []*configs.Mount{ + { + Type: "tmpfs", + Destination: "/tmp", + }, + }, + } + +} diff --git a/nsinit/main.go b/nsinit/main.go index e0dcf460f..642611179 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -24,6 +24,7 @@ func main() { pauseCommand, statsCommand, unpauseCommand, + configCommand, } if err := app.Run(os.Args); err != nil { log.Fatal(err) From 91a3f162afc90339b1d8f8d2f22d9c4271eddb84 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 11 Feb 2015 17:42:58 -0800 Subject: [PATCH 089/101] Implement nsinit state command Signed-off-by: Michael Crosby --- error.go | 5 ++++- generic_error.go | 8 +++++--- linux_container.go | 7 ++++++- linux_process.go | 32 ++++++++++++++++---------------- linux_userns_init.go | 3 ++- nsinit/config.go | 43 +++++++++++++++++++++++++++++++++---------- nsinit/exec.go | 24 +++++++++++++++++++----- nsinit/main.go | 3 ++- nsinit/state.go | 31 +++++++++++++++++++++++++++++++ nsinit/stats.go | 9 ++++----- nsinit/utils.go | 5 +++++ 11 files changed, 127 insertions(+), 43 deletions(-) create mode 100644 nsinit/state.go diff --git a/error.go b/error.go index 31ebb3207..85b0dcaf0 100644 --- a/error.go +++ b/error.go @@ -15,6 +15,7 @@ const ( ContainerNotExists ContainerPaused ContainerNotStopped + ContainerNotRunning // 
Common errors ConfigInvalid @@ -36,7 +37,9 @@ func (c ErrorCode) String() string { case ContainerNotExists: return "Container does not exist" case ContainerNotStopped: - return "Container isn't stopped" + return "Container is not stopped" + case ContainerNotRunning: + return "Container is not running" default: return "Unknown error" } diff --git a/generic_error.go b/generic_error.go index 85010007b..ff614ee66 100644 --- a/generic_error.go +++ b/generic_error.go @@ -11,12 +11,14 @@ import ( var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} Code: {{.ECode}} +{{if .Err }} Message: {{.Err.Error}} +{{end}} Frames:{{range $i, $frame := .Stack.Frames}} --- {{$i}}: {{$frame.Function}} Package: {{$frame.Package}} -File: {{$frame.File}}{{end}} +File: {{$frame.File}}@{{$frame.Line}}{{end}} `)) func newGenericError(err error, c ErrorCode) Error { @@ -27,7 +29,7 @@ func newGenericError(err error, c ErrorCode) Error { Timestamp: time.Now(), Err: err, ECode: c, - Stack: stacktrace.Capture(2), + Stack: stacktrace.Capture(1), } } @@ -39,7 +41,7 @@ func newSystemError(err error) Error { Timestamp: time.Now(), Err: err, ECode: SystemError, - Stack: stacktrace.Capture(2), + Stack: stacktrace.Capture(1), } } diff --git a/linux_container.go b/linux_container.go index efe9375be..0cb6749f3 100644 --- a/linux_container.go +++ b/linux_container.go @@ -184,7 +184,6 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec. 
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *initProcess { t := "_LIBCONTAINER_INITTYPE=standard" - cloneFlags := c.config.Namespaces.CloneFlags() if cloneFlags&syscall.CLONE_NEWUSER != 0 { c.addUidGidMappings(cmd.SysProcAttr) @@ -225,6 +224,8 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { Config: c.config, Args: process.Args, Env: process.Env, + User: process.User, + Cwd: process.Cwd, } } @@ -273,6 +274,7 @@ func (c *linuxContainer) Destroy() error { if rerr := os.RemoveAll(c.root); err == nil { err = rerr } + c.initProcess = nil return err } @@ -285,6 +287,9 @@ func (c *linuxContainer) Resume() error { } func (c *linuxContainer) Signal(signal os.Signal) error { + if c.initProcess == nil { + return newGenericError(nil, ContainerNotRunning) + } return c.initProcess.signal(signal) } diff --git a/linux_process.go b/linux_process.go index 955df8829..ceef3c704 100644 --- a/linux_process.go +++ b/linux_process.go @@ -54,15 +54,15 @@ func (p *setnsProcess) signal(s os.Signal) error { func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() if p.forkedProcess, err = p.execSetns(); err != nil { - return err + return newSystemError(err) } if len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.forkedProcess.Pid); err != nil { - return err + return newSystemError(err) } } if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil { - return err + return newSystemError(err) } return nil } @@ -75,18 +75,18 @@ func (p *setnsProcess) execSetns() (*os.Process, error) { err := p.cmd.Start() p.childPipe.Close() if err != nil { - return nil, err + return nil, newSystemError(err) } status, err := p.cmd.Process.Wait() if err != nil { - return nil, err + return nil, newSystemError(err) } if !status.Success() { - return nil, &exec.ExitError{status} + return nil, newSystemError(&exec.ExitError{status}) } var pid *pid if err := 
json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { - return nil, err + return nil, newSystemError(err) } return os.FindProcess(pid.Pid) } @@ -129,12 +129,12 @@ func (p *initProcess) start() error { err := p.cmd.Start() p.childPipe.Close() if err != nil { - return err + return newSystemError(err) } // Do this before syncing with child so that no children // can escape the cgroup if err := p.manager.Apply(p.pid()); err != nil { - return err + return newSystemError(err) } defer func() { if err != nil { @@ -143,13 +143,13 @@ func (p *initProcess) start() error { } }() if err := p.createNetworkInterfaces(); err != nil { - return err + return newSystemError(err) } // Start the setup process to setup the init process if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWUSER != 0 { parent, err := p.newUsernsSetupProcess() if err != nil { - return err + return newSystemError(err) } if err := parent.start(); err != nil { if err := parent.terminate(); err != nil { @@ -158,20 +158,20 @@ func (p *initProcess) start() error { return err } if _, err := parent.wait(); err != nil { - return err + return newSystemError(err) } } if err := p.sendConfig(); err != nil { - return err + return newSystemError(err) } // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *initError if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { - return err + return newSystemError(err) } if ierr != nil { - return ierr + return newSystemError(ierr) } return nil } @@ -232,7 +232,7 @@ func (p *initProcess) createNetworkInterfaces() error { func (p *initProcess) newUsernsSetupProcess() (parentProcess, error) { parentPipe, childPipe, err := newPipe() if err != nil { - return nil, err + return nil, newSystemError(err) } cmd := exec.Command(p.cmd.Args[0], p.cmd.Args[1:]...) 
cmd.ExtraFiles = []*os.File{childPipe} diff --git a/linux_userns_init.go b/linux_userns_init.go index a898f2d28..a7da9ce4c 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -21,7 +21,8 @@ func (l *linuxUsernsInit) Init() error { } consolePath := l.config.Config.Console if consolePath != "" { - console := newConsoleFromPath(consolePath) + // TODO: why is this hard coded? + console := newConsoleFromPath("/dev/console") if err := console.dupStdio(); err != nil { return err } diff --git a/nsinit/config.go b/nsinit/config.go index 3760f64c0..f06804f2e 100644 --- a/nsinit/config.go +++ b/nsinit/config.go @@ -12,14 +12,30 @@ import ( "github.com/docker/libcontainer/configs" ) +var createFlags = []cli.Flag{ + cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process incase the parent dies"}, + cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"}, + cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"}, + cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"}, + cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"}, + cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"}, + cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"}, + cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"}, + cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"}, + cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"}, + cli.StringFlag{Name: "process-label", Usage: "set the process label"}, + cli.StringFlag{Name: "mount-label", Usage: "set the mount label"}, +} + var configCommand = cli.Command{ Name: "config", Usage: "generate a standard configuration file for a container", - Flags: []cli.Flag{ + Flags: append([]cli.Flag{ cli.StringFlag{Name: "file,f", Value: "stdout", Usage: "write 
the configuration to the specified file"}, - }, + }, createFlags...), Action: func(context *cli.Context) { template := getTemplate() + modify(template, context) data, err := json.MarshalIndent(template, "", "\t") if err != nil { fatal(err) @@ -41,6 +57,19 @@ var configCommand = cli.Command{ }, } +func modify(config *configs.Config, context *cli.Context) { + config.ParentDeathSignal = context.Int("parent-death-signal") + config.Readonlyfs = context.Bool("read-only") + config.Cgroups.CpusetCpus = context.String("cpuset-cpus") + config.Cgroups.CpusetMems = context.String("cpuset-mems") + config.Cgroups.CpuShares = int64(context.Int("cpushares")) + config.Cgroups.Memory = int64(context.Int("memory-limit")) + config.Cgroups.MemorySwap = int64(context.Int("memory-swap")) + config.AppArmorProfile = context.String("apparmor-profile") + config.ProcessLabel = context.String("process-label") + config.MountLabel = context.String("mount-label") +} + func getTemplate() *configs.Config { cwd, err := os.Getwd() if err != nil { @@ -91,14 +120,8 @@ func getTemplate() *configs.Config { Rlimits: []configs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, - Hard: uint64(1024), - Soft: uint64(1024), - }, - }, - Mounts: []*configs.Mount{ - { - Type: "tmpfs", - Destination: "/tmp", + Hard: 1024, + Soft: 1024, }, }, } diff --git a/nsinit/exec.go b/nsinit/exec.go index 6b90ce28d..5c6b830fc 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -20,13 +20,14 @@ var execCommand = cli.Command{ Name: "exec", Usage: "execute a new command inside a container", Action: execAction, - Flags: []cli.Flag{ + Flags: append([]cli.Flag{ cli.BoolFlag{Name: "tty,t", Usage: "allocate a TTY to the container"}, cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"}, + cli.BoolFlag{Name: "create", Usage: "create the container's configuration on the fly with arguments"}, cli.StringFlag{Name: 
"user,u", Value: "root", Usage: "set the user, uid, and/or gid for the process"}, cli.StringSliceFlag{Name: "env", Value: standardEnvironment, Usage: "set environment variables for the process"}, - }, + }, createFlags...), } func execAction(context *cli.Context) { @@ -38,6 +39,7 @@ func execAction(context *cli.Context) { if err != nil { fatal(err) } + created := false container, err := factory.Load(context.String("id")) if err != nil { if lerr, ok := err.(libcontainer.Error); !ok || lerr.Code() != libcontainer.ContainerNotExists { @@ -45,10 +47,15 @@ func execAction(context *cli.Context) { } config, err := loadConfig(context) if err != nil { + tty.Close() fatal(err) } - config.Console = tty.console.Path() + if tty.console != nil { + config.Console = tty.console.Path() + } + created = true if container, err = factory.Create(context.String("id"), config); err != nil { + tty.Close() fatal(err) } } @@ -64,19 +71,26 @@ func execAction(context *cli.Context) { tty.attach(process) pid, err := container.Start(process) if err != nil { + tty.Close() fatal(err) } proc, err := os.FindProcess(pid) if err != nil { + tty.Close() fatal(err) } status, err := proc.Wait() if err != nil { + tty.Close() fatal(err) } - if err := container.Destroy(); err != nil { - fatal(err) + if created { + if err := container.Destroy(); err != nil { + tty.Close() + fatal(err) + } } + tty.Close() os.Exit(utils.ExitStatus(status.Sys().(syscall.WaitStatus))) } diff --git a/nsinit/main.go b/nsinit/main.go index 642611179..a2afd00ca 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -18,13 +18,14 @@ func main() { cli.StringFlag{Name: "root", Value: ".", Usage: "root directory for containers"}, } app.Commands = []cli.Command{ + configCommand, execCommand, initCommand, oomCommand, pauseCommand, statsCommand, unpauseCommand, - configCommand, + stateCommand, } if err := app.Run(os.Args); err != nil { log.Fatal(err) diff --git a/nsinit/state.go b/nsinit/state.go new file mode 100644 index 000000000..46981bb79 
--- /dev/null +++ b/nsinit/state.go @@ -0,0 +1,31 @@ +package main + +import ( + "encoding/json" + "fmt" + + "github.com/codegangsta/cli" +) + +var stateCommand = cli.Command{ + Name: "state", + Usage: "get the container's current state", + Flags: []cli.Flag{ + cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"}, + }, + Action: func(context *cli.Context) { + container, err := getContainer(context) + if err != nil { + fatal(err) + } + state, err := container.State() + if err != nil { + fatal(err) + } + data, err := json.MarshalIndent(state, "", "\t") + if err != nil { + fatal(err) + } + fmt.Printf("%s", data) + }, +} diff --git a/nsinit/stats.go b/nsinit/stats.go index 8320fed4f..49087fa23 100644 --- a/nsinit/stats.go +++ b/nsinit/stats.go @@ -3,7 +3,6 @@ package main import ( "encoding/json" "fmt" - "log" "github.com/codegangsta/cli" ) @@ -17,15 +16,15 @@ var statsCommand = cli.Command{ Action: func(context *cli.Context) { container, err := getContainer(context) if err != nil { - log.Fatal(err) + fatal(err) } stats, err := container.Stats() if err != nil { - log.Fatal(err) + fatal(err) } - data, jerr := json.MarshalIndent(stats, "", "\t") + data, err := json.MarshalIndent(stats, "", "\t") if err != nil { - log.Fatal(jerr) + fatal(err) } fmt.Printf("%s", data) }, diff --git a/nsinit/utils.go b/nsinit/utils.go index 62d8e6fd2..73c13b598 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -11,6 +11,11 @@ import ( ) func loadConfig(context *cli.Context) (*configs.Config, error) { + if context.Bool("create") { + config := getTemplate() + modify(config, context) + return config, nil + } f, err := os.Open(context.String("config")) if err != nil { return nil, err From c2403c32dbf8a67870ab2ba7524c117fc0652256 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 12 Feb 2015 10:38:43 -0800 Subject: [PATCH 090/101] Add GetPath on namespace config Signed-off-by: Michael Crosby --- configs/namespaces.go | 27 +++++++++++++++++++++++++++ 
console.go | 2 +- linux_container.go | 21 +-------------------- nsinit/config.go | 2 +- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/configs/namespaces.go b/configs/namespaces.go index fbb56e1d0..9078e6abf 100644 --- a/configs/namespaces.go +++ b/configs/namespaces.go @@ -1,6 +1,7 @@ package configs import ( + "fmt" "syscall" ) @@ -26,6 +27,32 @@ func (n *Namespace) Syscall() int { return namespaceInfo[n.Type] } +func (n *Namespace) GetPath(pid int) string { + if n.Path != "" { + return n.Path + } + return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file()) +} + +func (n *Namespace) file() string { + file := "" + switch n.Type { + case NEWNET: + file = "net" + case NEWNS: + file = "mnt" + case NEWPID: + file = "pid" + case NEWIPC: + file = "ipc" + case NEWUSER: + file = "user" + case NEWUTS: + file = "uts" + } + return file +} + type Namespaces []Namespace func (n *Namespaces) Remove(t NamespaceType) bool { diff --git a/console.go b/console.go index 1998086fc..042a2a2e4 100644 --- a/console.go +++ b/console.go @@ -2,7 +2,7 @@ package libcontainer import "io" -// Console represents a psuedo TTY. +// Console represents a pseudo TTY. 
type Console interface { io.ReadWriter io.Closer diff --git a/linux_container.go b/linux_container.go index 0cb6749f3..492936ea2 100644 --- a/linux_container.go +++ b/linux_container.go @@ -72,26 +72,7 @@ func (c *linuxContainer) State() (*State, error) { NamespacePaths: make(map[string]string), } for _, ns := range c.config.Namespaces { - if ns.Path != "" { - state.NamespacePaths[string(ns.Type)] = ns.Path - continue - } - file := "" - switch ns.Type { - case configs.NEWNET: - file = "net" - case configs.NEWNS: - file = "mnt" - case configs.NEWPID: - file = "pid" - case configs.NEWIPC: - file = "ipc" - case configs.NEWUSER: - file = "user" - case configs.NEWUTS: - file = "uts" - } - state.NamespacePaths[string(ns.Type)] = fmt.Sprintf("/proc/%d/ns/%s", c.initProcess.pid(), file) + state.NamespacePaths[string(ns.Type)] = ns.GetPath(c.initProcess.pid()) } return state, nil } diff --git a/nsinit/config.go b/nsinit/config.go index f06804f2e..145fe59a5 100644 --- a/nsinit/config.go +++ b/nsinit/config.go @@ -13,7 +13,7 @@ import ( ) var createFlags = []cli.Flag{ - cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process incase the parent dies"}, + cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"}, cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"}, cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"}, cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"}, From fe9f7668957641a404b0d2c8850f104df591e7f2 Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Thu, 12 Feb 2015 12:58:40 -0800 Subject: [PATCH 091/101] Fix compilation with golang 1.3(uid/gid mappings is unsupported) Signed-off-by: Alexander Morozov --- linux_container.go | 31 +++++++------------------------ linux_container_nouserns.go | 13 +++++++++++++ 
linux_container_userns.go | 26 ++++++++++++++++++++++++++ nsinit/oom.go | 4 +++- 4 files changed, 49 insertions(+), 25 deletions(-) create mode 100644 linux_container_nouserns.go create mode 100644 linux_container_userns.go diff --git a/linux_container.go b/linux_container.go index 492936ea2..73aaa441e 100644 --- a/linux_container.go +++ b/linux_container.go @@ -143,7 +143,7 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces if !doInit { return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil } - return c.newInitProcess(p, cmd, parentPipe, childPipe), nil + return c.newInitProcess(p, cmd, parentPipe, childPipe) } func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) { @@ -163,11 +163,14 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec. return cmd, nil } -func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *initProcess { +func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) { t := "_LIBCONTAINER_INITTYPE=standard" cloneFlags := c.config.Namespaces.CloneFlags() if cloneFlags&syscall.CLONE_NEWUSER != 0 { - c.addUidGidMappings(cmd.SysProcAttr) + if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil { + // mappings is not supported + return nil, err + } // Default to root user when user namespaces are enabled. 
if cmd.SysProcAttr.Credential == nil { cmd.SysProcAttr.Credential = &syscall.Credential{} @@ -182,7 +185,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c parentPipe: parentPipe, manager: c.cgroupManager, config: c.newInitConfig(p), - } + }, nil } func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess { @@ -210,26 +213,6 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { } } -// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. -func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) { - if c.config.UidMappings != nil { - sys.UidMappings = make([]syscall.SysProcIDMap, len(c.config.UidMappings)) - for i, um := range c.config.UidMappings { - sys.UidMappings[i].ContainerID = um.ContainerID - sys.UidMappings[i].HostID = um.HostID - sys.UidMappings[i].Size = um.Size - } - } - if c.config.GidMappings != nil { - sys.GidMappings = make([]syscall.SysProcIDMap, len(c.config.GidMappings)) - for i, gm := range c.config.GidMappings { - sys.GidMappings[i].ContainerID = gm.ContainerID - sys.GidMappings[i].HostID = gm.HostID - sys.GidMappings[i].Size = gm.Size - } - } -} - func newPipe() (parent *os.File, child *os.File, err error) { fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) if err != nil { diff --git a/linux_container_nouserns.go b/linux_container_nouserns.go new file mode 100644 index 000000000..3b75d593c --- /dev/null +++ b/linux_container_nouserns.go @@ -0,0 +1,13 @@ +// +build !go1.4 + +package libcontainer + +import ( + "fmt" + "syscall" +) + +// not available before go 1.4 +func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error { + return fmt.Errorf("User namespace is not supported in golang < 1.4") +} diff --git a/linux_container_userns.go b/linux_container_userns.go new file mode 100644 index 000000000..5f4cf3c9f --- /dev/null +++ b/linux_container_userns.go 
@@ -0,0 +1,26 @@ +// +build go1.4 + +package libcontainer + +import "syscall" + +// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. +func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error { + if c.config.UidMappings != nil { + sys.UidMappings = make([]syscall.SysProcIDMap, len(c.config.UidMappings)) + for i, um := range c.config.UidMappings { + sys.UidMappings[i].ContainerID = um.ContainerID + sys.UidMappings[i].HostID = um.HostID + sys.UidMappings[i].Size = um.Size + } + } + if c.config.GidMappings != nil { + sys.GidMappings = make([]syscall.SysProcIDMap, len(c.config.GidMappings)) + for i, gm := range c.config.GidMappings { + sys.GidMappings[i].ContainerID = gm.ContainerID + sys.GidMappings[i].HostID = gm.HostID + sys.GidMappings[i].Size = gm.Size + } + } + return nil +} diff --git a/nsinit/oom.go b/nsinit/oom.go index c1b4c8051..a59b75333 100644 --- a/nsinit/oom.go +++ b/nsinit/oom.go @@ -21,7 +21,9 @@ var oomCommand = cli.Command{ if err != nil { log.Fatal(err) } - for range n { + for x := range n { + // hack for calm down go1.4 gofmt + _ = x log.Printf("OOM notification received") } }, From 1a37242fa2af5db30ea72b95f948285efcd63d52 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Thu, 12 Feb 2015 16:23:05 -0800 Subject: [PATCH 092/101] Refactor system mounts to be placed on the config Also remove the RestrictSys bool replaced by configurable paths that the user can specify. 
Signed-off-by: Michael Crosby --- configs/config.go | 12 ++- configs/config_test.go | 6 -- configs/mount.go | 24 +++-- configs/validate/config.go | 3 +- integration/template_test.go | 30 +++++- linux_rootfs.go | 202 +++++++++++++---------------------- linux_standard_init.go | 12 +-- linux_userns_init.go | 12 +-- nsinit/config.go | 30 +++++- 9 files changed, 173 insertions(+), 158 deletions(-) diff --git a/configs/config.go b/configs/config.go index fc7450d9f..b08b376c6 100644 --- a/configs/config.go +++ b/configs/config.go @@ -78,10 +78,6 @@ type Config struct { // commonly used by selinux ProcessLabel string `json:"process_label"` - // RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and - // /proc/bus - RestrictSys bool `json:"restrict_sys"` - // Rlimits specifies the resource limits, such as max open files, to set in the container // If Rlimits are not set, the container will inherit rlimits from the parent process Rlimits []Rlimit `json:"rlimits"` @@ -95,6 +91,14 @@ type Config struct { // GidMappings is an array of Group ID mappings for User Namespaces GidMappings []IDMap `json:"gid_mappings"` + + // MaskPaths specifies paths within the container's rootfs to mask over with a bind + // mount pointing to /dev/null as to prevent reads of the file. + MaskPaths []string `json:"mask_paths"` + + // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only + // so that these files prevent any writes. 
+ ReadonlyPaths []string `json:"readonly_paths"` } // Gets the root uid for the process on host which could be non-zero diff --git a/configs/config_test.go b/configs/config_test.go index 826aa6c3b..a34d1b09e 100644 --- a/configs/config_test.go +++ b/configs/config_test.go @@ -107,18 +107,12 @@ func TestConfigJsonFormat(t *testing.T) { break } } - for _, d := range DefaultSimpleDevices { if !containsDevice(d, container.Devices) { t.Logf("expected device configuration for %s", d.Path) t.Fail() } } - - if !container.RestrictSys { - t.Log("expected restrict sys to be true") - t.Fail() - } } func TestApparmorProfile(t *testing.T) { diff --git a/configs/mount.go b/configs/mount.go index eb26c5c05..7b3dea331 100644 --- a/configs/mount.go +++ b/configs/mount.go @@ -1,11 +1,21 @@ package configs type Mount struct { - Type string `json:"type"` - Source string `json:"source"` // Source path, in the host namespace - Destination string `json:"destination"` // Destination path, in the container - Writable bool `json:"writable"` - Relabel string `json:"relabel"` // Relabel source if set, "z" indicates shared, "Z" indicates unshared - Private bool `json:"private"` - Slave bool `json:"slave"` + // Source path for the mount. + Source string `json:"source"` + + // Destination path for the mount inside the container. + Destination string `json:"destination"` + + // Device the mount is for. + Device string `json:"device"` + + // Mount flags. + Flags int `json:"flags"` + + // Mount data applied to the mount. + Data string `json:"data"` + + // Relabel source if set, "z" indicates shared, "Z" indicates unshared. 
+ Relabel string `json:"relabel"` } diff --git a/configs/validate/config.go b/configs/validate/config.go index 6148e1eb9..710794bf2 100644 --- a/configs/validate/config.go +++ b/configs/validate/config.go @@ -68,7 +68,8 @@ func (v *ConfigValidator) hostname(config *configs.Config) error { func (v *ConfigValidator) security(config *configs.Config) error { // restrict sys without mount namespace - if config.RestrictSys && !config.Namespaces.Contains(configs.NEWNS) { + if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && + !config.Namespaces.Contains(configs.NEWNS) { return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") } return nil diff --git a/integration/template_test.go b/integration/template_test.go index 1e7c418f0..083c64887 100644 --- a/integration/template_test.go +++ b/integration/template_test.go @@ -13,6 +13,8 @@ var standardEnvironment = []string{ "TERM=xterm", } +const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV + // newTemplateConfig returns a base template for running a container // // it uses a network strategy of just setting a loopback interface @@ -49,9 +51,35 @@ func newTemplateConfig(rootfs string) *configs.Config { AllowAllDevices: false, AllowedDevices: configs.DefaultAllowedDevices, }, - + MaskPaths: []string{ + "/proc/kcore", + }, + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, Devices: configs.DefaultAutoCreatedDevices, Hostname: "integration", + Mounts: []*configs.Mount{ + { + Device: "tmpfs", + Source: "shm", + Destination: "/dev/shm", + Data: "mode=1777,size=65536k", + Flags: defaultMountFlags, + }, + { + Source: "mqueue", + Destination: "/dev/mqueue", + Device: "mqueue", + Flags: defaultMountFlags, + }, + { + Source: "sysfs", + Destination: "/sys", + Device: "sysfs", + Flags: defaultMountFlags | syscall.MS_RDONLY, + }, + }, Networks: []*configs.Network{ { Type: "loopback", diff --git a/linux_rootfs.go 
b/linux_rootfs.go index 36fc18989..5bfe40186 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -10,19 +10,33 @@ import ( "syscall" "time" - "github.com/docker/docker/pkg/symlink" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" ) const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV -type mount struct { - source string - path string - device string - flags int - data string +var baseMounts = []*configs.Mount{ + { + Source: "proc", + Destination: "/proc", + Device: "proc", + Flags: defaultMountFlags, + }, + { + Source: "tmpfs", + Destination: "/dev", + Device: "tmpfs", + Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, + Data: "mode=755", + }, + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, + Data: "newinstance,ptmxmode=0666,mode=620,gid=5", + }, } // setupRootfs sets up the devices, mount points, and filesystems for use inside a @@ -31,12 +45,8 @@ func setupRootfs(config *configs.Config) (err error) { if err := prepareRoot(config); err != nil { return err } - if err := mountSystem(config); err != nil { - return err - } - // apply any user specified mounts within the new mount namespace - for _, m := range config.Mounts { - if err := mountUserMount(m, config.Rootfs, config.MountLabel); err != nil { + for _, m := range append(baseMounts, config.Mounts...) 
{ + if err := mount(m, config.Rootfs, config.MountLabel); err != nil { return err } } @@ -77,16 +87,52 @@ func setupRootfs(config *configs.Config) (err error) { return nil } -// mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts -// inside the mount namespace -func mountSystem(config *configs.Config) error { - for _, m := range newSystemMounts(config.Rootfs, config.MountLabel, config.RestrictSys) { - if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { +func mount(m *configs.Mount, rootfs, mountLabel string) error { + var ( + dest = filepath.Join(rootfs, m.Destination) + data = label.FormatMountLabel(m.Data, mountLabel) + ) + switch m.Device { + case "proc": + if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) { + return err + } + return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), "") + case "tmpfs", "mqueue", "devpts", "sysfs": + if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) { + return err + } + return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data) + case "bind": + stat, err := os.Stat(m.Source) + if err != nil { + // error out if the source of a bind mount does not exist as we will be + // unable to bind anything to it. 
+ return err + } + if err := createIfNotExists(dest, stat.IsDir()); err != nil { return err } - if err := syscall.Mount(m.source, m.path, m.device, uintptr(m.flags), m.data); err != nil { + if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil { return err } + if m.Flags&syscall.MS_RDONLY != 0 { + if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil { + return err + } + } + if m.Relabel != "" { + if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil { + return err + } + } + if m.Flags&syscall.MS_PRIVATE != 0 { + if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil { + return err + } + } + default: + return fmt.Errorf("unknown mount device %q to %q", m.Device, m.Destination) } return nil } @@ -98,48 +144,23 @@ func setupDevSymlinks(rootfs string) error { {"/proc/self/fd/1", "/dev/stdout"}, {"/proc/self/fd/2", "/dev/stderr"}, } - // kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink // in /dev if it exists in /proc. 
if _, err := os.Stat("/proc/kcore"); err == nil { links = append(links, [2]string{"/proc/kcore", "/dev/kcore"}) } - for _, link := range links { var ( src = link[0] dst = filepath.Join(rootfs, link[1]) ) - if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) { return fmt.Errorf("symlink %s %s %s", src, dst, err) } } - return nil } -// TODO: this is crappy right now and should be cleaned up with a better way of handling system and -// standard bind mounts allowing them to be more dynamic -func newSystemMounts(rootfs, mountLabel string, sysReadonly bool) []mount { - systemMounts := []mount{ - {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, - {source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}, - {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, - {source: "mqueue", path: filepath.Join(rootfs, "dev", "mqueue"), device: "mqueue", flags: defaultMountFlags}, - {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, - } - - sysMountFlags := defaultMountFlags - if sysReadonly { - sysMountFlags |= syscall.MS_RDONLY - } - - systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: sysMountFlags}) - - return systemMounts -} - // Is stdin, stdout or stderr were to be pointing to '/dev/null', // this method will make them point to '/dev/null' from within this namespace. 
func reOpenDevNull(rootfs string) error { @@ -149,17 +170,17 @@ func reOpenDevNull(rootfs string) error { return fmt.Errorf("Failed to open /dev/null - %s", err) } defer file.Close() - if err = syscall.Fstat(int(file.Fd()), &devNullStat); err != nil { - return fmt.Errorf("Failed to stat /dev/null - %s", err) + if err := syscall.Fstat(int(file.Fd()), &devNullStat); err != nil { + return err } for fd := 0; fd < 3; fd++ { - if err = syscall.Fstat(fd, &stat); err != nil { - return fmt.Errorf("Failed to stat fd %d - %s", fd, err) + if err := syscall.Fstat(fd, &stat); err != nil { + return err } if stat.Rdev == devNullStat.Rdev { // Close and re-open the fd. - if err = syscall.Dup2(int(file.Fd()), fd); err != nil { - return fmt.Errorf("Failed to dup fd %d to fd %d - %s", file.Fd(), fd, err) + if err := syscall.Dup2(int(file.Fd()), fd); err != nil { + return err } } } @@ -280,77 +301,6 @@ func msMoveRoot(rootfs string) error { return syscall.Chdir("/") } -func mountUserMount(m *configs.Mount, rootfs, mountLabel string) error { - switch m.Type { - case "bind": - return bindMount(m, rootfs, mountLabel) - case "tmpfs": - return tmpfsMount(m, rootfs, mountLabel) - default: - return fmt.Errorf("unsupported mount type %s for %s", m.Type, m.Destination) - } -} - -func bindMount(m *configs.Mount, rootfs, mountLabel string) error { - var ( - flags = syscall.MS_BIND | syscall.MS_REC - dest = filepath.Join(rootfs, m.Destination) - ) - if !m.Writable { - flags = flags | syscall.MS_RDONLY - } - if m.Slave { - flags = flags | syscall.MS_SLAVE - } - stat, err := os.Stat(m.Source) - if err != nil { - return err - } - // TODO: (crosbymichael) This does not belong here and should be done a layer above - dest, err = symlink.FollowSymlinkInScope(dest, rootfs) - if err != nil { - return err - } - if err := createIfNotExists(dest, stat.IsDir()); err != nil { - return fmt.Errorf("creating new bind mount target %s", err) - } - if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags), ""); 
err != nil { - return err - } - if !m.Writable { - if err := syscall.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil { - return err - } - } - if m.Relabel != "" { - if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil { - return err - } - } - if m.Private { - if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil { - return err - } - } - return nil -} - -func tmpfsMount(m *configs.Mount, rootfs, mountLabel string) error { - var ( - err error - l = label.FormatMountLabel("", mountLabel) - dest = filepath.Join(rootfs, m.Destination) - ) - // TODO: (crosbymichael) This does not belong here and should be done a layer above - if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { - return err - } - if err := createIfNotExists(dest, true); err != nil { - return err - } - return syscall.Mount("tmpfs", dest, "tmpfs", uintptr(defaultMountFlags), l) -} - // createIfNotExists creates a file or a directory only if it does not already exist. func createIfNotExists(path string, isDir bool) error { if _, err := os.Stat(path); err != nil { @@ -394,11 +344,11 @@ func remountReadonly(path string) error { return fmt.Errorf("unable to mount %s as readonly max retries reached", path) } -// maskProckcore bind mounts /dev/null over the top of /proc/kcore inside a container to avoid security -// issues from processes reading memory information. -func maskProckcore() error { - if err := syscall.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { - return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore: %s", err) +// maskFile bind mounts /dev/null over the top of the specified path inside a container +// to avoid security issues from processes reading information from non-namespace aware mounts ( proc/kcore ). 
+func maskFile(path string) error { + if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) { + return err } return nil } diff --git a/linux_standard_init.go b/linux_standard_init.go index c576bdbd8..28ce17271 100644 --- a/linux_standard_init.go +++ b/linux_standard_init.go @@ -62,13 +62,13 @@ func (l *linuxStandardInit) Init() error { if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } - if l.config.Config.RestrictSys { - for _, path := range []string{"proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"} { - if err := remountReadonly(path); err != nil { - return err - } + for _, path := range l.config.Config.ReadonlyPaths { + if err := remountReadonly(path); err != nil { + return err } - if err := maskProckcore(); err != nil { + } + for _, path := range l.config.Config.MaskPaths { + if err := maskFile(path); err != nil { return err } } diff --git a/linux_userns_init.go b/linux_userns_init.go index a7da9ce4c..bd7e402c5 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -52,13 +52,13 @@ func (l *linuxUsernsInit) Init() error { if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { return err } - if l.config.Config.RestrictSys { - for _, path := range []string{"proc/sys", "proc/sysrq-trigger", "proc/irq", "proc/bus"} { - if err := remountReadonly(path); err != nil { - return err - } + for _, path := range l.config.Config.ReadonlyPaths { + if err := remountReadonly(path); err != nil { + return err } - if err := maskProckcore(); err != nil { + } + for _, path := range l.config.Config.MaskPaths { + if err := maskFile(path); err != nil { return err } } diff --git a/nsinit/config.go b/nsinit/config.go index 145fe59a5..cbd256305 100644 --- a/nsinit/config.go +++ b/nsinit/config.go @@ -12,6 +12,8 @@ import ( "github.com/docker/libcontainer/configs" ) +const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV + var createFlags = 
[]cli.Flag{ cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"}, cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"}, @@ -107,9 +109,35 @@ func getTemplate() *configs.Config { AllowAllDevices: false, AllowedDevices: configs.DefaultAllowedDevices, }, - Devices: configs.DefaultAutoCreatedDevices, Hostname: "nsinit", + MaskPaths: []string{ + "/proc/kcore", + }, + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, + Mounts: []*configs.Mount{ + { + Device: "tmpfs", + Source: "shm", + Destination: "/dev/shm", + Data: "mode=1777,size=65536k", + Flags: defaultMountFlags, + }, + { + Source: "mqueue", + Destination: "/dev/mqueue", + Device: "mqueue", + Flags: defaultMountFlags, + }, + { + Source: "sysfs", + Destination: "/sys", + Device: "sysfs", + Flags: defaultMountFlags | syscall.MS_RDONLY, + }, + }, Networks: []*configs.Network{ { Type: "loopback", From 77085907a44039fe1cf9fe24d9c7675aa53d2f9b Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Fri, 13 Feb 2015 18:15:58 -0500 Subject: [PATCH 093/101] Add a constant for the container console path. Signed-off-by: Mrunal Patel --- linux_console.go | 6 +++++- linux_userns_init.go | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/linux_console.go b/linux_console.go index 5cb5f7132..60de3ee94 100644 --- a/linux_console.go +++ b/linux_console.go @@ -12,6 +12,10 @@ import ( "github.com/docker/libcontainer/label" ) +const ( + containerConsolePath string = "/dev/console" +) + // NewConsole returns an initalized console that can be used within a container by copying bytes // from the master side to the slave that is attached as the tty for the container's init process. 
func NewConsole() (Console, error) { @@ -83,7 +87,7 @@ func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error { if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil { return err } - dest := filepath.Join(rootfs, "dev/console") + dest := filepath.Join(rootfs, containerConsolePath) f, err := os.Create(dest) if err != nil && !os.IsExist(err) { return err diff --git a/linux_userns_init.go b/linux_userns_init.go index bd7e402c5..d07668cd1 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -21,8 +21,9 @@ func (l *linuxUsernsInit) Init() error { } consolePath := l.config.Config.Console if consolePath != "" { - // TODO: why is this hard coded? - console := newConsoleFromPath("/dev/console") + // We use the containerConsolePath here, because the console has already been + // setup by the side car process for the user namespace scenario. + console := newConsoleFromPath(containerConsolePath) if err := console.dupStdio(); err != nil { return err } From a1d509759b9195a1c022f2eb9585b74d07a0f084 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Fri, 13 Feb 2015 19:06:17 -0500 Subject: [PATCH 094/101] Fixed some typos and tried to make comments read better. Signed-off-by: Mrunal Patel --- linux_container.go | 2 +- linux_init.go | 6 +++--- linux_network.go | 2 +- linux_rootfs.go | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/linux_container.go b/linux_container.go index 73aaa441e..78996d789 100644 --- a/linux_container.go +++ b/linux_container.go @@ -168,7 +168,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c cloneFlags := c.config.Namespaces.CloneFlags() if cloneFlags&syscall.CLONE_NEWUSER != 0 { if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil { - // mappings is not supported + // user mappings are not supported return nil, err } // Default to root user when user namespaces are enabled. 
diff --git a/linux_init.go b/linux_init.go index 9ed27e892..0fbda641e 100644 --- a/linux_init.go +++ b/linux_init.go @@ -40,7 +40,7 @@ type network struct { TempVethPeerName string `json:"temp_veth_peer_name"` } -// Process is used for transferring parameters from Exec() to Init() +// initConfig is used for transferring parameters from Exec() to Init() type initConfig struct { Args []string `json:"args"` Env []string `json:"env"` @@ -99,7 +99,7 @@ func populateProcessEnvironment(env []string) error { } // finalizeNamespace drops the caps, sets the correct user -// and working dir, and closes any leaky file descriptors +// and working dir, and closes any leaked file descriptors // before execing the command inside the namespace func finalizeNamespace(config *initConfig) error { // Ensure that all non-standard fds we may have accidentally @@ -229,7 +229,7 @@ func setupRlimits(config *configs.Config) error { return nil } -// killCgroupProcesses freezes then itterates over all the processes inside the +// killCgroupProcesses freezes then iterates over all the processes inside the // manager's cgroups sending a SIGKILL to each process then waiting for them to // exit. 
func killCgroupProcesses(m cgroups.Manager) error { diff --git a/linux_network.go b/linux_network.go index e720dade4..0325e97b7 100644 --- a/linux_network.go +++ b/linux_network.go @@ -98,7 +98,7 @@ func (l *loopback) initialize(config *network) error { } // veth is a network strategy that uses a bridge and creates -// a veth pair, one that stays outside on the host and the other +// a veth pair, one that is attached to the bridge on the host and the other // is placed inside the container's namespace type veth struct { } diff --git a/linux_rootfs.go b/linux_rootfs.go index 5bfe40186..5e3e51d7f 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -161,8 +161,8 @@ func setupDevSymlinks(rootfs string) error { return nil } -// Is stdin, stdout or stderr were to be pointing to '/dev/null', -// this method will make them point to '/dev/null' from within this namespace. +// If stdin, stdout or stderr are pointing to '/dev/null' in the global mount namespace, +// this method will make them point to '/dev/null' in this namespace. func reOpenDevNull(rootfs string) error { var stat, devNullStat syscall.Stat_t file, err := os.Open(filepath.Join(rootfs, "/dev/null")) From cacc15360ec04abb4c45f918e83bf33203946e32 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Fri, 13 Feb 2015 19:50:00 -0500 Subject: [PATCH 095/101] Add config generation for simple user namespace testing. 
Signed-off-by: Mrunal Patel --- nsinit/config.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/nsinit/config.go b/nsinit/config.go index cbd256305..e26f39e90 100644 --- a/nsinit/config.go +++ b/nsinit/config.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/json" "io" + "math" "os" "path/filepath" "syscall" @@ -27,6 +28,7 @@ var createFlags = []cli.Flag{ cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"}, cli.StringFlag{Name: "process-label", Usage: "set the process label"}, cli.StringFlag{Name: "mount-label", Usage: "set the mount label"}, + cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"}, } var configCommand = cli.Command{ @@ -70,6 +72,21 @@ func modify(config *configs.Config, context *cli.Context) { config.AppArmorProfile = context.String("apparmor-profile") config.ProcessLabel = context.String("process-label") config.MountLabel = context.String("mount-label") + + userns_uid := context.Int("userns-root-uid") + if userns_uid != 0 { + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + config.UidMappings = []configs.IDMap{ + {ContainerID: 0, HostID: userns_uid, Size: 1}, + {ContainerID: 1, HostID: 1, Size: userns_uid - 1}, + {ContainerID: userns_uid + 1, HostID: userns_uid + 1, Size: math.MaxInt32 - userns_uid}, + } + config.GidMappings = []configs.IDMap{ + {ContainerID: 0, HostID: userns_uid, Size: 1}, + {ContainerID: 1, HostID: 1, Size: userns_uid - 1}, + {ContainerID: userns_uid + 1, HostID: userns_uid + 1, Size: math.MaxInt32 - userns_uid}, + } + } } func getTemplate() *configs.Config { From 4c43b0f49880840966cb5df13abeeb19aa8e16d7 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 13 Feb 2015 14:41:37 -0800 Subject: [PATCH 096/101] Add mutex around stateful container operations Signed-off-by: Michael Crosby --- linux_container.go | 105 ++++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 40 deletions(-) diff --git 
a/linux_container.go b/linux_container.go index 78996d789..7ab13a72a 100644 --- a/linux_container.go +++ b/linux_container.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "sync" "syscall" "github.com/docker/libcontainer/cgroups" @@ -22,6 +23,7 @@ type linuxContainer struct { cgroupManager cgroups.Manager initArgs []string initProcess parentProcess + m sync.Mutex } // ID returns the container's unique ID @@ -35,46 +37,15 @@ func (c *linuxContainer) Config() configs.Config { } func (c *linuxContainer) Status() (Status, error) { - if c.initProcess == nil { - return Destroyed, nil - } - // return Running if the init process is alive - if err := syscall.Kill(c.initProcess.pid(), 0); err != nil { - if err == syscall.ESRCH { - return Destroyed, nil - } - return 0, newSystemError(err) - } - if c.config.Cgroups != nil && c.config.Cgroups.Freezer == configs.Frozen { - return Paused, nil - } - return Running, nil + c.m.Lock() + defer c.m.Unlock() + return c.currentStatus() } func (c *linuxContainer) State() (*State, error) { - status, err := c.Status() - if err != nil { - return nil, err - } - if status == Destroyed { - return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists) - } - startTime, err := c.initProcess.startTime() - if err != nil { - return nil, newSystemError(err) - } - state := &State{ - ID: c.ID(), - Config: *c.config, - InitProcessPid: c.initProcess.pid(), - InitProcessStartTime: startTime, - CgroupPaths: c.cgroupManager.GetPaths(), - NamespacePaths: make(map[string]string), - } - for _, ns := range c.config.Namespaces { - state.NamespacePaths[string(ns.Type)] = ns.GetPath(c.initProcess.pid()) - } - return state, nil + c.m.Lock() + defer c.m.Unlock() + return c.currentState() } func (c *linuxContainer) Processes() ([]int, error) { @@ -109,7 +80,9 @@ func (c *linuxContainer) Stats() (*Stats, error) { } func (c *linuxContainer) Start(process *Process) (int, error) { - status, err := c.Status() + c.m.Lock() + defer 
c.m.Unlock() + status, err := c.currentStatus() if err != nil { return -1, err } @@ -126,6 +99,7 @@ func (c *linuxContainer) Start(process *Process) (int, error) { return -1, newSystemError(err) } if doInit { + c.updateState(parent) } return parent.pid(), nil @@ -222,7 +196,9 @@ func newPipe() (parent *os.File, child *os.File, err error) { } func (c *linuxContainer) Destroy() error { - status, err := c.Status() + c.m.Lock() + defer c.m.Unlock() + status, err := c.currentStatus() if err != nil { return err } @@ -243,14 +219,20 @@ func (c *linuxContainer) Destroy() error { } func (c *linuxContainer) Pause() error { + c.m.Lock() + defer c.m.Unlock() return c.cgroupManager.Freeze(configs.Frozen) } func (c *linuxContainer) Resume() error { + c.m.Lock() + defer c.m.Unlock() return c.cgroupManager.Freeze(configs.Thawed) } func (c *linuxContainer) Signal(signal os.Signal) error { + c.m.Lock() + defer c.m.Unlock() if c.initProcess == nil { return newGenericError(nil, ContainerNotRunning) } @@ -263,7 +245,7 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { func (c *linuxContainer) updateState(process parentProcess) error { c.initProcess = process - state, err := c.State() + state, err := c.currentState() if err != nil { return err } @@ -274,3 +256,46 @@ func (c *linuxContainer) updateState(process parentProcess) error { defer f.Close() return json.NewEncoder(f).Encode(state) } + +func (c *linuxContainer) currentStatus() (Status, error) { + if c.initProcess == nil { + return Destroyed, nil + } + // return Running if the init process is alive + if err := syscall.Kill(c.initProcess.pid(), 0); err != nil { + if err == syscall.ESRCH { + return Destroyed, nil + } + return 0, newSystemError(err) + } + if c.config.Cgroups != nil && c.config.Cgroups.Freezer == configs.Frozen { + return Paused, nil + } + return Running, nil +} + +func (c *linuxContainer) currentState() (*State, error) { + status, err := c.currentStatus() + if err != nil { + return nil, err + } + if 
status == Destroyed { + return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists) + } + startTime, err := c.initProcess.startTime() + if err != nil { + return nil, newSystemError(err) + } + state := &State{ + ID: c.ID(), + Config: *c.config, + InitProcessPid: c.initProcess.pid(), + InitProcessStartTime: startTime, + CgroupPaths: c.cgroupManager.GetPaths(), + NamespacePaths: make(map[string]string), + } + for _, ns := range c.config.Namespaces { + state.NamespacePaths[string(ns.Type)] = ns.GetPath(c.initProcess.pid()) + } + return state, nil +} From b21b19e0607582cceb8d715b85d27ec113a0b799 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 13 Feb 2015 15:43:14 -0800 Subject: [PATCH 097/101] Add factory configuration via functional api This allows you to set certain configuration options such as what cgroup implementation to use on the factory at create time. Signed-off-by: Michael Crosby --- cgroups/manager/manager.go | 39 ------------- integration/exec_test.go | 4 +- integration/init_test.go | 2 +- integration/utils_test.go | 2 +- linux_factory.go | 112 ++++++++++++++++++++++++++----------- linux_factory_test.go | 12 ++-- nsinit/init.go | 2 +- nsinit/main.go | 2 +- nsinit/utils.go | 2 +- 9 files changed, 93 insertions(+), 84 deletions(-) delete mode 100644 cgroups/manager/manager.go diff --git a/cgroups/manager/manager.go b/cgroups/manager/manager.go deleted file mode 100644 index b8e2010ed..000000000 --- a/cgroups/manager/manager.go +++ /dev/null @@ -1,39 +0,0 @@ -package manager - -import ( - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/cgroups/systemd" - "github.com/docker/libcontainer/configs" -) - -// Create a new cgroup manager with specified configuration -// TODO this object is not really initialized until Apply() is called. -// Maybe make this to the equivalent of Apply() at some point? 
-// @vmarmol -func NewCgroupManager(cgroups *configs.Cgroup) cgroups.Manager { - if systemd.UseSystemd() { - return &systemd.Manager{ - Cgroups: cgroups, - } - } - - return &fs.Manager{ - Cgroups: cgroups, - } -} - -// Restore a cgroup manager with specified configuration and state -func LoadCgroupManager(cgroups *configs.Cgroup, paths map[string]string) cgroups.Manager { - if systemd.UseSystemd() { - return &systemd.Manager{ - Cgroups: cgroups, - Paths: paths, - } - } - - return &fs.Manager{ - Cgroups: cgroups, - Paths: paths, - } -} diff --git a/integration/exec_test.go b/integration/exec_test.go index 4fef06ea7..1b04ff50c 100644 --- a/integration/exec_test.go +++ b/integration/exec_test.go @@ -236,7 +236,7 @@ func TestEnter(t *testing.T) { config := newTemplateConfig(rootfs) - factory, err := libcontainer.New(root, []string{os.Args[0], "init", "--"}) + factory, err := libcontainer.New(root, libcontainer.InitArgs(os.Args[0], "init", "--"), libcontainer.Cgroupfs) if err != nil { t.Fatal(err) } @@ -340,7 +340,7 @@ func TestFreeze(t *testing.T) { config := newTemplateConfig(rootfs) - factory, err := libcontainer.New(root, []string{os.Args[0], "init", "--"}) + factory, err := libcontainer.New(root, libcontainer.InitArgs(os.Args[0], "init", "--"), libcontainer.Cgroupfs) if err != nil { t.Fatal(err) } diff --git a/integration/init_test.go b/integration/init_test.go index 8af88efd9..c2abe3b7f 100644 --- a/integration/init_test.go +++ b/integration/init_test.go @@ -17,7 +17,7 @@ func init() { } runtime.GOMAXPROCS(1) runtime.LockOSThread() - factory, err := libcontainer.New("", nil) + factory, err := libcontainer.New("") if err != nil { log.Fatalf("unable to initialize for container: %s", err) } diff --git a/integration/utils_test.go b/integration/utils_test.go index 141035e7d..cd82859b5 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -81,7 +81,7 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe Stderr: 
buffers.Stderr, } - factory, err := libcontainer.New(".", []string{os.Args[0], "init", "--"}) + factory, err := libcontainer.New(".", libcontainer.InitArgs(os.Args[0], "init", "--"), libcontainer.Cgroupfs) if err != nil { return nil, -1, err } diff --git a/linux_factory.go b/linux_factory.go index 66823953e..879ad4274 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -10,9 +10,9 @@ import ( "path/filepath" "regexp" - "github.com/golang/glog" - - cgroups "github.com/docker/libcontainer/cgroups/manager" + "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/cgroups/systemd" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/configs/validate" ) @@ -26,39 +26,89 @@ var ( maxIdLen = 1024 ) -// New returns a linux based container factory based in the root directory. -func New(root string, initArgs []string) (Factory, error) { +// InitArgs returns an options func to configure a LinuxFactory with the +// provided init arguments. +func InitArgs(args ...string) func(*LinuxFactory) error { + return func(l *LinuxFactory) error { + l.InitArgs = args + return nil + } +} + +// SystemdCgroups is an options func to configure a LinuxFactory to return +// containers that use systemd to create and manage cgroups. +func SystemdCgroups(l *LinuxFactory) error { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &systemd.Manager{ + Cgroups: config, + Paths: paths, + } + } + return nil +} + +// Cgroupfs is an options func to configure a LinuxFactory to return +// containers that use the native cgroups filesystem implementation to +// create and manage cgroups. 
+func Cgroupfs(l *LinuxFactory) error { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &fs.Manager{ + Cgroups: config, + Paths: paths, + } + } + return nil +} + +// New returns a linux based container factory based in the root directory and +// configures the factory with the provided option funcs. +func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { if root != "" { if err := os.MkdirAll(root, 0700); err != nil { return nil, newGenericError(err, SystemError) } } - return &linuxFactory{ - root: root, - initArgs: initArgs, - validator: validate.New(), - }, nil + l := &LinuxFactory{ + Root: root, + InitArgs: []string{os.Args[0], "init"}, + Validator: validate.New(), + } + Cgroupfs(l) + for _, opt := range options { + if err := opt(l); err != nil { + return nil, err + } + } + return l, nil } -// linuxFactory implements the default factory interface for linux based systems. -type linuxFactory struct { - // root is the root directory - root string - initArgs []string - validator validate.Validator +// LinuxFactory implements the default factory interface for linux based systems. +type LinuxFactory struct { + // Root directory for the factory to store state. + Root string + + // InitArgs are arguments for calling the init responsibilities for spawning + // a container. + InitArgs []string + + // Validator provides validation to container configurations. + Validator validate.Validator + + // NewCgroupsManager returns an initialized cgroups manager for a single container. 
+ NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager } -func (l *linuxFactory) Create(id string, config *configs.Config) (Container, error) { - if l.root == "" { +func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { + if l.Root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } if err := l.validateID(id); err != nil { return nil, err } - if err := l.validator.Validate(config); err != nil { + if err := l.Validator.Validate(config); err != nil { return nil, newGenericError(err, ConfigInvalid) } - containerRoot := filepath.Join(l.root, id) + containerRoot := filepath.Join(l.Root, id) if _, err := os.Stat(containerRoot); err == nil { return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse) } else if !os.IsNotExist(err) { @@ -71,16 +121,16 @@ func (l *linuxFactory) Create(id string, config *configs.Config) (Container, err id: id, root: containerRoot, config: config, - initArgs: l.initArgs, - cgroupManager: cgroups.NewCgroupManager(config.Cgroups), + initArgs: l.InitArgs, + cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), }, nil } -func (l *linuxFactory) Load(id string) (Container, error) { - if l.root == "" { +func (l *LinuxFactory) Load(id string) (Container, error) { + if l.Root == "" { return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) } - containerRoot := filepath.Join(l.root, id) + containerRoot := filepath.Join(l.Root, id) state, err := l.loadState(containerRoot) if err != nil { return nil, err @@ -89,21 +139,19 @@ func (l *linuxFactory) Load(id string) (Container, error) { processPid: state.InitProcessPid, processStartTime: state.InitProcessStartTime, } - cgroupManager := cgroups.LoadCgroupManager(state.Config.Cgroups, state.CgroupPaths) - glog.Infof("using %s as cgroup manager", cgroupManager) return &linuxContainer{ initProcess: r, id: id, config: &state.Config, - initArgs: l.initArgs, - cgroupManager: 
cgroupManager, + initArgs: l.InitArgs, + cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), root: containerRoot, }, nil } // StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state // This is a low level implementation detail of the reexec and should not be consumed externally -func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { +func (l *LinuxFactory) StartInitialization(pipefd uintptr) (err error) { var ( pipe = os.NewFile(uintptr(pipefd), "pipe") it = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) @@ -134,7 +182,7 @@ func (l *linuxFactory) StartInitialization(pipefd uintptr) (err error) { return i.Init() } -func (l *linuxFactory) loadState(root string) (*State, error) { +func (l *LinuxFactory) loadState(root string) (*State, error) { f, err := os.Open(filepath.Join(root, stateFilename)) if err != nil { if os.IsNotExist(err) { @@ -150,7 +198,7 @@ func (l *linuxFactory) loadState(root string) (*State, error) { return state, nil } -func (l *linuxFactory) validateID(id string) error { +func (l *LinuxFactory) validateID(id string) error { if !idRegex.MatchString(id) { return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat) } diff --git a/linux_factory_test.go b/linux_factory_test.go index 69001669d..19fc77ba5 100644 --- a/linux_factory_test.go +++ b/linux_factory_test.go @@ -29,19 +29,19 @@ func TestFactoryNew(t *testing.T) { t.Fatal(rerr) } defer os.RemoveAll(root) - factory, err := New(root, nil) + factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } if factory == nil { t.Fatal("factory should not be nil") } - lfactory, ok := factory.(*linuxFactory) + lfactory, ok := factory.(*LinuxFactory) if !ok { t.Fatal("expected linux factory returned on linux based systems") } - if lfactory.root != root { - t.Fatalf("expected factory root to be %q but received %q", root, lfactory.root) + if lfactory.Root != root { + t.Fatalf("expected 
factory root to be %q but received %q", root, lfactory.Root) } } @@ -51,7 +51,7 @@ func TestFactoryLoadNotExists(t *testing.T) { t.Fatal(rerr) } defer os.RemoveAll(root) - factory, err := New(root, nil) + factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } @@ -91,7 +91,7 @@ func TestFactoryLoadContainer(t *testing.T) { if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil { t.Fatal(err) } - factory, err := New(root, nil) + factory, err := New(root, Cgroupfs) if err != nil { t.Fatal(err) } diff --git a/nsinit/init.go b/nsinit/init.go index ea6295e5e..4d9a8e63f 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -15,7 +15,7 @@ var initCommand = cli.Command{ Action: func(context *cli.Context) { runtime.GOMAXPROCS(1) runtime.LockOSThread() - factory, err := libcontainer.New("", nil) + factory, err := libcontainer.New("") if err != nil { log.Fatal(err) } diff --git a/nsinit/main.go b/nsinit/main.go index a2afd00ca..28e315be9 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -10,7 +10,7 @@ import ( func main() { app := cli.NewApp() app.Name = "nsinit" - app.Version = "1" + app.Version = "2" app.Author = "libcontainer maintainers" app.Flags = []cli.Flag{ cli.StringFlag{Name: "nspid"}, diff --git a/nsinit/utils.go b/nsinit/utils.go index 73c13b598..6a3916fe1 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -29,7 +29,7 @@ func loadConfig(context *cli.Context) (*configs.Config, error) { } func loadFactory(context *cli.Context) (libcontainer.Factory, error) { - return libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init"}) + return libcontainer.New(context.GlobalString("root"), libcontainer.Cgroupfs) } func getContainer(context *cli.Context) (libcontainer.Container, error) { From 339edce03ed7fe59ec4a778abff243fa4cabaa23 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 17 Feb 2015 21:37:02 -0800 Subject: [PATCH 098/101] Update console and mount handling for user namespaces This updates the 
console handling to chown the console on creation to the root user within the container. This also moves the setup mounts from the userns sidecar process into the main init processes by trying to mknod devices, if it fails on an EPERM then bind mount the device from the host into the container for use. This prevents access issues when the sidecar process mknods the device for the usernamespace returning an EPERM when writing to dev/null. This also adds some error handling for init processes and nsinit updates with added flags for testing and other functions. Signed-off-by: Michael Crosby --- linux_console.go | 20 +++---- linux_rootfs.go | 68 ++++++++++++----------- linux_userns_init.go | 35 +++++++----- linux_userns_sidecar_init.go | 12 ----- nsinit/config.go | 101 ++++++++++++++++++++++++++++++++--- nsinit/exec.go | 10 ++-- nsinit/init.go | 4 +- nsinit/tty.go | 20 +++---- 8 files changed, 178 insertions(+), 92 deletions(-) diff --git a/linux_console.go b/linux_console.go index 60de3ee94..f7a67a014 100644 --- a/linux_console.go +++ b/linux_console.go @@ -12,13 +12,9 @@ import ( "github.com/docker/libcontainer/label" ) -const ( - containerConsolePath string = "/dev/console" -) - // NewConsole returns an initalized console that can be used within a container by copying bytes // from the master side to the slave that is attached as the tty for the container's init process. 
-func NewConsole() (Console, error) { +func NewConsole(uid, gid int) (Console, error) { master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0) if err != nil { return nil, err @@ -30,6 +26,12 @@ func NewConsole() (Console, error) { if err := unlockpt(master); err != nil { return nil, err } + if err := os.Chmod(console, 0600); err != nil { + return nil, err + } + if err := os.Chown(console, uid, gid); err != nil { + return nil, err + } return &linuxConsole{ slavePath: console, master: master, @@ -78,16 +80,10 @@ func (c *linuxConsole) Close() error { func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error { oldMask := syscall.Umask(0000) defer syscall.Umask(oldMask) - if err := os.Chmod(c.slavePath, 0600); err != nil { - return err - } - if err := os.Chown(c.slavePath, uid, gid); err != nil { - return err - } if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil { return err } - dest := filepath.Join(rootfs, containerConsolePath) + dest := filepath.Join(rootfs, "/dev/console") f, err := os.Create(dest) if err != nil && !os.IsExist(err) { return err diff --git a/linux_rootfs.go b/linux_rootfs.go index 5e3e51d7f..74c711492 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -35,7 +35,7 @@ var baseMounts = []*configs.Mount{ Destination: "/dev/pts", Device: "devpts", Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, - Data: "newinstance,ptmxmode=0666,mode=620,gid=5", + Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", }, } @@ -43,32 +43,29 @@ var baseMounts = []*configs.Mount{ // new mount namespace. func setupRootfs(config *configs.Config) (err error) { if err := prepareRoot(config); err != nil { - return err + return newSystemError(err) } for _, m := range append(baseMounts, config.Mounts...) 
{ if err := mount(m, config.Rootfs, config.MountLabel); err != nil { - return err + return newSystemError(err) } } if err := createDevices(config); err != nil { - return err + return newSystemError(err) } if err := setupPtmx(config); err != nil { - return err + return newSystemError(err) } // stdin, stdout and stderr could be pointing to /dev/null from parent namespace. - // Re-open them inside this namespace. - // FIXME: Need to fix this for user namespaces. - if !config.Namespaces.Contains(configs.NEWUSER) { - if err := reOpenDevNull(config.Rootfs); err != nil { - return err - } + // re-open them inside this namespace. + if err := reOpenDevNull(config.Rootfs); err != nil { + return newSystemError(err) } if err := setupDevSymlinks(config.Rootfs); err != nil { - return err + return newSystemError(err) } if err := syscall.Chdir(config.Rootfs); err != nil { - return err + return newSystemError(err) } if config.NoPivotRoot { err = msMoveRoot(config.Rootfs) @@ -76,11 +73,11 @@ func setupRootfs(config *configs.Config) (err error) { err = pivotRoot(config.Rootfs, config.PivotDir) } if err != nil { - return err + return newSystemError(err) } if config.Readonlyfs { if err := setReadonly(); err != nil { - return err + return newSystemError(err) } } syscall.Umask(0022) @@ -209,6 +206,28 @@ func createDeviceNode(rootfs string, node *configs.Device) error { if err := os.MkdirAll(parent, 0755); err != nil { return err } + if err := mknodDevice(dest, node); err != nil { + if os.IsExist(err) { + return nil + } + // containers running in a user namespace are not allowed to mknod + // devices so we can just bind mount it from the host. 
+ if err == syscall.EPERM { + f, err := os.Create(dest) + if err != nil { + if os.IsExist(err) { + return nil + } + return err + } + f.Close() + return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "") + } + } + return nil +} + +func mknodDevice(dest string, node *configs.Device) error { fileMode := node.FileMode switch node.Type { case 'c': @@ -218,13 +237,10 @@ func createDeviceNode(rootfs string, node *configs.Device) error { default: return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) } - if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) { - return fmt.Errorf("mknod %s %s", node.Path, err) - } - if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { - return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) + if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil { + return err } - return nil + return syscall.Chown(dest, int(node.Uid), int(node.Gid)) } func prepareRoot(config *configs.Config) error { @@ -251,16 +267,8 @@ func setupPtmx(config *configs.Config) error { return fmt.Errorf("symlink dev ptmx %s", err) } if config.Console != "" { - uid, err := config.HostUID() - if err != nil { - return err - } - gid, err := config.HostGID() - if err != nil { - return err - } console := newConsoleFromPath(config.Console) - return console.mount(config.Rootfs, config.MountLabel, uid, gid) + return console.mount(config.Rootfs, config.MountLabel, 0, 0) } return nil } diff --git a/linux_userns_init.go b/linux_userns_init.go index d07668cd1..7f5806fe5 100644 --- a/linux_userns_init.go +++ b/linux_userns_init.go @@ -6,6 +6,7 @@ import ( "syscall" "github.com/docker/libcontainer/apparmor" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/system" ) @@ -17,63 +18,69 @@ type linuxUsernsInit struct { func (l *linuxUsernsInit) Init() error { // join any namespaces via a path to 
the namespace fd if provided if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { - return err + return newSystemError(err) } consolePath := l.config.Config.Console if consolePath != "" { // We use the containerConsolePath here, because the console has already been // setup by the side car process for the user namespace scenario. - console := newConsoleFromPath(containerConsolePath) + console := newConsoleFromPath(consolePath) if err := console.dupStdio(); err != nil { - return err + return newSystemError(err) } } if _, err := syscall.Setsid(); err != nil { - return err + return newSystemError(err) } if consolePath != "" { if err := system.Setctty(); err != nil { - return err + return newSystemError(err) } } if l.config.Cwd == "" { l.config.Cwd = "/" } if err := setupRlimits(l.config.Config); err != nil { - return err + return newSystemError(err) + } + // InitializeMountNamespace() can be executed only for a new mount namespace + if l.config.Config.Namespaces.Contains(configs.NEWNS) { + if err := setupRootfs(l.config.Config); err != nil { + return newSystemError(err) + } } if hostname := l.config.Config.Hostname; hostname != "" { if err := syscall.Sethostname([]byte(hostname)); err != nil { - return err + return newSystemError(err) } } if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { - return err + return newSystemError(err) } if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { - return err + return newSystemError(err) } for _, path := range l.config.Config.ReadonlyPaths { if err := remountReadonly(path); err != nil { - return err + return newSystemError(err) } } for _, path := range l.config.Config.MaskPaths { if err := maskFile(path); err != nil { - return err + return newSystemError(err) } } pdeath, err := system.GetParentDeathSignal() if err != nil { - return err + return newSystemError(err) } if err := finalizeNamespace(l.config); err != nil { - return err + return newSystemError(err) 
} // finalizeNamespace can change user/group which clears the parent death // signal, so we restore it here. if err := pdeath.Restore(); err != nil { - return err + return newSystemError(err) } // Signal self if parent is already dead. Does nothing if running in a new // PID namespace, as Getppid will always return 0. diff --git a/linux_userns_sidecar_init.go b/linux_userns_sidecar_init.go index eedc63e0d..119abfabc 100644 --- a/linux_userns_sidecar_init.go +++ b/linux_userns_sidecar_init.go @@ -2,11 +2,6 @@ package libcontainer -import ( - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/label" -) - // linuxUsernsSideCar is run to setup mounts and networking related operations // for a user namespace enabled process as a user namespace root doesn't // have permissions to perform these operations. @@ -24,12 +19,5 @@ func (l *linuxUsernsSideCar) Init() error { if err := setupRoute(l.config.Config); err != nil { return err } - label.Init() - // InitializeMountNamespace() can be executed only for a new mount namespace - if l.config.Config.Namespaces.Contains(configs.NEWNS) { - if err := setupRootfs(l.config.Config); err != nil { - return err - } - } return nil } diff --git a/nsinit/config.go b/nsinit/config.go index e26f39e90..7dbc8a78e 100644 --- a/nsinit/config.go +++ b/nsinit/config.go @@ -7,10 +7,13 @@ import ( "math" "os" "path/filepath" + "strings" "syscall" + "github.com/Sirupsen/logrus" "github.com/codegangsta/cli" "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/utils" ) const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV @@ -28,7 +31,17 @@ var createFlags = []cli.Flag{ cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"}, cli.StringFlag{Name: "process-label", Usage: "set the process label"}, cli.StringFlag{Name: "mount-label", Usage: "set the mount label"}, + cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"}, cli.IntFlag{Name: "userns-root-uid", 
Usage: "set the user namespace root uid"}, + cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"}, + cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"}, + cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"}, + cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"}, + cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"}, + cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"}, + cli.StringFlag{Name: "veth-address", Usage: "veth ip address"}, + cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"}, + cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"}, } var configCommand = cli.Command{ @@ -72,10 +85,11 @@ func modify(config *configs.Config, context *cli.Context) { config.AppArmorProfile = context.String("apparmor-profile") config.ProcessLabel = context.String("process-label") config.MountLabel = context.String("mount-label") + config.Rootfs = context.String("rootfs") userns_uid := context.Int("userns-root-uid") if userns_uid != 0 { - config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + config.Namespaces.Add(configs.NEWUSER, "") config.UidMappings = []configs.IDMap{ {ContainerID: 0, HostID: userns_uid, Size: 1}, {ContainerID: 1, HostID: 1, Size: userns_uid - 1}, @@ -86,6 +100,84 @@ func modify(config *configs.Config, context *cli.Context) { {ContainerID: 1, HostID: 1, Size: userns_uid - 1}, {ContainerID: userns_uid + 1, HostID: userns_uid + 1, Size: math.MaxInt32 - userns_uid}, } + for _, node := range config.Devices { + node.Uid = uint32(userns_uid) + node.Gid = uint32(userns_uid) + } + } + for _, rawBind := range context.StringSlice("bind") { + mount := &configs.Mount{ + Device: "bind", + Flags: syscall.MS_BIND | syscall.MS_REC, + } + parts := strings.SplitN(rawBind, ":", 3) + switch len(parts) { + default: + logrus.Fatalf("invalid bind mount %s", rawBind) + case 2: + mount.Source, mount.Destination = parts[0], parts[1] + case 3: + mount.Source, 
mount.Destination = parts[0], parts[1] + switch parts[2] { + case "ro": + mount.Flags |= syscall.MS_RDONLY + case "rw": + default: + logrus.Fatalf("invalid bind mount mode %s", parts[2]) + } + } + config.Mounts = append(config.Mounts, mount) + } + for _, tmpfs := range context.StringSlice("tmpfs") { + config.Mounts = append(config.Mounts, &configs.Mount{ + Device: "tmpfs", + Destination: tmpfs, + Flags: syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV, + }) + } + for flag, value := range map[string]configs.NamespaceType{ + "net": configs.NEWNET, + "mnt": configs.NEWNS, + "pid": configs.NEWPID, + "ipc": configs.NEWIPC, + "uts": configs.NEWUTS, + } { + switch v := context.String(flag); v { + case "host": + config.Namespaces.Remove(value) + case "", "private": + if !config.Namespaces.Contains(value) { + config.Namespaces.Add(value, "") + } + if v == "net" { + config.Networks = []*configs.Network{ + { + Type: "loopback", + Address: "127.0.0.1/0", + Gateway: "localhost", + }, + } + } + default: + config.Namespaces.Remove(value) + config.Namespaces.Add(value, v) + } + } + if bridge := context.String("veth-bridge"); bridge != "" { + hostName, err := utils.GenerateRandomName("veth", 7) + if err != nil { + logrus.Fatal(err) + } + network := &configs.Network{ + Type: "veth", + Name: "eth0", + Bridge: bridge, + Address: context.String("veth-address"), + Gateway: context.String("veth-gateway"), + Mtu: context.Int("veth-mtu"), + HostInterfaceName: hostName, + } + config.Networks = append(config.Networks, network) } } @@ -155,13 +247,6 @@ func getTemplate() *configs.Config { Flags: defaultMountFlags | syscall.MS_RDONLY, }, }, - Networks: []*configs.Network{ - { - Type: "loopback", - Address: "127.0.0.1/0", - Gateway: "localhost", - }, - }, Rlimits: []configs.Rlimit{ { Type: syscall.RLIMIT_NOFILE, diff --git a/nsinit/exec.go b/nsinit/exec.go index b499d66ea..b8d210012 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -5,7 +5,6 @@ import ( "os/signal" "syscall" - log 
"github.com/Sirupsen/logrus" "github.com/codegangsta/cli" "github.com/docker/libcontainer" "github.com/docker/libcontainer/utils" @@ -27,12 +26,12 @@ var execCommand = cli.Command{ cli.StringFlag{Name: "config", Value: "container.json", Usage: "path to the configuration file"}, cli.BoolFlag{Name: "create", Usage: "create the container's configuration on the fly with arguments"}, cli.StringFlag{Name: "user,u", Value: "root", Usage: "set the user, uid, and/or gid for the process"}, + cli.StringFlag{Name: "cwd", Value: "", Usage: "set the current working dir"}, cli.StringSliceFlag{Name: "env", Value: standardEnvironment, Usage: "set environment variables for the process"}, }, createFlags...), } func execAction(context *cli.Context) { - entry := log.WithField("parent", "nsinit") factory, err := loadFactory(context) if err != nil { fatal(err) @@ -44,7 +43,6 @@ func execAction(context *cli.Context) { created := false container, err := factory.Load(context.String("id")) if err != nil { - entry.Debug("creating container") config, err := loadConfig(context) if err != nil { tty.Close() @@ -53,7 +51,6 @@ func execAction(context *cli.Context) { if tty.console != nil { config.Console = tty.console.Path() } - created = true if container, err = factory.Create(context.String("id"), config); err != nil { tty.Close() @@ -65,11 +62,14 @@ func execAction(context *cli.Context) { Args: context.Args(), Env: context.StringSlice("env"), User: context.String("user"), + Cwd: context.String("cwd"), Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, } - tty.attach(process) + if err := tty.attach(process); err != nil { + fatal(err) + } pid, err := container.Start(process) if err != nil { tty.Close() diff --git a/nsinit/init.go b/nsinit/init.go index 96957824b..7b2cf1935 100644 --- a/nsinit/init.go +++ b/nsinit/init.go @@ -18,10 +18,10 @@ var initCommand = cli.Command{ runtime.LockOSThread() factory, err := libcontainer.New("") if err != nil { - log.Fatal(err) + fatal(err) } if err := 
factory.StartInitialization(3); err != nil { - log.Fatal(err) + fatal(err) } panic("This line should never been executed") }, diff --git a/nsinit/tty.go b/nsinit/tty.go index f1e593740..d0d343756 100644 --- a/nsinit/tty.go +++ b/nsinit/tty.go @@ -11,19 +11,13 @@ import ( func newTty(context *cli.Context) (*tty, error) { if context.Bool("tty") { - console, err := libcontainer.NewConsole() - if err != nil { - return nil, err - } - go io.Copy(console, os.Stdin) - go io.Copy(os.Stdout, console) - state, err := term.SetRawTerminal(os.Stdin.Fd()) + rootid := context.Int("userns-root-uid") + console, err := libcontainer.NewConsole(rootid, rootid) if err != nil { return nil, err } return &tty{ console: console, - state: state, }, nil } return &tty{}, nil @@ -44,12 +38,20 @@ func (t *tty) Close() error { return nil } -func (t *tty) attach(process *libcontainer.Process) { +func (t *tty) attach(process *libcontainer.Process) error { if t.console != nil { + go io.Copy(t.console, os.Stdin) + go io.Copy(os.Stdout, t.console) + state, err := term.SetRawTerminal(os.Stdin.Fd()) + if err != nil { + return err + } + t.state = state process.Stderr = nil process.Stdout = nil process.Stdin = nil } + return nil } func (t *tty) resize() error { From afa8443118347a1f909941aec2732039d28a9034 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 17 Feb 2015 21:50:43 -0800 Subject: [PATCH 099/101] Remove userns sidecar process Move the network setup back into the standard init even for user namespaces now that mounts are fully supported and working. 
Signed-off-by: Michael Crosby --- error.go | 8 ---- generic_error.go | 11 +++-- linux_container.go | 1 - linux_factory.go | 4 +- linux_init.go | 14 +----- linux_process.go | 40 +--------------- linux_rootfs.go | 25 +++++----- linux_userns_init.go | 91 ------------------------------------ linux_userns_sidecar_init.go | 23 --------- nsinit/config.go | 9 ++-- nsinit/exec.go | 15 +++--- nsinit/tty.go | 5 +- 12 files changed, 39 insertions(+), 207 deletions(-) delete mode 100644 linux_userns_init.go delete mode 100644 linux_userns_sidecar_init.go diff --git a/error.go b/error.go index 85b0dcaf0..37e99366f 100644 --- a/error.go +++ b/error.go @@ -57,11 +57,3 @@ type Error interface { // Returns the error code for this error. Code() ErrorCode } - -type initError struct { - Message string `json:"message,omitempty"` -} - -func (i initError) Error() string { - return i.Message -} diff --git a/generic_error.go b/generic_error.go index ff614ee66..05ab0a9d3 100644 --- a/generic_error.go +++ b/generic_error.go @@ -11,8 +11,8 @@ import ( var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} Code: {{.ECode}} -{{if .Err }} -Message: {{.Err.Error}} +{{if .Message }} +Message: {{.Message}} {{end}} Frames:{{range $i, $frame := .Stack.Frames}} --- @@ -28,6 +28,7 @@ func newGenericError(err error, c ErrorCode) Error { return &genericError{ Timestamp: time.Now(), Err: err, + Message: err.Error(), ECode: c, Stack: stacktrace.Capture(1), } @@ -41,6 +42,7 @@ func newSystemError(err error) Error { Timestamp: time.Now(), Err: err, ECode: SystemError, + Message: err.Error(), Stack: stacktrace.Capture(1), } } @@ -48,12 +50,13 @@ func newSystemError(err error) Error { type genericError struct { Timestamp time.Time ECode ErrorCode - Err error + Err error `json:"-"` + Message string Stack stacktrace.Stacktrace } func (e *genericError) Error() string { - return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Err) + return fmt.Sprintf("[%d] %s: %s", e.ECode, 
e.ECode, e.Message) } func (e *genericError) Code() ErrorCode { diff --git a/linux_container.go b/linux_container.go index 01e34769a..52d9af87d 100644 --- a/linux_container.go +++ b/linux_container.go @@ -147,7 +147,6 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c if cmd.SysProcAttr.Credential == nil { cmd.SysProcAttr.Credential = &syscall.Credential{} } - t = "_LIBCONTAINER_INITTYPE=userns" } cmd.Env = append(cmd.Env, t) cmd.SysProcAttr.Cloneflags = cloneFlags diff --git a/linux_factory.go b/linux_factory.go index 879ad4274..d0fca3b41 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -166,9 +166,7 @@ func (l *LinuxFactory) StartInitialization(pipefd uintptr) (err error) { // ensure that any data sent from the parent is consumed so it doesn't // receive ECONNRESET when the child writes to the pipe. ioutil.ReadAll(pipe) - if err := json.NewEncoder(pipe).Encode(initError{ - Message: err.Error(), - }); err != nil { + if err := json.NewEncoder(pipe).Encode(newSystemError(err)); err != nil { panic(err) } } diff --git a/linux_init.go b/linux_init.go index 4755edb08..37891102f 100644 --- a/linux_init.go +++ b/linux_init.go @@ -21,10 +21,8 @@ import ( type initType string const ( - initSetns initType = "setns" - initStandard initType = "standard" - initUserns initType = "userns" - initUsernsSetup initType = "userns_setup" + initSetns initType = "setns" + initStandard initType = "standard" ) type pid struct { @@ -67,14 +65,6 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) { return &linuxSetnsInit{ config: config, }, nil - case initUserns: - return &linuxUsernsInit{ - config: config, - }, nil - case initUsernsSetup: - return &linuxUsernsSideCar{ - config: config, - }, nil case initStandard: return &linuxStandardInit{ config: config, diff --git a/linux_process.go b/linux_process.go index 83addf240..0fd33b92d 100644 --- a/linux_process.go +++ b/linux_process.go @@ -4,13 +4,11 @@ package libcontainer import ( 
"encoding/json" - "fmt" "io" "os" "os/exec" "syscall" - log "github.com/Sirupsen/logrus" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/system" ) @@ -145,28 +143,12 @@ func (p *initProcess) start() error { if err := p.createNetworkInterfaces(); err != nil { return newSystemError(err) } - // Start the setup process to setup the init process - if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWUSER != 0 { - parent, err := p.newUsernsSetupProcess() - if err != nil { - return newSystemError(err) - } - if err := parent.start(); err != nil { - if err := parent.terminate(); err != nil { - log.Warn(err) - } - return err - } - if _, err := parent.wait(); err != nil { - return newSystemError(err) - } - } if err := p.sendConfig(); err != nil { return newSystemError(err) } // wait for the child process to fully complete and receive an error message // if one was encoutered - var ierr *initError + var ierr *genericError if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { return newSystemError(err) } @@ -229,26 +211,6 @@ func (p *initProcess) createNetworkInterfaces() error { return nil } -func (p *initProcess) newUsernsSetupProcess() (parentProcess, error) { - parentPipe, childPipe, err := newPipe() - if err != nil { - return nil, newSystemError(err) - } - cmd := exec.Command(p.cmd.Args[0], p.cmd.Args[1:]...) 
- cmd.ExtraFiles = []*os.File{childPipe} - cmd.Dir = p.cmd.Dir - cmd.Env = append(cmd.Env, - fmt.Sprintf("_LIBCONTAINER_INITPID=%d", p.pid()), - fmt.Sprintf("_LIBCONTAINER_INITTYPE=userns_setup"), - ) - return &setnsProcess{ - cmd: cmd, - childPipe: childPipe, - parentPipe: parentPipe, - config: p.config, - }, nil -} - func (p *initProcess) signal(s os.Signal) error { return p.cmd.Process.Signal(s) } diff --git a/linux_rootfs.go b/linux_rootfs.go index 74c711492..7aba21bdf 100644 --- a/linux_rootfs.go +++ b/linux_rootfs.go @@ -199,30 +199,27 @@ func createDevices(config *configs.Config) error { // Creates the device node in the rootfs of the container. func createDeviceNode(rootfs string, node *configs.Device) error { - var ( - dest = filepath.Join(rootfs, node.Path) - parent = filepath.Dir(dest) - ) - if err := os.MkdirAll(parent, 0755); err != nil { + dest := filepath.Join(rootfs, node.Path) + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { return err } if err := mknodDevice(dest, node); err != nil { if os.IsExist(err) { return nil } + if err != syscall.EPERM { + return err + } // containers running in a user namespace are not allowed to mknod // devices so we can just bind mount it from the host. 
- if err == syscall.EPERM { - f, err := os.Create(dest) - if err != nil { - if os.IsExist(err) { - return nil - } - return err - } + f, err := os.Create(dest) + if err != nil && !os.IsExist(err) { + return err + } + if f != nil { f.Close() - return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "") } + return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "") } return nil } diff --git a/linux_userns_init.go b/linux_userns_init.go deleted file mode 100644 index 7f5806fe5..000000000 --- a/linux_userns_init.go +++ /dev/null @@ -1,91 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "syscall" - - "github.com/docker/libcontainer/apparmor" - "github.com/docker/libcontainer/configs" - "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/system" -) - -type linuxUsernsInit struct { - config *initConfig -} - -func (l *linuxUsernsInit) Init() error { - // join any namespaces via a path to the namespace fd if provided - if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil { - return newSystemError(err) - } - consolePath := l.config.Config.Console - if consolePath != "" { - // We use the containerConsolePath here, because the console has already been - // setup by the side car process for the user namespace scenario. 
- console := newConsoleFromPath(consolePath) - if err := console.dupStdio(); err != nil { - return newSystemError(err) - } - } - if _, err := syscall.Setsid(); err != nil { - return newSystemError(err) - } - if consolePath != "" { - if err := system.Setctty(); err != nil { - return newSystemError(err) - } - } - if l.config.Cwd == "" { - l.config.Cwd = "/" - } - if err := setupRlimits(l.config.Config); err != nil { - return newSystemError(err) - } - // InitializeMountNamespace() can be executed only for a new mount namespace - if l.config.Config.Namespaces.Contains(configs.NEWNS) { - if err := setupRootfs(l.config.Config); err != nil { - return newSystemError(err) - } - } - if hostname := l.config.Config.Hostname; hostname != "" { - if err := syscall.Sethostname([]byte(hostname)); err != nil { - return newSystemError(err) - } - } - if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { - return newSystemError(err) - } - if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { - return newSystemError(err) - } - for _, path := range l.config.Config.ReadonlyPaths { - if err := remountReadonly(path); err != nil { - return newSystemError(err) - } - } - for _, path := range l.config.Config.MaskPaths { - if err := maskFile(path); err != nil { - return newSystemError(err) - } - } - pdeath, err := system.GetParentDeathSignal() - if err != nil { - return newSystemError(err) - } - if err := finalizeNamespace(l.config); err != nil { - return newSystemError(err) - } - // finalizeNamespace can change user/group which clears the parent death - // signal, so we restore it here. - if err := pdeath.Restore(); err != nil { - return newSystemError(err) - } - // Signal self if parent is already dead. Does nothing if running in a new - // PID namespace, as Getppid will always return 0. 
- if syscall.Getppid() == 1 { - return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) - } - return system.Execv(l.config.Args[0], l.config.Args[0:], l.config.Env) -} diff --git a/linux_userns_sidecar_init.go b/linux_userns_sidecar_init.go deleted file mode 100644 index 119abfabc..000000000 --- a/linux_userns_sidecar_init.go +++ /dev/null @@ -1,23 +0,0 @@ -// +build linux - -package libcontainer - -// linuxUsernsSideCar is run to setup mounts and networking related operations -// for a user namespace enabled process as a user namespace root doesn't -// have permissions to perform these operations. -// The setup process joins all the namespaces of user namespace enabled init -// except the user namespace, so it run as root in the root user namespace -// to perform these operations. -type linuxUsernsSideCar struct { - config *initConfig -} - -func (l *linuxUsernsSideCar) Init() error { - if err := setupNetwork(l.config); err != nil { - return err - } - if err := setupRoute(l.config.Config); err != nil { - return err - } - return nil -} diff --git a/nsinit/config.go b/nsinit/config.go index 7dbc8a78e..5663f5560 100644 --- a/nsinit/config.go +++ b/nsinit/config.go @@ -33,6 +33,7 @@ var createFlags = []cli.Flag{ cli.StringFlag{Name: "mount-label", Usage: "set the mount label"}, cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"}, cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"}, + cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"}, cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"}, cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"}, cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"}, @@ -149,7 +150,7 @@ func modify(config *configs.Config, context *cli.Context) { if !config.Namespaces.Contains(value) { config.Namespaces.Add(value, "") } - if v == "net" { + if flag == "net" { config.Networks = []*configs.Network{ { Type: "loopback", @@ -158,6 +159,9 @@ func 
modify(config *configs.Config, context *cli.Context) { }, } } + if flag == "uts" { + config.Hostname = context.String("hostname") + } default: config.Namespaces.Remove(value) config.Namespaces.Add(value, v) @@ -218,8 +222,7 @@ func getTemplate() *configs.Config { AllowAllDevices: false, AllowedDevices: configs.DefaultAllowedDevices, }, - Devices: configs.DefaultAutoCreatedDevices, - Hostname: "nsinit", + Devices: configs.DefaultAutoCreatedDevices, MaskPaths: []string{ "/proc/kcore", }, diff --git a/nsinit/exec.go b/nsinit/exec.go index b8d210012..52f251440 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -36,18 +36,21 @@ func execAction(context *cli.Context) { if err != nil { fatal(err) } - tty, err := newTty(context) + config, err := loadConfig(context) + if err != nil { + fatal(err) + } + rootuid, err := config.HostUID() + if err != nil { + fatal(err) + } + tty, err := newTty(context, rootuid) if err != nil { fatal(err) } created := false container, err := factory.Load(context.String("id")) if err != nil { - config, err := loadConfig(context) - if err != nil { - tty.Close() - fatal(err) - } if tty.console != nil { config.Console = tty.console.Path() } diff --git a/nsinit/tty.go b/nsinit/tty.go index d0d343756..5d417b98d 100644 --- a/nsinit/tty.go +++ b/nsinit/tty.go @@ -9,10 +9,9 @@ import ( "github.com/docker/libcontainer" ) -func newTty(context *cli.Context) (*tty, error) { +func newTty(context *cli.Context, rootuid int) (*tty, error) { if context.Bool("tty") { - rootid := context.Int("userns-root-uid") - console, err := libcontainer.NewConsole(rootid, rootid) + console, err := libcontainer.NewConsole(rootuid, rootuid) if err != nil { return nil, err } From 4d863b7bd0d7da6ca1108031fd7d7997bf504496 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Wed, 18 Feb 2015 21:36:04 -0800 Subject: [PATCH 100/101] Fixes bug where rootfs was empty instead of pwd when not specified. 
Signed-off-by: Mrunal Patel --- nsinit/config.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nsinit/config.go b/nsinit/config.go index 5663f5560..2ef1aee52 100644 --- a/nsinit/config.go +++ b/nsinit/config.go @@ -86,7 +86,11 @@ func modify(config *configs.Config, context *cli.Context) { config.AppArmorProfile = context.String("apparmor-profile") config.ProcessLabel = context.String("process-label") config.MountLabel = context.String("mount-label") - config.Rootfs = context.String("rootfs") + + rootfs := context.String("rootfs") + if rootfs != "" { + config.Rootfs = rootfs + } userns_uid := context.Int("userns-root-uid") if userns_uid != 0 { From f34b3b765fb964dee979ac7646b6d609adbeb2ba Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Wed, 18 Feb 2015 23:14:01 -0800 Subject: [PATCH 101/101] Validation for User Namespaces in the config. Signed-off-by: Mrunal Patel --- configs/validate/config.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/configs/validate/config.go b/configs/validate/config.go index 710794bf2..98926dd26 100644 --- a/configs/validate/config.go +++ b/configs/validate/config.go @@ -2,6 +2,7 @@ package validate import ( "fmt" + "os" "path/filepath" "github.com/docker/libcontainer/configs" @@ -31,6 +32,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error { if err := v.security(config); err != nil { return err } + if err := v.usernamespace(config); err != nil { + return err + } return nil } @@ -74,3 +78,16 @@ func (v *ConfigValidator) security(config *configs.Config) error { } return nil } + +func (v *ConfigValidator) usernamespace(config *configs.Config) error { + if config.Namespaces.Contains(configs.NEWUSER) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + return fmt.Errorf("USER namespaces aren't enabled in the kernel") + } + } else { + if config.UidMappings != nil || config.GidMappings != nil { + return fmt.Errorf("User namespace mappings specified, but USER 
namespace isn't enabled in the config") + } + } + return nil +}