From c87c0358c6303606039295655c65bc1ccd655ea5 Mon Sep 17 00:00:00 2001 From: Chethan Suresh Date: Tue, 11 Jul 2023 15:55:50 +0530 Subject: [PATCH] Support time namespace The "time" namespace was introduced in Linux v5.6. Support the new time namespace to set the boottime and monotonic time offsets. Example runtime spec: "timeOffsets": { "monotonic": { "secs": 172800, "nanosecs": 0 }, "boottime": { "secs": 604800, "nanosecs": 0 } } Signed-off-by: Chethan Suresh --- libcontainer/configs/config.go | 3 ++ libcontainer/configs/namespaces_linux.go | 4 +++ libcontainer/configs/namespaces_syscall.go | 1 + libcontainer/configs/validate/validator.go | 6 ++++ libcontainer/container_linux.go | 38 ++++++++++++++++++++++ libcontainer/message_linux.go | 2 ++ libcontainer/nsenter/namespace.h | 3 ++ libcontainer/nsenter/nsexec.c | 38 ++++++++++++++++++++++ libcontainer/specconv/spec_linux.go | 7 ++++ 9 files changed, 102 insertions(+) diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index d43ea7860a2..d0174c7a24d 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -212,6 +212,9 @@ type Config struct { // RootlessCgroups is set when unlikely to have the full access to cgroups. // When RootlessCgroups is set, cgroups errors are ignored. RootlessCgroups bool `json:"rootless_cgroups,omitempty"` + + // TimeOffsets specifies the offset for supporting time namespaces. 
+ TimeOffsets map[string]specs.LinuxTimeOffset `json:"timeOffsets,omitempty"` } type ( diff --git a/libcontainer/configs/namespaces_linux.go b/libcontainer/configs/namespaces_linux.go index d52d6fcd147..5062432f8c3 100644 --- a/libcontainer/configs/namespaces_linux.go +++ b/libcontainer/configs/namespaces_linux.go @@ -14,6 +14,7 @@ const ( NEWIPC NamespaceType = "NEWIPC" NEWUSER NamespaceType = "NEWUSER" NEWCGROUP NamespaceType = "NEWCGROUP" + NEWTIME NamespaceType = "NEWTIME" ) var ( @@ -38,6 +39,8 @@ func NsName(ns NamespaceType) string { return "uts" case NEWCGROUP: return "cgroup" + case NEWTIME: + return "time" } return "" } @@ -72,6 +75,7 @@ func NamespaceTypes() []NamespaceType { NEWPID, NEWNS, NEWCGROUP, + NEWTIME, } } diff --git a/libcontainer/configs/namespaces_syscall.go b/libcontainer/configs/namespaces_syscall.go index 543e059aa67..15d8046f3d6 100644 --- a/libcontainer/configs/namespaces_syscall.go +++ b/libcontainer/configs/namespaces_syscall.go @@ -17,6 +17,7 @@ var namespaceInfo = map[NamespaceType]int{ NEWUTS: unix.CLONE_NEWUTS, NEWPID: unix.CLONE_NEWPID, NEWCGROUP: unix.CLONE_NEWCGROUP, + NEWTIME: unix.CLONE_NEWTIME, } // CloneFlags parses the container's Namespaces options to set the correct diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 483e7a2ff3e..b825167d965 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -115,6 +115,12 @@ func namespaces(config *configs.Config) error { } } + if config.Namespaces.Contains(configs.NEWTIME) { + if _, err := os.Stat("/proc/self/timens_offsets"); os.IsNotExist(err) { + return errors.New("time namespaces aren't enabled in the kernel") + } + } + return nil } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index eac17027af8..ecd37378d9d 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -2114,6 +2114,16 @@ func encodeIDMapping(idMap 
[]configs.IDMap) ([]byte, error) { return data.Bytes(), nil } +func encodeTimeNs(timeOffsets specs.LinuxTimeOffset) ([]byte, error) { + data := bytes.NewBuffer(nil) + line := fmt.Sprintf("%d %d\n", timeOffsets.Secs, timeOffsets.Nanosecs) + if _, err := data.WriteString(line); err != nil { + return nil, err + } + + return data.Bytes(), nil +} + // netlinkError is an error wrapper type for use by custom netlink message // types. Panics with errors are wrapped in netlinkError so that the recover // in bootstrapData can distinguish intentional panics. @@ -2246,6 +2256,34 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa }) } + // write boottime and monotonic offsets + // 0 is default value in /proc/PID/timens_offsets + // if 0, do not bootstrap data + if c.config.TimeOffsets != nil { + + if c.config.TimeOffsets["boottime"].Nanosecs != 0 || c.config.TimeOffsets["boottime"].Secs != 0 { + b, err := encodeTimeNs(c.config.TimeOffsets["boottime"]) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: BootTimeNsAttr, + Value: append([]byte("boottime "), b...), + }) + } + + if c.config.TimeOffsets["monotonic"].Nanosecs != 0 || c.config.TimeOffsets["monotonic"].Secs != 0 { + b, err := encodeTimeNs(c.config.TimeOffsets["monotonic"]) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: MonotonicNsAttr, + Value: append([]byte("monotonic "), b...), + }) + } + } + return bytes.NewReader(r.Serialize()), nil } diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index 6d1107e875d..ef9bef36ced 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -22,6 +22,8 @@ const ( UidmapPathAttr uint16 = 27288 GidmapPathAttr uint16 = 27289 MountSourcesAttr uint16 = 27290 + BootTimeNsAttr uint16 = 27291 + MonotonicNsAttr uint16 = 27292 ) type Int32msg struct { diff --git a/libcontainer/nsenter/namespace.h b/libcontainer/nsenter/namespace.h index 9e9bdca05e1..ac443c40f41 100644 --- 
a/libcontainer/nsenter/namespace.h +++ b/libcontainer/nsenter/namespace.h @@ -28,5 +28,8 @@ #ifndef CLONE_NEWNET # define CLONE_NEWNET 0x40000000 /* New network namespace */ #endif +#ifndef CLONE_NEWTIME +# define CLONE_NEWTIME 0x00000080 /* New time namespace */ +#endif #endif /* NSENTER_NAMESPACE_H */ diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 748791d6b82..1c5fa41a6d2 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -95,6 +95,13 @@ struct nlconfig_t { /* Mount sources opened outside the container userns. */ char *mountsources; size_t mountsources_len; + + /* Time NS settings for boottime and monotonic */ + char *monotonic; + size_t monotonic_len; + + char *boottime; + size_t boottime_len; }; /* @@ -112,6 +119,8 @@ struct nlconfig_t { #define UIDMAPPATH_ATTR 27288 #define GIDMAPPATH_ATTR 27289 #define MOUNT_SOURCES_ATTR 27290 +#define BOOTTIME_NS_ATTR 27291 +#define MONOTONIC_NS_ATTR 27292 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -341,6 +350,8 @@ static int nsflag(char *name) return CLONE_NEWUSER; else if (!strcmp(name, "uts")) return CLONE_NEWUTS; + else if (!strcmp(name, "time")) + return CLONE_NEWTIME; /* If we don't recognise a name, fallback to 0. 
*/ return 0; @@ -431,6 +442,14 @@ static void nl_parse(int fd, struct nlconfig_t *config) config->mountsources = current; config->mountsources_len = payload_len; break; + case BOOTTIME_NS_ATTR: + config->boottime = current; + config->boottime_len = payload_len; + break; + case MONOTONIC_NS_ATTR: + config->monotonic = current; + config->monotonic_len = payload_len; + break; default: bail("unknown netlink message type %d", nlattr->nla_type); } @@ -641,6 +660,17 @@ void try_unshare(int flags, const char *msg) bail("failed to unshare %s", msg); } +static void update_timens(char *map, size_t map_len) +{ + if (map == NULL || map_len == 0) + return; + write_log(DEBUG, "update /proc/self/timens_offsets to '%s'", map); + if (write_file(map, map_len , "/proc/self/timens_offsets") < 0) { + if (errno != EPERM) + bail("failed to update /proc/self/timens_offsets"); + } +} + void nsexec(void) { int pipenum; @@ -1053,6 +1083,14 @@ void nsexec(void) bail("failed to sync with parent: SYNC_MOUNTSOURCES_ACK: got %u", s); } + /* + * Update timens offsets + * set boottime and monotonic offsets + */ + write_log(DEBUG, "set timens offsets %s", config.boottime); + update_timens(config.boottime, config.boottime_len); + update_timens(config.monotonic, config.monotonic_len); + /* * TODO: What about non-namespace clone flags that we're dropping here? 
* diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 809424a97eb..94f3e28a09c 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -49,6 +49,7 @@ func initMaps() { specs.IPCNamespace: configs.NEWIPC, specs.UTSNamespace: configs.NEWUTS, specs.CgroupNamespace: configs.NEWCGROUP, + specs.TimeNamespace: configs.NEWTIME, } mountPropagationMapping = map[string]int{ @@ -433,6 +434,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { MemBwSchema: spec.Linux.IntelRdt.MemBwSchema, } } + + // update timens offsets + if spec.Linux.TimeOffsets != nil { + config.TimeOffsets = spec.Linux.TimeOffsets + } } // Set the host UID that should own the container's cgroup. @@ -492,6 +498,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { } } } + createHooks(spec, config) config.Version = specs.Version return config, nil