From ebc2e7c43523d067a56b561dc92e7b59d6392b87 Mon Sep 17 00:00:00 2001 From: Chethan Suresh Date: Thu, 3 Aug 2023 10:12:01 +0530 Subject: [PATCH] Support time namespace The "time" namespace was introduced in Linux v5.6. Add support for the new time namespace so that boottime and monotonic clock offsets can be set. Example runtime spec: "timeOffsets": { "monotonic": { "secs": 172800, "nanosecs": 0 }, "boottime": { "secs": 604800, "nanosecs": 0 } } Signed-off-by: Chethan Suresh --- libcontainer/configs/config.go | 3 +++ libcontainer/configs/namespaces_linux.go | 4 ++++ libcontainer/configs/namespaces_syscall.go | 1 + libcontainer/configs/validate/validator.go | 6 +++++ libcontainer/container_linux.go | 12 ++++++++++ libcontainer/message_linux.go | 1 + libcontainer/nsenter/namespace.h | 3 +++ libcontainer/nsenter/nsexec.c | 27 ++++++++++++++++++++++ libcontainer/specconv/spec_linux.go | 4 ++++ 9 files changed, 61 insertions(+) diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index bb5dbba6588..ea33b110a67 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -216,6 +216,9 @@ type Config struct { // Do not try to remount a bind mount again after the first attempt failed on source // filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set NoMountFallback bool `json:"no_mount_fallback,omitempty"` + + // TimeOffsets specifies the offset for supporting time namespaces. 
+ TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"` } type ( diff --git a/libcontainer/configs/namespaces_linux.go b/libcontainer/configs/namespaces_linux.go index d52d6fcd147..5062432f8c3 100644 --- a/libcontainer/configs/namespaces_linux.go +++ b/libcontainer/configs/namespaces_linux.go @@ -14,6 +14,7 @@ const ( NEWIPC NamespaceType = "NEWIPC" NEWUSER NamespaceType = "NEWUSER" NEWCGROUP NamespaceType = "NEWCGROUP" + NEWTIME NamespaceType = "NEWTIME" ) var ( @@ -38,6 +39,8 @@ func NsName(ns NamespaceType) string { return "uts" case NEWCGROUP: return "cgroup" + case NEWTIME: + return "time" } return "" } @@ -72,6 +75,7 @@ func NamespaceTypes() []NamespaceType { NEWPID, NEWNS, NEWCGROUP, + NEWTIME, } } diff --git a/libcontainer/configs/namespaces_syscall.go b/libcontainer/configs/namespaces_syscall.go index 543e059aa67..15d8046f3d6 100644 --- a/libcontainer/configs/namespaces_syscall.go +++ b/libcontainer/configs/namespaces_syscall.go @@ -17,6 +17,7 @@ var namespaceInfo = map[NamespaceType]int{ NEWUTS: unix.CLONE_NEWUTS, NEWPID: unix.CLONE_NEWPID, NEWCGROUP: unix.CLONE_NEWCGROUP, + NEWTIME: unix.CLONE_NEWTIME, } // CloneFlags parses the container's Namespaces options to set the correct diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index 196b431dba1..231df9db1ab 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -106,6 +106,12 @@ func namespaces(config *configs.Config) error { } } + if config.Namespaces.Contains(configs.NEWTIME) { + if _, err := os.Stat("/proc/self/timens_offsets"); os.IsNotExist(err) { + return errors.New("time namespaces aren't enabled in the kernel") + } + } + return nil } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 2f0a6c64166..e0f99961f00 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -2321,6 +2321,18 @@ func (c *Container) 
bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa }) } + // write boottime and monotonic time ns offsets. + if c.config.Namespaces.Contains(configs.NEWTIME) && c.config.TimeOffsets != nil { + var offsetSpec bytes.Buffer + for clock, offset := range c.config.TimeOffsets { + fmt.Fprintf(&offsetSpec, "%s %d %d\n", clock, offset.Secs, offset.Nanosecs) + } + r.AddData(&Bytemsg{ + Type: TimeOffsetsAttr, + Value: offsetSpec.Bytes(), + }) + } + return bytes.NewReader(r.Serialize()), nil } diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index 17db81a29f3..4e48826cdb5 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -23,6 +23,7 @@ const ( GidmapPathAttr uint16 = 27289 MountSourcesAttr uint16 = 27290 IdmapSourcesAttr uint16 = 27291 + TimeOffsetsAttr uint16 = 27292 ) type Int32msg struct { diff --git a/libcontainer/nsenter/namespace.h b/libcontainer/nsenter/namespace.h index 9e9bdca05e1..ac443c40f41 100644 --- a/libcontainer/nsenter/namespace.h +++ b/libcontainer/nsenter/namespace.h @@ -28,5 +28,8 @@ #ifndef CLONE_NEWNET # define CLONE_NEWNET 0x40000000 /* New network namespace */ #endif +#ifndef CLONE_NEWTIME +# define CLONE_NEWTIME 0x00000080 /* New time namespace */ +#endif #endif /* NSENTER_NAMESPACE_H */ diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 22b6ea1cd21..8d4148cc645 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -104,6 +104,10 @@ struct nlconfig_t { /* Idmap sources opened outside the container userns which will be id mapped. */ char *idmapsources; size_t idmapsources_len; + + /* Time NS offsets. 
*/ + char *timensoffset; + size_t timensoffset_len; }; /* @@ -122,6 +126,7 @@ struct nlconfig_t { #define GIDMAPPATH_ATTR 27289 #define MOUNT_SOURCES_ATTR 27290 #define IDMAP_SOURCES_ATTR 27291 +#define TIMENSOFFSET_ATTR 27292 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -351,6 +356,8 @@ static int nsflag(char *name) return CLONE_NEWUSER; else if (!strcmp(name, "uts")) return CLONE_NEWUTS; + else if (!strcmp(name, "time")) + return CLONE_NEWTIME; /* If we don't recognise a name, fallback to 0. */ return 0; @@ -445,6 +452,10 @@ static void nl_parse(int fd, struct nlconfig_t *config) config->idmapsources = current; config->idmapsources_len = payload_len; break; + case TIMENSOFFSET_ATTR: + config->timensoffset = current; + config->timensoffset_len = payload_len; + break; default: bail("unknown netlink message type %d", nlattr->nla_type); } @@ -747,6 +758,17 @@ void receive_idmapsources(int sockfd) receive_fd_sources(sockfd, "_LIBCONTAINER_IDMAP_FDS"); } +static void update_timens(char *map, size_t map_len) +{ + if (map == NULL || map_len == 0) + return; + write_log(DEBUG, "update /proc/self/timens_offsets to '%s'", map); + if (write_file(map, map_len, "/proc/self/timens_offsets") < 0) { + if (errno != EPERM) + bail("failed to update /proc/self/timens_offsets"); + } +} + void nsexec(void) { int pipenum; @@ -1185,6 +1207,11 @@ void nsexec(void) bail("failed to sync with parent: SYNC_MOUNT_IDMAP_ACK: got %u", s); } + /* + * set boottime and monotonic timens offsets. + */ + update_timens(config.timensoffset, config.timensoffset_len); + /* * TODO: What about non-namespace clone flags that we're dropping here? 
* diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index d3938da516c..0afb4a6d8be 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -49,6 +49,7 @@ func initMaps() { specs.IPCNamespace: configs.NEWIPC, specs.UTSNamespace: configs.NEWUTS, specs.CgroupNamespace: configs.NEWCGROUP, + specs.TimeNamespace: configs.NEWTIME, } mountPropagationMapping = map[string]int{ @@ -435,6 +436,9 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { MemBwSchema: spec.Linux.IntelRdt.MemBwSchema, } } + + // update timens offsets + config.TimeOffsets = spec.Linux.TimeOffsets } // Set the host UID that should own the container's cgroup.