Skip to content

Commit

Permalink
Add secure-pod-defaults flag to default Pods to 'restricted' profile …
Browse files Browse the repository at this point in the history
…by default (#13398)

* Add secure-pod-defaults flag to default Pods to 'restricted' profile by default

* Account for pod-level SecurityContext when setting defaluts

* Fix format that somehow slipped through auto-format-on-save

* Fix lint unparam complaint

* Don't default Revision values when BYO name is unchanged.

Fixes #11512

* Switch to using a context-passed ConfigurationSpec reference instead of hard-coding Configuration and Service.

* Complete docstring for WithConfigurationSpec

* Permit explicitly selecting Kubernetes defaults in PodSecurityContext.

* Fix comment on defaultSecurityContext

* Allow both "safe" future default and "unsafe" current default to be set explicitly when feature is enabled.

* Add seccompProfile to CRD schemas (it was accepted, but not documented)

* Add e2e tests, checking CI combinations

* Capabilities.Drop is "omitempty", so it will always be empty instead of nil.

* Fix e2e test comment.
  • Loading branch information
evankanderson authored Jan 24, 2023
1 parent 897b61a commit 0360850
Show file tree
Hide file tree
Showing 13 changed files with 472 additions and 1 deletion.
12 changes: 12 additions & 0 deletions config/core/300-resources/configuration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,18 @@ spec:
description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows.
type: integer
format: int64
seccompProfile:
description: The seccomp options to use by this container. If seccomp options are provided at both the pod & container level, the container options override the pod options. Note that this field cannot be set when spec.os.name is windows.
type: object
required:
- type
properties:
localhostProfile:
description: localhostProfile indicates a profile defined in a file on the node should be used. The profile must be preconfigured on the node to work. Must be a descending path, relative to the kubelet's configured seccomp profile location. Must only be set if type is "Localhost".
type: string
type:
description: "type indicates which kind of seccomp profile will be applied. Valid options are: \n Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied."
type: string
terminationMessagePath:
description: 'Optional: Path at which the file to which the container''s termination message will be written is mounted into the container''s filesystem. Message written is intended to be brief final status, such as an assertion failure message. Will be truncated by the node if greater than 4096 bytes. The total message length across all containers will be limited to 12kb. Defaults to /dev/termination-log. Cannot be updated.'
type: string
Expand Down
12 changes: 12 additions & 0 deletions config/core/300-resources/revision.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,18 @@ spec:
description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows.
type: integer
format: int64
seccompProfile:
description: The seccomp options to use by this container. If seccomp options are provided at both the pod & container level, the container options override the pod options. Note that this field cannot be set when spec.os.name is windows.
type: object
required:
- type
properties:
localhostProfile:
description: localhostProfile indicates a profile defined in a file on the node should be used. The profile must be preconfigured on the node to work. Must be a descending path, relative to the kubelet's configured seccomp profile location. Must only be set if type is "Localhost".
type: string
type:
description: "type indicates which kind of seccomp profile will be applied. Valid options are: \n Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied."
type: string
terminationMessagePath:
description: 'Optional: Path at which the file to which the container''s termination message will be written is mounted into the container''s filesystem. Message written is intended to be brief final status, such as an assertion failure message. Will be truncated by the node if greater than 4096 bytes. The total message length across all containers will be limited to 12kb. Defaults to /dev/termination-log. Cannot be updated.'
type: string
Expand Down
12 changes: 12 additions & 0 deletions config/core/300-resources/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,18 @@ spec:
description: The UID to run the entrypoint of the container process. Defaults to user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. Note that this field cannot be set when spec.os.name is windows.
type: integer
format: int64
seccompProfile:
description: The seccomp options to use by this container. If seccomp options are provided at both the pod & container level, the container options override the pod options. Note that this field cannot be set when spec.os.name is windows.
type: object
required:
- type
properties:
localhostProfile:
description: localhostProfile indicates a profile defined in a file on the node should be used. The profile must be preconfigured on the node to work. Must be a descending path, relative to the kubelet's configured seccomp profile location. Must only be set if type is "Localhost".
type: string
type:
description: "type indicates which kind of seccomp profile will be applied. Valid options are: \n Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied."
type: string
terminationMessagePath:
description: 'Optional: Path at which the file to which the container''s termination message will be written is mounted into the container''s filesystem. Message written is intended to be brief final status, such as an assertion failure message. Will be truncated by the node if greater than 4096 bytes. The total message length across all containers will be limited to 12kb. Defaults to /dev/termination-log. Cannot be updated.'
type: string
Expand Down
9 changes: 8 additions & 1 deletion config/core/configmaps/features.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ metadata:
app.kubernetes.io/component: controller
app.kubernetes.io/version: devel
annotations:
knative.dev/example-checksum: "691a192e"
knative.dev/example-checksum: "d3565159"
data:
_example: |-
################################
Expand All @@ -40,6 +40,13 @@ data:
# this example block and unindented to be in the data block
# to actually change the configuration.
# Default SecurityContext settings to secure-by-default values
# if unset.
#
# This value will default to "enabled" in a future release,
# probably Knative 1.10
secure-pod-defaults: "disabled"
# Indicates whether multi container support is enabled
#
# WARNING: Cannot safely be disabled once enabled.
Expand Down
1 change: 1 addition & 0 deletions hack/schemapatch-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ k8s.io/api/core/v1.SecurityContext:
- RunAsGroup
- RunAsNonRoot
- RunAsUser
- SeccompProfile
k8s.io/api/core/v1.Capabilities:
fieldMask:
- Add
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/config/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func defaultFeaturesConfig() *Features {
PodSpecInitContainers: Disabled,
PodSpecDNSPolicy: Disabled,
PodSpecDNSConfig: Disabled,
SecurePodDefaults: Disabled,
TagHeaderBasedRouting: Disabled,
AutoDetectHTTP2: Disabled,
}
Expand Down Expand Up @@ -99,6 +100,7 @@ func NewFeaturesConfigFromMap(data map[string]string) (*Features, error) {
asFlag("kubernetes.podspec-persistent-volume-write", &nc.PodSpecPersistentVolumeWrite),
asFlag("kubernetes.podspec-dnspolicy", &nc.PodSpecDNSPolicy),
asFlag("kubernetes.podspec-dnsconfig", &nc.PodSpecDNSConfig),
asFlag("secure-pod-defaults", &nc.SecurePodDefaults),
asFlag("tag-header-based-routing", &nc.TagHeaderBasedRouting),
asFlag("queueproxy.mount-podinfo", &nc.QueueProxyMountPodInfo),
asFlag("autodetect-http2", &nc.AutoDetectHTTP2)); err != nil {
Expand Down Expand Up @@ -134,6 +136,7 @@ type Features struct {
QueueProxyMountPodInfo Flag
PodSpecDNSPolicy Flag
PodSpecDNSConfig Flag
SecurePodDefaults Flag
TagHeaderBasedRouting Flag
AutoDetectHTTP2 Flag
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/config/features_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func TestFeaturesConfiguration(t *testing.T) {
PodSpecSchedulerName: Enabled,
PodSpecDNSPolicy: Enabled,
PodSpecDNSConfig: Enabled,
SecurePodDefaults: Enabled,
TagHeaderBasedRouting: Enabled,
}),
data: map[string]string{
Expand All @@ -88,6 +89,7 @@ func TestFeaturesConfiguration(t *testing.T) {
"kubernetes.podspec-schedulername": "Enabled",
"kubernetes.podspec-dnspolicy": "Enabled",
"kubernetes.podspec-dnsconfig": "Enabled",
"secure-pod-defaults": "Enabled",
"tag-header-based-routing": "Enabled",
},
}, {
Expand Down
16 changes: 16 additions & 0 deletions pkg/apis/serving/fieldmask.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@ func PodSpecMask(ctx context.Context, in *corev1.PodSpec) *corev1.PodSpec {
}
if cfg.Features.PodSpecSecurityContext != config.Disabled {
out.SecurityContext = in.SecurityContext
} else if cfg.Features.SecurePodDefaults != config.Disabled {
// This is further validated in ValidatePodSecurityContext.
out.SecurityContext = in.SecurityContext
}
if cfg.Features.PodSpecPriorityClassName != config.Disabled {
out.PriorityClassName = in.PriorityClassName
Expand Down Expand Up @@ -591,6 +594,19 @@ func PodSecurityContextMask(ctx context.Context, in *corev1.PodSecurityContext)

out := new(corev1.PodSecurityContext)

if config.FromContextOrDefaults(ctx).Features.SecurePodDefaults == config.Enabled {
// Allow to opt out of more-secure defaults if SecurePodDefaults is enabled.
// This aligns with defaultSecurityContext in revision_defaults.go.
if in.SeccompProfile != nil {
seccomp := in.SeccompProfile.Type
if seccomp == corev1.SeccompProfileTypeRuntimeDefault || seccomp == corev1.SeccompProfileTypeUnconfined {
out.SeccompProfile = &corev1.SeccompProfile{
Type: seccomp,
}
}
}
}

if config.FromContextOrDefaults(ctx).Features.PodSpecSecurityContext == config.Disabled {
return out
}
Expand Down
45 changes: 45 additions & 0 deletions pkg/apis/serving/fieldmask_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,51 @@ func TestPodSecurityContextMask_FeatureEnabled(t *testing.T) {
}
}

func TestPodSecurityContextMask_SecureEnabled(t *testing.T) {
// Ensure that users can opt out of better security by explicitly
// requesting the Kubernetes default, which is "Unconfined".
want := &corev1.PodSecurityContext{
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeUnconfined,
},
}

in := &corev1.PodSecurityContext{
SELinuxOptions: &corev1.SELinuxOptions{},
WindowsOptions: &corev1.WindowsSecurityContextOptions{},
SupplementalGroups: []int64{1},
Sysctls: []corev1.Sysctl{},
RunAsUser: ptr.Int64(1),
RunAsGroup: ptr.Int64(1),
RunAsNonRoot: ptr.Bool(true),
FSGroup: ptr.Int64(1),
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeUnconfined,
},
}

ctx := config.ToContext(context.Background(),
&config.Config{
Features: &config.Features{
SecurePodDefaults: config.Enabled,
PodSpecSecurityContext: config.Disabled,
},
},
)

got := PodSecurityContextMask(ctx, in)

if &want == &got {
t.Error("Input and output share addresses. Want different addresses")
}

if diff, err := kmp.SafeDiff(want, got); err != nil {
t.Error("Got error comparing output, err =", err)
} else if diff != "" {
t.Error("PostSecurityContextMask (-want, +got):", diff)
}
}

func TestSecurityContextMask(t *testing.T) {
mtype := corev1.UnmaskedProcMount
want := &corev1.SecurityContext{
Expand Down
55 changes: 55 additions & 0 deletions pkg/apis/serving/v1/revision_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ func (rs *RevisionSpec) SetDefaults(ctx context.Context) {
applyDefaultContainerNames(rs.PodSpec.InitContainers, containerNames, defaultInitContainerName)
for idx := range rs.PodSpec.Containers {
rs.applyDefault(ctx, &rs.PodSpec.Containers[idx], cfg)
rs.defaultSecurityContext(rs.PodSpec.SecurityContext, &rs.PodSpec.Containers[idx], cfg)
}
for idx := range rs.PodSpec.InitContainers {
rs.defaultSecurityContext(rs.PodSpec.SecurityContext, &rs.PodSpec.InitContainers[idx], cfg)
}
}

Expand Down Expand Up @@ -158,6 +162,57 @@ func (*RevisionSpec) applyProbes(container *corev1.Container) {
}
}

// Upgrade SecurityContext for this container and the Pod definition to use settings
// for the `restricted` profile when the feature flag is enabled.
// This does not currently set `runAsNonRoot` for the restricted profile, because
// that feels harder to default safely.
func (rs *RevisionSpec) defaultSecurityContext(psc *corev1.PodSecurityContext, container *corev1.Container, cfg *config.Config) {
if cfg.Features.SecurePodDefaults != config.Enabled {
return
}

if psc == nil {
psc = &corev1.PodSecurityContext{}
}

updatedSC := container.SecurityContext

if updatedSC == nil {
updatedSC = &corev1.SecurityContext{}
}

if updatedSC.AllowPrivilegeEscalation == nil {
updatedSC.AllowPrivilegeEscalation = ptr.Bool(false)
}
if psc.SeccompProfile == nil || psc.SeccompProfile.Type == "" {
if updatedSC.SeccompProfile == nil {
updatedSC.SeccompProfile = &corev1.SeccompProfile{}
}
if updatedSC.SeccompProfile.Type == "" {
updatedSC.SeccompProfile.Type = corev1.SeccompProfileTypeRuntimeDefault
}
}
if updatedSC.Capabilities == nil {
updatedSC.Capabilities = &corev1.Capabilities{}
updatedSC.Capabilities.Drop = []corev1.Capability{"ALL"}
// Default in NET_BIND_SERVICE to allow binding to low-numbered ports.
needsLowPort := false
for _, p := range container.Ports {
if p.ContainerPort < 1024 {
needsLowPort = true
break
}
}
if updatedSC.Capabilities.Add == nil && needsLowPort {
updatedSC.Capabilities.Add = []corev1.Capability{"NET_BIND_SERVICE"}
}
}

if *updatedSC != (corev1.SecurityContext{}) {
container.SecurityContext = updatedSC
}
}

func applyDefaultContainerNames(containers []corev1.Container, containerNames sets.String, defaultContainerName string) {
// Default container name based on ContainerNameFromTemplate value from configmap.
// In multi-container or init-container mode, add a numeric suffix, avoiding clashes with user-supplied names.
Expand Down
Loading

0 comments on commit 0360850

Please sign in to comment.