From 6ef1a811a7d753d30cd7420579956725c2866aee Mon Sep 17 00:00:00 2001 From: kaihsun Date: Mon, 21 Nov 2022 21:02:20 +0000 Subject: [PATCH 01/21] update --- docs/guidance/pod-security.md | 77 +++++++++++++++++++ helm-chart/kuberay-operator/audit-policy.yaml | 15 ++++ helm-chart/kuberay-operator/kind-config.yaml | 29 +++++++ 3 files changed, 121 insertions(+) create mode 100644 docs/guidance/pod-security.md create mode 100644 helm-chart/kuberay-operator/audit-policy.yaml create mode 100644 helm-chart/kuberay-operator/kind-config.yaml diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md new file mode 100644 index 00000000000..fe600735725 --- /dev/null +++ b/docs/guidance/pod-security.md @@ -0,0 +1,77 @@ +# Pod Security + +Kubernetes defines three different Pod Security Standards, including `privileged`, `baseline`, and `restricted`, to broadly cover the security spectrum. The `privileged` standard allows users to do known privilege escalations, and thus it is not safe enough for security-critical applications. + +This document describes how to configure RayCluster YAML file to apply `restricted` Pod security standard. The following references can help you understand this document better: + +* [Kubernetes - Pod Security Standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted) +* [Kubernetes - Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) +* [Kubernetes - Auditing](https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/) +* [KinD - Auditing](https://kind.sigs.k8s.io/docs/user/auditing/) + +# Step1: Create a KinD cluster +```bash +# Please use Kubernetes >= 1.23. I use 1.24 as my environment. +kind create cluster --config kind-config.yaml +``` +The `kind-config.yaml` enables audit logging with the audit policy defined in `audit-policy.yaml`. The `audit-policy.yaml` defines an auditing policy to listen to the Pod events in the namespace `pod-security`. 
With this policy, we can check whether our Pods violate the policies in `restricted` standard or not. + +# Step2: Check the audit logs +```bash +docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log +``` +The log should be empty because the namespace `pod-security` does not exist. + +# Step3: Create the `pod-security` namespace +```bash +kubectl create ns pod-security +kubectl label --overwrite ns pod-security \ + pod-security.kubernetes.io/warn=restricted \ + pod-security.kubernetes.io/warn-version=latest \ + pod-security.kubernetes.io/audit=restricted \ + pod-security.kubernetes.io/audit-version=latest \ + pod-security.kubernetes.io/enforce=restricted \ + pod-security.kubernetes.io/enforce-version=latest +``` +With the `pod-security.kubernetes.io` labels, the built-in Kubernetes Pod security admission controller will apply the `restricted` Pod security standard to all Pods in the namespace `pod-security`. The label `pod-security.kubernetes.io/enforce=restricted` means that the Pod will be rejected if it violate the policies defined in `restricted` security standard. See [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) for more details about the labels. + +# Step4: Install a KubeRay operator +```bash +pushd helm-chart/kuberay-operator +helm install kuberay-operator . +popd +``` + +# Step5: Create a RayCluster (Choose either Step5.1 or Step5.2) +* If you choose Step5.1, no Pod will be created in the namespace `pod-security`. +* If you choose Step5.2, Pods can be created successfully. + +## Step5.1: Create a RayCluster without proper `securityContext` configurations +```bash +kubectl apply -n pod-security -f ray-operator/config/samples/ray-cluster.complete.yaml + +# Wait 20 seconds and check audit logs for the error messages. 
+docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log + +# Example error messagess +# "pods \"raycluster-complete-head-fkbf5\" is forbidden: violates PodSecurity \"restricted:latest\": allowPrivilegeEscalation != false (container \"ray-head\" must set securityContext.allowPrivilegeEscalation=false) ... +``` +No Pod will be created in the namespace `pod-security`, and check audit logs for error messages. + +## Step5.2: Create a RayCluster with proper `securityContext` configurations +```bash +kubectl apply -n pod-security -f ray-operator/config/samples/ray-cluster.pod-security.yaml + +# Wait for the RayCluster convergence and check audit logs for the messages. +docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log +``` +The RayCluster will work as expectation (1 head pod + 1 worker pod). + +# Step6: Test the functionality with a simple job. +```bash +# Log in to the Pod +kubectl exec -it -n pod-security $HEAD_POD -- bash + +# +``` + diff --git a/helm-chart/kuberay-operator/audit-policy.yaml b/helm-chart/kuberay-operator/audit-policy.yaml new file mode 100644 index 00000000000..13ee2ee0f07 --- /dev/null +++ b/helm-chart/kuberay-operator/audit-policy.yaml @@ -0,0 +1,15 @@ +apiVersion: audit.k8s.io/v1 # This is required. +kind: Policy +# Don't generate audit events for all requests in RequestReceived stage. +omitStages: + - "RequestReceived" +rules: + # Log pod changes at Metadata level + - level: Metadata + resources: + - group: "" + # Resource "pods" doesn't match requests to any subresource of pods, + # which is consistent with the RBAC policy. + resources: ["pods"] + # This rule only applies to resources in the "pod-security" namespace. 
+ namespaces: ["pod-security"] diff --git a/helm-chart/kuberay-operator/kind-config.yaml b/helm-chart/kuberay-operator/kind-config.yaml new file mode 100644 index 00000000000..05426fcc358 --- /dev/null +++ b/helm-chart/kuberay-operator/kind-config.yaml @@ -0,0 +1,29 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: ClusterConfiguration + apiServer: + # enable auditing flags on the API server + extraArgs: + audit-log-path: /var/log/kubernetes/kube-apiserver-audit.log + audit-policy-file: /etc/kubernetes/policies/audit-policy.yaml + # mount new files / directories on the control plane + extraVolumes: + - name: audit-policies + hostPath: /etc/kubernetes/policies + mountPath: /etc/kubernetes/policies + readOnly: true + pathType: "DirectoryOrCreate" + - name: "audit-logs" + hostPath: "/var/log/kubernetes" + mountPath: "/var/log/kubernetes" + readOnly: false + pathType: DirectoryOrCreate + # mount the local file on the control plane + extraMounts: + - hostPath: ./audit-policy.yaml + containerPath: /etc/kubernetes/policies/audit-policy.yaml + readOnly: true From 47300671cd278cec8f6126005645d96c3c8a2723 Mon Sep 17 00:00:00 2001 From: kaihsun Date: Mon, 21 Nov 2022 21:05:15 +0000 Subject: [PATCH 02/21] update --- .../samples/ray-cluster.pod-security.yaml | 181 ++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 ray-operator/config/samples/ray-cluster.pod-security.yaml diff --git a/ray-operator/config/samples/ray-cluster.pod-security.yaml b/ray-operator/config/samples/ray-cluster.pod-security.yaml new file mode 100644 index 00000000000..10b913f39bc --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.pod-security.yaml @@ -0,0 +1,181 @@ +# The resource requests and limits in this config are too small for production! +# For examples with more realistic resource configuration, see +# ray-cluster.complete.large.yaml and +# ray-cluster.autoscaler.large.yaml. 
+apiVersion: ray.io/v1alpha1 +kind: RayCluster +metadata: + labels: + controller-tools.k8s.io: "1.0" + # A unique identifier for the head node and workers of this cluster. + name: raycluster-complete +spec: + rayVersion: '2.0.0' + ######################headGroupSpec################################# + # head group template and specs, (perhaps 'group' is not needed in the name) + headGroupSpec: + # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer' + serviceType: ClusterIP + # for the head group, replicas should always be 1. + # headGroupSpec.replicas is deprecated in KubeRay >= 0.3.0. + replicas: 1 + # the following params are used to complete the ray start: ray start --head --block --dashboard-host: '0.0.0.0' ... + rayStartParams: + dashboard-host: '0.0.0.0' + block: 'true' + #pod template + template: + metadata: + labels: + # custom labels. NOTE: do not define custom labels start with `raycluster.`, they may be used in controller. + # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + rayCluster: raycluster-sample # will be injected if missing + groupName: headgroup # will be injected if missing + # annotations for pod + annotations: + key: value + spec: + containers: + - name: ray-head + image: rayproject/ray-ml:2.0.0 + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + lifecycle: + preStop: + exec: + command: ["/bin/sh","-c","ray stop"] + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + resources: + limits: + cpu: "4" + memory: "4G" + requests: + cpu: "1" + memory: "4G" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + volumes: + - name: ray-logs + emptyDir: {} + workerGroupSpecs: + # the pod replicas in this group typed worker + - replicas: 1 + minReplicas: 1 + maxReplicas: 10 + # logical group name, for this 
called large-group, also can be functional + groupName: large-group + # if worker pods need to be added, we can simply increment the replicas + # if worker pods need to be removed, we decrement the replicas, and populate the podsToDelete list + # the operator will remove pods from the list until the number of replicas is satisfied + # when a pod is confirmed to be deleted, its name will be removed from the list below + #scaleStrategy: + # workersToDelete: + # - raycluster-complete-worker-large-group-bdtwh + # - raycluster-complete-worker-large-group-hv457 + # - raycluster-complete-worker-large-group-k8tj7 + # the following params are used to complete the ray start: ray start --block + rayStartParams: + block: 'true' + #pod template + template: + metadata: + labels: + rayCluster: raycluster-complete # will be injected if missing + groupName: small-group # will be injected if missing + spec: + containers: + - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' + image: rayproject/ray-ml:2.0.0 + # environment variables to set in the container.Optional. + # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/ + lifecycle: + preStop: + exec: + command: ["/bin/sh","-c","ray stop"] + # use volumeMounts.Optional. 
+ # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + resources: + limits: + cpu: "4" + memory: "4Gi" + requests: + cpu: "1" + memory: "4Gi" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + initContainers: + # the env var $RAY_IP is set by the operator if missing, with the value of the head service name + - name: init-myservice + image: busybox:1.28 + # Change the cluster postfix if you don't have a default setting + command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"] + securityContext: + runAsUser: 1000 + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + # use volumes + # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ + volumes: + - name: ray-logs + emptyDir: {} +######################status################################# +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ray-job-code-sample +data: + sample_code.py: | + import ray + from ray.train.xgboost import XGBoostTrainer + from ray.air.config import ScalingConfig + + # Load data. + dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv") + + # Split data into train and validation. + train_dataset, valid_dataset = dataset.train_test_split(test_size=0.3) + + trainer = XGBoostTrainer( + scaling_config=ScalingConfig( + # Number of workers to use for data parallelism. + num_workers=1, + # Whether to use GPU acceleration. 
+ use_gpu=False, + ), + label_column="target", + num_boost_round=20, + params={ + # XGBoost specific params + "objective": "binary:logistic", + # "tree_method": "gpu_hist", # uncomment this to use GPU for training + "eval_metric": ["logloss", "error"], + }, + datasets={"train": train_dataset, "valid": valid_dataset}, + ) + result = trainer.fit() + print(result.metrics) From ed0f045efadd8cd73ae09be5688470ef5c559bcf Mon Sep 17 00:00:00 2001 From: kaihsun Date: Mon, 21 Nov 2022 23:24:03 +0000 Subject: [PATCH 03/21] update --- docs/guidance/pod-security.md | 33 +++++++++++-------- .../samples/ray-cluster.pod-security.yaml | 17 +++++++--- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index fe600735725..5863180e793 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -9,20 +9,20 @@ This document describes how to configure RayCluster YAML file to apply `restrict * [Kubernetes - Auditing](https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/) * [KinD - Auditing](https://kind.sigs.k8s.io/docs/user/auditing/) -# Step1: Create a KinD cluster +# Step 1: Create a KinD cluster ```bash # Please use Kubernetes >= 1.23. I use 1.24 as my environment. kind create cluster --config kind-config.yaml ``` The `kind-config.yaml` enables audit logging with the audit policy defined in `audit-policy.yaml`. The `audit-policy.yaml` defines an auditing policy to listen to the Pod events in the namespace `pod-security`. With this policy, we can check whether our Pods violate the policies in `restricted` standard or not. -# Step2: Check the audit logs +# Step 2: Check the audit logs ```bash docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log ``` The log should be empty because the namespace `pod-security` does not exist. 
-# Step3: Create the `pod-security` namespace +# Step 3: Create the `pod-security` namespace ```bash kubectl create ns pod-security kubectl label --overwrite ns pod-security \ @@ -35,18 +35,18 @@ kubectl label --overwrite ns pod-security \ ``` With the `pod-security.kubernetes.io` labels, the built-in Kubernetes Pod security admission controller will apply the `restricted` Pod security standard to all Pods in the namespace `pod-security`. The label `pod-security.kubernetes.io/enforce=restricted` means that the Pod will be rejected if it violate the policies defined in `restricted` security standard. See [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) for more details about the labels. -# Step4: Install a KubeRay operator +# Step 4: Install a KubeRay operator ```bash pushd helm-chart/kuberay-operator helm install kuberay-operator . popd ``` -# Step5: Create a RayCluster (Choose either Step5.1 or Step5.2) +# Step 5: Create a RayCluster (Choose either Step5.1 or Step5.2) * If you choose Step5.1, no Pod will be created in the namespace `pod-security`. * If you choose Step5.2, Pods can be created successfully. -## Step5.1: Create a RayCluster without proper `securityContext` configurations +## Step 5.1: Create a RayCluster without proper `securityContext` configurations ```bash kubectl apply -n pod-security -f ray-operator/config/samples/ray-cluster.complete.yaml @@ -58,20 +58,25 @@ docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log ``` No Pod will be created in the namespace `pod-security`, and check audit logs for error messages. -## Step5.2: Create a RayCluster with proper `securityContext` configurations +## Step 5.2: Create a RayCluster with proper `securityContext` configurations ```bash kubectl apply -n pod-security -f ray-operator/config/samples/ray-cluster.pod-security.yaml # Wait for the RayCluster convergence and check audit logs for the messages. 
docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log -``` -The RayCluster will work as expectation (1 head pod + 1 worker pod). -# Step6: Test the functionality with a simple job. -```bash -# Log in to the Pod -kubectl exec -it -n pod-security $HEAD_POD -- bash +# Log in to the head Pod +kubectl exec -it -n pod-security ${YOUR_HEAD_POD} -- bash + +# Run a sample job in the Pod +python3 samples/xgboost_example.py + +# Forward the dashboard port +kubectl port-forward --address 0.0.0.0 svc/raycluster-pod-security-head-svc -n pod-security 8265:8265 -# +# Check the job status in the dashboard on your browser. +# http://127.0.0.1:8265/#/job => The job status should be "SUCCEEDED". ``` +One head Pod and one worker Pod will be created as specified in `ray-cluster.pod-security.yaml`. +Next, we log in to the head Pod, and run a XGBoost example script. Finally, check the job status in the dashboard. diff --git a/ray-operator/config/samples/ray-cluster.pod-security.yaml b/ray-operator/config/samples/ray-cluster.pod-security.yaml index 10b913f39bc..45260c8deb5 100644 --- a/ray-operator/config/samples/ray-cluster.pod-security.yaml +++ b/ray-operator/config/samples/ray-cluster.pod-security.yaml @@ -8,7 +8,7 @@ metadata: labels: controller-tools.k8s.io: "1.0" # A unique identifier for the head node and workers of this cluster. 
- name: raycluster-complete + name: raycluster-pod-security spec: rayVersion: '2.0.0' ######################headGroupSpec################################# @@ -52,6 +52,8 @@ spec: volumeMounts: - mountPath: /tmp/ray name: ray-logs + - mountPath: /home/ray/samples + name: xgboost-example-configmap resources: limits: cpu: "4" @@ -69,6 +71,13 @@ spec: volumes: - name: ray-logs emptyDir: {} + - name: xgboost-example-configmap + configMap: + name: xgboost-example + # An array of keys from the ConfigMap to create as files + items: + - key: xgboost_example.py + path: xgboost_example.py workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 @@ -92,7 +101,7 @@ spec: template: metadata: labels: - rayCluster: raycluster-complete # will be injected if missing + rayCluster: raycluster-pod-security # will be injected if missing groupName: small-group # will be injected if missing spec: containers: @@ -147,9 +156,9 @@ spec: apiVersion: v1 kind: ConfigMap metadata: - name: ray-job-code-sample + name: xgboost-example data: - sample_code.py: | + xgboost_example.py: | import ray from ray.train.xgboost import XGBoostTrainer from ray.air.config import ScalingConfig From e7accb3d548545bcc2880cf201f4646a2303eedf Mon Sep 17 00:00:00 2001 From: kaihsun Date: Tue, 22 Nov 2022 00:28:45 +0000 Subject: [PATCH 04/21] update --- docs/guidance/pod-security.md | 38 +++++++++++++------ helm-chart/kuberay-operator/values.yaml | 9 +++++ .../config/security}/audit-policy.yaml | 0 .../config/security}/kind-config.yaml | 0 .../ray-cluster.pod-security.yaml | 0 5 files changed, 35 insertions(+), 12 deletions(-) rename {helm-chart/kuberay-operator => ray-operator/config/security}/audit-policy.yaml (100%) rename {helm-chart/kuberay-operator => ray-operator/config/security}/kind-config.yaml (100%) rename ray-operator/config/{samples => security}/ray-cluster.pod-security.yaml (100%) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 5863180e793..24fea63e0ed 
100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -1,8 +1,11 @@ # Pod Security -Kubernetes defines three different Pod Security Standards, including `privileged`, `baseline`, and `restricted`, to broadly cover the security spectrum. The `privileged` standard allows users to do known privilege escalations, and thus it is not safe enough for security-critical applications. +Kubernetes defines three different Pod Security Standards, including `privileged`, `baseline`, and `restricted`, to broadly +cover the security spectrum. The `privileged` standard allows users to do known privilege escalations, and thus it is not +safe enough for security-critical applications. -This document describes how to configure RayCluster YAML file to apply `restricted` Pod security standard. The following references can help you understand this document better: +This document describes how to configure RayCluster YAML file to apply `restricted` Pod security standard. The following +references can help you understand this document better: * [Kubernetes - Pod Security Standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted) * [Kubernetes - Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) @@ -11,10 +14,16 @@ This document describes how to configure RayCluster YAML file to apply `restrict # Step 1: Create a KinD cluster ```bash -# Please use Kubernetes >= 1.23. I use 1.24 as my environment. -kind create cluster --config kind-config.yaml +# Path: ray-operator/config/security +kind create cluster --config kind-config.yaml --image=kindest/node:v1.24.0 ``` -The `kind-config.yaml` enables audit logging with the audit policy defined in `audit-policy.yaml`. The `audit-policy.yaml` defines an auditing policy to listen to the Pod events in the namespace `pod-security`. With this policy, we can check whether our Pods violate the policies in `restricted` standard or not. 
+The `kind-config.yaml` enables audit logging with the audit policy defined in `audit-policy.yaml`. The `audit-policy.yaml` +defines an auditing policy to listen to the Pod events in the namespace `pod-security`. With this policy, we can check +whether our Pods violate the policies in `restricted` standard or not. + +The feature [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) is firstly +introduced in Kubernetes v1.22 (alpha) and becomes stable in Kubernetes v1.25. In addition, KubeRay currently supports +Kubernetes from v1.19 to v1.24 (not sure about the status of v1.25). Hence, I use **Kubernetes v1.24** in this step. # Step 2: Check the audit logs ```bash @@ -33,13 +42,15 @@ kubectl label --overwrite ns pod-security \ pod-security.kubernetes.io/enforce=restricted \ pod-security.kubernetes.io/enforce-version=latest ``` -With the `pod-security.kubernetes.io` labels, the built-in Kubernetes Pod security admission controller will apply the `restricted` Pod security standard to all Pods in the namespace `pod-security`. The label `pod-security.kubernetes.io/enforce=restricted` means that the Pod will be rejected if it violate the policies defined in `restricted` security standard. See [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) for more details about the labels. +With the `pod-security.kubernetes.io` labels, the built-in Kubernetes Pod security admission controller will apply the +`restricted` Pod security standard to all Pods in the namespace `pod-security`. The label +`pod-security.kubernetes.io/enforce=restricted` means that the Pod will be rejected if it violate the policies defined in +`restricted` security standard. See [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) for more details about the labels. # Step 4: Install a KubeRay operator ```bash -pushd helm-chart/kuberay-operator -helm install kuberay-operator . 
-popd +# Path: helm-chart/kuberay-operator +helm install -n pod-security kuberay-operator . ``` # Step 5: Create a RayCluster (Choose either Step5.1 or Step5.2) @@ -48,7 +59,8 @@ popd ## Step 5.1: Create a RayCluster without proper `securityContext` configurations ```bash -kubectl apply -n pod-security -f ray-operator/config/samples/ray-cluster.complete.yaml +# Path: ray-operator/config/samples +kubectl apply -n pod-security -f ray-cluster.complete.yaml # Wait 20 seconds and check audit logs for the error messages. docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log @@ -60,7 +72,8 @@ No Pod will be created in the namespace `pod-security`, and check audit logs for ## Step 5.2: Create a RayCluster with proper `securityContext` configurations ```bash -kubectl apply -n pod-security -f ray-operator/config/samples/ray-cluster.pod-security.yaml +# Path: ray-operator/config/security +kubectl apply -n pod-security -f ray-cluster.pod-security.yaml # Wait for the RayCluster convergence and check audit logs for the messages. docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log @@ -78,5 +91,6 @@ kubectl port-forward --address 0.0.0.0 svc/raycluster-pod-security-head-svc -n p # http://127.0.0.1:8265/#/job => The job status should be "SUCCEEDED". ``` One head Pod and one worker Pod will be created as specified in `ray-cluster.pod-security.yaml`. -Next, we log in to the head Pod, and run a XGBoost example script. Finally, check the job status in the dashboard. +Next, we log in to the head Pod, and run a XGBoost example script. Finally, check the job +status in the dashboard. 
diff --git a/helm-chart/kuberay-operator/values.yaml b/helm-chart/kuberay-operator/values.yaml index 36e74d344da..887dae830cf 100644 --- a/helm-chart/kuberay-operator/values.yaml +++ b/helm-chart/kuberay-operator/values.yaml @@ -58,3 +58,12 @@ rbacEnable: true batchScheduler: enabled: false + +# Respect the "restricted" security policy +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault diff --git a/helm-chart/kuberay-operator/audit-policy.yaml b/ray-operator/config/security/audit-policy.yaml similarity index 100% rename from helm-chart/kuberay-operator/audit-policy.yaml rename to ray-operator/config/security/audit-policy.yaml diff --git a/helm-chart/kuberay-operator/kind-config.yaml b/ray-operator/config/security/kind-config.yaml similarity index 100% rename from helm-chart/kuberay-operator/kind-config.yaml rename to ray-operator/config/security/kind-config.yaml diff --git a/ray-operator/config/samples/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml similarity index 100% rename from ray-operator/config/samples/ray-cluster.pod-security.yaml rename to ray-operator/config/security/ray-cluster.pod-security.yaml From 4c2cae2a45267b8ee08709246cd39c75278891f6 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Mon, 21 Nov 2022 18:38:22 -0800 Subject: [PATCH 05/21] Update docs/guidance/pod-security.md Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- docs/guidance/pod-security.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 24fea63e0ed..784e9396415 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -53,7 +53,7 @@ With the `pod-security.kubernetes.io` labels, the built-in Kubernetes Pod securi helm install -n pod-security kuberay-operator . 
``` -# Step 5: Create a RayCluster (Choose either Step5.1 or Step5.2) +# Step 5: Create a RayCluster (Choose either Step 5.1 or Step 5.2) * If you choose Step5.1, no Pod will be created in the namespace `pod-security`. * If you choose Step5.2, Pods can be created successfully. From ea4c215c0960f347adc7ef909f0850ec1f3230d2 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Mon, 21 Nov 2022 18:38:33 -0800 Subject: [PATCH 06/21] Update docs/guidance/pod-security.md Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- docs/guidance/pod-security.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 784e9396415..68b383e797c 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -47,7 +47,7 @@ With the `pod-security.kubernetes.io` labels, the built-in Kubernetes Pod securi `pod-security.kubernetes.io/enforce=restricted` means that the Pod will be rejected if it violate the policies defined in `restricted` security standard. See [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) for more details about the labels. -# Step 4: Install a KubeRay operator +# Step 4: Install the KubeRay operator ```bash # Path: helm-chart/kuberay-operator helm install -n pod-security kuberay-operator . 
From 84d828cf27cfaa03297081550183e0d78a0e7a16 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Tue, 22 Nov 2022 14:15:32 -0800 Subject: [PATCH 07/21] Update docs/guidance/pod-security.md Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- docs/guidance/pod-security.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 68b383e797c..101d55651f3 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -23,7 +23,7 @@ whether our Pods violate the policies in `restricted` standard or not. The feature [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) is firstly introduced in Kubernetes v1.22 (alpha) and becomes stable in Kubernetes v1.25. In addition, KubeRay currently supports -Kubernetes from v1.19 to v1.24 (not sure about the status of v1.25). Hence, I use **Kubernetes v1.24** in this step. +Kubernetes from v1.19 to v1.24. (At the time of writing, we have not tested KubeRay with Kubernetes v1.25.) Hence, I use **Kubernetes v1.24** in this step. # Step 2: Check the audit logs ```bash From 09ff5fd1902f805f2c68c790d17edf819bcccf5b Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Tue, 22 Nov 2022 14:15:42 -0800 Subject: [PATCH 08/21] Update docs/guidance/pod-security.md Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- docs/guidance/pod-security.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 101d55651f3..1d19185bd98 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -54,7 +54,7 @@ helm install -n pod-security kuberay-operator . 
``` # Step 5: Create a RayCluster (Choose either Step 5.1 or Step 5.2) -* If you choose Step5.1, no Pod will be created in the namespace `pod-security`. +* If you choose Step 5.1, no Pod will be created in the namespace `pod-security`. * If you choose Step5.2, Pods can be created successfully. ## Step 5.1: Create a RayCluster without proper `securityContext` configurations From 7afbc6a0b49e126903e85250238eba4139f4066f Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Tue, 22 Nov 2022 14:15:50 -0800 Subject: [PATCH 09/21] Update docs/guidance/pod-security.md Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- docs/guidance/pod-security.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 1d19185bd98..89943ad4342 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -55,7 +55,7 @@ helm install -n pod-security kuberay-operator . # Step 5: Create a RayCluster (Choose either Step 5.1 or Step 5.2) * If you choose Step 5.1, no Pod will be created in the namespace `pod-security`. -* If you choose Step5.2, Pods can be created successfully. +* If you choose Step 5.2, Pods can be created successfully. 
## Step 5.1: Create a RayCluster without proper `securityContext` configurations ```bash From c72a7ebb21f4cb43a61f55fea35066590f9fef1a Mon Sep 17 00:00:00 2001 From: kaihsun Date: Wed, 23 Nov 2022 20:16:57 +0000 Subject: [PATCH 10/21] update --- docs/guidance/pod-security.md | 13 +++++++++++++ .../security/ray-cluster.pod-security.yaml | 16 ++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 89943ad4342..12bb06904c0 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -67,6 +67,14 @@ docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log # Example error messagess # "pods \"raycluster-complete-head-fkbf5\" is forbidden: violates PodSecurity \"restricted:latest\": allowPrivilegeEscalation != false (container \"ray-head\" must set securityContext.allowPrivilegeEscalation=false) ... + +kubectl get pod -n pod-security +# NAME READY STATUS RESTARTS AGE +# kuberay-operator-8b6d55dbb-t8msf 1/1 Running 0 62s + +# Clean up the RayCluster +kubectl delete rayclusters.ray.io -n pod-security raycluster-complete +# raycluster.ray.io "raycluster-complete" deleted ``` No Pod will be created in the namespace `pod-security`, and check audit logs for error messages. @@ -89,6 +97,11 @@ kubectl port-forward --address 0.0.0.0 svc/raycluster-pod-security-head-svc -n p # Check the job status in the dashboard on your browser. # http://127.0.0.1:8265/#/job => The job status should be "SUCCEEDED". + +# Clean up the RayCluster +kubectl delete -n pod-security -f ray-cluster.pod-security.yaml +# raycluster.ray.io "raycluster-pod-security" deleted +# configmap "xgboost-example" deleted ``` One head Pod and one worker Pod will be created as specified in `ray-cluster.pod-security.yaml`. Next, we log in to the head Pod, and run a XGBoost example script. 
Finally, check the job diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 45260c8deb5..02104f88f69 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -56,11 +56,11 @@ spec: name: xgboost-example-configmap resources: limits: - cpu: "4" - memory: "4G" + cpu: 1 + memory: 2Gi requests: - cpu: "1" - memory: "4G" + cpu: 1 + memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: @@ -120,11 +120,11 @@ spec: name: ray-logs resources: limits: - cpu: "4" - memory: "4Gi" + cpu: 4 + memory: 2Gi requests: - cpu: "1" - memory: "4Gi" + cpu: 1 + memory: 2Gi securityContext: allowPrivilegeEscalation: false capabilities: From 1744ff5cceb152764037e925786497ae86644a44 Mon Sep 17 00:00:00 2001 From: kaihsun Date: Tue, 29 Nov 2022 01:07:01 +0000 Subject: [PATCH 11/21] test pip --- docs/guidance/pod-security.md | 13 +++++++-- .../security/ray-cluster.pod-security.yaml | 27 ++++++++++++++++--- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 12bb06904c0..d2f395809cd 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -98,12 +98,21 @@ kubectl port-forward --address 0.0.0.0 svc/raycluster-pod-security-head-svc -n p # Check the job status in the dashboard on your browser. # http://127.0.0.1:8265/#/job => The job status should be "SUCCEEDED". 
+# Make sure Python dependencies can be installed under `restricted` security standard +python3 samples/runtime_env.py # => ModuleNotFoundError: No module named 'jsonpatch' +pip3 install jsonpatch +python3 samples/runtime_env.py +# 2022-11-28 16:57:17,033 INFO worker.py:1224 -- Using address 127.0.0.1:6379 set in the environment variable RAY_ADDRESS +# 2022-11-28 16:57:17,034 INFO worker.py:1333 -- Connecting to existing Ray cluster at address: 10.244.0.17:6379... +# 2022-11-28 16:57:17,045 INFO worker.py:1515 -- Connected to Ray cluster. View the dashboard at http://10.244.0.17:8265 +# {'a': {'b': 'd'}} + # Clean up the RayCluster kubectl delete -n pod-security -f ray-cluster.pod-security.yaml # raycluster.ray.io "raycluster-pod-security" deleted # configmap "xgboost-example" deleted ``` One head Pod and one worker Pod will be created as specified in `ray-cluster.pod-security.yaml`. -Next, we log in to the head Pod, and run a XGBoost example script. Finally, check the job -status in the dashboard. +First, we log in to the head Pod, run a XGBoost example script, and check the job +status in the dashboard. Next, we use `pip` to install a Python dependency (i.e. `jsonpatch`), and run a Runtime Environment script to make sure the new dependency works as expected. 
diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 02104f88f69..b1654a3f130 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -53,7 +53,7 @@ spec: - mountPath: /tmp/ray name: ray-logs - mountPath: /home/ray/samples - name: xgboost-example-configmap + name: ray-example-configmap resources: limits: cpu: 1 @@ -71,13 +71,15 @@ spec: volumes: - name: ray-logs emptyDir: {} - - name: xgboost-example-configmap + - name: ray-example-configmap configMap: - name: xgboost-example + name: ray-example # An array of keys from the ConfigMap to create as files items: - key: xgboost_example.py path: xgboost_example.py + - key: runtime_env.py + path: runtime_env.py workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 @@ -156,7 +158,7 @@ spec: apiVersion: v1 kind: ConfigMap metadata: - name: xgboost-example + name: ray-example data: xgboost_example.py: | import ray @@ -188,3 +190,20 @@ data: ) result = trainer.fit() print(result.metrics) + runtime_env.py: | + import ray + import jsonpatch + ray.init(runtime_env={"pip": ["jsonpatch"]}) + + @ray.remote + def apply_patch(): + dict = { + "a": { + "b": "c" + } + } + patch = jsonpatch.JsonPatch([{'op': 'replace', 'path': '/a/b','value': 'd'}]) + return patch.apply(dict) + update_dict = ray.get(apply_patch.remote()) + print(update_dict) + assert update_dict["a"]["b"] == "d" From c36e61ee7caf5cd4b8856cb4405549dfe6854903 Mon Sep 17 00:00:00 2001 From: kaihsun Date: Tue, 29 Nov 2022 22:09:48 +0000 Subject: [PATCH 12/21] update pip --- docs/guidance/pod-security.md | 9 ++------- .../security/ray-cluster.pod-security.yaml | 19 ------------------- 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index d2f395809cd..fb50f227594 100644 --- a/docs/guidance/pod-security.md +++ 
b/docs/guidance/pod-security.md @@ -99,13 +99,8 @@ kubectl port-forward --address 0.0.0.0 svc/raycluster-pod-security-head-svc -n p # http://127.0.0.1:8265/#/job => The job status should be "SUCCEEDED". # Make sure Python dependencies can be installed under `restricted` security standard -python3 samples/runtime_env.py # => ModuleNotFoundError: No module named 'jsonpatch' pip3 install jsonpatch -python3 samples/runtime_env.py -# 2022-11-28 16:57:17,033 INFO worker.py:1224 -- Using address 127.0.0.1:6379 set in the environment variable RAY_ADDRESS -# 2022-11-28 16:57:17,034 INFO worker.py:1333 -- Connecting to existing Ray cluster at address: 10.244.0.17:6379... -# 2022-11-28 16:57:17,045 INFO worker.py:1515 -- Connected to Ray cluster. View the dashboard at http://10.244.0.17:8265 -# {'a': {'b': 'd'}} +echo $? # Check the exit code of `pip3 install jsonpatch`. It should be 0. # Clean up the RayCluster kubectl delete -n pod-security -f ray-cluster.pod-security.yaml @@ -114,5 +109,5 @@ kubectl delete -n pod-security -f ray-cluster.pod-security.yaml ``` One head Pod and one worker Pod will be created as specified in `ray-cluster.pod-security.yaml`. First, we log in to the head Pod, run a XGBoost example script, and check the job -status in the dashboard. Next, we use `pip` to install a Python dependency (i.e. `jsonpatch`), and run a Runtime Environment script to make sure the new dependency works as expected. +status in the dashboard. Next, we use `pip` to install a Python dependency (i.e. `jsonpatch`), and the exit code of the `pip` command should be 0. 
diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index b1654a3f130..b40ea96d725 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -78,8 +78,6 @@ spec: items: - key: xgboost_example.py path: xgboost_example.py - - key: runtime_env.py - path: runtime_env.py workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 @@ -190,20 +188,3 @@ data: ) result = trainer.fit() print(result.metrics) - runtime_env.py: | - import ray - import jsonpatch - ray.init(runtime_env={"pip": ["jsonpatch"]}) - - @ray.remote - def apply_patch(): - dict = { - "a": { - "b": "c" - } - } - patch = jsonpatch.JsonPatch([{'op': 'replace', 'path': '/a/b','value': 'd'}]) - return patch.apply(dict) - update_dict = ray.get(apply_patch.remote()) - print(update_dict) - assert update_dict["a"]["b"] == "d" From 24b6766434443e68f6a93e003bd24fd5ca038e81 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 14:04:09 -0800 Subject: [PATCH 13/21] Update ray-operator/config/security/ray-cluster.pod-security.yaml Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- ray-operator/config/security/ray-cluster.pod-security.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index b40ea96d725..a089ccded43 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -106,7 +106,7 @@ spec: spec: containers: - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' - image: rayproject/ray-ml:2.0.0 + image: rayproject/ray-ml:2.1.0 # environment variables to set in the container.Optional. # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/ lifecycle: From 5c8d23d5ea2d3fea2c8fea8d81a46e35fcf924c7 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 14:04:41 -0800 Subject: [PATCH 14/21] Update ray-operator/config/security/ray-cluster.pod-security.yaml Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- ray-operator/config/security/ray-cluster.pod-security.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index a089ccded43..224df9d0210 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -37,7 +37,7 @@ spec: spec: containers: - name: ray-head - image: rayproject/ray-ml:2.0.0 + image: rayproject/ray-ml:2.1.0 ports: - containerPort: 6379 name: gcs From 0d70255ba87eb680050d3da5ada3f4b7157d9f53 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 14:04:51 -0800 Subject: [PATCH 15/21] Update ray-operator/config/security/ray-cluster.pod-security.yaml Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- ray-operator/config/security/ray-cluster.pod-security.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 224df9d0210..80303500321 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -10,7 +10,7 @@ metadata: # A unique identifier for the head node and 
workers of this cluster. name: raycluster-pod-security spec: - rayVersion: '2.0.0' + rayVersion: '2.1.0' ######################headGroupSpec################################# # head group template and specs, (perhaps 'group' is not needed in the name) headGroupSpec: From 8b45229aa858bf4981362b9eb38407e04a29f43b Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 14:05:23 -0800 Subject: [PATCH 16/21] Update ray-operator/config/security/ray-cluster.pod-security.yaml Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- ray-operator/config/security/ray-cluster.pod-security.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 80303500321..1cd7e30be53 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -99,10 +99,6 @@ spec: block: 'true' #pod template template: - metadata: - labels: - rayCluster: raycluster-pod-security # will be injected if missing - groupName: small-group # will be injected if missing spec: containers: - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 
'my-name', or '123-abc' From c003552b9ed2f2b617d8050956ed42daf255c9f8 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 14:05:38 -0800 Subject: [PATCH 17/21] Update ray-operator/config/security/ray-cluster.pod-security.yaml Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- .../config/security/ray-cluster.pod-security.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 1cd7e30be53..70de19f7ee4 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -25,15 +25,6 @@ spec: block: 'true' #pod template template: - metadata: - labels: - # custom labels. NOTE: do not define custom labels start with `raycluster.`, they may be used in controller. - # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ - rayCluster: raycluster-sample # will be injected if missing - groupName: headgroup # will be injected if missing - # annotations for pod - annotations: - key: value spec: containers: - name: ray-head From 84cb67fc52c072c89385595630e4c42cccbd5b09 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 14:05:52 -0800 Subject: [PATCH 18/21] Update ray-operator/config/security/ray-cluster.pod-security.yaml Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- ray-operator/config/security/ray-cluster.pod-security.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 70de19f7ee4..42d285c35dc 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -92,7 +92,7 
@@ spec: template: spec: containers: - - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' + - name: ray-worker image: rayproject/ray-ml:2.1.0 # environment variables to set in the container.Optional. # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/ From 8149e822e38c48f8b4dc03c8a3129680687922bd Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 14:06:20 -0800 Subject: [PATCH 19/21] Update ray-operator/config/security/ray-cluster.pod-security.yaml Co-authored-by: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Signed-off-by: Kai-Hsun Chen --- ray-operator/config/security/ray-cluster.pod-security.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 42d285c35dc..7c607be782a 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -138,7 +138,6 @@ spec: volumes: - name: ray-logs emptyDir: {} -######################status################################# --- apiVersion: v1 kind: ConfigMap From b6e1465059b7c3ffe2b9ee03b99f98946d722501 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 18:06:09 -0500 Subject: [PATCH 20/21] resolve conflict --- docs/guidance/pod-security.md | 9 +++++++++ helm-chart/kuberay-operator/values.yaml | 11 +++-------- ray-operator/config/samples/ray-cluster.complete.yaml | 3 +-- .../config/security/ray-cluster.pod-security.yaml | 3 +-- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index fb50f227594..10398e065d3 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -49,6 +49,15 @@ With the 
`pod-security.kubernetes.io` labels, the built-in Kubernetes Pod securi # Step 4: Install the KubeRay operator ```bash +# Set the `securityContext` field in helm-chart/kuberay-operator/values.yaml to the following YAML (this is file content, not a shell command) +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + # Path: helm-chart/kuberay-operator helm install -n pod-security kuberay-operator . ``` diff --git a/helm-chart/kuberay-operator/values.yaml b/helm-chart/kuberay-operator/values.yaml index 887dae830cf..f19843527a9 100644 --- a/helm-chart/kuberay-operator/values.yaml +++ b/helm-chart/kuberay-operator/values.yaml @@ -59,11 +59,6 @@ rbacEnable: true batchScheduler: enabled: false -# Respect the "restricted" security policy -securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - runAsNonRoot: true - seccompProfile: - type: RuntimeDefault +# Set up `securityContext` to improve Pod security. +# See https://github.com/ray-project/kuberay/blob/master/docs/guidance/pod-security.md for further guidance.
+securityContext: {} diff --git a/ray-operator/config/samples/ray-cluster.complete.yaml b/ray-operator/config/samples/ray-cluster.complete.yaml index 85b709353d2..14ba4b1f44f 100644 --- a/ray-operator/config/samples/ray-cluster.complete.yaml +++ b/ray-operator/config/samples/ray-cluster.complete.yaml @@ -11,8 +11,7 @@ metadata: name: raycluster-complete spec: rayVersion: '2.1.0' - ######################headGroupSpec################################# - # Ray head pod template and specs + # Ray head pod configuration headGroupSpec: # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer' serviceType: ClusterIP diff --git a/ray-operator/config/security/ray-cluster.pod-security.yaml b/ray-operator/config/security/ray-cluster.pod-security.yaml index 7c607be782a..60dec71c645 100644 --- a/ray-operator/config/security/ray-cluster.pod-security.yaml +++ b/ray-operator/config/security/ray-cluster.pod-security.yaml @@ -11,8 +11,7 @@ metadata: name: raycluster-pod-security spec: rayVersion: '2.1.0' - ######################headGroupSpec################################# - # head group template and specs, (perhaps 'group' is not needed in the name) + # Ray head pod configuration headGroupSpec: # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer' serviceType: ClusterIP From 1e1be040ce27d23d4738a1ac1edd2466554a82d9 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 7 Dec 2022 18:17:55 -0500 Subject: [PATCH 21/21] update pod-security.md --- docs/guidance/pod-security.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/guidance/pod-security.md b/docs/guidance/pod-security.md index 10398e065d3..a4bcd271227 100644 --- a/docs/guidance/pod-security.md +++ b/docs/guidance/pod-security.md @@ -95,19 +95,19 @@ kubectl apply -n pod-security -f ray-cluster.pod-security.yaml # Wait for the RayCluster convergence and check audit logs for the messages. 
docker exec kind-control-plane cat /var/log/kubernetes/kube-apiserver-audit.log +# Forward the dashboard port (this command blocks, so run it in a separate terminal) +kubectl port-forward --address 0.0.0.0 svc/raycluster-pod-security-head-svc -n pod-security 8265:8265 + # Log in to the head Pod kubectl exec -it -n pod-security ${YOUR_HEAD_POD} -- bash -# Run a sample job in the Pod +# (Head Pod) Run a sample job in the Pod python3 samples/xgboost_example.py -# Forward the dashboard port -kubectl port-forward --address 0.0.0.0 svc/raycluster-pod-security-head-svc -n pod-security 8265:8265 - # Check the job status in the dashboard on your browser. # http://127.0.0.1:8265/#/job => The job status should be "SUCCEEDED". -# Make sure Python dependencies can be installed under `restricted` security standard +# (Head Pod) Make sure Python dependencies can be installed under the `restricted` security standard pip3 install jsonpatch echo $? # Check the exit code of `pip3 install jsonpatch`. It should be 0. @@ -119,4 +119,3 @@ kubectl delete -n pod-security -f ray-cluster.pod-security.yaml One head Pod and one worker Pod will be created as specified in `ray-cluster.pod-security.yaml`. First, we log in to the head Pod, run a XGBoost example script, and check the job status in the dashboard. Next, we use `pip` to install a Python dependency (i.e. `jsonpatch`), and the exit code of the `pip` command should be 0. -