Skip to content

Commit

Permalink
MSP-3261: add apparmor configurable (#165)
Browse files Browse the repository at this point in the history
  • Loading branch information
Uburro authored Nov 5, 2024
1 parent 193829c commit 3ec7040
Show file tree
Hide file tree
Showing 24 changed files with 256 additions and 40 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.14.15
1.14.16
6 changes: 6 additions & 0 deletions api/v1/slurmcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,12 @@ type NodeContainer struct {
//
// +kubebuilder:validation:Optional
SecurityLimitsConfig string `json:"securityLimitsConfig,omitempty"`

// AppArmorProfile defines the AppArmor profile for the container
//
// +kubebuilder:validation:Optional
// +kubebuilder:default="unconfined"
AppArmorProfile string `json:"appArmorProfile,omitempty"`
}

// NodeVolume defines the configuration for a node volume.
Expand Down
55 changes: 55 additions & 0 deletions config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1521,6 +1521,11 @@ spec:
mariadbOperator:
description: MariaDbOperator represents the MariaDB CRD configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
enabled:
type: boolean
image:
Expand Down Expand Up @@ -8281,6 +8286,11 @@ spec:
munge:
description: Munge represents the Slurm munge configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -8363,6 +8373,11 @@ spec:
description: Slurmdbd represents the Slurm database daemon
configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -8514,6 +8529,11 @@ spec:
munge:
description: Munge represents the Slurm munge configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -8556,6 +8576,11 @@ spec:
description: Slurmctld represents the Slurm control daemon
configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -9044,6 +9069,11 @@ spec:
description: Exporter represents the Slurm exporter daemon
configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -9090,6 +9120,11 @@ spec:
munge:
description: Munge represents the Slurm munge configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -9574,6 +9609,11 @@ spec:
munge:
description: Munge represents the Slurm munge configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -9621,6 +9661,11 @@ spec:
sshd:
description: Sshd represents the SSH daemon service configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -9947,6 +9992,11 @@ spec:
munge:
description: Munge represents the Slurm munge configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down Expand Up @@ -9988,6 +10038,11 @@ spec:
slurmd:
description: Slurmd represents the Slurm daemon service configuration
properties:
appArmorProfile:
default: unconfined
description: AppArmorProfile defines the AppArmor profile
for the Slurm worker node
type: string
image:
description: Image defines the container image
type: string
Expand Down
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ resources:
images:
- name: controller
newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
newTag: 1.14.15
newTag: 1.14.16
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ spec:
value: "false"
- name: SLURM_OPERATOR_WATCH_NAMESPACES
value: "*"
image: controller:1.14.15
image: controller:1.14.16
imagePullPolicy: Always
name: manager
securityContext:
Expand Down
4 changes: 2 additions & 2 deletions helm/slurm-cluster-storage/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: helm-slurm-cluster-storage
description: A Helm chart for Kubernetes
type: application
version: "1.14.15"
appVersion: "1.14.15"
version: "1.14.16"
appVersion: "1.14.16"
4 changes: 2 additions & 2 deletions helm/slurm-cluster/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: helm-slurm-cluster
description: A Helm chart for Kubernetes
type: application
version: "1.14.15"
appVersion: "1.14.15"
version: "1.14.16"
appVersion: "1.14.16"
24 changes: 24 additions & 0 deletions helm/slurm-cluster/templates/slurm-cluster-cr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ spec:
sshdKeysName: {{ include "slurm-cluster.secret.sshdKeysName" . }}
populateJail:
image: {{ required "populateJail image" .Values.images.populateJail | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.populateJail.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.populateJail.appArmorProfile | quote }}
k8sNodeFilterName: {{ required "Populate Jail job k8s node filter name must be provided." .Values.populateJail.k8sNodeFilterName | quote }}
{{- if .Values.populateJail.jailSnapshotVolume }}
jailSnapshotVolume:
Expand All @@ -61,6 +63,8 @@ spec:
successfulJobsHistoryLimit: {{ default 3 .Values.periodicChecks.ncclBenchmark.successfulJobsHistoryLimit }}
failedJobsHistoryLimit: {{ default 3 .Values.periodicChecks.ncclBenchmark.failedJobsHistoryLimit }}
image: {{ required "NCCl benchmark image" .Values.images.ncclBenchmark | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.periodicChecks.ncclBenchmark.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.periodicChecks.ncclBenchmark.appArmorProfile | quote }}
ncclArguments:
minBytes: {{ (default "512Mb" .Values.periodicChecks.ncclBenchmark.ncclArguments.minBytes) | quote }}
maxBytes: {{ (default "8Gb" .Values.periodicChecks.ncclBenchmark.ncclArguments.maxBytes) | quote }}
Expand All @@ -76,6 +80,8 @@ spec:
k8sNodeFilterName: {{ required ".Values.slurmNodes.accounting.k8sNodeFilterName must be provided." .Values.slurmNodes.accounting.k8sNodeFilterName | quote }}
slurmdbd:
image: {{ required "slurmd image" .Values.images.slurmdbd | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.accounting.slurmdbd.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.accounting.slurmdbd.appArmorProfile | quote }}
{{- if .Values.slurmNodes.accounting.enabled }}
port: {{ default 6819 .Values.slurmNodes.accounting.slurmdbd.port }}
resources:
Expand Down Expand Up @@ -111,6 +117,8 @@ spec:
{{- end }}
munge:
image: {{ required "munge image" .Values.images.munge | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.accounting.munge.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.accounting.munge.appArmorProfile | quote }}
resources:
cpu: {{ required ".Values.slurmNodes.accounting.munge.resources.cpu must be provided." .Values.slurmNodes.accounting.munge.resources.cpu | quote}}
memory: {{ required ".Values.slurmNodes.accounting.munge.resources.memory must be provided." .Values.slurmNodes.accounting.munge.resources.memory | quote}}
Expand All @@ -121,13 +129,17 @@ spec:
k8sNodeFilterName: {{ required ".Values.slurmNodes.controller.k8sNodeFilterName must be provided." .Values.slurmNodes.controller.k8sNodeFilterName | quote }}
slurmctld:
image: {{ required "slurmctld image" .Values.images.slurmctld | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.controller.slurmctld.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.controller.slurmctld.appArmorProfile | quote }}
port: {{ default 6817 .Values.slurmNodes.controller.slurmctld.port }}
resources:
cpu: {{ required ".Values.slurmNodes.controller.slurmctld.resources.cpu must be provided." .Values.slurmNodes.controller.slurmctld.resources.cpu | quote}}
memory: {{ required ".Values.slurmNodes.controller.slurmctld.resources.memory must be provided." .Values.slurmNodes.controller.slurmctld.resources.memory | quote}}
ephemeral-storage: {{ required ".Values.slurmNodes.controller.slurmctld.resources.ephemeralStorage must be provided." .Values.slurmNodes.controller.slurmctld.resources.ephemeralStorage | quote}}
munge:
image: {{ required "munge image" .Values.images.munge | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.controller.munge.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.controller.munge.appArmorProfile | quote }}
resources:
cpu: {{ required ".Values.slurmNodes.controller.munge.resources.cpu must be provided." .Values.slurmNodes.controller.munge.resources.cpu | quote}}
memory: {{ required ".Values.slurmNodes.controller.munge.resources.memory must be provided." .Values.slurmNodes.controller.munge.resources.memory | quote}}
Expand All @@ -143,6 +155,8 @@ spec:
k8sNodeFilterName: {{ required ".Values.slurmNodes.worker.k8sNodeFilterName must be provided." .Values.slurmNodes.worker.k8sNodeFilterName | quote }}
slurmd:
image: {{ required "slurmd image" .Values.images.slurmd | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.worker.slurmd.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.worker.slurmd.appArmorProfile | quote }}
port: {{ default 6818 .Values.slurmNodes.worker.slurmd.port }}
resources:
cpu: {{ required ".Values.slurmNodes.worker.slurmd.resources.cpu must be provided." .Values.slurmNodes.worker.slurmd.resources.cpu | quote}}
Expand All @@ -151,6 +165,8 @@ spec:
nvidia.com/gpu: {{ required ".Values.slurmNodes.worker.slurmd.resources.gpu must be provided." .Values.slurmNodes.worker.slurmd.resources.gpu | quote }}
munge:
image: {{ required "munge image" .Values.images.munge | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.worker.munge.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.worker.munge.appArmorProfile | quote }}
resources:
cpu: {{ required ".Values.slurmNodes.worker.munge.resources.cpu must be provided." .Values.slurmNodes.worker.munge.resources.cpu | quote}}
memory: {{ required ".Values.slurmNodes.worker.munge.resources.memory must be provided." .Values.slurmNodes.worker.munge.resources.memory | quote}}
Expand All @@ -170,6 +186,8 @@ spec:
k8sNodeFilterName: {{ required ".Values.slurmNodes.login.k8sNodeFilterName must be provided." .Values.slurmNodes.login.k8sNodeFilterName | quote }}
sshd:
image: {{ required "sshd image" .Values.images.sshd | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.login.sshd.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.login.sshd.appArmorProfile | quote }}
port: {{ default 22 .Values.slurmNodes.login.sshd.port }}
resources:
cpu: {{ required ".Values.slurmNodes.login.sshd.resources.cpu must be provided." .Values.slurmNodes.login.sshd.resources.cpu | quote}}
Expand All @@ -189,6 +207,8 @@ spec:
{{- end }}
munge:
image: {{ required "sshd munge" .Values.images.munge | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.login.munge.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.login.munge.appArmorProfile | quote }}
resources:
cpu: {{ required ".Values.slurmNodes.login.munge.resources.cpu must be provided." .Values.slurmNodes.login.munge.resources.cpu | quote}}
memory: {{ required ".Values.slurmNodes.login.munge.resources.memory must be provided." .Values.slurmNodes.login.munge.resources.memory | quote}}
Expand All @@ -204,8 +224,12 @@ spec:
k8sNodeFilterName: {{ required ".Values.slurmNodes.exporter.k8sNodeFilterName must be provided." .Values.slurmNodes.exporter.k8sNodeFilterName | quote }}
exporter:
image: {{ required "exporter image" .Values.images.exporter | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.exporter.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.exporter.appArmorProfile | quote }}
munge:
image: {{ required "munge image" .Values.images.munge | quote }}
imagePullPolicy: {{ default "IfNotPresent" .Values.slurmNodes.exporter.munge.imagePullPolicy | quote }}
appArmorProfile: {{ default "unconfined" .Values.slurmNodes.exporter.munge.appArmorProfile | quote }}
resources:
cpu: {{ required ".Values.slurmNodes.controller.munge.resources.cpu must be provided." .Values.slurmNodes.controller.munge.resources.cpu | quote}}
memory: {{ required ".Values.slurmNodes.controller.munge.resources.memory must be provided." .Values.slurmNodes.controller.munge.resources.memory | quote}}
Expand Down
Loading

0 comments on commit 3ec7040

Please sign in to comment.