Skip to content

Commit

Permalink
Merge pull request #389 from nebius/many-small-changes-aggregated/1
Browse files Browse the repository at this point in the history
Pre-create enroot credentials, exclude enroot bind-mounts from motd, make Docker mount /dev/infiniband, store /tmp on disk and add tmpfs /mnt/memory
  • Loading branch information
rdjjke authored Feb 13, 2025
2 parents e1adc1b + 16feb1a commit 1b65493
Show file tree
Hide file tree
Showing 41 changed files with 274 additions and 119 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.17.0
1.18.0
20 changes: 19 additions & 1 deletion api/v1/slurmcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ type SlurmClusterSpec struct {
// +kubebuilder:validation:Optional
// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", epilog: "", prolog: "", taskPluginParam: "", maxJobCount: 10000, minJobAge: 86400}
SlurmConfig SlurmConfig `json:"slurmConfig,omitempty"`

// MPIConfig represents the PMIx configuration in mpi.conf. Not all options are supported.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default={pmixEnv: "OMPI_MCA_btl_tcp_if_include=eth0"}
MPIConfig MPIConfig `json:"mpiConfig,omitempty"`

// Generate and set default AppArmor profile for the Slurm worker and login nodes. The Security Profiles Operator must be installed.
//
// +kubebuilder:default=false
Expand Down Expand Up @@ -143,6 +150,16 @@ type SlurmConfig struct {
MinJobAge *int32 `json:"minJobAge,omitempty"`
}

// MPIConfig represents the PMIx configuration in mpi.conf. Not all options are supported.
type MPIConfig struct {
// Semicolon separated list of environment variables to be set in job environments to be used by PMIx.
// Defaults to "OMPI_MCA_btl_tcp_if_include=eth0" to avoid "lo" and "docker" interfaces to be selected by OpenMPI.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default="OMPI_MCA_btl_tcp_if_include=eth0"
PMIxEnv string `json:"pmixEnv,omitempty"`
}

type PartitionConfiguration struct {
// ConfigType
// +kubebuilder:validation:Enum=default;custom
Expand Down Expand Up @@ -319,7 +336,8 @@ type NCCLArguments struct {
// +kubebuilder:default="0"
ThresholdMoreThan string `json:"thresholdMoreThan,omitempty"`

// UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test
// UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test.
// According to NVIDIA these env vars should be used only for debugging.
// https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html
//
// +kubebuilder:validation:Optional
Expand Down
16 changes: 16 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 23 additions & 7 deletions config/crd/bases/slurm.nebius.ai_slurmclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1082,6 +1082,19 @@ spec:
- downscaleAndOverwritePopulateJail
- skipPopulateJail
type: string
mpiConfig:
default:
pmixEnv: OMPI_MCA_btl_tcp_if_include=eth0
description: MPIConfig represents the PMIx configuration in mpi.conf.
Not all options are supported.
properties:
pmixEnv:
default: OMPI_MCA_btl_tcp_if_include=eth0
description: |-
Semicolon separated list of environment variables to be set in job environments to be used by PMIx.
Defaults to "OMPI_MCA_btl_tcp_if_include=eth0" to avoid "lo" and "docker" interfaces to be selected by OpenMPI.
type: string
type: object
ncclSettings:
description: NCCLSettings
properties:
Expand Down Expand Up @@ -1205,7 +1218,8 @@ spec:
useInfiniband:
default: true
description: |-
UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test
UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test.
According to NVIDIA these env vars should be used only for debugging.
https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html
type: boolean
type: object
Expand Down Expand Up @@ -1487,9 +1501,9 @@ spec:
defCpuPerGPU: 16
defMemPerNode: 1228800
epilog: ""
prolog: ""
maxJobCount: 10000
minJobAge: 86400
prolog: ""
taskPluginParam: ""
description: SlurmConfig represents the Slurm configuration in slurm.conf.
Not all options are supported.
Expand Down Expand Up @@ -1519,11 +1533,8 @@ spec:
type: integer
epilog:
default: ""
description: The Epilog script runs after a job completes
type: string
prolog:
default: ""
description: The Prolog script runs before a job starts on the compute node
description: Defines specific file to run the epilog when job
ends. Default value is no epilog
type: string
maxJobCount:
default: 10000
Expand All @@ -1536,6 +1547,11 @@ spec:
time
format: int32
type: integer
prolog:
default: ""
description: Defines specific file to run the prolog when job
starts. Default value is no prolog
type: string
taskPluginParam:
default: ""
description: Additional parameters for the task plugin
Expand Down
2 changes: 1 addition & 1 deletion config/crd/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# It should be run by config/default
resources:
- bases/slurm.nebius.ai_slurmclusters.yaml
- bases/slurm.nebius.ai_nodeconfigurators.yaml
#- bases/slurm.nebius.ai_nodeconfigurators.yaml

#+kubebuilder:scaffold:crdkustomizeresource

Expand Down
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ resources:
images:
- name: controller
newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
newTag: 1.17.0
newTag: 1.18.0
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ spec:
value: "false"
- name: SLURM_OPERATOR_WATCH_NAMESPACES
value: "*"
image: controller:1.17.0
image: controller:1.18.0
imagePullPolicy: Always
name: manager
securityContext:
Expand Down
4 changes: 2 additions & 2 deletions helm/slurm-cluster-storage/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: helm-slurm-cluster-storage
description: A Helm chart for Kubernetes
type: application
version: "1.17.0"
appVersion: "1.17.0"
version: "1.18.0"
appVersion: "1.18.0"
2 changes: 2 additions & 0 deletions helm/slurm-cluster-storage/templates/jail-pvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ kind: PersistentVolumeClaim
metadata:
namespace: {{ .Release.Namespace }}
name: {{ include "slurm-cluster-storage.volume.jail.pvc" . }}
annotations:
k8up.io/backup: 'true'
spec:
storageClassName: {{ include "slurm-cluster-storage.volume.jail.storageClass" . }}
resources:
Expand Down
4 changes: 2 additions & 2 deletions helm/slurm-cluster/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ apiVersion: v2
name: helm-slurm-cluster
description: A Helm chart for Kubernetes
type: application
version: "1.17.0"
appVersion: "1.17.0"
version: "1.18.0"
appVersion: "1.18.0"
kubeVersion: ">=1.29.0-0"
18 changes: 9 additions & 9 deletions helm/slurm-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -408,14 +408,14 @@ telemetry: {}
# otelCollectorPort: 8429

images:
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.17.0-jammy-slurm24.05.5"
slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.17.0-jammy-slurm24.05.5"
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.17.0-jammy-slurm24.05.5"
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.17.0-jammy-slurm24.05.5"
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.17.0-jammy-slurm24.05.5"
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.17.0-jammy-slurm24.05.5"
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.17.0-jammy-slurm24.05.5"
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.17.0-jammy-slurm24.05.5"
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.17.0-jammy-slurm24.05.5"
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.18.0-jammy-slurm24.05.5"
slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.18.0-jammy-slurm24.05.5"
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.18.0-jammy-slurm24.05.5"
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.18.0-jammy-slurm24.05.5"
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.18.0-jammy-slurm24.05.5"
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.18.0-jammy-slurm24.05.5"
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.18.0-jammy-slurm24.05.5"
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.18.0-jammy-slurm24.05.5"
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.18.0-jammy-slurm24.05.5"
mariaDB: "docker-registry1.mariadb.com/library/mariadb:11.4.3"
rebooter: "cr.eu-north1.nebius.cloud/soperator/rebooter:1.17.0"
4 changes: 2 additions & 2 deletions helm/soperator-crds/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ apiVersion: v2
name: helm-soperator-crds
description: A Helm chart for Kubernetes
type: application
version: 1.17.0
appVersion: "1.17.0"
version: 1.18.0
appVersion: "1.18.0"
kubeVersion: ">=1.29.0-0"
30 changes: 23 additions & 7 deletions helm/soperator-crds/templates/slurmcluster-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,19 @@ spec:
- downscaleAndOverwritePopulateJail
- skipPopulateJail
type: string
mpiConfig:
default:
pmixEnv: OMPI_MCA_btl_tcp_if_include=eth0
description: MPIConfig represents the PMIx configuration in mpi.conf.
Not all options are supported.
properties:
pmixEnv:
default: OMPI_MCA_btl_tcp_if_include=eth0
description: |-
Semicolon separated list of environment variables to be set in job environments to be used by PMIx.
Defaults to "OMPI_MCA_btl_tcp_if_include=eth0" to avoid "lo" and "docker" interfaces to be selected by OpenMPI.
type: string
type: object
ncclSettings:
description: NCCLSettings
properties:
Expand Down Expand Up @@ -1204,7 +1217,8 @@ spec:
useInfiniband:
default: true
description: |-
UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test
UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test.
According to NVIDIA these env vars should be used only for debugging.
https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html
type: boolean
type: object
Expand Down Expand Up @@ -1486,9 +1500,9 @@ spec:
defCpuPerGPU: 16
defMemPerNode: 1228800
epilog: ""
prolog: ""
maxJobCount: 10000
minJobAge: 86400
prolog: ""
taskPluginParam: ""
description: SlurmConfig represents the Slurm configuration in slurm.conf.
Not all options are supported.
Expand Down Expand Up @@ -1518,11 +1532,8 @@ spec:
type: integer
epilog:
default: ""
description: The Epilog script runs after a job completes
type: string
prolog:
default: ""
description: The Prolog script runs before a job starts on the compute node
description: Defines specific file to run the epilog when job
ends. Default value is no epilog
type: string
maxJobCount:
default: 10000
Expand All @@ -1535,6 +1546,11 @@ spec:
time
format: int32
type: integer
prolog:
default: ""
description: Defines specific file to run the prolog when job
starts. Default value is no prolog
type: string
taskPluginParam:
default: ""
description: Additional parameters for the task plugin
Expand Down
4 changes: 2 additions & 2 deletions helm/soperator/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ apiVersion: v2
name: helm-soperator
description: A Helm chart for Kubernetes
type: application
version: 1.17.0
appVersion: "1.17.0"
version: 1.18.0
appVersion: "1.18.0"
kubeVersion: ">=1.29.0-0"
30 changes: 23 additions & 7 deletions helm/soperator/crds/slurmcluster-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,19 @@ spec:
- downscaleAndOverwritePopulateJail
- skipPopulateJail
type: string
mpiConfig:
default:
pmixEnv: OMPI_MCA_btl_tcp_if_include=eth0
description: MPIConfig represents the PMIx configuration in mpi.conf.
Not all options are supported.
properties:
pmixEnv:
default: OMPI_MCA_btl_tcp_if_include=eth0
description: |-
Semicolon separated list of environment variables to be set in job environments to be used by PMIx.
Defaults to "OMPI_MCA_btl_tcp_if_include=eth0" to avoid "lo" and "docker" interfaces to be selected by OpenMPI.
type: string
type: object
ncclSettings:
description: NCCLSettings
properties:
Expand Down Expand Up @@ -1204,7 +1217,8 @@ spec:
useInfiniband:
default: true
description: |-
UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test
UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test.
According to NVIDIA these env vars should be used only for debugging.
https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html
type: boolean
type: object
Expand Down Expand Up @@ -1486,9 +1500,9 @@ spec:
defCpuPerGPU: 16
defMemPerNode: 1228800
epilog: ""
prolog: ""
maxJobCount: 10000
minJobAge: 86400
prolog: ""
taskPluginParam: ""
description: SlurmConfig represents the Slurm configuration in slurm.conf.
Not all options are supported.
Expand Down Expand Up @@ -1518,11 +1532,8 @@ spec:
type: integer
epilog:
default: ""
description: The Epilog script runs after a job completes
type: string
prolog:
default: ""
description: The Prolog script runs before a job starts on the compute node
description: Defines specific file to run the epilog when job
ends. Default value is no epilog
type: string
maxJobCount:
default: 10000
Expand All @@ -1535,6 +1546,11 @@ spec:
time
format: int32
type: integer
prolog:
default: ""
description: Defines specific file to run the prolog when job
starts. Default value is no prolog
type: string
taskPluginParam:
default: ""
description: Additional parameters for the task plugin
Expand Down
2 changes: 1 addition & 1 deletion helm/soperator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ controllerManager:
slurmOperatorWatchNamespaces: '*'
image:
repository: cr.eu-north1.nebius.cloud/soperator/slurm-operator
tag: 1.17.0
tag: 1.18.0
imagePullPolicy: Always
resources:
limits:
Expand Down
Loading

0 comments on commit 1b65493

Please sign in to comment.