Skip to content

Commit

Permalink
Update example configs and add comments to the config schema (#28)
Browse files Browse the repository at this point in the history
Resolves [Feature #25](#25)
  • Loading branch information
cartalla authored May 23, 2022
1 parent d91aaba commit e880fc8
Show file tree
Hide file tree
Showing 17 changed files with 481 additions and 213 deletions.
4 changes: 2 additions & 2 deletions source/cdk/cdk_slurm_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def get_config(self, context_var, default_path):
from schema import SchemaError
region = self.node.try_get_context('region')
try:
config_parameters = check_schema(config_parameters, [region])
config_parameters = check_schema(config_parameters)
except SchemaError:
logger.exception(f"Invalid config file: {config_file_path}")
exit(1)
Expand Down Expand Up @@ -378,7 +378,7 @@ def check_config(self):
from config_schema import check_schema
from schema import SchemaError
try:
validated_config = check_schema(self.config, [self.config['Region']])
validated_config = check_schema(self.config)
except SchemaError:
logger.exception(f"Invalid config")
exit(1)
Expand Down
253 changes: 217 additions & 36 deletions source/cdk/config_schema.py

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions source/resources/config/default_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
#====================================================================
# Sample configuration that creates a minimal Slurm cluster
#
# Shows all available configuration options
# Note that CentOS 8 has been discontinued and support has been removed.
# Uses arm64 architecture for SlurmCtl and SlurmDbd by default.
# No SlurmDbd in this configuration.
#
# Defaults and valid configuration options are in source/cdk/config_schema.py.
# Command line values override values in the config file.
#====================================================================

StackName: slurmminimal
Expand Down
7 changes: 6 additions & 1 deletion source/resources/config/slurm_all_instance_types.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
---
#====================================================================
# Create a minimal cluster with all instance types
#
# Defaults and valid configuration options are in source/cdk/config_schema.py.
# Command line values override values in the config file.
#====================================================================

StackName: slurmalltypes

Expand Down Expand Up @@ -41,4 +46,4 @@ slurm:

# Use defaults from schema
SlurmCtl: {}
storage: {'efs': {}}
storage: {'zfs': {}}
8 changes: 7 additions & 1 deletion source/resources/config/slurm_all_os.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
---
#====================================================================
# Slurm cluster with all supported OS distributions and versions.
#
# Note that CentOS 8 has been discontinued and support has been removed.
#
# Defaults and valid configuration options are in source/cdk/config_schema.py.
# Command line values override values in the config file.
#====================================================================

StackName: slurmallos

Expand Down Expand Up @@ -34,4 +40,4 @@ slurm:

# Use defaults from schema
SlurmCtl: {}
storage: {'efs': {}}
storage: {'zfs': {}}
8 changes: 7 additions & 1 deletion source/resources/config/slurm_alma_linux.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
---
#====================================================================
# Slurm cluster to test Alma Linux support
#
# Defaults and valid configuration options are in source/cdk/config_schema.py.
# Command line values override values in the config file.
#====================================================================


StackName: slurmalma

Expand All @@ -26,4 +32,4 @@ slurm:

# Use defaults from schema
SlurmCtl: {}
storage: {'efs': {}}
storage: {'zfs': {}}
40 changes: 22 additions & 18 deletions source/resources/config/slurm_eda.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
---
#====================================================================
# Slurm cluster for EDA
#
# Redundant controllers and typical instances used by EDA.
# Uses CentOS 7 and AlmaLinux 8 and both x86_64 and arm64 architectures.
#
# Defaults and valid configuration options are in source/cdk/config_schema.py.
# Command line values override values in the config file.
#====================================================================

StackName: slurmeda

slurm:
# High level configuration

SlurmVersion: "21.08.5"

SlurmCtl:
# For high availability configure multiple controllers
NumberOfControllers: 2

# The accounting database is required to enable fairshare scheduling
# It is managed by the Slurm Database Daemon (slurmdbd) instance
# This instance can be created as part of the cluster or can use an existing instance.
SlurmDbd: {}

# InstanceConfig:
# Configure the instances used by the cluster
# Configure typical EDA instance types
# A partition will be created for each combination of Base OS, Architecture, and Spot
InstanceConfig:
UseSpot: true
Expand All @@ -33,34 +33,38 @@ slurm:
Include:
MaxSizeOnly: false
InstanceFamilies:
- 'c5' # Mixed depending on size
#- 'c5' # Mixed depending on size
#- 'c5a' # AMD EPYC 7R32 3.3 GHz
#- 'c5ad' # AMD EPYC 7R32 3.3 GHz
#- 'c6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'c6a' # AMD EPYC 7R13 Processor 3.6 GHz
- 'c6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'c6g' # AWS Graviton2 Processor 2.5 GHz
#- 'c6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'f1' # Intel Xeon E5-2686 v4 (Broadwell) 2.3 GHz
- 'm5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'm5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'm5d' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'm5a' # AMD EPYC 7571 2.5 GHz
#- 'm5ad' # AMD EPYC 7571 2.5 GHz
#- 'm5zn' # Intel Xeon Platinum 8252 4.5 GHz
#- 'm6a' # AMD EPYC 7R13 Processor 3.6 GHz
#- 'm6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'm5zn' # Intel Xeon Platinum 8252 4.5 GHz
- 'm6a' # AMD EPYC 7R13 Processor 3.6 GHz
- 'm6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'm6g' # AWS Graviton2 Processor 2.5 GHz
#- 'm6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'r5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'r5d' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'r5b' # Intel Xeon Platinum 8259 (Cascade Lake) 3.1 GHz
#- 'r5a' # AMD EPYC 7571 2.5 GHz
#- 'r5ad' # AMD EPYC 7571 2.5 GHz
#- 'r6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz 1TB
#- 'r6g' # AWS Graviton2 Processor 2.5 GHz
- 'r6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz 1TB
- 'r6g' # AWS Graviton2 Processor 2.5 GHz
#- 'r6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'x1' # High Frequency Intel Xeon E7-8880 v3 (Haswell) 2.3 GHz 2TB
#- 'x1e' # High Frequency Intel Xeon E7-8880 v3 (Haswell) 2.3 GHz 4TB
#- 'x2gd' # AWS Graviton2 Processor 2.5 GHz 1TB
#- 'z1d' # Intel Xeon Platinum 8151 4.0 GHz
- 'x2gd' # AWS Graviton2 Processor 2.5 GHz 1TB
- 'x2idn' # Intel Xeon Scalable (Ice Lake) 3.5 GHz 2 TB
- 'x2iedn' # Intel Xeon Scalable (Ice Lake) 3.5 GHz 4 TB
- 'x2iezn' # Intel Xeon Platinum 8252 4.5 GHz 1.5 TB
- 'z1d' # Intel Xeon Platinum 8151 4.0 GHz
#- 'u-6tb1' # Intel Xeon Scalable (Skylake) 6 TB
#- 'u-9tb1' # Intel Xeon Scalable (Skylake) 9 TB
#- 'u-12tb1' # Intel Xeon Scalable (Skylake) 12 TB
Expand All @@ -75,4 +79,4 @@ slurm:
- '.*\.metal'

# Use defaults from schema
storage: {'efs': {}}
storage: {'zfs': {}}
43 changes: 26 additions & 17 deletions source/resources/config/slurm_eda_az1.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,38 @@
---
#====================================================================
# Federated Slurm cluster for EDA
#
# This is the first AZ that other AZs will reference.
# Other federated clusters will share this cluster's SlurmDbd instance.
# Redundant controllers and typical instances used by EDA.
# Uses CentOS 7 and AlmaLinux 8 and both x86_64 and arm64 architectures.
#
# Defaults and valid configuration options are in source/cdk/config_schema.py.
# Command line values override values in the config file.
#====================================================================

StackName: slurmedaaz1

#SubnetId: subnet-xxxxxxxxxxxxxxxxx # PrivateSubnet1
# Add your subnet id
SubnetId: subnet-xxxxxxxxxxxxxxxxx # PrivateSubnet1

slurm:
#MungeKeySsmParameter: "/slurm/munge_key"
# This is optional, but highly recommended
#ErrorSnsTopicArn: arn:aws:sns:{{region}}:{AccountId}:{TopicName}

slurm:
SlurmCtl:
# For high availability configure multiple controllers
NumberOfControllers: 2

# The accounting database is required to enable fairshare scheduling
# It is managed by the Slurm Database Daemon (slurmdbd) instance
# This instance can be created as part of the cluster or can use an existing instance.
SlurmDbd: {}

Federation:
Name: slurmeda
FederatedClusterStackNames: []

# InstanceConfig:
# Configure the instances used by the cluster
# Configure typical EDA instance types
# A partition will be created for each combination of Base OS, Architecture, and Spot
InstanceConfig:
UseSpot: true
Expand All @@ -40,34 +45,38 @@ slurm:
Include:
MaxSizeOnly: false
InstanceFamilies:
- 'c5' # Mixed depending on size
#- 'c5' # Mixed depending on size
#- 'c5a' # AMD EPYC 7R32 3.3 GHz
#- 'c5ad' # AMD EPYC 7R32 3.3 GHz
#- 'c6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'c6a' # AMD EPYC 7R13 Processor 3.6 GHz
- 'c6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'c6g' # AWS Graviton2 Processor 2.5 GHz
#- 'c6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'f1' # Intel Xeon E5-2686 v4 (Broadwell) 2.3 GHz
- 'm5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'm5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'm5d' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'm5a' # AMD EPYC 7571 2.5 GHz
#- 'm5ad' # AMD EPYC 7571 2.5 GHz
#- 'm5zn' # Intel Xeon Platinum 8252 4.5 GHz
#- 'm6a' # AMD EPYC 7R13 Processor 3.6 GHz
#- 'm6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'm5zn' # Intel Xeon Platinum 8252 4.5 GHz
- 'm6a' # AMD EPYC 7R13 Processor 3.6 GHz
- 'm6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'm6g' # AWS Graviton2 Processor 2.5 GHz
#- 'm6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'r5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'r5d' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'r5b' # Intel Xeon Platinum 8259 (Cascade Lake) 3.1 GHz
#- 'r5a' # AMD EPYC 7571 2.5 GHz
#- 'r5ad' # AMD EPYC 7571 2.5 GHz
#- 'r6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz 1TB
#- 'r6g' # AWS Graviton2 Processor 2.5 GHz
- 'r6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz 1TB
- 'r6g' # AWS Graviton2 Processor 2.5 GHz
#- 'r6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'x1' # High Frequency Intel Xeon E7-8880 v3 (Haswell) 2.3 GHz 2TB
#- 'x1e' # High Frequency Intel Xeon E7-8880 v3 (Haswell) 2.3 GHz 4TB
#- 'x2gd' # AWS Graviton2 Processor 2.5 GHz 1TB
#- 'z1d' # Intel Xeon Platinum 8151 4.0 GHz
- 'x2gd' # AWS Graviton2 Processor 2.5 GHz 1TB
- 'x2idn' # Intel Xeon Scalable (Ice Lake) 3.5 GHz 2 TB
- 'x2iedn' # Intel Xeon Scalable (Ice Lake) 3.5 GHz 4 TB
- 'x2iezn' # Intel Xeon Platinum 8252 4.5 GHz 1.5 TB
- 'z1d' # Intel Xeon Platinum 8151 4.0 GHz
#- 'u-6tb1' # Intel Xeon Scalable (Skylake) 6 TB
#- 'u-9tb1' # Intel Xeon Scalable (Skylake) 9 TB
#- 'u-12tb1' # Intel Xeon Scalable (Skylake) 12 TB
Expand All @@ -82,4 +91,4 @@ slurm:
- '.*\.metal'

# Use defaults from schema
storage: {'efs': {}}
storage: {'zfs': {}}
56 changes: 30 additions & 26 deletions source/resources/config/slurm_eda_az2.yml
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
---
#====================================================================
# Federated Slurm cluster for EDA
#
# This is the cluster for the 2nd AZ; it must be created after the 1st AZ's cluster.
# Shares the SlurmDbd instance from the 1st AZ's cluster.
# Redundant controllers and typical instances used by EDA.
# Uses CentOS 7 and AlmaLinux 8 and both x86_64 and arm64 architectures.
#
# Defaults and valid configuration options are in source/cdk/config_schema.py.
# Command line values override values in the config file.
#====================================================================

StackName: slurmedaaz2

#SubnetId: subnet-xxxxxxxxxxxxxxxxx # PrivateSubnet2
# Add your subnet id
SubnetId: subnet-xxxxxxxxxxxxxxxxx # PrivateSubnet2

# This is optional, but highly recommended
#ErrorSnsTopicArn: arn:aws:sns:{{region}}:{AccountId}:{TopicName}

slurm:
#MungeKeySsmParameter: "/slurm/munge_key"

SlurmCtl:
# For high availability configure multiple controllers
NumberOfControllers: 2

# The accounting database is required to enable fairshare scheduling
# It is managed by the Slurm Database Daemon (slurmdbd) instance
# This instance can be created as part of the cluster or can use an existing instance.
# Re-use the SlurmDbd instance from slurmedaaz1
ExistingSlurmDbd:
StackName: slurmedaaz1

Expand All @@ -31,8 +33,7 @@ slurm:
FederatedClusterStackNames:
- slurmedaaz1

# InstanceConfig:
# Configure the instances used by the cluster
# Configure typical EDA instance types
# A partition will be created for each combination of Base OS, Architecture, and Spot
InstanceConfig:
UseSpot: true
Expand All @@ -45,40 +46,43 @@ slurm:
Include:
MaxSizeOnly: false
InstanceFamilies:
- 'c5' # Mixed depending on size
#- 'c5a' # AMD EPYC 7R32 3.3 GHz
#- 'c5' # Mixed depending on size
- 'c5a' # AMD EPYC 7R32 3.3 GHz
#- 'c5ad' # AMD EPYC 7R32 3.3 GHz
#- 'c6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'c6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'c6g' # AWS Graviton2 Processor 2.5 GHz
#- 'c6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'f1' # Intel Xeon E5-2686 v4 (Broadwell) 2.3 GHz
- 'm5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'm5d' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'm5a' # AMD EPYC 7571 2.5 GHz
- 'm5a' # AMD EPYC 7571 2.5 GHz
#- 'm5ad' # AMD EPYC 7571 2.5 GHz
#- 'm5zn' # Intel Xeon Platinum 8252 4.5 GHz
#- 'm6a' # AMD EPYC 7R13 Processor 3.6 GHz
#- 'm6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'm5zn' # Intel Xeon Platinum 8252 4.5 GHz
- 'm6a' # AMD EPYC 7R13 Processor 3.6 GHz
- 'm6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz
- 'm6g' # AWS Graviton2 Processor 2.5 GHz
#- 'm6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'r5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'r5d' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
- 'r5' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
- 'r5d' # Intel Xeon Platinum 8175 (Skylake) 3.1 GHz
#- 'r5b' # Intel Xeon Platinum 8259 (Cascade Lake) 3.1 GHz
#- 'r5a' # AMD EPYC 7571 2.5 GHz
#- 'r5ad' # AMD EPYC 7571 2.5 GHz
#- 'r6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz 1TB
#- 'r6g' # AWS Graviton2 Processor 2.5 GHz
- 'r5a' # AMD EPYC 7571 2.5 GHz
- 'r5ad' # AMD EPYC 7571 2.5 GHz
- 'r6i' # Intel Xeon 8375C (Ice Lake) 3.5 GHz 1TB
- 'r6g' # AWS Graviton2 Processor 2.5 GHz
#- 'r6gd' # AWS Graviton2 Processor 2.5 GHz
#- 'x1' # High Frequency Intel Xeon E7-8880 v3 (Haswell) 2.3 GHz 2TB
#- 'x1e' # High Frequency Intel Xeon E7-8880 v3 (Haswell) 2.3 GHz 4TB
#- 'x2gd' # AWS Graviton2 Processor 2.5 GHz 1TB
#- 'z1d' # Intel Xeon Platinum 8151 4.0 GHz
- 'x2gd' # AWS Graviton2 Processor 2.5 GHz 1TB
- 'x2idn' # Intel Xeon Scalable (Ice Lake) 3.5 GHz 2 TB
- 'x2iedn' # Intel Xeon Scalable (Ice Lake) 3.5 GHz 4 TB
- 'x2iezn' # Intel Xeon Platinum 8252 4.5 GHz 1.5 TB
- 'z1d' # Intel Xeon Platinum 8151 4.0 GHz
#- 'u-6tb1' # Intel Xeon Scalable (Skylake) 6 TB
#- 'u-9tb1' # Intel Xeon Scalable (Skylake) 9 TB
#- 'u-12tb1' # Intel Xeon Scalable (Skylake) 12 TB
InstanceTypes: []
InstanceTypes:
#- 'c5\.(l|x|2|4|9|18).*' # Intel Xeon Platinum 8124M 3.4 GHz
#- 'c5\.(12|24).*' # Intel Xeon Platinum 8275L 3.6 GHz
- 'c5\.(12|24).*' # Intel Xeon Platinum 8275L 3.6 GHz
#- 'c5d\.(l|x|2|4|9|18).*' # Intel Xeon Platinum 8124M 3.4 GHz
#- 'c5d\.(12|24).*' # Intel Xeon Platinum 8275L 3.6 GHz
Exclude:
Expand All @@ -87,4 +91,4 @@ slurm:
- '.*\.metal'

# Use defaults from schema
storage: {'efs': {}}
storage: {'zfs': {}}
Loading

0 comments on commit e880fc8

Please sign in to comment.