Remove Lustre file system support (#29)
With the addition of zfs and ontap, lustre is really not a good option for
the slurm file system.
cartalla authored May 23, 2022
1 parent e880fc8 commit 503325d
Showing 2 changed files with 2 additions and 67 deletions.
46 changes: 1 addition & 45 deletions source/cdk/cdk_slurm_stack.py
@@ -529,6 +529,7 @@ def create_security_groups(self):
Tags.of(self.zfs_sg).add("Name", f"{self.stack_name}-ZfsSG")
self.suppress_cfn_nag(self.zfs_sg, 'W29', 'Egress port range used to block all egress')

# Compute nodes may use lustre file systems, so create a security group with the required ports.
self.lustre_sg = ec2.SecurityGroup(self, "LustreSG", vpc=self.vpc, allow_all_outbound=False, description="Lustre Security Group")
Tags.of(self.lustre_sg).add("Name", f"{self.stack_name}-LustreSG")
self.suppress_cfn_nag(self.lustre_sg, 'W29', 'Egress port range used to block all egress')
@@ -887,51 +888,6 @@ def create_file_system(self):

self.file_system_mount_command = f"sudo mkdir -p {self.config['slurm']['storage']['mount_path']} && sudo yum -y install nfs-utils && sudo mount -t {self.file_system_type} -o {self.file_system_options} {self.file_system_mount_src} {self.config['slurm']['storage']['mount_path']}"

elif self.config['slurm']['storage']['provider'] == "lustre":
deployment_types = {
'PERSISTENT_1': fsx.LustreDeploymentType.PERSISTENT_1,
'SCRATCH_1': fsx.LustreDeploymentType.SCRATCH_1,
'SCRATCH_2': fsx.LustreDeploymentType.SCRATCH_2,
}
deployment_type = deployment_types[self.config['slurm']['storage']['lustre']['deployment_type']]

if deployment_type == fsx.LustreDeploymentType.PERSISTENT_1:
per_unit_storage_throughput = self.config['slurm']['storage']['lustre']['per_unit_storage_throughput']
if per_unit_storage_throughput not in [50, 100, 200]:
raise ValueError(f"Invalid per_unit_storage_throughput: {per_unit_storage_throughput}")
else:
per_unit_storage_throughput = None

lustre_configuration = fsx.LustreConfiguration(
deployment_type = deployment_type,
per_unit_storage_throughput = per_unit_storage_throughput,
)

self.file_system = fsx.LustreFileSystem(
self, "FSxLustre",
lustre_configuration = lustre_configuration,
vpc = self.vpc,
vpc_subnet = self.vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE).subnets[0],
kms_key = kms_key,
removal_policy = removal_policies[self.config['slurm']['storage']['removal_policy']],
security_group = self.lustre_sg,
storage_capacity_gib = self.config['slurm']['storage']['lustre']['storage_capacity'],
)

self.file_system_dependency = self.file_system

self.file_system_port = 988

self.file_system_type = 'lustre'
self.file_system_dns = self.file_system.dns_name
self.file_system_mount_name = self.file_system.mount_name

self.file_system_mount_source = f"{self.file_system_dns}@tcp:/{self.file_system_mount_name}"

self.file_system_options = 'noatime,flock'

self.file_system_mount_command = f"sudo mkdir -p {self.config['slurm']['storage']['mount_path']} && sudo mount -t lustre -o {self.file_system_options} {self.file_system_mount_source} {self.config['slurm']['storage']['mount_path']}"

elif self.config['slurm']['storage']['provider'] == "ontap":
if 'iops' in self.config['slurm']['storage']['ontap']:
disk_iops_configuration = fsx.CfnFileSystem.DiskIopsConfigurationProperty(
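The branch deleted above was the only place create_file_system built an FSx for Lustre file system; the providers that remain all mount over NFS, following the same pattern as the efs branch shown at the top of the hunk. A minimal sketch of that pattern for illustration only (the helper name and example arguments are hypothetical, not part of this commit):

```python
# Illustrative sketch of the NFS mount-command pattern that remains in
# create_file_system once the lustre branch is gone. The helper name and
# the example values below are hypothetical.
def build_nfs_mount_command(file_system_type: str, options: str,
                            mount_src: str, mount_path: str) -> str:
    # Same shape as the efs branch above: create the mount point,
    # install the NFS client, then mount the export.
    return (
        f"sudo mkdir -p {mount_path}"
        f" && sudo yum -y install nfs-utils"
        f" && sudo mount -t {file_system_type} -o {options} {mount_src} {mount_path}"
    )

# Example with a hypothetical EFS DNS name:
print(build_nfs_mount_command(
    'nfs4', 'nfsvers=4.1',
    'fs-12345678.efs.us-east-1.amazonaws.com:/',
    '/opt/slurm/example-cluster'))
```

The deleted lustre branch instead mounted with `mount -t lustre` against `{dns_name}@tcp:/{mount_name}` on port 988. The LustreSG security group in the first hunk is kept because, per the comment there, compute nodes may still use Lustre file systems even though the Slurm file system itself can no longer be Lustre.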
23 changes: 1 addition & 22 deletions source/cdk/config_schema.py
@@ -318,7 +318,7 @@
# mount_path:
# Default is /opt/slurm/{{cluster_name}}
Optional('mount_path'): str,
Optional('provider', default='efs'): And(str, lambda s: s in ('efs', 'lustre', 'ontap', 'zfs')),
Optional('provider', default='efs'): And(str, lambda s: s in ('efs', 'ontap', 'zfs')),
#
# removal_policy:
# RETAIN will preserve the EFS even if you delete the stack.
@@ -343,27 +343,6 @@
# https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-efs-filesystem.html#cfn-efs-filesystem-encrypted
Optional('encrypted', default=True): bool,
},
Optional('lustre'): {
# deployment_type
# https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-fsx-filesystem-lustreconfiguration.html#cfn-fsx-filesystem-lustreconfiguration-deploymenttype
Optional('deployment_type', default='SCRATCH_2'): And(str, lambda s: s in ('PERSISTENT_1', 'SCRATCH_1', 'SCRATCH_2')),
#
# drive_cache_type
# Required when storage_type is HDD. https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-fsx-filesystem-lustreconfiguration.html#cfn-fsx-filesystem-lustreconfiguration-drivecachetype
Optional('drive_cache_type', default='NONE'): And(str, lambda s: s in ('NONE', 'READ')),
#
# per_unit_storage_throughput
# Required for the PERSISTENT_1 deployment_type. https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-fsx-filesystem-lustreconfiguration.html#cfn-fsx-filesystem-lustreconfiguration-perunitstoragethroughput
Optional('per_unit_storage_throughput', default=50): int,
#
# storage_capacity
# For SCRATCH_2 and PERSISTENT_1 types, valid values are 1,200, 2,400, then continuing in increments of 2,400 GiB. For SCRATCH_1 deployment types, valid values are 1,200, 2,400, 3,600, then continuing in increments of 3,600 GiB. https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-fsx-filesystem.html#cfn-fsx-filesystem-storagecapacity
Optional('storage_capacity', default=1200): int,
#
# storage_type
# https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-fsx-filesystem.html#cfn-fsx-filesystem-storagetype
Optional('storage_type'): And(str, lambda s: s in ('HDD', 'SSD')),
},
Optional('ontap'): {
Optional('deployment_type', default='SINGLE_AZ_1'): And(str, lambda s: s in ('SINGLE_AZ_1', 'MULTI_AZ_1')),
Optional('storage_capacity', default=1024): And(int, lambda s: s >= 1024 and s <= 196608), # 1024 GiB up to 196,608 GiB (192 TiB)
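With the lustre provider and its option block removed from the schema, a config that still requests it will now fail validation. A minimal sketch of that effect, assuming config_schema.py builds on the PyPI schema package (as the Optional/And pattern above suggests); the snippet is illustrative and not part of this commit:

```python
# Illustrative only: demonstrates the tightened 'provider' option,
# assuming the PyPI "schema" package (Schema/Optional/And) is in use.
from schema import And, Optional, Schema, SchemaError

storage_schema = Schema({
    Optional('provider', default='efs'): And(str, lambda s: s in ('efs', 'ontap', 'zfs')),
}, ignore_extra_keys=True)

print(storage_schema.validate({'provider': 'zfs'}))  # accepted: {'provider': 'zfs'}
print(storage_schema.validate({}))                    # default applied: {'provider': 'efs'}

try:
    storage_schema.validate({'provider': 'lustre'})   # rejected after this commit
except SchemaError as exc:
    print(f"'lustre' is no longer a valid provider: {exc}")
```

Any existing config that sets provider: lustre has to move to efs, ontap, or zfs to pass validation after this change.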
