-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path01-cluster.yaml
51 lines (48 loc) · 2.74 KB
/
01-cluster.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Cost-Optimized EKS cluster for mining with spot GPU instances
# Built to reduce the cost of ML workloads and to increase hash rate.
# This spec creates a cluster on EKS with the following active nodes
# +---------------+---------------------------+------+-------+-----------+------------------+-----------------+
# | Instance Name | GPU series                | GPUs | vCPUs | RAM (GiB) | GPU Memory (GiB) | Spot Price      |
# +---------------+---------------------------+------+-------+-----------+------------------+-----------------+
# | g4dn.xlarge   | NVIDIA T4 Tensor          | 1    | 4     | 16        |                  | $0.157 per Hour |
# | g2.2xlarge    | NVIDIA GRID K520 (Kepler) | 1    | 8     | 64        |                  | $0.195 per Hour |
# | g3s.xlarge    | NVIDIA Tesla M60          | 1    | 4     | 30.5      | 8                | $0.225 per Hour |
# | p2.xlarge     | NVIDIA K80                | 1    | 4     | 61        | 12               | $0.270 per Hour |
# | p3.2xlarge    | NVIDIA Tesla V100         | 1    | 8     | 61        | 16               | $0.918 per Hour |
# +---------------+---------------------------+------+-------+-----------+------------------+-----------------+
# eksctl cluster definition: schema version and resource kind.
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig

metadata:
  # Name of the cluster; change it to whatever you see fit.
  name: cluster-1
  # Choose the region carefully: it significantly affects the cost incurred.
  region: us-east-1
  # Kubernetes version; quoted so it is read as a string, not a float.
  version: "1.18"

# Availability zones to use for the cluster.
availabilityZones: ["us-east-1a", "us-east-1b", "us-east-1c", "us-east-1d"]
vpc:
  clusterEndpoints:
    # Private endpoint access is turned off; nothing on this cluster is secret.
    privateAccess: false
  nat:
    # A NAT gateway is an AWS-managed resource that permits Internet traffic
    # from instances sitting in a private subnet inside your VPC.
    # It is billed 24/7, so it is disabled here to reduce costs.
    gateway: Disable  # other options: HighlyAvailable, Single (default)
managedNodeGroups:
  # Spot-priced node group of g4dn.xlarge GPU instances, sized 2 to 3 nodes.
  - name: gpu-spot-g4dn-xlarge
    spot: true
    minSize: 2
    maxSize: 3
    instanceTypes: ["g4dn.xlarge"]
    amiFamily: AmazonLinux2
    # Node labels. Boolean- and number-looking values are quoted so they
    # stay strings.
    labels:
      workgroup: g4dn-xlarge
      lifecycle: Ec2Spot
      aws.amazon.com/spot: "true"
      gpu-count: "1"
    # Tags that repeat the labels above under the cluster-autoscaler
    # node-template prefix, plus the tag marking the group as
    # autoscaler-enabled.
    tags:
      k8s.io/cluster-autoscaler/node-template/label/lifecycle: Ec2Spot
      k8s.io/cluster-autoscaler/node-template/label/aws.amazon.com/spot: "true"
      k8s.io/cluster-autoscaler/node-template/label/gpu-count: "1"
      k8s.io/cluster-autoscaler/node-template/label/workgroup: g4dn-xlarge
      k8s.io/cluster-autoscaler/enabled: "true"
    iam:
      withAddonPolicies:
        # Enable the autoscaler add-on IAM policy for this node group.
        autoScaler: true