Skip to content

Commit

Permalink
feat: add aws_g6e instances (#969)
Browse files Browse the repository at this point in the history
Co-authored-by: Alexander Jipa <[email protected]>
  • Loading branch information
clumsy and azzhipa authored Nov 26, 2024
1 parent 53933e3 commit 26cb186
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 0 deletions.
82 changes: 82 additions & 0 deletions torchx/specs/named_resources_aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,80 @@ def aws_g5_48xlarge() -> Resource:
)


def aws_g6e_xlarge() -> Resource:
    """Build the ``Resource`` for the ``g6e.xlarge`` instance type.

    Returns:
        A ``Resource`` with 4 CPUs, 1 GPU, ``memMB`` of ``32 * GiB`` and a
        ``K8S_ITYPE`` capability of ``"g6e.xlarge"``.
    """
    instance_caps = {K8S_ITYPE: "g6e.xlarge"}
    return Resource(cpu=4, gpu=1, memMB=32 * GiB, capabilities=instance_caps)


def aws_g6e_2xlarge() -> Resource:
    """Build the ``Resource`` for the ``g6e.2xlarge`` instance type.

    Returns:
        A ``Resource`` with 8 CPUs, 1 GPU, ``memMB`` of ``64 * GiB`` and a
        ``K8S_ITYPE`` capability of ``"g6e.2xlarge"``.
    """
    instance_caps = {K8S_ITYPE: "g6e.2xlarge"}
    return Resource(cpu=8, gpu=1, memMB=64 * GiB, capabilities=instance_caps)


def aws_g6e_4xlarge() -> Resource:
    """Build the ``Resource`` for the ``g6e.4xlarge`` instance type.

    Returns:
        A ``Resource`` with 16 CPUs, 1 GPU, ``memMB`` of ``128 * GiB`` and a
        ``K8S_ITYPE`` capability of ``"g6e.4xlarge"``.
    """
    instance_caps = {K8S_ITYPE: "g6e.4xlarge"}
    return Resource(cpu=16, gpu=1, memMB=128 * GiB, capabilities=instance_caps)


def aws_g6e_8xlarge() -> Resource:
    """Build the ``Resource`` for the ``g6e.8xlarge`` instance type.

    Returns:
        A ``Resource`` with 32 CPUs, 1 GPU, ``memMB`` of ``256 * GiB`` and a
        ``K8S_ITYPE`` capability of ``"g6e.8xlarge"``.
    """
    instance_caps = {K8S_ITYPE: "g6e.8xlarge"}
    return Resource(cpu=32, gpu=1, memMB=256 * GiB, capabilities=instance_caps)


def aws_g6e_16xlarge() -> Resource:
    """Build the ``Resource`` for the ``g6e.16xlarge`` instance type.

    Returns:
        A ``Resource`` with 64 CPUs, 1 GPU, ``memMB`` of ``512 * GiB`` and a
        ``K8S_ITYPE`` capability of ``"g6e.16xlarge"``.
    """
    instance_caps = {K8S_ITYPE: "g6e.16xlarge"}
    return Resource(cpu=64, gpu=1, memMB=512 * GiB, capabilities=instance_caps)


def aws_g6e_12xlarge() -> Resource:
    """Build the ``Resource`` for the ``g6e.12xlarge`` instance type.

    Returns:
        A ``Resource`` with 48 CPUs, 4 GPUs, ``memMB`` of ``384 * GiB`` and a
        ``K8S_ITYPE`` capability of ``"g6e.12xlarge"``.
    """
    instance_caps = {K8S_ITYPE: "g6e.12xlarge"}
    return Resource(cpu=48, gpu=4, memMB=384 * GiB, capabilities=instance_caps)


def aws_g6e_24xlarge() -> Resource:
    """Build the ``Resource`` for the ``g6e.24xlarge`` instance type.

    Returns:
        A ``Resource`` with 96 CPUs, 4 GPUs, ``memMB`` of ``768 * GiB``, a
        ``K8S_ITYPE`` capability of ``"g6e.24xlarge"`` and 2 ``EFA_DEVICE``
        devices.
    """
    instance_caps = {K8S_ITYPE: "g6e.24xlarge"}
    efa_devices = {EFA_DEVICE: 2}
    return Resource(
        cpu=96,
        gpu=4,
        memMB=768 * GiB,
        capabilities=instance_caps,
        devices=efa_devices,
    )


def aws_g6e_48xlarge() -> Resource:
    """Build the ``Resource`` for the ``g6e.48xlarge`` instance type.

    Returns:
        A ``Resource`` with 192 CPUs, 8 GPUs, ``memMB`` of ``1536 * GiB``, a
        ``K8S_ITYPE`` capability of ``"g6e.48xlarge"`` and 4 ``EFA_DEVICE``
        devices.
    """
    instance_caps = {K8S_ITYPE: "g6e.48xlarge"}
    efa_devices = {EFA_DEVICE: 4}
    return Resource(
        cpu=192,
        gpu=8,
        memMB=1536 * GiB,
        capabilities=instance_caps,
        devices=efa_devices,
    )


def aws_trn1_2xlarge() -> Resource:
return Resource(
cpu=8,
Expand Down Expand Up @@ -299,6 +373,14 @@ def aws_trn1_32xlarge() -> Resource:
"aws_g5.12xlarge": aws_g5_12xlarge,
"aws_g5.24xlarge": aws_g5_24xlarge,
"aws_g5.48xlarge": aws_g5_48xlarge,
"aws_g6e.xlarge": aws_g6e_xlarge,
"aws_g6e.2xlarge": aws_g6e_2xlarge,
"aws_g6e.4xlarge": aws_g6e_4xlarge,
"aws_g6e.8xlarge": aws_g6e_8xlarge,
"aws_g6e.16xlarge": aws_g6e_16xlarge,
"aws_g6e.12xlarge": aws_g6e_12xlarge,
"aws_g6e.24xlarge": aws_g6e_24xlarge,
"aws_g6e.48xlarge": aws_g6e_48xlarge,
"aws_trn1.2xlarge": aws_trn1_2xlarge,
"aws_trn1.32xlarge": aws_trn1_32xlarge,
}
52 changes: 52 additions & 0 deletions torchx/specs/test/named_resources_aws_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@
aws_g5_4xlarge,
aws_g5_8xlarge,
aws_g5_xlarge,
aws_g6e_12xlarge,
aws_g6e_16xlarge,
aws_g6e_24xlarge,
aws_g6e_2xlarge,
aws_g6e_48xlarge,
aws_g6e_4xlarge,
aws_g6e_8xlarge,
aws_g6e_xlarge,
aws_m5_2xlarge,
aws_p3_16xlarge,
aws_p3_2xlarge,
Expand Down Expand Up @@ -87,6 +95,50 @@ def test_aws_p5(self) -> None:
self.assertEqual(2048 * GiB, p5.memMB)
self.assertEqual({EFA_DEVICE: 32}, p5.devices)

def test_aws_g6e(self) -> None:
    """Check cpu, gpu, memMB (and devices where set) for every g6e resource."""
    # Each row: (factory, cpu, gpu, memMB multiplier of GiB, devices or None).
    # ``None`` means the devices mapping is not asserted for that size.
    expectations = [
        (aws_g6e_xlarge, 4, 1, 32, None),
        (aws_g6e_2xlarge, 8, 1, 64, None),
        (aws_g6e_4xlarge, 16, 1, 128, None),
        (aws_g6e_8xlarge, 32, 1, 256, None),
        (aws_g6e_16xlarge, 64, 1, 512, None),
        (aws_g6e_12xlarge, 48, 4, 384, None),
        (aws_g6e_24xlarge, 96, 4, 768, {EFA_DEVICE: 2}),
        (aws_g6e_48xlarge, 192, 8, 1536, {EFA_DEVICE: 4}),
    ]

    # Construct every resource before asserting anything, so a failing
    # factory surfaces ahead of any comparison.
    resources = [row[0]() for row in expectations]

    for resource, (_, cpu, gpu, mem_multiplier, devices) in zip(
        resources, expectations
    ):
        self.assertEqual(cpu, resource.cpu)
        self.assertEqual(gpu, resource.gpu)
        self.assertEqual(mem_multiplier * GiB, resource.memMB)
        if devices is not None:
            self.assertEqual(devices, resource.devices)

def test_aws_g4dn(self) -> None:
g4d = aws_g4dn_xlarge()
self.assertEqual(4, g4d.cpu)
Expand Down

0 comments on commit 26cb186

Please sign in to comment.