feat(ray): add accelerator and custom resource support (#118)
Because

- We are going to support requesting a specific NVIDIA accelerator type, or even a custom resource

This commit

- supports configuring the accelerator type or a custom resource for a model

Resolves INS-3967
heiruwu authored Mar 20, 2024
1 parent ad0f250 commit f974f98
Showing 2 changed files with 46 additions and 0 deletions.
30 changes: 30 additions & 0 deletions instill/helpers/const.py
@@ -120,3 +120,33 @@ class VisualQuestionAnsweringInput:
}

DEFAULT_DEPENDENCIES = ["protobuf==4.25.3", "grpcio-tools==1.62.0"]

# ray accelerators
NVIDIA_TESLA_V100 = "V100"
NVIDIA_TESLA_P100 = "P100"
NVIDIA_TESLA_T4 = "T4"
NVIDIA_TESLA_P4 = "P4"
NVIDIA_TESLA_K80 = "K80"
NVIDIA_TESLA_A10G = "A10G"
NVIDIA_L4 = "L4"
NVIDIA_A100 = "A100"
INTEL_MAX_1550 = "Intel-GPU-Max-1550"
INTEL_MAX_1100 = "Intel-GPU-Max-1100"
INTEL_GAUDI = "Intel-GAUDI"
AMD_INSTINCT_MI100 = "AMD-Instinct-MI100"
AMD_INSTINCT_MI250X = "AMD-Instinct-MI250X"
AMD_INSTINCT_MI250 = "AMD-Instinct-MI250X-MI250"
AMD_INSTINCT_MI210 = "AMD-Instinct-MI210"
AMD_INSTINCT_MI300X = "AMD-Instinct-MI300X-OAM"
AMD_RADEON_R9_200_HD_7900 = "AMD-Radeon-R9-200-HD-7900"
AMD_RADEON_HD_7900 = "AMD-Radeon-HD-7900"
AWS_NEURON_CORE = "aws-neuron-core"
GOOGLE_TPU_V2 = "TPU-V2"
GOOGLE_TPU_V3 = "TPU-V3"
GOOGLE_TPU_V4 = "TPU-V4"

# Use these instead of NVIDIA_A100 if you need a specific accelerator size. Note that
# these labels are not auto-added to nodes, you'll have to add them manually in
# addition to the default A100 label if needed.
NVIDIA_A100_40G = "A100-40G"
NVIDIA_A100_80G = "A100-80G"
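These constants mirror Ray's accelerator type labels, so a deployment can ask for a specific accelerator class by name. As a minimal sketch (the ray_actor_options dict below is a hypothetical illustration; the import path follows the file above):

from instill.helpers.const import NVIDIA_TESLA_T4

# Each constant is a plain label string that Ray matches against the accelerator
# type advertised by a node, e.g. when placed in an actor's options:
ray_actor_options = {"num_gpus": 1, "accelerator_type": NVIDIA_TESLA_T4}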
16 changes: 16 additions & 0 deletions instill/helpers/ray_config.py
@@ -57,6 +57,22 @@ def update_num_gpus(self, num_gpus: float):

return self

    def update_accelerator_type(self, accelerator_type: str):
        if self._deployment.ray_actor_options is not None:
            self._deployment.ray_actor_options.update(
                {"accelerator_type": accelerator_type}
            )

        return self

    def update_num_custom_resource(self, resource_name: str, num: float):
        if self._deployment.ray_actor_options is not None:
            self._deployment.ray_actor_options.update(
                {"resources": {resource_name: num}}
            )

        return self

def _determine_vram_usage(self, model_path: str, total_vram: str):
warn(
"determine vram usage base on file size will soon be removed",
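As a rough usage sketch, the two new setters follow the same chainable pattern as update_num_gpus (the function name and type hints below are illustrative assumptions, not part of this diff):

from instill.helpers.const import NVIDIA_TESLA_T4
from instill.helpers.ray_config import InstillDeployable  # class name assumed from ray_config.py


def request_t4_and_custom_resource(deployable: InstillDeployable) -> InstillDeployable:
    # Each setter returns self, so calls can be chained.
    return (
        deployable.update_num_gpus(1)
        .update_accelerator_type(NVIDIA_TESLA_T4)           # ray_actor_options["accelerator_type"] = "T4"
        .update_num_custom_resource("my_custom_chip", 1.0)  # ray_actor_options["resources"] = {"my_custom_chip": 1.0}
    )

Note that update_num_custom_resource overwrites the whole "resources" entry with a single-key dict on each call, so only the most recent custom resource request is kept.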
