Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(ray)!: retire non-decorator deploy and update scaling config #67

Merged
merged 1 commit into from
Dec 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions instill/helpers/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,21 @@


# Default Ray actor options applied to a deployment.
# NOTE: the misspelled name ("OPRTIONS") is kept as-is because other modules
# reference this constant by that exact name.
DEFAULT_RAY_ACTOR_OPRTIONS = {
    "num_cpus": 2,
}

# Default autoscaling settings handed to the deployment decorator.
DEFAULT_AUTOSCALING_CONFIG = {
    "target_num_ongoing_requests_per_replica": 10,
    "initial_replicas": 1,
    "min_replicas": 0,
    "max_replicas": 5,
    "upscale_delay_s": 4,
    "downscale_delay_s": 60,
    "metrics_interval_s": 2,
    # Ray Serve spells this option ``look_back_period_s``; the previous
    # ``look_pack_period_s`` key did not match any autoscaling option.
    "look_back_period_s": 10,
}

# Default runtime environment; PYTHONPATH is the working directory at the
# moment this module is imported.
DEFAULT_RUNTIME_ENV = {
    "env_vars": {
        "PYTHONPATH": os.getcwd(),
    },
}

DEFAULT_MAX_CONCURRENT_QUERIES = 15

Check warning on line 92 in instill/helpers/const.py

View check run for this annotation

Codecov / codecov/patch

instill/helpers/const.py#L92

Added line #L92 was not covered by tests
114 changes: 28 additions & 86 deletions instill/helpers/ray_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import argparse
from typing import Callable, Optional

import ray
Expand All @@ -14,97 +13,44 @@
)


class InstillRayModelConfig:
    """Deployment settings plus names derived from a model path.

    The model path is split on ``/``. The sixth segment becomes
    ``application_name``. The fourth segment is split on ``#``: its first two
    tokens joined by ``_`` form ``model_name``, and its fourth token is
    appended to the model name to build ``route_prefix``.

    Raises:
        IndexError: If the path has fewer than six ``/``-separated segments
            or the fourth segment has fewer than four ``#``-separated tokens.
    """

    def __init__(
        self,
        ray_actor_options: dict,
        ray_autoscaling_options: dict,
        max_concurrent_queries: int,
        og_model_path: str,
        ray_addr: str,
    ) -> None:
        # Values stored exactly as supplied by the caller.
        self.ray_addr = ray_addr
        self.ray_actor_options = ray_actor_options
        self.ray_autoscaling_options = ray_autoscaling_options
        self.max_concurrent_queries = max_concurrent_queries
        self.model_path = og_model_path

        # Values derived from the path layout.
        path_segments = og_model_path.split("/")
        self.application_name = path_segments[5]

        model_tokens = path_segments[3].split("#")
        self.model_name = "_".join(model_tokens[:2])
        self.route_prefix = "/{}/{}".format(self.model_name, model_tokens[3])


def get_compose_ray_address(port: int):
    """Return the ``ray://`` address of the ``ray_server`` host on *port*."""
    return "ray://ray_server:{}".format(port)


def entry(model_weight_name_or_folder: str):
    """Parse the deploy/undeploy command line and build the model config.

    Args:
        model_weight_name_or_folder: Name of the model weight file or folder;
            it is joined onto the ``--model`` path with ``/``.

    Returns:
        A ``(func, model_config)`` tuple where ``func`` is ``"deploy"`` or
        ``"undeploy"`` and ``model_config`` is an ``InstillRayModelConfig``.
    """
    ray_actor_options = {
        "num_cpus": 1,
    }
    max_concurrent_queries = 10
    ray_autoscaling_options = {
        "target_num_ongoing_requests_per_replica": 7,
        "initial_replicas": 1,
        "min_replicas": 0,
        "max_replicas": 5,
    }

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--func", required=True, choices=["deploy", "undeploy"], help="deploy/undeploy"
    )
    parser.add_argument("--model", required=True, help="model path for the deployment")
    parser.add_argument(
        "--ray-addr", default=get_compose_ray_address(10001), help="ray head address"
    )
    # NOTE(review): no ``type=`` is given for the two option arguments below,
    # so a value supplied on the command line arrives as a plain string while
    # the default is a dict -- confirm what downstream code expects.
    parser.add_argument(
        "--ray-actor-options",
        default=ray_actor_options,
        help="custom actor options for the deployment",
    )
    parser.add_argument(
        "--ray-autoscaling-options",
        default=ray_autoscaling_options,
        help="custom autoscaling options for the deployment",
    )
    args = parser.parse_args()

    # NOTE(review): presumably 9000 is a service port that must be mapped to
    # the Ray client port 10001 -- confirm. The replace is textual.
    ray_addr = args.ray_addr.replace("9000", "10001")
    # The default --ray-addr already carries the scheme; only add it when it
    # is missing so the default does not become "ray://ray://...".
    if not ray_addr.startswith("ray://"):
        ray_addr = "ray://" + ray_addr

    model_config = InstillRayModelConfig(
        ray_addr=ray_addr,
        ray_actor_options=args.ray_actor_options,
        ray_autoscaling_options=args.ray_autoscaling_options,
        max_concurrent_queries=max_concurrent_queries,
        og_model_path="/".join([args.model, model_weight_name_or_folder]),
    )

    return args.func, model_config


class InstillDeployable:
def __init__(
    self,
    deployable: Deployment,
    model_weight_or_folder_name: str,
    use_gpu: bool,
) -> None:
    """Wrap a deployment and set its default CPU/GPU actor options.

    Args:
        deployable: The deployment object to wrap.
        model_weight_or_folder_name: Name of the model weight file or folder.
        use_gpu: When true, ``num_cpus`` is set to 1 and ``num_gpus`` to
            0.25 in the actor options; otherwise ``num_cpus`` is set to 2.
    """
    self._deployment: Deployment = deployable
    # params
    self.model_weight_or_folder_name: str = model_weight_or_folder_name
    if use_gpu:
        self._update_num_cpus(1)
        self._update_num_gpus(0.25)
    else:
        self._update_num_cpus(2)

Check warning on line 30 in instill/helpers/ray_config.py

View check run for this annotation

Codecov / codecov/patch

instill/helpers/ray_config.py#L30

Added line #L30 was not covered by tests

def update_num_cpus(self, num_cpus: float):
def _update_num_cpus(self, num_cpus: float):

Check warning on line 32 in instill/helpers/ray_config.py

View check run for this annotation

Codecov / codecov/patch

instill/helpers/ray_config.py#L32

Added line #L32 was not covered by tests
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_cpus": num_cpus})

def update_num_gpus(self, num_gpus: float):
def _update_num_gpus(self, num_gpus: float):

Check warning on line 36 in instill/helpers/ray_config.py

View check run for this annotation

Codecov / codecov/patch

instill/helpers/ray_config.py#L36

Added line #L36 was not covered by tests
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_gpus": num_gpus})

def update_min_replicas(self, num_replicas: int):
    """Re-create the deployment with ``min_replicas`` set to *num_replicas*."""
    self._override_autoscaling("min_replicas", num_replicas)

def update_max_replicas(self, num_replicas: int):
    """Re-create the deployment with ``max_replicas`` set to *num_replicas*."""
    self._override_autoscaling("max_replicas", num_replicas)

def _override_autoscaling(self, key: str, value: int):
    """Replace one autoscaling setting and rebuild the deployment options.

    The module-level ``DEFAULT_AUTOSCALING_CONFIG`` is never modified: the
    new config is a fresh dict, so an override on one instance cannot leak
    into the defaults used by other deployments.
    """
    # Build on this instance's earlier overrides when there are any, so that
    # successive update_* calls accumulate; otherwise start from the defaults.
    current = getattr(self, "_autoscaling_config", DEFAULT_AUTOSCALING_CONFIG)
    new_autoscaling_config = {**current, key: value}
    self._deployment = self._deployment.options(
        autoscaling_config=new_autoscaling_config
    )
    # Remember the override only after ``options`` succeeded.
    self._autoscaling_config = new_autoscaling_config

def deploy(self, model_folder_path: str, ray_addr: str):
if not ray.is_initialized():
ray_addr = "ray://" + ray_addr.replace("9000", "10001")
Expand Down Expand Up @@ -138,13 +84,9 @@
def instill_deployment(
    _func_or_class: Optional[Callable] = None,
) -> Callable[[Callable], InstillDeployable]:
    """Call ``ray_deployment`` with the module's default deployment settings.

    Args:
        _func_or_class: Optional callable forwarded unchanged to
            ``ray_deployment``.

    Returns:
        Whatever ``ray_deployment`` returns for these arguments.
    """
    # Hand over copies of the default dicts: in case ``ray_deployment`` keeps
    # a reference to what it is given (TODO confirm), later in-place edits of
    # a deployment's options must not alter the shared module-level defaults.
    return ray_deployment(
        _func_or_class=_func_or_class,
        ray_actor_options=dict(DEFAULT_RAY_ACTOR_OPRTIONS),
        autoscaling_config=dict(DEFAULT_AUTOSCALING_CONFIG),
        max_concurrent_queries=DEFAULT_MAX_CONCURRENT_QUERIES,
    )