Skip to content

Commit

Permalink
add azure fetcher & tiered price
Browse files Browse the repository at this point in the history
  • Loading branch information
cblmemo committed Mar 2, 2023
1 parent 0ab0f89 commit de8d21d
Show file tree
Hide file tree
Showing 15 changed files with 233 additions and 139 deletions.
26 changes: 14 additions & 12 deletions sky/clouds/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,18 +242,20 @@ def get_zone_shell_cmd(cls) -> Optional[str]:

#### Normal methods ####

def instance_type_to_hourly_cost(self,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return service_catalog.get_hourly_cost(instance_type,
disk_size=disk_size,
use_spot=use_spot,
region=region,
zone=zone,
clouds='aws')
def instance_type_to_cost(self,
time_in_hour: float,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return service_catalog.get_cost(time_in_hour,
instance_type,
disk_size=disk_size,
use_spot=use_spot,
region=region,
zone=zone,
clouds='aws')

def accelerators_to_hourly_cost(self,
accelerators: Dict[str, int],
Expand Down
26 changes: 14 additions & 12 deletions sky/clouds/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,20 @@ def _cloud_unsupported_features(
def _max_cluster_name_length(cls) -> int:
return cls._MAX_CLUSTER_NAME_LEN_LIMIT

def instance_type_to_hourly_cost(self,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return service_catalog.get_hourly_cost(instance_type,
disk_size=disk_size,
use_spot=use_spot,
region=region,
zone=zone,
clouds='azure')
def instance_type_to_cost(self,
time_in_hour: float,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return service_catalog.get_cost(time_in_hour,
instance_type,
disk_size=disk_size,
use_spot=use_spot,
region=region,
zone=zone,
clouds='azure')

def accelerators_to_hourly_cost(self,
accelerators: Dict[str, int],
Expand Down
7 changes: 4 additions & 3 deletions sky/clouds/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,10 @@ def get_zone_shell_cmd(cls) -> Optional[str]:

#### Normal methods ####

def instance_type_to_hourly_cost(self, instance_type: str, disk_size: int,
use_spot: bool, region: Optional[str],
zone: Optional[str]) -> float:
def instance_type_to_cost(self, time_in_hour: float, instance_type: str,
disk_size: int, use_spot: bool,
region: Optional[str],
zone: Optional[str]) -> float:
"""Returns the hourly on-demand/spot price for an instance type."""
raise NotImplementedError

Expand Down
26 changes: 14 additions & 12 deletions sky/clouds/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,18 +228,20 @@ def get_zone_shell_cmd(cls) -> Optional[str]:

#### Normal methods ####

def instance_type_to_hourly_cost(self,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return service_catalog.get_hourly_cost(instance_type,
disk_size=disk_size,
use_spot=use_spot,
region=region,
zone=zone,
clouds='gcp')
def instance_type_to_cost(self,
time_in_hour: float,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return service_catalog.get_cost(time_in_hour,
instance_type,
disk_size=disk_size,
use_spot=use_spot,
region=region,
zone=zone,
clouds='gcp')

def accelerators_to_hourly_cost(self,
accelerators: Dict[str, int],
Expand Down
26 changes: 14 additions & 12 deletions sky/clouds/lambda_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,20 @@ def region_zones_provision_loop(
for region in regions:
yield region, region.zones

def instance_type_to_hourly_cost(self,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return service_catalog.get_hourly_cost(instance_type,
disk_size=disk_size,
use_spot=use_spot,
region=region,
zone=zone,
clouds='lambda')
def instance_type_to_cost(self,
time_in_hour: float,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return service_catalog.get_cost(time_in_hour,
instance_type,
disk_size=disk_size,
use_spot=use_spot,
region=region,
zone=zone,
clouds='lambda')

def accelerators_to_hourly_cost(self,
accelerators: Dict[str, int],
Expand Down
7 changes: 4 additions & 3 deletions sky/clouds/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,10 @@ def region_zones_provision_loop(

#### Normal methods ####

def instance_type_to_hourly_cost(self, instance_type: str, disk_size: int,
use_spot: bool, region: Optional[str],
zone: Optional[str]) -> float:
def instance_type_to_cost(self, time_in_hour: float, instance_type: str,
disk_size: int, use_spot: bool,
region: Optional[str],
zone: Optional[str]) -> float:
# On-prem machines on Sky are assumed free
# (minus electricity/utility bills).
return 0.0
Expand Down
29 changes: 15 additions & 14 deletions sky/clouds/service_catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,25 @@ def get_region_zones_for_instance_type(
instance_type, use_spot)


def get_hourly_cost(instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str],
zone: Optional[str],
clouds: CloudFilter = None) -> float:
"""Returns the hourly price of a VM instance in the given region and zone.
* If (region, zone) == (None, None), return the cheapest hourly price among
def get_cost(time_in_hour: float,
instance_type: str,
disk_size: int,
use_spot: bool,
region: Optional[str],
zone: Optional[str],
clouds: CloudFilter = None) -> float:
"""Returns the price of a VM instance in the given region and zone.
* If (region, zone) == (None, None), return the cheapest price among
all regions and zones.
* If (region, zone) == (str, None), return the cheapest hourly price among
* If (region, zone) == (str, None), return the cheapest price among
all the zones in the given region.
* If (region, zone) == (None, str), return the hourly price of the instance
* If (region, zone) == (None, str), return the price of the instance
type in the zone.
* If (region, zone) == (str, str), zone must be in the region, and the
function returns the hourly price of the instance type in the zone.
function returns the price of the instance type in the zone.
"""
return _map_clouds_catalog(clouds, 'get_hourly_cost', instance_type,
return _map_clouds_catalog(clouds, 'get_cost', time_in_hour, instance_type,
disk_size, use_spot, region, zone)


Expand Down Expand Up @@ -300,7 +301,7 @@ def is_image_tag_valid(tag: str,
'list_accelerators',
'list_accelerator_counts',
'get_region_zones_for_instance_type',
'get_hourly_cost',
'get_cost',
'get_accelerators_from_instance_type',
'get_instance_type_for_accelerator',
'get_accelerator_hourly_cost',
Expand Down
15 changes: 8 additions & 7 deletions sky/clouds/service_catalog/aws_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,14 @@ def accelerator_in_region_or_zone(acc_name: str,
region, zone)


def get_hourly_cost(instance_type: str,
disk_size: int,
use_spot: bool = False,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return common.get_hourly_cost_impl(_df, _storage_df, instance_type,
disk_size, use_spot, region, zone)
def get_cost(time_in_hour: float,
instance_type: str,
disk_size: int,
use_spot: bool = False,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
return common.get_cost_impl(time_in_hour, _df, _storage_df, instance_type,
disk_size, use_spot, region, zone)


def get_vcpus_from_instance_type(instance_type: str) -> Optional[float]:
Expand Down
15 changes: 8 additions & 7 deletions sky/clouds/service_catalog/azure_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,19 @@ def accelerator_in_region_or_zone(acc_name: str,
region, zone)


def get_hourly_cost(instance_type: str,
disk_size: int,
use_spot: bool = False,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
def get_cost(time_in_hour: float,
instance_type: str,
disk_size: int,
use_spot: bool = False,
region: Optional[str] = None,
zone: Optional[str] = None) -> float:
# Ref: https://azure.microsoft.com/en-us/support/legal/offer-details/
assert not use_spot, 'Current Azure subscription does not support spot.'
if zone is not None:
with ux_utils.print_exception_no_traceback():
raise ValueError('Azure does not support zones.')
return common.get_hourly_cost_impl(_df, _storage_df, instance_type,
disk_size, use_spot, region, zone)
return common.get_cost_impl(time_in_hour, _df, _storage_df, instance_type,
disk_size, use_spot, region, zone)


def get_vcpus_from_instance_type(instance_type: str) -> Optional[float]:
Expand Down
44 changes: 34 additions & 10 deletions sky/clouds/service_catalog/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Common utilities for service catalog."""
import hashlib
import json
import os
import time
from typing import Dict, List, NamedTuple, Optional, Tuple
Expand Down Expand Up @@ -214,7 +215,8 @@ def _get_all_supported_regions_str() -> str:
return validated_region, validated_zone


def get_region_cheapest_hourly_storage_cost(
def get_region_cheapest_storage_cost(
time_in_hour: float,
storage_df: pd.DataFrame,
region: str,
disk_size: int,
Expand All @@ -226,12 +228,29 @@ def get_region_cheapest_hourly_storage_cost(
if 'Region' not in storage_df.columns:
return 0.0
df = storage_df[storage_df['Region'] == region]
cheapest_idx = df['Price'].idxmin() # GB per month
# Suppose one month has 30 days here.
return df.loc[cheapest_idx]['Price'] * disk_size / (30 * 24)
cheapest_price = None
for _, row in df.iterrows():
price_str = row['Price']
price = json.loads(price_str)
assert isinstance(price, dict)
# Assume one month has 30 days here.
units = time_in_hour * disk_size / 30 / 24
tot_price = 0.0
# Iterate over tier_min_unit in descending order.
for tier_min_unit in sorted(price.keys(), key=lambda x: -float(x)):
if units >= float(tier_min_unit):
tot_price += price[tier_min_unit] * (units -
float(tier_min_unit))
units = float(tier_min_unit)
assert abs(units) < 1e-2, units
if cheapest_price is None or tot_price < cheapest_price:
cheapest_price = tot_price
assert cheapest_price is not None
return cheapest_price


def get_hourly_cost_impl(
def get_cost_impl(
time_in_hour: float,
df: pd.DataFrame,
storage_df: pd.DataFrame,
instance_type: str,
Expand All @@ -240,10 +259,13 @@ def get_hourly_cost_impl(
region: Optional[str],
zone: Optional[str],
) -> float:
"""Returns the hourly price of a VM instance in the given region and zone.
"""Returns the price of a VM instance in the given region and zone.
If `time_in_hour` < 0, this function returns the hourly price.
See gcp_catalog.py.
This implementation considers both the VM price and the storage price.
Refer to get_hourly_cost in service_catalog/__init__.py for the docstring.
Refer to get_cost in service_catalog/__init__.py for the docstring.
"""
# total_hourly_cost = instance_price + disk_size * storage_price
df = _get_instance_type(df, instance_type, region, zone)
Expand Down Expand Up @@ -273,9 +295,11 @@ def get_hourly_cost_impl(
cheapest_price = None
for _, row in df[['Region', price_str]].iterrows():
instance_region, price = row['Region'], row[price_str]
storage_price = get_region_cheapest_hourly_storage_cost(
storage_df, instance_region, disk_size)
cur_price = price + storage_price
storage_price = get_region_cheapest_storage_cost(
time_in_hour, storage_df, instance_region, disk_size)
cur_price = price * time_in_hour + storage_price \
if time_in_hour >= 0 else price
cur_price = storage_price
if cheapest_price is None or cur_price < cheapest_price:
cheapest_price = cur_price
assert cheapest_price is not None
Expand Down
Loading

0 comments on commit de8d21d

Please sign in to comment.