Skip to content

Commit

Permalink
add azure fetcher & tiered price
Browse files Browse the repository at this point in the history
  • Loading branch information
cblmemo committed Mar 2, 2023
1 parent 0ab0f89 commit b70f86f
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 18 deletions.
65 changes: 54 additions & 11 deletions sky/clouds/service_catalog/data_fetchers/fetch_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import json
import os
import subprocess
from typing import List, Optional, Set
from typing import List, Optional, Set, Dict
import urllib
import collections

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -54,9 +55,9 @@ def get_regions() -> List[str]:
]


def get_pricing_url(region: Optional[str] = None) -> str:
def get_pricing_url(service_name: str, region: Optional[str] = None) -> str:
filters = [
'serviceName eq \'Virtual Machines\'',
f'serviceName eq \'{service_name}\'',
'priceType eq \'Consumption\'',
]
if region is not None:
Expand All @@ -66,15 +67,18 @@ def get_pricing_url(region: Optional[str] = None) -> str:


@ray.remote
def get_pricing_df(region: Optional[str] = None) -> pd.DataFrame:
def get_pricing_df(service_name: str,
region: Optional[str] = None) -> pd.DataFrame:
all_items = []
url = get_pricing_url(region)
print(f'Getting pricing for {region}')
url = get_pricing_url(service_name, region)
print(f'Getting pricing for {region} with service name {service_name}')
page = 0
while url is not None:
page += 1
if page % 10 == 0:
print(f'Fetched pricing pages {page}')
print(
f'Fetched pricing pages {page} with service name {service_name}'
)
r = requests.get(url)
r.raise_for_status()
content_str = r.content.decode('ascii')
Expand All @@ -84,16 +88,18 @@ def get_pricing_df(region: Optional[str] = None) -> pd.DataFrame:
break
all_items += items
url = content.get('NextPageLink')
print(f'Done fetching pricing {region}')
print(f'Done fetching pricing {region} with service name {service_name}')
df = pd.DataFrame(all_items)
assert 'productName' in df.columns, (region, df.columns)
return df[(~df['productName'].str.contains(' Windows')) &
(df['unitPrice'] > 0)]


@ray.remote
def get_all_regions_pricing_df(service_name: str,
                               regions: Set[str]) -> pd.DataFrame:
    """Fetch pricing for every region and merge into one DataFrame.

    Launches one remote ``get_pricing_df`` task per region for the given
    Azure service name, waits for all of them, and concatenates the
    per-region frames.
    """
    pending = [
        get_pricing_df.remote(service_name, region) for region in regions
    ]
    region_dfs = ray.get(pending)
    return pd.concat(region_dfs)


Expand Down Expand Up @@ -151,7 +157,7 @@ def get_gpu_name(family: str) -> Optional[str]:

def get_all_regions_instance_types_df(region_set: Set[str]):
df, df_sku = ray.get([
get_all_regions_pricing_df.remote(region_set),
get_all_regions_pricing_df.remote('Virtual Machines', region_set),
get_sku_df.remote(region_set),
])
print('Processing dataframes')
Expand Down Expand Up @@ -239,6 +245,39 @@ def get_additional_columns(row):
return df_ret


def get_all_regions_storage_df(region_set: Set[str]):
    """Fetch Azure storage pricing for the given regions.

    Returns a DataFrame with columns ['Name', 'Region', 'Price'], where
    'Name' is the Azure meterId, and 'Price' is the string form of a
    {tierMinimumUnits: unitPrice} dict (tiered pricing, same format as
    the GCP fetcher emits).
    """
    print('Getting storage df')
    # Bug fix: `.remote()` returns a Ray ObjectRef, not a DataFrame; it must
    # be resolved with ray.get() before any DataFrame operation is applied.
    df_storage = ray.get(
        get_all_regions_pricing_df.remote('Storage', region_set))
    df_storage.drop_duplicates(inplace=True)
    df_storage = df_storage[df_storage['unitPrice'] > 0]
    # Only default storage is used by sky: Standard LRS, billed per GB/month.
    df_storage = df_storage[df_storage['skuName'] == 'Standard LRS']
    df_storage = df_storage[df_storage['unitOfMeasure'] == '1 GB/Month']

    class DiskInfo:
        """Accumulates the region and tiered prices of one storage meter."""

        def __init__(self) -> None:
            self.region: str = ''
            # tierMinimumUnits -> unitPrice
            # NOTE(review): keys come straight from the API rows; assumed to
            # be numeric tier thresholds — confirm against the API response.
            self.price: Dict[int, float] = dict()

    # meterId -> DiskInfo (region + tiered price dict)
    storage_dict: Dict[str, DiskInfo] = collections.defaultdict(DiskInfo)
    for _, row in df_storage.iterrows():
        info = storage_dict[row['meterId']]
        info.region = row['armRegionName']
        info.price[row['tierMinimumUnits']] = row['unitPrice']

    # Build all rows at once: appending via `df.loc[len(df)] = ...` inside a
    # loop reallocates the frame each iteration (quadratic).
    rows = [[name, info.region, str(info.price)]
            for name, info in storage_dict.items()]
    return pd.DataFrame(rows, columns=['Name', 'Region', 'Price'])


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
Expand All @@ -255,3 +294,7 @@ def get_additional_columns(row):
os.makedirs('azure', exist_ok=True)
instance_df.to_csv('azure/vms.csv', index=False)
print('Azure Service Catalog saved to azure/vms.csv')

storage_df = get_all_regions_storage_df(region_filter)
storage_df.to_csv('azure/storage.csv', index=False)
print('Azure Storage Catalog saved to azure/storage.csv')
15 changes: 8 additions & 7 deletions sky/clouds/service_catalog/data_fetchers/fetch_gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,19 +132,20 @@ def _get_unit_price(sku: Dict[str, Any]) -> float:
return units + nanos


def _get_tired_unit_price(sku: Dict[str, Any]) -> float:
def _get_tired_unit_price(sku: Dict[str, Any]) -> str:
pricing_info = sku['pricingInfo'][0]['pricingExpression']

def _get_tired_price(tier: int) -> float:
unit_price = pricing_info['tieredRates'][tier]['unitPrice']
def _get_tired_price(unit_price: Dict[str, Any]) -> float:
assert unit_price['currencyCode'] == 'USD'
units = int(unit_price['units'])
nanos = unit_price['nanos'] / 1e9
return units + nanos

# TODO(tian): Ignore first tier for now since it only applies to
# first 30 GiBy.mo.
return _get_tired_price(0) if len(
pricing_info['tieredRates']) == 1 else _get_tired_price(1)
# tierMinimumUnits -> unitPrice
price: Dict[int, float] = dict()
for tier in pricing_info['tieredRates']:
price[tier['startUsageAmount']] = _get_tired_price(tier['unitPrice'])
return str(price)


def get_vm_df(skus: List[Dict[str, Any]]) -> pd.DataFrame:
Expand Down

0 comments on commit b70f86f

Please sign in to comment.