Skip to content

Commit

Permalink
Merge pull request #43 from AdiY10/Data_extraction_improvements
Browse files Browse the repository at this point in the history
Accelerates data extraction from AWS- change extraction data structure.
  • Loading branch information
ilya-kolchinsky authored May 31, 2022
2 parents dc1e5d9 + d2ec9f1 commit 673922d
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 30 deletions.
1 change: 0 additions & 1 deletion src/CloudCostOptimizer (CCO)/CCO

This file was deleted.

135 changes: 131 additions & 4 deletions src/CloudCostOptimizer (CCO)/FindPrice.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from urllib.request import urlopen
import json
import pandas as pd
import constants

# import numpy as np
# import boto3
Expand Down Expand Up @@ -114,10 +115,8 @@ def add_scores(self, ec2):
# self.calculateCorrelations()
# self.exportArraysToCsv()

def calculate_spot_price(self, ec2):
"""Calculate spot price function."""
aws_data = self.calculate_price()
# print('Join spot prices')
def join_spot_prices(self, ec2, aws_data):
"""Join spot prices function."""
for k, v in ec2.items():
for price in v:
# ##boto3
Expand Down Expand Up @@ -146,6 +145,134 @@ def calculate_spot_price(self, ec2):
spot_price_value / float(price["memory"])
)
self.spot_price.append(spot_price_value)
return ec2

def correct_region(self, region):
    """Translate a legacy region alias into its full AWS region code.

    Any value that is not one of the known aliases is returned unchanged.
    """
    alias_table = (
        ("us-east", "us-east-1"),
        ("us-west", "us-west-1"),
        ("apac-sin", "ap-southeast-1"),
        ("apac-syd", "ap-southeast-2"),
        ("apac-tokyo", "ap-northeast-1"),
        ("eu-ireland", "eu-west-1"),
    )
    for alias, region_code in alias_table:
        if region == alias:
            return region_code
    return region

def correct_os(self, os):
    """Translate an OS tag ("linux" / "mswin") into "Linux" / "Windows".

    Any other value is reported on stdout and returned unchanged.
    """
    if os == "linux":
        return "Linux"
    if os == "mswin":
        return "Windows"
    print("the os is wrong")
    return os

def aws_data_extraction(self, ec2, region):
    """Annotate EC2 instance records with prices from the feed at ``self.url``.

    The feed is downloaded once, its optional ``callback(...)`` JSONP wrapper
    is removed, and the JSON is flattened into a lookup table so that every
    instance record is priced with a single dictionary lookup.

    Args:
        ec2: Mapping of region code -> list of instance dicts. Each dict is
            read for "os", "typeName", "cpu" and "memory" and is updated in
            place.
        region: A single region code, a list of region names (each passed
            through ``correct_region``), or "all" to use ``constants.regions``.

    Returns:
        The same ``ec2`` mapping. Every record that has a matching feed entry
        gains "spot_price", "Price_per_CPU" and "Price_per_memory"; the three
        fields are floats, or all "N/A" when the price cannot be computed.

    Raises:
        KeyError: If a requested region is missing from ``ec2``.
    """
    with urlopen(self.url) as response:
        payload = response.read().strip()

    # Remove the JSONP wrapper as an exact prefix/suffix. lstrip()/rstrip()
    # take a *set* of bytes and only worked because JSON starts with "{".
    jsonp_prefix = b"callback("
    if payload.startswith(jsonp_prefix):
        payload = payload[len(jsonp_prefix):].rstrip(b"; \t\r\n")
        if payload.endswith(b")"):
            payload = payload[:-1]
    prices = json.loads(payload)

    if region != "all" and not isinstance(region, list):
        # A single region is used verbatim (it is not alias-corrected).
        wanted_regions = [region]
    else:
        names = region if isinstance(region, list) else constants.regions.copy()
        wanted_regions = [self.correct_region(name) for name in names]
    wanted = set(wanted_regions)

    # (region, lower-cased size, OS) -> feed entry. Later duplicates overwrite
    # earlier ones, matching the "last match wins" result of a full scan.
    price_index = {}
    for feed_region in prices["config"]["regions"]:
        region_code = self.correct_region(feed_region["region"])
        if region_code not in wanted:
            continue
        for instance_type in feed_region["instanceTypes"]:
            for size in instance_type["sizes"]:
                size_name = size["size"].lower()
                for value in size["valueColumns"]:
                    os_name = self.correct_os(value["name"])
                    price_index[(region_code, size_name, os_name)] = value

    for region_code in wanted_regions:
        for item in ec2[region_code]:
            key = (region_code, item["typeName"].lower(), item["os"])
            value = price_index.get(key)
            if value is None:
                continue  # no feed entry: leave the record untouched
            try:
                # Prices may be numbers or numeric strings; anything that
                # does not parse (or a zero cpu/memory) means "no price".
                spot_price = float(value["prices"]["USD"])
                per_cpu = spot_price / float(item["cpu"])
                per_memory = spot_price / float(item["memory"])
            except (KeyError, TypeError, ValueError, ZeroDivisionError):
                spot_price = per_cpu = per_memory = "N/A"
            item["spot_price"] = spot_price
            item["Price_per_CPU"] = per_cpu
            item["Price_per_memory"] = per_memory
    return ec2

def calculate_spot_price(self, ec2, region):
    """Announce the extraction and delegate pricing to ``aws_data_extraction``.

    Returns whatever ``aws_data_extraction(ec2, region)`` returns.
    """
    print("Extracting Data from AWS")
    return self.aws_data_extraction(ec2, region)
8 changes: 4 additions & 4 deletions src/CloudCostOptimizer (CCO)/FleetResults.json
Original file line number Diff line number Diff line change
Expand Up @@ -897,8 +897,8 @@
"processorArchitecture": "64-bit",
"Architecture": "x86_64",
"discount": 70,
"interruption_frequency": "10%-15%",
"interruption_frequency_filter": 2.0,
"interruption_frequency": "<5%",
"interruption_frequency_filter": 0.0,
"spot_price": 0.195,
"Price_per_CPU": 0.01625,
"Price_per_memory": 0.008125,
Expand Down Expand Up @@ -1122,8 +1122,8 @@
"processorArchitecture": "64-bit",
"Architecture": "x86_64",
"discount": 70,
"interruption_frequency": "10%-15%",
"interruption_frequency_filter": 2.0,
"interruption_frequency": "<5%",
"interruption_frequency_filter": 0.0,
"spot_price": 0.195,
"Price_per_CPU": 0.01625,
"Price_per_memory": 0.008125,
Expand Down
1 change: 0 additions & 1 deletion src/CloudCostOptimizer (CCO)/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ def serialize_component(component: ComponentOffer):
if __name__ == "__main__":
app.run()


### POST endpoint to get spot fleet hourly price estimations
### body: configuration for fleet, i.e apps,components and other optional configurations
### dto = {
Expand Down
70 changes: 51 additions & 19 deletions src/CloudCostOptimizer (CCO)/get_spot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from FindPrice import GetPriceFromAWS

# from ebs_prices import get_ebs_for_region, get_ebs
# import json
import json


class SpotCalculator:
Expand Down Expand Up @@ -74,30 +74,54 @@ def get_spot_estimations(
# continue
# price['volumeType'] = ebs[price['region']]['volumeType']
# price['storagePrice'] = ebs[price['region']]['price']
price["total_price"] = price["spot_price"]
price["CPU_Score"] = round(price["Price_per_CPU"], 5)
price["Memory_Score"] = round(price["Price_per_memory"], 5)
lst.append(price)
if isinstance(price["spot_price"], str):
price["total_price"] = "N/A"
price["CPU_Score"] = "N/A"
price["Memory_Score"] = "N/A"
else:
price["total_price"] = price["spot_price"]
price["CPU_Score"] = round(price["Price_per_CPU"], 5)
price["Memory_Score"] = round(price["Price_per_memory"], 5)
lst.append(price)
lst = sorted(lst, key=lambda p: p["total_price"])
return lst[0:30]

##fleet offers
def get_fleet_offers(
    self, user_os, region, app_size, params, pricing, architecture, type_major
):  ## params- list of all components
    """Load (or refresh) the EC2 price data and compute fleet offers.

    The per-OS JSON snapshot ("ec2_data_Linux.json" or
    "ec2_data_Windows.json") is reused when it was modified less than a day
    ago; otherwise, or when the snapshot does not exist, the data is rebuilt
    through ``get_ec2_from_cache``.

    Returns:
        The result of the module-level ``get_fleet_offers`` helper.
    """
    # Local imports: the parameter is named ``user_os`` so that the ``os``
    # module can be used here.
    import datetime
    import os.path

    # Each OS checks and reads its *own* snapshot file.
    if user_os == "linux":
        snapshot = "ec2_data_Linux.json"
    else:
        snapshot = "ec2_data_Windows.json"

    try:
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(snapshot))
        is_fresh = (datetime.datetime.now() - modified).days == 0
    except OSError:
        # Missing or unreadable snapshot (e.g. first run): rebuild it.
        is_fresh = False

    if is_fresh:
        # NOTE(review): the snapshot is reused whatever ``region`` it was
        # written for - confirm that this is intended.
        with open(snapshot, encoding="utf-8") as file:
            ec2_data = json.load(file)
    else:
        ec2_data = self.get_ec2_from_cache(region, user_os)

    print("calculating best configuration")
    ec2 = SpotInstanceCalculator(ec2_data)
    # ebs_data = self.get_ebs_from_cache(region) ## get EBS volumes from AWS
    # ebs = EbsCalculator(ebs_data)
    return get_fleet_offers(
        params, region, user_os, app_size, ec2, pricing, architecture, type_major
    )

def is_cached(self, os, region):
Expand Down Expand Up @@ -129,18 +153,26 @@ def get_ec2_from_cache(self, region, os):
ec2 = Ec2Parser()
if region != "all" and not isinstance(region, list):
ec2_data = ec2.get_ec2_for_region(os, region)
ec2_data = self.aws_price.calculate_spot_price(ec2_data)
# with open('ec2_data.json', 'w', encoding='utf-8') as f:
# json.dump(ec2_data, f, ensure_ascii=False, indent=4)
ec2_data = self.aws_price.calculate_spot_price(ec2_data, region)
if os == "linux":
with open("ec2_data_Linux.json", "w", encoding="utf-8") as f:
json.dump(ec2_data, f, ensure_ascii=False, indent=4)
else:
with open("ec2_data_Windows.json", "w", encoding="utf-8") as f:
json.dump(ec2_data, f, ensure_ascii=False, indent=4)
if os not in self.ec2_cache:
self.ec2_cache[os] = {}
self.ec2_cache[os][region] = ec2_data[region]
return ec2_data
else:
ec2_data = ec2.get_ec2(os, region)
ec2_data = self.aws_price.calculate_spot_price(ec2_data)
# with open('ec2_data.json', 'w', encoding='utf-8') as f:
# json.dump(ec2_data, f, ensure_ascii=False, indent=4)
ec2_data = self.aws_price.calculate_spot_price(ec2_data, region)
if os == "linux":
with open("ec2_data_Linux.json", "w", encoding="utf-8") as f:
json.dump(ec2_data, f, ensure_ascii=False, indent=4)
else:
with open("ec2_data_Windows.json", "w", encoding="utf-8") as f:
json.dump(ec2_data, f, ensure_ascii=False, indent=4)
self.ec2_cache[os] = ec2_data
self.cached_os[os] = True
return ec2_data
1 change: 0 additions & 1 deletion src/CloudCostOptimizer (CCO)/input_Fleet.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"spot/onDemand": "onDemand",
"AvailabilityZone": "all",
"Architecture": "all",
"typeMajor": ["c5","r5","m5","a1"],
"apps": [
{
"app": "App1",
Expand Down

0 comments on commit 673922d

Please sign in to comment.