scripts/populate_azure_shapes.py (238 lines of code) (raw):
import argparse
import concurrent
import concurrent.futures
import os
import sys
from pathlib import Path
import requests
dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parent
src_path = os.path.join(dir_path, 'src')
sys.path.append(src_path)
from commons.log_helper import get_logger
_LOG = get_logger('populate-azure-prices')
AZURE_REGIONS = [
"eastasia",
"southeastasia",
"centralus",
"eastus",
"eastus2",
"westus",
"northcentralus",
"southcentralus",
"northeurope",
"westeurope",
"japanwest",
"japaneast",
"brazilsouth",
"australiaeast",
"australiasoutheast",
"southindia",
"centralindia",
"westindia",
"canadacentral",
"canadaeast",
"uksouth",
"ukwest",
"westcentralus",
"westus2",
"koreacentral",
"koreasouth",
"francecentral",
"francesouth",
"australiacentral",
"australiacentral2",
"uaecentral",
"uaenorth",
"southafricanorth",
"southafricawest"
]
ACTION_PRICE = 'PRICE'
ACTION_SHAPE = 'SHAPE'
DEFAULT_CONCURRENT_WORKERS = 7
def parse_args():
parser = argparse.ArgumentParser(
description='Script for r8s AWS Shape/Shape Price '
'collections population')
parser.add_argument('-uri', '--r8s_mongodb_connection_uri',
help='MongoDB Connection string', required=True)
parser.add_argument('--action', choices=['SHAPE', 'PRICE'],
required=True, action='append',
help='Determines whether Shape Specs or '
'pricing data will be parsed.')
parser.add_argument('-acid', '--AZURE_CLIENT_ID',
help='AZURE Client id. Required for \'SHAPE\' action',
required=False)
parser.add_argument('-atid', '--AZURE_TENANT_ID',
help='AZURE Tenant id. Required for \'SHAPE\' action',
required=False)
parser.add_argument('-acs', '--AZURE_CLIENT_SECRET',
help='AZURE Client secret. Required for \'SHAPE\' action',
required=False)
parser.add_argument('-asid', '--AZURE_SUBSCRIPTION_ID',
help='AZURE Subscription id. Required for \'SHAPE\' action',
required=False)
parser.add_argument('-pr', '--price_region', action='append',
required=False, default=AZURE_REGIONS,
help='List of AWS regions to populate price for. '
'If not specified, all Azure regions will '
'be parsed. Required for \'PRICE\' action')
parser.add_argument('-cw', '--concurrent_workers', type=int,
help='Number of concurrent workers for price parsing',
required=False, default=DEFAULT_CONCURRENT_WORKERS)
return dict(vars(parser.parse_args()))
def export_args(**kwargs):
for key, value in kwargs.items():
if isinstance(value, str):
os.environ[key] = value
def get_virtual_machine_info():
from azure.identity import EnvironmentCredential
from azure.mgmt.compute import ComputeManagementClient
_LOG.debug(f'Initializing Azure Credentials')
client = ComputeManagementClient(
credential=EnvironmentCredential(),
subscription_id=os.environ.get('AZURE_SUBSCRIPTION_ID'),
)
result = []
_LOG.debug(f'Querying for Azure VM data')
response = client.resource_skus.list()
for index, item in enumerate(response):
result.append(item)
return [item for item in result if item.resource_type == 'virtualMachines']
def populate_shapes():
from models.shape import Shape
from mongoengine import NotUniqueError
_LOG.debug(f'Loading VM Info')
virtual_machine_info = get_virtual_machine_info()
_LOG.debug(f'Removing duplicated vm data')
virtual_machine_info_unique = get_unique_by_name(virtual_machine_info)
for index, virtual_machine in enumerate(virtual_machine_info_unique):
_LOG.debug(
f'Processing {index}/{len(virtual_machine_info_unique)} shape')
shape = get_shape_data(resource=virtual_machine)
try:
shape.save()
_LOG.debug(f'Shape \'{shape.name}\' has been saved')
except NotUniqueError:
_LOG.debug(f'Shape \'{shape.name}\' already exist, replacing.')
old_shape = Shape.objects.get(name=shape.name)
old_shape.delete()
shape.save()
def populate_prices(region, connection_uri):
os.environ['r8s_mongodb_connection_uri'] = connection_uri
_LOG.debug(f'Querying for Azure VM Pricing data for region: {region}')
url = f"https://prices.azure.com/api/retail/prices?$filter=serviceName " \
f"eq 'Virtual Machines' and priceType eq 'Consumption' and armRegionName eq '{region}'"
_LOG.debug(f'Processing page 1 for region: {region}')
response = requests.get(url)
response = response.json()
items = response.get('Items')
items_saved = create_prices(items=items)
page_count = 1
while response.get('NextPageLink'):
page_count += 1
_LOG.debug(
f'Processing page {page_count} for region: {region}. '
f'Region items saved: {items_saved}')
response = requests.get(response.get('NextPageLink'))
response = response.json()
items = response.get('Items')
items_saved += create_prices(items=items)
def create_prices(items):
from models.shape_price import ShapePrice
from mongoengine import NotUniqueError
filtered_items = []
for item in items:
if "Spot" in item.get('skuName'):
continue
if "Low Priority" in item.get('meterName'):
continue
filtered_items.append(item)
for item in filtered_items:
price = get_shape_price(resource=item)
try:
price.save()
except NotUniqueError:
_LOG.debug(f'Shape Price \'{price.name}\' already exist in '
f'region {price.region}, replacing.')
old_price = ShapePrice.objects.get(name=price.name,
customer=price.customer,
region=price.region,
os=price.os)
if old_price.on_demand != price.on_demand:
old_price.on_demand = price.on_demand
old_price.save()
return len(filtered_items)
def get_shape_price(resource: dict):
from models.shape_price import ShapePrice, OSEnum
from models.base_model import CloudEnum
is_windows = 'Windows' in resource.get('productName')
os_ = OSEnum.OS_WINDOWS if is_windows else OSEnum.OS_LINUX
return ShapePrice(
customer="DEFAULT",
cloud=CloudEnum.CLOUD_AZURE.value,
name=resource.get('armSkuName'),
region=resource.get('armRegionName'),
os=os_.value,
on_demand=resource.get('unitPrice')
)
def run_populate_prices(regions: list, workers: int, connection_uri: str):
_LOG.debug(f'Populating Azure Prices data')
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) \
as executor:
futures = []
for region in regions:
futures.append(
executor.submit(populate_prices,
region=region,
connection_uri=connection_uri))
for future in concurrent.futures.as_completed(futures):
_LOG.debug(f"Thread finished: {future.result()}")
def get_unique_by_name(resources: list):
names = []
result = []
for resource in resources:
if resource.name not in names:
names.append(resource.name)
result.append(resource)
return result
def get_shape_data(resource):
from models.shape import Shape
from models.base_model import CloudEnum
capabilities = {item.name: item.value for item in resource.capabilities}
return Shape(
name=resource.name,
cloud=CloudEnum.CLOUD_AZURE.value,
cpu=float(capabilities.get('vCPUs')),
memory=float(capabilities.get('MemoryGB')),
family_type=resource.family,
architecture=capabilities.get('CpuArchitectureType')
)
def update_last_update_date():
from services.setting_service import SettingsService
from models.base_model import CloudEnum
setting_service = SettingsService()
setting = setting_service.update_shape_update_date(
cloud=CloudEnum.CLOUD_AZURE.value
)
print(f"Updated setting: {setting.value}")
def main():
_LOG.info("Parsing arguments")
parameters = parse_args()
_LOG.info('Exporting env variables')
export_args(**parameters)
allowed_actions = parameters.get('action')
if ACTION_SHAPE in allowed_actions:
_LOG.info('Populating shapes')
populate_shapes()
if ACTION_PRICE in allowed_actions:
_LOG.info('Populating Prices')
run_populate_prices(
regions=parameters.get('price_region',
AZURE_REGIONS),
workers=parameters.get('concurrent_workers',
DEFAULT_CONCURRENT_WORKERS),
connection_uri=parameters.get('r8s_mongodb_connection_uri')
)
update_last_update_date()
if __name__ == "__main__":
main()