diff --git a/README.md b/README.md index e967559f..a43cf3d0 100644 --- a/README.md +++ b/README.md @@ -16,28 +16,24 @@ This toolkit is built on top of Azure Batch but does not require any Azure Batch - Ability to run _spark submit_ directly from your local machine's CLI ## Setup -1. Clone the repo -```bash - git clone -b stable https://www.github.com/azure/aztk - # You can also clone directly from master to get the latest bits - git clone https://www.github.com/azure/aztk -``` -2. Use pip to install required packages (requires python 3.5+ and pip 9.0.1+) -```bash - pip install -r requirements.txt -``` -3. Use setuptools: +1. Install `aztk` with pip: ```bash - pip install -e . + pip install aztk ``` -4. Initialize the project in a directory [This will automatically create a *.aztk* folder with config files in your working directory]: +2. Initialize the project in a directory. This will automatically create a *.aztk* folder with config files in your working directory: ```bash aztk spark init ``` -5. Fill in the fields for your Batch account and Storage account in your *.aztk/secrets.yaml* file. (We'd also recommend that you enter SSH key info in this file) +3. Login or register for an [Azure Account](https://azure.microsoft.com), navigate to [Azure Cloud Shell](https://shell.azure.com), and run: +```sh +wget -q https://raw.githubusercontent.com/Azure/aztk/master/account_setup.sh -O account_setup.sh && +chmod 755 account_setup.sh && +/bin/bash account_setup.sh +``` + +4. Follow the on-screen prompts to create the necessary Azure resources and copy the output into your `.aztk/secrets.yaml` file. For more information, see [Getting Started Script](./docs/01-getting-started-script.md). - This package is built on top of two core Azure services, [Azure Batch](https://azure.microsoft.com/en-us/services/batch/) and [Azure Storage](https://azure.microsoft.com/en-us/services/storage/). Create those resources via the portal (see [Getting Started](./docs/00-getting-started.md)). 
## Quickstart Guide
@@ -55,7 +51,7 @@ aztk spark cluster list
 aztk spark cluster delete
 ```
 ```sh
-# login and submit jobs to your cluster
+# login and submit applications to your cluster
 aztk spark cluster ssh
 aztk spark cluster submit
 ```
diff --git a/account_setup.py b/account_setup.py
new file mode 100644
index 00000000..2e1bd33a
--- /dev/null
+++ b/account_setup.py
@@ -0,0 +1,450 @@
+'''
+    Getting Started AZTK script
+'''
+import sys
+import threading
+import time
+import uuid
+import yaml
+from azure.common import credentials
+from azure.graphrbac import GraphRbacManagementClient
+from azure.graphrbac.models import ApplicationCreateParameters, PasswordCredential, ServicePrincipalCreateParameters
+from azure.graphrbac.models.graph_error import GraphErrorException
+from azure.mgmt.authorization import AuthorizationManagementClient
+from azure.mgmt.batch import BatchManagementClient
+from azure.mgmt.batch.models import AutoStorageBaseProperties, BatchAccountCreateParameters
+from azure.mgmt.network import NetworkManagementClient
+from azure.mgmt.network.models import AddressSpace, Subnet, VirtualNetwork
+from azure.mgmt.resource import ResourceManagementClient
+from azure.mgmt.storage import StorageManagementClient
+from azure.mgmt.storage.models import Kind, Sku, SkuName, StorageAccountCreateParameters
+from azure.mgmt.subscription import SubscriptionClient
+from datetime import datetime, timezone
+from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD
+from msrestazure.azure_exceptions import CloudError
+
+
+class AccountSetupError(Exception):
+    """Raised when account setup cannot continue (creation failed or a prompt was declined)."""
+
+
+class DefaultSettings():
+    """Default resource names and region offered at the interactive prompts."""
+    resource_group = 'aztk'
+    storage_account = 'aztkstorage'
+    batch_account = 'aztkbatch'
+    virtual_network_name = "aztkvnet"
+    subnet_name = 'aztksubnet'
+    application_name = 'aztkapplication'
+    application_credential_name = 'aztkapplicationcredential'
+    service_principal = 'aztksp'
+    region = 'westus'
+
+
+def create_resource_group(credentials, subscription_id, **kwargs):
+    """
+    Create a resource group, re-prompting for the region when creation fails
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **region: str
+    :return: str, the ID of the created resource group
+    :raises AccountSetupError: if creation still fails on the third attempt
+    """
+    resource_client = ResourceManagementClient(credentials, subscription_id)
+    # NOTE(review): the result of this call is discarded; presumably a leftover, confirm before removing
+    resource_client.resource_groups.list()
+    for i in range(3):
+        try:
+            resource_group = resource_client.resource_groups.create_or_update(
+                resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+                parameters={
+                    'location': kwargs.get("region", DefaultSettings.region),
+                }
+            )
+            # success: return now instead of repeating the request on the remaining iterations
+            return resource_group.id
+        except CloudError as e:
+            if i == 2:
+                raise AccountSetupError(
+                    "Unable to create resource group in region {}".format(kwargs.get("region", DefaultSettings.region)))
+            print(e.message)
+            print("Please try again.")
+            # the prompt asks for a region, so the answer replaces the region (not the group name)
+            # NOTE(review): kwargs is local to this call, so the caller does not see the new region
+            kwargs["region"] = prompt_with_default("Azure Region", DefaultSettings.region)
+
+
+def create_storage_account(credentials, subscription_id, **kwargs):
+    """
+    Create a Storage account
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **storage_account: str
+    :param **region: str
+    :return: str, the ID of the created storage account
+    """
+    storage_management_client = StorageManagementClient(credentials, subscription_id)
+    storage_account = storage_management_client.storage_accounts.create(
+        resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+        account_name=kwargs.get("storage_account", DefaultSettings.storage_account),
+        parameters=StorageAccountCreateParameters(
+            sku=Sku(SkuName.standard_lrs),
+            kind=Kind.storage,
+            location=kwargs.get('region', DefaultSettings.region)
+        )
+    )
+    return storage_account.result().id
+
+
+def create_batch_account(credentials, subscription_id, **kwargs):
+    """
+    Create a Batch account that uses the given storage account as auto storage
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **batch_account: str
+    :param **region: str
+    :param **storage_account_id: str
+    :return: str, the ID of the created Batch account
+    :raises AccountSetupError: if no storage_account_id is supplied
+    """
+    storage_account_id = kwargs.get('storage_account_id')
+    if storage_account_id is None:
+        # there is no sensible default here; the region name used previously is not a resource ID
+        raise AccountSetupError("A storage_account_id is required to create the Batch account.")
+    batch_management_client = BatchManagementClient(credentials, subscription_id)
+    batch_account = batch_management_client.batch_account.create(
+        resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+        account_name=kwargs.get("batch_account", DefaultSettings.batch_account),
+        parameters=BatchAccountCreateParameters(
+            location=kwargs.get('region', DefaultSettings.region),
+            auto_storage=AutoStorageBaseProperties(
+                storage_account_id=storage_account_id
+            )
+        )
+    )
+    return batch_account.result().id
+
+
+def create_vnet(credentials, subscription_id, **kwargs):
+    """
+    Create a virtual network containing a single subnet
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **virtual_network_name: str
+    :param **subnet_name: str
+    :param **region: str
+    :return: str, the ID of the created subnet
+    :raises AccountSetupError: if the network already exists and the user declines to reuse it
+    """
+    network_client = NetworkManagementClient(credentials, subscription_id)
+    resource_group_name = kwargs.get("resource_group", DefaultSettings.resource_group)
+    virtual_network_name = kwargs.get("virtual_network_name", DefaultSettings.virtual_network_name)
+    subnet_name = kwargs.get("subnet_name", DefaultSettings.subnet_name)
+    # get vnet, and subnet if they exist
+    virtual_network = subnet = None
+    try:
+        virtual_network = network_client.virtual_networks.get(
+            resource_group_name=resource_group_name,
+            virtual_network_name=virtual_network_name,
+        )
+    except CloudError:
+        # any CloudError is treated as "not found"; the network is created below
+        pass
+
+    if virtual_network:
+        confirmation_prompt = "A virtual network with the same name ({}) was found. \n"\
+                              "Please note that the existing address space and subnets may be changed or destroyed. \n"\
+                              "Do you want to use this virtual network? (y/n): ".format(virtual_network_name)
+        deny_error = AccountSetupError("Virtual network already exists, not recreating.")
+        unrecognized_input_error = AccountSetupError("Input not recognized.")
+        prompt_for_confirmation(confirmation_prompt, deny_error, unrecognized_input_error)
+
+    virtual_network = network_client.virtual_networks.create_or_update(
+        resource_group_name=resource_group_name,
+        virtual_network_name=virtual_network_name,
+        parameters=VirtualNetwork(
+            location=kwargs.get("region", DefaultSettings.region),
+            address_space=AddressSpace(["10.0.0.0/24"])
+        )
+    )
+    virtual_network = virtual_network.result()
+    subnet = network_client.subnets.create_or_update(
+        resource_group_name=resource_group_name,
+        virtual_network_name=virtual_network_name,
+        subnet_name=subnet_name,
+        subnet_parameters=Subnet(
+            address_prefix='10.0.0.0/24'
+        )
+    )
+    return subnet.result().id
+
+
+def create_aad_user(credentials, tenant_id, **kwargs):
+    """
+    Create an AAD application and service principal, or reuse an existing application on request
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param tenant_id: str
+    :param **application_name: str
+    :return: tuple of (application id, service principal object id, application credential as str)
+    """
+    graph_rbac_client = GraphRbacManagementClient(
+        credentials,
+        tenant_id,
+        base_url=AZURE_PUBLIC_CLOUD.endpoints.active_directory_graph_resource_id
+    )
+    application_credential = uuid.uuid4()
+    display_name = kwargs.get("application_name", DefaultSettings.application_name)
+    try:
+        application = graph_rbac_client.applications.create(
+            parameters=ApplicationCreateParameters(
+                available_to_other_tenants=False,
+                identifier_uris=["http://{}.com".format(display_name)],
+                display_name=display_name,
+                password_credentials=[
+                    PasswordCredential(
+                        end_date=datetime(2299, 12, 31, 0, 0, 0, 0, tzinfo=timezone.utc),
+                        value=application_credential,
+                        key_id=uuid.uuid4()
+                    )
+                ]
+            )
+        )
+        service_principal = graph_rbac_client.service_principals.create(
+            ServicePrincipalCreateParameters(
+                app_id=application.app_id,
+                account_enabled=True
+            )
+        )
+    except GraphErrorException as e:
+        if e.inner_exception.code == "Request_BadRequest":
+            application = next(graph_rbac_client.applications.list(
+                filter="identifierUris/any(c:c eq 'http://{}.com')".format(display_name)))
+
+            confirmation_prompt = "Previously created application with name {} found. "\
+                                  "Would you like to use it? (y/n): ".format(application.display_name)
+            prompt_for_confirmation(confirmation_prompt, e, ValueError("Response not recognized. Please try again."))
+
+            service_principal = next(graph_rbac_client.service_principals.list(
+                filter="appId eq '{}'".format(application.app_id)))
+            # NOTE(review): the credential returned below was generated in this run and is never
+            # attached to the pre-existing application on this path; confirm it is usable
+        else:
+            raise
+
+    return application.app_id, service_principal.object_id, str(application_credential)
+
+
+def create_role_assignment(credentials, subscription_id, scope, principal_id):
+    """
+    Gives service principal contributor role authorization on scope
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param scope: str
+    :param principal_id: str
+    :raises CloudError: if the assignment still fails on the last attempt
+    """
+    authorization_client = AuthorizationManagementClient(credentials, subscription_id)
+    role_name = 'Contributor'
+    roles = list(authorization_client.role_definitions.list(
+        scope,
+        filter="roleName eq '{}'".format(role_name)
+    ))
+    contributor_role = roles[0]
+    max_attempts = 10
+    for i in range(max_attempts):
+        try:
+            authorization_client.role_assignments.create(
+                scope,
+                uuid.uuid4(),
+                {
+                    'role_definition_id': contributor_role.id,
+                    'principal_id': principal_id
+                }
+            )
+            break
+        except CloudError:
+            # the last attempt has index max_attempts - 1; the former `i == 10` check was never
+            # true inside range(10), so a persistent failure was silently ignored
+            if i == max_attempts - 1:
+                raise
+            # ignore error if service principal has not yet been created, then retry
+            time.sleep(1)
+
+
+def format_secrets(**kwargs):
+    '''
+    Returns the secrets for the created resources to be placed in secrets.yaml
+    The following form is returned:
+
+        service_principal:
+            tenant_id:
+            client_id:
+            credential:
+            batch_account_resource_id:
+            storage_account_resource_id:
+    '''
+    return yaml.dump({"service_principal": kwargs}, default_flow_style=False)
+
+
+def prompt_for_confirmation(prompt, deny_error, unrecognized_input_error):
+    """
+    Prompt user for confirmation, 'y' for confirm, 'n' for deny
+    :param prompt: str
+    :param deny_error: Exception
+    :param unrecognized_input_error: Exception
+    :return None if prompt successful, else raises error
+    """
+    confirmation = input(prompt).lower()
+    for i in range(3):
+        if confirmation == "n":
+            raise deny_error
+        elif confirmation == "y":
+            break
+        elif i == 2:
+            raise unrecognized_input_error
+        confirmation = input("Please input 'y' or 'n': ").lower()
+
+
+def prompt_with_default(key, value):
+    """
+    Prompt for a value, falling back to the default when the user enters nothing
+    :param key: str, label shown to the user
+    :param value: str, default shown in brackets
+    :return: str, the user's input or the default
+    """
+    user_value = input("{0} [{1}]: ".format(key, value))
+    return user_value if user_value != "" else value
+
+
+def prompt_tenant_selection(tenant_ids):
+    """
+    Ask the user to choose one of the given tenant IDs, allowing three attempts
+    :param tenant_ids: list of str
+    :return: str, the selected tenant ID
+    :raises AccountSetupError: if no listed tenant ID is entered within three attempts
+    """
+    print("Multiple tenants detected. Please input the ID of the tenant you wish to use.")
+    print("Tenants:", ", ".join(tenant_ids))
+    given_tenant_id = input("Please input the ID of the tenant you wish to use: ")
+    for i in range(3):
+        if given_tenant_id in tenant_ids:
+            return given_tenant_id
+        if i != 2:
+            given_tenant_id = input("Input not recognized, please try again: ")
+    raise AccountSetupError("Tenant selection not recognized after 3 attempts.")
+
+
+class Spinner:
+    """Context manager that animates a console spinner from a daemon thread while its body runs."""
+    busy = False
+    delay = 0.1
+
+    @staticmethod
+    def spinning_cursor():
+        while 1:
+            for cursor in '|/-\\': yield cursor
+
+    def __init__(self, delay=None):
+        self.spinner_generator = self.spinning_cursor()
+        if delay and float(delay): self.delay = delay
+
+    def __enter__(self):
+        return self.start()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        return self.stop()
+
+    def spinner_task(self):
+        while self.busy:
+            sys.stdout.write(next(self.spinner_generator))
+            sys.stdout.flush()
+            time.sleep(self.delay)
+            sys.stdout.write('\b')
+            sys.stdout.flush()
+
+    def start(self):
+        self.busy = True
+        threading.Thread(target=self.spinner_task, daemon=True).start()
+
+    def stop(self):
+        self.busy = False
+        time.sleep(self.delay)
+
+
+if __name__ == "__main__":
+    print("\nGetting credentials.")
+    # get credentials and tenant_id
+    creds, subscription_id = credentials.get_azure_cli_credentials()
+    subscription_client = SubscriptionClient(creds)
+    tenant_ids = [tenant.id for tenant in subscription_client.tenants.list()]
+    if len(tenant_ids) != 1:
+        tenant_id = prompt_tenant_selection(tenant_ids)
+    else:
+        tenant_id = tenant_ids[0]
+
+    print("Input the desired names and values for your Azure resources. "\
+          "Default values are provided in the brackets. "\
+          "Hit enter to use default.")
+    kwargs = {
+        "region": prompt_with_default("Azure Region", DefaultSettings.region),
+        "resource_group": prompt_with_default("Resource Group Name", DefaultSettings.resource_group),
+        "storage_account": prompt_with_default("Storage Account Name", DefaultSettings.storage_account),
+        "batch_account": prompt_with_default("Batch Account Name", DefaultSettings.batch_account),
+        # "virtual_network_name": prompt_with_default("Virtual Network Name", DefaultSettings.virtual_network_name),
+        # "subnet_name": prompt_with_default("Subnet Name", DefaultSettings.subnet_name),
+        "application_name": prompt_with_default("Active Directory Application Name", DefaultSettings.application_name),
+        "application_credential_name": prompt_with_default("Active Directory Application Credential Name", DefaultSettings.application_credential_name),
+        "service_principal": prompt_with_default("Service Principal Name", DefaultSettings.service_principal)
+    }
+    print("Creating the Azure resources.")
+
+    # create resource group
+    with Spinner():
+        resource_group_id = create_resource_group(creds, subscription_id, **kwargs)
+        kwargs["resource_group_id"] = resource_group_id
+    print("Created resource group.")
+
+    # create storage account
+    with Spinner():
+        storage_account_id = create_storage_account(creds, subscription_id, **kwargs)
+        kwargs["storage_account_id"] = storage_account_id
+    print("Created Storage account.")
+
+    # create batch account
+    with Spinner():
+        batch_account_id = create_batch_account(creds, subscription_id, **kwargs)
+    print("Created Batch account.")
+
+    # create vnet with a subnet
+    # subnet_id = create_vnet(creds, subscription_id)
+
+    # create AAD application and service principal
+    with Spinner():
+        profile = credentials.get_cli_profile()
+        aad_cred, _, tenant_id = profile.get_login_credentials(
+            resource=AZURE_PUBLIC_CLOUD.endpoints.active_directory_graph_resource_id
+        )
+
+        application_id, service_principal_object_id, application_credential = create_aad_user(aad_cred, tenant_id, **kwargs)
+    print("Created Azure Active Directory service principal.")
+
+    with Spinner():
+        create_role_assignment(creds, subscription_id, resource_group_id, service_principal_object_id)
+    print("Configured permissions.")
+
+    secrets = format_secrets(
+        **{
+            "tenant_id": tenant_id,
+            "client_id": application_id,
+            "credential": application_credential,
+            # "subnet_id": subnet_id,
+            "batch_account_resource_id": batch_account_id,
+            "storage_account_resource_id": storage_account_id
+        }
+    )
+
+    print("\n# Copy the following into your .aztk/secrets.yaml file\n{}".format(secrets))
diff --git a/account_setup.sh b/account_setup.sh
new file mode 100644
index 00000000..49b68a78
--- /dev/null
+++ b/account_setup.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+echo "Installing dependencies..." &&
+pip install --force-reinstall --upgrade --user pyyaml==3.12 azure==3.0.0 azure-cli-core==2.0.30 msrestazure==0.4.25 > /dev/null 2>&1 &&
+echo "Finished installing dependencies." &&
+echo "Getting account setup script..." &&
+wget -q https://raw.githubusercontent.com/Azure/aztk/master/account_setup.py -O account_setup.py &&
+chmod 755 account_setup.py &&
+echo "Finished getting account setup script." &&
+echo "Running account setup script..." &&
+python3 account_setup.py
diff --git a/docs/00-getting-started.md b/docs/00-getting-started.md
index 258931bb..2ec9a352 100644
--- a/docs/00-getting-started.md
+++ b/docs/00-getting-started.md
@@ -13,13 +13,8 @@ The minimum requirements to get started with this package are:
 
 2. Make sure you are running python 3.5 or greater. _If the default version on your machine is python 2 make sure to run the following commands with **pip3** instead of **pip**._
 
-3. Use pip to install required packages:
-    ```bash
-    pip install -r requirements.txt
+3. Install `aztk`:
     ```
-
-4. Use setuptools to install the required biaries locally:
-    ```bash
     pip install -e .
     ```
 5. Initialize your environment:
@@ -46,6 +41,11 @@ The minimum requirements to get started with this package are:
 
 
 ### Setting up your accounts
+
+#### Using the account setup script
+A script to create and configure the Azure resources required to use `aztk` is provided. For more information and usage, see [Getting Started Script](./01-getting-started-script.md).
+
+#### Manual resource creation
 To finish setting up, you need to fill out your Azure Batch and Azure Storage secrets in *.aztk/secrets.yaml*. We'd also recommend that you enter SSH key info in this file too.
 
 Please note that if you use ssh keys and a have a non-standard ssh key file name or path, you will need to specify the location of your ssh public and private keys. To do so, set them as shown below:
@@ -61,10 +61,10 @@ default:
 0. Log into Azure
     If you do not already have an Azure account, go to [https://azure.microsoft.com/](https://azure.microsoft.com/) to get started for free today.
 
-    Once you have one, simply log in and go to the [Azure Portal](https://portal.azure.com) to start creating your Azure Batch account and Azure Storage account.
+    Once you have one, simply log in and go to the [Azure Portal](https://portal.azure.com) to start creating your Azure Batch account and Azure Storage account.
-#### Using AAD +##### Using AAD To get the required keys for your Azure Active Directory (AAD) Service Principal, Azure Batch Account and Azure Storage Account, please follow these instructions. Note that this is the recommended path for use with AZTK, as some features require AAD and are disabled if using Shared Key authentication. 1. Register an Azure Active Directory (AAD) Application diff --git a/docs/01-getting-started-script.md b/docs/01-getting-started-script.md new file mode 100644 index 00000000..944415f8 --- /dev/null +++ b/docs/01-getting-started-script.md @@ -0,0 +1,44 @@ +# Getting Started Script + +The provided account setup script creates and configures all of the required Azure resources. + +The script will create and configure the following resources: +- Resource group +- Storage account +- Batch account +- Azure Active Directory application and service principal + + +The script outputs all of the necessary information to use `aztk`, just copy the output into the `.aztk/secrets.yaml` file created when running `aztk spark init`. + +## Usage +Copy and paste the following into an [Azure Cloud Shell](https://shell.azure.com): +```sh +wget -q https://raw.githubusercontent.com/Azure/aztk/master/account_setup.sh && +chmod 755 account_setup.sh && +/bin/bash account_setup.sh +``` +A series of prompts will appear, and you can set the values you desire for each field. Default values appear in brackets `[]` and will be used if no value is provided. 
+``` +Azure Region [westus]: +Resource Group Name [aztk]: +Storage Account Name [aztkstorage]: +Batch Account Name [aztkbatch]: +Active Directory Application Name [aztkapplication]: +Active Directory Application Credential Name [aztk]: +``` + +Once the script has finished running you will see the following output: + +``` +service_principal: + tenant_id: + client_id: + credential: + batch_account_resource_id: + storage_account_resource_id: +``` + +Copy the entire `service_principal` section in your `.aztk/secrets.yaml`. If you do not have a `secrets.yaml` file, you can create one in your current working directory by running `aztk spark init`. + +Now you are ready to create your first `aztk` cluster. See [Creating a Cluster](./10-clusters.md#creating-a-cluster). diff --git a/linux_memory.py b/linux_memory.py deleted file mode 100644 index f66a6464..00000000 --- a/linux_memory.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import print_function -import sys - -def memory_info(): - res = {} - for row in open('/proc/meminfo', 'r'): - k, v = row.split(':') - k = k.strip() - v = v.split() - if len(v) == 1: - v = int(v[0]) - elif v[1] == 'kB': - v = int(v[0]) / 1024 - elif v[1] == 'mB': - v = int(v[0]) / 1024 / 1024 - res[k] = v - return res - -if __name__ == "__main__": - x = memory_info() - print('{} GB', x['MemAvailable']) - print('Set to 75% = {}', x['MemAvailable'] * 0.75) - print(x) - - -