From b1a4ffc2b97ba940e82d44a3b0f780c5a8d8aaf6 Mon Sep 17 00:00:00 2001 From: Tarun Goyal Date: Fri, 15 Feb 2019 11:51:02 +0530 Subject: [PATCH 01/23] SDK-335: GCP creds schema changes (#263) --- qds_sdk/cloud/gcp_cloud.py | 154 +++++++++++++++---------------------- tests/test_clusterv2.py | 29 +++---- 2 files changed, 74 insertions(+), 109 deletions(-) diff --git a/qds_sdk/cloud/gcp_cloud.py b/qds_sdk/cloud/gcp_cloud.py index 92413bd2..20897c8c 100644 --- a/qds_sdk/cloud/gcp_cloud.py +++ b/qds_sdk/cloud/gcp_cloud.py @@ -12,19 +12,16 @@ def __init__(self): self.storage_config = {} def set_cloud_config(self, - compute_client_id=None, - compute_project_id=None, - compute_client_email=None, - compute_private_key_id=None, - compute_private_key=None, + qsa_client_id=None, + customer_project_id=None, + qsa_client_email=None, + qsa_private_key_id=None, + qsa_private_key=None, + comp_client_email=None, + inst_client_email=None, use_account_compute_creds=None, gcp_region=None, gcp_zone=None, - storage_client_id=None, - storage_project_id=None, - storage_client_email=None, - storage_private_key_id=None, - storage_private_key=None, storage_disk_size_in_gb=None, storage_disk_count=None, storage_disk_type=None, @@ -34,31 +31,25 @@ def set_cloud_config(self, ''' Args: - compute_client_id: Compute client id for gcp cluster + qsa_client_id: Compute client id for gcp cluster - compute_project_id: Compute project id for gcp cluster + customer_project_id: Compute project id for gcp cluster - compute_client_email: Compute client email for gcp cluster + qsa_client_email: Compute client email for gcp cluster - compute_private_key_id: Compute private key id for gcp cluster + qsa_private_key_id: Compute private key id for gcp cluster - compute_private_key: Compute private key for gcp cluster + qsa_private_key: Compute private key for gcp cluster + + comp_client_email: Client compute service account email + + inst_client_email: Client storage/instance service account email use_account_compute_creds: Set it to true to use the account's compute credentials for all clusters of the account.The default value is false gcp_region: Region for gcp cluster - storage_client_id: Storage client id for gcp cluster - - storage_project_id: Storage project id for gcp cluster - - storage_client_email: Storage client email for gcp cluster - - storage_private_key_id: Storage private key id for gcp cluster - - storage_private_key: Storage private key for gcp cluster - bastion_node_public_dns: public dns name of the bastion node. Required only if cluster is in a private subnet. 
@@ -67,26 +58,27 @@ def set_cloud_config(self, subnet_id: Subnet id for gcp cluster ''' - self.set_compute_config(use_account_compute_creds, compute_client_id, compute_project_id, compute_client_email, - compute_private_key_id, compute_private_key) + self.set_compute_config(use_account_compute_creds, qsa_client_id, customer_project_id, qsa_client_email, + qsa_private_key_id, qsa_private_key, comp_client_email) self.set_location(gcp_region, gcp_zone) self.set_network_config(bastion_node_public_dns, vpc_id, subnet_id) - self.set_storage_config(storage_client_id, storage_project_id, storage_client_email, storage_private_key_id, - storage_private_key, storage_disk_size_in_gb, storage_disk_count, storage_disk_type) + self.set_storage_config(inst_client_email, storage_disk_size_in_gb, storage_disk_count, storage_disk_type) def set_compute_config(self, use_account_compute_creds=None, - compute_client_id=None, - compute_project_id=None, - compute_client_email=None, - compute_private_key_id=None, - compute_private_key=None): + qsa_client_id=None, + customer_project_id=None, + qsa_client_email=None, + qsa_private_key_id=None, + qsa_private_key=None, + comp_client_email=None): self.compute_config['use_account_compute_creds'] = use_account_compute_creds - self.compute_config['compute_client_id'] = compute_client_id - self.compute_config['compute_project_id'] = compute_project_id - self.compute_config['compute_client_email'] = compute_client_email - self.compute_config['compute_private_key_id'] = compute_private_key_id - self.compute_config['compute_private_key'] = compute_private_key + self.compute_config['qsa_client_id'] = qsa_client_id + self.compute_config['customer_project_id'] = customer_project_id + self.compute_config['qsa_client_email'] = qsa_client_email + self.compute_config['qsa_private_key_id'] = qsa_private_key_id + self.compute_config['qsa_private_key'] = qsa_private_key + self.compute_config['comp_client_email'] = comp_client_email def set_location(self, gcp_region=None, @@ -104,38 +96,27 @@ def set_network_config(self, self.network_config['subnet'] = subnet_id def set_storage_config(self, - storage_client_id=None, - storage_project_id=None, - storage_client_email=None, - storage_private_key_id=None, - storage_private_key=None, + inst_client_email=None, storage_disk_size_in_gb=None, storage_disk_count=None, storage_disk_type=None ): - self.storage_config['storage_client_id'] = storage_client_id - self.storage_config['storage_project_id'] = storage_project_id - self.storage_config['storage_client_email'] = storage_client_email - self.storage_config['storage_private_key_id'] = storage_private_key_id - self.storage_config['storage_private_key'] = storage_private_key + self.storage_config['inst_client_email'] = inst_client_email self.storage_config['disk_size_in_gb'] = storage_disk_size_in_gb self.storage_config['disk_count'] = storage_disk_count self.storage_config['disk_type'] = storage_disk_type def set_cloud_config_from_arguments(self, arguments): - self.set_cloud_config(compute_client_id=arguments.compute_client_id, - compute_project_id=arguments.compute_project_id, - compute_client_email=arguments.compute_client_email, - compute_private_key_id=arguments.compute_private_key_id, - compute_private_key=arguments.compute_private_key, + self.set_cloud_config(qsa_client_id=arguments.qsa_client_id, + customer_project_id=arguments.customer_project_id, + qsa_client_email=arguments.qsa_client_email, + qsa_private_key_id=arguments.qsa_private_key_id, + qsa_private_key=arguments.qsa_private_key, + 
inst_client_email=arguments.inst_client_email, + comp_client_email=arguments.comp_client_email, use_account_compute_creds=arguments.use_account_compute_creds, gcp_region=arguments.gcp_region, gcp_zone=arguments.gcp_zone, - storage_client_id=arguments.storage_client_id, - storage_project_id=arguments.storage_project_id, - storage_client_email=arguments.storage_client_email, - storage_private_key_id=arguments.storage_private_key_id, - storage_private_key=arguments.storage_private_key, storage_disk_size_in_gb=arguments.storage_disk_size_in_gb, storage_disk_count=arguments.storage_disk_count, storage_disk_type=arguments.storage_disk_type, @@ -157,26 +138,30 @@ def create_parser(self, argparser): action="store_false", default=None, help="to disable account compute credentials") - compute_config.add_argument("--compute-client-id", - dest="compute_client_id", + compute_config.add_argument("--qsa-client-id", + dest="qsa_client_id", default=None, - help="compute client id for gcp cluster") - compute_config.add_argument("--compute-project-id", - dest="compute_project_id", + help="qsa client id for gcp cluster") + compute_config.add_argument("--customer-project-id", + dest="customer_project_id", default=None, - help="compute project id for gcp cluster") - compute_config.add_argument("--compute-client-email", - dest="compute_client_email", + help="customer project id for gcp cluster") + compute_config.add_argument("--qsa-client-email", + dest="qsa_client_email", default=None, - help="compute client email for gcp cluster") - compute_config.add_argument("--compute-private-key-id", - dest="compute_private_key_id", + help="qsa client email for gcp cluster") + compute_config.add_argument("--qsa-private-key-id", + dest="qsa_private_key_id", default=None, - help="compute private key id for gcp cluster") - compute_config.add_argument("--compute-private-key", - dest="compute_private_key", + help="qsa private key id for gcp cluster") + compute_config.add_argument("--qsa-private-key", + dest="qsa_private_key", default=None, - help="compute private key for gcp cluster") + help="qsa private key for gcp cluster") + compute_config.add_argument("--compute-client-email", + dest="comp_client_email", + default=None, + help="client compute service account email") # location settings parser location_group = argparser.add_argument_group("location config settings") @@ -201,26 +186,11 @@ def create_parser(self, argparser): # storage config settings parser storage_config = argparser.add_argument_group("storage config settings") - storage_config.add_argument("--storage-client-id", - dest="storage_client_id", - default=None, - help="storage client id for gcp cluster") - storage_config.add_argument("--storage-project-id", - dest="storage_project_id", - default=None, - help="storage project id for gcp cluster") + storage_config.add_argument("--storage-client-email", - dest="storage_client_email", - default=None, - help="storage client email for gcp cluster") - storage_config.add_argument("--storage-private-key-id", - dest="storage_private_key_id", - default=None, - help="storage private key id for gcp cluster") - storage_config.add_argument("--storage-private-key", - dest="storage_private_key", + dest="inst_client_email", default=None, - help="storage private key for gcp cluster") + help="client storage service account email") storage_config.add_argument("--storage-disk-size-in-gb", dest="storage_disk_size_in_gb", default=None, diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index e8b4de78..f07a350b 100644 --- 
a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -313,24 +313,23 @@ def test_oracle_opc_network_config(self): def test_gcp_compute_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', - '--compute-client-id', 'xxx11', '--compute-project-id', 'www11', '--compute-client-email', - 'yyyy11', '--compute-private-key-id', 'zzz22', '--compute-private-key', 'aaa'] + '--qsa-client-id', 'xxx11', '--customer-project-id', 'www11', '--qsa-client-email', + 'yyyy11', '--qsa-private-key-id', 'zzz22', '--qsa-private-key', 'aaa'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': {'compute_config': - {'compute_private_key_id': 'zzz22', - 'compute_private_key': 'aaa', - 'compute_client_email': 'yyyy11', - 'compute_project_id': 'www11', - 'compute_client_id': 'xxx11'}}, + {'qsa_private_key_id': 'zzz22', + 'qsa_private_key': 'aaa', + 'qsa_client_email': 'yyyy11', + 'customer_project_id': 'www11', + 'qsa_client_id': 'xxx11'}}, 'cluster_info': {'label': ['test_label']}}) def test_gcp_storage_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', - '--storage-client-id', 'xxx11', '--storage-project-id', 'yyyy11', '--storage-client-email', 'www11', - '--storage-private-key-id', 'zzz22', '--storage-private-key', 'aaa', '--storage-disk-size-in-gb', 'aaa', + '--storage-client-email', 'aaa', '--storage-disk-size-in-gb', 'aaa', '--storage-disk-count', 'bbb', '--storage-disk-type', 'ccc' ] Qubole.cloud = None print_command() @@ -338,11 +337,7 @@ def test_gcp_storage_config(self): qds.main() Connection._api_call.assert_called_with('POST', 'clusters', {'cloud_config': {'storage_config': - {'storage_private_key_id': 'zzz22', - 'storage_private_key': 'aaa', - 'storage_client_email': 'www11', - 'storage_project_id': 'yyyy11', - 'storage_client_id': 'xxx11', + {'inst_client_email': 'aaa', 'disk_size_in_gb': 'aaa', 'disk_count': 'bbb', 'disk_type': 'ccc'}}, @@ -653,7 +648,7 @@ def test_oracle_opc_cloud_config(self): def test_gcp_cloud_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'update', '123', '--gcp-region', 'xxx', '--subnet-id', 'abc-subnet', - '--storage-client-id', 'xxx11', '--compute-client-id', 'yyyy11'] + '--storage-client-email', 'xxx11', '--qsa-client-id', 'yyyy11'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -661,8 +656,8 @@ def test_gcp_cloud_config(self): Connection._api_call.assert_called_with('PUT', 'clusters/123', {'cloud_config': {'network_config': {'subnet': 'abc-subnet'}, - 'compute_config': {'compute_client_id': 'yyyy11'}, - 'storage_config': {'storage_client_id': 'xxx11'}, + 'compute_config': {'qsa_client_id': 'yyyy11'}, + 'storage_config': {'inst_client_email': 'xxx11'}, 'location': {'region': 'xxx'} } }) From 3631d0e4d6b01677c25653092019faddbb794fc5 Mon Sep 17 00:00:00 2001 From: abhijithshankar93 <31067437+abhijithshankar93@users.noreply.github.com> Date: Thu, 21 Feb 2019 15:57:20 +0530 Subject: [PATCH 02/23] [SDK-338] Added option to enable Rubix (#265) --- bin/qds.py | 3 ++- qds_sdk/cluster.py | 21 ++++++++++++++++++--- qds_sdk/engine.py | 24 ++++++++++++++++++++---- tests/test_cluster.py | 37 +++++++++++++++++++++++++++++++++++++ tests/test_clusterv2.py | 14 +++++++++----- 5 files changed, 86 insertions(+), 13 deletions(-) diff --git a/bin/qds.py b/bin/qds.py index 
24a1b13b..fac8b439 100755 --- a/bin/qds.py +++ b/bin/qds.py @@ -314,7 +314,8 @@ def _create_cluster_info(arguments, api_version): is_ha=arguments.is_ha, env_name=arguments.env_name, python_version=arguments.python_version, - r_version=arguments.r_version) + r_version=arguments.r_version, + enable_rubix=arguments.enable_rubix) else: cluster_info = ClusterInfo(arguments.label, arguments.aws_access_key_id, diff --git a/qds_sdk/cluster.py b/qds_sdk/cluster.py index 504d9976..e6666227 100755 --- a/qds_sdk/cluster.py +++ b/qds_sdk/cluster.py @@ -304,6 +304,17 @@ def _parse_create_update(cls, args, action, api_version): dest="ebs_volume_size", type=int, help="Size of each EBS volume, in GB",) + enable_rubix_group = hadoop_group.add_mutually_exclusive_group() + enable_rubix_group.add_argument("--enable-rubix", + dest="enable_rubix", + action="store_true", + default=None, + help="Enable rubix for cluster", ) + enable_rubix_group.add_argument("--no-enable-rubix", + dest="enable_rubix", + action="store_false", + default=None, + help="Do not enable rubix for cluster", ) hadoop2 = hadoop_group.add_mutually_exclusive_group() hadoop2.add_argument("--use-hadoop2", @@ -1034,7 +1045,8 @@ def set_cluster_info(self, aws_access_key_id=None, is_ha=None, env_name=None, python_version=None, - r_version=None): + r_version=None, + enable_rubix=None): """ Kwargs: @@ -1159,6 +1171,7 @@ def set_cluster_info(self, aws_access_key_id=None, `r_version`: Version of R for environment. (For Spark clusters) + `enable_rubix`: Enable rubix on the cluster (For Presto clusters) """ self.disallow_cluster_termination = disallow_cluster_termination @@ -1169,7 +1182,7 @@ def set_cluster_info(self, aws_access_key_id=None, node_base_cooldown_period, node_spot_cooldown_period, root_volume_size) self.set_ec2_settings(aws_access_key_id, aws_secret_access_key, aws_region, aws_availability_zone, vpc_id, subnet_id, master_elastic_ip, bastion_node_public_dns, role_instance_profile) - self.set_hadoop_settings(custom_config, use_hbase, use_hadoop2, use_spark, use_qubole_placement_policy, is_ha) + self.set_hadoop_settings(custom_config, use_hbase, use_hadoop2, use_spark, use_qubole_placement_policy, is_ha, enable_rubix) self.set_spot_instance_settings(maximum_bid_price_percentage, timeout_for_request, maximum_spot_instance_percentage) self.set_stable_spot_instance_settings(stable_maximum_bid_price_percentage, stable_timeout_for_request, stable_allow_fallback) self.set_spot_block_settings(spot_block_duration) @@ -1230,13 +1243,15 @@ def set_hadoop_settings(self, custom_config=None, use_hadoop2=None, use_spark=None, use_qubole_placement_policy=None, - is_ha=None): + is_ha=None, + enable_rubix=None): self.hadoop_settings['custom_config'] = custom_config self.hadoop_settings['use_hbase'] = use_hbase self.hadoop_settings['use_hadoop2'] = use_hadoop2 self.hadoop_settings['use_spark'] = use_spark self.hadoop_settings['use_qubole_placement_policy'] = use_qubole_placement_policy self.hadoop_settings['is_ha'] = is_ha + self.hadoop_settings['enable_rubix'] = enable_rubix def set_spot_instance_settings(self, maximum_bid_price_percentage=None, timeout_for_request=None, diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index 811eaee5..ec041b44 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -28,7 +28,8 @@ def set_engine_config(self, dbtap_id=None, fernet_key=None, overrides=None, - is_ha=None): + is_ha=None, + enable_rubix=None): ''' Args: @@ -60,10 +61,11 @@ def set_engine_config(self, is_ha: Enabling HA config for cluster is_deeplearning : this 
is a deeplearning cluster config + enable_rubix: Enable rubix on the cluster ''' - self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, default_pool) + self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, default_pool, enable_rubix) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) self.set_airflow_settings(dbtap_id, fernet_key, overrides) @@ -81,11 +83,13 @@ def set_hadoop_settings(self, use_qubole_placement_policy=None, is_ha=None, fairscheduler_config_xml=None, - default_pool=None): + default_pool=None, + enable_rubix=None): self.hadoop_settings['custom_hadoop_config'] = custom_hadoop_config self.hadoop_settings['use_qubole_placement_policy'] = use_qubole_placement_policy self.hadoop_settings['is_ha'] = is_ha self.set_fairscheduler_settings(fairscheduler_config_xml, default_pool) + self.hadoop_settings['enable_rubix'] = enable_rubix def set_presto_settings(self, presto_version=None, @@ -123,7 +127,8 @@ def set_engine_config_settings(self, arguments): custom_spark_config=arguments.custom_spark_config, dbtap_id=arguments.dbtap_id, fernet_key=arguments.fernet_key, - overrides=arguments.overrides) + overrides=arguments.overrides, + enable_rubix=arguments.enable_rubix) @staticmethod def engine_parser(argparser): @@ -153,6 +158,17 @@ def engine_parser(argparser): default=None, help="Do not use Qubole Block Placement policy" + " for clusters with spot nodes", ) + enable_rubix_group = hadoop_settings_group.add_mutually_exclusive_group() + enable_rubix_group.add_argument("--enable-rubix", + dest="enable_rubix", + action="store_true", + default=None, + help="Enable rubix for cluster", ) + enable_rubix_group.add_argument("--no-enable-rubix", + dest="enable_rubix", + action="store_false", + default=None, + help="Do not enable rubix for cluster", ) fairscheduler_group = argparser.add_argument_group( "fairscheduler configuration options") diff --git a/tests/test_cluster.py b/tests/test_cluster.py index d16764ea..b06e1c08 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -1672,6 +1672,43 @@ def test_root_volume_size_invalid_v13(self): qds.main() + def test_use_enable_rubix_v13(self): + sys.argv = ['qds.py', '--version', 'v1.3', 'cluster', 'create', '--label', 'test_label', + '--access-key-id', 'aki', '--secret-access-key', 'sak', + '--enable-rubix'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'label': ['test_label'], + 'ec2_settings': {'compute_secret_key': 'sak', + 'compute_access_key': 'aki'}, + 'hadoop_settings': {'enable_rubix': True}, + }) + + def test_no_use_enable_rubix_v13(self): + sys.argv = ['qds.py', '--version', 'v1.3', 'cluster', 'create', '--label', 'test_label', + '--access-key-id', 'aki', '--secret-access-key', 'sak', + '--no-enable-rubix'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'label': ['test_label'], + 'ec2_settings': {'compute_secret_key': 'sak', + 'compute_access_key': 'aki'}, + 'hadoop_settings': {'enable_rubix': False}, + }) + + @unittest.skipIf(sys.version_info < (2, 7, 0), "Known failure on Python 2.6") + def test_conflict_enable_rubix_v13(self): + sys.argv = ['qds.py', '--version', 'v1.3', 'cluster', 'create', '--label', 'test_label', + '--access-key-id', 'aki', 
'--secret-access-key', 'sak', + '--enable-rubix', '--no-enable-rubix'] + print_command() + with self.assertRaises(SystemExit): + qds.main() + class TestClusterUpdate(QdsCliTestCase): def test_minimal(self): sys.argv = ['qds.py', 'cluster', 'update', '123'] diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index f07a350b..ef56ec04 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -372,7 +372,7 @@ def test_presto_engine_config(self): temp.write("config.properties:\na=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--flavour', 'presto', '--presto-custom-config', temp.name] + '--flavour', 'presto', '--enable-rubix' , '--presto-custom-config', temp.name] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -381,7 +381,10 @@ def test_presto_engine_config(self): {'engine_config': {'flavour': 'presto', 'presto_settings': { - 'custom_presto_config': 'config.properties:\na=1\nb=2'}}, + 'custom_presto_config': 'config.properties:\na=1\nb=2'}, + 'hadoop_settings':{ + 'enable_rubix': True + }}, 'cluster_info': {'label': ['test_label']}}) def test_spark_engine_config(self): @@ -667,8 +670,8 @@ def test_engine_config(self): temp.write("a=1\nb=2".encode("utf8")) temp.flush() sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123', - '--use-qubole-placement-policy', '--custom-hadoop-config', - temp.name] + '--use-qubole-placement-policy', '--enable-rubix', + '--custom-hadoop-config',temp.name] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -676,7 +679,8 @@ def test_engine_config(self): Connection._api_call.assert_called_with('PUT', 'clusters/123', {'engine_config': {'hadoop_settings': {'use_qubole_placement_policy': True, - 'custom_hadoop_config': 'a=1\nb=2'}} + 'custom_hadoop_config': 'a=1\nb=2', + 'enable_rubix': True}} }) def test_cluster_info(self): From 62468d2eea002889606db72a62904dad790500cd Mon Sep 17 00:00:00 2001 From: akaranjkar-qu <41927856+akaranjkar-qu@users.noreply.github.com> Date: Thu, 28 Feb 2019 21:23:04 -0800 Subject: [PATCH 03/23] SDK-323: GCP changes for preemptible nodes (#266) --- qds_sdk/cloud/gcp_cloud.py | 71 ++++++++++++++++++++++++++++++++++++-- tests/test_clusterv2.py | 40 +++++++++++++++++++++ 2 files changed, 108 insertions(+), 3 deletions(-) diff --git a/qds_sdk/cloud/gcp_cloud.py b/qds_sdk/cloud/gcp_cloud.py index 20897c8c..a2033246 100644 --- a/qds_sdk/cloud/gcp_cloud.py +++ b/qds_sdk/cloud/gcp_cloud.py @@ -10,6 +10,7 @@ def __init__(self): self.location = {} self.network_config = {} self.storage_config = {} + self.cluster_composition = {} def set_cloud_config(self, qsa_client_id=None, @@ -27,7 +28,12 @@ def set_cloud_config(self, storage_disk_type=None, bastion_node_public_dns=None, vpc_id=None, - subnet_id=None): + subnet_id=None, + master_preemptible=None, + min_nodes_preemptible=None, + min_nodes_preemptible_percentage=None, + autoscaling_nodes_preemptible=None, + autoscaling_nodes_preemptible_percentage=None): ''' Args: @@ -56,6 +62,16 @@ def set_cloud_config(self, vpc_id: Vpc id for gcp cluster subnet_id: Subnet id for gcp cluster + + master_preemptible: if the master node is preemptible + + min_nodes_preemptible: if the min nodes are preemptible + + min_nodes_preemptible_percentage: percentage of min nodes that are preemptible + + autoscaling_nodes_preemptible: if the autoscaling nodes are preemptible + + autoscaling_nodes_preemptible_percentage: percentage of autoscaling 
nodes that are preemptible ''' self.set_compute_config(use_account_compute_creds, qsa_client_id, customer_project_id, qsa_client_email, @@ -63,6 +79,8 @@ def set_cloud_config(self, self.set_location(gcp_region, gcp_zone) self.set_network_config(bastion_node_public_dns, vpc_id, subnet_id) self.set_storage_config(inst_client_email, storage_disk_size_in_gb, storage_disk_count, storage_disk_type) + self.set_cluster_composition(master_preemptible, min_nodes_preemptible, min_nodes_preemptible_percentage, + autoscaling_nodes_preemptible, autoscaling_nodes_preemptible_percentage) def set_compute_config(self, use_account_compute_creds=None, @@ -106,6 +124,21 @@ def set_storage_config(self, self.storage_config['disk_count'] = storage_disk_count self.storage_config['disk_type'] = storage_disk_type + def set_cluster_composition(self, + master_preemptible=None, + min_nodes_preemptible=None, + min_nodes_preemptible_percentage=None, + autoscaling_nodes_preemptible=None, + autoscaling_nodes_preemptible_percentage=None): + self.cluster_composition['master'] = {} + self.cluster_composition['master']['preemptible'] = master_preemptible + self.cluster_composition['min_nodes'] = {} + self.cluster_composition['min_nodes']['preemptible'] = min_nodes_preemptible + self.cluster_composition['min_nodes']['percentage'] = min_nodes_preemptible_percentage + self.cluster_composition['autoscaling_nodes'] = {} + self.cluster_composition['autoscaling_nodes']['preemptible'] = autoscaling_nodes_preemptible + self.cluster_composition['autoscaling_nodes']['percentage'] = autoscaling_nodes_preemptible_percentage + def set_cloud_config_from_arguments(self, arguments): self.set_cloud_config(qsa_client_id=arguments.qsa_client_id, customer_project_id=arguments.customer_project_id, @@ -122,7 +155,12 @@ def set_cloud_config_from_arguments(self, arguments): storage_disk_type=arguments.storage_disk_type, bastion_node_public_dns=arguments.bastion_node_public_dns, vpc_id=arguments.vpc_id, - subnet_id=arguments.subnet_id) + subnet_id=arguments.subnet_id, + master_preemptible=arguments.master_preemptible, + min_nodes_preemptible=arguments.min_nodes_preemptible, + min_nodes_preemptible_percentage=arguments.min_nodes_preemptible_percentage, + autoscaling_nodes_preemptible=arguments.autoscaling_nodes_preemptible, + autoscaling_nodes_preemptible_percentage=arguments.autoscaling_nodes_preemptible_percentage) def create_parser(self, argparser): # compute settings parser @@ -202,4 +240,31 @@ def create_parser(self, argparser): storage_config.add_argument("--storage-disk-type", dest="storage_disk_type", default=None, - help="disk type for gcp cluster") \ No newline at end of file + help="disk type for gcp cluster") + # cluster composition settings parser + cluster_composition = argparser.add_argument_group("cluster composition settings") + cluster_composition.add_argument("--master-preemptible", + dest="master_preemptible", + action="store_true", + default=None, + help="if the master node is preemptible") + cluster_composition.add_argument("--min-nodes-preemptible", + dest="min_nodes_preemptible", + action="store_true", + default=None, + help="if the min nodes are preemptible") + cluster_composition.add_argument("--min-nodes-preemptible-percentage", + dest="min_nodes_preemptible_percentage", + type=int, + default=None, + help="percentage of min nodes that are preemptible") + cluster_composition.add_argument("--autoscaling-nodes-preemptible", + dest="autoscaling_nodes_preemptible", + action="store_true", + default=None, + help="if the autoscaling 
nodes are preemptible") + cluster_composition.add_argument("--autoscaling-nodes-preemptible-percentage", + dest="autoscaling_nodes_preemptible_percentage", + type=int, + default=None, + help="percentage of autoscaling nodes that are preemptible") diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index ef56ec04..1b33f4e2 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -367,6 +367,46 @@ def test_gcp_location_config(self): 'zone': 'yyy'}}, 'cluster_info': {'label': ['test_label']}}) + def test_gcp_cluster_composition(self): + sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', + '--master-preemptible', + '--min-nodes-preemptible', '--min-nodes-preemptible-percentage', '50', + '--autoscaling-nodes-preemptible', '--autoscaling-nodes-preemptible-percentage', '75'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + { + 'cloud_config': { + 'cluster_composition': { + 'master': { + 'preemptible': True + }, + 'min_nodes': { + 'preemptible': True, + 'percentage': 50 + }, + 'autoscaling_nodes': { + 'preemptible': True, + 'percentage': 75 + } + } + }, + 'cluster_info': { + 'label': ['test_label'] + } + }) + + def test_gcp_cluster_composition_invalid(self): + sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'GCP', 'cluster', 'create', '--label', 'test_label', + '--master-preemptible', + '--min-nodes-preemptible', '--min-nodes-preemptible-percentage', 'invalid_value'] + Qubole.cloud = None + print_command() + with self.assertRaises(SystemExit): + qds.main() + def test_presto_engine_config(self): with tempfile.NamedTemporaryFile() as temp: temp.write("config.properties:\na=1\nb=2".encode("utf8")) From 756d5174cd041d6edc13883719e1486786cf756c Mon Sep 17 00:00:00 2001 From: mcarlsen Date: Tue, 19 Mar 2019 14:39:00 +0100 Subject: [PATCH 04/23] fixes #175. utf-8 errors in Python 3 (#208) Caused by block reads chopping multibyte utf-8 sequences in half. --- qds_sdk/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index d3c96def..80e48609 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -1375,9 +1375,9 @@ def _read_iteratively(key_instance, fp, delim): else: import io if isinstance(fp, io.TextIOBase): - fp.buffer.write(data.decode('utf-8').replace(chr(1), delim).encode('utf8')) + fp.buffer.write(data.replace(bytes([1]), delim.encode('utf8'))) elif isinstance(fp, io.BufferedIOBase) or isinstance(fp, io.RawIOBase): - fp.write(data.decode('utf8').replace(chr(1), delim).encode('utf8')) + fp.write(data.replace(bytes([1]), delim.encode('utf8'))) else: # Can this happen? Don't know what's the right thing to do in this case. 
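As an aside on the failure mode this patch describes, a standalone sketch (separate from the quoted hunk): the sample bytes and tab delimiter below are invented for illustration, but bytes([1]) and delim.encode('utf8') are the same calls the fix itself uses.

    # A fixed-size block read can end halfway through a multibyte UTF-8
    # sequence; decoding such a block raises UnicodeDecodeError.
    chunk = "é".encode("utf-8")        # b'\xc3\xa9': one character, two bytes
    try:
        chunk[:1].decode("utf-8")      # block boundary splits the sequence
    except UnicodeDecodeError:
        print("decode fails on a chopped multibyte sequence")

    # Replacing the ^A delimiter at the byte level sidesteps decoding
    # possibly-partial blocks entirely, which is what the fix does.
    data = b"col1\x01col2\x01col3"
    print(data.replace(bytes([1]), "\t".encode("utf8")))  # b'col1\tcol2\tcol3'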
pass From 8afa7fe487e4e79fef07a11ad5f0cb8f846a73ed Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Tue, 19 Mar 2019 19:12:53 +0530 Subject: [PATCH 05/23] SDK-344: Enable use of port argument for DbTaps #267 --- qds_sdk/dbtaps.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/qds_sdk/dbtaps.py b/qds_sdk/dbtaps.py index c5ded3c6..84941c21 100644 --- a/qds_sdk/dbtaps.py +++ b/qds_sdk/dbtaps.py @@ -72,8 +72,7 @@ def parsers(): help="Username") edit.add_argument("--password", dest="password", help="Password") - edit.add_argument("--port", dest="port", - help="Database Port") + edit.add_argument("--port", dest="port", help="Database Port") edit.add_argument("--type", dest="type", choices=["mysql","vertica","mongo","postgresql","redshift","sqlserver"], help="Type of database") edit.add_argument("--location", dest="location", choices=["us-east-1", "us-west-2", "ap-southeast-1", "eu-west-1", "on-premise"], @@ -107,7 +106,8 @@ def create(args): db_user=args.user, db_passwd=args.password, db_type=args.type, - db_location=args.location) + db_location=args.location, + port=args.port) return json.dumps(dbtap.attributes, sort_keys=True, indent=4) @@ -148,6 +148,8 @@ def edit(args): options["db_type"] = args.type if args.location is not None: options["db_location"] = args.location + if args.port is not None: + options["port"] = args.port tap = tap.edit(**options) return json.dumps(tap.attributes, sort_keys=True, indent=4) From c7b9cb6560e964b0e6d78b423b11dc8864cf75a3 Mon Sep 17 00:00:00 2001 From: Timofei Korostelev Date: Tue, 19 Mar 2019 06:52:01 -0700 Subject: [PATCH 06/23] Added exception instead of "pass" to avoid silently skipping of file write (#205) --- qds_sdk/commands.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index 80e48609..299160f7 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -1379,8 +1379,7 @@ def _read_iteratively(key_instance, fp, delim): elif isinstance(fp, io.BufferedIOBase) or isinstance(fp, io.RawIOBase): fp.write(data.replace(bytes([1]), delim.encode('utf8'))) else: - # Can this happen? Don't know what's the right thing to do in this case. - pass + raise ValueError('Only subclasses of io.TextIOBase or io.BufferedIOBase supported') except StopIteration: # Stream closes itself when the exception is raised return From ac95aafb3b576d11322d09686e55695bd1e1a1b1 Mon Sep 17 00:00:00 2001 From: Aaditya Sharma Date: Tue, 26 Mar 2019 17:15:17 +0530 Subject: [PATCH 07/23] SDK-339 : Added comments on how to use List command. (#269) --- qds_sdk/commands.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index 299160f7..2fa9cc27 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -76,6 +76,20 @@ def is_success(status): def list(cls, **kwargs): """ List a command by issuing a GET request to the /command endpoint + + Args: + `**kwargs`: Various parameters can be used to filter the commands such as: + * command_type - HiveQuery, PrestoQuery, etc. The types should be in title case. 
+ * status - failed, success, etc + * name + * command_id + * qbol_user_id + * command_source + * page + * cluster_label + * session_id, etc + + For example - Command.list(command_type = "HiveQuery", status = "success") """ conn = Qubole.agent() params = {} From 70b075e9d71fb36ac7c8c7c3ae5fd09c8bebfa8a Mon Sep 17 00:00:00 2001 From: akaranjkar-qu <41927856+akaranjkar-qu@users.noreply.github.com> Date: Mon, 1 Apr 2019 04:11:21 -0700 Subject: [PATCH 08/23] SDK-345: Add custom resource group and start-stop related params for Azure clusters (#268) --- qds_sdk/cloud/azure_cloud.py | 11 ++++-- qds_sdk/clusterv2.py | 68 ++++++++++++++++++++++++++++++++++-- tests/test_clusterv2.py | 53 ++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 4 deletions(-) diff --git a/qds_sdk/cloud/azure_cloud.py b/qds_sdk/cloud/azure_cloud.py index 682c58d4..435e4993 100755 --- a/qds_sdk/cloud/azure_cloud.py +++ b/qds_sdk/cloud/azure_cloud.py @@ -29,7 +29,8 @@ def set_cloud_config(self, vnet_resource_group_name=None, master_elastic_ip=None, master_static_nic_name=None, - master_static_public_ip_name=None): + master_static_public_ip_name=None, + resource_group_name=None): ''' Args: @@ -74,6 +75,7 @@ def set_cloud_config(self, master_static_public_ip_name: Name of Static Public Ip address that has to be attached to cluster's master node + resource_group_name: Resource group for cluster ''' self.set_compute_config(use_account_compute_creds, compute_tenant_id, @@ -86,6 +88,7 @@ def set_cloud_config(self, self.set_storage_config(storage_access_key, storage_account_name, disk_storage_account_name, disk_storage_account_resource_group_name) + self.resource_group_name = resource_group_name def set_compute_config(self, use_account_compute_creds=None, @@ -147,7 +150,8 @@ def set_cloud_config_from_arguments(self, arguments): subnet_name=arguments.subnet_name, vnet_resource_group_name=arguments.vnet_resource_group_name, master_static_nic_name=arguments.master_static_nic_name, - master_static_public_ip_name=arguments.master_static_public_ip_name) + master_static_public_ip_name=arguments.master_static_public_ip_name, + resource_group_name=arguments.resource_group_name) def create_parser(self, argparser): # compute settings parser @@ -204,6 +208,9 @@ def create_parser(self, argparser): network_config_group.add_argument("--master-static-public-ip-name", dest="master_static_public_ip_name", help="name of public IP to be attached to master node") + network_config_group.add_argument("--resource-group-name", + dest="resource_group_name", + help="resource group for cluster") # storage config settings parser storage_config = argparser.add_argument_group("storage config settings") storage_config.add_argument("--storage-access-key", diff --git a/qds_sdk/clusterv2.py b/qds_sdk/clusterv2.py index 95e7905a..eb3f07f6 100755 --- a/qds_sdk/clusterv2.py +++ b/qds_sdk/clusterv2.py @@ -105,7 +105,11 @@ def get_cluster_create_clone_update(arguments, action): image_uri_overrides=arguments.image_uri_overrides, env_name=arguments.env_name, python_version=arguments.python_version, - r_version=arguments.r_version) + r_version=arguments.r_version, + disable_cluster_pause=arguments.disable_cluster_pause, + paused_cluster_timeout_mins=arguments.paused_cluster_timeout_mins, + disable_autoscale_node_pause=arguments.disable_autoscale_node_pause, + paused_autoscale_node_timeout_mins=arguments.paused_autoscale_node_timeout_mins) # This will set cloud config settings cloud_config = Qubole.get_cloud() @@ -193,7 +197,11 @@ def set_cluster_info(self, 
image_uri_overrides=None, env_name=None, python_version=None, - r_version=None): + r_version=None, + disable_cluster_pause=None, + paused_cluster_timeout_mins=None, + disable_autoscale_node_pause=None, + paused_autoscale_node_timeout_mins=None): """ Args: @@ -290,6 +298,14 @@ def set_cluster_info(self, `r_version`: Version of R for environment. (For Spark clusters) + `disable_cluster_pause`: Disable cluster pause + + `paused_cluster_timeout_mins`: Paused cluster timeout in mins + + `disable_autoscale_node_pause`: Disable autoscale node pause + + `paused_autoscale_node_timeout_mins`: Paused autoscale node timeout in mins + Doc: For getting details about arguments http://docs.qubole.com/en/latest/rest-api/cluster_api/create-new-cluster.html#parameters @@ -327,6 +343,8 @@ def set_cluster_info(self, self.set_monitoring(enable_ganglia_monitoring, datadog_api_token, datadog_app_token) self.set_internal(image_uri_overrides) self.set_env_settings(env_name, python_version, r_version) + self.set_start_stop_settings(disable_cluster_pause, paused_cluster_timeout_mins, + disable_autoscale_node_pause, paused_autoscale_node_timeout_mins) def set_datadog_setting(self, datadog_api_token=None, @@ -392,6 +410,20 @@ def set_env_settings(self, env_name=None, python_version=None, r_version=None): self.cluster_info['env_settings']['python_version'] = python_version self.cluster_info['env_settings']['r_version'] = r_version + def set_start_stop_settings(self, + disable_cluster_pause=None, + paused_cluster_timeout_mins=None, + disable_autoscale_node_pause=None, + paused_autoscale_node_timeout_mins=None): + if disable_cluster_pause is not None: + disable_cluster_pause = int(disable_cluster_pause) + self.cluster_info['disable_cluster_pause'] = disable_cluster_pause + self.cluster_info['paused_cluster_timeout_mins'] = paused_cluster_timeout_mins + if disable_autoscale_node_pause is not None: + disable_autoscale_node_pause = int(disable_autoscale_node_pause) + self.cluster_info['disable_autoscale_node_pause'] = disable_autoscale_node_pause + self.cluster_info['paused_autoscale_node_timeout_mins'] = paused_autoscale_node_timeout_mins + @staticmethod def list_info_parser(argparser, action): argparser.add_argument("--id", dest="cluster_id", @@ -641,6 +673,38 @@ def cluster_info_parser(argparser, action): default=None, help="version of R in environment") + start_stop_group = argparser.add_argument_group("start stop settings") + start_stop_group.add_argument("--disable-cluster-pause", + dest="disable_cluster_pause", + action='store_true', + default=None, + help="disable cluster pause") + start_stop_group.add_argument("--no-disable-cluster-pause", + dest="disable_cluster_pause", + action='store_false', + default=None, + help="disable cluster pause") + start_stop_group.add_argument("--paused-cluster-timeout", + dest="paused_cluster_timeout_mins", + default=None, + type=int, + help="paused cluster timeout in min") + start_stop_group.add_argument("--disable-autoscale-node-pause", + dest="disable_autoscale_node_pause", + action='store_true', + default=None, + help="disable autoscale node pause") + start_stop_group.add_argument("--no-disable-autoscale-node-pause", + dest="disable_autoscale_node_pause", + action='store_false', + default=None, + help="disable autoscale node pause") + start_stop_group.add_argument("--paused-autoscale-node-timeout", + dest="paused_autoscale_node_timeout_mins", + default=None, + type=int, + help="paused autoscale node timeout in min") + class ClusterV2(Resource): rest_entity_path = "clusters" diff 
--git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 1b33f4e2..0f6f94b8 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -259,6 +259,19 @@ def test_azure_master_static_pip(self): 'master_static_public_ip_name':'pip1'}}, 'cluster_info': {'label': ['test_label']}}) + def test_azure_resource_group_name(self): + sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'AZURE', 'cluster', 'create', '--label', 'test_label', + '--resource-group-name', 'testrg'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'cloud_config': { + 'resource_group_name': 'testrg' + }, + 'cluster_info': {'label': ['test_label']}}) + def test_oracle_opc_compute_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_OPC', 'cluster', 'create', '--label', 'test_label', '--username', 'testusername', '--password', 'testpassword', @@ -585,6 +598,46 @@ def test_root_disk_size_invalid_v2(self): qds.main() + def test_disable_start_stop(self): + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--disable-cluster-pause', '--disable-autoscale-node-pause'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + { + 'cluster_info': { + 'label': ['test_label'], + 'disable_cluster_pause': 1, + 'disable_autoscale_node_pause': 1 + } + }) + def test_start_stop_timeouts(self): + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--no-disable-cluster-pause', '--paused-cluster-timeout', '30', + '--no-disable-autoscale-node-pause', '--paused-autoscale-node-timeout', '60'] + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + { + 'cluster_info': { + 'label': ['test_label'], + 'disable_cluster_pause': 0, + 'paused_cluster_timeout_mins': 30, + 'disable_autoscale_node_pause': 0, + 'paused_autoscale_node_timeout_mins': 60 + } + }) + + def test_start_stop_timeouts_invalid(self): + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--paused-cluster-timeout', 'invalid_value', '--paused-autoscale-node-timeout', 'invalid_value'] + print_command() + with self.assertRaises(SystemExit): + qds.main() + + class TestClusterUpdate(QdsCliTestCase): def test_minimal(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'update', '123'] From 7fb3292532b61d2c6e5e8fcaab62ae7873cc9c07 Mon Sep 17 00:00:00 2001 From: Aaditya Sharma Date: Fri, 19 Apr 2019 16:21:56 +0530 Subject: [PATCH 09/23] SDK-340 : Support for Airflow Version and Python Version (#270) --- qds_sdk/engine.py | 24 ++++++++++++++++++++++-- tests/test_clusterv2.py | 21 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index ec041b44..3070f699 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -28,6 +28,8 @@ def set_engine_config(self, dbtap_id=None, fernet_key=None, overrides=None, + airflow_version=None, + airflow_python_version=None, is_ha=None, enable_rubix=None): ''' @@ -59,6 +61,10 @@ def set_engine_config(self, overrides: Airflow configuration to override the default settings.Use the following syntax for overrides:
<section>.<property>=<value>\n<section>.<property>=<value>...
.=... + airflow_version: The airflow version. + + airflow_python_version: The python version for the environment on the cluster. + is_ha: Enabling HA config for cluster is_deeplearning : this is a deeplearning cluster config enable_rubix: Enable rubix on the cluster @@ -68,7 +74,7 @@ def set_engine_config(self, self.set_hadoop_settings(custom_hadoop_config, use_qubole_placement_policy, is_ha, fairscheduler_config_xml, default_pool, enable_rubix) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) - self.set_airflow_settings(dbtap_id, fernet_key, overrides) + self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) def set_fairscheduler_settings(self, fairscheduler_config_xml=None, @@ -106,10 +112,14 @@ def set_spark_settings(self, def set_airflow_settings(self, dbtap_id=None, fernet_key=None, - overrides=None): + overrides=None, + airflow_version="1.10.0", + airflow_python_version="2.7"): self.airflow_settings['dbtap_id'] = dbtap_id self.airflow_settings['fernet_key'] = fernet_key self.airflow_settings['overrides'] = overrides + self.airflow_settings['version'] = airflow_version + self.airflow_settings['airflow_python_version'] = airflow_python_version def set_engine_config_settings(self, arguments): custom_hadoop_config = util._read_file(arguments.custom_hadoop_config_file) @@ -128,6 +138,8 @@ def set_engine_config_settings(self, arguments): dbtap_id=arguments.dbtap_id, fernet_key=arguments.fernet_key, overrides=arguments.overrides, + airflow_version=arguments.airflow_version, + airflow_python_version=arguments.airflow_python_version, enable_rubix=arguments.enable_rubix) @staticmethod @@ -215,4 +227,12 @@ def engine_parser(argparser): dest="overrides", default=None, help="overrides for airflow cluster", ) + airflow_settings_group.add_argument("--airflow-version", + dest="airflow_version", + default=None, + help="airflow version for airflow cluster", ) + airflow_settings_group.add_argument("--airflow-python-version", + dest="airflow_python_version", + default=None, + help="python environment version for airflow cluster", ) diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 0f6f94b8..dadd5817 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -457,6 +457,27 @@ def test_spark_engine_config(self): 'custom_spark_config': 'spark-overrides'}}, 'cluster_info': {'label': ['test_label'],}}) + def test_airflow_engine_config(self): + with tempfile.NamedTemporaryFile() as temp: + temp.write("config.properties:\na=1\nb=2".encode("utf8")) + temp.flush() + sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', + '--flavour', 'airflow', '--dbtap-id', '1', '--fernet-key', '-1', '--overrides', 'airflow_overrides', '--airflow-version', '1.10.0', '--airflow-python-version', '2.7'] + Qubole.cloud = None + print_command() + Connection._api_call = Mock(return_value={}) + qds.main() + Connection._api_call.assert_called_with('POST', 'clusters', + {'engine_config': + {'flavour': 'airflow', + 'airflow_settings': { + 'dbtap_id': '1', + 'fernet_key': '-1', + 'overrides': 'airflow_overrides', + 'version': '1.10.0', + 'airflow_python_version': '2.7' + }}, + 'cluster_info': {'label': ['test_label'],}}) def test_persistent_security_groups_v2(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', From 0db727c18365640d8e7da8dc368fb307cf0faf82 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Wed, 8 
May 2019 11:04:40 +0530 Subject: [PATCH 10/23] SDK-350 MultiCloud Fix for Script Location Parameter (#272) --- .travis.yml | 2 +- qds_sdk/commands.py | 27 +++++++++------------------ qds_sdk/util.py | 8 ++++++++ tests/test_command.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2607b4b0..883a43ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ install: - pip install urllib3==1.22 requests - "python setup.py install" - pip install mock - - pip install pytest + - pip install pytest==3.2.0 - if [[ $TRAVIS_PYTHON_VERSION == 2.6 ]]; then pip install unittest2; fi # command to run tests script: py.test diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index 2fa9cc27..fe0ac9a3 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -9,9 +9,7 @@ from qds_sdk.resource import Resource from qds_sdk.exception import ParseError from qds_sdk.account import Account -from qds_sdk.util import GentleOptionParser -from qds_sdk.util import OptionParsingError -from qds_sdk.util import OptionParsingExit +from qds_sdk.util import GentleOptionParser, OptionParsingError, OptionParsingExit, _is_cloud_url from optparse import SUPPRESS_HELP import boto @@ -407,8 +405,7 @@ def parse(cls, args): "Both query and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file @@ -493,8 +490,7 @@ def parse(cls, args): "Both query and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file @@ -627,9 +623,8 @@ def validate_script_location(cls, options): else: raise ParseError("Invalid program type %s. Please choose one from python, scala, R or sql." 
% str(fileExtension), cls.optparser.format_help()) - - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + + if not _is_cloud_url(options.script_location): # script location is local file so set the program as the text from the file @@ -749,8 +744,7 @@ def parse(cls, args): "Both query and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file try: @@ -912,8 +906,7 @@ def parse(cls, args): "Both script and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file @@ -1011,8 +1004,7 @@ def parse(cls, args): "Both script and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file @@ -1333,8 +1325,7 @@ def parse(cls, args): "Both query and script_location cannot be specified", cls.optparser.format_help()) - if ((options.script_location.find("s3://") != 0) and - (options.script_location.find("s3n://") != 0)): + if not _is_cloud_url(options.script_location): # script location is local file diff --git a/qds_sdk/util.py b/qds_sdk/util.py index 1efe6865..b79eb70a 100755 --- a/qds_sdk/util.py +++ b/qds_sdk/util.py @@ -143,6 +143,7 @@ def underscore(word): return re.sub(r'\B((?<=[a-z])[A-Z]|[A-Z](?=[a-z]))', r'_\1', word).lower() + def _make_minimal(dictionary): """ This function removes all the keys whose value is either None or an empty @@ -159,6 +160,7 @@ def _make_minimal(dictionary): new_dict[key] = value return new_dict + def _read_file(file_path): file_content = None if file_path is not None: @@ -170,3 +172,9 @@ def _read_file(file_path): raise IOError("Unable to read %s: %s\n" % (file_path, str(e))) return file_content + +def _is_cloud_url(file_path): + cloud_prefixes = ('oci://', 'oraclebmc://', 'wasb://', 'gs://', 's3://', + 's3n://', 's3a://', 'swift://', 'adl://', 'abfs://', 'abfss://') + return file_path.startswith(cloud_prefixes) + diff --git a/tests/test_command.py b/tests/test_command.py index 80cefc8e..55198415 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -1,6 +1,7 @@ from __future__ import print_function import sys import os +import pytest if sys.version_info > (2, 7, 0): import unittest else: @@ -2054,5 +2055,33 @@ def test_result_failed_more_than_two_arguments(self): qds.main() +@pytest.mark.parametrize("script_location", [ + 'oci://some_path/file', 'oraclebmc://some_path/file', 'wasb://some_path/file', + 'gs://some_path/file', 's3://some_path/file', 's3n://some_path/file', + 's3a://some_path/file', 'swift://some_path/file', 'adl://some_path/file', + 'abfs://some_path/file', 'abfss://some_path/file']) +def test_submit_script_location_multi_cloud(script_location): + os.environ['QDS_API_TOKEN'] = 'dummy_token' + os.environ['QDS_API_URL'] = 'https://qds.api.url/api' + sys.argv = ['qds.py', 'hivecmd', 'submit', '--script_location', script_location, + '--tags', 'abc,def'] + print_command() + Connection._api_call = Mock(return_value={'id': 1234}) + qds.main() + Connection._api_call.assert_called_with('POST', 'commands', + {'macros': 
None, + 'hive_version': None, + 'label': None, + 'tags': ["abc", "def"], + 'sample_size': None, + 'name': None, + 'query': None, + 'command_type': 'HiveCommand', + 'can_notify': False, + 'script_location': script_location, + 'retry': 0, + 'pool': None}) + + if __name__ == '__main__': unittest.main() From d1cccf4bd8fd60c23edfd880934d987c8e486b5a Mon Sep 17 00:00:00 2001 From: Sumit Maheshwari Date: Tue, 14 May 2019 15:20:29 +0530 Subject: [PATCH 11/23] Release Version 1.11.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 176cab70..c5486fa4 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.11.0", + version="1.11.1", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 838819e186ce5a4860a8f0b0218cc97c52a814cd Mon Sep 17 00:00:00 2001 From: Sumit Maheshwari Date: Tue, 14 May 2019 15:54:51 +0530 Subject: [PATCH 12/23] SDK-XXX: Update readme --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 46a6ba14..3551794d 100644 --- a/README.rst +++ b/README.rst @@ -21,9 +21,9 @@ The SDK is available on `PyPI `_. From source ~~~~~~~~~~~ -* Download the source code: +* Get the source code: - - Either clone the project: ``git clone git@github.com:qubole/qds-sdk-py.git`` + - Either clone the project: ``git clone git@github.com:qubole/qds-sdk-py.git`` and checkout latest release tag from `Releases `_. - Or download one of the releases from https://github.com/qubole/qds-sdk-py/releases From 6951fe2b7691a337b7c1f19c9967f7e4f9710b91 Mon Sep 17 00:00:00 2001 From: Pulkit Chawla Date: Tue, 21 May 2019 11:01:03 +0530 Subject: [PATCH 13/23] SDK-352 : Add storage config params - block_volume_count and block_volume_size (#274) * SDK-352 : Add storage config params - block_volume_count and block_volume_size --- qds_sdk/cloud/oracle_bmc_cloud.py | 34 ++++++++++++++++++++++++++----- tests/test_clusterv2.py | 7 +++++-- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/qds_sdk/cloud/oracle_bmc_cloud.py b/qds_sdk/cloud/oracle_bmc_cloud.py index f71c9445..edefe0a4 100755 --- a/qds_sdk/cloud/oracle_bmc_cloud.py +++ b/qds_sdk/cloud/oracle_bmc_cloud.py @@ -29,7 +29,9 @@ def set_cloud_config(self, storage_tenant_id=None, storage_user_id=None, storage_key_finger_print=None, - storage_api_private_rsa_key=None): + storage_api_private_rsa_key=None, + block_volume_count=None, + block_volume_size=None): ''' Args: @@ -66,6 +68,10 @@ def set_cloud_config(self, storage_api_private_rsa_key: storage api private rsa key for oracle cluster + block_volume_count: count of block volumes to be mounted to an instance as reserved disks + + block_volume_size: It is the size (in GB) of each block volume to be mounted to an instance as reserved disk + ''' self.set_compute_config(use_account_compute_creds, compute_tenant_id, @@ -75,7 +81,8 @@ def set_cloud_config(self, self.set_network_config(vcn_id, subnet_id, compartment_id, image_id, availability_domain_info_map) self.set_storage_config(storage_tenant_id, storage_user_id, - storage_key_finger_print, storage_api_private_rsa_key) + storage_key_finger_print, storage_api_private_rsa_key, block_volume_count, + block_volume_size) def set_compute_config(self, use_account_compute_creds=None, @@ -115,11 +122,15 @@ def set_storage_config(self, storage_tenant_id=None, storage_user_id=None, storage_key_finger_print=None, - 
storage_api_private_rsa_key=None): + storage_api_private_rsa_key=None, + block_volume_count=None, + block_volume_size=None): self.storage_config['storage_tenant_id'] = storage_tenant_id self.storage_config['storage_user_id'] = storage_user_id self.storage_config['storage_key_finger_print'] = storage_key_finger_print self.storage_config['storage_api_private_rsa_key'] = storage_api_private_rsa_key + self.storage_config['block_volume_count'] = block_volume_count + self.storage_config['block_volume_size'] = block_volume_size def set_cloud_config_from_arguments(self, arguments): self.set_cloud_config(compute_tenant_id=arguments.compute_tenant_id, @@ -137,7 +148,10 @@ def set_cloud_config_from_arguments(self, arguments): storage_tenant_id=arguments.storage_tenant_id, storage_user_id=arguments.storage_user_id, storage_key_finger_print=arguments.storage_key_finger_print, - storage_api_private_rsa_key=arguments.storage_api_private_rsa_key) + storage_api_private_rsa_key=arguments.storage_api_private_rsa_key, + block_volume_count=arguments.block_volume_count, + block_volume_size=arguments.block_volume_size + ) def create_parser(self, argparser): # compute settings parser @@ -215,4 +229,14 @@ def create_parser(self, argparser): storage_config.add_argument("--storage-api-private-rsa-key", dest="storage_api_private_rsa_key", default=None, - help="storage api private rsa key for oracle cluster") \ No newline at end of file + help="storage api private rsa key for oracle cluster") + storage_config.add_argument("--block-volume-count", + dest="block_volume_count", + default=None, + help="count of block volumes to be mounted to an instance as reserved disks", + type=int) + storage_config.add_argument("--block-volume-size", + dest="block_volume_size", + default=None, + help="size (in GB) of each block volume to be mounted to an instance", + type=int) \ No newline at end of file diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index dadd5817..c1e0b413 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -114,7 +114,8 @@ def test_oracle_bmc_compute_config(self): def test_oracle_bmc_storage_config(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_BMC', 'cluster', 'create', '--label', 'test_label', '--storage-tenant-id', 'xxx11', '--storage-user-id', 'yyyy11', '--storage-key-finger-print', - 'zzz22', '--storage-api-private-rsa-key', 'aaa'] + 'zzz22', '--storage-api-private-rsa-key', 'aaa', '--block-volume-count', '1', + '--block-volume-size', '100'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -124,7 +125,9 @@ def test_oracle_bmc_storage_config(self): {'storage_key_finger_print': 'zzz22', 'storage_api_private_rsa_key': 'aaa', 'storage_user_id': 'yyyy11', - 'storage_tenant_id': 'xxx11'}}, + 'storage_tenant_id': 'xxx11', + 'block_volume_count': 1, + 'block_volume_size': 100}}, 'cluster_info': {'label': ['test_label']}}) def test_oracle_bmc_network_config(self): From f8e3548862ca31850dc439209b4a74050c1eddce Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Tue, 21 May 2019 12:57:44 +0530 Subject: [PATCH 14/23] SDK-354 Default to S3 Signature Version V4 (#273) Signature Version V2 will be deprecated by AWS which was the default for boto, configured it to use v4 signature for s3 authentication. 
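As an aside, the SigV4 setup this patch wires into get_results can be sketched standalone: the boto config calls and the connect_s3 host argument below mirror the hunk that follows, while the credentials and bucket name are placeholders.

    import boto

    # boto signs S3 requests with SigV4 only when the 's3' config
    # section explicitly asks for it.
    if not boto.config.has_section('s3'):
        boto.config.add_section('s3')
    boto.config.set('s3', 'use-sigv4', 'True')

    # SigV4 needs a concrete endpoint, hence the patch's fallback to
    # s3.amazonaws.com when the account returns no region_endpoint.
    conn = boto.connect_s3(aws_access_key_id='AKIA_PLACEHOLDER',
                           aws_secret_access_key='SECRET_PLACEHOLDER',
                           host='s3.amazonaws.com')
    bucket = conn.get_bucket('example-results-bucket')  # placeholder name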
--- qds_sdk/commands.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/qds_sdk/commands.py b/qds_sdk/commands.py index fe0ac9a3..fd715fd2 100755 --- a/qds_sdk/commands.py +++ b/qds_sdk/commands.py @@ -299,19 +299,17 @@ def get_results(self, fp=sys.stdout, inline=True, delim=None, fetch=True, qlog=N pass else: if fetch: + if not boto.config.has_section('s3'): + boto.config.add_section('s3') + boto.config.set('s3', 'use-sigv4', 'True') storage_credentials = conn.get(Account.credentials_rest_entity_path) - if storage_credentials['region_endpoint'] is not None: - boto_conn = boto.connect_s3(aws_access_key_id=storage_credentials['storage_access_key'], - aws_secret_access_key=storage_credentials['storage_secret_key'], - security_token = storage_credentials['session_token'], - host = storage_credentials['region_endpoint']) - else: - boto_conn = boto.connect_s3(aws_access_key_id=storage_credentials['storage_access_key'], - aws_secret_access_key=storage_credentials['storage_secret_key'], - security_token=storage_credentials['session_token']) - + host = storage_credentials['region_endpoint'] if storage_credentials['region_endpoint'] else "s3.amazonaws.com" + boto_conn = boto.connect_s3(aws_access_key_id=storage_credentials['storage_access_key'], + aws_secret_access_key=storage_credentials['storage_secret_key'], + security_token=storage_credentials['session_token'], + host=host) log.info("Starting download from result locations: [%s]" % ",".join(r['result_location'])) - #fetch latest value of num_result_dir + # fetch latest value of num_result_dir num_result_dir = Command.find(self.id).num_result_dir # If column/header names are not able to fetch then use include header as true From 35fe3343d7bfc760a96e92bc79c7a84f6d3c7fc5 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Tue, 21 May 2019 13:37:41 +0530 Subject: [PATCH 15/23] Release Version 1.12.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c5486fa4..12559c75 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.11.1", + version="1.12.0", author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 87989ceeca94c90b511c22b2c22898755cdeb814 Mon Sep 17 00:00:00 2001 From: Aaditya Sharma Date: Wed, 19 Jun 2019 15:04:00 +0530 Subject: [PATCH 16/23] SDK-109 Proper Error Message when results unavailable (#276) --- qds_sdk/connection.py | 5 ++++- qds_sdk/exception.py | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/qds_sdk/connection.py b/qds_sdk/connection.py index ede6c412..0398acb5 100644 --- a/qds_sdk/connection.py +++ b/qds_sdk/connection.py @@ -152,9 +152,12 @@ def _handle_error(response): elif code == 422: sys.stderr.write(response.text + "\n") raise ResourceInvalid(response) - elif code in (449, 502, 503, 504): + elif code in (502, 503, 504): sys.stderr.write(response.text + "\n") raise RetryWithDelay(response) + elif code == 449: + sys.stderr.write(response.text + "\n") + raise RetryWithDelay(response, "Data requested is unavailable. 
Retrying ...") elif 401 <= code < 500: sys.stderr.write(response.text + "\n") raise ClientError(response) diff --git a/qds_sdk/exception.py b/qds_sdk/exception.py index 5e361902..ef06a335 100644 --- a/qds_sdk/exception.py +++ b/qds_sdk/exception.py @@ -11,8 +11,9 @@ def __init__(self, message, usage): class Error(Exception): """A general error derived from Exception.""" - def __init__(self, request): - Exception.__init__(self, request.text) + def __init__(self, request, message = ""): + response = message if message else request.text + Exception.__init__(self, response) self.request = request From 915bdea725bc1c5de06caac5270dc24dfc8ffb50 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Mon, 29 Jul 2019 19:35:27 +0530 Subject: [PATCH 17/23] fix: dev: SDK-364: Oracle Multi AD changes required in qds-sdk --- .travis.yml | 1 + qds_sdk/cloud/oracle_bmc_cloud.py | 16 ++++++++++------ tests/test_clusterv2.py | 6 +++--- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 883a43ed..0ef565bc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,3 +15,4 @@ install: # command to run tests script: py.test env: BOTO_CONFIG=/tmp/nowhere +dist: trusty diff --git a/qds_sdk/cloud/oracle_bmc_cloud.py b/qds_sdk/cloud/oracle_bmc_cloud.py index edefe0a4..2f0ba903 100755 --- a/qds_sdk/cloud/oracle_bmc_cloud.py +++ b/qds_sdk/cloud/oracle_bmc_cloud.py @@ -1,5 +1,6 @@ from qds_sdk.cloud.cloud import Cloud import json +import ast class OracleBmcCloud(Cloud): ''' @@ -112,11 +113,8 @@ def set_network_config(self, self.network_config['subnet_id'] = subnet_id self.network_config['compartment_id'] = compartment_id self.network_config['image_id'] = image_id - if availability_domain_info_map and availability_domain_info_map.strip(): - try: - self.network_config['availability_domain_info_map'] = json.loads(availability_domain_info_map.strip()) - except Exception as e: - raise Exception("Invalid JSON string for availability domain info map: %s" % e.message) + if availability_domain_info_map: + self.network_config['availability_domain_info_map'] = availability_domain_info_map def set_storage_config(self, storage_tenant_id=None, @@ -133,6 +131,12 @@ def set_storage_config(self, self.storage_config['block_volume_size'] = block_volume_size def set_cloud_config_from_arguments(self, arguments): + if arguments.availability_domain_info_map: + try: + arguments.availability_domain_info_map = ast.literal_eval(arguments.availability_domain_info_map) + assert isinstance(arguments.availability_domain_info_map, list) + except Exception as e: + raise Exception("Invalid List format for availability_domain_info_map: %s" % e.message) self.set_cloud_config(compute_tenant_id=arguments.compute_tenant_id, compute_user_id=arguments.compute_user_id, compute_key_finger_print=arguments.compute_key_finger_print, @@ -239,4 +243,4 @@ def create_parser(self, argparser): dest="block_volume_size", default=None, help="size (in GB) of each block volume to be mounted to an instance", - type=int) \ No newline at end of file + type=int) diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index c1e0b413..eec62ade 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -148,7 +148,7 @@ def test_oracle_bmc_network_config(self): def test_oracle_bmc_network_config_az_info_map(self): sys.argv = ['qds.py', '--version', 'v2', '--cloud', 'ORACLE_BMC', 'cluster', 'create', '--label', 'test_label', '--compartment-id', 'abc-compartment', '--image-id', 'abc-image', '--vcn-id', 'vcn-1', - 
'--availability-domain-info-map', '{"availability_domain": "AD-1", "subnet_id": "subnet-1"}'] + '--availability-domain-info-map', str([{"availability_domain": "AD-1", "subnet_id": "subnet-1"}])] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -159,8 +159,8 @@ def test_oracle_bmc_network_config_az_info_map(self): 'compartment_id': 'abc-compartment', 'image_id': 'abc-image', 'availability_domain_info_map': - {'availability_domain': 'AD-1', - 'subnet_id': 'subnet-1'}}}, + [{'availability_domain': 'AD-1', + 'subnet_id': 'subnet-1'}]}}, 'cluster_info': {'label': ['test_label']}}) def test_oracle_bmc_location_config(self): From c5c035b6f0cea0feeaead4dddb01b2147d58d016 Mon Sep 17 00:00:00 2001 From: Ben Roubicek Date: Tue, 30 Jul 2019 23:38:53 -0700 Subject: [PATCH 18/23] Update requests and urllib library versions (#280) This fixes security vulnerabilities in current versions of these packages. --- .travis.yml | 3 +-- requirements.txt | 8 ++++++++ setup.py | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 requirements.txt diff --git a/.travis.yml b/.travis.yml index 0ef565bc..7478fed1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,12 @@ language: python python: - - "2.6" - "2.7" - "3.3" - "3.4" - "3.5" # command to install dependencies install: - - pip install urllib3==1.22 requests + - pip install -r requirements.txt - "python setup.py install" - pip install mock - pip install pytest==3.2.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..54393b3e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +boto==2.45.0 +certifi==2019.6.16 +chardet==3.0.4 +idna==2.8 +inflection==0.3.1 +requests==2.21.0 +six==1.12.0 +urllib3==1.24.3 diff --git a/setup.py b/setup.py index 12559c75..6d878420 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ import sys from setuptools import setup -INSTALL_REQUIRES = ['requests >=1.0.3', 'boto >=2.45.0', 'six >=1.2.0', 'urllib3 >= 1.0.2', 'inflection >= 0.3.1'] +INSTALL_REQUIRES = ['requests >=2.21.0', 'boto >=2.45.0', 'six >=1.12.0', 'urllib3 >= 1.24.3', 'inflection >= 0.3.1'] if sys.version_info < (2, 7, 0): INSTALL_REQUIRES.append('argparse>=1.1') From 76613276f9e273fa0e037994cf3f9e2583aa2169 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Wed, 31 Jul 2019 12:30:52 +0530 Subject: [PATCH 19/23] SDK-366 Drop Support for Python 2.6 (#281) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6d878420..211c8ee5 100644 --- a/setup.py +++ b/setup.py @@ -23,13 +23,13 @@ def read(fname): scripts=['bin/qds.py'], install_requires=INSTALL_REQUIRES, long_description=read('README.rst'), + python_requires='>=2.7', classifiers=[ "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", From 2d72f4be156746914c3a3aaae4bf1d68e6603079 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Wed, 31 Jul 2019 13:13:47 +0530 Subject: [PATCH 20/23] Release Version 1.13.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 211c8ee5..60d183c1 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ def read(fname): setup( name="qds_sdk", - version="1.12.0", + version="1.13.0", 
author="Qubole", author_email="dev@qubole.com", description=("Python SDK for coding to the Qubole Data Service API"), From 22880b24b10fe24b3a9bd638c374d1d2a7505f58 Mon Sep 17 00:00:00 2001 From: Sanket Saurav Date: Wed, 7 Aug 2019 18:57:41 +0530 Subject: [PATCH 21/23] DeepSource.io Integeration (#275) --- .deepsource.toml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000..f464e01a --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,14 @@ +version = 1 + +test_patterns = [ + 'tests/**' +] + +exclude_patterns = [ + 'example/**' +] + +[[analyzers]] +name = 'python' +enabled = true +runtime_version = '2.x.x' From 488e3e8432af4b068b75bb09556d8c7ea9423727 Mon Sep 17 00:00:00 2001 From: Joy Lal Chattaraj Date: Sat, 10 Aug 2019 23:13:04 +0530 Subject: [PATCH 22/23] DeepSource Integeration for Commit checks and Test Coverage (#282) --- .deepsource.toml | 5 +++++ .travis.yml | 10 +++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.deepsource.toml b/.deepsource.toml index f464e01a..41837d03 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -12,3 +12,8 @@ exclude_patterns = [ name = 'python' enabled = true runtime_version = '2.x.x' + +# Test coverage analyzer +[[analyzers]] +name = "test-coverage" +enabled = true \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 7478fed1..f2104f70 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,9 +9,13 @@ install: - pip install -r requirements.txt - "python setup.py install" - pip install mock - - pip install pytest==3.2.0 + - pip install pytest==3.2.0 pytest-cov==2.6.0 - if [[ $TRAVIS_PYTHON_VERSION == 2.6 ]]; then pip install unittest2; fi + - curl https://deepsource.io/cli | sh # command to run tests -script: py.test -env: BOTO_CONFIG=/tmp/nowhere +script: + - py.test --cov=./ --cov-report xml + - ./bin/deepsource report --analyzer test-coverage --key python --value-file ./coverage.xml +env: + - BOTO_CONFIG=/tmp/nowhere DEEPSOURCE_DSN=https://c9d4fb28ce6f41798861936c25b0361e@deepsource.io dist: trusty From 4816274743a9fd74308c3c924b4dd129ce25a930 Mon Sep 17 00:00:00 2001 From: Saiyam Agarwal Date: Thu, 24 Oct 2019 17:16:53 +0530 Subject: [PATCH 23/23] spark streaming cluster support --- qds_sdk/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index 3070f699..ee55e50d 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -147,7 +147,7 @@ def engine_parser(argparser): engine_group = argparser.add_argument_group("engine settings") engine_group.add_argument("--flavour", dest="flavour", - choices=["hadoop", "hadoop2", "presto", "spark", "hbase", "airflow", "deeplearning"], + choices=["hadoop", "hadoop2", "presto", "spark", "sparkstreaming", "hbase", "airflow", "deeplearning"], default=None, help="Set engine flavour")
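
For reference, a hypothetical end-to-end check of the new flavour, mirroring
the conventions of tests/test_clusterv2.py; the sys.path tweak, label, and
token below are assumptions for illustration, not part of this patch:

    import os
    import sys
    from mock import Mock

    sys.path.append('bin')  # assumed layout so `import qds` resolves bin/qds.py
    import qds
    from qds_sdk.connection import Connection

    os.environ['QDS_API_TOKEN'] = 'placeholder-token'  # placeholder credential

    # Ask for a cluster with the newly allowed engine flavour; the label is
    # a placeholder.
    sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create',
                '--label', 'streaming-cluster', '--flavour', 'sparkstreaming']
    Connection._api_call = Mock(return_value={})  # stub the API, as the tests do
    qds.main()

    # The mocked call should now receive a payload whose engine settings
    # carry the 'sparkstreaming' flavour.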