From ba362da0e34939d49b161d8884151b1e773aef35 Mon Sep 17 00:00:00 2001 From: kukushking Date: Thu, 28 Sep 2023 21:42:41 +0100 Subject: [PATCH 1/4] feat: Redshift data api - allow all auth combinations (#2475) * feat: Redshift data api - allow all auth combinations Signed-off-by: Anton Kukushkin * Better error message Signed-off-by: Anton Kukushkin * Formatting Signed-off-by: Anton Kukushkin --------- Signed-off-by: Anton Kukushkin --- awswrangler/data_api/redshift.py | 25 ++++++++++++++----------- tests/unit/test_data_api.py | 17 +++++++++++++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/awswrangler/data_api/redshift.py b/awswrangler/data_api/redshift.py index 8988d5340..707185b48 100644 --- a/awswrangler/data_api/redshift.py +++ b/awswrangler/data_api/redshift.py @@ -93,8 +93,12 @@ def _validate_redshift_target(self) -> None: raise ValueError("Either `cluster_id` or `workgroup_name`(Redshift Serverless) must be set for connection") def _validate_auth_method(self) -> None: - if not self.workgroup_name and not self.secret_arn and not self.db_user: - raise ValueError("Either `secret_arn` or `db_user` must be set for authentication") + if not self.workgroup_name and not self.secret_arn and not self.db_user and not self.cluster_id: + raise exceptions.InvalidArgumentCombination( + "Either `secret_arn`, `workgroup_name`, `db_user`, or `cluster_id` must be set for authentication." + ) + if self.db_user and self.secret_arn: + raise exceptions.InvalidArgumentCombination("Only one of `secret_arn` or `db_user` is allowed.") def _execute_statement( self, @@ -110,26 +114,25 @@ def _execute_statement( self._validate_redshift_target() self._validate_auth_method() - credentials = {} + args = {} if self.secret_arn: - credentials = {"SecretArn": self.secret_arn} - elif self.db_user: - credentials = {"DbUser": self.db_user} + args["SecretArn"] = self.secret_arn + if self.db_user: + args["DbUser"] = self.db_user if database is None: database = self.database if self.cluster_id: - redshift_target = {"ClusterIdentifier": self.cluster_id} - elif self.workgroup_name: - redshift_target = {"WorkgroupName": self.workgroup_name} + args["ClusterIdentifier"] = self.cluster_id + if self.workgroup_name: + args["WorkgroupName"] = self.workgroup_name _logger.debug("Executing %s", sql) response = self.client.execute_statement( - **redshift_target, # type: ignore[arg-type] Database=database, Sql=sql, - **credentials, # type: ignore[arg-type] + **args, # type: ignore[arg-type] ) return response["Id"] diff --git a/tests/unit/test_data_api.py b/tests/unit/test_data_api.py index a2af98326..06134bd60 100644 --- a/tests/unit/test_data_api.py +++ b/tests/unit/test_data_api.py @@ -54,6 +54,23 @@ def test_connect_redshift_serverless_iam_role(databases_parameters: Dict[str, An assert df.shape == (1, 1) +def test_connect_redshift_cluster_iam_role(databases_parameters: Dict[str, Any]) -> None: + cluster_id = databases_parameters["redshift"]["identifier"] + database = databases_parameters["redshift"]["database"] + con = wr.data_api.redshift.connect(cluster_id=cluster_id, database=database, boto3_session=None) + df = wr.data_api.redshift.read_sql_query("SELECT 1", con=con) + assert df.shape == (1, 1) + + +def test_connect_redshift_cluster_db_user(databases_parameters: Dict[str, Any]) -> None: + cluster_id = databases_parameters["redshift"]["identifier"] + database = databases_parameters["redshift"]["database"] + db_user = databases_parameters["user"] + con = wr.data_api.redshift.connect(cluster_id=cluster_id, database=database, db_user=db_user, boto3_session=None) + df = wr.data_api.redshift.read_sql_query("SELECT 1", con=con) + assert df.shape == (1, 1) + + def test_connect_redshift_serverless_secrets_manager(databases_parameters: Dict[str, Any]) -> None: workgroup_name = databases_parameters["redshift_serverless"]["workgroup"] database = databases_parameters["redshift_serverless"]["database"] From 47f220e1ca7bb21ba28479517085f9149d071789 Mon Sep 17 00:00:00 2001 From: Abdel Jaidi Date: Fri, 29 Sep 2023 13:08:16 +0100 Subject: [PATCH 2/4] [skip ci] - fix: revert coverage to 87 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 9d877088c..fa4ce267d 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ passenv = AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN setenv = - COV_FAIL_UNDER = 82.00 + COV_FAIL_UNDER = 87.00 allowlist_externals = poetry commands_pre = poetry install --no-root --sync --extras "deltalake gremlin mysql opencypher opensearch oracle postgres redshift sparql sqlserver geopandas" From 8eb07778805b6248b1771bb5d7d3f2a8e8fe015d Mon Sep 17 00:00:00 2001 From: kukushking Date: Fri, 29 Sep 2023 14:43:22 +0100 Subject: [PATCH 3/4] docs: Update layers.rst - add cn (#2477) --- docs/source/layers.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/source/layers.rst b/docs/source/layers.rst index ea187c8f2..c118c9bb4 100644 --- a/docs/source/layers.rst +++ b/docs/source/layers.rst @@ -408,3 +408,15 @@ Version 3.4.0 +----------------+--------+-------+-----------------------------------------------------------------------------------+ | me-south-1 | 3.9 | arm64 | arn:aws:lambda:me-south-1:938046470361:layer:AWSSDKPandas-Python39-Arm64:1 | +----------------+--------+-------+-----------------------------------------------------------------------------------+ +| cn-north-1 | 3.10 | x86_64| arn:aws-cn:lambda:cn-north-1:406640652441:layer:AWSSDKPandas-Python310:1 | ++----------------+--------+-------+-----------------------------------------------------------------------------------+ +| cn-north-1 | 3.8 | x86_64| arn:aws-cn:lambda:cn-north-1:406640652441:layer:AWSSDKPandas-Python38:1 | ++----------------+--------+-------+-----------------------------------------------------------------------------------+ +| cn-north-1 | 3.9 | x86_64| arn:aws-cn:lambda:cn-north-1:406640652441:layer:AWSSDKPandas-Python39:1 | ++----------------+--------+-------+-----------------------------------------------------------------------------------+ +| cn-northwest-1 | 3.10 | x86_64| arn:aws-cn:lambda:cn-northwest-1:406640652441:layer:AWSSDKPandas-Python310:1 | ++----------------+--------+-------+-----------------------------------------------------------------------------------+ +| cn-northwest-1 | 3.8 | x86_64| arn:aws-cn:lambda:cn-northwest-1:406640652441:layer:AWSSDKPandas-Python38:1 | ++----------------+--------+-------+-----------------------------------------------------------------------------------+ +| cn-northwest-1 | 3.9 | x86_64| arn:aws-cn:lambda:cn-northwest-1:406640652441:layer:AWSSDKPandas-Python39:1 | ++----------------+--------+-------+-----------------------------------------------------------------------------------+ From cc2d074c1277bb5d137000e97f8a7968c50e6e58 Mon Sep 17 00:00:00 2001 From: mfrench Date: Wed, 27 Sep 2023 17:08:54 -0500 Subject: [PATCH 4/4] Add columns comments to wr.athena.to_iceberg --- awswrangler/athena/_write_iceberg.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/awswrangler/athena/_write_iceberg.py b/awswrangler/athena/_write_iceberg.py index 5d5a790dc..c960384c4 100644 --- a/awswrangler/athena/_write_iceberg.py +++ b/awswrangler/athena/_write_iceberg.py @@ -35,12 +35,18 @@ def _create_iceberg_table( kms_key: Optional[str] = None, boto3_session: Optional[boto3.Session] = None, dtype: Optional[Dict[str, str]] = None, + columns_comments: Optional[Dict[str, str]] = None, ) -> None: if not path: raise exceptions.InvalidArgumentValue("Must specify table location to create the table.") columns_types, _ = catalog.extract_athena_types(df=df, index=index, dtype=dtype) - cols_str: str = ", ".join([f"{k} {v}" for k, v in columns_types.items()]) + cols_str: str = ", ".join( + [ + f"{k} {v}" if columns_comments.get(k) is None else f"{k} {v} COMMENT \'{columns_comments.get(k)}\'" + for k, v in columns_types.items() + ] + ) partition_cols_str: str = f"PARTITIONED BY ({', '.join([col for col in partition_cols])})" if partition_cols else "" table_properties_str: str = ( ", " + ", ".join([f"'{key}'='{value}'" for key, value in additional_table_properties.items()]) @@ -196,6 +202,7 @@ def to_iceberg( dtype: Optional[Dict[str, str]] = None, catalog_id: Optional[str] = None, schema_evolution: bool = False, + columns_comments: Optional[Dict[str, str]] = None, ) -> None: """ Insert into Athena Iceberg table using INSERT INTO ... SELECT. Will create Iceberg table if it does not exist. @@ -252,6 +259,8 @@ def to_iceberg( If none is provided, the AWS account ID is used by default schema_evolution: bool If True allows schema evolution for new columns or changes in column types. + columns_comments: Optional[Dict[str, str]] + Glue/Athena catalog: Columns names and the related comments (e.g. {'col0': 'Column 0.', 'col1': 'Column 1.', 'col2': 'Partition.'}) Returns ------- @@ -314,6 +323,7 @@ def to_iceberg( kms_key=kms_key, boto3_session=boto3_session, dtype=dtype, + columns_comments=columns_comments, ) else: schema_differences = _determine_differences( @@ -352,6 +362,7 @@ def to_iceberg( s3_additional_kwargs=s3_additional_kwargs, dtype=dtype, catalog_id=catalog_id, + glue_table_settings={"columns_comments": columns_comments} ) # Insert into iceberg table