From ba362da0e34939d49b161d8884151b1e773aef35 Mon Sep 17 00:00:00 2001
From: kukushking <kukushkin.anton@gmail.com>
Date: Thu, 28 Sep 2023 21:42:41 +0100
Subject: [PATCH 1/4] feat: Redshift data api - allow all auth combinations
 (#2475)

* feat: Redshift data api - allow all auth combinations

Signed-off-by: Anton Kukushkin <kukushkin.anton@gmail.com>

* Better error message

Signed-off-by: Anton Kukushkin <kukushkin.anton@gmail.com>

* Formatting

Signed-off-by: Anton Kukushkin <kukushkin.anton@gmail.com>

---------

Signed-off-by: Anton Kukushkin <kukushkin.anton@gmail.com>
---
 awswrangler/data_api/redshift.py | 25 ++++++++++++++-----------
 tests/unit/test_data_api.py      | 17 +++++++++++++++++
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/awswrangler/data_api/redshift.py b/awswrangler/data_api/redshift.py
index 8988d5340..707185b48 100644
--- a/awswrangler/data_api/redshift.py
+++ b/awswrangler/data_api/redshift.py
@@ -93,8 +93,12 @@ def _validate_redshift_target(self) -> None:
             raise ValueError("Either `cluster_id` or `workgroup_name`(Redshift Serverless) must be set for connection")
 
     def _validate_auth_method(self) -> None:
-        if not self.workgroup_name and not self.secret_arn and not self.db_user:
-            raise ValueError("Either `secret_arn` or `db_user` must be set for authentication")
+        if not self.workgroup_name and not self.secret_arn and not self.db_user and not self.cluster_id:
+            raise exceptions.InvalidArgumentCombination(
+                "Either `secret_arn`, `workgroup_name`, `db_user`, or `cluster_id` must be set for authentication."
+            )
+        if self.db_user and self.secret_arn:
+            raise exceptions.InvalidArgumentCombination("Only one of `secret_arn` or `db_user` is allowed.")
 
     def _execute_statement(
         self,
@@ -110,26 +114,25 @@ def _execute_statement(
 
         self._validate_redshift_target()
         self._validate_auth_method()
-        credentials = {}
+        args = {}
         if self.secret_arn:
-            credentials = {"SecretArn": self.secret_arn}
-        elif self.db_user:
-            credentials = {"DbUser": self.db_user}
+            args["SecretArn"] = self.secret_arn
+        if self.db_user:
+            args["DbUser"] = self.db_user
 
         if database is None:
             database = self.database
 
         if self.cluster_id:
-            redshift_target = {"ClusterIdentifier": self.cluster_id}
-        elif self.workgroup_name:
-            redshift_target = {"WorkgroupName": self.workgroup_name}
+            args["ClusterIdentifier"] = self.cluster_id
+        if self.workgroup_name:
+            args["WorkgroupName"] = self.workgroup_name
 
         _logger.debug("Executing %s", sql)
         response = self.client.execute_statement(
-            **redshift_target,  # type: ignore[arg-type]
             Database=database,
             Sql=sql,
-            **credentials,  # type: ignore[arg-type]
+            **args,  # type: ignore[arg-type]
         )
         return response["Id"]
 
diff --git a/tests/unit/test_data_api.py b/tests/unit/test_data_api.py
index a2af98326..06134bd60 100644
--- a/tests/unit/test_data_api.py
+++ b/tests/unit/test_data_api.py
@@ -54,6 +54,23 @@ def test_connect_redshift_serverless_iam_role(databases_parameters: Dict[str, An
     assert df.shape == (1, 1)
 
 
+def test_connect_redshift_cluster_iam_role(databases_parameters: Dict[str, Any]) -> None:
+    cluster_id = databases_parameters["redshift"]["identifier"]
+    database = databases_parameters["redshift"]["database"]
+    con = wr.data_api.redshift.connect(cluster_id=cluster_id, database=database, boto3_session=None)
+    df = wr.data_api.redshift.read_sql_query("SELECT 1", con=con)
+    assert df.shape == (1, 1)
+
+
+def test_connect_redshift_cluster_db_user(databases_parameters: Dict[str, Any]) -> None:
+    cluster_id = databases_parameters["redshift"]["identifier"]
+    database = databases_parameters["redshift"]["database"]
+    db_user = databases_parameters["user"]
+    con = wr.data_api.redshift.connect(cluster_id=cluster_id, database=database, db_user=db_user, boto3_session=None)
+    df = wr.data_api.redshift.read_sql_query("SELECT 1", con=con)
+    assert df.shape == (1, 1)
+
+
 def test_connect_redshift_serverless_secrets_manager(databases_parameters: Dict[str, Any]) -> None:
     workgroup_name = databases_parameters["redshift_serverless"]["workgroup"]
     database = databases_parameters["redshift_serverless"]["database"]

From 47f220e1ca7bb21ba28479517085f9149d071789 Mon Sep 17 00:00:00 2001
From: Abdel Jaidi <jaidisido@gmail.com>
Date: Fri, 29 Sep 2023 13:08:16 +0100
Subject: [PATCH 2/4] [skip ci] - fix: revert coverage to 87

---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 9d877088c..fa4ce267d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,7 +10,7 @@ passenv =
        AWS_SECRET_ACCESS_KEY
        AWS_SESSION_TOKEN
 setenv =
-       COV_FAIL_UNDER = 82.00
+       COV_FAIL_UNDER = 87.00
 allowlist_externals = poetry
 commands_pre =
        poetry install --no-root --sync --extras "deltalake gremlin mysql opencypher opensearch oracle postgres redshift sparql sqlserver geopandas"

From 8eb07778805b6248b1771bb5d7d3f2a8e8fe015d Mon Sep 17 00:00:00 2001
From: kukushking <kukushkin.anton@gmail.com>
Date: Fri, 29 Sep 2023 14:43:22 +0100
Subject: [PATCH 3/4] docs: Update layers.rst - add cn (#2477)

---
 docs/source/layers.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/source/layers.rst b/docs/source/layers.rst
index ea187c8f2..c118c9bb4 100644
--- a/docs/source/layers.rst
+++ b/docs/source/layers.rst
@@ -408,3 +408,15 @@ Version 3.4.0
 +----------------+--------+-------+-----------------------------------------------------------------------------------+
 | me-south-1     | 3.9    | arm64 | arn:aws:lambda:me-south-1:938046470361:layer:AWSSDKPandas-Python39-Arm64:1        |
 +----------------+--------+-------+-----------------------------------------------------------------------------------+
+| cn-north-1     | 3.10   | x86_64| arn:aws-cn:lambda:cn-north-1:406640652441:layer:AWSSDKPandas-Python310:1          |
++----------------+--------+-------+-----------------------------------------------------------------------------------+
+| cn-north-1     | 3.8    | x86_64| arn:aws-cn:lambda:cn-north-1:406640652441:layer:AWSSDKPandas-Python38:1           |
++----------------+--------+-------+-----------------------------------------------------------------------------------+
+| cn-north-1     | 3.9    | x86_64| arn:aws-cn:lambda:cn-north-1:406640652441:layer:AWSSDKPandas-Python39:1           |
++----------------+--------+-------+-----------------------------------------------------------------------------------+
+| cn-northwest-1 | 3.10   | x86_64| arn:aws-cn:lambda:cn-northwest-1:406640652441:layer:AWSSDKPandas-Python310:1      |
++----------------+--------+-------+-----------------------------------------------------------------------------------+
+| cn-northwest-1 | 3.8    | x86_64| arn:aws-cn:lambda:cn-northwest-1:406640652441:layer:AWSSDKPandas-Python38:1       |
++----------------+--------+-------+-----------------------------------------------------------------------------------+
+| cn-northwest-1 | 3.9    | x86_64| arn:aws-cn:lambda:cn-northwest-1:406640652441:layer:AWSSDKPandas-Python39:1       |
++----------------+--------+-------+-----------------------------------------------------------------------------------+

From cc2d074c1277bb5d137000e97f8a7968c50e6e58 Mon Sep 17 00:00:00 2001
From: mfrench <mfrench@bensonhill.com>
Date: Wed, 27 Sep 2023 17:08:54 -0500
Subject: [PATCH 4/4] Add columns comments to wr.athena.to_iceberg

---
 awswrangler/athena/_write_iceberg.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/awswrangler/athena/_write_iceberg.py b/awswrangler/athena/_write_iceberg.py
index 5d5a790dc..c960384c4 100644
--- a/awswrangler/athena/_write_iceberg.py
+++ b/awswrangler/athena/_write_iceberg.py
@@ -35,12 +35,18 @@ def _create_iceberg_table(
     kms_key: Optional[str] = None,
     boto3_session: Optional[boto3.Session] = None,
     dtype: Optional[Dict[str, str]] = None,
+    columns_comments: Optional[Dict[str, str]] = None,
 ) -> None:
     if not path:
         raise exceptions.InvalidArgumentValue("Must specify table location to create the table.")
 
     columns_types, _ = catalog.extract_athena_types(df=df, index=index, dtype=dtype)
-    cols_str: str = ", ".join([f"{k} {v}" for k, v in columns_types.items()])
+    cols_str: str = ", ".join(
+        [
+            f"{k} {v}" if columns_comments.get(k) is None else f"{k} {v} COMMENT \'{columns_comments.get(k)}\'"
+            for k, v in columns_types.items()
+        ]
+    )
     partition_cols_str: str = f"PARTITIONED BY ({', '.join([col for col in partition_cols])})" if partition_cols else ""
     table_properties_str: str = (
         ", " + ", ".join([f"'{key}'='{value}'" for key, value in additional_table_properties.items()])
@@ -196,6 +202,7 @@ def to_iceberg(
     dtype: Optional[Dict[str, str]] = None,
     catalog_id: Optional[str] = None,
     schema_evolution: bool = False,
+    columns_comments: Optional[Dict[str, str]] = None,
 ) -> None:
     """
     Insert into Athena Iceberg table using INSERT INTO ... SELECT. Will create Iceberg table if it does not exist.
@@ -252,6 +259,8 @@ def to_iceberg(
         If none is provided, the AWS account ID is used by default
     schema_evolution: bool
         If True allows schema evolution for new columns or changes in column types.
+    columns_comments: Optional[Dict[str, str]]
+        Glue/Athena catalog: Columns names and the related comments (e.g. {'col0': 'Column 0.', 'col1': 'Column 1.', 'col2': 'Partition.'})
 
     Returns
     -------
@@ -314,6 +323,7 @@ def to_iceberg(
                 kms_key=kms_key,
                 boto3_session=boto3_session,
                 dtype=dtype,
+                columns_comments=columns_comments,
             )
         else:
             schema_differences = _determine_differences(
@@ -352,6 +362,7 @@ def to_iceberg(
             s3_additional_kwargs=s3_additional_kwargs,
             dtype=dtype,
             catalog_id=catalog_id,
+            glue_table_settings={"columns_comments": columns_comments}
         )
 
         # Insert into iceberg table