From 19eb0bb711339ed92ec7be411c5f32d974833255 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 21:08:05 +0800 Subject: [PATCH] [Enhancement] Make region is optional in iceberg rest catalog vended credentials (backport #53860) (#53874) Co-authored-by: Smith Cruise --- .../en/data_source/catalog/iceberg_catalog.md | 80 ++++++++++++++++--- .../zh/data_source/catalog/iceberg_catalog.md | 44 ++++++++-- .../credential/CloudConfigurationFactory.java | 12 ++- .../CloudConfigurationFactoryTest.java | 13 +++ 4 files changed, 131 insertions(+), 18 deletions(-) diff --git a/docs/en/data_source/catalog/iceberg_catalog.md b/docs/en/data_source/catalog/iceberg_catalog.md index dfe33fbebf667..63c67cf6b9b90 100644 --- a/docs/en/data_source/catalog/iceberg_catalog.md +++ b/docs/en/data_source/catalog/iceberg_catalog.md @@ -235,20 +235,21 @@ Description: The secret key of your AWS IAM user. If you use the IAM user-based For information about how to choose an authentication method for accessing AWS Glue and how to configure an access control policy in the AWS IAM Console, see [Authentication parameters for accessing AWS Glue](../../integrations/authenticate_to_aws_resources.md#authentication-parameters-for-accessing-aws-glue). - + -##### Tabular +##### REST -If you use Tabular as metastore, you must specify the metastore type as REST (`"iceberg.catalog.type" = "rest"`). Configure `MetastoreParams` as follows: +If you use REST as metastore, you must specify the metastore type as REST (`"iceberg.catalog.type" = "rest"`). 
Configure `MetastoreParams` as follows: ```SQL "iceberg.catalog.type" = "rest", "iceberg.catalog.uri" = "", -"iceberg.catalog.credential" = "", +"iceberg.catalog.security" = "oauth2", +"iceberg.catalog.oauth2.credential" = "", "iceberg.catalog.warehouse" = "" ``` -`MetastoreParams` for Tabular: +`MetastoreParams` for REST catalog: ###### iceberg.catalog.type @@ -258,18 +259,51 @@ Description: The type of metastore that you use for your Iceberg cluster. Set th ###### iceberg.catalog.uri Required: Yes -Description: The URI of the Tabular service endpoint. Example: `https://api.tabular.io/ws`. +Description: The URI of the REST service endpoint. Example: `https://api.tabular.io/ws`. -###### iceberg.catalog.credential +###### iceberg.catalog.security -Required: Yes -Description: The authentication information of the Tabular service. +Required: No + +Description: The type of authorization protocol to use. Default: `NONE`. Valid value: `OAUTH2`, which requires either a `token` or `credential`. + +###### iceberg.catalog.oauth2.token + +Required: No + +Description: The bearer token used for interactions with the server. A `token` or `credential` is required for `OAUTH2` authorization protocol. Example: `AbCdEf123456`. + +###### iceberg.catalog.oauth2.credential + +Required: No + +Description: The credential to exchange for a token in the OAuth2 client credentials flow with the server. A `token` or `credential` is required for `OAUTH2` authorization protocol. Example: `AbCdEf123456`. + +###### iceberg.catalog.oauth2.scope + +Required: No + +Description: Scope to be used when communicating with the REST Catalog. Applicable only when `credential` is used. + +###### iceberg.catalog.oauth2.server-uri + +Required: No + +Description: The endpoint to retrieve access token from OAuth2 Server. + +###### iceberg.catalog.vended-credentials-enabled + +Required: No + +Description: Whether to use the credentials vended by the REST catalog to access the underlying file storage. Default: `true`. 
###### iceberg.catalog.warehouse Required: No Description: The warehouse location or identifier of the Iceberg catalog. Example: `s3://my_bucket/warehouse_location` or `sandbox`. + + The following example creates an Iceberg catalog named `tabular` that uses Tabular as metastore: ```SQL @@ -283,6 +317,34 @@ PROPERTIES "iceberg.catalog.warehouse" = "sandbox" ); ``` +The following example creates an Iceberg catalog named `smith_polaris` that uses Polaris as metastore: + +```sql +CREATE EXTERNAL CATALOG smith_polaris +PROPERTIES ( + "iceberg.catalog.uri" = "http://xxx.xx.xx.xxx:8181/api/catalog", + "type" = "iceberg", + "iceberg.catalog.type" = "rest", + "iceberg.catalog.warehouse" = "starrocks_catalog", + "iceberg.catalog.security" = "oauth2", + "iceberg.catalog.oauth2.credential" = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "iceberg.catalog.oauth2.scope"='PRINCIPAL_ROLE:ALL' + ); + +# `ns1.ns2.tpch_namespace` is a nested namespace +create table smith_polaris.`ns1.ns2.tpch_namespace`.tbl (c1 string); + +mysql> select * from smith_polaris.`ns1.ns2.tpch_namespace`.tbl; ++------+ +| c1 | ++------+ +| 1 | +| 2 | +| 3 | ++------+ +3 rows in set (0.34 sec) +``` + diff --git a/docs/zh/data_source/catalog/iceberg_catalog.md b/docs/zh/data_source/catalog/iceberg_catalog.md index 019777ca5eacc..110ae9023cf6d 100644 --- a/docs/zh/data_source/catalog/iceberg_catalog.md +++ b/docs/zh/data_source/catalog/iceberg_catalog.md @@ -170,14 +170,15 @@ StarRocks 访问 Iceberg 集群元数据服务的相关参数配置。 有关如何选择用于访问 AWS Glue 的鉴权方式、以及如何在 AWS IAM 控制台配置访问控制策略,参见[访问 AWS Glue 的认证参数](../../integrations/authenticate_to_aws_resources.md#访问-aws-glue-的认证参数)。 -##### Tabular +##### REST -如果您使用 Tabular 作为元数据服务,则必须设置元数据服务的类型为 REST (`"iceberg.catalog.type" = "rest"`),请按如下配置 `MetastoreParams`: +如果您使用 REST 作为元数据服务,则必须设置元数据服务的类型为 REST (`"iceberg.catalog.type" = "rest"`),请按如下配置 `MetastoreParams`: ```SQL "iceberg.catalog.type" = "rest", "iceberg.catalog.uri" = "", -"iceberg.catalog.credential" = "", +"iceberg.catalog.security" 
= "oauth2", +"iceberg.catalog.oauth2.credential" = "", "iceberg.catalog.warehouse" = "" ``` @@ -186,8 +187,13 @@ StarRocks 访问 Iceberg 集群元数据服务的相关参数配置。 | 参数 | 是否必须 | 说明 | | -------------------------- | ------ | ------------------------------------------------------------ | | iceberg.catalog.type | 是 | Iceberg 集群所使用的元数据服务的类型。设置为 `rest`。 | -| iceberg.catalog.uri | 是 | Tabular 服务 Endpoint 的 URI,如 `https://api.tabular.io/ws`。 | -| iceberg.catalog.credential | 是 | Tabular 服务的认证信息。 | +| iceberg.catalog.uri | 是 | REST 服务 Endpoint 的 URI,如 `https://api.tabular.io/ws`。 | +| iceberg.catalog.security | 否 | 要使用的授权协议类型。默认值:`NONE`。有效值:`OAUTH2`。使用 `OAUTH2` 需要指定 `token` 或 `credential`。 | +| iceberg.catalog.oauth2.token | 否 | 用于与服务器交互的 Bearer Token。使用 `OAUTH2` 需要指定 `token` 或 `credential`。示例:`AbCdEf123456`。 | +| iceberg.catalog.oauth2.credential | 否 | 用于与服务器的 OAuth2 客户端 Credentials Flow 交换 Token 的 Credential。使用 `OAUTH2` 需要指定 `token` 或 `credential`。示例:`AbCdEf123456`。 | +| iceberg.catalog.oauth2.scope | 否 | 与 REST Catalog 通信时使用的范围。仅在使用 `credential` 时适用。 | +| iceberg.catalog.oauth2.server-uri | 否 | 从 OAuth2 服务器获取 Token 的端点。 | +| iceberg.catalog.vended-credentials-enabled | 否 | 是否使用 REST Catalog 提供的 Credential 访问底层文件存储。默认:`true`。| | iceberg.catalog.warehouse | 否 | Catalog 的仓库位置或标志符,如 `s3://my_bucket/warehouse_location` 或 `sandbox`。 | 例如,创建一个名为 `tabular` 的 Iceberg Catalog,使用 Tabular 作为元数据服务: @@ -204,6 +210,34 @@ PROPERTIES ); ``` +以下示例创建了一个名为 `smith_polaris` 的 Iceberg Catalog,使用 Polaris 作为元数据服务: + +```sql +CREATE EXTERNAL CATALOG smith_polaris +PROPERTIES ( + "iceberg.catalog.uri" = "http://xxx.xx.xx.xxx:8181/api/catalog", + "type" = "iceberg", + "iceberg.catalog.type" = "rest", + "iceberg.catalog.warehouse" = "starrocks_catalog", + "iceberg.catalog.security" = "oauth2", + "iceberg.catalog.oauth2.credential" = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "iceberg.catalog.oauth2.scope"='PRINCIPAL_ROLE:ALL' + ); + +# `ns1.ns2.tpch_namespace` 为嵌套 namespace +create table smith_polaris.`ns1.ns2.tpch_namespace`.tbl 
(c1 string); + +mysql> select * from smith_polaris.`ns1.ns2.tpch_namespace`.tbl; ++------+ +| c1 | ++------+ +| 1 | +| 2 | +| 3 | ++------+ +3 rows in set (0.34 sec) +``` + #### StorageCredentialParams StarRocks 访问 Iceberg 集群文件存储的相关参数配置。 diff --git a/fe/fe-core/src/main/java/com/starrocks/credential/CloudConfigurationFactory.java b/fe/fe-core/src/main/java/com/starrocks/credential/CloudConfigurationFactory.java index 4a0805f31c206..c45ceb2e84f66 100644 --- a/fe/fe-core/src/main/java/com/starrocks/credential/CloudConfigurationFactory.java +++ b/fe/fe-core/src/main/java/com/starrocks/credential/CloudConfigurationFactory.java @@ -89,13 +89,17 @@ public static CloudConfiguration buildCloudConfigurationForVendedCredentials(Map String sessionSk = properties.getOrDefault(S3FileIOProperties.SECRET_ACCESS_KEY, null); String sessionToken = properties.getOrDefault(S3FileIOProperties.SESSION_TOKEN, null); String region = properties.getOrDefault(AwsClientProperties.CLIENT_REGION, null); - String enablePathStyle = properties.getOrDefault(S3FileIOProperties.PATH_STYLE_ACCESS, "false"); - if (sessionAk != null && sessionSk != null && sessionToken != null && region != null) { + String enablePathStyle = properties.getOrDefault(S3FileIOProperties.PATH_STYLE_ACCESS, null); + if (sessionAk != null && sessionSk != null && sessionToken != null) { copiedProperties.put(CloudConfigurationConstants.AWS_S3_ACCESS_KEY, sessionAk); copiedProperties.put(CloudConfigurationConstants.AWS_S3_SECRET_KEY, sessionSk); copiedProperties.put(CloudConfigurationConstants.AWS_S3_SESSION_TOKEN, sessionToken); - copiedProperties.put(CloudConfigurationConstants.AWS_S3_REGION, region); - copiedProperties.put(CloudConfigurationConstants.AWS_S3_ENABLE_PATH_STYLE_ACCESS, enablePathStyle); + if (region != null) { + copiedProperties.put(CloudConfigurationConstants.AWS_S3_REGION, region); + } + if (enablePathStyle != null) { + copiedProperties.put(CloudConfigurationConstants.AWS_S3_ENABLE_PATH_STYLE_ACCESS, 
enablePathStyle); + } } return buildCloudConfigurationForStorage(copiedProperties); } diff --git a/fe/fe-core/src/test/java/com/starrocks/credential/CloudConfigurationFactoryTest.java b/fe/fe-core/src/test/java/com/starrocks/credential/CloudConfigurationFactoryTest.java index b4f6793f8b339..938156e301219 100644 --- a/fe/fe-core/src/test/java/com/starrocks/credential/CloudConfigurationFactoryTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/credential/CloudConfigurationFactoryTest.java @@ -47,6 +47,19 @@ public void testBuildCloudConfigurationForVendedCredentials() { "sessionToken='token', iamRoleArn='', stsRegion='', stsEndpoint='', externalId='', " + "region='region', endpoint=''}, enablePathStyleAccess=true, enableSSL=true}", cloudConfiguration.toConfString()); + + map.remove(AwsClientProperties.CLIENT_REGION); + map.remove(S3FileIOProperties.PATH_STYLE_ACCESS); + cloudConfiguration = CloudConfigurationFactory.buildCloudConfigurationForVendedCredentials(map); + Assert.assertNotNull(cloudConfiguration); + Assert.assertEquals(CloudType.AWS, cloudConfiguration.getCloudType()); + Assert.assertEquals( + "AWSCloudConfiguration{resources='', jars='', hdpuser='', " + + "cred=AWSCloudCredential{useAWSSDKDefaultBehavior=false, " + + "useInstanceProfile=false, accessKey='ak', secretKey='sk', " + + "sessionToken='token', iamRoleArn='', stsRegion='', stsEndpoint='', externalId='', " + + "region='us-east-1', endpoint=''}, enablePathStyleAccess=false, enableSSL=true}", + cloudConfiguration.toConfString()); } @Test