From 3cae4cde2a838b7509c2ccc032cd9162bf996b08 Mon Sep 17 00:00:00 2001 From: fanng Date: Thu, 24 Oct 2024 18:55:55 +0800 Subject: [PATCH 1/3] add cloud storage for IcebergRESTServer docker image --- bundles/gcp-bundle/build.gradle.kts | 1 + dev/docker/iceberg-rest-server/Dockerfile | 2 +- .../iceberg-rest-server-dependency.sh | 28 +++++++ .../iceberg-rest-server/rewrite_config.py | 76 +++++++++++++++++++ .../start-iceberg-rest-server.sh | 29 +++++++ docs/iceberg-rest-service.md | 19 ++++- 6 files changed, 152 insertions(+), 3 deletions(-) create mode 100755 dev/docker/iceberg-rest-server/rewrite_config.py create mode 100755 dev/docker/iceberg-rest-server/start-iceberg-rest-server.sh diff --git a/bundles/gcp-bundle/build.gradle.kts b/bundles/gcp-bundle/build.gradle.kts index b887ef2c5a3..4ff29b84574 100644 --- a/bundles/gcp-bundle/build.gradle.kts +++ b/bundles/gcp-bundle/build.gradle.kts @@ -49,6 +49,7 @@ tasks.withType(ShadowJar::class.java) { relocate("org.apache.httpcomponents", "org.apache.gravitino.shaded.org.apache.httpcomponents") relocate("org.apache.commons", "org.apache.gravitino.shaded.org.apache.commons") relocate("com.google", "org.apache.gravitino.shaded.com.google") + relocate("com.fasterxml", "org.apache.gravitino.shaded.com.fasterxml") } tasks.jar { diff --git a/dev/docker/iceberg-rest-server/Dockerfile b/dev/docker/iceberg-rest-server/Dockerfile index d4a85915ff0..eae94c4a43d 100644 --- a/dev/docker/iceberg-rest-server/Dockerfile +++ b/dev/docker/iceberg-rest-server/Dockerfile @@ -26,4 +26,4 @@ COPY packages/gravitino-iceberg-rest-server /root/gravitino-iceberg-rest-server EXPOSE 9001 -ENTRYPOINT ["/bin/bash", "/root/gravitino-iceberg-rest-server/bin/gravitino-iceberg-rest-server.sh", "start"] +ENTRYPOINT ["/bin/bash", "/root/gravitino-iceberg-rest-server/bin/start-iceberg-rest-server.sh"] diff --git a/dev/docker/iceberg-rest-server/iceberg-rest-server-dependency.sh b/dev/docker/iceberg-rest-server/iceberg-rest-server-dependency.sh index 
8581cc5be2e..5df82111401 100755 --- a/dev/docker/iceberg-rest-server/iceberg-rest-server-dependency.sh +++ b/dev/docker/iceberg-rest-server/iceberg-rest-server-dependency.sh @@ -34,6 +34,34 @@ cd distribution tar xfz gravitino-iceberg-rest-server-*.tar.gz cp -r gravitino-iceberg-rest-server*-bin ${iceberg_rest_server_dir}/packages/gravitino-iceberg-rest-server +cd ${gravitino_home} +./gradlew :bundles:gcp-bundle:jar +./gradlew :bundles:aws-bundle:jar + +# prepare bundle jar +cd ${iceberg_rest_server_dir} +mkdir -p bundles +cp ${gravitino_home}/bundles/gcp-bundle/build/libs/gravitino-gcp-bundle-*.jar bundles/ +cp ${gravitino_home}/bundles/aws-bundle/build/libs/gravitino-aws-bundle-*.jar bundles/ + +iceberg_gcp_bundle="iceberg-gcp-bundle-1.5.2.jar" +if [ ! -f "bundles/${iceberg_gcp_bundle}" ]; then + wget -P bundles https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-gcp-bundle/1.5.2/${iceberg_gcp_bundle} +fi + +iceberg_aws_bundle="iceberg-aws-bundle-1.5.2.jar" +if [ ! -f "bundles/${iceberg_aws_bundle}" ]; then + wget -P bundles https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/1.5.2/${iceberg_aws_bundle} +fi + +# download jdbc driver +wget -P bundles https://repo1.maven.org/maven2/org/xerial/sqlite-jdbc/3.42.0.0/sqlite-jdbc-3.42.0.0.jar + +cp bundles/*jar ${iceberg_rest_server_dir}/packages/gravitino-iceberg-rest-server/libs/ + +cp start-iceberg-rest-server.sh ${iceberg_rest_server_dir}/packages/gravitino-iceberg-rest-server/bin/ +cp rewrite_config.py ${iceberg_rest_server_dir}/packages/gravitino-iceberg-rest-server/bin/ + # Keeping the container running at all times cat <<EOF >> "${iceberg_rest_server_dir}/packages/gravitino-iceberg-rest-server/bin/gravitino-iceberg-rest-server.sh" diff --git a/dev/docker/iceberg-rest-server/rewrite_config.py b/dev/docker/iceberg-rest-server/rewrite_config.py new file mode 100755 index 00000000000..7cbe241e101 --- /dev/null +++ b/dev/docker/iceberg-rest-server/rewrite_config.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# 
Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os + +env_map = { + "GRAVITINO_IO_IMPL" : "io-impl", + "GRAVITINO_URI" : "uri", + "GRAVITINO_WAREHOUSE" : "warehouse", + "GRAVITINO_CREDENTIAL_PROVIDER_TYPE" : "credential-provider-type", + "GRAVITINO_GCS_CREDENTIAL_FILE_PATH" : "gcs-credential-file-path", + "GRAVITINO_S3_ACCESS_KEY" : "s3-access-key-id", + "GRAVITINO_S3_SECRET_KEY" : "s3-secret-access-key", + "GRAVITINO_S3_REGION" : "s3-region", + "GRAVITINO_S3_ROLE_ARN" : "s3-role-arn", + "GRAVITINO_S3_EXTERNAL_ID" : "s3-external-id" +} + + +def parse_config_file(file_path): + config_map = {} + with open(file_path, 'r') as file: + for line in file: + stripped_line = line.strip() + if stripped_line and not stripped_line.startswith('#'): + key, value = stripped_line.split('=', 1)  # split on the first '=' only: values such as JDBC URIs may contain '=' + key = key.strip() + value = value.strip() + config_map[key] = value + return config_map + +config_prefix = "gravitino.iceberg-rest." 
+ +def update_config(config, key, value): + config[config_prefix + key] = value + +config_file_path = 'conf/gravitino-iceberg-rest-server.conf' +config_map = parse_config_file(config_file_path) + +update_config(config_map, "catalog-backend", "jdbc") +update_config(config_map, "jdbc-driver", "org.sqlite.JDBC") +update_config(config_map, "uri", "jdbc:sqlite::memory:") +update_config(config_map, "jdbc-user", "iceberg") +update_config(config_map, "jdbc-password", "iceberg") +update_config(config_map, "jdbc-initialize", "true") +update_config(config_map, "jdbc.schema-version", "V1") + +for k, v in env_map.items(): + if k in os.environ: + update_config(config_map, v, os.environ[k]) + +# for key, value in config_map.items(): +# print(f"{key}: {value}") + +if os.path.exists(config_file_path): + os.remove(config_file_path) + +with open(config_file_path, 'w') as file: + for key, value in config_map.items(): + line = "{} = {}\n".format(key, value) + file.write(line) diff --git a/dev/docker/iceberg-rest-server/start-iceberg-rest-server.sh b/dev/docker/iceberg-rest-server/start-iceberg-rest-server.sh new file mode 100755 index 00000000000..449ed5ebf59 --- /dev/null +++ b/dev/docker/iceberg-rest-server/start-iceberg-rest-server.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -ex +bin_dir="$(dirname "${BASH_SOURCE-$0}")" +iceberg_rest_server_dir="$(cd "${bin_dir}/../">/dev/null; pwd)" + +cd ${iceberg_rest_server_dir} + +python bin/rewrite_config.py + +./bin/gravitino-iceberg-rest-server.sh start diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md index 01f651449be..3cacc421d09 100644 --- a/docs/iceberg-rest-service.md +++ b/docs/iceberg-rest-service.md @@ -388,10 +388,25 @@ SELECT * FROM dml.test; You could run Gravitino Iceberg REST server though docker container: ```shell -docker run -d -p 9001:9001 apache/gravitino-iceberg-rest:0.6.0 +docker run -d -p 9001:9001 apache/gravitino-iceberg-rest:0.7.0-incubating ``` -Or build it manually to add custom logics: +Gravitino Iceberg REST server in docker image could access local fs by default, you could change the configuration by environment variables to access S3 and GCS storage. 
+ +| Environment variables | Configuration items | Since version | +|--------------------------------------|---------------------------------------------------|-------------------| +| `GRAVITINO_IO_IMPL` | `gravitino.iceberg-rest.io-impl` | 0.7.0-incubating | +| `GRAVITINO_URI` | `gravitino.iceberg-rest.uri` | 0.7.0-incubating | +| `GRAVITINO_WAREHOUSE` | `gravitino.iceberg-rest.warehouse` | 0.7.0-incubating | +| `GRAVITINO_CREDENTIAL_PROVIDER_TYPE` | `gravitino.iceberg-rest.credential-provider-type` | 0.7.0-incubating | +| `GRAVITINO_GCS_CREDENTIAL_FILE_PATH` | `gravitino.iceberg-rest.gcs-credential-file-path` | 0.7.0-incubating | +| `GRAVITINO_S3_ACCESS_KEY` | `gravitino.iceberg-rest.s3-access-key-id` | 0.7.0-incubating | +| `GRAVITINO_S3_SECRET_KEY` | `gravitino.iceberg-rest.s3-secret-access-key` | 0.7.0-incubating | +| `GRAVITINO_S3_REGION` | `gravitino.iceberg-rest.s3-region` | 0.7.0-incubating | +| `GRAVITINO_S3_ROLE_ARN` | `gravitino.iceberg-rest.s3-role-arn` | 0.7.0-incubating | +| `GRAVITINO_S3_EXTERNAL_ID` | `gravitino.iceberg-rest.s3-external-id` | 0.7.0-incubating | + +Or build it manually to add custom configuration or logics: ```shell sh ./dev/docker/build-docker.sh --platform linux/arm64 --type iceberg-rest-server --image apache/gravitino-iceberg-rest --tag 0.6.0 From e130cda397e20ce4bc50beae37b28c8f3a496d28 Mon Sep 17 00:00:00 2001 From: fanng Date: Tue, 29 Oct 2024 18:34:53 +0800 Subject: [PATCH 2/3] polish --- dev/docker/iceberg-rest-server/rewrite_config.py | 3 --- docs/iceberg-rest-service.md | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/dev/docker/iceberg-rest-server/rewrite_config.py b/dev/docker/iceberg-rest-server/rewrite_config.py index 7cbe241e101..3822ef413ef 100755 --- a/dev/docker/iceberg-rest-server/rewrite_config.py +++ b/dev/docker/iceberg-rest-server/rewrite_config.py @@ -64,9 +64,6 @@ def update_config(config, key, value): if k in os.environ: update_config(config_map, v, os.environ[k]) -# for key, value in 
config_map.items(): -# print(f"{key}: {value}") - if os.path.exists(config_file_path): os.remove(config_file_path) diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md index 3cacc421d09..977fd1d60d5 100644 --- a/docs/iceberg-rest-service.md +++ b/docs/iceberg-rest-service.md @@ -391,7 +391,7 @@ You could run Gravitino Iceberg REST server though docker container: docker run -d -p 9001:9001 apache/gravitino-iceberg-rest:0.7.0-incubating ``` -Gravitino Iceberg REST server in docker image could access local fs by default, you could change the configuration by environment variables to access S3 and GCS storage. +Gravitino Iceberg REST server in docker image could access local storage, you could change the configuration by environment variables to access S3 or GCS storage, please refer to storage section for more details. | Environment variables | Configuration items | Since version | |--------------------------------------|---------------------------------------------------|-------------------| @@ -409,7 +409,7 @@ Gravitino Iceberg REST server in docker image could access local fs by default, Or build it manually to add custom configuration or logics: ```shell -sh ./dev/docker/build-docker.sh --platform linux/arm64 --type iceberg-rest-server --image apache/gravitino-iceberg-rest --tag 0.6.0 +sh ./dev/docker/build-docker.sh --platform linux/arm64 --type iceberg-rest-server --image apache/gravitino-iceberg-rest --tag 0.7.0-incubating ``` You could try Spark with Gravitino REST catalog service in our [playground](./how-to-use-the-playground.md#using-apache-iceberg-rest-service). 
From bf65dcca3b227e5acc036d37135be714a75a95ec Mon Sep 17 00:00:00 2001 From: fanng Date: Wed, 30 Oct 2024 08:48:51 +0800 Subject: [PATCH 3/3] polish --- .../iceberg-rest-server-dependency.sh | 6 +++--- .../iceberg-rest-server/rewrite_config.py | 19 ++++++++++++------- docs/docker-image-details.md | 8 +++++++- docs/iceberg-rest-service.md | 2 +- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/dev/docker/iceberg-rest-server/iceberg-rest-server-dependency.sh b/dev/docker/iceberg-rest-server/iceberg-rest-server-dependency.sh index 5df82111401..5d00157862b 100755 --- a/dev/docker/iceberg-rest-server/iceberg-rest-server-dependency.sh +++ b/dev/docker/iceberg-rest-server/iceberg-rest-server-dependency.sh @@ -46,16 +46,16 @@ cp ${gravitino_home}/bundles/aws-bundle/build/libs/gravitino-aws-bundle-*.jar bu iceberg_gcp_bundle="iceberg-gcp-bundle-1.5.2.jar" if [ ! -f "bundles/${iceberg_gcp_bundle}" ]; then - wget -P bundles https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-gcp-bundle/1.5.2/${iceberg_gcp_bundle} + curl -L -s -o bundles/${iceberg_gcp_bundle} https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-gcp-bundle/1.5.2/${iceberg_gcp_bundle} fi iceberg_aws_bundle="iceberg-aws-bundle-1.5.2.jar" if [ ! 
-f "bundles/${iceberg_aws_bundle}" ]; then - wget -P bundles https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/1.5.2/${iceberg_aws_bundle} + curl -L -s -o bundles/${iceberg_aws_bundle} https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/1.5.2/${iceberg_aws_bundle} fi # download jdbc driver -wget -P bundles https://repo1.maven.org/maven2/org/xerial/sqlite-jdbc/3.42.0.0/sqlite-jdbc-3.42.0.0.jar +curl -L -s -o bundles/sqlite-jdbc-3.42.0.0.jar https://repo1.maven.org/maven2/org/xerial/sqlite-jdbc/3.42.0.0/sqlite-jdbc-3.42.0.0.jar cp bundles/*jar ${iceberg_rest_server_dir}/packages/gravitino-iceberg-rest-server/libs/ diff --git a/dev/docker/iceberg-rest-server/rewrite_config.py b/dev/docker/iceberg-rest-server/rewrite_config.py index 3822ef413ef..dce5479cf08 100755 --- a/dev/docker/iceberg-rest-server/rewrite_config.py +++ b/dev/docker/iceberg-rest-server/rewrite_config.py @@ -31,6 +31,16 @@ "GRAVITINO_S3_EXTERNAL_ID" : "s3-external-id" } +init_config = { + "catalog-backend" : "jdbc", + "jdbc-driver" : "org.sqlite.JDBC", + "uri" : "jdbc:sqlite::memory:", + "jdbc-user" : "iceberg", + "jdbc-password" : "iceberg", + "jdbc-initialize" : "true", + "jdbc.schema-version" : "V1" +} + def parse_config_file(file_path): config_map = {} @@ -52,13 +62,8 @@ def update_config(config, key, value): config_file_path = 'conf/gravitino-iceberg-rest-server.conf' config_map = parse_config_file(config_file_path) -update_config(config_map, "catalog-backend", "jdbc") -update_config(config_map, "jdbc-driver", "org.sqlite.JDBC") -update_config(config_map, "uri", "jdbc:sqlite::memory:") -update_config(config_map, "jdbc-user", "iceberg") -update_config(config_map, "jdbc-password", "iceberg") -update_config(config_map, "jdbc-initialize", "true") -update_config(config_map, "jdbc.schema-version", "V1") +for k, v in init_config.items(): + update_config(config_map, k, v) for k, v in env_map.items(): if k in os.environ: diff --git a/docs/docker-image-details.md 
b/docs/docker-image-details.md index cad304657ac..5344d656ce6 100644 --- a/docs/docker-image-details.md +++ b/docs/docker-image-details.md @@ -51,11 +51,17 @@ You can deploy the standalone Gravitino Iceberg REST server with the Docker imag Container startup commands ```shell -docker run --rm -d -p 9001:9001 apache/gravitino-iceberg-rest:0.6.1-incubating +docker run --rm -d -p 9001:9001 apache/gravitino-iceberg-rest:0.7.0-incubating ``` Changelog +- apache/gravitino-iceberg-rest:0.7.0-incubating + - Using JDBC catalog backend. + - Supports S3 and GCS storage. + - Supports credential vending. + - Supports changing configuration by environment variables. + - apache/gravitino-iceberg-rest:0.6.1-incubating - Based on Gravitino 0.6.1-incubating, you can know more information from 0.6.1-incubating release notes. diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md index 977fd1d60d5..d55061c4291 100644 --- a/docs/iceberg-rest-service.md +++ b/docs/iceberg-rest-service.md @@ -391,7 +391,7 @@ You could run Gravitino Iceberg REST server though docker container: docker run -d -p 9001:9001 apache/gravitino-iceberg-rest:0.7.0-incubating ``` -Gravitino Iceberg REST server in docker image could access local storage, you could change the configuration by environment variables to access S3 or GCS storage, please refer to storage section for more details. +The Gravitino Iceberg REST server in the Docker image accesses local storage by default. If the storage is cloud/remote storage such as S3 or GCS, set the following environment variables; please refer to the [storage section](#storage) for more details. | Environment variables | Configuration items | Since version | |--------------------------------------|---------------------------------------------------|-------------------|