From 36f0aa5e398ec532f657a24ffc570024d61b67e3 Mon Sep 17 00:00:00 2001 From: David Phillips Date: Wed, 6 Dec 2023 16:12:30 +0800 Subject: [PATCH] Remove legacy Hive tests --- .github/workflows/ci.yml | 4 - plugin/trino-hive-hadoop2/bin/common.sh | 214 - .../trino-hive-hadoop2/bin/run_hive_tests.sh | 35 - plugin/trino-hive-hadoop2/bin/start_hive.sh | 21 - .../conf/docker-compose.yml | 22 - .../conf/files/core-site.xml.s3-template | 61 - .../conf/files/test_table.csv | 3 - .../conf/files/test_table.csv.bz2 | Bin 43 -> 0 bytes .../conf/files/test_table.csv.gz | Bin 29 -> 0 bytes .../conf/files/test_table.csv.lz4 | Bin 18 -> 0 bytes .../conf/files/test_table.json | 2 - .../conf/files/test_table.json.bz2 | Bin 87 -> 0 bytes .../conf/files/test_table.json.gz | Bin 73 -> 0 bytes ...table_csv_scan_range_select_pushdown_1.csv | 100 - ...table_csv_scan_range_select_pushdown_2.csv | 100 - ...table_csv_scan_range_select_pushdown_3.csv | 100 - ...ble_json_scan_range_select_pushdown_1.json | 100 - ...ble_json_scan_range_select_pushdown_2.json | 100 - ...ble_json_scan_range_select_pushdown_3.json | 100 - .../files/test_table_with_comma_delimiter.csv | 3 - .../test_table_with_comma_delimiter.csv.bz2 | Bin 51 -> 0 bytes .../test_table_with_comma_delimiter.csv.gz | Bin 78 -> 0 bytes .../conf/files/test_table_with_header.csv | 4 - .../conf/files/test_table_with_header.csv.bz2 | Bin 44 -> 0 bytes .../conf/files/test_table_with_header.csv.gz | Bin 31 -> 0 bytes .../conf/files/test_table_with_header.csv.lz4 | Bin 19 -> 0 bytes .../test_table_with_header_and_footer.csv | 7 - .../test_table_with_header_and_footer.csv.bz2 | Bin 48 -> 0 bytes .../test_table_with_header_and_footer.csv.gz | Bin 37 -> 0 bytes .../test_table_with_header_and_footer.csv.lz4 | Bin 27 -> 0 bytes .../files/test_table_with_pipe_delimiter.csv | 3 - .../test_table_with_pipe_delimiter.csv.bz2 | Bin 54 -> 0 bytes .../test_table_with_pipe_delimiter.csv.gz | Bin 78 -> 0 bytes .../conf/files/tez-site.xml | 100 - plugin/trino-hive-hadoop2/conf/files/words | 100 - .../conf/hive-tests-config-apache-hive3.sh | 1 - .../conf/hive-tests-config-hdp3.sh | 1 - .../conf/hive-tests-defaults.sh | 11 - plugin/trino-hive-hadoop2/pom.xml | 38 - .../java/io/trino/plugin/hive/TestHive.java | 184 - .../trino/plugin/hive/AbstractTestHive.java | 6368 ----------------- .../plugin/hive/AbstractTestHiveLocal.java | 351 - .../plugin/hive/TestHiveFileMetastore.java | 84 - .../metastore/glue/TestGlueHiveMetastore.java | 1605 ----- .../spark_bucketed_nation/._SUCCESS.crc | Bin 8 -> 0 bytes ...513-2b2e82ec274f_00000.c000.snappy.orc.crc | Bin 24 -> 0 bytes ...513-2b2e82ec274f_00001.c000.snappy.orc.crc | Bin 20 -> 0 bytes ...513-2b2e82ec274f_00002.c000.snappy.orc.crc | Bin 20 -> 0 bytes .../resources/spark_bucketed_nation/_SUCCESS | 0 ...85-a513-2b2e82ec274f_00000.c000.snappy.orc | Bin 1865 -> 0 bytes ...85-a513-2b2e82ec274f_00001.c000.snappy.orc | Bin 1480 -> 0 bytes ...85-a513-2b2e82ec274f_00002.c000.snappy.orc | Bin 1200 -> 0 bytes 52 files changed, 9822 deletions(-) delete mode 100644 plugin/trino-hive-hadoop2/bin/common.sh delete mode 100755 plugin/trino-hive-hadoop2/bin/run_hive_tests.sh delete mode 100755 plugin/trino-hive-hadoop2/bin/start_hive.sh delete mode 100644 plugin/trino-hive-hadoop2/conf/docker-compose.yml delete mode 100644 plugin/trino-hive-hadoop2/conf/files/core-site.xml.s3-template delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table.csv delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table.csv.bz2 delete mode 100644 
plugin/trino-hive-hadoop2/conf/files/test_table.csv.gz delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table.csv.lz4 delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table.json delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table.json.bz2 delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table.json.gz delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_1.csv delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_2.csv delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_csv_scan_range_select_pushdown_3.csv delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_1.json delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_2.json delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_json_scan_range_select_pushdown_3.json delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.bz2 delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.gz delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.bz2 delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.gz delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.lz4 delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.bz2 delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.gz delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.lz4 delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.bz2 delete mode 100644 plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.gz delete mode 100644 plugin/trino-hive-hadoop2/conf/files/tez-site.xml delete mode 100644 plugin/trino-hive-hadoop2/conf/files/words delete mode 100644 plugin/trino-hive-hadoop2/conf/hive-tests-config-apache-hive3.sh delete mode 100644 plugin/trino-hive-hadoop2/conf/hive-tests-config-hdp3.sh delete mode 100644 plugin/trino-hive-hadoop2/conf/hive-tests-defaults.sh delete mode 100644 plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java delete mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java delete mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java delete mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileMetastore.java delete mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueHiveMetastore.java delete mode 100644 plugin/trino-hive/src/test/resources/spark_bucketed_nation/._SUCCESS.crc delete mode 100644 plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc.crc delete mode 100644 plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc.crc delete mode 
100644 plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc.crc delete mode 100644 plugin/trino-hive/src/test/resources/spark_bucketed_nation/_SUCCESS delete mode 100644 plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc delete mode 100644 plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc delete mode 100644 plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8fd233feea2b..42c188ebb8c7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -302,10 +302,6 @@ jobs: run: | export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}" $MAVEN clean install ${MAVEN_FAST_INSTALL} ${MAVEN_GIB} -Dgib.logImpactedTo=gib-impacted.log -am -pl :trino-hive-hadoop2 - - name: Run Hive Tests - run: | - source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh && - plugin/trino-hive-hadoop2/bin/run_hive_tests.sh - name: Run Hive AWS Tests env: AWS_ACCESS_KEY_ID: ${{ secrets.TRINO_AWS_ACCESS_KEY_ID }} diff --git a/plugin/trino-hive-hadoop2/bin/common.sh b/plugin/trino-hive-hadoop2/bin/common.sh deleted file mode 100644 index 534bc9020ec7..000000000000 --- a/plugin/trino-hive-hadoop2/bin/common.sh +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env bash - -function retry() { - local END - local EXIT_CODE - - END=$(($(date +%s) + 600)) - - while (( $(date +%s) < $END )); do - set +e - "$@" - EXIT_CODE=$? - set -e - - if [[ ${EXIT_CODE} == 0 ]]; then - break - fi - sleep 5 - done - - return ${EXIT_CODE} -} - -function hadoop_master_container() { - docker-compose -f "${DOCKER_COMPOSE_LOCATION}" ps -q hadoop-master | grep . -} - -function hadoop_master_ip() { - HADOOP_MASTER_CONTAINER=$(hadoop_master_container) - docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $HADOOP_MASTER_CONTAINER -} - -function check_hadoop() { - HADOOP_MASTER_CONTAINER=$(hadoop_master_container) - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl status hive-server2 | grep -i running &> /dev/null && - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl status hive-metastore | grep -i running &> /dev/null && - docker exec ${HADOOP_MASTER_CONTAINER} netstat -lpn | grep -i 0.0.0.0:10000 &> /dev/null && - docker exec ${HADOOP_MASTER_CONTAINER} netstat -lpn | grep -i 0.0.0.0:9083 &> /dev/null -} - -function exec_in_hadoop_master_container() { - HADOOP_MASTER_CONTAINER=$(hadoop_master_container) - docker exec ${HADOOP_MASTER_CONTAINER} "$@" -} - -function stop_unnecessary_hadoop_services() { - HADOOP_MASTER_CONTAINER=$(hadoop_master_container) - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl status - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl stop yarn-resourcemanager - docker exec ${HADOOP_MASTER_CONTAINER} supervisorctl stop yarn-nodemanager -} - -# Expands docker compose file paths files into the format "-f $1 -f $2 ...." 
-# Arguments: -# $1, $2, ...: A list of docker-compose files used to start/stop containers -function expand_compose_args() { - local files=( "${@}" ) - local compose_args="" - for file in ${files[@]}; do - compose_args+=" -f ${file}" - done - echo "${compose_args}" -} - -function cleanup_docker_containers() { - local compose_args="$(expand_compose_args "$@")" - # stop containers started with "up" - docker-compose ${compose_args} down --remove-orphans - - # docker logs processes are being terminated as soon as docker container are stopped - # wait for docker logs termination - wait -} - -function cleanup_hadoop_docker_containers() { - cleanup_docker_containers "${DOCKER_COMPOSE_LOCATION}" -} - -function termination_handler() { - set +e - cleanup_docker_containers "$@" - exit 130 -} - -# Check that all arguments are the names of non-empty variables. -function check_vars() { - ( # Subshell to preserve xtrace - set +x # Disable xtrace to make the messages printed clear - local failing=0 - for arg; do - if [[ ! -v "${arg}" ]]; then - echo "error: Variable not set: ${arg}" >&2 - failing=1 - elif [[ -z "${!arg}" ]]; then - echo "error: Variable is empty: ${arg}" >&2 - failing=1 - fi - done - return "$failing" - ) -} - -SCRIPT_DIR="${BASH_SOURCE%/*}" -INTEGRATION_TESTS_ROOT="${SCRIPT_DIR}/.." -PROJECT_ROOT="${INTEGRATION_TESTS_ROOT}/../.." -DOCKER_COMPOSE_LOCATION="${INTEGRATION_TESTS_ROOT}/conf/docker-compose.yml" -source "${INTEGRATION_TESTS_ROOT}/conf/hive-tests-defaults.sh" - -# check docker and docker compose installation -docker-compose version -docker version - -# extract proxy IP -if [ -n "${DOCKER_MACHINE_NAME:-}" ] -then - PROXY=`docker-machine ip` -else - PROXY=127.0.0.1 -fi - -# Starts containers based on multiple docker compose locations -# Arguments: -# $1, $2, ...: A list of docker-compose files used to start containers -function start_docker_containers() { - local compose_args="$(expand_compose_args $@)" - # Purposefully don't surround ${compose_args} with quotes so that docker-compose infers multiple arguments - # stop already running containers - docker-compose ${compose_args} down || true - - # catch terminate signals - # trap arguments are not expanded until the trap is called, so they must be in a global variable - TRAP_ARGS="$@" - trap 'termination_handler $TRAP_ARGS' INT TERM - - # pull docker images - if [[ "${CONTINUOUS_INTEGRATION:-false}" == 'true' ]]; then - retry docker-compose ${compose_args} pull --quiet - fi - - # start containers - docker-compose ${compose_args} up -d -} - -function start_hadoop_docker_containers() { - start_docker_containers "${DOCKER_COMPOSE_LOCATION}" - - # start docker logs for hadoop container - docker-compose -f "${DOCKER_COMPOSE_LOCATION}" logs --no-color hadoop-master & - - # wait until hadoop processes is started - retry check_hadoop -} - -# $1 = base URI for table names -function create_test_tables() { - local table_name table_path - local base_path="${1:?create_test_tables requires an argument}" - base_path="${base_path%/}" # remove trailing slash - - table_name="trino_test_external_fs" - table_path="$base_path/$table_name/" - exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" - exec_in_hadoop_master_container hadoop fs -copyFromLocal -f /docker/files/test_table.csv{,.gz,.bz2,.lz4} "${table_path}" - exec_in_hadoop_master_container /usr/bin/hive -e "CREATE EXTERNAL TABLE $table_name(t_bigint bigint) LOCATION '${table_path}'" - - table_name="trino_test_external_fs_with_header" - table_path="$base_path/$table_name/" - 
exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" - exec_in_hadoop_master_container hadoop fs -copyFromLocal -f /docker/files/test_table_with_header.csv{,.gz,.bz2,.lz4} "${table_path}" - exec_in_hadoop_master_container /usr/bin/hive -e " - CREATE EXTERNAL TABLE $table_name(t_bigint bigint) - STORED AS TEXTFILE - LOCATION '${table_path}' - TBLPROPERTIES ('skip.header.line.count'='1')" - - table_name="trino_test_external_fs_with_header_and_footer" - table_path="$base_path/$table_name/" - exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}" - exec_in_hadoop_master_container hadoop fs -copyFromLocal -f /docker/files/test_table_with_header_and_footer.csv{,.gz,.bz2,.lz4} "${table_path}" - exec_in_hadoop_master_container /usr/bin/hive -e " - CREATE EXTERNAL TABLE $table_name(t_bigint bigint) - STORED AS TEXTFILE - LOCATION '${table_path}' - TBLPROPERTIES ('skip.header.line.count'='2', 'skip.footer.line.count'='2')" -} - -# $1 = basename of core-site.xml template -# other arguments are names of variables to substitute in the file -function deploy_core_site_xml() { - local template="${1:?deploy_core_site_xml expects at least one argument}" - shift - local args=() - local name value - for name; do - shift - value="${!name//\\/\\\\}" # escape \ as \\ - value="${value//|/\\|}" # escape | as \| - args+=(-e "s|%$name%|$value|g") - done - exec_in_hadoop_master_container bash -c \ - 'sed "${@:2}" "/docker/files/$1" > /etc/hadoop/conf/core-site.xml' \ - bash "$template" "${args[@]}" -} - -# Checks if Gitflow Incremental Builder (GIB) is enabled and the trino-hive-hadoop2 module should be build and/or tested -function abort_if_not_gib_impacted() { - local module=plugin/trino-hive-hadoop2 - local impacted_log=gib-impacted.log - if [ -f "$impacted_log" ] && ! grep -q "^${module}$" "$impacted_log"; then - echo >&2 "Module $module not present in $impacted_log, exiting" - exit 0 - fi - return 0 -} diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_tests.sh deleted file mode 100755 index e7a82e9cf500..000000000000 --- a/plugin/trino-hive-hadoop2/bin/run_hive_tests.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail -x - -. "${BASH_SOURCE%/*}/common.sh" - -abort_if_not_gib_impacted - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -# generate test data -exec_in_hadoop_master_container sudo -Eu hive beeline -u jdbc:hive2://localhost:10000/default -n hive -f /docker/sql/create-test.sql - -stop_unnecessary_hadoop_services - -HADOOP_MASTER_IP=$(hadoop_master_ip) - -# run product tests -pushd "${PROJECT_ROOT}" -set +e -./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2 \ - -DHADOOP_USER_NAME=hive \ - -Dtest.metastore=localhost:9083 \ - -Dtest.database=default \ - -Dhive.metastore.thrift.client.socks-proxy="${PROXY}:1180" \ - -Dhive.hdfs.socks-proxy="${PROXY}:1180" \ - -Dhadoop-master-ip="${HADOOP_MASTER_IP}" -EXIT_CODE=$? -set -e -popd - -cleanup_hadoop_docker_containers - -exit "${EXIT_CODE}" diff --git a/plugin/trino-hive-hadoop2/bin/start_hive.sh b/plugin/trino-hive-hadoop2/bin/start_hive.sh deleted file mode 100755 index 82179fd6eb69..000000000000 --- a/plugin/trino-hive-hadoop2/bin/start_hive.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -. 
"${BASH_SOURCE%/*}/common.sh" - -cleanup_hadoop_docker_containers -start_hadoop_docker_containers - -HADOOP_MASTER_IP=$(hadoop_master_ip) - -# get short version of container ID (as shown by "docker ps") -CONTAINER=$(echo "${HADOOP_MASTER_CONTAINER}" | cut -b1-12) - -echo -echo "Proxy: ${PROXY}:1180" -echo "Hadoop: ${HADOOP_MASTER_IP}" -echo "Docker: ${CONTAINER}" -echo -echo "docker exec -it ${CONTAINER} bash" -echo diff --git a/plugin/trino-hive-hadoop2/conf/docker-compose.yml b/plugin/trino-hive-hadoop2/conf/docker-compose.yml deleted file mode 100644 index fe9414fe33ed..000000000000 --- a/plugin/trino-hive-hadoop2/conf/docker-compose.yml +++ /dev/null @@ -1,22 +0,0 @@ -version: '2' -services: - hadoop-master: - hostname: hadoop-master - image: '${HADOOP_BASE_IMAGE}:${DOCKER_IMAGES_VERSION}' - ports: - - '1180:1180' - - '8020:8020' # Default hadoop namenode port - - '8042:8042' - - '8088:8088' - - '9000:9000' # Default hadoop namenode port - - '9083:9083' # Metastore Thrift - - '9864:9864' # DataNode Web UI since Hadoop 3 - - '9870:9870' # NameNode Web UI since Hadoop 3 - - '10000:10000' # HiveServer2 - - '19888:19888' - - '50070:50070' # NameNode Web UI prior to Hadoop 3 - - '50075:50075' # DataNode Web UI prior to Hadoop 3 - volumes: - - ../../trino-hive/src/test/sql:/docker/sql:ro - - ./files:/docker/files:ro - - ./files/tez-site.xml:/etc/tez/conf/tez-site.xml:ro diff --git a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.s3-template b/plugin/trino-hive-hadoop2/conf/files/core-site.xml.s3-template deleted file mode 100644 index 984026e39e38..000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/core-site.xml.s3-template +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - hadoop.proxyuser.hive.hosts - * - - - - hadoop.proxyuser.hive.groups - * - - - - fs.defaultFS - hdfs://hadoop-master:9000 - - - - fs.s3.awsAccessKeyId - %AWS_ACCESS_KEY_ID% - - - - fs.s3.awsSecretAccessKey - %AWS_SECRET_ACCESS_KEY% - - - - fs.s3a.access.key - %AWS_ACCESS_KEY_ID% - - - - fs.s3a.secret.key - %AWS_SECRET_ACCESS_KEY% - - - - fs.s3a.endpoint - %S3_BUCKET_ENDPOINT% - - - diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.csv b/plugin/trino-hive-hadoop2/conf/files/test_table.csv deleted file mode 100644 index 0628eaa1bdc2..000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table.csv +++ /dev/null @@ -1,3 +0,0 @@ -3 -14 -15 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.csv.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table.csv.bz2 deleted file mode 100644 index d9d6b339f90497567e1c5c7af3e705c1e5b58024..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 43 ycmZ>Y%CIzaj8qGb?Ak1r&cMLz!N4FO@j!t=v4Mw~U2%8!PNg{-9#${y21pp250sjC1 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.json b/plugin/trino-hive-hadoop2/conf/files/test_table.json deleted file mode 100644 index 6173ff8aae87..000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table.json +++ /dev/null @@ -1,2 +0,0 @@ -{"col_1":2, "col_2":4} -{"col_1":5, "col_2":6} diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.json.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table.json.bz2 deleted file mode 100644 index 6b90f2081e35b18e9228caf1cab8d1d501376773..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 87 zcmV-d0I2^$T4*^jL0KkKSqQZU0RR9j+kgNNPy~Ij004*tfC?Z0P^L^wgChc}dZR*Z tMrid)wawfll8w!A_YHDp(2tTwcj$T`p+i1fpKBk*+>uTcBmyl#z(DZ`AVB~C diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table.json.gz 
b/plugin/trino-hive-hadoop2/conf/files/test_table.json.gz deleted file mode 100644 index ae46357976038069a4496cbac7144a0358a4d6c7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 73 zcmV-P0Ji@hiwFo{BH3a919W9`bYFB~Vr*qDYIARH0IOC?&d-TARI)PHQ2;ZIl&p-5 fYq_dn;zkxo5=KZ8hQY%CIzaj8qGbESM*3&A`BRgMmSSfu)o|fx(c6kw<-HRmTP86SupM-kInpq9UW- Ha_#^CUC$5* diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table_with_comma_delimiter.csv.gz deleted file mode 100644 index 2d8c9cb91edd8be4f5baa9914bc278425a840a4e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 78 zcmb2|=HTEHT$IGXT#{N`5?_*-l#?1?o>`I+pPZkYn;4&xnvY%CIzaj8qGb+*)B_$-uzm!N4G3rl7!JaDXG(c|quF?wv|=G(4jA$jCGR00aaK AhX4Qo diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.gz deleted file mode 100644 index 5466eaa771c0e37e364ba7c45098485cbc4478f5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 31 gcmb2|=3oE=VdE#841nm-CWD7e46?s{4{!s;0f|8gTmS$7 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.lz4 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header.csv.lz4 deleted file mode 100644 index 746baf57149b9fb2011d61961b0dcb2f32fefbbf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19 acmZQzVBlh4VBlV0$YpHGWo*V}#03Bl1p*QP diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv deleted file mode 100644 index c25ef79c3a63..000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv +++ /dev/null @@ -1,7 +0,0 @@ -1 -2 -1 -41 -42 -4 -8 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.bz2 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.bz2 deleted file mode 100644 index a23e84d6c1882cb0f1b4d5678329078bcdf9eb82..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 48 zcmZ>Y%CIzaj8qGboXZz6gMop?gMmSy?tub>5?h-hgMovssz#~dqjw(5xrBrlo)%^R E05cE`EC2ui diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.gz deleted file mode 100644 index 7a439a4d1b0e87cb9ec60a88677e78a09b87984b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 37 ocmb2|=3oE=VdE!8kBm&78JRvdGJ0n8aMJ@Oh9!^R#R&ps0nE4zTmS$7 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.lz4 b/plugin/trino-hive-hadoop2/conf/files/test_table_with_header_and_footer.csv.lz4 deleted file mode 100644 index d337bd0bb853827030cec5004b39bcb29a4b9b95..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27 icmZQzU=U=u0 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv b/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv deleted file mode 100644 index 0cc012bf882a..000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv +++ /dev/null @@ -1,3 +0,0 @@ -1|2 -3|4 -55|66 diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.bz2 
b/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.bz2 deleted file mode 100644 index df138bc6d194a6bf87b341e321dd654e30303146..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 54 zcmZ>Y%CIzaj8qGbydUs#E&~J0i3SFN$_ETA3Jit}2AVIvDxaLf>#w4AdY6%}c#*3` Izyy#w0Pr6Zl>h($ diff --git a/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.gz b/plugin/trino-hive-hadoop2/conf/files/test_table_with_pipe_delimiter.csv.gz deleted file mode 100644 index 6634c19f33456ab008deb5170308f2cc07880d5c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 78 zcmb2|=HR&0yda5zxg@o?B)%juDJM0)JhLPtz96$8H9jRZCo?y*B(*3$y(+UnFS)pk i!PscMp~-_yD-0hQ8Jey(+`i!n6T{q_&&-7w7#IMh^BzF} diff --git a/plugin/trino-hive-hadoop2/conf/files/tez-site.xml b/plugin/trino-hive-hadoop2/conf/files/tez-site.xml deleted file mode 100644 index 8f340b5611e9..000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/tez-site.xml +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - - tez.lib.uris.ignore - false - - - tez.lib.uris - file:///usr/hdp/current/tez-client/lib/tez.tar.gz - - - tez.am.mode.session - false - - - tez.am.acl.enabled - false - - - tez.am.log.level - WARN - - - tez.task.log.level - WARN - - - tez.runtime.io.sort.mb - 8 - - - tez.am.max.app.attempts - 1 - - - tez.am.task.max.failed.attempts - 1 - - - tez.shuffle-vertex-manager.min-src-fraction - 0.10 - - - tez.shuffle-vertex-manager.max-src-fraction - 1.00 - - - tez.am.launch.cmd-opts - -server -Djava.net.preferIPv4Stack=true -XX:+UseParallelGC -Dhadoop.metrics.log.level=WARN - - - tez.am.resource.memory.mb - 512 - - - tez.task.launch.cmd-opts - -server -Djava.net.preferIPv4Stack=true -XX:+UseParallelGC -Dhadoop.metrics.log.level=WARN - - - tez.task.resource.memory.mb - 512 - - - tez.task.resource.cpu.vcores - 1 - - - tez.runtime.sort.threads - 1 - - - tez.runtime.io.sort.factor - 100 - - - tez.runtime.shuffle.memory-to-memory.enable - false - - - tez.runtime.optimize.local.fetch - true - - - hive.tez.container.size - 2048 - - diff --git a/plugin/trino-hive-hadoop2/conf/files/words b/plugin/trino-hive-hadoop2/conf/files/words deleted file mode 100644 index 6d01e6112705..000000000000 --- a/plugin/trino-hive-hadoop2/conf/files/words +++ /dev/null @@ -1,100 +0,0 @@ -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x -x diff --git a/plugin/trino-hive-hadoop2/conf/hive-tests-config-apache-hive3.sh b/plugin/trino-hive-hadoop2/conf/hive-tests-config-apache-hive3.sh deleted file mode 100644 index 8e05591f9118..000000000000 --- a/plugin/trino-hive-hadoop2/conf/hive-tests-config-apache-hive3.sh +++ /dev/null @@ -1 +0,0 @@ -export HADOOP_BASE_IMAGE="ghcr.io/trinodb/testing/hive3.1-hive" diff --git a/plugin/trino-hive-hadoop2/conf/hive-tests-config-hdp3.sh b/plugin/trino-hive-hadoop2/conf/hive-tests-config-hdp3.sh deleted file mode 100644 index c736e171caba..000000000000 --- a/plugin/trino-hive-hadoop2/conf/hive-tests-config-hdp3.sh +++ /dev/null @@ -1 +0,0 @@ -export HADOOP_BASE_IMAGE="ghcr.io/trinodb/testing/hdp3.1-hive" diff --git a/plugin/trino-hive-hadoop2/conf/hive-tests-defaults.sh b/plugin/trino-hive-hadoop2/conf/hive-tests-defaults.sh deleted file mode 100644 index cc5a3030aee0..000000000000 --- 
a/plugin/trino-hive-hadoop2/conf/hive-tests-defaults.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -DEFAULT_DOCKER_VERSION=$(./mvnw help:evaluate -Dexpression=dep.docker.images.version -q -DforceStdout) - -if [ -z "$DEFAULT_DOCKER_VERSION" ]; -then - >&2 echo "Could not read dep.docker.images.version from parent POM" - exit 1 -fi - -export DOCKER_IMAGES_VERSION=${DOCKER_IMAGES_VERSION:-$DEFAULT_DOCKER_VERSION} diff --git a/plugin/trino-hive-hadoop2/pom.xml b/plugin/trino-hive-hadoop2/pom.xml index eea081792b71..54baeb2832c8 100644 --- a/plugin/trino-hive-hadoop2/pom.xml +++ b/plugin/trino-hive-hadoop2/pom.xml @@ -216,42 +216,4 @@ test - - - - default - - true - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHive.java - - - - - - - - test-hive-hadoop2 - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestHive.java - - - - - - - diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java deleted file mode 100644 index 2ae7c145603d..000000000000 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import com.google.common.net.HostAndPort; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.SchemaTablePrefix; -import org.apache.hadoop.net.NetUtils; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assumptions.abort; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestHive - extends AbstractTestHive -{ - @BeforeAll - public void initialize() - { - String metastore = System.getProperty("test.metastore"); - String database = System.getProperty("test.database"); - String hadoopMasterIp = System.getProperty("hadoop-master-ip"); - if (hadoopMasterIp != null) { - // Even though Hadoop is accessed by proxy, Hadoop still tries to resolve hadoop-master - // (e.g: in: NameNodeProxies.createProxy) - // This adds a static resolution for hadoop-master to docker container internal ip - NetUtils.addStaticResolution("hadoop-master", hadoopMasterIp); - } - - setup(HostAndPort.fromString(metastore), database); - } - - @Test - @Override - public void testHideDeltaLakeTables() - { - assertThatThrownBy(super::testHideDeltaLakeTables) - .hasMessageMatching("(?s)\n" + - "Expecting\n" + - " \\[.*\\b(\\w+.tmp_trino_test_trino_delta_lake_table_\\w+)\\b.*]\n" + - "not to contain\n" + - " \\[\\1]\n" + - "but found.*"); - - abort("not supported"); - } - - @Test - public void testHiveViewsHaveNoColumns() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(listTableColumns(metadata, newSession(), new SchemaTablePrefix(view.getSchemaName(), view.getTableName()))) - .isEmpty(); - } - } - - @Test - public void testHiveViewTranslationError() - { - try (Transaction transaction = newTransaction()) { - assertThatThrownBy(() -> transaction.getMetadata().getView(newSession(), view)) - .isInstanceOf(HiveViewNotSupportedException.class) - .hasMessageContaining("Hive views are not supported"); - - // TODO: combine this with tests for successful translation (currently in TestHiveViews product test) - } - } - - @Test - @Override - public void testUpdateBasicPartitionStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_basic_partition_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. 
- testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), - ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testUpdatePartitionColumnStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), - ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS), - ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testStorePartitionWithStatistics() - throws Exception - { - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, EMPTY_ROWCOUNT_STATISTICS); - } - - @Test - @Override - public void testDataColumnProperties() - { - // Column properties are currently not supported in ThriftHiveMetastore - assertThatThrownBy(super::testDataColumnProperties) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Persisting column properties is not supported: Column{name=id, type=bigint}"); - } - - @Test - @Override - public void testPartitionColumnProperties() - { - // Column properties are currently not supported in ThriftHiveMetastore - assertThatThrownBy(super::testPartitionColumnProperties) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Persisting column properties is not supported: Column{name=part_key, type=varchar(256)}"); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java deleted file mode 100644 index c5a1c2d041c0..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java +++ /dev/null @@ -1,6368 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableMultimap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; -import com.google.common.net.HostAndPort; -import io.airlift.json.JsonCodec; -import io.airlift.log.Logger; -import io.airlift.slice.Slice; -import io.airlift.stats.CounterStat; -import io.airlift.units.DataSize; -import io.airlift.units.Duration; -import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.filesystem.TrinoFileSystemFactory; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.hdfs.HdfsContext; -import io.trino.hdfs.HdfsEnvironment; -import io.trino.operator.GroupByHashPageIndexerFactory; -import io.trino.plugin.base.CatalogName; -import io.trino.plugin.base.metrics.LongCount; -import io.trino.plugin.hive.LocationService.WriteInfo; -import io.trino.plugin.hive.fs.DirectoryLister; -import io.trino.plugin.hive.fs.RemoteIterator; -import io.trino.plugin.hive.fs.TransactionScopeCachingDirectoryListerFactory; -import io.trino.plugin.hive.fs.TrinoFileStatus; -import io.trino.plugin.hive.fs.TrinoFileStatusRemoteIterator; -import io.trino.plugin.hive.line.LinePageSource; -import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.HiveColumnStatistics; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.HiveMetastoreFactory; -import io.trino.plugin.hive.metastore.HivePrincipal; -import io.trino.plugin.hive.metastore.HivePrivilegeInfo; -import io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege; -import io.trino.plugin.hive.metastore.Partition; -import io.trino.plugin.hive.metastore.PartitionWithStatistics; -import io.trino.plugin.hive.metastore.PrincipalPrivileges; -import io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore; -import io.trino.plugin.hive.metastore.SortingColumn; -import io.trino.plugin.hive.metastore.StorageFormat; -import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.cache.CachingHiveMetastore; -import io.trino.plugin.hive.metastore.cache.CachingHiveMetastoreConfig; -import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; -import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig; -import io.trino.plugin.hive.orc.OrcPageSource; -import io.trino.plugin.hive.parquet.ParquetPageSource; -import io.trino.plugin.hive.rcfile.RcFilePageSource; -import io.trino.plugin.hive.security.SqlStandardAccessControlMetadata; -import io.trino.spi.Page; -import io.trino.spi.TrinoException; -import io.trino.spi.block.Block; -import io.trino.spi.connector.Assignment; -import io.trino.spi.connector.CatalogSchemaTableName; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.ColumnMetadata; -import io.trino.spi.connector.ConnectorBucketNodeMap; -import io.trino.spi.connector.ConnectorInsertTableHandle; -import io.trino.spi.connector.ConnectorMaterializedViewDefinition; -import 
io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.ConnectorNodePartitioningProvider; -import io.trino.spi.connector.ConnectorOutputTableHandle; -import io.trino.spi.connector.ConnectorPageSink; -import io.trino.spi.connector.ConnectorPageSinkProvider; -import io.trino.spi.connector.ConnectorPageSource; -import io.trino.spi.connector.ConnectorPageSourceProvider; -import io.trino.spi.connector.ConnectorPartitioningHandle; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorSplit; -import io.trino.spi.connector.ConnectorSplitManager; -import io.trino.spi.connector.ConnectorSplitSource; -import io.trino.spi.connector.ConnectorTableHandle; -import io.trino.spi.connector.ConnectorTableLayout; -import io.trino.spi.connector.ConnectorTableMetadata; -import io.trino.spi.connector.ConnectorTableProperties; -import io.trino.spi.connector.ConnectorTransactionHandle; -import io.trino.spi.connector.ConnectorViewDefinition; -import io.trino.spi.connector.ConnectorViewDefinition.ViewColumn; -import io.trino.spi.connector.Constraint; -import io.trino.spi.connector.ConstraintApplicationResult; -import io.trino.spi.connector.DiscretePredicates; -import io.trino.spi.connector.DynamicFilter; -import io.trino.spi.connector.ProjectionApplicationResult; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.SchemaTablePrefix; -import io.trino.spi.connector.SortingProperty; -import io.trino.spi.connector.TableColumnsMetadata; -import io.trino.spi.connector.TableNotFoundException; -import io.trino.spi.connector.TableScanRedirectApplicationResult; -import io.trino.spi.connector.ViewNotFoundException; -import io.trino.spi.expression.ConnectorExpression; -import io.trino.spi.expression.FieldDereference; -import io.trino.spi.expression.Variable; -import io.trino.spi.metrics.Metrics; -import io.trino.spi.predicate.Domain; -import io.trino.spi.predicate.NullableValue; -import io.trino.spi.predicate.Range; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.predicate.ValueSet; -import io.trino.spi.statistics.ColumnStatistics; -import io.trino.spi.statistics.TableStatistics; -import io.trino.spi.type.ArrayType; -import io.trino.spi.type.CharType; -import io.trino.spi.type.MapType; -import io.trino.spi.type.NamedTypeSignature; -import io.trino.spi.type.RowFieldName; -import io.trino.spi.type.RowType; -import io.trino.spi.type.SqlDate; -import io.trino.spi.type.SqlTimestamp; -import io.trino.spi.type.SqlTimestampWithTimeZone; -import io.trino.spi.type.SqlVarbinary; -import io.trino.spi.type.Type; -import io.trino.spi.type.TypeId; -import io.trino.spi.type.TypeOperators; -import io.trino.spi.type.VarcharType; -import io.trino.sql.gen.JoinCompiler; -import io.trino.testing.MaterializedResult; -import io.trino.testing.MaterializedRow; -import io.trino.testing.TestingConnectorSession; -import io.trino.testing.TestingNodeManager; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.assertj.core.api.InstanceOfAssertFactories; -import org.joda.time.DateTime; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.parallel.Execution; - -import java.io.IOException; -import java.io.OutputStream; -import java.math.BigDecimal; -import java.time.LocalDate; -import java.time.LocalDateTime; -import 
java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalDouble; -import java.util.OptionalInt; -import java.util.OptionalLong; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.stream.IntStream; -import java.util.stream.LongStream; - -import static com.google.common.base.MoreObjects.toStringHelper; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.base.Verify.verify; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.google.common.collect.Iterables.concat; -import static com.google.common.collect.Iterables.getOnlyElement; -import static com.google.common.collect.Lists.newArrayList; -import static com.google.common.collect.Lists.reverse; -import static com.google.common.collect.Maps.uniqueIndex; -import static com.google.common.collect.MoreCollectors.onlyElement; -import static com.google.common.collect.Sets.difference; -import static com.google.common.collect.Streams.stream; -import static com.google.common.hash.Hashing.sha256; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.airlift.concurrent.MoreFutures.getFutureValue; -import static io.airlift.concurrent.Threads.daemonThreadsNamed; -import static io.airlift.slice.Slices.utf8Slice; -import static io.airlift.testing.Assertions.assertGreaterThan; -import static io.airlift.testing.Assertions.assertGreaterThanOrEqual; -import static io.airlift.testing.Assertions.assertInstanceOf; -import static io.airlift.testing.Assertions.assertLessThanOrEqual; -import static io.airlift.units.DataSize.Unit.KILOBYTE; -import static io.trino.parquet.reader.ParquetReader.PARQUET_CODEC_METRIC_PREFIX; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.COMMIT; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH; -import static io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY; -import static io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics; -import static io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics; -import static io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME; -import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; -import static 
io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle; -import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE; -import static io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH; -import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; -import static io.trino.plugin.hive.HiveMetadata.TRINO_VERSION_NAME; -import static io.trino.plugin.hive.HiveStorageFormat.AVRO; -import static io.trino.plugin.hive.HiveStorageFormat.CSV; -import static io.trino.plugin.hive.HiveStorageFormat.JSON; -import static io.trino.plugin.hive.HiveStorageFormat.ORC; -import static io.trino.plugin.hive.HiveStorageFormat.PARQUET; -import static io.trino.plugin.hive.HiveStorageFormat.RCBINARY; -import static io.trino.plugin.hive.HiveStorageFormat.RCTEXT; -import static io.trino.plugin.hive.HiveStorageFormat.REGEX; -import static io.trino.plugin.hive.HiveStorageFormat.SEQUENCEFILE; -import static io.trino.plugin.hive.HiveStorageFormat.TEXTFILE; -import static io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY; -import static io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; -import static io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER; -import static io.trino.plugin.hive.HiveTestUtils.SESSION; -import static io.trino.plugin.hive.HiveTestUtils.arrayType; -import static io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories; -import static io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories; -import static io.trino.plugin.hive.HiveTestUtils.getHiveSession; -import static io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties; -import static io.trino.plugin.hive.HiveTestUtils.getTypes; -import static io.trino.plugin.hive.HiveTestUtils.mapType; -import static io.trino.plugin.hive.HiveTestUtils.rowType; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.plugin.hive.HiveType.HIVE_LONG; -import static io.trino.plugin.hive.HiveType.HIVE_STRING; -import static io.trino.plugin.hive.HiveType.toHiveType; -import static io.trino.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY; -import static io.trino.plugin.hive.TableType.MANAGED_TABLE; -import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; -import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createDateColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics; -import static 
io.trino.plugin.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createStringColumnStatistics; -import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; -import static io.trino.plugin.hive.metastore.SortingColumn.Order.ASCENDING; -import static io.trino.plugin.hive.metastore.SortingColumn.Order.DESCENDING; -import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; -import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.createCachingHiveMetastore; -import static io.trino.plugin.hive.orc.OrcPageSource.ORC_CODEC_METRIC_PREFIX; -import static io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1; -import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE; -import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY; -import static io.trino.plugin.hive.util.HiveUtil.columnExtraInfo; -import static io.trino.plugin.hive.util.HiveUtil.toPartitionValues; -import static io.trino.plugin.hive.util.HiveWriteUtils.getTableDefaultLocation; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.StandardErrorCode.TRANSACTION_CONFLICT; -import static io.trino.spi.connector.MetadataProvider.NOOP_METADATA_PROVIDER; -import static io.trino.spi.connector.RetryMode.NO_RETRIES; -import static io.trino.spi.connector.SortOrder.ASC_NULLS_FIRST; -import static io.trino.spi.connector.SortOrder.DESC_NULLS_LAST; -import static io.trino.spi.security.PrincipalType.USER; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.BooleanType.BOOLEAN; -import static io.trino.spi.type.CharType.createCharType; -import static io.trino.spi.type.DateType.DATE; -import static io.trino.spi.type.DecimalType.createDecimalType; -import static io.trino.spi.type.DoubleType.DOUBLE; -import static io.trino.spi.type.HyperLogLogType.HYPER_LOG_LOG; -import static io.trino.spi.type.IntegerType.INTEGER; -import static io.trino.spi.type.RealType.REAL; -import static io.trino.spi.type.SmallintType.SMALLINT; -import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; -import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; -import static io.trino.spi.type.TinyintType.TINYINT; -import static io.trino.spi.type.VarbinaryType.VARBINARY; -import static io.trino.spi.type.VarcharType.VARCHAR; -import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; -import static io.trino.spi.type.VarcharType.createVarcharType; -import static io.trino.testing.DateTimeTestingUtils.sqlTimestampOf; -import static io.trino.testing.MaterializedResult.materializeSourceDataStream; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.TestingPageSinkId.TESTING_PAGE_SINK_ID; -import static io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy; -import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; -import static java.lang.Float.floatToRawIntBits; -import static java.lang.Math.toIntExact; -import static java.lang.String.format; -import static java.nio.charset.StandardCharsets.UTF_8; -import static 
java.nio.file.Files.createTempDirectory; -import static java.util.Locale.ENGLISH; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.Executors.newCachedThreadPool; -import static java.util.concurrent.Executors.newScheduledThreadPool; -import static java.util.concurrent.TimeUnit.MILLISECONDS; -import static java.util.concurrent.TimeUnit.MINUTES; -import static java.util.concurrent.TimeUnit.SECONDS; -import static java.util.stream.Collectors.toList; -import static org.apache.hadoop.hive.common.FileUtils.makePartName; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.assertj.core.api.Fail.fail; -import static org.assertj.core.data.Offset.offset; -import static org.joda.time.DateTimeZone.UTC; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; -import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; - -@TestInstance(PER_CLASS) -@Execution(SAME_THREAD) // staging directory is shared mutable state -public abstract class AbstractTestHive -{ - private static final Logger log = Logger.get(AbstractTestHive.class); - - protected static final String TEMPORARY_TABLE_PREFIX = "tmp_trino_test_"; - - protected static final String INVALID_DATABASE = "totally_invalid_database_name"; - protected static final String INVALID_TABLE = "totally_invalid_table_name"; - - protected static final String TEST_SERVER_VERSION = "test_version"; - - private static final Type ARRAY_TYPE = arrayType(createUnboundedVarcharType()); - private static final Type MAP_TYPE = mapType(createUnboundedVarcharType(), BIGINT); - private static final Type ROW_TYPE = rowType(ImmutableList.of( - new NamedTypeSignature(Optional.of(new RowFieldName("f_string")), createUnboundedVarcharType().getTypeSignature()), - new NamedTypeSignature(Optional.of(new RowFieldName("f_bigint")), BIGINT.getTypeSignature()), - new NamedTypeSignature(Optional.of(new RowFieldName("f_boolean")), BOOLEAN.getTypeSignature()))); - - private static final List CREATE_TABLE_COLUMNS = ImmutableList.builder() - .add(new ColumnMetadata("id", BIGINT)) - .add(new ColumnMetadata("t_string", createUnboundedVarcharType())) - .add(new ColumnMetadata("t_tinyint", TINYINT)) - .add(new ColumnMetadata("t_smallint", SMALLINT)) - .add(new ColumnMetadata("t_integer", INTEGER)) - .add(new ColumnMetadata("t_bigint", BIGINT)) - .add(new ColumnMetadata("t_float", REAL)) - .add(new ColumnMetadata("t_double", DOUBLE)) - .add(new ColumnMetadata("t_boolean", BOOLEAN)) - .add(new ColumnMetadata("t_array", ARRAY_TYPE)) - .add(new ColumnMetadata("t_map", MAP_TYPE)) - .add(new ColumnMetadata("t_row", ROW_TYPE)) - .build(); - - private static final MaterializedResult CREATE_TABLE_DATA = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), TINYINT, SMALLINT, INTEGER, BIGINT, REAL, DOUBLE, BOOLEAN, ARRAY_TYPE, MAP_TYPE, ROW_TYPE) - .row(1L, "hello", (byte) 45, (short) 345, 234, 123L, -754.1985f, 43.5, true, ImmutableList.of("apple", "banana"), ImmutableMap.of("one", 1L, "two", 2L), ImmutableList.of("true", 1L, true)) - .row(2L, null, null, null, null, null, null, null, null, null, null, null) - .row(3L, "bye", (byte) 46, (short) 346, 345, 456L, 754.2008f, 98.1, false, ImmutableList.of("ape", "bear"), ImmutableMap.of("three", 3L, "four", 4L), ImmutableList.of("false", 0L, false)) - .build(); - - protected static final List CREATE_TABLE_COLUMNS_PARTITIONED = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) 
- .add(new ColumnMetadata("ds", createUnboundedVarcharType())) - .build(); - - protected static final Set COLUMN_NAMES_PARTITIONED = CREATE_TABLE_COLUMNS_PARTITIONED.stream().map(ColumnMetadata::getName).collect(toImmutableSet()); - - protected static final Predicate PARTITION_COLUMN_FILTER = columnName -> columnName.equals("ds") || columnName.startsWith("part_"); - - private static final MaterializedResult CREATE_TABLE_PARTITIONED_DATA = new MaterializedResult( - CREATE_TABLE_DATA.getMaterializedRows().stream() - .map(row -> new MaterializedRow(row.getPrecision(), newArrayList(concat(row.getFields(), ImmutableList.of("2015-07-0" + row.getField(0)))))) - .collect(toList()), - ImmutableList.builder() - .addAll(CREATE_TABLE_DATA.getTypes()) - .add(createUnboundedVarcharType()) - .build()); - - private static final String CREATE_TABLE_PARTITIONED_DATA_2ND_PARTITION_VALUE = "2015-07-04"; - - private static final MaterializedResult CREATE_TABLE_PARTITIONED_DATA_2ND = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), TINYINT, SMALLINT, INTEGER, BIGINT, REAL, DOUBLE, BOOLEAN, ARRAY_TYPE, MAP_TYPE, ROW_TYPE, createUnboundedVarcharType()) - .row(4L, "hello", (byte) 45, (short) 345, 234, 123L, 754.1985f, 43.5, true, ImmutableList.of("apple", "banana"), ImmutableMap.of("one", 1L, "two", 2L), ImmutableList.of("true", 1L, true), CREATE_TABLE_PARTITIONED_DATA_2ND_PARTITION_VALUE) - .row(5L, null, null, null, null, null, null, null, null, null, null, null, CREATE_TABLE_PARTITIONED_DATA_2ND_PARTITION_VALUE) - .row(6L, "bye", (byte) 46, (short) 346, 345, 456L, -754.2008f, 98.1, false, ImmutableList.of("ape", "bear"), ImmutableMap.of("three", 3L, "four", 4L), ImmutableList.of("false", 0L, false), CREATE_TABLE_PARTITIONED_DATA_2ND_PARTITION_VALUE) - .build(); - - private static final List MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE = ImmutableList.builder() - .add(new ColumnMetadata("tinyint_to_smallint", TINYINT)) - .add(new ColumnMetadata("tinyint_to_integer", TINYINT)) - .add(new ColumnMetadata("tinyint_to_bigint", TINYINT)) - .add(new ColumnMetadata("smallint_to_integer", SMALLINT)) - .add(new ColumnMetadata("smallint_to_bigint", SMALLINT)) - .add(new ColumnMetadata("integer_to_bigint", INTEGER)) - .add(new ColumnMetadata("integer_to_varchar", INTEGER)) - .add(new ColumnMetadata("varchar_to_integer", createUnboundedVarcharType())) - .add(new ColumnMetadata("float_to_double", REAL)) - .add(new ColumnMetadata("varchar_to_drop_in_row", createUnboundedVarcharType())) - .build(); - - private static final List MISMATCH_SCHEMA_TABLE_BEFORE = ImmutableList.builder() - .addAll(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE) - .add(new ColumnMetadata("struct_to_struct", toRowType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE))) - .add(new ColumnMetadata("list_to_list", arrayType(toRowType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE)))) - .add(new ColumnMetadata("map_to_map", mapType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE.get(1).getType(), toRowType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_BEFORE)))) - .add(new ColumnMetadata("ds", createUnboundedVarcharType())) - .build(); - - private static RowType toRowType(List columns) - { - return rowType(columns.stream() - .map(col -> new NamedTypeSignature(Optional.of(new RowFieldName(format("f_%s", col.getName()))), col.getType().getTypeSignature())) - .collect(toImmutableList())); - } - - private static final MaterializedResult MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_BEFORE = - MaterializedResult.resultBuilder(SESSION, TINYINT, TINYINT, TINYINT, SMALLINT, SMALLINT, 
INTEGER, INTEGER, createUnboundedVarcharType(), REAL, createUnboundedVarcharType()) - .row((byte) -11, (byte) 12, (byte) -13, (short) 14, (short) 15, -16, 17, "2147483647", 18.0f, "2016-08-01") - .row((byte) 21, (byte) -22, (byte) 23, (short) -24, (short) 25, 26, -27, "asdf", -28.0f, "2016-08-02") - .row((byte) -31, (byte) -32, (byte) 33, (short) 34, (short) -35, 36, 37, "-923", 39.5f, "2016-08-03") - .row(null, (byte) 42, (byte) 43, (short) 44, (short) -45, 46, 47, "2147483648", 49.5f, "2016-08-03") - .build(); - - private static final MaterializedResult MISMATCH_SCHEMA_TABLE_DATA_BEFORE = - MaterializedResult.resultBuilder(SESSION, MISMATCH_SCHEMA_TABLE_BEFORE.stream().map(ColumnMetadata::getType).collect(toImmutableList())) - .rows(MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_BEFORE.getMaterializedRows() - .stream() - .map(materializedRow -> { - List result = materializedRow.getFields(); - List rowResult = materializedRow.getFields(); - result.add(rowResult); - result.add(Arrays.asList(rowResult, null, rowResult)); - result.add(ImmutableMap.of(rowResult.get(1), rowResult)); - result.add(rowResult.get(9)); - return new MaterializedRow(materializedRow.getPrecision(), result); - }).collect(toImmutableList())) - .build(); - - private static final List MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER = ImmutableList.builder() - .add(new ColumnMetadata("tinyint_to_smallint", SMALLINT)) - .add(new ColumnMetadata("tinyint_to_integer", INTEGER)) - .add(new ColumnMetadata("tinyint_to_bigint", BIGINT)) - .add(new ColumnMetadata("smallint_to_integer", INTEGER)) - .add(new ColumnMetadata("smallint_to_bigint", BIGINT)) - .add(new ColumnMetadata("integer_to_bigint", BIGINT)) - .add(new ColumnMetadata("integer_to_varchar", createUnboundedVarcharType())) - .add(new ColumnMetadata("varchar_to_integer", INTEGER)) - .add(new ColumnMetadata("float_to_double", DOUBLE)) - .add(new ColumnMetadata("varchar_to_drop_in_row", createUnboundedVarcharType())) - .build(); - - private static final Type MISMATCH_SCHEMA_ROW_TYPE_APPEND = toRowType(ImmutableList.builder() - .addAll(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER) - .add(new ColumnMetadata(format("%s_append", MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.get(0).getName()), MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.get(0).getType())) - .build()); - private static final Type MISMATCH_SCHEMA_ROW_TYPE_DROP = toRowType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.subList(0, MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.size() - 1)); - - private static final List MISMATCH_SCHEMA_TABLE_AFTER = ImmutableList.builder() - .addAll(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER) - .add(new ColumnMetadata("struct_to_struct", MISMATCH_SCHEMA_ROW_TYPE_APPEND)) - .add(new ColumnMetadata("list_to_list", arrayType(MISMATCH_SCHEMA_ROW_TYPE_APPEND))) - .add(new ColumnMetadata("map_to_map", mapType(MISMATCH_SCHEMA_PRIMITIVE_COLUMN_AFTER.get(1).getType(), MISMATCH_SCHEMA_ROW_TYPE_DROP))) - .add(new ColumnMetadata("ds", createUnboundedVarcharType())) - .build(); - - private static final MaterializedResult MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_AFTER = - MaterializedResult.resultBuilder(SESSION, SMALLINT, INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, createUnboundedVarcharType(), INTEGER, DOUBLE, createUnboundedVarcharType()) - .row((short) -11, 12, -13L, 14, 15L, -16L, "17", 2147483647, 18.0, "2016-08-01") - .row((short) 21, -22, 23L, -24, 25L, 26L, "-27", null, -28.0, "2016-08-02") - .row((short) -31, -32, 33L, 34, -35L, 36L, "37", -923, 39.5, "2016-08-03") - .row(null, 42, 43L, 44, -45L, 46L, "47", null, 49.5, "2016-08-03") - .build(); - 
- private static final MaterializedResult MISMATCH_SCHEMA_TABLE_DATA_AFTER = - MaterializedResult.resultBuilder(SESSION, MISMATCH_SCHEMA_TABLE_AFTER.stream().map(ColumnMetadata::getType).collect(toImmutableList())) - .rows(MISMATCH_SCHEMA_PRIMITIVE_FIELDS_DATA_AFTER.getMaterializedRows() - .stream() - .map(materializedRow -> { - List result = materializedRow.getFields(); - List appendFieldRowResult = materializedRow.getFields(); - appendFieldRowResult.add(null); - List dropFieldRowResult = materializedRow.getFields().subList(0, materializedRow.getFields().size() - 1); - result.add(appendFieldRowResult); - result.add(Arrays.asList(appendFieldRowResult, null, appendFieldRowResult)); - result.add(ImmutableMap.of(result.get(1), dropFieldRowResult)); - result.add(result.get(9)); - return new MaterializedRow(materializedRow.getPrecision(), result); - }).collect(toImmutableList())) - .build(); - - protected Set createTableFormats = difference( - ImmutableSet.copyOf(HiveStorageFormat.values()), - // exclude formats that change table schema with serde and read-only formats - ImmutableSet.of(AVRO, CSV, REGEX)); - - private static final JoinCompiler JOIN_COMPILER = new JoinCompiler(new TypeOperators()); - - protected static final List STATISTICS_TABLE_COLUMNS = ImmutableList.builder() - .add(new ColumnMetadata("t_boolean", BOOLEAN)) - .add(new ColumnMetadata("t_bigint", BIGINT)) - .add(new ColumnMetadata("t_integer", INTEGER)) - .add(new ColumnMetadata("t_smallint", SMALLINT)) - .add(new ColumnMetadata("t_tinyint", TINYINT)) - .add(new ColumnMetadata("t_double", DOUBLE)) - .add(new ColumnMetadata("t_float", REAL)) - .add(new ColumnMetadata("t_string", createUnboundedVarcharType())) - .add(new ColumnMetadata("t_varchar", createVarcharType(100))) - .add(new ColumnMetadata("t_char", createCharType(5))) - .add(new ColumnMetadata("t_varbinary", VARBINARY)) - .add(new ColumnMetadata("t_date", DATE)) - .add(new ColumnMetadata("t_timestamp", TIMESTAMP_MILLIS)) - .add(new ColumnMetadata("t_short_decimal", createDecimalType(5, 2))) - .add(new ColumnMetadata("t_long_decimal", createDecimalType(20, 3))) - .build(); - - protected static final List STATISTICS_PARTITIONED_TABLE_COLUMNS = ImmutableList.builder() - .addAll(STATISTICS_TABLE_COLUMNS) - .add(new ColumnMetadata("ds", VARCHAR)) - .build(); - - protected static final PartitionStatistics ZERO_TABLE_STATISTICS = new PartitionStatistics(createZeroStatistics(), ImmutableMap.of()); - protected static final PartitionStatistics EMPTY_ROWCOUNT_STATISTICS = ZERO_TABLE_STATISTICS.withBasicStatistics(ZERO_TABLE_STATISTICS.getBasicStatistics().withEmptyRowCount()); - protected static final PartitionStatistics BASIC_STATISTICS_1 = new PartitionStatistics(new HiveBasicStatistics(0, 20, 3, 0), ImmutableMap.of()); - protected static final PartitionStatistics BASIC_STATISTICS_2 = new PartitionStatistics(new HiveBasicStatistics(0, 30, 2, 0), ImmutableMap.of()); - - protected static final PartitionStatistics STATISTICS_1 = - new PartitionStatistics( - BASIC_STATISTICS_1.getBasicStatistics(), - ImmutableMap.builder() - .put("t_boolean", createBooleanColumnStatistics(OptionalLong.of(5), OptionalLong.of(6), OptionalLong.of(3))) - .put("t_bigint", createIntegerColumnStatistics(OptionalLong.of(1234L), OptionalLong.of(5678L), OptionalLong.of(2), OptionalLong.of(5))) - .put("t_integer", createIntegerColumnStatistics(OptionalLong.of(123L), OptionalLong.of(567L), OptionalLong.of(3), OptionalLong.of(4))) - .put("t_smallint", createIntegerColumnStatistics(OptionalLong.of(12L), 
OptionalLong.of(56L), OptionalLong.of(2), OptionalLong.of(6))) - .put("t_tinyint", createIntegerColumnStatistics(OptionalLong.of(1L), OptionalLong.of(2L), OptionalLong.of(1), OptionalLong.of(3))) - .put("t_double", createDoubleColumnStatistics(OptionalDouble.of(1234.25), OptionalDouble.of(5678.58), OptionalLong.of(7), OptionalLong.of(8))) - .put("t_float", createDoubleColumnStatistics(OptionalDouble.of(123.25), OptionalDouble.of(567.58), OptionalLong.of(9), OptionalLong.of(10))) - .put("t_string", createStringColumnStatistics(OptionalLong.of(10), OptionalLong.of(50), OptionalLong.of(3), OptionalLong.of(7))) - .put("t_varchar", createStringColumnStatistics(OptionalLong.of(100), OptionalLong.of(230), OptionalLong.of(5), OptionalLong.of(3))) - .put("t_char", createStringColumnStatistics(OptionalLong.of(5), OptionalLong.of(50), OptionalLong.of(1), OptionalLong.of(4))) - .put("t_varbinary", createBinaryColumnStatistics(OptionalLong.of(4), OptionalLong.of(50), OptionalLong.of(1))) - .put("t_date", createDateColumnStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(2)), OptionalLong.of(7), OptionalLong.of(6))) - .put("t_timestamp", createIntegerColumnStatistics(OptionalLong.of(1234567L), OptionalLong.of(71234567L), OptionalLong.of(7), OptionalLong.of(5))) - .put("t_short_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal(10)), Optional.of(new BigDecimal(12)), OptionalLong.of(3), OptionalLong.of(5))) - .put("t_long_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal("12345678901234567.123")), Optional.of(new BigDecimal("81234567890123456.123")), OptionalLong.of(2), OptionalLong.of(1))) - .buildOrThrow()); - - protected static final PartitionStatistics STATISTICS_1_1 = - new PartitionStatistics( - new HiveBasicStatistics(OptionalLong.of(0), OptionalLong.of(15), OptionalLong.empty(), OptionalLong.of(0)), - STATISTICS_1.getColumnStatistics().entrySet() - .stream() - .filter(entry -> entry.getKey().hashCode() % 2 == 0) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); - - protected static final PartitionStatistics STATISTICS_1_2 = - new PartitionStatistics( - new HiveBasicStatistics(OptionalLong.of(0), OptionalLong.of(15), OptionalLong.of(3), OptionalLong.of(0)), - STATISTICS_1.getColumnStatistics().entrySet() - .stream() - .filter(entry -> entry.getKey().hashCode() % 2 == 1) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); - - protected static final PartitionStatistics STATISTICS_2 = - new PartitionStatistics( - BASIC_STATISTICS_2.getBasicStatistics(), - ImmutableMap.builder() - .put("t_boolean", createBooleanColumnStatistics(OptionalLong.of(4), OptionalLong.of(3), OptionalLong.of(2))) - .put("t_bigint", createIntegerColumnStatistics(OptionalLong.of(2345L), OptionalLong.of(6789L), OptionalLong.of(4), OptionalLong.of(7))) - .put("t_integer", createIntegerColumnStatistics(OptionalLong.of(234L), OptionalLong.of(678L), OptionalLong.of(5), OptionalLong.of(6))) - .put("t_smallint", createIntegerColumnStatistics(OptionalLong.of(23L), OptionalLong.of(65L), OptionalLong.of(7), OptionalLong.of(5))) - .put("t_tinyint", createIntegerColumnStatistics(OptionalLong.of(3L), OptionalLong.of(12L), OptionalLong.of(2), OptionalLong.of(3))) - .put("t_double", createDoubleColumnStatistics(OptionalDouble.of(2345.25), OptionalDouble.of(6785.58), OptionalLong.of(6), OptionalLong.of(3))) - .put("t_float", createDoubleColumnStatistics(OptionalDouble.of(235.25), OptionalDouble.of(676.58), OptionalLong.of(7), OptionalLong.of(11))) 
- .put("t_string", createStringColumnStatistics(OptionalLong.of(301), OptionalLong.of(600), OptionalLong.of(2), OptionalLong.of(6))) - .put("t_varchar", createStringColumnStatistics(OptionalLong.of(99), OptionalLong.of(223), OptionalLong.of(7), OptionalLong.of(1))) - .put("t_char", createStringColumnStatistics(OptionalLong.of(6), OptionalLong.of(60), OptionalLong.of(0), OptionalLong.of(3))) - .put("t_varbinary", createBinaryColumnStatistics(OptionalLong.of(2), OptionalLong.of(10), OptionalLong.of(2))) - .put("t_date", createDateColumnStatistics(Optional.of(LocalDate.ofEpochDay(2)), Optional.of(LocalDate.ofEpochDay(3)), OptionalLong.of(8), OptionalLong.of(7))) - .put("t_timestamp", createIntegerColumnStatistics(OptionalLong.of(2345671L), OptionalLong.of(12345677L), OptionalLong.of(9), OptionalLong.of(1))) - .put("t_short_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal(11)), Optional.of(new BigDecimal(14)), OptionalLong.of(5), OptionalLong.of(7))) - .put("t_long_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal("71234567890123456.123")), Optional.of(new BigDecimal("78123456789012345.123")), OptionalLong.of(2), OptionalLong.of(1))) - .buildOrThrow()); - - protected static final PartitionStatistics STATISTICS_EMPTY_OPTIONAL_FIELDS = - new PartitionStatistics( - new HiveBasicStatistics(OptionalLong.of(0), OptionalLong.of(20), OptionalLong.empty(), OptionalLong.of(0)), - ImmutableMap.builder() - .put("t_boolean", createBooleanColumnStatistics(OptionalLong.of(4), OptionalLong.of(3), OptionalLong.of(2))) - .put("t_bigint", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(4), OptionalLong.of(7))) - .put("t_integer", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(5), OptionalLong.of(6))) - .put("t_smallint", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(5))) - .put("t_tinyint", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(3))) - .put("t_double", createDoubleColumnStatistics(OptionalDouble.empty(), OptionalDouble.empty(), OptionalLong.of(6), OptionalLong.of(3))) - .put("t_float", createDoubleColumnStatistics(OptionalDouble.empty(), OptionalDouble.empty(), OptionalLong.of(7), OptionalLong.of(11))) - .put("t_string", createStringColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(2), OptionalLong.of(6))) - .put("t_varchar", createStringColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(7), OptionalLong.of(1))) - .put("t_char", createStringColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(3))) - .put("t_varbinary", createBinaryColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), OptionalLong.of(2))) - // https://issues.apache.org/jira/browse/HIVE-20098 - // .put("t_date", createDateColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(8), OptionalLong.of(7))) - .put("t_timestamp", createIntegerColumnStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(9), OptionalLong.of(1))) - .put("t_short_decimal", createDecimalColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(5), OptionalLong.of(7))) - .put("t_long_decimal", createDecimalColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(2), OptionalLong.of(1))) - .buildOrThrow()); - - protected String database; - protected SchemaTableName tablePartitionFormat; - 
protected SchemaTableName tableUnpartitioned; - protected SchemaTableName tablePartitionedWithNull; - protected SchemaTableName tableOffline; - protected SchemaTableName tableNotReadable; - protected SchemaTableName view; - protected SchemaTableName invalidTable; - protected SchemaTableName tableBucketedStringInt; - protected SchemaTableName tableBucketedBigintBoolean; - protected SchemaTableName tableBucketedDoubleFloat; - protected SchemaTableName tablePartitionSchemaChange; - protected SchemaTableName tablePartitionSchemaChangeNonCanonical; - - protected ConnectorTableHandle invalidTableHandle; - - protected ColumnHandle dsColumn; - protected ColumnHandle fileFormatColumn; - protected ColumnHandle dummyColumn; - protected ColumnHandle intColumn; - protected ColumnHandle pStringColumn; - protected ColumnHandle pIntegerColumn; - - protected ConnectorTableProperties tablePartitionFormatProperties; - protected List tablePartitionFormatPartitions; - protected List tableUnpartitionedPartitions; - - protected HdfsEnvironment hdfsEnvironment; - protected LocationService locationService; - - protected CountingDirectoryLister countingDirectoryLister; - protected HiveMetadataFactory metadataFactory; - protected HiveTransactionManager transactionManager; - protected HiveMetastore metastoreClient; - protected ConnectorSplitManager splitManager; - protected ConnectorPageSourceProvider pageSourceProvider; - protected ConnectorPageSinkProvider pageSinkProvider; - protected ConnectorNodePartitioningProvider nodePartitioningProvider; - protected ExecutorService executor; - - private ScheduledExecutorService heartbeatService; - private java.nio.file.Path temporaryStagingDirectory; - - protected final Set materializedViews = Sets.newConcurrentHashSet(); - - @BeforeAll - public void setupClass() - throws Exception - { - executor = newCachedThreadPool(daemonThreadsNamed("hive-%s")); - heartbeatService = newScheduledThreadPool(1); - // Use separate staging directory for each test class to prevent intermittent failures coming from test parallelism - temporaryStagingDirectory = createTempDirectory("trino-staging-"); - } - - @AfterAll - public void tearDown() - { - if (executor != null) { - executor.shutdownNow(); - executor = null; - } - if (heartbeatService != null) { - heartbeatService.shutdownNow(); - heartbeatService = null; - } - if (temporaryStagingDirectory != null) { - try { - deleteRecursively(temporaryStagingDirectory, ALLOW_INSECURE); - } - catch (Exception e) { - log.warn(e, "Error deleting %s", temporaryStagingDirectory); - } - } - } - - protected void setupHive(String databaseName) - { - database = databaseName; - tablePartitionFormat = new SchemaTableName(database, "trino_test_partition_format"); - tableUnpartitioned = new SchemaTableName(database, "trino_test_unpartitioned"); - tablePartitionedWithNull = new SchemaTableName(database, "trino_test_partitioned_with_null"); - tableOffline = new SchemaTableName(database, "trino_test_offline"); - tableNotReadable = new SchemaTableName(database, "trino_test_not_readable"); - view = new SchemaTableName(database, "trino_test_view"); - invalidTable = new SchemaTableName(database, INVALID_TABLE); - tableBucketedStringInt = new SchemaTableName(database, "trino_test_bucketed_by_string_int"); - tableBucketedBigintBoolean = new SchemaTableName(database, "trino_test_bucketed_by_bigint_boolean"); - tableBucketedDoubleFloat = new SchemaTableName(database, "trino_test_bucketed_by_double_float"); - tablePartitionSchemaChange = new SchemaTableName(database, 
"trino_test_partition_schema_change"); - tablePartitionSchemaChangeNonCanonical = new SchemaTableName(database, "trino_test_partition_schema_change_non_canonical"); - - invalidTableHandle = new HiveTableHandle(database, INVALID_TABLE, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty()); - - dsColumn = createBaseColumn("ds", -1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty()); - fileFormatColumn = createBaseColumn("file_format", -1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty()); - dummyColumn = createBaseColumn("dummy", -1, HIVE_INT, INTEGER, PARTITION_KEY, Optional.empty()); - intColumn = createBaseColumn("t_int", -1, HIVE_INT, INTEGER, PARTITION_KEY, Optional.empty()); - pStringColumn = createBaseColumn("p_string", -1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty()); - pIntegerColumn = createBaseColumn("p_integer", -1, HIVE_INT, INTEGER, PARTITION_KEY, Optional.empty()); - - List partitionColumns = ImmutableList.of(dsColumn, fileFormatColumn, dummyColumn); - tablePartitionFormatPartitions = ImmutableList.builder() - .add(new HivePartition(tablePartitionFormat, - "ds=2012-12-29/file_format=textfile/dummy=1", - ImmutableMap.builder() - .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))) - .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("textfile"))) - .put(dummyColumn, NullableValue.of(INTEGER, 1L)) - .buildOrThrow())) - .add(new HivePartition(tablePartitionFormat, - "ds=2012-12-29/file_format=sequencefile/dummy=2", - ImmutableMap.builder() - .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))) - .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("sequencefile"))) - .put(dummyColumn, NullableValue.of(INTEGER, 2L)) - .buildOrThrow())) - .add(new HivePartition(tablePartitionFormat, - "ds=2012-12-29/file_format=rctext/dummy=3", - ImmutableMap.builder() - .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))) - .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rctext"))) - .put(dummyColumn, NullableValue.of(INTEGER, 3L)) - .buildOrThrow())) - .add(new HivePartition(tablePartitionFormat, - "ds=2012-12-29/file_format=rcbinary/dummy=4", - ImmutableMap.builder() - .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))) - .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rcbinary"))) - .put(dummyColumn, NullableValue.of(INTEGER, 4L)) - .buildOrThrow())) - .build(); - tableUnpartitionedPartitions = ImmutableList.of(new HivePartition(tableUnpartitioned)); - tablePartitionFormatProperties = new ConnectorTableProperties( - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile")), Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile")), Range.equal(createUnboundedVarcharType(), utf8Slice("rctext")), Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L), Range.equal(INTEGER, 2L), Range.equal(INTEGER, 3L), Range.equal(INTEGER, 4L)), false))), - Optional.empty(), - Optional.of(new DiscretePredicates(partitionColumns, ImmutableList.of( - 
TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L)), false))), - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 2L)), false))), - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rctext"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 3L)), false))), - TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), - fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false), - dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 4L)), false)))))), - ImmutableList.of()); - } - - protected final void setup(HostAndPort metastoreAddress, String databaseName) - { - HiveConfig hiveConfig = getHiveConfig() - .setParquetTimeZone("UTC") - .setRcfileTimeZone("UTC"); - - hdfsEnvironment = HDFS_ENVIRONMENT; - - CachingHiveMetastoreConfig cachingHiveMetastoreConfig = new CachingHiveMetastoreConfig(); - HiveMetastore metastore = createCachingHiveMetastore( - new BridgingHiveMetastore(testingThriftHiveMetastoreBuilder() - .metastoreClient(metastoreAddress) - .hiveConfig(hiveConfig) - .thriftMetastoreConfig(new ThriftMetastoreConfig() - .setAssumeCanonicalPartitionKeys(true)) - .fileSystemFactory(new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS)) - .build()), - new Duration(1, MINUTES), - new Duration(1, MINUTES), - Optional.of(new Duration(15, SECONDS)), - executor, - 10000, - CachingHiveMetastore.StatsRecording.ENABLED, - cachingHiveMetastoreConfig.isCacheMissing(), - cachingHiveMetastoreConfig.isPartitionCacheEnabled()); - - setup(databaseName, hiveConfig, metastore, hdfsEnvironment); - } - - protected final void setup(String databaseName, HiveConfig hiveConfig, HiveMetastore hiveMetastore, HdfsEnvironment hdfsConfiguration) - { - setupHive(databaseName); - - metastoreClient = hiveMetastore; - hdfsEnvironment = hdfsConfiguration; - HivePartitionManager partitionManager = new HivePartitionManager(hiveConfig); - HdfsFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS); - locationService = new HiveLocationService(fileSystemFactory, hiveConfig); - JsonCodec partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class); - countingDirectoryLister = new CountingDirectoryLister(); - metadataFactory = new HiveMetadataFactory( - new CatalogName("hive"), - HiveMetastoreFactory.ofInstance(metastoreClient), - getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment), - fileSystemFactory, - partitionManager, - 10, - 10, - 10, - 100_000, - false, - false, - false, - true, - true, - 
false, - false, - 1000, - Optional.empty(), - true, - TESTING_TYPE_MANAGER, - NOOP_METADATA_PROVIDER, - locationService, - partitionUpdateCodec, - executor, - heartbeatService, - TEST_SERVER_VERSION, - (session, tableHandle) -> { - if (!tableHandle.getTableName().contains("apply_redirection_tester")) { - return Optional.empty(); - } - return Optional.of(new TableScanRedirectApplicationResult( - new CatalogSchemaTableName("hive", databaseName, "mock_redirection_target"), - ImmutableMap.of(), - TupleDomain.all())); - }, - ImmutableSet.of( - new PartitionsSystemTableProvider(partitionManager, TESTING_TYPE_MANAGER), - new PropertiesSystemTableProvider()), - metastore -> new NoneHiveMaterializedViewMetadata() - { - @Override - public List listMaterializedViews(ConnectorSession session, Optional schemaName) - { - return materializedViews.stream() - .filter(schemaName - .>map(name -> mvName -> mvName.getSchemaName().equals(name)) - .orElse(mvName -> true)) - .collect(toImmutableList()); - } - - @Override - public Optional getMaterializedView(ConnectorSession session, SchemaTableName viewName) - { - if (!viewName.getTableName().contains("materialized_view_tester")) { - return Optional.empty(); - } - return Optional.of(new ConnectorMaterializedViewDefinition( - "dummy_view_sql", - Optional.empty(), - Optional.empty(), - Optional.empty(), - ImmutableList.of(new ConnectorMaterializedViewDefinition.Column("abc", TypeId.of("type"), Optional.empty())), - Optional.of(java.time.Duration.ZERO), - Optional.empty(), - Optional.of("alice"), - ImmutableList.of(), - ImmutableMap.of())); - } - }, - SqlStandardAccessControlMetadata::new, - countingDirectoryLister, - new TransactionScopeCachingDirectoryListerFactory(hiveConfig), - false, - true, - HiveTimestampPrecision.DEFAULT_PRECISION); - transactionManager = new HiveTransactionManager(metadataFactory); - splitManager = new HiveSplitManager( - transactionManager, - partitionManager, - fileSystemFactory, - executor, - new CounterStat(), - 100, - hiveConfig.getMaxOutstandingSplitsSize(), - hiveConfig.getMinPartitionBatchSize(), - hiveConfig.getMaxPartitionBatchSize(), - hiveConfig.getMaxInitialSplits(), - hiveConfig.getSplitLoaderConcurrency(), - hiveConfig.getMaxSplitsPerSecond(), - false, - TESTING_TYPE_MANAGER, - hiveConfig.getMaxPartitionsPerScan()); - pageSinkProvider = new HivePageSinkProvider( - getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment), - fileSystemFactory, - PAGE_SORTER, - HiveMetastoreFactory.ofInstance(metastoreClient), - new GroupByHashPageIndexerFactory(JOIN_COMPILER), - TESTING_TYPE_MANAGER, - getHiveConfig(), - getSortingFileWriterConfig(), - locationService, - partitionUpdateCodec, - new TestingNodeManager("fake-environment"), - new HiveEventClient(), - getHiveSessionProperties(hiveConfig), - new HiveWriterStats()); - pageSourceProvider = new HivePageSourceProvider( - TESTING_TYPE_MANAGER, - hiveConfig, - getDefaultHivePageSourceFactories(hdfsEnvironment, hiveConfig)); - nodePartitioningProvider = new HiveNodePartitioningProvider( - new TestingNodeManager("fake-environment"), - TESTING_TYPE_MANAGER); - } - - /** - * Allow subclass to change default configuration. 
-     */
-    protected HiveConfig getHiveConfig()
-    {
-        return new HiveConfig()
-                .setTemporaryStagingDirectoryPath(temporaryStagingDirectory.resolve("temp_path_").toAbsolutePath().toString());
-    }
-
-    protected SortingFileWriterConfig getSortingFileWriterConfig()
-    {
-        return new SortingFileWriterConfig()
-                .setMaxOpenSortFiles(10)
-                .setWriterSortBufferSize(DataSize.of(100, KILOBYTE));
-    }
-
-    protected ConnectorSession newSession()
-    {
-        return newSession(ImmutableMap.of());
-    }
-
-    protected ConnectorSession newSession(Map propertyValues)
-    {
-        return TestingConnectorSession.builder()
-                .setPropertyMetadata(getHiveSessionProperties(getHiveConfig()).getSessionProperties())
-                .setPropertyValues(propertyValues)
-                .build();
-    }
-
-    protected Transaction newTransaction()
-    {
-        return new HiveTransaction(transactionManager);
-    }
-
-    protected interface Transaction
-            extends AutoCloseable
-    {
-        ConnectorMetadata getMetadata();
-
-        SemiTransactionalHiveMetastore getMetastore();
-
-        ConnectorTransactionHandle getTransactionHandle();
-
-        void commit();
-
-        void rollback();
-
-        @Override
-        void close();
-    }
-
-    static class HiveTransaction
-            implements Transaction
-    {
-        private final HiveTransactionManager transactionManager;
-        private final ConnectorTransactionHandle transactionHandle;
-        private boolean closed;
-
-        public HiveTransaction(HiveTransactionManager transactionManager)
-        {
-            this.transactionManager = requireNonNull(transactionManager, "transactionManager is null");
-            this.transactionHandle = new HiveTransactionHandle(false);
-            transactionManager.begin(transactionHandle);
-            getMetastore().testOnlyThrowOnCleanupFailures();
-        }
-
-        @Override
-        public ConnectorMetadata getMetadata()
-        {
-            return transactionManager.get(transactionHandle, SESSION.getIdentity());
-        }
-
-        @Override
-        public SemiTransactionalHiveMetastore getMetastore()
-        {
-            return transactionManager.get(transactionHandle, SESSION.getIdentity()).getMetastore();
-        }
-
-        @Override
-        public ConnectorTransactionHandle getTransactionHandle()
-        {
-            return transactionHandle;
-        }
-
-        @Override
-        public void commit()
-        {
-            checkState(!closed);
-            closed = true;
-            transactionManager.commit(transactionHandle);
-        }
-
-        @Override
-        public void rollback()
-        {
-            checkState(!closed);
-            closed = true;
-            transactionManager.rollback(transactionHandle);
-        }
-
-        @Override
-        public void close()
-        {
-            if (!closed) {
-                try {
-                    getMetastore().testOnlyCheckIsReadOnly(); // transactions in this test with writes in it must explicitly commit or rollback
-                }
-                finally {
-                    rollback();
-                }
-            }
-        }
-    }
-
-    @Test
-    public void testGetDatabaseNames()
-    {
-        try (Transaction transaction = newTransaction()) {
-            ConnectorMetadata metadata = transaction.getMetadata();
-            List databases = metadata.listSchemaNames(newSession());
-            assertThat(databases).contains(database);
-        }
-    }
-
-    @Test
-    public void testGetTableNames()
-    {
-        try (Transaction transaction = newTransaction()) {
-            ConnectorMetadata metadata = transaction.getMetadata();
-            List tables = metadata.listTables(newSession(), Optional.of(database));
-            assertThat(tables).contains(tablePartitionFormat);
-            assertThat(tables).contains(tableUnpartitioned);
-        }
-    }
-
-    @Test
-    public void testGetAllTableNames()
-    {
-        try (Transaction transaction = newTransaction()) {
-            ConnectorMetadata metadata = transaction.getMetadata();
-            List tables = metadata.listTables(newSession(), Optional.empty());
-            assertThat(tables).contains(tablePartitionFormat);
-            assertThat(tables).contains(tableUnpartitioned);
-        }
-    }
-
-    @Test
-    public
void testGetAllTableColumns() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - Map> allColumns = listTableColumns(metadata, newSession(), new SchemaTablePrefix()); - assertThat(allColumns).containsKey(tablePartitionFormat); - assertThat(allColumns).containsKey(tableUnpartitioned); - } - } - - @Test - public void testGetAllTableColumnsInSchema() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - Map> allColumns = listTableColumns(metadata, newSession(), new SchemaTablePrefix(database)); - assertThat(allColumns).containsKey(tablePartitionFormat); - assertThat(allColumns).containsKey(tableUnpartitioned); - } - } - - @Test - public void testListUnknownSchema() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - assertThat(metadata.getTableHandle(session, new SchemaTableName(INVALID_DATABASE, INVALID_TABLE))).isNull(); - assertThat(metadata.listTables(session, Optional.of(INVALID_DATABASE))).isEqualTo(ImmutableList.of()); - assertThat(listTableColumns(metadata, session, new SchemaTablePrefix(INVALID_DATABASE, INVALID_TABLE))).isEqualTo(ImmutableMap.of()); - assertThat(metadata.listViews(session, Optional.of(INVALID_DATABASE))).isEqualTo(ImmutableList.of()); - assertThat(metadata.getViews(session, Optional.of(INVALID_DATABASE))).isEqualTo(ImmutableMap.of()); - assertThat(metadata.getView(session, new SchemaTableName(INVALID_DATABASE, INVALID_TABLE))).isEqualTo(Optional.empty()); - } - } - - @Test - public void testGetPartitions() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - tableHandle = applyFilter(metadata, tableHandle, Constraint.alwaysTrue()); - ConnectorTableProperties properties = metadata.getTableProperties(newSession(), tableHandle); - assertExpectedTableProperties(properties, tablePartitionFormatProperties); - assertExpectedPartitions(tableHandle, tablePartitionFormatPartitions); - } - } - - @Test - public void testGetPartitionsWithBindings() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - Constraint constraint = new Constraint(TupleDomain.withColumnDomains(ImmutableMap.of(intColumn, Domain.singleValue(BIGINT, 5L)))); - tableHandle = applyFilter(metadata, tableHandle, constraint); - ConnectorTableProperties properties = metadata.getTableProperties(newSession(), tableHandle); - assertExpectedTableProperties(properties, tablePartitionFormatProperties); - assertExpectedPartitions(tableHandle, tablePartitionFormatPartitions); - } - } - - @Test - public void testGetPartitionsWithFilter() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionedWithNull); - - Domain varcharSomeValue = Domain.singleValue(VARCHAR, utf8Slice("abc")); - Domain varcharOnlyNull = Domain.onlyNull(VARCHAR); - Domain varcharNotNull = Domain.notNull(VARCHAR); - - Domain integerSomeValue = Domain.singleValue(INTEGER, 123L); - Domain integerOnlyNull = Domain.onlyNull(INTEGER); - Domain integerNotNull = Domain.notNull(INTEGER); 
- - // all - assertThat(getPartitionNamesByFilter(metadata, tableHandle, new Constraint(TupleDomain.all()))) - .containsOnly( - "p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", - "p_string=abc/p_integer=123", - "p_string=def/p_integer=456"); - - // is some value - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharSomeValue)) - .containsOnly("p_string=abc/p_integer=123"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerSomeValue)) - .containsOnly("p_string=abc/p_integer=123"); - - // IS NULL - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharOnlyNull)) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerOnlyNull)) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__"); - - // IS NOT NULL - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharNotNull)) - .containsOnly("p_string=abc/p_integer=123", "p_string=def/p_integer=456"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerNotNull)) - .containsOnly("p_string=abc/p_integer=123", "p_string=def/p_integer=456"); - - // IS NULL OR is some value - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharOnlyNull.union(varcharSomeValue))) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", "p_string=abc/p_integer=123"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerOnlyNull.union(integerSomeValue))) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", "p_string=abc/p_integer=123"); - - // IS NOT NULL AND is NOT some value - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharSomeValue.complement().intersect(varcharNotNull))) - .containsOnly("p_string=def/p_integer=456"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerSomeValue.complement().intersect(integerNotNull))) - .containsOnly("p_string=def/p_integer=456"); - - // IS NULL OR is NOT some value - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pStringColumn, varcharSomeValue.complement())) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", "p_string=def/p_integer=456"); - assertThat(getPartitionNamesByFilter(metadata, tableHandle, pIntegerColumn, integerSomeValue.complement())) - .containsOnly("p_string=__HIVE_DEFAULT_PARTITION__/p_integer=__HIVE_DEFAULT_PARTITION__", "p_string=def/p_integer=456"); - } - } - - private Set getPartitionNamesByFilter(ConnectorMetadata metadata, ConnectorTableHandle tableHandle, ColumnHandle columnHandle, Domain domain) - { - return getPartitionNamesByFilter(metadata, tableHandle, new Constraint(TupleDomain.withColumnDomains(ImmutableMap.of(columnHandle, domain)))); - } - - private Set getPartitionNamesByFilter(ConnectorMetadata metadata, ConnectorTableHandle tableHandle, Constraint constraint) - { - return applyFilter(metadata, tableHandle, constraint) - .getPartitions().orElseThrow(() -> new IllegalStateException("No partitions")) - .stream() - .map(HivePartition::getPartitionId) - .collect(toImmutableSet()); - } - - @Test - public void testMismatchSchemaTable() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - // 
TODO: fix coercion for JSON - if (storageFormat == JSON) { - continue; - } - SchemaTableName temporaryMismatchSchemaTable = temporaryTable("mismatch_schema"); - try { - doTestMismatchSchemaTable( - temporaryMismatchSchemaTable, - storageFormat, - MISMATCH_SCHEMA_TABLE_BEFORE, - MISMATCH_SCHEMA_TABLE_DATA_BEFORE, - MISMATCH_SCHEMA_TABLE_AFTER, - MISMATCH_SCHEMA_TABLE_DATA_AFTER); - } - finally { - dropTable(temporaryMismatchSchemaTable); - } - } - } - - protected void doTestMismatchSchemaTable( - SchemaTableName schemaTableName, - HiveStorageFormat storageFormat, - List tableBefore, - MaterializedResult dataBefore, - List tableAfter, - MaterializedResult dataAfter) - throws Exception - { - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - - doCreateEmptyTable(schemaTableName, storageFormat, tableBefore); - - // insert the data - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName); - - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(dataBefore.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - transaction.commit(); - } - - // load the table and verify the data - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName); - - List columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream() - .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toList()); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), dataBefore.getMaterializedRows()); - transaction.commit(); - } - - // alter the table schema - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(session); - Table oldTable = transaction.getMetastore().getTable(schemaName, tableName).get(); - List dataColumns = tableAfter.stream() - .filter(columnMetadata -> !columnMetadata.getName().equals("ds")) - .map(columnMetadata -> new Column(columnMetadata.getName(), toHiveType(columnMetadata.getType()), Optional.empty(), Map.of())) - .collect(toList()); - Table.Builder newTable = Table.builder(oldTable) - .setDataColumns(dataColumns); - - transaction.getMetastore().replaceTable(schemaName, tableName, newTable.build(), principalPrivileges); - - transaction.commit(); - } - - // load the altered table and verify the data - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName); - List columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream() 
- .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toList()); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), dataAfter.getMaterializedRows()); - - transaction.commit(); - } - - // insertions to the partitions with type mismatches should fail - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName); - - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(dataAfter.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - transaction.commit(); - - fail("expected exception"); - } - catch (TrinoException e) { - // expected - assertThat(e.getErrorCode()).isEqualTo(HIVE_PARTITION_SCHEMA_MISMATCH.toErrorCode()); - } - } - - protected void assertExpectedTableProperties(ConnectorTableProperties actualProperties, ConnectorTableProperties expectedProperties) - { - assertThat(actualProperties.getPredicate()).isEqualTo(expectedProperties.getPredicate()); - assertThat(actualProperties.getDiscretePredicates().isPresent()).isEqualTo(expectedProperties.getDiscretePredicates().isPresent()); - actualProperties.getDiscretePredicates().ifPresent(actual -> { - DiscretePredicates expected = expectedProperties.getDiscretePredicates().get(); - assertThat(actual.getColumns()).isEqualTo(expected.getColumns()); - assertEqualsIgnoreOrder(actual.getPredicates(), expected.getPredicates()); - }); - assertThat(actualProperties.getLocalProperties()).isEqualTo(expectedProperties.getLocalProperties()); - } - - protected void assertExpectedPartitions(ConnectorTableHandle table, Iterable expectedPartitions) - { - Iterable actualPartitions = ((HiveTableHandle) table).getPartitions().orElseThrow(AssertionError::new); - Map actualById = uniqueIndex(actualPartitions, HivePartition::getPartitionId); - Map expectedById = uniqueIndex(expectedPartitions, HivePartition::getPartitionId); - - assertThat(actualById).isEqualTo(expectedById); - - // HivePartition.equals doesn't compare all the fields, so let's check them - for (Map.Entry expected : expectedById.entrySet()) { - HivePartition actualPartition = actualById.get(expected.getKey()); - HivePartition expectedPartition = expected.getValue(); - assertThat(actualPartition.getPartitionId()).isEqualTo(expectedPartition.getPartitionId()); - assertThat(actualPartition.getKeys()).isEqualTo(expectedPartition.getKeys()); - assertThat(actualPartition.getTableName()).isEqualTo(expectedPartition.getTableName()); - } - } - - @Test - public void testGetPartitionNamesUnpartitioned() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned); - tableHandle = applyFilter(metadata, tableHandle, Constraint.alwaysTrue()); - ConnectorTableProperties properties = metadata.getTableProperties(newSession(), tableHandle); - assertExpectedTableProperties(properties, new 
ConnectorTableProperties()); - assertExpectedPartitions(tableHandle, tableUnpartitionedPartitions); - } - } - - @Test - public void testGetTableSchemaPartitionFormat() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(newSession(), getTableHandle(metadata, tablePartitionFormat)); - Map map = uniqueIndex(tableMetadata.getColumns(), ColumnMetadata::getName); - - assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false); - assertPrimitiveField(map, "t_tinyint", TINYINT, false); - assertPrimitiveField(map, "t_smallint", SMALLINT, false); - assertPrimitiveField(map, "t_int", INTEGER, false); - assertPrimitiveField(map, "t_bigint", BIGINT, false); - assertPrimitiveField(map, "t_float", REAL, false); - assertPrimitiveField(map, "t_double", DOUBLE, false); - assertPrimitiveField(map, "t_boolean", BOOLEAN, false); - assertPrimitiveField(map, "ds", createUnboundedVarcharType(), true); - assertPrimitiveField(map, "file_format", createUnboundedVarcharType(), true); - assertPrimitiveField(map, "dummy", INTEGER, true); - } - } - - @Test - public void testGetTableSchemaUnpartitioned() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(newSession(), tableHandle); - Map map = uniqueIndex(tableMetadata.getColumns(), ColumnMetadata::getName); - - assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false); - assertPrimitiveField(map, "t_tinyint", TINYINT, false); - } - } - - @Test - public void testGetTableSchemaOffline() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - Map> columns = listTableColumns(metadata, newSession(), tableOffline.toSchemaTablePrefix()); - assertThat(columns.size()).isEqualTo(1); - Map map = uniqueIndex(getOnlyElement(columns.values()), ColumnMetadata::getName); - - assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false); - } - } - - @Test - public void testGetTableSchemaNotReadablePartition() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableNotReadable); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(newSession(), tableHandle); - Map map = uniqueIndex(tableMetadata.getColumns(), ColumnMetadata::getName); - - assertPrimitiveField(map, "t_string", createUnboundedVarcharType(), false); - } - } - - @Test - public void testGetTableSchemaException() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.getTableHandle(newSession(), invalidTable)).isNull(); - } - } - - @Test - public void testGetTableStatsBucketedStringInt() - { - assertTableStatsComputed( - tableBucketedStringInt, - ImmutableSet.of( - "t_bigint", - "t_boolean", - "t_double", - "t_float", - "t_int", - "t_smallint", - "t_string", - "t_tinyint", - "ds")); - } - - @Test - public void testGetTableStatsUnpartitioned() - { - assertTableStatsComputed( - tableUnpartitioned, - ImmutableSet.of("t_string", "t_tinyint")); - } - - private void assertTableStatsComputed( - SchemaTableName tableName, - Set expectedColumnStatsColumns) - { - try 
(Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // first check if table handle with only one projected column will return this column stats - String firstColumnName = expectedColumnStatsColumns.iterator().next(); - verifyTableStatisticsWithColumns(metadata, session, applyProjection(metadata, session, tableHandle, firstColumnName), ImmutableSet.of(firstColumnName)); - - verifyTableStatisticsWithColumns(metadata, session, tableHandle, expectedColumnStatsColumns); - } - } - - private static ConnectorTableHandle applyProjection(ConnectorMetadata metadata, ConnectorSession session, ConnectorTableHandle tableHandle, String columnName) - { - Map columnHandles = metadata.getColumnHandles(session, tableHandle); - HiveColumnHandle firstColumn = (HiveColumnHandle) columnHandles.get(columnName); - return metadata.applyProjection( - session, - tableHandle, - ImmutableList.of(new Variable("c1", firstColumn.getBaseType())), - ImmutableMap.of("c1", firstColumn)) - .orElseThrow() - .getHandle(); - } - - private static void verifyTableStatisticsWithColumns( - ConnectorMetadata metadata, - ConnectorSession session, - ConnectorTableHandle tableHandle, - Set expectedColumnStatsColumns) - { - TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle); - - assertThat(tableStatistics.getRowCount().isUnknown()) - .describedAs("row count is unknown") - .isFalse(); - - Map columnsStatistics = tableStatistics - .getColumnStatistics() - .entrySet() - .stream() - .collect( - toImmutableMap( - entry -> ((HiveColumnHandle) entry.getKey()).getName(), - Map.Entry::getValue)); - - assertThat(columnsStatistics.keySet()) - .describedAs("columns with statistics") - .isEqualTo(expectedColumnStatsColumns); - - Map columnHandles = metadata.getColumnHandles(session, tableHandle); - columnsStatistics.forEach((columnName, columnStatistics) -> { - ColumnHandle columnHandle = columnHandles.get(columnName); - Type columnType = metadata.getColumnMetadata(session, tableHandle, columnHandle).getType(); - - assertThat(columnStatistics.getNullsFraction().isUnknown()) - .describedAs("unknown nulls fraction for " + columnName) - .isFalse(); - - assertThat(columnStatistics.getDistinctValuesCount().isUnknown()) - .describedAs("unknown distinct values count for " + columnName) - .isFalse(); - - if (columnType instanceof VarcharType) { - assertThat(columnStatistics.getDataSize().isUnknown()) - .describedAs("unknown data size for " + columnName) - .isFalse(); - } - else { - assertThat(columnStatistics.getDataSize().isUnknown()) - .describedAs("unknown data size for" + columnName) - .isTrue(); - } - }); - } - - @Test - public void testGetPartitionSplitsBatch() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle); - - assertThat(getSplitCount(splitSource)).isEqualTo(tablePartitionFormatPartitions.size()); - } - } - - @Test - public void testGetPartitionSplitsBatchUnpartitioned() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - 
metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned); - ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle); - - assertThat(getSplitCount(splitSource)).isEqualTo(1); - } - } - - @Test - public void testPerTransactionDirectoryListerCache() - throws Exception - { - long initListCount = countingDirectoryLister.getListCount(); - SchemaTableName tableName = temporaryTable("per_transaction_listing_cache_test"); - List columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty(), Map.of())); - createEmptyTable(tableName, ORC, columns, ImmutableList.of()); - try { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - - // directory should be listed initially - assertThat(countingDirectoryLister.getListCount()).isEqualTo(initListCount + 1); - - // directory content should be cached - getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - assertThat(countingDirectoryLister.getListCount()).isEqualTo(initListCount + 1); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - - // directory should be listed again in new transaction - assertThat(countingDirectoryLister.getListCount()).isEqualTo(initListCount + 2); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testGetPartitionSplitsBatchInvalidTable() - { - assertThatThrownBy(() -> { - try (Transaction transaction = newTransaction()) { - getSplits(splitManager, transaction, newSession(), invalidTableHandle); - } - }).isInstanceOf(TableNotFoundException.class); - } - - @Test - public void testGetPartitionTableOffline() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - try { - getTableHandle(metadata, tableOffline); - fail("expected TableOfflineException"); - } - catch (TableOfflineException e) { - assertThat(e.getTableName()).isEqualTo(tableOffline); - } - } - } - - @Test - public void testGetPartitionSplitsTableNotReadablePartition() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableNotReadable); - assertThat(tableHandle).isNotNull(); - - try { - getSplitCount(getSplits(splitManager, transaction, session, tableHandle)); - fail("Expected HiveNotReadableException"); - } - catch (HiveNotReadableException e) { - assertThat(e).hasMessageMatching("Table '.*\\.trino_test_not_readable' is not readable: reason for not readable"); - assertThat(e.getTableName()).isEqualTo(tableNotReadable); - assertThat(e.getPartition()).isEqualTo(Optional.empty()); - } - } - } - - @Test - public void testBucketedTableStringInt() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - 
metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableBucketedStringInt); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - assertTableIsBucketed(tableHandle, transaction, session); - - String testString = "test"; - Integer testInt = 13; - Short testSmallint = 12; - - // Reverse the order of bindings as compared to bucketing order - ImmutableMap bindings = ImmutableMap.builder() - .put(columnHandles.get(columnIndex.get("t_int")), NullableValue.of(INTEGER, (long) testInt)) - .put(columnHandles.get(columnIndex.get("t_string")), NullableValue.of(createUnboundedVarcharType(), utf8Slice(testString))) - .put(columnHandles.get(columnIndex.get("t_smallint")), NullableValue.of(SMALLINT, (long) testSmallint)) - .buildOrThrow(); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(1), Optional.empty()); - - boolean rowFound = false; - for (MaterializedRow row : result) { - if (testString.equals(row.getField(columnIndex.get("t_string"))) && - testInt.equals(row.getField(columnIndex.get("t_int"))) && - testSmallint.equals(row.getField(columnIndex.get("t_smallint")))) { - rowFound = true; - } - } - assertThat(rowFound).isTrue(); - } - } - - @SuppressWarnings("ConstantConditions") - @Test - public void testBucketedTableBigintBoolean() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableBucketedBigintBoolean); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - assertTableIsBucketed(tableHandle, transaction, session); - ConnectorTableProperties properties = metadata.getTableProperties( - newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true)), - tableHandle); - // trino_test_bucketed_by_bigint_boolean does not define sorting, therefore local properties is empty - assertThat(properties.getLocalProperties().isEmpty()).isTrue(); - assertThat(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties().isEmpty()).isTrue(); - - String testString = "test"; - Long testBigint = 89L; - Boolean testBoolean = true; - - ImmutableMap bindings = ImmutableMap.builder() - .put(columnHandles.get(columnIndex.get("t_string")), NullableValue.of(createUnboundedVarcharType(), utf8Slice(testString))) - .put(columnHandles.get(columnIndex.get("t_bigint")), NullableValue.of(BIGINT, testBigint)) - .put(columnHandles.get(columnIndex.get("t_boolean")), NullableValue.of(BOOLEAN, testBoolean)) - .buildOrThrow(); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(1), Optional.empty()); - - boolean rowFound = false; - for (MaterializedRow row : result) { - if (testString.equals(row.getField(columnIndex.get("t_string"))) && - testBigint.equals(row.getField(columnIndex.get("t_bigint"))) && - testBoolean.equals(row.getField(columnIndex.get("t_boolean")))) { - rowFound = true; - break; - } - } - assertThat(rowFound).isTrue(); - } - } - - @Test - public void testBucketedTableDoubleFloat() - throws Exception - { - try (Transaction transaction = 
newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableBucketedDoubleFloat); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - assertTableIsBucketed(tableHandle, transaction, session); - - float testFloatValue = 87.1f; - double testDoubleValue = 88.2; - - ImmutableMap bindings = ImmutableMap.builder() - .put(columnHandles.get(columnIndex.get("t_float")), NullableValue.of(REAL, (long) floatToRawIntBits(testFloatValue))) - .put(columnHandles.get(columnIndex.get("t_double")), NullableValue.of(DOUBLE, testDoubleValue)) - .buildOrThrow(); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.fromFixedValues(bindings), OptionalInt.of(1), Optional.empty()); - assertThat(result).anyMatch(row -> testFloatValue == (float) row.getField(columnIndex.get("t_float")) - && testDoubleValue == (double) row.getField(columnIndex.get("t_double"))); - } - } - - @Test - public void testBucketedTableEvolutionWithDifferentReadBucketCount() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryBucketEvolutionTable = temporaryTable("bucket_evolution"); - try { - doTestBucketedTableEvolutionWithDifferentReadCount(storageFormat, temporaryBucketEvolutionTable); - } - finally { - dropTable(temporaryBucketEvolutionTable); - } - } - } - - private void doTestBucketedTableEvolutionWithDifferentReadCount(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - int rowCount = 100; - int bucketCount = 16; - - // Produce a table with a partition with bucket count different but compatible with the table bucket count - createEmptyTable( - tableName, - storageFormat, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(new Column("pk", HIVE_STRING, Optional.empty(), Map.of())), - Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 4, ImmutableList.of()))); - // write a 4-bucket partition - MaterializedResult.Builder bucket8Builder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> bucket8Builder.row((long) i, String.valueOf(i), "four")); - insertData(tableName, bucket8Builder.build()); - - // Alter the bucket count to 16 - alterBucketProperty(tableName, Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, bucketCount, ImmutableList.of()))); - - MaterializedResult result; - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // read entire table - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - List splits = getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - assertThat(splits.size()).isEqualTo(16); - - ImmutableList.Builder allRows = ImmutableList.builder(); - for (ConnectorSplit split : splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, 
tableHandle, columnHandles, DynamicFilter.EMPTY)) { - MaterializedResult intermediateResult = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - allRows.addAll(intermediateResult.getMaterializedRows()); - } - } - result = new MaterializedResult(allRows.build(), getTypes(columnHandles)); - - assertThat(result.getRowCount()).isEqualTo(rowCount); - - Map columnIndex = indexColumns(columnHandles); - int nameColumnIndex = columnIndex.get("name"); - int bucketColumnIndex = columnIndex.get(BUCKET_COLUMN_NAME); - for (MaterializedRow row : result.getMaterializedRows()) { - String name = (String) row.getField(nameColumnIndex); - int bucket = (int) row.getField(bucketColumnIndex); - - assertThat(bucket).isEqualTo(Integer.parseInt(name) % bucketCount); - } - } - } - - @Test - public void testBucketedTableEvolution() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryBucketEvolutionTable = temporaryTable("bucket_evolution"); - try { - doTestBucketedTableEvolution(storageFormat, temporaryBucketEvolutionTable); - } - finally { - dropTable(temporaryBucketEvolutionTable); - } - } - } - - private void doTestBucketedTableEvolution(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - int rowCount = 100; - - // - // Produce a table with 8 buckets. - // The table has 3 partitions of 3 different bucket count (4, 8, 16). - createEmptyTable( - tableName, - storageFormat, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(new Column("pk", HIVE_STRING, Optional.empty(), Map.of())), - Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 4, ImmutableList.of()))); - // write a 4-bucket partition - MaterializedResult.Builder bucket4Builder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> bucket4Builder.row((long) i, String.valueOf(i), "four")); - insertData(tableName, bucket4Builder.build()); - // write a 16-bucket partition - alterBucketProperty(tableName, Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 16, ImmutableList.of()))); - MaterializedResult.Builder bucket16Builder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> bucket16Builder.row((long) i, String.valueOf(i), "sixteen")); - insertData(tableName, bucket16Builder.build()); - // write an 8-bucket partition - alterBucketProperty(tableName, Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 8, ImmutableList.of()))); - MaterializedResult.Builder bucket8Builder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> bucket8Builder.row((long) i, String.valueOf(i), "eight")); - insertData(tableName, bucket8Builder.build()); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // read entire table - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable( - transaction, - tableHandle, - columnHandles, - session, - TupleDomain.all(), - OptionalInt.empty(), - Optional.empty()); - 
assertBucketTableEvolutionResult(result, columnHandles, ImmutableSet.of(0, 1, 2, 3, 4, 5, 6, 7), rowCount); - - // read single bucket (table/logical bucket) - result = readTable( - transaction, - tableHandle, - columnHandles, - session, - TupleDomain.fromFixedValues(ImmutableMap.of(bucketColumnHandle(), NullableValue.of(INTEGER, 6L))), - OptionalInt.empty(), - Optional.empty()); - assertBucketTableEvolutionResult(result, columnHandles, ImmutableSet.of(6), rowCount); - - // read single bucket, without selecting the bucketing column (i.e. id column) - columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream() - .filter(columnHandle -> !"id".equals(((HiveColumnHandle) columnHandle).getName())) - .collect(toImmutableList()); - result = readTable( - transaction, - tableHandle, - columnHandles, - session, - TupleDomain.fromFixedValues(ImmutableMap.of(bucketColumnHandle(), NullableValue.of(INTEGER, 6L))), - OptionalInt.empty(), - Optional.empty()); - assertBucketTableEvolutionResult(result, columnHandles, ImmutableSet.of(6), rowCount); - } - } - - private static void assertBucketTableEvolutionResult(MaterializedResult result, List columnHandles, Set bucketIds, int rowCount) - { - // Assert that only elements in the specified bucket shows up, and each element shows up 3 times. - int bucketCount = 8; - Set expectedIds = LongStream.range(0, rowCount) - .filter(x -> bucketIds.contains(toIntExact(x % bucketCount))) - .boxed() - .collect(toImmutableSet()); - - // assert that content from all three buckets are the same - Map columnIndex = indexColumns(columnHandles); - OptionalInt idColumnIndex = columnIndex.containsKey("id") ? OptionalInt.of(columnIndex.get("id")) : OptionalInt.empty(); - int nameColumnIndex = columnIndex.get("name"); - int bucketColumnIndex = columnIndex.get(BUCKET_COLUMN_NAME); - Map idCount = new HashMap<>(); - for (MaterializedRow row : result.getMaterializedRows()) { - String name = (String) row.getField(nameColumnIndex); - int bucket = (int) row.getField(bucketColumnIndex); - idCount.compute(Long.parseLong(name), (key, oldValue) -> oldValue == null ? 
1 : oldValue + 1); - assertThat(bucket).isEqualTo(Integer.parseInt(name) % bucketCount); - if (idColumnIndex.isPresent()) { - long id = (long) row.getField(idColumnIndex.getAsInt()); - assertThat(Integer.parseInt(name)).isEqualTo(id); - } - } - assertThat((int) idCount.values().stream() - .distinct() - .collect(onlyElement())).isEqualTo(3); - assertThat(idCount.keySet()).isEqualTo(expectedIds); - } - - @Test - public void testBucketedSortedTableEvolution() - throws Exception - { - SchemaTableName temporaryTable = temporaryTable("test_bucket_sorting_evolution"); - try { - doTestBucketedSortedTableEvolution(temporaryTable); - } - finally { - dropTable(temporaryTable); - } - } - - private void doTestBucketedSortedTableEvolution(SchemaTableName tableName) - throws Exception - { - int rowCount = 100; - // Create table and populate it with 3 partitions with different sort orders but same bucketing - createEmptyTable( - tableName, - ORC, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(new Column("pk", HIVE_STRING, Optional.empty(), Map.of())), - Optional.of(new HiveBucketProperty( - ImmutableList.of("id"), - BUCKETING_V1, - 4, - ImmutableList.of(new SortingColumn("id", ASCENDING), new SortingColumn("name", ASCENDING))))); - // write a 4-bucket partition sorted by id, name - MaterializedResult.Builder sortedByIdNameBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> sortedByIdNameBuilder.row((long) i, String.valueOf(i), "sorted_by_id_name")); - insertData(tableName, sortedByIdNameBuilder.build()); - - // write a 4-bucket partition sorted by name - alterBucketProperty(tableName, Optional.of(new HiveBucketProperty( - ImmutableList.of("id"), - BUCKETING_V1, - 4, - ImmutableList.of(new SortingColumn("name", ASCENDING))))); - MaterializedResult.Builder sortedByNameBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> sortedByNameBuilder.row((long) i, String.valueOf(i), "sorted_by_name")); - insertData(tableName, sortedByNameBuilder.build()); - - // write a 4-bucket partition sorted by id - alterBucketProperty(tableName, Optional.of(new HiveBucketProperty( - ImmutableList.of("id"), - BUCKETING_V1, - 4, - ImmutableList.of(new SortingColumn("id", ASCENDING))))); - MaterializedResult.Builder sortedByIdBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR, VARCHAR); - IntStream.range(0, rowCount).forEach(i -> sortedByIdBuilder.row((long) i, String.valueOf(i), "sorted_by_id")); - insertData(tableName, sortedByIdBuilder.build()); - - ConnectorTableHandle tableHandle; - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - tableHandle = getTableHandle(metadata, tableName); - - // read entire table - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertThat(result.getRowCount()).isEqualTo(300); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true)); - 
metadata.beginQuery(session); - Map columnHandles = metadata.getColumnHandles(session, tableHandle); - // verify local sorting property - ConnectorTableProperties properties = metadata.getTableProperties(session, tableHandle); - assertThat(properties.getLocalProperties()).isEqualTo(ImmutableList.of( - new SortingProperty<>(columnHandles.get("id"), ASC_NULLS_FIRST))); - - // read on a entire table should fail with exception - assertThatThrownBy(() -> readTable(transaction, tableHandle, ImmutableList.copyOf(columnHandles.values()), session, TupleDomain.all(), OptionalInt.empty(), Optional.empty())) - .isInstanceOf(TrinoException.class) - .hasMessage("Hive table (%s) sorting by [id] is not compatible with partition (pk=sorted_by_name) sorting by [name]." + - " This restriction can be avoided by disabling propagate_table_scan_sorting_properties.", tableName); - - // read only the partitions with sorting that is compatible to table sorting - MaterializedResult result = readTable( - transaction, - tableHandle, - ImmutableList.copyOf(columnHandles.values()), - session, - TupleDomain.withColumnDomains(ImmutableMap.of( - columnHandles.get("pk"), - Domain.create(ValueSet.of(VARCHAR, utf8Slice("sorted_by_id_name"), utf8Slice("sorted_by_id")), false))), - OptionalInt.empty(), - Optional.empty()); - assertThat(result.getRowCount()).isEqualTo(200); - } - } - - @Test - public void testBucketedTableValidation() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName table = temporaryTable("bucket_validation"); - try { - doTestBucketedTableValidation(storageFormat, table); - } - finally { - dropTable(table); - } - } - } - - private void doTestBucketedTableValidation(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - prepareInvalidBuckets(storageFormat, tableName); - - // read succeeds when validation is disabled - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(ImmutableMap.of("validate_bucketing", false)); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertThat(result.getRowCount()).isEqualTo(87); // fewer rows due to deleted file - } - - // read fails due to validation failure - assertReadFailsWithMessageMatching(storageFormat, tableName, "Hive table is corrupt\\. 
File '.*/000002_0_.*' is for bucket 2, but contains a row for bucket 5."); - } - - private void prepareInvalidBuckets(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - createEmptyTable( - tableName, - storageFormat, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(), - Optional.of(new HiveBucketProperty(ImmutableList.of("id"), BUCKETING_V1, 8, ImmutableList.of()))); - - MaterializedResult.Builder dataBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR); - for (long id = 0; id < 100; id++) { - dataBuilder.row(id, String.valueOf(id)); - } - insertData(tableName, dataBuilder.build()); - - try (Transaction transaction = newTransaction()) { - Set files = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - - Path bucket2 = files.stream() - .map(Path::new) - .filter(path -> path.getName().startsWith("000002_0_")) - .collect(onlyElement()); - - Path bucket5 = files.stream() - .map(Path::new) - .filter(path -> path.getName().startsWith("000005_0_")) - .collect(onlyElement()); - - HdfsContext context = new HdfsContext(newSession()); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, bucket2); - fileSystem.delete(bucket2, false); - fileSystem.rename(bucket5, bucket2); - } - } - - protected void assertReadFailsWithMessageMatching(HiveStorageFormat storageFormat, SchemaTableName tableName, String regex) - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - assertTrinoExceptionThrownBy( - () -> readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat))) - .hasErrorCode(HIVE_INVALID_BUCKET_FILES) - .hasMessageMatching(regex); - } - } - - private void assertTableIsBucketed(ConnectorTableHandle tableHandle, Transaction transaction, ConnectorSession session) - { - // the bucketed test tables should have ~32 splits - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits.size()).as("splits.size()") - .isBetween(31, 32); - - // verify all paths are unique - Set paths = new HashSet<>(); - for (ConnectorSplit split : splits) { - assertThat(paths.add(((HiveSplit) split).getPath())).isTrue(); - } - } - - @Test - public void testGetRecords() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits.size()).isEqualTo(tablePartitionFormatPartitions.size()); - - for (ConnectorSplit split : splits) { - HiveSplit hiveSplit = (HiveSplit) split; - - List partitionKeys = hiveSplit.getPartitionKeys(); - String ds = partitionKeys.get(0).getValue(); 
- String fileFormat = partitionKeys.get(1).getValue(); - HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH)); - int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue()); - - long rowNumber = 0; - long completedBytes = 0; - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - - assertPageSourceType(pageSource, fileType); - - for (MaterializedRow row : result) { - try { - assertValueTypes(row, tableMetadata.getColumns()); - } - catch (RuntimeException e) { - throw new RuntimeException("row " + rowNumber, e); - } - - rowNumber++; - Object value; - - value = row.getField(columnIndex.get("t_string")); - if (rowNumber % 19 == 0) { - assertThat(value).isNull(); - } - else if (rowNumber % 19 == 1) { - assertThat(value).isEqualTo(""); - } - else { - assertThat(value).isEqualTo("test"); - } - - assertThat(row.getField(columnIndex.get("t_tinyint"))).isEqualTo((byte) (1 + rowNumber)); - assertThat(row.getField(columnIndex.get("t_smallint"))).isEqualTo((short) (2 + rowNumber)); - assertThat(row.getField(columnIndex.get("t_int"))).isEqualTo(3 + (int) rowNumber); - - if (rowNumber % 13 == 0) { - assertThat(row.getField(columnIndex.get("t_bigint"))).isNull(); - } - else { - assertThat(row.getField(columnIndex.get("t_bigint"))).isEqualTo(4 + rowNumber); - } - - assertThat((Float) row.getField(columnIndex.get("t_float"))).isCloseTo(5.1f + rowNumber, offset(0.001f)); - assertThat(row.getField(columnIndex.get("t_double"))).isEqualTo(6.2 + rowNumber); - - if (rowNumber % 3 == 2) { - assertThat(row.getField(columnIndex.get("t_boolean"))).isNull(); - } - else { - assertThat(row.getField(columnIndex.get("t_boolean"))).isEqualTo(rowNumber % 3 != 0); - } - - assertThat(row.getField(columnIndex.get("ds"))).isEqualTo(ds); - assertThat(row.getField(columnIndex.get("file_format"))).isEqualTo(fileFormat); - assertThat(row.getField(columnIndex.get("dummy"))).isEqualTo(dummyPartition); - - long newCompletedBytes = pageSource.getCompletedBytes(); - assertThat(newCompletedBytes >= completedBytes).isTrue(); - assertThat(newCompletedBytes <= hiveSplit.getLength()).isTrue(); - completedBytes = newCompletedBytes; - } - - assertThat(completedBytes <= hiveSplit.getLength()).isTrue(); - assertThat(rowNumber).isEqualTo(100); - } - } - } - } - - @Test - public void testGetPartialRecords() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits.size()).isEqualTo(tablePartitionFormatPartitions.size()); - - for (ConnectorSplit split : splits) { - HiveSplit hiveSplit = (HiveSplit) split; - - List partitionKeys = hiveSplit.getPartitionKeys(); - String ds = partitionKeys.get(0).getValue(); - String fileFormat = partitionKeys.get(1).getValue(); - HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH)); - int dummyPartition = 
Integer.parseInt(partitionKeys.get(2).getValue()); - - long rowNumber = 0; - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - assertPageSourceType(pageSource, fileType); - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - for (MaterializedRow row : result) { - rowNumber++; - - assertThat(row.getField(columnIndex.get("t_double"))).isEqualTo(6.2 + rowNumber); - assertThat(row.getField(columnIndex.get("ds"))).isEqualTo(ds); - assertThat(row.getField(columnIndex.get("file_format"))).isEqualTo(fileFormat); - assertThat(row.getField(columnIndex.get("dummy"))).isEqualTo(dummyPartition); - } - } - assertThat(rowNumber).isEqualTo(100); - } - } - } - - @Test - public void testGetRecordsUnpartitioned() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - Map columnIndex = indexColumns(columnHandles); - - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits).hasSameSizeAs(tableUnpartitionedPartitions); - - for (ConnectorSplit split : splits) { - HiveSplit hiveSplit = (HiveSplit) split; - - assertThat(hiveSplit.getPartitionKeys()).isEqualTo(ImmutableList.of()); - - long rowNumber = 0; - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - assertPageSourceType(pageSource, TEXTFILE); - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - - for (MaterializedRow row : result) { - rowNumber++; - - if (rowNumber % 19 == 0) { - assertThat(row.getField(columnIndex.get("t_string"))).isNull(); - } - else if (rowNumber % 19 == 1) { - assertThat(row.getField(columnIndex.get("t_string"))).isEqualTo(""); - } - else { - assertThat(row.getField(columnIndex.get("t_string"))).isEqualTo("unpartitioned"); - } - - assertThat(row.getField(columnIndex.get("t_tinyint"))).isEqualTo((byte) (1 + rowNumber)); - } - } - assertThat(rowNumber).isEqualTo(100); - } - } - } - - @Test - public void testPartitionSchemaMismatch() - { - assertThatThrownBy(() -> { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle table = getTableHandle(metadata, tablePartitionSchemaChange); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - readTable(transaction, table, ImmutableList.of(dsColumn), session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - } - }) - .isInstanceOf(TrinoException.class) - .hasMessageMatching(".*The column 't_data' in table '.*\\.trino_test_partition_schema_change' is declared as type 'float', but partition 'ds=2012-12-29' declared column 't_data' as type 'string'."); - } - - // TODO coercion of non-canonical values should be supported - @Test - @Disabled - public void testPartitionSchemaNonCanonical() - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - 
ConnectorTableHandle table = getTableHandle(metadata, tablePartitionSchemaChangeNonCanonical); - ColumnHandle column = metadata.getColumnHandles(session, table).get("t_boolean"); - - Constraint constraint = new Constraint(TupleDomain.fromFixedValues(ImmutableMap.of(column, NullableValue.of(BOOLEAN, false)))); - table = applyFilter(metadata, table, constraint); - HivePartition partition = getOnlyElement(((HiveTableHandle) table).getPartitions().orElseThrow(AssertionError::new)); - assertThat(getPartitionId(partition)).isEqualTo("t_boolean=0"); - - ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, table); - ConnectorSplit split = getOnlyElement(getAllSplits(splitSource)); - - ImmutableList columnHandles = ImmutableList.of(column); - try (ConnectorPageSource ignored = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, table, columnHandles, DynamicFilter.EMPTY)) { - fail("expected exception"); - } - catch (TrinoException e) { - assertThat(e.getErrorCode()).isEqualTo(HIVE_INVALID_PARTITION_VALUE.toErrorCode()); - } - } - } - - @Test - public void testTypesTextFile() - throws Exception - { - assertGetRecords("trino_test_types_textfile", TEXTFILE); - } - - @Test - public void testTypesSequenceFile() - throws Exception - { - assertGetRecords("trino_test_types_sequencefile", SEQUENCEFILE); - } - - @Test - public void testTypesRcText() - throws Exception - { - assertGetRecords("trino_test_types_rctext", RCTEXT); - } - - @Test - public void testTypesRcBinary() - throws Exception - { - assertGetRecords("trino_test_types_rcbinary", RCBINARY); - } - - @Test - public void testTypesOrc() - throws Exception - { - assertGetRecords("trino_test_types_orc", ORC); - } - - @Test - public void testTypesParquet() - throws Exception - { - assertGetRecords("trino_test_types_parquet", PARQUET); - } - - @Test - public void testEmptyTextFile() - throws Exception - { - assertEmptyFile(TEXTFILE); - } - - @Test - public void testEmptySequenceFile() - throws Exception - { - assertEmptyFile(SEQUENCEFILE); - } - - @Test - public void testEmptyRcTextFile() - throws Exception - { - assertEmptyFile(RCTEXT); - } - - @Test - public void testEmptyRcBinaryFile() - throws Exception - { - assertEmptyFile(RCBINARY); - } - - @Test - public void testEmptyOrcFile() - throws Exception - { - assertEmptyFile(ORC); - } - - private void assertEmptyFile(HiveStorageFormat format) - throws Exception - { - SchemaTableName tableName = temporaryTable("empty_file"); - try { - List columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty(), Map.of())); - createEmptyTable(tableName, format, columns, ImmutableList.of()); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - Table table = transaction.getMetastore() - .getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(AssertionError::new); - - // verify directory is empty - HdfsContext context = new HdfsContext(session); - Path location = new Path(table.getStorage().getLocation()); - assertThat(listDirectory(context, location).isEmpty()).isTrue(); - - // read table with empty directory - readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), 
OptionalInt.of(0), Optional.of(ORC)); - - // create empty file - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, location); - assertThat(fileSystem.createNewFile(new Path(location, "empty-file"))).isTrue(); - assertThat(listDirectory(context, location)).isEqualTo(ImmutableList.of("empty-file")); - - // read table with empty file - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.of(0), Optional.empty()); - assertThat(result.getRowCount()).isEqualTo(0); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testRenameTable() - { - SchemaTableName temporaryRenameTableOld = temporaryTable("rename_old"); - SchemaTableName temporaryRenameTableNew = temporaryTable("rename_new"); - try { - createDummyTable(temporaryRenameTableOld); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - metadata.renameTable(session, getTableHandle(metadata, temporaryRenameTableOld), temporaryRenameTableNew); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - assertThat(metadata.getTableHandle(session, temporaryRenameTableOld)).isNull(); - assertThat(metadata.getTableHandle(session, temporaryRenameTableNew)).isNotNull(); - } - } - finally { - dropTable(temporaryRenameTableOld); - dropTable(temporaryRenameTableNew); - } - } - - @Test - public void testTableCreation() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryCreateTable = temporaryTable("create"); - try { - doCreateTable(temporaryCreateTable, storageFormat); - } - finally { - dropTable(temporaryCreateTable); - } - } - } - - @Test - public void testTableCreationWithTrailingSpaceInLocation() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_table_creation_with_trailing_space_in_location_" + randomNameSuffix()); - String tableDefaultLocationWithTrailingSpace = null; - try { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - SemiTransactionalHiveMetastore metastore = transaction.getMetastore(); - TrinoFileSystem fileSystem = HDFS_FILE_SYSTEM_FACTORY.create(session); - - // Write data - tableDefaultLocationWithTrailingSpace = getTableDefaultLocation(metastore, fileSystem, tableName.getSchemaName(), tableName.getTableName()) + " "; - Path dataFilePath = new Path(tableDefaultLocationWithTrailingSpace, "foo.txt"); - FileSystem fs = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(tableDefaultLocationWithTrailingSpace)); - try (OutputStream outputStream = fs.create(dataFilePath)) { - outputStream.write("hello\u0001world\nbye\u0001world".getBytes(UTF_8)); - } - - // create table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata( - tableName, - ImmutableList.builder() - .add(new ColumnMetadata("t_string1", VARCHAR)) - .add(new ColumnMetadata("t_string2", VARCHAR)) - .build(), - ImmutableMap.builder() - .putAll(createTableProperties(TEXTFILE, ImmutableList.of())) - .put(EXTERNAL_LOCATION_PROPERTY, tableDefaultLocationWithTrailingSpace) - .buildOrThrow()); - - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.createTable(session, tableMetadata, false); - - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - 
ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // verify the data - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(TEXTFILE)); - assertEqualsIgnoreOrder( - result.getMaterializedRows(), - MaterializedResult.resultBuilder(SESSION, VARCHAR, VARCHAR) - .row("hello", "world") - .row("bye", "world") - .build()); - } - } - finally { - dropTable(tableName); - if (tableDefaultLocationWithTrailingSpace != null) { - FileSystem fs = hdfsEnvironment.getFileSystem(new HdfsContext(SESSION), new Path(tableDefaultLocationWithTrailingSpace)); - fs.delete(new Path(tableDefaultLocationWithTrailingSpace), true); - } - } - } - - @Test - public void testTableCreationRollback() - throws Exception - { - SchemaTableName temporaryCreateRollbackTable = temporaryTable("create_rollback"); - try { - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - // begin creating the table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(temporaryCreateRollbackTable, CREATE_TABLE_COLUMNS, createTableProperties(RCBINARY)); - - ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write the data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_DATA.toPage()); - getFutureValue(sink.finish()); - - // verify we have data files - stagingPathRoot = getStagingPathRoot(outputHandle); - HdfsContext context = new HdfsContext(session); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isFalse(); - - // rollback the table - transaction.rollback(); - } - - // verify all files have been deleted - HdfsContext context = new HdfsContext(newSession()); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isTrue(); - - // verify table is not in the metastore - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.getTableHandle(session, temporaryCreateRollbackTable)).isNull(); - } - } - finally { - dropTable(temporaryCreateRollbackTable); - } - } - - @Test - public void testTableCreationIgnoreExisting() - { - List columns = ImmutableList.of(new Column("dummy", HiveType.valueOf("uniontype"), Optional.empty(), Map.of())); - SchemaTableName schemaTableName = temporaryTable("create"); - ConnectorSession session = newSession(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - PrincipalPrivileges privileges = testingPrincipalPrivilege(session); - Location targetPath; - try { - try (Transaction transaction = newTransaction()) { - LocationService locationService = getLocationService(); - targetPath = locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName); - Table table = createSimpleTable(schemaTableName, columns, session, targetPath, "q1"); - transaction.getMetastore() - .createTable(session, table, privileges, 
Optional.empty(), Optional.empty(), false, ZERO_TABLE_STATISTICS, false); - Optional tableHandle = transaction.getMetastore().getTable(schemaName, tableName); - assertThat(tableHandle.isPresent()).isTrue(); - transaction.commit(); - } - - // try creating it again from another transaction with ignoreExisting=false - try (Transaction transaction = newTransaction()) { - Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_2"), "q2"); - transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), false, ZERO_TABLE_STATISTICS, false); - transaction.commit(); - fail("Expected exception"); - } - catch (TrinoException e) { - assertInstanceOf(e, TableAlreadyExistsException.class); - } - - // try creating it again from another transaction with ignoreExisting=true - try (Transaction transaction = newTransaction()) { - Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_3"), "q3"); - transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - transaction.commit(); - } - - // at this point the table should exist, now try creating the table again with a different table definition - columns = ImmutableList.of(new Column("new_column", HiveType.valueOf("string"), Optional.empty(), Map.of())); - try (Transaction transaction = newTransaction()) { - Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_4"), "q4"); - transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - transaction.commit(); - fail("Expected exception"); - } - catch (TrinoException e) { - assertThat(e.getErrorCode()).isEqualTo(TRANSACTION_CONFLICT.toErrorCode()); - assertThat(e.getMessage()).isEqualTo(format("Table already exists with a different schema: '%s'", schemaTableName.getTableName())); - } - } - finally { - dropTable(schemaTableName); - } - } - - private static Table createSimpleTable(SchemaTableName schemaTableName, List columns, ConnectorSession session, Location targetPath, String queryId) - { - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - return Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(MANAGED_TABLE.name()) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, TEST_SERVER_VERSION, - TRINO_QUERY_ID_NAME, queryId)) - .setDataColumns(columns) - .withStorage(storage -> storage - .setLocation(targetPath.toString()) - .setStorageFormat(fromHiveStorageFormat(ORC)) - .setSerdeParameters(ImmutableMap.of())) - .build(); - } - - @Test - public void testBucketSortedTables() - throws Exception - { - SchemaTableName table = temporaryTable("create_sorted"); - try { - doTestBucketSortedTables(table); - } - finally { - dropTable(table); - } - } - - private void doTestBucketSortedTables(SchemaTableName table) - throws IOException - { - int bucketCount = 3; - int expectedRowCount = 0; - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - // begin creating the table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata( - table, - ImmutableList.builder() - .add(new ColumnMetadata("id", VARCHAR)) - .add(new 
ColumnMetadata("value_asc", VARCHAR)) - .add(new ColumnMetadata("value_desc", BIGINT)) - .add(new ColumnMetadata("ds", VARCHAR)) - .build(), - ImmutableMap.builder() - .put(STORAGE_FORMAT_PROPERTY, RCBINARY) - .put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds")) - .put(BUCKETED_BY_PROPERTY, ImmutableList.of("id")) - .put(BUCKET_COUNT_PROPERTY, bucketCount) - .put(SORTED_BY_PROPERTY, ImmutableList.builder() - .add(new SortingColumn("value_asc", ASCENDING)) - .add(new SortingColumn("value_desc", DESCENDING)) - .build()) - .buildOrThrow()); - - ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write the data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TESTING_PAGE_SINK_ID); - List types = tableMetadata.getColumns().stream() - .map(ColumnMetadata::getType) - .collect(toList()); - ThreadLocalRandom random = ThreadLocalRandom.current(); - for (int i = 0; i < 50; i++) { - MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types); - for (int j = 0; j < 1000; j++) { - builder.row( - sha256().hashLong(random.nextLong()).toString(), - "test" + random.nextInt(100), - random.nextLong(100_000), - "2018-04-01"); - expectedRowCount++; - } - sink.appendPage(builder.build().toPage()); - } - - HdfsContext context = new HdfsContext(session); - HiveConfig config = getHiveConfig(); - // verify we have enough temporary files per bucket to require multiple passes - Location stagingPathRoot; - if (config.isTemporaryStagingDirectoryEnabled()) { - stagingPathRoot = Location.of(config.getTemporaryStagingDirectoryPath() - .replace("${USER}", context.getIdentity().getUser())); - } - else { - stagingPathRoot = getStagingPathRoot(outputHandle); - } - assertThat(listAllDataFiles(context, stagingPathRoot)) - .filteredOn(file -> file.contains(".tmp-sort.")) - .size().isGreaterThan(bucketCount * getSortingFileWriterConfig().getMaxOpenSortFiles() * 2); - - // finish the write - Collection fragments = getFutureValue(sink.finish()); - - // verify there are no temporary files - for (String file : listAllDataFiles(context, stagingPathRoot)) { - assertThat(file).doesNotContain(".tmp-sort."); - } - - // finish creating table - metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of()); - - transaction.commit(); - } - - // verify that bucket files are sorted - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, table); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - // verify local sorting property - ConnectorTableProperties properties = metadata.getTableProperties( - newSession(ImmutableMap.of( - "propagate_table_scan_sorting_properties", true, - "bucket_execution_enabled", false)), - tableHandle); - Map columnIndex = indexColumns(columnHandles); - assertThat(properties.getLocalProperties()).isEqualTo(ImmutableList.of( - new SortingProperty<>(columnHandles.get(columnIndex.get("value_asc")), ASC_NULLS_FIRST), - new SortingProperty<>(columnHandles.get(columnIndex.get("value_desc")), DESC_NULLS_LAST))); - assertThat(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties()).isEmpty(); - - List splits = getAllSplits(tableHandle, transaction, session); - 
assertThat(splits).hasSize(bucketCount); - - int actualRowCount = 0; - for (ConnectorSplit split : splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - String lastValueAsc = null; - long lastValueDesc = -1; - - while (!pageSource.isFinished()) { - Page page = pageSource.getNextPage(); - if (page == null) { - continue; - } - for (int i = 0; i < page.getPositionCount(); i++) { - Block blockAsc = page.getBlock(1); - Block blockDesc = page.getBlock(2); - assertThat(blockAsc.isNull(i)).isFalse(); - assertThat(blockDesc.isNull(i)).isFalse(); - - String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8(); - if (lastValueAsc != null) { - assertGreaterThanOrEqual(valueAsc, lastValueAsc); - if (valueAsc.equals(lastValueAsc)) { - long valueDesc = BIGINT.getLong(blockDesc, i); - if (lastValueDesc != -1) { - assertLessThanOrEqual(valueDesc, lastValueDesc); - } - lastValueDesc = valueDesc; - } - else { - lastValueDesc = -1; - } - } - lastValueAsc = valueAsc; - actualRowCount++; - } - } - } - } - assertThat(actualRowCount).isEqualTo(expectedRowCount); - } - } - - @Test - public void testInsert() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryInsertTable = temporaryTable("insert"); - try { - doInsert(storageFormat, temporaryInsertTable); - } - finally { - dropTable(temporaryInsertTable); - } - } - } - - @Test - public void testInsertOverwriteUnpartitioned() - throws Exception - { - SchemaTableName table = temporaryTable("insert_overwrite"); - try { - doInsertOverwriteUnpartitioned(table); - } - finally { - dropTable(table); - } - } - - @Test - public void testInsertIntoNewPartition() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryInsertIntoNewPartitionTable = temporaryTable("insert_new_partitioned"); - try { - doInsertIntoNewPartition(storageFormat, temporaryInsertIntoNewPartitionTable); - } - finally { - dropTable(temporaryInsertIntoNewPartitionTable); - } - } - } - - @Test - public void testInsertIntoExistingPartition() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryInsertIntoExistingPartitionTable = temporaryTable("insert_existing_partitioned"); - try { - doInsertIntoExistingPartition(storageFormat, temporaryInsertIntoExistingPartitionTable); - } - finally { - dropTable(temporaryInsertIntoExistingPartitionTable); - } - } - } - - @Test - public void testInsertIntoExistingPartitionEmptyStatistics() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryInsertIntoExistingPartitionTable = temporaryTable("insert_existing_partitioned_empty_statistics"); - try { - doInsertIntoExistingPartitionEmptyStatistics(storageFormat, temporaryInsertIntoExistingPartitionTable); - } - finally { - dropTable(temporaryInsertIntoExistingPartitionTable); - } - } - } - - @Test - public void testInsertUnsupportedWriteType() - throws Exception - { - SchemaTableName temporaryInsertUnsupportedWriteType = temporaryTable("insert_unsupported_type"); - try { - doInsertUnsupportedWriteType(ORC, temporaryInsertUnsupportedWriteType); - } - finally { - dropTable(temporaryInsertUnsupportedWriteType); - } - } - - @Test - public void testMetadataDelete() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - 
SchemaTableName temporaryMetadataDeleteTable = temporaryTable("metadata_delete"); - try { - doTestMetadataDelete(storageFormat, temporaryMetadataDeleteTable); - } - finally { - dropTable(temporaryMetadataDeleteTable); - } - } - } - - @Test - public void testEmptyTableCreation() - throws Exception - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryCreateEmptyTable = temporaryTable("create_empty"); - try { - doCreateEmptyTable(temporaryCreateEmptyTable, storageFormat, CREATE_TABLE_COLUMNS); - } - finally { - dropTable(temporaryCreateEmptyTable); - } - } - } - - @Test - public void testCreateEmptyTableShouldNotCreateStagingDirectory() - throws IOException - { - for (HiveStorageFormat storageFormat : createTableFormats) { - SchemaTableName temporaryCreateEmptyTable = temporaryTable("create_empty"); - try { - List columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty(), Map.of())); - try (Transaction transaction = newTransaction()) { - String temporaryStagingPrefix = "hive-temporary-staging-prefix-" + UUID.randomUUID().toString().toLowerCase(ENGLISH).replace("-", ""); - ConnectorSession session = newSession(); - String tableOwner = session.getUser(); - String schemaName = temporaryCreateEmptyTable.getSchemaName(); - String tableName = temporaryCreateEmptyTable.getTableName(); - HiveConfig hiveConfig = getHiveConfig() - .setTemporaryStagingDirectoryPath(temporaryStagingPrefix) - .setTemporaryStagingDirectoryEnabled(true); - TrinoFileSystemFactory fileSystemFactory = new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS); - LocationService locationService = new HiveLocationService(fileSystemFactory, hiveConfig); - Location targetPath = locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName); - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(MANAGED_TABLE.name()) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, TEST_SERVER_VERSION, - TRINO_QUERY_ID_NAME, session.getQueryId())) - .setDataColumns(columns); - tableBuilder.getStorageBuilder() - .setLocation(targetPath.toString()) - .setStorageFormat(StorageFormat.create(storageFormat.getSerde(), storageFormat.getInputFormat(), storageFormat.getOutputFormat())); - transaction.getMetastore().createTable( - session, - tableBuilder.build(), - testingPrincipalPrivilege(tableOwner, session.getUser()), - Optional.empty(), - Optional.empty(), - true, - ZERO_TABLE_STATISTICS, - false); - transaction.commit(); - - HdfsContext context = new HdfsContext(session); - Path temporaryRoot = new Path(targetPath.toString(), temporaryStagingPrefix); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, temporaryRoot); - assertThat(fileSystem.exists(temporaryRoot)) - .describedAs(format("Temporary staging directory %s is created.", temporaryRoot)) - .isFalse(); - } - } - finally { - dropTable(temporaryCreateEmptyTable); - } - } - } - - @Test - public void testViewCreation() - { - SchemaTableName temporaryCreateView = temporaryTable("create_view"); - try { - verifyViewCreation(temporaryCreateView); - } - finally { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.dropView(newSession(), temporaryCreateView); - transaction.commit(); - } - catch (RuntimeException e) { - // this usually occurs because the view was not created - } - } - } - - @Test - public void 
testCreateTableUnsupportedType() - { - for (HiveStorageFormat storageFormat : createTableFormats) { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - List columns = ImmutableList.of(new ColumnMetadata("dummy", HYPER_LOG_LOG)); - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(invalidTable, columns, createTableProperties(storageFormat)); - metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - fail("create table with unsupported type should fail for storage format " + storageFormat); - } - catch (TrinoException e) { - assertThat(e.getErrorCode()).isEqualTo(NOT_SUPPORTED.toErrorCode()); - } - } - } - - @Test - public void testHideDeltaLakeTables() - { - ConnectorSession session = newSession(); - SchemaTableName tableName = temporaryTable("trino_delta_lake_table"); - - Table.Builder table = Table.builder() - .setDatabaseName(tableName.getSchemaName()) - .setTableName(tableName.getTableName()) - .setOwner(Optional.of(session.getUser())) - .setTableType(MANAGED_TABLE.name()) - .setPartitionColumns(List.of(new Column("a_partition_column", HIVE_INT, Optional.empty(), Map.of()))) - .setDataColumns(List.of(new Column("a_column", HIVE_STRING, Optional.empty(), Map.of()))) - .setParameter(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER); - table.getStorageBuilder() - .setStorageFormat(fromHiveStorageFormat(PARQUET)) - .setLocation(getTableDefaultLocation( - metastoreClient.getDatabase(tableName.getSchemaName()).orElseThrow(), - new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS).create(session), - tableName.getSchemaName(), - tableName.getTableName()).toString()); - metastoreClient.createTable(table.build(), NO_PRIVILEGES); - - try { - // Verify the table was created as a Delta Lake table - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - assertThatThrownBy(() -> getTableHandle(metadata, tableName)) - .hasMessage(format("Cannot query Delta Lake table '%s'", tableName)); - } - - // Verify the hidden `$properties` and `$partitions` Delta Lake table handle can't be obtained within the hive connector - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - SchemaTableName propertiesTableName = new SchemaTableName(tableName.getSchemaName(), format("%s$properties", tableName.getTableName())); - assertThat(metadata.getSystemTable(newSession(), propertiesTableName)).isEmpty(); - SchemaTableName partitionsTableName = new SchemaTableName(tableName.getSchemaName(), format("%s$partitions", tableName.getTableName())); - assertThat(metadata.getSystemTable(newSession(), partitionsTableName)).isEmpty(); - } - - // Assert that table is hidden - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - - // TODO (https://github.com/trinodb/trino/issues/5426) these assertions should use information_schema instead of metadata directly, - // as information_schema or MetadataManager may apply additional logic - - // list all tables - assertThat(metadata.listTables(session, Optional.empty())) - .doesNotContain(tableName); - - // list all tables in a schema - assertThat(metadata.listTables(session, Optional.of(tableName.getSchemaName()))) - .doesNotContain(tableName); - - // list all columns in a schema - 
assertThat(listTableColumns(metadata, session, new SchemaTablePrefix(tableName.getSchemaName())).keySet()) - .doesNotContain(tableName); - - // list all columns in a table - assertThat(listTableColumns(metadata, session, new SchemaTablePrefix(tableName.getSchemaName(), tableName.getTableName())).keySet()) - .doesNotContain(tableName); - } - } - finally { - // Clean up - metastoreClient.dropTable(tableName.getSchemaName(), tableName.getTableName(), true); - } - } - - @Test - public void testDisallowQueryingOfIcebergTables() - { - ConnectorSession session = newSession(); - SchemaTableName tableName = temporaryTable("trino_iceberg_table"); - - Table.Builder table = Table.builder() - .setDatabaseName(tableName.getSchemaName()) - .setTableName(tableName.getTableName()) - .setOwner(Optional.of(session.getUser())) - .setTableType(MANAGED_TABLE.name()) - .setPartitionColumns(List.of(new Column("a_partition_column", HIVE_INT, Optional.empty(), Map.of()))) - .setDataColumns(List.of(new Column("a_column", HIVE_STRING, Optional.empty(), Map.of()))) - .setParameter(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE); - table.getStorageBuilder() - .setStorageFormat(fromHiveStorageFormat(PARQUET)) - .setLocation(getTableDefaultLocation( - metastoreClient.getDatabase(tableName.getSchemaName()).orElseThrow(), - new HdfsFileSystemFactory(hdfsEnvironment, HDFS_FILE_SYSTEM_STATS).create(session), - tableName.getSchemaName(), - tableName.getTableName()).toString()); - metastoreClient.createTable(table.build(), NO_PRIVILEGES); - - try { - // Verify that the table created as an Iceberg table can't be queried in Hive - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - assertThatThrownBy(() -> getTableHandle(metadata, tableName)) - .hasMessage(format("Cannot query Iceberg table '%s'", tableName)); - } - - // Verify that the table handles for the hidden `$properties` and `$partitions` Hive system tables can't be obtained for the Iceberg table - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - SchemaTableName propertiesTableName = new SchemaTableName(tableName.getSchemaName(), format("%s$properties", tableName.getTableName())); - assertThat(metadata.getSystemTable(newSession(), propertiesTableName)).isEmpty(); - SchemaTableName partitionsTableName = new SchemaTableName(tableName.getSchemaName(), format("%s$partitions", tableName.getTableName())); - assertThat(metadata.getSystemTable(newSession(), partitionsTableName)).isEmpty(); - } - } - finally { - // Clean up - metastoreClient.dropTable(tableName.getSchemaName(), tableName.getTableName(), true); - } - } - - @Test - public void testUpdateBasicTableStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_basic_table_statistics"); - try { - doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, BASIC_STATISTICS_1, BASIC_STATISTICS_2); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdateTableColumnStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_table_column_statistics"); - try { - doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2); - } - finally { - dropTable(tableName); - } - } - - @Test - public void 
testUpdateTableColumnStatisticsEmptyOptionalFields() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_table_column_statistics_empty_optional_fields"); - try { - doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, STATISTICS_EMPTY_OPTIONAL_FIELDS); - } - finally { - dropTable(tableName); - } - } - - protected void testUpdateTableStatistics(SchemaTableName tableName, PartitionStatistics initialStatistics, PartitionStatistics... statistics) - { - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(initialStatistics); - - AtomicReference expectedStatistics = new AtomicReference<>(initialStatistics); - for (PartitionStatistics partitionStatistics : statistics) { - metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), NO_ACID_TRANSACTION, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(expectedStatistics.get()); - return partitionStatistics; - }); - assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(partitionStatistics); - expectedStatistics.set(partitionStatistics); - } - - assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(expectedStatistics.get()); - - metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), NO_ACID_TRANSACTION, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(expectedStatistics.get()); - return initialStatistics; - }); - - assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(initialStatistics); - } - - @Test - public void testUpdateBasicPartitionStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_basic_partition_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - testUpdatePartitionStatistics( - tableName, - ZERO_TABLE_STATISTICS, - ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), - ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdatePartitionColumnStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - testUpdatePartitionStatistics( - tableName, - ZERO_TABLE_STATISTICS, - ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), - ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - testUpdatePartitionStatistics( - tableName, - ZERO_TABLE_STATISTICS, - ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS), - ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testDataColumnProperties() - throws Exception - { - 
SchemaTableName tableName = temporaryTable("test_column_properties"); - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - try { - doCreateEmptyTable(tableName, ORC, List.of(new ColumnMetadata("id", BIGINT), new ColumnMetadata("part_key", createVarcharType(256)))); - - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(table.getDataColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - assertThat(table.getPartitionColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - - String columnPropertyValue = "data column value ,;.!??? \" ' {} [] non-printable \000 \001 spaces \n\r\t\f hiragana だ emoji 🤷‍♂️ x"; - metastoreClient.replaceTable( - tableName.getSchemaName(), - tableName.getTableName(), - Table.builder(table) - .setDataColumns(List.of(new Column("id", HIVE_LONG, Optional.empty(), Map.of("data prop", columnPropertyValue)))) - .build(), - NO_PRIVILEGES); - - table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(table.getDataColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEqualTo(Map.of("data prop", columnPropertyValue)); - assertThat(table.getPartitionColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testPartitionColumnProperties() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_column_properties"); - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - try { - doCreateEmptyTable(tableName, ORC, List.of(new ColumnMetadata("id", BIGINT), new ColumnMetadata("part_key", createVarcharType(256)))); - - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(table.getDataColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - assertThat(table.getPartitionColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - - String columnPropertyValue = "partition column value ,;.!??? 
\" ' {} [] non-printable \000 \001 spaces \n\r\t\f hiragana だ emoji 🤷‍♂️ x"; - metastoreClient.replaceTable( - tableName.getSchemaName(), - tableName.getTableName(), - Table.builder(table) - .setPartitionColumns(List.of(new Column("part_key", HiveType.valueOf("varchar(256)"), Optional.empty(), Map.of("partition prop", columnPropertyValue)))) - .build(), - NO_PRIVILEGES); - - table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(table.getDataColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEmpty(); - assertThat(table.getPartitionColumns()) - .singleElement() - .extracting(Column::getProperties, InstanceOfAssertFactories.MAP) - .isEqualTo(Map.of("partition prop", columnPropertyValue)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInputInfoWhenTableIsPartitioned() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_input_info_with_partitioned_table"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - assertInputInfo(tableName, new HiveInputInfo(ImmutableList.of(), true, Optional.of("ORC"))); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInputInfoWhenTableIsNotPartitioned() - { - SchemaTableName tableName = temporaryTable("test_input_info_without_partitioned_table"); - try { - createDummyTable(tableName); - assertInputInfo(tableName, new HiveInputInfo(ImmutableList.of(), false, Optional.of("TEXTFILE"))); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInputInfoWithParquetTableFormat() - { - SchemaTableName tableName = temporaryTable("test_input_info_with_parquet_table_format"); - try { - createDummyTable(tableName, PARQUET); - assertInputInfo(tableName, new HiveInputInfo(ImmutableList.of(), false, Optional.of("PARQUET"))); - } - finally { - dropTable(tableName); - } - } - - private void assertInputInfo(SchemaTableName tableName, HiveInputInfo expectedInputInfo) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - HiveTableHandle tableHandle = (HiveTableHandle) metadata.getTableHandle(session, tableName); - assertThat(metadata.getInfo(tableHandle)).isEqualTo(Optional.of(expectedInputInfo)); - } - } - - /** - * During table scan, the illegal storage format for some specific table should not fail the whole table scan - */ - @Test - public void testIllegalStorageFormatDuringTableScan() - { - SchemaTableName schemaTableName = temporaryTable("test_illegal_storage_format"); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - List columns = ImmutableList.of(new Column("pk", HIVE_STRING, Optional.empty(), Map.of())); - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - Location targetPath = locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName); - //create table whose storage format is null - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(MANAGED_TABLE.name()) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, TEST_SERVER_VERSION, - TRINO_QUERY_ID_NAME, session.getQueryId())) - .setDataColumns(columns) - .withStorage(storage -> storage - 
.setLocation(targetPath.toString()) - .setStorageFormat(StorageFormat.createNullable(null, null, null)) - .setSerdeParameters(ImmutableMap.of())); - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - transaction.commit(); - } - - // We retrieve the table whose storageFormat has null serde/inputFormat/outputFormat - // to make sure it can still be retrieved instead of throwing exception. - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - Map> allColumns = listTableColumns(metadata, newSession(), new SchemaTablePrefix(schemaTableName.getSchemaName())); - assertThat(allColumns).containsKey(schemaTableName); - } - finally { - dropTable(schemaTableName); - } - } - - protected static Map> listTableColumns(ConnectorMetadata metadata, ConnectorSession session, SchemaTablePrefix prefix) - { - return stream(metadata.streamTableColumns(session, prefix)) - .collect(toImmutableMap( - TableColumnsMetadata::getTable, - tableColumns -> tableColumns.getColumns().orElseThrow(() -> new IllegalStateException("Table " + tableColumns.getTable() + " reported as redirected")))); - } - - private void createDummyTable(SchemaTableName tableName) - { - createDummyTable(tableName, TEXTFILE); - } - - private void createDummyTable(SchemaTableName tableName, HiveStorageFormat storageFormat) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - List columns = ImmutableList.of(new ColumnMetadata("dummy", createUnboundedVarcharType())); - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat)); - ConnectorOutputTableHandle handle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - metadata.finishCreateTable(session, handle, ImmutableList.of(), ImmutableList.of()); - - transaction.commit(); - } - } - - protected void createDummyPartitionedTable(SchemaTableName tableName, List columns) - throws Exception - { - doCreateEmptyTable(tableName, ORC, columns); - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - - List firstPartitionValues = ImmutableList.of("2016-01-01"); - List secondPartitionValues = ImmutableList.of("2016-01-02"); - - String firstPartitionName = makePartName(ImmutableList.of("ds"), firstPartitionValues); - String secondPartitionName = makePartName(ImmutableList.of("ds"), secondPartitionValues); - - List partitions = ImmutableList.of(firstPartitionName, secondPartitionName) - .stream() - .map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())) - .collect(toImmutableList()); - metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> ZERO_TABLE_STATISTICS); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), 
secondPartitionName, currentStatistics -> ZERO_TABLE_STATISTICS); - } - - protected void testUpdatePartitionStatistics( - SchemaTableName tableName, - PartitionStatistics initialStatistics, - List firstPartitionStatistics, - List secondPartitionStatistics) - { - verify(firstPartitionStatistics.size() == secondPartitionStatistics.size()); - - String firstPartitionName = "ds=2016-01-01"; - String secondPartitionName = "ds=2016-01-02"; - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) - .isEqualTo(ImmutableMap.of(firstPartitionName, initialStatistics, secondPartitionName, initialStatistics)); - - AtomicReference expectedStatisticsPartition1 = new AtomicReference<>(initialStatistics); - AtomicReference expectedStatisticsPartition2 = new AtomicReference<>(initialStatistics); - - for (int i = 0; i < firstPartitionStatistics.size(); i++) { - PartitionStatistics statisticsPartition1 = firstPartitionStatistics.get(i); - PartitionStatistics statisticsPartition2 = secondPartitionStatistics.get(i); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(expectedStatisticsPartition1.get()); - return statisticsPartition1; - }); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(expectedStatisticsPartition2.get()); - return statisticsPartition2; - }); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) - .isEqualTo(ImmutableMap.of(firstPartitionName, statisticsPartition1, secondPartitionName, statisticsPartition2)); - expectedStatisticsPartition1.set(statisticsPartition1); - expectedStatisticsPartition2.set(statisticsPartition2); - } - - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) - .isEqualTo(ImmutableMap.of(firstPartitionName, expectedStatisticsPartition1.get(), secondPartitionName, expectedStatisticsPartition2.get())); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> { - assertThat(currentStatistics).isEqualTo(expectedStatisticsPartition1.get()); - return initialStatistics; - }); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> { - assertThat(currentStatistics).isEqualTo(expectedStatisticsPartition2.get()); - return initialStatistics; - }); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) - .isEqualTo(ImmutableMap.of(firstPartitionName, initialStatistics, secondPartitionName, initialStatistics)); - } - - @Test - public void testStorePartitionWithStatistics() - throws Exception - { - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, ZERO_TABLE_STATISTICS); - } - - protected void testStorePartitionWithStatistics( - List columns, - 
PartitionStatistics statsForAllColumns1, - PartitionStatistics statsForAllColumns2, - PartitionStatistics statsForSubsetOfColumns, - PartitionStatistics emptyStatistics) - throws Exception - { - SchemaTableName tableName = temporaryTable("store_partition_with_statistics"); - try { - doCreateEmptyTable(tableName, ORC, columns); - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - - List partitionValues = ImmutableList.of("2016-01-01"); - String partitionName = makePartName(ImmutableList.of("ds"), partitionValues); - - Partition partition = createDummyPartition(table, partitionName); - - // create partition with stats for all columns - metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1))); - assertThat(metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat()).isEqualTo(fromHiveStorageFormat(ORC)); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) - .isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1)); - - // alter the partition into one with other stats - Partition modifiedPartition = Partition.builder(partition) - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(RCBINARY)) - .setLocation(partitionTargetPath(tableName, partitionName))) - .build(); - metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2)); - assertThat(metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat()).isEqualTo(fromHiveStorageFormat(RCBINARY)); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) - .isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2)); - - // alter the partition into one with stats for only subset of columns - modifiedPartition = Partition.builder(partition) - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(TEXTFILE)) - .setLocation(partitionTargetPath(tableName, partitionName))) - .build(); - metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns)); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) - .isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns)); - - // alter the partition into one without stats - modifiedPartition = Partition.builder(partition) - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(TEXTFILE)) - .setLocation(partitionTargetPath(tableName, partitionName))) - .build(); - metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics)); - assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))) - 
.isEqualTo(ImmutableMap.of(partitionName, emptyStatistics)); - } - finally { - dropTable(tableName); - } - } - - protected Partition createDummyPartition(Table table, String partitionName) - { - return Partition.builder() - .setDatabaseName(table.getDatabaseName()) - .setTableName(table.getTableName()) - .setColumns(table.getDataColumns()) - .setValues(toPartitionValues(partitionName)) - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(ORC)) - .setLocation(partitionTargetPath(new SchemaTableName(table.getDatabaseName(), table.getTableName()), partitionName))) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, "testversion", - TRINO_QUERY_ID_NAME, "20180101_123456_00001_x1y2z")) - .build(); - } - - protected String partitionTargetPath(SchemaTableName schemaTableName, String partitionName) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - SemiTransactionalHiveMetastore metastore = transaction.getMetastore(); - LocationService locationService = getLocationService(); - Table table = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()).get(); - LocationHandle handle = locationService.forExistingTable(metastore, session, table); - return locationService.getPartitionWriteInfo(handle, Optional.empty(), partitionName).targetPath().toString(); - } - } - - /** - * This test creates 2 identical partitions and verifies that the statistics projected based on - * a single partition sample are equal to the statistics computed in a fair way - */ - @Test - public void testPartitionStatisticsSampling() - throws Exception - { - testPartitionStatisticsSampling(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1); - } - - protected void testPartitionStatisticsSampling(List columns, PartitionStatistics statistics) - throws Exception - { - SchemaTableName tableName = temporaryTable("test_partition_statistics_sampling"); - - try { - createDummyPartitionedTable(tableName, columns); - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), "ds=2016-01-01", actualStatistics -> statistics); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), "ds=2016-01-02", actualStatistics -> statistics); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - ConnectorTableHandle tableHandle = metadata.getTableHandle(session, tableName); - TableStatistics unsampledStatistics = metadata.getTableStatistics(sampleSize(2), tableHandle); - TableStatistics sampledStatistics = metadata.getTableStatistics(sampleSize(1), tableHandle); - assertThat(sampledStatistics).isEqualTo(unsampledStatistics); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testApplyProjection() - throws Exception - { - ColumnMetadata bigIntColumn0 = new ColumnMetadata("int0", BIGINT); - ColumnMetadata bigIntColumn1 = new ColumnMetadata("int1", BIGINT); - - RowType oneLevelRowType = toRowType(ImmutableList.of(bigIntColumn0, bigIntColumn1)); - ColumnMetadata oneLevelRow0 = new ColumnMetadata("onelevelrow0", oneLevelRowType); - - RowType twoLevelRowType = toRowType(ImmutableList.of(oneLevelRow0, bigIntColumn0, bigIntColumn1)); - ColumnMetadata twoLevelRow0 = new ColumnMetadata("twolevelrow0", twoLevelRowType); - - List 
columnsForApplyProjectionTest = ImmutableList.of(bigIntColumn0, bigIntColumn1, oneLevelRow0, twoLevelRow0); - - SchemaTableName tableName = temporaryTable("apply_projection_tester"); - doCreateEmptyTable(tableName, ORC, columnsForApplyProjectionTest); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - List columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream() - .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toList()); - assertThat(columnHandles.size()).isEqualTo(columnsForApplyProjectionTest.size()); - - Map columnHandleMap = columnHandles.stream() - .collect(toImmutableMap(handle -> ((HiveColumnHandle) handle).getBaseColumnName(), Function.identity())); - - // Emulate symbols coming from the query plan and map them to column handles - Map columnHandlesWithSymbols = ImmutableMap.of( - "symbol_0", columnHandleMap.get("int0"), - "symbol_1", columnHandleMap.get("int1"), - "symbol_2", columnHandleMap.get("onelevelrow0"), - "symbol_3", columnHandleMap.get("twolevelrow0")); - - // Create variables for the emulated symbols - Map symbolVariableMapping = columnHandlesWithSymbols.entrySet().stream() - .collect(toImmutableMap( - Map.Entry::getKey, - e -> new Variable( - e.getKey(), - ((HiveColumnHandle) e.getValue()).getBaseType()))); - - // Create dereference expressions for testing - FieldDereference symbol2Field0 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_2"), 0); - FieldDereference symbol3Field0 = new FieldDereference(oneLevelRowType, symbolVariableMapping.get("symbol_3"), 0); - FieldDereference symbol3Field0Field0 = new FieldDereference(BIGINT, symbol3Field0, 0); - FieldDereference symbol3Field1 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_3"), 1); - - Map inputAssignments; - List inputProjections; - Optional> projectionResult; - List expectedProjections; - Map expectedAssignments; - - // Test projected columns pushdown to HiveTableHandle in case of all variable references - inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_0", "symbol_1")); - inputProjections = ImmutableList.of(symbolVariableMapping.get("symbol_0"), symbolVariableMapping.get("symbol_1")); - expectedAssignments = ImmutableMap.of( - "symbol_0", BIGINT, - "symbol_1", BIGINT); - projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments); - assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments); - - // Empty result when projected column handles are same as those present in table handle - projectionResult = metadata.applyProjection(session, projectionResult.get().getHandle(), inputProjections, inputAssignments); - assertProjectionResult(projectionResult, true, ImmutableList.of(), ImmutableMap.of()); - - // Extra columns handles in HiveTableHandle should get pruned - projectionResult = metadata.applyProjection( - session, - ((HiveTableHandle) tableHandle).withProjectedColumns(ImmutableSet.copyOf(columnHandles)), - inputProjections, - inputAssignments); - assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments); - - // Test projection pushdown for dereferences - inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2", "symbol_3")); - inputProjections = 
ImmutableList.of(symbol2Field0, symbol3Field0Field0, symbol3Field1); - expectedAssignments = ImmutableMap.of( - "onelevelrow0#f_int0", BIGINT, - "twolevelrow0#f_onelevelrow0#f_int0", BIGINT, - "twolevelrow0#f_int0", BIGINT); - expectedProjections = ImmutableList.of( - new Variable("onelevelrow0#f_int0", BIGINT), - new Variable("twolevelrow0#f_onelevelrow0#f_int0", BIGINT), - new Variable("twolevelrow0#f_int0", BIGINT)); - projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments); - assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments); - - // Test reuse of virtual column handles - // Round-1: input projections [symbol_2, symbol_2.int0]. virtual handle is created for symbol_2.int0. - inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2")); - inputProjections = ImmutableList.of(symbol2Field0, symbolVariableMapping.get("symbol_2")); - projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments); - expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), symbolVariableMapping.get("symbol_2")); - expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT, "symbol_2", oneLevelRowType); - assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments); - - // Round-2: input projections [symbol_2.int0 and onelevelrow0#f_int0]. Virtual handle is reused. - Assignment newlyCreatedColumn = getOnlyElement(projectionResult.get().getAssignments().stream() - .filter(handle -> handle.getVariable().equals("onelevelrow0#f_int0")) - .collect(toList())); - inputAssignments = ImmutableMap.builder() - .putAll(getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2"))) - .put(newlyCreatedColumn.getVariable(), newlyCreatedColumn.getColumn()) - .buildOrThrow(); - inputProjections = ImmutableList.of(symbol2Field0, new Variable("onelevelrow0#f_int0", BIGINT)); - projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments); - expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), new Variable("onelevelrow0#f_int0", BIGINT)); - expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT); - assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments); - } - finally { - dropTable(tableName); - } - } - - private static Map getColumnHandlesFor(Map columnHandles, List symbols) - { - return columnHandles.entrySet().stream() - .filter(e -> symbols.contains(e.getKey())) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); - } - - private static void assertProjectionResult(Optional> projectionResult, boolean shouldBeEmpty, List expectedProjections, Map expectedAssignments) - { - if (shouldBeEmpty) { - assertThat(projectionResult.isEmpty()) - .describedAs("expected projectionResult to be empty") - .isTrue(); - return; - } - - assertThat(projectionResult.isPresent()) - .describedAs("expected non-empty projection result") - .isTrue(); - - ProjectionApplicationResult result = projectionResult.get(); - - // Verify projections - assertThat(expectedProjections).isEqualTo(result.getProjections()); - - // Verify assignments - List assignments = result.getAssignments(); - Map actualAssignments = uniqueIndex(assignments, Assignment::getVariable); - - for (String variable : expectedAssignments.keySet()) { - Type expectedType = expectedAssignments.get(variable); - 
assertThat(actualAssignments).containsKey(variable); - assertThat(actualAssignments.get(variable).getType()).isEqualTo(expectedType); - assertThat(((HiveColumnHandle) actualAssignments.get(variable).getColumn()).getType()).isEqualTo(expectedType); - } - - assertThat(actualAssignments.size()).isEqualTo(expectedAssignments.size()); - assertThat(actualAssignments.values().stream().map(Assignment::getColumn).collect(toImmutableSet())).isEqualTo(((HiveTableHandle) result.getHandle()).getProjectedColumns()); - } - - @Test - public void testApplyRedirection() - throws Exception - { - SchemaTableName sourceTableName = temporaryTable("apply_redirection_tester"); - doCreateEmptyTable(sourceTableName, ORC, CREATE_TABLE_COLUMNS); - SchemaTableName tableName = temporaryTable("apply_no_redirection_tester"); - doCreateEmptyTable(tableName, ORC, CREATE_TABLE_COLUMNS); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.applyTableScanRedirect(session, getTableHandle(metadata, tableName))).isEmpty(); - Optional result = metadata.applyTableScanRedirect(session, getTableHandle(metadata, sourceTableName)); - assertThat(result).isPresent(); - assertThat(result.get().getDestinationTable()) - .isEqualTo(new CatalogSchemaTableName("hive", database, "mock_redirection_target")); - } - finally { - dropTable(sourceTableName); - dropTable(tableName); - } - } - - @Test - public void testMaterializedViewMetadata() - throws Exception - { - SchemaTableName sourceTableName = temporaryTable("materialized_view_tester"); - doCreateEmptyTable(sourceTableName, ORC, CREATE_TABLE_COLUMNS); - SchemaTableName tableName = temporaryTable("mock_table"); - doCreateEmptyTable(tableName, ORC, CREATE_TABLE_COLUMNS); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.getMaterializedView(session, tableName)).isEmpty(); - Optional result = metadata.getMaterializedView(session, sourceTableName); - assertThat(result).isPresent(); - assertThat(result.get().getOriginalSql()).isEqualTo("dummy_view_sql"); - } - finally { - dropTable(sourceTableName); - dropTable(tableName); - } - } - - @Test - public void testOrcPageSourceMetrics() - throws Exception - { - SchemaTableName tableName = temporaryTable("orc_page_source_metrics"); - try { - assertPageSourceMetrics(tableName, ORC, new Metrics(ImmutableMap.of(ORC_CODEC_METRIC_PREFIX + "SNAPPY", new LongCount(209)))); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testParquetPageSourceMetrics() - throws Exception - { - SchemaTableName tableName = temporaryTable("parquet_page_source_metrics"); - try { - assertPageSourceMetrics(tableName, PARQUET, new Metrics(ImmutableMap.of(PARQUET_CODEC_METRIC_PREFIX + "SNAPPY", new LongCount(1157)))); - } - finally { - dropTable(tableName); - } - } - - private void assertPageSourceMetrics(SchemaTableName tableName, HiveStorageFormat storageFormat, Metrics expectedMetrics) - throws Exception - { - createEmptyTable( - tableName, - storageFormat, - ImmutableList.of( - new Column("id", HIVE_LONG, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of()); - MaterializedResult.Builder inputDataBuilder = MaterializedResult.resultBuilder(SESSION, BIGINT, VARCHAR); - IntStream.range(0, 100).forEach(i -> inputDataBuilder.row((long) i, String.valueOf(i))); - 
insertData(tableName, inputDataBuilder.build(), ImmutableMap.of("compression_codec", "SNAPPY")); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // read entire table - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - List splits = getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - for (ConnectorSplit split : splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - assertThat(pageSource.getMetrics()).isEqualTo(expectedMetrics); - } - } - } - } - - private ConnectorSession sampleSize(int sampleSize) - { - return getHiveSession(getHiveConfig() - .setPartitionStatisticsSampleSize(sampleSize)); - } - - private void verifyViewCreation(SchemaTableName temporaryCreateView) - { - // replace works for new view - doCreateView(temporaryCreateView, true); - - // replace works for existing view - doCreateView(temporaryCreateView, true); - - // create fails for existing view - try { - doCreateView(temporaryCreateView, false); - fail("create existing should fail"); - } - catch (ViewAlreadyExistsException e) { - assertThat(e.getViewName()).isEqualTo(temporaryCreateView); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - // drop works when view exists - metadata.dropView(newSession(), temporaryCreateView); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - assertThat(metadata.getView(newSession(), temporaryCreateView)) - .isEmpty(); - assertThat(metadata.getViews(newSession(), Optional.of(temporaryCreateView.getSchemaName()))) - .doesNotContainKey(temporaryCreateView); - assertThat(metadata.listViews(newSession(), Optional.of(temporaryCreateView.getSchemaName()))) - .doesNotContain(temporaryCreateView); - } - - // drop fails when view does not exist - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.dropView(newSession(), temporaryCreateView); - fail("drop non-existing should fail"); - } - catch (ViewNotFoundException e) { - assertThat(e.getViewName()).isEqualTo(temporaryCreateView); - } - - // create works for new view - doCreateView(temporaryCreateView, false); - } - - private void doCreateView(SchemaTableName viewName, boolean replace) - { - String viewData = "test data"; - ConnectorViewDefinition definition = new ConnectorViewDefinition( - viewData, - Optional.empty(), - Optional.empty(), - ImmutableList.of(new ViewColumn("test", BIGINT.getTypeId(), Optional.empty())), - Optional.empty(), - Optional.empty(), - true, - ImmutableList.of()); - - try (Transaction transaction = newTransaction()) { - transaction.getMetadata().createView(newSession(), viewName, definition, replace); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - - assertThat(metadata.getView(newSession(), viewName)) - .map(ConnectorViewDefinition::getOriginalSql) - .contains(viewData); - - Map views = 
metadata.getViews(newSession(), Optional.of(viewName.getSchemaName())); - assertThat(views.size()).isEqualTo(1); - assertThat(views.get(viewName).getOriginalSql()).isEqualTo(definition.getOriginalSql()); - - assertThat(metadata.listViews(newSession(), Optional.of(viewName.getSchemaName()))).contains(viewName); - } - } - - protected void doCreateTable(SchemaTableName tableName, HiveStorageFormat storageFormat) - throws Exception - { - String queryId; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - queryId = session.getQueryId(); - - // begin creating the table - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, CREATE_TABLE_COLUMNS, createTableProperties(storageFormat)); - - ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write the data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_DATA.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // verify all new files start with the unique prefix - HdfsContext context = new HdfsContext(session); - for (String filePath : listAllDataFiles(context, getStagingPathRoot(outputHandle))) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // commit the table - metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of()); - - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - // load the new table - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(CREATE_TABLE_COLUMNS); - - // verify the data - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_DATA.getMaterializedRows()); - - // verify the node version and query ID in table - Table table = getMetastoreClient().getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - assertThat(table.getParameters()).containsEntry(TRINO_VERSION_NAME, TEST_SERVER_VERSION); - assertThat(table.getParameters()).containsEntry(TRINO_QUERY_ID_NAME, queryId); - - // verify basic statistics - HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(CREATE_TABLE_DATA.getRowCount()); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(1L); - assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - - protected void doCreateEmptyTable(SchemaTableName tableName, HiveStorageFormat storageFormat, List createTableColumns) - throws Exception - { - List partitionedBy = createTableColumns.stream() - .map(ColumnMetadata::getName) - 
.filter(PARTITION_COLUMN_FILTER) - .collect(toList()); - - doCreateEmptyTable(tableName, storageFormat, createTableColumns, partitionedBy); - } - - protected void doCreateEmptyTable(SchemaTableName tableName, HiveStorageFormat storageFormat, List createTableColumns, List partitionedBy) - throws Exception - { - String queryId; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - queryId = session.getQueryId(); - - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, createTableColumns, createTableProperties(storageFormat, partitionedBy)); - metadata.createTable(session, tableMetadata, false); - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // load the new table - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - - List expectedColumns = createTableColumns.stream() - .map(column -> ColumnMetadata.builder() - .setName(column.getName()) - .setType(column.getType()) - .setComment(Optional.ofNullable(column.getComment())) - .setExtraInfo(Optional.ofNullable(columnExtraInfo(partitionedBy.contains(column.getName())))) - .build()) - .collect(toList()); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(expectedColumns); - - // verify table format - Table table = transaction.getMetastore().getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - assertThat(table.getStorage().getStorageFormat().getInputFormat()).isEqualTo(storageFormat.getInputFormat()); - - // verify the node version and query ID - assertThat(table.getParameters()).containsEntry(TRINO_VERSION_NAME, TEST_SERVER_VERSION); - assertThat(table.getParameters()).containsEntry(TRINO_QUERY_ID_NAME, queryId); - - // verify the table is empty - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertThat(result.getRowCount()).isEqualTo(0); - - // verify basic statistics - if (partitionedBy.isEmpty()) { - HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(0L); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(0L); - assertThat(statistics.getInMemoryDataSizeInBytes().getAsLong()).isEqualTo(0L); - assertThat(statistics.getOnDiskDataSizeInBytes().getAsLong()).isEqualTo(0L); - } - } - } - - private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - // creating the table - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS); - - MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes()); - for (int i = 0; i < 3; i++) { - insertData(tableName, CREATE_TABLE_DATA); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // load the new table - 
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(CREATE_TABLE_COLUMNS); - - // verify the data - resultBuilder.rows(CREATE_TABLE_DATA.getMaterializedRows()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows()); - - // statistics - HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(tableStatistics.getRowCount().orElse(0)).isEqualTo(CREATE_TABLE_DATA.getRowCount() * (i + 1L)); - assertThat(tableStatistics.getFileCount().getAsLong()).isEqualTo(i + 1L); - assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - - // test rollback - Set existingFiles; - try (Transaction transaction = newTransaction()) { - existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(existingFiles.isEmpty()).isFalse(); - } - - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // "stage" insert data - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_DATA.toPage()); - sink.appendPage(CREATE_TABLE_DATA.toPage()); - Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - // statistics, visible from within transaction - HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(tableStatistics.getRowCount().getAsLong()).isEqualTo(CREATE_TABLE_DATA.getRowCount() * 5L); - - try (Transaction otherTransaction = newTransaction()) { - // statistics, not visible from outside transaction - HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(otherTransaction, tableName); - assertThat(otherTableStatistics.getRowCount().getAsLong()).isEqualTo(CREATE_TABLE_DATA.getRowCount() * 3L); - } - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - - // verify all temp files start with the unique prefix - stagingPathRoot = getStagingPathRoot(insertTableHandle); - HdfsContext context = new HdfsContext(session); - Set tempFiles = listAllDataFiles(context, stagingPathRoot); - assertThat(!tempFiles.isEmpty()).isTrue(); - for (String filePath : tempFiles) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // rollback insert - transaction.rollback(); - } - - // verify temp directory is empty - HdfsContext context = new 
HdfsContext(newSession()); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isTrue(); - - // verify the data is unchanged - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows()); - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - } - - // verify statistics unchanged - try (Transaction transaction = newTransaction()) { - HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(CREATE_TABLE_DATA.getRowCount() * 3L); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(3L); - } - } - - private void doInsertOverwriteUnpartitioned(SchemaTableName tableName) - throws Exception - { - // create table with data - doCreateEmptyTable(tableName, ORC, CREATE_TABLE_COLUMNS); - insertData(tableName, CREATE_TABLE_DATA); - - // overwrite table with new data - MaterializedResult.Builder overwriteDataBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes()); - MaterializedResult overwriteData = null; - - Map overwriteProperties = ImmutableMap.of("insert_existing_partitions_behavior", "OVERWRITE"); - - for (int i = 0; i < 3; i++) { - overwriteDataBuilder.rows(reverse(CREATE_TABLE_DATA.getMaterializedRows())); - overwriteData = overwriteDataBuilder.build(); - - insertData(tableName, overwriteData, overwriteProperties); - - // verify overwrite - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // load the new table - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the metadata - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName)); - assertThat(filterNonHiddenColumnMetadata(tableMetadata.getColumns())).isEqualTo(CREATE_TABLE_COLUMNS); - - // verify the data - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), overwriteData.getMaterializedRows()); - - // statistics - HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(tableStatistics.getRowCount().getAsLong()).isEqualTo(overwriteData.getRowCount()); - assertThat(tableStatistics.getFileCount().getAsLong()).isEqualTo(1L); - assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - - // test rollback - Set existingFiles; - try (Transaction transaction = newTransaction()) { - existingFiles = 
listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(existingFiles.isEmpty()).isFalse(); - } - - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(overwriteProperties); - ConnectorMetadata metadata = transaction.getMetadata(); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // "stage" insert data - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - for (int i = 0; i < 4; i++) { - sink.appendPage(overwriteData.toPage()); - } - Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - // statistics, visible from within transaction - HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(tableStatistics.getRowCount().getAsLong()).isEqualTo(overwriteData.getRowCount() * 4L); - - try (Transaction otherTransaction = newTransaction()) { - // statistics, not visible from outside transaction - HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(otherTransaction, tableName); - assertThat(otherTableStatistics.getRowCount().getAsLong()).isEqualTo(overwriteData.getRowCount()); - } - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - - // verify all temp files start with the unique prefix - stagingPathRoot = getStagingPathRoot(insertTableHandle); - HdfsContext context = new HdfsContext(session); - Set tempFiles = listAllDataFiles(context, stagingPathRoot); - assertThat(!tempFiles.isEmpty()).isTrue(); - for (String filePath : tempFiles) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // rollback insert - transaction.rollback(); - } - - // verify temp directory is empty - HdfsContext context = new HdfsContext(newSession()); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isTrue(); - - // verify the data is unchanged - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), overwriteData.getMaterializedRows()); - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - } - - // verify statistics unchanged - try (Transaction transaction = newTransaction()) { - HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(overwriteData.getRowCount()); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(1L); - } - } - - private Location getStagingPathRoot(ConnectorInsertTableHandle insertTableHandle) - { - 
        HiveInsertTableHandle handle = (HiveInsertTableHandle) insertTableHandle;
-        WriteInfo writeInfo = getLocationService().getQueryWriteInfo(handle.getLocationHandle());
-        if (writeInfo.writeMode() != STAGE_AND_MOVE_TO_TARGET_DIRECTORY) {
-            throw new AssertionError("writeMode is not STAGE_AND_MOVE_TO_TARGET_DIRECTORY");
-        }
-        return writeInfo.writePath();
-    }
-
-    private Location getStagingPathRoot(ConnectorOutputTableHandle outputTableHandle)
-    {
-        HiveOutputTableHandle handle = (HiveOutputTableHandle) outputTableHandle;
-        return getLocationService()
-                .getQueryWriteInfo(handle.getLocationHandle())
-                .writePath();
-    }
-
-    private Location getTargetPathRoot(ConnectorInsertTableHandle insertTableHandle)
-    {
-        HiveInsertTableHandle hiveInsertTableHandle = (HiveInsertTableHandle) insertTableHandle;
-
-        return getLocationService()
-                .getQueryWriteInfo(hiveInsertTableHandle.getLocationHandle())
-                .targetPath();
-    }
-
-    protected Set<String> listAllDataFiles(Transaction transaction, String schemaName, String tableName)
-            throws IOException
-    {
-        HdfsContext hdfsContext = new HdfsContext(newSession());
-        Set<String> existingFiles = new HashSet<>();
-        for (String location : listAllDataPaths(transaction.getMetastore(), schemaName, tableName)) {
-            existingFiles.addAll(listAllDataFiles(hdfsContext, Location.of(location)));
-        }
-        return existingFiles;
-    }
-
-    public static List<String> listAllDataPaths(SemiTransactionalHiveMetastore metastore, String schemaName, String tableName)
-    {
-        ImmutableList.Builder<String> locations = ImmutableList.builder();
-        Table table = metastore.getTable(schemaName, tableName).get();
-        if (table.getStorage().getLocation() != null) {
-            // For partitioned table, there should be nothing directly under this directory.
-            // But including this location in the set makes the directory content assert more
-            // extensive, which is desirable.
- locations.add(table.getStorage().getLocation()); - } - - Optional> partitionNames = metastore.getPartitionNames(schemaName, tableName); - if (partitionNames.isPresent()) { - metastore.getPartitionsByNames(schemaName, tableName, partitionNames.get()).values().stream() - .map(Optional::get) - .map(partition -> partition.getStorage().getLocation()) - .filter(location -> !location.startsWith(table.getStorage().getLocation())) - .forEach(locations::add); - } - - return locations.build(); - } - - protected Set listAllDataFiles(HdfsContext context, Location location) - throws IOException - { - Path path = new Path(location.toString()); - Set result = new HashSet<>(); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, path); - if (fileSystem.exists(path)) { - for (FileStatus fileStatus : fileSystem.listStatus(path)) { - if (fileStatus.getPath().getName().startsWith(".trino")) { - // skip hidden files - } - else if (fileStatus.isFile()) { - result.add(fileStatus.getPath().toString()); - } - else if (fileStatus.isDirectory()) { - result.addAll(listAllDataFiles(context, Location.of(fileStatus.getPath().toString()))); - } - } - } - return result; - } - - private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - // creating the table - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED); - - // insert the data - String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA); - - Set existingFiles; - try (Transaction transaction = newTransaction()) { - // verify partitions were created - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream() - .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)) - .collect(toImmutableList())); - - // verify the node versions in partitions - Map> partitions = getMetastoreClient().getPartitionsByNames(table, partitionNames); - assertThat(partitions.size()).isEqualTo(partitionNames.size()); - for (String partitionName : partitionNames) { - Partition partition = partitions.get(partitionName).get(); - assertThat(partition.getParameters()).containsEntry(TRINO_VERSION_NAME, TEST_SERVER_VERSION); - assertThat(partition.getParameters()).containsEntry(TRINO_QUERY_ID_NAME, queryId); - } - - // load the new table - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows()); - - // test rollback - existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(existingFiles.isEmpty()).isFalse(); - - // test statistics - for (String partitionName : 
partitionNames) { - HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName); - assertThat(partitionStatistics.getRowCount().getAsLong()).isEqualTo(1L); - assertThat(partitionStatistics.getFileCount().getAsLong()).isEqualTo(1L); - assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // "stage" insert data - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - stagingPathRoot = getStagingPathRoot(insertTableHandle); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage()); - Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - // verify all temp files start with the unique prefix - HdfsContext context = new HdfsContext(session); - Set tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle)); - assertThat(!tempFiles.isEmpty()).isTrue(); - for (String filePath : tempFiles) { - assertThat(new Path(filePath).getName()).startsWith(session.getQueryId()); - } - - // rollback insert - transaction.rollback(); - } - - // verify the data is unchanged - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty()); - assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows()); - - // verify we did not modify the table directory - assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles); - - // verify temp directory is empty - HdfsContext context = new HdfsContext(session); - assertThat(listAllDataFiles(context, stagingPathRoot).isEmpty()).isTrue(); - } - } - - private void doInsertUnsupportedWriteType(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - List columns = ImmutableList.of(new Column("dummy", HiveType.valueOf("uniontype"), Optional.empty(), Map.of())); - List partitionColumns = ImmutableList.of(new Column("name", HIVE_STRING, Optional.empty(), Map.of())); - - createEmptyTable(tableName, storageFormat, columns, partitionColumns); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - fail("expected failure"); - } - catch (TrinoException e) { - 
assertThat(e).hasMessageMatching("Inserting into Hive table .* with column type uniontype not supported"); - } - } - - private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - // creating the table - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED); - - MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes()); - for (int i = 0; i < 3; i++) { - // insert the data - insertData(tableName, CREATE_TABLE_PARTITIONED_DATA); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // verify partitions were created - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream() - .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)) - .collect(toImmutableList())); - - // load the new table - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data - resultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows()); - - // test statistics - for (String partitionName : partitionNames) { - HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName); - assertThat(statistics.getRowCount().getAsLong()).isEqualTo(i + 1L); - assertThat(statistics.getFileCount().getAsLong()).isEqualTo(i + 1L); - assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); - assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); - } - } - } - - // test rollback - Set existingFiles; - Location stagingPathRoot; - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(existingFiles.isEmpty()).isFalse(); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - - // "stage" insert data - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - stagingPathRoot = getStagingPathRoot(insertTableHandle); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage()); - sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage()); - Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - - // verify all temp files start with the unique prefix - HdfsContext context = new HdfsContext(session); - Set tempFiles = listAllDataFiles(context, 
getStagingPathRoot(insertTableHandle));
-            assertThat(!tempFiles.isEmpty()).isTrue();
-            for (String filePath : tempFiles) {
-                assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
-            }
-
-            // verify statistics are visible from within of the current transaction
-            List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
-                    .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
-            for (String partitionName : partitionNames) {
-                HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName);
-                assertThat(partitionStatistics.getRowCount().getAsLong()).isEqualTo(5L);
-            }
-
-            // rollback insert
-            transaction.rollback();
-        }
-
-        try (Transaction transaction = newTransaction()) {
-            ConnectorMetadata metadata = transaction.getMetadata();
-            ConnectorSession session = newSession();
-            metadata.beginQuery(session);
-            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
-            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
-
-            // verify the data is unchanged
-            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
-            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
-
-            // verify we did not modify the table directory
-            assertThat(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName())).isEqualTo(existingFiles);
-
-            // verify temp directory is empty
-            HdfsContext hdfsContext = new HdfsContext(session);
-            assertThat(listAllDataFiles(hdfsContext, stagingPathRoot).isEmpty()).isTrue();
-
-            // verify statistics have been rolled back
-            List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
-                    .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
-            for (String partitionName : partitionNames) {
-                HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName);
-                assertThat(partitionStatistics.getRowCount().getAsLong()).isEqualTo(3L);
-            }
-        }
-    }
-
-    private void doInsertIntoExistingPartitionEmptyStatistics(HiveStorageFormat storageFormat, SchemaTableName tableName)
-            throws Exception
-    {
-        doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
-        insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
-
-        eraseStatistics(tableName);
-
-        insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
-
-        try (Transaction transaction = newTransaction()) {
-            List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName())
-                    .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
-
-            for (String partitionName : partitionNames) {
-                HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, COLUMN_NAMES_PARTITIONED, partitionName);
-                assertThat(statistics.getRowCount()).isNotPresent();
-                assertThat(statistics.getInMemoryDataSizeInBytes()).isNotPresent();
-                // fileCount and rawSize statistics are computed on the fly by the metastore, thus cannot be erased
-            }
-        }
-    }
-
-    private static HiveBasicStatistics getBasicStatisticsForTable(Transaction transaction, SchemaTableName table)
-    {
-        return transaction
-                .getMetastore()
-
.getTableStatistics(table.getSchemaName(), table.getTableName(), Optional.empty()) - .getBasicStatistics(); - } - - private static HiveBasicStatistics getBasicStatisticsForPartition(Transaction transaction, SchemaTableName table, Set columns, String partitionName) - { - return transaction - .getMetastore() - .getPartitionStatistics(table.getSchemaName(), table.getTableName(), columns, ImmutableSet.of(partitionName)) - .get(partitionName) - .getBasicStatistics(); - } - - private void eraseStatistics(SchemaTableName schemaTableName) - { - HiveMetastore metastoreClient = getMetastoreClient(); - metastoreClient.updateTableStatistics(schemaTableName.getSchemaName(), schemaTableName.getTableName(), NO_ACID_TRANSACTION, statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of())); - Table table = metastoreClient.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(schemaTableName)); - List partitionColumns = table.getPartitionColumns().stream() - .map(Column::getName) - .collect(toImmutableList()); - if (!table.getPartitionColumns().isEmpty()) { - List partitionNames = metastoreClient.getPartitionNamesByFilter(schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionColumns, TupleDomain.all()) - .orElse(ImmutableList.of()); - List partitions = metastoreClient - .getPartitionsByNames(table, partitionNames) - .values() - .stream() - .filter(Optional::isPresent) - .map(Optional::get) - .collect(toImmutableList()); - for (Partition partition : partitions) { - metastoreClient.updatePartitionsStatistics( - table, - makePartName(partitionColumns, partition.getValues()), - statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of())); - } - } - } - - /** - * @return query id - */ - private String insertData(SchemaTableName tableName, MaterializedResult data) - throws Exception - { - return insertData(tableName, data, ImmutableMap.of()); - } - - private String insertData(SchemaTableName tableName, MaterializedResult data, Map sessionProperties) - throws Exception - { - Location writePath; - Location targetPath; - String queryId; - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(sessionProperties); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - queryId = session.getQueryId(); - writePath = getStagingPathRoot(insertTableHandle); - targetPath = getTargetPathRoot(insertTableHandle); - - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - - // write data - sink.appendPage(data.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // commit the insert - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - transaction.commit(); - } - - // check that temporary files are removed - if (!writePath.equals(targetPath)) { - HdfsContext context = new HdfsContext(newSession()); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, new Path(writePath.toString())); - assertThat(fileSystem.exists(new Path(writePath.toString()))).isFalse(); - } - - return queryId; - } - - private void doTestMetadataDelete(HiveStorageFormat storageFormat, SchemaTableName tableName) - throws Exception - { - // 
creating the table - doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED); - - insertData(tableName, CREATE_TABLE_PARTITIONED_DATA); - - MaterializedResult.Builder expectedResultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes()); - expectedResultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows()); - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - // verify partitions were created - List partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream() - .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)) - .collect(toImmutableList())); - - // verify table directory is not empty - Set filesAfterInsert = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(filesAfterInsert.isEmpty()).isFalse(); - - // verify the data - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), expectedResultBuilder.build().getMaterializedRows()); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - // get ds column handle - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds"); - - // delete ds=2015-07-03 - session = newSession(); - TupleDomain tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.of(dsColumnHandle, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2015-07-03")))); - Constraint constraint = new Constraint(tupleDomain, tupleDomain.asPredicate(), tupleDomain.getDomains().orElseThrow().keySet()); - tableHandle = applyFilter(metadata, tableHandle, constraint); - tableHandle = metadata.applyDelete(session, tableHandle).get(); - metadata.executeDelete(session, tableHandle); - - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds"); - int dsColumnOrdinalPosition = columnHandles.indexOf(dsColumnHandle); - - // verify the data - ImmutableList expectedRows = expectedResultBuilder.build().getMaterializedRows().stream() - .filter(row -> !"2015-07-03".equals(row.getField(dsColumnOrdinalPosition))) - .collect(toImmutableList()); - MaterializedResult actualAfterDelete = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), 
OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(actualAfterDelete.getMaterializedRows(), expectedRows); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds"); - - // delete ds=2015-07-01 and 2015-07-02 - session = newSession(); - TupleDomain tupleDomain2 = TupleDomain.withColumnDomains( - ImmutableMap.of(dsColumnHandle, Domain.create(ValueSet.ofRanges(Range.range(createUnboundedVarcharType(), utf8Slice("2015-07-01"), true, utf8Slice("2015-07-02"), true)), false))); - Constraint constraint2 = new Constraint(tupleDomain2, tupleDomain2.asPredicate(), tupleDomain2.getDomains().orElseThrow().keySet()); - tableHandle = applyFilter(metadata, tableHandle, constraint2); - tableHandle = metadata.applyDelete(session, tableHandle).get(); - metadata.executeDelete(session, tableHandle); - - transaction.commit(); - } - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data - session = newSession(); - MaterializedResult actualAfterDelete2 = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(actualAfterDelete2.getMaterializedRows(), ImmutableList.of()); - - // verify table directory is empty - Set filesAfterDelete = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()); - assertThat(filesAfterDelete.isEmpty()).isTrue(); - } - } - - protected void assertGetRecords(String tableName, HiveStorageFormat hiveStorageFormat) - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - - ConnectorTableHandle tableHandle = getTableHandle(metadata, new SchemaTableName(database, tableName)); - ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle); - HiveSplit hiveSplit = getHiveSplit(tableHandle, transaction, session); - - List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values()); - - ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, tableHandle, columnHandles, DynamicFilter.EMPTY); - assertGetRecords(hiveStorageFormat, tableMetadata, hiveSplit, pageSource, columnHandles); - } - } - - protected HiveSplit getHiveSplit(ConnectorTableHandle tableHandle, Transaction transaction, ConnectorSession session) - { - List splits = getAllSplits(tableHandle, transaction, session); - assertThat(splits.size()).isEqualTo(1); - return (HiveSplit) getOnlyElement(splits); - } - - protected void assertGetRecords( - HiveStorageFormat hiveStorageFormat, - ConnectorTableMetadata tableMetadata, - HiveSplit hiveSplit, - ConnectorPageSource pageSource, - List columnHandles) - throws IOException - { - try { - MaterializedResult result = materializeSourceDataStream(newSession(), pageSource, 
getTypes(columnHandles)); - - assertPageSourceType(pageSource, hiveStorageFormat); - - ImmutableMap columnIndex = indexColumns(tableMetadata); - - long rowNumber = 0; - long completedBytes = 0; - for (MaterializedRow row : result) { - try { - assertValueTypes(row, tableMetadata.getColumns()); - } - catch (RuntimeException e) { - throw new RuntimeException("row " + rowNumber, e); - } - - rowNumber++; - Integer index; - Object value; - - // STRING - index = columnIndex.get("t_string"); - value = row.getField(index); - if (rowNumber % 19 == 0) { - assertThat(value).isNull(); - } - else if (rowNumber % 19 == 1) { - assertThat(value).isEqualTo(""); - } - else { - assertThat(value).isEqualTo("test"); - } - - // NUMBERS - assertThat(row.getField(columnIndex.get("t_tinyint"))).isEqualTo((byte) (1 + rowNumber)); - assertThat(row.getField(columnIndex.get("t_smallint"))).isEqualTo((short) (2 + rowNumber)); - assertThat(row.getField(columnIndex.get("t_int"))).isEqualTo((int) (3 + rowNumber)); - - index = columnIndex.get("t_bigint"); - if ((rowNumber % 13) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo(4 + rowNumber); - } - - assertThat((Float) row.getField(columnIndex.get("t_float"))).isCloseTo(5.1f + rowNumber, offset(0.001f)); - assertThat(row.getField(columnIndex.get("t_double"))).isEqualTo(6.2 + rowNumber); - - // BOOLEAN - index = columnIndex.get("t_boolean"); - if ((rowNumber % 3) == 2) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo((rowNumber % 3) != 0); - } - - // TIMESTAMP - index = columnIndex.get("t_timestamp"); - if (index != null) { - if ((rowNumber % 17) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - SqlTimestamp expected = sqlTimestampOf(3, 2011, 5, 6, 7, 8, 9, 123); - assertThat(row.getField(index)).isEqualTo(expected); - } - } - - // BINARY - index = columnIndex.get("t_binary"); - if (index != null) { - if ((rowNumber % 23) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo(new SqlVarbinary("test binary".getBytes(UTF_8))); - } - } - - // DATE - index = columnIndex.get("t_date"); - if (index != null) { - if ((rowNumber % 37) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - SqlDate expected = new SqlDate(toIntExact(MILLISECONDS.toDays(new DateTime(2013, 8, 9, 0, 0, 0, UTC).getMillis()))); - assertThat(row.getField(index)).isEqualTo(expected); - } - } - - // VARCHAR(50) - index = columnIndex.get("t_varchar"); - if (index != null) { - value = row.getField(index); - if (rowNumber % 39 == 0) { - assertThat(value).isNull(); - } - else if (rowNumber % 39 == 1) { - // https://issues.apache.org/jira/browse/HIVE-13289 - // RCBINARY reads empty VARCHAR as null - if (hiveStorageFormat == RCBINARY) { - assertThat(value).isNull(); - } - else { - assertThat(value).isEqualTo(""); - } - } - else { - assertThat(value).isEqualTo("test varchar"); - } - } - - //CHAR(25) - index = columnIndex.get("t_char"); - if (index != null) { - value = row.getField(index); - if ((rowNumber % 41) == 0) { - assertThat(value).isNull(); - } - else { - assertThat(value).isEqualTo((rowNumber % 41) == 1 ? 
" " : "test char "); - } - } - - // MAP - index = columnIndex.get("t_map"); - if (index != null) { - if ((rowNumber % 27) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo(ImmutableMap.of("test key", "test value")); - } - } - - // ARRAY - index = columnIndex.get("t_array_string"); - if (index != null) { - if ((rowNumber % 29) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index)).isEqualTo(ImmutableList.of("abc", "xyz", "data")); - } - } - - // ARRAY - index = columnIndex.get("t_array_timestamp"); - if (index != null) { - if ((rowNumber % 43) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - SqlTimestamp expected = sqlTimestampOf(3, LocalDateTime.of(2011, 5, 6, 7, 8, 9, 123_000_000)); - assertThat(row.getField(index)).isEqualTo(ImmutableList.of(expected)); - } - } - - // ARRAY> - index = columnIndex.get("t_array_struct"); - if (index != null) { - if ((rowNumber % 31) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - List expected1 = ImmutableList.of("test abc", 0.1); - List expected2 = ImmutableList.of("test xyz", 0.2); - assertThat(row.getField(index)).isEqualTo(ImmutableList.of(expected1, expected2)); - } - } - - // STRUCT - index = columnIndex.get("t_struct"); - if (index != null) { - if ((rowNumber % 31) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - assertThat(row.getField(index) instanceof List).isTrue(); - List values = (List) row.getField(index); - assertThat(values.size()).isEqualTo(2); - assertThat(values.get(0)).isEqualTo("test abc"); - assertThat(values.get(1)).isEqualTo(0.1); - } - } - - // MAP>> - index = columnIndex.get("t_complex"); - if (index != null) { - if ((rowNumber % 33) == 0) { - assertThat(row.getField(index)).isNull(); - } - else { - List expected1 = ImmutableList.of("test abc", 0.1); - List expected2 = ImmutableList.of("test xyz", 0.2); - assertThat(row.getField(index)).isEqualTo(ImmutableMap.of(1, ImmutableList.of(expected1, expected2))); - } - } - - // NEW COLUMN - assertThat(row.getField(columnIndex.get("new_column"))).isNull(); - - long newCompletedBytes = pageSource.getCompletedBytes(); - assertThat(newCompletedBytes >= completedBytes).isTrue(); - // some formats (e.g., parquet) over read the data by a bit - assertLessThanOrEqual(newCompletedBytes, hiveSplit.getLength() + (100 * 1024)); - completedBytes = newCompletedBytes; - } - - assertLessThanOrEqual(completedBytes, hiveSplit.getLength() + (100 * 1024)); - assertThat(rowNumber).isEqualTo(100); - } - finally { - pageSource.close(); - } - } - - protected void dropTable(SchemaTableName table) - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - - ConnectorTableHandle handle = metadata.getTableHandle(session, table); - if (handle == null) { - return; - } - - metadata.dropTable(session, handle); - try { - // todo I have no idea why this is needed... maybe there is a propagation delay in the metastore? 
- metadata.dropTable(session, handle); - fail("expected NotFoundException"); - } - catch (TableNotFoundException expected) { - } - - transaction.commit(); - } - } - - protected ConnectorTableHandle getTableHandle(ConnectorMetadata metadata, SchemaTableName tableName) - { - ConnectorTableHandle handle = metadata.getTableHandle(newSession(), tableName); - checkArgument(handle != null, "table not found: %s", tableName); - return handle; - } - - private HiveTableHandle applyFilter(ConnectorMetadata metadata, ConnectorTableHandle tableHandle, Constraint constraint) - { - return metadata.applyFilter(newSession(), tableHandle, constraint) - .map(ConstraintApplicationResult::getHandle) - .map(HiveTableHandle.class::cast) - .orElseThrow(AssertionError::new); - } - - protected MaterializedResult readTable( - Transaction transaction, - ConnectorTableHandle tableHandle, - List columnHandles, - ConnectorSession session, - TupleDomain tupleDomain, - OptionalInt expectedSplitCount, - Optional expectedStorageFormat) - throws Exception - { - tableHandle = applyFilter(transaction.getMetadata(), tableHandle, new Constraint(tupleDomain)); - List splits = getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - if (expectedSplitCount.isPresent()) { - assertThat(splits.size()).isEqualTo(expectedSplitCount.getAsInt()); - } - - ImmutableList.Builder allRows = ImmutableList.builder(); - for (ConnectorSplit split : splits) { - try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) { - expectedStorageFormat.ifPresent(format -> assertPageSourceType(pageSource, format)); - MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles)); - allRows.addAll(result.getMaterializedRows()); - } - } - return new MaterializedResult(allRows.build(), getTypes(columnHandles)); - } - - protected HiveMetastore getMetastoreClient() - { - return metastoreClient; - } - - protected LocationService getLocationService() - { - return locationService; - } - - protected static int getSplitCount(ConnectorSplitSource splitSource) - { - int splitCount = 0; - while (!splitSource.isFinished()) { - splitCount += getFutureValue(splitSource.getNextBatch(1000)).getSplits().size(); - } - return splitCount; - } - - private List getAllSplits(ConnectorTableHandle tableHandle, Transaction transaction, ConnectorSession session) - { - return getAllSplits(getSplits(splitManager, transaction, session, tableHandle)); - } - - protected static List getAllSplits(ConnectorSplitSource splitSource) - { - ImmutableList.Builder splits = ImmutableList.builder(); - while (!splitSource.isFinished()) { - splits.addAll(getFutureValue(splitSource.getNextBatch(1000)).getSplits()); - } - return splits.build(); - } - - protected static ConnectorSplitSource getSplits(ConnectorSplitManager splitManager, Transaction transaction, ConnectorSession session, ConnectorTableHandle tableHandle) - { - return splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, DynamicFilter.EMPTY, Constraint.alwaysTrue()); - } - - protected String getPartitionId(Object partition) - { - return ((HivePartition) partition).getPartitionId(); - } - - protected static void assertPageSourceType(ConnectorPageSource pageSource, HiveStorageFormat hiveStorageFormat) - { - assertInstanceOf(((HivePageSource) pageSource).getPageSource(), pageSourceType(hiveStorageFormat), hiveStorageFormat.name()); - } - - private 
static Class pageSourceType(HiveStorageFormat hiveStorageFormat) - { - switch (hiveStorageFormat) { - case RCTEXT: - case RCBINARY: - return RcFilePageSource.class; - case ORC: - return OrcPageSource.class; - case PARQUET: - return ParquetPageSource.class; - case CSV: - case JSON: - case OPENX_JSON: - case TEXTFILE: - case SEQUENCEFILE: - return LinePageSource.class; - default: - throw new AssertionError("File type does not use a PageSource: " + hiveStorageFormat); - } - } - - private static void assertValueTypes(MaterializedRow row, List schema) - { - for (int columnIndex = 0; columnIndex < schema.size(); columnIndex++) { - ColumnMetadata column = schema.get(columnIndex); - Object value = row.getField(columnIndex); - if (value != null) { - if (BOOLEAN.equals(column.getType())) { - assertInstanceOf(value, Boolean.class); - } - else if (TINYINT.equals(column.getType())) { - assertInstanceOf(value, Byte.class); - } - else if (SMALLINT.equals(column.getType())) { - assertInstanceOf(value, Short.class); - } - else if (INTEGER.equals(column.getType())) { - assertInstanceOf(value, Integer.class); - } - else if (BIGINT.equals(column.getType())) { - assertInstanceOf(value, Long.class); - } - else if (DOUBLE.equals(column.getType())) { - assertInstanceOf(value, Double.class); - } - else if (REAL.equals(column.getType())) { - assertInstanceOf(value, Float.class); - } - else if (column.getType() instanceof VarcharType) { - assertInstanceOf(value, String.class); - } - else if (column.getType() instanceof CharType) { - assertInstanceOf(value, String.class); - } - else if (VARBINARY.equals(column.getType())) { - assertInstanceOf(value, SqlVarbinary.class); - } - else if (TIMESTAMP_MILLIS.equals(column.getType())) { - assertInstanceOf(value, SqlTimestamp.class); - } - else if (TIMESTAMP_TZ_MILLIS.equals(column.getType())) { - assertInstanceOf(value, SqlTimestampWithTimeZone.class); - } - else if (DATE.equals(column.getType())) { - assertInstanceOf(value, SqlDate.class); - } - else if (column.getType() instanceof ArrayType || column.getType() instanceof RowType) { - assertInstanceOf(value, List.class); - } - else if (column.getType() instanceof MapType) { - assertInstanceOf(value, Map.class); - } - else { - fail("Unknown primitive type " + columnIndex); - } - } - } - } - - private static void assertPrimitiveField(Map map, String name, Type type, boolean partitionKey) - { - assertThat(map).containsKey(name); - ColumnMetadata column = map.get(name); - assertThat(column.getType()) - .describedAs(name) - .isEqualTo(type); - assertThat(column.getExtraInfo()).isEqualTo(columnExtraInfo(partitionKey)); - } - - protected static ImmutableMap indexColumns(List columnHandles) - { - ImmutableMap.Builder index = ImmutableMap.builder(); - int i = 0; - for (ColumnHandle columnHandle : columnHandles) { - HiveColumnHandle hiveColumnHandle = (HiveColumnHandle) columnHandle; - index.put(hiveColumnHandle.getName(), i); - i++; - } - return index.buildOrThrow(); - } - - protected static ImmutableMap indexColumns(ConnectorTableMetadata tableMetadata) - { - ImmutableMap.Builder index = ImmutableMap.builder(); - int i = 0; - for (ColumnMetadata columnMetadata : tableMetadata.getColumns()) { - index.put(columnMetadata.getName(), i); - i++; - } - return index.buildOrThrow(); - } - - protected SchemaTableName temporaryTable(String tableName) - { - return temporaryTable(database, tableName); - } - - protected static SchemaTableName temporaryTable(String database, String tableName) - { - String randomName = 
UUID.randomUUID().toString().toLowerCase(ENGLISH).replace("-", ""); - return new SchemaTableName(database, TEMPORARY_TABLE_PREFIX + tableName + "_" + randomName); - } - - protected static Map createTableProperties(HiveStorageFormat storageFormat) - { - return createTableProperties(storageFormat, ImmutableList.of()); - } - - protected static Map createTableProperties(HiveStorageFormat storageFormat, Iterable partitionedBy) - { - return ImmutableMap.builder() - .put(STORAGE_FORMAT_PROPERTY, storageFormat) - .put(PARTITIONED_BY_PROPERTY, ImmutableList.copyOf(partitionedBy)) - .put(BUCKETED_BY_PROPERTY, ImmutableList.of()) - .put(BUCKET_COUNT_PROPERTY, 0) - .put(SORTED_BY_PROPERTY, ImmutableList.of()) - .buildOrThrow(); - } - - protected static List filterNonHiddenColumnHandles(Collection columnHandles) - { - return columnHandles.stream() - .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()) - .collect(toList()); - } - - protected static List filterNonHiddenColumnMetadata(Collection columnMetadatas) - { - return columnMetadatas.stream() - .filter(columnMetadata -> !columnMetadata.isHidden()) - .collect(toList()); - } - - private void createEmptyTable(SchemaTableName schemaTableName, HiveStorageFormat hiveStorageFormat, List columns, List partitionColumns) - throws Exception - { - createEmptyTable(schemaTableName, hiveStorageFormat, columns, partitionColumns, Optional.empty(), false); - } - - private void createEmptyTable( - SchemaTableName schemaTableName, - HiveStorageFormat hiveStorageFormat, - List columns, - List partitionColumns, - Optional bucketProperty) - throws Exception - { - createEmptyTable(schemaTableName, hiveStorageFormat, columns, partitionColumns, bucketProperty, false); - } - - protected void createEmptyTable( - SchemaTableName schemaTableName, - HiveStorageFormat hiveStorageFormat, - List columns, - List partitionColumns, - Optional bucketProperty, - boolean isTransactional) - throws Exception - { - Path targetPath; - - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - - LocationService locationService = getLocationService(); - targetPath = new Path(locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName).toString()); - - ImmutableMap.Builder tableParamBuilder = ImmutableMap.builder() - .put(TRINO_VERSION_NAME, TEST_SERVER_VERSION) - .put(TRINO_QUERY_ID_NAME, session.getQueryId()); - if (isTransactional) { - tableParamBuilder.put(TRANSACTIONAL, "true"); - } - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(MANAGED_TABLE.name()) - .setParameters(tableParamBuilder.buildOrThrow()) - .setDataColumns(columns) - .setPartitionColumns(partitionColumns); - - tableBuilder.getStorageBuilder() - .setLocation(targetPath.toString()) - .setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerde(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat())) - .setBucketProperty(bucketProperty) - .setSerdeParameters(ImmutableMap.of()); - - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - - 
            transaction.commit();
-        }
-
-        HdfsContext context = new HdfsContext(newSession());
-        List<String> targetDirectoryList = listDirectory(context, targetPath);
-        assertThat(targetDirectoryList).isEqualTo(ImmutableList.of());
-    }
-
-    private void alterBucketProperty(SchemaTableName schemaTableName, Optional<HiveBucketProperty> bucketProperty)
-    {
-        try (Transaction transaction = newTransaction()) {
-            ConnectorSession session = newSession();
-
-            String tableOwner = session.getUser();
-            String schemaName = schemaTableName.getSchemaName();
-            String tableName = schemaTableName.getTableName();
-
-            Optional<Table> table = transaction.getMetastore().getTable(schemaName, tableName);
-            Table.Builder tableBuilder = Table.builder(table.get());
-            tableBuilder.getStorageBuilder().setBucketProperty(bucketProperty);
-            PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser());
-            transaction.getMetastore().replaceTable(schemaName, tableName, tableBuilder.build(), principalPrivileges);
-
-            transaction.commit();
-        }
-    }
-
-    protected PrincipalPrivileges testingPrincipalPrivilege(ConnectorSession session)
-    {
-        return testingPrincipalPrivilege(session.getUser(), session.getUser());
-    }
-
-    protected PrincipalPrivileges testingPrincipalPrivilege(String tableOwner, String grantor)
-    {
-        return new PrincipalPrivileges(
-                ImmutableMultimap.<String, HivePrivilegeInfo>builder()
-                        .put(tableOwner, new HivePrivilegeInfo(HivePrivilege.SELECT, true, new HivePrincipal(USER, grantor), new HivePrincipal(USER, grantor)))
-                        .put(tableOwner, new HivePrivilegeInfo(HivePrivilege.INSERT, true, new HivePrincipal(USER, grantor), new HivePrincipal(USER, grantor)))
-                        .put(tableOwner, new HivePrivilegeInfo(HivePrivilege.UPDATE, true, new HivePrincipal(USER, grantor), new HivePrincipal(USER, grantor)))
-                        .put(tableOwner, new HivePrivilegeInfo(HivePrivilege.DELETE, true, new HivePrincipal(USER, grantor), new HivePrincipal(USER, grantor)))
-                        .build(),
-                ImmutableMultimap.of());
-    }
-
-    private List<String> listDirectory(HdfsContext context, Path path)
-            throws IOException
-    {
-        FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, path);
-        return Arrays.stream(fileSystem.listStatus(path))
-                .map(FileStatus::getPath)
-                .map(Path::getName)
-                .filter(name -> !name.startsWith(".trino"))
-                .collect(toList());
-    }
-
-    @Test
-    public void testTransactionDeleteInsert()
-            throws Exception
-    {
-        doTestTransactionDeleteInsert(
-                RCBINARY,
-                true,
-                ImmutableList.<TransactionDeleteInsertTestCase>builder()
-                        .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_RIGHT_AWAY, Optional.empty()))
-                        .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_DELETE, Optional.empty()))
-                        .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_BEGIN_INSERT, Optional.empty()))
-                        .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_APPEND_PAGE, Optional.empty()))
-                        .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_SINK_FINISH, Optional.empty()))
-                        .add(new TransactionDeleteInsertTestCase(false, false, ROLLBACK_AFTER_FINISH_INSERT, Optional.empty()))
-                        .add(new TransactionDeleteInsertTestCase(false, false, COMMIT, Optional.of(new AddPartitionFailure())))
-                        .add(new TransactionDeleteInsertTestCase(false, false, COMMIT, Optional.of(new DirectoryRenameFailure())))
-                        .add(new TransactionDeleteInsertTestCase(false, false, COMMIT, Optional.of(new FileRenameFailure())))
-                        .add(new TransactionDeleteInsertTestCase(true, false, COMMIT, Optional.of(new DropPartitionFailure())))
-                        .add(new TransactionDeleteInsertTestCase(true, true, COMMIT, Optional.empty()))
-                        .build());
-    }
-
-    @Test
-    public void testPreferredInsertLayout()
-            throws Exception
-    {
-        SchemaTableName tableName = temporaryTable("empty_partitioned_table");
-
-        try {
-            Column partitioningColumn = new Column("column2", HIVE_STRING, Optional.empty(), Map.of());
-            List<Column> columns = ImmutableList.of(
-                    new Column("column1", HIVE_STRING, Optional.empty(), Map.of()),
-                    partitioningColumn);
-            createEmptyTable(tableName, ORC, columns, ImmutableList.of(partitioningColumn));
-
-            try (Transaction transaction = newTransaction()) {
-                ConnectorMetadata metadata =
transaction.getMetadata(); - ConnectorSession session = newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - Optional insertLayout = metadata.getInsertLayout(session, tableHandle); - assertThat(insertLayout.isPresent()).isTrue(); - assertThat(insertLayout.get().getPartitioning().isPresent()).isFalse(); - assertThat(insertLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of(partitioningColumn.getName())); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInsertBucketedTableLayout() - throws Exception - { - insertBucketedTableLayout(false); - } - - @Test - public void testInsertBucketedTransactionalTableLayout() - throws Exception - { - insertBucketedTableLayout(true); - } - - protected void insertBucketedTableLayout(boolean transactional) - throws Exception - { - SchemaTableName tableName = temporaryTable("empty_bucketed_table"); - try { - List columns = ImmutableList.of( - new Column("column1", HIVE_STRING, Optional.empty(), Map.of()), - new Column("column2", HIVE_LONG, Optional.empty(), Map.of())); - HiveBucketProperty bucketProperty = new HiveBucketProperty(ImmutableList.of("column1"), BUCKETING_V1, 4, ImmutableList.of()); - createEmptyTable(tableName, ORC, columns, ImmutableList.of(), Optional.of(bucketProperty), transactional); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - Optional insertLayout = metadata.getInsertLayout(session, tableHandle); - assertThat(insertLayout.isPresent()).isTrue(); - ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle( - bucketProperty.getBucketingVersion(), - bucketProperty.getBucketCount(), - ImmutableList.of(HIVE_STRING), - OptionalInt.empty(), - false); - assertThat(insertLayout.get().getPartitioning()).isEqualTo(Optional.of(partitioningHandle)); - assertThat(insertLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column1")); - ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMapping(transaction.getTransactionHandle(), session, partitioningHandle).orElseThrow(); - assertThat(connectorBucketNodeMap.getBucketCount()).isEqualTo(4); - assertThat(connectorBucketNodeMap.hasFixedMapping()).isFalse(); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInsertPartitionedBucketedTableLayout() - throws Exception - { - insertPartitionedBucketedTableLayout(false); - } - - @Test - public void testInsertPartitionedBucketedTransactionalTableLayout() - throws Exception - { - insertPartitionedBucketedTableLayout(true); - } - - protected void insertPartitionedBucketedTableLayout(boolean transactional) - throws Exception - { - SchemaTableName tableName = temporaryTable("empty_partitioned_table"); - try { - Column partitioningColumn = new Column("column2", HIVE_LONG, Optional.empty(), Map.of()); - List columns = ImmutableList.of( - new Column("column1", HIVE_STRING, Optional.empty(), Map.of()), - partitioningColumn); - HiveBucketProperty bucketProperty = new HiveBucketProperty(ImmutableList.of("column1"), BUCKETING_V1, 4, ImmutableList.of()); - createEmptyTable(tableName, ORC, columns, ImmutableList.of(partitioningColumn), Optional.of(bucketProperty), transactional); - - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = 
newSession(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - Optional insertLayout = metadata.getInsertLayout(session, tableHandle); - assertThat(insertLayout.isPresent()).isTrue(); - ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle( - bucketProperty.getBucketingVersion(), - bucketProperty.getBucketCount(), - ImmutableList.of(HIVE_STRING), - OptionalInt.empty(), - true); - assertThat(insertLayout.get().getPartitioning()).isEqualTo(Optional.of(partitioningHandle)); - assertThat(insertLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column1", "column2")); - ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMapping(transaction.getTransactionHandle(), session, partitioningHandle).orElseThrow(); - assertThat(connectorBucketNodeMap.getBucketCount()).isEqualTo(32); - assertThat(connectorBucketNodeMap.hasFixedMapping()).isFalse(); - } - } - finally { - dropTable(tableName); - } - } - - @Test - public void testPreferredCreateTableLayout() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - Optional newTableLayout = metadata.getNewTableLayout( - session, - new ConnectorTableMetadata( - new SchemaTableName("schema", "table"), - ImmutableList.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT)), - ImmutableMap.of( - PARTITIONED_BY_PROPERTY, ImmutableList.of("column2"), - BUCKETED_BY_PROPERTY, ImmutableList.of(), - BUCKET_COUNT_PROPERTY, 0, - SORTED_BY_PROPERTY, ImmutableList.of()))); - assertThat(newTableLayout.isPresent()).isTrue(); - assertThat(newTableLayout.get().getPartitioning().isPresent()).isFalse(); - assertThat(newTableLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column2")); - } - } - - @Test - public void testCreateBucketedTableLayout() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - Optional newTableLayout = metadata.getNewTableLayout( - session, - new ConnectorTableMetadata( - new SchemaTableName("schema", "table"), - ImmutableList.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT)), - ImmutableMap.of( - PARTITIONED_BY_PROPERTY, ImmutableList.of(), - BUCKETED_BY_PROPERTY, ImmutableList.of("column1"), - BUCKET_COUNT_PROPERTY, 10, - SORTED_BY_PROPERTY, ImmutableList.of()))); - assertThat(newTableLayout.isPresent()).isTrue(); - ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle( - BUCKETING_V1, - 10, - ImmutableList.of(HIVE_LONG), - OptionalInt.empty(), - false); - assertThat(newTableLayout.get().getPartitioning()).isEqualTo(Optional.of(partitioningHandle)); - assertThat(newTableLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column1")); - ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMapping(transaction.getTransactionHandle(), session, partitioningHandle).orElseThrow(); - assertThat(connectorBucketNodeMap.getBucketCount()).isEqualTo(10); - assertThat(connectorBucketNodeMap.hasFixedMapping()).isFalse(); - } - } - - @Test - public void testCreatePartitionedBucketedTableLayout() - { - try (Transaction transaction = newTransaction()) { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorSession session = newSession(); - Optional newTableLayout = metadata.getNewTableLayout( - session, - new 
ConnectorTableMetadata( - new SchemaTableName("schema", "table"), - ImmutableList.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT)), - ImmutableMap.of( - PARTITIONED_BY_PROPERTY, ImmutableList.of("column2"), - BUCKETED_BY_PROPERTY, ImmutableList.of("column1"), - BUCKET_COUNT_PROPERTY, 10, - SORTED_BY_PROPERTY, ImmutableList.of()))); - assertThat(newTableLayout.isPresent()).isTrue(); - ConnectorPartitioningHandle partitioningHandle = new HivePartitioningHandle( - BUCKETING_V1, - 10, - ImmutableList.of(HIVE_LONG), - OptionalInt.empty(), - true); - assertThat(newTableLayout.get().getPartitioning()).isEqualTo(Optional.of(partitioningHandle)); - assertThat(newTableLayout.get().getPartitionColumns()).isEqualTo(ImmutableList.of("column1", "column2")); - ConnectorBucketNodeMap connectorBucketNodeMap = nodePartitioningProvider.getBucketNodeMapping(transaction.getTransactionHandle(), session, partitioningHandle).orElseThrow(); - assertThat(connectorBucketNodeMap.getBucketCount()).isEqualTo(32); - assertThat(connectorBucketNodeMap.hasFixedMapping()).isFalse(); - } - } - - protected void doTestTransactionDeleteInsert(HiveStorageFormat storageFormat, boolean allowInsertExisting, List testCases) - throws Exception - { - // There are 4 types of operations on a partition: add, drop, alter (drop then add), insert existing. - // There are 12 partitions in this test, 3 for each type. - // 3 is chosen to verify that cleanups, commit aborts, rollbacks are always as complete as possible regardless of failure. - MaterializedResult beforeData = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), createUnboundedVarcharType()) - .row(110L, "a", "alter1") - .row(120L, "a", "insert1") - .row(140L, "a", "drop1") - .row(210L, "b", "drop2") - .row(310L, "c", "alter2") - .row(320L, "c", "alter3") - .row(510L, "e", "drop3") - .row(610L, "f", "insert2") - .row(620L, "f", "insert3") - .build(); - Domain domainToDrop = Domain.create(ValueSet.of( - createUnboundedVarcharType(), - utf8Slice("alter1"), utf8Slice("alter2"), utf8Slice("alter3"), utf8Slice("drop1"), utf8Slice("drop2"), utf8Slice("drop3")), - false); - List extraRowsForInsertExisting = ImmutableList.of(); - if (allowInsertExisting) { - extraRowsForInsertExisting = MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), createUnboundedVarcharType()) - .row(121L, "a", "insert1") - .row(611L, "f", "insert2") - .row(621L, "f", "insert3") - .build() - .getMaterializedRows(); - } - MaterializedResult insertData = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), createUnboundedVarcharType()) - .row(111L, "a", "alter1") - .row(131L, "a", "add1") - .row(221L, "b", "add2") - .row(311L, "c", "alter2") - .row(321L, "c", "alter3") - .row(411L, "d", "add3") - .rows(extraRowsForInsertExisting) - .build(); - MaterializedResult afterData = - MaterializedResult.resultBuilder(SESSION, BIGINT, createUnboundedVarcharType(), createUnboundedVarcharType()) - .row(120L, "a", "insert1") - .row(610L, "f", "insert2") - .row(620L, "f", "insert3") - .rows(insertData.getMaterializedRows()) - .build(); - - for (TransactionDeleteInsertTestCase testCase : testCases) { - SchemaTableName temporaryDeleteInsert = temporaryTable("delete_insert"); - try { - createEmptyTable( - temporaryDeleteInsert, - storageFormat, - ImmutableList.of(new Column("col1", HIVE_LONG, Optional.empty(), Map.of())), - ImmutableList.of(new Column("pk1", HIVE_STRING, Optional.empty(), Map.of()), new 
Column("pk2", HIVE_STRING, Optional.empty(), Map.of()))); - insertData(temporaryDeleteInsert, beforeData); - try { - doTestTransactionDeleteInsert( - storageFormat, - temporaryDeleteInsert, - domainToDrop, - insertData, - testCase.isExpectCommittedData() ? afterData : beforeData, - testCase.getTag(), - testCase.isExpectQuerySucceed(), - testCase.getConflictTrigger()); - } - catch (AssertionError e) { - throw new AssertionError(format("Test case: %s", testCase), e); - } - } - finally { - dropTable(temporaryDeleteInsert); - } - } - } - - private void doTestTransactionDeleteInsert( - HiveStorageFormat storageFormat, - SchemaTableName tableName, - Domain domainToDrop, - MaterializedResult insertData, - MaterializedResult expectedData, - TransactionDeleteInsertTestTag tag, - boolean expectQuerySucceed, - Optional conflictTrigger) - throws Exception - { - Location writePath = null; - Location targetPath = null; - - try (Transaction transaction = newTransaction()) { - try { - ConnectorMetadata metadata = transaction.getMetadata(); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - ConnectorSession session; - rollbackIfEquals(tag, ROLLBACK_RIGHT_AWAY); - - // Query 1: delete - session = newSession(); - HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("pk2"); - TupleDomain tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of( - dsColumnHandle, domainToDrop)); - Constraint constraint = new Constraint(tupleDomain, tupleDomain.asPredicate(), tupleDomain.getDomains().orElseThrow().keySet()); - tableHandle = applyFilter(metadata, tableHandle, constraint); - tableHandle = metadata.applyDelete(session, tableHandle).get(); - metadata.executeDelete(session, tableHandle); - rollbackIfEquals(tag, ROLLBACK_AFTER_DELETE); - - // Query 2: insert - session = newSession(); - ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES); - rollbackIfEquals(tag, ROLLBACK_AFTER_BEGIN_INSERT); - writePath = getStagingPathRoot(insertTableHandle); - targetPath = getTargetPathRoot(insertTableHandle); - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TESTING_PAGE_SINK_ID); - sink.appendPage(insertData.toPage()); - rollbackIfEquals(tag, ROLLBACK_AFTER_APPEND_PAGE); - Collection fragments = getFutureValue(sink.finish()); - rollbackIfEquals(tag, ROLLBACK_AFTER_SINK_FINISH); - metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); - rollbackIfEquals(tag, ROLLBACK_AFTER_FINISH_INSERT); - - assertThat(tag).isEqualTo(COMMIT); - - if (conflictTrigger.isPresent()) { - JsonCodec partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class); - List partitionUpdates = fragments.stream() - .map(Slice::getBytes) - .map(partitionUpdateCodec::fromJson) - .collect(toList()); - conflictTrigger.get().triggerConflict(session, tableName, insertTableHandle, partitionUpdates); - } - transaction.commit(); - if (conflictTrigger.isPresent()) { - assertThat(expectQuerySucceed).isTrue(); - conflictTrigger.get().verifyAndCleanup(session, tableName); - } - } - catch (TestingRollbackException e) { - transaction.rollback(); - } - catch (TrinoException e) { - assertThat(expectQuerySucceed).isFalse(); - if (conflictTrigger.isPresent()) { - conflictTrigger.get().verifyAndCleanup(newSession(), tableName); - } - } - } - - // check that temporary files are removed - if (writePath != null && 
!writePath.equals(targetPath)) { - HdfsContext context = new HdfsContext(newSession()); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, new Path(writePath.toString())); - assertThat(fileSystem.exists(new Path(writePath.toString()))).isFalse(); - } - - try (Transaction transaction = newTransaction()) { - // verify partitions - List partitionNames = transaction.getMetastore() - .getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); - assertEqualsIgnoreOrder( - partitionNames, - expectedData.getMaterializedRows().stream() - .map(row -> format("pk1=%s/pk2=%s", row.getField(1), row.getField(2))) - .distinct() - .collect(toImmutableList())); - - // load the new table - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - - // verify the data - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertEqualsIgnoreOrder(result.getMaterializedRows(), expectedData.getMaterializedRows()); - } - } - - private static void rollbackIfEquals(TransactionDeleteInsertTestTag tag, TransactionDeleteInsertTestTag expectedTag) - { - if (expectedTag == tag) { - throw new TestingRollbackException(); - } - } - - private static class TestingRollbackException - extends RuntimeException - { - } - - protected static class TransactionDeleteInsertTestCase - { - private final boolean expectCommittedData; - private final boolean expectQuerySucceed; - private final TransactionDeleteInsertTestTag tag; - private final Optional conflictTrigger; - - public TransactionDeleteInsertTestCase(boolean expectCommittedData, boolean expectQuerySucceed, TransactionDeleteInsertTestTag tag, Optional conflictTrigger) - { - this.expectCommittedData = expectCommittedData; - this.expectQuerySucceed = expectQuerySucceed; - this.tag = tag; - this.conflictTrigger = conflictTrigger; - } - - public boolean isExpectCommittedData() - { - return expectCommittedData; - } - - public boolean isExpectQuerySucceed() - { - return expectQuerySucceed; - } - - public TransactionDeleteInsertTestTag getTag() - { - return tag; - } - - public Optional getConflictTrigger() - { - return conflictTrigger; - } - - @Override - public String toString() - { - return toStringHelper(this) - .add("tag", tag) - .add("conflictTrigger", conflictTrigger.map(conflictTrigger -> conflictTrigger.getClass().getName())) - .add("expectCommittedData", expectCommittedData) - .add("expectQuerySucceed", expectQuerySucceed) - .toString(); - } - } - - protected enum TransactionDeleteInsertTestTag - { - ROLLBACK_RIGHT_AWAY, - ROLLBACK_AFTER_DELETE, - ROLLBACK_AFTER_BEGIN_INSERT, - ROLLBACK_AFTER_APPEND_PAGE, - ROLLBACK_AFTER_SINK_FINISH, - ROLLBACK_AFTER_FINISH_INSERT, - COMMIT, - } - - protected interface ConflictTrigger - { - void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - throws IOException; - - void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - throws IOException; - } - - protected class AddPartitionFailure - implements ConflictTrigger - { - private final ImmutableList copyPartitionFrom = 
ImmutableList.of("a", "insert1"); - private final String partitionNameToConflict = "pk1=b/pk2=add2"; - private Partition conflictPartition; - - @Override - public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - { - // This method bypasses transaction interface because this method is inherently hacky and doesn't work well with the transaction abstraction. - // Additionally, this method is not part of a test. Its purpose is to set up an environment for another test. - HiveMetastore metastoreClient = getMetastoreClient(); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - Optional partition = metastoreClient.getPartition(table, copyPartitionFrom); - conflictPartition = Partition.builder(partition.get()) - .setValues(toPartitionValues(partitionNameToConflict)) - .build(); - metastoreClient.addPartitions( - tableName.getSchemaName(), - tableName.getTableName(), - ImmutableList.of(new PartitionWithStatistics(conflictPartition, partitionNameToConflict, PartitionStatistics.empty()))); - } - - @Override - public void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - { - // This method bypasses transaction interface because this method is inherently hacky and doesn't work well with the transaction abstraction. - // Additionally, this method is not part of a test. Its purpose is to set up an environment for another test. - HiveMetastore metastoreClient = getMetastoreClient(); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - Optional actualPartition = metastoreClient.getPartition(table, toPartitionValues(partitionNameToConflict)); - // Make sure the partition inserted to trigger conflict was not overwritten - // Checking storage location is sufficient because implement never uses .../pk1=a/pk2=a2 as the directory for partition [b, b2]. - assertThat(actualPartition.get().getStorage().getLocation()).isEqualTo(conflictPartition.getStorage().getLocation()); - metastoreClient.dropPartition(tableName.getSchemaName(), tableName.getTableName(), conflictPartition.getValues(), false); - } - } - - protected class DropPartitionFailure - implements ConflictTrigger - { - private final ImmutableList partitionValueToConflict = ImmutableList.of("b", "drop2"); - - @Override - public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - { - // This method bypasses transaction interface because this method is inherently hacky and doesn't work well with the transaction abstraction. - // Additionally, this method is not part of a test. Its purpose is to set up an environment for another test. 
- HiveMetastore metastoreClient = getMetastoreClient(); - metastoreClient.dropPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValueToConflict, false); - } - - @Override - public void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - { - // Do not add back the deleted partition because the implementation is expected to move forward instead of backward when delete fails - } - } - - protected class DirectoryRenameFailure - implements ConflictTrigger - { - private HdfsContext context; - private Path path; - - @Override - public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - throws IOException - { - Location writePath = getStagingPathRoot(insertTableHandle); - Location targetPath = getTargetPathRoot(insertTableHandle); - if (writePath.equals(targetPath)) { - // This conflict does not apply. Trigger a rollback right away so that this test case passes. - throw new TestingRollbackException(); - } - path = new Path(targetPath.appendPath("pk1=b").appendPath("pk2=add2").toString()); - context = new HdfsContext(session); - if (!hdfsEnvironment.getFileSystem(context, path).mkdirs(path, hdfsEnvironment.getNewDirectoryPermissions().orElse(null))) { - throw new IOException("mkdirs returned false"); - } - } - - @Override - public void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - throws IOException - { - assertThat(listDirectory(context, path)).isEqualTo(ImmutableList.of()); - hdfsEnvironment.getFileSystem(context, path).delete(path, false); - } - } - - protected class FileRenameFailure - implements ConflictTrigger - { - private HdfsContext context; - private Path path; - - @Override - public void triggerConflict(ConnectorSession session, SchemaTableName tableName, ConnectorInsertTableHandle insertTableHandle, List partitionUpdates) - throws IOException - { - for (PartitionUpdate partitionUpdate : partitionUpdates) { - if ("pk2=insert2".equals(partitionUpdate.getTargetPath().fileName())) { - path = new Path(partitionUpdate.getTargetPath().toString(), partitionUpdate.getFileNames().get(0)); - break; - } - } - assertThat(path).isNotNull(); - - context = new HdfsContext(session); - FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, path); - fileSystem.createNewFile(path); - } - - @Override - public void verifyAndCleanup(ConnectorSession session, SchemaTableName tableName) - throws IOException - { - // The file we added to trigger a conflict was cleaned up because it matches the query prefix. - // Consider this the same as a network failure that caused the successful creation of file not reported to the caller. 
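// Hence only the file's absence is verified here; the insert's own cleanup is expected
// to have removed it, since (per the comment above) its name matches the query's file prefix.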
- assertThat(hdfsEnvironment.getFileSystem(context, path).exists(path)).isFalse(); - } - } - - private static class CountingDirectoryLister - implements DirectoryLister - { - private final AtomicInteger listCount = new AtomicInteger(); - - @Override - public RemoteIterator listFilesRecursively(TrinoFileSystem fs, Table table, Location location) - throws IOException - { - listCount.incrementAndGet(); - return new TrinoFileStatusRemoteIterator(fs.listFiles(location)); - } - - public int getListCount() - { - return listCount.get(); - } - - @Override - public void invalidate(Partition partition) - { - } - - @Override - public void invalidate(Table table) - { - } - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java deleted file mode 100644 index 10d4b3130cdb..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.io.RecursiveDeleteOption; -import com.google.common.reflect.ClassPath; -import io.airlift.log.Logger; -import io.trino.filesystem.Location; -import io.trino.plugin.hive.metastore.Column; -import io.trino.plugin.hive.metastore.Database; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.PrincipalPrivileges; -import io.trino.plugin.hive.metastore.SortingColumn; -import io.trino.plugin.hive.metastore.StorageFormat; -import io.trino.plugin.hive.metastore.Table; -import io.trino.spi.connector.ColumnHandle; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorTableHandle; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.security.PrincipalType; -import io.trino.testing.MaterializedResult; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalInt; - -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT; -import static io.trino.plugin.hive.HiveMetadata.TRINO_QUERY_ID_NAME; -import static io.trino.plugin.hive.HiveMetadata.TRINO_VERSION_NAME; -import static io.trino.plugin.hive.HiveStorageFormat.ORC; -import static io.trino.plugin.hive.HiveStorageFormat.TEXTFILE; -import 
static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; -import static io.trino.plugin.hive.HiveType.HIVE_INT; -import static io.trino.plugin.hive.HiveType.HIVE_STRING; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.TableType.MANAGED_TABLE; -import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; -import static io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; -import static io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1; -import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY; -import static java.nio.file.Files.copy; -import static java.util.Objects.requireNonNull; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assumptions.abort; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public abstract class AbstractTestHiveLocal - extends AbstractTestHive -{ - private static final Logger log = Logger.get(AbstractTestHiveLocal.class); - private static final String DEFAULT_TEST_DB_NAME = "test"; - - private File tempDir; - private final String testDbName; - - protected AbstractTestHiveLocal() - { - this(DEFAULT_TEST_DB_NAME); - } - - protected AbstractTestHiveLocal(String testDbName) - { - this.testDbName = requireNonNull(testDbName, "testDbName is null"); - } - - protected abstract HiveMetastore createMetastore(File tempDir); - - @BeforeAll - public void initialize() - throws Exception - { - tempDir = Files.createTempDirectory(null).toFile(); - - HiveMetastore metastore = createMetastore(tempDir); - - metastore.createDatabase( - Database.builder() - .setDatabaseName(testDbName) - .setOwnerName(Optional.of("public")) - .setOwnerType(Optional.of(PrincipalType.ROLE)) - .build()); - - HiveConfig hiveConfig = new HiveConfig() - .setParquetTimeZone("America/Los_Angeles") - .setRcfileTimeZone("America/Los_Angeles"); - - setup(testDbName, hiveConfig, metastore, HDFS_ENVIRONMENT); - - createTestTables(); - } - - protected void createTestTables() - throws Exception - { - Location location = Location.of(metastoreClient.getDatabase(database).orElseThrow() - .getLocation().orElseThrow()); - - createTestTable( - // Matches create-test.sql » trino_test_partition_format - Table.builder() - .setDatabaseName(database) - .setTableName(tablePartitionFormat.getTableName()) - .setTableType(MANAGED_TABLE.name()) - .setOwner(Optional.empty()) - .setDataColumns(List.of( - new Column("t_string", HiveType.HIVE_STRING, Optional.empty(), Map.of()), - new Column("t_tinyint", HiveType.HIVE_BYTE, Optional.empty(), Map.of()), - new Column("t_smallint", HiveType.HIVE_SHORT, Optional.empty(), Map.of()), - new Column("t_int", HiveType.HIVE_INT, Optional.empty(), Map.of()), - new Column("t_bigint", HiveType.HIVE_LONG, Optional.empty(), Map.of()), - new Column("t_float", HiveType.HIVE_FLOAT, Optional.empty(), Map.of()), - new Column("t_boolean", HiveType.HIVE_BOOLEAN, Optional.empty(), Map.of()))) - .setPartitionColumns(List.of( - new Column("ds", HiveType.HIVE_STRING, Optional.empty(), Map.of()), - new Column("file_format", HiveType.HIVE_STRING, Optional.empty(), Map.of()), - new Column("dummy", HiveType.HIVE_INT, Optional.empty(), Map.of()))) - .setParameter(TABLE_COMMENT, "Presto test data") - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(new HiveConfig().getHiveStorageFormat())) - 
.setLocation(Optional.of(location.appendPath(tablePartitionFormat.getTableName()).toString()))) - .build()); - - createTestTable( - // Matches create-test.sql » trino_test_partition_format - Table.builder() - .setDatabaseName(database) - .setTableName(tableUnpartitioned.getTableName()) - .setTableType(MANAGED_TABLE.name()) - .setOwner(Optional.empty()) - .setDataColumns(List.of( - new Column("t_string", HiveType.HIVE_STRING, Optional.empty(), Map.of()), - new Column("t_tinyint", HiveType.HIVE_BYTE, Optional.empty(), Map.of()))) - .setParameter(TABLE_COMMENT, "Presto test data") - .withStorage(storage -> storage - .setStorageFormat(fromHiveStorageFormat(TEXTFILE)) - .setLocation(Optional.of(location.appendPath(tableUnpartitioned.getTableName()).toString()))) - .build()); - } - - protected void createTestTable(Table table) - throws Exception - { - metastoreClient.createTable(table, NO_PRIVILEGES); - } - - @AfterAll - public void cleanup() - throws IOException - { - try { - for (String tableName : metastoreClient.getTables(database)) { - metastoreClient.dropTable(database, tableName, true); - } - metastoreClient.dropDatabase(testDbName, true); - } - finally { - deleteRecursively(tempDir.toPath(), ALLOW_INSECURE); - } - } - - @Override - protected ConnectorTableHandle getTableHandle(ConnectorMetadata metadata, SchemaTableName tableName) - { - if (tableName.getTableName().startsWith(TEMPORARY_TABLE_PREFIX)) { - return super.getTableHandle(metadata, tableName); - } - return abort("tests using existing tables are not supported"); - } - - @Test - @Override - public void testGetAllTableColumns() - { - abort("Test disabled for this subclass"); - } - - @Test - @Override - public void testGetAllTableColumnsInSchema() - { - abort("Test disabled for this subclass"); - } - - @Test - @Override - public void testGetTableNames() - { - abort("Test disabled for this subclass"); - } - - @Test - @Override - public void testGetTableSchemaOffline() - { - abort("Test disabled for this subclass"); - } - - @Test - public void testSparkBucketedTableValidation() - throws Exception - { - SchemaTableName table = temporaryTable("spark_bucket_validation"); - try { - doTestSparkBucketedTableValidation(table); - } - finally { - dropTable(table); - } - } - - private void doTestSparkBucketedTableValidation(SchemaTableName tableName) - throws Exception - { - Path externalLocation = copyResourceDirToTemporaryDirectory("spark_bucketed_nation"); - try { - createExternalTable( - tableName, - ORC, - ImmutableList.of( - new Column("nationkey", HIVE_INT, Optional.empty(), Map.of()), - new Column("name", HIVE_STRING, Optional.empty(), Map.of()), - new Column("regionkey", HIVE_INT, Optional.empty(), Map.of()), - new Column("comment", HIVE_STRING, Optional.empty(), Map.of())), - ImmutableList.of(), - Optional.of(new HiveBucketProperty( - ImmutableList.of("nationkey"), - BUCKETING_V1, - 3, - ImmutableList.of(new SortingColumn("name", SortingColumn.Order.ASCENDING)))), - Location.of(externalLocation.toUri().toString())); - - assertReadFailsWithMessageMatching(ORC, tableName, "Hive table is corrupt\\. 
File '.*/.*' is for bucket [0-2], but contains a row for bucket [0-2]."); - markTableAsCreatedBySpark(tableName, "orc"); - assertReadReturnsRowCount(ORC, tableName, 25); - } - finally { - deleteRecursively(externalLocation, RecursiveDeleteOption.ALLOW_INSECURE); - } - } - - private void assertReadReturnsRowCount(HiveStorageFormat storageFormat, SchemaTableName tableName, int rowCount) - throws Exception - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - metadata.beginQuery(session); - ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName); - List columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values()); - MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)); - assertThat(result.getRowCount()).isEqualTo(rowCount); - } - } - - private void markTableAsCreatedBySpark(SchemaTableName tableName, String provider) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(session); - Table oldTable = transaction.getMetastore().getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - Table.Builder newTable = Table.builder(oldTable).setParameter(SPARK_TABLE_PROVIDER_KEY, provider); - transaction.getMetastore().replaceTable(tableName.getSchemaName(), tableName.getTableName(), newTable.build(), principalPrivileges); - transaction.commit(); - } - } - - private void createExternalTable(SchemaTableName schemaTableName, HiveStorageFormat hiveStorageFormat, List columns, List partitionColumns, Optional bucketProperty, Location externalLocation) - { - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - - String tableOwner = session.getUser(); - String schemaName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - - Table.Builder tableBuilder = Table.builder() - .setDatabaseName(schemaName) - .setTableName(tableName) - .setOwner(Optional.of(tableOwner)) - .setTableType(EXTERNAL_TABLE.name()) - .setParameters(ImmutableMap.of( - TRINO_VERSION_NAME, TEST_SERVER_VERSION, - TRINO_QUERY_ID_NAME, session.getQueryId())) - .setDataColumns(columns) - .setPartitionColumns(partitionColumns); - - tableBuilder.getStorageBuilder() - .setLocation(externalLocation.toString()) - .setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerde(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat())) - .setBucketProperty(bucketProperty) - .setSerdeParameters(ImmutableMap.of()); - - PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.of(externalLocation), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); - - transaction.commit(); - } - } - - private Path copyResourceDirToTemporaryDirectory(String resourceName) - throws IOException - { - Path tempDir = java.nio.file.Files.createTempDirectory(getClass().getSimpleName()).normalize(); - log.info("Copying resource dir '%s' to %s", resourceName, tempDir); - ClassPath.from(getClass().getClassLoader()) - .getResources().stream() - .filter(resourceInfo -> resourceInfo.getResourceName().startsWith(resourceName)) - .forEach(resourceInfo -> { - 
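// For every classpath resource under the requested directory, recreate its relative
// path inside the temporary directory and copy the bytes over.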
try { - Path target = tempDir.resolve(resourceInfo.getResourceName()); - java.nio.file.Files.createDirectories(target.getParent()); - try (InputStream inputStream = resourceInfo.asByteSource().openStream()) { - copy(inputStream, target); - } - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - }); - return tempDir.resolve(resourceName).normalize(); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileMetastore.java deleted file mode 100644 index 2a7f1deb1252..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileMetastore.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.file.FileHiveMetastore; -import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig; -import org.junit.jupiter.api.Test; - -import java.io.File; - -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static org.junit.jupiter.api.Assumptions.abort; - -public class TestHiveFileMetastore - extends AbstractTestHiveLocal -{ - @Override - protected HiveMetastore createMetastore(File tempDir) - { - File baseDir = new File(tempDir, "metastore"); - return new FileHiveMetastore( - new NodeVersion("test_version"), - HDFS_FILE_SYSTEM_FACTORY, - true, - new FileHiveMetastoreConfig() - .setCatalogDirectory(baseDir.toURI().toString()) - .setMetastoreUser("test")); - } - - @Test - @Override - public void testMismatchSchemaTable() - { - // FileHiveMetastore only supports replaceTable() for views - } - - @Test - @Override - public void testPartitionSchemaMismatch() - { - // test expects an exception to be thrown - abort("FileHiveMetastore only supports replaceTable() for views"); - } - - @Test - @Override - public void testBucketedTableEvolution() - { - // FileHiveMetastore only supports replaceTable() for views - } - - @Test - @Override - public void testBucketedTableEvolutionWithDifferentReadBucketCount() - { - // FileHiveMetastore has various incompatibilities - } - - @Test - @Override - public void testTransactionDeleteInsert() - { - // FileHiveMetastore has various incompatibilities - } - - @Test - @Override - public void testInsertOverwriteUnpartitioned() - { - // FileHiveMetastore has various incompatibilities - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueHiveMetastore.java deleted file mode 100644 index 6d95ddbb5dea..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueHiveMetastore.java +++ /dev/null @@ -1,1605 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive.metastore.glue; - -import com.amazonaws.services.glue.AWSGlueAsync; -import com.amazonaws.services.glue.AWSGlueAsyncClientBuilder; -import com.amazonaws.services.glue.model.CreateTableRequest; -import com.amazonaws.services.glue.model.Database; -import com.amazonaws.services.glue.model.DeleteDatabaseRequest; -import com.amazonaws.services.glue.model.DeleteTableRequest; -import com.amazonaws.services.glue.model.EntityNotFoundException; -import com.amazonaws.services.glue.model.GetDatabasesRequest; -import com.amazonaws.services.glue.model.GetDatabasesResult; -import com.amazonaws.services.glue.model.TableInput; -import com.amazonaws.services.glue.model.UpdateTableRequest; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.airlift.concurrent.BoundedExecutor; -import io.airlift.log.Logger; -import io.airlift.slice.Slice; -import io.trino.plugin.hive.AbstractTestHiveLocal; -import io.trino.plugin.hive.HiveBasicStatistics; -import io.trino.plugin.hive.HiveMetastoreClosure; -import io.trino.plugin.hive.HiveType; -import io.trino.plugin.hive.PartitionStatistics; -import io.trino.plugin.hive.metastore.HiveColumnStatistics; -import io.trino.plugin.hive.metastore.HiveMetastore; -import io.trino.plugin.hive.metastore.PartitionWithStatistics; -import io.trino.plugin.hive.metastore.Table; -import io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter; -import io.trino.spi.TrinoException; -import io.trino.spi.block.Block; -import io.trino.spi.block.BlockBuilder; -import io.trino.spi.connector.ColumnMetadata; -import io.trino.spi.connector.ConnectorMetadata; -import io.trino.spi.connector.ConnectorOutputTableHandle; -import io.trino.spi.connector.ConnectorPageSink; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.connector.ConnectorTableMetadata; -import io.trino.spi.connector.SchemaTableName; -import io.trino.spi.connector.TableNotFoundException; -import io.trino.spi.predicate.Domain; -import io.trino.spi.predicate.Range; -import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.predicate.ValueSet; -import io.trino.spi.statistics.ComputedStatistics; -import io.trino.spi.statistics.TableStatisticType; -import io.trino.spi.type.BigintType; -import io.trino.spi.type.DateType; -import io.trino.spi.type.IntegerType; -import io.trino.spi.type.SmallintType; -import io.trino.spi.type.TimestampType; -import io.trino.spi.type.TinyintType; -import io.trino.spi.type.VarcharType; -import io.trino.testing.MaterializedResult; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalLong; -import java.util.Set; -import java.util.concurrent.Executor; -import java.util.function.Supplier; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static 
io.airlift.concurrent.MoreFutures.getFutureValue; -import static io.airlift.slice.Slices.utf8Slice; -import static io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics; -import static io.trino.plugin.hive.HiveColumnStatisticType.MAX_VALUE; -import static io.trino.plugin.hive.HiveColumnStatisticType.MIN_VALUE; -import static io.trino.plugin.hive.HiveColumnStatisticType.NUMBER_OF_DISTINCT_VALUES; -import static io.trino.plugin.hive.HiveColumnStatisticType.NUMBER_OF_NON_NULL_VALUES; -import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT; -import static io.trino.plugin.hive.HiveStorageFormat.ORC; -import static io.trino.plugin.hive.HiveStorageFormat.TEXTFILE; -import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; -import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; -import static io.trino.plugin.hive.TableType.VIRTUAL_VIEW; -import static io.trino.plugin.hive.ViewReaderUtil.ICEBERG_MATERIALIZED_VIEW_COMMENT; -import static io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG; -import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; -import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; -import static io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics; -import static io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults; -import static io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.DECIMAL_TYPE; -import static io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.decimalOf; -import static io.trino.plugin.hive.metastore.glue.TestingGlueHiveMetastore.createTestingAsyncGlueClient; -import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME; -import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE; -import static io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY; -import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable; -import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; -import static io.trino.spi.connector.RetryMode.NO_RETRIES; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.VarcharType.VARCHAR; -import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; -import static io.trino.testing.TestingPageSinkId.TESTING_PAGE_SINK_ID; -import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; -import static java.lang.String.format; -import static java.lang.System.currentTimeMillis; -import static java.util.Collections.unmodifiableList; -import static java.util.Locale.ENGLISH; -import static java.util.Objects.requireNonNull; -import static java.util.UUID.randomUUID; -import static java.util.concurrent.TimeUnit.DAYS; -import static org.apache.hadoop.hive.common.FileUtils.makePartName; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assumptions.abort; - -/* - * GlueHiveMetastore currently uses AWS Default Credential Provider Chain, - * See https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default - * on ways to set your AWS credentials which will be needed to run this test. 
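 * (For reference, the default chain typically resolves credentials from environment
 * variables, Java system properties, the ~/.aws/credentials profile file, and
 * EC2/ECS instance credentials, in that order.)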
- */ -public class TestGlueHiveMetastore - extends AbstractTestHiveLocal -{ - private static final Logger log = Logger.get(TestGlueHiveMetastore.class); - - private static final String PARTITION_KEY = "part_key_1"; - private static final String PARTITION_KEY2 = "part_key_2"; - private static final String TEST_DATABASE_NAME_PREFIX = "test_glue"; - - private static final List CREATE_TABLE_COLUMNS = ImmutableList.of(new ColumnMetadata("id", BIGINT)); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, VarcharType.VARCHAR)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_TWO_KEYS = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, VarcharType.VARCHAR)) - .add(new ColumnMetadata(PARTITION_KEY2, BigintType.BIGINT)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, TinyintType.TINYINT)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, SmallintType.SMALLINT)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, IntegerType.INTEGER)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, BigintType.BIGINT)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, DECIMAL_TYPE)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_DATE = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, DateType.DATE)) - .build(); - private static final List CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP = ImmutableList.builder() - .addAll(CREATE_TABLE_COLUMNS) - .add(new ColumnMetadata(PARTITION_KEY, TimestampType.TIMESTAMP_MILLIS)) - .build(); - private static final List VARCHAR_PARTITION_VALUES = ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"); - - protected static final HiveBasicStatistics HIVE_BASIC_STATISTICS = new HiveBasicStatistics(1000, 5000, 3000, 4000); - protected static final HiveColumnStatistics INTEGER_COLUMN_STATISTICS = createIntegerColumnStatistics( - OptionalLong.of(-1000), - OptionalLong.of(1000), - OptionalLong.of(1), - OptionalLong.of(2)); - - private HiveMetastoreClosure metastore; - private AWSGlueAsync glueClient; - - public TestGlueHiveMetastore() - { - super(TEST_DATABASE_NAME_PREFIX + randomUUID().toString().toLowerCase(ENGLISH).replace("-", "")); - } - - protected AWSGlueAsync getGlueClient() - { - return glueClient; - } - - @BeforeAll - @Override - public void initialize() - throws Exception - { - super.initialize(); - // uncomment to get extra AWS debug information -// Logging logging = Logging.initialize(); -// logging.setLevel("com.amazonaws.request", Level.DEBUG); - - metastore = new HiveMetastoreClosure(metastoreClient, TESTING_TYPE_MANAGER, false); - glueClient = AWSGlueAsyncClientBuilder.defaultClient(); - } - - @Override - protected HiveMetastore createMetastore(File tempDir) - { - 
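// Builds a GlueHiveMetastore backed by the real AWS Glue service; the per-test
// temporary directory becomes the default warehouse location, so table data written
// by the tests stays local even though the metadata lives in Glue.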
GlueHiveMetastoreConfig glueConfig = new GlueHiveMetastoreConfig(); - glueConfig.setDefaultWarehouseDir(tempDir.toURI().toString()); - glueConfig.setAssumeCanonicalPartitionKeys(true); - - Executor executor = new BoundedExecutor(this.executor, 10); - GlueMetastoreStats stats = new GlueMetastoreStats(); - return new GlueHiveMetastore( - HDFS_FILE_SYSTEM_FACTORY, - glueConfig, - executor, - new DefaultGlueColumnStatisticsProviderFactory(executor, executor), - createTestingAsyncGlueClient(glueConfig, stats), - stats, - new DefaultGlueMetastoreTableFilterProvider(true).get()); - } - - @Test - public void cleanupOrphanedDatabases() - { - long creationTimeMillisThreshold = currentTimeMillis() - DAYS.toMillis(1); - GlueHiveMetastore metastore = (GlueHiveMetastore) getMetastoreClient(); - GlueMetastoreStats stats = metastore.getStats(); - List orphanedDatabases = getPaginatedResults( - glueClient::getDatabases, - new GetDatabasesRequest(), - GetDatabasesRequest::setNextToken, - GetDatabasesResult::getNextToken, - stats.getGetDatabases()) - .map(GetDatabasesResult::getDatabaseList) - .flatMap(List::stream) - .filter(database -> database.getName().startsWith(TEST_DATABASE_NAME_PREFIX) && - database.getCreateTime().getTime() <= creationTimeMillisThreshold) - .map(Database::getName) - .collect(toImmutableList()); - - log.info("Found %s %s* databases that look orphaned, removing", orphanedDatabases.size(), TEST_DATABASE_NAME_PREFIX); - orphanedDatabases.forEach(database -> { - try { - glueClient.deleteDatabase(new DeleteDatabaseRequest() - .withName(database)); - } - catch (EntityNotFoundException e) { - log.info("Database [%s] not found, could be removed by other cleanup process", database); - } - catch (RuntimeException e) { - log.warn(e, "Failed to remove database [%s]", database); - } - }); - } - - @Test - @Override - public void testRenameTable() - { - // rename table is not yet supported by Glue - } - - @Test - @Override - public void testUpdateTableColumnStatisticsEmptyOptionalFields() - { - // this test expects consistency between written and read stats but this is not provided by glue at the moment - // when writing empty min/max statistics glue will return 0 to the readers - // in order to avoid incorrect data we skip writes for statistics with min/max = null - } - - @Test - @Override - public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() - { - // this test expects consistency between written and read stats but this is not provided by glue at the moment - // when writing empty min/max statistics glue will return 0 to the readers - // in order to avoid incorrect data we skip writes for statistics with min/max = null - } - - @Test - @Override - public void testUpdateBasicPartitionStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_basic_partition_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), - ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testUpdatePartitionColumnStatistics() - throws Exception - { - SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); - try { - createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); - // When the table has partitions, but row count statistics are set to zero, we treat this 
case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testUpdatePartitionStatistics( - tableName, - EMPTY_ROWCOUNT_STATISTICS, - ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), - ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testStorePartitionWithStatistics() - throws Exception - { - // When the table has partitions, but row count statistics are set to zero, we treat this case as empty - // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are - // used to ingest data into partitioned hive tables. - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, BASIC_STATISTICS_1, BASIC_STATISTICS_2, BASIC_STATISTICS_1, EMPTY_ROWCOUNT_STATISTICS); - } - - @Test - @Override - public void testGetPartitions() - throws Exception - { - try { - SchemaTableName tableName = temporaryTable("get_partitions"); - createDummyPartitionedTable(tableName, CREATE_TABLE_COLUMNS_PARTITIONED); - HiveMetastore metastoreClient = getMetastoreClient(); - Optional> partitionNames = metastoreClient.getPartitionNamesByFilter( - tableName.getSchemaName(), - tableName.getTableName(), - ImmutableList.of("ds"), TupleDomain.all()); - assertThat(partitionNames.isPresent()).isTrue(); - assertThat(partitionNames.get()).isEqualTo(ImmutableList.of("ds=2016-01-01", "ds=2016-01-02")); - } - finally { - dropTable(tablePartitionFormat); - } - } - - @Test - public void testGetPartitionsWithFilterUsingReservedKeywordsAsColumnName() - throws Exception - { - SchemaTableName tableName = temporaryTable("get_partitions_with_filter_using_reserved_keyword_column_name"); - try { - String reservedKeywordPartitionColumnName = "key"; - String regularColumnPartitionName = "int_partition"; - List columns = ImmutableList.builder() - .add(new ColumnMetadata("t_string", createUnboundedVarcharType())) - .add(new ColumnMetadata(reservedKeywordPartitionColumnName, createUnboundedVarcharType())) - .add(new ColumnMetadata(regularColumnPartitionName, BIGINT)) - .build(); - List partitionedBy = ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName); - - doCreateEmptyTable(tableName, ORC, columns, partitionedBy); - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - - String partitionName1 = makePartName(ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName), ImmutableList.of("value1", "1")); - String partitionName2 = makePartName(ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName), ImmutableList.of("value2", "2")); - - List partitions = ImmutableList.of(partitionName1, partitionName2) - .stream() - .map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())) - .collect(toImmutableList()); - metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); - metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName1, currentStatistics -> ZERO_TABLE_STATISTICS); - 
metastoreClient.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName2, currentStatistics -> ZERO_TABLE_STATISTICS); - - Optional> partitionNames = metastoreClient.getPartitionNamesByFilter( - tableName.getSchemaName(), - tableName.getTableName(), - ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName), - TupleDomain.withColumnDomains(ImmutableMap.of(regularColumnPartitionName, Domain.singleValue(BIGINT, 2L)))); - assertThat(partitionNames.isPresent()).isTrue(); - assertThat(partitionNames.get()).isEqualTo(ImmutableList.of("key=value2/int_partition=2")); - - // KEY is a reserved keyword in the grammar of the SQL parser used internally by Glue API - // and therefore should not be used in the partition filter - partitionNames = metastoreClient.getPartitionNamesByFilter( - tableName.getSchemaName(), - tableName.getTableName(), - ImmutableList.of(reservedKeywordPartitionColumnName, regularColumnPartitionName), - TupleDomain.withColumnDomains(ImmutableMap.of(reservedKeywordPartitionColumnName, Domain.singleValue(VARCHAR, utf8Slice("value1"))))); - assertThat(partitionNames.isPresent()).isTrue(); - assertThat(partitionNames.get()).isEqualTo(ImmutableList.of("key=value1/int_partition=1", "key=value2/int_partition=2")); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testGetDatabasesLogsStats() - { - GlueHiveMetastore metastore = (GlueHiveMetastore) getMetastoreClient(); - GlueMetastoreStats stats = metastore.getStats(); - double initialCallCount = stats.getGetDatabases().getTime().getAllTime().getCount(); - long initialFailureCount = stats.getGetDatabases().getTotalFailures().getTotalCount(); - getMetastoreClient().getAllDatabases(); - assertThat(stats.getGetDatabases().getTime().getAllTime().getCount()).isGreaterThan(initialCallCount); - assertThat(stats.getGetDatabases().getTime().getAllTime().getAvg()).isGreaterThan(0.0); - assertThat(stats.getGetDatabases().getTotalFailures().getTotalCount()).isEqualTo(initialFailureCount); - } - - @Test - public void testGetDatabaseFailureLogsStats() - { - GlueHiveMetastore metastore = (GlueHiveMetastore) getMetastoreClient(); - GlueMetastoreStats stats = metastore.getStats(); - long initialFailureCount = stats.getGetDatabase().getTotalFailures().getTotalCount(); - assertThatThrownBy(() -> getMetastoreClient().getDatabase(null)) - .isInstanceOf(TrinoException.class) - .hasMessageStartingWith("Database name cannot be equal to null or empty"); - assertThat(stats.getGetDatabase().getTotalFailures().getTotalCount()).isEqualTo(initialFailureCount + 1); - } - - @Test - public void testGetPartitionsFilterVarChar() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "2020-01-01") - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(VarcharType.VARCHAR, utf8Slice("2020-02-01"))) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(VarcharType.VARCHAR, utf8Slice("2020-02-01"), true, utf8Slice("2020-03-01"), true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(VarcharType.VARCHAR, utf8Slice("2020-03-01"))) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "2020-01-01", "2020-02-01") - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - 
.addRanges(PARTITION_KEY, Range.lessThan(VarcharType.VARCHAR, utf8Slice("2020-03-01"))) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("2020-01-01"), - ImmutableList.of("2020-03-01", "2020-04-01"), - ImmutableList.of("2020-02-01", "2020-03-01"), - ImmutableList.of("2020-03-01", "2020-04-01"), - ImmutableList.of("2020-01-01", "2020-02-01"), - ImmutableList.of("2020-01-01", "2020-02-01"), - ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"))); - } - - @Test - public void testGetPartitionsFilterBigInt() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addBigintValues(PARTITION_KEY, 1000L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(BigintType.BIGINT, 100L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(BigintType.BIGINT, 100L, true, 1000L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(BigintType.BIGINT, 100L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addBigintValues(PARTITION_KEY, 1L, 1000000L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(BigintType.BIGINT, 1000L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT, - PARTITION_KEY, - ImmutableList.of("1", "100", "1000", "1000000"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("1000"), - ImmutableList.of("1000", "1000000"), - ImmutableList.of("100", "1000"), - ImmutableList.of("100", "1000", "1000000"), - ImmutableList.of("1", "1000000"), - ImmutableList.of("1", "100"), - ImmutableList.of("1", "100", "1000", "1000000"))); - } - - @Test - public void testGetPartitionsFilterInteger() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addIntegerValues(PARTITION_KEY, 1000L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(IntegerType.INTEGER, 100L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(IntegerType.INTEGER, 100L, true, 1000L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(IntegerType.INTEGER, 100L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addIntegerValues(PARTITION_KEY, 1L, 1000000L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(IntegerType.INTEGER, 1000L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER, - PARTITION_KEY, - ImmutableList.of("1", "100", "1000", "1000000"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("1000"), - ImmutableList.of("1000", "1000000"), - ImmutableList.of("100", "1000"), - ImmutableList.of("100", "1000", "1000000"), - ImmutableList.of("1", "1000000"), - 
ImmutableList.of("1", "100"), - ImmutableList.of("1", "100", "1000", "1000000"))); - } - - @Test - public void testGetPartitionsFilterSmallInt() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addSmallintValues(PARTITION_KEY, 1000L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(SmallintType.SMALLINT, 100L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(SmallintType.SMALLINT, 100L, true, 1000L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(SmallintType.SMALLINT, 100L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addSmallintValues(PARTITION_KEY, 1L, 10000L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(SmallintType.SMALLINT, 1000L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT, - PARTITION_KEY, - ImmutableList.of("1", "100", "1000", "10000"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("1000"), - ImmutableList.of("1000", "10000"), - ImmutableList.of("100", "1000"), - ImmutableList.of("100", "1000", "10000"), - ImmutableList.of("1", "10000"), - ImmutableList.of("1", "100"), - ImmutableList.of("1", "100", "1000", "10000"))); - } - - @Test - public void testGetPartitionsFilterTinyInt() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addTinyintValues(PARTITION_KEY, 127L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(TinyintType.TINYINT, 10L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(TinyintType.TINYINT, 10L, true, 100L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(TinyintType.TINYINT, 10L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addTinyintValues(PARTITION_KEY, 1L, 127L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(TinyintType.TINYINT, 100L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, - PARTITION_KEY, - ImmutableList.of("1", "10", "100", "127"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("127"), - ImmutableList.of("100", "127"), - ImmutableList.of("10", "100"), - ImmutableList.of("10", "100", "127"), - ImmutableList.of("1", "127"), - ImmutableList.of("1", "10"), - ImmutableList.of("1", "10", "100", "127"))); - } - - @Test - public void testGetPartitionsFilterTinyIntNegatives() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addTinyintValues(PARTITION_KEY, -128L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(TinyintType.TINYINT, 0L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(TinyintType.TINYINT, 0L, true, 50L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - 
.addRanges(PARTITION_KEY, Range.greaterThanOrEqual(TinyintType.TINYINT, 0L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addTinyintValues(PARTITION_KEY, 0L, -128L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(TinyintType.TINYINT, 0L)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, - PARTITION_KEY, - ImmutableList.of("-128", "0", "50", "100"), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of("-128"), - ImmutableList.of("100", "50"), - ImmutableList.of("0", "50"), - ImmutableList.of("0", "100", "50"), - ImmutableList.of("-128", "0"), - ImmutableList.of("-128"), - ImmutableList.of("-128", "0", "100", "50"))); - } - - @Test - public void testGetPartitionsFilterDecimal() - throws Exception - { - String value1 = "1.000"; - String value2 = "10.134"; - String value3 = "25.111"; - String value4 = "30.333"; - - TupleDomain singleEquals = new PartitionFilterBuilder() - .addDecimalValues(PARTITION_KEY, value1) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(DECIMAL_TYPE, decimalOf(value2))) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(DECIMAL_TYPE, decimalOf(value2), true, decimalOf(value3), true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(DECIMAL_TYPE, decimalOf(value3))) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addDecimalValues(PARTITION_KEY, value1, value4) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(DECIMAL_TYPE, decimalOf("25.5"))) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL, - PARTITION_KEY, - ImmutableList.of(value1, value2, value3, value4), - ImmutableList.of(singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of(value1), - ImmutableList.of(value3, value4), - ImmutableList.of(value2, value3), - ImmutableList.of(value3, value4), - ImmutableList.of(value1, value4), - ImmutableList.of(value1, value2, value3), - ImmutableList.of(value1, value2, value3, value4))); - } - - // we don't presently know how to properly convert a Date type into a string that is compatible with Glue. 
- @Test - public void testGetPartitionsFilterDate() - throws Exception - { - TupleDomain singleEquals = new PartitionFilterBuilder() - .addDateValues(PARTITION_KEY, 18000L) - .build(); - TupleDomain greaterThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThan(DateType.DATE, 19000L)) - .build(); - TupleDomain betweenInclusive = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.range(DateType.DATE, 19000L, true, 20000L, true)) - .build(); - TupleDomain greaterThanOrEquals = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(DateType.DATE, 19000L)) - .build(); - TupleDomain inClause = new PartitionFilterBuilder() - .addDateValues(PARTITION_KEY, 18000L, 21000L) - .build(); - TupleDomain lessThan = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.lessThan(DateType.DATE, 20000L)) - .build(); - // we are unable to convert Date to a string format that Glue will accept, so it should translate to the wildcard in all cases. Commented out results are - // what we expect if we are able to do a proper conversion - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_DATE, - PARTITION_KEY, - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of( - singleEquals, greaterThan, betweenInclusive, greaterThanOrEquals, inClause, lessThan, TupleDomain.all()), - ImmutableList.of( -// ImmutableList.of("18000"), -// ImmutableList.of("20000", "21000"), -// ImmutableList.of("19000", "20000"), -// ImmutableList.of("19000", "20000", "21000"), -// ImmutableList.of("18000", "21000"), -// ImmutableList.of("18000", "19000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"), - ImmutableList.of("18000", "19000", "20000", "21000"))); - } - - @Test - public void testGetPartitionsFilterTwoPartitionKeys() - throws Exception - { - TupleDomain equalsFilter = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "2020-03-01") - .addBigintValues(PARTITION_KEY2, 300L) - .build(); - TupleDomain rangeFilter = new PartitionFilterBuilder() - .addRanges(PARTITION_KEY, Range.greaterThanOrEqual(VarcharType.VARCHAR, utf8Slice("2020-02-01"))) - .addRanges(PARTITION_KEY2, Range.greaterThan(BigintType.BIGINT, 200L)) - .build(); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TWO_KEYS, - ImmutableList.of(PARTITION_KEY, PARTITION_KEY2), - ImmutableList.of( - PartitionValues.make("2020-01-01", "100"), - PartitionValues.make("2020-02-01", "200"), - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make("2020-04-01", "400")), - ImmutableList.of(equalsFilter, rangeFilter, TupleDomain.all()), - ImmutableList.of( - ImmutableList.of(PartitionValues.make("2020-03-01", "300")), - ImmutableList.of( - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make("2020-04-01", "400")), - ImmutableList.of( - PartitionValues.make("2020-01-01", "100"), - PartitionValues.make("2020-02-01", "200"), - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make("2020-04-01", "400")))); - } - - @Test - public void testGetPartitionsFilterMaxLengthWildcard() - throws Exception - { - // this filter string will exceed the 2048 char limit set by glue, and we expect the filter to revert to the wildcard - TupleDomain filter = new 
PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "x".repeat(2048)) - .build(); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(filter), - ImmutableList.of( - ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"))); - } - - @Test - public void testGetPartitionsFilterTwoPartitionKeysPartialQuery() - throws Exception - { - // we expect the second constraint to still be present and provide filtering - TupleDomain equalsFilter = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "x".repeat(2048)) - .addBigintValues(PARTITION_KEY2, 300L) - .build(); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TWO_KEYS, - ImmutableList.of(PARTITION_KEY, PARTITION_KEY2), - ImmutableList.of( - PartitionValues.make("2020-01-01", "100"), - PartitionValues.make("2020-02-01", "200"), - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make("2020-04-01", "400")), - ImmutableList.of(equalsFilter), - ImmutableList.of(ImmutableList.of(PartitionValues.make("2020-03-01", "300")))); - } - - @Test - public void testGetPartitionsFilterNone() - throws Exception - { - // test both a global none and that with a single column none, and a valid domain with none() - TupleDomain noneFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.none(VarcharType.VARCHAR)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(TupleDomain.none(), noneFilter), - ImmutableList.of(ImmutableList.of(), ImmutableList.of())); - } - - @Test - public void testGetPartitionsFilterNotNull() - throws Exception - { - TupleDomain notNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.notNull(VarcharType.VARCHAR)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(notNullFilter), - ImmutableList.of(ImmutableList.of("2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"))); - } - - @Test - public void testGetPartitionsFilterIsNull() - throws Exception - { - TupleDomain isNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.onlyNull(VarcharType.VARCHAR)) - .build(); - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - VARCHAR_PARTITION_VALUES, - ImmutableList.of(isNullFilter), - ImmutableList.of(ImmutableList.of())); - } - - @Test - public void testGetPartitionsFilterIsNullWithValue() - throws Exception - { - List partitionList = new ArrayList<>(); - partitionList.add("100"); - partitionList.add(null); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - partitionList, - ImmutableList.of(new PartitionFilterBuilder() - // IS NULL - .addDomain(PARTITION_KEY, Domain.onlyNull(VarcharType.VARCHAR)) - .build()), - ImmutableList.of(ImmutableList.of(GlueExpressionUtil.NULL_STRING))); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - partitionList, - ImmutableList.of(new PartitionFilterBuilder() - // IS NULL or is a specific value - .addDomain(PARTITION_KEY, Domain.create(ValueSet.of(VARCHAR, utf8Slice("100")), true)) - .build()), - ImmutableList.of(ImmutableList.of("100", GlueExpressionUtil.NULL_STRING))); - } - - @Test - public void testGetPartitionsFilterEqualsOrIsNullWithValue() - throws Exception - { - TupleDomain 
equalsOrIsNullFilter = new PartitionFilterBuilder() - .addStringValues(PARTITION_KEY, "2020-03-01") - .addDomain(PARTITION_KEY, Domain.onlyNull(VarcharType.VARCHAR)) - .build(); - List partitionList = new ArrayList<>(); - partitionList.add("2020-01-01"); - partitionList.add("2020-02-01"); - partitionList.add("2020-03-01"); - partitionList.add(null); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - partitionList, - ImmutableList.of(equalsOrIsNullFilter), - ImmutableList.of(ImmutableList.of("2020-03-01", GlueExpressionUtil.NULL_STRING))); - } - - @Test - public void testGetPartitionsFilterIsNotNull() - throws Exception - { - TupleDomain isNotNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.notNull(VarcharType.VARCHAR)) - .build(); - List partitionList = new ArrayList<>(); - partitionList.add("100"); - partitionList.add(null); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_VARCHAR, - PARTITION_KEY, - partitionList, - ImmutableList.of(isNotNullFilter), - ImmutableList.of(ImmutableList.of("100"))); - } - - @Test - public void testGetPartitionsFilterUnsupported() - throws Exception - { - // Numeric types are unsupported for IS (NOT) NULL predicate pushdown - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, Domain.onlyNull(TinyintType.TINYINT), "127"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT, Domain.onlyNull(SmallintType.SMALLINT), "32767"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER, Domain.onlyNull(IntegerType.INTEGER), "2147483647"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT, Domain.onlyNull(BigintType.BIGINT), "9223372036854775807"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL, Domain.onlyNull(DECIMAL_TYPE), "12345.12345"); - - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_TINYINT, Domain.notNull(TinyintType.TINYINT), "127"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_SMALLINT, Domain.notNull(SmallintType.SMALLINT), "32767"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_INTEGER, Domain.notNull(IntegerType.INTEGER), "2147483647"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_BIGINT, Domain.notNull(BigintType.BIGINT), "9223372036854775807"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_DECIMAL, Domain.notNull(DECIMAL_TYPE), "12345.12345"); - - // Date and timestamp aren't numeric types, but the pushdown is unsupported because of GlueExpressionUtil.canConvertSqlTypeToStringForGlue - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_DATE, Domain.onlyNull(DateType.DATE), "2022-07-11"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP, Domain.onlyNull(TimestampType.TIMESTAMP_MILLIS), "2022-07-11 01:02:03.123"); - - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_DATE, Domain.notNull(DateType.DATE), "2022-07-11"); - testGetPartitionsFilterUnsupported(CREATE_TABLE_COLUMNS_PARTITIONED_TIMESTAMP, Domain.notNull(TimestampType.TIMESTAMP_MILLIS), "2022-07-11 01:02:03.123"); - } - - @Test - @Override - public void testPartitionSchemaMismatch() - { - abort("tests using existing tables are not supported"); - } - - private void testGetPartitionsFilterUnsupported(List columnMetadata, Domain domain, String partitionValue) - throws Exception - { - TupleDomain 
isNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, domain) - .build(); - List partitionList = new ArrayList<>(); - partitionList.add(partitionValue); - partitionList.add(null); - - doGetPartitionsFilterTest( - columnMetadata, - PARTITION_KEY, - partitionList, - ImmutableList.of(isNullFilter), - // Currently, we get NULL partition from Glue and filter it in our side because - // (column '__HIVE_DEFAULT_PARTITION__') on numeric types causes exception on Glue. e.g. 'input string: "__HIVE_D" is not an integer' - ImmutableList.of(ImmutableList.of(partitionValue, GlueExpressionUtil.NULL_STRING))); - } - - @Test - public void testGetPartitionsFilterEqualsAndIsNotNull() - throws Exception - { - TupleDomain equalsAndIsNotNullFilter = new PartitionFilterBuilder() - .addDomain(PARTITION_KEY, Domain.notNull(VarcharType.VARCHAR)) - .addBigintValues(PARTITION_KEY2, 300L) - .build(); - - doGetPartitionsFilterTest( - CREATE_TABLE_COLUMNS_PARTITIONED_TWO_KEYS, - ImmutableList.of(PARTITION_KEY, PARTITION_KEY2), - ImmutableList.of( - PartitionValues.make("2020-01-01", "100"), - PartitionValues.make("2020-02-01", "200"), - PartitionValues.make("2020-03-01", "300"), - PartitionValues.make(null, "300")), - ImmutableList.of(equalsAndIsNotNullFilter), - ImmutableList.of(ImmutableList.of(PartitionValues.make("2020-03-01", "300")))); - } - - @Test - public void testUpdateStatisticsOnCreate() - { - SchemaTableName tableName = temporaryTable("update_statistics_create"); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - List columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT)); - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE)); - ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle, TESTING_PAGE_SINK_ID); - MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT) - .row(1L) - .row(2L) - .row(3L) - .row(4L) - .row(5L) - .build(); - sink.appendPage(data.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // prepare statistics - ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of()) - .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(5)) - .addColumnStatistic(MIN_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(MAX_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(NUMBER_OF_DISTINCT_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(NUMBER_OF_NON_NULL_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .build(); - - // finish CTAS - metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics)); - transaction.commit(); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testUpdatePartitionedStatisticsOnCreate() - { - SchemaTableName tableName = temporaryTable("update_partitioned_statistics_create"); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - - List columns = 
ImmutableList.of( - new ColumnMetadata("a_column", BigintType.BIGINT), - new ColumnMetadata("part_column", BigintType.BIGINT)); - - ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE, ImmutableList.of("part_column"))); - ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES); - - // write data - ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle, TESTING_PAGE_SINK_ID); - MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT, BigintType.BIGINT) - .row(1L, 1L) - .row(2L, 1L) - .row(3L, 1L) - .row(4L, 2L) - .row(5L, 2L) - .build(); - sink.appendPage(data.toPage()); - Collection fragments = getFutureValue(sink.finish()); - - // prepare statistics - ComputedStatistics statistics1 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(1))) - .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(3)) - .addColumnStatistic(MIN_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(MAX_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(NUMBER_OF_DISTINCT_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .addColumnStatistic(NUMBER_OF_NON_NULL_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(1)) - .build(); - ComputedStatistics statistics2 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(2))) - .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(2)) - .addColumnStatistic(MIN_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(4)) - .addColumnStatistic(MAX_VALUE.createColumnStatisticMetadata("a_column"), singleValueBlock(4)) - .addColumnStatistic(NUMBER_OF_DISTINCT_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(4)) - .addColumnStatistic(NUMBER_OF_NON_NULL_VALUES.createColumnStatisticMetadata("a_column"), singleValueBlock(4)) - .build(); - - // finish CTAS - metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics1, statistics2)); - transaction.commit(); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testStatisticsLargeNumberOfColumns() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_statistics_large_number_of_columns"); - try { - ImmutableList.Builder columns = ImmutableList.builder(); - ImmutableMap.Builder columnStatistics = ImmutableMap.builder(); - for (int i = 1; i < 1500; ++i) { - String columnName = "t_bigint " + i + "_" + String.join("", Collections.nCopies(240, "x")); - columns.add(new ColumnMetadata(columnName, BIGINT)); - columnStatistics.put( - columnName, - createIntegerColumnStatistics( - OptionalLong.of(-1000 - i), - OptionalLong.of(1000 + i), - OptionalLong.of(i), - OptionalLong.of(2L * i))); - } - - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics.buildOrThrow()).build(); - - doCreateEmptyTable(tableName, ORC, columns.build()); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, partitionStatistics); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testStatisticsLongColumnNames() - throws Exception - { - SchemaTableName tableName = 
temporaryTable("test_statistics_long_column_name"); - try { - String columnName1 = String.join("", Collections.nCopies(255, "x")); - String columnName2 = String.join("", Collections.nCopies(255, "ӆ")); - String columnName3 = String.join("", Collections.nCopies(255, "ö")); - - List columns = List.of( - new ColumnMetadata(columnName1, BIGINT), - new ColumnMetadata(columnName2, BIGINT), - new ColumnMetadata(columnName3, BIGINT)); - - Map columnStatistics = Map.of( - columnName1, INTEGER_COLUMN_STATISTICS, - columnName2, INTEGER_COLUMN_STATISTICS, - columnName3, INTEGER_COLUMN_STATISTICS); - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics).build(); - - doCreateEmptyTable(tableName, ORC, columns); - - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(ZERO_TABLE_STATISTICS); - testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, partitionStatistics); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testStatisticsColumnModification() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_statistics_column_modification"); - try { - List columns = List.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT), - new ColumnMetadata("column3", BIGINT)); - - doCreateEmptyTable(tableName, ORC, columns); - - Map columnStatistics = Map.of( - "column1", INTEGER_COLUMN_STATISTICS, - "column2", INTEGER_COLUMN_STATISTICS); - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics).build(); - - // set table statistics for column1 - metastore.updateTableStatistics( - tableName.getSchemaName(), - tableName.getTableName(), - NO_ACID_TRANSACTION, - actualStatistics -> { - assertThat(actualStatistics).isEqualTo(ZERO_TABLE_STATISTICS); - return partitionStatistics; - }); - - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(partitionStatistics); - - metastore.renameColumn(tableName.getSchemaName(), tableName.getTableName(), "column1", "column4"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(new PartitionStatistics( - HIVE_BASIC_STATISTICS, - Map.of("column2", INTEGER_COLUMN_STATISTICS))); - - metastore.dropColumn(tableName.getSchemaName(), tableName.getTableName(), "column2"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(new PartitionStatistics(HIVE_BASIC_STATISTICS, Map.of())); - - metastore.addColumn(tableName.getSchemaName(), tableName.getTableName(), "column5", HiveType.HIVE_INT, "comment"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(new PartitionStatistics(HIVE_BASIC_STATISTICS, Map.of())); - - // TODO: column1 stats should be removed on column delete. However this is tricky since stats can be stored in multiple partitions. 
- metastore.renameColumn(tableName.getSchemaName(), tableName.getTableName(), "column4", "column1"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(new PartitionStatistics( - HIVE_BASIC_STATISTICS, - Map.of("column1", INTEGER_COLUMN_STATISTICS))); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testStatisticsPartitionedTableColumnModification() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_partitioned_table_statistics_column_modification"); - try { - List columns = List.of( - new ColumnMetadata("column1", BIGINT), - new ColumnMetadata("column2", BIGINT), - new ColumnMetadata("ds", VARCHAR)); - - Map columnStatistics = Map.of( - "column1", INTEGER_COLUMN_STATISTICS, - "column2", INTEGER_COLUMN_STATISTICS); - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics).build(); - - createDummyPartitionedTable(tableName, columns); - GlueHiveMetastore metastoreClient = (GlueHiveMetastore) getMetastoreClient(); - double countBefore = metastoreClient.getStats().getBatchUpdatePartition().getTime().getAllTime().getCount(); - - metastore.updatePartitionsStatistics(tableName.getSchemaName(), tableName.getTableName(), "ds=2016-01-01", actualStatistics -> partitionStatistics); - - assertThat(metastoreClient.getStats().getBatchUpdatePartition().getTime().getAllTime().getCount()).isEqualTo(countBefore + 1); - PartitionStatistics tableStatistics = new PartitionStatistics(createEmptyStatistics(), Map.of()); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(tableStatistics); - assertThat(metastore.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), Set.of("ds=2016-01-01"))) - .isEqualTo(Map.of("ds=2016-01-01", partitionStatistics)); - - // renaming table column does not rename partition columns - metastore.renameColumn(tableName.getSchemaName(), tableName.getTableName(), "column1", "column4"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(tableStatistics); - assertThat(metastore.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), Set.of("ds=2016-01-01"))) - .isEqualTo(Map.of("ds=2016-01-01", partitionStatistics)); - - // dropping table column does not drop partition columns - metastore.dropColumn(tableName.getSchemaName(), tableName.getTableName(), "column2"); - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(tableStatistics); - assertThat(metastore.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), Set.of("ds=2016-01-01"))) - .isEqualTo(Map.of("ds=2016-01-01", partitionStatistics)); - } - finally { - dropTable(tableName); - } - } - - @Test - public void testInvalidColumnStatisticsMetadata() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_statistics_invalid_column_metadata"); - try { - List columns = List.of( - new ColumnMetadata("column1", BIGINT)); - - Map columnStatistics = Map.of( - "column1", INTEGER_COLUMN_STATISTICS); - PartitionStatistics partitionStatistics = PartitionStatistics.builder() - .setBasicStatistics(HIVE_BASIC_STATISTICS) - .setColumnStatistics(columnStatistics).build(); - - doCreateEmptyTable(tableName, ORC, columns); - - // set 
table statistics for column1 - metastore.updateTableStatistics( - tableName.getSchemaName(), - tableName.getTableName(), - NO_ACID_TRANSACTION, - actualStatistics -> { - assertThat(actualStatistics).isEqualTo(ZERO_TABLE_STATISTICS); - return partitionStatistics; - }); - - Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - TableInput tableInput = GlueInputConverter.convertTable(table); - tableInput.setParameters(ImmutableMap.builder() - .putAll(tableInput.getParameters()) - .put("column_stats_bad_data", "bad data") - .buildOrThrow()); - getGlueClient().updateTable(new UpdateTableRequest() - .withDatabaseName(tableName.getSchemaName()) - .withTableInput(tableInput)); - - assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName(), Optional.empty())) - .isEqualTo(partitionStatistics); - } - finally { - dropTable(tableName); - } - } - - @Test - @Override - public void testPartitionColumnProperties() - { - // Glue currently does not support parameters on the partitioning columns - assertThatThrownBy(super::testPartitionColumnProperties) - .isInstanceOf(TrinoException.class) - .hasMessageStartingWith("Parameters not supported for partition columns (Service: AWSGlue; Status Code: 400; Error Code: InvalidInputException;"); - } - - @Test - public void testGlueObjectsWithoutStorageDescriptor() - { - // StorageDescriptor is an Optional field for Glue tables. - SchemaTableName table = temporaryTable("test_missing_storage_descriptor"); - DeleteTableRequest deleteTableRequest = new DeleteTableRequest() - .withDatabaseName(table.getSchemaName()) - .withName(table.getTableName()); - - try { - Supplier resetTableInput = () -> new TableInput() - .withStorageDescriptor(null) - .withName(table.getTableName()) - .withTableType(EXTERNAL_TABLE.name()); - - TableInput tableInput = resetTableInput.get(); - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(database) - .withTableInput(tableInput)); - - assertThatThrownBy(() -> metastore.getTable(table.getSchemaName(), table.getTableName())) - .hasMessageStartingWith("Table StorageDescriptor is null for table"); - glueClient.deleteTable(deleteTableRequest); - - // Iceberg table - tableInput = resetTableInput.get().withParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE)); - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(database) - .withTableInput(tableInput)); - assertThat(isIcebergTable(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow())).isTrue(); - glueClient.deleteTable(deleteTableRequest); - - // Delta Lake table - tableInput = resetTableInput.get().withParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER)); - glueClient.createTable(new CreateTableRequest() - .withDatabaseName(database) - .withTableInput(tableInput)); - assertThat(isDeltaLakeTable(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow())).isTrue(); - glueClient.deleteTable(deleteTableRequest); - - // Iceberg materialized view - tableInput = resetTableInput.get().withTableType(VIRTUAL_VIEW.name()) - .withViewOriginalText("/* Presto Materialized View: eyJvcmlnaW5hbFNxbCI6IlNFTEVDVCAxIiwiY29sdW1ucyI6W3sibmFtZSI6ImEiLCJ0eXBlIjoiaW50ZWdlciJ9XX0= */") - .withViewExpandedText(ICEBERG_MATERIALIZED_VIEW_COMMENT) - .withParameters(ImmutableMap.of( - PRESTO_VIEW_FLAG, "true", - TABLE_COMMENT, ICEBERG_MATERIALIZED_VIEW_COMMENT)); - glueClient.createTable(new CreateTableRequest() - 
.withDatabaseName(database) - .withTableInput(tableInput)); - assertThat(isTrinoMaterializedView(metastore.getTable(table.getSchemaName(), table.getTableName()).orElseThrow())).isTrue(); - materializedViews.add(table); - try (Transaction transaction = newTransaction()) { - ConnectorSession session = newSession(); - ConnectorMetadata metadata = transaction.getMetadata(); - // Not a view - assertThat(metadata.listViews(session, Optional.empty())) - .doesNotContain(table); - assertThat(metadata.listViews(session, Optional.of(table.getSchemaName()))) - .doesNotContain(table); - assertThat(metadata.getView(session, table)).isEmpty(); - } - finally { - materializedViews.remove(table); - } - } - finally { - // Table cannot be dropped through HiveMetastore since a TableHandle cannot be created - glueClient.deleteTable(new DeleteTableRequest() - .withDatabaseName(table.getSchemaName()) - .withName(table.getTableName())); - } - } - - @Test - public void testAlterTableComment() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_alter_table_comment"); - doCreateEmptyTable(tableName, ORC, ImmutableList.of(new ColumnMetadata("name", BIGINT)), ImmutableList.of()); - try { - assertThat(metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow().getParameters()).doesNotContainKey(TABLE_COMMENT); - metastore.commentTable(tableName.getSchemaName(), tableName.getTableName(), Optional.of("a table comment")); - Map tableParameters = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow().getParameters(); - assertThat(tableParameters).containsEntry(TABLE_COMMENT, "a table comment"); - - metastore.commentTable(tableName.getSchemaName(), tableName.getTableName(), Optional.empty()); - tableParameters = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow().getParameters(); - assertThat(tableParameters.get(TABLE_COMMENT)).isNull(); - } - finally { - glueClient.deleteTable(new DeleteTableRequest() - .withDatabaseName(tableName.getSchemaName()) - .withName(tableName.getTableName())); - } - } - - @Test - public void testAlterColumnComment() - throws Exception - { - SchemaTableName tableName = temporaryTable("test_alter_column_comment"); - List columns = ImmutableList.of( - new ColumnMetadata("first_column", BIGINT), - new ColumnMetadata("second_column", VARCHAR), - new ColumnMetadata("partition_column", BIGINT)); - createDummyPartitionedTable(tableName, columns, ImmutableList.of("partition_column"), ImmutableList.of()); - try { - metastore.commentColumn(tableName.getSchemaName(), tableName.getTableName(), "second_column", Optional.of("second column comment")); - metastore.commentColumn(tableName.getSchemaName(), tableName.getTableName(), "partition_column", Optional.of("partition column comment")); - - Table withComment = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - assertThat(withComment.getColumn("first_column").orElseThrow().getComment()).isEmpty(); - assertThat(withComment.getColumn("second_column").orElseThrow().getComment()).isEqualTo(Optional.of("second column comment")); - assertThat(withComment.getColumn("partition_column").orElseThrow().getComment()).isEqualTo(Optional.of("partition column comment")); - - metastore.commentColumn(tableName.getSchemaName(), tableName.getTableName(), "second_column", Optional.empty()); - withComment = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(); - 
assertThat(withComment.getColumn("first_column").orElseThrow().getComment()).isEmpty(); - assertThat(withComment.getColumn("second_column").orElseThrow().getComment()).isEmpty(); - assertThat(withComment.getColumn("partition_column").orElseThrow().getComment()).isEqualTo(Optional.of("partition column comment")); - } - finally { - glueClient.deleteTable(new DeleteTableRequest() - .withDatabaseName(tableName.getSchemaName()) - .withName(tableName.getTableName())); - } - } - - private Block singleValueBlock(long value) - { - BlockBuilder blockBuilder = BIGINT.createBlockBuilder(null, 1); - BIGINT.writeLong(blockBuilder, value); - return blockBuilder.build(); - } - - private void doGetPartitionsFilterTest( - List columnMetadata, - String partitionColumnName, - List partitionStringValues, - List> filterList, - List> expectedSingleValueList) - throws Exception - { - List partitionValuesList = partitionStringValues.stream() - .map(PartitionValues::make) - .collect(toImmutableList()); - List> expectedPartitionValuesList = expectedSingleValueList.stream() - .map(expectedValue -> expectedValue.stream() - .map(PartitionValues::make) - .collect(toImmutableList())) - .collect(toImmutableList()); - doGetPartitionsFilterTest(columnMetadata, ImmutableList.of(partitionColumnName), partitionValuesList, filterList, expectedPartitionValuesList); - } - - /** - * @param filterList should be same sized list as expectedValuesList - */ - private void doGetPartitionsFilterTest( - List columnMetadata, - List partitionColumnNames, - List partitionValues, - List> filterList, - List> expectedValuesList) - throws Exception - { - try (CloseableSchamaTableName closeableTableName = new CloseableSchamaTableName(temporaryTable("get_partitions"))) { - SchemaTableName tableName = closeableTableName.getSchemaTableName(); - createDummyPartitionedTable(tableName, columnMetadata, partitionColumnNames, partitionValues); - HiveMetastore metastoreClient = getMetastoreClient(); - - for (int i = 0; i < filterList.size(); i++) { - TupleDomain filter = filterList.get(i); - List expectedValues = expectedValuesList.get(i); - List expectedResults = expectedValues.stream() - .map(expectedPartitionValues -> makePartName(partitionColumnNames, expectedPartitionValues.getValues())) - .collect(toImmutableList()); - - Optional> partitionNames = metastoreClient.getPartitionNamesByFilter( - tableName.getSchemaName(), - tableName.getTableName(), - partitionColumnNames, - filter); - assertThat(partitionNames.isPresent()).isTrue(); - assertThat(partitionNames.get()) - .describedAs(format("lists \nactual: %s\nexpected: %s\nmismatch for filter %s (input index %d)\n", partitionNames.get(), expectedResults, filter, i)) - .isEqualTo(expectedResults); - } - } - } - - private void createDummyPartitionedTable(SchemaTableName tableName, List columns, List partitionColumnNames, List partitionValues) - throws Exception - { - doCreateEmptyTable(tableName, ORC, columns, partitionColumnNames); - - HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient(), TESTING_TYPE_MANAGER, false); - Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) - .orElseThrow(() -> new TableNotFoundException(tableName)); - List partitions = new ArrayList<>(); - List partitionNames = new ArrayList<>(); - partitionValues.stream() - .map(partitionValue -> makePartName(partitionColumnNames, partitionValue.values)) - .forEach( - partitionName -> { - partitions.add(new PartitionWithStatistics(createDummyPartition(table, 
partitionName), partitionName, PartitionStatistics.empty()));
-                            partitionNames.add(partitionName);
-                        });
-        metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions);
-        partitionNames.forEach(
-                partitionName -> metastoreClient.updatePartitionsStatistics(
-                        tableName.getSchemaName(), tableName.getTableName(), partitionName, currentStatistics -> ZERO_TABLE_STATISTICS));
-    }
-
-    private class CloseableSchamaTableName
-            implements AutoCloseable
-    {
-        private final SchemaTableName schemaTableName;
-
-        private CloseableSchamaTableName(SchemaTableName schemaTableName)
-        {
-            this.schemaTableName = schemaTableName;
-        }
-
-        public SchemaTableName getSchemaTableName()
-        {
-            return schemaTableName;
-        }
-
-        @Override
-        public void close()
-        {
-            dropTable(schemaTableName);
-        }
-    }
-
-    // container class for readability. Each value is one for a partitionKey, in order they appear in the schema
-    private static class PartitionValues
-    {
-        private final List<String> values;
-
-        private static PartitionValues make(String... values)
-        {
-            return new PartitionValues(Arrays.asList(values));
-        }
-
-        private PartitionValues(List<String> values)
-        {
-            // Elements are nullable
-            //noinspection Java9CollectionFactory
-            this.values = unmodifiableList(new ArrayList<>(requireNonNull(values, "values is null")));
-        }
-
-        public List<String> getValues()
-        {
-            return values;
-        }
-    }
-}
diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/._SUCCESS.crc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/._SUCCESS.crc
deleted file mode 100644
index 3b7b044936a890cd8d651d349a752d819d71d22c..0000000000000000000000000000000000000000
Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/._SUCCESS.crc and /dev/null differ
diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc.crc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc.crc
deleted file mode 100644
index df1434b32cf94902c2713136841b24337975248c..0000000000000000000000000000000000000000
Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00000.c000.snappy.orc.crc and /dev/null differ
diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc.crc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc.crc
deleted file mode 100644
index 5eda6fd9ac791375fbc3ae4c025b82ffe1936774..0000000000000000000000000000000000000000
Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/.part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc.crc and /dev/null differ
diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc
deleted file mode 100644
index eb4387fc03ac4a665aaf04bc9567675e021c1539..0000000000000000000000000000000000000000
Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00001.c000.snappy.orc and /dev/null differ
diff --git a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc b/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc
deleted file mode 100644
index c2aa001763ea72ca2822c2ef6a14e5f954ec3bf5..0000000000000000000000000000000000000000
Binary files a/plugin/trino-hive/src/test/resources/spark_bucketed_nation/part-00000-0a72da44-98a5-4b85-a513-2b2e82ec274f_00002.c000.snappy.orc and /dev/null differ