From c998f444b3b1a0d77692f135a5cc2f7f68fc5702 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 8 Jun 2021 21:39:28 +0800 Subject: [PATCH 1/6] Revert "cherry pick #2017 #2020 #2051 to release-5.0 (#2074)" This reverts commit 8e0712cc815412baac34d276c90c8a3466790976. --- .ci/build.groovy | 72 ------------ .ci/integration_test.groovy | 26 +++-- .ci/util.groovy | 97 ++++++----------- CMakeLists.txt | 17 +-- cmake/find_ccache.cmake | 19 ++-- release-centos7/build/build-tiflash-ci.sh | 103 ++++-------------- .../build/build-tiflash-release.sh | 3 +- release-centos7/build/env.sh | 3 - release-centos7/build/fetch-ci-build.sh | 35 ------ release-centos7/build/upload-ci-build.sh | 20 ---- release-darwin/build/build-tiflash-release.sh | 4 - tests/docker/gtest.yaml | 3 +- tests/docker/mock-test-dt.yaml | 7 +- tests/docker/mock-test-tmt.yaml | 7 +- tests/docker/run.sh | 12 +- tests/docker/tiflash-dt.yaml | 7 +- tests/docker/tiflash-tagged-image.yaml | 27 ----- 17 files changed, 95 insertions(+), 367 deletions(-) delete mode 100644 .ci/build.groovy delete mode 100644 release-centos7/build/env.sh delete mode 100755 release-centos7/build/fetch-ci-build.sh delete mode 100755 release-centos7/build/upload-ci-build.sh delete mode 100644 tests/docker/tiflash-tagged-image.yaml diff --git a/.ci/build.groovy b/.ci/build.groovy deleted file mode 100644 index 139dec9317f..00000000000 --- a/.ci/build.groovy +++ /dev/null @@ -1,72 +0,0 @@ -catchError { - def util = load('util.groovy') - - def label = "build-tics" - - def tiflashTag = ({ - def m = ghprbCommentBody =~ /tiflash\s*=\s*([^\s\\]+)(\s|\\|$)/ - if (m) { - return "${m.group(1)}" - } - return params.tiflashTag ?: ghprbTargetBranch ?: 'master' - }).call() - - podTemplate(name: label, label: label, instanceCap: 5, - workspaceVolume: emptyDirWorkspaceVolume(memory: true), - nodeSelector: 'role_type=slave', - containers: [ - containerTemplate(name: 'dockerd', image: 'docker:18.09.6-dind', privileged: true), - containerTemplate(name: 'docker', image: 'hub.pingcap.net/zyguan/docker:build-essential', - alwaysPullImage: true, envVars: [ - envVar(key: 'DOCKER_HOST', value: 'tcp://localhost:2375'), - ], ttyEnabled: true, command: 'cat'), - containerTemplate(name: 'builder', image: 'hub.pingcap.net/tiflash/tiflash-builder-ci', - alwaysPullImage: true, ttyEnabled: true, command: 'cat', - resourceRequestCpu: '4000m', resourceRequestMemory: '8Gi', - resourceLimitCpu: '10000m', resourceLimitMemory: '30Gi'), - ]) { - - node(label) { - - dir("tics") { - stage("Checkout") { - container("docker") { - sh """ - archive_url=${FILE_SERVER_URL}/download/builds/pingcap/tics/cache/tics-repo_latest.tar.gz - if [ ! -d contrib ]; then curl -sL \$archive_url | tar -zx --strip-components=1 || true; fi - """ - sh "chown -R 1000:1000 ./" - // sh "if ! 
grep -q hub.pingcap.net /etc/hosts ; then echo '172.16.10.5 hub.pingcap.net' >> /etc/hosts; fi" - } - util.checkoutTiCSFull("${ghprbActualCommit}", "${ghprbPullId}") - } - stage("Build & Upload") { - timeout(time: 70, unit: 'MINUTES') { - container("builder") { - sh "NPROC=5 BUILD_BRANCH=${ghprbTargetBranch} release-centos7/build/build-tiflash-ci.sh" - sh "PULL_ID=${ghprbPullId} COMMIT_HASH=${ghprbActualCommit} release-centos7/build/upload-ci-build.sh" - } - } - } - } - } - } -} - - -stage('Summary') { - def duration = ((System.currentTimeMillis() - currentBuild.startTimeInMillis) / 1000 / 60).setScale(2, BigDecimal.ROUND_HALF_UP) - def msg = "[#${ghprbPullId}: ${ghprbPullTitle}]" + "\n" + - "${ghprbPullLink}" + "\n" + - "${ghprbPullDescription}" + "\n" + - "Build Result: `${currentBuild.currentResult}`" + "\n" + - "Elapsed Time: `${duration} mins` " + "\n" + - "${env.RUN_DISPLAY_URL}" - - echo "${msg}" - - if (currentBuild.currentResult != "SUCCESS") { - echo "Send slack here ..." - slackSend channel: '#jenkins-ci', color: 'danger', teamDomain: 'pingcap', tokenCredentialId: 'slack-pingcap-token', message: "${msg}" - } -} diff --git a/.ci/integration_test.groovy b/.ci/integration_test.groovy index c392a840204..e5d9e499fc9 100644 --- a/.ci/integration_test.groovy +++ b/.ci/integration_test.groovy @@ -9,8 +9,18 @@ catchError { return params.ghprbTargetBranch ?: 'master' }).call() - stage("Wait for ci build") { - echo "ticsTag=${params.ghprbActualCommit} tidbBranch=${tidbBranch}" + echo "ticsTag=${params.ghprbActualCommit} tidbBranch=${tidbBranch}" + + stage("Wait for images") { + util.runClosure("wait-for-images") { + timeout(time: 60, unit: 'MINUTES') { + container("docker") { + sh """ + while ! docker pull hub.pingcap.net/tiflash/tics:${params.ghprbActualCommit}; do sleep 60; done + """ + } + } + } } node("${GO_BUILD_SLAVE}") { @@ -23,20 +33,13 @@ catchError { cp -R /nfs/cache/git/src-tics.tar.gz* ./ mkdir -p ${curws}/tics tar -xzf src-tics.tar.gz -C ${curws}/tics --strip-components=1 - """ + """ } } } dir("${curws}/tics") { util.checkoutTiCS("${params.ghprbActualCommit}", "${params.ghprbPullId}") } - timeout(time: 60, unit: 'MINUTES') { - container("golang") { - sh """ - COMMIT_HASH=${params.ghprbActualCommit} PULL_ID=${params.ghprbPullId} TAR_PATH=${curws}/tics/tests/.build bash -e ${curws}/tics/release-centos7/build/fetch-ci-build.sh - """ - } - } } stash includes: "tics/**", name: "git-code-tics", useDefaultExcludes: false } @@ -71,9 +74,10 @@ catchError { stage('Summary') { def duration = ((System.currentTimeMillis() - currentBuild.startTimeInMillis) / 1000 / 60).setScale(2, BigDecimal.ROUND_HALF_UP) - def msg = "Result: `${currentBuild.currentResult}`" + "\n" + + def msg = "Build Result: `${currentBuild.currentResult}`" + "\n" + "Elapsed Time: `${duration} mins`" + "\n" + "${env.RUN_DISPLAY_URL}" echo "${msg}" + } diff --git a/.ci/util.groovy b/.ci/util.groovy index 016428afd4d..4fc13dd8b47 100644 --- a/.ci/util.groovy +++ b/.ci/util.groovy @@ -1,30 +1,26 @@ -def doCheckout(commit, refspec) { - checkout(changelog: false, poll: false, scm: [ - $class : "GitSCM", - branches : [ - [name: "${commit}"], - ], - userRemoteConfigs: [ - [ - url : "git@github.com:pingcap/tics.git", - refspec : refspec, - credentialsId: "github-sre-bot-ssh", - ] - ], - extensions : [ - [$class: 'PruneStaleBranch'], - [$class: 'CleanBeforeCheckout'], - ], - ]) -} - def checkoutTiCS(commit, pullId) { def refspec = "+refs/heads/*:refs/remotes/origin/*" if (pullId) { refspec += " 
+refs/pull/${pullId}/*:refs/remotes/origin/pr/${pullId}/*" } try { - doCheckout(commit, refspec) + checkout(changelog: false, poll: false, scm: [ + $class : "GitSCM", + branches : [ + [name: "${commit}"], + ], + userRemoteConfigs: [ + [ + url : "git@github.com:pingcap/tics.git", + refspec : refspec, + credentialsId: "github-sre-bot-ssh", + ] + ], + extensions : [ + [$class: 'PruneStaleBranch'], + [$class: 'CleanBeforeCheckout'], + ], + ]) } catch (info) { retry(2) { echo "checkout failed, retry.." @@ -33,42 +29,27 @@ def checkoutTiCS(commit, pullId) { echo ".git already exist or not a valid git dir. Delete dir..." deleteDir() } - doCheckout(commit, refspec) + checkout(changelog: false, poll: false, scm: [ + $class : "GitSCM", + branches : [ + [name: "${commit}"], + ], + userRemoteConfigs: [ + [ + url : "git@github.com:pingcap/tics.git", + refspec : refspec, + credentialsId: "github-sre-bot-ssh", + ] + ], + extensions : [ + [$class: 'PruneStaleBranch'], + [$class: 'CleanBeforeCheckout'], + ], + ]) } } } -def checkoutTiCSFull(commit, pullId) { - def refspec = "+refs/heads/*:refs/remotes/origin/*" - if (pullId) { - refspec += " +refs/pull/${pullId}/*:refs/remotes/origin/pr/${pullId}/*" - } - checkout(changelog: false, poll: false, scm: [ - $class : "GitSCM", - branches : [ - [name: "${commit}"], - ], - userRemoteConfigs : [ - [ - url : "git@github.com:pingcap/tics.git", - refspec : refspec, - credentialsId: "github-sre-bot-ssh", - ] - ], - extensions : [ - [$class : 'SubmoduleOption', - disableSubmodules : false, - parentCredentials : true, - recursiveSubmodules: true, - trackingSubmodules : false, - reference : ''], - [$class: 'PruneStaleBranch'], - [$class: 'CleanBeforeCheckout'], - ], - doGenerateSubmoduleConfigurations: false, - ]) -} - def runClosure(label, Closure body) { podTemplate(name: label, label: label, instanceCap: 15, containers: [ containerTemplate(name: 'dockerd', image: 'docker:18.09.6-dind', privileged: true, @@ -89,16 +70,6 @@ def runTest(label, testPath, tidbBranch) { runClosure(label) { stage("Unstash") { unstash 'git-code-tics' - dir("tics") { - timeout(time: 5, unit: 'MINUTES') { - container("docker") { - sh """ - pwd - DOWNLOAD_TAR=true COMMIT_HASH=${params.ghprbActualCommit} PULL_ID=${params.ghprbPullId} TAR_PATH=./tests/.build bash -e release-centos7/build/fetch-ci-build.sh - """ - } - } - } } dir(testPath) { stage("Test") { diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e3d9857607..1c1c6a368f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,6 @@ endif () # Write compile_commands.json set(CMAKE_EXPORT_COMPILE_COMMANDS 1) -option (USE_CCACHE "Set to OFF to disable ccache" ON) include (cmake/find_ccache.cmake) if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None") @@ -149,24 +148,14 @@ if (NOT ARCH_ARM) set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3") endif () -option (DEBUG_WITHOUT_DEBUG_INFO "Set to ON to build dev target without debug info (remove flag `-g` in order to accelerate compiling speed and reduce target binary size)" OFF) -if (DEBUG_WITHOUT_DEBUG_INFO) - string(REPLACE "-g" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") - string(REPLACE "-g" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") - message (STATUS "Build debug target without debug info") -else () - set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -g3 -ggdb3") - set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g3 -ggdb3") -endif () - set (CMAKE_BUILD_COLOR_MAKEFILE ON) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -fno-inline ${CMAKE_CXX_FLAGS_ADD}") +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} ${CMAKE_C_FLAGS_ADD}") -set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -fno-inline ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}") if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND ARCH_FREEBSD)) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++") @@ -239,7 +228,7 @@ if (UNBUNDLED OR NOT (ARCH_LINUX OR APPLE) OR ARCH_32) option (NO_WERROR "Disable -Werror compiler option" ON) endif () -message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} UNBUNDLED=${UNBUNDLED}") +message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} UNBUNDLED=${UNBUNDLED} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}") include(GNUInstallDirs) diff --git a/cmake/find_ccache.cmake b/cmake/find_ccache.cmake index eef203f9cf1..22372c3cbfe 100644 --- a/cmake/find_ccache.cmake +++ b/cmake/find_ccache.cmake @@ -1,19 +1,14 @@ find_program (CCACHE_FOUND ccache) -if (USE_CCACHE AND CCACHE_FOUND AND NOT CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" AND NOT CMAKE_CXX_COMPILER MATCHES "ccache") - execute_process (COMMAND ${CCACHE_FOUND} "-V" OUTPUT_VARIABLE CCACHE_VERSION) - execute_process (COMMAND ${CCACHE_FOUND} "-p" OUTPUT_VARIABLE CCACHE_CONFIG) - string (REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) +if (CCACHE_FOUND AND NOT CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" AND NOT CMAKE_CXX_COMPILER MATCHES "ccache") + execute_process(COMMAND ${CCACHE_FOUND} "-V" OUTPUT_VARIABLE CCACHE_VERSION) + string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION} ) if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - message (STATUS "Using ccache: ${CCACHE_FOUND}, version ${CCACHE_VERSION}") - message (STATUS "Show ccache config:") - message ("${CCACHE_CONFIG}") - set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) - set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) + #message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}") + set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) + set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) else () - message (STATUS "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") + message(STATUS "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") endif () -else () - message (STATUS "Not using ccache ${CCACHE_FOUND}, USE_CCACHE=${USE_CCACHE}") endif () diff 
--git a/release-centos7/build/build-tiflash-ci.sh b/release-centos7/build/build-tiflash-ci.sh index 9458c50d5e6..16cdca2ba01 100755 --- a/release-centos7/build/build-tiflash-ci.sh +++ b/release-centos7/build/build-tiflash-ci.sh @@ -1,79 +1,28 @@ #!/bin/bash -command -v ccache > /dev/null 2>&1 -if [[ $? != 0 ]]; then - echo "try to install ccache" - wget http://fileserver.pingcap.net/download/builds/pingcap/tiflash/ci-cache/ccache.x86_64.rpm - rpm -Uvh ccache.x86_64.rpm -else - echo "ccache has been installed" -fi - set -ueox pipefail SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" SRCPATH=${1:-$(cd $SCRIPTPATH/../..; pwd -P)} -CI_CCACHE_USED_SRCPATH="/build/tics" -export INSTALL_DIR=${INSTALL_DIR:-"$SRCPATH/release-centos7/tiflash"} - -if [[ ${CI_CCACHE_USED_SRCPATH} != ${SRCPATH} ]]; then - rm -rf "${CI_CCACHE_USED_SRCPATH}" - mkdir -p /build && cd /build - cp -r ${SRCPATH} ${CI_CCACHE_USED_SRCPATH} - sh ${CI_CCACHE_USED_SRCPATH}/release-centos7/build/build-tiflash-ci.sh - exit 0 -fi - NPROC=${NPROC:-$(nproc || grep -c ^processor /proc/cpuinfo)} + ENABLE_TEST=${ENABLE_TEST:-1} ENABLE_EMBEDDED_COMPILER="FALSE" CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Debug} -UPDATE_CCACHE=${UPDATE_CCACHE:-false} -BUILD_BRANCH=${BUILD_BRANCH:-master} -BUILD_UPDATE_DEBUG_CI_CCACHE=${BUILD_UPDATE_DEBUG_CI_CCACHE:-false} -CCACHE_REMOTE_TAR="${BUILD_BRANCH}-${CMAKE_BUILD_TYPE}.tar" -CCACHE_REMOTE_TAR=$(echo "${CCACHE_REMOTE_TAR}" | tr 'A-Z' 'a-z') + if [[ "${CMAKE_BUILD_TYPE}" != "Debug" ]]; then ENABLE_TEST=0 fi -# https://cd.pingcap.net/blue/organizations/jenkins/build_tiflash_multi_branch/activity/ -# Each time after a new commit merged into target branch, a task about nightly build will be triggered. -# BUILD_UPDATE_DEBUG_CI_CCACHE is set true in order to build and upload ccache. 
-if [[ "${BUILD_UPDATE_DEBUG_CI_CCACHE}" != "false" ]]; then - echo "====== begin to build & upload ccache for ci debug build ======" - UPDATE_CCACHE=true NPROC=${NPROC} BUILD_BRANCH=${BUILD_BRANCH} CMAKE_BUILD_TYPE=Debug BUILD_UPDATE_DEBUG_CI_CCACHE=false sh ${SRCPATH}/release-centos7/build/build-tiflash-ci.sh - echo "====== finish build & upload ccache for ci debug build ======" -fi -rm -rf "${INSTALL_DIR}" -mkdir -p "${INSTALL_DIR}" - -USE_CCACHE=ON -rm -rf "${SRCPATH}/.ccache" -cache_file="${SRCPATH}/ccache.tar" -rm -rf "${cache_file}" -curl -o "${cache_file}" http://fileserver.pingcap.net/download/builds/pingcap/tiflash/ci-cache/${CCACHE_REMOTE_TAR} -cache_size=`ls -l "${cache_file}" | awk '{print $5}'` -min_size=$((1024000)) -if [[ ${cache_size} -gt ${min_size} ]]; then - echo "try to use ccache to accelerate compile speed" - cd "${SRCPATH}" - tar -xf ccache.tar -fi -ccache -o cache_dir="${SRCPATH}/.ccache" -ccache -o max_size=2G -ccache -o limit_multiple=0.99 -ccache -o hash_dir=false -ccache -o compression=true -ccache -o compression_level=6 -if [[ ${UPDATE_CCACHE} == "false" ]]; then - ccache -o read_only=true -fi -ccache -z +set -xe + +install_dir="$SRCPATH/release-centos7/tiflash" +if [ -d "$install_dir" ]; then rm -rf "$install_dir"/*; else mkdir -p "$install_dir"; fi pushd ${SRCPATH}/cluster_manage/ rm -rf ./dist ./release.sh +cp -r dist/flash_cluster_manager "$install_dir"/flash_cluster_manager popd if [ -d "$SRCPATH/contrib/kvproto" ]; then @@ -113,41 +62,29 @@ fi chmod 0731 "${SRCPATH}/libs/libtiflash-proxy/libtiflash_proxy.so" build_dir="$SRCPATH/release-centos7/build-release" -rm -rf ${build_dir} mkdir -p $build_dir && cd $build_dir cmake "$SRCPATH" \ -DENABLE_EMBEDDED_COMPILER=$ENABLE_EMBEDDED_COMPILER \ -DENABLE_TESTS=$ENABLE_TEST \ - -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE \ - -DUSE_CCACHE=${USE_CCACHE} \ - -DDEBUG_WITHOUT_DEBUG_INFO=ON + -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE make -j $NPROC tiflash +# Reduce binary size by compressing. +objcopy --compress-debug-sections=zlib-gnu "$build_dir/dbms/src/Server/tiflash" + +cp -f "$build_dir/dbms/src/Server/tiflash" "$install_dir/tiflash" +cp -f "${SRCPATH}/libs/libtiflash-proxy/libtiflash_proxy.so" "$install_dir/libtiflash_proxy.so" + # copy gtest binary under Debug mode if [[ "${CMAKE_BUILD_TYPE}" = "Debug" && ${ENABLE_TEST} -ne 0 ]]; then #ctest -V -j $(nproc || grep -c ^processor /proc/cpuinfo) make -j ${NPROC} gtests_dbms gtests_libcommon - cp -f "$build_dir/dbms/gtests_dbms" "${INSTALL_DIR}/" - cp -f "$build_dir/libs/libcommon/src/tests/gtests_libcommon" "${INSTALL_DIR}/" + cp -f "$build_dir/dbms/gtests_dbms" "$install_dir/" + cp -f "$build_dir/libs/libcommon/src/tests/gtests_libcommon" "$install_dir/" fi -ccache -s - -if [[ ${UPDATE_CCACHE} == "true" ]]; then - cd ${SRCPATH} - rm -rf ccache.tar - tar -cf ccache.tar .ccache - curl -F builds/pingcap/tiflash/ci-cache/${CCACHE_REMOTE_TAR}=@ccache.tar http://fileserver.pingcap.net/upload -fi - -# Reduce binary size by compressing. 
-objcopy --compress-debug-sections=zlib-gnu "$build_dir/dbms/src/Server/tiflash" -cp -r "${SRCPATH}/cluster_manage/dist/flash_cluster_manager" "${INSTALL_DIR}"/flash_cluster_manager -cp -f "$build_dir/dbms/src/Server/tiflash" "${INSTALL_DIR}/tiflash" -cp -f "${SRCPATH}/libs/libtiflash-proxy/libtiflash_proxy.so" "${INSTALL_DIR}/libtiflash_proxy.so" -ldd "${INSTALL_DIR}/tiflash" -cd "${INSTALL_DIR}" -chrpath -d libtiflash_proxy.so "${INSTALL_DIR}/tiflash" -ldd "${INSTALL_DIR}/tiflash" -ls -lh "${INSTALL_DIR}" +ldd "$install_dir/tiflash" +cd "$install_dir" +chrpath -d libtiflash_proxy.so "$install_dir/tiflash" +ldd "$install_dir/tiflash" diff --git a/release-centos7/build/build-tiflash-release.sh b/release-centos7/build/build-tiflash-release.sh index 7a6a5e9db8f..fdc3052a641 100755 --- a/release-centos7/build/build-tiflash-release.sh +++ b/release-centos7/build/build-tiflash-release.sh @@ -50,8 +50,7 @@ cmake "$SRCPATH" ${DEFINE_CMAKE_PREFIX_PATH} \ -DENABLE_MYSQL=OFF \ -DENABLE_TESTING=OFF \ -DENABLE_TESTS=OFF \ - -Wno-dev \ - -DUSE_CCACHE=OFF + -Wno-dev make -j $NPROC tiflash diff --git a/release-centos7/build/env.sh b/release-centos7/build/env.sh deleted file mode 100644 index 8867709d0f8..00000000000 --- a/release-centos7/build/env.sh +++ /dev/null @@ -1,3 +0,0 @@ -TIFLASH_CI_BUILD_URI_PREFIX="builds/pingcap/tiflash/ci-cache/tmp/pr-build" -TIFLASH_CI_BUILD_PRE_FIX="http://fileserver.pingcap.net/download/${TIFLASH_CI_BUILD_URI_PREFIX}" -CI_BUILD_INFO_PRE_FIX="commit hash value" diff --git a/release-centos7/build/fetch-ci-build.sh b/release-centos7/build/fetch-ci-build.sh deleted file mode 100755 index 1d5d9c3be9f..00000000000 --- a/release-centos7/build/fetch-ci-build.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -set -ueox pipefail - -SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" -source ${SCRIPTPATH}/env.sh - -DOWNLOAD_TAR=${DOWNLOAD_TAR:-false} - -rm -rf "${TAR_PATH}" -mkdir -p "${TAR_PATH}" -cd "${TAR_PATH}" -TAR_BIN_URL="${TIFLASH_CI_BUILD_PRE_FIX}/${PULL_ID}/tiflash.tar.gz" -COMMIT_HASH_FILE_URL="${TIFLASH_CI_BUILD_PRE_FIX}/${PULL_ID}/commit-hash" -while [[ true ]]; do - curl -o ./commit-hash ${COMMIT_HASH_FILE_URL} - if [[ `grep -c "${CI_BUILD_INFO_PRE_FIX}" ./commit-hash` -ne '0' ]];then - COMMIT_HASH_VAL=$(tail -n -1 ./commit-hash) - if [[ ${COMMIT_HASH_VAL} == ${COMMIT_HASH} ]]; then - if [[ ${DOWNLOAD_TAR} == "true" ]]; then - curl -o ./tiflash.tar.gz ${TAR_BIN_URL} - MD5_SUM=$(md5sum ./tiflash.tar.gz | awk '{ print $1 }') - EXPECT_MD5_SUM=$(tail -n +2 ./commit-hash | head -n 1) - if [[ ${MD5_SUM} != ${EXPECT_MD5_SUM} ]]; then - echo "wrong md5sum got ${MD5_SUM} but expect ${EXPECT_MD5_SUM}" - exit -1 - fi - tar -zxf tiflash.tar.gz - fi - break - fi - fi - echo "fail to get ci build tiflash binary, sleep 60s" - sleep 60 -done diff --git a/release-centos7/build/upload-ci-build.sh b/release-centos7/build/upload-ci-build.sh deleted file mode 100755 index 929e8c2ccc4..00000000000 --- a/release-centos7/build/upload-ci-build.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -set -ueox pipefail - -SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" -SRCPATH=${1:-$(cd $SCRIPTPATH/../..; pwd -P)} -source ${SCRIPTPATH}/env.sh -TAR_BIN_URI="${TIFLASH_CI_BUILD_URI_PREFIX}/${PULL_ID}/tiflash.tar.gz" -COMMIT_HASH_FILE_URI="${TIFLASH_CI_BUILD_URI_PREFIX}/${PULL_ID}/commit-hash" - -build_dir="$SRCPATH/release-centos7" -cd ${build_dir} -tar -zcf tiflash.tar.gz ./tiflash -MD5_SUM=$(md5sum ./tiflash.tar.gz | awk '{ print $1 }') -echo "${CI_BUILD_INFO_PRE_FIX}" > ./commit-hash -echo "${MD5_SUM}" >> 
./commit-hash -echo "${COMMIT_HASH}" >> ./commit-hash - -curl -F ${TAR_BIN_URI}=@tiflash.tar.gz http://fileserver.pingcap.net/upload -curl -F ${COMMIT_HASH_FILE_URI}=@commit-hash http://fileserver.pingcap.net/upload diff --git a/release-darwin/build/build-tiflash-release.sh b/release-darwin/build/build-tiflash-release.sh index dabea816af6..30ab2816748 100755 --- a/release-darwin/build/build-tiflash-release.sh +++ b/release-darwin/build/build-tiflash-release.sh @@ -47,7 +47,3 @@ cp -f "${SRCPATH}/libs/libtiflash-proxy/libtiflash_proxy.dylib" "$install_dir/li FILE="$install_dir/tiflash" otool -L "$FILE" - -set +e -echo "show ccache stats" -ccache -s \ No newline at end of file diff --git a/tests/docker/gtest.yaml b/tests/docker/gtest.yaml index be2edca2a68..01bb32d9290 100644 --- a/tests/docker/gtest.yaml +++ b/tests/docker/gtest.yaml @@ -3,10 +3,9 @@ version: '2.3' services: # tics-gtest container is for gtest cases tics-gtest: - image: hub.pingcap.net/tiflash/tiflash-ci-base + image: hub.pingcap.net/tiflash/tics:${TAG:-master} volumes: - ./log/tics-gtest:/tmp/tiflash/log - ..:/tests - ../docker/_env.sh:/tests/_env.sh - - ../.build/tiflash:/tiflash entrypoint: sleep infinity # just wait diff --git a/tests/docker/mock-test-dt.yaml b/tests/docker/mock-test-dt.yaml index c5770ba7db2..70e712b270b 100644 --- a/tests/docker/mock-test-dt.yaml +++ b/tests/docker/mock-test-dt.yaml @@ -3,7 +3,7 @@ version: '2.3' services: # tics0 container is for tests under mutable-test && delta-merge-test directory tics0: - image: hub.pingcap.net/tiflash/tiflash-ci-base + image: hub.pingcap.net/tiflash/tics:${TAG:-master} volumes: - ./config/tics_dt.toml:/config.toml:ro - ./config/users.toml:/users.toml:ro @@ -11,9 +11,6 @@ services: - ./log/tics_dt:/tmp/tiflash/log - ..:/tests - ../docker/_env.sh:/tests/_env.sh - - ../.build/tiflash:/tiflash - entrypoint: - - /tiflash/tiflash - - server + command: - --config-file - /config.toml diff --git a/tests/docker/mock-test-tmt.yaml b/tests/docker/mock-test-tmt.yaml index 633c4936b6f..6d240db4849 100644 --- a/tests/docker/mock-test-tmt.yaml +++ b/tests/docker/mock-test-tmt.yaml @@ -3,7 +3,7 @@ version: '2.3' services: # tics0 container is for tests under mutable-test && delta-merge-test directory tics0: - image: hub.pingcap.net/tiflash/tiflash-ci-base + image: hub.pingcap.net/tiflash/tics:${TAG:-master} volumes: - ./config/tics_tmt.toml:/config.toml:ro - ./config/users.toml:/users.toml:ro @@ -11,9 +11,6 @@ services: - ./log/tics_tmt:/tmp/tiflash/log - ..:/tests - ../docker/_env.sh:/tests/_env.sh - - ../.build/tiflash:/tiflash - entrypoint: - - /tiflash/tiflash - - server + command: - --config-file - /config.toml diff --git a/tests/docker/run.sh b/tests/docker/run.sh index c364ead9886..645d494c9b0 100644 --- a/tests/docker/run.sh +++ b/tests/docker/run.sh @@ -59,18 +59,22 @@ fi # Stop all docker instances if exist. 
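# Note that `docker-compose down` only stops the services defined in the files
# passed to it, so every yaml a previous run may have used has to be listed
# here; otherwise leftover containers would survive into this run.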
docker-compose \ + -f gtest.yaml \ -f cluster.yaml \ - -f tiflash-tagged-image.yaml \ + -f cluster_new_collation.yaml \ + -f tiflash-dt.yaml \ + -f mock-test-dt.yaml \ + -f cluster_tidb_fail_point.yaml \ down rm -rf ./data ./log #################################### TIDB-CI ONLY #################################### # run fullstack-tests (for engine DeltaTree) -docker-compose -f cluster.yaml -f tiflash-tagged-image.yaml up -d +docker-compose -f cluster.yaml -f tiflash-dt.yaml up -d wait_env dt -docker-compose -f cluster.yaml -f tiflash-tagged-image.yaml exec -T tiflash0 bash -c 'cd /tests ; ./run-test.sh tidb-ci/fullstack-test true && ./run-test.sh tidb-ci/fullstack-test-dt' -docker-compose -f cluster.yaml -f tiflash-tagged-image.yaml down +docker-compose -f cluster.yaml -f tiflash-dt.yaml exec -T tiflash0 bash -c 'cd /tests ; ./run-test.sh tidb-ci/fullstack-test true && ./run-test.sh tidb-ci/fullstack-test-dt' +docker-compose -f cluster.yaml -f tiflash-dt.yaml down rm -rf ./data ./log [[ "$TIDB_CI_ONLY" -eq 1 ]] && exit diff --git a/tests/docker/tiflash-dt.yaml b/tests/docker/tiflash-dt.yaml index 01b5a31664d..acefd09e5cb 100644 --- a/tests/docker/tiflash-dt.yaml +++ b/tests/docker/tiflash-dt.yaml @@ -4,7 +4,7 @@ services: # for tests under fullstack-test directory # (engine DeltaTree) tiflash0: - image: hub.pingcap.net/tiflash/tiflash-ci-base + image: hub.pingcap.net/tiflash/tics:${TAG:-master} volumes: - ./config/tiflash_dt.toml:/config.toml:ro - ./config/tiflash-users.toml:/users.toml:ro @@ -17,10 +17,7 @@ services: - ./config/cipher-file-256:/cipher-file-256:ro - ./data/proxy:/data - ./log/proxy:/log - - ../.build/tiflash:/tiflash - entrypoint: - - /tiflash/tiflash - - server + command: - --config-file - /config.toml restart: on-failure diff --git a/tests/docker/tiflash-tagged-image.yaml b/tests/docker/tiflash-tagged-image.yaml deleted file mode 100644 index acefd09e5cb..00000000000 --- a/tests/docker/tiflash-tagged-image.yaml +++ /dev/null @@ -1,27 +0,0 @@ -version: '2.3' - -services: - # for tests under fullstack-test directory - # (engine DeltaTree) - tiflash0: - image: hub.pingcap.net/tiflash/tics:${TAG:-master} - volumes: - - ./config/tiflash_dt.toml:/config.toml:ro - - ./config/tiflash-users.toml:/users.toml:ro - - ./data/tiflash:/tmp/tiflash/data - - ./log/tiflash:/tmp/tiflash/log - - ..:/tests - - ../docker/_env.sh:/tests/_env.sh - - ./log/tiflash-cluster-manager:/tmp/tiflash/data/tmp - - ./config/proxy.toml:/proxy.toml:ro - - ./config/cipher-file-256:/cipher-file-256:ro - - ./data/proxy:/data - - ./log/proxy:/log - command: - - --config-file - - /config.toml - restart: on-failure - depends_on: - - "pd0" - - "tikv0" - From 0a99ffa2a1cb3f78594df33dd313030c374a9e9a Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 8 Jun 2021 21:39:39 +0800 Subject: [PATCH 2/6] Revert "Fix compile error on Mac Clang 12.0.5 (#2058) (#2070)" This reverts commit 3c31b9293c36e8f43af8470b12d4bd0b952370fd. 
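Note on what is being undone: the reverted commit worked around build breakage
on Apple Clang 12.0.5. Among other things it specialized the std::is_signed_v /
is_unsigned_v / is_integral_v variable templates for UInt128 next to the
existing class-template specializations (some standard libraries define the
`_v` helpers directly rather than in terms of `is_signed<T>::value`, so the
class specializations alone are not always picked up), added
-Wunknown-warning-option and -Wdeprecated-copy pragma guards around third-party
includes, and bumped the poco submodule. A minimal sketch of the
variable-template pattern, using a simplified stand-in for UInt128 (this is
illustrative only, not code taken from the revert):

    // Illustrative sketch; `UInt128` here is a simplified stand-in.
    #include <type_traits>

    struct UInt128
    {
        unsigned long long low;
        unsigned long long high;
    };

    namespace std
    {
    template <>
    struct is_signed<UInt128>
    {
        static constexpr bool value = false;
    };
    // Pair the class specialization with the matching variable template, for
    // standard libraries that do not derive `is_signed_v` from the class:
    template <>
    inline constexpr bool is_signed_v<UInt128> = is_signed<UInt128>::value;
    } // namespace std

    static_assert(!std::is_signed_v<UInt128>, "UInt128 is unsigned");
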
--- contrib/poco | 2 +- dbms/src/Common/UInt128.h | 9 --------- dbms/src/Core/Types.h | 4 ---- dbms/src/DataStreams/PKColumnIterator.hpp | 12 ++---------- dbms/src/Functions/GeoUtils.h | 4 ---- dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.h | 7 ------- dbms/src/Storages/Page/PageUtil.cpp | 2 -- 8 files changed, 4 insertions(+), 38 deletions(-) diff --git a/contrib/poco b/contrib/poco index 00fd749ea7b..f22d49ee709 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 00fd749ea7b244172fa173c9a8c240d5c9b513ab +Subproject commit f22d49ee709867cdd2a789cc581449ee25f42a7f diff --git a/dbms/src/Common/UInt128.h b/dbms/src/Common/UInt128.h index 1080a744e94..3481640529d 100644 --- a/dbms/src/Common/UInt128.h +++ b/dbms/src/Common/UInt128.h @@ -193,25 +193,16 @@ template <> struct is_signed static constexpr bool value = false; }; -template <> -inline constexpr bool is_signed_v = is_signed::value; - template <> struct is_unsigned { static constexpr bool value = true; }; -template <> -inline constexpr bool is_unsigned_v = is_unsigned::value; - template <> struct is_integral { static constexpr bool value = true; }; -template <> -inline constexpr bool is_integral_v = is_integral::value; - // Operator +, -, /, *, % aren't implemented so it's not an arithmetic type template <> struct is_arithmetic { diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index fdae3a60e49..da8db874e65 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -30,10 +30,6 @@ struct numeric_limits<__int128_t> #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -#if defined(__clang__) -#pragma GCC diagnostic ignored "-Wunknown-warning-option" -#pragma GCC diagnostic ignored "-Wdeprecated-copy" -#endif #include #pragma GCC diagnostic pop diff --git a/dbms/src/DataStreams/PKColumnIterator.hpp b/dbms/src/DataStreams/PKColumnIterator.hpp index d70c228c87e..23682cc2ab0 100644 --- a/dbms/src/DataStreams/PKColumnIterator.hpp +++ b/dbms/src/DataStreams/PKColumnIterator.hpp @@ -14,7 +14,8 @@ struct PKColumnIterator : public std::iterator diff --git a/dbms/src/Functions/GeoUtils.h b/dbms/src/Functions/GeoUtils.h index 9ce40d558eb..d1267b18769 100644 --- a/dbms/src/Functions/GeoUtils.h +++ b/dbms/src/Functions/GeoUtils.h @@ -15,10 +15,6 @@ #endif #pragma GCC diagnostic ignored "-Wunused-parameter" -#if defined(__clang__) -#pragma GCC diagnostic ignored "-Wunknown-warning-option" -#pragma GCC diagnostic ignored "-Wdeprecated-copy" -#endif #include diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 0c3dd678022..90c0577ebc6 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -1815,7 +1815,7 @@ void DeltaMergeStore::applyAlters(const AlterCommands & commands, original_table_columns.swap(new_original_table_columns); store_columns.swap(new_store_columns); - std::atomic_store(&original_table_header, std::make_shared(toEmptyBlock(original_table_columns))); + std::atomic_store(&original_table_header, std::make_shared(toEmptyBlock(original_table_columns))); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 2441c3d1429..d7d72dbc2de 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -17,17 +17,10 @@ #include #include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored 
"-Wdeprecated-declarations" -#if defined(__clang__) -#pragma GCC diagnostic ignored "-Wunknown-warning-option" -#pragma GCC diagnostic ignored "-Wdeprecated-copy" -#endif #include #include #include #include -#pragma GCC diagnostic pop namespace TiDB { diff --git a/dbms/src/Storages/Page/PageUtil.cpp b/dbms/src/Storages/Page/PageUtil.cpp index 1a7ee25714f..bf33bbe4930 100644 --- a/dbms/src/Storages/Page/PageUtil.cpp +++ b/dbms/src/Storages/Page/PageUtil.cpp @@ -75,7 +75,6 @@ void writeFile(WritableFilePtr & file, UInt64 offset, char * data, size_t to_wri res = file->pwrite(data + bytes_written, to_write - bytes_written, offset + bytes_written); } -#ifndef NDEBUG // Can inject failpoint under debug mode fiu_do_on(FailPoints::force_set_page_file_write_errno, { if (enable_failpoint) @@ -84,7 +83,6 @@ void writeFile(WritableFilePtr & file, UInt64 offset, char * data, size_t to_wri errno = ENOSPC; } }); -#endif if ((-1 == res || 0 == res) && errno != EINTR) { ProfileEvents::increment(ProfileEvents::PSMWriteFailed); From 213fd48524a5bb8bcb44620e8867d3b9081d03fa Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 8 Jun 2021 21:39:51 +0800 Subject: [PATCH 3/6] Revert "Fix ingesting file may add ref page to deleted page (#2054) (#2055)" This reverts commit c773d6157ec7796a312639a9a0d2f8eb246fb3cb. --- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 121 +++++++++--------- dbms/src/Storages/DeltaMerge/File/DMFile.cpp | 49 +++---- dbms/src/Storages/DeltaMerge/File/DMFile.h | 9 +- .../DeltaMerge/tests/gtest_dm_file.cpp | 24 +--- 4 files changed, 81 insertions(+), 122 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 90c0577ebc6..3aa4fdfd759 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -167,8 +167,7 @@ DeltaMergeStore::DeltaMergeStore(Context & db_context, { LOG_INFO(log, "Restore DeltaMerge Store start [" << db_name << "." << table_name << "]"); - // Restore existing dm files and set capacity for path_pool. - // Should be done before any background task setup. + // restore existing dm files and set capacity for path_pool. restoreStableFiles(); original_table_columns.emplace_back(original_table_handle_define); @@ -237,13 +236,11 @@ void DeltaMergeStore::setUpBackgroundTask(const DMContextPtr & dm_context) auto dmfile_scanner = [=]() { PageStorage::PathAndIdsVec path_and_ids_vec; auto delegate = path_pool.getStableDiskDelegator(); - DMFile::ListOptions options; - options.only_list_can_gc = true; for (auto & root_path : delegate.listPaths()) { auto & path_and_ids = path_and_ids_vec.emplace_back(); path_and_ids.first = root_path; - auto file_ids_in_current_path = DMFile::listAllInPath(global_context.getFileProvider(), root_path, options); + auto file_ids_in_current_path = DMFile::listAllInPath(global_context.getFileProvider(), root_path, /* can_gc= */ true); for (auto id : file_ids_in_current_path) path_and_ids.second.insert(id); } @@ -578,35 +575,21 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, auto file_parent_path = delegate.getDTFilePath(file_id); auto file = DMFile::restore(file_provider, file_id, file_id, file_parent_path); + files.push_back(file); + rows += file->getRows(); bytes += file->getBytes(); bytes_on_disk += file->getBytesOnDisk(); - - files.emplace_back(std::move(file)); } LOG_INFO(log, - __FUNCTION__ << " table: " << db_name << "." 
<< table_name << ", rows: " << rows << ", bytes: " << bytes << ", bytes on disk: " - << bytes_on_disk << ", region range: " << range.toDebugString() << ", clear_data: " << clear_data_in_range); + __FUNCTION__ << " table: " << db_name << "." << table_name << ", rows: " << rows << ", bytes: " << bytes + << ", bytes on disk: " << bytes_on_disk << ", region range: " << range.toDebugString()); Segments updated_segments; RowKeyRange cur_range = range; - // Put the ingest file ids into `storage_pool` and use ref id in each segments to ensure the atomic - // of ingesting. - // Check https://github.com/pingcap/tics/issues/2040 for more details. - // TODO: If tiflash crash during the middle of ingesting, we may leave some DTFiles on disk and - // they can not be deleted. We should find a way to cleanup those files. - WriteBatches ingest_wbs(storage_pool); - if (files.size() > 0) - { - for (const auto & file : files) - { - ingest_wbs.data.putExternal(file->fileId(), 0); - } - ingest_wbs.writeLogAndData(); - ingest_wbs.setRollback(); // rollback if exception thrown - } + std::vector file_used(files.size(), 0); while (!cur_range.none()) { @@ -639,19 +622,38 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, DeltaPacks packs; WriteBatches wbs(storage_pool); - for (const auto & file : files) + std::vector my_file_used = file_used; + + for (size_t index = 0; index < files.size(); ++index) { - /// Generate DMFile instance with a new ref_id pointed to the file_id. - auto file_id = file->fileId(); - auto & file_parent_path = file->parentPath(); - auto ref_id = storage_pool.newDataPageId(); - - auto ref_file = DMFile::restore(file_provider, file_id, ref_id, file_parent_path); - auto pack = std::make_shared(*dm_context, ref_file, segment_range); - if (pack->getRows() != 0) + auto & file = files[index]; + + auto file_id = file->fileId(); + /// For the first segment, we use the original file_id and DMFile instance. + /// For the rest segments, we will generate another DMFile instance with a new ref_id pointed to the file_id. + if (!my_file_used[index]) + { + auto pack = std::make_shared(*dm_context, file, segment_range); + // All rows could be filtered out by segment_range. + if (pack->getRows()) + { + packs.push_back(pack); + wbs.data.putExternal(file_id, 0); + my_file_used[index] = 1; + } + } + else { - packs.emplace_back(std::move(pack)); - wbs.data.putRefPage(ref_id, file_id); + auto & file_parent_path = file->parentPath(); + auto ref_id = storage_pool.newDataPageId(); + + auto ref_file = DMFile::restore(file_provider, file_id, ref_id, file_parent_path); + auto pack = std::make_shared(*dm_context, ref_file, segment_range); + if (pack->getRows()) + { + packs.push_back(pack); + wbs.data.putRefPage(ref_id, file_id); + } } } @@ -664,6 +666,8 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, if (ingest_success) { updated_segments.push_back(segment); + // only update `file_used` after ingest_success + file_used.swap(my_file_used); fiu_do_on(FailPoints::segment_merge_after_ingest_packs, { segment->flushCache(*dm_context); segmentMergeDelta(*dm_context, segment, false); @@ -683,32 +687,26 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, // Enable gc for DTFile after all segment applied. // Note that we can not enable gc for them once they have applied to any segments. - // Assume that one segment get compacted after file ingested, `gc_handle` gc the - // DTFiles before they get applied to all segments. 
Then we will apply some + // Assume that one segment (call `s0`) get compacted after file ingested, `gc_handle` + // gc the DTFiles before they get applied to all segments. Then we will apply some // deleted DTFiles to other segments. - for (const auto & file : files) - file->enableGC(); - // After the ingest DTFiles applied, remove the original page - ingest_wbs.rollbackWrittenLogAndData(); + for (size_t index = 0; index < file_used.size(); ++index) + { + auto & file = files[index]; + if (file_used[index]) + file->enableGC(); + } { // Add some logging about the ingested file ids and updated segments std::stringstream ss; - // Example: "ingest dmf_1001,1002,1003 into segment [1,3]" - // "ingest into segment [1,3]" - if (file_ids.empty()) - { - ss << "ingest "; - } - else + // "ingest dmf_1001,1002,1003 into segment [1,3,5]" + ss << "ingest dmf_"; + for (size_t i = 0; i < file_ids.size(); ++i) { - ss << "ingest dmf_"; - for (size_t i = 0; i < file_ids.size(); ++i) - { - if (i != 0) - ss << ","; - ss << file_ids[i]; - } + if (i != 0) + ss << ","; + ss << file_ids[i]; } ss << " into segment ["; for (size_t i = 0; i < updated_segments.size(); ++i) @@ -718,8 +716,7 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, ss << updated_segments[i]->segmentId(); } ss << "]"; - LOG_INFO(log, - __FUNCTION__ << " table: " << db_name << "." << table_name << ", clear_data: " << clear_data_in_range << ", " << ss.str()); + LOG_INFO(log, __FUNCTION__ << " table: " << db_name << "." << table_name << ", " << ss.str()); } GET_METRIC(dm_context->metrics, tiflash_storage_throughput_bytes, type_ingest).Increment(bytes); @@ -1832,16 +1829,12 @@ void DeltaMergeStore::restoreStableFiles() { LOG_DEBUG(log, "Loading dt files"); - DMFile::ListOptions options; - options.only_list_can_gc = false; // We need all files to restore the bytes on disk - options.clean_up = true; - auto file_provider = global_context.getFileProvider(); - auto path_delegate = path_pool.getStableDiskDelegator(); + auto path_delegate = path_pool.getStableDiskDelegator(); for (const auto & root_path : path_delegate.listPaths()) { - for (auto & file_id : DMFile::listAllInPath(file_provider, root_path, options)) + for (auto & file_id : DMFile::listAllInPath(global_context.getFileProvider(), root_path, false)) { - auto dmfile = DMFile::restore(file_provider, file_id, /* ref_id= */ 0, root_path, true); + auto dmfile = DMFile::restore(global_context.getFileProvider(), file_id, /* ref_id= */ 0, root_path, true); path_delegate.addDTFile(file_id, dmfile->getBytesOnDisk(), root_path); } } diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp index 2fdaa7eb1b9..4d3af9f6142 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp @@ -383,8 +383,7 @@ void DMFile::finalizeForSingleFileMode(WriteBuffer & buffer) old_ngc_file.remove(); } -std::set -DMFile::listAllInPath(const FileProviderPtr & file_provider, const String & parent_path, const DMFile::ListOptions & options) +std::set DMFile::listAllInPath(const FileProviderPtr & file_provider, const String & parent_path, bool can_gc) { Poco::File folder(parent_path); if (!folder.exists()) @@ -407,40 +406,24 @@ DMFile::listAllInPath(const FileProviderPtr & file_provider, const String & pare for (const auto & name : file_names) { - // Clean up temporary files and files should be deleted - // Note that you should not do clean up if some DTFiles are writing, - // or you may delete some writing files - 
if (options.clean_up) + + // clear deleted (maybe broken) DMFiles + if (startsWith(name, details::FOLDER_PREFIX_DROPPED)) { - if (startsWith(name, details::FOLDER_PREFIX_WRITABLE)) + auto res = try_parse_file_id(name); + if (!res) { - // Clear temporary files - const auto full_path = parent_path + "/" + name; - if (Poco::File temp_file(full_path); temp_file.exists()) - temp_file.remove(true); - LOG_WARNING(log, __PRETTY_FUNCTION__ << ": Existing temporary dmfile, removed: " << full_path); - continue; - } - else if (startsWith(name, details::FOLDER_PREFIX_DROPPED)) - { - // Clear deleted (maybe broken) DTFiles - auto res = try_parse_file_id(name); - if (!res) - { - LOG_INFO(log, "Unrecognized dropped DM file, ignored: " + name); - continue; - } - UInt64 file_id = *res; - // The encryption info use readable path. We are not sure the encryption info is deleted or not. - // Try to delete and ignore if it is already deleted. - const String readable_path = getPathByStatus(parent_path, file_id, DMFile::Status::READABLE); - file_provider->deleteEncryptionInfo(EncryptionPath(readable_path, ""), /* throw_on_error= */ false); - const auto full_path = parent_path + "/" + name; - if (Poco::File del_file(full_path); del_file.exists()) - del_file.remove(true); - LOG_WARNING(log, __PRETTY_FUNCTION__ << ": Existing dropped dmfile, removed: " << full_path); + LOG_INFO(log, "Unrecognized dropped DM file, ignored: " + name); continue; } + UInt64 file_id = *res; + // The encryption info use readable path. We are not sure the encryption info is deleted or not. + // Try to delete and ignore if it is already deleted. + const String readable_path = getPathByStatus(parent_path, file_id, DMFile::Status::READABLE); + file_provider->deleteEncryptionInfo(EncryptionPath(readable_path, ""), /* throw_on_error= */ false); + if (Poco::File del_file(parent_path + "/" + name); del_file.exists()) + del_file.remove(true); + continue; } if (!startsWith(name, details::FOLDER_PREFIX_READABLE)) @@ -456,7 +439,7 @@ DMFile::listAllInPath(const FileProviderPtr & file_provider, const String & pare } UInt64 file_id = *res; - if (options.only_list_can_gc) + if (can_gc) { // Only return the ID if the file is able to be GC-ed. 
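// (A file becomes able to be GC-ed only after `enableGC()` is called on it,
// which `ingestFiles` does once the file has been applied to segments; this
// keeps the GC thread from removing files that are still being written or
// ingested.)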
const auto file_path = parent_path + "/" + name; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.h b/dbms/src/Storages/DeltaMerge/File/DMFile.h index 6051d8e4153..d09815b4505 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.h @@ -108,14 +108,7 @@ class DMFile : private boost::noncopyable static DMFilePtr restore(const FileProviderPtr & file_provider, UInt64 file_id, UInt64 ref_id, const String & parent_path, bool read_meta = true); - struct ListOptions - { - // Only return the DTFiles id list that can be GC - bool only_list_can_gc = true; - // Try to clean up temporary / dropped files - bool clean_up = false; - }; - static std::set listAllInPath(const FileProviderPtr & file_provider, const String & parent_path, const ListOptions & options); + static std::set listAllInPath(const FileProviderPtr & file_provider, const String & parent_path, bool can_gc); // static helper function for getting path static String getPathByStatus(const String & parent_path, UInt64 file_id, DMFile::Status status); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index eeac2418300..065af0a5450 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -252,9 +252,7 @@ try dm_file = DMFile::create(id, parent_path, single_file_mode); // Right after created, the fil is not abled to GC and it is ignored by `listAllInPath` EXPECT_FALSE(dm_file->canGC()); - DMFile::ListOptions options; - options.only_list_can_gc = true; - auto scanIds = DMFile::listAllInPath(file_provider, parent_path, options); + auto scanIds = DMFile::listAllInPath(file_provider, parent_path, /*can_gc=*/true); ASSERT_TRUE(scanIds.empty()); { @@ -272,24 +270,20 @@ try // The file remains not able to GC ASSERT_FALSE(dm_file->canGC()); - options.only_list_can_gc = false; // Now the file can be scaned - scanIds = DMFile::listAllInPath(file_provider, parent_path, options); + scanIds = DMFile::listAllInPath(file_provider, parent_path, /*can_gc=*/false); ASSERT_EQ(scanIds.size(), 1UL); EXPECT_EQ(*scanIds.begin(), id); - options.only_list_can_gc = true; - scanIds = DMFile::listAllInPath(file_provider, parent_path, options); + scanIds = DMFile::listAllInPath(file_provider, parent_path, /*can_gc=*/true); EXPECT_TRUE(scanIds.empty()); // After enable GC, the file can be scaned with `can_gc=true` dm_file->enableGC(); ASSERT_TRUE(dm_file->canGC()); - options.only_list_can_gc = false; - scanIds = DMFile::listAllInPath(file_provider, parent_path, options); + scanIds = DMFile::listAllInPath(file_provider, parent_path, /*can_gc=*/false); ASSERT_EQ(scanIds.size(), 1UL); EXPECT_EQ(*scanIds.begin(), id); - options.only_list_can_gc = true; - scanIds = DMFile::listAllInPath(file_provider, parent_path, options); + scanIds = DMFile::listAllInPath(file_provider, parent_path, /*can_gc=*/true); ASSERT_EQ(scanIds.size(), 1UL); EXPECT_EQ(*scanIds.begin(), id); } @@ -361,9 +355,7 @@ try } // The broken file is ignored - DMFile::ListOptions options; - options.only_list_can_gc = true; - auto res = DMFile::listAllInPath(file_provider, parent_path, options); + auto res = DMFile::listAllInPath(file_provider, parent_path, true); EXPECT_TRUE(res.empty()); } CATCH @@ -431,9 +423,7 @@ try } // The broken file is ignored - DMFile::ListOptions options; - options.only_list_can_gc = true; - auto res = DMFile::listAllInPath(file_provider, parent_path, options); + auto res = 
DMFile::listAllInPath(file_provider, parent_path, true); EXPECT_TRUE(res.empty()); } CATCH From 31952b738085b5ae430934a5133ce637a0659cb0 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 8 Jun 2021 21:40:44 +0800 Subject: [PATCH 4/6] Revert "Fix bug for unexpected deleted ingest file (#2047) (#2048)" This reverts commit b329618062c63f7794e1e20d95dd49aadda87778. --- dbms/src/Common/FailPoint.cpp | 4 +- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 71 +++------ .../src/Storages/DeltaMerge/DeltaMergeStore.h | 20 +-- .../SSTFilesToDTFilesOutputStream.cpp | 4 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 1 + dbms/src/Storages/DeltaMerge/StoragePool.cpp | 10 +- .../tests/gtest_dm_delta_merge_store.cpp | 144 +----------------- 7 files changed, 40 insertions(+), 214 deletions(-) diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index 73058499b56..c1958b2e96e 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -40,9 +40,7 @@ std::unordered_map> FailPointHelper::f M(exception_during_write_to_storage) \ M(force_set_sst_to_dtfile_block_size) \ M(force_set_sst_decode_rand) \ - M(exception_before_page_file_write_sync) \ - M(force_set_segment_ingest_packs_fail) \ - M(segment_merge_after_ingest_packs) + M(exception_before_page_file_write_sync) #define APPLY_FOR_FAILPOINTS(M) M(force_set_page_file_write_errno) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 3aa4fdfd759..24435335454 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -66,8 +66,6 @@ extern const char pause_when_ingesting_to_dt_store[]; extern const char pause_when_altering_dt_store[]; extern const char force_triggle_background_merge_delta[]; extern const char force_triggle_foreground_flush[]; -extern const char force_set_segment_ingest_packs_fail[]; -extern const char segment_merge_after_ingest_packs[]; } // namespace FailPoints namespace DM @@ -546,10 +544,10 @@ void DeltaMergeStore::preIngestFile(const String & parent_path, const PageId fil delegator.addDTFile(file_id, file_size, parent_path); } -void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, - const RowKeyRange & range, - const std::vector & file_ids, - bool clear_data_in_range) +void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, + const RowKeyRange & range, + std::vector file_ids, + bool clear_data_in_range) { if (unlikely(shutdown_called.load(std::memory_order_relaxed))) { @@ -560,6 +558,8 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, throw Exception(msg); } + LOG_INFO(log, __FUNCTION__ << " table: " << db_name << "." << table_name << ", region range:" << range.toDebugString()); + EventRecorder write_block_recorder(ProfileEvents::DMWriteFile, ProfileEvents::DMWriteFileNS); auto delegate = dm_context->path_pool.getStableDiskDelegator(); @@ -582,9 +582,9 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, bytes_on_disk += file->getBytesOnDisk(); } - LOG_INFO(log, - __FUNCTION__ << " table: " << db_name << "." << table_name << ", rows: " << rows << ", bytes: " << bytes - << ", bytes on disk: " << bytes_on_disk << ", region range: " << range.toDebugString()); + LOG_DEBUG(log, + __FUNCTION__ << " table: " << db_name << "." 
<< table_name << ", rows: " << rows << ", bytes: " << bytes << ", bytes on disk" + << bytes_on_disk); Segments updated_segments; RowKeyRange cur_range = range; @@ -661,18 +661,17 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, // they are visible for readers who require file_ids to be found in PageStorage. wbs.writeLogAndData(); - bool ingest_success = segment->ingestPacks(*dm_context, range.shrink(segment_range), packs, clear_data_in_range); - fiu_do_on(FailPoints::force_set_segment_ingest_packs_fail, { ingest_success = false; }); - if (ingest_success) + if (segment->ingestPacks(*dm_context, range.shrink(segment_range), packs, clear_data_in_range)) { updated_segments.push_back(segment); - // only update `file_used` after ingest_success + // Enable gc for DTFile once it has been committed. + for (size_t index = 0; index < my_file_used.size(); ++index) + { + auto & file = files[index]; + if (my_file_used[index]) + file->enableGC(); + } file_used.swap(my_file_used); - fiu_do_on(FailPoints::segment_merge_after_ingest_packs, { - segment->flushCache(*dm_context); - segmentMergeDelta(*dm_context, segment, false); - storage_pool.gc(global_context.getSettingsRef(), StoragePool::Seconds(0)); - }); break; } else @@ -685,40 +684,6 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, cur_range.setEnd(range.end); } - // Enable gc for DTFile after all segment applied. - // Note that we can not enable gc for them once they have applied to any segments. - // Assume that one segment (call `s0`) get compacted after file ingested, `gc_handle` - // gc the DTFiles before they get applied to all segments. Then we will apply some - // deleted DTFiles to other segments. - for (size_t index = 0; index < file_used.size(); ++index) - { - auto & file = files[index]; - if (file_used[index]) - file->enableGC(); - } - - { - // Add some logging about the ingested file ids and updated segments - std::stringstream ss; - // "ingest dmf_1001,1002,1003 into segment [1,3,5]" - ss << "ingest dmf_"; - for (size_t i = 0; i < file_ids.size(); ++i) - { - if (i != 0) - ss << ","; - ss << file_ids[i]; - } - ss << " into segment ["; - for (size_t i = 0; i < updated_segments.size(); ++i) - { - if (i != 0) - ss << ","; - ss << updated_segments[i]->segmentId(); - } - ss << "]"; - LOG_INFO(log, __FUNCTION__ << " table: " << db_name << "." << table_name << ", " << ss.str()); - } - GET_METRIC(dm_context->metrics, tiflash_storage_throughput_bytes, type_ingest).Increment(bytes); GET_METRIC(dm_context->metrics, tiflash_storage_throughput_rows, type_ingest).Increment(rows); @@ -1303,7 +1268,7 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) /* ignore_cache= */ false, global_context.getSettingsRef().safe_point_update_interval_seconds); - LOG_DEBUG(log, "Task " << toString(task.type) << " GC safe point: " << safe_point); + LOG_DEBUG(log, "Task" << toString(task.type) << " GC safe point: " << safe_point); // Foreground task don't get GC safe point from remote, but we better make it as up to date as possible. 
latest_gc_safe_point = safe_point; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 4438bdfe691..5cd212a00fb 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -270,16 +270,16 @@ class DeltaMergeStore : private boost::noncopyable void preIngestFile(const String & parent_path, const PageId file_id, size_t file_size); - void ingestFiles(const DMContextPtr & dm_context, // - const RowKeyRange & range, - const std::vector & file_ids, - bool clear_data_in_range); - - void ingestFiles(const Context & db_context, // - const DB::Settings & db_settings, - const RowKeyRange & range, - const std::vector & file_ids, - bool clear_data_in_range) + void ingestFiles(const DMContextPtr & dm_context, // + const RowKeyRange & range, + std::vector file_ids, + bool clear_data_in_range); + + void ingestFiles(const Context & db_context, // + const DB::Settings & db_settings, + const RowKeyRange & range, + std::vector file_ids, + bool clear_data_in_range) { auto dm_context = newDMContext(db_context, db_settings); return ingestFiles(dm_context, range, file_ids, clear_data_in_range); diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp index 41fd80e6cad..be0c61d6523 100644 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp @@ -120,9 +120,7 @@ bool SSTFilesToDTFilesOutputStream::newDTFileStream() return false; } auto dt_file = DMFile::create(file_id, parent_path, flags.isSingleFile()); - LOG_INFO(log, - "Create file for snapshot data " << child->getRegion()->toString(true) << " [file=" << dt_file->path() - << "] [single_file_mode=" << flags.isSingleFile() << "]"); + LOG_INFO(log, "Create file for snapshot data [file=" << dt_file->path() << "] [single_file_mode=" << flags.isSingleFile() << "]"); dt_stream = std::make_unique(tmt.getContext(), dt_file, *schema_snap, flags); dt_stream->writePrefix(); ingest_files.emplace_back(dt_file); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 55471b8fb7f..90c164a96f1 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -1,3 +1,4 @@ + #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index f7ab57d29f0..46e653379dc 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -105,22 +105,22 @@ bool StoragePool::gc(const Settings & /*settings*/, const Seconds & try_gc_perio last_try_gc_time = now; } - bool done_anything = false; + bool ok = false; // FIXME: The global_context.settings is mutable, we need a way to reload thses settings. 
// auto config = extractConfig(settings, StorageType::Meta); // meta_storage.reloadSettings(config); - done_anything |= meta_storage.gc(); + ok |= meta_storage.gc(); // config = extractConfig(settings, StorageType::Data); // data_storage.reloadSettings(config); - done_anything |= data_storage.gc(); + ok |= data_storage.gc(); // config = extractConfig(settings, StorageType::Log); // log_storage.reloadSettings(config); - done_anything |= log_storage.gc(); + ok |= log_storage.gc(); - return done_anything; + return ok; } } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 87d526434ed..371da5ee020 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -27,9 +27,6 @@ extern const char pause_before_dt_background_delta_merge[]; extern const char pause_until_dt_background_delta_merge[]; extern const char force_triggle_background_merge_delta[]; extern const char force_triggle_foreground_flush[]; -extern const char force_set_segment_ingest_packs_fail[]; -extern const char segment_merge_after_ingest_packs[]; -extern const char force_set_segment_physical_split[]; } // namespace FailPoints namespace DM @@ -876,7 +873,7 @@ try { // Prepare DTFiles for ingesting - auto dm_context = store->newDMContext(*context, context->getSettingsRef()); + auto dm_context = store->newDMContext(*context, context->getSettingsRef()); auto [range1, file_ids1] = genDMFile(*dm_context, DMTestEnv::prepareSimpleWriteBlock(32, 48, false, tso2)); auto [range2, file_ids2] = genDMFile(*dm_context, DMTestEnv::prepareSimpleWriteBlock(80, 256, false, tso3)); @@ -1038,139 +1035,6 @@ try } CATCH -TEST_P(DeltaMergeStore_test, IngestWithFail) -try -{ - if (mode == TestMode::V1_BlockOnly) - return; - - const UInt64 tso1 = 4; - const size_t num_rows_before_ingest = 128; - // Write to store [0, 128) - { - Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_before_ingest, false, tso1); - store->write(*context, context->getSettingsRef(), std::move(block)); - - auto dm_context = store->newDMContext(*context, context->getSettingsRef()); - store->flushCache(dm_context, RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())); - - SegmentPtr seg; - std::tie(std::ignore, seg) = *store->segments.begin(); - store->segmentSplit(*dm_context, seg, /*is_foreground*/ true); - } - - const UInt64 tso2 = 10; - - { - // Prepare DTFiles for ingesting - auto dm_context = store->newDMContext(*context, context->getSettingsRef()); - auto [ingest_range, file_ids] = genDMFile(*dm_context, DMTestEnv::prepareSimpleWriteBlock(32, 128, false, tso2)); - // Enable failpoint for testing - FailPointHelper::enableFailPoint(FailPoints::force_set_segment_ingest_packs_fail); - FailPointHelper::enableFailPoint(FailPoints::segment_merge_after_ingest_packs); - store->ingestFiles(dm_context, ingest_range, file_ids, /*clear_data_in_range*/ true); - } - - - // After ingesting, the data in [32, 128) should be overwrite by the data in ingested files. 
- { - // Read all data <= tso1 - // We can only get [0, 32) with tso1 - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ tso1, - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32) << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; - } - - { - // Read all data between [tso1, tso2) - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ tso2 - 1, - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32) << "Data [32, 128) after ingest with tso less than: " << tso2 - << " are erased, should only get [0, 32)"; - } - - { - // Read all data between [tso2, max) - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ std::numeric_limits::max(), - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32 + 128 - 32) << "The number of rows after ingest does not match"; - } -} -CATCH - TEST_P(DeltaMergeStore_test, IngestEmptyFileLists) try { @@ -1188,10 +1052,10 @@ try // Test that if we ingest an empty file list, the data in range will be removed.
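Aside: with an empty file list, ingestFiles degenerates into a plain range deletion, because the only thing appended to the delta is the leading delete-range pack. A sketch of that shape, using simplified `Pack` types rather than the real DeltaPack hierarchy; the test body, starting with the ingest range, continues right after it:

    #include <memory>
    #include <vector>

    struct Pack { virtual ~Pack() = default; };
    struct DeleteRangePack : Pack
    {
        int begin, end;
        DeleteRangePack(int b, int e) : begin(b), end(e) {}
    };
    using PackPtr = std::shared_ptr<Pack>;

    // Mirrors the shape of the ingest path: an optional delete range first, then the
    // file packs. With `packs` empty, only the delete range lands in the delta.
    void ingestPacks(std::vector<PackPtr> & delta, int begin, int end,
                     const std::vector<PackPtr> & packs, bool clear_data_in_range)
    {
        if (clear_data_in_range)
            delta.push_back(std::make_shared<DeleteRangePack>(begin, end));
        delta.insert(delta.end(), packs.begin(), packs.end());
    }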
// The ingest range is [32, 256) { - auto dm_context = store->newDMContext(*context, context->getSettingsRef()); + auto dm_context = store->newDMContext(*context, context->getSettingsRef()); - std::vector file_ids; - auto ingest_range = RowKeyRange::fromHandleRange(HandleRange{32, 256}); + std::vector file_ids ; + auto ingest_range = RowKeyRange::fromHandleRange(HandleRange{32, 256}); store->ingestFiles(dm_context, ingest_range, file_ids, /*clear_data_in_range*/ true); } From cb448defa2f6765f0ff77f6cf72c5bca8e041e74 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 8 Jun 2021 22:37:32 +0800 Subject: [PATCH 5/6] Revert "Pre-handle SSTFiles to DTFiles when applying snapshots (#1439) (#1867)" This reverts commit c947fd65b9e306c9d8a5698ca09d159508fb21c3. Conflicts: dbms/src/Common/FailPoint.cpp dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h dbms/src/Storages/DeltaMerge/File/DMFileWriter.h dbms/src/Storages/DeltaMerge/ReorganizeBlockInputStream.h dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.cpp dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp dbms/src/Storages/DeltaMerge/Segment.cpp dbms/src/Storages/DeltaMerge/StableValueSpace.cpp dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp dbms/src/Storages/StorageDeltaMerge.cpp dbms/src/Storages/StorageDeltaMerge.h dbms/src/Storages/Transaction/ApplySnapshot.cpp dbms/src/Storages/Transaction/PartitionStreams.cpp tests/delta-merge-test/raft/schema/drop_on_restart.test --- .github/pull_request_template.md | 10 +- dbms/src/Common/ErrorCodes.cpp | 1 - dbms/src/Common/FailPoint.cpp | 8 +- dbms/src/Common/TiFlashMetrics.h | 4 +- .../AggregatingBlockInputStream.cpp | 1 + .../DataStreams/BlocksListBlockInputStream.h | 4 +- dbms/src/DataStreams/IBlockInputStream.h | 2 +- .../ParallelAggregatingBlockInputStream.cpp | 1 + .../tests/gtest_data_type_get_common_type.cpp | 5 + dbms/src/Debug/DBGInvoker.cpp | 6 +- dbms/src/Debug/dbgFuncCoprocessor.cpp | 7 +- dbms/src/Debug/dbgFuncCoprocessor.h | 5 - dbms/src/Debug/dbgFuncMockRaftCommand.h | 10 - dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp | 127 +----- dbms/src/Debug/dbgFuncMockTiDBTable.h | 2 +- dbms/src/Debug/dbgFuncRegion.cpp | 4 +- dbms/src/Debug/dbgTools.cpp | 2 +- .../tests/gtest_arithmetic_functions.cpp | 1 + .../tests/gtest_datetime_extract.cpp | 1 + .../src/Functions/tests/gtest_strings_pad.cpp | 1 + .../Functions/tests/gtest_strings_trim.cpp | 1 + dbms/src/Server/RaftConfigParser.cpp | 45 +-- dbms/src/Server/RaftConfigParser.h | 2 - dbms/src/Server/Server.cpp | 5 - .../DMVersionFilterBlockInputStream.cpp | 19 +- .../DMVersionFilterBlockInputStream.h | 3 - .../Storages/DeltaMerge/Delta/DeltaPack.cpp | 9 +- .../src/Storages/DeltaMerge/Delta/DeltaPack.h | 2 +- .../DeltaMerge/Delta/DeltaPackBlock.cpp | 6 +- .../DeltaMerge/Delta/DeltaPackBlock.h | 2 +- .../DeltaMerge/Delta/DeltaPackFile.cpp | 45 +-- .../Storages/DeltaMerge/Delta/DeltaPackFile.h | 3 +- .../DeltaMerge/Delta/DeltaPack_V3.cpp | 8 +- .../DeltaMerge/Delta/DeltaValueSpace.cpp | 15 +- .../DeltaMerge/Delta/DeltaValueSpace.h | 2 +- dbms/src/Storages/DeltaMerge/Delta/Pack.cpp | 310 ++++++++++++++ dbms/src/Storages/DeltaMerge/Delta/Pack.h | 41 ++ .../Storages/DeltaMerge/Delta/Snapshot.cpp | 2 +- dbms/src/Storages/DeltaMerge/DeltaMerge.h | 2 +- .../Storages/DeltaMerge/DeltaMergeHelpers.h | 2 +- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 97 ++--- 
.../src/Storages/DeltaMerge/DeltaMergeStore.h | 47 +-- dbms/src/Storages/DeltaMerge/File/DMFile.cpp | 2 +- .../DeltaMerge/File/DMFileBlockOutputStream.h | 32 +- .../DeltaMerge/File/DMFilePackFilter.h | 14 +- .../Storages/DeltaMerge/File/DMFileWriter.cpp | 61 +-- .../Storages/DeltaMerge/File/DMFileWriter.h | 67 +-- ...tStream.h => ReorganizeBlockInputStream.h} | 88 ++-- .../DeltaMerge/SSTFilesToBlockInputStream.cpp | 247 ------------ .../DeltaMerge/SSTFilesToBlockInputStream.h | 137 ------- .../SSTFilesToDTFilesOutputStream.cpp | 212 ---------- .../SSTFilesToDTFilesOutputStream.h | 91 ----- dbms/src/Storages/DeltaMerge/Segment.cpp | 44 +- dbms/src/Storages/DeltaMerge/Segment.h | 9 +- dbms/src/Storages/DeltaMerge/StoragePool.h | 2 +- .../DeltaMerge/tests/dm_basic_include.h | 43 +- .../DeltaMerge/tests/gtest_data_streams.cpp | 93 ----- .../tests/gtest_dm_delta_index_manager.cpp | 1 + .../tests/gtest_dm_delta_merge_store.cpp | 380 +++--------------- .../DeltaMerge/tests/gtest_dm_file.cpp | 19 - .../tests/gtest_dm_minmax_index.cpp | 2 +- .../DeltaMerge/tests/gtest_dm_region.cpp | 4 +- .../DeltaMerge/tests/gtest_dm_segment.cpp | 21 +- .../tests/gtest_dm_storage_delta_merge.cpp | 2 +- dbms/src/Storages/FormatVersion.h | 4 +- dbms/src/Storages/Page/PageStorage.h | 4 +- .../Page/tests/gtest_page_map_version_set.cpp | 3 + dbms/src/Storages/StorageDeltaMerge.cpp | 12 +- dbms/src/Storages/StorageDeltaMerge.h | 4 - .../Storages/Transaction/ApplySnapshot.cpp | 313 +++------------ dbms/src/Storages/Transaction/KVStore.cpp | 7 +- dbms/src/Storages/Transaction/KVStore.h | 33 +- .../Storages/Transaction/PartitionStreams.cpp | 164 +------- dbms/src/Storages/Transaction/ProxyFFI.cpp | 72 +--- dbms/src/Storages/Transaction/ProxyFFI.h | 3 +- dbms/src/Storages/Transaction/Region.cpp | 35 +- dbms/src/Storages/Transaction/Region.h | 8 +- dbms/src/Storages/Transaction/RegionData.cpp | 15 +- dbms/src/Storages/Transaction/RegionData.h | 1 - dbms/src/Storages/Transaction/RegionTable.h | 22 - dbms/src/Storages/Transaction/RowCodec.cpp | 13 +- .../Storages/Transaction/StorageEngineType.h | 27 -- dbms/src/Storages/Transaction/TMTContext.cpp | 2 +- .../Transaction/tests/gtest_table_info.cpp | 1 + .../Transaction/tests/gtest_type_mapping.cpp | 1 + dbms/src/TestUtils/TiFlashTestBasic.h | 1 - dbms/src/TestUtils/gtests_dbms_main.cpp | 1 + metrics/grafana/tiflash_summary.json | 82 +--- tests/delta-merge-test/raft/ingest_sst.test | 3 +- .../raft/snapshot_dtfile.test | 168 -------- tests/docker/config/tics_tmt.toml | 2 - tests/docker/config/tiflash_dt.toml | 5 - 92 files changed, 796 insertions(+), 2651 deletions(-) create mode 100644 dbms/src/Storages/DeltaMerge/Delta/Pack.cpp create mode 100644 dbms/src/Storages/DeltaMerge/Delta/Pack.h rename dbms/src/Storages/DeltaMerge/{PKSquashingBlockInputStream.h => ReorganizeBlockInputStream.h} (60%) delete mode 100644 dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.cpp delete mode 100644 dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h delete mode 100644 dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp delete mode 100644 dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.h delete mode 100644 dbms/src/Storages/DeltaMerge/tests/gtest_data_streams.cpp delete mode 100644 tests/delta-merge-test/raft/snapshot_dtfile.test diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 923505d93fa..747ff1d91cc 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -26,12 +26,10 @@ Tests Side effects - 
+# - Performance regression +# - Consumes more CPU +# - Consumes more MEM +# - Breaking backward compatibility ### Release note diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index a253d2cf3f6..4a209cbb464 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -390,7 +390,6 @@ namespace ErrorCodes extern const int PAGE_SIZE_NOT_MATCH = 9006; extern const int ILLFORMED_PAGE_NAME = 9007; extern const int ILLFORMAT_RAFT_ROW = 9008; - extern const int REGION_DATA_SCHEMA_UPDATED = 9009; extern const int LOCK_EXCEPTION = 10000; extern const int VERSION_ERROR = 10001; diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index c1958b2e96e..d3905e0d3b2 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -40,9 +40,13 @@ std::unordered_map> FailPointHelper::f M(exception_during_write_to_storage) \ M(force_set_sst_to_dtfile_block_size) \ M(force_set_sst_decode_rand) \ - M(exception_before_page_file_write_sync) + M(exception_before_page_file_write_sync) \ + M(force_set_segment_ingest_packs_fail) \ + M(segment_merge_after_ingest_packs) -#define APPLY_FOR_FAILPOINTS(M) M(force_set_page_file_write_errno) +#define APPLY_FOR_FAILPOINTS(M) \ + M(force_set_page_file_write_errno) \ + M(minimum_block_size_for_cross_join) #define APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) \ M(pause_after_learner_read) \ diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 3530fae4009..fb6441db84b 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -84,7 +84,7 @@ namespace DB M(tiflash_storage_write_amplification, "The data write amplification in storage engine", Gauge) \ M(tiflash_storage_read_tasks_count, "Total number of storage engine read tasks", Counter) \ M(tiflash_storage_command_count, "Total number of storage's command, such as delete range / shutdown /startup", Counter, \ - F(type_delete_range, {"type", "delete_range"}), F(type_ingest, {"type", "ingest"})) \ + F(type_delete_range, {"type", "delete_range"})) \ M(tiflash_storage_subtask_count, "Total number of storage's sub task", Counter, F(type_delta_merge, {"type", "delta_merge"}), \ F(type_delta_merge_fg, {"type", "delta_merge_fg"}), F(type_delta_compact, {"type", "delta_compact"}), \ F(type_delta_flush, {"type", "delta_flush"}),F(type_seg_split, {"type", "seg_split"}), F(type_seg_merge, {"type", "seg_merge"}), \ @@ -99,13 +99,11 @@ namespace DB F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.0005, 2, 20})) \ M(tiflash_storage_throughput_bytes, "Calculate the throughput of tasks of storage in bytes", Gauge, /**/ \ F(type_write, {"type", "write"}), /**/ \ - F(type_ingest, {"type", "ingest"}), /**/ \ F(type_delta_merge, {"type", "delta_merge"}), /**/ \ F(type_split, {"type", "split"}), /**/ \ F(type_merge, {"type", "merge"})) /**/ \ M(tiflash_storage_throughput_rows, "Calculate the throughput of tasks of storage in rows", Gauge, /**/ \ F(type_write, {"type", "write"}), /**/ \ - F(type_ingest, {"type", "ingest"}), /**/ \ F(type_delta_merge, {"type", "delta_merge"}), /**/ \ F(type_split, {"type", "split"}), /**/ \ F(type_merge, {"type", "merge"})) /**/ \ diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp index 94f9df88dc9..2eb8fec6762 100644 --- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp @@ -1,5 +1,6 @@ #include +#include #include 
#include #include diff --git a/dbms/src/DataStreams/BlocksListBlockInputStream.h b/dbms/src/DataStreams/BlocksListBlockInputStream.h index 22751ac22de..df7feaacda2 100644 --- a/dbms/src/DataStreams/BlocksListBlockInputStream.h +++ b/dbms/src/DataStreams/BlocksListBlockInputStream.h @@ -17,7 +17,7 @@ class BlocksListBlockInputStream : public IProfilingBlockInputStream : list(std::move(list_)), it(list.begin()), end(list.end()) {} /// Uses a list of blocks lying somewhere else. - BlocksListBlockInputStream(const BlocksList::iterator & begin_, const BlocksList::iterator & end_) + BlocksListBlockInputStream(BlocksList::iterator & begin_, BlocksList::iterator & end_) : it(begin_), end(end_) {} String getName() const override { return "BlocksList"; } @@ -27,7 +27,7 @@ class BlocksListBlockInputStream : public IProfilingBlockInputStream Block res; if (!list.empty()) for (const auto & elem : list.front()) - res.insert({elem.column->cloneEmpty(), elem.type, elem.name, elem.column_id}); + res.insert({ elem.column->cloneEmpty(), elem.type, elem.name }); return res; } diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index e51dba62f41..0bcfd52b13a 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -121,7 +121,7 @@ class IBlockInputStream : private boost::noncopyable protected: BlockInputStreams children; - mutable std::shared_mutex children_mutex; + std::shared_mutex children_mutex; private: TableLockHolders table_locks; diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index 51f242fa2ab..a21e1197028 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp b/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp index 715e18b419f..53a82de0a53 100644 --- a/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp +++ b/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp @@ -6,6 +6,11 @@ #include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" +#include +#pragma GCC diagnostic pop + namespace DB { namespace tests diff --git a/dbms/src/Debug/DBGInvoker.cpp b/dbms/src/Debug/DBGInvoker.cpp index d2ddf57c93c..7b00fd5ad34 100644 --- a/dbms/src/Debug/DBGInvoker.cpp +++ b/dbms/src/Debug/DBGInvoker.cpp @@ -85,10 +85,8 @@ DBGInvoker::DBGInvoker() regSchemalessFunc("region_snapshot", MockRaftCommand::dbgFuncRegionSnapshot); regSchemalessFunc("region_snapshot_data", MockRaftCommand::dbgFuncRegionSnapshotWithData); - regSchemalessFunc("region_snapshot_pre_handle_block", /**/ MockRaftCommand::dbgFuncRegionSnapshotPreHandleBlock); - regSchemalessFunc("region_snapshot_apply_block", /* */ MockRaftCommand::dbgFuncRegionSnapshotApplyBlock); - regSchemalessFunc("region_snapshot_pre_handle_file", /* */ MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFiles); - regSchemalessFunc("region_snapshot_apply_file", /* */ MockRaftCommand::dbgFuncRegionSnapshotApplyDTFiles); + regSchemalessFunc("region_snapshot_pre_handle_block", MockRaftCommand::dbgFuncRegionSnapshotPreHandleBlock); + regSchemalessFunc("region_snapshot_apply_block", MockRaftCommand::dbgFuncRegionSnapshotApplyBlock); regSchemalessFunc("region_ingest_sst", MockRaftCommand::dbgFuncIngestSST); regSchemalessFunc("init_fail_point", 
DbgFailPointFunc::dbgInitFailPoint); diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index d5276a372c8..43563f8aa1d 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -59,12 +59,7 @@ static const String MPP_QUERY = "mpp_query"; static const String USE_BROADCAST_JOIN = "use_broadcast_join"; static const String MPP_PARTITION_NUM = "mpp_partition_num"; static const String MPP_TIMEOUT = "mpp_timeout"; -static String LOCAL_HOST = "127.0.0.1:3930"; - -namespace Debug -{ -void setServiceAddr(const std::string & addr) { LOCAL_HOST = addr; } -} // namespace Debug +static const String LOCAL_HOST = "127.0.0.1:3930"; struct DAGProperties { diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index e04a280df16..6781f0254a8 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -20,9 +20,4 @@ BlockInputStreamPtr dbgFuncTiDBQuery(Context & context, const ASTs & args); // ./storages-client.sh "DBGInvoke mock_dag(query, region_id[, start_ts])" BlockInputStreamPtr dbgFuncMockTiDBQuery(Context & context, const ASTs & args); -namespace Debug -{ -void setServiceAddr(const std::string & addr); -} - } // namespace DB diff --git a/dbms/src/Debug/dbgFuncMockRaftCommand.h b/dbms/src/Debug/dbgFuncMockRaftCommand.h index 7a3dd8aa1d2..27912cecf4d 100644 --- a/dbms/src/Debug/dbgFuncMockRaftCommand.h +++ b/dbms/src/Debug/dbgFuncMockRaftCommand.h @@ -57,16 +57,6 @@ struct MockRaftCommand // Usage: // ./storages-client.sh "DBGInvoke region_snapshot_apply_block(region_id)" static void dbgFuncRegionSnapshotApplyBlock(Context & context, const ASTs & args, DBGInvoker::Printer output); - - // Simulate a region pre-handle snapshot data to DTFiles - // Usage: - // ./storage-client.sh "DBGInvoke region_snapshot_pre_handle_file(database_name, table_name, region_id, start, end, schema_string, pk_name[, test-fields=1, cfs="write,default"])" - static void dbgFuncRegionSnapshotPreHandleDTFiles(Context & context, const ASTs & args, DBGInvoker::Printer output); - - // Apply snapshot for a region. 
(apply a pre-handle snapshot) - // Usage: - // ./storages-client.sh "DBGInvoke region_snapshot_apply_file(region_id)" - static void dbgFuncRegionSnapshotApplyDTFiles(Context & context, const ASTs & args, DBGInvoker::Printer output); }; } // namespace DB diff --git a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp index 1757720621b..61fe36801f9 100644 --- a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp @@ -29,9 +29,8 @@ namespace DB namespace FailPoints { -extern const char force_set_sst_to_dtfile_block_size[]; -extern const char force_set_sst_decode_rand[]; -} // namespace FailPoints +extern const char force_set_prehandle_dtfile_block_size[]; +} namespace ErrorCodes { @@ -144,7 +143,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotWithData(Context & context, const AST // Mock to apply a snapshot with data in `region` auto & tmt = context.getTMTContext(); - context.getTMTContext().getKVStore()->checkAndApplySnapshot(region, tmt); + context.getTMTContext().getKVStore()->checkAndApplySnapshot(region, tmt); std::stringstream ss; ss << "put region #" << region_id << ", range" << range_string << " to table #" << table_id << " with " << cnt << " records"; output(ss.str()); @@ -403,7 +402,6 @@ void MockRaftCommand::dbgFuncIngestSST(Context & context, const ASTs & args, DBG auto & kvstore = tmt.getKVStore(); auto region = kvstore->getRegion(region_id); - FailPointHelper::enableFailPoint(FailPoints::force_set_sst_decode_rand); // Register some mock SST reading methods so that we can decode data in `MockSSTReader::MockSSTData` RegionMockTest mock_test(kvstore, region); @@ -519,7 +517,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyBlock(Context & context, const A RegionID region_id = (RegionID)safeGet(typeid_cast(*args.front()).value); auto [region, block_cache] = GLOBAL_REGION_MAP.popRegionCache("__snap_" + std::to_string(region_id)); auto & tmt = context.getTMTContext(); - context.getTMTContext().getKVStore()->checkAndApplySnapshot({region, std::move(block_cache)}, tmt); + context.getTMTContext().getKVStore()->checkAndApplySnapshot({region, std::move(block_cache)}, tmt); std::stringstream ss; ss << "success apply " << region->id() << " with block cache"; @@ -527,121 +525,4 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyBlock(Context & context, const A } -/// Mock to pre-decode snapshot to DTFile(s) then apply - -// Simulate a region pre-handle snapshot data to DTFiles -// ./storage-client.sh "DBGInvoke region_snapshot_pre_handle_file(database_name, table_name, region_id, start, end, schema_string, pk_name[, test-fields=1, cfs="write,default"])" -void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFiles(Context & context, const ASTs & args, DBGInvoker::Printer output) -{ - if (args.size() < 7 || args.size() > 9) - throw Exception("Args not matched, should be: database_name, table_name, region_id, start, end, schema_string, pk_name" - " [, test-fields, cfs=\"write,default\"]", - ErrorCodes::BAD_ARGUMENTS); - - const String & database_name = typeid_cast(*args[0]).name; - const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[2]).value); - RegionID start_handle = (RegionID)safeGet(typeid_cast(*args[3]).value); - RegionID end_handle = (RegionID)safeGet(typeid_cast(*args[4]).value); - - const String schema_str = safeGet(typeid_cast(*args[5]).value); - String handle_pk_name = safeGet(typeid_cast(*args[6]).value); - - UInt64 test_fields = 1; - if 
(args.size() > 7) - test_fields = (UInt64)safeGet(typeid_cast(*args[7]).value); - std::unordered_set cfs; - { - String cfs_str = "write,default"; - if (args.size() > 8) - cfs_str = safeGet(typeid_cast(*args[8]).value); - if (cfs_str.find("write") != std::string::npos) - cfs.insert(ColumnFamilyType::Write); - if (cfs_str.find("default") != std::string::npos) - cfs.insert(ColumnFamilyType::Default); - } - - // Parse a TableInfo from `schema_str` to generate data with this schema - TiDB::TableInfoPtr mocked_table_info; - { - ASTPtr columns_ast; - ParserColumnDeclarationList schema_parser; - Tokens tokens(schema_str.data(), schema_str.data() + schema_str.length()); - TokenIterator pos(tokens); - Expected expected; - if (!schema_parser.parse(pos, columns_ast, expected)) - throw Exception("Invalid TiDB table schema", ErrorCodes::LOGICAL_ERROR); - ColumnsDescription columns - = InterpreterCreateQuery::getColumnsDescription(typeid_cast(*columns_ast), context); - mocked_table_info = MockTiDB::parseColumns(table_name, columns, handle_pk_name, "dt"); - } - - MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); - const auto & table_info = RegionBench::getTableInfo(context, database_name, table_name); - if (table_info.is_common_handle) - throw Exception("Mocking pre handle SST files to DTFiles to a common handle table is not supported", ErrorCodes::LOGICAL_ERROR); - - // Mock SST data for handle [start, end) - const auto region_name = "__snap_snap_" + std::to_string(region_id); - GenMockSSTData(*mocked_table_info, table->id(), region_name, start_handle, end_handle, test_fields, cfs); - - auto & tmt = context.getTMTContext(); - auto & kvstore = tmt.getKVStore(); - auto old_region = kvstore->getRegion(region_id); - - // We may call this function mutiple time to mock some situation, try to reuse the region in `GLOBAL_REGION_MAP` - // so that we can collect uncommitted data. - UInt64 index = MockTiKV::instance().getRaftIndex(region_id) + 1; - RegionPtr new_region = RegionBench::createRegion(table->id(), region_id, start_handle, end_handle, index); - - // Register some mock SST reading methods so that we can decode data in `MockSSTReader::MockSSTData` - RegionMockTest mock_test(kvstore, new_region); - - std::vector sst_views; - { - if (cfs.count(ColumnFamilyType::Write) > 0) - sst_views.push_back(SSTView{ - ColumnFamilyType::Write, - BaseBuffView{region_name.data(), region_name.length()}, - }); - if (cfs.count(ColumnFamilyType::Default) > 0) - sst_views.push_back(SSTView{ - ColumnFamilyType::Default, - BaseBuffView{region_name.data(), region_name.length()}, - }); - } - - // set block size so that we can test for schema-sync while decoding dt files - FailPointHelper::enableFailPoint(FailPoints::force_set_sst_to_dtfile_block_size); - - auto ingest_ids = kvstore->preHandleSnapshotToFiles( - new_region, SSTViewVec{sst_views.data(), sst_views.size()}, index, MockTiKV::instance().getRaftTerm(region_id), tmt); - GLOBAL_REGION_MAP.insertRegionSnap(region_name, {new_region, ingest_ids}); - - { - std::stringstream ss; - ss << "Generate " << ingest_ids.size() << " files for [region_id=" << region_id << "]"; - output(ss.str()); - } -} - -// Apply snapshot for a region. 
(apply a pre-handle snapshot) -// ./storages-client.sh "DBGInvoke region_snapshot_apply_file(region_id)" -void MockRaftCommand::dbgFuncRegionSnapshotApplyDTFiles(Context & context, const ASTs & args, DBGInvoker::Printer output) -{ - if (args.size() != 1) - throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); - - RegionID region_id = (RegionID)safeGet(typeid_cast(*args.front()).value); - const auto region_name = "__snap_snap_" + std::to_string(region_id); - auto [new_region, ingest_ids] = GLOBAL_REGION_MAP.popRegionSnap(region_name); - auto & tmt = context.getTMTContext(); - context.getTMTContext().getKVStore()->checkAndApplySnapshot( - RegionPtrWithSnapshotFiles{new_region, std::move(ingest_ids)}, tmt); - - std::stringstream ss; - ss << "success apply region " << new_region->id() << " with dt files"; - output(ss.str()); -} - } // namespace DB diff --git a/dbms/src/Debug/dbgFuncMockTiDBTable.h b/dbms/src/Debug/dbgFuncMockTiDBTable.h index ec698461235..1ffa26197be 100644 --- a/dbms/src/Debug/dbgFuncMockTiDBTable.h +++ b/dbms/src/Debug/dbgFuncMockTiDBTable.h @@ -14,8 +14,8 @@ struct MockTiDBTable // Inject mocked TiDB table. // Usage: - // ./storages-client.sh "DBGInvoke mock_tidb_table(database_name, table_name, 'col1 type1, col2 type2, ...' [, handle_pk_name, engine-type(tmt|dt)])" + // engine: [tmt, dt], tmt by default static void dbgFuncMockTiDBTable(Context & context, const ASTs & args, DBGInvoker::Printer output); diff --git a/dbms/src/Debug/dbgFuncRegion.cpp b/dbms/src/Debug/dbgFuncRegion.cpp index 3b3d062f45a..06c21a4d90f 100644 --- a/dbms/src/Debug/dbgFuncRegion.cpp +++ b/dbms/src/Debug/dbgFuncRegion.cpp @@ -60,7 +60,7 @@ void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer TMTContext & tmt = context.getTMTContext(); RegionPtr region = RegionBench::createRegion(table_info, region_id, start_keys, end_keys); - tmt.getKVStore()->onSnapshot(region, nullptr, 0, tmt); + tmt.getKVStore()->onSnapshot(region, nullptr, 0, tmt); std::stringstream ss; ss << "put region #" << region_id << ", range" << RecordKVFormat::DecodedTiKVKeyRangeToDebugString(region->getRange()->rawKeys()) @@ -74,7 +74,7 @@ void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer TMTContext & tmt = context.getTMTContext(); RegionPtr region = RegionBench::createRegion(table_id, region_id, start, end); - tmt.getKVStore()->onSnapshot(region, nullptr, 0, tmt); + tmt.getKVStore()->onSnapshot(region, nullptr, 0, tmt); std::stringstream ss; ss << "put region #" << region_id << ", range[" << start << ", " << end << ")" diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index ad0e1e82e51..1f6c568df4c 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -493,7 +493,7 @@ void concurrentBatchInsert(const TiDB::TableInfo & table_info, Int64 concurrent_ Regions regions = createRegions(table_info.id, concurrent_num, key_num_each_region, handle_begin, curr_max_region_id + 1); for (const RegionPtr & region : regions) - tmt.getKVStore()->onSnapshot(region, nullptr, 0, tmt); + tmt.getKVStore()->onSnapshot(region, nullptr, 0, tmt); std::list threads; for (Int64 i = 0; i < concurrent_num; i++, handle_begin += key_num_each_region) diff --git a/dbms/src/Functions/tests/gtest_arithmetic_functions.cpp b/dbms/src/Functions/tests/gtest_arithmetic_functions.cpp index b050cc262fb..fe4c1c820be 100644 --- a/dbms/src/Functions/tests/gtest_arithmetic_functions.cpp +++ b/dbms/src/Functions/tests/gtest_arithmetic_functions.cpp @@ 
-15,6 +15,7 @@ #include #include #include +#include #pragma GCC diagnostic pop diff --git a/dbms/src/Functions/tests/gtest_datetime_extract.cpp b/dbms/src/Functions/tests/gtest_datetime_extract.cpp index f9cb9e79f44..16b6b1bf39a 100644 --- a/dbms/src/Functions/tests/gtest_datetime_extract.cpp +++ b/dbms/src/Functions/tests/gtest_datetime_extract.cpp @@ -13,6 +13,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" #include +#include #pragma GCC diagnostic pop diff --git a/dbms/src/Functions/tests/gtest_strings_pad.cpp b/dbms/src/Functions/tests/gtest_strings_pad.cpp index 80cc2e6983a..d6b2cf68542 100644 --- a/dbms/src/Functions/tests/gtest_strings_pad.cpp +++ b/dbms/src/Functions/tests/gtest_strings_pad.cpp @@ -13,6 +13,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" #include +#include #pragma GCC diagnostic pop diff --git a/dbms/src/Functions/tests/gtest_strings_trim.cpp b/dbms/src/Functions/tests/gtest_strings_trim.cpp index 3a5c86ab53a..4db6bba84f2 100644 --- a/dbms/src/Functions/tests/gtest_strings_trim.cpp +++ b/dbms/src/Functions/tests/gtest_strings_trim.cpp @@ -11,6 +11,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" #include +#include #pragma GCC diagnostic pop diff --git a/dbms/src/Server/RaftConfigParser.cpp b/dbms/src/Server/RaftConfigParser.cpp index 4800e50e705..91fcfa58017 100644 --- a/dbms/src/Server/RaftConfigParser.cpp +++ b/dbms/src/Server/RaftConfigParser.cpp @@ -68,6 +68,7 @@ TiFlashRaftConfig TiFlashRaftConfig::parseSettings(Poco::Util::LayeredConfigurat else res.engine = DEFAULT_ENGINE; } + LOG_DEBUG(log, "Default storage engine: " << static_cast(res.engine)); /// "tmt" engine ONLY support disable_bg_flush = false. /// "dt" engine ONLY support disable_bg_flush = true. 
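Aside: the hunk below deletes the `raft.snapshot.method` handling, which lower-cased the configured name, mapped "block" and "file1" to an enum, and rejected DTFile-based methods when the storage engine was not DeltaTree. A self-contained sketch of that parse-and-validate flow, with simplified enums standing in for the TiDB:: types:

    #include <algorithm>
    #include <cctype>
    #include <stdexcept>
    #include <string>

    enum class SnapshotApplyMethod { Block, DTFile_Directory, DTFile_Single };
    enum class StorageEngine { TMT, DT };

    // Parse the configured method name, then reject combinations the deleted
    // code treated as invalid configuration.
    SnapshotApplyMethod parseSnapshotMethod(std::string name, StorageEngine engine)
    {
        std::transform(name.begin(), name.end(), name.begin(),
                       [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
        SnapshotApplyMethod method;
        if (name == "block")
            method = SnapshotApplyMethod::Block;
        else if (name == "file1")
            method = SnapshotApplyMethod::DTFile_Directory;
        else
            throw std::invalid_argument("unknown snapshot apply method: " + name);
        if (method != SnapshotApplyMethod::Block && engine != StorageEngine::DT)
            throw std::invalid_argument("DTFile snapshot methods require the DeltaTree engine");
        return method;
    }

Validating at config-parse time keeps an unusable engine/method pair from surfacing later as a runtime failure during snapshot apply.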
@@ -96,50 +97,6 @@ TiFlashRaftConfig TiFlashRaftConfig::parseSettings(Poco::Util::LayeredConfigurat res.enable_compatible_mode = config.getBool("raft.enable_compatible_mode"); } - if (config.has("raft.snapshot.method")) - { - String snapshot_method = config.getString("raft.snapshot.method", "file1"); - std::transform(snapshot_method.begin(), snapshot_method.end(), snapshot_method.begin(), [](char ch) { return std::tolower(ch); }); - if (snapshot_method == "block") - { - res.snapshot_apply_method = TiDB::SnapshotApplyMethod::Block; - } - else if (snapshot_method == "file1") - { - res.snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Directory; - } -#if 0 - else if (snapshot_method == "file2") - { - res.snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Single; - } -#endif - else - { - throw Exception("Illegal arguments: unknown snapshot apply method: " + snapshot_method, ErrorCodes::INVALID_CONFIG_PARAMETER); - } - } - switch (res.snapshot_apply_method) - { - case TiDB::SnapshotApplyMethod::DTFile_Directory: - case TiDB::SnapshotApplyMethod::DTFile_Single: - if (res.engine != TiDB::StorageEngine::DT) - { - throw Exception( - "Illegal arguments: can not use DTFile to store snapshot data when the storage engine is not DeltaTree, [engine=" - + DB::toString(static_cast(res.engine)) - + "] [snapshot method=" + applyMethodToString(res.snapshot_apply_method) + "]", - ErrorCodes::INVALID_CONFIG_PARAMETER); - } - break; - default: - break; - } - - LOG_INFO(log, - "Default storage engine [type=" << static_cast(res.engine) - << "] [snapshot.method=" << applyMethodToString(res.snapshot_apply_method) << "]"); - return res; } diff --git a/dbms/src/Server/RaftConfigParser.h b/dbms/src/Server/RaftConfigParser.h index 750e83fd180..e74c5248f79 100644 --- a/dbms/src/Server/RaftConfigParser.h +++ b/dbms/src/Server/RaftConfigParser.h @@ -28,13 +28,11 @@ struct TiFlashRaftConfig static constexpr TiDB::StorageEngine DEFAULT_ENGINE = TiDB::StorageEngine::DT; bool disable_bg_flush = false; TiDB::StorageEngine engine = DEFAULT_ENGINE; - TiDB::SnapshotApplyMethod snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Directory; public: TiFlashRaftConfig() = default; static TiFlashRaftConfig parseSettings(Poco::Util::LayeredConfiguration & config, Poco::Logger * log); - }; } // namespace DB diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 099c685f9d4..2d1f25a09ed 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -100,10 +100,6 @@ extern const int ARGUMENT_OUT_OF_BOUND; extern const int INVALID_CONFIG_PARAMETER; } // namespace ErrorCodes -namespace Debug -{ -extern void setServiceAddr(const std::string & addr); -} static std::string getCanonicalPath(std::string path) { @@ -812,7 +808,6 @@ int Server::main(const std::vector & /*args*/) builder.SetMaxSendMessageSize(-1); flash_grpc_server = builder.BuildAndStart(); LOG_INFO(log, "Flash grpc server listening on [" << raft_config.flash_server_addr << "]"); - Debug::setServiceAddr(raft_config.flash_server_addr); } SCOPE_EXIT({ diff --git a/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.cpp index c85cb78d114..544d96dc518 100644 --- a/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.cpp @@ -10,23 +10,6 @@ namespace DB namespace DM { -template -void DMVersionFilterBlockInputStream::readPrefix() -{ - forEachChild([](IBlockInputStream & child) { - 
child.readPrefix(); - return false; - }); -} - -template -void DMVersionFilterBlockInputStream::readSuffix() -{ - forEachChild([](IBlockInputStream & child) { - child.readSuffix(); - return false; - }); -} static constexpr size_t UNROLL_BATCH = 64; @@ -317,4 +300,4 @@ template class DMVersionFilterBlockInputStream; template class DMVersionFilterBlockInputStream; } // namespace DM -} // namespace DB +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h b/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h index 1881140d7e9..43faa4646ec 100644 --- a/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/DMVersionFilterBlockInputStream.h @@ -51,9 +51,6 @@ class DMVersionFilterBlockInputStream : public IBlockInputStream << "%, not clean: " << DB::toString((Float64)not_clean_rows * 100 / passed_rows, 2) << "%"); } - void readPrefix() override; - void readSuffix() override; - String getName() const override { return "DeltaMergeVersionFilter"; } Block getHeader() const override { return header; } diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaPack.cpp b/dbms/src/Storages/DeltaMerge/Delta/DeltaPack.cpp index 7617a200bb5..c566c293b6c 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaPack.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaPack.cpp @@ -8,6 +8,11 @@ #include #include +namespace ProfileEvents +{ +extern const Event DMWriteBytes; +} + namespace DB { namespace DM @@ -196,7 +201,7 @@ String packsToString(const DeltaPacks & packs) packs_info += "F_" + DB::toString(p->getRows()); else if (auto dp_delete = p->tryToDeleteRange(); dp_delete) packs_info += "D_" + dp_delete->getDeleteRange().toString(); - packs_info += (p->isSaved() ? "_S," : "_N,"); + packs_info += + (p->isSaved() ? 
"_S," : "_N,"); } if (!packs.empty()) packs_info.erase(packs_info.size() - 1); @@ -205,4 +210,4 @@ String packsToString(const DeltaPacks & packs) } } // namespace DM -} // namespace DB +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaPack.h b/dbms/src/Storages/DeltaMerge/Delta/DeltaPack.h index d349e0a9679..5dde7855ef0 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaPack.h +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaPack.h @@ -154,4 +154,4 @@ DeltaPacks deserializePacks_V3(DMContext & context, const RowKeyRange & segment_ String packsToString(const DeltaPacks & packs); } // namespace DM -} // namespace DB +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaPackBlock.cpp b/dbms/src/Storages/DeltaMerge/Delta/DeltaPackBlock.cpp index fa633c612cf..0af87c2976a 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaPackBlock.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaPackBlock.cpp @@ -235,7 +235,7 @@ void DeltaPackBlock::serializeMetadata(WriteBuffer & buf, bool save_schema) cons writeIntBinary(bytes, buf); } -std::tuple DeltaPackBlock::deserializeMetadata(ReadBuffer & buf, const BlockPtr & last_schema) +DeltaPackPtr DeltaPackBlock::deserializeMetadata(ReadBuffer & buf, const BlockPtr & last_schema) { auto schema = deserializeSchema(buf); if (!schema) @@ -250,7 +250,7 @@ std::tuple DeltaPackBlock::deserializeMetadata(ReadBuffe readIntBinary(rows, buf); readIntBinary(bytes, buf); - return {createPackWithDataPage(schema, rows, bytes, data_page_id), std::move(schema)}; + return createPackWithDataPage(schema, rows, bytes, data_page_id); } ColumnPtr DPBlockReader::getPKColumn() @@ -294,4 +294,4 @@ DeltaPackReaderPtr DPBlockReader::createNewReader(const ColumnDefinesPtr & new_c } // namespace DM -} // namespace DB +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaPackBlock.h b/dbms/src/Storages/DeltaMerge/Delta/DeltaPackBlock.h index b6364e5dff5..9ade1a3ebcf 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaPackBlock.h +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaPackBlock.h @@ -118,7 +118,7 @@ class DeltaPackBlock : public DeltaPack void serializeMetadata(WriteBuffer & buf, bool save_schema) const override; - static std::tuple deserializeMetadata(ReadBuffer & buf, const BlockPtr & last_schema); + static DeltaPackPtr deserializeMetadata(ReadBuffer & buf, const BlockPtr & last_schema); String toString() const override { diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaPackFile.cpp b/dbms/src/Storages/DeltaMerge/Delta/DeltaPackFile.cpp index 8470ad17bf9..aac0481569c 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaPackFile.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaPackFile.cpp @@ -42,7 +42,8 @@ void DeltaPackFile::serializeMetadata(WriteBuffer & buf, bool /*save_schema*/) c DeltaPackPtr DeltaPackFile::deserializeMetadata(DMContext & context, // const RowKeyRange & segment_range, - ReadBuffer & buf) + ReadBuffer & buf, + const BlockPtr & /*last_schema*/) { UInt64 file_ref_id; size_t valid_rows, valid_bytes; @@ -152,34 +153,28 @@ size_t DPFileReader::readRowsOnce(MutableColumns & output_cols, // size_t rows_end = rows_offset + rows_limit; size_t actual_read = 0; - size_t read_offset = rows_offset; - while (read_offset < rows_end) + while (rows_offset < rows_end) { if (!cur_block || cur_block_offset == cur_block.rows()) { - if (unlikely(!read_next_block())) - throw Exception("Not enough delta data to read [offset=" + 
DB::toString(rows_offset) - + "] [limit=" + DB::toString(rows_limit) + "] [read_offset=" + DB::toString(read_offset) + "]", - ErrorCodes::LOGICAL_ERROR); + if (!read_next_block()) + throw Exception("Not enough delta data to read", ErrorCodes::LOGICAL_ERROR); } - if (unlikely(read_offset < rows_before_cur_block + cur_block_offset)) - throw Exception("read_offset is too small [offset=" + DB::toString(rows_offset) + "] [limit=" + DB::toString(rows_limit) - + "] [read_offset=" + DB::toString(read_offset) - + "] [min_offset=" + DB::toString(rows_before_cur_block + cur_block_offset) + "]", - ErrorCodes::LOGICAL_ERROR); + if (rows_offset < rows_before_cur_block + cur_block_offset) + throw Exception("rows_offset is too small", ErrorCodes::LOGICAL_ERROR); - if (read_offset >= rows_before_cur_block + cur_block.rows()) + if (rows_offset >= rows_before_cur_block + cur_block.rows()) { cur_block_offset = cur_block.rows(); continue; } auto read_end_for_cur_block = std::min(rows_end, rows_before_cur_block + cur_block.rows()); - auto read_start_in_block = read_offset - rows_before_cur_block; - auto read_limit_in_block = read_end_for_cur_block - read_offset; + auto read_start_in_block = rows_offset - rows_before_cur_block; + auto read_limit_in_block = read_end_for_cur_block - rows_offset; actual_read += copyColumnsData(cur_block_data, cur_block_data[0], output_cols, read_start_in_block, read_limit_in_block, range); - read_offset += read_limit_in_block; + rows_offset += read_limit_in_block; cur_block_offset += read_limit_in_block; } return actual_read; @@ -189,18 +184,10 @@ size_t DPFileReader::readRows(MutableColumns & output_cols, size_t rows_offset, { initStream(); - try - { - if (pk_ver_only) - return readRowsRepeatedly(output_cols, rows_offset, rows_limit, range); - else - return readRowsOnce(output_cols, rows_offset, rows_limit, range); - } - catch (DB::Exception & e) - { - e.addMessage(" while reading DTFile " + pack.getFile()->path()); - throw; - } + if (pk_ver_only) + return readRowsRepeatedly(output_cols, rows_offset, rows_limit, range); + else + return readRowsOnce(output_cols, rows_offset, rows_limit, range); } Block DPFileReader::readNextBlock() @@ -217,4 +204,4 @@ DeltaPackReaderPtr DPFileReader::createNewReader(const ColumnDefinesPtr & new_co } } // namespace DM -} // namespace DB +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaPackFile.h b/dbms/src/Storages/DeltaMerge/Delta/DeltaPackFile.h index a5cf79cfae2..44128b0c034 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaPackFile.h +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaPackFile.h @@ -65,7 +65,8 @@ class DeltaPackFile : public DeltaPack static DeltaPackPtr deserializeMetadata(DMContext & context, // const RowKeyRange & segment_range, - ReadBuffer & buf); + ReadBuffer & buf, + const BlockPtr & last_schema); String toString() const override { diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaPack_V3.cpp b/dbms/src/Storages/DeltaMerge/Delta/DeltaPack_V3.cpp index 2b3f3d3807b..22ca2105d7b 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaPack_V3.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaPack_V3.cpp @@ -66,17 +66,19 @@ DeltaPacks deserializePacks_V3(DMContext & context, const RowKeyRange & segment_ pack = DeltaPackDeleteRange::deserializeMetadata(buf); break; case DeltaPack::Type::BLOCK: { - std::tie(pack, last_schema) = DeltaPackBlock::deserializeMetadata(buf, last_schema); + pack = DeltaPackBlock::deserializeMetadata(buf, last_schema); + if (auto dp_block = 
pack->tryToBlock(); dp_block) + last_schema = dp_block->getSchema(); break; } case DeltaPack::Type::FILE: { - pack = DeltaPackFile::deserializeMetadata(context, segment_range, buf); + pack = DeltaPackFile::deserializeMetadata(context, segment_range, buf, last_schema); break; } default: throw Exception("Unexpected pack type: " + DB::toString(pack_type), ErrorCodes::LOGICAL_ERROR); } - packs.emplace_back(std::move(pack)); + packs.push_back(pack); } return packs; } diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp index 7806c548917..b0d49ae3825 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp @@ -237,7 +237,7 @@ void DeltaValueSpace::appendPackInner(const DeltaPackPtr & pack) // If this pack's schema is identical to last_schema, then use the last_schema instance, // so that we don't have to serialize my_schema instance. auto my_schema = dp_block->getSchema(); - if (last_schema && my_schema && last_schema != my_schema && isSameSchema(*my_schema, *last_schema)) + if (last_schema && my_schema && last_schema != my_schema && checkSchema(*my_schema, *last_schema)) dp_block->resetIdenticalSchema(last_schema); } @@ -289,14 +289,14 @@ bool DeltaValueSpace::appendToCache(DMContext & context, const Block & block, si { if constexpr (DM_RUN_CHECK) { - if (unlikely(!isSameSchema(*p->getSchema(), p->getCache()->block))) + if (unlikely(!checkSchema(*p->getSchema(), p->getCache()->block))) throw Exception("Mutable pack's structure of schema and block are different: " + last_pack->toString()); } auto & cache_block = p->getCache()->block; bool is_overflow = cache_block.rows() >= context.delta_cache_limit_rows || cache_block.bytes() >= context.delta_cache_limit_bytes; - bool is_same_schema = isSameSchema(block, cache_block); + bool is_same_schema = checkSchema(block, cache_block); if (!is_overflow && is_same_schema) { // The last cache block is available @@ -312,7 +312,7 @@ bool DeltaValueSpace::appendToCache(DMContext & context, const Block & block, si { // Create a new pack. auto last_schema = lastSchema(); - auto my_schema = (last_schema && isSameSchema(block, *last_schema)) ? last_schema : std::make_shared(block.cloneEmpty()); + auto my_schema = (last_schema && checkSchema(block, *last_schema)) ? 
last_schema : std::make_shared(block.cloneEmpty()); auto new_pack = DeltaPackBlock::createCachePack(my_schema); appendPackInner(new_pack); @@ -342,19 +342,20 @@ bool DeltaValueSpace::appendDeleteRange(DMContext & /*context*/, const RowKeyRan return true; } -bool DeltaValueSpace::ingestPacks(DMContext & /*context*/, const RowKeyRange & range, const DeltaPacks & packs, bool clear_data_in_range) +bool DeltaValueSpace::appendRegionSnapshot(DMContext & /*context*/, + const RowKeyRange & range, + const DeltaPacks & packs, + bool clear_data_in_range) { std::scoped_lock lock(mutex); if (abandoned.load(std::memory_order_relaxed)) return false; - // Prepend a DeleteRange to clean data before applying packs if (clear_data_in_range) { auto p = std::make_shared(range); appendPackInner(p); } - for (auto & p : packs) { appendPackInner(p); diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h index 2580296c2dc..052c86ca5e6 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h @@ -237,7 +237,7 @@ class DeltaValueSpace : public std::enable_shared_from_this, pr bool appendDeleteRange(DMContext & context, const RowKeyRange & delete_range); - bool ingestPacks(DMContext & context, const RowKeyRange & range, const DeltaPacks & packs, bool clear_data_in_range); + bool appendRegionSnapshot(DMContext & context, const RowKeyRange & range, const DeltaPacks & packs, bool clear_data_in_range); /// Flush the data of packs which haven't write to disk yet, and also save the metadata of packs. bool flush(DMContext & context); diff --git a/dbms/src/Storages/DeltaMerge/Delta/Pack.cpp b/dbms/src/Storages/DeltaMerge/Delta/Pack.cpp new file mode 100644 index 00000000000..c7b01df39e0 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/Delta/Pack.cpp @@ -0,0 +1,310 @@ +//#include +//#include +//#include +//#include +//#include +//#include +//#include +// +//namespace DB::DM +//{ +//using PageReadFields = PageStorage::PageReadFields; +// +//// ================================================ +//// Serialize / deserialize +//// ================================================ +// +////void serializeColumn(MemoryWriteBuffer & buf, const IColumn & column, const DataTypePtr & type, size_t offset, size_t limit, bool compress) +////{ +//// CompressionMethod method = compress ? 
CompressionMethod::LZ4 : CompressionMethod::NONE; +//// +//// CompressedWriteBuffer compressed(buf, CompressionSettings(method)); +//// type->serializeBinaryBulkWithMultipleStreams(column, // +//// [&](const IDataType::SubstreamPath &) { return &compressed; }, +//// offset, +//// limit, +//// true, +//// {}); +//// compressed.next(); +////} +// +//void deserializeColumn(IColumn & column, const DataTypePtr & type, const ByteBuffer & data_buf, size_t rows) +//{ +// ReadBufferFromMemory buf(data_buf.begin(), data_buf.size()); +// CompressedReadBuffer compressed(buf); +// type->deserializeBinaryBulkWithMultipleStreams(column, // +// [&](const IDataType::SubstreamPath &) { return &compressed; }, +// rows, +// (double)(data_buf.size()) / rows, +// true, +// {}); +//} +// +//inline void serializePack(const Pack & pack, const BlockPtr & schema, WriteBuffer & buf) +//{ +// writeIntBinary(pack.rows, buf); +// writeIntBinary(pack.bytes, buf); +// pack.delete_range.serialize(buf); +// writeIntBinary(pack.data_page, buf); +// if (schema) +// { +// writeIntBinary((UInt32)schema->columns(), buf); +// for (auto & col : *pack.schema) +// { +// writeIntBinary(col.column_id, buf); +// writeStringBinary(col.name, buf); +// writeStringBinary(col.type->getName(), buf); +// } +// } +// else +// { +// writeIntBinary((UInt32)0, buf); +// } +//} +// +//inline PackPtr deserializePack(ReadBuffer & buf, UInt64 version) +//{ +// auto pack = std::make_shared(); +// pack->saved = true; // Must be true, otherwise it should not be here. +// pack->appendable = false; // Must be false, otherwise it should not be here. +// readIntBinary(pack->rows, buf); +// readIntBinary(pack->bytes, buf); +// if (version == 1) +// { +// HandleRange range; +// readPODBinary(range, buf); +// pack->delete_range = RowKeyRange::fromHandleRange(range); +// } +// else +// pack->delete_range = RowKeyRange::deserialize(buf); +// readIntBinary(pack->data_page, buf); +// UInt32 column_size; +// readIntBinary(column_size, buf); +// if (column_size != 0) +// { +// auto schema = std::make_shared(); +// for (size_t i = 0; i < column_size; ++i) +// { +// Int64 column_id; +// String name; +// String type_name; +// readIntBinary(column_id, buf); +// readStringBinary(name, buf); +// readStringBinary(type_name, buf); +// schema->insert(ColumnWithTypeAndName({}, DataTypeFactory::instance().get(type_name), name, column_id)); +// } +// pack->setSchema(schema); +// } +// return pack; +//} +// +//void serializeSavedPacks(WriteBuffer & buf, const Packs & packs) +//{ +// size_t saved_packs = std::find_if(packs.begin(), packs.end(), [](const PackPtr & p) { return !p->isSaved(); }) - packs.begin(); +// +// writeIntBinary(DeltaValueSpace_OLD::CURRENT_VERSION, buf); // Add binary version +// writeIntBinary(saved_packs, buf); +// BlockPtr last_schema; +// +// for (auto & pack : packs) +// { +// if (!pack->isSaved()) +// break; +// // Do not encode the schema if it is the same as previous one. 
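Aside: the rule named in the commented line above, and applied in the if/else that follows it, is a simple schema dedup: a pack's schema is encoded only when it differs from the previous pack's, so runs of packs sharing one schema store it once. Sketched standalone with illustrative types (`PackMeta`, `writePack` stand in for the real pack and serializePack):

    #include <memory>
    #include <vector>

    struct Schema { /* column ids, names and type names in the real code */ };
    using SchemaPtr = std::shared_ptr<Schema>;
    struct PackMeta { SchemaPtr schema; };

    // Encode a schema only when the pointer differs from the previous pack's.
    template <typename WritePackFn>
    void serializeAll(const std::vector<PackMeta> & packs, WritePackFn writePack)
    {
        SchemaPtr last_schema;
        for (const auto & p : packs)
        {
            if (p.schema && p.schema != last_schema)
            {
                writePack(p, p.schema); // schema changed: encode it inline
                last_schema = p.schema;
            }
            else
            {
                writePack(p, nullptr); // same schema as the previous pack: skip it
            }
        }
    }

The matching deserializer carries `last_schema` forward the same way, which is exactly what the deserializePacks code below reconstructs.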
+// if (pack->isDeleteRange()) +// serializePack(*pack, nullptr, buf); +// else +// { +// if (unlikely(!pack->schema)) +// throw Exception("A data pack without schema: " + pack->toString(), ErrorCodes::LOGICAL_ERROR); +// if (pack->schema != last_schema) +// { +// serializePack(*pack, pack->schema, buf); +// last_schema = pack->schema; +// } +// else +// { +// serializePack(*pack, nullptr, buf); +// } +// } +// } +//} +// +//Packs deserializePacks(ReadBuffer & buf) +//{ +// // Check binary version +// UInt64 version; +// readIntBinary(version, buf); +// if (version > DeltaValueSpace_OLD::CURRENT_VERSION) +// throw Exception("Pack binary version not match: " + DB::toString(version), ErrorCodes::LOGICAL_ERROR); +// size_t size; +// readIntBinary(size, buf); +// Packs packs; +// BlockPtr last_schema; +// for (size_t i = 0; i < (size_t)size; ++i) +// { +// auto pack = deserializePack(buf, version); +// if (!pack->isDeleteRange()) +// { +// if (!pack->schema) +// pack->setSchema(last_schema); +// else +// last_schema = pack->schema; +// } +// packs.push_back(pack); +// } +// return packs; +//} +// +//String packsToString(const Packs & packs) +//{ +// String packs_info = "["; +// for (auto & p : packs) +// { +// packs_info += (p->isDeleteRange() ? "DEL" : "INS_" + DB::toString(p->rows)) + (p->isSaved() ? "_S," : "_N,"); +// } +// if (!packs.empty()) +// packs_info.erase(packs_info.size() - 1); +// packs_info += "]"; +// return packs_info; +//} +// +//Block readPackFromCache(const PackPtr & pack) +//{ +// std::scoped_lock lock(pack->cache->mutex); +// +// auto & cache_block = pack->cache->block; +// MutableColumns columns = cache_block.cloneEmptyColumns(); +// for (size_t i = 0; i < cache_block.columns(); ++i) +// columns[i]->insertRangeFrom(*cache_block.getByPosition(i).column, 0, pack->rows); +// return cache_block.cloneWithColumns(std::move(columns)); +//} +// +//Columns readPackFromCache(const PackPtr & pack, const ColumnDefines & column_defines, size_t col_start, size_t col_end) +//{ +// if (unlikely(!(pack->cache))) +// { +// String msg = " Not a cache pack: " + pack->toString(); +// LOG_ERROR(&Logger::get(__FUNCTION__), msg); +// throw Exception(msg); +// } +// +// // TODO: should be able to use cache data directly, without copy. +// std::scoped_lock lock(pack->cache->mutex); +// +// const auto & cache_block = pack->cache->block; +// if constexpr (0) +// { +// if (pack->schema == nullptr || !checkSchema(cache_block, *pack->schema)) +// { +// const String pack_schema_str = pack->schema ? pack->schema->dumpStructure() : "(none)"; +// const String cache_schema_str = cache_block.dumpStructure(); +// throw Exception("Pack[" + pack->toString() + "] schema not match its cache_block! 
pack: " + pack_schema_str +// + ", cache: " + cache_schema_str, +// ErrorCodes::LOGICAL_ERROR); +// } +// } +// Columns columns; +// for (size_t i = col_start; i < col_end; ++i) +// { +// const auto & cd = column_defines[i]; +// if (auto it = pack->colid_to_offset.find(cd.id); it != pack->colid_to_offset.end()) +// { +// auto col_offset = it->second; +// // Copy data from cache +// auto [type, col_data] = pack->getDataTypeAndEmptyColumn(cd.id); +// col_data->insertRangeFrom(*cache_block.getByPosition(col_offset).column, 0, pack->rows); +// // Cast if need +// auto col_converted = convertColumnByColumnDefineIfNeed(type, std::move(col_data), cd); +// columns.push_back(std::move(col_converted)); +// } +// else +// { +// ColumnPtr column = createColumnWithDefaultValue(cd, pack->rows); +// columns.emplace_back(std::move(column)); +// } +// } +// return columns; +//} +// +//Block readPackFromDisk(const PackPtr & pack, const PageReader & page_reader) +//{ +// auto & schema = *pack->schema; +// +// PageReadFields fields; +// fields.first = pack->data_page; +// for (size_t i = 0; i < schema.columns(); ++i) +// fields.second.push_back(i); +// +// auto page_map = page_reader.read({fields}); +// auto page = page_map[pack->data_page]; +// +// auto columns = schema.cloneEmptyColumns(); +// +// if (unlikely(columns.size() != page.fieldSize())) +// throw Exception("Column size and field size not the same"); +// +// for (size_t index = 0; index < schema.columns(); ++index) +// { +// auto data_buf = page.getFieldData(index); +// auto & type = schema.getByPosition(index).type; +// auto & column = columns[index]; +// deserializeColumn(*column, type, data_buf, pack->rows); +// } +// +// return schema.cloneWithColumns(std::move(columns)); +//} +// +//Columns readPackFromDisk(const PackPtr & pack, // +// const PageReader & page_reader, +// const ColumnDefines & column_defines, +// size_t col_start, +// size_t col_end) +//{ +// const size_t num_columns_read = col_end - col_start; +// +// Columns columns(num_columns_read); // allocate empty columns +// +// PageReadFields fields; +// fields.first = pack->data_page; +// for (size_t index = col_start; index < col_end; ++index) +// { +// const auto & cd = column_defines[index]; +// if (auto it = pack->colid_to_offset.find(cd.id); it != pack->colid_to_offset.end()) +// { +// auto col_index = it->second; +// fields.second.push_back(col_index); +// } +// else +// { +// // New column after ddl is not exist in this pack, fill with default value +// columns[index - col_start] = createColumnWithDefaultValue(cd, pack->rows); +// } +// } +// +// auto page_map = page_reader.read({fields}); +// Page page = page_map[pack->data_page]; +// for (size_t index = col_start; index < col_end; ++index) +// { +// const size_t index_in_read_columns = index - col_start; +// if (columns[index_in_read_columns] != nullptr) +// { +// // the column is fill with default values. 
+// continue; +// } +// auto col_id = column_defines[index].id; +// auto col_index = pack->colid_to_offset[col_id]; +// auto data_buf = page.getFieldData(col_index); +// +// const auto & cd = column_defines[index]; +// // Deserialize column by pack's schema +// auto [type, col_data] = pack->getDataTypeAndEmptyColumn(cd.id); +// deserializeColumn(*col_data, type, data_buf, pack->rows); +// +// columns[index_in_read_columns] = convertColumnByColumnDefineIfNeed(type, std::move(col_data), cd); +// } +// +// return columns; +//} +// +//} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Delta/Pack.h b/dbms/src/Storages/DeltaMerge/Delta/Pack.h new file mode 100644 index 00000000000..6484d5bbdbc --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/Delta/Pack.h @@ -0,0 +1,41 @@ +//#pragma once +// +//#include +//#include +//#include +//#include +//#include +// +//namespace DB +//{ +// +//class MemoryWriteBuffer; +// +//namespace DM +//{ +// +//static constexpr size_t PACK_SERIALIZE_BUFFER_SIZE = 65536; +// +//void serializeColumn(MemoryWriteBuffer & buf, const IColumn & column, const DataTypePtr & type, size_t offset, size_t limit, bool compress); +// +//void serializeSavedPacks(WriteBuffer & buf, const Packs & packs); +//Packs deserializePacks(ReadBuffer & buf); +// +//// Debugging string +//String packsToString(const Packs & packs); +// +//// Read a block from cache / disk according to `pack->schema`. Only used by Compact Delta. +//Block readPackFromCache(const PackPtr & pack); +//Block readPackFromDisk(const PackPtr & pack, const PageReader & page_reader); +// +//// Read a block of columns in `column_defines` from cache / disk, +//// if `pack->schema` is not match with `column_defines`, take good care of DDL cast +//Columns readPackFromCache(const PackPtr & pack, const ColumnDefines & column_defines, size_t col_start, size_t col_end); +//Columns readPackFromDisk(const PackPtr & pack, // +// const PageReader & page_reader, +// const ColumnDefines & column_defines, +// size_t col_start, +// size_t col_end); +// +//} // namespace DM +//} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/Delta/Snapshot.cpp b/dbms/src/Storages/DeltaMerge/Delta/Snapshot.cpp index 1c24645423b..f16f83c13be 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/Snapshot.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/Snapshot.cpp @@ -208,7 +208,7 @@ Block DeltaValueReader::readPKVersion(size_t offset, size_t limit) Block block; for (size_t i = 0; i < 2; ++i) { - const auto & cd = (*col_defs)[i]; + auto cd = (*col_defs)[i]; block.insert(ColumnWithTypeAndName(std::move(cols[i]), cd.type, cd.name, cd.id)); } return block; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMerge.h b/dbms/src/Storages/DeltaMerge/DeltaMerge.h index 41fc2afcb29..3ec56f0ac17 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMerge.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMerge.h @@ -514,7 +514,7 @@ class DeltaMergeBlockInputStream final : public SkippableBlockInputStream, Alloc use_stable_rows = delta_index_it.getSid() - prev_sid; } } -}; +}; // namespace DM } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h index d5ece791928..793218376a4 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h @@ -207,7 +207,7 @@ inline bool hasColumn(const ColumnDefines & columns, const ColId & col_id) } template -inline bool isSameSchema(const Block & a, const Block & b) +inline bool checkSchema(const 
Block & a, const Block & b)
 {
     if (a.columns() != b.columns())
         return false;
diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
index 24435335454..0b1516e3235 100644
--- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
+++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp
@@ -122,7 +122,7 @@ namespace
 {
 // Actually we will always store a column of `_tidb_rowid`, no matter it
 // exist in `table_columns` or not.
-ColumnDefinesPtr generateStoreColumns(const ColumnDefines & table_columns, bool is_common_handle)
+ColumnDefinesPtr getStoreColumns(const ColumnDefines & table_columns, bool is_common_handle)
 {
     auto columns = std::make_shared<ColumnDefines>();
     // First three columns are always _tidb_rowid, _INTERNAL_VERSION, _INTERNAL_DELMARK
@@ -178,7 +178,7 @@ DeltaMergeStore::DeltaMergeStore(Context & db_context,
     }
     original_table_header = std::make_shared<Block>(toEmptyBlock(original_table_columns));
-    store_columns = generateStoreColumns(original_table_columns, is_common_handle);
+    store_columns = getStoreColumns(original_table_columns, is_common_handle);
 
     auto dm_context = newDMContext(db_context, db_context.getSettingsRef());
 
@@ -386,24 +386,7 @@ inline Block getSubBlock(const Block & block, size_t offset, size_t limit)
     }
 }
 
-// Add an extra handle column if handle reused the original column data.
-Block DeltaMergeStore::addExtraColumnIfNeed(const Context & db_context, Block && block) const
-{
-    if (pkIsHandle())
-    {
-        auto handle_pos = getPosByColumnId(block, original_table_handle_define.id);
-        addColumnToBlock(block, //
-                         EXTRA_HANDLE_COLUMN_ID,
-                         EXTRA_HANDLE_COLUMN_NAME,
-                         EXTRA_HANDLE_COLUMN_INT_TYPE,
-                         EXTRA_HANDLE_COLUMN_INT_TYPE->createColumn());
-        // Fill the new column with data in column[handle_pos]
-        FunctionToInt64::create(db_context)->execute(block, {handle_pos}, block.columns() - 1);
-    }
-    return std::move(block);
-}
-
-void DeltaMergeStore::write(const Context & db_context, const DB::Settings & db_settings, Block && to_write)
+void DeltaMergeStore::write(const Context & db_context, const DB::Settings & db_settings, const Block & to_write)
 {
     LOG_TRACE(log, __FUNCTION__ << " table: " << db_name << "." << table_name << ", rows: " << to_write.rows());
 
@@ -414,8 +397,19 @@ void DeltaMergeStore::write(const Context & db_context, const DB::Settings & db_
         return;
 
     auto dm_context = newDMContext(db_context, db_settings);
-    Block block = addExtraColumnIfNeed(db_context, std::move(to_write));
+    Block block = to_write;
+
+    // Add an extra handle column, if handle reused the original column data.
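+    // (`pkIsHandle()` is true when the table's primary key is a single integer
+    // column that is reused as the row handle. A hedged illustration with a
+    // made-up schema: an input block [ id Int32, val String ] whose pk is `id`
+    // leaves this branch as [ id Int32, val String, _tidb_rowid Int64 ], where
+    // `_tidb_rowid` holds `id` cast to Int64 by FunctionToInt64, so later stages
+    // can always address rows through the extra handle column.)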
+ if (pkIsHandle()) + { + auto handle_pos = getPosByColumnId(block, original_table_handle_define.id); + addColumnToBlock(block, // + EXTRA_HANDLE_COLUMN_ID, + EXTRA_HANDLE_COLUMN_NAME, + EXTRA_HANDLE_COLUMN_INT_TYPE, + EXTRA_HANDLE_COLUMN_INT_TYPE->createColumn()); + FunctionToInt64::create(db_context)->execute(block, {handle_pos}, block.columns() - 1); + } const auto bytes = block.bytes(); { @@ -524,43 +518,14 @@ void DeltaMergeStore::write(const Context & db_context, const DB::Settings & db_ checkSegmentUpdate(dm_context, segment, ThreadType::Write); } -std::tuple DeltaMergeStore::preAllocateIngestFile() +void DeltaMergeStore::writeRegionSnapshot(const DMContextPtr & dm_context, + const RowKeyRange & range, + std::vector file_ids, + bool clear_data_in_range) { - if (shutdown_called.load(std::memory_order_relaxed)) - return {}; - - auto delegator = path_pool.getStableDiskDelegator(); - auto parent_path = delegator.choosePath(); - auto new_id = storage_pool.newDataPageId(); - return {parent_path, new_id}; -} - -void DeltaMergeStore::preIngestFile(const String & parent_path, const PageId file_id, size_t file_size) -{ - if (shutdown_called.load(std::memory_order_relaxed)) - return; - - auto delegator = path_pool.getStableDiskDelegator(); - delegator.addDTFile(file_id, file_size, parent_path); -} - -void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, - const RowKeyRange & range, - std::vector file_ids, - bool clear_data_in_range) -{ - if (unlikely(shutdown_called.load(std::memory_order_relaxed))) - { - std::stringstream stream; - stream << " try to ingest files into a shutdown table: " << db_name << "." << table_name; - auto msg = stream.str(); - LOG_WARNING(log, __FUNCTION__ << msg); - throw Exception(msg); - } - LOG_INFO(log, __FUNCTION__ << " table: " << db_name << "." << table_name << ", region range:" << range.toDebugString()); - EventRecorder write_block_recorder(ProfileEvents::DMWriteFile, ProfileEvents::DMWriteFileNS); + EventRecorder write_block_recorder(ProfileEvents::DMDeleteRange, ProfileEvents::DMDeleteRangeNS); auto delegate = dm_context->path_pool.getStableDiskDelegator(); auto file_provider = dm_context->db_context.getFileProvider(); @@ -583,8 +548,8 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, } LOG_DEBUG(log, - __FUNCTION__ << " table: " << db_name << "." << table_name << ", rows: " << rows << ", bytes: " << bytes << ", bytes on disk" - << bytes_on_disk); + "[Write Region Snapshot] table: " << db_name << "." << table_name << ", rows: " << rows << ", bytes: " << bytes + << ", bytes on disk" << bytes_on_disk); Segments updated_segments; RowKeyRange cur_range = range; @@ -661,16 +626,9 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, // they are visible for readers who require file_ids to be found in PageStorage. wbs.writeLogAndData(); - if (segment->ingestPacks(*dm_context, range.shrink(segment_range), packs, clear_data_in_range)) + if (segment->writeRegionSnapshot(*dm_context, range.shrink(segment_range), packs, clear_data_in_range)) { updated_segments.push_back(segment); - // Enable gc for DTFile once it has been committed. 
- for (size_t index = 0; index < my_file_used.size(); ++index) - { - auto & file = files[index]; - if (my_file_used[index]) - file->enableGC(); - } file_used.swap(my_file_used); break; } @@ -684,9 +642,6 @@ void DeltaMergeStore::ingestFiles(const DMContextPtr & dm_context, cur_range.setEnd(range.end); } - GET_METRIC(dm_context->metrics, tiflash_storage_throughput_bytes, type_ingest).Increment(bytes); - GET_METRIC(dm_context->metrics, tiflash_storage_throughput_rows, type_ingest).Increment(rows); - flushCache(dm_context, range); for (auto & segment : updated_segments) @@ -735,6 +690,7 @@ void DeltaMergeStore::deleteRange(const Context & db_context, const DB::Settings segment_range = segment->getRowKeyRange(); // Write could fail, because other threads could already updated the instance. Like split/merge, merge delta. + if (segment->write(*dm_context, delete_range.shrink(segment_range))) { updated_segments.push_back(segment); @@ -1289,7 +1245,8 @@ bool DeltaMergeStore::handleBackgroundTask(bool heavy) segmentMerge(*task.dm_context, task.segment, task.next_segment, false); type = ThreadType::BG_Merge; break; - case MergeDelta: { + case MergeDelta: + { FAIL_POINT_PAUSE(FailPoints::pause_before_dt_background_delta_merge); left = segmentMergeDelta(*task.dm_context, task.segment, false); type = ThreadType::BG_MergeDelta; @@ -1772,7 +1729,7 @@ void DeltaMergeStore::applyAlters(const AlterCommands & commands, } } - auto new_store_columns = generateStoreColumns(new_original_table_columns, is_common_handle); + auto new_store_columns = getStoreColumns(new_original_table_columns, is_common_handle); original_table_columns.swap(new_original_table_columns); store_columns.swap(new_store_columns); diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 5cd212a00fb..f2e6120fe79 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -260,40 +260,32 @@ class DeltaMergeStore : private boost::noncopyable // Stop all background tasks. 
     void shutdown();
 
-    Block addExtraColumnIfNeed(const Context & db_context, Block && block) const;
-
-    void write(const Context & db_context, const DB::Settings & db_settings, Block && block);
-
-    void deleteRange(const Context & db_context, const DB::Settings & db_settings, const RowKeyRange & delete_range);
-
-    std::tuple<String, PageId> preAllocateIngestFile();
-
-    void preIngestFile(const String & parent_path, const PageId file_id, size_t file_size);
-
-    void ingestFiles(const DMContextPtr & dm_context, //
-                     const RowKeyRange & range,
-                     std::vector<PageId> file_ids,
-                     bool clear_data_in_range);
-
-    void ingestFiles(const Context & db_context, //
-                     const DB::Settings & db_settings,
-                     const RowKeyRange & range,
-                     std::vector<PageId> file_ids,
-                     bool clear_data_in_range)
+    void write(const Context & db_context, const DB::Settings & db_settings, const Block & block);
+
+    void writeRegionSnapshot(const DMContextPtr & dm_context, //
+                             const RowKeyRange & range,
+                             std::vector<PageId> file_ids,
+                             bool clear_data_in_range);
+
+    void writeRegionSnapshot(const Context & db_context, //
+                             const DB::Settings & db_settings,
+                             const RowKeyRange & range,
+                             std::vector<PageId> file_ids,
+                             bool clear_data_in_range)
     {
         auto dm_context = newDMContext(db_context, db_settings);
-        return ingestFiles(dm_context, range, file_ids, clear_data_in_range);
+        return writeRegionSnapshot(dm_context, range, file_ids, clear_data_in_range);
     }
 
-    /// Read all rows without MVCC filtering
+    void deleteRange(const Context & db_context, const DB::Settings & db_settings, const RowKeyRange & delete_range);
+
     BlockInputStreams readRaw(const Context & db_context,
                               const DB::Settings & db_settings,
                               const ColumnDefines & column_defines,
                               size_t num_streams,
                               const SegmentIdSet & read_segments = {});
 
-    /// Read rows with MVCC filtering
-    /// `sorted_ranges` should be already sorted and merged
+    /// ranges should be sorted and merged already.
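+    /// A hedged illustration with made-up half-open handle ranges:
+    ///     ok:     [1, 10), [10, 20)   -- sorted and non-overlapping
+    ///     not ok: [10, 20), [1, 10)   -- out of order
+    ///     not ok: [1, 15), [10, 20)   -- overlapping; merge to [1, 20) first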
BlockInputStreams read(const Context & db_context, const DB::Settings & db_settings, const ColumnDefines & columns_to_read, @@ -325,11 +317,6 @@ class DeltaMergeStore : private boost::noncopyable ColumnID & max_column_id_used, const Context & context); - const ColumnDefinesPtr getStoreColumns() const - { - std::shared_lock lock(read_write_mutex); - return store_columns; - } const ColumnDefines & getTableColumns() const { return original_table_columns; } const ColumnDefine & getHandle() const { return original_table_handle_define; } BlockPtr getHeader() const; @@ -355,9 +342,9 @@ class DeltaMergeStore : private boost::noncopyable RegionSplitRes getRegionSplitPoint(DMContext & dm_context, const RowKeyRange & check_range, size_t max_region_size, size_t split_size); -private: DMContextPtr newDMContext(const Context & db_context, const DB::Settings & db_settings); +private: bool pkIsHandle() const { return original_table_handle_define.id != EXTRA_HANDLE_COLUMN_ID; } void waitForWrite(const DMContextPtr & context, const SegmentPtr & segment); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp index 4d3af9f6142..d644f1d90db 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp @@ -149,7 +149,7 @@ bool DMFile::isColIndexExist(const ColId & col_id) const { if (isSingleFileMode()) { - const auto index_identifier = DMFile::colIndexFileName(DMFile::getFileNameBase(col_id)); + const auto & index_identifier = DMFile::colIndexFileName(DMFile::getFileNameBase(col_id)); return isSubFileExists(index_identifier); } else diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h b/dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h index 92893fe1581..92272d070b3 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockOutputStream.h @@ -8,38 +8,24 @@ namespace DB { namespace DM { - -/// The output stream for writing block to DTFile. -/// -/// Note that we will filter block by `RSOperatorPtr` while reading, so the -/// blocks output to DTFile must be bounded by primary key, or we will get -/// wrong results by filtering. -/// You can use `PKSquashingBlockInputStream` to reorganize the boundary of -/// blocks. class DMFileBlockOutputStream { public: - using Flags = DMFileWriter::Flags; - DMFileBlockOutputStream(const Context & context, const DMFilePtr & dmfile, const ColumnDefines & write_columns, - const Flags flags = Flags()) - : writer( - dmfile, - write_columns, - context.getFileProvider(), - flags.needRateLimit() ? context.getRateLimiter() : nullptr, - DMFileWriter::Options{ - CompressionMethod::LZ4, // context.chooseCompressionSettings(0, 0), TODO: should enable this, and make unit testes work. - context.getSettingsRef().min_compress_block_size, - context.getSettingsRef().max_compress_block_size, - flags}) + bool need_rate_limit = false) + : writer(dmfile, + write_columns, + context.getSettingsRef().min_compress_block_size, + context.getSettingsRef().max_compress_block_size, + // context.chooseCompressionSettings(0, 0), TODO: should enable this, and make unit testes work. + CompressionSettings(CompressionMethod::LZ4), + context.getFileProvider(), + need_rate_limit ? 
context.getRateLimiter() : nullptr) { } - const DMFilePtr getFile() const { return writer.getFile(); } - void write(const Block & block, size_t not_clean_rows) { writer.write(block, not_clean_rows); } void writePrefix() {} diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index fe2b433e728..3e8996b6135 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -163,11 +163,15 @@ class DMFilePackFilter Float64 filter_rate = (Float64)(after_read_packs - after_filter) * 100 / after_read_packs; LOG_DEBUG(log, - "RSFilter exclude rate: " << ((after_read_packs == 0) ? "nan" : DB::toString(filter_rate, 2)) - << ", after_pk: " << after_pk << ", after_read_packs: " << after_read_packs - << ", after_filter: " << after_filter << ", handle_range: " << rowkey_range.toDebugString() - << ", read_packs: " << ((!read_packs) ? 0 : read_packs->size()) - << ", pack_count: " << pack_count); + "RSFilter exclude rate is nan, after_pk: " << after_pk << ", after_read_packs: " << after_read_packs << ", after_filter: " + << after_filter << ", handle_range: " << rowkey_range.toDebugString() + << ", read_packs: " << ((!read_packs) ? 0 : read_packs->size()) + << ", pack_count: " << pack_count); + + if (isnan(filter_rate)) + LOG_DEBUG(log, "RSFilter exclude rate: nan"); + else + LOG_DEBUG(log, "RSFilter exclude rate: " << DB::toString(filter_rate, 2)); } friend class DMFileReader; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp index c2c874043b0..4af6e0093eb 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp @@ -8,30 +8,33 @@ namespace DB namespace DM { -DMFileWriter::DMFileWriter(const DMFilePtr & dmfile_, - const ColumnDefines & write_columns_, - const FileProviderPtr & file_provider_, - const RateLimiterPtr & rate_limiter_, - const DMFileWriter::Options & options_) +DMFileWriter::DMFileWriter(const DMFilePtr & dmfile_, + const ColumnDefines & write_columns_, + size_t min_compress_block_size_, + size_t max_compress_block_size_, + const CompressionSettings & compression_settings_, + const FileProviderPtr & file_provider_, + const RateLimiterPtr & rate_limiter_) : dmfile(dmfile_), write_columns(write_columns_), - options(options_, dmfile), + min_compress_block_size(min_compress_block_size_), + max_compress_block_size(max_compress_block_size_), + compression_settings(compression_settings_), + single_file_mode(dmfile->isSingleFileMode()), // assume pack_stat_file is the first file created inside DMFile // it will create encryption info for the whole DMFile - pack_stat_file((options.flags.isSingleFile()) // - ? nullptr - : createWriteBufferFromFileBaseByFileProvider(file_provider_, - dmfile->packStatPath(), - dmfile->encryptionPackStatPath(), - true, - rate_limiter_, - 0, - 0, - options.max_compress_block_size)), - single_file_stream((!options.flags.isSingleFile()) - ? nullptr - : new SingleFileStream( - dmfile_, options.compression_settings, options.max_compress_block_size, file_provider_, rate_limiter_)), + pack_stat_file(single_file_mode ? nullptr + : createWriteBufferFromFileBaseByFileProvider(file_provider_, + dmfile->packStatPath(), + dmfile->encryptionPackStatPath(), + true, + rate_limiter_, + 0, + 0, + max_compress_block_size)), + single_file_stream(!single_file_mode ? 
nullptr + : new SingleFileStream( + dmfile_, compression_settings_, max_compress_block_size_, file_provider_, rate_limiter_)), file_provider(file_provider_), rate_limiter(rate_limiter_) { @@ -43,7 +46,7 @@ DMFileWriter::DMFileWriter(const DMFilePtr & dmfile_, /// for handle column always generate index bool do_index = cd.id == EXTRA_HANDLE_COLUMN_ID || cd.type->isInteger() || cd.type->isDateOrDateTime(); - if (options.flags.isSingleFile()) + if (single_file_mode) { if (do_index) { @@ -73,8 +76,8 @@ void DMFileWriter::addStreams(ColId col_id, DataTypePtr type, bool do_index) auto stream = std::make_unique(dmfile, // stream_name, type, - options.compression_settings, - options.max_compress_block_size, + compression_settings, + max_compress_block_size, file_provider, rate_limiter, IDataType::isNullMap(substream_path) ? false : do_index); @@ -107,7 +110,7 @@ void DMFileWriter::write(const Block & block, size_t not_clean_rows) stat.first_tag = (UInt8)(col->get64(0)); } - if (!options.flags.isSingleFile()) + if (!single_file_mode) { writePODBinary(stat, *pack_stat_file); } @@ -117,7 +120,7 @@ void DMFileWriter::write(const Block & block, size_t not_clean_rows) void DMFileWriter::finalize() { - if (!options.flags.isSingleFile()) + if (!single_file_mode) { pack_stat_file->sync(); } @@ -127,7 +130,7 @@ void DMFileWriter::finalize() finalizeColumn(cd.id, cd.type); } - if (options.flags.isSingleFile()) + if (single_file_mode) { dmfile->finalizeForSingleFileMode(single_file_stream->plain_hashing); single_file_stream->flush(); @@ -142,7 +145,7 @@ void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColu { size_t rows = column.size(); - if (options.flags.isSingleFile()) + if (single_file_mode) { auto callback = [&](const IDataType::SubstreamPath & substream) { size_t offset_in_compressed_file = single_file_stream->plain_hashing.count(); @@ -215,7 +218,7 @@ void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColu stream->minmaxes->addPack(column, del_mark); /// There could already be enough data to compress into the new block. - if (stream->original_hashing.offset() >= options.min_compress_block_size) + if (stream->original_hashing.offset() >= min_compress_block_size) stream->original_hashing.next(); auto offset_in_compressed_block = stream->original_hashing.offset(); @@ -253,7 +256,7 @@ void DMFileWriter::finalizeColumn(ColId col_id, DataTypePtr type) { size_t bytes_written = 0; - if (options.flags.isSingleFile()) + if (single_file_mode) { auto callback = [&](const IDataType::SubstreamPath & substream) { const auto stream_name = DMFile::getFileNameBase(col_id, substream); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h index e4be0b60de2..747614d7b11 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.h @@ -129,62 +129,18 @@ class DMFileWriter }; using SingleFileStreamPtr = std::shared_ptr; - struct Flags - { - private: - static constexpr size_t IS_SINGLE_FILE = 0x01; - static constexpr size_t NEED_RATE_LIMIT = 0x02; - - size_t value; - - public: - Flags() : value(0x0) {} - - inline void setSingleFile(bool v) { value = (v ? (value | IS_SINGLE_FILE) : (value & ~IS_SINGLE_FILE)); } - inline bool isSingleFile() const { return (value & IS_SINGLE_FILE); } - inline void setRateLimit(bool v) { value = (v ? 
(value | NEED_RATE_LIMIT) : (value & ~NEED_RATE_LIMIT)); } - inline bool needRateLimit() const { return (value & NEED_RATE_LIMIT); } - }; - - struct Options - { - CompressionSettings compression_settings; - size_t min_compress_block_size; - size_t max_compress_block_size; - Flags flags; - - Options() = default; - - Options(CompressionSettings compression_settings_, size_t min_compress_block_size_, size_t max_compress_block_size_, Flags flags_) - : compression_settings(compression_settings_), - min_compress_block_size(min_compress_block_size_), - max_compress_block_size(max_compress_block_size_), - flags(flags_) - { - } - - Options(const Options & from, const DMFilePtr & file) - : compression_settings(from.compression_settings), - min_compress_block_size(from.min_compress_block_size), - max_compress_block_size(from.max_compress_block_size), - flags(from.flags) - { - flags.setSingleFile(file->isSingleFileMode()); - } - }; - public: - DMFileWriter(const DMFilePtr & dmfile_, - const ColumnDefines & write_columns_, - const FileProviderPtr & file_provider_, - const RateLimiterPtr & rate_limiter_, - const Options & options_); + DMFileWriter(const DMFilePtr & dmfile_, + const ColumnDefines & write_columns_, + size_t min_compress_block_size_, + size_t max_compress_block_size_, + const CompressionSettings & compression_settings_, + const FileProviderPtr & file_provider_, + const RateLimiterPtr & rate_limiter_); void write(const Block & block, size_t not_clean_rows); void finalize(); - const DMFilePtr getFile() const { return dmfile; } - private: void finalizeColumn(ColId col_id, DataTypePtr type); void writeColumn(ColId col_id, const IDataType & type, const IColumn & column, const ColumnVector * del_mark); @@ -195,9 +151,12 @@ class DMFileWriter void addStreams(ColId col_id, DataTypePtr type, bool do_index); private: - DMFilePtr dmfile; - ColumnDefines write_columns; - Options options; + DMFilePtr dmfile; + ColumnDefines write_columns; + size_t min_compress_block_size; + size_t max_compress_block_size; + CompressionSettings compression_settings; + const bool single_file_mode; ColumnStreams column_streams; diff --git a/dbms/src/Storages/DeltaMerge/PKSquashingBlockInputStream.h b/dbms/src/Storages/DeltaMerge/ReorganizeBlockInputStream.h similarity index 60% rename from dbms/src/Storages/DeltaMerge/PKSquashingBlockInputStream.h rename to dbms/src/Storages/DeltaMerge/ReorganizeBlockInputStream.h index 5e8964e4577..3a1962beed6 100644 --- a/dbms/src/Storages/DeltaMerge/PKSquashingBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/ReorganizeBlockInputStream.h @@ -1,53 +1,38 @@ #pragma once #include -#include #include -#include namespace DB { namespace DM { -/// Reorganize the boundary of blocks. The rows with the same primary key(s) will be squashed -/// into the same output block. The output blocks are sorted by increasing pk && version. -/// Note that the `child` must be a sorted input stream with increasing pk && version. If you are -/// not sure the child stream is sorted with increasing pk && version, set `need_extra_sort` to -/// be `true`. -template -class PKSquashingBlockInputStream final : public IBlockInputStream +/// Reorganize the boundary of blocks. +/// Note that child must be a sorted input stream with increasing pk column. 
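+/// A hedged illustration with made-up pk values: given input blocks
+///     block#1 pks: [1, 2, 2]     block#2 pks: [2, 3]
+/// the leading pk == 2 rows of block#2 are squashed into block#1, so the stream
+/// emits
+///     block#1 pks: [1, 2, 2, 2]  block#2 pks: [3]
+/// i.e. rows sharing the same pk never straddle an output block boundary.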
+class ReorganizeBlockInputStream final : public IBlockInputStream { public: - PKSquashingBlockInputStream(BlockInputStreamPtr child, ColId pk_column_id_, bool is_common_handle_) - : sorted_input_stream(child), - pk_column_id(pk_column_id_), - is_common_handle(is_common_handle_) + ReorganizeBlockInputStream(BlockInputStreamPtr child, String pk_column_name_) + : sorted_input_stream(std::move(child)), pk_column_name(std::move(pk_column_name_)) { assert(sorted_input_stream != nullptr); cur_block = {}; - children.push_back(child); + if constexpr (DM_RUN_CHECK) + { + // Sanity check for existence of pk column + Block header = sorted_input_stream->getHeader(); + if (!header.has(pk_column_name)) + { + throw Exception("Try to write block to Pack without pk column", ErrorCodes::LOGICAL_ERROR); + } + is_common_handle = !header.getByName(pk_column_name).type->isInteger(); + } } - String getName() const override { return "PKSquashing"; } + String getName() const override { return "ReorganizeBlockBoundary"; } Block getHeader() const override { return sorted_input_stream->getHeader(); } - void readPrefix() override - { - forEachChild([](IBlockInputStream & child) { - child.readPrefix(); - return false; - }); - } - - void readSuffix() override - { - forEachChild([](IBlockInputStream & child) { - child.readSuffix(); - return false; - }); - } - Block read() override { if (first_read) @@ -58,24 +43,16 @@ class PKSquashingBlockInputStream final : public IBlockInputStream cur_block = next_block; if (!cur_block) - return finializeBlock(std::move(cur_block)); + return cur_block; while (true) { next_block = DB::DM::readNextBlock(sorted_input_stream); -#ifndef NDEBUG - if (next_block && !isSameSchema(cur_block, next_block)) - { - throw Exception("schema not match! [cur_block=" + cur_block.dumpStructure() + "] [next_block=" + next_block.dumpStructure() - + "]"); - } -#endif - - const size_t cut_offset = findCutOffsetInNextBlock(cur_block, next_block, pk_column_id, is_common_handle); + const size_t cut_offset = findCutOffsetInNextBlock(cur_block, next_block, pk_column_name, is_common_handle); if (unlikely(cut_offset == 0)) // There is no pk overlap between `cur_block` and `next_block`, or `next_block` is empty, just return `cur_block`. - return finializeBlock(std::move(cur_block)); + return cur_block; else { const size_t next_block_nrows = next_block.rows(); @@ -100,7 +77,7 @@ class PKSquashingBlockInputStream final : public IBlockInputStream if (cut_offset != next_block_nrows) { // We merge some rows to `cur_block`, return it. - return finializeBlock(std::move(cur_block)); + return cur_block; } // else we merge all rows from `next_block` to `cur_block`, continue to check if we should merge more blocks. 
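+                // (Worked example with made-up pks: if `cur_block` ends with
+                //  [..., 7, 8] and `next_block` is [8, 8, 9], the cut offset is 2:
+                //  the two leading 8s are appended to `cur_block` while [9] stays
+                //  in `next_block`; a cut offset equal to next_block.rows() means
+                //  the whole block was merged and the loop pulls more input.)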
} @@ -109,16 +86,16 @@ class PKSquashingBlockInputStream final : public IBlockInputStream private: static size_t - findCutOffsetInNextBlock(const Block & cur_block, const Block & next_block, const ColId pk_column_id, bool is_common_handle) + findCutOffsetInNextBlock(const Block & cur_block, const Block & next_block, const String & pk_column_name, bool is_common_handle) { assert(cur_block); if (!next_block) return 0; - auto cur_col = getByColumnId(cur_block, pk_column_id).column; + auto cur_col = cur_block.getByName(pk_column_name).column; RowKeyColumnContainer cur_rowkey_column(cur_col, is_common_handle); const auto last_curr_pk = cur_rowkey_column.getRowKeyValue(cur_col->size() - 1); - auto next_col = getByColumnId(next_block, pk_column_id).column; + auto next_col = next_block.getByName(pk_column_name).column; RowKeyColumnContainer next_rowkey_column(next_col, is_common_handle); size_t cut_offset = 0; for (/* */; cut_offset < next_col->size(); ++cut_offset) @@ -139,28 +116,15 @@ class PKSquashingBlockInputStream final : public IBlockInputStream return cut_offset; } - static Block finializeBlock(Block && block) - { - if constexpr (need_extra_sort) - { - // Sort by handle & version in ascending order. - static SortDescription sort{SortColumnDescription{EXTRA_HANDLE_COLUMN_NAME, 1, 0}, - SortColumnDescription{VERSION_COLUMN_NAME, 1, 0}}; - if (block.rows() > 1 && !isAlreadySorted(block, sort)) - stableSortBlock(block, sort); - } - return std::move(block); - } - private: BlockInputStreamPtr sorted_input_stream; - const ColId pk_column_id; + const String pk_column_name; + bool is_common_handle; Block cur_block; Block next_block; - bool first_read = true; - const bool is_common_handle; + bool first_read = true; }; } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.cpp deleted file mode 100644 index 7e07de53e39..00000000000 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.cpp +++ /dev/null @@ -1,247 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int ILLFORMAT_RAFT_ROW; -} // namespace ErrorCodes - -Block GenRegionBlockDatawithSchema( // - const RegionPtr &, - const std::shared_ptr &, - const DM::ColumnDefinesPtr &, - Timestamp, - bool, - TMTContext &); - -namespace DM -{ - -SSTFilesToBlockInputStream::SSTFilesToBlockInputStream( // - RegionPtr region_, - const SSTViewVec & snaps_, - const TiFlashRaftProxyHelper * proxy_helper_, - SSTFilesToBlockInputStream::StorageDeltaMergePtr ingest_storage_, - DM::ColumnDefinesPtr schema_snap_, - Timestamp gc_safepoint_, - bool force_decode_, - TMTContext & tmt_, - size_t expected_size_) - : region(std::move(region_)), - snaps(snaps_), - proxy_helper(proxy_helper_), - ingest_storage(std::move(ingest_storage_)), - schema_snap(std::move(schema_snap_)), - tmt(tmt_), - gc_safepoint(gc_safepoint_), - expected_size(expected_size_), - log(&Poco::Logger::get("SSTFilesToBlockInputStream")), - force_decode(force_decode_) -{ -} - -SSTFilesToBlockInputStream::~SSTFilesToBlockInputStream() = default; - -void SSTFilesToBlockInputStream::readPrefix() -{ - for (UInt64 i = 0; i < snaps.len; ++i) - { - auto & snapshot = snaps.views[i]; - switch (snapshot.type) - { - case ColumnFamilyType::Default: - default_cf_reader = std::make_unique(proxy_helper, snapshot); - break; - case 
ColumnFamilyType::Write: - write_cf_reader = std::make_unique(proxy_helper, snapshot); - break; - case ColumnFamilyType::Lock: - lock_cf_reader = std::make_unique(proxy_helper, snapshot); - break; - } - } - - process_keys.default_cf = 0; - process_keys.write_cf = 0; - process_keys.lock_cf = 0; -} - -void SSTFilesToBlockInputStream::readSuffix() -{ - // There must be no data left when we write suffix - assert(!write_cf_reader || !write_cf_reader->remained()); - assert(!default_cf_reader || !default_cf_reader->remained()); - assert(!lock_cf_reader || !lock_cf_reader->remained()); - - // reset all SSTReaders and return without writting blocks any more. - write_cf_reader.reset(); - default_cf_reader.reset(); - lock_cf_reader.reset(); -} - -Block SSTFilesToBlockInputStream::read() -{ - while (write_cf_reader && write_cf_reader->remained()) - { - // Read a key-value from write CF and continue to read key-value until - // the key that we read from write CF. - // All SST files store key-value in a sorted way so that we are able to - // scan committed rows in `region`. - auto key = write_cf_reader->key(); - auto value = write_cf_reader->value(); - region->insert(ColumnFamilyType::Write, TiKVKey(key.data, key.len), TiKVValue(value.data, value.len)); - ++process_keys.write_cf; - write_cf_reader->next(); - - if (process_keys.write_cf % expected_size == 0) - { - auto key_view = std::string_view{key.data, key.len}; - // Batch the scan from other CFs until we need to decode data - scanCF(ColumnFamilyType::Default, /*until*/ key_view); - scanCF(ColumnFamilyType::Lock, /*until*/ key_view); - - auto block = readCommitedBlock(); - if (block.rows() != 0) - return block; - // else continue to decode key-value from write CF. - } - } - // Scan all key-value pairs from other CFs - scanCF(ColumnFamilyType::Default); - scanCF(ColumnFamilyType::Lock); - - // All uncommitted data are saved in `region`, decode the last committed rows. - return readCommitedBlock(); -} - -void SSTFilesToBlockInputStream::scanCF(ColumnFamilyType cf, const std::string_view until) -{ - SSTReader * reader; - if (cf == ColumnFamilyType::Default) - reader = default_cf_reader.get(); - else if (cf == ColumnFamilyType::Lock) - reader = lock_cf_reader.get(); - else - throw Exception("Should not happen!"); - - size_t num_process_keys = 0; - while (reader && reader->remained()) - { - auto key = reader->key(); - if (until.data() == nullptr || memcmp(until.data(), key.data, std::min(until.size(), key.len)) >= 0) - { - auto value = reader->value(); - region->insert(cf, TiKVKey(key.data, key.len), TiKVValue(value.data, value.len)); - ++num_process_keys; - reader->next(); - } - else - break; - } - - if (cf == ColumnFamilyType::Default) - process_keys.default_cf += num_process_keys; - else if (cf == ColumnFamilyType::Lock) - process_keys.lock_cf += num_process_keys; -} - -Block SSTFilesToBlockInputStream::readCommitedBlock() -{ - if (is_decode_cancelled) - return {}; - - try - { - // Read block from `region`. If the schema has been updated, it will - // throw an exception with code `ErrorCodes::REGION_DATA_SCHEMA_UPDATED` - return GenRegionBlockDatawithSchema(region, ingest_storage, schema_snap, gc_safepoint, force_decode, tmt); - } - catch (DB::Exception & e) - { - if (e.code() == ErrorCodes::ILLFORMAT_RAFT_ROW) - { - // br or lighting may write illegal data into tikv, stop decoding. - LOG_WARNING(log, - "Got error while reading region committed cache: " - << e.displayText() << ". 
Stop decoding rows into DTFiles and keep uncommitted data in region."); - // Cancel the decoding process. - // Note that we still need to scan data from CFs and keep them in `region` - is_decode_cancelled = true; - return {}; - } - else - throw; - } -} - -/// Methods for BoundedSSTFilesToBlockInputStream - -BoundedSSTFilesToBlockInputStream::BoundedSSTFilesToBlockInputStream( // - SSTFilesToBlockInputStreamPtr child, - const ColId pk_column_id_, - const bool is_common_handle_) - : pk_column_id(pk_column_id_), is_common_handle(is_common_handle_), _raw_child(std::move(child)) -{ - // Initlize `mvcc_compact_stream` - // First refine the boundary of blocks. Note that the rows decoded from SSTFiles are sorted by primary key asc, timestamp desc - // (https://github.com/tikv/tikv/blob/v5.0.1/components/txn_types/src/types.rs#L103-L108). - // While DMVersionFilter require rows sorted by primary key asc, timestamp asc, so we need an extra sort in PKSquashing. - auto stream = std::make_shared>(_raw_child, pk_column_id, is_common_handle); - mvcc_compact_stream = std::make_unique>( - stream, *(_raw_child->schema_snap), _raw_child->gc_safepoint, is_common_handle); -} - -void BoundedSSTFilesToBlockInputStream::readPrefix() -{ - mvcc_compact_stream->readPrefix(); -} - -void BoundedSSTFilesToBlockInputStream::readSuffix() -{ - mvcc_compact_stream->readSuffix(); -} - -Block BoundedSSTFilesToBlockInputStream::read() -{ - return mvcc_compact_stream->read(); -} - -std::tuple, DM::ColumnDefinesPtr> BoundedSSTFilesToBlockInputStream::ingestingInfo() const -{ - return std::make_tuple(_raw_child->ingest_storage, _raw_child->schema_snap); -} - -SSTFilesToBlockInputStream::ProcessKeys BoundedSSTFilesToBlockInputStream::getProcessKeys() const -{ - return _raw_child->process_keys; -} - -const RegionPtr BoundedSSTFilesToBlockInputStream::getRegion() const -{ - return _raw_child->region; -} - -size_t BoundedSSTFilesToBlockInputStream::getMvccStatistics() const -{ - return mvcc_compact_stream->getNotCleanRows(); -} - -} // namespace DM -} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h b/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h deleted file mode 100644 index 210896dade1..00000000000 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToBlockInputStream.h +++ /dev/null @@ -1,137 +0,0 @@ -#pragma once - -#include -#include -#include - -#include -#include - -namespace Poco -{ -class Logger; -} - -namespace DB -{ - -class TMTContext; -class Region; -using RegionPtr = std::shared_ptr; - -struct SSTViewVec; -struct TiFlashRaftProxyHelper; -struct SSTReader; -class StorageDeltaMerge; - -namespace DM -{ - -struct ColumnDefine; -using ColumnDefines = std::vector; -using ColumnDefinesPtr = std::shared_ptr; - -// forward declaration -class SSTFilesToBlockInputStream; -using SSTFilesToBlockInputStreamPtr = std::shared_ptr; -class BoundedSSTFilesToBlockInputStream; -using BoundedSSTFilesToBlockInputStreamPtr = std::shared_ptr; - -class SSTFilesToBlockInputStream final : public IBlockInputStream -{ -public: - using StorageDeltaMergePtr = std::shared_ptr; - SSTFilesToBlockInputStream(RegionPtr region_, - const SSTViewVec & snaps_, - const TiFlashRaftProxyHelper * proxy_helper_, - StorageDeltaMergePtr ingest_storage_, - DM::ColumnDefinesPtr schema_snap_, - Timestamp gc_safepoint_, - bool force_decode_, - TMTContext & tmt_, - size_t expected_size_ = DEFAULT_MERGE_BLOCK_SIZE); - ~SSTFilesToBlockInputStream(); - - String getName() const override { return "SSTFilesToBlockInputStream"; 
} - - Block getHeader() const override { return toEmptyBlock(*schema_snap); } - - void readPrefix() override; - void readSuffix() override; - Block read() override; - -public: - struct ProcessKeys - { - size_t default_cf; - size_t write_cf; - size_t lock_cf; - - inline size_t total() const { return default_cf + write_cf + lock_cf; } - }; - -private: - void scanCF(ColumnFamilyType cf, const std::string_view until = std::string_view{}); - - Block readCommitedBlock(); - -private: - RegionPtr region; - const SSTViewVec & snaps; - const TiFlashRaftProxyHelper * proxy_helper{nullptr}; - const StorageDeltaMergePtr ingest_storage; - const DM::ColumnDefinesPtr schema_snap; - TMTContext & tmt; - const Timestamp gc_safepoint; - size_t expected_size; - Poco::Logger * log; - - using SSTReaderPtr = std::unique_ptr; - SSTReaderPtr write_cf_reader; - SSTReaderPtr default_cf_reader; - SSTReaderPtr lock_cf_reader; - - friend class BoundedSSTFilesToBlockInputStream; - - const bool force_decode; - bool is_decode_cancelled = false; - - ProcessKeys process_keys; -}; - -// Bound the blocks read from SSTFilesToBlockInputStream by column `_tidb_rowid` and -// do some calculation for the `DMFileWriter::BlockProperty` of read blocks. -class BoundedSSTFilesToBlockInputStream final -{ -public: - BoundedSSTFilesToBlockInputStream(SSTFilesToBlockInputStreamPtr child, const ColId pk_column_id_, const bool is_common_handle_); - - String getName() const { return "BoundedSSTFilesToBlockInputStream"; } - - void readPrefix(); - - void readSuffix(); - - Block read(); - - std::tuple, DM::ColumnDefinesPtr> ingestingInfo() const; - - SSTFilesToBlockInputStream::ProcessKeys getProcessKeys() const; - - const RegionPtr getRegion() const; - - // Return values: not clean rows - size_t getMvccStatistics() const; - -private: - const ColId pk_column_id; - const bool is_common_handle; - - // Note that we only keep _raw_child for getting ingest info / process key, etc. 
All block should be - // read from `mvcc_compact_stream` - const SSTFilesToBlockInputStreamPtr _raw_child; - std::unique_ptr> mvcc_compact_stream; -}; - -} // namespace DM -} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp deleted file mode 100644 index be0c61d6523..00000000000 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp +++ /dev/null @@ -1,212 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace ProfileEvents -{ -extern const Event DMWriteBytes; -} - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int ILLFORMAT_RAFT_ROW; -} // namespace ErrorCodes - -namespace DM -{ - -SSTFilesToDTFilesOutputStream::SSTFilesToDTFilesOutputStream( // - BoundedSSTFilesToBlockInputStreamPtr child_, - TiDB::SnapshotApplyMethod method_, - FileConvertJobType job_type_, - TMTContext & tmt_) - : child(child_), // - method(method_), - job_type(job_type_), - tmt(tmt_), - log(&Poco::Logger::get("SSTFilesToDTFilesOutputStream")) -{ -} - -SSTFilesToDTFilesOutputStream::~SSTFilesToDTFilesOutputStream() = default; - -void SSTFilesToDTFilesOutputStream::writePrefix() -{ - child->readPrefix(); - - commit_rows = 0; - watch.start(); -} - -void SSTFilesToDTFilesOutputStream::writeSuffix() -{ - child->readSuffix(); - - if (dt_stream != nullptr) - { - dt_stream->writeSuffix(); - auto dt_file = dt_stream->getFile(); - assert(!dt_file->canGC()); // The DTFile should not be able to gc until it is ingested. - // Add the DTFile to StoragePathPool so that we can restore it later - auto [ingest_storage, _schema_snap] = child->ingestingInfo(); - std::ignore = _schema_snap; - const auto bytes_written = dt_file->getBytesOnDisk(); - ingest_storage->getStore()->preIngestFile(dt_file->parentPath(), dt_file->fileId(), bytes_written); - - // Report DMWriteBytes for calculating write amplification - ProfileEvents::increment(ProfileEvents::DMWriteBytes, bytes_written); - - dt_stream.reset(); - } - - auto & ctx = tmt.getContext(); - auto metrics = ctx.getTiFlashMetrics(); - const auto process_keys = child->getProcessKeys(); - if (job_type == FileConvertJobType::ApplySnapshot) - { - GET_METRIC(metrics, tiflash_raft_command_duration_seconds, type_apply_snapshot_predecode).Observe(watch.elapsedSeconds()); - // Note that number of keys in different cf will be aggregated into one metrics - GET_METRIC(metrics, tiflash_raft_process_keys, type_apply_snapshot).Increment(process_keys.total()); - } - else - { - // Note that number of keys in different cf will be aggregated into one metrics - GET_METRIC(metrics, tiflash_raft_process_keys, type_ingest_sst).Increment(process_keys.total()); - } - LOG_INFO(log, - "Pre-handle snapshot " << child->getRegion()->toString(true) << " to " << ingest_files.size() << " DTFiles, cost " - << watch.elapsedMilliseconds() << "ms [rows=" << commit_rows - << "] [write_cf_keys=" << process_keys.write_cf << "] [default_cf_keys=" << process_keys.default_cf - << "] [lock_cf_keys=" << process_keys.lock_cf << "]"); -} - -bool SSTFilesToDTFilesOutputStream::newDTFileStream() -{ - // Generate a DMFilePtr and its DMFileBlockOutputStream - DMFileBlockOutputStream::Flags flags; - switch (method) - { - case TiDB::SnapshotApplyMethod::DTFile_Directory: - flags.setSingleFile(false); - break; - case TiDB::SnapshotApplyMethod::DTFile_Single: - flags.setSingleFile(true); - break; - 
default: - break; - } - - // The parent_path and file_id are generated by the storage. - auto [ingest_storage, schema_snap] = child->ingestingInfo(); - auto [parent_path, file_id] = ingest_storage->getStore()->preAllocateIngestFile(); - if (parent_path.empty()) - { - // Can no allocate path and id for storing DTFiles (the storage may be dropped / shutdown) - return false; - } - auto dt_file = DMFile::create(file_id, parent_path, flags.isSingleFile()); - LOG_INFO(log, "Create file for snapshot data [file=" << dt_file->path() << "] [single_file_mode=" << flags.isSingleFile() << "]"); - dt_stream = std::make_unique(tmt.getContext(), dt_file, *schema_snap, flags); - dt_stream->writePrefix(); - ingest_files.emplace_back(dt_file); - return true; -} - -void SSTFilesToDTFilesOutputStream::write() -{ - size_t last_not_clean_rows = 0; - while (true) - { - - Block block = child->read(); - if (!block) - break; - if (unlikely(block.rows() == 0)) - continue; - - if (dt_stream == nullptr) - { - // If can not create DTFile stream (the storage may be dropped / shutdown), - // break the writing loop. - if (bool ok = newDTFileStream(); !ok) - { - break; - } - } - - { - // Check whether rows are sorted by handle & version in ascending order. - SortDescription sort; - sort.emplace_back(MutableSupport::tidb_pk_column_name, 1, 0); - sort.emplace_back(MutableSupport::version_column_name, 1, 0); - if (unlikely(block.rows() > 1 && !isAlreadySorted(block, sort))) - { - const String error_msg - = "The block decoded from SSTFile is not sorted by primary key and version " + child->getRegion()->toString(true); - LOG_ERROR(log, error_msg); - FieldVisitorToString visitor; - const size_t nrows = block.rows(); - for (size_t i = 0; i < nrows; ++i) - { - const auto & pk_col = block.getByName(MutableSupport::tidb_pk_column_name); - const auto & ver_col = block.getByName(MutableSupport::version_column_name); - LOG_ERROR(log, - "[Row=" << i << "/" << nrows << "] [pk=" << applyVisitor(visitor, (*pk_col.column)[i]) - << "] [ver=" << applyVisitor(visitor, (*ver_col.column)[i]) << "]"); - } - throw Exception(error_msg); - } - } - - // Write block to the output stream - auto cur_not_clean_rows = child->getMvccStatistics(); - dt_stream->write(block, cur_not_clean_rows - last_not_clean_rows); - - commit_rows += block.rows(); - last_not_clean_rows = cur_not_clean_rows; - } -} - -PageIds SSTFilesToDTFilesOutputStream::ingestIds() const -{ - PageIds ids; - for (const auto & file : ingest_files) - { - ids.emplace_back(file->fileId()); - } - return ids; -} - -void SSTFilesToDTFilesOutputStream::cancel() -{ - // Try a lightweight cleanup the file generated by this stream (marking them able to be GC-ed). - for (auto & file : ingest_files) - { - try - { - file->enableGC(); - } - catch (...) 
- { - tryLogCurrentException(log, "ignore exception while canceling SST files to DeltaTree files stream [file=" + file->path() + "]"); - } - } -} - -} // namespace DM -} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.h b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.h deleted file mode 100644 index be80372cfa0..00000000000 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.h +++ /dev/null @@ -1,91 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include -#include - -namespace Poco -{ -class Logger; -} - -namespace DB -{ - -class TMTContext; -class Region; -using RegionPtr = std::shared_ptr; - -struct SSTViewVec; -struct TiFlashRaftProxyHelper; -struct SSTReader; -class StorageDeltaMerge; - -namespace DM -{ - -struct ColumnDefine; -using ColumnDefines = std::vector; -using ColumnDefinesPtr = std::shared_ptr; - -class DMFile; -using DMFilePtr = std::shared_ptr; -class DMFileBlockOutputStream; - -enum class FileConvertJobType -{ - ApplySnapshot, - IngestSST, -}; - - -// This class is tightly coupling with BoundedSSTFilesToBlockInputStream -// to get some info of the decoding process. -class SSTFilesToDTFilesOutputStream : private boost::noncopyable -{ -public: - using StorageDeltaMergePtr = std::shared_ptr; - SSTFilesToDTFilesOutputStream(BoundedSSTFilesToBlockInputStreamPtr child_, - TiDB::SnapshotApplyMethod method_, - FileConvertJobType job_type_, - TMTContext & tmt_); - ~SSTFilesToDTFilesOutputStream(); - - void writePrefix(); - void writeSuffix(); - void write(); - - PageIds ingestIds() const; - - // Try to cleanup the files in `ingest_files` quickly. - void cancel(); - -private: - - bool newDTFileStream(); - - // Stop the process for decoding committed data into DTFiles - void stop(); - -private: - BoundedSSTFilesToBlockInputStreamPtr child; - const TiDB::SnapshotApplyMethod method; - const FileConvertJobType job_type; - TMTContext & tmt; - Poco::Logger * log; - - std::unique_ptr dt_stream; - - std::vector ingest_files; - - size_t schema_sync_trigger_count = 0; - size_t commit_rows = 0; - Stopwatch watch; -}; - -} // namespace DM -} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 90c164a96f1..13e0ed98baa 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -74,15 +74,15 @@ namespace DM const static size_t SEGMENT_BUFFER_SIZE = 128; // More than enough. 
-DMFilePtr writeIntoNewDMFile(DMContext & dm_context, // - const ColumnDefinesPtr & schema_snap, - const BlockInputStreamPtr & input_stream, - UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags) +DMFilePtr writeIntoNewDMFile(DMContext & dm_context, // + const ColumnDefinesPtr & schema_snap, + const BlockInputStreamPtr & input_stream, + UInt64 file_id, + const String & parent_path, + bool need_rate_limit) { - auto dmfile = DMFile::create(file_id, parent_path, flags.isSingleFile()); - auto output_stream = std::make_shared(dm_context.db_context, dmfile, *schema_snap, flags); + auto dmfile = DMFile::create(file_id, parent_path, dm_context.db_context.getSettingsRef().dt_enable_single_file_mode_dmfile); + auto output_stream = std::make_shared(dm_context.db_context, dmfile, *schema_snap, need_rate_limit); auto * mvcc_stream = typeid_cast *>(input_stream.get()); input_stream->readPrefix(); @@ -122,12 +122,8 @@ StableValueSpacePtr createNewStable(DMContext & context, auto delegate = context.path_pool.getStableDiskDelegator(); auto store_path = delegate.choosePath(); - DMFileBlockOutputStream::Flags flags; - flags.setRateLimit(need_rate_limit); - flags.setSingleFile(context.db_context.getSettingsRef().dt_enable_single_file_mode_dmfile); - PageId dmfile_id = context.storage_pool.newDataPageId(); - auto dmfile = writeIntoNewDMFile(context, schema_snap, input_stream, dmfile_id, store_path, flags); + auto dmfile = writeIntoNewDMFile(context, schema_snap, input_stream, dmfile_id, store_path, need_rate_limit); auto stable = std::make_shared(stable_id); stable->setFiles({dmfile}, RowKeyRange::newAll(context.is_common_handle, context.rowkey_column_size)); stable->saveMeta(wbs.meta); @@ -213,14 +209,16 @@ SegmentPtr Segment::restoreSegment(DMContext & context, PageId segment_id) switch (version) { - case SegmentFormat::V1: { + case SegmentFormat::V1: + { HandleRange range; readIntBinary(range.start, buf); readIntBinary(range.end, buf); rowkey_range = RowKeyRange::fromHandleRange(range); break; } - case SegmentFormat::V2: { + case SegmentFormat::V2: + { rowkey_range = RowKeyRange::deserialize(buf); break; } @@ -299,12 +297,12 @@ bool Segment::write(DMContext & dm_context, const RowKeyRange & delete_range) return delta->appendDeleteRange(dm_context, delete_range); } -bool Segment::ingestPacks(DMContext & dm_context, const RowKeyRange & range, const DeltaPacks & packs, bool clear_data_in_range) +bool Segment::writeRegionSnapshot(DMContext & dm_context, const RowKeyRange & range, const DeltaPacks & packs, bool clear_data_in_range) { auto new_range = range.shrink(rowkey_range); LOG_TRACE(log, "Segment [" << segment_id << "] write region snapshot: " << new_range.toDebugString()); - return delta->ingestPacks(dm_context, range, packs, clear_data_in_range); + return delta->appendRegionSnapshot(dm_context, range, packs, clear_data_in_range); } SegmentSnapshotPtr Segment::createSnapshot(const DMContext & dm_context, bool for_update) const @@ -441,7 +439,7 @@ BlockInputStreamPtr Segment::getInputStreamForDataExport(const DMContext & data_stream = std::make_shared>(data_stream, data_range, 0); if (reorgnize_block) { - data_stream = std::make_shared>(data_stream, EXTRA_HANDLE_COLUMN_ID, is_common_handle); + data_stream = std::make_shared(data_stream, EXTRA_HANDLE_COLUMN_NAME); } data_stream = std::make_shared>( data_stream, *read_info.read_columns, dm_context.min_version, is_common_handle); @@ -898,7 +896,7 @@ std::optional Segment::prepareSplitPhysical(DMContext & my_data = 
std::make_shared>(my_data, my_range, 0); - my_data = std::make_shared>(my_data, EXTRA_HANDLE_COLUMN_ID, is_common_handle); + my_data = std::make_shared(my_data, EXTRA_HANDLE_COLUMN_NAME); my_data = std::make_shared>( my_data, *read_info.read_columns, dm_context.min_version, is_common_handle); auto my_stable_id = segment_snap->stable->getId(); @@ -925,7 +923,7 @@ std::optional Segment::prepareSplitPhysical(DMContext & other_data = std::make_shared>(other_data, other_range, 0); - other_data = std::make_shared>(other_data, EXTRA_HANDLE_COLUMN_ID, is_common_handle); + other_data = std::make_shared(other_data, EXTRA_HANDLE_COLUMN_NAME); other_data = std::make_shared>( other_data, *read_info.read_columns, dm_context.min_version, is_common_handle); auto other_stable_id = dm_context.storage_pool.newMetaPageId(); @@ -1055,7 +1053,7 @@ StableValueSpacePtr Segment::prepareMerge(DMContext & dm_context, dm_context.stable_pack_rows); stream = std::make_shared>(stream, segment->rowkey_range, 0); - stream = std::make_shared>(stream, EXTRA_HANDLE_COLUMN_ID, dm_context.is_common_handle); + stream = std::make_shared(stream, EXTRA_HANDLE_COLUMN_NAME); stream = std::make_shared>( stream, *read_info.read_columns, dm_context.min_version, dm_context.is_common_handle); @@ -1438,7 +1436,7 @@ bool Segment::placeDelete(const DMContext & dm_context, { RowKeyValueRef first_rowkey = RowKeyColumnContainer(block.getByPosition(0).column, is_common_handle).getRowKeyValue(0); auto place_handle_range = skippable_place ? RowKeyRange::startFrom(first_rowkey, is_common_handle, rowkey_column_size) - : RowKeyRange::newAll(is_common_handle, rowkey_column_size); + : RowKeyRange::newAll(is_common_handle, rowkey_column_size); auto compacted_index = update_delta_tree.getCompactedEntries(); diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index 7085173fe6e..7dba2eff5b4 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -118,7 +118,7 @@ class Segment : private boost::noncopyable bool writeToCache(DMContext & dm_context, const Block & block, size_t offset, size_t limit); bool write(DMContext & dm_context, const Block & block); // For test only bool write(DMContext & dm_context, const RowKeyRange & delete_range); - bool ingestPacks(DMContext & dm_context, const RowKeyRange & range, const DeltaPacks & packs, bool clear_data_in_range); + bool writeRegionSnapshot(DMContext & dm_context, const RowKeyRange & range, const DeltaPacks & packs, bool clear_data_in_range); SegmentSnapshotPtr createSnapshot(const DMContext & dm_context, bool for_update = false) const; @@ -326,5 +326,12 @@ class Segment : private boost::noncopyable Logger * log; }; +DMFilePtr writeIntoNewDMFile(DMContext & dm_context, // + const ColumnDefinesPtr & schema_snap, + const BlockInputStreamPtr & input_stream, + UInt64 file_id, + const String & parent_path, + bool need_rate_limit); + } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.h b/dbms/src/Storages/DeltaMerge/StoragePool.h index 723552519d1..6b7d3a81de0 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.h +++ b/dbms/src/Storages/DeltaMerge/StoragePool.h @@ -59,7 +59,7 @@ class StoragePool : private boost::noncopyable std::mutex mutex; }; -struct StorageSnapshot : private boost::noncopyable +struct StorageSnapshot { StorageSnapshot(StoragePool & storage, bool snapshot_read = true) : log_reader(storage.log(), snapshot_read ? 
storage.log().getSnapshot() : nullptr), diff --git a/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h b/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h index 5ea518b0454..96a85470807 100644 --- a/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h +++ b/dbms/src/Storages/DeltaMerge/tests/dm_basic_include.h @@ -111,7 +111,14 @@ class DMTestEnv } else { - field = reversed ? Int64(end - 1 - i) : Int64(beg + i); + if (!reversed) + { + field = Int64(beg + i); + } + else + { + field = Int64(end - 1 - i); + } } m_col->insert(field); } @@ -151,17 +158,16 @@ class DMTestEnv * Create a simple block with 3 columns: * * `pk` - Int64 / `version` / `tag` * @param pk `pk`'s value - * @param ts_beg `timestamp`'s value begin - * @param ts_end `timestamp`'s value end (not included) - * @param reversed increasing/decreasing insert `timestamp`'s value + * @param tso_beg `tso`'s value begin + * @param tso_end `tso`'s value end (not included) * @return */ - static Block prepareBlockWithTso(Int64 pk, size_t ts_beg, size_t ts_end, bool reversed = false) + static Block prepareBlockWithIncreasingTso(Int64 pk, size_t tso_beg, size_t tso_end) { Block block; - const size_t num_rows = (ts_end - ts_beg); + const size_t num_rows = (tso_end - tso_beg); { - ColumnWithTypeAndName col1(nullptr, std::make_shared(), pk_name, EXTRA_HANDLE_COLUMN_ID); + ColumnWithTypeAndName col1(std::make_shared(), pk_name); { IColumn::MutablePtr m_col = col1.type->createColumn(); // insert form large to small @@ -174,19 +180,19 @@ class DMTestEnv } block.insert(col1); - ColumnWithTypeAndName version_col(nullptr, VERSION_COLUMN_TYPE, VERSION_COLUMN_NAME, VERSION_COLUMN_ID); + ColumnWithTypeAndName version_col(VERSION_COLUMN_TYPE, VERSION_COLUMN_NAME); { IColumn::MutablePtr m_col = version_col.type->createColumn(); for (size_t i = 0; i < num_rows; ++i) { - Field field = reversed ? Int64(ts_end - 1 - i) : Int64(ts_beg + i); + Field field = Int64(tso_beg + i); m_col->insert(field); } version_col.column = std::move(m_col); } block.insert(version_col); - ColumnWithTypeAndName tag_col(nullptr, TAG_COLUMN_TYPE, TAG_COLUMN_NAME, TAG_COLUMN_ID); + ColumnWithTypeAndName tag_col(TAG_COLUMN_TYPE, TAG_COLUMN_NAME); { IColumn::MutablePtr m_col = tag_col.type->createColumn(); auto & column_data = typeid_cast &>(*m_col).getData(); @@ -210,10 +216,7 @@ class DMTestEnv Block block; const size_t num_rows = 1; { - ColumnWithTypeAndName col1(nullptr, - is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, - pk_name, - EXTRA_HANDLE_COLUMN_ID); + ColumnWithTypeAndName col1(is_common_handle ? 
EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, pk_name); { IColumn::MutablePtr m_col = col1.type->createColumn(); // insert form large to small @@ -235,7 +238,7 @@ class DMTestEnv } block.insert(col1); - ColumnWithTypeAndName version_col(nullptr, VERSION_COLUMN_TYPE, VERSION_COLUMN_NAME, VERSION_COLUMN_ID); + ColumnWithTypeAndName version_col(VERSION_COLUMN_TYPE, VERSION_COLUMN_NAME); { IColumn::MutablePtr m_col = version_col.type->createColumn(); m_col->insert(tso); @@ -243,7 +246,7 @@ class DMTestEnv } block.insert(version_col); - ColumnWithTypeAndName tag_col(nullptr, TAG_COLUMN_TYPE, TAG_COLUMN_NAME, TAG_COLUMN_ID); + ColumnWithTypeAndName tag_col(TAG_COLUMN_TYPE, TAG_COLUMN_NAME); { IColumn::MutablePtr m_col = tag_col.type->createColumn(); auto & column_data = typeid_cast &>(*m_col).getData(); @@ -253,7 +256,7 @@ class DMTestEnv } block.insert(tag_col); - ColumnWithTypeAndName str_col(nullptr, DataTypeFactory::instance().get("String"), colname); + ColumnWithTypeAndName str_col(DataTypeFactory::instance().get("String"), colname); { IColumn::MutablePtr m_col = str_col.type->createColumn(); m_col->insert(value); @@ -327,12 +330,6 @@ class DMTestEnv } return block; } - - static int getPseudoRandomNumber() - { - static int num = 0; - return num++; - } }; } // namespace tests diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_data_streams.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_data_streams.cpp deleted file mode 100644 index 3169aa8826b..00000000000 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_data_streams.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include -#include -#include - -namespace DB -{ -namespace DM -{ -namespace tests -{ - -TEST(PKSquash_test, WithExtraSort) -{ - BlocksList blocks; - - size_t rows_per_block = 10; - size_t num_rows_write = 0; - { - // pk asc, ts desc - blocks.push_back(DMTestEnv::prepareBlockWithTso(4, 10000 + rows_per_block * 2, 10000 + rows_per_block * 3, true)); - num_rows_write += blocks.back().rows(); - blocks.push_back(DMTestEnv::prepareBlockWithTso(4, 10000 + rows_per_block, 10000 + rows_per_block * 2, true)); - num_rows_write += blocks.back().rows(); - blocks.push_back(DMTestEnv::prepareBlockWithTso(4, 10000, 10000 + rows_per_block, true)); - num_rows_write += blocks.back().rows(); - - { - Block mix_pks_block = DMTestEnv::prepareBlockWithTso(5, 10000, 10000 + rows_per_block, true); - Block b2 = DMTestEnv::prepareBlockWithTso(6, 10000 + rows_per_block, 10000 + rows_per_block * 2, true); - concat(mix_pks_block, b2); - blocks.push_back(mix_pks_block); - num_rows_write += blocks.back().rows(); - } - { - Block mix_pks_block = DMTestEnv::prepareBlockWithTso(6, 10000, 10000 + rows_per_block, true); - Block b2 = DMTestEnv::prepareBlockWithTso(7, 10000 + rows_per_block, 10000 + rows_per_block * 2, true); - concat(mix_pks_block, b2); - blocks.push_back(mix_pks_block); - num_rows_write += blocks.back().rows(); - } - blocks.push_back(DMTestEnv::prepareBlockWithTso(7, 10000, 10000 + rows_per_block, true)); - num_rows_write += blocks.back().rows(); - } - - // Sorted by pk, tso asc - SortDescription sort // - = SortDescription{// - SortColumnDescription{EXTRA_HANDLE_COLUMN_NAME, 1, 0}, - SortColumnDescription{VERSION_COLUMN_NAME, 1, 0}}; - - { - auto in = std::make_shared>( - std::make_shared(blocks.begin(), blocks.end()), TiDBPkColumnID, false); - size_t num_blocks_read = 0; - size_t num_rows_read = 0; - in->readPrefix(); - Block block; - while (true) - { - block = in->read(); - if (!block) - break; - - num_blocks_read += 1; - if 
(num_blocks_read == 1) - { - // for pk == 4 - EXPECT_EQ(block.rows(), rows_per_block * 3); - } - else if (num_blocks_read == 2) - { - // for pk in (5, 6) - EXPECT_EQ(block.rows(), rows_per_block * 3); - } - else - { - // for pk == 7 - EXPECT_EQ(block.rows(), rows_per_block * 2); - } - num_rows_read += block.rows(); - // Should be sorted - ASSERT_TRUE(isAlreadySorted(block, sort)); - } - in->readSuffix(); - ASSERT_EQ(num_blocks_read, 3); - ASSERT_EQ(num_rows_read, num_rows_write); - } -} - -} // namespace tests -} // namespace DM -} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_index_manager.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_index_manager.cpp index 5cd1f63e413..1b3c26f4642 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_index_manager.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_index_manager.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 371da5ee020..4f1b4f5f541 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -4,15 +4,12 @@ #include #include #include - -#define private public -#include -#undef private #include -#include +#include #include #include #include +#include #include @@ -31,12 +28,6 @@ extern const char force_triggle_foreground_flush[]; namespace DM { -extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, // - const ColumnDefinesPtr & schema_snap, - const BlockInputStreamPtr & input_stream, - UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags); namespace tests { @@ -120,17 +111,14 @@ class DeltaMergeStore_test : public ::testing::Test, public testing::WithParamIn std::pair> genDMFile(DMContext & context, const Block & block) { - auto input_stream = std::make_shared(block); - auto [store_path, file_id] = store->preAllocateIngestFile(); - - DMFileBlockOutputStream::Flags flags; - flags.setSingleFile(DMTestEnv::getPseudoRandomNumber() % 2); - - auto dmfile = writeIntoNewDMFile( - context, std::make_shared(store->getTableColumns()), input_stream, file_id, store_path, flags); - + auto file_id = context.storage_pool.newDataPageId(); + auto input_stream = std::make_shared(block); + auto delegate = context.path_pool.getStableDiskDelegator(); + auto store_path = delegate.choosePath(); + auto dmfile = writeIntoNewDMFile( + context, std::make_shared(store->getTableColumns()), input_stream, file_id, store_path, false); - store->preIngestFile(store_path, file_id, dmfile->getBytesOnDisk()); + delegate.addDTFile(file_id, dmfile->getBytesOnDisk(), store_path); auto & pk_column = block.getByPosition(0).column; auto min_pk = pk_column->getInt(0); @@ -266,12 +254,12 @@ try { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); break; default: { auto dm_context = store->newDMContext(*context, context->getSettingsRef()); auto [range, file_ids] = genDMFile(*dm_context, block); - store->ingestFiles(dm_context, range, file_ids, false); + store->writeRegionSnapshot(dm_context, range, file_ids, false); break; } } @@ -376,12 +364,12 @@ try { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: - store->write(*context, context->getSettingsRef(), std::move(block)); + 
store->write(*context, context->getSettingsRef(), block); break; default: { auto dm_context = store->newDMContext(*context, context->getSettingsRef()); auto [range, file_ids] = genDMFile(*dm_context, block); - store->ingestFiles(dm_context, range, file_ids, false); + store->writeRegionSnapshot(dm_context, range, file_ids, false); break; } } @@ -470,9 +458,9 @@ try { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: { - store->write(*context, context->getSettingsRef(), std::move(block1)); - store->write(*context, context->getSettingsRef(), std::move(block2)); - store->write(*context, context->getSettingsRef(), std::move(block3)); + store->write(*context, context->getSettingsRef(), block1); + store->write(*context, context->getSettingsRef(), block2); + store->write(*context, context->getSettingsRef(), block3); break; } case TestMode::V2_FileOnly: { @@ -484,7 +472,7 @@ try auto file_ids = file_ids1; file_ids.insert(file_ids.cend(), file_ids2.begin(), file_ids2.end()); file_ids.insert(file_ids.cend(), file_ids3.begin(), file_ids3.end()); - store->ingestFiles(dm_context, range, file_ids, false); + store->writeRegionSnapshot(dm_context, range, file_ids, false); break; } case TestMode::V2_Mix: { @@ -494,9 +482,9 @@ try auto range = range1.merge(range3); auto file_ids = file_ids1; file_ids.insert(file_ids.cend(), file_ids3.begin(), file_ids3.end()); - store->ingestFiles(dm_context, range, file_ids, false); + store->writeRegionSnapshot(dm_context, range, file_ids, false); - store->write(*context, context->getSettingsRef(), std::move(block2)); + store->write(*context, context->getSettingsRef(), block2); break; } } @@ -548,9 +536,9 @@ try { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: { - store->write(*context, context->getSettingsRef(), std::move(block1)); - store->write(*context, context->getSettingsRef(), std::move(block2)); - store->write(*context, context->getSettingsRef(), std::move(block3)); + store->write(*context, context->getSettingsRef(), block1); + store->write(*context, context->getSettingsRef(), block2); + store->write(*context, context->getSettingsRef(), block3); break; } case TestMode::V2_FileOnly: { @@ -562,11 +550,11 @@ try auto file_ids = file_ids1; file_ids.insert(file_ids.cend(), file_ids2.begin(), file_ids2.end()); file_ids.insert(file_ids.cend(), file_ids3.begin(), file_ids3.end()); - store->ingestFiles(dm_context, range, file_ids, false); + store->writeRegionSnapshot(dm_context, range, file_ids, false); break; } case TestMode::V2_Mix: { - store->write(*context, context->getSettingsRef(), std::move(block2)); + store->write(*context, context->getSettingsRef(), block2); auto dm_context = store->newDMContext(*context, context->getSettingsRef()); auto [range1, file_ids1] = genDMFile(*dm_context, block1); @@ -574,7 +562,7 @@ try auto range = range1.merge(range3); auto file_ids = file_ids1; file_ids.insert(file_ids.cend(), file_ids3.begin(), file_ids3.end()); - store->ingestFiles(dm_context, range, file_ids, false); + store->writeRegionSnapshot(dm_context, range, file_ids, false); break; } } @@ -666,7 +654,7 @@ try { // Write 7 rows that would not trigger a split Block block = DMTestEnv::prepareSimpleWriteBlock(0, 8, false); - store->write(*context, settings, std::move(block)); + store->write(*context, settings, block); } { @@ -701,7 +689,7 @@ try { // Write rows that would trigger a split Block block = DMTestEnv::prepareSimpleWriteBlock(8, 9, false); - store->write(*context, settings, std::move(block)); + store->write(*context, settings, block); } // Now there 
is 2 segments @@ -753,7 +741,7 @@ try { // write to store Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_tso1, false, tso1); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } const UInt64 tso2 = 890; @@ -761,7 +749,7 @@ try { // write to store Block block = DMTestEnv::prepareSimpleWriteBlock(num_rows_tso1, num_rows_tso1 + num_rows_tso2, false, tso2); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } { @@ -854,274 +842,6 @@ try } CATCH -TEST_P(DeltaMergeStore_test, Ingest) -try -{ - if (mode == TestMode::V1_BlockOnly) - return; - - const UInt64 tso1 = 4; - const size_t num_rows_before_ingest = 128; - // Write to store [0, 128) - { - Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_before_ingest, false, tso1); - store->write(*context, context->getSettingsRef(), std::move(block)); - } - - const UInt64 tso2 = 10; - const UInt64 tso3 = 18; - - { - // Prepare DTFiles for ingesting - auto dm_context = store->newDMContext(*context, context->getSettingsRef()); - - auto [range1, file_ids1] = genDMFile(*dm_context, DMTestEnv::prepareSimpleWriteBlock(32, 48, false, tso2)); - auto [range2, file_ids2] = genDMFile(*dm_context, DMTestEnv::prepareSimpleWriteBlock(80, 256, false, tso3)); - - auto file_ids = file_ids1; - file_ids.insert(file_ids.cend(), file_ids2.begin(), file_ids2.end()); - auto ingest_range = RowKeyRange::fromHandleRange(HandleRange{32, 256}); - // verify that ingest_range must not less than range1.merge(range2) - ASSERT_ROWKEY_RANGE_EQ(ingest_range, range1.merge(range2).merge(ingest_range)); - - store->ingestFiles(dm_context, ingest_range, file_ids, /*clear_data_in_range*/ true); - } - - - // After ingesting, the data in [32, 128) should be overwrite by the data in ingested files. 
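// A minimal standalone sketch of the MVCC arithmetic that the read-back
// checks below assert. The ranges and tso values mirror the deleted Ingest
// test; the Span type and the function name are illustrative assumptions.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct Span { int64_t begin, end; uint64_t tso; };

// Rows visible at `max_version`, given that the ingest above (with
// clear_data_in_range=true) already erased the old rows in [32, 256).
size_t visibleRows(const std::vector<Span> & spans, uint64_t max_version)
{
    size_t rows = 0;
    for (const auto & s : spans)
        if (s.tso <= max_version)
            rows += static_cast<size_t>(s.end - s.begin);
    return rows;
}

int main()
{
    // Survivors after ingest: [0,32) @ tso1=4, [32,48) @ tso2=10, [80,256) @ tso3=18.
    std::vector<Span> spans{{0, 32, 4}, {32, 48, 10}, {80, 256, 18}};
    assert(visibleRows(spans, /* tso1      */ 4) == 32);
    assert(visibleRows(spans, /* tso2 - 1  */ 9) == 32);
    assert(visibleRows(spans, /* tso3 - 1  */ 17) == 32 + 16);
    assert(visibleRows(spans, UINT64_MAX) == 32 + 16 + 176);
    return 0;
}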
- { - // Read all data <= tso1 - // We can only get [0, 32) with tso1 - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ tso1, - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1UL); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL) << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; - } - - { - // Read all data between [tso, tso2) - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ tso2 - 1, - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1UL); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL) << "Data [32, 128) after ingest with tso less than: " << tso2 - << " are erased, should only get [0, 32)"; - } - - { - // Read all data between [tso2, tso3) - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ tso3 - 1, - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1UL); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL + 16) << "The rows number after ingest with tso less than " << tso3 << " is not match"; - } - - { - // Read all data between [tso2, tso3) - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ std::numeric_limits::max(), - EMPTY_FILTER, - /* expected_block_size= */ 1024); - 
ASSERT_EQ(ins.size(), 1UL); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL + (48 - 32) + (256UL - 80)) << "The rows number after ingest is not match"; - } - - { - // Read with two point get, issue 1616 - auto range0 = RowKeyRange::fromHandleRange(HandleRange(32, 33)); - auto range1 = RowKeyRange::fromHandleRange(HandleRange(40, 41)); - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {range0, range1}, - /* num_streams= */ 1, - /* max_version= */ std::numeric_limits::max(), - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1UL); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 2UL) << "The rows number of two point get is not match"; - } -} -CATCH - -TEST_P(DeltaMergeStore_test, IngestEmptyFileLists) -try -{ - if (mode == TestMode::V1_BlockOnly) - return; - - const UInt64 tso1 = 4; - const size_t num_rows_before_ingest = 128; - // Write to store [0, 128) - { - Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_before_ingest, false, tso1); - store->write(*context, context->getSettingsRef(), std::move(block)); - } - - // Test that if we ingest a empty file list, the data in range will be removed. - // The ingest range is [32, 256) - { - auto dm_context = store->newDMContext(*context, context->getSettingsRef()); - - std::vector file_ids ; - auto ingest_range = RowKeyRange::fromHandleRange(HandleRange{32, 256}); - store->ingestFiles(dm_context, ingest_range, file_ids, /*clear_data_in_range*/ true); - } - - - // After ingesting, the data in [32, 128) should be overwrite by the data in ingested files. 
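// A minimal standalone model of what the empty-file-list ingest above boils
// down to, assuming (as this deleted test asserts) that ingestFiles with an
// empty id list and clear_data_in_range=true acts as a pure delete-range.
// The map stands in for the table; keys model the integer handle.
#include <cassert>
#include <cstdint>
#include <map>

using Table = std::map<int64_t, uint64_t>; // handle -> tso of the row

// clear_data_in_range=true with nothing to ingest: only the erase step runs.
void ingestEmptyFileList(Table & table, int64_t begin, int64_t end)
{
    table.erase(table.lower_bound(begin), table.lower_bound(end));
}

int main()
{
    Table table;
    for (int64_t handle = 0; handle < 128; ++handle)
        table[handle] = 4;                // the initial write at tso1
    ingestEmptyFileList(table, 32, 256);  // ingest range [32, 256), no files
    assert(table.size() == 32);           // only [0, 32) survives, as checked below
    return 0;
}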
- { - // Read all data <= tso1 - // We can only get [0, 32) with tso1 - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ tso1, - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1UL); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL) << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; - } - - { - // Read all data - const auto & columns = store->getTableColumns(); - BlockInputStreams ins = store->read(*context, - context->getSettingsRef(), - columns, - {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, - /* num_streams= */ 1, - /* max_version= */ std::numeric_limits::max(), - EMPTY_FILTER, - /* expected_block_size= */ 1024); - ASSERT_EQ(ins.size(), 1UL); - BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL) << "The rows number after ingest is not match"; - } -} -CATCH - TEST_P(DeltaMergeStore_test, Split) try { @@ -1147,14 +867,14 @@ try auto write_as_file = [&]() { auto dm_context = store->newDMContext(*context, context->getSettingsRef()); auto [range, file_ids] = genDMFile(*dm_context, block); - store->ingestFiles(dm_context, range, file_ids, false); + store->writeRegionSnapshot(dm_context, range, file_ids, false); }; switch (mode) { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: - store->write(*context, settings, std::move(block)); + store->write(*context, settings, block); break; case TestMode::V2_FileOnly: write_as_file(); @@ -1162,7 +882,7 @@ try case TestMode::V2_Mix: { if ((std::rand() % 2) == 0) { - store->write(*context, settings, std::move(block)); + store->write(*context, settings, block); } else { @@ -1280,7 +1000,7 @@ try } block.insert(col2); } - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } { @@ -1393,7 +1113,7 @@ try } block.insert(col2); } - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } { @@ -1488,7 +1208,7 @@ try } block.insert(col1); } - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } { @@ -1578,7 +1298,7 @@ try size_t num_rows_write = 1; { Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } // DDL add column f32 with default value @@ -1652,7 +1372,7 @@ try // write some rows 
before DDL { Block block = DMTestEnv::prepareSimpleWriteBlock(0, 1, false); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } // DDL add column date with default value @@ -1757,7 +1477,7 @@ try } block.insert(col2); } - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } { @@ -1867,7 +1587,7 @@ try { // write to store Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false, /*tso=*/2, col_name_before_ddl, col_id_ddl, col_type); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } { @@ -1954,7 +1674,7 @@ try // Then write new block with new pk name Block block = DMTestEnv::prepareSimpleWriteBlock( num_rows_write, num_rows_write * 2, false, /*tso=*/2, col_name_after_ddl, col_id_ddl, col_type); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } { // read all columns from store @@ -2025,7 +1745,7 @@ try FailPointHelper::enableFailPoint(FailPoints::force_triggle_background_merge_delta); Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } // DDL add column f32 with default value @@ -2104,7 +1824,7 @@ try f_col.column = std::move(m_col); } block.insert(f_col); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } // disable pause so that delta-merge can continue @@ -2232,7 +1952,7 @@ try } block.insert(std::move(i8)); } - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } { @@ -2362,9 +2082,9 @@ try EXTRA_HANDLE_COLUMN_STRING_TYPE, true, rowkey_column_size); - store->write(*context, context->getSettingsRef(), std::move(block1)); - store->write(*context, context->getSettingsRef(), std::move(block2)); - store->write(*context, context->getSettingsRef(), std::move(block3)); + store->write(*context, context->getSettingsRef(), block1); + store->write(*context, context->getSettingsRef(), block2); + store->write(*context, context->getSettingsRef(), block3); store->flushCache(*context, RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())); } @@ -2432,9 +2152,9 @@ try EXTRA_HANDLE_COLUMN_STRING_TYPE, true, rowkey_column_size); - store->write(*context, context->getSettingsRef(), std::move(block1)); - store->write(*context, context->getSettingsRef(), std::move(block2)); - store->write(*context, context->getSettingsRef(), std::move(block3)); + store->write(*context, context->getSettingsRef(), block1); + store->write(*context, context->getSettingsRef(), block2); + store->write(*context, context->getSettingsRef(), block3); store->flushCache(*context, RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())); } @@ -2518,7 +2238,7 @@ try Block block = DMTestEnv::prepareSimpleWriteBlock( 0, 128, false, 2, EXTRA_HANDLE_COLUMN_NAME, EXTRA_HANDLE_COLUMN_ID, EXTRA_HANDLE_COLUMN_STRING_TYPE, true, rowkey_column_size); - store->write(*context, context->getSettingsRef(), std::move(block)); + store->write(*context, context->getSettingsRef(), block); } // Test Reading first { diff --git 
a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index 065af0a5450..7882e64ef9b 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -22,25 +22,6 @@ namespace DM namespace tests { -TEST(DMFileWriterFlags_test, SetClearFlags) -{ - using Flags = DMFileWriter::Flags; - - Flags flags; - - bool f = false; - flags.setRateLimit(f); - EXPECT_FALSE(flags.needRateLimit()); - flags.setSingleFile(f); - EXPECT_FALSE(flags.isSingleFile()); - - f = true; - flags.setRateLimit(f); - EXPECT_TRUE(flags.needRateLimit()); - flags.setSingleFile(f); - EXPECT_TRUE(flags.isSingleFile()); -} - String paramToString(const ::testing::TestParamInfo & info) { const auto mode = info.param; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 89e61b84099..50b828cb837 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -78,7 +78,7 @@ bool checkMatch(const String & test_case, // DeltaMergeStorePtr store = std::make_shared( context, false, "test_database", "test_table", table_columns, getExtraHandleColumnDefine(false), false, 1); - store->write(context, context.getSettingsRef(), std::move(block)); + store->write(context, context.getSettingsRef(), block); store->flushCache(context, all_range); store->mergeDeltaAll(context); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_region.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_region.cpp index 0513b27c12a..19b7286d1d6 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_region.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_region.cpp @@ -109,7 +109,7 @@ try size_t step = rand() % 1000; LOG_DEBUG(log, "step " << step); auto block = DMTestEnv::prepareBlockWithIncreasingPKAndTs(step, cur_rows, cur_rows); - store->write(*context, settings, std::move(block)); + store->write(*context, settings, block); cur_rows += step; check_exact(RowKeyRange::fromHandleRange({0, (Int64)(cur_rows)}), cur_rows, cur_rows * bytes_per_rows); @@ -184,7 +184,7 @@ try size_t step = rand() % 1000; LOG_DEBUG(log, "step " << step); auto block = DMTestEnv::prepareBlockWithIncreasingPKAndTs(step, cur_rows, cur_rows); - store->write(*context, settings, std::move(block)); + store->write(*context, settings, block); cur_rows += step; check_split_point(random_range(cur_rows)); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index f3480cd6719..a5dc1df8dcc 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -15,12 +14,6 @@ namespace DB { namespace DM { -extern DMFilePtr writeIntoNewDMFile(DMContext & dm_context, // - const ColumnDefinesPtr & schema_snap, - const BlockInputStreamPtr & input_stream, - UInt64 file_id, - const String & parent_path, - DMFileBlockOutputStream::Flags flags); namespace tests { @@ -184,6 +177,7 @@ try { // flush segment segment = segment->mergeDelta(dmContext(), tableColumns()); + ; } { @@ -1041,12 +1035,8 @@ class Segment_test_2 : public Segment_test, public testing::WithParamInterface(block); auto delegate = context.path_pool.getStableDiskDelegator(); auto store_path = delegate.choosePath(); - - 
DMFileBlockOutputStream::Flags flags; - flags.setSingleFile(DMTestEnv::getPseudoRandomNumber() % 2); - auto dmfile - = writeIntoNewDMFile(context, std::make_shared(*tableColumns()), input_stream, file_id, store_path, flags); + = writeIntoNewDMFile(context, std::make_shared(*tableColumns()), input_stream, file_id, store_path, false); delegate.addDTFile(file_id, dmfile->getBytesOnDisk(), store_path); @@ -1076,7 +1066,8 @@ try case Segment_test_Mode::V2_BlockOnly: segment->write(dmContext(), std::move(block)); break; - case Segment_test_Mode::V2_FileOnly: { + case Segment_test_Mode::V2_FileOnly: + { auto delegate = dmContext().path_pool.getStableDiskDelegator(); auto file_provider = dmContext().db_context.getFileProvider(); auto [range, file_ids] = genDMFile(dmContext(), block); @@ -1089,7 +1080,7 @@ try wbs.data.putExternal(file_id, 0); wbs.writeLogAndData(); - segment->ingestPacks(dmContext(), range, {pack}, false); + segment->writeRegionSnapshot(dmContext(), range, {pack}, false); break; } default: @@ -1352,6 +1343,7 @@ try { segment->flushCache(dmContext()); segment = segment->mergeDelta(dmContext(), tableColumns()); + ; } { @@ -1529,6 +1521,7 @@ try { segment->flushCache(dmContext()); segment = segment->mergeDelta(dmContext(), tableColumns()); + ; } { diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp index f18b1e0ac95..e10a7dd23d9 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -157,7 +158,6 @@ try } CATCH - TEST(StorageDeltaMerge_internal_test, GetMergedQueryRanges) { MvccQueryInfo::RegionsQueryInfo regions; diff --git a/dbms/src/Storages/FormatVersion.h b/dbms/src/Storages/FormatVersion.h index 292bfb0c67d..97afde98a74 100644 --- a/dbms/src/Storages/FormatVersion.h +++ b/dbms/src/Storages/FormatVersion.h @@ -77,7 +77,7 @@ inline static const StorageFormatVersion STORAGE_FORMAT_V2 = StorageFormatVersio .page = PageFormat::V2, }; -inline StorageFormatVersion STORAGE_FORMAT_CURRENT = STORAGE_FORMAT_V2; +inline StorageFormatVersion STORAGE_FORMAT_CURRENT = STORAGE_FORMAT_V1; inline const StorageFormatVersion & toStorageFormat(UInt64 setting) { @@ -96,4 +96,4 @@ inline void setStorageFormat(UInt64 setting) { STORAGE_FORMAT_CURRENT = toStorag inline void setStorageFormat(const StorageFormatVersion & version) { STORAGE_FORMAT_CURRENT = version; } -} // namespace DB +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 10bcbb28bd6..ca3e41ce0dd 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -36,7 +36,7 @@ using PSDiskDelegatorPtr = std::shared_ptr; * * This class is multi-threads safe. Support single thread write, and multi threads read. */ -class PageStorage : private boost::noncopyable +class PageStorage { public: struct Config @@ -222,7 +222,7 @@ class PageStorage : private boost::noncopyable TiFlashMetricsPtr metrics; }; -class PageReader : private boost::noncopyable +class PageReader { public: /// Not snapshot read. 
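// A side note on the PageStorage / PageReader hunk above: privately
// inheriting boost::noncopyable is what deleted their copy operations, so
// this revert makes the types copyable again at the language level. A minimal
// sketch of the difference (class names here are illustrative):
#include <boost/core/noncopyable.hpp>

class Guarded : private boost::noncopyable {};
class Plain {};

int main()
{
    Plain a;
    Plain b = a;         // fine: compiler-generated copy, like the reverted classes
    Guarded g;
    // Guarded g2 = g;   // would not compile: copy constructor is inaccessible
    (void)b;
    (void)g;
    return 0;
}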
diff --git a/dbms/src/Storages/Page/tests/gtest_page_map_version_set.cpp b/dbms/src/Storages/Page/tests/gtest_page_map_version_set.cpp index 4df1a45f136..3e97efc4ba9 100644 --- a/dbms/src/Storages/Page/tests/gtest_page_map_version_set.cpp +++ b/dbms/src/Storages/Page/tests/gtest_page_map_version_set.cpp @@ -1,3 +1,6 @@ +#include +#include + #include #define protected public diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 2dd5e9d1cda..7ab8a3f19fa 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -343,7 +343,7 @@ class DMBlockOutputStream : public IBlockOutputStream write_block.insert(ColumnWithTypeAndName(std::move(col), column.type, column.name, column.column_id)); } - store->write(db_context, db_settings, std::move(write_block)); + store->write(db_context, db_settings, write_block); } } } @@ -428,7 +428,7 @@ void StorageDeltaMerge::write(Block && block, const Settings & settings) FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_write_to_storage); - store->write(global_context, settings, std::move(block)); + store->write(global_context, settings, block); } std::unordered_set parseSegmentSet(const ASTPtr & ast) @@ -660,14 +660,6 @@ void StorageDeltaMerge::deleteRange(const DM::RowKeyRange & range_to_delete, con return store->deleteRange(global_context, settings, range_to_delete); } -void StorageDeltaMerge::ingestFiles( - const DM::RowKeyRange & range, const std::vector & file_ids, bool clear_data_in_range, const Settings & settings) -{ - auto metrics = global_context.getTiFlashMetrics(); - GET_METRIC(metrics, tiflash_storage_command_count, type_ingest).Increment(); - return store->ingestFiles(global_context, settings, range, file_ids, clear_data_in_range); -} - size_t getRows(DM::DeltaMergeStorePtr & store, const Context & context, const DM::RowKeyRange & range) { size_t rows = 0; diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index 991c64addaf..d4b4c68da72 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -57,9 +57,6 @@ class StorageDeltaMerge : public ext::shared_ptr_helper, publ void deleteRange(const DM::RowKeyRange & range_to_delete, const Settings & settings); - void ingestFiles( - const DM::RowKeyRange & range, const std::vector & file_ids, bool clear_data_in_range, const Settings & settings); - void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name, @@ -133,7 +130,6 @@ class StorageDeltaMerge : public ext::shared_ptr_helper, publ private: using ColumnIdMap = std::unordered_map; - const bool data_path_contains_database_name = false; DM::DeltaMergeStorePtr store; diff --git a/dbms/src/Storages/Transaction/ApplySnapshot.cpp b/dbms/src/Storages/Transaction/ApplySnapshot.cpp index 25e370298af..498e7f3457a 100644 --- a/dbms/src/Storages/Transaction/ApplySnapshot.cpp +++ b/dbms/src/Storages/Transaction/ApplySnapshot.cpp @@ -3,8 +3,6 @@ #include #include #include -#include -#include #include #include #include @@ -14,9 +12,7 @@ #include #include #include -#include #include -#include #include #include @@ -24,13 +20,8 @@ namespace DB { -std::tuple, bool, DM::ColumnDefinesPtr> // -AtomicGetStorageSchema(const RegionPtr & region, TMTContext & tmt); - namespace FailPoints { -extern const char force_set_sst_to_dtfile_block_size[]; -extern const char force_set_sst_decode_rand[]; extern const char pause_until_apply_raft_snapshot[]; } // namespace 
FailPoints @@ -41,8 +32,7 @@ extern const int TABLE_IS_DROPPED; extern const int REGION_DATA_SCHEMA_UPDATED; } // namespace ErrorCodes -template -void KVStore::checkAndApplySnapshot(const RegionPtrWrap & new_region, TMTContext & tmt) +void KVStore::checkAndApplySnapshot(const RegionPtrWithBlock & new_region, TMTContext & tmt) { auto region_id = new_region->id(); auto old_region = getRegion(region_id); @@ -143,39 +133,39 @@ void KVStore::checkAndApplySnapshot(const RegionPtrWrap & new_region, TMTContext onSnapshot(new_region, old_region, old_applied_index, tmt); } -template -void KVStore::onSnapshot(const RegionPtrWrap & new_region_wrap, RegionPtr old_region, UInt64 old_region_index, TMTContext & tmt) +void KVStore::onSnapshot(const RegionPtrWithBlock & new_region_wrap, RegionPtr old_region, UInt64 old_region_index, TMTContext & tmt) { RegionID region_id = new_region_wrap->id(); { auto table_id = new_region_wrap->getMappedTableID(); - if (auto storage = tmt.getStorages().get(table_id); storage && storage->engineType() == TiDB::StorageEngine::DT) + if (auto storage = tmt.getStorages().get(table_id); storage) { - try + switch (storage->engineType()) { - auto & context = tmt.getContext(); - // Acquire `drop_lock` so that no other threads can drop the storage. `alter_lock` is not required. - auto table_lock = storage->lockForShare(getThreadName()); - auto dm_storage = std::dynamic_pointer_cast(storage); - auto key_range = DM::RowKeyRange::fromRegionRange( - new_region_wrap->getRange(), table_id, storage->isCommonHandle(), storage->getRowKeyColumnSize()); - if constexpr (std::is_same_v) - { - // Call `ingestFiles` to delete data for range and ingest external DTFiles. - dm_storage->ingestFiles(key_range, new_region_wrap.ingest_ids, /*clear_data_in_range=*/true, context.getSettingsRef()); - } - else + case TiDB::StorageEngine::DT: { - // Call `deleteRange` to delete data for range - dm_storage->deleteRange(key_range, context.getSettingsRef()); + try + { + auto & context = tmt.getContext(); + // acquire lock so that no other threads can drop storage + auto table_lock = storage->lockForShare(getThreadName()); + auto dm_storage = std::dynamic_pointer_cast(storage); + auto key_range = DM::RowKeyRange::fromRegionRange( + new_region_wrap->getRange(), table_id, storage->isCommonHandle(), storage->getRowKeyColumnSize()); + // Call `deleteRange` to delete data for range + dm_storage->deleteRange(key_range, context.getSettingsRef()); + } + catch (DB::Exception & e) + { + // We can ignore if storage is dropped. + if (e.code() != ErrorCodes::TABLE_IS_DROPPED) + throw; + } + break; } - } - catch (DB::Exception & e) - { - // We can ignore if storage is dropped. - if (e.code() != ErrorCodes::TABLE_IS_DROPPED) - throw; + default: + break; } } } @@ -185,24 +175,20 @@ void KVStore::onSnapshot(const RegionPtrWrap & new_region_wrap, RegionPtr old_re auto & region_table = tmt.getRegionTable(); // extend region to make sure data won't be removed. region_table.extendRegionRange(region_id, *range); - // For `RegionPtrWithBlock`, try to flush data into storage first. - if constexpr (std::is_same_v) + // try to flush data into ch first. + try { - try - { - auto tmp = region_table.tryFlushRegion(new_region_wrap, false); - { - std::lock_guard lock(bg_gc_region_data_mutex); - bg_gc_region_data.push_back(std::move(tmp)); - } - tryFlushRegionCacheInStorage(tmt, *new_region_wrap, log); - } - catch (...) 
+ auto tmp = region_table.tryFlushRegion(new_region_wrap, false); { - tryLogCurrentException(__PRETTY_FUNCTION__); + std::lock_guard lock(bg_gc_region_data_mutex); + bg_gc_region_data.push_back(std::move(tmp)); } + tryFlushRegionCacheInStorage(tmt, *new_region_wrap, log); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); } - // For `RegionPtrWithSnapshotFiles`, don't need to flush cache. } RegionPtr new_region = new_region_wrap.base; @@ -213,7 +199,7 @@ void KVStore::onSnapshot(const RegionPtrWrap & new_region_wrap, RegionPtr old_re if (getRegion(region_id) != old_region || (old_region && old_region_index != old_region->appliedIndex())) { throw Exception( - std::string(__PRETTY_FUNCTION__) + ": region " + DB::toString(region_id) + " instance changed, should not happen", + std::string(__PRETTY_FUNCTION__) + ": region " + std::to_string(region_id) + " instance changed, should not happen", ErrorCodes::LOGICAL_ERROR); } @@ -237,11 +223,10 @@ void KVStore::onSnapshot(const RegionPtrWrap & new_region_wrap, RegionPtr old_re } } + extern RegionPtrWithBlock::CachePtr GenRegionPreDecodeBlockData(const RegionPtr &, Context &); -/// `preHandleSnapshotToBlock` read data from SSTFiles and predoced the data as a block -RegionPreDecodeBlockDataPtr KVStore::preHandleSnapshotToBlock( - RegionPtr new_region, const SSTViewVec snaps, uint64_t /*index*/, uint64_t /*term*/, TMTContext & tmt) +RegionPreDecodeBlockDataPtr KVStore::preHandleSnapshot(RegionPtr new_region, const SSTViewVec snaps, TMTContext & tmt) { RegionPreDecodeBlockDataPtr cache{nullptr}; { @@ -300,112 +285,7 @@ RegionPreDecodeBlockDataPtr KVStore::preHandleSnapshotToBlock( return cache; } -std::vector KVStore::preHandleSnapshotToFiles( - RegionPtr new_region, const SSTViewVec snaps, uint64_t index, uint64_t term, TMTContext & tmt) -{ - return preHandleSSTsToDTFiles(new_region, snaps, index, term, DM::FileConvertJobType::ApplySnapshot, tmt); -} - -/// `preHandleSSTsToDTFiles` read data from SSTFiles and generate DTFile(s) for commited data -/// return the ids of DTFile(s), the uncommited data will be inserted to `new_region` -std::vector KVStore::preHandleSSTsToDTFiles( - RegionPtr new_region, const SSTViewVec snaps, uint64_t /*index*/, uint64_t /*term*/, DM::FileConvertJobType job_type, TMTContext & tmt) -{ - auto context = tmt.getContext(); - bool force_decode = false; - size_t expected_block_size = DEFAULT_MERGE_BLOCK_SIZE; - - // Use failpoint to change the expected_block_size for some test cases - fiu_do_on(FailPoints::force_set_sst_to_dtfile_block_size, { expected_block_size = 3; }); - - PageIds ids; - while (true) - { - // If any schema changes is detected during decoding SSTs to DTFiles, we need to cancel and recreate DTFiles with - // the latest schema. Or we will get trouble in `BoundedSSTFilesToBlockInputStream`. - std::shared_ptr stream; - try - { - // Get storage schema atomically, will do schema sync if the storage does not exists. - // Will return the storage even if it is tombstoned. - auto [dm_storage, is_common_handle, schema_snap] = AtomicGetStorageSchema(new_region, tmt); - if (unlikely(dm_storage == nullptr)) - { - // The storage must be physically dropped, throw exception and do cleanup. 
- throw Exception("", ErrorCodes::TABLE_IS_DROPPED); - } - - // Get a gc safe point for compacting - Timestamp gc_safepoint = 0; - if (auto pd_client = tmt.getPDClient(); !pd_client->isMock()) - { - gc_safepoint = PDClientHelper::getGCSafePointWithRetry(pd_client, - /* ignore_cache= */ false, - context.getSettingsRef().safe_point_update_interval_seconds); - } - - // Read from SSTs and refine the boundary of blocks output to DTFiles - auto sst_stream = std::make_shared( - new_region, snaps, proxy_helper, dm_storage, schema_snap, gc_safepoint, force_decode, tmt, expected_block_size); - auto bounded_stream - = std::make_shared(sst_stream, ::DB::TiDBPkColumnID, is_common_handle); - stream = std::make_shared(bounded_stream, snapshot_apply_method, job_type, tmt); - - stream->writePrefix(); - stream->write(); - stream->writeSuffix(); - ids = stream->ingestIds(); - break; - } - catch (DB::Exception & e) - { - auto try_clean_up = [&stream]() -> void { - if (stream != nullptr) - stream->cancel(); - }; - if (e.code() == ErrorCodes::REGION_DATA_SCHEMA_UPDATED) - { - // The schema of decoding region data has been updated, need to clear and recreate another stream for writing DTFile(s) - new_region->clearAllData(); - try_clean_up(); - - if (force_decode) - { - // Can not decode data with `force_decode == true`, must be something wrong - throw; - } - - // Update schema and try to decode again - auto metrics = context.getTiFlashMetrics(); - GET_METRIC(metrics, tiflash_schema_trigger_count, type_raft_decode).Increment(); - tmt.getSchemaSyncer()->syncSchemas(context); - // Next time should force_decode - force_decode = true; - - continue; - } - else if (e.code() == ErrorCodes::TABLE_IS_DROPPED) - { - // We can ignore if storage is dropped. - LOG_INFO(log, - "Pre-handle snapshot to DTFiles is ignored because the table is dropped. 
[region=" << new_region->toString(true) - << "]"); - try_clean_up(); - break; - } - else - { - // Other unrecoverable error, throw - throw; - } - } - } - - return ids; -} - -template -void KVStore::handlePreApplySnapshot(const RegionPtrWrap & new_region, TMTContext & tmt) +void KVStore::handlePreApplySnapshot(const RegionPtrWithBlock & new_region, TMTContext & tmt) { LOG_INFO(log, "Try to apply snapshot: " << new_region->toString(true)); @@ -423,14 +303,6 @@ void KVStore::handlePreApplySnapshot(const RegionPtrWrap & new_region, TMTContex LOG_INFO(log, new_region->toString(false) << " apply snapshot success"); } -template void KVStore::handlePreApplySnapshot(const RegionPtrWithBlock &, TMTContext &); -template void KVStore::handlePreApplySnapshot(const RegionPtrWithSnapshotFiles &, TMTContext &); -template void KVStore::checkAndApplySnapshot(const RegionPtrWithBlock &, TMTContext &); -template void KVStore::checkAndApplySnapshot(const RegionPtrWithSnapshotFiles &, TMTContext &); -template void KVStore::onSnapshot(const RegionPtrWithBlock &, RegionPtr, UInt64, TMTContext &); -template void KVStore::onSnapshot(const RegionPtrWithSnapshotFiles &, RegionPtr, UInt64, TMTContext &); - - static const metapb::Peer & findPeer(const metapb::Region & region, UInt64 peer_id) { for (const auto & peer : region.peers()) @@ -461,13 +333,10 @@ RegionPtr KVStore::genRegionPtr(metapb::Region && region, UInt64 peer_id, UInt64 } void KVStore::handleApplySnapshot( - metapb::Region && region, uint64_t peer_id, const SSTViewVec snaps, uint64_t index, uint64_t term, TMTContext & tmt) + metapb::Region && region, UInt64 peer_id, const SSTViewVec snaps, UInt64 index, UInt64 term, TMTContext & tmt) { auto new_region = genRegionPtr(std::move(region), peer_id, index, term); - if (snapshot_apply_method == TiDB::SnapshotApplyMethod::Block) - handlePreApplySnapshot(RegionPtrWithBlock{new_region, preHandleSnapshotToBlock(new_region, snaps, index, term, tmt)}, tmt); - else - handlePreApplySnapshot(RegionPtrWithSnapshotFiles{new_region, preHandleSnapshotToFiles(new_region, snaps, index, term, tmt)}, tmt); + handlePreApplySnapshot(RegionPtrWithBlock{new_region, preHandleSnapshot(new_region, snaps, tmt)}, tmt); } EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec snaps, UInt64 index, UInt64 term, TMTContext & tmt) @@ -475,10 +344,9 @@ EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec auto region_task_lock = region_manager.genRegionTaskLock(region_id); Stopwatch watch; - SCOPE_EXIT({ - auto & ctx = tmt.getContext(); - GET_METRIC(ctx.getTiFlashMetrics(), tiflash_raft_command_duration_seconds, type_ingest_sst).Observe(watch.elapsedSeconds()); - }); + auto & ctx = tmt.getContext(); + SCOPE_EXIT( + { GET_METRIC(ctx.getTiFlashMetrics(), tiflash_raft_command_duration_seconds, type_ingest_sst).Observe(watch.elapsedSeconds()); }); const RegionPtr region = getRegion(region_id); if (region == nullptr) @@ -489,26 +357,6 @@ EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec return EngineStoreApplyRes::NotFound; } - fiu_do_on(FailPoints::force_set_sst_decode_rand, { - static int num_call = 0; - switch (num_call++ % 3) - { - case 0: - snapshot_apply_method = TiDB::SnapshotApplyMethod::Block; - break; - case 1: - snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Directory; - break; - case 2: - snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Single; - break; - default: - break; - } - LOG_INFO( - log, __FUNCTION__ << ": " << 
region->toString(true) << " ingest sst by method " << applyMethodToString(snapshot_apply_method)); - }); - const auto func_try_flush = [&]() { if (!region->writeCFCount()) return; @@ -525,23 +373,10 @@ EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec } }; - if (snapshot_apply_method == TiDB::SnapshotApplyMethod::Block) - { - // try to flush remain data in memory. - func_try_flush(); - region->handleIngestSSTInMemory(snaps, index, term, tmt); - // after `handleIngestSSTInMemory`, all data are stored in `region`, try to flush committed data into storage - func_try_flush(); - } - else - { - // try to flush remain data in memory. - func_try_flush(); - auto tmp_region = handleIngestSSTByDTFile(region, snaps, index, term, tmt); - region->finishIngestSSTByDTFile(std::move(tmp_region), index, term); - // after `finishIngestSSTByDTFile`, try to flush committed data into storage - func_try_flush(); - } + // try to flush remain data in memory. + func_try_flush(); + region->handleIngestSST(snaps, index, term, tmt); + func_try_flush(); if (region->dataSize()) { @@ -555,56 +390,4 @@ EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec } } -RegionPtr KVStore::handleIngestSSTByDTFile(const RegionPtr & region, const SSTViewVec snaps, UInt64 index, UInt64 term, TMTContext & tmt) -{ - if (index <= region->appliedIndex()) - return nullptr; - - // Create a tmp region to store uncommitted data - RegionPtr tmp_region; - { - auto meta_region = region->getMetaRegion(); - auto meta_snap = region->dumpRegionMetaSnapshot(); - auto peer_id = meta_snap.peer.id(); - tmp_region = genRegionPtr(std::move(meta_region), peer_id, index, term); - } - - // Decode the KV pairs in ingesting SST into DTFiles - PageIds ingest_ids = preHandleSSTsToDTFiles(tmp_region, snaps, index, term, DM::FileConvertJobType::IngestSST, tmt); - - // If `ingest_ids` is empty, ingest SST won't write delete_range for ingest region, it is safe to - // ignore the step of calling `ingestFiles` - if (!ingest_ids.empty()) - { - auto table_id = region->getMappedTableID(); - if (auto storage = tmt.getStorages().get(table_id); storage) - { - // Ingest DTFiles into DeltaMerge storage - auto & context = tmt.getContext(); - try - { - // Acquire `drop_lock` so that no other threads can drop the storage. `alter_lock` is not required. - auto table_lock = storage->lockForShare(getThreadName()); - auto key_range = DM::RowKeyRange::fromRegionRange( - region->getRange(), table_id, storage->isCommonHandle(), storage->getRowKeyColumnSize()); - // Call `ingestFiles` to ingest external DTFiles. - // Note that ingest sst won't remove the data in the key range - auto dm_storage = std::dynamic_pointer_cast(storage); - dm_storage->ingestFiles(key_range, ingest_ids, /*clear_data_in_range=*/false, context.getSettingsRef()); - } - catch (DB::Exception & e) - { - // We can ignore if storage is dropped. 
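// The catch just below repeats a pattern used throughout this patch: a
// concurrently dropped table turns the operation into a no-op, so only
// TABLE_IS_DROPPED is swallowed and everything else propagates. As a sketch,
// with doIngest() as a hypothetical stand-in for the dm_storage call:
try
{
    doIngest(); // hypothetical stand-in, not a real API in this patch
}
catch (DB::Exception & e)
{
    if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
        throw; // unknown failure: rethrow
    // dropped table: safe to ignore
}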
- if (e.code() == ErrorCodes::TABLE_IS_DROPPED) - return nullptr; - else - throw; - } - } - } - - return tmp_region; -} - - } // namespace DB diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp index 3ff50173117..8d2be285a28 100644 --- a/dbms/src/Storages/Transaction/KVStore.cpp +++ b/dbms/src/Storages/Transaction/KVStore.cpp @@ -21,11 +21,8 @@ extern const int LOGICAL_ERROR; extern const int TABLE_IS_DROPPED; } // namespace ErrorCodes -KVStore::KVStore(Context & context, TiDB::SnapshotApplyMethod snapshot_apply_method_) - : region_persister(context, region_manager), - raft_cmd_res(std::make_unique()), - snapshot_apply_method(snapshot_apply_method_), - log(&Logger::get("KVStore")) +KVStore::KVStore(Context & context) + : region_persister(context, region_manager), raft_cmd_res(std::make_unique()), log(&Logger::get("KVStore")) {} void KVStore::restore(const TiFlashRaftProxyHelper * proxy_helper) diff --git a/dbms/src/Storages/Transaction/KVStore.h b/dbms/src/Storages/Transaction/KVStore.h index d75ddd44222..1dd143946a3 100644 --- a/dbms/src/Storages/Transaction/KVStore.h +++ b/dbms/src/Storages/Transaction/KVStore.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB { @@ -12,10 +11,6 @@ namespace RegionBench { extern void concurrentBatchInsert(const TiDB::TableInfo &, Int64, Int64, Int64, UInt64, UInt64, Context &); } -namespace DM -{ -enum class FileConvertJobType; -} // TODO move to Settings.h static const Seconds REGION_CACHE_GC_PERIOD(60 * 5); @@ -46,6 +41,7 @@ struct WriteCmdsView; enum class EngineStoreApplyRes : uint32_t; struct TiFlashRaftProxyHelper; +struct RegionPtrWithBlock; struct RegionPreDecodeBlockData; using RegionPreDecodeBlockDataPtr = std::unique_ptr; @@ -53,7 +49,7 @@ using RegionPreDecodeBlockDataPtr = std::unique_ptr; class KVStore final : private boost::noncopyable { public: - KVStore(Context & context, TiDB::SnapshotApplyMethod snapshot_apply_method_); + KVStore(Context & context); void restore(const TiFlashRaftProxyHelper *); RegionPtr getRegion(const RegionID region_id) const; @@ -82,12 +78,8 @@ class KVStore final : private boost::noncopyable EngineStoreApplyRes handleWriteRaftCmd(const WriteCmdsView & cmds, UInt64 region_id, UInt64 index, UInt64 term, TMTContext & tmt); void handleApplySnapshot(metapb::Region && region, uint64_t peer_id, const SSTViewVec, uint64_t index, uint64_t term, TMTContext & tmt); - RegionPreDecodeBlockDataPtr preHandleSnapshotToBlock( - RegionPtr new_region, const SSTViewVec, uint64_t index, uint64_t term, TMTContext & tmt); - std::vector /* */ preHandleSnapshotToFiles( - RegionPtr new_region, const SSTViewVec, uint64_t index, uint64_t term, TMTContext & tmt); - template - void handlePreApplySnapshot(const RegionPtrWrap &, TMTContext & tmt); + RegionPreDecodeBlockDataPtr preHandleSnapshot(RegionPtr new_region, const SSTViewVec, TMTContext & tmt); + void handlePreApplySnapshot(const RegionPtrWithBlock &, TMTContext & tmt); void handleDestroy(UInt64 region_id, TMTContext & tmt); void setRegionCompactLogConfig(UInt64, UInt64, UInt64); @@ -95,8 +87,6 @@ class KVStore final : private boost::noncopyable RegionPtr genRegionPtr(metapb::Region && region, UInt64 peer_id, UInt64 index, UInt64 term); const TiFlashRaftProxyHelper * getProxyHelper() const { return proxy_helper; } - TiDB::SnapshotApplyMethod applyMethod() const { return snapshot_apply_method; } - private: friend class MockTiDB; friend struct MockTiDBTable; @@ -105,18 +95,11 @@ class KVStore final : private boost::noncopyable 
friend void RegionBench::concurrentBatchInsert(const TiDB::TableInfo &, Int64, Int64, Int64, UInt64, UInt64, Context &); using DBGInvokerPrinter = std::function; friend void dbgFuncRemoveRegion(Context &, const ASTs &, DBGInvokerPrinter); + friend void dbgFuncRegionSnapshotWithData(Context &, const ASTs &, DBGInvokerPrinter); friend void dbgFuncPutRegion(Context &, const ASTs &, DBGInvokerPrinter); - - std::vector preHandleSSTsToDTFiles( - RegionPtr new_region, const SSTViewVec, uint64_t index, uint64_t term, DM::FileConvertJobType, TMTContext & tmt); - - template - void checkAndApplySnapshot(const RegionPtrWrap &, TMTContext & tmt); - template - void onSnapshot(const RegionPtrWrap &, RegionPtr old_region, UInt64 old_region_index, TMTContext & tmt); - - RegionPtr handleIngestSSTByDTFile(const RegionPtr & region, const SSTViewVec, UInt64 index, UInt64 term, TMTContext & tmt); + void checkAndApplySnapshot(const RegionPtrWithBlock &, TMTContext & tmt); + void onSnapshot(const RegionPtrWithBlock &, RegionPtr old_region, UInt64 old_region_index, TMTContext & tmt); // Remove region from this TiFlash node. // If region is destroy or moved to another node(change peer), @@ -154,8 +137,6 @@ class KVStore final : private boost::noncopyable // raft_cmd_res stores the result of applying raft cmd. It must be protected by task_mutex. std::unique_ptr raft_cmd_res; - TiDB::SnapshotApplyMethod snapshot_apply_method; - Logger * log; std::atomic REGION_COMPACT_LOG_PERIOD; diff --git a/dbms/src/Storages/Transaction/PartitionStreams.cpp b/dbms/src/Storages/Transaction/PartitionStreams.cpp index 81e334a3222..c24033de96c 100644 --- a/dbms/src/Storages/Transaction/PartitionStreams.cpp +++ b/dbms/src/Storages/Transaction/PartitionStreams.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -29,7 +28,6 @@ extern const char pause_before_apply_raft_snapshot[]; namespace ErrorCodes { extern const int LOGICAL_ERROR; -extern const int REGION_DATA_SCHEMA_UPDATED; extern const int ILLFORMAT_RAFT_ROW; extern const int TABLE_IS_DROPPED; } // namespace ErrorCodes @@ -385,12 +383,12 @@ RegionTable::ResolveLocksAndWriteRegionRes RegionTable::resolveLocksAndWriteRegi region_data_lock); } -/// Pre-decode region data into block cache and remove committed data from `region` +/// pre-decode region data into block cache and remove RegionPtrWithBlock::CachePtr GenRegionPreDecodeBlockData(const RegionPtr & region, Context & context) { const auto & tmt = context.getTMTContext(); { - Timestamp gc_safe_point = 0; + Timestamp gc_safe_point = UINT64_MAX; if (auto pd_client = tmt.getPDClient(); !pd_client->isMock()) { gc_safe_point @@ -483,162 +481,4 @@ RegionPtrWithBlock::CachePtr GenRegionPreDecodeBlockData(const RegionPtr & regio return std::make_unique(std::move(res_block), schema_version, std::move(*data_list_read)); } -std::tuple, bool, DM::ColumnDefinesPtr> // -AtomicGetStorageSchema(const RegionPtr & region, TMTContext & tmt) -{ - bool is_common_handle = false; - std::shared_ptr dm_storage; - DM::ColumnDefinesPtr schema_snap; - - auto table_id = region->getMappedTableID(); - auto context = tmt.getContext(); - auto metrics = context.getTiFlashMetrics(); - const auto atomicGet = [&](bool force_decode) -> bool { - auto storage = tmt.getStorages().get(table_id); - if (storage == nullptr) - { - if (!force_decode) - return false; - if (storage == nullptr) // Table must have just been GC-ed - return true; - } - // Get a structure read lock. 
It will throw an exception if the table has been dropped,
-        // the caller should handle this situation.
-        auto table_lock = storage->lockStructureForShare(getThreadName());
-        is_common_handle = storage->isCommonHandle();
-        if (unlikely(storage->engineType() != ::TiDB::StorageEngine::DT))
-        {
-            throw Exception("Try to get storage schema with unknown storage engine [table_id=" + DB::toString(table_id)
-                    + "] [engine_type=" + DB::toString(static_cast<Int32>(storage->engineType())) + "]",
-                ErrorCodes::LOGICAL_ERROR);
-        }
-        if (dm_storage = std::dynamic_pointer_cast<StorageDeltaMerge>(storage); dm_storage != nullptr)
-        {
-            auto store = dm_storage->getStore();
-            schema_snap = store->getStoreColumns();
-        }
-        return true;
-    };
-
-    if (!atomicGet(false))
-    {
-        GET_METRIC(metrics, tiflash_schema_trigger_count, type_raft_decode).Increment();
-        tmt.getSchemaSyncer()->syncSchemas(context);
-
-        if (!atomicGet(true))
-            throw Exception("Get " + region->toString() + " belonging table " + DB::toString(table_id) + " is_common_handle fail",
-                ErrorCodes::LOGICAL_ERROR);
-    }
-    return std::make_tuple(dm_storage, is_common_handle, schema_snap);
-}
-
-static bool needUpdateSchema(const DM::ColumnDefinesPtr & a, const DM::ColumnDefinesPtr & b)
-{
-    // Note that we consider `a` and `b` to differ, and the schema to need updating, if either of them is `nullptr`
-    if (unlikely(a == nullptr || b == nullptr))
-        return true;
-
-    // If the two schemas are not the same, the schema needs to be updated.
-    if (a->size() != b->size())
-        return true;
-    for (size_t i = 0; i < a->size(); ++i)
-    {
-        const auto & ca = (*a)[i];
-        const auto & cb = (*b)[i];
-
-        bool col_ok = ca.id == cb.id;
-        // bool name_ok = ca.name == cb.name;
-        bool type_ok = ca.type->equals(*cb.type);
-
-        if (!col_ok || !type_ok)
-            return true;
-    }
-    return false;
-}
-
-static Block sortColumnsBySchemaSnap(Block && ori, const DM::ColumnDefines & schema)
-{
-#ifndef NDEBUG
-    // Some trivial checks to ensure the input is legal
-    if (ori.columns() != schema.size())
-    {
-        throw Exception("Try to sortColumnsBySchemaSnap with different column size [block_columns=" + DB::toString(ori.columns())
-            + "] [schema_columns=" + DB::toString(schema.size()) + "]");
-    }
-#endif
-
-    std::map<ColumnID, size_t> index_by_cid;
-    for (size_t i = 0; i < ori.columns(); ++i)
-    {
-        const ColumnWithTypeAndName & c = ori.getByPosition(i);
-        index_by_cid[c.column_id] = i;
-    }
-
-    Block res;
-    for (const auto & cd : schema)
-    {
-        res.insert(ori.getByPosition(index_by_cid[cd.id]));
-    }
-#ifndef NDEBUG
-    assertBlocksHaveEqualStructure(res, DM::toEmptyBlock(schema), "sortColumnsBySchemaSnap");
-#endif
-
-    return res;
-}
-
-/// Decode region data into a block with its schema snapshot, and remove committed data from `region`.
-/// The returned block stores the committed data scanned and removed from `region`;
-/// its columns are sorted by `schema_snap`.
-Block GenRegionBlockDatawithSchema(const RegionPtr & region,
-    const std::shared_ptr<StorageDeltaMerge> & dm_storage,
-    const DM::ColumnDefinesPtr & schema_snap,
-    Timestamp gc_safepoint,
-    bool force_decode,
-    TMTContext & tmt)
-{
-    // In 5.0.1, the feature `compaction filter` is enabled by default. With it, TiKV does GC in the write & default cf individually.
-    // If some rows were updated before a TiFlash replica is added, the TiFlash store may receive region snapshots with unmatched data in the write & default cf SST files.
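The comment above describes TiKV's compaction filter GC-ing the write & default column families independently, which can leave write-cf records whose default-cf value is already gone. The first removed line below, region->tryCompactionFilter(gc_safepoint), drops such orphan records under the GC safe point. A minimal standalone model of that idea — WriteRecord, value_missing, and the map layout are illustrative assumptions, not the real data structures:

    #include <cstdint>
    #include <iostream>
    #include <map>

    // Simplified stand-in for a write-cf record: value_missing is true if the
    // matching default-cf value was GC-ed away before the snapshot was sent.
    struct WriteRecord { bool value_missing; };

    // Drop records whose commit_ts is below the GC safe point and whose value
    // never arrived; this mirrors the intent of tryCompactionFilter(gc_safepoint).
    void tryCompactionFilter(std::map<uint64_t, WriteRecord> & write_cf, uint64_t gc_safepoint)
    {
        for (auto it = write_cf.begin(); it != write_cf.end();)
        {
            if (it->first < gc_safepoint && it->second.value_missing)
                it = write_cf.erase(it); // orphan record, safe to discard
            else
                ++it;
        }
    }

    int main()
    {
        std::map<uint64_t, WriteRecord> write_cf{{5, {true}}, {90, {true}}, {7, {false}}};
        tryCompactionFilter(write_cf, 50);
        std::cout << write_cf.size() << " records left\n"; // 2: ts=90 too new, ts=7 has its value
    }
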
- region->tryCompactionFilter(gc_safepoint); - - std::optional data_list_read = std::nullopt; - data_list_read = ReadRegionCommitCache(region); - - Block res_block; - // No committed data, just return - if (!data_list_read) - return res_block; - - auto context = tmt.getContext(); - auto metrics = context.getTiFlashMetrics(); - - { - Stopwatch watch; - // Get a structure read lock. It will throw exception if the table has been - // dropped, the caller should handle this situation. - auto table_lock = dm_storage->lockStructureForShare(getThreadName()); - // Compare schema_snap with current schema, throw exception if changed. - auto store = dm_storage->getStore(); - auto cur_schema_snap = store->getStoreColumns(); - if (needUpdateSchema(cur_schema_snap, schema_snap)) - throw Exception("", ErrorCodes::REGION_DATA_SCHEMA_UPDATED); - - auto reader = RegionBlockReader(dm_storage); - auto [block, ok] = reader.read(*data_list_read, force_decode); - if (unlikely(!ok)) - throw Exception("", ErrorCodes::REGION_DATA_SCHEMA_UPDATED); - - GET_METRIC(metrics, tiflash_raft_write_data_to_storage_duration_seconds, type_decode).Observe(watch.elapsedSeconds()); - - // For DeltaMergeStore, we always store an extra column with column_id = -1 - res_block = store->addExtraColumnIfNeed(context, std::move(block)); - } - - res_block = sortColumnsBySchemaSnap(std::move(res_block), *schema_snap); - - // Remove committed data - RemoveRegionCommitCache(region, *data_list_read); - - return res_block; -} - } // namespace DB diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp b/dbms/src/Storages/Transaction/ProxyFFI.cpp index d1cabf568da..8c843a85db0 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp @@ -212,11 +212,10 @@ BatchReadIndexRes TiFlashRaftProxyHelper::batchReadIndex(const std::vector && ids_) : region(region_), ingest_ids(std::move(ids_)) - { - CurrentMetrics::add(CurrentMetrics::RaftNumSnapshotsPendingApply); - } - RegionPtr region; - std::vector ingest_ids; // The file_ids storing pre-handled files -}; - RawCppPtr PreHandleSnapshot( EngineStoreServerWrap * server, BaseBuffView region_buff, uint64_t peer_id, SSTViewVec snaps, uint64_t index, uint64_t term) { @@ -245,26 +233,9 @@ RawCppPtr PreHandleSnapshot( auto & tmt = *server->tmt; auto & kvstore = tmt.getKVStore(); auto new_region = kvstore->genRegionPtr(std::move(region), peer_id, index, term); - switch (kvstore->applyMethod()) - { - case TiDB::SnapshotApplyMethod::Block: - { - // Pre-decode as a block - auto new_region_block_cache = kvstore->preHandleSnapshotToBlock(new_region, snaps, index, term, tmt); - auto res = new PreHandledSnapshotWithBlock{new_region, std::move(new_region_block_cache)}; - return GenRawCppPtr(res, RawCppPtrTypeImpl::PreHandledSnapshotWithBlock); - } - case TiDB::SnapshotApplyMethod::DTFile_Directory: - case TiDB::SnapshotApplyMethod::DTFile_Single: - { - // Pre-decode and save as DTFiles - auto ingest_ids = kvstore->preHandleSnapshotToFiles(new_region, snaps, index, term, tmt); - auto res = new PreHandledSnapshotWithFiles{new_region, std::move(ingest_ids)}; - return GenRawCppPtr(res, RawCppPtrTypeImpl::PreHandledSnapshotWithFiles); - } - default: - throw Exception("Unknow Region apply method: " + applyMethodToString(kvstore->applyMethod())); - } + auto new_region_block_cache = kvstore->preHandleSnapshot(new_region, snaps, tmt); + auto res = new PreHandledSnapshot{new_region, std::move(new_region_block_cache)}; + return GenRawCppPtr(res, RawCppPtrTypeImpl::PreHandledSnapshot); 
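The function above hands the proxy an untyped pointer plus a type tag; ApplyPreHandledSnapshot and GcRawCppPtr below recover or reclaim the object through that tag. A minimal standalone model of this tagged-pointer handshake — the enum values mirror this file, while the PreHandledSnapshot body and the driver in main are illustrative assumptions:

    #include <cstdint>
    #include <iostream>
    #include <string>

    using RawVoidPtr = void *;
    enum class RawCppPtrTypeImpl : uint32_t { None = 0, String, PreHandledSnapshot };
    struct RawCppPtr { RawVoidPtr ptr; RawCppPtrTypeImpl type; };

    struct PreHandledSnapshot { std::string region; };

    // TiFlash side: allocate and hand ownership to the proxy as a tagged raw pointer.
    RawCppPtr preHandleSnapshot() { return {new PreHandledSnapshot{"region 4"}, RawCppPtrTypeImpl::PreHandledSnapshot}; }

    // The tag is the only way to cast back and to destroy the object correctly,
    // e.g. when the proxy drops a snapshot that is never applied.
    void gcRawCppPtr(RawCppPtr p)
    {
        switch (p.type)
        {
        case RawCppPtrTypeImpl::String: delete static_cast<std::string *>(p.ptr); break;
        case RawCppPtrTypeImpl::PreHandledSnapshot: delete static_cast<PreHandledSnapshot *>(p.ptr); break;
        default: break;
        }
    }

    int main()
    {
        RawCppPtr p = preHandleSnapshot();
        std::cout << static_cast<PreHandledSnapshot *>(p.ptr)->region << "\n";
        gcRawCppPtr(p);
    }
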
 }
 catch (...)
 {
@@ -273,24 +244,12 @@ RawCppPtr PreHandleSnapshot(
     }
 }
 
-template <typename PreHandledSnapshot>
 void ApplyPreHandledSnapshot(EngineStoreServerWrap * server, PreHandledSnapshot * snap)
 {
-    static_assert(
-        std::is_same_v<PreHandledSnapshot, PreHandledSnapshotWithBlock> || std::is_same_v<PreHandledSnapshot, PreHandledSnapshotWithFiles>,
-        "Unknown pre-handled snapshot type");
-
     try
     {
         auto & kvstore = server->tmt->getKVStore();
-        if constexpr (std::is_same_v<PreHandledSnapshot, PreHandledSnapshotWithBlock>)
-        {
-            kvstore->handlePreApplySnapshot(RegionPtrWithBlock{snap->region, std::move(snap->cache)}, *server->tmt);
-        }
-        else if constexpr (std::is_same_v<PreHandledSnapshot, PreHandledSnapshotWithFiles>)
-        {
-            kvstore->handlePreApplySnapshot(RegionPtrWithSnapshotFiles{snap->region, std::move(snap->ingest_ids)}, *server->tmt);
-        }
+        kvstore->handlePreApplySnapshot(RegionPtrWithBlock{snap->region, std::move(snap->cache)}, *server->tmt);
     }
     catch (...)
     {
@@ -303,15 +262,9 @@ void ApplyPreHandledSnapshot(EngineStoreServerWrap * server, RawVoidPtr res, Raw
 {
     switch (static_cast<RawCppPtrTypeImpl>(type))
     {
-    case RawCppPtrTypeImpl::PreHandledSnapshotWithBlock:
+    case RawCppPtrTypeImpl::PreHandledSnapshot:
     {
-        auto * snap = reinterpret_cast<PreHandledSnapshotWithBlock *>(res);
-        ApplyPreHandledSnapshot(server, snap);
-        break;
-    }
-    case RawCppPtrTypeImpl::PreHandledSnapshotWithFiles:
-    {
-        auto * snap = reinterpret_cast<PreHandledSnapshotWithFiles *>(res);
+        PreHandledSnapshot * snap = reinterpret_cast<PreHandledSnapshot *>(res);
         ApplyPreHandledSnapshot(server, snap);
         break;
     }
@@ -330,11 +283,8 @@ void GcRawCppPtr(EngineStoreServerWrap *, RawVoidPtr ptr, RawCppPtrType type)
     case RawCppPtrTypeImpl::String:
         delete reinterpret_cast<RawCppString *>(ptr);
         break;
-    case RawCppPtrTypeImpl::PreHandledSnapshotWithBlock:
-        delete reinterpret_cast<PreHandledSnapshotWithBlock *>(ptr);
-        break;
-    case RawCppPtrTypeImpl::PreHandledSnapshotWithFiles:
-        delete reinterpret_cast<PreHandledSnapshotWithFiles *>(ptr);
+    case RawCppPtrTypeImpl::PreHandledSnapshot:
+        delete reinterpret_cast<PreHandledSnapshot *>(ptr);
         break;
     default:
         LOG_ERROR(&Logger::get(__PRETTY_FUNCTION__), "unknown type " + std::to_string(uint32_t(type)));
diff --git a/dbms/src/Storages/Transaction/ProxyFFI.h b/dbms/src/Storages/Transaction/ProxyFFI.h
index 78a51da1eb9..05e9cf19f09 100644
--- a/dbms/src/Storages/Transaction/ProxyFFI.h
+++ b/dbms/src/Storages/Transaction/ProxyFFI.h
@@ -37,8 +37,7 @@ enum class RawCppPtrTypeImpl : RawCppPtrType
 {
     None = 0,
     String,
-    PreHandledSnapshotWithBlock,
-    PreHandledSnapshotWithFiles,
+    PreHandledSnapshot,
 };
 
 RawCppPtr GenRawCppPtr(RawVoidPtr ptr_ = nullptr, RawCppPtrTypeImpl type_ = RawCppPtrTypeImpl::None);
diff --git a/dbms/src/Storages/Transaction/Region.cpp b/dbms/src/Storages/Transaction/Region.cpp
index c2ed5260957..d90224d05f2 100644
--- a/dbms/src/Storages/Transaction/Region.cpp
+++ b/dbms/src/Storages/Transaction/Region.cpp
@@ -24,6 +24,8 @@ extern const int UNKNOWN_FORMAT_VERSION;
 
 const UInt32 Region::CURRENT_VERSION = 1;
 
+const std::string Region::log_name = "Region";
+
 RegionData::WriteCFIter Region::removeDataByWriteIt(const RegionData::WriteCFIter & write_it) { return data.removeDataByWriteIt(write_it); }
 
 RegionDataReadInfo Region::readDataByWriteIt(const RegionData::ConstWriteCFIter & write_it, bool need_value) const
@@ -64,12 +66,6 @@ void Region::remove(const std::string & cf, const TiKVKey & key)
 
 void Region::doRemove(ColumnFamilyType type, const TiKVKey & key) { data.remove(type, key); }
 
-void Region::clearAllData()
-{
-    std::unique_lock<std::shared_mutex> lock(mutex);
-    data = RegionData();
-}
-
 UInt64 Region::appliedIndex() const { return meta.appliedIndex(); }
 
 RegionPtr Region::splitInto(RegionMeta && meta)
@@ -736,7 +732,7 @@ EngineStoreApplyRes Region::handleWriteRaftCmd(const WriteCmdsView & cmds, UInt6
     return EngineStoreApplyRes::None;
 }
 
-void Region::handleIngestSSTInMemory(const SSTViewVec snaps, UInt64 index, UInt64 term, TMTContext & tmt)
+void Region::handleIngestSST(const SSTViewVec snaps, UInt64 index, UInt64 term, TMTContext & tmt)
 {
     if (index <= appliedIndex())
         return;
@@ -773,29 +769,6 @@ void Region::handleIngestSSTInMemory(const SSTViewVec snaps, UInt64 index, UInt6
     meta.notifyAll();
 }
 
-void Region::finishIngestSSTByDTFile(RegionPtr && rhs, UInt64 index, UInt64 term)
-{
-    if (index <= appliedIndex())
-        return;
-
-    {
-        std::unique_lock<std::shared_mutex> lock(mutex);
-
-        if (rhs)
-        {
-            // Merge the uncommitted data from `rhs`
-            // (we have taken the ownership of `rhs`, so don't acquire lock on `rhs.mutex`)
-            data.mergeFrom(rhs->data);
-        }
-
-        meta.setApplied(index, term);
-    }
-    LOG_INFO(log,
-        __FUNCTION__ << ": " << this->toString(false) << " finish to ingest sst by DTFile [write_cf_keys=" << data.write_cf.getSize()
-                     << "] [default_cf_keys=" << data.default_cf.getSize() << "] [lock_cf_keys=" << data.lock_cf.getSize() << "]");
-    meta.notifyAll();
-}
-
 RegionRaftCommandDelegate & Region::makeRaftCommandDelegate(const KVStoreTaskLock & lock)
 {
     static_assert(sizeof(RegionRaftCommandDelegate) == sizeof(Region));
@@ -809,7 +782,7 @@ RegionMetaSnapshot Region::dumpRegionMetaSnapshot() const { return meta.dumpRegi
 Region::Region(RegionMeta && meta_) : Region(std::move(meta_), nullptr) {}
 
 Region::Region(DB::RegionMeta && meta_, const TiFlashRaftProxyHelper * proxy_helper_)
-    : meta(std::move(meta_)), log(&Logger::get("Region")), mapped_table_id(meta.getRange()->getMappedTableID()), proxy_helper(proxy_helper_)
+    : meta(std::move(meta_)), log(&Logger::get(log_name)), mapped_table_id(meta.getRange()->getMappedTableID()), proxy_helper(proxy_helper_)
 {}
 
 TableID Region::getMappedTableID() const { return mapped_table_id; }
diff --git a/dbms/src/Storages/Transaction/Region.h b/dbms/src/Storages/Transaction/Region.h
index e28f127564b..287ae773477 100644
--- a/dbms/src/Storages/Transaction/Region.h
+++ b/dbms/src/Storages/Transaction/Region.h
@@ -40,6 +40,8 @@ class Region : public std::enable_shared_from_this<Region>
 public:
     const static UInt32 CURRENT_VERSION;
 
+    const static std::string log_name;
+
     static const auto PutFlag = RecordKVFormat::CFModifyFlag::PutFlag;
     static const auto DelFlag = RecordKVFormat::CFModifyFlag::DelFlag;
 
@@ -104,9 +106,6 @@ class Region : public std::enable_shared_from_this<Region>
     void insert(ColumnFamilyType cf, TiKVKey && key, TiKVValue && value);
     void remove(const std::string & cf, const TiKVKey & key);
 
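Both ingest handlers in the Region.cpp hunks above begin with `if (index <= appliedIndex()) return;`. Raft may redeliver a command at an index the state machine has already applied, so every mutation is guarded to stay idempotent. A standalone toy model of that guard — Region here is a two-field mock, not the real class:

    #include <cstdint>
    #include <iostream>

    struct Region
    {
        uint64_t applied_index = 0;
        int keys = 0;

        void handleIngestSST(int new_keys, uint64_t index)
        {
            if (index <= applied_index)
                return; // stale or duplicated delivery, already reflected in the state
            keys += new_keys;
            applied_index = index;
        }
    };

    int main()
    {
        Region r;
        r.handleIngestSST(3, 10);
        r.handleIngestSST(3, 10); // replayed at the same index: ignored
        std::cout << r.keys << "\n"; // 3
    }
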
-    // Directly drop all data in this Region object.
-    void clearAllData();
-
     CommittedScanner createCommittedScanner(bool use_lock = true);
     CommittedRemover createCommittedRemover(bool use_lock = true);
 
@@ -171,8 +170,7 @@ class Region : public std::enable_shared_from_this<Region>
     TableID getMappedTableID() const;
     EngineStoreApplyRes handleWriteRaftCmd(const WriteCmdsView & cmds, UInt64 index, UInt64 term, TMTContext & tmt);
 
-    void handleIngestSSTInMemory(const SSTViewVec snaps, UInt64 index, UInt64 term, TMTContext & tmt);
-    void finishIngestSSTByDTFile(RegionPtr && rhs, UInt64 index, UInt64 term);
+    void handleIngestSST(const SSTViewVec snaps, UInt64 index, UInt64 term, TMTContext & tmt);
 
     UInt64 getSnapshotEventFlag() const { return snapshot_event_flag; }
 
diff --git a/dbms/src/Storages/Transaction/RegionData.cpp b/dbms/src/Storages/Transaction/RegionData.cpp
index 78eb5080586..014c5cce484 100644
--- a/dbms/src/Storages/Transaction/RegionData.cpp
+++ b/dbms/src/Storages/Transaction/RegionData.cpp
@@ -216,22 +216,9 @@ bool RegionData::isEqual(const RegionData & r2) const
 }
 
 RegionData::RegionData(RegionData && data)
-    : write_cf(std::move(data.write_cf)),
-      default_cf(std::move(data.default_cf)),
-      lock_cf(std::move(data.lock_cf)),
-      cf_data_size(data.cf_data_size.load())
+    : write_cf(std::move(data.write_cf)), default_cf(std::move(data.default_cf)), lock_cf(std::move(data.lock_cf))
 {}
 
-RegionData & RegionData::operator=(RegionData && rhs)
-{
-    write_cf = std::move(rhs.write_cf);
-    default_cf = std::move(rhs.default_cf);
-    lock_cf = std::move(rhs.lock_cf);
-    cf_data_size = rhs.cf_data_size.load();
-    return *this;
-}
-
-
 UInt8 RegionData::getWriteType(const ConstWriteCFIter & write_it) { return RegionWriteCFDataTrait::getWriteType(write_it->second); }
 
 const RegionDefaultCFDataTrait::Map & RegionData::getDefaultCFMap(RegionWriteCFData * write)
diff --git a/dbms/src/Storages/Transaction/RegionData.h b/dbms/src/Storages/Transaction/RegionData.h
index ff901e8ab75..0ef20c5f068 100644
--- a/dbms/src/Storages/Transaction/RegionData.h
+++ b/dbms/src/Storages/Transaction/RegionData.h
@@ -58,7 +58,6 @@ class RegionData
 
     RegionData() {}
     RegionData(RegionData && data);
-    RegionData& operator=(RegionData && data);
 
 public:
     static UInt8 getWriteType(const ConstWriteCFIter & write_it);
diff --git a/dbms/src/Storages/Transaction/RegionTable.h b/dbms/src/Storages/Transaction/RegionTable.h
index d55de57f9dc..6d42fed0b00 100644
--- a/dbms/src/Storages/Transaction/RegionTable.h
+++ b/dbms/src/Storages/Transaction/RegionTable.h
@@ -36,7 +36,6 @@ struct MockTiDBTable;
 class RegionRangeKeys;
 class RegionTaskLock;
 struct RegionPtrWithBlock;
-struct RegionPtrWithSnapshotFiles;
 
 class RegionScanFilter;
 using RegionScanFilterPtr = std::shared_ptr<RegionScanFilter>;
@@ -205,7 +204,6 @@ class RegionTable : private boost::noncopyable
     Logger * log;
 };
 
-
 // Block cache of region data with schema version.
 struct RegionPreDecodeBlockData
 {
@@ -247,24 +245,4 @@ struct RegionPtrWithBlock
     CachePtr pre_decode_cache;
 };
 
-
-// A wrapper of RegionPtr, with the directory of snapshot files waiting to be ingested
-struct RegionPtrWithSnapshotFiles
-{
-    using Base = RegionPtr;
-
-    /// can accept a const ref of RegionPtr without the cache
-    RegionPtrWithSnapshotFiles(const Base & base_, std::vector<UInt64> ids_ = {}) : base(base_), ingest_ids(std::move(ids_)) {}
-
-    /// to be compatible with usage as RegionPtr.
-    Base::element_type * operator->() const { return base.operator->(); }
-    const Base::element_type & operator*() const { return base.operator*(); }
-
-    /// make it castable into RegionPtr implicitly.
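A side note on the RegionData move constructor touched above: std::atomic is neither copyable nor movable, so the removed initializer cf_data_size(data.cf_data_size.load()) carried the byte counter across the move by hand; without it the counter restarts from its default value. A minimal demonstration — Sized and its field are hypothetical stand-ins, not TiFlash code:

    #include <atomic>
    #include <iostream>
    #include <utility>

    struct Sized
    {
        std::atomic<long> bytes{0};

        Sized() = default;
        // The explicit load() transfers the counter; a defaulted move could not.
        Sized(Sized && rhs) noexcept : bytes(rhs.bytes.load()) {}
    };

    int main()
    {
        Sized a;
        a.bytes = 42;
        Sized b(std::move(a));
        std::cout << b.bytes.load() << "\n"; // 42; would be 0 without the explicit transfer
    }
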
- operator const Base &() const { return base; } - - const Base & base; - const std::vector ingest_ids; -}; - } // namespace DB diff --git a/dbms/src/Storages/Transaction/RowCodec.cpp b/dbms/src/Storages/Transaction/RowCodec.cpp index 19c2f77f64b..dceb539e3e7 100644 --- a/dbms/src/Storages/Transaction/RowCodec.cpp +++ b/dbms/src/Storages/Transaction/RowCodec.cpp @@ -393,20 +393,18 @@ void encodeRowV1(const TiDB::TableInfo & table_info, const std::vector & column_in_key = 1; else if (table_info.is_common_handle) column_in_key = table_info.getPrimaryIndexInfo().idx_cols.size(); - if (table_info.columns.size() < fields.size() + column_in_key) + if (table_info.columns.size() != fields.size() + column_in_key) throw Exception(std::string("Encoding row has ") + std::to_string(table_info.columns.size()) + " columns but " + std::to_string(fields.size() + table_info.pk_is_handle) + " values: ", ErrorCodes::LOGICAL_ERROR); - size_t encoded_fields_idx = 0; + size_t index = 0; for (auto & column_info : table_info.columns) { if ((table_info.pk_is_handle || table_info.is_common_handle) && column_info.hasPriKeyFlag()) continue; EncodeDatum(Field(column_info.id), TiDB::CodecFlagInt, ss); - EncodeDatumForRow(fields[encoded_fields_idx++], column_info.getCodecFlag(), ss, column_info); - if (encoded_fields_idx == fields.size()) - break; + EncodeDatumForRow(fields[index++], column_info.getCodecFlag(), ss, column_info); } } @@ -421,7 +419,7 @@ struct RowEncoderV2 column_in_key = 1; else if (table_info.is_common_handle) column_in_key = table_info.getPrimaryIndexInfo().idx_cols.size(); - if (table_info.columns.size() < fields.size() + column_in_key) + if (table_info.columns.size() != fields.size() + column_in_key) throw Exception(std::string("Encoding row has ") + std::to_string(table_info.columns.size()) + " columns but " + std::to_string(fields.size() + table_info.pk_is_handle) + " values: ", ErrorCodes::LOGICAL_ERROR); @@ -451,9 +449,6 @@ struct RowEncoderV2 null_column_ids.emplace(column_info.id); } i_val++; - - if (i_val == fields.size()) - break; } is_big = is_big || value_length > std::numeric_limits::ValueOffsetType>::max(); diff --git a/dbms/src/Storages/Transaction/StorageEngineType.h b/dbms/src/Storages/Transaction/StorageEngineType.h index 938804bfc10..23c68c33749 100644 --- a/dbms/src/Storages/Transaction/StorageEngineType.h +++ b/dbms/src/Storages/Transaction/StorageEngineType.h @@ -1,8 +1,5 @@ #pragma once -#include -#include - namespace TiDB { @@ -17,28 +14,4 @@ enum class StorageEngine UNSUPPORTED_ENGINES = 128, }; -enum class SnapshotApplyMethod : std::int32_t -{ - Block = 1, - // Invalid if the storage engine is not DeltaTree - DTFile_Directory, - DTFile_Single, -}; - -inline const std::string applyMethodToString(SnapshotApplyMethod method) -{ - switch (method) - { - case SnapshotApplyMethod::Block: - return "block"; - case SnapshotApplyMethod::DTFile_Directory: - return "file1"; - case SnapshotApplyMethod::DTFile_Single: - return "file2"; - default: - return "unknown(" + std::to_string(static_cast(method)) + ")"; - } - return "unknown"; -} - } // namespace TiDB diff --git a/dbms/src/Storages/Transaction/TMTContext.cpp b/dbms/src/Storages/Transaction/TMTContext.cpp index cc13f49b8f1..ebf556dda04 100644 --- a/dbms/src/Storages/Transaction/TMTContext.cpp +++ b/dbms/src/Storages/Transaction/TMTContext.cpp @@ -16,7 +16,7 @@ namespace DB TMTContext::TMTContext(Context & context_, const TiFlashRaftConfig & raft_config, const pingcap::ClusterConfig & cluster_config) : context(context_), - 
kvstore(std::make_shared(context, raft_config.snapshot_apply_method)), + kvstore(std::make_shared(context)), region_table(context), background_service(nullptr), cluster(raft_config.pd_addrs.size() == 0 ? std::make_shared() diff --git a/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp b/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp index a19ffb82e73..75a50a81c72 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include diff --git a/dbms/src/Storages/Transaction/tests/gtest_type_mapping.cpp b/dbms/src/Storages/Transaction/tests/gtest_type_mapping.cpp index aa95fc24ad6..65969c20e68 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_type_mapping.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_type_mapping.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include namespace DB diff --git a/dbms/src/TestUtils/TiFlashTestBasic.h b/dbms/src/TestUtils/TiFlashTestBasic.h index 8c25f458554..316484700f1 100644 --- a/dbms/src/TestUtils/TiFlashTestBasic.h +++ b/dbms/src/TestUtils/TiFlashTestBasic.h @@ -27,7 +27,6 @@ #pragma clang diagnostic pop #endif - namespace DB { namespace tests diff --git a/dbms/src/TestUtils/gtests_dbms_main.cpp b/dbms/src/TestUtils/gtests_dbms_main.cpp index f3866ba352d..fd929323d2c 100644 --- a/dbms/src/TestUtils/gtests_dbms_main.cpp +++ b/dbms/src/TestUtils/gtests_dbms_main.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB::tests { diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 59d7ff7c008..219576e824c 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -52,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1619587165566, + "iteration": 1595916828338, "links": [], "panels": [ { @@ -2251,18 +2251,13 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/delete_range|ingest/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(increase(tiflash_storage_command_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "sum(rate(tiflash_storage_command_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -2305,11 +2300,11 @@ "show": true }, { - "format": "opm", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -2353,16 +2348,7 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/5min-write/", - "yaxis": 2 - }, - { - "alias": "/5min-all/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -2398,22 +2384,6 @@ "intervalFactor": 1, "legendFormat": "30min-{{instance}}", "refId": "D" - }, - { - "refId": "E", - "expr": "sum((rate(tiflash_system_profile_event_PSMWriteBytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) + rate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) + rate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]))) by (instance)", - "hide": true, - "intervalFactor": 1, - "format": 
"time_series", - "legendFormat": "5min-all-{{instance}}" - }, - { - "refId": "F", - "expr": "sum(rate(tiflash_system_profile_event_DMWriteBytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m])) by (instance)", - "hide": true, - "intervalFactor": 1, - "format": "time_series", - "legendFormat": "5min-write-{{instance}}" } ], "thresholds": [], @@ -2445,7 +2415,7 @@ "show": true }, { - "format": "Bps", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -2719,31 +2689,17 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/(delta_merge)|(seg_)/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tiflash_storage_subtask_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"delta_merge|delta_merge_fg|delta_merge_bg_gc|seg_merge|seg_split\"}[1m])) by (type)", + "expr": "sum(rate(tiflash_storage_subtask_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", - "hide": false, "intervalFactor": 1, "legendFormat": "{{type}}", "refId": "A" - }, - { - "expr": "sum(increase(tiflash_storage_subtask_count{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"delta_merge|delta_merge_fg|delta_merge_bg_gc|seg_merge|seg_split\"}[1m])) by (type)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "B" } ], "thresholds": [], @@ -2775,11 +2731,11 @@ "show": true }, { - "format": "opm", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true } ], @@ -2824,7 +2780,7 @@ "renderer": "flot", "seriesOverrides": [ { - "alias": "/^.*-delta_merge/", + "alias": "99-delta_merge", "yaxis": 2 } ], @@ -3005,7 +2961,6 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", @@ -3805,7 +3760,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tiflash_storage_throughput_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[1m]))", + "expr": "sum(rate(tiflash_storage_throughput_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"}[1m]))", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -3814,21 +3769,21 @@ "step": 10 }, { - "expr": "sum(rate(tiflash_storage_throughput_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"}[1m]))", + "expr": "sum(rate(tiflash_storage_throughput_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"write\"}[1m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "throughput_delta-management", "refId": "B" }, { - "expr": "sum(tiflash_storage_throughput_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"})", + "expr": "sum(tiflash_storage_throughput_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"write\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "total_write", "refId": "C" }, { - "expr": "sum(tiflash_storage_throughput_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"})", + "expr": "sum(tiflash_storage_throughput_bytes{tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"write\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "total_delta-management", @@ -4589,7 +4544,6 @@ "colorScale": "sqrt", 
"colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", @@ -4655,7 +4609,6 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", @@ -4721,7 +4674,6 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", @@ -4950,7 +4902,6 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", @@ -5016,7 +4967,6 @@ "colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "min": 0, "mode": "spectrum" }, "dataFormat": "tsbuckets", diff --git a/tests/delta-merge-test/raft/ingest_sst.test b/tests/delta-merge-test/raft/ingest_sst.test index ac512888e30..27d3c30d1a4 100644 --- a/tests/delta-merge-test/raft/ingest_sst.test +++ b/tests/delta-merge-test/raft/ingest_sst.test @@ -1,5 +1,4 @@ => DBGInvoke __enable_schema_sync_service('true') -=> DBGInvoke __clean_up_region() => DBGInvoke __drop_tidb_table(default, test) => drop table if exists default.test @@ -19,6 +18,8 @@ │ -4 │ 4 │ │ -5 │ 5 │ │ -7 │ 7 │ +└───────┴─────────────┘ +┌─col_1─┬─_tidb_rowid─┐ │ -8 │ 8 │ └───────┴─────────────┘ => DBGInvoke __drop_tidb_table(default, test) diff --git a/tests/delta-merge-test/raft/snapshot_dtfile.test b/tests/delta-merge-test/raft/snapshot_dtfile.test deleted file mode 100644 index 5982b554030..00000000000 --- a/tests/delta-merge-test/raft/snapshot_dtfile.test +++ /dev/null @@ -1,168 +0,0 @@ -# Disable background schema sync to test schema sync triggled by applying snapshot -=> DBGInvoke __enable_schema_sync_service('false') -=> DBGInvoke __drop_tidb_table(default, test) -=> drop table if exists default.test -=> DBGInvoke __clean_up_region() - -##### -## Pre-handle region to dt files then apply -=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Int64', '', 'dt') -=> DBGInvoke __region_snapshot(4, 0, 10000, default, test) -=> DBGInvoke __refresh_schemas() -=> DBGInvoke region_snapshot_pre_handle_file(default, test, 4, 3, 6, 'col_1 Int64', '') -┌─region_snapshot_pre_handle_file(default, test, 4, 3, 6)────────┐ -│ Generate 1 files for [region_id=4] │ -└────────────────────────────────────────────────────────────────┘ -=> DBGInvoke __region_snapshot_apply_file(4) -=> select * from default.test -┌─col_1─┬─_tidb_rowid─┐ -│ -3 │ 3 │ -│ -4 │ 4 │ -│ -5 │ 5 │ -└───────┴─────────────┘ - -##### -## Mock to test idempotence of applying snapshot -=> DBGInvoke region_snapshot_pre_handle_file(default, test, 4, 3, 6, 'col_1 Int64', '') -┌─region_snapshot_pre_handle_file(default, test, 4, 3, 6)────────┐ -│ Generate 1 files for [region_id=4] │ -└────────────────────────────────────────────────────────────────┘ -## Mock that we crashed before applying first snapshot, then replay the process of apply snapshot -=> DBGInvoke region_snapshot_pre_handle_file(default, test, 4, 3, 6, 'col_1 Int64', '') -┌─region_snapshot_pre_handle_file(default, test, 4, 3, 6)────────┐ -│ Generate 1 files for [region_id=4] │ -└────────────────────────────────────────────────────────────────┘ -=> DBGInvoke __region_snapshot_apply_file(4) -=> select * from default.test -┌─col_1─┬─_tidb_rowid─┐ -│ -3 │ 3 │ -│ -4 │ 4 │ -│ -5 │ 5 │ -└───────┴─────────────┘ - -##### -### Mock to test apply snapshot with multiple schema -=> DBGInvoke __add_column_to_tidb_table(default, test, 'col_2 String') -=> DBGInvoke 
__add_column_to_tidb_table(default, test, 'col_3 UInt64') -=> DBGInvoke region_snapshot_pre_handle_file(default, test, 4, 3, 12, 'col_1 Int64, col_2 String, col_3 UInt64', '', 3) -┌─region_snapshot_pre_handle_file(default, test, 4, 3, 12, "col_1 Int64, col_2 String, col_3 UInt64", "", 3)─┐ -│ Generate 1 files for [region_id=4] │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -=> DBGInvoke region_snapshot_apply_file(4) -┌─__region_snapshot_apply_file(4)──────┐ -│ success apply region 4 with dt files │ -└──────────────────────────────────────┘ -=> select * from default.test -┌─col_1─┬─_tidb_rowid─┬─col_2─┬─col_3─┐ -│ -3 │ 3 │ │ 0 │ -│ -4 │ 4 │ │ 0 │ -│ -5 │ 5 │ │ 0 │ -│ -6 │ 6 │ _6 │ 0 │ -│ -7 │ 7 │ _7 │ 0 │ -│ -8 │ 8 │ _8 │ 0 │ -│ -9 │ 9 │ _9 │ 4 │ -│ -10 │ 10 │ _10 │ 5 │ -│ -11 │ 11 │ _11 │ 5 │ -└───────┴─────────────┴───────┴───────┘ - -##### -## Recreate test table -=> DBGInvoke __drop_tidb_table(default, test) -=> drop table if exists default.test -=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Int64', '', 'dt') -=> DBGInvoke __region_snapshot(4, 0, 10000, default, test) -=> DBGInvoke __refresh_schemas() -## Mock to test apply snapshot with an older schema (case 1 - add column) -=> DBGInvoke __add_column_to_tidb_table(default, test, 'col_2 String') -=> DBGInvoke __add_column_to_tidb_table(default, test, 'col_3 UInt64') -=> DBGInvoke region_snapshot_pre_handle_file(default, test, 4, 3, 12, 'col_1 Int64, col_2 String, col_3 UInt64', '', 3) -┌─region_snapshot_pre_handle_file(default, test, 4, 3, 12, "col_1 Int64, col_2 String, col_3 UInt64", "", 3)─┐ -│ Generate 1 files for [region_id=4] │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -## Add column so that we should fill defaut value for new-added columns -=> DBGInvoke __add_column_to_tidb_table(default, test, 'col_4 Nullable(UInt64)') -=> DBGInvoke __add_column_to_tidb_table(default, test, 'col_5 Nullable(String)') -=> DBGInvoke __refresh_schemas() -=> DBGInvoke region_snapshot_apply_file(4) -┌─region_snapshot_apply_file(4)────────┐ -│ success apply region 4 with dt files │ -└──────────────────────────────────────┘ -=> select * from default.test -┌─col_1─┬─_tidb_rowid─┬─col_2─┬─col_3─┬─col_4─┬─col_5─┐ -│ -3 │ 3 │ │ 0 │ \N │ \N │ -│ -4 │ 4 │ │ 0 │ \N │ \N │ -│ -5 │ 5 │ │ 0 │ \N │ \N │ -│ -6 │ 6 │ _6 │ 0 │ \N │ \N │ -│ -7 │ 7 │ _7 │ 0 │ \N │ \N │ -│ -8 │ 8 │ _8 │ 0 │ \N │ \N │ -│ -9 │ 9 │ _9 │ 4 │ \N │ \N │ -│ -10 │ 10 │ _10 │ 5 │ \N │ \N │ -│ -11 │ 11 │ _11 │ 5 │ \N │ \N │ -└───────┴─────────────┴───────┴───────┴───────┴───────┘ - -##### -## Recreate test table -=> DBGInvoke __drop_tidb_table(default, test) -=> drop table if exists default.test -=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Int64', '', 'dt') -=> DBGInvoke __region_snapshot(4, 0, 10000, default, test) -=> DBGInvoke __refresh_schemas() -## Mock to test apply snapshot with an older schema (case 2 - drop column) -=> DBGInvoke __add_column_to_tidb_table(default, test, 'col_2 String') -=> DBGInvoke __add_column_to_tidb_table(default, test, 'col_3 UInt64') -=> DBGInvoke region_snapshot_pre_handle_file(default, test, 4, 3, 12, 'col_1 Int64, col_2 String, col_3 UInt64', '', 3) -┌─region_snapshot_pre_handle_file(default, test, 4, 3, 12, "col_1 Int64, col_2 String, col_3 UInt64", "", 3)─┐ -│ Generate 1 files for [region_id=4] │ -└────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -## 
Drop column so that the Raft data always contain more column for decoding -=> DBGInvoke __drop_column_from_tidb_table(default, test, col_2) -=> DBGInvoke __refresh_schemas() -=> DBGInvoke region_snapshot_apply_file(4) -┌─region_snapshot_apply_file(4)────────┐ -│ success apply region 4 with dt files │ -└──────────────────────────────────────┘ -=> select * from default.test -┌─col_1─┬─_tidb_rowid─┬─col_3─┐ -│ -3 │ 3 │ 0 │ -│ -4 │ 4 │ 0 │ -│ -5 │ 5 │ 0 │ -│ -6 │ 6 │ 0 │ -│ -7 │ 7 │ 0 │ -│ -8 │ 8 │ 0 │ -│ -9 │ 9 │ 4 │ -│ -10 │ 10 │ 5 │ -│ -11 │ 11 │ 5 │ -└───────┴─────────────┴───────┘ - -##### -## Test that BR/lightning may only ingest sst files of write cf into tikv without default cf -=> DBGInvoke __drop_tidb_table(default, test) -=> drop table if exists default.test -=> DBGInvoke __mock_tidb_table(default, test, 'col_1 Int64', '', 'dt') -=> DBGInvoke __region_snapshot(4, 0, 10000, default, test) -=> DBGInvoke __refresh_schemas() -# Only the sst files of write cf ingested and send to TiFlash as a snapshot, no panic, and those uncommitted data remain in memory -=> DBGInvoke __region_snapshot_pre_handle_file(default, test, 4, 3, 12, 'col_1 Int64', '', 1, 'write') -=> DBGInvoke __region_snapshot_apply_file(4) -# There should be no committed rows ingested -=> select * from default.test - -# Apply a snapshot with write & default cfs -=> DBGInvoke __region_snapshot_pre_handle_file(default, test, 4, 3, 12, 'col_1 Int64', '', 1, 'write,default') -=> DBGInvoke __region_snapshot_apply_file(4) -=> select * from default.test -┌─col_1─┬─_tidb_rowid─┐ -│ -3 │ 3 │ -│ -4 │ 4 │ -│ -5 │ 5 │ -│ -6 │ 6 │ -│ -7 │ 7 │ -│ -8 │ 8 │ -│ -9 │ 9 │ -│ -10 │ 10 │ -│ -11 │ 11 │ -└───────┴─────────────┘ - -=> DBGInvoke __drop_tidb_table(default, test) -=> drop table if exists default.test -=> DBGInvoke __enable_schema_sync_service('true') diff --git a/tests/docker/config/tics_tmt.toml b/tests/docker/config/tics_tmt.toml index 08ec3806595..ed8a1e6ec8d 100644 --- a/tests/docker/config/tics_tmt.toml +++ b/tests/docker/config/tics_tmt.toml @@ -21,5 +21,3 @@ http_port = 8123 [raft] # specify which storage engine we use. tmt or dt storage_engine = "tmt" -[raft.snapshot] - method = "block" diff --git a/tests/docker/config/tiflash_dt.toml b/tests/docker/config/tiflash_dt.toml index 310460a0b33..30c385a00e5 100644 --- a/tests/docker/config/tiflash_dt.toml +++ b/tests/docker/config/tiflash_dt.toml @@ -92,8 +92,3 @@ http_port = 8123 # Deprecated Raft data storage path setting style. Check [storage.raft] section for new style. # If it is not set, it will be the first path of "path" appended with "/kvstore". # kvstore_path = "" - - [raft.snapshot] - # The way to apply snapshot data - # The value is one of "block" / "file1" / "file2". 
- method = "file1" From 5e7de889de74872ea45badbe6e587aef1c8f104f Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 8 Jun 2021 23:47:30 +0800 Subject: [PATCH 6/6] Fix comment Signed-off-by: JaySon-Huang --- dbms/src/Storages/Transaction/ApplySnapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/Transaction/ApplySnapshot.cpp b/dbms/src/Storages/Transaction/ApplySnapshot.cpp index 498e7f3457a..b2748134a0f 100644 --- a/dbms/src/Storages/Transaction/ApplySnapshot.cpp +++ b/dbms/src/Storages/Transaction/ApplySnapshot.cpp @@ -148,7 +148,7 @@ void KVStore::onSnapshot(const RegionPtrWithBlock & new_region_wrap, RegionPtr o try { auto & context = tmt.getContext(); - // acquire lock so that no other threads can drop storage + // Acquire `drop_lock` so that no other threads can drop the storage. `alter_lock` is not required. auto table_lock = storage->lockForShare(getThreadName()); auto dm_storage = std::dynamic_pointer_cast(storage); auto key_range = DM::RowKeyRange::fromRegionRange(