diff --git a/.github/licenserc.yml b/.github/licenserc.yml index eaf7a49eee4..b122b2c9775 100644 --- a/.github/licenserc.yml +++ b/.github/licenserc.yml @@ -25,6 +25,7 @@ header: - '**/LICENSE.TXT' - '**/cipher-file-256' - '**/asan.suppression' + - '**/tsan.suppression' - '**/LICENSE.TXT' - '**/LICENSE' - '**/README' diff --git a/.gitmodules b/.gitmodules index 8472d78404e..335e1dbd9c8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,3 +82,6 @@ [submodule "contrib/cpu_features"] path = contrib/cpu_features url = https://github.com/google/cpu_features +[submodule "contrib/arm-optimized-routines"] + path = contrib/arm-optimized-routines + url = https://github.com/ARM-software/optimized-routines diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e14c205f18..2e33a127807 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,10 +239,8 @@ else () set (CMAKE_CXX_STANDARD_REQUIRED ON) endif () -if (NOT ARCH_ARM) - set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") - set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3") -endif () +set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") +set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3") option (DEBUG_WITHOUT_DEBUG_INFO "Set to ON to build dev target without debug info (remove flag `-g` in order to accelerate compiling speed and reduce target binary size)" OFF) if (DEBUG_WITHOUT_DEBUG_INFO) @@ -432,9 +430,6 @@ else (ENABLE_FAILPOINTS) message (STATUS "Failpoints are disabled") endif (ENABLE_FAILPOINTS) -# Enable PageStorage V3 test. -option (ENABLE_V3_PAGESTORAGE "Enables V3 PageStorage" ON) - # Flags for test coverage option (TEST_COVERAGE "Enables flags for test coverage" OFF) option (TEST_COVERAGE_XML "Output XML report for test coverage" OFF) diff --git a/README.md b/README.md index 8a2217b9a42..ab996b6f3d6 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ TiFlash repository is based on [ClickHouse](https://github.com/ClickHouse/ClickH ### Start with TiDB Cloud -Quickly explore TiFlash with [a free trial of TiDB Cloud](https://tidbcloud.com/signup). +Quickly explore TiFlash with [a free trial of TiDB Cloud](https://tidbcloud.com/free-trial). See [TiDB Cloud Quick Start Guide](https://docs.pingcap.com/tidbcloud/tidb-cloud-quickstart). @@ -242,7 +242,30 @@ LSAN_OPTIONS=suppressions=$WORKSPACE/tiflash/test/sanitize/asan.suppression ## Run Integration Tests -TBD. +1. Build your own TiFlash binary in $BUILD with `-DCMAKE_BUILD_TYPE=DEBUG`. +``` +cd $BUILD +cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=DEBUG +ninja tiflash +``` +2. Run a TiDB cluster locally using tiup playground or other tools. +``` +tiup playground nightly --tiflash.binpath $BUILD/dbms/src/Server/tiflash +``` +3. Check $WORKSPACE/tests/_env.sh to make sure the port and build directory are set correctly. +4. Run your integration tests using commands like `./run-test.sh fullstack-test2/ddl` under the $WORKSPACE/tests directory. + +## Run MicroBenchmark Tests + +To run microbenchmark tests, you need to build with `-DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON`: + +```shell +cd $BUILD +cmake $WORKSPACE/tiflash -GNinja -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_TESTS=ON +ninja bench_dbms +``` + +The microbenchmark executable is at `$BUILD/dbms/bench_dbms`; you can run it with `./bench_dbms` or `./bench_dbms --benchmark_filter=xxx`. For more usage, check `./bench_dbms --help`.
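Since `bench_dbms` is built on Google Benchmark (vendored under `contrib/benchmark`), the standard Google Benchmark flags should work as well. A minimal sketch, assuming a set of benchmark cases whose names match the hypothetical pattern `Join`:

```shell
cd $BUILD/dbms
# Run only the benchmarks whose names match the regex "Join.*",
# repeat each case 3 times, and report only the aggregate
# mean/median/stddev rows (standard Google Benchmark flags).
./bench_dbms --benchmark_filter='Join.*' \
    --benchmark_repetitions=3 \
    --benchmark_report_aggregates_only=true
```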
## Generate LLVM Coverage Report diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 71f81ae3ee5..4520d1cb176 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -165,3 +165,7 @@ add_subdirectory(benchmark) set (BUILD_TESTING OFF CACHE BOOL "Disable cpu-features testing" FORCE) add_subdirectory(cpu_features) + +if (ARCH_AARCH64 AND ARCH_LINUX) + add_subdirectory(arm-optimized-routines-cmake) +endif () diff --git a/contrib/arm-optimized-routines b/contrib/arm-optimized-routines new file mode 160000 index 00000000000..e373f659523 --- /dev/null +++ b/contrib/arm-optimized-routines @@ -0,0 +1 @@ +Subproject commit e373f6595230087a8ddea449bfb14b47150b4059 diff --git a/contrib/arm-optimized-routines-cmake/CMakeLists.txt b/contrib/arm-optimized-routines-cmake/CMakeLists.txt new file mode 100644 index 00000000000..89baa7222f3 --- /dev/null +++ b/contrib/arm-optimized-routines-cmake/CMakeLists.txt @@ -0,0 +1,45 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This library overrides performance-critical routines for aarch64 targets. +# The implementations are imported from the official ARM repo. +# To reduce dispatching cost, the indirect function (ifunc) technique is utilized. +# Therefore, this library should only be enabled with ELF targets. + +# Considerations: +# - As of June 2022, most enterprise OSs (CentOS 7, CentOS Stream 8 and RHEL 8) still +# use a relatively old glibc on ARM64, where ASIMD, MTE, DC ZVA and SVE are not +# fully utilized. However, it is becoming increasingly common to use ARM64 instances +# in cloud-native situations. +# - The `optimized-routines` repo is actively maintained by ARM itself. Therefore, its +# quality can be trusted, and using it also lets us keep in sync with the latest +# acceleration techniques.
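As background for the dispatching approach described in the comments above, here is a minimal, self-contained sketch of the GNU ifunc mechanism that the new `src/aor.c` below builds on; `my_memcpy` and its resolver are hypothetical names for illustration, not part of this patch:

```c
// Minimal GNU ifunc sketch (ELF targets only). The resolver runs once at
// load time; every later call to my_memcpy jumps straight to the chosen
// implementation, so there is no per-call dispatch cost.
#include <stddef.h>
#include <string.h>

// Fallback implementation; a real resolver would pick between several
// hand-optimized variants based on hardware capabilities (e.g. getauxval).
static void * my_memcpy_generic(void * dst, const void * src, size_t n)
{
    return memcpy(dst, src, n);
}

// Resolver: returns the function pointer that my_memcpy will be bound to.
static typeof(my_memcpy_generic) * my_memcpy_resolver(void)
{
    return my_memcpy_generic;
}

// Bind my_memcpy to whatever the resolver returns (GNU extension).
void * my_memcpy(void * dst, const void * src, size_t n) __attribute__((ifunc("my_memcpy_resolver")));
```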
+ +set(CMAKE_C_FLAGS "") +ENABLE_LANGUAGE(C) +ENABLE_LANGUAGE(ASM) +set(TIFLASH_AOR_DIR ../arm-optimized-routines) + +file(GLOB TIFLASH_AARCH64_STRING_FILES ${TIFLASH_AOR_DIR}/string/aarch64/*.S) +add_library(tiflash-aarch64-string STATIC ${TIFLASH_AARCH64_STRING_FILES} src/aor.c) +target_compile_options(tiflash-aarch64-string PRIVATE -march=armv8-a+sve) +target_include_directories(tiflash-aarch64-string PRIVATE ${TIFLASH_AOR_DIR}/string/include) + +file(GLOB TIFLASH_AARCH64_MATH_FILES ${TIFLASH_AOR_DIR}/math/*.c) +add_library(tiflash-aarch64-math STATIC ${TIFLASH_AARCH64_MATH_FILES}) +target_include_directories(tiflash-aarch64-math PRIVATE ${TIFLASH_AOR_DIR}/math/include) + +# It is reasonable to keep these libraries optimized. +target_compile_options(tiflash-aarch64-string PRIVATE -O3 -g3 -fno-omit-frame-pointer -ffunction-sections -fdata-sections) +target_compile_options(tiflash-aarch64-math PRIVATE -O3 -g3 -fno-omit-frame-pointer -ffunction-sections -fdata-sections) diff --git a/contrib/arm-optimized-routines-cmake/src/aor.c b/contrib/arm-optimized-routines-cmake/src/aor.c new file mode 100644 index 00000000000..daff1df3c4b --- /dev/null +++ b/contrib/arm-optimized-routines-cmake/src/aor.c @@ -0,0 +1,115 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <asm/hwcap.h> +#include <stdbool.h> +#include <stddef.h> +#include <string.h> +#include <stringlib.h> +#include <sys/auxv.h> + +// Provide default macro definitions in case they are not defined on the current Linux distro. +// For example, TiFlash compiled on older Linux kernels may also be used on newer ones. +// These values should be stable for Linux: only false negatives are expected when running on +// older kernels, but that is acceptable, as `google/cpu_features` does the same. +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif + +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#ifndef AT_HWCAP +#define AT_HWCAP 16 +#endif + +/// check if MTE is supported in the current environment +static inline bool mte_supported(void) +{ + return (getauxval(AT_HWCAP2) & HWCAP2_MTE) != 0; +} + +/// check if SVE is supported in the current environment +static inline bool sve_supported(void) +{ + return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0; +} + +#define STRINGIFY_IMPL(X) #X +#define STRINGIFY(X) STRINGIFY_IMPL(X) +/** + * \brief + * Symbols are defined with hidden visibility. Therefore, the implementations here only override routines within the + * TiFlash binary itself. This is because dependencies like `ld.so`, `libgcc_s.so`, etc. need essential routines like + * `memcpy` to finish the early loading procedure, so declaring such symbols as visible indirect functions would + * create cyclic dependencies. It shall be good enough to override symbols within TiFlash, as most heavy + * computation happens in the main binary.
+ * \param NAME: exported symbol name + * \param SVE: preferred implementation when SVE is available + * \param MTE: preferred implementation when MTE is available + * \param ASIMD: preferred implementation for generic aarch64 targets (ASIMD is required by default for Armv8 and above) + */ +#define DISPATCH(NAME, SVE, MTE, ASIMD) \ + extern typeof(ASIMD) __tiflash_##NAME __attribute__((ifunc(STRINGIFY(__tiflash_##NAME##_resolver)))); \ + extern typeof(ASIMD) NAME __attribute__((visibility("hidden"), alias(STRINGIFY(__tiflash_##NAME)))); \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wunused-function\"") static typeof(ASIMD) * __tiflash_##NAME##_resolver(void) \ + { \ + if (sve_supported()) \ + { \ + return SVE; \ + } \ + if (mte_supported()) \ + { \ + return MTE; \ + } \ + return ASIMD; \ + } \ + _Pragma("GCC diagnostic pop") +#undef memcpy +#undef memmove +#undef memset +#undef memchr +#undef memrchr +#undef memcmp +#undef strcpy +#undef stpcpy +#undef strcmp +#undef strchr +#undef strrchr +#undef strchrnul +#undef strlen +#undef strnlen +#undef strncmp + +DISPATCH(memcpy, __memcpy_aarch64_sve, __memcpy_aarch64_simd, __memcpy_aarch64_simd) +DISPATCH(memmove, __memmove_aarch64_sve, __memmove_aarch64_simd, __memmove_aarch64_simd) +DISPATCH(memset, __memset_aarch64, __memset_aarch64, __memset_aarch64) +DISPATCH(memchr, __memchr_aarch64_sve, __memchr_aarch64_mte, __memchr_aarch64) +DISPATCH(memrchr, __memrchr_aarch64, __memrchr_aarch64, __memrchr_aarch64) +DISPATCH(memcmp, __memcmp_aarch64_sve, __memcmp_aarch64, __memcmp_aarch64) +DISPATCH(strcpy, __strcpy_aarch64_sve, __strcpy_aarch64, __strcpy_aarch64) +DISPATCH(stpcpy, __stpcpy_aarch64_sve, __stpcpy_aarch64, __stpcpy_aarch64) +DISPATCH(strcmp, __strcmp_aarch64_sve, __strcmp_aarch64, __strcmp_aarch64) +DISPATCH(strchr, __strchr_aarch64_sve, __strchr_aarch64_mte, __strchr_aarch64) +DISPATCH(strrchr, __strrchr_aarch64_sve, __strrchr_aarch64_mte, __strrchr_aarch64) +DISPATCH(strchrnul, __strchrnul_aarch64_sve, __strchrnul_aarch64_mte, __strchrnul_aarch64) +DISPATCH(strlen, __strlen_aarch64_sve, __strlen_aarch64_mte, __strlen_aarch64) +DISPATCH(strnlen, __strnlen_aarch64_sve, __strnlen_aarch64, __strnlen_aarch64) +DISPATCH(strncmp, __strncmp_aarch64_sve, __strncmp_aarch64, __strncmp_aarch64) \ No newline at end of file diff --git a/contrib/client-c b/contrib/client-c index 36e05cb0f24..034d1e782cb 160000 --- a/contrib/client-c +++ b/contrib/client-c @@ -1 +1 @@ -Subproject commit 36e05cb0f24c085785abf367176dac2a45bfd67b +Subproject commit 034d1e782cb4697f99b09b679c00dade00f19dd5 diff --git a/contrib/jemalloc b/contrib/jemalloc index ea6b3e973b4..54eaed1d8b5 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit ea6b3e973b477b8061e0076bb257dbd7f3faa756 +Subproject commit 54eaed1d8b56b1aa528be3bdd1877e59c56fa90c diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index ef02fbabc81..91b17eb8ec7 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -4,65 +4,136 @@ endif() set(JEMALLOC_SOURCE_DIR ${TiFlash_SOURCE_DIR}/contrib/jemalloc) -set(SRCS -${JEMALLOC_SOURCE_DIR}/src/arena.c -${JEMALLOC_SOURCE_DIR}/src/background_thread.c -${JEMALLOC_SOURCE_DIR}/src/base.c -${JEMALLOC_SOURCE_DIR}/src/bin.c -${JEMALLOC_SOURCE_DIR}/src/bitmap.c -${JEMALLOC_SOURCE_DIR}/src/ckh.c -${JEMALLOC_SOURCE_DIR}/src/ctl.c -${JEMALLOC_SOURCE_DIR}/src/div.c -${JEMALLOC_SOURCE_DIR}/src/extent.c -${JEMALLOC_SOURCE_DIR}/src/extent_dss.c 
-${JEMALLOC_SOURCE_DIR}/src/extent_mmap.c -${JEMALLOC_SOURCE_DIR}/src/hash.c -${JEMALLOC_SOURCE_DIR}/src/hook.c -${JEMALLOC_SOURCE_DIR}/src/jemalloc.c -${JEMALLOC_SOURCE_DIR}/src/jemalloc_cpp.cpp -${JEMALLOC_SOURCE_DIR}/src/large.c -${JEMALLOC_SOURCE_DIR}/src/log.c -${JEMALLOC_SOURCE_DIR}/src/malloc_io.c -${JEMALLOC_SOURCE_DIR}/src/mutex.c -${JEMALLOC_SOURCE_DIR}/src/mutex_pool.c -${JEMALLOC_SOURCE_DIR}/src/nstime.c -${JEMALLOC_SOURCE_DIR}/src/pages.c -${JEMALLOC_SOURCE_DIR}/src/prng.c -${JEMALLOC_SOURCE_DIR}/src/prof.c -${JEMALLOC_SOURCE_DIR}/src/rtree.c -${JEMALLOC_SOURCE_DIR}/src/sc.c -${JEMALLOC_SOURCE_DIR}/src/stats.c -${JEMALLOC_SOURCE_DIR}/src/sz.c -${JEMALLOC_SOURCE_DIR}/src/tcache.c -${JEMALLOC_SOURCE_DIR}/src/test_hooks.c -${JEMALLOC_SOURCE_DIR}/src/ticker.c -${JEMALLOC_SOURCE_DIR}/src/tsd.c -${JEMALLOC_SOURCE_DIR}/src/witness.c -${JEMALLOC_SOURCE_DIR}/src/safety_check.c +set (SRCS + "${JEMALLOC_SOURCE_DIR}/src/arena.c" + "${JEMALLOC_SOURCE_DIR}/src/background_thread.c" + "${JEMALLOC_SOURCE_DIR}/src/base.c" + "${JEMALLOC_SOURCE_DIR}/src/bin.c" + "${JEMALLOC_SOURCE_DIR}/src/bin_info.c" + "${JEMALLOC_SOURCE_DIR}/src/bitmap.c" + "${JEMALLOC_SOURCE_DIR}/src/buf_writer.c" + "${JEMALLOC_SOURCE_DIR}/src/cache_bin.c" + "${JEMALLOC_SOURCE_DIR}/src/ckh.c" + "${JEMALLOC_SOURCE_DIR}/src/counter.c" + "${JEMALLOC_SOURCE_DIR}/src/ctl.c" + "${JEMALLOC_SOURCE_DIR}/src/decay.c" + "${JEMALLOC_SOURCE_DIR}/src/div.c" + "${JEMALLOC_SOURCE_DIR}/src/ecache.c" + "${JEMALLOC_SOURCE_DIR}/src/edata.c" + "${JEMALLOC_SOURCE_DIR}/src/edata_cache.c" + "${JEMALLOC_SOURCE_DIR}/src/ehooks.c" + "${JEMALLOC_SOURCE_DIR}/src/emap.c" + "${JEMALLOC_SOURCE_DIR}/src/eset.c" + "${JEMALLOC_SOURCE_DIR}/src/exp_grow.c" + "${JEMALLOC_SOURCE_DIR}/src/extent.c" + "${JEMALLOC_SOURCE_DIR}/src/extent_dss.c" + "${JEMALLOC_SOURCE_DIR}/src/extent_mmap.c" + "${JEMALLOC_SOURCE_DIR}/src/fxp.c" + "${JEMALLOC_SOURCE_DIR}/src/hook.c" + "${JEMALLOC_SOURCE_DIR}/src/hpa.c" + "${JEMALLOC_SOURCE_DIR}/src/hpa_hooks.c" + "${JEMALLOC_SOURCE_DIR}/src/hpdata.c" + "${JEMALLOC_SOURCE_DIR}/src/inspect.c" + "${JEMALLOC_SOURCE_DIR}/src/jemalloc.c" + "${JEMALLOC_SOURCE_DIR}/src/large.c" + "${JEMALLOC_SOURCE_DIR}/src/log.c" + "${JEMALLOC_SOURCE_DIR}/src/malloc_io.c" + "${JEMALLOC_SOURCE_DIR}/src/mutex.c" + "${JEMALLOC_SOURCE_DIR}/src/nstime.c" + "${JEMALLOC_SOURCE_DIR}/src/pa.c" + "${JEMALLOC_SOURCE_DIR}/src/pac.c" + "${JEMALLOC_SOURCE_DIR}/src/pa_extra.c" + "${JEMALLOC_SOURCE_DIR}/src/pages.c" + "${JEMALLOC_SOURCE_DIR}/src/pai.c" + "${JEMALLOC_SOURCE_DIR}/src/peak_event.c" + "${JEMALLOC_SOURCE_DIR}/src/prof.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_data.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_log.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_recent.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_stats.c" + "${JEMALLOC_SOURCE_DIR}/src/prof_sys.c" + "${JEMALLOC_SOURCE_DIR}/src/psset.c" + "${JEMALLOC_SOURCE_DIR}/src/rtree.c" + "${JEMALLOC_SOURCE_DIR}/src/safety_check.c" + "${JEMALLOC_SOURCE_DIR}/src/san_bump.c" + "${JEMALLOC_SOURCE_DIR}/src/san.c" + "${JEMALLOC_SOURCE_DIR}/src/sc.c" + "${JEMALLOC_SOURCE_DIR}/src/sec.c" + "${JEMALLOC_SOURCE_DIR}/src/stats.c" + "${JEMALLOC_SOURCE_DIR}/src/sz.c" + "${JEMALLOC_SOURCE_DIR}/src/tcache.c" + "${JEMALLOC_SOURCE_DIR}/src/test_hooks.c" + "${JEMALLOC_SOURCE_DIR}/src/thread_event.c" + "${JEMALLOC_SOURCE_DIR}/src/ticker.c" + "${JEMALLOC_SOURCE_DIR}/src/tsd.c" + "${JEMALLOC_SOURCE_DIR}/src/witness.c" ) if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND SRCS ${JEMALLOC_SOURCE_DIR}/src/zone.c) endif() +if (ARCH_LINUX) + # ThreadPool select job 
randomly, and there can be some threads that have + # performed some memory-heavy task before and will be inactive for some time, + # but until they become active again, the memory will not be freed, since by + # default each thread has its own arena, and there can be up to + # 4*CPU arenas (see the opt.narenas description). + # + # By enabling percpu_arena, the number of arenas is limited to the number of CPUs, and hence + # this problem should go away. + # + # muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to + # avoid spurious latencies and additional work associated with + # MADV_DONTNEED. See + # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000") +else() + set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000") +endif() + +message (STATUS "jemalloc malloc_conf: ${JEMALLOC_CONFIG_MALLOC_CONF}") + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") endif () add_library(jemalloc STATIC ${SRCS}) +set (JEMALLOC_INCLUDE_PREFIX) + +if (ARCH_LINUX) + set (JEMALLOC_INCLUDE_PREFIX "include_linux") + target_compile_definitions(jemalloc PRIVATE JEMALLOC_MADV_FREE=8) +elseif (ARCH_FREEBSD) + set (JEMALLOC_INCLUDE_PREFIX "include_freebsd") +elseif (APPLE) + set (JEMALLOC_INCLUDE_PREFIX "include_darwin") +else () + message (FATAL_ERROR "internal jemalloc: This OS is not supported") +endif () -if (ARCH_ARM) - target_include_directories(jemalloc PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include_linux_aarch64) +if (ARCH_AMD64) + if (USE_MUSL) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64_musl") + else() + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64") + endif() +elseif (ARCH_AARCH64) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_aarch64") +elseif (ARCH_PPC64LE) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_ppc64le") +elseif (ARCH_RISCV64) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_riscv64") else () - target_include_directories(jemalloc PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include_linux_x86_64) + message (FATAL_ERROR "internal jemalloc: This arch is not supported") endif () -target_include_directories(jemalloc PRIVATE - ${JEMALLOC_SOURCE_DIR}/include) +configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in + ${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h) +target_include_directories(jemalloc SYSTEM PRIVATE + "${CMAKE_CURRENT_BINARY_DIR}/${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal") + +target_include_directories(jemalloc PUBLIC ${JEMALLOC_SOURCE_DIR}/include ${TiFlash_SOURCE_DIR}/contrib/jemalloc-cmake/include) target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) @@ -80,3 +151,5 @@ if (ENABLE_JEMALLOC_PROF) target_link_libraries (jemalloc PRIVATE ${UNWIND_LIBRARY}) endif () endif () + +target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE) diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include/jemalloc/internal/jemalloc_preamble.h similarity index 69% rename from contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h rename to contrib/jemalloc-cmake/include/jemalloc/internal/jemalloc_preamble.h index d79551e1f25..45f43a6cd02 100644 ---
a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h +++ b/contrib/jemalloc-cmake/include/jemalloc/internal/jemalloc_preamble.h @@ -1,27 +1,33 @@ #ifndef JEMALLOC_PREAMBLE_H #define JEMALLOC_PREAMBLE_H -#include "jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" +#include "jemalloc_internal_defs.h" -#ifdef JEMALLOC_UTRACE +#if defined(JEMALLOC_UTRACE) || defined(JEMALLOC_UTRACE_LABEL) #include <sys/ktrace.h> +#if defined(JEMALLOC_UTRACE) +#define UTRACE_CALL(p, l) utrace(p, l) +#else +#define UTRACE_CALL(p, l) utrace("jemalloc_process", p, l) +#define JEMALLOC_UTRACE +#endif #endif #define JEMALLOC_NO_DEMANGLE #ifdef JEMALLOC_JET -# undef JEMALLOC_IS_MALLOC -# define JEMALLOC_N(n) jet_##n -# include "jemalloc/internal/public_namespace.h" -# define JEMALLOC_NO_RENAME -# include "jemalloc/jemalloc.h" -# undef JEMALLOC_NO_RENAME +#undef JEMALLOC_IS_MALLOC +#define JEMALLOC_N(n) jet_##n +#include "jemalloc/internal/public_namespace.h" +#define JEMALLOC_NO_RENAME +#include "jemalloc/jemalloc.h" +#undef JEMALLOC_NO_RENAME #else -# define JEMALLOC_N(n) je_##n -# include "jemalloc/jemalloc.h" +#define JEMALLOC_N(n) je_##n +#include "jemalloc/jemalloc.h" #endif -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#if defined(JEMALLOC_OSATOMIC) #include <libkern/OSAtomic.h> #endif @@ -39,16 +45,16 @@ * possible. */ #ifndef JEMALLOC_NO_PRIVATE_NAMESPACE -# ifndef JEMALLOC_JET -# include "jemalloc/internal/private_namespace.h" -# else -# include "jemalloc/internal/private_namespace_jet.h" -# endif +#ifndef JEMALLOC_JET +#include "jemalloc/internal/private_namespace.h" +#else +#include "jemalloc/internal/private_namespace_jet.h" +#endif #endif #include "jemalloc/internal/test_hooks.h" #ifdef JEMALLOC_DEFINE_MADVISE_FREE -# define JEMALLOC_MADV_FREE 8 +#define JEMALLOC_MADV_FREE 8 #endif static const bool config_debug = @@ -161,7 +167,55 @@ static const bool config_log = false #endif ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#ifdef JEMALLOC_OPT_SAFETY_CHECKS + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. + */ + true +#else + false +#endif + ; + +/* + * Extra debugging of sized deallocations too onerous to be included in the + * general safety checks. + */ +static const bool config_opt_size_checks = +#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_DEBUG) + true +#else + false +#endif + ; + +static const bool config_uaf_detection = +#if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG) + true +#else + false +#endif + ; + +/* Whether or not the C++ extensions are enabled. */ +static const bool config_enable_cxx = +#ifdef JEMALLOC_ENABLE_CXX + true +#else + false +#endif + ; + +#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA #endif @@ -190,23 +244,16 @@ static const bool have_background_thread = false #endif ; - -#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 -#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 - -/* - * Are extra safety checks enabled; things like checking the size of sized - * deallocations, double-frees, etc.
- */ -static const bool config_opt_safety_checks = -#ifdef JEMALLOC_OPT_SAFETY_CHECKS +static const bool config_high_res_timer = +#ifdef JEMALLOC_HAVE_CLOCK_REALTIME true -#elif defined(JEMALLOC_DEBUG) - /* - * This lets us only guard safety checks by one flag instead of two; fast - * checks can guard solely by config_opt_safety_checks and run in debug mode - * too. - */ +#else + false +#endif + ; + +static const bool have_memcntl = +#ifdef JEMALLOC_HAVE_MEMCNTL true #else false diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h index d06243c5239..e90fa892100 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h @@ -4,13 +4,21 @@ extern "C" { #endif +#if !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#endif + #include <jemalloc/jemalloc_defs.h> -#include <jemalloc/jemalloc_rename.h> #include <jemalloc/jemalloc_macros.h> #include <jemalloc/jemalloc_protos.h> +#include <jemalloc/jemalloc_rename.h> #include <jemalloc/jemalloc_typedefs.h> +#if !defined(__clang__) +#pragma GCC diagnostic pop +#endif + #ifdef __cplusplus } #endif - diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h similarity index 67% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h rename to contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h index d1389237a77..1fc77be57cf 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h @@ -5,15 +5,29 @@ /* Defined if alloc_size attribute is supported. */ #define JEMALLOC_HAVE_ATTR_ALLOC_SIZE +/* Defined if format_arg(...) attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FORMAT_ARG + +/* Defined if format(gnu_printf, ...) attribute is supported. */ +/* #undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF */ + /* Defined if format(printf, ...) attribute is supported. */ #define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF +/* Defined if fallthrough attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FALLTHROUGH + +/* Defined if cold attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_COLD + /* * Define overrides for non-standard allocator-related functions if they are * present on the system. */ +#if !defined(USE_MUSL) #define JEMALLOC_OVERRIDE_MEMALIGN #define JEMALLOC_OVERRIDE_VALLOC +#endif /* * At least Linux omits the "const" in: @@ -32,11 +46,11 @@ #define JEMALLOC_USE_CXX_THROW #ifdef _MSC_VER -# ifdef _WIN64 -# define LG_SIZEOF_PTR_WIN 3 -# else -# define LG_SIZEOF_PTR_WIN 2 -# endif +#ifdef _WIN64 +#define LG_SIZEOF_PTR_WIN 3 +#else +#define LG_SIZEOF_PTR_WIN 2 +#endif #endif /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h new file mode 100644 index 00000000000..ccb22470e64 --- /dev/null +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h @@ -0,0 +1,148 @@ +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> +#include <limits.h> +#include <strings.h> + +#define JEMALLOC_VERSION "5.3-RC" +#define JEMALLOC_VERSION_MAJOR 5 +#define JEMALLOC_VERSION_MINOR 3 +#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION_NREV 0 +#define JEMALLOC_VERSION_GID "ca709c3139f77f4c00a903cdee46d71e9028f6c6" +#define JEMALLOC_VERSION_GID_IDENT ca709c3139f77f4c00a903cdee46d71e9028f6c6 + +#define MALLOCX_LG_ALIGN(la) ((int)(la)) +#if LG_SIZEOF_PTR == 2 +#define MALLOCX_ALIGN(a) ((int)(ffs((int)(a)) - 1)) +#else +#define MALLOCX_ALIGN(a) \ + ((int)(((size_t)(a) < (size_t)INT_MAX) ?
ffs((int)(a)) - 1 : ffs((int)(((size_t)(a)) >> 32)) + 31)) +#endif +#define MALLOCX_ZERO ((int)0x40) +/* + * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 + * encodes MALLOCX_TCACHE_NONE. + */ +#define MALLOCX_TCACHE(tc) ((int)(((tc) + 2) << 8)) +#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +/* + * Bias arena index bits so that 0 encodes "use an automatically chosen arena". + */ +#define MALLOCX_ARENA(a) ((((int)(a)) + 1) << 20) + +/* + * Use as arena index in "arena..{purge,decay,dss}" and + * "stats.arenas..*" mallctl interfaces to select all arenas. This + * definition is intentionally specified in raw decimal format to support + * cpp-based string concatenation, e.g. + * + * #define STRINGIFY_HELPER(x) #x + * #define STRINGIFY(x) STRINGIFY_HELPER(x) + * + * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, + * 0); + */ +#define MALLCTL_ARENAS_ALL 4096 +/* + * Use as arena index in "stats.arenas..*" mallctl interfaces to select + * destroyed arenas. + */ +#define MALLCTL_ARENAS_DESTROYED 4097 + +#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) +#define JEMALLOC_CXX_THROW throw() +#else +#define JEMALLOC_CXX_THROW +#endif + +#if defined(_MSC_VER) +#define JEMALLOC_ATTR(s) +#define JEMALLOC_ALIGNED(s) __declspec(align(s)) +#define JEMALLOC_ALLOC_SIZE(s) +#define JEMALLOC_ALLOC_SIZE2(s1, s2) +#ifndef JEMALLOC_EXPORT +#ifdef DLLEXPORT +#define JEMALLOC_EXPORT __declspec(dllexport) +#else +#define JEMALLOC_EXPORT __declspec(dllimport) +#endif +#endif +#define JEMALLOC_FORMAT_ARG(i) +#define JEMALLOC_FORMAT_PRINTF(s, i) +#define JEMALLOC_FALLTHROUGH +#define JEMALLOC_NOINLINE __declspec(noinline) +#ifdef __cplusplus +#define JEMALLOC_NOTHROW __declspec(nothrow) +#else +#define JEMALLOC_NOTHROW +#endif +#define JEMALLOC_SECTION(s) __declspec(allocate(s)) +#define JEMALLOC_RESTRICT_RETURN __declspec(restrict) +#if _MSC_VER >= 1900 && !defined(__EDG__) +#define JEMALLOC_ALLOCATOR __declspec(allocator) +#else +#define JEMALLOC_ALLOCATOR +#endif +#define JEMALLOC_COLD +#elif defined(JEMALLOC_HAVE_ATTR) +#define JEMALLOC_ATTR(s) __attribute__((s)) +#define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +#ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +#define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +#define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +#else +#define JEMALLOC_ALLOC_SIZE(s) +#define JEMALLOC_ALLOC_SIZE2(s1, s2) +#endif +#ifndef JEMALLOC_EXPORT +#define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +#endif +#ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG +#define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) +#else +#define JEMALLOC_FORMAT_ARG(i) +#endif +#ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +#define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +#elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +#define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +#else +#define JEMALLOC_FORMAT_PRINTF(s, i) +#endif +#ifdef JEMALLOC_HAVE_ATTR_FALLTHROUGH +#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough) +#else +#define JEMALLOC_FALLTHROUGH +#endif +#define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +#define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +#define JEMALLOC_RESTRICT_RETURN +#define JEMALLOC_ALLOCATOR +#ifdef JEMALLOC_HAVE_ATTR_COLD +#define JEMALLOC_COLD JEMALLOC_ATTR(__cold__) +#else +#define JEMALLOC_COLD +#endif +#else +#define JEMALLOC_ATTR(s) +#define JEMALLOC_ALIGNED(s) +#define 
JEMALLOC_ALLOC_SIZE(s) +#define JEMALLOC_ALLOC_SIZE2(s1, s2) +#define JEMALLOC_EXPORT +#define JEMALLOC_FORMAT_PRINTF(s, i) +#define JEMALLOC_FALLTHROUGH +#define JEMALLOC_NOINLINE +#define JEMALLOC_NOTHROW +#define JEMALLOC_SECTION(s) +#define JEMALLOC_RESTRICT_RETURN +#define JEMALLOC_ALLOCATOR +#define JEMALLOC_COLD +#endif + +#if (defined(__APPLE__) || defined(__FreeBSD__)) && !defined(JEMALLOC_NO_RENAME) +#define JEMALLOC_SYS_NOTHROW +#else +#define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW +#endif diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h new file mode 100644 index 00000000000..31f72d3a2af --- /dev/null +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h @@ -0,0 +1,86 @@ +// OSX does not have this for system alloc functions, so you will get +// "exception specification in declaration" error. +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(USE_MUSL) +#undef JEMALLOC_NOTHROW +#define JEMALLOC_NOTHROW + +#undef JEMALLOC_SYS_NOTHROW +#define JEMALLOC_SYS_NOTHROW + +#undef JEMALLOC_CXX_THROW +#define JEMALLOC_CXX_THROW +#endif + +#include "jemalloc_rename.h" + +/* + * The je_ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). + */ +extern JEMALLOC_EXPORT const char * je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void * cbopaque, + const char * s); + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_malloc(size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_calloc(size_t num, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); +JEMALLOC_EXPORT int JEMALLOC_SYS_NOTHROW je_posix_memalign( + void ** memptr, + size_t alignment, + size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_aligned_alloc(size_t alignment, + size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) + JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_realloc(void * ptr, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW je_free(void * ptr) + JEMALLOC_CXX_THROW; + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * je_mallocx(size_t size, int flags) + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * je_rallocx(void * ptr, size_t size, int flags) JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void * ptr, size_t size, size_t extra, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void * ptr, + int flags) JEMALLOC_ATTR(pure); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void * ptr, int flags); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void * ptr, size_t size, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) + JEMALLOC_ATTR(pure); + +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char * name, + void * oldp, + size_t * oldlenp, + void * newp, + size_t newlen); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char * 
name, + size_t * mibp, + size_t * miblenp); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t * mib, + size_t miblen, + void * oldp, + size_t * oldlenp, + void * newp, + size_t newlen); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( + void (*write_cb)(void *, const char *), + void * je_cbopaque, + const char * opts); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void * ptr) JEMALLOC_CXX_THROW; +#ifdef JEMALLOC_HAVE_MALLOC_SIZE +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_size( + const void * ptr); +#endif + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_memalign(size_t alignment, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * je_valloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); +#endif diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos_jet.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos_jet.h new file mode 100644 index 00000000000..195d57e2997 --- /dev/null +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos_jet.h @@ -0,0 +1,71 @@ +/* + * The jet_ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle@install_suffix@.h). + */ +extern JEMALLOC_EXPORT const char * jet_malloc_conf; +extern JEMALLOC_EXPORT void (*jet_malloc_message)(void * cbopaque, + const char * s); + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_malloc(size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_calloc(size_t num, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); +JEMALLOC_EXPORT int JEMALLOC_SYS_NOTHROW jet_posix_memalign( + void ** memptr, + size_t alignment, + size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_aligned_alloc(size_t alignment, + size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) + JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_realloc(void * ptr, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW jet_free(void * ptr) + JEMALLOC_CXX_THROW; + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * jet_mallocx(size_t size, int flags) + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * jet_rallocx(void * ptr, size_t size, int flags) JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_xallocx(void * ptr, size_t size, size_t extra, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_sallocx(const void * ptr, + int flags) JEMALLOC_ATTR(pure); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW jet_dallocx(void * ptr, int flags); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW jet_sdallocx(void * ptr, size_t size, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_nallocx(size_t size, int flags) + JEMALLOC_ATTR(pure); + +JEMALLOC_EXPORT int JEMALLOC_NOTHROW jet_mallctl(const 
char * name, + void * oldp, + size_t * oldlenp, + void * newp, + size_t newlen); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW jet_mallctlnametomib(const char * name, + size_t * mibp, + size_t * miblenp); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW jet_mallctlbymib(const size_t * mib, + size_t miblen, + void * oldp, + size_t * oldlenp, + void * newp, + size_t newlen); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW jet_malloc_stats_print( + void (*write_cb)(void *, const char *), + void * jet_cbopaque, + const char * opts); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void * ptr) JEMALLOC_CXX_THROW; +#ifdef JEMALLOC_HAVE_MALLOC_SIZE +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW jet_malloc_size( + const void * ptr); +#endif + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_memalign(size_t alignment, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW * jet_valloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); +#endif diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h index a2ea2dd3533..d032d46752d 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h @@ -4,26 +4,28 @@ * these macro definitions. */ #ifndef JEMALLOC_NO_RENAME -# define je_aligned_alloc aligned_alloc -# define je_calloc calloc -# define je_dallocx dallocx -# define je_free free -# define je_mallctl mallctl -# define je_mallctlbymib mallctlbymib -# define je_mallctlnametomib mallctlnametomib -# define je_malloc malloc -# define je_malloc_conf malloc_conf -# define je_malloc_message malloc_message -# define je_malloc_stats_print malloc_stats_print -# define je_malloc_usable_size malloc_usable_size -# define je_mallocx mallocx -# define je_nallocx nallocx -# define je_posix_memalign posix_memalign -# define je_rallocx rallocx -# define je_realloc realloc -# define je_sallocx sallocx -# define je_sdallocx sdallocx -# define je_xallocx xallocx -# define je_memalign memalign -# define je_valloc valloc +#define je_aligned_alloc aligned_alloc +#define je_calloc calloc +#define je_dallocx dallocx +#define je_free free +#define je_mallctl mallctl +#define je_mallctlbymib mallctlbymib +#define je_mallctlnametomib mallctlnametomib +#define je_malloc malloc +#define je_malloc_conf malloc_conf +#define je_malloc_conf_2_conf_harder malloc_conf_2_conf_harder +#define je_malloc_message malloc_message +#define je_malloc_stats_print malloc_stats_print +#define je_malloc_usable_size malloc_usable_size +#define je_mallocx mallocx +#define je_smallocx_ca709c3139f77f4c00a903cdee46d71e9028f6c6 smallocx_ca709c3139f77f4c00a903cdee46d71e9028f6c6 +#define je_nallocx nallocx +#define je_posix_memalign posix_memalign +#define je_rallocx rallocx +#define je_realloc realloc +#define je_sallocx sallocx +#define je_sdallocx sdallocx +#define je_xallocx xallocx +#define je_memalign memalign +#define je_valloc valloc #endif diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h similarity index 57% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h rename to contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h index 
1a58874306e..eeaf7a6760e 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h @@ -5,73 +5,66 @@ typedef struct extent_hooks_s extent_hooks_t; * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); */ -typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, - bool *, unsigned); +typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, bool *, unsigned); /* * bool * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, * bool committed, unsigned arena_ind); */ -typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); +typedef bool(extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, unsigned); /* * void * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, * bool committed, unsigned arena_ind); */ -typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); +typedef void(extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, unsigned); /* * bool * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); +typedef bool(extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, - size_t, unsigned); +typedef bool(extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); +typedef bool(extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); */ -typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - bool, unsigned); +typedef bool(extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, bool, unsigned); /* * bool * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); */ -typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, - bool, unsigned); +typedef bool(extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, bool, unsigned); -struct extent_hooks_s { - extent_alloc_t *alloc; - extent_dalloc_t *dalloc; - extent_destroy_t *destroy; - extent_commit_t *commit; - extent_decommit_t *decommit; - extent_purge_t *purge_lazy; - extent_purge_t *purge_forced; - extent_split_t *split; - extent_merge_t *merge; +struct extent_hooks_s +{ + extent_alloc_t * alloc; + extent_dalloc_t * dalloc; + extent_destroy_t * destroy; + extent_commit_t * commit; + extent_decommit_t * decommit; + extent_purge_t * purge_lazy; + extent_purge_t * purge_forced; + extent_split_t * split; + extent_merge_t * merge; }; diff --git a/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in 
b/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..8ad95c51560 --- /dev/null +++ b/contrib/jemalloc-cmake/include_darwin_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,425 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +#define JEMALLOC_PREFIX "je_" +#define JEMALLOC_CPREFIX "JE_" + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */ +/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */ +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 64 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +#define JEMALLOC_OS_UNFAIR_LOCK + +/* Defined if syscall(2) is usable. */ +/* #undef JEMALLOC_USE_SYSCALL */ + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +#define JEMALLOC_HAVE_ISSETUGID + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */ + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */ + +/* + * Defined if mach_absolute_time() is available. 
+ */ +#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +/* #undef JEMALLOC_DSS */ + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 14 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. 
This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +/* #undef JEMALLOC_RETAIN */ + +/* TLS is used to map arenas and magazine caches to threads. */ +/* #undef JEMALLOC_TLS */ + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +#define JEMALLOC_ZONE + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */ + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. 
+ * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +#define JEMALLOC_HAVE_MALLOC_SIZE + +/* Define if operating system has alloca.h header. */ +/* #undef JEMALLOC_HAS_ALLOCA_H */ + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */ + +/* GNU specific sched_getcpu support */ +/* #undef JEMALLOC_HAVE_SCHED_GETCPU */ + +/* GNU specific sched_setaffinity support */ +/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */ + +/* + * If defined, all the features necessary for background threads are present. + */ +/* #undef JEMALLOC_BACKGROUND_THREAD */ + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +/* #undef JEMALLOC_IS_MALLOC */ + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +#define JEMALLOC_HAVE_VM_MAKE_TAG + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_darwin_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_darwin_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..8671da5db69 --- /dev/null +++ b/contrib/jemalloc-cmake/include_darwin_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,425 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +#define JEMALLOC_PREFIX "je_" +#define JEMALLOC_CPREFIX "JE_" + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. 
+ */ +/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */ +/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */ +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 1 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +#define JEMALLOC_OS_UNFAIR_LOCK + +/* Defined if syscall(2) is usable. */ +/* #undef JEMALLOC_USE_SYSCALL */ + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +#define JEMALLOC_HAVE_ISSETUGID + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */ + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */ + +/* + * Defined if mach_absolute_time() is available. + */ +#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. 
+ */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +/* #undef JEMALLOC_DSS */ + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +/* #undef JEMALLOC_RETAIN */ + +/* TLS is used to map arenas and magazine caches to threads. */ +/* #undef JEMALLOC_TLS */ + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. 
+ */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +#define JEMALLOC_ZONE + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */ + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +#define JEMALLOC_HAVE_MALLOC_SIZE + +/* Define if operating system has alloca.h header. */ +/* #undef JEMALLOC_HAS_ALLOCA_H */ + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. 
*/ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */ + +/* GNU specific sched_getcpu support */ +/* #undef JEMALLOC_HAVE_SCHED_GETCPU */ + +/* GNU specific sched_setaffinity support */ +/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */ + +/* + * If defined, all the features necessary for background threads are present. + */ +/* #undef JEMALLOC_BACKGROUND_THREAD */ + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +/* #undef JEMALLOC_IS_MALLOC */ + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +#define JEMALLOC_HAVE_VM_MAKE_TAG + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_freebsd_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_freebsd_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..0f61417d65f --- /dev/null +++ b/contrib/jemalloc-cmake/include_freebsd_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,427 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */ +/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */ +#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. 
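The CPU_SPINWAIT / HAVE_CPU_SPINWAIT pair defined around this point is consumed inside spin loops. A minimal sketch of the usual consumption pattern, assuming the x86 `pause` expansion from the x86_64 headers above and the empty expansion the aarch64 configs use (a real aarch64 port might substitute `yield` or `isb`, which these generated configs leave out):

```c
/* Hypothetical sketch: spin until a flag is set, yielding to a sibling
 * hyper-thread where the platform provides a hint instruction. */
#include <stdatomic.h>

#if defined(__x86_64__) || defined(__i386__)
#  define CPU_SPINWAIT __asm__ volatile("pause")
#else
#  define CPU_SPINWAIT /* empty, as in the aarch64 headers above */
#endif

static void spin_until_set(atomic_int *flag) {
    while (atomic_load_explicit(flag, memory_order_acquire) == 0)
        CPU_SPINWAIT;
}
```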
*/ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +#define JEMALLOC_HAVE_ISSETUGID + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Only since 12.1-STABLE */ +/* Defined if pthread_setname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */ + +/* Only since 12.1-STABLE */ +/* Defined if pthread_getname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP */ + +/* Defined if pthread_get_name_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GET_NAME_NP + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#define JEMALLOC_MALLOC_THREAD_CLEANUP + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#define JEMALLOC_MUTEX_INIT_CB + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. 
*/ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#define JEMALLOC_LAZY_LOCK + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 16 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 29 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +/* #undef JEMALLOC_RETAIN */ + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. 
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +#define JEMALLOC_SYSCTL_VM_OVERCOMMIT +/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */ + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_NOCORE + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +/* #undef JEMALLOC_HAS_ALLOCA_H */ + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +/* #undef JEMALLOC_HAVE_SCHED_GETCPU */ + +/* GNU specific sched_setaffinity support */ +/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */ + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). 
+ */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_freebsd_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_freebsd_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..32cad025f5f --- /dev/null +++ b/contrib/jemalloc-cmake/include_freebsd_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,427 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */ +/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */ +/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */ +#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 1 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. 
+ */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +#define JEMALLOC_HAVE_ISSETUGID + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Only since 12.1-STABLE */ +/* Defined if pthread_setname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */ + +/* Only since 12.1-STABLE */ +/* Defined if pthread_getname_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP */ + +/* Defined if pthread_get_name_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GET_NAME_NP + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#define JEMALLOC_MALLOC_THREAD_CLEANUP + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#define JEMALLOC_MUTEX_INIT_CB + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. 
*/ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#define JEMALLOC_LAZY_LOCK + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +/* #undef JEMALLOC_RETAIN */ + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +#define JEMALLOC_SYSCTL_VM_OVERCOMMIT +/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */ + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. 
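The LG_PAGE and LG_HUGEPAGE values in the hunk above follow jemalloc's log2 convention: sizes are stored as exponents and recovered by shifting. A self-contained sketch of the arithmetic, using values taken from these headers (LG_PAGE 12 is a 4 KiB page, LG_PAGE 16 in the FreeBSD aarch64 config is 64 KiB, LG_HUGEPAGE 21 is a 2 MiB huge page):

```c
/* Illustration of the LG_* size convention used throughout these headers. */
#include <stdio.h>

int main(void) {
    const unsigned lg_page = 12, lg_hugepage = 21;
    size_t page = (size_t)1 << lg_page;         /* 4096 */
    size_t hugepage = (size_t)1 << lg_hugepage; /* 2097152 */
    /* Page-align an arbitrary size the way slab/extent math does. */
    size_t sz = 5000;
    size_t aligned = (sz + page - 1) & ~(page - 1); /* 8192 */
    printf("%zu %zu %zu\n", page, hugepage, aligned);
    return 0;
}
```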
+ * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_NOCORE + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +/* #undef JEMALLOC_HAS_ALLOCA_H */ + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +/* #undef JEMALLOC_HAVE_SCHED_GETCPU */ + +/* GNU specific sched_setaffinity support */ +/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */ + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. 
*/ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/README b/contrib/jemalloc-cmake/include_linux_aarch64/README deleted file mode 100644 index 2ab582803a2..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/README +++ /dev/null @@ -1,7 +0,0 @@ -Here are pre-generated files from jemalloc on Linux aarch64. -You can obtain these files by running ./autogen.sh inside jemalloc source directory. - -Added #define GNU_SOURCE -Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not. -Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard. -Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5. diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in similarity index 80% rename from contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h rename to contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in index 5e598348e72..ad535e6d773 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,12 +1,6 @@ /* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ #ifndef JEMALLOC_INTERNAL_DEFS_H_ #define JEMALLOC_INTERNAL_DEFS_H_ - - -#ifndef _GNU_SOURCE - #define _GNU_SOURCE -#endif - /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -19,13 +13,15 @@ * Define overrides for non-standard allocator-related functions if they are * present on the system. */ -#define JEMALLOC_OVERRIDE___LIBC_CALLOC -#define JEMALLOC_OVERRIDE___LIBC_FREE -#define JEMALLOC_OVERRIDE___LIBC_MALLOC -#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN -#define JEMALLOC_OVERRIDE___LIBC_REALLOC -#define JEMALLOC_OVERRIDE___LIBC_VALLOC -#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -51,29 +47,17 @@ #define LG_VADDR 48 /* Defined if C11 atomics are available. */ -#define JEMALLOC_C11_ATOMICS 1 +#define JEMALLOC_C11_ATOMICS /* Defined if GCC __atomic atomics are available. */ -#define JEMALLOC_GCC_ATOMIC_ATOMICS 1 +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS /* Defined if GCC __sync atomics are available. */ -#define JEMALLOC_GCC_SYNC_ATOMICS 1 - -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines). 
- */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS /* * Defined if __builtin_clz() and __builtin_clzl() are available. @@ -85,19 +69,13 @@ */ /* #undef JEMALLOC_OS_UNFAIR_LOCK */ -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if syscall(2) is usable. */ #define JEMALLOC_USE_SYSCALL /* * Defined if secure_getenv(3) is available. */ -#define JEMALLOC_HAVE_SECURE_GETENV +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ /* * Defined if issetugid(2) is available. @@ -110,21 +88,32 @@ /* Defined if pthread_setname_np(3) is available. */ #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1 +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1 +#define JEMALLOC_HAVE_CLOCK_MONOTONIC /* * Defined if mach_absolute_time() is available. */ /* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -187,6 +176,9 @@ /* Support utrace(2)-based tracing. */ /* #undef JEMALLOC_UTRACE */ +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + /* Support optional abort() on OOM. */ /* #undef JEMALLOC_XMALLOC */ @@ -202,6 +194,9 @@ /* One page is 2^LG_PAGE bytes. */ #define LG_PAGE 16 +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + /* * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the * system does not explicitly support huge pages; system calls that require @@ -243,6 +238,12 @@ #define JEMALLOC_INTERNAL_FFSL __builtin_ffsl #define JEMALLOC_INTERNAL_FFS __builtin_ffs +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + /* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. @@ -297,7 +298,7 @@ * MADV_FREE, though typically with higher * system overhead. */ -// #define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_FREE #define JEMALLOC_PURGE_MADVISE_DONTNEED #define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS @@ -309,17 +310,46 @@ */ #define JEMALLOC_MADVISE_DONTDUMP +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. 
*/ +#define JEMALLOC_HAVE_MPROTECT + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. */ /* #undef JEMALLOC_THP */ +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + /* Define if operating system has alloca.h header. */ -#define JEMALLOC_HAS_ALLOCA_H 1 +#define JEMALLOC_HAS_ALLOCA_H /* C99 restrict keyword supported. */ -#define JEMALLOC_HAS_RESTRICT 1 +#define JEMALLOC_HAS_RESTRICT /* For use by hash code. */ /* #undef JEMALLOC_BIG_ENDIAN */ @@ -360,7 +390,7 @@ /* * If defined, all the features necessary for background threads are present. */ -#define JEMALLOC_BACKGROUND_THREAD 1 +#define JEMALLOC_BACKGROUND_THREAD /* * If defined, jemalloc symbols are not exported (doesn't work when @@ -369,20 +399,29 @@ /* #undef JEMALLOC_EXPORT */ /* config.malloc_conf options string. */ -#define JEMALLOC_CONFIG_MALLOC_CONF "" +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ -#define JEMALLOC_IS_MALLOC 1 +#define JEMALLOC_IS_MALLOC /* * Defined if strerror_r returns char * if _GNU_SOURCE is defined. */ #define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE -/* - * popcount*() functions to use for bitmapping. - */ -#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl -#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h deleted file mode 100644 index d1389237a77..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h +++ /dev/null @@ -1,43 +0,0 @@ -/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ -/* Defined if __attribute__((...)) syntax is supported. */ -#define JEMALLOC_HAVE_ATTR - -/* Defined if alloc_size attribute is supported. */ -#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE - -/* Defined if format(printf, ...) attribute is supported. */ -#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF - -/* - * Define overrides for non-standard allocator-related functions if they are - * present on the system. - */ -#define JEMALLOC_OVERRIDE_MEMALIGN -#define JEMALLOC_OVERRIDE_VALLOC - -/* - * At least Linux omits the "const" in: - * - * size_t malloc_usable_size(const void *ptr); - * - * Match the operating system's prototype. - */ -#define JEMALLOC_USABLE_SIZE_CONST - -/* - * If defined, specify throw() for the public function prototypes when compiling - * with C++. 
The only justification for this is to match the prototypes that - * glibc defines. - */ -#define JEMALLOC_USE_CXX_THROW - -#ifdef _MSC_VER -# ifdef _WIN64 -# define LG_SIZEOF_PTR_WIN 3 -# else -# define LG_SIZEOF_PTR_WIN 2 -# endif -#endif - -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR 3 diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h deleted file mode 100644 index 34235894285..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include -#include -#include -#include - -#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756" -#define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 2 -#define JEMALLOC_VERSION_BUGFIX 1 -#define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756" -#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756 - -#define MALLOCX_LG_ALIGN(la) ((int)(la)) -#if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) -#else -# define MALLOCX_ALIGN(a) \ - ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)(((size_t)(a))>>32))+31)) -#endif -#define MALLOCX_ZERO ((int)0x40) -/* - * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 - * encodes MALLOCX_TCACHE_NONE. - */ -#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) -/* - * Bias arena index bits so that 0 encodes "use an automatically chosen arena". - */ -#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) - -/* - * Use as arena index in "arena..{purge,decay,dss}" and - * "stats.arenas..*" mallctl interfaces to select all arenas. This - * definition is intentionally specified in raw decimal format to support - * cpp-based string concatenation, e.g. - * - * #define STRINGIFY_HELPER(x) #x - * #define STRINGIFY(x) STRINGIFY_HELPER(x) - * - * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, - * 0); - */ -#define MALLCTL_ARENAS_ALL 4096 -/* - * Use as arena index in "stats.arenas..*" mallctl interfaces to select - * destroyed arenas. 
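The MALLOCX_* and MALLCTL_ARENAS_ALL macros removed above belong to jemalloc's non-standard public API (mallocx, dallocx, mallctl). A hedged usage sketch, assuming an unprefixed build (JEMALLOC_IS_MALLOC) and the conventional `<jemalloc/jemalloc.h>` install path; the purge mallctl string is the exact example given in the deleted comment:

```c
/* Sketch of combining the MALLOCX_* flag macros from the header above. */
#include <jemalloc/jemalloc.h>

#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)

int demo(void) {
    /* 64-byte-aligned, zeroed allocation that bypasses the thread cache. */
    void *p = mallocx(1024,
                      MALLOCX_ALIGN(64) | MALLOCX_ZERO | MALLOCX_TCACHE_NONE);
    if (p == NULL)
        return 1;
    dallocx(p, 0);
    /* Purge unused dirty pages of every arena, per the comment above. */
    return mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge",
                   NULL, NULL, NULL, 0);
}
```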
- */ -#define MALLCTL_ARENAS_DESTROYED 4097 - -#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) -# define JEMALLOC_CXX_THROW throw() -#else -# define JEMALLOC_CXX_THROW -#endif - -#if defined(_MSC_VER) -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# ifndef JEMALLOC_EXPORT -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# endif -# define JEMALLOC_FORMAT_ARG(i) -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE __declspec(noinline) -# ifdef __cplusplus -# define JEMALLOC_NOTHROW __declspec(nothrow) -# else -# define JEMALLOC_NOTHROW -# endif -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_RESTRICT_RETURN __declspec(restrict) -# if _MSC_VER >= 1900 && !defined(__EDG__) -# define JEMALLOC_ALLOCATOR __declspec(allocator) -# else -# define JEMALLOC_ALLOCATOR -# endif -#elif defined(JEMALLOC_HAVE_ATTR) -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG -# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) -# else -# define JEMALLOC_FORMAT_ARG(i) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# define JEMALLOC_EXPORT -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE -# define JEMALLOC_NOTHROW -# define JEMALLOC_SECTION(s) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#endif diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h deleted file mode 100644 index ff025e30fa7..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * The je_ prefix on the following public symbol declarations is an artifact - * of namespace management, and should be omitted in application code unless - * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). 
- */ -extern JEMALLOC_EXPORT const char *je_malloc_conf; -extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, - const char *s); - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_malloc(size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_calloc(size_t num, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_posix_memalign(void **memptr, - size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_aligned_alloc(size_t alignment, - size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) - JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_realloc(void *ptr, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) - JEMALLOC_CXX_THROW; - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_mallocx(size_t size, int flags) - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_rallocx(void *ptr, size_t size, - int flags) JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, - size_t extra, int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void *ptr, - int flags) JEMALLOC_ATTR(pure); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, - int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) - JEMALLOC_ATTR(pure); - -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, - size_t *mibp, size_t *miblenp); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( - void (*write_cb)(void *, const char *), void *je_cbopaque, - const char *opts); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_memalign(size_t alignment, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_valloc(size_t size) JEMALLOC_CXX_THROW - JEMALLOC_ATTR(malloc); -#endif diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h deleted file mode 100644 index 1a58874306e..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h +++ /dev/null @@ -1,77 +0,0 @@ -typedef struct extent_hooks_s extent_hooks_t; - -/* - * void * - * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); - */ -typedef void 
*(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, - bool *, unsigned); - -/* - * bool - * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, - * bool committed, unsigned arena_ind); - */ -typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); - -/* - * void - * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, - * bool committed, unsigned arena_ind); - */ -typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); - -/* - * bool - * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); - -/* - * bool - * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, - size_t, unsigned); - -/* - * bool - * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); - -/* - * bool - * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); - */ -typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - bool, unsigned); - -/* - * bool - * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); - */ -typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, - bool, unsigned); - -struct extent_hooks_s { - extent_alloc_t *alloc; - extent_dalloc_t *dalloc; - extent_destroy_t *destroy; - extent_commit_t *commit; - extent_decommit_t *decommit; - extent_purge_t *purge_lazy; - extent_purge_t *purge_forced; - extent_split_t *split; - extent_merge_t *merge; -}; diff --git a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..12890f80ef1 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,427 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. 
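/*
 * A minimal usage sketch of the extent_hooks_t table deleted above: clone the
 * default hooks of an arena, override one slot, and write the table back
 * through mallctl. The arena index 0 and the bare-mmap allocator are
 * illustrative assumptions, not something this change prescribes.
 */
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>
#include <jemalloc/jemalloc.h>

static extent_hooks_t custom_hooks;

static void *custom_alloc(extent_hooks_t *hooks, void *new_addr, size_t size,
                          size_t alignment, bool *zero, bool *commit,
                          unsigned arena_ind) {
    (void)hooks; (void)new_addr; (void)alignment; (void)arena_ind;
    void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) return NULL;
    *zero = true;   /* anonymous mappings are zero-filled */
    *commit = true; /* and already committed */
    return p;
}

static void install_hooks(void) {
    extent_hooks_t *defaults;
    size_t sz = sizeof(defaults);
    /* Read arena 0's current hook table, then override only `alloc`. */
    mallctl("arena.0.extent_hooks", &defaults, &sz, NULL, 0);
    custom_hooks = *defaults;
    custom_hooks.alloc = custom_alloc;
    extent_hooks_t *hooks = &custom_hooks;
    mallctl("arena.0.extent_hooks", NULL, NULL, &hooks, sizeof(hooks));
}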
+ * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 64 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_ATFORK */ + +/* Defined if pthread_setname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. 
*/ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 16 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. 
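/*
 * The LG_* constants above are log2 exponents, which is why this ppc64le
 * config differs from the x86_64 one: 64 KiB base pages (the common ppc64le
 * kernel default) instead of 4 KiB. Illustrative expansion of how jemalloc
 * derives the byte sizes:
 */
#define PAGE     ((size_t)1 << 16) /* 1 << LG_PAGE     == 65536 (64 KiB)  */
#define HUGEPAGE ((size_t)1 << 21) /* 1 << LG_HUGEPAGE == 2097152 (2 MiB) */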
We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +#define JEMALLOC_DEFINE_MADVISE_FREE + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#define JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook. */ +#define JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. 
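/*
 * What the purge settings above select, sketched with raw madvise(2) calls
 * (Linux semantics assumed; jemalloc's real implementation adds fallbacks
 * and error handling):
 */
#include <stddef.h>
#include <sys/mman.h>

/* Lazy purge: pages stay mapped and are reclaimed under memory pressure. */
static void purge_lazy(void *addr, size_t len) {
    (void)madvise(addr, len, MADV_FREE);
}

/* Forced purge: pages are dropped now; the next touch is demand-zeroed. */
static void purge_forced(void *addr, size_t len) {
    (void)madvise(addr, len, MADV_DONTNEED);
}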
*/ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..ad535e6d773 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,427 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. 
*/ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). 
*/ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 16 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 29 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. 
+ * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#define JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook. */ +#define JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? 
*/ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/README b/contrib/jemalloc-cmake/include_linux_x86_64/README deleted file mode 100644 index bf7663bda8d..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/README +++ /dev/null @@ -1,7 +0,0 @@ -Here are pre-generated files from jemalloc on Linux x86_64. -You can obtain these files by running ./autogen.sh inside jemalloc source directory. - -Added #define GNU_SOURCE -Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not. -Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard. -Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5. diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in similarity index 78% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h rename to contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index 7c21fa79397..99ab2d53ca9 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,11 +1,6 @@ /* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ #ifndef JEMALLOC_INTERNAL_DEFS_H_ #define JEMALLOC_INTERNAL_DEFS_H_ - -#ifndef _GNU_SOURCE - #define _GNU_SOURCE -#endif - /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -18,13 +13,15 @@ * Define overrides for non-standard allocator-related functions if they are * present on the system. */ -#define JEMALLOC_OVERRIDE___LIBC_CALLOC -#define JEMALLOC_OVERRIDE___LIBC_FREE -#define JEMALLOC_OVERRIDE___LIBC_MALLOC -#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN -#define JEMALLOC_OVERRIDE___LIBC_REALLOC -#define JEMALLOC_OVERRIDE___LIBC_VALLOC -#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -50,29 +47,17 @@ #define LG_VADDR 48 /* Defined if C11 atomics are available. */ -#define JEMALLOC_C11_ATOMICS 1 +#define JEMALLOC_C11_ATOMICS /* Defined if GCC __atomic atomics are available. */ -#define JEMALLOC_GCC_ATOMIC_ATOMICS 1 +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS /* Defined if GCC __sync atomics are available. 
*/ -#define JEMALLOC_GCC_SYNC_ATOMICS 1 - -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS /* * Defined if __builtin_clz() and __builtin_clzl() are available. @@ -84,20 +69,13 @@ */ /* #undef JEMALLOC_OS_UNFAIR_LOCK */ -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if syscall(2) is usable. */ #define JEMALLOC_USE_SYSCALL /* * Defined if secure_getenv(3) is available. */ -// Don't want dependency on newer GLIBC -//#define JEMALLOC_HAVE_SECURE_GETENV +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ /* * Defined if issetugid(2) is available. @@ -110,21 +88,32 @@ /* Defined if pthread_setname_np(3) is available. */ #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1 +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1 +#define JEMALLOC_HAVE_CLOCK_MONOTONIC /* * Defined if mach_absolute_time() is available. */ /* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -160,6 +149,9 @@ /* JEMALLOC_STATS enables statistics calculation. */ #define JEMALLOC_STATS +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + /* JEMALLOC_PROF enables allocation profiling. */ /* #undef JEMALLOC_PROF */ @@ -184,6 +176,9 @@ /* Support utrace(2)-based tracing. */ /* #undef JEMALLOC_UTRACE */ +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + /* Support optional abort() on OOM. */ /* #undef JEMALLOC_XMALLOC */ @@ -199,6 +194,9 @@ /* One page is 2^LG_PAGE bytes. */ #define LG_PAGE 12 +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + /* * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the * system does not explicitly support huge pages; system calls that require @@ -240,6 +238,12 @@ #define JEMALLOC_INTERNAL_FFSL __builtin_ffsl #define JEMALLOC_INTERNAL_FFS __builtin_ffs +/* + * popcount*() functions to use for bitmapping. 
+ */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + /* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. @@ -252,6 +256,12 @@ */ /* #undef JEMALLOC_LOG */ +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ @@ -288,7 +298,7 @@ * MADV_FREE, though typically with higher * system overhead. */ -//#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_FREE #define JEMALLOC_PURGE_MADVISE_DONTNEED #define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS @@ -300,17 +310,46 @@ */ #define JEMALLOC_MADVISE_DONTDUMP +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. */ /* #undef JEMALLOC_THP */ +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + /* Define if operating system has alloca.h header. */ -#define JEMALLOC_HAS_ALLOCA_H 1 +#define JEMALLOC_HAS_ALLOCA_H /* C99 restrict keyword supported. */ -#define JEMALLOC_HAS_RESTRICT 1 +#define JEMALLOC_HAS_RESTRICT /* For use by hash code. */ /* #undef JEMALLOC_BIG_ENDIAN */ @@ -351,7 +390,7 @@ /* * If defined, all the features necessary for background threads are present. */ -#define JEMALLOC_BACKGROUND_THREAD 1 +#define JEMALLOC_BACKGROUND_THREAD /* * If defined, jemalloc symbols are not exported (doesn't work when @@ -360,20 +399,29 @@ /* #undef JEMALLOC_EXPORT */ /* config.malloc_conf options string. */ -#define JEMALLOC_CONFIG_MALLOC_CONF "" +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ -#define JEMALLOC_IS_MALLOC 1 +#define JEMALLOC_IS_MALLOC /* * Defined if strerror_r returns char * if _GNU_SOURCE is defined. */ #define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE -/* - * popcount*() functions to use for bitmapping. - */ -#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl -#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. 
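/*
 * The rename from jemalloc_internal_defs.h to .h.in in this hunk is what makes
 * the "@JEMALLOC_CONFIG_MALLOC_CONF@" token work: it is a configure-time
 * placeholder the build substitutes, replacing the previously hard-coded "".
 * A hypothetical substitution result and how jemalloc consumes it:
 */
#define JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,muzzy_decay_ms:5000"
/* Baked in as the lowest-priority option source (see jemalloc_preamble.h): */
static const char *const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;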
*/ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h deleted file mode 100644 index d79551e1f25..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h +++ /dev/null @@ -1,216 +0,0 @@ -#ifndef JEMALLOC_PREAMBLE_H -#define JEMALLOC_PREAMBLE_H - -#include "jemalloc_internal_defs.h" -#include "jemalloc/internal/jemalloc_internal_decls.h" - -#ifdef JEMALLOC_UTRACE -#include <sys/ktrace.h> -#endif - -#define JEMALLOC_NO_DEMANGLE -#ifdef JEMALLOC_JET -# undef JEMALLOC_IS_MALLOC -# define JEMALLOC_N(n) jet_##n -# include "jemalloc/internal/public_namespace.h" -# define JEMALLOC_NO_RENAME -# include "jemalloc/jemalloc.h" -# undef JEMALLOC_NO_RENAME -#else -# define JEMALLOC_N(n) je_##n -# include "jemalloc/jemalloc.h" -#endif - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include <libkern/OSAtomic.h> -#endif - -#ifdef JEMALLOC_ZONE -#include <mach/mach_error.h> -#include <mach/mach_init.h> -#include <mach/vm_map.h> -#endif - -#include "jemalloc/internal/jemalloc_internal_macros.h" - -/* - * Note that the ordering matters here; the hook itself is name-mangled. We - * want the inclusion of hooks to happen early, so that we hook as much as - * possible. - */ -#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE -# ifndef JEMALLOC_JET -# include "jemalloc/internal/private_namespace.h" -# else -# include "jemalloc/internal/private_namespace_jet.h" -# endif -#endif -#include "jemalloc/internal/test_hooks.h" - -#ifdef JEMALLOC_DEFINE_MADVISE_FREE -# define JEMALLOC_MADV_FREE 8 -#endif - -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; -static const bool have_dss = -#ifdef JEMALLOC_DSS - true -#else - false -#endif - ; -static const bool have_madvise_huge = -#ifdef JEMALLOC_HAVE_MADVISE_HUGE - true -#else - false -#endif - ; -static const bool config_fill = -#ifdef JEMALLOC_FILL - true -#else - false -#endif - ; -static const bool config_lazy_lock = -#ifdef JEMALLOC_LAZY_LOCK - true -#else - false -#endif - ; -static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; -static const bool config_prof = -#ifdef JEMALLOC_PROF - true -#else - false -#endif - ; -static const bool config_prof_libgcc = -#ifdef JEMALLOC_PROF_LIBGCC - true -#else - false -#endif - ; -static const bool config_prof_libunwind = -#ifdef JEMALLOC_PROF_LIBUNWIND - true -#else - false -#endif - ; -static const bool maps_coalesce = -#ifdef JEMALLOC_MAPS_COALESCE - true -#else - false -#endif - ; -static const bool config_stats = -#ifdef JEMALLOC_STATS - true -#else - false -#endif - ; -static const bool config_tls = -#ifdef JEMALLOC_TLS - true -#else - false -#endif - ; -static const bool config_utrace = -#ifdef JEMALLOC_UTRACE - true -#else - false -#endif - ; -static const bool config_xmalloc = -#ifdef JEMALLOC_XMALLOC - true -#else - false -#endif - ; -static const bool config_cache_oblivious = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - true -#else - false -#endif - ; -/* - * Undocumented, for jemalloc development use only at the moment. See the note - * in jemalloc/internal/log.h. - */ -static const bool config_log = -#ifdef JEMALLOC_LOG - true -#else - false -#endif - ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU -/* Currently percpu_arena depends on sched_getcpu.
*/ -#define JEMALLOC_PERCPU_ARENA -#endif -static const bool have_percpu_arena = -#ifdef JEMALLOC_PERCPU_ARENA - true -#else - false -#endif - ; -/* - * Undocumented, and not recommended; the application should take full - * responsibility for tracking provenance. - */ -static const bool force_ivsalloc = -#ifdef JEMALLOC_FORCE_IVSALLOC - true -#else - false -#endif - ; -static const bool have_background_thread = -#ifdef JEMALLOC_BACKGROUND_THREAD - true -#else - false -#endif - ; - -#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 -#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 - -/* - * Are extra safety checks enabled; things like checking the size of sized - * deallocations, double-frees, etc. - */ -static const bool config_opt_safety_checks = -#ifdef JEMALLOC_OPT_SAFETY_CHECKS - true -#elif defined(JEMALLOC_DEBUG) - /* - * This lets us only guard safety checks by one flag instead of two; fast - * checks can guard solely by config_opt_safety_checks and run in debug mode - * too. - */ - true -#else - false -#endif - ; - -#endif /* JEMALLOC_PREAMBLE_H */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h deleted file mode 100644 index 34235894285..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h +++ /dev/null @@ -1,129 +0,0 @@ -#include <stdlib.h> -#include <stdbool.h> -#include <stdint.h> -#include <limits.h> -#include <strings.h> - -#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756" -#define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 2 -#define JEMALLOC_VERSION_BUGFIX 1 -#define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756" -#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756 - -#define MALLOCX_LG_ALIGN(la) ((int)(la)) -#if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) -#else -# define MALLOCX_ALIGN(a) \ - ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)(((size_t)(a))>>32))+31)) -#endif -#define MALLOCX_ZERO ((int)0x40) -/* - * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 - * encodes MALLOCX_TCACHE_NONE. - */ -#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) -/* - * Bias arena index bits so that 0 encodes "use an automatically chosen arena". - */ -#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) - -/* - * Use as arena index in "arena.<i>.{purge,decay,dss}" and - * "stats.arenas.<i>.*" mallctl interfaces to select all arenas. This - * definition is intentionally specified in raw decimal format to support - * cpp-based string concatenation, e.g. - * - * #define STRINGIFY_HELPER(x) #x - * #define STRINGIFY(x) STRINGIFY_HELPER(x) - * - * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, - * 0); - */ -#define MALLCTL_ARENAS_ALL 4096 -/* - * Use as arena index in "stats.arenas.<i>.*" mallctl interfaces to select - * destroyed arenas.
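/*
 * Usage sketch for the flag macros in the header deleted above (the same
 * macros remain available from jemalloc's public jemalloc.h): size-class
 * flags pack into one int, and MALLCTL_ARENAS_ALL stays in raw decimal
 * precisely so it can be stringified as the source comment shows.
 */
#include <jemalloc/jemalloc.h>

#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)

static void mallocx_example(void) {
    /* 64-byte-aligned, zero-initialized allocation: flags pack into 0x46. */
    void *p = mallocx(4096, MALLOCX_ALIGN(64) | MALLOCX_ZERO);
    if (p != NULL) {
        /* Sized deallocation must pass the same alignment flag. */
        sdallocx(p, 4096, MALLOCX_ALIGN(64));
    }
    /* Purge unused dirty pages of all arenas at once. */
    mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge",
            NULL, NULL, NULL, 0);
}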
- */ -#define MALLCTL_ARENAS_DESTROYED 4097 - -#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) -# define JEMALLOC_CXX_THROW throw() -#else -# define JEMALLOC_CXX_THROW -#endif - -#if defined(_MSC_VER) -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# ifndef JEMALLOC_EXPORT -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# endif -# define JEMALLOC_FORMAT_ARG(i) -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE __declspec(noinline) -# ifdef __cplusplus -# define JEMALLOC_NOTHROW __declspec(nothrow) -# else -# define JEMALLOC_NOTHROW -# endif -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_RESTRICT_RETURN __declspec(restrict) -# if _MSC_VER >= 1900 && !defined(__EDG__) -# define JEMALLOC_ALLOCATOR __declspec(allocator) -# else -# define JEMALLOC_ALLOCATOR -# endif -#elif defined(JEMALLOC_HAVE_ATTR) -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG -# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) -# else -# define JEMALLOC_FORMAT_ARG(i) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# define JEMALLOC_EXPORT -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE -# define JEMALLOC_NOTHROW -# define JEMALLOC_SECTION(s) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#endif diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h deleted file mode 100644 index ff025e30fa7..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * The je_ prefix on the following public symbol declarations is an artifact - * of namespace management, and should be omitted in application code unless - * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). 
- */ -extern JEMALLOC_EXPORT const char *je_malloc_conf; -extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, - const char *s); - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_malloc(size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_calloc(size_t num, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_posix_memalign(void **memptr, - size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_aligned_alloc(size_t alignment, - size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) - JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_realloc(void *ptr, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) - JEMALLOC_CXX_THROW; - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_mallocx(size_t size, int flags) - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_rallocx(void *ptr, size_t size, - int flags) JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, - size_t extra, int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void *ptr, - int flags) JEMALLOC_ATTR(pure); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, - int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) - JEMALLOC_ATTR(pure); - -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, - size_t *mibp, size_t *miblenp); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( - void (*write_cb)(void *, const char *), void *je_cbopaque, - const char *opts); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_memalign(size_t alignment, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_valloc(size_t size) JEMALLOC_CXX_THROW - JEMALLOC_ATTR(malloc); -#endif diff --git a/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..684d4debb14 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,428 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. 
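/*
 * The mallctl()/mallctlbymib() prototypes above are jemalloc's control-plane
 * API. A small sketch of the common stats-reading pattern (meaningful here
 * because the configs in this change all define JEMALLOC_STATS): bump "epoch"
 * to refresh the cached counters, then read them.
 */
#include <stdint.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

static void print_allocated(void) {
    uint64_t epoch = 1;
    size_t sz = sizeof(epoch);
    mallctl("epoch", &epoch, &sz, &epoch, sz); /* refresh cached stats */

    size_t allocated;
    sz = sizeof(allocated);
    if (mallctl("stats.allocated", &allocated, &sz, NULL, 0) == 0)
        printf("jemalloc stats.allocated = %zu bytes\n", allocated);
}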
*/ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 1 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 48 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +/* #undef JEMALLOC_HAVE_SECURE_GETENV */ + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/// musl doesn't support it +/* Defined if pthread_getname_np(3) is available. */ +/* #define JEMALLOC_HAVE_PTHREAD_GETNAME_NP */ + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. 
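/*
 * Unlike the ppc64le/riscv64 configs above (empty CPU_SPINWAIT,
 * HAVE_CPU_SPINWAIT 0), this x86_64 config gets a real spin hint. Sketch of
 * how such a macro is consumed inside a polling loop:
 */
static void spin_until_set(volatile int *flag) {
    while (!*flag) {
        __asm__ volatile("pause"); /* CPU_SPINWAIT: yield to the SMT sibling */
    }
}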
At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 21 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. 
*/ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. 
+ * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +/* #undef JEMALLOC_BIG_ENDIAN */ + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#define JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook. */ +#define JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +/* #undef JEMALLOC_ENABLE_CXX */ + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. 
*/ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/kvproto b/contrib/kvproto index 12e2f5a9d16..a5d4ffd2ba3 160000 --- a/contrib/kvproto +++ b/contrib/kvproto @@ -1 +1 @@ -Subproject commit 12e2f5a9d167f46602804840857ddc8ff06dc695 +Subproject commit a5d4ffd2ba337dad0bc99e9fb53bf665864a3f3b diff --git a/contrib/prometheus-cpp b/contrib/prometheus-cpp index ca1f3463e74..76470b3ec02 160000 --- a/contrib/prometheus-cpp +++ b/contrib/prometheus-cpp @@ -1 +1 @@ -Subproject commit ca1f3463e74d957d1cccddd4a1a29e3e5d34bd83 +Subproject commit 76470b3ec024c8214e1f4253fb1f4c0b28d3df94 diff --git a/contrib/prometheus-cpp-cmake/pull/CMakeLists.txt b/contrib/prometheus-cpp-cmake/pull/CMakeLists.txt index daebd1b7c5a..993618e16ac 100644 --- a/contrib/prometheus-cpp-cmake/pull/CMakeLists.txt +++ b/contrib/prometheus-cpp-cmake/pull/CMakeLists.txt @@ -12,9 +12,18 @@ if(ENABLE_COMPRESSION) endif() add_library(pull + ${PROMETHEUS_SRC_DIR}/pull/src/basic_auth.cc + ${PROMETHEUS_SRC_DIR}/pull/src/basic_auth.h + ${PROMETHEUS_SRC_DIR}/pull/src/endpoint.cc + ${PROMETHEUS_SRC_DIR}/pull/src/endpoint.h ${PROMETHEUS_SRC_DIR}/pull/src/exposer.cc ${PROMETHEUS_SRC_DIR}/pull/src/handler.cc ${PROMETHEUS_SRC_DIR}/pull/src/handler.h + ${PROMETHEUS_SRC_DIR}/pull/src/metrics_collector.cc + ${PROMETHEUS_SRC_DIR}/pull/src/metrics_collector.h + + ${PROMETHEUS_SRC_DIR}/pull/src/detail/base64.h + $<$:$> ) diff --git a/contrib/prometheus-cpp-cmake/push/CMakeLists.txt b/contrib/prometheus-cpp-cmake/push/CMakeLists.txt index 71dad9fb812..b776d17bdaf 100644 --- a/contrib/prometheus-cpp-cmake/push/CMakeLists.txt +++ b/contrib/prometheus-cpp-cmake/push/CMakeLists.txt @@ -3,6 +3,8 @@ if(NOT CURL_FOUND) endif() add_library(push + ${PROMETHEUS_SRC_DIR}/push/src/curl_wrapper.cc + ${PROMETHEUS_SRC_DIR}/push/src/curl_wrapper.h ${PROMETHEUS_SRC_DIR}/push/src/gateway.cc ) diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index ca2f51f94e5..42ede65b66a 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit ca2f51f94e55bdd23749dcc02ab4afb94eeb5ae5 +Subproject commit 42ede65b66aed69debc80b60a31c63e41010d450 diff --git a/contrib/tiflash-proxy-cmake/CMakeLists.txt b/contrib/tiflash-proxy-cmake/CMakeLists.txt index e243ecba37c..e3e2df379a1 100644 --- a/contrib/tiflash-proxy-cmake/CMakeLists.txt +++ b/contrib/tiflash-proxy-cmake/CMakeLists.txt @@ -4,7 +4,11 @@ file(GLOB_RECURSE _TIFLASH_PROXY_SRCS "${_TIFLASH_PROXY_SOURCE_DIR}/*.rs") list(FILTER _TIFLASH_PROXY_SRCS EXCLUDE REGEX ${_TIFLASH_PROXY_SOURCE_DIR}/target/.*) # use `CFLAGS=-w CXXFLAGS=-w` to inhibit warning messages. 
-set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} CFLAGS=-w CXXFLAGS=-w) +if (TIFLASH_LLVM_TOOLCHAIN) + set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} "CFLAGS=-w -fuse-ld=lld" "CXXFLAGS=-w -fuse-ld=lld -stdlib=libc++") +else() + set(TIFLASH_RUST_ENV CMAKE=${CMAKE_COMMAND} CFLAGS=-w CXXFLAGS=-w) +endif() if(TIFLASH_LLVM_TOOLCHAIN AND USE_LIBCXX) set(TIFLASH_RUST_LINKER ${CMAKE_CURRENT_BINARY_DIR}/tiflash-linker) diff --git a/contrib/tipb b/contrib/tipb index bfb5c2c5518..0f4f873beca 160000 --- a/contrib/tipb +++ b/contrib/tipb @@ -1 +1 @@ -Subproject commit bfb5c2c55188c254018d3cf77bfad73b4d4b77ec +Subproject commit 0f4f873beca8d5078dde0a23d15ad5ce3188ed0d diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index cce11bd6997..0df79f89a84 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -91,12 +91,10 @@ add_headers_and_sources(dbms src/Storages/Page/V2/VersionSet) add_headers_and_sources(dbms src/Storages/Page/V2/gc) add_headers_and_sources(dbms src/WindowFunctions) add_headers_and_sources(dbms src/TiDB/Schema) -if (ENABLE_V3_PAGESTORAGE) - add_headers_and_sources(dbms src/Storages/Page/V3) - add_headers_and_sources(dbms src/Storages/Page/V3/LogFile) - add_headers_and_sources(dbms src/Storages/Page/V3/WAL) - add_headers_and_sources(dbms src/Storages/Page/V3/spacemap) -endif() +add_headers_and_sources(dbms src/Storages/Page/V3) +add_headers_and_sources(dbms src/Storages/Page/V3/LogFile) +add_headers_and_sources(dbms src/Storages/Page/V3/WAL) +add_headers_and_sources(dbms src/Storages/Page/V3/spacemap) add_headers_and_sources(dbms src/Storages/Page/) add_headers_and_sources(dbms src/TiDB) add_headers_and_sources(dbms src/Client) @@ -318,11 +316,15 @@ if (ENABLE_TESTS) ${TiFlash_SOURCE_DIR}/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp ) target_include_directories(bench_dbms BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR} ${benchmark_SOURCE_DIR}/include) + target_compile_definitions(bench_dbms PUBLIC DBMS_PUBLIC_GTEST) target_link_libraries(bench_dbms gtest dbms test_util_bench_main benchmark clickhouse_functions) if (ENABLE_TIFLASH_DTWORKLOAD) target_link_libraries(bench_dbms dt-workload-lib) endif () + if (ENABLE_TIFLASH_PAGEWORKLOAD) + target_link_libraries(bench_dbms page-workload-lib) + endif () add_check(bench_dbms) endif () diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 61a2843ac59..e21bde19a47 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -38,12 +38,6 @@ #include #endif - -namespace CurrentMetrics -{ -extern const Metric SendExternalTables; -} - namespace DB { namespace ErrorCodes @@ -434,8 +428,6 @@ void Connection::sendExternalTablesData(ExternalTablesData & data) size_t maybe_compressed_out_bytes = maybe_compressed_out ? 
maybe_compressed_out->count() : 0;
 size_t rows = 0;

- CurrentMetrics::Increment metric_increment{CurrentMetrics::SendExternalTables};
-
 for (auto & elem : data)
 {
 elem.first->readPrefix();
diff --git a/dbms/src/Client/ConnectionPoolWithFailover.cpp b/dbms/src/Client/ConnectionPoolWithFailover.cpp
index a9b6825a3fe..179b2d92c0e 100644
--- a/dbms/src/Client/ConnectionPoolWithFailover.cpp
+++ b/dbms/src/Client/ConnectionPoolWithFailover.cpp
@@ -20,13 +20,6 @@
#include
#include
-
-namespace ProfileEvents
-{
-extern const Event DistributedConnectionMissingTable;
-extern const Event DistributedConnectionStaleReplica;
-} // namespace ProfileEvents
-
namespace DB
{
namespace ErrorCodes
@@ -50,7 +43,7 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
 hostname_differences.resize(nested_pools.size());
 for (size_t i = 0; i < nested_pools.size(); ++i)
 {
- ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
+ auto & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
 hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
 }
 }
@@ -187,7 +180,6 @@ ConnectionPoolWithFailover::tryGetEntry(
 fail_message = "There is no table " + table_to_check->database + "." + table_to_check->table + " on server: " + result.entry->getDescription();
 LOG_WARNING(log, fail_message);
- ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable);
 return result;
 }
@@ -217,7 +209,6 @@ ConnectionPoolWithFailover::tryGetEntry(
 table_to_check->database,
 table_to_check->table,
 delay);
- ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica);
 }
 }
 catch (const Exception & e)
diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h
index 27283c0f24a..da071507a72 100644
--- a/dbms/src/Columns/ColumnConst.h
+++ b/dbms/src/Columns/ColumnConst.h
@@ -233,7 +233,8 @@ class ColumnConst final : public COWPtrHelper<IColumn, ColumnConst>
 template <typename T>
 T getValue() const
 {
- return getField().safeGet<typename NearestFieldType<T>::Type>();
+ auto && tmp = getField();
+ return std::move(tmp.safeGet<typename NearestFieldType<T>::Type>());
 }
};
diff --git a/dbms/src/Common/Arena.h b/dbms/src/Common/Arena.h
index c61ebfca8aa..ebaaf607a6d 100644
--- a/dbms/src/Common/Arena.h
+++ b/dbms/src/Common/Arena.h
@@ -24,13 +24,6 @@
#include
#include
-
-namespace ProfileEvents
-{
-extern const Event ArenaAllocChunks;
-extern const Event ArenaAllocBytes;
-} // namespace ProfileEvents
-
namespace DB
{
/** Memory pool to append something. For example, short strings.
@@ -55,9 +48,6 @@ class Arena : private boost::noncopyable
 Chunk(size_t size_, Chunk * prev_)
 {
- ProfileEvents::increment(ProfileEvents::ArenaAllocChunks);
- ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_);
-
 begin = reinterpret_cast<char *>(Allocator::alloc(size_));
 pos = begin;
 end = begin + size_;
diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp
index 8a2f111d882..8673784c590 100644
--- a/dbms/src/Common/CurrentMetrics.cpp
+++ b/dbms/src/Common/CurrentMetrics.cpp
@@ -17,36 +17,13 @@
/// Available metrics. Add something here as you wish.
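(For context: the metric and event lists in these files use the X-macro idiom, where each `M(NAME)` entry expands once into an enum value and once into its name string. Below is a minimal, self-contained sketch of the pattern with hypothetical metric names — it is not the project's code; the file's real list follows.)

```cpp
// Sketch of the X-macro pattern behind CurrentMetrics/ProfileEvents.
// Names here are hypothetical; only the expansion technique matters.
#define APPLY_FOR_DEMO_METRICS(M) \
    M(OpenFileForRead)            \
    M(MemoryTracking)

// First expansion: one enum value per entry.
enum DemoMetric
{
#define M(NAME) NAME,
    APPLY_FOR_DEMO_METRICS(M)
#undef M
    DemoMetricCount
};

// Second expansion: a parallel table of the entries' names.
static const char * demo_metric_names[] = {
#define M(NAME) #NAME,
    APPLY_FOR_DEMO_METRICS(M)
#undef M
};
```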
#define APPLY_FOR_METRICS(M) \ - M(Query) \ - M(Merge) \ - M(ReplicatedFetch) \ - M(ReplicatedSend) \ - M(ReplicatedChecks) \ - M(BackgroundPoolTask) \ - M(DiskSpaceReservedForMerge) \ - M(DistributedSend) \ - M(QueryPreempted) \ - M(TCPConnection) \ - M(HTTPConnection) \ - M(InterserverConnection) \ M(OpenFileForRead) \ M(OpenFileForWrite) \ M(OpenFileForReadWrite) \ - M(SendExternalTables) \ - M(QueryThread) \ - M(ReadonlyReplica) \ - M(LeaderReplica) \ M(MemoryTracking) \ M(MemoryTrackingInBackgroundProcessingPool) \ - M(MemoryTrackingForMerges) \ - M(LeaderElection) \ - M(EphemeralNode) \ - M(DelayedInserts) \ - M(ContextLockWait) \ - M(StorageBufferRows) \ - M(StorageBufferBytes) \ - M(DictCacheRequests) \ - M(Revision) \ + M(LogicalCPUCores) \ + M(MemoryCapacity) \ M(PSMVCCNumSnapshots) \ M(PSMVCCSnapshotsList) \ M(RWLockWaitingReaders) \ diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index c6c3caa44ad..ad5010d7826 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -12,7 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include +#include +#include +#include +#include #include #include @@ -21,7 +27,6 @@ namespace DB { std::unordered_map> FailPointHelper::fail_point_wait_channels; - #define APPLY_FOR_FAILPOINTS_ONCE(M) \ M(exception_between_drop_meta_and_data) \ M(exception_between_alter_data_and_meta) \ @@ -85,33 +90,54 @@ std::unordered_map> FailPointHelper::f M(force_remote_read_for_batch_cop) \ M(force_context_path) \ M(force_slow_page_storage_snapshot_release) \ - M(force_change_all_blobs_to_read_only) - -#define APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) \ - M(pause_with_alter_locks_acquired) \ - M(hang_in_execution) \ - M(pause_before_dt_background_delta_merge) \ - M(pause_until_dt_background_delta_merge) \ - M(pause_before_apply_raft_cmd) \ - M(pause_before_apply_raft_snapshot) \ - M(pause_until_apply_raft_snapshot) \ + M(force_change_all_blobs_to_read_only) \ + M(unblock_query_init_after_write) + + +#define APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M) \ + M(pause_with_alter_locks_acquired) \ + M(hang_in_execution) \ + M(pause_before_dt_background_delta_merge) \ + M(pause_until_dt_background_delta_merge) \ + M(pause_before_apply_raft_cmd) \ + M(pause_before_apply_raft_snapshot) \ + M(pause_until_apply_raft_snapshot) \ M(pause_after_copr_streams_acquired_once) -#define APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M) \ - M(pause_when_reading_from_dt_stream) \ - M(pause_when_writing_to_dt_store) \ - M(pause_when_ingesting_to_dt_store) \ - M(pause_when_altering_dt_store) \ - M(pause_after_copr_streams_acquired) \ - M(pause_before_server_merge_one_delta) +#define APPLY_FOR_PAUSEABLE_FAILPOINTS(M) \ + M(pause_when_reading_from_dt_stream) \ + M(pause_when_writing_to_dt_store) \ + M(pause_when_ingesting_to_dt_store) \ + M(pause_when_altering_dt_store) \ + M(pause_after_copr_streams_acquired) \ + M(pause_before_server_merge_one_delta) \ + M(pause_query_init) + + +#define APPLY_FOR_RANDOM_FAILPOINTS(M) \ + M(random_tunnel_wait_timeout_failpoint) \ + M(random_tunnel_init_rpc_failure_failpoint) \ + M(random_receiver_sync_msg_push_failure_failpoint) \ + M(random_receiver_async_msg_push_failure_failpoint) \ + M(random_limit_check_failpoint) \ + M(random_join_build_failpoint) \ + M(random_join_prob_failpoint) \ + M(random_aggregate_create_state_failpoint) \ + M(random_aggregate_merge_failpoint) \ + M(random_sharedquery_failpoint) \ + M(random_interpreter_failpoint) \ 
+ M(random_task_lifecycle_failpoint) \
+ M(random_task_manager_find_task_failure_failpoint) \
+ M(random_min_tso_scheduler_failpoint)
 namespace FailPoints
 {
#define M(NAME) extern const char(NAME)[] = #NAME "";
APPLY_FOR_FAILPOINTS_ONCE(M)
APPLY_FOR_FAILPOINTS(M)
-APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M)
-APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M)
+APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M)
+APPLY_FOR_PAUSEABLE_FAILPOINTS(M)
+APPLY_FOR_RANDOM_FAILPOINTS(M)
#undef M
} // namespace FailPoints
@@ -167,15 +193,15 @@ void FailPointHelper::enableFailPoint(const String & fail_point_name)
 }
#define M(NAME) SUB_M(NAME, FIU_ONETIME)
- APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M)
+ APPLY_FOR_PAUSEABLE_FAILPOINTS_ONCE(M)
#undef M
#define M(NAME) SUB_M(NAME, 0)
- APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M)
+ APPLY_FOR_PAUSEABLE_FAILPOINTS(M)
#undef M
#undef SUB_M
- throw Exception("Cannot find fail point " + fail_point_name, ErrorCodes::FAIL_POINT_ERROR);
+ throw Exception(fmt::format("Cannot find fail point {}", fail_point_name), ErrorCodes::FAIL_POINT_ERROR);
 }
 void FailPointHelper::disableFailPoint(const String & fail_point_name)
@@ -200,6 +226,41 @@ void FailPointHelper::wait(const String & fail_point_name)
 ptr->wait();
 }
 }
+
+void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log)
+{
+ String random_fail_point_cfg = config.getString("flash.random_fail_points", "");
+ if (random_fail_point_cfg.empty())
+ return;
+
+ Poco::StringTokenizer string_tokens(random_fail_point_cfg, ",");
+ for (const auto & string_token : string_tokens)
+ {
+ Poco::StringTokenizer pair_tokens(string_token, "-");
+ RUNTIME_ASSERT((pair_tokens.count() == 2), log, "RandomFailPoints config should be FailPointA-RatioA,FailPointB-RatioB,... format");
+ double rate = atof(pair_tokens[1].c_str()); //NOLINT(cert-err34-c): check conversion error manually
+ RUNTIME_ASSERT((0 <= rate && rate <= 1.0), log, "RandomFailPoint trigger rate should be in [0,1], while {}", rate);
+ enableRandomFailPoint(pair_tokens[0], rate);
+ }
+ LOG_FMT_INFO(log, "Enable RandomFailPoints: {}", random_fail_point_cfg);
+}
+
+void FailPointHelper::enableRandomFailPoint(const String & fail_point_name, double rate)
+{
+#define SUB_M(NAME) \
+ if (fail_point_name == FailPoints::NAME) \
+ { \
+ fiu_enable_random(FailPoints::NAME, 1, nullptr, 0, rate); \
+ return; \
+ }
+
+#define M(NAME) SUB_M(NAME)
+ APPLY_FOR_RANDOM_FAILPOINTS(M)
+#undef M
+#undef SUB_M
+
+ throw Exception(fmt::format("Cannot find fail point {}", fail_point_name), ErrorCodes::FAIL_POINT_ERROR);
+}
#else
class FailPointChannel
{
@@ -210,6 +271,10 @@ void FailPointHelper::enableFailPoint(const String &) {}
void FailPointHelper::disableFailPoint(const String &) {}
void FailPointHelper::wait(const String &) {}
+
+void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration &, Poco::Logger *) {}
+
+void FailPointHelper::enableRandomFailPoint(const String &, double) {}
#endif
} // namespace DB
diff --git a/dbms/src/Common/FailPoint.h b/dbms/src/Common/FailPoint.h
index 2cf40ad55e4..31df2dbdcd2 100644
--- a/dbms/src/Common/FailPoint.h
+++ b/dbms/src/Common/FailPoint.h
@@ -21,6 +21,15 @@
#include
+
+namespace Poco
+{
+class Logger;
+namespace Util
+{
+class LayeredConfiguration;
+}
+} // namespace Poco
+
namespace DB
{
namespace ErrorCodes
@@ -35,7 +44,6 @@ extern const int FAIL_POINT_ERROR;
// When `fail_point` is enabled, wait till it is disabled
#define FAIL_POINT_PAUSE(fail_point) fiu_do_on(fail_point, FailPointHelper::wait(fail_point);)
-
 class FailPointChannel;
 class FailPointHelper
 {
@@ -46,6 +54,16 @@ class FailPointHelper
 static void wait(const String & fail_point_name);
+ /*
+ * For server RandomFailPoint testing. When FIU_ENABLE is defined, this function does the following work:
+ * 1. Return if the TiFlash config has an empty flash.random_fail_points entry
+ * 2. Parse flash.random_fail_points, which is expected to be in "FailPointA-RatioA,FailPointB-RatioB,..." format,
+ *    e.g. flash.random_fail_points = "random_aggregate_create_state_failpoint-0.2,random_interpreter_failpoint-0.05"
+ * 3. Call enableRandomFailPoint with each parsed FailPointName and Rate
+ */
+ static void initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log);
+
+ static void enableRandomFailPoint(const String & fail_point_name, double rate);
+
 private:
 static std::unordered_map<String, std::shared_ptr<FailPointChannel>> fail_point_wait_channels;
 };
diff --git a/dbms/src/Common/MPMCQueue.h b/dbms/src/Common/MPMCQueue.h
index f550ecc7ca2..31dfc65a174 100644
--- a/dbms/src/Common/MPMCQueue.h
+++ b/dbms/src/Common/MPMCQueue.h
@@ -74,56 +74,80 @@ class MPMCQueue
 destruct(getObj(read_pos));
 }
- /// Block util:
+ /// Block until:
 /// 1. Pop succeeds with a valid T: return true.
 /// 2. The queue is cancelled or finished: return false.
- bool pop(T & obj)
+ ALWAYS_INLINE bool pop(T & obj)
 {
- return popObj(obj);
+ return popObj<true>(obj);
 }
- /// Besides all conditions mentioned at `pop`, `tryPop` will return false if `timeout` is exceeded.
+ /// Besides all conditions mentioned at `pop`, `popTimeout` will return false if `timeout` is exceeded.
 template <typename Duration>
- bool tryPop(T & obj, const Duration & timeout)
+ ALWAYS_INLINE bool popTimeout(T & obj, const Duration & timeout)
 {
 /// std::condition_variable::wait_until will always use system_clock.
 auto deadline = std::chrono::system_clock::now() + timeout;
- return popObj(obj, &deadline);
+ return popObj<true>(obj, &deadline);
 }
- /// Block util:
+ /// Non-blocking function.
+ /// Return true if pop succeeds,
+ /// else return false.
+ ALWAYS_INLINE bool tryPop(T & obj)
+ {
+ return popObj<false>(obj);
+ }
+
+ /// Block until:
 /// 1. Push succeeds and return true.
 /// 2. The queue is cancelled and return false.
 /// 3. The queue has finished and return false.
 template <typename U>
 ALWAYS_INLINE bool push(U && u)
 {
- return pushObj(std::forward<U>(u));
+ return pushObj<true>(std::forward<U>(u));
 }
- /// Besides all conditions mentioned at `push`, `tryPush` will return false if `timeout` is exceeded.
+ /// Besides all conditions mentioned at `push`, `pushTimeout` will return false if `timeout` is exceeded.
 template <typename U, typename Duration>
- ALWAYS_INLINE bool tryPush(U && u, const Duration & timeout)
+ ALWAYS_INLINE bool pushTimeout(U && u, const Duration & timeout)
 {
 /// std::condition_variable::wait_until will always use system_clock.
 auto deadline = std::chrono::system_clock::now() + timeout;
- return pushObj(std::forward<U>(u), &deadline);
+ return pushObj<true>(std::forward<U>(u), &deadline);
+ }
+
+ /// Non-blocking function.
+ /// Return true if push succeeds,
+ /// else return false.
+ template <typename U>
+ ALWAYS_INLINE bool tryPush(U && u)
+ {
+ return pushObj<false>(std::forward<U>(u));
 }
 /// The same as `push` except it will construct the object in place.
 template <typename... Args>
 ALWAYS_INLINE bool emplace(Args &&... args)
 {
- return emplaceObj(nullptr, std::forward<Args>(args)...);
+ return emplaceObj<true>(nullptr, std::forward<Args>(args)...);
 }
- /// The same as `tryPush` except it will construct the object in place.
+ /// The same as `pushTimeout` except it will construct the object in place.
 template <typename... Args, typename Duration>
- ALWAYS_INLINE bool tryEmplace(Args &&... args, const Duration & timeout)
+ ALWAYS_INLINE bool emplaceTimeout(Args &&... args, const Duration & timeout)
 {
 /// std::condition_variable::wait_until will always use system_clock.
 auto deadline = std::chrono::system_clock::now() + timeout;
- return emplaceObj(&deadline, std::forward<Args>(args)...);
+ return emplaceObj<true>(&deadline, std::forward<Args>(args)...);
+ }
+
+ /// The same as `tryPush` except it will construct the object in place.
+ template <typename... Args>
+ ALWAYS_INLINE bool tryEmplace(Args &&... args)
+ {
+ return emplaceObj<false>(nullptr, std::forward<Args>(args)...);
+ }
 /// Cancel a NORMAL queue will wake up all blocking readers and writers.
@@ -233,7 +257,8 @@ class MPMCQueue
 }
 }
- bool popObj(T & res, const TimePoint * deadline = nullptr)
+ template <bool need_wait>
+ bool popObj(T & res, [[maybe_unused]] const TimePoint * deadline = nullptr)
 {
#ifdef __APPLE__
 WaitingNode node;
@@ -241,14 +266,16 @@ class MPMCQueue
 thread_local WaitingNode node;
#endif
 {
- /// read_pos < write_pos means the queue isn't empty
- auto pred = [&] {
- return read_pos < write_pos || !isNormal();
- };
-
 std::unique_lock lock(mu);
- wait(lock, reader_head, node, pred, deadline);
+ if constexpr (need_wait)
+ {
+ /// read_pos < write_pos means the queue isn't empty
+ auto pred = [&] {
+ return read_pos < write_pos || !isNormal();
+ };
+ wait(lock, reader_head, node, pred, deadline);
+ }
 if (!isCancelled() && read_pos < write_pos)
 {
@@ -272,21 +299,23 @@ class MPMCQueue
 return false;
 }
- template <typename F>
- bool assignObj(const TimePoint * deadline, F && assigner)
+ template <bool need_wait, typename F>
+ bool assignObj([[maybe_unused]] const TimePoint * deadline, F && assigner)
 {
#ifdef __APPLE__
 WaitingNode node;
#else
 thread_local WaitingNode node;
#endif
- auto pred = [&] {
- return write_pos - read_pos < capacity || !isNormal();
- };
-
 std::unique_lock lock(mu);
- wait(lock, writer_head, node, pred, deadline);
+ if constexpr (need_wait)
+ {
+ auto pred = [&] {
+ return write_pos - read_pos < capacity || !isNormal();
+ };
+ wait(lock, writer_head, node, pred, deadline);
+ }
 /// double check status after potential wait
 /// check write_pos because timeouted will also reach here.
@@ -305,16 +334,16 @@ class MPMCQueue
 return false;
 }
- template <typename U>
+ template <bool need_wait, typename U>
 ALWAYS_INLINE bool pushObj(U && u, const TimePoint * deadline = nullptr)
 {
- return assignObj(deadline, [&](void * addr) { new (addr) T(std::forward<U>(u)); });
+ return assignObj<need_wait>(deadline, [&](void * addr) { new (addr) T(std::forward<U>(u)); });
 }
- template <typename... Args>
+ template <bool need_wait, typename... Args>
 ALWAYS_INLINE bool emplaceObj(const TimePoint * deadline, Args &&... args)
 {
- return assignObj(deadline, [&](void * addr) { new (addr) T(std::forward<Args>(args)...); });
+ return assignObj<need_wait>(deadline, [&](void * addr) { new (addr) T(std::forward<Args>(args)...); });
 }
 ALWAYS_INLINE bool isNormal() const
diff --git a/dbms/src/Common/MyDuration.cpp b/dbms/src/Common/MyDuration.cpp
index 8801ae0de44..513c40b6dbc 100644
--- a/dbms/src/Common/MyDuration.cpp
+++ b/dbms/src/Common/MyDuration.cpp
@@ -67,4 +67,4 @@ String MyDuration::toString() const
 auto frac_str = fmt::format("{:06}", microsecond);
 return fmt::format(fmt_str, sign > 0 ?
"" : "-", hour, minute, second, frac_str); } -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h index a5483587e3c..04e6474c0fe 100644 --- a/dbms/src/Common/PoolWithFailoverBase.h +++ b/dbms/src/Common/PoolWithFailoverBase.h @@ -40,12 +40,6 @@ extern const int LOGICAL_ERROR; } // namespace ErrorCodes } // namespace DB -namespace ProfileEvents -{ -extern const Event DistributedConnectionFailTry; -extern const Event DistributedConnectionFailAtAll; -} // namespace ProfileEvents - /// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB. /// Initialized by several PoolBase objects. /// When a connection is requested, tries to create or choose an alive connection from one of the nested pools. @@ -254,14 +248,12 @@ PoolWithFailoverBase::getMany( else { LOG_FMT_WARNING(log, "Connection failed at try No.{}, reason: {}", shuffled_pool.error_count + 1, fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); ++shuffled_pool.error_count; if (shuffled_pool.error_count >= max_tries) { ++failed_pools_count; - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailAtAll); } } } diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index 0ec1ce438a6..7507ff0b1f8 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -16,160 +16,98 @@ /// Available events. Add something here as you wish. -#define APPLY_FOR_EVENTS(M) \ - M(Query) \ - M(SelectQuery) \ - M(InsertQuery) \ - M(DeleteQuery) \ - M(FileOpen) \ - M(FileOpenFailed) \ - M(Seek) \ - M(ReadBufferFromFileDescriptorRead) \ - M(ReadBufferFromFileDescriptorReadFailed) \ - M(ReadBufferFromFileDescriptorReadBytes) \ - M(WriteBufferFromFileDescriptorWrite) \ - M(WriteBufferFromFileDescriptorWriteFailed) \ - M(WriteBufferFromFileDescriptorWriteBytes) \ - M(ReadBufferAIORead) \ - M(ReadBufferAIOReadBytes) \ - M(WriteBufferAIOWrite) \ - M(WriteBufferAIOWriteBytes) \ - M(ReadCompressedBytes) \ - M(CompressedReadBufferBlocks) \ - M(CompressedReadBufferBytes) \ - M(UncompressedCacheHits) \ - M(UncompressedCacheMisses) \ - M(UncompressedCacheWeightLost) \ - M(IOBufferAllocs) \ - M(IOBufferAllocBytes) \ - M(ArenaAllocChunks) \ - M(ArenaAllocBytes) \ - M(FunctionExecute) \ - M(TableFunctionExecute) \ - M(MarkCacheHits) \ - M(MarkCacheMisses) \ - M(CreatedReadBufferOrdinary) \ - M(CreatedReadBufferAIO) \ - M(CreatedWriteBufferOrdinary) \ - M(CreatedWriteBufferAIO) \ - \ - M(InsertedRows) \ - M(InsertedBytes) \ - M(DelayedInserts) \ - M(RejectedInserts) \ - M(DelayedInsertsMilliseconds) \ - M(DuplicatedInsertedBlocks) \ - \ - M(DistributedConnectionFailTry) \ - M(DistributedConnectionMissingTable) \ - M(DistributedConnectionStaleReplica) \ - M(DistributedConnectionFailAtAll) \ - \ - M(CompileAttempt) \ - M(CompileSuccess) \ - \ - M(ExternalSortWritePart) \ - M(ExternalSortMerge) \ - M(ExternalAggregationWritePart) \ - M(ExternalAggregationMerge) \ - M(ExternalAggregationCompressedBytes) \ - M(ExternalAggregationUncompressedBytes) \ - \ - M(SlowRead) \ - M(ReadBackoff) \ - \ - M(RegexpCreated) \ - M(ContextLock) \ - \ - M(StorageBufferFlush) \ - M(StorageBufferErrorOnFlush) \ - M(StorageBufferPassedAllMinThresholds) \ - M(StorageBufferPassedTimeMaxThreshold) \ - M(StorageBufferPassedRowsMaxThreshold) \ - M(StorageBufferPassedBytesMaxThreshold) \ - \ - M(DictCacheKeysRequested) \ - M(DictCacheKeysRequestedMiss) \ 
- M(DictCacheKeysRequestedFound) \ - M(DictCacheKeysExpired) \ - M(DictCacheKeysNotFound) \ - M(DictCacheKeysHit) \ - M(DictCacheRequestTimeNs) \ - M(DictCacheRequests) \ - M(DictCacheLockWriteNs) \ - M(DictCacheLockReadNs) \ - \ - M(DistributedSyncInsertionTimeoutExceeded) \ - M(DataAfterMergeDiffersFromReplica) \ - M(PolygonsAddedToPool) \ - M(PolygonsInPoolAllocatedBytes) \ - M(RWLockAcquiredReadLocks) \ - M(RWLockAcquiredWriteLocks) \ - M(RWLockReadersWaitMilliseconds) \ - M(RWLockWritersWaitMilliseconds) \ - \ - M(PSMWritePages) \ - M(PSMWriteIOCalls) \ - M(PSV3MBlobExpansion) \ - M(PSV3MBlobReused) \ - M(PSMWriteBytes) \ - M(PSMBackgroundWriteBytes) \ - M(PSMReadPages) \ - M(PSMBackgroundReadBytes) \ - \ - M(PSMReadIOCalls) \ - M(PSMReadBytes) \ - M(PSMWriteFailed) \ - M(PSMReadFailed) \ - \ - M(PSMVCCApplyOnCurrentBase) \ - M(PSMVCCApplyOnCurrentDelta) \ - M(PSMVCCApplyOnNewDelta) \ - M(PSMVCCCompactOnDelta) \ - M(PSMVCCCompactOnDeltaRebaseRejected) \ - M(PSMVCCCompactOnBase) \ - \ - M(DMWriteBytes) \ - M(DMWriteBlock) \ - M(DMWriteBlockNS) \ - M(DMWriteFile) \ - M(DMWriteFileNS) \ - M(DMDeleteRange) \ - M(DMDeleteRangeNS) \ - M(DMAppendDeltaPrepare) \ - M(DMAppendDeltaPrepareNS) \ - M(DMAppendDeltaCommitMemory) \ - M(DMAppendDeltaCommitMemoryNS) \ - M(DMAppendDeltaCommitDisk) \ - M(DMAppendDeltaCommitDiskNS) \ - M(DMAppendDeltaCleanUp) \ - M(DMAppendDeltaCleanUpNS) \ - M(DMPlace) \ - M(DMPlaceNS) \ - M(DMPlaceUpsert) \ - M(DMPlaceUpsertNS) \ - M(DMPlaceDeleteRange) \ - M(DMPlaceDeleteRangeNS) \ - M(DMDeltaMerge) \ - M(DMDeltaMergeNS) \ - M(DMSegmentSplit) \ - M(DMSegmentSplitNS) \ - M(DMSegmentGetSplitPoint) \ - M(DMSegmentGetSplitPointNS) \ - M(DMSegmentMerge) \ - M(DMSegmentMergeNS) \ - M(DMFlushDeltaCache) \ - M(DMFlushDeltaCacheNS) \ - M(DMCleanReadRows) \ - \ - M(FileFSync) \ - \ - M(DMFileFilterNoFilter) \ - M(DMFileFilterAftPKAndPackSet) \ - M(DMFileFilterAftRoughSet) \ - \ - M(ChecksumDigestBytes) \ - \ +#define APPLY_FOR_EVENTS(M) \ + M(Query) \ + M(FileOpen) \ + M(FileOpenFailed) \ + M(ReadBufferFromFileDescriptorRead) \ + M(ReadBufferFromFileDescriptorReadFailed) \ + M(ReadBufferFromFileDescriptorReadBytes) \ + M(WriteBufferFromFileDescriptorWrite) \ + M(WriteBufferFromFileDescriptorWriteBytes) \ + M(ReadBufferAIORead) \ + M(ReadBufferAIOReadBytes) \ + M(WriteBufferAIOWrite) \ + M(WriteBufferAIOWriteBytes) \ + \ + M(UncompressedCacheHits) \ + M(UncompressedCacheMisses) \ + M(UncompressedCacheWeightLost) \ + M(MarkCacheHits) \ + M(MarkCacheMisses) \ + \ + M(ExternalAggregationCompressedBytes) \ + M(ExternalAggregationUncompressedBytes) \ + \ + M(ContextLock) \ + \ + M(RWLockAcquiredReadLocks) \ + M(RWLockAcquiredWriteLocks) \ + M(RWLockReadersWaitMilliseconds) \ + M(RWLockWritersWaitMilliseconds) \ + \ + M(PSMWritePages) \ + M(PSMWriteIOCalls) \ + M(PSV3MBlobExpansion) \ + M(PSV3MBlobReused) \ + M(PSMWriteBytes) \ + M(PSMBackgroundWriteBytes) \ + M(PSMReadPages) \ + M(PSMBackgroundReadBytes) \ + \ + M(PSMReadIOCalls) \ + M(PSMReadBytes) \ + M(PSMWriteFailed) \ + M(PSMReadFailed) \ + \ + M(PSMVCCApplyOnCurrentBase) \ + M(PSMVCCApplyOnCurrentDelta) \ + M(PSMVCCApplyOnNewDelta) \ + M(PSMVCCCompactOnDelta) \ + M(PSMVCCCompactOnDeltaRebaseRejected) \ + M(PSMVCCCompactOnBase) \ + \ + M(DMWriteBytes) \ + M(DMWriteBlock) \ + M(DMWriteBlockNS) \ + M(DMWriteFile) \ + M(DMWriteFileNS) \ + M(DMDeleteRange) \ + M(DMDeleteRangeNS) \ + M(DMAppendDeltaPrepare) \ + M(DMAppendDeltaPrepareNS) \ + M(DMAppendDeltaCommitMemory) \ + M(DMAppendDeltaCommitMemoryNS) \ + M(DMAppendDeltaCommitDisk) 
\ + M(DMAppendDeltaCommitDiskNS) \ + M(DMAppendDeltaCleanUp) \ + M(DMAppendDeltaCleanUpNS) \ + M(DMPlace) \ + M(DMPlaceNS) \ + M(DMPlaceUpsert) \ + M(DMPlaceUpsertNS) \ + M(DMPlaceDeleteRange) \ + M(DMPlaceDeleteRangeNS) \ + M(DMDeltaMerge) \ + M(DMDeltaMergeNS) \ + M(DMSegmentSplit) \ + M(DMSegmentSplitNS) \ + M(DMSegmentGetSplitPoint) \ + M(DMSegmentGetSplitPointNS) \ + M(DMSegmentMerge) \ + M(DMSegmentMergeNS) \ + M(DMFlushDeltaCache) \ + M(DMFlushDeltaCacheNS) \ + M(DMCleanReadRows) \ + \ + M(FileFSync) \ + \ + M(DMFileFilterNoFilter) \ + M(DMFileFilterAftPKAndPackSet) \ + M(DMFileFilterAftRoughSet) \ + \ + M(ChecksumDigestBytes) \ + \ M(RaftWaitIndexTimeout) namespace ProfileEvents diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 9aa826e0e30..c0ce60af01e 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -60,27 +60,27 @@ namespace DB F(type_partition_ts, {"type", "partition_table_scan"}), \ F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"})) \ M(tiflash_coprocessor_request_duration_seconds, "Bucketed histogram of request duration", Histogram, \ - F(type_batch, {{"type", "batch"}}, ExpBuckets{0.0005, 2, 30}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.0005, 2, 30})) \ + F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_coprocessor_request_memory_usage, "Bucketed histogram of request memory usage", Histogram, \ F(type_cop, {{"type", "cop"}}, ExpBuckets{1024 * 1024, 2, 16}), \ - F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{1024 * 1024, 2, 16}), \ - F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{1024 * 1024, 2, 16})) \ + F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{1024 * 1024, 2, 20}), \ + F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{1024 * 1024, 2, 20})) \ M(tiflash_coprocessor_request_error, "Total number of request error", Counter, F(reason_meet_lock, {"reason", "meet_lock"}), \ F(reason_region_not_found, {"reason", "region_not_found"}), F(reason_epoch_not_match, {"reason", "epoch_not_match"}), \ F(reason_kv_client_error, {"reason", "kv_client_error"}), F(reason_internal_error, {"reason", "internal_error"}), \ F(reason_other_error, {"reason", "other_error"})) \ M(tiflash_coprocessor_request_handle_seconds, "Bucketed histogram of request handle duration", Histogram, \ - F(type_batch, {{"type", "batch"}}, ExpBuckets{0.0005, 2, 30}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_dispatch_mpp_task, {{"type", 
"dispatch_mpp_task"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.0005, 2, 30}), \ - F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.0005, 2, 30})) \ + F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_super_batch, {{"type", "super_batch"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_dispatch_mpp_task, {{"type", "dispatch_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_mpp_establish_conn, {{"type", "mpp_establish_conn"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_cancel_mpp_task, {{"type", "cancel_mpp_task"}}, ExpBuckets{0.001, 2, 20}), \ + F(type_run_mpp_task, {{"type", "run_mpp_task"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_coprocessor_response_bytes, "Total bytes of response body", Counter) \ M(tiflash_schema_version, "Current version of tiflash cached schema", Gauge) \ M(tiflash_schema_applying, "Whether the schema is applying or not (holding lock)", Gauge) \ @@ -95,21 +95,14 @@ namespace DB F(type_alter_column_tp, {"type", "alter_column_type"}), F(type_rename_column, {"type", "rename_column"}), \ F(type_exchange_partition, {"type", "exchange_partition"})) \ M(tiflash_schema_apply_duration_seconds, "Bucketed histogram of ddl apply duration", Histogram, \ - F(type_ddl_apply_duration, {{"req", "ddl_apply_duration"}}, ExpBuckets{0.0005, 2, 20})) \ - M(tiflash_tmt_merge_count, "Total number of TMT engine merge", Counter) \ - M(tiflash_tmt_merge_duration_seconds, "Bucketed histogram of TMT engine merge duration", Histogram, \ - F(type_tmt_merge_duration, {{"type", "tmt_merge_duration"}}, ExpBuckets{0.0005, 2, 20})) \ - M(tiflash_tmt_write_parts_count, "Total number of TMT engine write parts", Counter) \ - M(tiflash_tmt_write_parts_duration_seconds, "Bucketed histogram of TMT engine write parts duration", Histogram, \ - F(type_tmt_write_duration, {{"type", "tmt_write_parts_duration"}}, ExpBuckets{0.0005, 2, 20})) \ - M(tiflash_tmt_read_parts_count, "Total number of TMT engine read parts", Gauge) \ + F(type_ddl_apply_duration, {{"req", "ddl_apply_duration"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_raft_read_index_count, "Total number of raft read index", Counter) \ M(tiflash_raft_read_index_duration_seconds, "Bucketed histogram of raft read index duration", Histogram, \ - F(type_raft_read_index_duration, {{"type", "tmt_raft_read_index_duration"}}, ExpBuckets{0.0005, 2, 20})) \ + F(type_raft_read_index_duration, {{"type", "tmt_raft_read_index_duration"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_raft_wait_index_duration_seconds, "Bucketed histogram of raft wait index duration", Histogram, \ - F(type_raft_wait_index_duration, {{"type", "tmt_raft_wait_index_duration"}}, ExpBuckets{0.0005, 2, 20})) \ + F(type_raft_wait_index_duration, {{"type", "tmt_raft_wait_index_duration"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_syncing_data_freshness, "The freshness of tiflash data with tikv data", Histogram, \ - F(type_syncing_data_freshness, {{"type", "data_freshness"}}, ExpBuckets{0.0005, 2, 20})) \ + F(type_syncing_data_freshness, {{"type", "data_freshness"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_storage_write_amplification, "The data write amplification in storage engine", Gauge) \ M(tiflash_storage_read_tasks_count, "Total number of storage engine read tasks", Counter) \ M(tiflash_storage_command_count, "Total number of storage's command, such as delete range / 
shutdown /startup", Counter, \
@@ -122,16 +115,16 @@ namespace DB
 F(type_seg_split, {"type", "seg_split"}), F(type_seg_split_fg, {"type", "seg_split_fg"}), \
 F(type_seg_merge, {"type", "seg_merge"}), F(type_place_index_update, {"type", "place_index_update"})) \
 M(tiflash_storage_subtask_duration_seconds, "Bucketed histogram of storage's sub task duration", Histogram, \
- F(type_delta_merge, {{"type", "delta_merge"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_delta_merge_fg, {{"type", "delta_merge_fg"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_delta_merge_fg_rpc, {{"type", "delta_merge_fg_rpc"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_delta_merge_bg_gc, {{"type", "delta_merge_bg_gc"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_delta_compact, {{"type", "delta_compact"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_delta_flush, {{"type", "delta_flush"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_seg_split, {{"type", "seg_split"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_seg_split_fg, {{"type", "seg_split_fg"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_seg_merge, {{"type", "seg_merge"}}, ExpBuckets{0.0005, 2, 20}), \
- F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.0005, 2, 20})) \
+ F(type_delta_merge, {{"type", "delta_merge"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_delta_merge_fg, {{"type", "delta_merge_fg"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_delta_merge_fg_rpc, {{"type", "delta_merge_fg_rpc"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_delta_merge_bg_gc, {{"type", "delta_merge_bg_gc"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_delta_compact, {{"type", "delta_compact"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_delta_flush, {{"type", "delta_flush"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_seg_split, {{"type", "seg_split"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_seg_split_fg, {{"type", "seg_split_fg"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_seg_merge, {{"type", "seg_merge"}}, ExpBuckets{0.001, 2, 20}), \
+ F(type_place_index_update, {{"type", "place_index_update"}}, ExpBuckets{0.001, 2, 20})) \
 M(tiflash_storage_throughput_bytes, "Calculate the throughput of tasks of storage in bytes", Gauge, /**/ \
 F(type_write, {"type", "write"}), /**/ \
 F(type_ingest, {"type", "ingest"}), /**/ \
@@ -145,8 +138,8 @@ namespace DB
 F(type_split, {"type", "split"}), /**/ \
 F(type_merge, {"type", "merge"})) /**/ \
 M(tiflash_storage_write_stall_duration_seconds, "The write stall duration of storage, in seconds", Histogram, /**/ \
- F(type_write, {{"type", "write"}}, ExpBuckets{0.0005, 2, 20}), /**/ \
- F(type_delete_range, {{"type", "delete_range"}}, ExpBuckets{0.0005, 2, 20})) /**/ \
+ F(type_write, {{"type", "write"}}, ExpBuckets{0.001, 2, 20}), /**/ \
+ F(type_delete_range, {{"type", "delete_range"}}, ExpBuckets{0.001, 2, 20})) /**/ \
 M(tiflash_storage_page_gc_count, "Total number of page's gc execution.", Counter, \
 F(type_exec, {"type", "exec"}), \
 F(type_low_write, {"type", "low_write"}), \
@@ -170,7 +163,7 @@ namespace DB
 Histogram, /* these commands usually cost several seconds, increase the start bucket to 50ms */ \
 F(type_ingest_sst, {{"type", "ingest_sst"}}, ExpBuckets{0.05, 2, 10}), \
 F(type_apply_snapshot_predecode, {{"type", "snapshot_predecode"}}, ExpBuckets{0.05, 2, 10}), \
- F(type_apply_snapshot_predecode_sst2dt, {{"type", "snapshot_predecode_sst2dt"}}, ExpBuckets{0.05, 2, 10}), \
+ F(type_apply_snapshot_predecode_sst2dt, {{"type", "snapshot_predecode_sst2dt"}}, ExpBuckets{0.05, 2, 10}), \
 F(type_apply_snapshot_flush, {{"type", "snapshot_flush"}}, ExpBuckets{0.05, 2, 10})) \
M(tiflash_raft_process_keys, "Total number of keys processed in some types of Raft commands", Counter, \
 F(type_apply_snapshot, {"type", "apply_snapshot"}), F(type_ingest_sst, {"type", "ingest_sst"})) \
@@ -212,7 +205,7 @@ namespace DB
 F(type_thread_hard_limit, {"type", "thread_hard_limit"}), \
 F(type_hard_limit_exceeded_count, {"type", "hard_limit_exceeded_count"})) \
 M(tiflash_task_scheduler_waiting_duration_seconds, "Bucketed histogram of task waiting for scheduling duration", Histogram, \
- F(type_task_scheduler_waiting_duration, {{"type", "task_waiting_duration"}}, ExpBuckets{0.0005, 2, 20}))
+ F(type_task_scheduler_waiting_duration, {{"type", "task_waiting_duration"}}, ExpBuckets{0.001, 2, 20}))
// clang-format on
diff --git a/dbms/src/Common/getNumberOfPhysicalCPUCores.h b/dbms/src/Common/getNumberOfPhysicalCPUCores.h
index 6f7eaef4bb4..b3ab65a66e5 100644
--- a/dbms/src/Common/getNumberOfPhysicalCPUCores.h
+++ b/dbms/src/Common/getNumberOfPhysicalCPUCores.h
@@ -15,4 +15,5 @@
#pragma once
/// Get number of CPU cores without hyper-threading.
+/// Note: this does not account for resource isolation mechanisms such as Docker or cgroups.
unsigned getNumberOfPhysicalCPUCores();
diff --git a/dbms/src/Common/tests/gtest_mpmc_queue.cpp b/dbms/src/Common/tests/gtest_mpmc_queue.cpp
index 85ad1892067..3f2748b452b 100644
--- a/dbms/src/Common/tests/gtest_mpmc_queue.cpp
+++ b/dbms/src/Common/tests/gtest_mpmc_queue.cpp
@@ -98,12 +98,14 @@ class MPMCQueueTest : public ::testing::Test
 void testCannotTryPush(MPMCQueue<T> & queue)
 {
 auto old_size = queue.size();
- auto res = queue.tryPush(ValueHelper::make(-1), std::chrono::microseconds(1));
- auto new_size = queue.size();
- if (res)
+ bool ok1 = queue.tryPush(ValueHelper::make(-1));
+ auto new_size1 = queue.size();
+ bool ok2 = queue.pushTimeout(ValueHelper::make(-1), std::chrono::microseconds(1));
+ auto new_size2 = queue.size();
+ if (ok1 || ok2)
 throw TiFlashTestException("Should push fail");
- if (old_size != new_size)
- throw TiFlashTestException(fmt::format("Size changed from {} to {} without push", old_size, new_size));
+ if (old_size != new_size1 || old_size != new_size2)
+ throw TiFlashTestException(fmt::format("Size changed from {} to {} and {} without push", old_size, new_size1, new_size2));
 }
 template
@@ -124,12 +126,14 @@ class MPMCQueueTest : public ::testing::Test
 {
 auto old_size = queue.size();
 T res;
- bool ok = queue.tryPop(res, std::chrono::microseconds(1));
- auto new_size = queue.size();
- if (ok)
+ bool ok1 = queue.tryPop(res);
+ auto new_size1 = queue.size();
+ bool ok2 = queue.popTimeout(res, std::chrono::microseconds(1));
+ auto new_size2 = queue.size();
+ if (ok1 || ok2)
 throw TiFlashTestException("Should pop fail");
- if (old_size != new_size)
- throw TiFlashTestException(fmt::format("Size changed from {} to {} without pop", old_size, new_size));
+ if (old_size != new_size1 || old_size != new_size2)
+ throw TiFlashTestException(fmt::format("Size changed from {} to {} and {} without pop", old_size, new_size1, new_size2));
 }
 template
@@ -474,7 +478,6 @@ class MPMCQueueTest : public ::testing::Test
 throwOrMove(std::move(rhs));
 }
-
 ThrowInjectable & operator=(ThrowInjectable && rhs)
 {
 if (this != &rhs)
diff --git a/dbms/src/Common/tests/mpmc_queue_perftest.cpp b/dbms/src/Common/tests/mpmc_queue_perftest.cpp
index d047b5d498f..ba0d00001a3 100644
--- a/dbms/src/Common/tests/mpmc_queue_perftest.cpp
+++ b/dbms/src/Common/tests/mpmc_queue_perftest.cpp
@@ -87,7 +87,7 @@ struct Helper<MPMCQueue<T>>
 template <typename U>
 static void pushOneTo(MPMCQueue<T> & queue, U
&& data)
 {
- queue.tryPush(std::forward<U>(data), std::chrono::milliseconds(1));
+ queue.pushTimeout(std::forward<U>(data), std::chrono::milliseconds(1));
 }
};
diff --git a/dbms/src/Common/wrapInvocable.h b/dbms/src/Common/wrapInvocable.h
index d6cee519835..1c93bb3e782 100644
--- a/dbms/src/Common/wrapInvocable.h
+++ b/dbms/src/Common/wrapInvocable.h
@@ -35,7 +35,6 @@ inline auto wrapInvocable(bool propagate_memory_tracker, Func && func, Args &&..
 // run the task with the parameters provided
 return std::apply(std::move(func), std::move(args));
 };
-
 return capture;
}
} // namespace DB
diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp
index 28db7af82e1..971e8f36e2a 100644
--- a/dbms/src/Core/Block.cpp
+++ b/dbms/src/Core/Block.cpp
@@ -238,10 +238,18 @@ void Block::checkNumberOfRows() const
 if (rows == -1)
 rows = size;
 else if (rows != size)
- throw Exception("Sizes of columns doesn't match: "
- + data.front().name + ": " + toString(rows)
- + ", " + elem.name + ": " + toString(size),
+ {
+ auto first_col = data.front();
+ throw Exception(fmt::format(
+ "Sizes of columns doesn't match: {}(id={}): {}, {}(id={}): {}",
+ first_col.name,
+ first_col.column_id,
+ rows,
+ elem.name,
+ elem.column_id,
+ size),
 ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
+ }
 }
}
diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp
index 0d9e907c5f4..4cd09d1ea63 100644
--- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp
@@ -17,12 +17,6 @@
#include
#include
-
-namespace ProfileEvents
-{
-extern const Event ExternalAggregationMerge;
-}
-
namespace DB
{
Block AggregatingBlockInputStream::getHeader() const
@@ -56,8 +50,6 @@ Block AggregatingBlockInputStream::readImpl()
 * then read and merge them, spending the minimum amount of memory.
 */
- ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge);
-
 if (!isCancelled())
 {
 /// Flush data in the RAM to disk also. It's easier than merging on-disk and RAM data.
diff --git a/dbms/src/DataStreams/AsynchronousBlockInputStream.h b/dbms/src/DataStreams/AsynchronousBlockInputStream.h
index e75d1603648..5b373c26e95 100644
--- a/dbms/src/DataStreams/AsynchronousBlockInputStream.h
+++ b/dbms/src/DataStreams/AsynchronousBlockInputStream.h
@@ -22,12 +22,6 @@
#include
#include
-
-namespace CurrentMetrics
-{
-extern const Metric QueryThread;
-}
-
namespace DB
{
/** Executes another BlockInputStream in a separate thread.
@@ -141,8 +135,6 @@ class AsynchronousBlockInputStream : public IProfilingBlockInputStream
 /// Calculations that can be performed in a separate thread
 void calculate()
 {
- CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread};
-
 try
 {
 if (first)
diff --git a/dbms/src/DataStreams/CountingBlockOutputStream.cpp b/dbms/src/DataStreams/CountingBlockOutputStream.cpp
index 26bc5a4566f..52dc6b598b9 100644
--- a/dbms/src/DataStreams/CountingBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/CountingBlockOutputStream.cpp
@@ -12,20 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
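(The `Block::checkNumberOfRows` hunk above switches the mismatch message to `fmt::format` and adds each column's id. A hedged sketch of the resulting message shape — column names and sizes here are hypothetical:)

```cpp
// Sketch only: reproduces the message shape of the fmt::format call above.
#include <fmt/format.h>

#include <string>

std::string demoSizeMismatchMessage()
{
    // Yields: "Sizes of columns doesn't match: a(id=1): 3, b(id=2): 4"
    return fmt::format(
        "Sizes of columns doesn't match: {}(id={}): {}, {}(id={}): {}",
        "a", 1, 3,  // first column: name, column_id, row count
        "b", 2, 4); // offending column: name, column_id, row count
}
```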
-#include
#include
-
-
-namespace ProfileEvents
-{
- extern const Event InsertedRows;
- extern const Event InsertedBytes;
-}
-
+#include
 namespace DB
{
-
 void CountingBlockOutputStream::write(const Block & block)
{
 stream->write(block);
@@ -33,9 +24,6 @@ void CountingBlockOutputStream::write(const Block & block)
 Progress local_progress(block.rows(), block.bytes(), 0);
 progress.incrementPiecewiseAtomically(local_progress);
- ProfileEvents::increment(ProfileEvents::InsertedRows, local_progress.rows);
- ProfileEvents::increment(ProfileEvents::InsertedBytes, local_progress.bytes);
-
 if (process_elem)
 process_elem->updateProgressOut(local_progress);
@@ -43,4 +31,4 @@ void CountingBlockOutputStream::write(const Block & block)
 progress_callback(local_progress);
}
-}
+} // namespace DB
diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp
index e79426f686e..cf8db3f8711 100644
--- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp
@@ -19,13 +19,6 @@
#include
#include
-
-namespace ProfileEvents
-{
-extern const Event ExternalSortWritePart;
-extern const Event ExternalSortMerge;
-} // namespace ProfileEvents
-
namespace DB
{
/** Remove constant columns from block.
@@ -136,7 +129,6 @@ Block MergeSortingBlockInputStream::readImpl()
 MergeSortingBlocksBlockInputStream block_in(blocks, description, log->identifier(), max_merged_block_size, limit);
 LOG_FMT_INFO(log, "Sorting and writing part of data into temporary file {}", path);
- ProfileEvents::increment(ProfileEvents::ExternalSortWritePart);
 copyData(block_in, block_out, &is_cancelled); /// NOTE. Possibly limit disk usage.
 LOG_FMT_INFO(log, "Done writing part of data into temporary file {}", path);
@@ -155,7 +147,6 @@ Block MergeSortingBlockInputStream::readImpl()
 else
 {
 /// If there were temporary files.
- ProfileEvents::increment(ProfileEvents::ExternalSortMerge); LOG_FMT_INFO(log, "There are {} temporary sorted parts to merge.", temporary_files.size()); diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 5d0b677b792..3a1cc1eed31 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -19,13 +19,6 @@ #include - -namespace CurrentMetrics -{ -extern const Metric QueryThread; -} - - namespace DB { /** Scheme of operation: @@ -156,7 +149,7 @@ void MergingAggregatedMemoryEfficientBlockInputStream::cancel(bool kill) for (auto & input : inputs) { - if (IProfilingBlockInputStream * child = dynamic_cast(input.stream.get())) + if (auto * child = dynamic_cast(input.stream.get())) { try { @@ -198,7 +191,6 @@ void MergingAggregatedMemoryEfficientBlockInputStream::start() reading_pool->schedule( wrapInvocable(true, [&child] { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; child->readPrefix(); })); } @@ -309,8 +301,6 @@ void MergingAggregatedMemoryEfficientBlockInputStream::finalize() void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread() { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; - try { while (!parallel_merge_data->finish) @@ -490,7 +480,6 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate if (need_that_input(input)) { reading_pool->schedule(wrapInvocable(true, [&input, &read_from_input] { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; read_from_input(input); })); } diff --git a/dbms/src/DataStreams/MultiplexInputStream.h b/dbms/src/DataStreams/MultiplexInputStream.h new file mode 100644 index 00000000000..4fa33262e66 --- /dev/null +++ b/dbms/src/DataStreams/MultiplexInputStream.h @@ -0,0 +1,246 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
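(The new `DataStreams/MultiplexInputStream.h` added here introduces a mutex-protected pool of per-partition streams plus an input stream that drains it. A hedged sketch of the intended wiring — everything outside the new header, such as the function name, `partition_streams`, and `max_threads`, is hypothetical:)

```cpp
// Hypothetical caller-side wiring for the classes declared in the new header.
#include <DataStreams/MultiplexInputStream.h> // the new header below; path assumed

#include <memory>
#include <vector>

DB::BlockInputStreams buildMultiplexReaders(
    const std::vector<DB::BlockInputStreams> & partition_streams,
    size_t max_threads)
{
    auto pool = std::make_shared<DB::MultiPartitionStreamPool>();
    for (const auto & streams : partition_streams)
        pool->addPartitionStreams(streams); // register one partition's streams

    // Each reader repeatedly picks a whole stream from the shared pool, so a
    // partition stream is consumed by exactly one reader at a time.
    DB::BlockInputStreams readers;
    for (size_t i = 0; i < max_threads; ++i)
        readers.push_back(std::make_shared<DB::MultiplexInputStream>(pool, /*req_id=*/"demo"));
    return readers;
}
```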
+ +#pragma once + +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} // namespace ErrorCodes + +class MultiPartitionStreamPool +{ +public: + MultiPartitionStreamPool() = default; + + void addPartitionStreams(const BlockInputStreams & cur_streams) + { + if (cur_streams.empty()) + return; + std::unique_lock lk(mu); + streams_queue_by_partition.push_back( + std::make_shared>>()); + for (const auto & stream : cur_streams) + streams_queue_by_partition.back()->push(stream); + added_streams.insert(added_streams.end(), cur_streams.begin(), cur_streams.end()); + } + + std::shared_ptr pickOne() + { + std::unique_lock lk(mu); + if (streams_queue_by_partition.empty()) + return nullptr; + if (streams_queue_id >= static_cast(streams_queue_by_partition.size())) + streams_queue_id = 0; + + auto & q = *streams_queue_by_partition[streams_queue_id]; + std::shared_ptr ret = nullptr; + assert(!q.empty()); + ret = q.front(); + q.pop(); + if (q.empty()) + streams_queue_id = removeQueue(streams_queue_id); + else + streams_queue_id = nextQueueId(streams_queue_id); + return ret; + } + + int exportAddedStreams(BlockInputStreams & ret_streams) + { + std::unique_lock lk(mu); + for (auto & stream : added_streams) + ret_streams.push_back(stream); + return added_streams.size(); + } + + int addedStreamsCnt() + { + std::unique_lock lk(mu); + return added_streams.size(); + } + +private: + int removeQueue(int queue_id) + { + streams_queue_by_partition[queue_id] = nullptr; + if (queue_id != static_cast(streams_queue_by_partition.size()) - 1) + { + swap(streams_queue_by_partition[queue_id], streams_queue_by_partition.back()); + streams_queue_by_partition.pop_back(); + return queue_id; + } + else + { + streams_queue_by_partition.pop_back(); + return 0; + } + } + + int nextQueueId(int queue_id) const + { + if (queue_id + 1 < static_cast(streams_queue_by_partition.size())) + return queue_id + 1; + else + return 0; + } + + static void swap(std::shared_ptr>> & a, + std::shared_ptr>> & b) + { + a.swap(b); + } + + std::vector< + std::shared_ptr>>> + streams_queue_by_partition; + std::vector> added_streams; + int streams_queue_id = 0; + std::mutex mu; +}; + +class MultiplexInputStream final : public IProfilingBlockInputStream +{ +private: + static constexpr auto NAME = "Multiplex"; + +public: + MultiplexInputStream( + std::shared_ptr & shared_pool, + const String & req_id) + : log(Logger::get(NAME, req_id)) + , shared_pool(shared_pool) + { + shared_pool->exportAddedStreams(children); + size_t num_children = children.size(); + if (num_children > 1) + { + Block header = children.at(0)->getHeader(); + for (size_t i = 1; i < num_children; ++i) + assertBlocksHaveEqualStructure( + children[i]->getHeader(), + header, + "MULTIPLEX"); + } + } + + String getName() const override { return NAME; } + + ~MultiplexInputStream() override + { + try + { + if (!all_read) + cancel(false); + } + catch (...) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + } + } + + /** Different from the default implementation by trying to stop all sources, + * skipping failed by execution. 
+ */ + void cancel(bool kill) override + { + if (kill) + is_killed = true; + + bool old_val = false; + if (!is_cancelled.compare_exchange_strong( + old_val, + true, + std::memory_order_seq_cst, + std::memory_order_relaxed)) + return; + + if (cur_stream) + { + if (IProfilingBlockInputStream * child = dynamic_cast(&*cur_stream)) + { + child->cancel(kill); + } + } + } + + Block getHeader() const override { return children.at(0)->getHeader(); } + +protected: + /// Do nothing, to make the preparation when underlying InputStream is picked from the pool + void readPrefix() override + { + } + + /** The following options are possible: + * 1. `readImpl` function is called until it returns an empty block. + * Then `readSuffix` function is called and then destructor. + * 2. `readImpl` function is called. At some point, `cancel` function is called perhaps from another thread. + * Then `readSuffix` function is called and then destructor. + * 3. At any time, the object can be destroyed (destructor called). + */ + + Block readImpl() override + { + if (all_read) + return {}; + + Block ret; + while (!cur_stream || !(ret = cur_stream->read())) + { + if (cur_stream) + cur_stream->readSuffix(); // release old inputstream + cur_stream = shared_pool->pickOne(); + if (!cur_stream) + { // shared_pool is empty + all_read = true; + return {}; + } + cur_stream->readPrefix(); + } + return ret; + } + + /// Called either after everything is read, or after cancel. + void readSuffix() override + { + if (!all_read && !is_cancelled) + throw Exception("readSuffix called before all data is read", ErrorCodes::LOGICAL_ERROR); + + if (cur_stream) + { + cur_stream->readSuffix(); + cur_stream = nullptr; + } + } + +private: + LoggerPtr log; + + std::shared_ptr shared_pool; + std::shared_ptr cur_stream; + + bool all_read = false; +}; + +} // namespace DB diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index 1a59b979c29..cd9d6235f52 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -20,18 +20,11 @@ #include #include - -namespace ProfileEvents -{ -extern const Event ExternalAggregationMerge; -} - - namespace DB { ParallelAggregatingBlockInputStream::ParallelAggregatingBlockInputStream( const BlockInputStreams & inputs, - const BlockInputStreamPtr & additional_input_at_end, + const BlockInputStreams & additional_inputs_at_end, const Aggregator::Params & params_, const FileProviderPtr & file_provider_, bool final_, @@ -48,11 +41,10 @@ ParallelAggregatingBlockInputStream::ParallelAggregatingBlockInputStream( , keys_size(params.keys_size) , aggregates_size(params.aggregates_size) , handler(*this) - , processor(inputs, additional_input_at_end, max_threads, handler, log) + , processor(inputs, additional_inputs_at_end, max_threads, handler, log) { children = inputs; - if (additional_input_at_end) - children.push_back(additional_input_at_end); + children.insert(children.end(), additional_inputs_at_end.begin(), additional_inputs_at_end.end()); } @@ -101,8 +93,6 @@ Block ParallelAggregatingBlockInputStream::readImpl() * then read and merge them, spending the minimum amount of memory. 
*/ - ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge); - const auto & files = aggregator.getTemporaryFiles(); BlockInputStreams input_streams; for (const auto & file : files.files) @@ -207,8 +197,8 @@ void ParallelAggregatingBlockInputStream::Handler::onException(std::exception_pt /// can not cancel parent inputStream or the exception might be lost if (!parent.executed) - /// kill the processor so ExchangeReceiver will be closed - parent.processor.cancel(true); + /// use cancel instead of kill to avoid too many useless error message + parent.processor.cancel(false); } diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h index 41e61786370..907622c8364 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h @@ -36,7 +36,7 @@ class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream */ ParallelAggregatingBlockInputStream( const BlockInputStreams & inputs, - const BlockInputStreamPtr & additional_input_at_end, + const BlockInputStreams & additional_inputs_at_end, const Aggregator::Params & params_, const FileProviderPtr & file_provider_, bool final_, diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 0e839093cd7..57ab37e1756 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -46,11 +47,6 @@ * then read block from source and then put source back to queue of available sources. */ -namespace CurrentMetrics -{ -extern const Metric QueryThread; -} - namespace DB { /** Union mode. @@ -88,9 +84,8 @@ template class ParallelInputsProcessor { public: - /** additional_input_at_end - if not nullptr, - * then the blocks from this source will start to be processed only after all other sources are processed. - * This is done in the main thread. + /** additional_inputs_at_end - if not empty, + * then the blocks from the sources will start to be processed only after all other sources are processed. * * Intended for implementation of FULL and RIGHT JOIN * - where you must first make JOIN in parallel, while noting which keys are not found, @@ -98,19 +93,18 @@ class ParallelInputsProcessor */ ParallelInputsProcessor( const BlockInputStreams & inputs_, - const BlockInputStreamPtr & additional_input_at_end_, + const BlockInputStreams & additional_inputs_at_end_, size_t max_threads_, Handler & handler_, const LoggerPtr & log_) : inputs(inputs_) - , additional_input_at_end(additional_input_at_end_) - , max_threads(std::min(inputs_.size(), max_threads_)) + , additional_inputs_at_end(additional_inputs_at_end_) + , max_threads(std::min(std::max(inputs_.size(), additional_inputs_at_end_.size()), max_threads_)) , handler(handler_) + , working_inputs(inputs_) + , working_additional_inputs(additional_inputs_at_end_) , log(log_) - { - for (size_t i = 0; i < inputs_.size(); ++i) - unprepared_inputs.emplace(inputs_[i], i); - } + {} ~ParallelInputsProcessor() { @@ -137,36 +131,21 @@ class ParallelInputsProcessor /// Ask all sources to stop earlier than they run out. 
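/// Cancelling the two MPMCQueues first wakes up any worker blocked in pop(), so the worker /// threads observe the cancellation promptly before the streams themselves are cancelled.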
void cancel(bool kill) { - finish = true; + working_inputs.available_inputs.cancel(); + working_additional_inputs.available_inputs.cancel(); - for (auto & input : inputs) - { - if (IProfilingBlockInputStream * child = dynamic_cast(&*input)) - { - try - { - child->cancel(kill); - } - catch (...) - { - /** If you can not ask one or more sources to stop. - * (for example, the connection is broken for distributed query processing) - * - then do not care. - */ - LOG_FMT_ERROR(log, "Exception while cancelling {}", child->getName()); - } - } - } + cancelStreams(inputs, kill); + cancelStreams(additional_inputs_at_end, kill); } /// Wait until all threads are finished, before the destructor. void wait() { - if (joined_threads) - return; if (thread_manager) + { thread_manager->wait(); - joined_threads = true; + thread_manager.reset(); + } } size_t getNumActiveThreads() const @@ -186,13 +165,78 @@ class ParallelInputsProcessor BlockInputStreamPtr in; size_t i; /// The source number (for debugging). - InputData() {} + InputData() + : i(0) + {} InputData(const BlockInputStreamPtr & in_, size_t i_) : in(in_) , i(i_) {} }; + struct WorkingInputs + { + explicit WorkingInputs(const BlockInputStreams & inputs_) + : available_inputs(inputs_.size()) + , active_inputs(inputs_.size()) + , unprepared_inputs(inputs_.size()) + { + for (size_t i = 0; i < inputs_.size(); ++i) + unprepared_inputs.emplace(inputs_[i], i); + } + /** A set of available sources that are not currently processed by any thread. + * Each thread takes one source from this set, takes a block out of the source (at this moment the source does the calculations) + * and (if the source is not run out), puts it back into the set of available sources. + * + * The question arises what is better to use: + * - the queue (just processed source will be processed the next time later than the rest) + * - stack (just processed source will be processed as soon as possible). + * + * The stack is better than the queue when you need to do work on reading one source more consequentially, + * and theoretically, this allows you to achieve more consequent/consistent reads from the disk. + * + * But when using the stack, there is a problem with distributed query processing: + * data is read only from a part of the servers, and on the other servers + * a timeout occurs during send, and the request processing ends with an exception. + * + * Therefore, a queue is used. This can be improved in the future. + */ + using AvailableInputs = MPMCQueue; + AvailableInputs available_inputs; + + /// How many active input streams. + std::atomic active_inputs; + + /** For parallel preparing (readPrefix) child streams. + * First, streams are located here. + * After a stream was prepared, it is moved to "available_inputs" for reading. + */ + using UnpreparedInputs = MPMCQueue; + UnpreparedInputs unprepared_inputs; + }; + + void cancelStreams(const BlockInputStreams & streams, bool kill) + { + for (const auto & input : streams) + { + if (auto * p_child = dynamic_cast(&*input)) + { + try + { + p_child->cancel(kill); + } + catch (...) + { + /** If you can not ask one or more sources to stop. + * (for example, the connection is broken for distributed query processing) + * - then do not care. 
+ */ + LOG_FMT_ERROR(log, "Exception while cancelling {}", p_child->getName()); + } + } + } + } + void publishPayload(BlockInputStreamPtr & stream, Block & block, size_t thread_num) { if constexpr (mode == StreamUnionMode::Basic) @@ -206,34 +250,24 @@ class ParallelInputsProcessor void thread(size_t thread_num) { - std::exception_ptr exception; + work(thread_num, working_inputs); + work(thread_num, working_additional_inputs); - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; + handler.onFinishThread(thread_num); - try + if (0 == --active_threads) { - while (!finish) - { - InputData unprepared_input; - { - std::lock_guard lock(unprepared_inputs_mutex); - - if (unprepared_inputs.empty()) - break; - - unprepared_input = unprepared_inputs.front(); - unprepared_inputs.pop(); - } - - unprepared_input.in->readPrefix(); + handler.onFinish(); + } + } - { - std::lock_guard lock(available_inputs_mutex); - available_inputs.push(unprepared_input); - } - } + void work(size_t thread_num, WorkingInputs & work) + { + std::exception_ptr exception; - loop(thread_num); + try + { + loop(thread_num, work); } catch (...) { @@ -244,134 +278,63 @@ class ParallelInputsProcessor { handler.onException(exception, thread_num); } - - handler.onFinishThread(thread_num); - - /// The last thread on the output indicates that there is no more data. - if (0 == --active_threads) - { - /// And then it processes an additional source, if there is one. - if (additional_input_at_end) - { - try - { - additional_input_at_end->readPrefix(); - while (Block block = additional_input_at_end->read()) - publishPayload(additional_input_at_end, block, thread_num); - } - catch (...) - { - exception = std::current_exception(); - } - - if (exception) - { - handler.onException(exception, thread_num); - } - } - - handler.onFinish(); /// TODO If in `onFinish` or `onFinishThread` there is an exception, then std::terminate is called. - } } - void loop(size_t thread_num) + /// This function may be called in different threads. + /// If no exception occurs, we can ensure that the work is all done when the function + /// returns in any thread. + void loop(size_t thread_num, WorkingInputs & work) { - while (!finish) /// You may need to stop work earlier than all sources run out. + if (work.active_inputs == 0) { - InputData input; + return; + } - /// Select the next source. - { - std::lock_guard lock(available_inputs_mutex); + InputData input; - /// If there are no free sources, then this thread is no longer needed. (But other threads can work with their sources.) - if (available_inputs.empty()) - break; - - input = available_inputs.front(); + while (work.unprepared_inputs.tryPop(input)) + { + input.in->readPrefix(); - /// We remove the source from the queue of available sources. - available_inputs.pop(); - } + work.available_inputs.push(input); + } + // The condition is false when all input streams are exhausted or + // an exception occurred then the queue was cancelled. + while (work.available_inputs.pop(input)) + { /// The main work. Block block = input.in->read(); + if (block) { - if (finish) - break; - - /// If this source is not run out yet, then put the resulting block in the ready queue. 
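+ /// The stream still yields blocks: return it to the queue before publishing, so + /// another worker can continue reading from it in the meantime.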
+ work.available_inputs.push(input); + publishPayload(input.in, block, thread_num); + } + else + { + if (0 == --work.active_inputs) { - std::lock_guard lock(available_inputs_mutex); - - if (block) - { - available_inputs.push(input); - } - else - { - if (available_inputs.empty()) - break; - } - } - - if (finish) + work.available_inputs.finish(); break; - - if (block) - publishPayload(input.in, block, thread_num); + } } } } - BlockInputStreams inputs; - BlockInputStreamPtr additional_input_at_end; + const BlockInputStreams inputs; + const BlockInputStreams additional_inputs_at_end; unsigned max_threads; Handler & handler; std::shared_ptr thread_manager; - /** A set of available sources that are not currently processed by any thread. - * Each thread takes one source from this set, takes a block out of the source (at this moment the source does the calculations) - * and (if the source is not run out), puts it back into the set of available sources. - * - * The question arises what is better to use: - * - the queue (just processed source will be processed the next time later than the rest) - * - stack (just processed source will be processed as soon as possible). - * - * The stack is better than the queue when you need to do work on reading one source more consequentially, - * and theoretically, this allows you to achieve more consequent/consistent reads from the disk. - * - * But when using the stack, there is a problem with distributed query processing: - * data is read only from a part of the servers, and on the other servers - * a timeout occurs during send, and the request processing ends with an exception. - * - * Therefore, a queue is used. This can be improved in the future. - */ - using AvailableInputs = std::queue; - AvailableInputs available_inputs; - - /** For parallel preparing (readPrefix) child streams. - * First, streams are located here. - * After a stream was prepared, it is moved to "available_inputs" for reading. - */ - using UnpreparedInputs = std::queue; - UnpreparedInputs unprepared_inputs; - - /// For operations with available_inputs. - std::mutex available_inputs_mutex; - - /// For operations with unprepared_inputs. - std::mutex unprepared_inputs_mutex; + WorkingInputs working_inputs; + WorkingInputs working_additional_inputs; /// How many sources ran out. std::atomic active_threads{0}; - /// Finish the threads work (before the sources run out). - std::atomic finish{false}; - /// Wait for the completion of all threads. - std::atomic joined_threads{false}; const LoggerPtr log; }; diff --git a/dbms/src/DataStreams/SharedQueryBlockInputStream.h b/dbms/src/DataStreams/SharedQueryBlockInputStream.h index e7cece67f0b..d7c0707b5aa 100644 --- a/dbms/src/DataStreams/SharedQueryBlockInputStream.h +++ b/dbms/src/DataStreams/SharedQueryBlockInputStream.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -24,6 +25,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_sharedquery_failpoint[]; +} // namespace FailPoints + /** This block input stream is used by SharedQuery. * It enable multiple threads read from one stream. 
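* A random failpoint (random_sharedquery_failpoint) is triggered inside the fetch loop * so tests can exercise the error-handling path.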
*/ @@ -136,6 +142,7 @@ class SharedQueryBlockInputStream : public IProfilingBlockInputStream in->readPrefix(); while (true) { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_sharedquery_failpoint); Block block = in->read(); // in is finished or queue is canceled if (!block || !queue.push(block)) diff --git a/dbms/src/DataStreams/SizeLimits.cpp b/dbms/src/DataStreams/SizeLimits.cpp index 7dd5e1524ba..4d1bfaae997 100644 --- a/dbms/src/DataStreams/SizeLimits.cpp +++ b/dbms/src/DataStreams/SizeLimits.cpp @@ -12,22 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include #include -#include +#include +#include +#include +#include namespace DB { +namespace FailPoints +{ +extern const char random_limit_check_failpoint[]; +} // namespace FailPoints bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const { - if (max_rows && rows > max_rows) + bool rows_exceed_limit = max_rows && rows > max_rows; + fiu_do_on(FailPoints::random_limit_check_failpoint, rows_exceed_limit = true;); + if (rows_exceed_limit) { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max rows: " + formatReadableQuantity(max_rows) - + ", current rows: " + formatReadableQuantity(rows), exception_code); + + ", current rows: " + formatReadableQuantity(rows), + exception_code); else return false; } @@ -36,7 +44,8 @@ { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max bytes: " + formatReadableSizeWithBinarySuffix(max_bytes) - + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), exception_code); + + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), + exception_code); else return false; } @@ -44,4 +53,4 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti return true; } -} +} // namespace DB diff --git a/dbms/src/DataStreams/TiRemoteBlockInputStream.h b/dbms/src/DataStreams/TiRemoteBlockInputStream.h index 76fda0b57d0..c1afb1e9f4e 100644 --- a/dbms/src/DataStreams/TiRemoteBlockInputStream.h +++ b/dbms/src/DataStreams/TiRemoteBlockInputStream.h @@ -59,6 +59,11 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream uint64_t total_rows; + // For fine grained shuffle, the sender will partition data into multiple streams by hashing. + // ExchangeReceiverBlockInputStream only needs to read its own stream, i.e., streams[stream_id]. + // CoprocessorBlockInputStream doesn't take care of this.
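+ // E.g. with fine_grained_shuffle_stream_count = 4, four receiver streams are created, + // and the one constructed with stream_id = 2 consumes only partition 2 of each sender.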
+ size_t stream_id; + void initRemoteExecutionSummaries(tipb::SelectResponse & resp, size_t index) { for (const auto & execution_summary : resp.execution_summaries()) @@ -121,7 +126,7 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream bool fetchRemoteResult() { - auto result = remote_reader->nextResult(block_queue, sample_block); + auto result = remote_reader->nextResult(block_queue, sample_block, stream_id); if (result.meet_error) { LOG_FMT_WARNING(log, "remote reader meets error: {}", result.error_msg); @@ -169,13 +174,14 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream } public: - TiRemoteBlockInputStream(std::shared_ptr remote_reader_, const String & req_id, const String & executor_id) + TiRemoteBlockInputStream(std::shared_ptr remote_reader_, const String & req_id, const String & executor_id, size_t stream_id_) : remote_reader(remote_reader_) , source_num(remote_reader->getSourceNum()) , name(fmt::format("TiRemoteBlockInputStream({})", RemoteReader::name)) , execution_summaries_inited(source_num) , log(Logger::get(name, req_id, executor_id)) , total_rows(0) + , stream_id(stream_id_) { for (size_t i = 0; i < source_num; ++i) { diff --git a/dbms/src/DataStreams/UnionBlockInputStream.h b/dbms/src/DataStreams/UnionBlockInputStream.h index 251d0663e14..ffcc8d77c10 100644 --- a/dbms/src/DataStreams/UnionBlockInputStream.h +++ b/dbms/src/DataStreams/UnionBlockInputStream.h @@ -94,20 +94,19 @@ class UnionBlockInputStream final : public IProfilingBlockInputStream public: UnionBlockInputStream( BlockInputStreams inputs, - BlockInputStreamPtr additional_input_at_end, + BlockInputStreams additional_inputs_at_end, size_t max_threads, const String & req_id, ExceptionCallback exception_callback_ = ExceptionCallback()) - : output_queue(std::min(inputs.size(), max_threads) * 5) // reduce contention + : output_queue(std::min(std::max(inputs.size(), additional_inputs_at_end.size()), max_threads) * 5) // reduce contention , log(Logger::get(NAME, req_id)) , handler(*this) - , processor(inputs, additional_input_at_end, max_threads, handler, log) + , processor(inputs, additional_inputs_at_end, max_threads, handler, log) , exception_callback(exception_callback_) { // TODO: assert capacity of output_queue is not less than processor.getMaxThreads() children = inputs; - if (additional_input_at_end) - children.push_back(additional_input_at_end); + children.insert(children.end(), additional_inputs_at_end.begin(), additional_inputs_at_end.end()); size_t num_children = children.size(); if (num_children > 1) @@ -293,8 +292,8 @@ class UnionBlockInputStream final : public IProfilingBlockInputStream /// and the exception is lost. 
output_queue.emplace(exception); /// can not cancel itself or the exception might be lost - /// kill the processor so ExchangeReceiver will be closed - processor.cancel(true); + /// use cancel instead of kill to avoid too many useless error message + processor.cancel(false); } struct Handler diff --git a/dbms/src/DataStreams/WindowBlockInputStream.cpp b/dbms/src/DataStreams/WindowBlockInputStream.cpp index bc63db52873..2cc61df8104 100644 --- a/dbms/src/DataStreams/WindowBlockInputStream.cpp +++ b/dbms/src/DataStreams/WindowBlockInputStream.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include namespace DB @@ -574,4 +575,21 @@ void WindowBlockInputStream::tryCalculate() peer_group_number = 1; } } + +void WindowBlockInputStream::appendInfo(FmtBuffer & buffer) const +{ + buffer.append(", function: {"); + buffer.joinStr( + window_description.window_functions_descriptions.begin(), + window_description.window_functions_descriptions.end(), + [&](const auto & func, FmtBuffer & b) { + b.append(func.window_function->getName()); + }, + ", "); + buffer.fmtAppend( + "}}, frame: {{type: {}, boundary_begin: {}, boundary_end: {}}}", + frameTypeToString(window_description.frame.type), + boundaryTypeToString(window_description.frame.begin_type), + boundaryTypeToString(window_description.frame.end_type)); +} } // namespace DB diff --git a/dbms/src/DataStreams/WindowBlockInputStream.h b/dbms/src/DataStreams/WindowBlockInputStream.h index 46b18dec1ee..0ef23aa9f6f 100644 --- a/dbms/src/DataStreams/WindowBlockInputStream.h +++ b/dbms/src/DataStreams/WindowBlockInputStream.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -169,6 +170,7 @@ class WindowBlockInputStream : public IProfilingBlockInputStream protected: Block readImpl() override; + void appendInfo(FmtBuffer & buffer) const override; LoggerPtr log; diff --git a/dbms/src/DataStreams/tests/union_stream2.cpp b/dbms/src/DataStreams/tests/union_stream2.cpp index f939cda4e14..fb3f7238414 100644 --- a/dbms/src/DataStreams/tests/union_stream2.cpp +++ b/dbms/src/DataStreams/tests/union_stream2.cpp @@ -51,7 +51,7 @@ try for (size_t i = 0, size = streams.size(); i < size; ++i) streams[i] = std::make_shared(streams[i]); - BlockInputStreamPtr stream = std::make_shared>(streams, nullptr, settings.max_threads, /*req_id=*/""); + BlockInputStreamPtr stream = std::make_shared>(streams, BlockInputStreams{}, settings.max_threads, /*req_id=*/""); stream = std::make_shared(stream, 10, 0, ""); WriteBufferFromFileDescriptor wb(STDERR_FILENO); diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 120d0b1ba30..71fda0615e4 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -471,7 +471,6 @@ class IDataType : private boost::noncopyable virtual bool isEnum() const { return false; }; virtual bool isNullable() const { return false; } - /** Is this type can represent only NULL value? (It also implies isNullable) */ virtual bool onlyNull() const { return false; } diff --git a/dbms/src/DataTypes/NumberTraits.h b/dbms/src/DataTypes/NumberTraits.h index 925628a8894..a8b91b88075 100644 --- a/dbms/src/DataTypes/NumberTraits.h +++ b/dbms/src/DataTypes/NumberTraits.h @@ -277,6 +277,7 @@ struct ResultOfAbs> }; /** For bitwise operations, an integer is obtained with number of bits is equal to the maximum of the arguments. + * todo: note that MySQL handles only unsigned 64-bit integer argument and result values. We should refine the code. 
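+ * For example, a bitwise op between UInt32 and UInt64 arguments yields UInt64 here, + * while MySQL would treat both arguments and the result as unsigned 64-bit.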
*/ template struct ResultOfBit diff --git a/dbms/src/Debug/DBGInvoker.cpp b/dbms/src/Debug/DBGInvoker.cpp index 3f633c08e67..df993d8e6e9 100644 --- a/dbms/src/Debug/DBGInvoker.cpp +++ b/dbms/src/Debug/DBGInvoker.cpp @@ -118,6 +118,10 @@ DBGInvoker::DBGInvoker() regSchemalessFunc("mapped_database", dbgFuncMappedDatabase); regSchemalessFunc("mapped_table", dbgFuncMappedTable); regSchemafulFunc("query_mapped", dbgFuncQueryMapped); + regSchemalessFunc("get_tiflash_replica_count", dbgFuncGetTiflashReplicaCount); + regSchemalessFunc("get_partition_tables_tiflash_replica_count", dbgFuncGetPartitionTablesTiflashReplicaCount); + regSchemalessFunc("get_tiflash_mode", dbgFuncGetTiflashMode); + regSchemalessFunc("get_partition_tables_tiflash_mode", dbgFuncGetPartitionTablesTiflashMode); regSchemalessFunc("search_log_for_key", dbgFuncSearchLogForKey); regSchemalessFunc("tidb_dag", dbgFuncTiDBQueryFromNaturalDag); diff --git a/dbms/src/Debug/MockSchemaGetter.h b/dbms/src/Debug/MockSchemaGetter.h index f02699866ce..11c5d97f036 100644 --- a/dbms/src/Debug/MockSchemaGetter.h +++ b/dbms/src/Debug/MockSchemaGetter.h @@ -17,16 +17,25 @@ #include #include +#include + namespace DB { - struct MockSchemaGetter { TiDB::DBInfoPtr getDatabase(DatabaseID db_id) { return MockTiDB::instance().getDBInfoByID(db_id); } Int64 getVersion() { return MockTiDB::instance().getVersion(); } - SchemaDiff getSchemaDiff(Int64 version) { return MockTiDB::instance().getSchemaDiff(version); } + std::optional getSchemaDiff(Int64 version) + { + return MockTiDB::instance().getSchemaDiff(version); + } + + bool checkSchemaDiffExists(Int64 version) + { + return MockTiDB::instance().checkSchemaDiffExists(version); + } TiDB::TableInfoPtr getTableInfo(DatabaseID, TableID table_id) { return MockTiDB::instance().getTableInfoByID(table_id); } diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 42ab56a97c1..99d9625461b 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -221,7 +221,6 @@ TiDB::TableInfoPtr MockTiDB::parseColumns( { String & name = string_tokens[index]; index_info.idx_cols[index].name = name; - index_info.idx_cols[index].offset = pk_column_pos_map[name]; index_info.idx_cols[index].length = -1; } } @@ -302,7 +301,7 @@ int MockTiDB::newTables( tables_by_id.emplace(table->table_info.id, table); tables_by_name.emplace(qualified_name, table); - AffectedOption opt; + AffectedOption opt{}; opt.schema_id = table->database_id; opt.table_id = table->id(); opt.old_schema_id = table->database_id; @@ -571,7 +570,7 @@ void MockTiDB::renameTables(const std::vectordatabase_id; opt.table_id = new_table->id(); opt.old_schema_id = table->database_id; @@ -669,9 +668,14 @@ std::pair MockTiDB::getDBIDByName(const String & database_name return std::make_pair(false, -1); } -SchemaDiff MockTiDB::getSchemaDiff(Int64 version_) +std::optional MockTiDB::getSchemaDiff(Int64 version_) { return version_diff[version_]; } +bool MockTiDB::checkSchemaDiffExists(Int64 version) +{ + return version_diff.find(version) != version_diff.end(); +} + } // namespace DB diff --git a/dbms/src/Debug/MockTiDB.h b/dbms/src/Debug/MockTiDB.h index 36d2af90859..261e547b13a 100644 --- a/dbms/src/Debug/MockTiDB.h +++ b/dbms/src/Debug/MockTiDB.h @@ -127,7 +127,9 @@ class MockTiDB : public ext::Singleton std::pair getDBIDByName(const String & database_name); - SchemaDiff getSchemaDiff(Int64 version); + bool checkSchemaDiffExists(Int64 version); + + std::optional getSchemaDiff(Int64 version); std::unordered_map getDatabases() { return 
databases; } diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp index a1e9295b3f5..61f4474f919 100644 --- a/dbms/src/Debug/astToExecutor.cpp +++ b/dbms/src/Debug/astToExecutor.cpp @@ -24,13 +24,13 @@ #include #include #include -#include #include #include #include namespace DB { +using ASTPartitionByElement = ASTOrderByElement; void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & val_field, tipb::Expr * expr, Int32 collator_id) { *(expr->mutable_field_type()) = columnInfoToFieldType(ci); @@ -191,6 +191,12 @@ std::unordered_map agg_func_name_to_sig({ {"group_concat", tipb::ExprType::GroupConcat}, }); +std::unordered_map window_func_name_to_sig({ + {"RowNumber", tipb::ExprType::RowNumber}, + {"Rank", tipb::ExprType::Rank}, + {"DenseRank", tipb::ExprType::DenseRank}, +}); + DAGColumnInfo toNullableDAGColumnInfo(const DAGColumnInfo & input) { DAGColumnInfo output = input; @@ -854,6 +860,7 @@ bool ExchangeReceiver::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t c { tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); tipb::ExchangeReceiver * exchange_receiver = tipb_executor->mutable_exchange_receiver(); for (auto & field : output_schema) { @@ -1352,6 +1359,107 @@ void Join::toMPPSubPlan(size_t & executor_index, const DAGProperties & propertie exchange_map[left_exchange_receiver->name] = std::make_pair(left_exchange_receiver, left_exchange_sender); exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); } + +bool Window::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeWindow); + tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); + tipb::Window * window = tipb_executor->mutable_window(); + auto & input_schema = children[0]->output_schema; + for (const auto & expr : func_descs) + { + tipb::Expr * window_expr = window->add_func_desc(); + const auto * window_func = typeid_cast(expr.get()); + for (const auto & arg : window_func->arguments->children) + { + tipb::Expr * func = window_expr->add_children(); + astToPB(input_schema, arg, func, collator_id, context); + } + auto window_sig_it = window_func_name_to_sig.find(window_func->name); + if (window_sig_it == window_func_name_to_sig.end()) + throw Exception(fmt::format("Unsupported window function {}", window_func->name), ErrorCodes::LOGICAL_ERROR); + auto window_sig = window_sig_it->second; + window_expr->set_tp(window_sig); + auto * ft = window_expr->mutable_field_type(); + // TODO: Maybe more window functions with different field type. 
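+ // All window functions supported so far (RowNumber, Rank, DenseRank) return a + // 64-bit integer, so one fixed field type suffices here.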
+ ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagBinary); + ft->set_collate(collator_id); + ft->set_flen(21); + ft->set_decimal(-1); + } + + for (const auto & child : order_by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = window->add_order_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + for (const auto & child : partition_by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid partition by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = window->add_partition_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + if (frame.type.has_value()) + { + tipb::WindowFrame * mut_frame = window->mutable_frame(); + mut_frame->set_type(frame.type.value()); + if (frame.start.has_value()) + { + auto * start = mut_frame->mutable_start(); + start->set_offset(std::get<2>(frame.start.value())); + start->set_unbounded(std::get<1>(frame.start.value())); + start->set_type(std::get<0>(frame.start.value())); + } + + if (frame.end.has_value()) + { + auto * end = mut_frame->mutable_end(); + end->set_offset(std::get<2>(frame.end.value())); + end->set_unbounded(std::get<1>(frame.end.value())); + end->set_type(std::get<0>(frame.end.value())); + } + } + + auto * children_executor = window->mutable_child(); + return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); +} + +bool Sort::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeSort); + tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); + tipb::Sort * sort = tipb_executor->mutable_sort(); + sort->set_ispartialsort(is_partial_sort); + + for (const auto & child : by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = sort->add_byitems(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + auto * children_executor = sort->mutable_child(); + return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); +} + } // namespace mock ExecutorPtr compileTableScan(size_t & executor_index, TableInfo & table_info, String & table_alias, bool append_pk_column) @@ -1449,7 +1557,7 @@ ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPt ci.tp = TiDB::TypeLongLong; ci.flag = TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull; } - else if (func->name == "max" || func->name == "min" || func->name == "first_row") + else if (func->name == "max" || func->name == "min" || func->name == "first_row" || func->name == "sum") { ci = children_ci[0]; ci.flag &= ~TiDB::ColumnFlagNotNull; @@ -1533,7 +1641,6 @@ ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr se } } } - auto project = std::make_shared(executor_index, output_schema, std::move(exprs)); project->children.push_back(input); return project; @@ -1570,11 +1677,102 @@ ExecutorPtr 
compileExchangeSender(ExecutorPtr input, size_t & executor_index, ti return exchange_sender; } - -ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema) +ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count) { - ExecutorPtr exchange_receiver = std::make_shared(executor_index, schema); + ExecutorPtr exchange_receiver = std::make_shared(executor_index, schema, fine_grained_shuffle_stream_count); return exchange_receiver; } -} // namespace DB \ No newline at end of file +ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) +{ + std::vector partition_columns; + if (partition_by_expr_list != nullptr) + { + for (const auto & child : partition_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid partition by element", ErrorCodes::LOGICAL_ERROR); + partition_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + + std::vector order_columns; + if (order_by_expr_list != nullptr) + { + for (const auto & child : order_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + order_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + + DAGSchema output_schema; + output_schema.insert(output_schema.end(), input->output_schema.begin(), input->output_schema.end()); + + std::vector window_exprs; + if (func_desc_list != nullptr) + { + for (const auto & expr : func_desc_list->children) + { + const auto * func = typeid_cast(expr.get()); + window_exprs.push_back(expr); + std::vector children_ci; + for (const auto & arg : func->arguments->children) + { + children_ci.push_back(compileExpr(input->output_schema, arg)); + } + // TODO: add more window functions + TiDB::ColumnInfo ci; + switch (window_func_name_to_sig[func->name]) + { + case tipb::ExprType::RowNumber: + case tipb::ExprType::Rank: + case tipb::ExprType::DenseRank: + { + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagBinary; + break; + } + default: + throw Exception(fmt::format("Unsupported window function {}", func->name), ErrorCodes::LOGICAL_ERROR); + } + output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); + } + } + + ExecutorPtr window = std::make_shared( + executor_index, + output_schema, + window_exprs, + std::move(partition_columns), + std::move(order_columns), + frame, + fine_grained_shuffle_stream_count); + window->children.push_back(input); + return window; +} + +ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count) +{ + std::vector order_columns; + if (order_by_expr_list != nullptr) + { + for (const auto & child : order_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + order_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + ExecutorPtr sort = std::make_shared(executor_index, input->output_schema, std::move(order_columns), is_partial_sort, fine_grained_shuffle_stream_count); + sort->children.push_back(input); + return sort; +} +} // namespace DB diff --git 
a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index 37d3f22b6e1..f39f4059d26 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,8 @@ #include #include +#include + namespace DB { namespace ErrorCodes @@ -136,8 +139,11 @@ struct ExchangeSender : Executor struct ExchangeReceiver : Executor { TaskMetas task_metas; - ExchangeReceiver(size_t & index, const DAGSchema & output) + uint64_t fine_grained_shuffle_stream_count; + + ExchangeReceiver(size_t & index, const DAGSchema & output, uint64_t fine_grained_shuffle_stream_count_ = 0) : Executor(index, "exchange_receiver_" + std::to_string(index), output) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) {} void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context &) override; @@ -272,6 +278,58 @@ struct Join : Executor void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; }; + +using MockWindowFrameBound = std::tuple; + +struct MockWindowFrame +{ + std::optional type; + std::optional start; + std::optional end; + // TODO: support calcFuncs +}; + +struct Window : Executor +{ + std::vector func_descs; + std::vector partition_by_exprs; + std::vector order_by_exprs; + MockWindowFrame frame; + uint64_t fine_grained_shuffle_stream_count; + + Window(size_t & index_, const DAGSchema & output_schema_, std::vector func_descs_, std::vector partition_by_exprs_, std::vector order_by_exprs_, MockWindowFrame frame_, uint64_t fine_grained_shuffle_stream_count_ = 0) + : Executor(index_, "window_" + std::to_string(index_), output_schema_) + , func_descs(std::move(func_descs_)) + , partition_by_exprs(std::move(partition_by_exprs_)) + , order_by_exprs(order_by_exprs_) + , frame(frame_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + { + } + // Currently the Window executor is only used in unit tests, which don't call columnPrune. + // TODO: call columnPrune in unit tests and further benchmark tests to eliminate unnecessary computation. + void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; +}; + +struct Sort : Executor +{ + std::vector by_exprs; + bool is_partial_sort; + uint64_t fine_grained_shuffle_stream_count; + + Sort(size_t & index_, const DAGSchema & output_schema_, std::vector by_exprs_, bool is_partial_sort_, uint64_t fine_grained_shuffle_stream_count_ = 0) + : Executor(index_, "sort_" + std::to_string(index_), output_schema_) + , by_exprs(by_exprs_) + , is_partial_sort(is_partial_sort_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + { + } + // Currently the Sort executor is only used in unit tests, which don't call columnPrune. + // TODO: call columnPrune in unit tests and further benchmark tests to eliminate unnecessary computation.
+ void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; +}; } // namespace mock using ExecutorPtr = std::shared_ptr; @@ -292,10 +350,11 @@ ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr r ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, tipb::ExchangeType exchange_type); -ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema); +ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count = 0); -void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & field, tipb::Expr * expr, Int32 collator_id); +ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); -//TODO: add compileWindow +ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); -} // namespace DB \ No newline at end of file +void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & field, tipb::Expr * expr, Int32 collator_id); +} // namespace DB diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index e9335d1e2bd..62a8b7537f1 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -290,8 +290,9 @@ BlockInputStreamPtr executeQuery(Context & context, RegionID region_id, const DA tipb_exchange_receiver.encoded_task_meta_size(), 10, /*req_id=*/"", - /*executor_id=*/""); - BlockInputStreamPtr ret = std::make_shared(exchange_receiver, /*req_id=*/"", /*executor_id=*/""); + /*executor_id=*/"", + /*fine_grained_shuffle_stream_count=*/0); + BlockInputStreamPtr ret = std::make_shared(exchange_receiver, /*req_id=*/"", /*executor_id=*/"", /*stream_id*/ 0); return ret; } else diff --git a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp index df93ee1c78d..3626041f428 100644 --- a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp @@ -40,7 +40,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); const String & database_name = typeid_cast(*args[1]).name; const String & table_name = typeid_cast(*args[2]).name; auto table = MockTiDB::instance().getTableByName(database_name, table_name); @@ -49,7 +49,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar if (4 + handle_column_size * 4 != args.size()) throw Exception("Args not matched, should be: region-id1, database-name, table-name, start1, end1, start2, end2, region-id2", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id2 = (RegionID)safeGet(typeid_cast(*args[args.size() - 1]).value); + auto region_id2 = static_cast(safeGet(typeid_cast(*args[args.size() - 1]).value)); auto table_id = table->id(); TiKVKey start_key1, start_key2, end_key1, end_key2; @@ -59,9 +59,17 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar std::vector 
start_keys2; std::vector end_keys1; std::vector end_keys2; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field1 = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum1 = TiDB::DatumBumpy(start_field1, column_info.tp); @@ -88,10 +96,10 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar } else { - HandleID start1 = (HandleID)safeGet(typeid_cast(*args[3]).value); - HandleID end1 = (HandleID)safeGet(typeid_cast(*args[4]).value); - HandleID start2 = (HandleID)safeGet(typeid_cast(*args[5]).value); - HandleID end2 = (HandleID)safeGet(typeid_cast(*args[6]).value); + auto start1 = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end1 = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto start2 = static_cast(safeGet(typeid_cast(*args[5]).value)); + auto end2 = static_cast(safeGet(typeid_cast(*args[6]).value)); start_key1 = RecordKVFormat::genKey(table_id, start1); start_key2 = RecordKVFormat::genKey(table_id, start2); end_key1 = RecordKVFormat::genKey(table_id, end1); @@ -110,7 +118,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar request.set_cmd_type(raft_cmdpb::AdminCmdType::BatchSplit); raft_cmdpb::BatchSplitResponse * splits = response.mutable_splits(); { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id); region->set_start_key(start_key1); region->set_end_key(end_key1); @@ -118,7 +126,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar *region->mutable_region_epoch() = new_epoch; } { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id2); region->set_start_key(start_key2); region->set_end_key(end_key2); @@ -144,8 +152,8 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: source-id1, target-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID target_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto target_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -157,7 +165,7 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::PrepareMerge); - auto prepare_merge = request.mutable_prepare_merge(); + auto * prepare_merge = request.mutable_prepare_merge(); { auto min_index = region->appliedIndex(); prepare_merge->set_min_index(min_index); @@ -184,8 +192,8 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D throw Exception("Args not matched, should be: source-id1, current-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID source_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID current_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto source_id = 
static_cast(safeGet(typeid_cast(*args[0]).value)); + auto current_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -196,7 +204,7 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D { request.set_cmd_type(raft_cmdpb::AdminCmdType::CommitMerge); - auto commit_merge = request.mutable_commit_merge(); + auto * commit_merge = request.mutable_commit_merge(); { commit_merge->set_commit(source_region->appliedIndex()); *commit_merge->mutable_source() = source_region->getMetaRegion(); @@ -220,7 +228,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -231,7 +239,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::RollbackMerge); - auto rollback_merge = request.mutable_rollback_merge(); + auto * rollback_merge = request.mutable_rollback_merge(); { auto merge_state = region->getMergeState(); rollback_merge->set_commit(merge_state.commit()); diff --git a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp index 7eecbbdf6f7..b5d3f252d0a 100644 --- a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp @@ -60,7 +60,7 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) { const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); TableID table_id = RegionBench::getTableID(context, database_name, table_name, ""); MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); auto & table_info = table->table_info; @@ -68,10 +68,16 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) size_t handle_column_size = is_common_handle ? 
table_info.getPrimaryIndexInfo().idx_cols.size() : 1; RegionPtr region; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + if (!is_common_handle) { - HandleID start = static_cast(safeGet(typeid_cast(*args[3]).value)); - HandleID end = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto start = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end = static_cast(safeGet(typeid_cast(*args[4]).value)); region = RegionBench::createRegion(table_id, region_id, start, end); } else { @@ -81,7 +87,8 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector end_keys; for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -105,8 +112,8 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) for (auto it = args_begin; it != args_end; it += len) { HandleID handle_id = is_common_handle ? 0 : static_cast(safeGet(typeid_cast(*it[0]).value)); - Timestamp tso = static_cast(safeGet(typeid_cast(*it[1]).value)); - UInt8 del = static_cast(safeGet(typeid_cast(*it[2]).value)); + auto tso = static_cast(safeGet(typeid_cast(*it[1]).value)); + auto del = static_cast(safeGet(typeid_cast(*it[2]).value)); { std::vector fields; @@ -122,9 +129,9 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector keys; // handle key for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; + auto start_field = RegionBench::convertField(column_info, fields[idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } @@ -168,7 +175,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotWithData(Context & context, const AST // DBGInvoke region_snapshot(region-id, start-key, end-key, database-name, table-name[, partition-id]) void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args, DBGInvoker::Printer output) { - RegionID region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); bool has_partition_id = false; size_t args_size = args.size(); if (dynamic_cast(args[args_size - 1].get()) != nullptr) @@ -198,9 +205,16 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args // Get start key and end key from multiple columns if it is a clustered index.
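+ // The primary-index columns are resolved by name via column_name_columns_index_map + // because idx_cols[i].offset is no longer populated (see MockTiDB::parseColumns).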
std::vector start_keys; std::vector end_keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -214,15 +228,15 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args } else { - HandleID start = static_cast(safeGet(typeid_cast(*args[1]).value)); - HandleID end = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start = static_cast(safeGet(typeid_cast(*args[1]).value)); + auto end = static_cast(safeGet(typeid_cast(*args[2]).value)); start_key = RecordKVFormat::genKey(table_id, start); end_key = RecordKVFormat::genKey(table_id, end); } region_info.set_start_key(start_key.toString()); region_info.set_end_key(end_key.toString()); - *region_info.add_peers() = createPeer(1, true); - *region_info.add_peers() = createPeer(2, true); + *region_info.add_peers() = tests::createPeer(1, true); + *region_info.add_peers() = tests::createPeer(2, true); auto peer_id = 1; auto start_decoded_key = RecordKVFormat::decodeTiKVKey(start_key); auto end_decoded_key = RecordKVFormat::decodeTiKVKey(end_key); @@ -432,9 +446,9 @@ void MockRaftCommand::dbgFuncIngestSST(Context & context, const ASTs & args, DBG { const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - RegionID start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); - RegionID end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); const auto & table_info = RegionBench::getTableInfo(context, database_name, table_name); @@ -555,7 +569,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyBlock(Context & context, const A throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); auto [region, block_cache] = GLOBAL_REGION_MAP.popRegionCache("__snap_" + std::to_string(region_id)); auto & tmt = context.getTMTContext(); context.getTMTContext().getKVStore()->checkAndApplySnapshot({region, std::move(block_cache)}, tmt); @@ -577,12 +591,12 @@ void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFiles(Context & context, c const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - RegionID start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); - RegionID end_handle = 
static_cast(safeGet(typeid_cast(*args[4]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); - const String schema_str = safeGet(typeid_cast(*args[5]).value); - String handle_pk_name = safeGet(typeid_cast(*args[6]).value); + const auto schema_str = safeGet(typeid_cast(*args[5]).value); + auto handle_pk_name = safeGet(typeid_cast(*args[6]).value); UInt64 test_fields = 1; if (args.size() > 7) @@ -677,10 +691,10 @@ void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFilesWithHandles(Context & const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); - const String schema_str = safeGet(typeid_cast(*args[3]).value); - String handle_pk_name = safeGet(typeid_cast(*args[4]).value); + const auto schema_str = safeGet(typeid_cast(*args[3]).value); + auto handle_pk_name = safeGet(typeid_cast(*args[4]).value); std::vector handles; for (size_t i = 5; i < args.size(); ++i) @@ -770,7 +784,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyDTFiles(Context & context, const if (args.size() != 1) throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); const auto region_name = "__snap_snap_" + std::to_string(region_id); auto [new_region, ingest_ids] = GLOBAL_REGION_MAP.popRegionSnap(region_name); auto & tmt = context.getTMTContext(); diff --git a/dbms/src/Debug/dbgFuncRegion.cpp b/dbms/src/Debug/dbgFuncRegion.cpp index b2024eac1d8..f65a18b8fd0 100644 --- a/dbms/src/Debug/dbgFuncRegion.cpp +++ b/dbms/src/Debug/dbgFuncRegion.cpp @@ -61,9 +61,15 @@ void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer { std::vector start_keys; std::vector end_keys; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); diff --git a/dbms/src/Debug/dbgFuncSchema.cpp b/dbms/src/Debug/dbgFuncSchema.cpp index c388015dc10..9ef07f16e8b 100644 --- a/dbms/src/Debug/dbgFuncSchema.cpp +++ b/dbms/src/Debug/dbgFuncSchema.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -137,4 +138,5 @@ void dbgFuncIsTombstone(Context & context, const ASTs & args, DBGInvoker::Printe output(fmt_buf.toString()); } + } // namespace DB \ No newline at end of file diff --git a/dbms/src/Debug/dbgFuncSchema.h b/dbms/src/Debug/dbgFuncSchema.h index 162bc0af46b..51ab3ad41cf 100644 --- a/dbms/src/Debug/dbgFuncSchema.h +++ b/dbms/src/Debug/dbgFuncSchema.h @@ -46,5 +46,4 @@ void dbgFuncResetSchemas(Context & context, 
const ASTs & args, DBGInvoker::Printer output); // Usage: // ./storage-client.sh "DBGInvoke is_tombstone(db_name, table_name)" void dbgFuncIsTombstone(Context & context, const ASTs & args, DBGInvoker::Printer output); - } // namespace DB diff --git a/dbms/src/Debug/dbgFuncSchemaName.cpp b/dbms/src/Debug/dbgFuncSchemaName.cpp index 4c2ad86bd62..3aa7b6e3af4 100644 --- a/dbms/src/Debug/dbgFuncSchemaName.cpp +++ b/dbms/src/Debug/dbgFuncSchemaName.cpp @@ -128,4 +128,109 @@ BlockInputStreamPtr dbgFuncQueryMapped(Context & context, const ASTs & args) return executeQuery(query, context, true).in; } + +void dbgFuncGetTiflashReplicaCount(Context & context, const ASTs & args, DBGInvoker::Printer output) +{ + if (args.size() != 2) + throw Exception("Args not matched, should be: database-name, table-name", ErrorCodes::BAD_ARGUMENTS); + + const String & database_name = typeid_cast(*args[0]).name; + FmtBuffer fmt_buf; + + const String & table_name = typeid_cast(*args[1]).name; + auto mapped = mappedTable(context, database_name, table_name); + auto storage = context.getTable(mapped->first, mapped->second); + auto managed_storage = std::dynamic_pointer_cast(storage); + if (!managed_storage) + throw Exception(database_name + "." + table_name + " is not ManageableStorage", ErrorCodes::BAD_ARGUMENTS); + + fmt_buf.append((std::to_string(managed_storage->getTableInfo().replica_info.count))); + + output(fmt_buf.toString()); +} + +void dbgFuncGetPartitionTablesTiflashReplicaCount(Context & context, const ASTs & args, DBGInvoker::Printer output) +{ + if (args.size() != 2) + throw Exception("Args not matched, should be: database-name, table-name", ErrorCodes::BAD_ARGUMENTS); + + const String & database_name = typeid_cast(*args[0]).name; + FmtBuffer fmt_buf; + + const String & table_name = typeid_cast(*args[1]).name; + auto mapped = mappedTable(context, database_name, table_name); + auto storage = context.getTable(mapped->first, mapped->second); + auto managed_storage = std::dynamic_pointer_cast(storage); + if (!managed_storage) + throw Exception(database_name + "." + table_name + " is not ManageableStorage", ErrorCodes::BAD_ARGUMENTS); + + auto table_info = managed_storage->getTableInfo(); + + if (!table_info.isLogicalPartitionTable()) + throw Exception(database_name + "." + table_name + " is not a logical partition table", ErrorCodes::BAD_ARGUMENTS); + + SchemaNameMapper name_mapper; + for (const auto & part_def : table_info.partition.definitions) + { + auto partition_table_info = table_info.producePartitionTableInfo(part_def.id, name_mapper); + auto partition_storage = context.getTMTContext().getStorages().get(partition_table_info->id); + fmt_buf.append((std::to_string(partition_storage->getTableInfo().replica_info.count))); + fmt_buf.append("/"); + } + + output(fmt_buf.toString()); +} + +void dbgFuncGetTiflashMode(Context & context, const ASTs & args, DBGInvoker::Printer output) +{ + if (args.size() != 2) + throw Exception("Args not matched, should be: database-name, table-name", ErrorCodes::BAD_ARGUMENTS); + + const String & database_name = typeid_cast(*args[0]).name; + FmtBuffer fmt_buf; + + const String & table_name = typeid_cast(*args[1]).name; + auto mapped = mappedTable(context, database_name, table_name); + auto storage = context.getTable(mapped->first, mapped->second); + auto managed_storage = std::dynamic_pointer_cast(storage); + if (!managed_storage) + throw Exception(database_name + "." + table_name + " is not ManageableStorage", ErrorCodes::BAD_ARGUMENTS); + + fmt_buf.append((TiFlashModeToString(managed_storage->getTableInfo().tiflash_mode))); + + output(fmt_buf.toString()); +} + +void dbgFuncGetPartitionTablesTiflashMode(Context & context, const ASTs & args, DBGInvoker::Printer output) +{ + if (args.size() != 2) + throw Exception("Args not matched, should be: database-name, table-name", ErrorCodes::BAD_ARGUMENTS); + + const String & database_name = typeid_cast(*args[0]).name; + FmtBuffer fmt_buf; + + const String & table_name = typeid_cast(*args[1]).name; + auto mapped = mappedTable(context, database_name, table_name); + auto storage = context.getTable(mapped->first, mapped->second); + auto managed_storage = std::dynamic_pointer_cast(storage); + if (!managed_storage) + throw Exception(database_name + "." + table_name + " is not ManageableStorage", ErrorCodes::BAD_ARGUMENTS); + + auto table_info = managed_storage->getTableInfo(); + + if (!table_info.isLogicalPartitionTable()) + throw Exception(database_name + "." + table_name + " is not a logical partition table", ErrorCodes::BAD_ARGUMENTS); + + SchemaNameMapper name_mapper; + for (const auto & part_def : table_info.partition.definitions) + { + auto partition_table_info = table_info.producePartitionTableInfo(part_def.id, name_mapper); + auto partition_storage = context.getTMTContext().getStorages().get(partition_table_info->id); + fmt_buf.append((TiFlashModeToString(partition_storage->getTableInfo().tiflash_mode))); + fmt_buf.append("/"); + } + + output(fmt_buf.toString()); +} + } // namespace DB diff --git a/dbms/src/Debug/dbgFuncSchemaName.h b/dbms/src/Debug/dbgFuncSchemaName.h index 8e95aaab908..ec18f89e911 100644 --- a/dbms/src/Debug/dbgFuncSchemaName.h +++ b/dbms/src/Debug/dbgFuncSchemaName.h @@ -40,4 +40,24 @@ void dbgFuncMappedTable(Context & context, const ASTs & args, DBGInvoker::Printe // ./storage-client.sh "DBGInvoke query_mapped('select * from $d.$t', database_name[, table_name])" BlockInputStreamPtr dbgFuncQueryMapped(Context & context, const ASTs & args); +// Get table's tiflash replica count with mapped table name +// Usage: +// ./storage-client.sh "DBGInvoke get_tiflash_replica_count(db_name, table_name)" +void dbgFuncGetTiflashReplicaCount(Context & context, const ASTs & args, DBGInvoker::Printer output); + +// Get the logical table's partition tables' tiflash replica counts with mapped table name +// Usage: +// ./storage-client.sh "DBGInvoke get_partition_tables_tiflash_replica_count(db_name, table_name)" +void dbgFuncGetPartitionTablesTiflashReplicaCount(Context & context, const ASTs & args, DBGInvoker::Printer output); + +// Get table's tiflash mode with mapped table name +// Usage: +// ./storage-client.sh "DBGInvoke get_tiflash_mode(db_name, table_name)" +void dbgFuncGetTiflashMode(Context & context, const ASTs & args, DBGInvoker::Printer output); + +// Get the logical table's partition tables' tiflash modes with mapped table name +// Usage: +// ./storage-client.sh "DBGInvoke get_partition_tables_tiflash_mode(db_name, table_name)" +void dbgFuncGetPartitionTablesTiflashMode(Context & context, const ASTs & args, DBGInvoker::Printer output); + } // namespace DB diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 685b2563a3b..854d8a18bd5 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -310,7 +310,7 @@ void insert( // // Parse the fields in the inserted row std::vector fields; { - for (ASTs::const_iterator it =
values_begin; it != values_end; ++it) + for (auto it = values_begin; it != values_end; ++it) { auto field = typeid_cast((*it).get())->value; fields.emplace_back(field); @@ -330,11 +330,18 @@ void insert( // if (table_info.is_common_handle) { std::vector keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - const auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - const auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + const auto & col_idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[col_idx]; + auto start_field = RegionBench::convertField(column_info, fields[col_idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 8573bdad6bd..0d7243ede8f 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -12,57 +12,36 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include -#include #include +#include +#include #include -#include +#include #include -#include -#include #include -#include +#include +#include +#include #include #include #include -#include -#include -#include - - -namespace ProfileEvents -{ - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheRequests; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; -} - -namespace CurrentMetrics -{ - extern const Metric DictCacheRequests; -} +#include +#include +#include +#include +#include +#include namespace DB { - namespace ErrorCodes { - extern const int TYPE_MISMATCH; - extern const int BAD_ARGUMENTS; - extern const int UNSUPPORTED_METHOD; - extern const int LOGICAL_ERROR; -} +extern const int TYPE_MISMATCH; +extern const int BAD_ARGUMENTS; +extern const int UNSUPPORTED_METHOD; +extern const int LOGICAL_ERROR; +} // namespace ErrorCodes inline size_t CacheDictionary::getCellIdx(const Key id) const @@ -73,15 +52,15 @@ inline size_t CacheDictionary::getCellIdx(const Key id) const } -CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, - const size_t size) - : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, - size_overlap_mask{this->size - 1}, - cells{this->size}, - rnd_engine(randomSeed()) +CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, const size_t size) + : name{name} + , dict_struct(dict_struct) + , 
source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))} + , size_overlap_mask{this->size - 1} + , cells{this->size} + , rnd_engine(randomSeed()) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{ @@ -100,13 +79,19 @@ void CacheDictionary::toParent(const PaddedPODArray & ids, PaddedPODArray(hierarchical_attribute->null_values); - getItemsNumber(*hierarchical_attribute, ids, out, [&] (const size_t) { return null_value; }); + getItemsNumber(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; }); } /// Allow to use single value in same way as array. -static inline CacheDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) { return arr[idx]; } -static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) { return value; } +static inline CacheDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) +{ + return arr[idx]; +} +static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) +{ + return value; +} template @@ -118,7 +103,7 @@ void CacheDictionary::isInImpl( /// Transform all children to parents until ancestor id or null_value will be reached. size_t size = out.size(); - memset(out.data(), 0xFF, size); /// 0xFF means "not calculated" + memset(out.data(), 0xFF, size); /// 0xFF means "not calculated" const auto null_value = std::get(hierarchical_attribute->null_values); @@ -224,19 +209,19 @@ void CacheDictionary::isInConstantVector( } -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, PaddedPODArray & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{\ - name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ - ErrorCodes::TYPE_MISMATCH};\ - \ - const auto null_value = std::get(attribute.null_values);\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t) { return null_value; });\ -} +#define DECLARE(TYPE) \ + void CacheDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, PaddedPODArray & out) const \ + { \ + auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{ \ + name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + const auto null_value = std::get(attribute.null_values); \ + \ + getItemsNumber(attribute, ids, out, [&](const size_t) { return null_value; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -260,22 +245,24 @@ void CacheDictionary::getString(const std::string & attribute_name, const Padded const auto null_value = StringRef{std::get(attribute.null_values)}; - getItemsString(attribute, ids, out, [&] (const size_t) { return null_value; }); + getItemsString(attribute, ids, out, [&](const size_t) { return null_value; }); } -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(\ - const std::string & attribute_name, const PaddedPODArray & ids, const PaddedPODArray & def,\ - PaddedPODArray & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{\ - name + ": type mismatch: attribute " + 
attribute_name + " has type " + toString(attribute.type),\ - ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t row) { return def[row]; });\ -} +#define DECLARE(TYPE) \ + void CacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const PaddedPODArray & def, \ + PaddedPODArray & out) const \ + { \ + auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{ \ + name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + getItemsNumber(attribute, ids, out, [&](const size_t row) { return def[row]; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -290,7 +277,9 @@ DECLARE(Float64) #undef DECLARE void CacheDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, + const std::string & attribute_name, + const PaddedPODArray & ids, + const ColumnString * const def, ColumnString * const out) const { auto & attribute = getAttribute(attribute_name); @@ -299,21 +288,24 @@ void CacheDictionary::getString( name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, ids, out, [&] (const size_t row) { return def->getDataAt(row); }); + getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); }); } -#define DECLARE(TYPE)\ -void CacheDictionary::get##TYPE(\ - const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, PaddedPODArray & out) const\ -{\ - auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{\ - name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ - ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, ids, out, [&] (const size_t) { return def; });\ -} +#define DECLARE(TYPE) \ + void CacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const TYPE def, \ + PaddedPODArray & out) const \ + { \ + auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{ \ + name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + getItemsNumber(attribute, ids, out, [&](const size_t) { return def; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -328,7 +320,9 @@ DECLARE(Float64) #undef DECLARE void CacheDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const String & def, + const std::string & attribute_name, + const PaddedPODArray & ids, + const String & def, ColumnString * const out) const { auto & attribute = getAttribute(attribute_name); @@ -337,7 +331,7 @@ void CacheDictionary::getString( name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, ids, out, [&] (const size_t) { return StringRef{def}; }); + getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; }); } @@ -390,8 +384,6 @@ void CacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray const auto rows = ext::size(ids); { - const 
ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows)) @@ -416,10 +408,6 @@ void CacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); @@ -427,21 +415,19 @@ void CacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray return; std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); /// request new values - update(required_ids, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = true; - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = false; - }); + update( + required_ids, + [&](const auto id, const auto) { + for (const auto row : outdated_ids[id]) + out[row] = true; + }, + [&](const auto id, const auto) { + for (const auto row : outdated_ids[id]) + out[row] = false; + }); } @@ -476,68 +462,68 @@ CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const Attrib switch (type) { - case AttributeUnderlyingType::UInt8: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt8); - break; - case AttributeUnderlyingType::UInt16: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt16); - break; - case AttributeUnderlyingType::UInt32: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt32); - break; - case AttributeUnderlyingType::UInt64: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt64); - break; - case AttributeUnderlyingType::UInt128: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(UInt128); - break; - case AttributeUnderlyingType::Int8: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Int8); - break; - case AttributeUnderlyingType::Int16: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Int16); - break; - case AttributeUnderlyingType::Int32: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Int32); - break; - case AttributeUnderlyingType::Int64: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Int64); - break; - case AttributeUnderlyingType::Float32: - std::get(attr.null_values) = null_value.get(); - 
std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Float32); - break; - case AttributeUnderlyingType::Float64: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(Float64); - break; - case AttributeUnderlyingType::String: - std::get(attr.null_values) = null_value.get(); - std::get>(attr.arrays) = std::make_unique>(size); - bytes_allocated += size * sizeof(StringRef); - if (!string_arena) - string_arena = std::make_unique(); - break; + case AttributeUnderlyingType::UInt8: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt8); + break; + case AttributeUnderlyingType::UInt16: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt16); + break; + case AttributeUnderlyingType::UInt32: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt32); + break; + case AttributeUnderlyingType::UInt64: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt64); + break; + case AttributeUnderlyingType::UInt128: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(UInt128); + break; + case AttributeUnderlyingType::Int8: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Int8); + break; + case AttributeUnderlyingType::Int16: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Int16); + break; + case AttributeUnderlyingType::Int32: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Int32); + break; + case AttributeUnderlyingType::Int64: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Int64); + break; + case AttributeUnderlyingType::Float32: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Float32); + break; + case AttributeUnderlyingType::Float64: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(Float64); + break; + case AttributeUnderlyingType::String: + std::get(attr.null_values) = null_value.get(); + std::get>(attr.arrays) = std::make_unique>(size); + bytes_allocated += size * sizeof(StringRef); + if (!string_arena) + string_arena = std::make_unique(); + break; } return attr; @@ -551,8 +537,8 @@ void CacheDictionary::getItemsNumber( PaddedPODArray & out, DefaultGetter && get_default) const { - if (false) {} -#define DISPATCH(TYPE) \ + if (false) {} // NOLINT +#define DISPATCH(TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) \ getItemsNumberImpl(attribute, ids, out, std::forward(get_default)); DISPATCH(UInt8) @@ -567,8 +553,7 @@ void CacheDictionary::getItemsNumber( DISPATCH(Float32) DISPATCH(Float64) #undef DISPATCH - else - throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); + else throw 
Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); } template @@ -586,8 +571,6 @@ void CacheDictionary::getItemsNumberImpl( size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows)) @@ -618,10 +601,6 @@ void CacheDictionary::getItemsNumberImpl( } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); @@ -629,23 +608,21 @@ void CacheDictionary::getItemsNumberImpl( return; std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); /// request new values - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; + update( + required_ids, + [&](const auto id, const auto cell_idx) { + const auto attribute_value = attribute_array[cell_idx]; - for (const auto row : outdated_ids[id]) - out[row] = static_cast(attribute_value); - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = get_default(row); - }); + for (const auto row : outdated_ids[id]) + out[row] = static_cast(attribute_value); // NOLINT + }, + [&](const auto id, const auto) { + for (const auto row : outdated_ids[id]) + out[row] = get_default(row); + }); } template @@ -666,8 +643,6 @@ void CacheDictionary::getItemsString( /// perform optimistic version, fallback to pessimistic if failed { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, discard on fail for (const auto row : ext::range(0, rows)) @@ -710,8 +685,6 @@ void CacheDictionary::getItemsString( size_t total_length = 0; size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); for (const auto row : ext::range(0, ids.size())) { @@ -741,10 +714,6 @@ void CacheDictionary::getItemsString( } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); @@ -752,22 +721,20 @@ void CacheDictionary::getItemsString( if (!outdated_ids.empty()) { std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); - - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; 
- - map[id] = String{attribute_value}; - total_length += (attribute_value.size + 1) * outdated_ids[id].size(); - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - total_length += get_default(row).size + 1; - }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); + + update( + required_ids, + [&](const auto id, const auto cell_idx) { + const auto attribute_value = attribute_array[cell_idx]; + + map[id] = String{attribute_value}; + total_length += (attribute_value.size + 1) * outdated_ids[id].size(); + }, + [&](const auto id, const auto) { + for (const auto row : outdated_ids[id]) + total_length += get_default(row).size + 1; + }); } out->getChars().reserve(total_length); @@ -790,18 +757,13 @@ void CacheDictionary::update( { std::unordered_map remaining_ids{requested_ids.size()}; for (const auto id : requested_ids) - remaining_ids.insert({ id, 0 }); + remaining_ids.insert({id, 0}); - std::uniform_int_distribution distribution - { + std::uniform_int_distribution distribution{ dict_lifetime.min_sec, - dict_lifetime.max_sec - }; - - const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; + dict_lifetime.max_sec}; { - CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; Stopwatch watch; auto stream = source_ptr->loadIds(requested_ids); stream->readPrefix(); @@ -810,7 +772,7 @@ void CacheDictionary::update( while (const auto block = stream->read()) { - const auto id_column = typeid_cast(block.safeGetByPosition(0).column.get()); + const auto * id_column = typeid_cast(block.safeGetByPosition(0).column.get()); if (!id_column) throw Exception{ name + ": id column has type different from UInt64.", @@ -819,8 +781,7 @@ void CacheDictionary::update( const auto & ids = id_column->getData(); /// cache column pointers - const auto column_ptrs = ext::map(ext::range(0, attributes.size()), [&block] (size_t i) - { + const auto column_ptrs = ext::map(ext::range(0, attributes.size()), [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); }); @@ -859,9 +820,6 @@ void CacheDictionary::update( } stream->readSuffix(); - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); - ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); } size_t not_found_num = 0, found_num = 0; @@ -903,10 +861,6 @@ void CacheDictionary::update( /// inform caller that the cell has not been found on_id_not_found(id, cell_idx); } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); - ProfileEvents::increment(ProfileEvents::DictCacheRequests); } @@ -914,32 +868,54 @@ void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int8: 
std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::String: - { - const auto & null_value_ref = std::get(attribute.null_values); - auto & string_ref = std::get>(attribute.arrays)[idx]; - - if (string_ref.data != null_value_ref.data()) - { - if (string_ref.data) - string_arena->free(const_cast(string_ref.data), string_ref.size); + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::String: + { + const auto & null_value_ref = std::get(attribute.null_values); + auto & string_ref = std::get>(attribute.arrays)[idx]; - string_ref = StringRef{null_value_ref}; - } + if (string_ref.data != null_value_ref.data()) + { + if (string_ref.data) + string_arena->free(const_cast(string_ref.data), string_ref.size); - break; + string_ref = StringRef{null_value_ref}; } + + break; + } } } @@ -947,39 +923,61 @@ void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, co { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int32: 
std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::String: - { - const auto & string = value.get(); - auto & string_ref = std::get>(attribute.arrays)[idx]; - const auto & null_value_ref = std::get(attribute.null_values); - - /// free memory unless it points to a null_value - if (string_ref.data && string_ref.data != null_value_ref.data()) - string_arena->free(const_cast(string_ref.data), string_ref.size); + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::String: + { + const auto & string = value.get(); + auto & string_ref = std::get>(attribute.arrays)[idx]; + const auto & null_value_ref = std::get(attribute.null_values); - const auto size = string.size(); - if (size != 0) - { - auto string_ptr = string_arena->alloc(size + 1); - std::copy(string.data(), string.data() + size + 1, string_ptr); - string_ref = StringRef{string_ptr, size}; - } - else - string_ref = {}; + /// free memory unless it points to a null_value + if (string_ref.data && string_ref.data != null_value_ref.data()) + string_arena->free(const_cast(string_ref.data), string_ref.size); - break; + const auto size = string.size(); + if (size != 0) + { + auto * string_ptr = string_arena->alloc(size + 1); + std::copy(string.data(), string.data() + size + 1, string_ptr); + string_ref = StringRef{string_ptr, size}; } + else + string_ref = {}; + + break; + } } } @@ -989,22 +987,18 @@ CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & a if (it == std::end(attribute_index_by_name)) throw Exception{ name + ": no such attribute '" + attribute_name + "'", - ErrorCodes::BAD_ARGUMENTS - }; + ErrorCodes::BAD_ARGUMENTS}; return attributes[it->second]; } bool CacheDictionary::isEmptyCell(const UInt64 idx) const { - return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data - == ext::safe_bit_cast(CellMetadata::time_point_t())); + return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t())); } PaddedPODArray CacheDictionary::getCachedIds() const { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - PaddedPODArray array; for (size_t idx = 0; 
idx < cells.size(); ++idx) { @@ -1024,4 +1018,4 @@ BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_na } -} +} // namespace DB diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp index 330ee036136..fb9a94b29a0 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp @@ -12,48 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include #include #include -#include -#include -#include -#include #include -#include -#include - - -namespace ProfileEvents -{ - - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; -} - -namespace CurrentMetrics -{ - extern const Metric DictCacheRequests; -} +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { - namespace ErrorCodes { - extern const int TYPE_MISMATCH; - extern const int BAD_ARGUMENTS; - extern const int UNSUPPORTED_METHOD; -} +extern const int TYPE_MISMATCH; +extern const int BAD_ARGUMENTS; +extern const int UNSUPPORTED_METHOD; +} // namespace ErrorCodes inline UInt64 ComplexKeyCacheDictionary::getCellIdx(const StringRef key) const @@ -64,13 +43,14 @@ inline UInt64 ComplexKeyCacheDictionary::getCellIdx(const StringRef key) const } -ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, - const size_t size) - : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, - size_overlap_mask{this->size - 1}, - rnd_engine(randomSeed()) +ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, const size_t size) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))} + , size_overlap_mask{this->size - 1} + , rnd_engine(randomSeed()) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{ @@ -85,7 +65,9 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const ComplexKeyCacheDictio {} void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, ColumnString * out) const { dict_struct.validateKeyTypes(key_types); @@ -98,12 +80,15 @@ void ComplexKeyCacheDictionary::getString( const auto null_value = StringRef{std::get(attribute.null_values)}; - getItemsString(attribute, key_columns, out, [&] (const size_t) { return null_value; }); + getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; }); } void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & 
key_columns, const DataTypes & key_types, - const ColumnString * const def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); @@ -113,12 +98,15 @@ void ComplexKeyCacheDictionary::getString( name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, key_columns, out, [&] (const size_t row) { return def->getDataAt(row); }); + getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); }); } void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const String & def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); @@ -128,7 +116,7 @@ void ComplexKeyCacheDictionary::getString( name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, key_columns, out, [&] (const size_t) { return StringRef{def}; }); + getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; }); } /// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag, @@ -190,8 +178,6 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows_num)) @@ -220,9 +206,6 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes } } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); query_count.fetch_add(rows_num, std::memory_order_relaxed); hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release); @@ -231,18 +214,18 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes return; std::vector required_rows(outdated_keys.size()); - std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), - [] (auto & pair) { return pair.getMapped().front(); }); + std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); }); /// request new values - update(key_columns, keys_array, required_rows, - [&] (const StringRef key, const auto) - { + update( + key_columns, + keys_array, + required_rows, + [&](const StringRef key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = true; }, - [&] (const StringRef key, const auto) - { + [&](const StringRef key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = false; }); @@ -297,8 +280,11 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t 
row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, Pool & pool) + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + Pool & pool) { const auto keys_size = key_columns.size(); size_t sum_keys_size{}; @@ -337,25 +323,32 @@ StringRef ComplexKeyCacheDictionary::placeKeysInPool( } } - return { place, sum_keys_size }; + return {place, sum_keys_size}; } /// Explicit instantiations. template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, Arena & pool); + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + Arena & pool); template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, ArenaWithFreeLists & pool); + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + ArenaWithFreeLists & pool); StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( - const size_t row, const Columns & key_columns) const + const size_t row, + const Columns & key_columns) const { - const auto res = fixed_size_keys_pool->alloc(); - auto place = res; + auto * const res = fixed_size_keys_pool->alloc(); + auto * place = res; for (const auto & key_column : key_columns) { @@ -364,36 +357,33 @@ StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( place += key.size; } - return { res, key_size }; + return {res, key_size}; } StringRef ComplexKeyCacheDictionary::copyIntoArena(StringRef src, Arena & arena) { char * allocated = arena.alloc(src.size); memcpy(allocated, src.data, src.size); - return { allocated, src.size }; + return {allocated, src.size}; } StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const { - const auto res = key_size_is_fixed ? fixed_size_keys_pool->alloc() : keys_pool->alloc(key.size); + auto * const res = key_size_is_fixed ? 
fixed_size_keys_pool->alloc() : keys_pool->alloc(key.size); memcpy(res, key.data, key.size); - return { res, key.size }; + return {res, key.size}; } bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const { - return (cells[idx].key == StringRef{} && (idx != zero_cell_idx - || cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t()))); + return (cells[idx].key == StringRef{} && (idx != zero_cell_idx || cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t()))); } BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const { std::vector keys; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - for (auto idx : ext::range(0, cells.size())) if (!isEmptyCell(idx) && !cells[idx].isDefault()) @@ -404,4 +394,4 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & return std::make_shared(shared_from_this(), max_block_size, keys, column_names); } -} +} // namespace DB diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h index feb61261f1d..19fe5214fef 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -14,12 +14,6 @@ #pragma once -#include -#include -#include -#include -#include -#include #include #include #include @@ -29,24 +23,17 @@ #include #include #include + +#include +#include #include #include #include +#include #include - - -namespace ProfileEvents -{ -extern const Event DictCacheKeysRequested; -extern const Event DictCacheKeysRequestedMiss; -extern const Event DictCacheKeysRequestedFound; -extern const Event DictCacheKeysExpired; -extern const Event DictCacheKeysNotFound; -extern const Event DictCacheKeysHit; -extern const Event DictCacheRequestTimeNs; -extern const Event DictCacheLockWriteNs; -extern const Event DictCacheLockReadNs; -} +#include +#include +#include namespace DB { @@ -54,10 +41,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase { public: ComplexKeyCacheDictionary(const std::string & name, - const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, - const DictionaryLifetime dict_lifetime, - const size_t size); + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, + const size_t size); ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other); @@ -144,9 +131,12 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase /// In all functions below, key_columns must be full (non-constant) columns. /// See the requirement in IDataType.h for text-serialization functions. 
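The `DECLARE(TYPE)` blocks being re-aligned here are an X-macro: one invocation per supported numeric type expands into a typed getter, which keeps the dozen overloads in lockstep. A self-contained sketch of the idiom follows, with an illustrative class and type aliases rather than the real dictionary interface.

```cpp
#include <cstdint>
#include <string>
#include <vector>

// ClickHouse-style aliases, reproduced here so the sketch stands alone.
using UInt8 = std::uint8_t;
using Int64 = std::int64_t;
using Float64 = double;

struct ToyDictionary
{
    // Each DECLARE(TYPE) expands to one typed getter declaration, e.g.
    // DECLARE(Int64) -> void getInt64(const std::string &, std::vector<Int64> &) const;
#define DECLARE(TYPE) \
    void get##TYPE(const std::string & attribute_name, std::vector<TYPE> & out) const;
    DECLARE(UInt8)
    DECLARE(Int64)
    DECLARE(Float64)
#undef DECLARE
};
```

The trailing `#undef` is what allows the same macro name to be redefined for the defaulted-value variants further down the header.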
-#define DECLARE(TYPE) \ - void get##TYPE( \ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + PaddedPODArray & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -164,10 +154,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase #define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const PaddedPODArray & def, \ - PaddedPODArray & out) const; + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ + PaddedPODArray & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -182,17 +172,17 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase #undef DECLARE void getString(const std::string & attribute_name, - const Columns & key_columns, - const DataTypes & key_types, - const ColumnString * const def, - ColumnString * const out) const; + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const; #define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const TYPE def, \ - PaddedPODArray & out) const; + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + PaddedPODArray & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -207,10 +197,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase #undef DECLARE void getString(const std::string & attribute_name, - const Columns & key_columns, - const DataTypes & key_types, - const String & def, - ColumnString * const out) const; + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const; void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; @@ -263,17 +253,17 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase AttributeUnderlyingType type; std::tuple null_values; std::tuple, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType> + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType> arrays; }; @@ -283,7 +273,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase template void getItemsNumber( - Attribute & attribute, const Columns & key_columns, PaddedPODArray & out, DefaultGetter && get_default) const + Attribute & attribute, + const Columns & key_columns, + PaddedPODArray & out, + DefaultGetter && get_default) const { if (false) { @@ -308,7 +301,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase template void getItemsNumberImpl( - Attribute & attribute, const Columns & key_columns, PaddedPODArray & out, DefaultGetter && get_default) const + Attribute & attribute, + const Columns & key_columns, + PaddedPODArray & out, + DefaultGetter && get_default) const { /// Mapping: -> { all indices `i` of `key_columns` such that `key_columns[i]` = } MapType> outdated_keys; @@ -322,8 
+318,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows_num)) @@ -354,9 +348,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase } } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); query_count.fetch_add(rows_num, std::memory_order_relaxed); hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release); @@ -365,19 +356,21 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase std::vector required_rows(outdated_keys.size()); std::transform( - std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); }); + std::begin(outdated_keys), + std::end(outdated_keys), + std::begin(required_rows), + [](auto & pair) { return pair.getMapped().front(); }); /// request new values - update(key_columns, + update( + key_columns, keys_array, required_rows, - [&](const StringRef key, const size_t cell_idx) - { + [&](const StringRef key, const size_t cell_idx) { for (const auto row : outdated_keys[key]) out[row] = static_cast(attribute_array[cell_idx]); }, - [&](const StringRef key, const size_t) - { + [&](const StringRef key, const size_t) { for (const auto row : outdated_keys[key]) out[row] = get_default(row); }); @@ -400,8 +393,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase /// perform optimistic version, fallback to pessimistic if failed { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, discard on fail for (const auto row : ext::range(0, rows_num)) @@ -446,8 +437,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase size_t total_length = 0; size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - const auto now = std::chrono::system_clock::now(); for (const auto row : ext::range(0, rows_num)) { @@ -477,9 +466,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase } } } - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); query_count.fetch_add(rows_num, std::memory_order_relaxed); hit_count.fetch_add(rows_num - outdated_keys.size(), std::memory_order_release); @@ -488,16 +474,15 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase if (!outdated_keys.empty()) { std::vector required_rows(outdated_keys.size()); - std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) - { + std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); }); - update(key_columns, + update( + key_columns, keys_array, required_rows, - [&](const StringRef key, const size_t cell_idx) - { + [&](const StringRef key, const 
size_t cell_idx) { const StringRef attribute_value = attribute_array[cell_idx]; /// We must copy key and value to own memory, because it may be replaced with another @@ -508,8 +493,7 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase map[copied_key] = copied_value; total_length += (attribute_value.size + 1) * outdated_keys[key].size(); }, - [&](const StringRef key, const size_t) - { + [&](const StringRef key, const size_t) { for (const auto row : outdated_keys[key]) total_length += get_default(row).size + 1; }); @@ -521,17 +505,17 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase { const StringRef key = keys_array[row]; const auto it = map.find(key); - const auto string_ref = it != std::end(map) ? it->getMapped(): get_default(row); + const auto string_ref = it != std::end(map) ? it->getMapped() : get_default(row); out->insertData(string_ref.data, string_ref.size); } }; template void update(const Columns & in_key_columns, - const PODArray & in_keys, - const std::vector & in_requested_rows, - PresentKeyHandler && on_cell_updated, - AbsentKeyHandler && on_key_not_found) const + const PODArray & in_keys, + const std::vector & in_requested_rows, + PresentKeyHandler && on_cell_updated, + AbsentKeyHandler && on_key_not_found) const { MapType remaining_keys{in_requested_rows.size()}; for (const auto row : in_requested_rows) @@ -539,7 +523,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase std::uniform_int_distribution distribution(dict_lifetime.min_sec, dict_lifetime.max_sec); - const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; { Stopwatch watch; auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows); @@ -555,10 +538,11 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase { /// cache column pointers const auto key_columns = ext::map( - ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); + ext::range(0, keys_size), + [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); const auto attribute_columns = ext::map(ext::range(0, attributes_size), - [&](const size_t attribute_idx) { return block.safeGetByPosition(keys_size + attribute_idx).column; }); + [&](const size_t attribute_idx) { return block.safeGetByPosition(keys_size + attribute_idx).column; }); const auto rows_num = block.rows(); @@ -612,9 +596,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase } stream->readSuffix(); - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, in_requested_rows.size()); - ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); } size_t found_num = 0; @@ -671,9 +652,6 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase /// inform caller that the cell has not been found on_key_not_found(key, cell_idx); } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, found_num); - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); }; UInt64 getCellIdx(const StringRef key) const; @@ -690,10 +668,10 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase template static StringRef placeKeysInPool(const size_t row, - const Columns & key_columns, - StringRefs & keys, - const std::vector & key_attributes, - Arena & pool); + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + Arena & pool); StringRef placeKeysInFixedSizePool(const size_t row, const 
Columns & key_columns) const; @@ -752,4 +730,4 @@ class ComplexKeyCacheDictionary final : public IDictionaryBase const std::chrono::time_point creation_time = std::chrono::system_clock::now(); }; -} +} // namespace DB diff --git a/dbms/src/Encryption/WriteBufferFromFileProvider.cpp b/dbms/src/Encryption/WriteBufferFromFileProvider.cpp index 4c99b8e24b1..a17dd85d379 100644 --- a/dbms/src/Encryption/WriteBufferFromFileProvider.cpp +++ b/dbms/src/Encryption/WriteBufferFromFileProvider.cpp @@ -19,7 +19,6 @@ namespace ProfileEvents { extern const Event WriteBufferFromFileDescriptorWrite; -extern const Event WriteBufferFromFileDescriptorWriteFailed; extern const Event WriteBufferFromFileDescriptorWriteBytes; } // namespace ProfileEvents @@ -72,8 +71,7 @@ void WriteBufferFromFileProvider::nextImpl() if ((-1 == res || 0 == res) && errno != EINTR) { - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); - throwFromErrno("Cannot write to file " + getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); + throwFromErrno("Cannot write to file " + getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); // NOLINT } if (res > 0) diff --git a/dbms/src/Encryption/createReadBufferFromFileBaseByFileProvider.cpp b/dbms/src/Encryption/createReadBufferFromFileBaseByFileProvider.cpp index b76d58c20cd..1858d474d60 100644 --- a/dbms/src/Encryption/createReadBufferFromFileBaseByFileProvider.cpp +++ b/dbms/src/Encryption/createReadBufferFromFileBaseByFileProvider.cpp @@ -20,10 +20,6 @@ #endif #include #include -namespace ProfileEvents -{ -extern const Event CreatedReadBufferOrdinary; -} namespace DB { @@ -46,7 +42,6 @@ std::unique_ptr createReadBufferFromFileBaseByFileProvid { if ((aio_threshold == 0) || (estimated_size < aio_threshold)) { - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); return std::make_unique( file_provider, filename_, @@ -75,7 +70,6 @@ createReadBufferFromFileBaseByFileProvider( size_t checksum_frame_size, int flags_) { - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); auto file = file_provider->newRandomAccessFile(filename_, encryption_path_, read_limiter, flags_); auto allocation_size = std::min(estimated_size, checksum_frame_size); switch (checksum_algorithm) diff --git a/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.cpp b/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.cpp index 2f1a2cbaeb8..5e8a6940598 100644 --- a/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.cpp +++ b/dbms/src/Encryption/createWriteBufferFromFileBaseByFileProvider.cpp @@ -20,11 +20,6 @@ #include #include -namespace ProfileEvents -{ -extern const Event CreatedWriteBufferOrdinary; -} - namespace DB { namespace ErrorCodes @@ -49,7 +44,6 @@ createWriteBufferFromFileBaseByFileProvider( { if ((aio_threshold == 0) || (estimated_size < aio_threshold)) { - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferOrdinary); return std::make_unique( file_provider, filename_, @@ -81,7 +75,6 @@ createWriteBufferFromFileBaseByFileProvider( int flags_, mode_t mode) { - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferOrdinary); auto file_ptr = file_provider->newWritableFile(filename_, encryption_path_, true, create_new_encryption_info_, write_limiter_, flags_, mode); switch (checksum_algorithm) diff --git a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp index a1c6061948a..1609c83b029 100644 --- a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp +++ 
b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -41,7 +40,7 @@ extern const int NOT_IMPLEMENTED; const IColumn * getNestedCol(const IColumn * flash_col) { if (flash_col->isColumnNullable()) - return dynamic_cast(flash_col)->getNestedColumnPtr().get(); + return static_cast(flash_col)->getNestedColumnPtr().get(); else return flash_col; } @@ -75,8 +74,8 @@ bool flashDecimalColToArrowColInternal( const IColumn * nested_col = getNestedCol(flash_col_untyped); if (checkColumn>(nested_col) && checkDataType>(data_type)) { - const ColumnDecimal * flash_col = checkAndGetColumn>(nested_col); - const DataTypeDecimal * type = checkAndGetDataType>(data_type); + const auto * flash_col = checkAndGetColumn>(nested_col); + const auto * type = checkAndGetDataType>(data_type); UInt32 scale = type->getScale(); for (size_t i = start_index; i < end_index; i++) { @@ -92,8 +91,8 @@ bool flashDecimalColToArrowColInternal( std::vector digits; digits.reserve(type->getPrec()); decimalToVector(dec.value, digits, scale); - TiDBDecimal tiDecimal(scale, digits, dec.value < 0); - dag_column.append(tiDecimal); + TiDBDecimal ti_decimal(scale, digits, dec.value < 0); + dag_column.append(ti_decimal); } return true; } @@ -121,7 +120,7 @@ template bool flashIntegerColToArrowColInternal(TiDBColumn & dag_column, const IColumn * flash_col_untyped, size_t start_index, size_t end_index) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - if (const ColumnVector * flash_col = checkAndGetColumn>(nested_col)) + if (const auto * flash_col = checkAndGetColumn>(nested_col)) { constexpr bool is_unsigned = std::is_unsigned_v; for (size_t i = start_index; i < end_index; i++) @@ -135,9 +134,9 @@ bool flashIntegerColToArrowColInternal(TiDBColumn & dag_column, const IColumn * } } if constexpr (is_unsigned) - dag_column.append((UInt64)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); else - dag_column.append((Int64)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); } return true; } @@ -148,7 +147,7 @@ template void flashDoubleColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col_untyped, size_t start_index, size_t end_index) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - if (const ColumnVector * flash_col = checkAndGetColumn>(nested_col)) + if (const auto * flash_col = checkAndGetColumn>(nested_col)) { for (size_t i = start_index; i < end_index; i++) { @@ -160,7 +159,7 @@ void flashDoubleColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col continue; } } - dag_column.append((T)flash_col->getElement(i)); + dag_column.append(static_cast(flash_col->getElement(i))); } return; } @@ -196,7 +195,7 @@ void flashDateOrDateTimeColToArrowCol( { const IColumn * nested_col = getNestedCol(flash_col_untyped); using DateFieldType = DataTypeMyTimeBase::FieldType; - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); for (size_t i = start_index; i < end_index; i++) { if constexpr (is_nullable) @@ -217,7 +216,7 @@ void flashStringColToArrowCol(TiDBColumn & dag_column, const IColumn * flash_col { const IColumn * nested_col = getNestedCol(flash_col_untyped); // columnFixedString is not used so do not check it - auto * flash_col = checkAndGetColumn(nested_col); + const auto * flash_col = checkAndGetColumn(nested_col); for (size_t i = start_index; i < end_index; i++) { // todo check if we can convert 
flash_col to DAG col directly since the internal representation is almost the same @@ -242,7 +241,7 @@ void flashBitColToArrowCol( const tipb::FieldType & field_type) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); for (size_t i = start_index; i < end_index; i++) { if constexpr (is_nullable) @@ -267,7 +266,7 @@ void flashEnumColToArrowCol( const IDataType * data_type) { const IColumn * nested_col = getNestedCol(flash_col_untyped); - auto * flash_col = checkAndGetColumn>(nested_col); + const auto * flash_col = checkAndGetColumn>(nested_col); const auto * enum_type = checkAndGetDataType(data_type); size_t enum_value_size = enum_type->getValues().size(); for (size_t i = start_index; i < end_index; i++) @@ -280,10 +279,10 @@ void flashEnumColToArrowCol( continue; } } - auto enum_value = (UInt64)flash_col->getElement(i); + auto enum_value = static_cast(flash_col->getElement(i)); if (enum_value == 0 || enum_value > enum_value_size) throw TiFlashException("number of enum overflow enum boundary", Errors::Coprocessor::Internal); - TiDBEnum ti_enum(enum_value, enum_type->getNameForValue((const DataTypeEnum16::FieldType)enum_value)); + TiDBEnum ti_enum(enum_value, enum_type->getNameForValue(static_cast(enum_value))); dag_column.append(ti_enum); } } @@ -300,7 +299,7 @@ void flashColToArrowCol(TiDBColumn & dag_column, const ColumnWithTypeAndName & f throw TiFlashException("Flash column and TiDB column has different not null flag", Errors::Coprocessor::Internal); } if (type->isNullable()) - type = dynamic_cast(type)->getNestedType().get(); + type = static_cast(type)->getNestedType().get(); switch (tidb_column_info.tp) { @@ -457,7 +456,7 @@ const char * arrowEnumColToFlashCol( { if (checkNull(i, null_count, null_bitmap, col)) continue; - const auto enum_value = (Int64)toLittleEndian(*(reinterpret_cast(pos + offsets[i]))); + const auto enum_value = static_cast(toLittleEndian(*(reinterpret_cast(pos + offsets[i])))); col.column->assumeMutable()->insert(Field(enum_value)); } return pos + offsets[length]; @@ -479,11 +478,11 @@ const char * arrowBitColToFlashCol( continue; const String value = String(pos + offsets[i], pos + offsets[i + 1]); if (value.length() == 0) - col.column->assumeMutable()->insert(Field(UInt64(0))); + col.column->assumeMutable()->insert(Field(static_cast(0))); UInt64 result = 0; - for (auto & c : value) + for (const auto & c : value) { - result = (result << 8u) | (UInt8)c; + result = (result << 8u) | static_cast(c); } col.column->assumeMutable()->insert(Field(result)); } @@ -500,7 +499,7 @@ T toCHDecimal(UInt8 digits_int, UInt8 digits_frac, bool negative, const Int32 * UInt8 tailing_digit = digits_frac % DIGITS_PER_WORD; typename T::NativeType value = 0; - const int word_max = int(1e9); + const int word_max = static_cast(1e9); for (int i = 0; i < word_int; i++) { value = value * word_max + word_buf[i]; @@ -552,28 +551,28 @@ const char * arrowDecimalColToFlashCol( pos += 1; Int32 word_buf[MAX_WORD_BUF_LEN]; const DataTypePtr decimal_type - = col.type->isNullable() ? dynamic_cast(col.type.get())->getNestedType() : col.type; - for (int j = 0; j < MAX_WORD_BUF_LEN; j++) + = col.type->isNullable() ? 
static_cast(col.type.get())->getNestedType() : col.type; + for (int & j : word_buf) { - word_buf[j] = toLittleEndian(*(reinterpret_cast(pos))); + j = toLittleEndian(*(reinterpret_cast(pos))); pos += 4; } - if (auto * type32 = checkDecimal(*decimal_type)) + if (const auto * type32 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type32->getScale())); } - else if (auto * type64 = checkDecimal(*decimal_type)) + else if (const auto * type64 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type64->getScale())); } - else if (auto * type128 = checkDecimal(*decimal_type)) + else if (const auto * type128 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type128->getScale())); } - else if (auto * type256 = checkDecimal(*decimal_type)) + else if (const auto * type256 = checkDecimal(*decimal_type)) { auto res = toCHDecimal(digits_int, digits_frac, negative, word_buf); col.column->assumeMutable()->insert(DecimalField(res, type256->getScale())); @@ -600,13 +599,13 @@ const char * arrowDateColToFlashCol( continue; } UInt64 chunk_time = toLittleEndian(*(reinterpret_cast(pos))); - UInt16 year = (UInt16)((chunk_time & MyTimeBase::YEAR_BIT_FIELD_MASK) >> MyTimeBase::YEAR_BIT_FIELD_OFFSET); - UInt8 month = (UInt8)((chunk_time & MyTimeBase::MONTH_BIT_FIELD_MASK) >> MyTimeBase::MONTH_BIT_FIELD_OFFSET); - UInt8 day = (UInt8)((chunk_time & MyTimeBase::DAY_BIT_FIELD_MASK) >> MyTimeBase::DAY_BIT_FIELD_OFFSET); - UInt16 hour = (UInt16)((chunk_time & MyTimeBase::HOUR_BIT_FIELD_MASK) >> MyTimeBase::HOUR_BIT_FIELD_OFFSET); - UInt8 minute = (UInt8)((chunk_time & MyTimeBase::MINUTE_BIT_FIELD_MASK) >> MyTimeBase::MINUTE_BIT_FIELD_OFFSET); - UInt8 second = (UInt8)((chunk_time & MyTimeBase::SECOND_BIT_FIELD_MASK) >> MyTimeBase::SECOND_BIT_FIELD_OFFSET); - UInt32 micro_second = (UInt32)((chunk_time & MyTimeBase::MICROSECOND_BIT_FIELD_MASK) >> MyTimeBase::MICROSECOND_BIT_FIELD_OFFSET); + auto year = static_cast((chunk_time & MyTimeBase::YEAR_BIT_FIELD_MASK) >> MyTimeBase::YEAR_BIT_FIELD_OFFSET); + auto month = static_cast((chunk_time & MyTimeBase::MONTH_BIT_FIELD_MASK) >> MyTimeBase::MONTH_BIT_FIELD_OFFSET); + auto day = static_cast((chunk_time & MyTimeBase::DAY_BIT_FIELD_MASK) >> MyTimeBase::DAY_BIT_FIELD_OFFSET); + auto hour = static_cast((chunk_time & MyTimeBase::HOUR_BIT_FIELD_MASK) >> MyTimeBase::HOUR_BIT_FIELD_OFFSET); + auto minute = static_cast((chunk_time & MyTimeBase::MINUTE_BIT_FIELD_MASK) >> MyTimeBase::MINUTE_BIT_FIELD_OFFSET); + auto second = static_cast((chunk_time & MyTimeBase::SECOND_BIT_FIELD_MASK) >> MyTimeBase::SECOND_BIT_FIELD_OFFSET); + auto micro_second = static_cast((chunk_time & MyTimeBase::MICROSECOND_BIT_FIELD_MASK) >> MyTimeBase::MICROSECOND_BIT_FIELD_OFFSET); MyDateTime mt(year, month, day, hour, minute, second, micro_second); pos += field_length; col.column->assumeMutable()->insert(Field(mt.toPackedUInt())); @@ -659,7 +658,7 @@ const char * arrowNumColToFlashCol( case TiDB::TypeFloat: u32 = toLittleEndian(*(reinterpret_cast(pos))); std::memcpy(&f32, &u32, sizeof(Float32)); - col.column->assumeMutable()->insert(Field((Float64)f32)); + col.column->assumeMutable()->insert(Field(static_cast(f32))); break; case TiDB::TypeDouble: u64 = toLittleEndian(*(reinterpret_cast(pos))); diff --git 
a/dbms/src/Flash/Coprocessor/CoprocessorReader.h b/dbms/src/Flash/Coprocessor/CoprocessorReader.h index 25c07cff49c..b48fdbcd6dc 100644 --- a/dbms/src/Flash/Coprocessor/CoprocessorReader.h +++ b/dbms/src/Flash/Coprocessor/CoprocessorReader.h @@ -139,7 +139,8 @@ class CoprocessorReader return detail; } - CoprocessorReaderResult nextResult(std::queue & block_queue, const Block & header) + // stream_id is only meaningful for ExchangeReceiver. + CoprocessorReaderResult nextResult(std::queue & block_queue, const Block & header, size_t /*stream_id*/) { auto && [result, has_next] = resp_iter.next(); if (!result.error.empty()) diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index a17eaf53b64..1cf7a0d6c87 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -30,6 +30,8 @@ extern const int DIVIDED_BY_ZERO; extern const int INVALID_TIME; } // namespace ErrorCodes +const String enableFineGrainedShuffleExtraInfo = "enable fine grained shuffle"; + bool strictSqlMode(UInt64 sql_mode) { return sql_mode & TiDBSQLMode::STRICT_ALL_TABLES || sql_mode & TiDBSQLMode::STRICT_TRANS_TABLES; @@ -210,60 +212,21 @@ void DAGContext::attachBlockIO(const BlockIO & io_) { io = io_; } -void DAGContext::initExchangeReceiverIfMPP(Context & context, size_t max_streams) -{ - if (isMPPTask()) - { - if (mpp_exchange_receiver_map_inited) - throw TiFlashException("Repeatedly initialize mpp_exchange_receiver_map", Errors::Coprocessor::Internal); - traverseExecutors(dag_request, [&](const tipb::Executor & executor) { - if (executor.tp() == tipb::ExecType::TypeExchangeReceiver) - { - assert(executor.has_executor_id()); - const auto & executor_id = executor.executor_id(); - // In order to distinguish different exchange receivers. 
- auto exchange_receiver = std::make_shared( - std::make_shared( - executor.exchange_receiver(), - getMPPTaskMeta(), - context.getTMTContext().getKVCluster(), - context.getTMTContext().getMPPTaskManager(), - context.getSettingsRef().enable_local_tunnel, - context.getSettingsRef().enable_async_grpc_client), - executor.exchange_receiver().encoded_task_meta_size(), - max_streams, - log->identifier(), - executor_id); - mpp_exchange_receiver_map[executor_id] = exchange_receiver; - new_thread_count_of_exchange_receiver += exchange_receiver->computeNewThreadCount(); - } - return true; - }); - mpp_exchange_receiver_map_inited = true; - } -} - -const std::unordered_map> & DAGContext::getMPPExchangeReceiverMap() const +ExchangeReceiverPtr DAGContext::getMPPExchangeReceiver(const String & executor_id) const { if (!isMPPTask()) throw TiFlashException("mpp_exchange_receiver_map is used in mpp only", Errors::Coprocessor::Internal); - if (!mpp_exchange_receiver_map_inited) - throw TiFlashException("mpp_exchange_receiver_map has not been initialized", Errors::Coprocessor::Internal); - return mpp_exchange_receiver_map; -} - -void DAGContext::cancelAllExchangeReceiver() -{ - for (auto & it : mpp_exchange_receiver_map) - { - it.second->cancel(); - } + RUNTIME_ASSERT(mpp_receiver_set != nullptr, log, "MPPTask without receiver set"); + return mpp_receiver_set->getExchangeReceiver(executor_id); } -int DAGContext::getNewThreadCountOfExchangeReceiver() const +void DAGContext::addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader) { - return new_thread_count_of_exchange_receiver; + if (!isMPPTask()) + return; + RUNTIME_ASSERT(mpp_receiver_set != nullptr, log, "MPPTask without receiver set"); + return mpp_receiver_set->addCoprocessorReader(coprocessor_reader); } bool DAGContext::containsRegionsInfoForTable(Int64 table_id) const diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 8d84a7c6add..7bfc67afcad 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -37,6 +37,13 @@ namespace DB class Context; class MPPTunnelSet; class ExchangeReceiver; +using ExchangeReceiverPtr = std::shared_ptr; +/// key: executor_id of ExchangeReceiver nodes in dag. +using ExchangeReceiverMap = std::unordered_map; +class MPPReceiverSet; +using MPPReceiverSetPtr = std::shared_ptr; +class CoprocessorReader; +using CoprocessorReaderPtr = std::shared_ptr; class Join; using JoinPtr = std::shared_ptr; @@ -109,6 +116,13 @@ constexpr UInt64 NO_ENGINE_SUBSTITUTION = 1ul << 30ul; constexpr UInt64 ALLOW_INVALID_DATES = 1ul << 32ul; } // namespace TiDBSQLMode +inline bool enableFineGrainedShuffle(uint64_t stream_count) +{ + return stream_count > 0; +} + +extern const String enableFineGrainedShuffleExtraInfo; + /// A context used to track the information that needs to be passed around during DAG planning. 
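The `enableFineGrainedShuffle` helper above treats a positive `fine_grained_shuffle_stream_count` as both the feature switch and the concurrency hint. A minimal sketch of the dispatch pattern the rest of this patch repeats; the surrounding types are simplified stand-ins rather than the real TiFlash interfaces, and `pickStreamCount` is a hypothetical name:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Mirrors the helper added above; everything else in this sketch is an
// illustrative stand-in, not a real TiFlash interface.
inline bool enableFineGrainedShuffle(uint64_t stream_count)
{
    return stream_count > 0;
}

// Hypothetical helper: when fine grained shuffle is on, concurrency is
// capped by the stream count requested in the plan; otherwise the
// interpreter's max_streams wins.
size_t pickStreamCount(uint64_t fine_grained_stream_count, size_t max_streams)
{
    if (enableFineGrainedShuffle(fine_grained_stream_count))
        return std::min(max_streams, static_cast<size_t>(fine_grained_stream_count));
    return max_streams;
}

int main()
{
    std::printf("%zu\n", pickStreamCount(0, 8)); // 8: feature off, use max_streams
    std::printf("%zu\n", pickStreamCount(4, 8)); // 4: capped by the shuffle stream count
}
```

This matches `handleExchangeReceiver` later in the patch, which caps the receiver's stream count with `std::min(max_streams, exchange_receiver->getFineGrainedShuffleStreamCount())` when the feature is on.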
class DAGContext { @@ -254,7 +268,6 @@ class DAGContext return io; } - int getNewThreadCountOfExchangeReceiver() const; UInt64 getFlags() const { return flags; @@ -305,10 +318,12 @@ class DAGContext bool columnsForTestEmpty() { return columns_for_test_map.empty(); } - void cancelAllExchangeReceiver(); - - void initExchangeReceiverIfMPP(Context & context, size_t max_streams); - const std::unordered_map> & getMPPExchangeReceiverMap() const; + ExchangeReceiverPtr getMPPExchangeReceiver(const String & executor_id) const; + void setMPPReceiverSet(const MPPReceiverSetPtr & receiver_set) + { + mpp_receiver_set = receiver_set; + } + void addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader); void addSubquery(const String & subquery_id, SubqueryForSet && subquery); bool hasSubquery() const { return !subqueries.empty(); } @@ -343,6 +358,10 @@ class DAGContext std::vector output_field_types; std::vector output_offsets; + /// Hold the order of list-based executors. + /// It is used to ensure that execution summaries of list-based executors are emitted in the same order as the executors themselves. + std::vector list_based_executors_order; + private: void initExecutorIdToJoinIdMap(); void initOutputInfo(); @@ -350,7 +369,7 @@ class DAGContext private: /// Hold io for correcting the destruction order. BlockIO io; - /// profile_streams_map is a map that maps from executor_id to profile BlockInputStreams + /// profile_streams_map is a map that maps from executor_id to profile BlockInputStreams. std::unordered_map profile_streams_map; /// executor_id_to_join_id_map is a map that maps executor id to all the join executor id of itself and all its children. std::unordered_map> executor_id_to_join_id_map; @@ -369,10 +388,8 @@ class DAGContext ConcurrentBoundedQueue warnings; /// warning_count is the actual warning count during the entire execution std::atomic warning_count; - int new_thread_count_of_exchange_receiver = 0; - /// key: executor_id of ExchangeReceiver nodes in dag. - std::unordered_map> mpp_exchange_receiver_map; - bool mpp_exchange_receiver_map_inited = false; + + MPPReceiverSetPtr mpp_receiver_set; /// vector of SubqueriesForSets(such as join build subquery). /// The order of the vector is also the order of the subquery. 
std::vector subqueries; diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 55a2024a8bc..9fe388f8fe4 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -72,6 +72,7 @@ DAGDriver::DAGDriver( ::grpc::ServerWriter<::coprocessor::BatchResponse> * writer_, bool internal_) : context(context_) + , dag_response(nullptr) , writer(writer_) , internal(internal_) , log(&Poco::Logger::get("DAGDriver")) @@ -129,7 +130,7 @@ try auto streaming_writer = std::make_shared(writer); TiDB::TiDBCollators collators; - std::unique_ptr response_writer = std::make_unique>( + std::unique_ptr response_writer = std::make_unique>( streaming_writer, std::vector(), collators, @@ -137,7 +138,9 @@ try context.getSettingsRef().dag_records_per_chunk, context.getSettingsRef().batch_send_min_limit, true, - dag_context); + dag_context, + /*fine_grained_shuffle_stream_count=*/0, + /*fine_grained_shuffle_batch_size=*/0); dag_output_stream = std::make_shared(streams.in->getHeader(), std::move(response_writer)); copyData(*streams.in, *dag_output_stream); } diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 9b765f30cc6..5fbd86e9762 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -1124,6 +1124,24 @@ std::pair DAGExpressionAnalyzer::isCastRequiredForRootFinalProjec return std::make_pair(need_append_type_cast, std::move(need_append_type_cast_vec)); } +NamesWithAliases DAGExpressionAnalyzer::appendFinalProjectForRootQueryBlock( + ExpressionActionsChain & chain, + const std::vector & schema, + const std::vector & output_offsets, + const String & column_prefix, + bool keep_session_timezone_info) +{ + auto & step = initAndGetLastStep(chain); + + NamesWithAliases final_project = buildFinalProjection(step.actions, schema, output_offsets, column_prefix, keep_session_timezone_info); + + for (const auto & name : final_project) + { + step.required_output.push_back(name.first); + } + return final_project; +} + NamesWithAliases DAGExpressionAnalyzer::buildFinalProjection( const ExpressionActionsPtr & actions, const std::vector & schema, @@ -1148,24 +1166,6 @@ NamesWithAliases DAGExpressionAnalyzer::buildFinalProjection( return genRootFinalProjectAliases(column_prefix, output_offsets); } -NamesWithAliases DAGExpressionAnalyzer::appendFinalProjectForRootQueryBlock( - ExpressionActionsChain & chain, - const std::vector & schema, - const std::vector & output_offsets, - const String & column_prefix, - bool keep_session_timezone_info) -{ - auto & step = initAndGetLastStep(chain); - - NamesWithAliases final_project = buildFinalProjection(step.actions, schema, output_offsets, column_prefix, keep_session_timezone_info); - - for (const auto & name : final_project) - { - step.required_output.push_back(name.first); - } - return final_project; -} - String DAGExpressionAnalyzer::alignReturnType( const tipb::Expr & expr, const ExpressionActionsPtr & actions, diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 7506047b34f..c42312b95c3 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -166,7 +166,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable void appendCastAfterWindow( const ExpressionActionsPtr & actions, const tipb::Window & window, - const size_t 
window_columns_start_index); + size_t window_columns_start_index); NamesAndTypes buildOrderColumns( const ExpressionActionsPtr & actions, @@ -199,6 +199,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable #ifndef DBMS_PUBLIC_GTEST private: #endif + String buildTupleFunctionForGroupConcat( const tipb::Expr & expr, SortDescription & sort_desc, diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index ee529680d28..23bbb4586b3 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -450,6 +450,7 @@ DAGExpressionAnalyzerHelper::FunctionBuilderMap DAGExpressionAnalyzerHelper::fun {"bitOr", DAGExpressionAnalyzerHelper::buildBitwiseFunction}, {"bitXor", DAGExpressionAnalyzerHelper::buildBitwiseFunction}, {"bitNot", DAGExpressionAnalyzerHelper::buildBitwiseFunction}, + {"bitShiftRight", DAGExpressionAnalyzerHelper::buildBitwiseFunction}, {"leftUTF8", DAGExpressionAnalyzerHelper::buildLeftUTF8Function}, {"date_add", DAGExpressionAnalyzerHelper::buildDateAddOrSubFunction}, {"date_sub", DAGExpressionAnalyzerHelper::buildDateAddOrSubFunction}, diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 81fe2c8f713..d67a8f6ec52 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -346,14 +346,26 @@ void DAGQueryBlockInterpreter::executeWhere(DAGPipeline & pipeline, const Expres void DAGQueryBlockInterpreter::executeWindow( DAGPipeline & pipeline, - WindowDescription & window_description) + WindowDescription & window_description, + bool enable_fine_grained_shuffle) { executeExpression(pipeline, window_description.before_window, log, "before window"); - /// If there are several streams, we merge them into one - executeUnion(pipeline, max_streams, log, false, "merge into one for window input"); - assert(pipeline.streams.size() == 1); - pipeline.firstStream() = std::make_shared(pipeline.firstStream(), window_description, log->identifier()); + if (enable_fine_grained_shuffle) + { + /// Window function can be multi-threaded when fine grained shuffle is enabled. + pipeline.transform([&](auto & stream) { + stream = std::make_shared(stream, window_description, log->identifier()); + stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + }); + } + else + { + /// If there are several streams, we merge them into one. 
+ executeUnion(pipeline, max_streams, log, false, "merge into one for window input"); + assert(pipeline.streams.size() == 1); + pipeline.firstStream() = std::make_shared(pipeline.firstStream(), window_description, log->identifier()); + } } void DAGQueryBlockInterpreter::executeAggregation( @@ -379,34 +391,39 @@ void DAGQueryBlockInterpreter::executeAggregation( is_final_agg); /// If there are several sources, then we perform parallel aggregation - if (pipeline.streams.size() > 1) + if (pipeline.streams.size() > 1 || pipeline.streams_with_non_joined_data.size() > 1) { const Settings & settings = context.getSettingsRef(); - BlockInputStreamPtr stream_with_non_joined_data = combinedNonJoinedDataStream(pipeline, max_streams, log); - pipeline.firstStream() = std::make_shared( + BlockInputStreamPtr stream = std::make_shared( pipeline.streams, - stream_with_non_joined_data, + pipeline.streams_with_non_joined_data, params, context.getFileProvider(), true, max_streams, settings.aggregation_memory_efficient_merge_threads ? static_cast(settings.aggregation_memory_efficient_merge_threads) : static_cast(settings.max_threads), log->identifier()); + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); + // should record for agg before restore concurrency. See #3804. recordProfileStreams(pipeline, query_block.aggregation_name); restorePipelineConcurrency(pipeline); } else { - BlockInputStreamPtr stream_with_non_joined_data = combinedNonJoinedDataStream(pipeline, max_streams, log); BlockInputStreams inputs; if (!pipeline.streams.empty()) inputs.push_back(pipeline.firstStream()); - else - pipeline.streams.resize(1); - if (stream_with_non_joined_data) - inputs.push_back(stream_with_non_joined_data); + + if (!pipeline.streams_with_non_joined_data.empty()) + inputs.push_back(pipeline.streams_with_non_joined_data.at(0)); + + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::make_shared( std::make_shared(inputs, log->identifier()), params, @@ -417,15 +434,15 @@ void DAGQueryBlockInterpreter::executeAggregation( } } -void DAGQueryBlockInterpreter::executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc) +void DAGQueryBlockInterpreter::executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle) { - orderStreams(pipeline, max_streams, sort_desc, 0, context, log); + orderStreams(pipeline, max_streams, sort_desc, 0, enable_fine_grained_shuffle, context, log); } void DAGQueryBlockInterpreter::executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns) { Int64 limit = query_block.limit_or_topn->topn().limit(); - orderStreams(pipeline, max_streams, getSortDescription(order_columns, query_block.limit_or_topn->topn().order_by()), limit, context, log); + orderStreams(pipeline, max_streams, getSortDescription(order_columns, query_block.limit_or_topn->topn().order_by()), limit, false, context, log); } void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, const String & key) @@ -436,17 +453,30 @@ void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, cons void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) { - auto it = dagContext().getMPPExchangeReceiverMap().find(query_block.source_name); - if (unlikely(it == dagContext().getMPPExchangeReceiverMap().end())) + auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); + if 
(unlikely(exchange_receiver == nullptr)) throw Exception("Can not find exchange receiver for " + query_block.source_name, ErrorCodes::LOGICAL_ERROR); // todo choose a more reasonable stream number auto & exchange_receiver_io_input_streams = dagContext().getInBoundIOInputStreamsMap()[query_block.source_name]; - for (size_t i = 0; i < max_streams; ++i) + + const bool enable_fine_grained_shuffle = enableFineGrainedShuffle(exchange_receiver->getFineGrainedShuffleStreamCount()); + String extra_info = "squashing after exchange receiver"; + size_t stream_count = max_streams; + if (enable_fine_grained_shuffle) + { + extra_info += ", " + enableFineGrainedShuffleExtraInfo; + stream_count = std::min(max_streams, exchange_receiver->getFineGrainedShuffleStreamCount()); + } + + for (size_t i = 0; i < stream_count; ++i) { - BlockInputStreamPtr stream = std::make_shared(it->second, log->identifier(), query_block.source_name); + BlockInputStreamPtr stream = std::make_shared(exchange_receiver, + log->identifier(), + query_block.source_name, + /*stream_id=*/enable_fine_grained_shuffle ? i : 0); exchange_receiver_io_input_streams.push_back(stream); stream = std::make_shared(stream, 8192, 0, log->identifier()); - stream->setExtraInfo("squashing after exchange receiver"); + stream->setExtraInfo(extra_info); pipeline.streams.push_back(stream); } NamesAndTypes source_columns; @@ -508,7 +538,7 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti analyzer = std::make_unique(std::move(output_columns), context); } -void DAGQueryBlockInterpreter::handleWindow(DAGPipeline & pipeline, const tipb::Window & window) +void DAGQueryBlockInterpreter::handleWindow(DAGPipeline & pipeline, const tipb::Window & window, bool enable_fine_grained_shuffle) { NamesAndTypes input_columns; assert(input_streams_vec.size() == 1); @@ -517,13 +547,13 @@ void DAGQueryBlockInterpreter::handleWindow(DAGPipeline & pipeline, const tipb:: input_columns.emplace_back(p.name, p.type); DAGExpressionAnalyzer dag_analyzer(input_columns, context); WindowDescription window_description = dag_analyzer.buildWindowDescription(window); - executeWindow(pipeline, window_description); + executeWindow(pipeline, window_description, enable_fine_grained_shuffle); executeExpression(pipeline, window_description.after_window, log, "cast after window"); analyzer = std::make_unique(window_description.after_window_columns, context); } -void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort) +void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort, bool enable_fine_grained_shuffle) { NamesAndTypes input_columns; assert(input_streams_vec.size() == 1); @@ -532,7 +562,7 @@ void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const t input_columns.emplace_back(p.name, p.type); DAGExpressionAnalyzer dag_analyzer(input_columns, context); auto order_columns = dag_analyzer.buildWindowOrderColumns(window_sort); - executeWindowOrder(pipeline, getSortDescription(order_columns, window_sort.byitems())); + executeWindowOrder(pipeline, getSortDescription(order_columns, window_sort.byitems()), enable_fine_grained_shuffle); analyzer = std::make_unique(std::move(input_columns), context); } @@ -580,13 +610,13 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) } else if (query_block.source->tp() == tipb::ExecType::TypeWindow) { - handleWindow(pipeline, query_block.source->window()); + handleWindow(pipeline, 
query_block.source->window(), enableFineGrainedShuffle(query_block.source->fine_grained_shuffle_stream_count())); recordProfileStreams(pipeline, query_block.source_name); restorePipelineConcurrency(pipeline); } else if (query_block.source->tp() == tipb::ExecType::TypeSort) { - handleWindowOrder(pipeline, query_block.source->sort()); + handleWindowOrder(pipeline, query_block.source->sort(), enableFineGrainedShuffle(query_block.source->fine_grained_shuffle_stream_count())); recordProfileStreams(pipeline, query_block.source_name); } else @@ -692,19 +722,47 @@ void DAGQueryBlockInterpreter::handleExchangeSender(DAGPipeline & pipeline) std::vector partition_col_ids = ExchangeSenderInterpreterHelper::genPartitionColIds(exchange_sender); TiDB::TiDBCollators partition_col_collators = ExchangeSenderInterpreterHelper::genPartitionColCollators(exchange_sender); int stream_id = 0; - pipeline.transform([&](auto & stream) { - // construct writer - std::unique_ptr response_writer = std::make_unique>( - context.getDAGContext()->tunnel_set, - partition_col_ids, - partition_col_collators, - exchange_sender.tp(), - context.getSettingsRef().dag_records_per_chunk, - context.getSettingsRef().batch_send_min_limit, - stream_id++ == 0, /// only one stream needs to sending execution summaries for the last response - dagContext()); - stream = std::make_shared(stream, std::move(response_writer), log->identifier()); - }); + const uint64_t stream_count = query_block.exchange_sender->fine_grained_shuffle_stream_count(); + const uint64_t batch_size = query_block.exchange_sender->fine_grained_shuffle_batch_size(); + + if (enableFineGrainedShuffle(stream_count)) + { + pipeline.transform([&](auto & stream) { + // construct writer + std::unique_ptr response_writer = std::make_unique>( + context.getDAGContext()->tunnel_set, + partition_col_ids, + partition_col_collators, + exchange_sender.tp(), + context.getSettingsRef().dag_records_per_chunk, + context.getSettingsRef().batch_send_min_limit, + stream_id++ == 0, /// only one stream needs to send execution summaries for the last response + dagContext(), + stream_count, + batch_size); + stream = std::make_shared(stream, std::move(response_writer), log->identifier()); + stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + }); + RUNTIME_CHECK(exchange_sender.tp() == tipb::ExchangeType::Hash, Exception, "exchange_sender has to be hash partition when fine grained shuffle is enabled"); + RUNTIME_CHECK(stream_count <= 1024, Exception, "fine_grained_shuffle_stream_count should not be greater than 1024"); + } + else + { + pipeline.transform([&](auto & stream) { + std::unique_ptr response_writer = std::make_unique>( + context.getDAGContext()->tunnel_set, + partition_col_ids, + partition_col_collators, + exchange_sender.tp(), + context.getSettingsRef().dag_records_per_chunk, + context.getSettingsRef().batch_send_min_limit, + stream_id++ == 0, /// only one stream needs to send execution summaries for the last response + dagContext(), + stream_count, + batch_size); + stream = std::make_shared(stream, std::move(response_writer), log->identifier()); + }); + } } void DAGQueryBlockInterpreter::handleMockExchangeSender(DAGPipeline & pipeline) @@ -732,4 +790,4 @@ BlockInputStreams DAGQueryBlockInterpreter::execute() return pipeline.streams; } -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index cabdd4dc9be..c449b37e360 100644 --- 
a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -64,15 +64,16 @@ class DAGQueryBlockInterpreter void handleExchangeReceiver(DAGPipeline & pipeline); void handleMockExchangeReceiver(DAGPipeline & pipeline); void handleProjection(DAGPipeline & pipeline, const tipb::Projection & projection); - void handleWindow(DAGPipeline & pipeline, const tipb::Window & window); - void handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort); + void handleWindow(DAGPipeline & pipeline, const tipb::Window & window, bool enable_fine_grained_shuffle); + void handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort, bool enable_fine_grained_shuffle); void executeWhere(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column, const String & extra_info = ""); - void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc); + void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle); void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns); void executeLimit(DAGPipeline & pipeline); void executeWindow( DAGPipeline & pipeline, - WindowDescription & window_description); + WindowDescription & window_description, + bool enable_fine_grained_shuffle); void executeAggregation( DAGPipeline & pipeline, const ExpressionActionsPtr & expression_actions_ptr, diff --git a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp index 882699e1599..d68a7b17aaa 100644 --- a/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQuerySource.cpp @@ -20,6 +20,26 @@ namespace DB { +namespace +{ +void fillOrderForListBasedExecutors(DAGContext & dag_context, const DAGQueryBlock & query_block) +{ + assert(query_block.source); + auto & list_based_executors_order = dag_context.list_based_executors_order; + list_based_executors_order.push_back(query_block.source_name); + if (query_block.selection) + list_based_executors_order.push_back(query_block.selection_name); + if (query_block.aggregation) + list_based_executors_order.push_back(query_block.aggregation_name); + if (query_block.having) + list_based_executors_order.push_back(query_block.having_name); + if (query_block.limit_or_topn) + list_based_executors_order.push_back(query_block.limit_or_topn_name); + if (query_block.exchange_sender) + dag_context.list_based_executors_order.push_back(query_block.exchange_sender_name); +} +} // namespace + DAGQuerySource::DAGQuerySource(Context & context_) : context(context_) { @@ -32,6 +52,9 @@ DAGQuerySource::DAGQuerySource(Context & context_) else { root_query_block = std::make_shared(1, dag_request.executors()); + auto & dag_context = getDAGContext(); + if (!dag_context.return_executor_id) + fillOrderForListBasedExecutors(dag_context, *root_query_block); } } diff --git a/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp b/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp index 53bebc91da8..33f6d99f9d8 100644 --- a/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGResponseWriter.cpp @@ -89,12 +89,10 @@ void DAGResponseWriter::addExecuteSummaries(tipb::SelectResponse & response, boo } } - /// add execution_summary for local executor - for (auto & p : dag_context.getProfileStreamsMap()) - { + auto fill_execution_summary = [&](const String & executor_id, const BlockInputStreams & streams) { ExecutionSummary current; /// part 1: 
local execution info - for (auto & stream_ptr : p.second) + for (const auto & stream_ptr : streams) { if (auto * p_stream = dynamic_cast(stream_ptr.get())) { @@ -105,16 +103,16 @@ void DAGResponseWriter::addExecuteSummaries(tipb::SelectResponse & response, boo current.concurrency++; } /// part 2: remote execution info - if (merged_remote_execution_summaries.find(p.first) != merged_remote_execution_summaries.end()) + if (merged_remote_execution_summaries.find(executor_id) != merged_remote_execution_summaries.end()) { - for (auto & remote : merged_remote_execution_summaries[p.first]) + for (auto & remote : merged_remote_execution_summaries[executor_id]) current.merge(remote, false); } /// part 3: for join need to add the build time /// In TiFlash, a hash join's build side is finished before probe side starts, /// so the join probe side's running time does not include hash table's build time, /// when construct ExecSummaries, we need add the build cost to probe executor - auto all_join_id_it = dag_context.getExecutorIdToJoinIdMap().find(p.first); + auto all_join_id_it = dag_context.getExecutorIdToJoinIdMap().find(executor_id); if (all_join_id_it != dag_context.getExecutorIdToJoinIdMap().end()) { for (const auto & join_executor_id : all_join_id_it->second) @@ -138,8 +136,27 @@ void DAGResponseWriter::addExecuteSummaries(tipb::SelectResponse & response, boo } current.time_processed_ns += dag_context.compile_time_ns; - fillTiExecutionSummary(response.add_execution_summaries(), current, p.first, delta_mode); + fillTiExecutionSummary(response.add_execution_summaries(), current, executor_id, delta_mode); + }; + + /// add execution_summary for local executor + if (dag_context.return_executor_id) + { + for (auto & p : dag_context.getProfileStreamsMap()) + fill_execution_summary(p.first, p.second); + } + else + { + const auto & profile_streams_map = dag_context.getProfileStreamsMap(); + assert(profile_streams_map.size() == dag_context.list_based_executors_order.size()); + for (const auto & executor_id : dag_context.list_based_executors_order) + { + auto it = profile_streams_map.find(executor_id); + assert(it != profile_streams_map.end()); + fill_execution_summary(executor_id, it->second); + } } + for (auto & p : merged_remote_execution_summaries) { if (local_executors.find(p.first) == local_executors.end()) diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp index df7e504d2c4..390ce7b9948 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -485,7 +486,8 @@ void DAGStorageInterpreter::buildRemoteStreams(std::vector && rem std::vector tasks(all_tasks.begin() + task_start, all_tasks.begin() + task_end); auto coprocessor_reader = std::make_shared(schema, cluster, tasks, has_enforce_encode_type, 1); - BlockInputStreamPtr input = std::make_shared(coprocessor_reader, log->identifier(), table_scan.getTableScanExecutorID()); + context.getDAGContext()->addCoprocessorReader(coprocessor_reader); + BlockInputStreamPtr input = std::make_shared(coprocessor_reader, log->identifier(), table_scan.getTableScanExecutorID(), /*stream_id=*/0); pipeline.streams.push_back(input); task_start = task_end; } @@ -634,6 +636,9 @@ void DAGStorageInterpreter::buildLocalStreams(DAGPipeline & pipeline, size_t max if (total_local_region_num == 0) return; const auto table_query_infos = generateSelectQueryInfos(); 
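The `DAGResponseWriter` hunk above matters because list-based requests carry no executor ids, so summaries are matched to executors by position; iterating `getProfileStreamsMap()`, an `unordered_map`, would emit them in arbitrary order. A minimal sketch of the ordering fix, with hypothetical executor ids and a plain `int` standing in for an execution summary:

```cpp
#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

// Minimal illustration, not TiFlash code: an unordered_map has no stable
// iteration order, so a side vector holding executor ids in plan order
// is used to emit summaries positionally.
int main()
{
    std::unordered_map<std::string, int> profile_streams_map
        = {{"limit_2", 30}, {"selection_1", 20}, {"table_scan_0", 10}};
    std::vector<std::string> list_based_executors_order
        = {"table_scan_0", "selection_1", "limit_2"};

    std::vector<int> summaries;
    for (const auto & executor_id : list_based_executors_order)
    {
        auto it = profile_streams_map.find(executor_id);
        assert(it != profile_streams_map.end());
        summaries.push_back(it->second);
    }
    // Plan order is preserved regardless of hash-map iteration order.
    assert((summaries == std::vector<int>{10, 20, 30}));
}
```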
+ bool has_multiple_partitions = table_query_infos.size() > 1; + // MultiPartitionStreamPool is disabled in the non-partitioned or single-partition case + std::shared_ptr stream_pool = has_multiple_partitions ? std::make_shared() : nullptr; for (const auto & table_query_info : table_query_infos) { DAGPipeline current_pipeline; @@ -642,9 +647,6 @@ void DAGStorageInterpreter::buildLocalStreams(DAGPipeline & pipeline, size_t max size_t region_num = query_info.mvcc_query_info->regions_query_info.size(); if (region_num == 0) continue; - /// calculate weighted max_streams for each partition, note at least 1 stream is needed for each partition - size_t current_max_streams = table_query_infos.size() == 1 ? max_streams : (max_streams * region_num + total_local_region_num - 1) / total_local_region_num; - QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; assert(storages_with_structure_lock.find(table_id) != storages_with_structure_lock.end()); auto & storage = storages_with_structure_lock[table_id].storage; @@ -654,7 +656,7 @@ void DAGStorageInterpreter::buildLocalStreams(DAGPipeline & pipeline, size_t max { try { - current_pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, current_max_streams); + current_pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); // After getting streams from storage, we need to validate whether Regions have changed or not after learner read. // (by calling `validateQueryInfo`). In case the key ranges of Regions have changed (Region merge/split), those `streams` @@ -778,7 +780,19 @@ void DAGStorageInterpreter::buildLocalStreams(DAGPipeline & pipeline, size_t max throw; } } - pipeline.streams.insert(pipeline.streams.end(), current_pipeline.streams.begin(), current_pipeline.streams.end()); + if (has_multiple_partitions) + stream_pool->addPartitionStreams(current_pipeline.streams); + else + pipeline.streams.insert(pipeline.streams.end(), current_pipeline.streams.begin(), current_pipeline.streams.end()); + } + if (has_multiple_partitions) + { + String req_info = dag_context.isMPPTask() ? 
dag_context.getMPPTaskId().toString() : ""; + int exposed_streams_cnt = std::min(static_cast(max_streams), stream_pool->addedStreamsCnt()); + for (int i = 0; i < exposed_streams_cnt; ++i) + { + pipeline.streams.push_back(std::make_shared(stream_pool, req_info)); + } } } diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 87f58131c8c..2003103a20a 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -29,7 +29,6 @@ #include namespace DB { - const Int8 VAR_SIZE = 0; extern const String uniq_raw_res_name; @@ -333,7 +332,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::DecimalIsFalseWithNull, "isFalseWithNull"}, //{tipb::ScalarFuncSig::LeftShift, "cast"}, - //{tipb::ScalarFuncSig::RightShift, "cast"}, + {tipb::ScalarFuncSig::RightShift, "bitShiftRight"}, //{tipb::ScalarFuncSig::BitCount, "cast"}, //{tipb::ScalarFuncSig::GetParamString, "cast"}, @@ -514,7 +513,7 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::YearWeekWithMode, "cast"}, //{tipb::ScalarFuncSig::YearWeekWithoutMode, "cast"}, - //{tipb::ScalarFuncSig::GetFormat, "cast"}, + {tipb::ScalarFuncSig::GetFormat, "getFormat"}, {tipb::ScalarFuncSig::SysDateWithFsp, "sysDateWithFsp"}, {tipb::ScalarFuncSig::SysDateWithoutFsp, "sysDateWithoutFsp"}, //{tipb::ScalarFuncSig::CurrentDate, "cast"}, @@ -562,7 +561,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::Quarter, "toQuarter"}, //{tipb::ScalarFuncSig::SecToTime, "cast"}, - //{tipb::ScalarFuncSig::TimeToSec, "cast"}, + {tipb::ScalarFuncSig::TimeToSec, "tidbTimeToSec"}, //{tipb::ScalarFuncSig::TimestampAdd, "cast"}, {tipb::ScalarFuncSig::ToDays, "tidbToDays"}, {tipb::ScalarFuncSig::ToSeconds, "tidbToSeconds"}, @@ -649,8 +648,8 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::Quote, "cast"}, //{tipb::ScalarFuncSig::Repeat, "cast"}, {tipb::ScalarFuncSig::Replace, "replaceAll"}, - //{tipb::ScalarFuncSig::ReverseUTF8, "cast"}, - //{tipb::ScalarFuncSig::Reverse, "cast"}, + {tipb::ScalarFuncSig::ReverseUTF8, "reverseUTF8"}, + {tipb::ScalarFuncSig::Reverse, "reverse"}, {tipb::ScalarFuncSig::RightUTF8, "rightUTF8"}, //{tipb::ScalarFuncSig::Right, "cast"}, {tipb::ScalarFuncSig::RpadUTF8, "rpadUTF8"}, @@ -770,6 +769,10 @@ const String & getFunctionName(const tipb::Expr & expr) { return getAggFunctionName(expr); } + else if (isWindowFunctionExpr(expr)) + { + return getWindowFunctionName(expr); + } else { auto it = scalar_func_map.find(expr.sig()); @@ -1429,6 +1432,7 @@ tipb::EncodeType analyzeDAGEncodeType(DAGContext & dag_context) return tipb::EncodeType::TypeDefault; return encode_type; } + tipb::ScalarFuncSig reverseGetFuncSigByFuncName(const String & name) { static std::unordered_map func_name_sig_map = getFuncNameToSigMap(); diff --git a/dbms/src/Flash/Coprocessor/DecodeDetail.h b/dbms/src/Flash/Coprocessor/DecodeDetail.h index 9bad0ca2b72..91851650d9e 100644 --- a/dbms/src/Flash/Coprocessor/DecodeDetail.h +++ b/dbms/src/Flash/Coprocessor/DecodeDetail.h @@ -21,8 +21,12 @@ namespace DB /// Detail of the packet that decoding in TiRemoteInputStream.RemoteReader.decodeChunks() struct DecodeDetail { + // For fine grained shuffle, each ExchangeReceiver/thread will decode its own blocks. + // So this is the row number of partial blocks of the original packet. + // This will be the row number of all blocks of the original packet if it's not fine grained shuffle. Int64 rows = 0; - // byte size of origin packet. 
+ + // Total byte size of the origin packet, even for fine grained shuffle. Int64 packet_bytes = 0; }; -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 5ca8d26758f..964384ce885 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -25,19 +25,8 @@ namespace DB InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) : context(context_) , dag(dag_) + , max_streams(context.getMaxStreams()) { - const Settings & settings = context.getSettingsRef(); - if (dagContext().isBatchCop() || (dagContext().isMPPTask() && !dagContext().isTest())) - max_streams = settings.max_threads; - else if (dagContext().isTest()) - max_streams = dagContext().initialize_concurrency; - else - max_streams = 1; - - if (max_streams > 1) - { - max_streams *= settings.max_streams_to_max_threads_ratio; - } } void setRestorePipelineConcurrency(DAGQueryBlock & query_block) @@ -89,10 +78,6 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) BlockIO InterpreterDAG::execute() { - /// Due to learner read, DAGQueryBlockInterpreter may take a long time to build - /// the query plan, so we init mpp exchange receiver before executeQueryBlock - dagContext().initExchangeReceiverIfMPP(context, max_streams); - BlockInputStreams streams = executeQueryBlock(*dag.getRootQueryBlock()); DAGPipeline pipeline; pipeline.streams = streams; diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index 9de5b83626f..002a06d07b9 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -42,32 +43,6 @@ void restoreConcurrency( } } -BlockInputStreamPtr combinedNonJoinedDataStream( - DAGPipeline & pipeline, - size_t max_threads, - const LoggerPtr & log, - bool ignore_block) -{ - BlockInputStreamPtr ret = nullptr; - if (pipeline.streams_with_non_joined_data.size() == 1) - ret = pipeline.streams_with_non_joined_data.at(0); - else if (pipeline.streams_with_non_joined_data.size() > 1) - { - if (ignore_block) - { - ret = std::make_shared(pipeline.streams_with_non_joined_data, nullptr, max_threads, log->identifier()); - ret->setExtraInfo("combine non joined(ignore block)"); - } - else - { - ret = std::make_shared(pipeline.streams_with_non_joined_data, nullptr, max_threads, log->identifier()); - ret->setExtraInfo("combine non joined"); - } - } - pipeline.streams_with_non_joined_data.clear(); - return ret; -} - void executeUnion( DAGPipeline & pipeline, size_t max_streams, @@ -75,21 +50,33 @@ void executeUnion( bool ignore_block, const String & extra_info) { - if (pipeline.streams.size() == 1 && pipeline.streams_with_non_joined_data.empty()) - return; - auto non_joined_data_stream = combinedNonJoinedDataStream(pipeline, max_streams, log, ignore_block); - if (!pipeline.streams.empty()) + switch (pipeline.streams.size() + pipeline.streams_with_non_joined_data.size()) + { + case 0: + break; + case 1: { + if (pipeline.streams.size() == 1) + break; + // streams_with_non_joined_data's size is 1. 
+ pipeline.streams.push_back(pipeline.streams_with_non_joined_data.at(0)); + pipeline.streams_with_non_joined_data.clear(); + break; + } + default: + { + BlockInputStreamPtr stream; if (ignore_block) - pipeline.firstStream() = std::make_shared(pipeline.streams, non_joined_data_stream, max_streams, log->identifier()); + stream = std::make_shared(pipeline.streams, pipeline.streams_with_non_joined_data, max_streams, log->identifier()); else - pipeline.firstStream() = std::make_shared(pipeline.streams, non_joined_data_stream, max_streams, log->identifier()); - pipeline.firstStream()->setExtraInfo(extra_info); + stream = std::make_shared(pipeline.streams, pipeline.streams_with_non_joined_data, max_streams, log->identifier()); + stream->setExtraInfo(extra_info); + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); + break; } - else if (non_joined_data_stream != nullptr) - { - pipeline.streams.push_back(non_joined_data_stream); } } @@ -126,10 +113,14 @@ void orderStreams( size_t max_streams, SortDescription order_descr, Int64 limit, + bool enable_fine_grained_shuffle, const Context & context, const LoggerPtr & log) { const Settings & settings = context.getSettingsRef(); + String extra_info; + if (enable_fine_grained_shuffle) + extra_info = enableFineGrainedShuffleExtraInfo; pipeline.transform([&](auto & stream) { auto sorting_stream = std::make_shared(stream, order_descr, log->identifier(), limit); @@ -141,19 +132,37 @@ void orderStreams( sorting_stream->setLimits(limits); stream = sorting_stream; + stream->setExtraInfo(extra_info); }); - /// If there are several streams, we merge them into one - executeUnion(pipeline, max_streams, log, false, "for partial order"); + if (enable_fine_grained_shuffle) + { + pipeline.transform([&](auto & stream) { + stream = std::make_shared( + stream, + order_descr, + settings.max_block_size, + limit, + settings.max_bytes_before_external_sort, + context.getTemporaryPath(), + log->identifier()); + stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + }); + } + else + { + /// If there are several streams, we merge them into one + executeUnion(pipeline, max_streams, log, false, "for partial order"); - /// Merge the sorted blocks. - pipeline.firstStream() = std::make_shared( - pipeline.firstStream(), - order_descr, - settings.max_block_size, - limit, - settings.max_bytes_before_external_sort, - context.getTemporaryPath(), - log->identifier()); + /// Merge the sorted blocks. 
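The two branches of orderStreams() differ in topology: with fine grained shuffle each stream gets its own MergeSortingBlockInputStream so per-receiver-queue order is preserved, while the classic path (continued just below) first unions all streams and then merges them into one totally sorted stream. A hypothetical call site passing the new flag (variable names assumed from the surrounding code):

```cpp
// Sketch only: in practice the flag would come from the executor's
// fine_grained_shuffle_stream_count in the plan, not be hard-coded.
orderStreams(pipeline, max_streams, order_descr, limit,
             /*enable_fine_grained_shuffle=*/true, context, log);
```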
+ pipeline.firstStream() = std::make_shared( + pipeline.firstStream(), + order_descr, + settings.max_block_size, + limit, + settings.max_bytes_before_external_sort, + context.getTemporaryPath(), + log->identifier()); + } } } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.h b/dbms/src/Flash/Coprocessor/InterpreterUtils.h index 36280f3b903..bd64346718c 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.h +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.h @@ -57,6 +57,7 @@ void orderStreams( size_t max_streams, SortDescription order_descr, Int64 limit, + bool enable_fine_grained_shuffle, const Context & context, const LoggerPtr & log); } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp index f915653fe96..a72dfcc16ef 100644 --- a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp +++ b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { namespace ErrorCodes @@ -37,8 +39,8 @@ inline void serializeToPacket(mpp::MPPDataPacket & packet, const tipb::SelectRes throw Exception(fmt::format("Fail to serialize response, response size: {}", response.ByteSizeLong())); } -template -StreamingDAGResponseWriter::StreamingDAGResponseWriter( +template +StreamingDAGResponseWriter::StreamingDAGResponseWriter( StreamWriterPtr writer_, std::vector partition_col_ids_, TiDB::TiDBCollators collators_, @@ -46,7 +48,9 @@ StreamingDAGResponseWriter::StreamingDAGResponseWriter( Int64 records_per_chunk_, Int64 batch_send_min_limit_, bool should_send_exec_summary_at_last_, - DAGContext & dag_context_) + DAGContext & dag_context_, + uint64_t fine_grained_shuffle_stream_count_, + UInt64 fine_grained_shuffle_batch_size_) : DAGResponseWriter(records_per_chunk_, dag_context_) , batch_send_min_limit(batch_send_min_limit_) , should_send_exec_summary_at_last(should_send_exec_summary_at_last_) @@ -54,6 +58,8 @@ StreamingDAGResponseWriter::StreamingDAGResponseWriter( , writer(writer_) , partition_col_ids(std::move(partition_col_ids_)) , collators(std::move(collators_)) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + , fine_grained_shuffle_batch_size(fine_grained_shuffle_batch_size_) { rows_in_blocks = 0; partition_num = writer_->getPartitionNum(); @@ -71,17 +77,37 @@ StreamingDAGResponseWriter::StreamingDAGResponseWriter( } } -template -void StreamingDAGResponseWriter::finishWrite() +template +void StreamingDAGResponseWriter::finishWrite() { if (should_send_exec_summary_at_last) - batchWrite(); + { + if constexpr (enable_fine_grained_shuffle) + { + assert(exchange_type == tipb::ExchangeType::Hash); + batchWriteFineGrainedShuffle(); + } + else + { + batchWrite(); + } + } else - batchWrite(); + { + if constexpr (enable_fine_grained_shuffle) + { + assert(exchange_type == tipb::ExchangeType::Hash); + batchWriteFineGrainedShuffle(); + } + else + { + batchWrite(); + } + } } -template -void StreamingDAGResponseWriter::write(const Block & block) +template +void StreamingDAGResponseWriter::write(const Block & block) { if (block.columns() != dag_context.result_field_types.size()) throw TiFlashException("Output column size mismatch with field type size", Errors::Coprocessor::Internal); @@ -91,15 +117,23 @@ void StreamingDAGResponseWriter::write(const Block & block) { blocks.push_back(block); } - if (static_cast(rows_in_blocks) > (dag_context.encode_type == tipb::EncodeType::TypeCHBlock ? 
batch_send_min_limit : records_per_chunk - 1)) + + if constexpr (enable_fine_grained_shuffle) { - batchWrite(); + assert(exchange_type == tipb::ExchangeType::Hash); + if (static_cast(rows_in_blocks) >= fine_grained_shuffle_batch_size) + batchWriteFineGrainedShuffle(); + } + else + { + if (static_cast(rows_in_blocks) > (dag_context.encode_type == tipb::EncodeType::TypeCHBlock ? batch_send_min_limit : records_per_chunk - 1)) + batchWrite(); } } -template +template template -void StreamingDAGResponseWriter::encodeThenWriteBlocks( +void StreamingDAGResponseWriter::encodeThenWriteBlocks( const std::vector & input_blocks, tipb::SelectResponse & response) const { @@ -191,133 +225,238 @@ void StreamingDAGResponseWriter::encodeThenWriteBlocks( } } -/// hash exchanging data among only TiFlash nodes. -template + +template template -void StreamingDAGResponseWriter::partitionAndEncodeThenWriteBlocks( - std::vector & input_blocks, - tipb::SelectResponse & response) const +void StreamingDAGResponseWriter::batchWrite() { - std::vector packet(partition_num); - - std::vector responses_row_count(partition_num); + tipb::SelectResponse response; + if constexpr (send_exec_summary_at_last) + addExecuteSummaries(response, !dag_context.isMPPTask() || dag_context.isRootMPPTask()); + if (exchange_type == tipb::ExchangeType::Hash) + { + partitionAndEncodeThenWriteBlocks(blocks, response); + } + else + { + encodeThenWriteBlocks(blocks, response); + } + blocks.clear(); + rows_in_blocks = 0; +} +template +template +void StreamingDAGResponseWriter::handleExecSummary( + const std::vector & input_blocks, + std::vector & packet, + tipb::SelectResponse & response) const +{ if constexpr (send_exec_summary_at_last) { /// Sending the response to only one node, default the first one. serializeToPacket(packet[0], response); - } - if (input_blocks.empty()) - { - if constexpr (send_exec_summary_at_last) + // No need to send data when blocks are not empty, + // because exec_summary will be sent together with blocks. 
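Back in write() above: with fine grained shuffle enabled, the flush condition is simply the buffered row count against fine_grained_shuffle_batch_size, regardless of encode type. A worked example of that rule (numbers illustrative):

```cpp
#include <cstdint>
#include <cstdio>

// With fine grained shuffle, the writer flushes once the buffered row count
// reaches fine_grained_shuffle_batch_size, independent of the encode type.
int main()
{
    const int64_t batch_size = 4096; // fine_grained_shuffle_batch_size
    int64_t rows_in_blocks = 0;
    for (int block = 1; block <= 8; ++block)
    {
        rows_in_blocks += 1024; // each incoming block carries 1024 rows
        if (rows_in_blocks >= batch_size)
        {
            printf("flush after block %d (%lld rows)\n", block, (long long)rows_in_blocks);
            rows_in_blocks = 0; // batchWriteFineGrainedShuffle() resets the buffer
        }
    }
}
```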
+ if (input_blocks.empty()) { for (auto part_id = 0; part_id < partition_num; ++part_id) { writer->write(packet[part_id], part_id); } } - return; } +} - // partition tuples in blocks - // 1) compute partition id - // 2) partition each row - // 3) encode each chunk and send it - std::vector partition_key_containers(collators.size()); - for (auto & block : input_blocks) +template +template +void StreamingDAGResponseWriter::writePackets(const std::vector & responses_row_count, + std::vector & packets) const +{ + for (size_t part_id = 0; part_id < packets.size(); ++part_id) { - std::vector dest_blocks(partition_num); - std::vector dest_tbl_cols(partition_num); - - for (size_t i = 0; i < block.columns(); ++i) + if constexpr (send_exec_summary_at_last) { - if (ColumnPtr converted = block.getByPosition(i).column->convertToFullColumnIfConst()) - { - block.getByPosition(i).column = converted; - } + writer->write(packets[part_id], part_id); } - - for (auto i = 0; i < partition_num; ++i) + else { - dest_tbl_cols[i] = block.cloneEmptyColumns(); - dest_blocks[i] = block.cloneEmpty(); + if (responses_row_count[part_id] > 0) + writer->write(packets[part_id], part_id); } + } +} - size_t rows = block.rows(); - WeakHash32 hash(rows); - - // get hash values by all partition key columns - for (size_t i = 0; i < partition_col_ids.size(); i++) +inline void initInputBlocks(std::vector & input_blocks) +{ + for (auto & input_block : input_blocks) + { + for (size_t i = 0; i < input_block.columns(); ++i) { - block.getByPosition(partition_col_ids[i]).column->updateWeakHash32(hash, collators[i], partition_key_containers[i]); + if (ColumnPtr converted = input_block.getByPosition(i).column->convertToFullColumnIfConst()) + input_block.getByPosition(i).column = converted; } - const auto & hash_data = hash.getData(); + } +} - // partition each row - IColumn::Selector selector(rows); - for (size_t row = 0; row < rows; ++row) - { - /// Row from interval [(2^32 / partition_num) * i, (2^32 / partition_num) * (i + 1)) goes to bucket with number i. - selector[row] = hash_data[row]; /// [0, 2^32) - selector[row] *= partition_num; /// [0, partition_num * 2^32), selector stores 64 bit values. 
- selector[row] >>= 32u; /// [0, partition_num) - } +inline void initDestColumns(const Block & input_block, std::vector & dest_tbl_cols) +{ + for (auto & cols : dest_tbl_cols) + { + cols = input_block.cloneEmptyColumns(); + } +} - for (size_t col_id = 0; col_id < block.columns(); ++col_id) - { - // Scatter columns to different partitions - auto scattered_columns = block.getByPosition(col_id).column->scatter(partition_num, selector); - for (size_t part_id = 0; part_id < partition_num; ++part_id) - { - dest_tbl_cols[part_id][col_id] = std::move(scattered_columns[part_id]); - } - } - // serialize each partitioned block and write it to its destination - for (auto part_id = 0; part_id < partition_num; ++part_id) - { - dest_blocks[part_id].setColumns(std::move(dest_tbl_cols[part_id])); - responses_row_count[part_id] += dest_blocks[part_id].rows(); - chunk_codec_stream->encode(dest_blocks[part_id], 0, dest_blocks[part_id].rows()); - packet[part_id].add_chunks(chunk_codec_stream->getString()); - chunk_codec_stream->clear(); - } +void computeHash(const Block & input_block, + uint32_t bucket_num, + const TiDB::TiDBCollators & collators, + std::vector & partition_key_containers, + const std::vector & partition_col_ids, + std::vector> & result_columns) +{ + size_t rows = input_block.rows(); + WeakHash32 hash(rows); + + // get hash values by all partition key columns + for (size_t i = 0; i < partition_col_ids.size(); ++i) + { + input_block.getByPosition(partition_col_ids[i]).column->updateWeakHash32(hash, collators[i], partition_key_containers[i]); } - for (auto part_id = 0; part_id < partition_num; ++part_id) + const auto & hash_data = hash.getData(); + + // partition each row + IColumn::Selector selector(rows); + for (size_t row = 0; row < rows; ++row) { - if constexpr (send_exec_summary_at_last) + /// Row from interval [(2^32 / bucket_num) * i, (2^32 / bucket_num) * (i + 1)) goes to bucket with number i. + selector[row] = hash_data[row]; /// [0, 2^32) + selector[row] *= bucket_num; /// [0, bucket_num * 2^32), selector stores 64 bit values. + selector[row] >>= 32u; /// [0, bucket_num) + } + + for (size_t col_id = 0; col_id < input_block.columns(); ++col_id) + { + // Scatter columns to different partitions + std::vector part_columns = input_block.getByPosition(col_id).column->scatter(bucket_num, selector); + assert(part_columns.size() == bucket_num); + for (size_t bucket_idx = 0; bucket_idx < bucket_num; ++bucket_idx) { - writer->write(packet[part_id], part_id); + result_columns[bucket_idx][col_id] = std::move(part_columns[bucket_idx]); } - else + } +} + +/// Hash exchanging data among only TiFlash nodes. Only be called when enable_fine_grained_shuffle is false. 
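A worked example of the selector arithmetic in computeHash() above; the multiply-then-shift maps a 32-bit hash uniformly onto [0, bucket_num) without a modulo:

```cpp
#include <cstdint>
#include <cstdio>

// A 32-bit hash h lands in bucket (h * bucket_num) >> 32, i.e. hashes from
// [(2^32 / bucket_num) * i, (2^32 / bucket_num) * (i + 1)) map to bucket i.
int main()
{
    const uint64_t bucket_num = 32; // e.g. partition_num(4) * stream_count(8)
    const uint32_t h = 0x80000000; // a hash exactly halfway through [0, 2^32)
    uint64_t selector = static_cast<uint64_t>(h) * bucket_num; // [0, bucket_num * 2^32)
    selector >>= 32u;                                          // [0, bucket_num)
    printf("bucket = %llu\n", (unsigned long long)selector);   // prints 16
}
```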
+template +template +void StreamingDAGResponseWriter::partitionAndEncodeThenWriteBlocks( + std::vector & input_blocks, + tipb::SelectResponse & response) const +{ + static_assert(!enable_fine_grained_shuffle); + std::vector packet(partition_num); + std::vector responses_row_count(partition_num); + handleExecSummary(input_blocks, packet, response); + if (input_blocks.empty()) + return; + + initInputBlocks(input_blocks); + Block dest_block = input_blocks[0].cloneEmpty(); + std::vector partition_key_containers(collators.size()); + for (const auto & block : input_blocks) + { + std::vector dest_tbl_cols(partition_num); + initDestColumns(block, dest_tbl_cols); + + computeHash(block, partition_num, collators, partition_key_containers, partition_col_ids, dest_tbl_cols); + + for (size_t part_id = 0; part_id < partition_num; ++part_id) { - if (responses_row_count[part_id] > 0) - writer->write(packet[part_id], part_id); + dest_block.setColumns(std::move(dest_tbl_cols[part_id])); + responses_row_count[part_id] += dest_block.rows(); + chunk_codec_stream->encode(dest_block, 0, dest_block.rows()); + packet[part_id].add_chunks(chunk_codec_stream->getString()); + chunk_codec_stream->clear(); } } + + writePackets(responses_row_count, packet); } -template +/// Hash exchanging data among only TiFlash nodes. Only be called when enable_fine_grained_shuffle is true. +template template -void StreamingDAGResponseWriter::batchWrite() +void StreamingDAGResponseWriter::batchWriteFineGrainedShuffle() { + static_assert(enable_fine_grained_shuffle); + assert(exchange_type == tipb::ExchangeType::Hash); + assert(fine_grained_shuffle_stream_count <= 1024); + tipb::SelectResponse response; if constexpr (send_exec_summary_at_last) addExecuteSummaries(response, !dag_context.isMPPTask() || dag_context.isRootMPPTask()); - if (exchange_type == tipb::ExchangeType::Hash) - { - partitionAndEncodeThenWriteBlocks(blocks, response); - } - else + + std::vector packet(partition_num); + std::vector responses_row_count(partition_num, 0); + + // fine_grained_shuffle_stream_count is in [0, 1024], and partition_num is uint16_t, so will not overflow. + uint32_t bucket_num = partition_num * fine_grained_shuffle_stream_count; + handleExecSummary(blocks, packet, response); + if (!blocks.empty()) { - encodeThenWriteBlocks(blocks, response); + std::vector final_dest_tbl_columns(bucket_num); + initInputBlocks(blocks); + initDestColumns(blocks[0], final_dest_tbl_columns); + + // Hash partition input_blocks into bucket_num. + for (const auto & block : blocks) + { + std::vector partition_key_containers(collators.size()); + std::vector dest_tbl_columns(bucket_num); + initDestColumns(block, dest_tbl_columns); + computeHash(block, bucket_num, collators, partition_key_containers, partition_col_ids, dest_tbl_columns); + for (size_t bucket_idx = 0; bucket_idx < bucket_num; ++bucket_idx) + { + for (size_t col_id = 0; col_id < block.columns(); ++col_id) + { + const MutableColumnPtr & src_col = dest_tbl_columns[bucket_idx][col_id]; + final_dest_tbl_columns[bucket_idx][col_id]->insertRangeFrom(*src_col, 0, src_col->size()); + } + } + } + + // For i-th stream_count buckets, send to i-th tiflash node. 
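The function that follows distributes bucket_num = partition_num * fine_grained_shuffle_stream_count buckets so that each run of stream_count consecutive buckets belongs to one downstream partition. A small sketch of that layout (illustrative counts):

```cpp
#include <cstddef>
#include <cstdio>

// Buckets are laid out partition-major: consecutive runs of
// fine_grained_shuffle_stream_count buckets all belong to one partition.
int main()
{
    const size_t stream_count = 3; // fine_grained_shuffle_stream_count
    const size_t partition_num = 2;
    const size_t bucket_num = partition_num * stream_count;
    for (size_t bucket_idx = 0; bucket_idx < bucket_num; ++bucket_idx)
        printf("bucket %zu -> partition %zu, stream_id %zu\n",
               bucket_idx,
               bucket_idx / stream_count,  // part_id, as in the loop below
               bucket_idx % stream_count); // stream id recorded via add_stream_ids
}
```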
+        for (size_t bucket_idx = 0; bucket_idx < bucket_num; bucket_idx += fine_grained_shuffle_stream_count)
+        {
+            size_t part_id = bucket_idx / fine_grained_shuffle_stream_count; // NOLINT(clang-analyzer-core.DivideZero)
+            size_t row_count_per_part = 0;
+            for (uint64_t stream_idx = 0; stream_idx < fine_grained_shuffle_stream_count; ++stream_idx)
+            {
+                Block dest_block = blocks[0].cloneEmpty();
+                // For now we put all rows into one Block, which may cause this Block to be too large.
+                dest_block.setColumns(std::move(final_dest_tbl_columns[bucket_idx + stream_idx]));
+                row_count_per_part += dest_block.rows();
+
+                chunk_codec_stream->encode(dest_block, 0, dest_block.rows());
+                packet[part_id].add_chunks(chunk_codec_stream->getString());
+                packet[part_id].add_stream_ids(stream_idx);
+                chunk_codec_stream->clear();
+            }
+            responses_row_count[part_id] = row_count_per_part;
+        }
     }
+
+    writePackets(responses_row_count, packet);
+
     blocks.clear();
     rows_in_blocks = 0;
 }

-template class StreamingDAGResponseWriter<StreamWriterPtr>;
-template class StreamingDAGResponseWriter<MPPTunnelSetPtr>;
+template class StreamingDAGResponseWriter<StreamWriterPtr, true>;
+template class StreamingDAGResponseWriter<StreamWriterPtr, false>;
+template class StreamingDAGResponseWriter<MPPTunnelSetPtr, true>;
+template class StreamingDAGResponseWriter<MPPTunnelSetPtr, false>;
 } // namespace DB
diff --git a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.h b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.h
index 9b5e3864c64..cd7559d1e79 100644
--- a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.h
+++ b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.h
@@ -33,7 +33,7 @@ namespace DB
 /// Serializes the stream of blocks and sends them to TiDB or TiFlash with different serialization paths.
 /// When sending data to TiDB, blocks with extra info are written into tipb::SelectResponse, then the whole tipb::SelectResponse is further serialized into mpp::MPPDataPacket.data.
 /// Differently when sending data to TiFlash, blocks with only tuples are directly serialized into mpp::MPPDataPacket.chunks, but for the last block, its extra info (like execution summaries) is written into tipb::SelectResponse, then further serialized into mpp::MPPDataPacket.data.
-template <class StreamWriterPtr>
+template <class StreamWriterPtr, bool enable_fine_grained_shuffle>
 class StreamingDAGResponseWriter : public DAGResponseWriter
 {
 public:
@@ -45,18 +45,30 @@ class StreamingDAGResponseWriter : public DAGResponseWriter
         Int64 records_per_chunk_,
         Int64 batch_send_min_limit_,
         bool should_send_exec_summary_at_last,
-        DAGContext & dag_context_);
+        DAGContext & dag_context_,
+        UInt64 fine_grained_shuffle_stream_count_,
+        UInt64 fine_grained_shuffle_batch_size);
     void write(const Block & block) override;
     void finishWrite() override;

 private:
     template <bool send_exec_summary_at_last>
     void batchWrite();
+    template <bool send_exec_summary_at_last>
+    void batchWriteFineGrainedShuffle();
+
     template <bool send_exec_summary_at_last>
     void encodeThenWriteBlocks(const std::vector<Block> & input_blocks, tipb::SelectResponse & response) const;
     template <bool send_exec_summary_at_last>
     void partitionAndEncodeThenWriteBlocks(std::vector<Block> & input_blocks, tipb::SelectResponse & response) const;
+    template <bool send_exec_summary_at_last>
+    void handleExecSummary(const std::vector<Block> & input_blocks,
+                           std::vector<mpp::MPPDataPacket> & packet,
+                           tipb::SelectResponse & response) const;
+    template <bool send_exec_summary_at_last>
+    void writePackets(const std::vector<size_t> & responses_row_count, std::vector<mpp::MPPDataPacket> & packets) const;
+
     Int64 batch_send_min_limit;
     bool should_send_exec_summary_at_last; /// only one stream needs to send execution summaries at last.
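Since enable_fine_grained_shuffle is a template parameter, call sites must turn the runtime plan flag into a compile-time argument, which is why the instantiation list above doubled. A generic sketch of that dispatch pattern (illustrative names, not from this diff):

```cpp
#include <cstdio>

// The runtime flag from the plan picks one of two instantiations, so the hot
// loop runs with `if constexpr` fully resolved at compile time.
template <bool enable_fine_grained_shuffle>
void writeLoop()
{
    if constexpr (enable_fine_grained_shuffle)
        printf("fine grained path\n");
    else
        printf("classic path\n");
}

void dispatch(bool enable_fine_grained_shuffle)
{
    if (enable_fine_grained_shuffle)
        writeLoop<true>();
    else
        writeLoop<false>();
}

int main() { dispatch(true); dispatch(false); }
```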
tipb::ExchangeType exchange_type; @@ -67,6 +79,8 @@ class StreamingDAGResponseWriter : public DAGResponseWriter size_t rows_in_blocks; uint16_t partition_num; std::unique_ptr chunk_codec_stream; + UInt64 fine_grained_shuffle_stream_count; + UInt64 fine_grained_shuffle_batch_size; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp index 7183374a5c1..eef89696d3a 100644 --- a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp +++ b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp @@ -28,7 +28,7 @@ template void encodeLittleEndian(const T & value, WriteBuffer & ss) { auto v = toLittleEndian(value); - ss.write(reinterpret_cast(&v), sizeof(v)); + ss.template writeFixed(&v); } TiDBColumn::TiDBColumn(Int8 element_len_) @@ -141,10 +141,10 @@ void TiDBColumn::append(const TiDBDecimal & decimal) encodeLittleEndian(decimal.digits_int, *data); encodeLittleEndian(decimal.digits_frac, *data); encodeLittleEndian(decimal.result_frac, *data); - encodeLittleEndian((UInt8)decimal.negative, *data); - for (int i = 0; i < MAX_WORD_BUF_LEN; i++) + encodeLittleEndian(static_cast(decimal.negative), *data); + for (int i : decimal.word_buf) { - encodeLittleEndian(decimal.word_buf[i], *data); + encodeLittleEndian(i, *data); } finishAppendFixed(); } diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index b68279faa13..87744c553e0 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -45,19 +45,36 @@ bool collectForAgg(std::vector & output_field_types, const tipb { for (const auto & expr : agg.agg_func()) { - if (!exprHasValidFieldType(expr)) + if (unlikely(!exprHasValidFieldType(expr))) throw TiFlashException("Agg expression without valid field type", Errors::Coprocessor::BadRequest); output_field_types.push_back(expr.field_type()); } for (const auto & expr : agg.group_by()) { - if (!exprHasValidFieldType(expr)) + if (unlikely(!exprHasValidFieldType(expr))) throw TiFlashException("Group by expression without valid field type", Errors::Coprocessor::BadRequest); output_field_types.push_back(expr.field_type()); } return false; } +bool collectForExecutor(std::vector & output_field_types, const tipb::Executor & executor); +bool collectForWindow(std::vector & output_field_types, const tipb::Executor & executor) +{ + // collect output_field_types of child + getChildren(executor).forEach([&output_field_types](const tipb::Executor & child) { + traverseExecutorTree(child, [&output_field_types](const tipb::Executor & e) { return collectForExecutor(output_field_types, e); }); + }); + + for (const auto & expr : executor.window().func_desc()) + { + if (unlikely(!exprHasValidFieldType(expr))) + throw TiFlashException("Window expression without valid field type", Errors::Coprocessor::BadRequest); + output_field_types.push_back(expr.field_type()); + } + return false; +} + bool collectForReceiver(std::vector & output_field_types, const tipb::ExchangeReceiver & receiver) { for (const auto & field_type : receiver.field_types()) @@ -82,7 +99,6 @@ bool collectForTableScan(std::vector & output_field_types, cons return false; } -bool collectForExecutor(std::vector & output_field_types, const tipb::Executor & executor); bool collectForJoin(std::vector & output_field_types, const tipb::Executor & executor) { // collect output_field_types of children @@ -147,8 +163,8 @@ bool collectForExecutor(std::vector & output_field_types, const case 
tipb::ExecType::TypeWindow: // Window will only be pushed down in mpp mode. // In mpp mode, ExchangeSender or Sender will return output_field_types directly. - // If not in mpp mode, window executor type is invalid. - throw TiFlashException("Window executor type is invalid in non-mpp mode, should not reach here.", Errors::Coprocessor::Internal); + // If not in mpp mode or debug mode, window executor type is invalid. + return collectForWindow(output_field_types, executor); case tipb::ExecType::TypeExchangeReceiver: return collectForReceiver(output_field_types, executor.exchange_receiver()); case tipb::ExecType::TypeTableScan: diff --git a/dbms/src/Flash/Coprocessor/tests/gtest_streaming_dag_writer.cpp b/dbms/src/Flash/Coprocessor/tests/gtest_streaming_dag_writer.cpp new file mode 100644 index 00000000000..5d4186123b7 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/tests/gtest_streaming_dag_writer.cpp @@ -0,0 +1,184 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ +namespace tests +{ + +using BlockPtr = std::shared_ptr; +class TestStreamingDAGResponseWriter : public testing::Test +{ +protected: + void SetUp() override + { + dag_context_ptr = std::make_unique(1024); + dag_context_ptr->encode_type = tipb::EncodeType::TypeCHBlock; + dag_context_ptr->is_mpp_task = true; + dag_context_ptr->is_root_mpp_task = false; + dag_context_ptr->result_field_types = makeFields(); + context.setDAGContext(dag_context_ptr.get()); + } + +public: + TestStreamingDAGResponseWriter() + : context(TiFlashTestEnv::getContext()) + , part_col_ids{0} + , part_col_collators{ + TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY)} + {} + + // Return 10 Int64 column. + static std::vector makeFields() + { + std::vector fields(10); + for (int i = 0; i < 10; ++i) + { + fields[i].set_tp(TiDB::TypeLongLong); + } + return fields; + } + + // Return a block with **rows** and 10 Int64 column. 
+ static BlockPtr prepareBlock(const std::vector & rows) + { + BlockPtr block = std::make_shared(); + for (int i = 0; i < 10; ++i) + { + DataTypePtr int64_data_type = std::make_shared(); + DataTypePtr nullable_int64_data_type = std::make_shared(int64_data_type); + MutableColumnPtr int64_col = nullable_int64_data_type->createColumn(); + for (Int64 r : rows) + { + int64_col->insert(Field(r)); + } + block->insert(ColumnWithTypeAndName{std::move(int64_col), + nullable_int64_data_type, + String("col") + std::to_string(i)}); + } + return block; + } + + Context context; + std::vector part_col_ids; + TiDB::TiDBCollators part_col_collators; + + std::unique_ptr dag_context_ptr; +}; + +using MockStreamWriterChecker = std::function; + +struct MockStreamWriter +{ + MockStreamWriter(MockStreamWriterChecker checker_, + uint16_t part_num_) + : checker(checker_) + , part_num(part_num_) + {} + + void write(mpp::MPPDataPacket &) { FAIL() << "cannot reach here, because we only expect hash partition"; } + void write(mpp::MPPDataPacket & packet, uint16_t part_id) { checker(packet, part_id); } + void write(tipb::SelectResponse &, uint16_t) { FAIL() << "cannot reach here, only consider CH Block format"; } + void write(tipb::SelectResponse &) { FAIL() << "cannot reach here, only consider CH Block format"; } + uint16_t getPartitionNum() const { return part_num; } + +private: + MockStreamWriterChecker checker; + uint16_t part_num; +}; + +// Input block data is distributed uniform. +// partition_num: 4 +// fine_grained_shuffle_stream_count: 8 +TEST_F(TestStreamingDAGResponseWriter, testBatchWriteFineGrainedShuffle) +try +{ + const size_t block_rows = 1024; + const uint16_t part_num = 4; + const uint32_t fine_grained_shuffle_stream_count = 8; + const Int64 fine_grained_shuffle_batch_size = 4096; + + // Set these to 1, because when fine grained shuffle is enabled, + // batchWriteFineGrainedShuffle() only check fine_grained_shuffle_batch_size. + // records_per_chunk and batch_send_min_limit are useless. + const Int64 records_per_chunk = 1; + const Int64 batch_send_min_limit = 1; + const bool should_send_exec_summary_at_last = true; + + // 1. Build Block. + std::vector uniform_data_set; + for (size_t i = 0; i < block_rows; ++i) + { + uniform_data_set.push_back(i); + } + BlockPtr block = prepareBlock(uniform_data_set); + + // 2. Build MockStreamWriter. + std::unordered_map write_report; + auto checker = [&write_report](mpp::MPPDataPacket & packet, uint16_t part_id) { + auto res = write_report.insert({part_id, packet}); + // Should always insert succeed. + // Because block.rows(1024) < fine_grained_shuffle_batch_size(4096), + // batchWriteFineGrainedShuffle() only called once, so will only be one packet for each partition. + ASSERT_TRUE(res.second); + }; + auto mock_writer = std::make_shared(checker, part_num); + + // 3. Start to write. + auto dag_writer = std::make_shared, /*enable_fine_grained_shuffle=*/true>>( + mock_writer, + part_col_ids, + part_col_collators, + tipb::ExchangeType::Hash, + records_per_chunk, + batch_send_min_limit, + should_send_exec_summary_at_last, + *dag_context_ptr, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size); + dag_writer->write(*block); + dag_writer->finishWrite(); + + // 4. Start to check write_report. 
+ std::vector decoded_blocks; + ASSERT_EQ(write_report.size(), part_num); + for (const auto & ele : write_report) + { + const mpp::MPPDataPacket & packet = ele.second; + ASSERT_EQ(packet.chunks_size(), packet.stream_ids_size()); + for (int i = 0; i < packet.chunks_size(); ++i) + { + decoded_blocks.push_back(CHBlockChunkCodec::decode(packet.chunks(i), *block)); + } + } + ASSERT_EQ(decoded_blocks.size(), fine_grained_shuffle_stream_count * part_num); + for (const auto & block : decoded_blocks) + { + ASSERT_EQ(block.rows(), block_rows / (fine_grained_shuffle_stream_count * part_num)); + } +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/EstablishCall.cpp b/dbms/src/Flash/EstablishCall.cpp index 8af81e30962..2f8c7c15f56 100644 --- a/dbms/src/Flash/EstablishCall.cpp +++ b/dbms/src/Flash/EstablishCall.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -19,6 +20,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_tunnel_init_rpc_failure_failpoint[]; +} // namespace FailPoints + EstablishCallData::EstablishCallData(AsyncFlashService * service, grpc::ServerCompletionQueue * cq, grpc::ServerCompletionQueue * notify_cq, const std::shared_ptr> & is_shutdown) : service(service) , cq(cq) @@ -71,6 +77,7 @@ void EstablishCallData::initRpc() std::exception_ptr eptr = nullptr; try { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_tunnel_init_rpc_failure_failpoint); service->establishMPPConnectionSyncOrAsync(&ctx, &request, nullptr, this); } catch (...) @@ -136,7 +143,7 @@ void EstablishCallData::finishTunnelAndResponder() state = FINISH; if (mpp_tunnel) { - mpp_tunnel->consumerFinish("grpc writes failed.", true); //trigger mpp tunnel finish work + mpp_tunnel->consumerFinish(fmt::format("{}: finishTunnelAndResponder called.", mpp_tunnel->id()), true); //trigger mpp tunnel finish work } grpc::Status status(static_cast(GRPC_STATUS_UNKNOWN), "Consumer exits unexpected, grpc writes failed."); responder.Finish(status, this); diff --git a/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp b/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp index df6c881c306..1e9da93ffe3 100644 --- a/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp +++ b/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -48,7 +47,6 @@ class BasicManualCompactTest BasicManualCompactTest() { - log = &Poco::Logger::get(DB::base::TiFlashStorageTestBasic::getCurrentFullTestName()); pk_type = GetParam(); } @@ -63,7 +61,7 @@ class BasicManualCompactTest setupStorage(); // In tests let's only compact one segment. - db_context->setSetting("manual_compact_more_until_ms", UInt64(0)); + db_context->setSetting("manual_compact_more_until_ms", Field(UInt64(0))); // Split into 4 segments, and prepare some delta data for first 3 segments. helper = std::make_unique(*db_context); @@ -116,8 +114,6 @@ class BasicManualCompactTest std::unique_ptr manager; DM::tests::DMTestEnv::PkType pk_type; - - [[maybe_unused]] Poco::Logger * log; }; @@ -315,7 +311,7 @@ CATCH TEST_P(BasicManualCompactTest, CompactMultiple) try { - db_context->setSetting("manual_compact_more_until_ms", UInt64(60 * 1000)); // Hope it's long enough! + db_context->setSetting("manual_compact_more_until_ms", Field(UInt64(60 * 1000))); // Hope it's long enough! 
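Stepping back to the expectations encoded in gtest_streaming_dag_writer.cpp above: with uniformly distributed input the row counts are fully determined by the partition and stream counts. Restating the arithmetic behind the assertions:

```cpp
// bucket_num = part_num * fine_grained_shuffle_stream_count = 4 * 8 = 32,
// so 1024 uniformly hashed rows give 1024 / 32 = 32 rows per decoded block,
// and each of the 4 packets carries 8 chunks with matching stream_ids.
static_assert(4 * 8 == 32, "bucket_num = part_num * stream_count");
static_assert(1024 / (4 * 8) == 32, "rows per decoded block");
int main() {}
```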
     auto request = ::kvrpcpb::CompactRequest();
     request.set_physical_table_id(TABLE_ID);
diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp
index f194afee31f..ab8d83a1481 100644
--- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp
+++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp
@@ -13,6 +13,8 @@
 // limitations under the License.

 #include
+#include
+#include
 #include
 #include
 #include
@@ -22,6 +24,12 @@
 namespace DB
 {
+namespace FailPoints
+{
+extern const char random_receiver_sync_msg_push_failure_failpoint[];
+extern const char random_receiver_async_msg_push_failure_failpoint[];
+} // namespace FailPoints
+
 namespace
 {
 String getReceiverStateStr(const ExchangeReceiverState & s)
@@ -41,6 +49,106 @@ String getReceiverStateStr(const ExchangeReceiverState & s)
     }
 }

+// If enable_fine_grained_shuffle:
+//      Separate chunks according to packet.stream_ids[i], then push to msg_channels[stream_id].
+// If fine grained shuffle is disabled:
+//      Push all chunks to msg_channels[0].
+// Return true if all pushes succeed, otherwise return false.
+// NOTE: shared_ptr<MPPDataPacket> will be held by all ExchangeReceiverBlockInputStreams to keep the chunk pointers valid.
+template <bool enable_fine_grained_shuffle, bool is_sync>
+bool pushPacket(size_t source_index,
+                const String & req_info,
+                MPPDataPacketPtr & packet,
+                const std::vector<MsgChannelPtr> & msg_channels,
+                LoggerPtr & log)
+{
+    bool push_succeed = true;
+
+    const mpp::Error * error_ptr = nullptr;
+    if (packet->has_error())
+        error_ptr = &packet->error();
+    const String * resp_ptr = nullptr;
+    if (!packet->data().empty())
+        resp_ptr = &packet->data();
+
+    if constexpr (enable_fine_grained_shuffle)
+    {
+        std::vector<std::vector<const String *>> chunks(msg_channels.size());
+        if (!packet->chunks().empty())
+        {
+            // Packet is not empty.
+            if (unlikely(packet->stream_ids().empty()))
+            {
+                // Fine grained shuffle is enabled in the receiver, but not in the sender. We cannot handle this, so return an error.
+                // This can happen when there are old-version nodes during a rolling upgrade.
+                LOG_FMT_ERROR(log, "MPPDataPacket.stream_ids empty, it means the ExchangeSender is an old-version binary "
+                                   "(source_index: {}) while fine grained shuffle of the ExchangeReceiver is enabled. "
+                                   "Cannot handle this.",
+                              source_index);
+                return false;
+            }
+            // packet.stream_ids[i] corresponds to packet.chunks[i],
+            // indicating which stream_id this chunk belongs to.
+            assert(packet->chunks_size() == packet->stream_ids_size());
+
+            for (int i = 0; i < packet->stream_ids_size(); ++i)
+            {
+                UInt64 stream_id = packet->stream_ids(i) % msg_channels.size();
+                chunks[stream_id].push_back(&packet->chunks(i));
+            }
+        }
+        // Still need to send error_ptr or resp_ptr even if packet.chunks_size() is zero.
+        for (size_t i = 0; i < msg_channels.size() && push_succeed; ++i)
+        {
+            if (resp_ptr == nullptr && error_ptr == nullptr && chunks[i].empty())
+                continue;
+
+            std::shared_ptr<ReceivedMessage> recv_msg = std::make_shared<ReceivedMessage>(
+                source_index,
+                req_info,
+                packet,
+                error_ptr,
+                resp_ptr,
+                std::move(chunks[i]));
+            push_succeed = msg_channels[i]->push(std::move(recv_msg));
+            if constexpr (is_sync)
+                fiu_do_on(FailPoints::random_receiver_sync_msg_push_failure_failpoint, push_succeed = false;);
+            else
+                fiu_do_on(FailPoints::random_receiver_async_msg_push_failure_failpoint, push_succeed = false;);
+
+            // Only the first ExchangeReceiverInputStream needs to handle resp.
+ resp_ptr = nullptr; + } + } + else + { + std::vector chunks(packet->chunks_size()); + for (int i = 0; i < packet->chunks_size(); ++i) + { + chunks[i] = &packet->chunks(i); + } + + if (!(resp_ptr == nullptr && error_ptr == nullptr && chunks.empty())) + { + std::shared_ptr recv_msg = std::make_shared( + source_index, + req_info, + packet, + error_ptr, + resp_ptr, + std::move(chunks)); + + push_succeed = msg_channels[0]->push(std::move(recv_msg)); + if constexpr (is_sync) + fiu_do_on(FailPoints::random_receiver_sync_msg_push_failure_failpoint, push_succeed = false;); + else + fiu_do_on(FailPoints::random_receiver_async_msg_push_failure_failpoint, push_succeed = false;); + } + } + LOG_FMT_DEBUG(log, "push recv_msg to msg_channels(size: {}) succeed:{}, enable_fine_grained_shuffle: {}", msg_channels.size(), push_succeed, enable_fine_grained_shuffle); + return push_succeed; +} + enum class AsyncRequestStage { NEED_INIT, @@ -57,25 +165,25 @@ using TimePoint = Clock::time_point; constexpr Int32 max_retry_times = 10; constexpr Int32 batch_packet_count = 16; -template +template class AsyncRequestHandler : public UnaryCallback { public: using Status = typename RPCContext::Status; using Request = typename RPCContext::Request; using AsyncReader = typename RPCContext::AsyncReader; - using Self = AsyncRequestHandler; + using Self = AsyncRequestHandler; AsyncRequestHandler( MPMCQueue * queue, - MPMCQueue> * msg_channel_, + std::vector * msg_channels_, const std::shared_ptr & context, const Request & req, const String & req_id) : rpc_context(context) , request(&req) , notify_queue(queue) - , msg_channel(msg_channel_) + , msg_channels(msg_channels_) , req_info(fmt::format("tunnel{}+{}", req.send_task_id, req.recv_task_id)) , log(Logger::get("ExchangeReceiver", req_id, req_info)) { @@ -253,11 +361,7 @@ class AsyncRequestHandler : public UnaryCallback for (size_t i = 0; i < read_packet_index; ++i) { auto & packet = packets[i]; - auto recv_msg = std::make_shared(); - recv_msg->packet = std::move(packet); - recv_msg->source_index = request->source_index; - recv_msg->req_info = req_info; - if (!msg_channel->push(std::move(recv_msg))) + if (!pushPacket(request->source_index, req_info, packet, *msg_channels, log)) return false; // can't reuse packet since it is sent to readers. 
packet = std::make_shared(); @@ -274,7 +378,7 @@ class AsyncRequestHandler : public UnaryCallback std::shared_ptr rpc_context; const Request * request; // won't be null MPMCQueue * notify_queue; // won't be null - MPMCQueue> * msg_channel; // won't be null + std::vector * msg_channels; // won't be null String req_info; bool meet_error = false; @@ -299,20 +403,32 @@ ExchangeReceiverBase::ExchangeReceiverBase( size_t source_num_, size_t max_streams_, const String & req_id, - const String & executor_id) + const String & executor_id, + uint64_t fine_grained_shuffle_stream_count_) : rpc_context(std::move(rpc_context_)) , source_num(source_num_) , max_streams(max_streams_) , max_buffer_size(std::max(batch_packet_count, std::max(source_num, max_streams_) * 2)) , thread_manager(newThreadManager()) - , msg_channel(max_buffer_size) , live_connections(source_num) , state(ExchangeReceiverState::NORMAL) , exc_log(Logger::get("ExchangeReceiver", req_id, executor_id)) , collected(false) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) { try { + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count_)) + { + for (size_t i = 0; i < max_streams_; ++i) + { + msg_channels.push_back(std::make_unique>>(max_buffer_size)); + } + } + else + { + msg_channels.push_back(std::make_unique>>(max_buffer_size)); + } rpc_context->fillSchema(schema); setUpConnection(); } @@ -349,14 +465,14 @@ template void ExchangeReceiverBase::cancel() { setEndState(ExchangeReceiverState::CANCELED); - msg_channel.finish(); + cancelAllMsgChannels(); } template void ExchangeReceiverBase::close() { setEndState(ExchangeReceiverState::CLOSED); - msg_channel.finish(); + finishAllMsgChannels(); } template @@ -371,7 +487,12 @@ void ExchangeReceiverBase::setUpConnection() async_requests.push_back(std::move(req)); else { - thread_manager->schedule(true, "Receiver", [this, req = std::move(req)] { readLoop(req); }); + thread_manager->schedule(true, "Receiver", [this, req = std::move(req)] { + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count)) + readLoop(req); + else + readLoop(req); + }); ++thread_count; } } @@ -379,15 +500,21 @@ void ExchangeReceiverBase::setUpConnection() // TODO: reduce this thread in the future. 
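For reference, the routing rule implemented by pushPacket() earlier in this file, reduced to its arithmetic (illustrative values):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Chunk i of a packet goes to msg_channels[stream_ids[i] % channel_count];
// without fine grained shuffle there is a single channel and every chunk
// goes to channel 0.
int main()
{
    const size_t channel_count = 4; // one per ExchangeReceiverBlockInputStream
    const std::vector<uint64_t> stream_ids = {0, 1, 5, 7}; // from MPPDataPacket
    for (size_t i = 0; i < stream_ids.size(); ++i)
        printf("chunk %zu -> channel %llu\n",
               i,
               (unsigned long long)(stream_ids[i] % channel_count));
}
```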
if (!async_requests.empty()) { - thread_manager->schedule(true, "RecvReactor", [this, async_requests = std::move(async_requests)] { reactor(async_requests); }); + thread_manager->schedule(true, "RecvReactor", [this, async_requests = std::move(async_requests)] { + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count)) + reactor(async_requests); + else + reactor(async_requests); + }); ++thread_count; } } template +template void ExchangeReceiverBase::reactor(const std::vector & async_requests) { - using AsyncHandler = AsyncRequestHandler; + using AsyncHandler = AsyncRequestHandler; GET_METRIC(tiflash_thread_count, type_threads_of_receiver_reactor).Increment(); SCOPE_EXIT({ @@ -403,7 +530,7 @@ void ExchangeReceiverBase::reactor(const std::vector & asyn std::vector> handlers; handlers.reserve(alive_async_connections); for (const auto & req : async_requests) - handlers.emplace_back(std::make_unique(&ready_requests, &msg_channel, rpc_context, req, exc_log->identifier())); + handlers.emplace_back(std::make_unique(&ready_requests, &msg_channels, rpc_context, req, exc_log->identifier())); while (alive_async_connections > 0) { @@ -415,7 +542,7 @@ void ExchangeReceiverBase::reactor(const std::vector & asyn for (Int32 i = 0; i < check_waiting_requests_freq; ++i) { AsyncHandler * handler = nullptr; - if (unlikely(!ready_requests.tryPop(handler, timeout))) + if (unlikely(!ready_requests.popTimeout(handler, timeout))) break; handler->handle(); @@ -448,6 +575,7 @@ void ExchangeReceiverBase::reactor(const std::vector & asyn } template +template void ExchangeReceiverBase::readLoop(const Request & req) { GET_METRIC(tiflash_thread_count, type_threads_of_receiver_read_loop).Increment(); @@ -472,18 +600,15 @@ void ExchangeReceiverBase::readLoop(const Request & req) for (;;) { LOG_FMT_TRACE(log, "begin next "); - auto recv_msg = std::make_shared(); - recv_msg->packet = std::make_shared(); - recv_msg->req_info = req_info; - recv_msg->source_index = req.source_index; - bool success = reader->read(recv_msg->packet); + MPPDataPacketPtr packet = std::make_shared(); + bool success = reader->read(packet); if (!success) break; has_data = true; - if (recv_msg->packet->has_error()) - throw Exception("Exchange receiver meet error : " + recv_msg->packet->error().msg()); + if (packet->has_error()) + throw Exception("Exchange receiver meet error : " + packet->error().msg()); - if (!msg_channel.push(std::move(recv_msg))) + if (!pushPacket(req.source_index, req_info, packet, msg_channels, log)) { meet_error = true; auto local_state = getState(); @@ -553,15 +678,15 @@ DecodeDetail ExchangeReceiverBase::decodeChunks( assert(recv_msg != nullptr); DecodeDetail detail; - int chunk_size = recv_msg->packet->chunks_size(); - if (chunk_size == 0) + if (recv_msg->chunks.empty()) return detail; + // Record total packet size even if fine grained shuffle is enabled. 
detail.packet_bytes = recv_msg->packet->ByteSizeLong(); - /// ExchangeReceiverBase should receive chunks of TypeCHBlock - for (int i = 0; i < chunk_size; ++i) + + for (const String * chunk : recv_msg->chunks) { - Block block = CHBlockChunkCodec::decode(recv_msg->packet->chunks(i), header); + Block block = CHBlockChunkCodec::decode(*chunk, header); detail.rows += block.rows(); if (unlikely(block.rows() == 0)) continue; @@ -571,10 +696,15 @@ DecodeDetail ExchangeReceiverBase::decodeChunks( } template -ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queue & block_queue, const Block & header) +ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queue & block_queue, const Block & header, size_t stream_id) { + if (unlikely(stream_id >= msg_channels.size())) + { + LOG_FMT_ERROR(exc_log, "stream_id out of range, stream_id: {}, total_stream_count: {}", stream_id, msg_channels.size()); + return {nullptr, 0, "", true, "stream_id out of range", false}; + } std::shared_ptr recv_msg; - if (!msg_channel.pop(recv_msg)) + if (!msg_channels[stream_id]->pop(recv_msg)) { std::unique_lock lock(mu); @@ -596,29 +726,32 @@ ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queuepacket != nullptr); + assert(recv_msg != nullptr); ExchangeReceiverResult result; - if (recv_msg->packet->has_error()) + if (recv_msg->error_ptr != nullptr) { - result = {nullptr, recv_msg->source_index, recv_msg->req_info, true, recv_msg->packet->error().msg(), false}; + result = {nullptr, recv_msg->source_index, recv_msg->req_info, true, recv_msg->error_ptr->msg(), false}; } else { - if (!recv_msg->packet->data().empty()) /// the data of the last packet is serialized from tipb::SelectResponse including execution summaries. + if (recv_msg->resp_ptr != nullptr) /// the data of the last packet is serialized from tipb::SelectResponse including execution summaries. { - auto resp_ptr = std::make_shared(); - if (!resp_ptr->ParseFromString(recv_msg->packet->data())) + auto select_resp = std::make_shared(); + if (!select_resp->ParseFromString(*(recv_msg->resp_ptr))) { result = {nullptr, recv_msg->source_index, recv_msg->req_info, true, "decode error", false}; } else { - result = {resp_ptr, recv_msg->source_index, recv_msg->req_info, false, "", false}; - /// If mocking TiFlash as TiDB, here should decode chunks from resp_ptr. - if (!resp_ptr->chunks().empty()) + result = {select_resp, recv_msg->source_index, recv_msg->req_info, false, "", false}; + /// If mocking TiFlash as TiDB, here should decode chunks from select_resp. + if (!select_resp->chunks().empty()) { - assert(recv_msg->packet->chunks().empty()); - result.decode_detail = CoprocessorReader::decodeChunks(resp_ptr, block_queue, header, schema); + assert(recv_msg->chunks.empty()); + // Fine grained shuffle should only be enabled when sending data to TiFlash node. + // So all data should be encoded into MPPDataPacket.chunks. 
+ RUNTIME_CHECK(!enableFineGrainedShuffle(fine_grained_shuffle_stream_count), Exception, "Data should not be encoded into tipb::SelectResponse.chunks when fine grained shuffle is enabled"); + result.decode_detail = CoprocessorReader::decodeChunks(select_resp, block_queue, header, schema); } } } @@ -626,7 +759,7 @@ ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queuesource_index, recv_msg->req_info, false, "", false}; } - if (!result.meet_error && !recv_msg->packet->chunks().empty()) + if (!result.meet_error && !recv_msg->chunks.empty()) { assert(result.decode_detail.rows == 0); result.decode_detail = decodeChunks(recv_msg, block_queue, header); @@ -688,7 +821,21 @@ void ExchangeReceiverBase::connectionDone( throw Exception("live_connections should not be less than 0!"); if (meet_error || copy_live_conn == 0) - msg_channel.finish(); + finishAllMsgChannels(); +} + +template +void ExchangeReceiverBase::finishAllMsgChannels() +{ + for (auto & msg_channel : msg_channels) + msg_channel->finish(); +} + +template +void ExchangeReceiverBase::cancelAllMsgChannels() +{ + for (auto & msg_channel : msg_channels) + msg_channel->cancel(); } /// Explicit template instantiations - to avoid code bloat in headers. diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.h b/dbms/src/Flash/Mpp/ExchangeReceiver.h index 830dc6241a9..708f133f226 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.h +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.h @@ -35,9 +35,28 @@ namespace DB { struct ReceivedMessage { - std::shared_ptr packet; - size_t source_index = 0; + size_t source_index; String req_info; + // shared_ptr is copied to make sure error_ptr, resp_ptr and chunks are valid. + const std::shared_ptr packet; + const mpp::Error * error_ptr; + const String * resp_ptr; + std::vector chunks; + + // Constructor that move chunks. + ReceivedMessage(size_t source_index_, + const String & req_info_, + const std::shared_ptr & packet_, + const mpp::Error * error_ptr_, + const String * resp_ptr_, + std::vector && chunks_) + : source_index(source_index_) + , req_info(req_info_) + , packet(packet_) + , error_ptr(error_ptr_) + , resp_ptr(resp_ptr_) + , chunks(chunks_) + {} }; struct ExchangeReceiverResult @@ -78,6 +97,7 @@ enum class ExchangeReceiverState CLOSED, }; +using MsgChannelPtr = std::unique_ptr>>; template class ExchangeReceiverBase @@ -92,7 +112,8 @@ class ExchangeReceiverBase size_t source_num_, size_t max_streams_, const String & req_id, - const String & executor_id); + const String & executor_id, + uint64_t fine_grained_shuffle_stream_count); ~ExchangeReceiverBase(); @@ -104,9 +125,11 @@ class ExchangeReceiverBase ExchangeReceiverResult nextResult( std::queue & block_queue, - const Block & header); + const Block & header, + size_t stream_id); size_t getSourceNum() const { return source_num; } + uint64_t getFineGrainedShuffleStreamCount() const { return fine_grained_shuffle_stream_count; } int computeNewThreadCount() const { return thread_count; } @@ -128,7 +151,10 @@ class ExchangeReceiverBase using Request = typename RPCContext::Request; void setUpConnection(); + // Template argument enable_fine_grained_shuffle will be setup properly in setUpConnection(). 
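The ReceivedMessage layout in ExchangeReceiver.h above relies on an ownership rule worth spelling out: the chunk pointers borrow from the packet, so the packet shared_ptr must travel with them. A self-contained sketch of the same rule (simplified types; std::string stands in for the protobuf-owned chunk data):

```cpp
#include <memory>
#include <string>
#include <vector>

struct Packet
{
    std::vector<std::string> chunks;
};

struct Message
{
    std::shared_ptr<const Packet> packet;    // keeps the chunk storage alive
    std::vector<const std::string *> chunks; // borrowed views into *packet
};

int main()
{
    auto packet = std::make_shared<Packet>(Packet{{"chunk-0", "chunk-1"}});
    Message msg{packet, {&packet->chunks[0], &packet->chunks[1]}};
    packet.reset(); // msg.packet still owns the data, pointers stay valid
    return msg.chunks[0]->size() == 7 ? 0 : 1;
}
```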
+ template void readLoop(const Request & req); + template void reactor(const std::vector & async_requests); bool setEndState(ExchangeReceiverState new_state); @@ -139,12 +165,14 @@ class ExchangeReceiverBase std::queue & block_queue, const Block & header); - void connectionDone( bool meet_error, const String & local_err_msg, const LoggerPtr & log); + void finishAllMsgChannels(); + void cancelAllMsgChannels(); + std::shared_ptr rpc_context; const tipb::ExchangeReceiver pb_exchange_receiver; @@ -156,7 +184,7 @@ class ExchangeReceiverBase std::shared_ptr thread_manager; DAGSchema schema; - MPMCQueue> msg_channel; + std::vector msg_channels; std::mutex mu; /// should lock `mu` when visit these members @@ -168,6 +196,7 @@ class ExchangeReceiverBase bool collected = false; int thread_count = 0; + uint64_t fine_grained_shuffle_stream_count; }; class ExchangeReceiver : public ExchangeReceiverBase diff --git a/dbms/src/Flash/Mpp/MPPHandler.cpp b/dbms/src/Flash/Mpp/MPPHandler.cpp index a3096aaa644..7f97a1dd698 100644 --- a/dbms/src/Flash/Mpp/MPPHandler.cpp +++ b/dbms/src/Flash/Mpp/MPPHandler.cpp @@ -31,7 +31,7 @@ void MPPHandler::handleError(const MPPTaskPtr & task, String error) try { if (task) - task->cancel(error); + task->handleError(error); } catch (...) { diff --git a/dbms/src/Flash/Mpp/MPPReceiverSet.cpp b/dbms/src/Flash/Mpp/MPPReceiverSet.cpp new file mode 100644 index 00000000000..60cca308c18 --- /dev/null +++ b/dbms/src/Flash/Mpp/MPPReceiverSet.cpp @@ -0,0 +1,48 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +void MPPReceiverSet::addExchangeReceiver(const String & executor_id, const ExchangeReceiverPtr & exchange_receiver) +{ + RUNTIME_ASSERT(exchange_receiver_map.find(executor_id) == exchange_receiver_map.end(), log, "Duplicate executor_id: {} in DAGRequest", executor_id); + exchange_receiver_map[executor_id] = exchange_receiver; +} + +void MPPReceiverSet::addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader) +{ + coprocessor_readers.push_back(coprocessor_reader); +} + +ExchangeReceiverPtr MPPReceiverSet::getExchangeReceiver(const String & executor_id) const +{ + auto it = exchange_receiver_map.find(executor_id); + if (unlikely(it == exchange_receiver_map.end())) + return nullptr; + return it->second; +} + +void MPPReceiverSet::cancel() +{ + for (auto & it : exchange_receiver_map) + { + it.second->cancel(); + } + for (auto & cop_reader : coprocessor_readers) + cop_reader->cancel(); +} +} // namespace DB diff --git a/dbms/src/Flash/Mpp/MPPReceiverSet.h b/dbms/src/Flash/Mpp/MPPReceiverSet.h new file mode 100644 index 00000000000..44274cb3ce8 --- /dev/null +++ b/dbms/src/Flash/Mpp/MPPReceiverSet.h @@ -0,0 +1,44 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+class MPPReceiverSet
+{
+public:
+    explicit MPPReceiverSet(const String & req_id)
+        : log(Logger::get("MPPReceiverSet", req_id))
+    {}
+    void addExchangeReceiver(const String & executor_id, const ExchangeReceiverPtr & exchange_receiver);
+    void addCoprocessorReader(const CoprocessorReaderPtr & coprocessor_reader);
+    ExchangeReceiverPtr getExchangeReceiver(const String & executor_id) const;
+    void cancel();
+
+private:
+    /// Two kinds of receivers exist in MPP:
+    /// ExchangeReceiver: receives data from other MPPTasks
+    /// CoprocessorReader: used in remote read
+    ExchangeReceiverMap exchange_receiver_map;
+    std::vector<CoprocessorReaderPtr> coprocessor_readers;
+    const LoggerPtr log;
+};
+
+using MPPReceiverSetPtr = std::shared_ptr<MPPReceiverSet>;
+
+} // namespace DB
diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp
index 0f18ad582b4..7ddc6af361f 100644
--- a/dbms/src/Flash/Mpp/MPPTask.cpp
+++ b/dbms/src/Flash/Mpp/MPPTask.cpp
@@ -22,11 +22,14 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -48,6 +51,7 @@
 extern const char exception_before_mpp_register_tunnel_for_root_mpp_task[];
 extern const char exception_during_mpp_register_tunnel_for_non_root_mpp_task[];
 extern const char exception_during_mpp_write_err_to_tunnel[];
 extern const char force_no_local_region_for_mpp_task[];
+extern const char random_task_lifecycle_failpoint[];
 } // namespace FailPoints

 MPPTask::MPPTask(const mpp::TaskMeta & meta_, const ContextPtr & context_)
     , id(meta.start_ts(), meta.task_id())
     , log(Logger::get("MPPTask", id.toString()))
     , mpp_task_statistics(id, meta.address())
+    , needed_threads(0)
     , schedule_state(ScheduleState::WAITING)
 {}

@@ -76,36 +81,108 @@ MPPTask::~MPPTask()
     LOG_FMT_DEBUG(log, "finish MPPTask: {}", id.toString());
 }

-void MPPTask::closeAllTunnels(const String & reason)
+void MPPTask::abortTunnels(const String & message, AbortType abort_type)
 {
-    for (auto & it : tunnel_map)
+    if (abort_type == AbortType::ONCANCELLATION)
+    {
+        closeAllTunnels(message);
+    }
+    else
     {
-        it.second->close(reason);
+        RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set");
+        tunnel_set->writeError(message);
     }
 }

-void MPPTask::finishWrite()
+void MPPTask::abortReceivers()
 {
-    for (const auto & it : tunnel_map)
+    if (likely(receiver_set != nullptr))
     {
-        it.second->writeDone();
+        receiver_set->cancel();
     }
 }

+void MPPTask::abortDataStreams(AbortType abort_type)
+{
+    /// When the abort type is ONERROR, the MPPTask already knows it has met an error, so let the remaining tasks stop silently to avoid flooding the log with useless error messages
+    bool is_kill = abort_type == AbortType::ONCANCELLATION;
+    context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, is_kill);
+}
+
+void MPPTask::closeAllTunnels(const String & reason)
+{
+    if (likely(tunnel_set))
+        tunnel_set->close(reason);
+}
+
+void MPPTask::finishWrite()
+{
RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); + tunnel_set->finishWrite(); +} + void MPPTask::run() { newThreadManager()->scheduleThenDetach(true, "MPPTask", [self = shared_from_this()] { self->runImpl(); }); } -void MPPTask::registerTunnel(const MPPTaskId & id, MPPTunnelPtr tunnel) +void MPPTask::registerTunnels(const mpp::DispatchTaskRequest & task_request) { - if (status == CANCELLED) - throw Exception("the tunnel " + tunnel->id() + " can not been registered, because the task is cancelled"); + tunnel_set = std::make_shared(log->identifier()); + std::chrono::seconds timeout(task_request.timeout()); + const auto & exchange_sender = dag_req.root_executor().exchange_sender(); - if (tunnel_map.find(id) != tunnel_map.end()) - throw Exception("the tunnel " + tunnel->id() + " has been registered"); + for (int i = 0; i < exchange_sender.encoded_task_meta_size(); ++i) + { + // exchange sender will register the tunnels and wait for the receiver to find a connection. + mpp::TaskMeta task_meta; + if (unlikely(!task_meta.ParseFromString(exchange_sender.encoded_task_meta(i)))) + throw TiFlashException("Failed to decode task meta info in ExchangeSender", Errors::Coprocessor::BadRequest); + bool is_local = context->getSettingsRef().enable_local_tunnel && meta.address() == task_meta.address(); + bool is_async = !is_local && context->getSettingsRef().enable_async_server; + MPPTunnelPtr tunnel = std::make_shared(task_meta, task_request.meta(), timeout, context->getSettingsRef().max_threads, is_local, is_async, log->identifier()); + LOG_FMT_DEBUG(log, "begin to register the tunnel {}", tunnel->id()); + if (status != INITIALIZING) + throw Exception(fmt::format("The tunnel {} can not be registered, because the task is not in initializing state", tunnel->id())); + tunnel_set->registerTunnel(MPPTaskId{task_meta.start_ts(), task_meta.task_id()}, tunnel); + if (!dag_context->isRootMPPTask()) + { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_register_tunnel_for_non_root_mpp_task); + } + } +} - tunnel_map[id] = tunnel; +void MPPTask::initExchangeReceivers() +{ + receiver_set = std::make_shared(log->identifier()); + traverseExecutors(&dag_req, [&](const tipb::Executor & executor) { + if (executor.tp() == tipb::ExecType::TypeExchangeReceiver) + { + assert(executor.has_executor_id()); + const auto & executor_id = executor.executor_id(); + // In order to distinguish different exchange receivers. 
+ auto exchange_receiver = std::make_shared( + std::make_shared( + executor.exchange_receiver(), + dag_context->getMPPTaskMeta(), + context->getTMTContext().getKVCluster(), + context->getTMTContext().getMPPTaskManager(), + context->getSettingsRef().enable_local_tunnel, + context->getSettingsRef().enable_async_grpc_client), + executor.exchange_receiver().encoded_task_meta_size(), + context->getMaxStreams(), + log->identifier(), + executor_id, + executor.fine_grained_shuffle_stream_count()); + if (status != RUNNING) + throw Exception("exchange receiver map can not be initialized, because the task is not in running state"); + + receiver_set->addExchangeReceiver(executor_id, exchange_receiver); + new_thread_count_of_exchange_receiver += exchange_receiver->computeNewThreadCount(); + } + return true; + }); + dag_context->setMPPReceiverSet(receiver_set); } std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConnectionRequest * request) @@ -120,8 +197,9 @@ std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConn } MPPTaskId receiver_id{request->receiver_meta().start_ts(), request->receiver_meta().task_id()}; - auto it = tunnel_map.find(receiver_id); - if (it == tunnel_map.end()) + RUNTIME_ASSERT(tunnel_set != nullptr, log, "mpp task without tunnel set"); + auto tunnel_ptr = tunnel_set->getTunnelByReceiverTaskId(receiver_id); + if (tunnel_ptr == nullptr) { auto err_msg = fmt::format( "can't find tunnel ({} + {})", @@ -129,7 +207,7 @@ std::pair MPPTask::getTunnel(const ::mpp::EstablishMPPConn request->receiver_meta().task_id()); return {nullptr, err_msg}; } - return {it->second, ""}; + return {tunnel_ptr, ""}; } void MPPTask::unregisterTask() @@ -211,26 +289,8 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) } // register tunnels - tunnel_set = std::make_shared(); - std::chrono::seconds timeout(task_request.timeout()); + registerTunnels(task_request); - for (int i = 0; i < exchange_sender.encoded_task_meta_size(); i++) - { - // exchange sender will register the tunnels and wait receiver to found a connection. - mpp::TaskMeta task_meta; - if (!task_meta.ParseFromString(exchange_sender.encoded_task_meta(i))) - throw TiFlashException("Failed to decode task meta info in ExchangeSender", Errors::Coprocessor::BadRequest); - bool is_local = context->getSettingsRef().enable_local_tunnel && meta.address() == task_meta.address(); - bool is_async = !is_local && context->getSettingsRef().enable_async_server; - MPPTunnelPtr tunnel = std::make_shared(task_meta, task_request.meta(), timeout, context->getSettingsRef().max_threads, is_local, is_async, log->identifier()); - LOG_FMT_DEBUG(log, "begin to register the tunnel {}", tunnel->id()); - registerTunnel(MPPTaskId{task_meta.start_ts(), task_meta.task_id()}, tunnel); - tunnel_set->addTunnel(tunnel); - if (!dag_context->isRootMPPTask()) - { - FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_register_tunnel_for_non_root_mpp_task); - } - } dag_context->tunnel_set = tunnel_set; // register task. 
auto task_manager = tmt_context.getMPPTaskManager(); @@ -256,6 +316,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) void MPPTask::preprocess() { auto start_time = Clock::now(); + initExchangeReceivers(); DAGQuerySource dag(*context); executeQuery(dag, *context, false, QueryProcessingStage::Complete); auto end_time = Clock::now(); @@ -285,7 +346,7 @@ void MPPTask::runImpl() LOG_FMT_INFO(log, "task starts preprocessing"); preprocess(); needed_threads = estimateCountOfNewThreads(); - LOG_FMT_DEBUG(log, "Estimate new thread count of query :{} including tunnel_threads: {} , receiver_threads: {}", needed_threads, dag_context->tunnel_set->getRemoteTunnelCnt(), dag_context->getNewThreadCountOfExchangeReceiver()); + LOG_FMT_DEBUG(log, "Estimate new thread count of query: {}, including tunnel_threads: {}, receiver_threads: {}", needed_threads, dag_context->tunnel_set->getRemoteTunnelCnt(), new_thread_count_of_exchange_receiver); scheduleOrWait(); @@ -317,104 +378,124 @@ void MPPTask::runImpl() return_statistics.blocks, return_statistics.bytes); } - catch (Exception & e) - { - err_msg = e.displayText(); - LOG_FMT_ERROR(log, "task running meets error: {} Stack Trace : {}", err_msg, e.getStackTrace().toString()); - } - catch (pingcap::Exception & e) - { - err_msg = e.message(); - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); - } - catch (std::exception & e) - { - err_msg = e.what(); - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); - } catch (...) { - err_msg = "unrecovered error"; - LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); + err_msg = getCurrentExceptionMessage(true, true); } + if (err_msg.empty()) { - // todo when error happens, should try to update the metrics if it is available - auto throughput = dag_context->getTableScanThroughput(); - if (throughput.first) - GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second); - auto process_info = context->getProcessListElement()->getInfo(); - auto peak_memory = process_info.peak_memory_usage > 0 ? process_info.peak_memory_usage : 0; - GET_METRIC(tiflash_coprocessor_request_memory_usage, type_run_mpp_task).Observe(peak_memory); - mpp_task_statistics.setMemoryPeak(peak_memory); + if (switchStatus(RUNNING, FINISHED)) + LOG_INFO(log, "finish task"); + else + LOG_FMT_WARNING(log, "finish task which is in {} state", taskStatusToString(status)); + if (status == FINISHED) + { + // todo when error happens, should try to update the metrics if it is available + auto throughput = dag_context->getTableScanThroughput(); + if (throughput.first) + GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second); + auto process_info = context->getProcessListElement()->getInfo(); + auto peak_memory = process_info.peak_memory_usage > 0 ? process_info.peak_memory_usage : 0; + GET_METRIC(tiflash_coprocessor_request_memory_usage, type_run_mpp_task).Observe(peak_memory); + mpp_task_statistics.setMemoryPeak(peak_memory); + } } else { - context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, true); - if (dag_context) - dag_context->cancelAllExchangeReceiver(); - writeErrToAllTunnels(err_msg); + if (status == RUNNING) + { + LOG_FMT_ERROR(log, "task running meets error: {}", err_msg); + /// trim the stack trace to avoid too much useless information in the log + trimStackTrace(err_msg); + try + { + handleError(err_msg); + } + catch (...) 
+ { + tryLogCurrentException(log, "Met error while trying to handle error in MPPTask"); + } + } + } LOG_FMT_INFO(log, "task ends, time cost is {} ms.", stopwatch.elapsedMilliseconds()); - unregisterTask(); - - if (switchStatus(RUNNING, FINISHED)) - LOG_INFO(log, "finish task"); - else - LOG_WARNING(log, "finish task which was cancelled before"); + // unregister flag is only for FailPoint usage, to produce the situation where MPPTask is destructed + // by the grpc CancelMPPTask thread + bool unregister = true; + fiu_do_on(FailPoints::random_task_lifecycle_failpoint, { + if (!err_msg.empty()) + unregister = false; + }); + if (unregister) + unregisterTask(); - mpp_task_statistics.end(status.load(), err_msg); + mpp_task_statistics.end(status.load(), err_string); mpp_task_statistics.logTracingJson(); } -void MPPTask::writeErrToAllTunnels(const String & e) +void MPPTask::handleError(const String & error_msg) { - for (auto & it : tunnel_map) - { - try - { - FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_write_err_to_tunnel); - it.second->write(getPacketWithError(e), true); - } - catch (...) - { - it.second->close("Failed to write error msg to tunnel"); - tryLogCurrentException(log, "Failed to write error " + e + " to tunnel: " + it.second->id()); - } - } + if (manager == nullptr || !manager->isTaskToBeCancelled(id)) + abort(error_msg, AbortType::ONERROR); } -void MPPTask::cancel(const String & reason) +void MPPTask::abort(const String & message, AbortType abort_type) { - CPUAffinityManager::getInstance().bindSelfQueryThread(); - LOG_FMT_WARNING(log, "Begin cancel task: {}", id.toString()); + String abort_type_string; + TaskStatus next_task_status; + switch (abort_type) + { + case AbortType::ONCANCELLATION: + abort_type_string = "ONCANCELLATION"; + next_task_status = CANCELLED; + break; + case AbortType::ONERROR: + abort_type_string = "ONERROR"; + next_task_status = FAILED; + break; + } + LOG_FMT_WARNING(log, "Begin abort task: {}, abort type: {}", id.toString(), abort_type_string); while (true) { auto previous_status = status.load(); - if (previous_status == FINISHED || previous_status == CANCELLED) + if (previous_status == FINISHED || previous_status == CANCELLED || previous_status == FAILED) { - LOG_FMT_WARNING(log, "task already {}", (previous_status == FINISHED ? 
"finished" : "cancelled")); + LOG_FMT_WARNING(log, "task already in {} state", taskStatusToString(previous_status)); return; } - else if (previous_status == INITIALIZING && switchStatus(INITIALIZING, CANCELLED)) + else if (previous_status == INITIALIZING && switchStatus(INITIALIZING, next_task_status)) { - closeAllTunnels(reason); + err_string = message; + /// if the task is in initializing state, mpp task can return error to TiDB directly, + /// so just close all tunnels here + closeAllTunnels(message); unregisterTask(); - LOG_WARNING(log, "Finish cancel task from uninitialized"); + LOG_WARNING(log, "Finish abort task from uninitialized"); return; } - else if (previous_status == RUNNING && switchStatus(RUNNING, CANCELLED)) + else if (previous_status == RUNNING && switchStatus(RUNNING, next_task_status)) { + /// abort the components from top to bottom because if bottom components are aborted + /// first, the top components may see an error caused by the abort, which is not + /// the original error + err_string = message; + abortTunnels(message, abort_type); + abortDataStreams(abort_type); + abortReceivers(); scheduleThisTask(ScheduleState::FAILED); - context->getProcessList().sendCancelToQuery(context->getCurrentQueryId(), context->getClientInfo().current_user, true); - closeAllTunnels(reason); /// runImpl is running, leave remaining work to runImpl - LOG_WARNING(log, "Finish cancel task from running"); + LOG_WARNING(log, "Finish abort task from running"); return; } } } +void MPPTask::cancel(const String & reason) +{ + CPUAffinityManager::getInstance().bindSelfQueryThread(); + abort(reason, AbortType::ONCANCELLATION); +} + bool MPPTask::switchStatus(TaskStatus from, TaskStatus to) { return status.compare_exchange_strong(from, to); diff --git a/dbms/src/Flash/Mpp/MPPTask.h b/dbms/src/Flash/Mpp/MPPTask.h index c34cae49699..a30150b26e8 100644 --- a/dbms/src/Flash/Mpp/MPPTask.h +++ b/dbms/src/Flash/Mpp/MPPTask.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -58,12 +59,12 @@ class MPPTask : public std::enable_shared_from_this void cancel(const String & reason); + void handleError(const String & error_msg); + void prepare(const mpp::DispatchTaskRequest & task_request); void run(); - void registerTunnel(const MPPTaskId & id, MPPTunnelPtr tunnel); - int getNeededThreads(); enum class ScheduleState @@ -91,12 +92,22 @@ class MPPTask : public std::enable_shared_from_this void unregisterTask(); - void writeErrToAllTunnels(const String & e); - /// Similar to `writeErrToAllTunnels`, but it just try to write the error message to tunnel /// without waiting the tunnel to be connected void closeAllTunnels(const String & reason); + enum class AbortType + { + /// todo add ONKILL to distinguish between silent cancellation and kill + ONCANCELLATION, + ONERROR, + }; + void abort(const String & message, AbortType abort_type); + + void abortTunnels(const String & message, AbortType abort_type); + void abortReceivers(); + void abortDataStreams(AbortType abort_type); + void finishWrite(); bool switchStatus(TaskStatus from, TaskStatus to); @@ -107,6 +118,10 @@ class MPPTask : public std::enable_shared_from_this int estimateCountOfNewThreads(); + void registerTunnels(const mpp::DispatchTaskRequest & task_request); + + void initExchangeReceivers(); + tipb::DAGRequest dag_req; ContextPtr context; @@ -116,6 +131,7 @@ class MPPTask : public std::enable_shared_from_this MemoryTracker * memory_tracker = nullptr; std::atomic status{INITIALIZING}; + String err_string; mpp::TaskMeta meta; 
@@ -123,8 +139,9 @@ class MPPTask : public std::enable_shared_from_this MPPTunnelSetPtr tunnel_set; - // which targeted task we should send data by which tunnel. - std::unordered_map tunnel_map; + MPPReceiverSetPtr receiver_set; + + int new_thread_count_of_exchange_receiver = 0; MPPTaskManager * manager = nullptr; @@ -132,8 +149,6 @@ class MPPTask : public std::enable_shared_from_this MPPTaskStatistics mpp_task_statistics; - Exception err; - friend class MPPTaskManager; int needed_threads; diff --git a/dbms/src/Flash/Mpp/MPPTaskManager.cpp b/dbms/src/Flash/Mpp/MPPTaskManager.cpp index 531f8f7a10d..c5499eda89d 100644 --- a/dbms/src/Flash/Mpp/MPPTaskManager.cpp +++ b/dbms/src/Flash/Mpp/MPPTaskManager.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -22,6 +23,11 @@ namespace DB { +namespace FailPoints +{ +extern const char random_task_manager_find_task_failure_failpoint[]; +} // namespace FailPoints + MPPTaskManager::MPPTaskManager(MPPTaskSchedulerPtr scheduler_) : scheduler(std::move(scheduler_)) , log(&Poco::Logger::get("TaskManager")) @@ -50,6 +56,7 @@ MPPTaskPtr MPPTaskManager::findTaskWithTimeout(const mpp::TaskMeta & meta, std:: it = query_it->second->task_map.find(id); return it != query_it->second->task_map.end(); }); + fiu_do_on(FailPoints::random_task_manager_find_task_failure_failpoint, ret = false;); if (cancelled) { errMsg = fmt::format("Task [{},{}] has been cancelled.", meta.start_ts(), meta.task_id()); @@ -140,6 +147,17 @@ bool MPPTaskManager::registerTask(MPPTaskPtr task) return true; } +bool MPPTaskManager::isTaskToBeCancelled(const MPPTaskId & task_id) +{ + std::unique_lock lock(mu); + auto it = mpp_query_map.find(task_id.start_ts); + if (it != mpp_query_map.end() && it->second->to_be_cancelled) + { + return it->second->task_map.find(task_id) != it->second->task_map.end(); + } + return false; +} + void MPPTaskManager::unregisterTask(MPPTask * task) { std::unique_lock lock(mu); diff --git a/dbms/src/Flash/Mpp/MPPTaskManager.h b/dbms/src/Flash/Mpp/MPPTaskManager.h index d7047804aca..770acea3853 100644 --- a/dbms/src/Flash/Mpp/MPPTaskManager.h +++ b/dbms/src/Flash/Mpp/MPPTaskManager.h @@ -73,6 +73,8 @@ class MPPTaskManager : private boost::noncopyable void unregisterTask(MPPTask * task); + bool isTaskToBeCancelled(const MPPTaskId & task_id); + bool tryToScheduleTask(const MPPTaskPtr & task); void releaseThreadsFromScheduler(const int needed_threads); diff --git a/dbms/src/Flash/Mpp/MPPTunnel.cpp b/dbms/src/Flash/Mpp/MPPTunnel.cpp index 826e7fea88a..16fe4ae42cc 100644 --- a/dbms/src/Flash/Mpp/MPPTunnel.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnel.cpp @@ -25,6 +25,7 @@ namespace DB namespace FailPoints { extern const char exception_during_mpp_close_tunnel[]; +extern const char random_tunnel_wait_timeout_failpoint[]; } // namespace FailPoints template @@ -219,7 +220,11 @@ void MPPTunnelBase::sendJob(bool need_lock) err_msg = "fatal error in sendJob()"; } if (!err_msg.empty()) + { + /// append tunnel id to error message + err_msg = fmt::format("{} meet error: {}", tunnel_id, err_msg); LOG_ERROR(log, err_msg); + } consumerFinish(err_msg, need_lock); if (is_async) writer->writeDone(grpc::Status::OK); @@ -322,6 +327,7 @@ void MPPTunnelBase::waitUntilConnectedOrFinished(std::unique_lock +#include #include +#include #include namespace DB { +namespace FailPoints +{ +extern const char exception_during_mpp_write_err_to_tunnel[]; +} // namespace FailPoints namespace { inline 
mpp::MPPDataPacket serializeToPacket(const tipb::SelectResponse & response) @@ -108,6 +114,65 @@ void MPPTunnelSetBase::write(mpp::MPPDataPacket & packet, int16_t partit tunnels[partition_id]->write(packet); } +template +void MPPTunnelSetBase::writeError(const String & msg) +{ + for (auto & tunnel : tunnels) + { + try + { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::exception_during_mpp_write_err_to_tunnel); + tunnel->write(getPacketWithError(msg), true); + } + catch (...) + { + tunnel->close("Failed to write error msg to tunnel"); + tryLogCurrentException(log, "Failed to write error " + msg + " to tunnel: " + tunnel->id()); + } + } +} + +template +void MPPTunnelSetBase::registerTunnel(const MPPTaskId & receiver_task_id, const TunnelPtr & tunnel) +{ + if (receiver_task_id_to_index_map.find(receiver_task_id) != receiver_task_id_to_index_map.end()) + throw Exception(fmt::format("the tunnel {} has been registered", tunnel->id())); + + receiver_task_id_to_index_map[receiver_task_id] = tunnels.size(); + tunnels.push_back(tunnel); + if (!tunnel->isLocal()) + { + remote_tunnel_cnt++; + } +} + +template +void MPPTunnelSetBase::close(const String & reason) +{ + for (auto & tunnel : tunnels) + tunnel->close(reason); +} + +template +void MPPTunnelSetBase::finishWrite() +{ + for (auto & tunnel : tunnels) + { + tunnel->writeDone(); + } +} + +template +typename MPPTunnelSetBase::TunnelPtr MPPTunnelSetBase::getTunnelByReceiverTaskId(const MPPTaskId & id) +{ + auto it = receiver_task_id_to_index_map.find(id); + if (it == receiver_task_id_to_index_map.end()) + { + return nullptr; + } + return tunnels[it->second]; +} + /// Explicit template instantiations - to avoid code bloat in headers. template class MPPTunnelSetBase; diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.h b/dbms/src/Flash/Mpp/MPPTunnelSet.h index f2279b945cb..e4123db1be5 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.h +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.h @@ -14,6 +14,7 @@ #pragma once +#include #include #ifdef __clang__ #pragma clang diagnostic push @@ -32,6 +33,9 @@ class MPPTunnelSetBase : private boost::noncopyable { public: using TunnelPtr = std::shared_ptr; + explicit MPPTunnelSetBase(const String & req_id) + : log(Logger::get("MPPTunnelSet", req_id)) + {} void clearExecutionSummaries(tipb::SelectResponse & response); @@ -50,17 +54,14 @@ class MPPTunnelSetBase : private boost::noncopyable // this is a partition writing. 
void write(tipb::SelectResponse & response, int16_t partition_id); void write(mpp::MPPDataPacket & packet, int16_t partition_id); + void writeError(const String & msg); + void close(const String & reason); + void finishWrite(); + void registerTunnel(const MPPTaskId & id, const TunnelPtr & tunnel); - uint16_t getPartitionNum() const { return tunnels.size(); } + TunnelPtr getTunnelByReceiverTaskId(const MPPTaskId & id); - void addTunnel(const TunnelPtr & tunnel) - { - tunnels.push_back(tunnel); - if (!tunnel->isLocal()) - { - remote_tunnel_cnt++; - } - } + uint16_t getPartitionNum() const { return tunnels.size(); } int getRemoteTunnelCnt() { @@ -71,6 +72,8 @@ class MPPTunnelSetBase : private boost::noncopyable private: std::vector tunnels; + std::unordered_map receiver_task_id_to_index_map; + const LoggerPtr log; int remote_tunnel_cnt = 0; }; diff --git a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp index af525bd1a55..967bfcecfa3 100644 --- a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp +++ b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp @@ -12,12 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include namespace DB { +namespace FailPoints +{ +extern const char random_min_tso_scheduler_failpoint[]; +} // namespace FailPoints + constexpr UInt64 MAX_UINT64 = std::numeric_limits::max(); constexpr UInt64 OS_THREAD_SOFT_LIMIT = 100000; @@ -193,7 +199,9 @@ bool MinTSOScheduler::scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & q } else { - if (tso <= min_tso) /// the min_tso query should fully run, otherwise throw errors here. + bool is_tso_min = tso <= min_tso; + fiu_do_on(FailPoints::random_min_tso_scheduler_failpoint, is_tso_min = true;); + if (is_tso_min) /// the min_tso query should fully run, otherwise throw errors here. { has_error = true; auto msg = fmt::format("threads are unavailable for the query {} ({} min_tso {}) {}, need {}, but used {} of the thread hard limit {}, {} active and {} waiting queries.", tso, tso == min_tso ? "is" : "is newer than", min_tso, isWaiting ? "from the waiting set" : "when directly schedule it", needed_threads, estimated_thread_usage, thread_hard_limit, active_set.size(), waiting_set.size()); diff --git a/dbms/src/Flash/Mpp/TaskStatus.cpp b/dbms/src/Flash/Mpp/TaskStatus.cpp index 423b768faea..c87ae2b8eb4 100644 --- a/dbms/src/Flash/Mpp/TaskStatus.cpp +++ b/dbms/src/Flash/Mpp/TaskStatus.cpp @@ -29,6 +29,8 @@ StringRef taskStatusToString(const TaskStatus & status) return "FINISHED"; case CANCELLED: return "CANCELLED"; + case FAILED: + return "FAILED"; default: throw Exception("Unknown TaskStatus"); } diff --git a/dbms/src/Flash/Mpp/TaskStatus.h b/dbms/src/Flash/Mpp/TaskStatus.h index 999e30790bf..0997c8adc52 100644 --- a/dbms/src/Flash/Mpp/TaskStatus.h +++ b/dbms/src/Flash/Mpp/TaskStatus.h @@ -24,6 +24,7 @@ enum TaskStatus RUNNING, FINISHED, CANCELLED, + FAILED, }; StringRef taskStatusToString(const TaskStatus & status); diff --git a/dbms/src/Flash/Mpp/Utils.cpp b/dbms/src/Flash/Mpp/Utils.cpp index 477c478eef7..21d89b3cd52 100644 --- a/dbms/src/Flash/Mpp/Utils.cpp +++ b/dbms/src/Flash/Mpp/Utils.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
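Aside on the `MPPTunnelSetBase` change above: tunnels stay in a vector (indexed by partition id for writes) while the new `receiver_task_id_to_index_map` resolves a receiver task id to its slot for `EstablishMPPConnection` lookups. A minimal sketch of that two-structure pattern, with a hypothetical integer key standing in for `MPPTaskId`:

```cpp
// Hypothetical, simplified analog of receiver_task_id_to_index_map in MPPTunnelSetBase.
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <unordered_map>
#include <vector>

struct Tunnel { int64_t receiver_task_id; };
using TunnelPtr = std::shared_ptr<Tunnel>;

class TunnelSet
{
public:
    void registerTunnel(int64_t receiver_task_id, const TunnelPtr & tunnel)
    {
        if (id_to_index.count(receiver_task_id))
            throw std::logic_error("tunnel already registered");
        id_to_index[receiver_task_id] = tunnels.size(); // remember the slot ...
        tunnels.push_back(tunnel);                      // ... then append, keeping partition order
    }

    TunnelPtr getTunnelByReceiverTaskId(int64_t receiver_task_id) const
    {
        auto it = id_to_index.find(receiver_task_id);
        return it == id_to_index.end() ? nullptr : tunnels[it->second];
    }

private:
    std::vector<TunnelPtr> tunnels;                  // indexed by partition id for writes
    std::unordered_map<int64_t, size_t> id_to_index; // receiver task id -> slot
};
```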
#include +#include #include @@ -27,4 +28,14 @@ mpp::MPPDataPacket getPacketWithError(String reason) return data; } +void trimStackTrace(String & message) +{ + auto stack_trace_pos = message.find("Stack trace"); + if (stack_trace_pos != String::npos) + { + message.resize(stack_trace_pos); + Poco::trimRightInPlace(message); + } +} + } // namespace DB diff --git a/dbms/src/Flash/Mpp/Utils.h b/dbms/src/Flash/Mpp/Utils.h index 67e2dc3f641..021dc4407d5 100644 --- a/dbms/src/Flash/Mpp/Utils.h +++ b/dbms/src/Flash/Mpp/Utils.h @@ -23,5 +23,6 @@ namespace DB { mpp::MPPDataPacket getPacketWithError(String reason); +void trimStackTrace(String & message); } // namespace DB diff --git a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp index 47ce2ee6ee6..706c17ed036 100644 --- a/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp +++ b/dbms/src/Flash/Mpp/tests/gtest_mpptunnel.cpp @@ -382,7 +382,7 @@ TEST_F(TestMPPTunnelBase, WriteError) } catch (Exception & e) { - GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, grpc writes failed."); + GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, 0000_0001 meet error: grpc writes failed."); } } @@ -631,7 +631,7 @@ TEST_F(TestMPPTunnelBase, AsyncWriteError) } catch (Exception & e) { - GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, grpc writes failed."); + GTEST_ASSERT_EQ(e.message(), "Consumer exits unexpected, 0000_0001 meet error: grpc writes failed."); } } diff --git a/dbms/src/Flash/Planner/Planner.cpp b/dbms/src/Flash/Planner/Planner.cpp index b798123de71..8aae14b9420 100644 --- a/dbms/src/Flash/Planner/Planner.cpp +++ b/dbms/src/Flash/Planner/Planner.cpp @@ -87,9 +87,15 @@ BlockInputStreams Planner::execute() bool Planner::isSupported(const DAGQueryBlock & query_block) { + /// todo support fine grained shuffle + static auto disable_fine_grained_shuffle = [](const DAGQueryBlock & query_block) { + return !enableFineGrainedShuffle(query_block.source->fine_grained_shuffle_stream_count()) + && (!query_block.exchange_sender || !enableFineGrainedShuffle(query_block.exchange_sender->fine_grained_shuffle_stream_count())); + }; return query_block.source && (query_block.source->tp() == tipb::ExecType::TypeProjection - || query_block.source->tp() == tipb::ExecType::TypeExchangeReceiver); + || query_block.source->tp() == tipb::ExecType::TypeExchangeReceiver) + && disable_fine_grained_shuffle(query_block); } DAGContext & Planner::dagContext() const diff --git a/dbms/src/Flash/Planner/plans/PhysicalAggregation.cpp b/dbms/src/Flash/Planner/plans/PhysicalAggregation.cpp index 45e4586dd18..26a6fa574f2 100644 --- a/dbms/src/Flash/Planner/plans/PhysicalAggregation.cpp +++ b/dbms/src/Flash/Planner/plans/PhysicalAggregation.cpp @@ -106,34 +106,39 @@ void PhysicalAggregation::transformImpl(DAGPipeline & pipeline, Context & contex is_final_agg); /// If there are several sources, then we perform parallel aggregation - if (pipeline.streams.size() > 1) + if (pipeline.streams.size() > 1 || pipeline.streams_with_non_joined_data.size() > 1) { const Settings & settings = context.getSettingsRef(); - BlockInputStreamPtr stream_with_non_joined_data = combinedNonJoinedDataStream(pipeline, max_streams, log); - pipeline.firstStream() = std::make_shared( + BlockInputStreamPtr stream = std::make_shared( pipeline.streams, - stream_with_non_joined_data, + pipeline.streams_with_non_joined_data, params, context.getFileProvider(), true, max_streams, settings.aggregation_memory_efficient_merge_threads ? 
static_cast(settings.aggregation_memory_efficient_merge_threads) : static_cast(settings.max_threads), log->identifier()); + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); + // should record for agg before restore concurrency. See #3804. recordProfileStreams(pipeline, context); restoreConcurrency(pipeline, context.getDAGContext()->final_concurrency, log); } else { - BlockInputStreamPtr stream_with_non_joined_data = combinedNonJoinedDataStream(pipeline, max_streams, log); BlockInputStreams inputs; if (!pipeline.streams.empty()) inputs.push_back(pipeline.firstStream()); - else - pipeline.streams.resize(1); - if (stream_with_non_joined_data) - inputs.push_back(stream_with_non_joined_data); + + if (!pipeline.streams_with_non_joined_data.empty()) + inputs.push_back(pipeline.streams_with_non_joined_data.at(0)); + + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::make_shared( std::make_shared(inputs, log->identifier()), params, diff --git a/dbms/src/Flash/Planner/plans/PhysicalExchangeReceiver.cpp b/dbms/src/Flash/Planner/plans/PhysicalExchangeReceiver.cpp index ee40e42e1aa..ca87a85ab17 100644 --- a/dbms/src/Flash/Planner/plans/PhysicalExchangeReceiver.cpp +++ b/dbms/src/Flash/Planner/plans/PhysicalExchangeReceiver.cpp @@ -43,15 +43,14 @@ PhysicalPlanNodePtr PhysicalExchangeReceiver::build( const String & executor_id, const LoggerPtr & log) { - const auto & mpp_exchange_receiver_map = context.getDAGContext()->getMPPExchangeReceiverMap(); - - auto it = mpp_exchange_receiver_map.find(executor_id); - if (unlikely(it == mpp_exchange_receiver_map.end())) + auto mpp_exchange_receiver = context.getDAGContext()->getMPPExchangeReceiver(executor_id); + if (unlikely(mpp_exchange_receiver == nullptr)) throw TiFlashException( fmt::format("Can not find exchange receiver for {}", executor_id), Errors::Planner::Internal); + /// todo support fine grained shuffle + assert(!enableFineGrainedShuffle(mpp_exchange_receiver->getFineGrainedShuffleStreamCount())); - const auto & mpp_exchange_receiver = it->second; NamesAndTypes schema = toNamesAndTypes(mpp_exchange_receiver->getOutputSchema()); auto physical_exchange_receiver = std::make_shared( executor_id, @@ -69,7 +68,7 @@ void PhysicalExchangeReceiver::transformImpl(DAGPipeline & pipeline, Context & c auto & exchange_receiver_io_input_streams = dag_context.getInBoundIOInputStreamsMap()[executor_id]; for (size_t i = 0; i < max_streams; ++i) { - BlockInputStreamPtr stream = std::make_shared(mpp_exchange_receiver, log->identifier(), executor_id); + BlockInputStreamPtr stream = std::make_shared(mpp_exchange_receiver, log->identifier(), executor_id, /*stream_id=*/0); exchange_receiver_io_input_streams.push_back(stream); stream = std::make_shared(stream, 8192, 0, log->identifier()); stream->setExtraInfo("squashing after exchange receiver"); diff --git a/dbms/src/Flash/Planner/plans/PhysicalExchangeSender.cpp b/dbms/src/Flash/Planner/plans/PhysicalExchangeSender.cpp index 373b04a3941..1f99656506e 100644 --- a/dbms/src/Flash/Planner/plans/PhysicalExchangeSender.cpp +++ b/dbms/src/Flash/Planner/plans/PhysicalExchangeSender.cpp @@ -55,10 +55,11 @@ void PhysicalExchangeSender::transformImpl(DAGPipeline & pipeline, Context & con RUNTIME_ASSERT(dag_context.isMPPTask() && dag_context.tunnel_set != nullptr, log, "exchange_sender only run in MPP"); + /// todo support fine grained shuffle int stream_id = 0; pipeline.transform([&](auto & stream) { // 
construct writer - std::unique_ptr response_writer = std::make_unique>( + std::unique_ptr response_writer = std::make_unique>( dag_context.tunnel_set, partition_col_ids, partition_col_collators, @@ -66,7 +67,9 @@ void PhysicalExchangeSender::transformImpl(DAGPipeline & pipeline, Context & con context.getSettingsRef().dag_records_per_chunk, context.getSettingsRef().batch_send_min_limit, stream_id++ == 0, /// only one stream needs to send execution summaries for the last response - dag_context); + dag_context, + 0, + 0); stream = std::make_shared(stream, std::move(response_writer), log->identifier()); }); } diff --git a/dbms/src/Flash/Planner/plans/PhysicalTopN.cpp b/dbms/src/Flash/Planner/plans/PhysicalTopN.cpp index d572435d645..a78dd350264 100644 --- a/dbms/src/Flash/Planner/plans/PhysicalTopN.cpp +++ b/dbms/src/Flash/Planner/plans/PhysicalTopN.cpp @@ -62,7 +62,7 @@ void PhysicalTopN::transformImpl(DAGPipeline & pipeline, Context & context, size executeExpression(pipeline, before_sort_actions, log, "before TopN"); - orderStreams(pipeline, max_streams, order_descr, limit, context, log); + orderStreams(pipeline, max_streams, order_descr, limit, false, context, log); } void PhysicalTopN::finalize(const Names & parent_require) diff --git a/dbms/src/Flash/Planner/tests/gtest_physical_plan.cpp b/dbms/src/Flash/Planner/tests/gtest_physical_plan.cpp index 7ad06fbeb04..45414b717df 100644 --- a/dbms/src/Flash/Planner/tests/gtest_physical_plan.cpp +++ b/dbms/src/Flash/Planner/tests/gtest_physical_plan.cpp @@ -69,7 +69,7 @@ class PhysicalPlanTestRunner : public DB::tests::ExecutorTest ASSERT_EQ(Poco::trim(expected_streams), Poco::trim(fb.toString())); } - readAndAssertBlock(final_stream, expect_columns); + ASSERT_COLUMNS_EQ_R(readBlock(final_stream), expect_columns); } LoggerPtr log = Logger::get("PhysicalPlanTestRunner", "test_physical_plan"); diff --git a/dbms/src/Flash/tests/CMakeLists.txt b/dbms/src/Flash/tests/CMakeLists.txt index a34e4b23432..944908dcb25 100644 --- a/dbms/src/Flash/tests/CMakeLists.txt +++ b/dbms/src/Flash/tests/CMakeLists.txt @@ -13,14 +13,3 @@ # limitations under the License. include_directories (${CMAKE_CURRENT_BINARY_DIR}) - -add_executable (exchange_perftest - exchange_perftest.cpp - ${TiFlash_SOURCE_DIR}/dbms/src/Server/StorageConfigParser.cpp - ${TiFlash_SOURCE_DIR}/dbms/src/Functions/FunctionsConversion.cpp) -target_link_libraries (exchange_perftest - gtest_main - dbms - clickhouse_functions - clickhouse_aggregate_functions - tiflash-dttool-lib) diff --git a/dbms/src/Flash/tests/WindowTestUtil.h b/dbms/src/Flash/tests/WindowTestUtil.h new file mode 100644 index 00000000000..b7385380419 --- /dev/null +++ b/dbms/src/Flash/tests/WindowTestUtil.h @@ -0,0 +1,67 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
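The planner and executor changes above all gate the new code paths on `enableFineGrainedShuffle`. Its definition is not part of this diff; assuming it is simply `stream_count > 0` (which is consistent with every call site in this patch, but is an assumption), the gating in `Planner::isSupported` reduces to the sketch below:

```cpp
// Sketch only: assumes enableFineGrainedShuffle(stream_count) is `stream_count > 0`,
// which this diff uses but does not show.
#include <cstdint>
#include <cstdio>

inline bool enableFineGrainedShuffle(uint64_t stream_count) { return stream_count > 0; }

int main()
{
    // The planner keeps the new path only when neither the source executor nor the
    // exchange sender requests fine-grained shuffle (see Planner::isSupported above).
    uint64_t source_streams = 0, sender_streams = 8;
    bool supported = !enableFineGrainedShuffle(source_streams) && !enableFineGrainedShuffle(sender_streams);
    std::printf("planner path supported: %d\n", supported); // 0: falls back to the old interpreter
}
```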
+ +#pragma once + +#include + +namespace DB +{ +namespace tests +{ + +inline std::shared_ptr mockInterpreter(Context & context, const std::vector & source_columns, int concurrency) +{ + std::vector mock_input_streams_vec = {}; + DAGQueryBlock mock_query_block(0, static_cast>(nullptr)); + std::vector mock_subqueries_for_sets = {}; + std::shared_ptr mock_interpreter = std::make_shared(context, + mock_input_streams_vec, + mock_query_block, + concurrency); + mock_interpreter->analyzer = std::make_unique(std::move(source_columns), context); + return mock_interpreter; +} + +inline void mockExecuteProject(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, NamesWithAliases & final_project) +{ + mock_interpreter->executeProject(pipeline, final_project); +} + +inline void mockExecuteWindowOrder(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const tipb::Sort & sort, uint64_t fine_grained_shuffle_stream_count) +{ + mock_interpreter->handleWindowOrder(pipeline, sort, ::DB::enableFineGrainedShuffle(fine_grained_shuffle_stream_count)); + mock_interpreter->input_streams_vec[0] = pipeline.streams; + NamesWithAliases final_project; + for (const auto & column : (*mock_interpreter->analyzer).source_columns) + { + final_project.push_back({column.name, ""}); + } + mockExecuteProject(mock_interpreter, pipeline, final_project); +} + +inline void mockExecuteWindow(std::shared_ptr & mock_interpreter, DAGPipeline & pipeline, const tipb::Window & window, uint64_t fine_grained_shuffle_stream_count) +{ + mock_interpreter->handleWindow(pipeline, window, ::DB::enableFineGrainedShuffle(fine_grained_shuffle_stream_count)); + mock_interpreter->input_streams_vec[0] = pipeline.streams; + NamesWithAliases final_project; + for (const auto & column : (*mock_interpreter->analyzer).source_columns) + { + final_project.push_back({column.name, ""}); + } + mockExecuteProject(mock_interpreter, pipeline, final_project); +} + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/bench_exchange.cpp b/dbms/src/Flash/tests/bench_exchange.cpp new file mode 100644 index 00000000000..d6e3f3e825e --- /dev/null +++ b/dbms/src/Flash/tests/bench_exchange.cpp @@ -0,0 +1,466 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
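Both `mockExecuteWindowOrder` and `mockExecuteWindow` above finish by projecting every analyzer column under its own name, so the test pipeline's header stays stable across steps. A tiny standalone analog of that step (simplified types; the real helpers go through `DAGExpressionAnalyzer` and `NamesWithAliases`, and an empty alias keeps the original name):

```cpp
// Hypothetical, simplified analog of the final-projection step in WindowTestUtil.h.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct NameAndTypePair { std::string name; std::string type; };
using NamesWithAliases = std::vector<std::pair<std::string, std::string>>;

// Project every analyzer column under its own name (empty alias keeps the name).
NamesWithAliases projectAllSourceColumns(const std::vector<NameAndTypePair> & source_columns)
{
    NamesWithAliases final_project;
    for (const auto & column : source_columns)
        final_project.push_back({column.name, ""});
    return final_project;
}

int main()
{
    std::vector<NameAndTypePair> cols{{"c1", "Int64"}, {"c2", "String"}};
    for (const auto & [name, alias] : projectAllSourceColumns(cols))
        std::cout << name << " -> '" << alias << "'\n";
}
```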
+ +#include +#include + +#include // to include the implementation of StreamingDAGResponseWriter +#include // to include the implementation of ExchangeReceiver +#include // to include the implementation of MPPTunnel +#include // to include the implementation of MPPTunnelSet +#include +#include + + +namespace DB +{ +namespace tests +{ + +std::random_device rd; + +MockBlockInputStream::MockBlockInputStream(const std::vector & blocks_, StopFlag & stop_flag_) + : blocks(blocks_) + , header(blocks[0].cloneEmpty()) + , mt(rd()) + , dist(0, blocks.size() - 1) + , stop_flag(stop_flag_) +{} + +MockFixedRowsBlockInputStream::MockFixedRowsBlockInputStream(size_t total_rows_, const std::vector & blocks_) + : header(blocks_[0].cloneEmpty()) + , mt(rd()) + , dist(0, blocks_.size() - 1) + , current_rows(0) + , total_rows(total_rows_) + , blocks(blocks_) +{} + +Block makeBlock(int row_num, bool skew) +{ + InferredDataVector> int64_vec; + InferredDataVector> int64_vec2; + InferredDataVector> string_vec; + + if (skew) + { + for (int i = 0; i < row_num; ++i) + { + int64_vec.emplace_back(100); + int64_vec2.emplace_back(100); + } + + for (int i = 0; i < row_num; ++i) + { + string_vec.push_back("abcdefg"); + } + } + else + { + std::mt19937 mt(rd()); + std::uniform_int_distribution int64_dist; + std::uniform_int_distribution len_dist(10, 20); + std::uniform_int_distribution char_dist; + + for (int i = 0; i < row_num; ++i) + { + int64_vec.emplace_back(int64_dist(mt)); + int64_vec2.emplace_back(int64_dist(mt)); + } + + for (int i = 0; i < row_num; ++i) + { + int len = len_dist(mt); + String s; + for (int j = 0; j < len; ++j) + s.push_back(char_dist(mt)); + string_vec.push_back(std::move(s)); + } + } + + auto int64_data_type = makeDataType>(); + ColumnWithTypeAndName int64_column(makeColumn>(int64_data_type, int64_vec), int64_data_type, "int64_1"); + ColumnWithTypeAndName int64_column2(makeColumn>(int64_data_type, int64_vec2), int64_data_type, "int64_2"); + + auto string_data_type = makeDataType>(); + ColumnWithTypeAndName string_column(makeColumn>(string_data_type, string_vec), string_data_type, "string"); + + return Block({int64_column, string_column, int64_column2}); +} + +std::vector makeBlocks(int block_num, int row_num, bool skew) +{ + std::vector blocks; + for (int i = 0; i < block_num; ++i) + blocks.push_back(makeBlock(row_num, skew)); + return blocks; +} + +mpp::MPPDataPacket makePacket(ChunkCodecStream & codec, int row_num) +{ + auto block = makeBlock(row_num); + codec.encode(block, 0, row_num); + + mpp::MPPDataPacket packet; + packet.add_chunks(codec.getString()); + codec.clear(); + + return packet; +} + +std::vector makePackets(ChunkCodecStream & codec, int packet_num, int row_num) +{ + std::vector packets; + for (int i = 0; i < packet_num; ++i) + packets.push_back(std::make_shared(makePacket(codec, row_num))); + return packets; +} + +std::vector makePacketQueues(int source_num, int queue_size) +{ + std::vector queues(source_num); + for (int i = 0; i < source_num; ++i) + queues[i] = std::make_shared(queue_size); + return queues; +} + +std::vector makeFields() +{ + std::vector fields(3); + fields[0].set_tp(TiDB::TypeLongLong); + fields[1].set_tp(TiDB::TypeString); + fields[2].set_tp(TiDB::TypeLongLong); + return fields; +} + +void printException(const Exception & e) +{ + std::string text = e.displayText(); + + auto embedded_stack_trace_pos = text.find("Stack trace"); + std::cerr << "Code: " << e.code() << ". 
" << text << std::endl + << std::endl; + if (std::string::npos == embedded_stack_trace_pos) + std::cerr << "Stack trace:" << std::endl + << e.getStackTrace().toString() << std::endl; +} + +ReceiverHelper::ReceiverHelper(int concurrency_, int source_num_, uint32_t fine_grained_shuffle_stream_count_) + : concurrency(concurrency_) + , source_num(source_num_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) +{ + pb_exchange_receiver.set_tp(tipb::Hash); + for (int i = 0; i < source_num; ++i) + { + mpp::TaskMeta task; + task.set_start_ts(0); + task.set_task_id(i); + task.set_partition_id(i); + task.set_address(""); + + String encoded_task; + task.SerializeToString(&encoded_task); + + pb_exchange_receiver.add_encoded_task_meta(encoded_task); + } + + fields = makeFields(); + *pb_exchange_receiver.add_field_types() = fields[0]; + *pb_exchange_receiver.add_field_types() = fields[1]; + *pb_exchange_receiver.add_field_types() = fields[2]; + + task_meta.set_task_id(100); + + queues = makePacketQueues(source_num, 10); +} + +MockExchangeReceiverPtr ReceiverHelper::buildReceiver() +{ + return std::make_shared( + std::make_shared(queues, fields), + source_num, + concurrency, + "mock_req_id", + "mock_exchange_receiver_id", + fine_grained_shuffle_stream_count); +} + +std::vector ReceiverHelper::buildExchangeReceiverStream() +{ + auto receiver = buildReceiver(); + std::vector streams(concurrency); + // NOTE: check if need fine_grained_shuffle_stream_count + for (int i = 0; i < concurrency; ++i) + { + streams[i] = std::make_shared(receiver, + "mock_req_id", + "mock_executor_id" + std::to_string(i), + /*stream_id=*/enableFineGrainedShuffle(fine_grained_shuffle_stream_count) ? i : 0); + } + return streams; +} + +BlockInputStreamPtr ReceiverHelper::buildUnionStream() +{ + auto streams = buildExchangeReceiverStream(); + return std::make_shared>(streams, BlockInputStreams{}, concurrency, /*req_id=*/""); +} + +void ReceiverHelper::finish() +{ + if (join_ptr) + { + join_ptr->setBuildTableState(Join::BuildTableState::SUCCEED); + std::cout << fmt::format("Hash table size: {} bytes", join_ptr->getTotalByteCount()) << std::endl; + } +} + +SenderHelper::SenderHelper( + int source_num_, + int concurrency_, + uint32_t fine_grained_shuffle_stream_count_, + int64_t fine_grained_shuffle_batch_size_, + const std::vector & queues_, + const std::vector & fields) + : source_num(source_num_) + , concurrency(concurrency_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + , fine_grained_shuffle_batch_size(fine_grained_shuffle_batch_size_) + , queues(queues_) +{ + mpp::TaskMeta task_meta; + tunnel_set = std::make_shared("mock_req_id"); + for (int i = 0; i < source_num; ++i) + { + auto writer = std::make_shared(queues[i]); + mock_writers.push_back(writer); + + auto tunnel = std::make_shared( + task_meta, + task_meta, + std::chrono::seconds(60), + concurrency, + false, + false, + "mock_req_id"); + tunnel->connect(writer.get()); + tunnels.push_back(tunnel); + MPPTaskId id(0, i); + tunnel_set->registerTunnel(id, tunnel); + } + + tipb::DAGRequest dag_request; + tipb::Executor root_executor; + root_executor.set_executor_id("ExchangeSender_100"); + *dag_request.mutable_root_executor() = root_executor; + + dag_context = std::make_unique(dag_request); + dag_context->is_mpp_task = true; + dag_context->is_root_mpp_task = false; + dag_context->encode_type = tipb::EncodeType::TypeCHBlock; + dag_context->result_field_types = fields; +} + +BlockInputStreamPtr SenderHelper::buildUnionStream( + 
StopFlag & stop_flag, + const std::vector & blocks) +{ + std::vector send_streams; + for (int i = 0; i < concurrency; ++i) + { + BlockInputStreamPtr stream = std::make_shared(blocks, stop_flag); + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count)) + { + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } + else + { + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } + } + + return std::make_shared>(send_streams, BlockInputStreams{}, concurrency, /*req_id=*/""); +} + +BlockInputStreamPtr SenderHelper::buildUnionStream(size_t total_rows, const std::vector & blocks) +{ + std::vector send_streams; + for (int i = 0; i < concurrency; ++i) + { + BlockInputStreamPtr stream = std::make_shared(total_rows / concurrency, blocks); + if (enableFineGrainedShuffle(fine_grained_shuffle_stream_count)) + { + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } + else + { + std::unique_ptr response_writer( + new StreamingDAGResponseWriter( + tunnel_set, + {0, 1, 2}, + TiDB::TiDBCollators(3), + tipb::Hash, + -1, + -1, + true, + *dag_context, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size)); + send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); + } + } + + return std::make_shared>(send_streams, BlockInputStreams{}, concurrency, /*req_id=*/""); +} + +void SenderHelper::finish() +{ + for (size_t i = 0; i < tunnels.size(); ++i) + { + tunnels[i]->writeDone(); + tunnels[i]->waitForFinish(); + mock_writers[i]->finish(); + } +} + +void ExchangeBench::SetUp(const benchmark::State &) +{ + DynamicThreadPool::global_instance = std::make_unique( + /*fixed_thread_num=*/300, + std::chrono::milliseconds(100000)); + + uniform_blocks = makeBlocks(/*block_num=*/100, /*row_num=*/1024); + skew_blocks = makeBlocks(/*block_num=*/100, /*row_num=*/1024, /*skew=*/true); + + try + { + DB::registerWindowFunctions(); + DB::registerFunctions(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. + } +} + +void ExchangeBench::TearDown(const benchmark::State &) +{ + uniform_blocks.clear(); + skew_blocks.clear(); + // NOTE: Must reset here, otherwise DynamicThreadPool::fixedWork() may core because metrics already destroyed. 
+ DynamicThreadPool::global_instance.reset(); +} + +void ExchangeBench::runAndWait(std::shared_ptr receiver_helper, + BlockInputStreamPtr receiver_stream, + std::shared_ptr & sender_helper, + BlockInputStreamPtr sender_stream) +{ + std::future sender_future = DynamicThreadPool::global_instance->schedule(/*memory_tracker=*/false, + [sender_stream, sender_helper] { + sender_stream->readPrefix(); + while (const auto & block = sender_stream->read()) {} + sender_stream->readSuffix(); + sender_helper->finish(); + }); + std::future receiver_future = DynamicThreadPool::global_instance->schedule(/*memory_tracker=*/false, + [receiver_stream, receiver_helper] { + receiver_stream->readPrefix(); + while (const auto & block = receiver_stream->read()) {} + receiver_stream->readSuffix(); + receiver_helper->finish(); + }); + sender_future.get(); + receiver_future.get(); +} + +BENCHMARK_DEFINE_F(ExchangeBench, basic_send_receive) +(benchmark::State & state) +try +{ + const int concurrency = state.range(0); + const int source_num = state.range(1); + const int total_rows = state.range(2); + const int fine_grained_shuffle_stream_count = state.range(3); + const int fine_grained_shuffle_batch_size = state.range(4); + Context context = TiFlashTestEnv::getContext(); + + for (auto _ : state) + { + std::shared_ptr receiver_helper = std::make_shared(concurrency, source_num, fine_grained_shuffle_stream_count); + BlockInputStreamPtr receiver_stream = receiver_helper->buildUnionStream(); + + std::shared_ptr sender_helper = std::make_shared(source_num, + concurrency, + fine_grained_shuffle_stream_count, + fine_grained_shuffle_batch_size, + receiver_helper->queues, + receiver_helper->fields); + BlockInputStreamPtr sender_stream = sender_helper->buildUnionStream(total_rows, uniform_blocks); + + runAndWait(receiver_helper, receiver_stream, sender_helper, sender_stream); + } +} +CATCH +BENCHMARK_REGISTER_F(ExchangeBench, basic_send_receive) + ->Args({8, 1, 1024 * 1000, 0, 4096}) + ->Args({8, 1, 1024 * 1000, 4, 4096}) + ->Args({8, 1, 1024 * 1000, 8, 4096}) + ->Args({8, 1, 1024 * 1000, 16, 4096}) + ->Args({8, 1, 1024 * 1000, 32, 4096}) + ->Args({8, 1, 1024 * 1000, 8, 1}) + ->Args({8, 1, 1024 * 1000, 8, 1000}) + ->Args({8, 1, 1024 * 1000, 8, 10000}) + ->Args({8, 1, 1024 * 1000, 8, 100000}); + + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/bench_exchange.h b/dbms/src/Flash/tests/bench_exchange.h new file mode 100644 index 00000000000..d8300d45740 --- /dev/null +++ b/dbms/src/Flash/tests/bench_exchange.h @@ -0,0 +1,299 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
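For readers new to the google-benchmark fixture API used by `ExchangeBench` above, here is a minimal self-contained example of the same `BENCHMARK_DEFINE_F` / `BENCHMARK_REGISTER_F` / `Args` pattern (a toy fixture for illustration, not part of this PR):

```cpp
// Minimal google-benchmark fixture mirroring the ExchangeBench registration pattern.
#include <benchmark/benchmark.h>
#include <vector>

class CopyBench : public benchmark::Fixture
{
public:
    void SetUp(const benchmark::State & state) override { src.assign(state.range(0), 42); }
    void TearDown(const benchmark::State &) override { src.clear(); }
    std::vector<int> src;
};

BENCHMARK_DEFINE_F(CopyBench, vector_copy)
(benchmark::State & state)
{
    for (auto _ : state) // each loop iteration is one timed run
    {
        std::vector<int> dst(src);
        benchmark::DoNotOptimize(dst.data());
    }
}
// Each Args tuple becomes one benchmark instance, read back via state.range(i).
BENCHMARK_REGISTER_F(CopyBench, vector_copy)
    ->Args({1024})
    ->Args({1024 * 1024});

BENCHMARK_MAIN();
```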
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace tests +{ + + +using Packet = mpp::MPPDataPacket; +using PacketPtr = std::shared_ptr; +using PacketQueue = MPMCQueue; +using PacketQueuePtr = std::shared_ptr; +using StopFlag = std::atomic; + +// NOLINTBEGIN(readability-convert-member-functions-to-static) +struct MockReceiverContext +{ + using Status = ::grpc::Status; + struct Request + { + String debugString() const + { + return "{Request}"; + } + + int source_index = 0; + int send_task_id = 0; + int recv_task_id = -1; + }; + + struct Reader + { + explicit Reader(const PacketQueuePtr & queue_) + : queue(queue_) + {} + + void initialize() const + { + } + + bool read(PacketPtr & packet [[maybe_unused]]) const + { + PacketPtr res; + if (queue->pop(res)) + { + *packet = *res; // avoid change shared packets + return true; + } + return false; + } + + Status finish() const + { + return ::grpc::Status(); + } + + PacketQueuePtr queue; + }; + + struct MockAsyncGrpcExchangePacketReader + { + // Not implement benchmark for Async GRPC for now. + void init(UnaryCallback *) { assert(0); } + void read(MPPDataPacketPtr &, UnaryCallback *) { assert(0); } + void finish(::grpc::Status &, UnaryCallback *) { assert(0); } + }; + + using AsyncReader = MockAsyncGrpcExchangePacketReader; + + MockReceiverContext( + const std::vector & queues_, + const std::vector & field_types_) + : queues(queues_) + , field_types(field_types_) + { + } + + void fillSchema(DAGSchema & schema) const + { + schema.clear(); + for (size_t i = 0; i < field_types.size(); ++i) + { + String name = "exchange_receiver_" + std::to_string(i); + ColumnInfo info = TiDB::fieldTypeToColumnInfo(field_types[i]); + schema.emplace_back(std::move(name), std::move(info)); + } + } + + Request makeRequest(int index) const + { + return {index, index, -1}; + } + + std::shared_ptr makeReader(const Request & request) + { + return std::make_shared(queues[request.send_task_id]); + } + + static Status getStatusOK() + { + return ::grpc::Status(); + } + + bool supportAsync(const Request &) const { return false; } + void makeAsyncReader( + const Request &, + std::shared_ptr &, + UnaryCallback *) const {} + + std::vector queues; + std::vector field_types; +}; +// NOLINTEND(readability-convert-member-functions-to-static) + +using MockExchangeReceiver = ExchangeReceiverBase; +using MockExchangeReceiverPtr = std::shared_ptr; +using MockExchangeReceiverInputStream = TiRemoteBlockInputStream; + +struct MockWriter : public PacketWriter +{ + explicit MockWriter(PacketQueuePtr queue_) + : queue(std::move(queue_)) + {} + + bool write(const Packet & packet) override + { + queue->push(std::make_shared(packet)); + return true; + } + + void finish() + { + queue->finish(); + } + + PacketQueuePtr queue; +}; + +using MockWriterPtr = std::shared_ptr; +using MockTunnel = MPPTunnelBase; +using MockTunnelPtr = std::shared_ptr; +using MockTunnelSet = MPPTunnelSetBase; +using MockTunnelSetPtr = std::shared_ptr; + +struct MockBlockInputStream : public IProfilingBlockInputStream +{ + const std::vector & blocks; + Block header; + std::mt19937 mt; + std::uniform_int_distribution dist; + StopFlag & stop_flag; + + MockBlockInputStream(const std::vector & blocks_, StopFlag & stop_flag_); + + String getName() const override { return "MockBlockInputStream"; } + Block getHeader() const override { return header; } + 
+ Block readImpl() override + { + if (stop_flag.load(std::memory_order_relaxed)) + return Block{}; + return blocks[dist(mt)]; + } +}; + +// Similar to MockBlockInputStream, but return fixed count of rows. +struct MockFixedRowsBlockInputStream : public IProfilingBlockInputStream +{ + Block header; + std::mt19937 mt; + std::uniform_int_distribution dist; + size_t current_rows; + size_t total_rows; + const std::vector & blocks; + + MockFixedRowsBlockInputStream(size_t total_rows_, const std::vector & blocks_); + + String getName() const override { return "MockBlockInputStream"; } + Block getHeader() const override { return header; } + + Block readImpl() override + { + if (current_rows >= total_rows) + return Block{}; + Block res = blocks[dist(mt)]; + current_rows += res.rows(); + return res; + } +}; + +Block makeBlock(int row_num, bool skew = false); +std::vector makeBlocks(int block_num, int row_num, bool skew = false); +mpp::MPPDataPacket makePacket(ChunkCodecStream & codec, int row_num); +std::vector makePackets(ChunkCodecStream & codec, int packet_num, int row_num); +std::vector makePacketQueues(int source_num, int queue_size); +std::vector makeFields(); +void printException(const Exception & e); +void sendPacket(const std::vector & packets, const PacketQueuePtr & queue, StopFlag & stop_flag); +void receivePacket(const PacketQueuePtr & queue); + +struct ReceiverHelper +{ + const int concurrency; + const int source_num; + const uint32_t fine_grained_shuffle_stream_count; + tipb::ExchangeReceiver pb_exchange_receiver; + std::vector fields; + mpp::TaskMeta task_meta; + std::vector queues; + std::shared_ptr join_ptr; + + explicit ReceiverHelper(int concurrency_, int source_num_, uint32_t fine_grained_shuffle_stream_count_); + MockExchangeReceiverPtr buildReceiver(); + std::vector buildExchangeReceiverStream(); + BlockInputStreamPtr buildUnionStream(); + void finish(); +}; + +struct SenderHelper +{ + const int source_num; + const int concurrency; + const uint32_t fine_grained_shuffle_stream_count; + const int64_t fine_grained_shuffle_batch_size; + + std::vector queues; + std::vector mock_writers; + std::vector tunnels; + MockTunnelSetPtr tunnel_set; + std::unique_ptr dag_context; + + SenderHelper( + int source_num_, + int concurrency_, + uint32_t fine_grained_shuffle_stream_count_, + int64_t fine_grained_shuffle_batch_size_, + const std::vector & queues_, + const std::vector & fields); + + // Using MockBlockInputStream to build streams. + BlockInputStreamPtr buildUnionStream(StopFlag & stop_flag, const std::vector & blocks); + // Using MockFixedRowsBlockInputStream to build streams. + BlockInputStreamPtr buildUnionStream(size_t total_rows, const std::vector & blocks); + + void finish(); +}; + +class ExchangeBench : public benchmark::Fixture +{ +public: + void SetUp(const benchmark::State &) override; + void TearDown(const benchmark::State &) override; + void runAndWait(std::shared_ptr receiver_helper, + BlockInputStreamPtr receiver_stream, + std::shared_ptr & sender_helper, + BlockInputStreamPtr sender_stream); + + std::vector uniform_blocks; + std::vector skew_blocks; +}; + + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/bench_window.cpp b/dbms/src/Flash/tests/bench_window.cpp new file mode 100644 index 00000000000..75dc53b065b --- /dev/null +++ b/dbms/src/Flash/tests/bench_window.cpp @@ -0,0 +1,167 @@ +// Copyright 2022 PingCAP, Ltd. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include
#include

namespace DB
{
namespace tests
{
class WindowFunctionBench : public ExchangeBench
{
public:
    void SetUp(const benchmark::State & state) override
    {
        // Using DAGRequestBuilder to build tipb::Window and tipb::Sort.
        // select row_number() over w1 from t1 window w1 as (partition by c1, c2, c3 order by c1, c2, c3);
        ExchangeBench::SetUp(state);
    }

    static void setupPB(uint64_t fine_grained_shuffle_stream_count, tipb::Window & window, tipb::Sort & sort)
    {
        MockColumnInfoVec columns{
            {"c1", TiDB::TP::TypeLongLong},
            {"c2", TiDB::TP::TypeString},
            {"c3", TiDB::TP::TypeLongLong},
        };
        size_t executor_index = 0;
        DAGRequestBuilder builder(executor_index);
        builder
            .mockTable("test", "t1", columns)
            .sort({{"c1", false}, {"c2", false}, {"c3", false}}, true, fine_grained_shuffle_stream_count)
            .window(RowNumber(),
                    {{"c1", false}, {"c2", false}, {"c3", false}},
                    {{"c1", false}, {"c2", false}, {"c3", false}},
                    buildDefaultRowsFrame(),
                    fine_grained_shuffle_stream_count);
        tipb::DAGRequest req;
        MPPInfo mpp_info(0, -1, -1, {}, std::unordered_map<String, std::vector<Int64>>{});
        builder.getRoot()->toTiPBExecutor(req.mutable_root_executor(), /*collator_id=*/0, mpp_info, TiFlashTestEnv::getContext());
        assert(req.root_executor().tp() == tipb::TypeWindow);
        window = req.root_executor().window();
        assert(window.child().tp() == tipb::TypeSort);
        sort = window.child().sort();
    }

    static void prepareWindowStream(Context & context, int concurrency, int source_num, int total_rows, uint32_t fine_grained_shuffle_stream_count, uint64_t fine_grained_shuffle_batch_size, const std::vector<Block> & blocks, BlockInputStreamPtr & sender_stream, BlockInputStreamPtr & receiver_stream, std::shared_ptr<SenderHelper> & sender_helper, std::shared_ptr<ReceiverHelper> & receiver_helper, bool build_window = true)
    {
        tipb::Window window;
        tipb::Sort sort;
        setupPB(fine_grained_shuffle_stream_count, window, sort);

        DAGPipeline pipeline;
        receiver_helper = std::make_shared<ReceiverHelper>(concurrency, source_num, fine_grained_shuffle_stream_count);
        pipeline.streams = receiver_helper->buildExchangeReceiverStream();

        sender_helper = std::make_shared<SenderHelper>(source_num, concurrency, fine_grained_shuffle_stream_count, fine_grained_shuffle_batch_size, receiver_helper->queues, receiver_helper->fields);
        sender_stream = sender_helper->buildUnionStream(total_rows, blocks);

        context.setDAGContext(sender_helper->dag_context.get());
        std::vector<NameAndTypePair> source_columns{
            NameAndTypePair("c1", makeNullable(std::make_shared<DataTypeInt64>())),
            NameAndTypePair("c2", makeNullable(std::make_shared<DataTypeString>())),
            NameAndTypePair("c3", makeNullable(std::make_shared<DataTypeInt64>()))};
        auto mock_interpreter = mockInterpreter(context, source_columns, concurrency);
        mock_interpreter->input_streams_vec.push_back(pipeline.streams);
        mockExecuteWindowOrder(mock_interpreter, pipeline, sort, fine_grained_shuffle_stream_count);
        if (build_window)
        {
            mockExecuteWindow(mock_interpreter, pipeline, window,
                              fine_grained_shuffle_stream_count);
        }
        pipeline.transform([&](auto & stream) {
            stream = std::make_shared<SquashingBlockInputStream>(stream, 8192, 0, "mock_executor_id_squashing");
        });
        receiver_stream = std::make_shared<UnionBlockInputStream<>>(pipeline.streams, BlockInputStreams{}, concurrency, /*req_id=*/"");
    }
};

BENCHMARK_DEFINE_F(WindowFunctionBench, basic_row_number)
(benchmark::State & state)
try
{
    const int concurrency = state.range(0);
    const int source_num = state.range(1);
    const int total_rows = state.range(2);
    const int fine_grained_shuffle_stream_count = state.range(3);
    const int fine_grained_shuffle_batch_size = state.range(4);
    const bool skew = state.range(5);
    Context context = TiFlashTestEnv::getContext();

    std::vector<Block> * blocks = &uniform_blocks;
    if (skew)
        blocks = &skew_blocks;

    for (auto _ : state)
    {
        std::shared_ptr<SenderHelper> sender_helper;
        std::shared_ptr<ReceiverHelper> receiver_helper;
        BlockInputStreamPtr sender_stream;
        BlockInputStreamPtr receiver_stream;

        prepareWindowStream(context, concurrency, source_num, total_rows, fine_grained_shuffle_stream_count, fine_grained_shuffle_batch_size, *blocks, sender_stream, receiver_stream, sender_helper, receiver_helper);

        runAndWait(receiver_helper, receiver_stream, sender_helper, sender_stream);
    }
}
CATCH
BENCHMARK_REGISTER_F(WindowFunctionBench, basic_row_number)
    ->Args({8, 1, 1024 * 1000, 0, 4096, false}) // Test fine_grained_shuffle_stream_count.
    ->Args({8, 1, 1024 * 1000, 4, 4096, false})
    ->Args({8, 1, 1024 * 1000, 8, 4096, false})
    ->Args({8, 1, 1024 * 1000, 16, 4096, false})
    ->Args({8, 1, 1024 * 1000, 32, 4096, false})
    ->Args({8, 1, 1024 * 1000, 8, 1, false}) // Test fine_grained_shuffle_batch_size.
    ->Args({8, 1, 1024 * 1000, 8, 1000, false})
    ->Args({8, 1, 1024 * 1000, 8, 10000, false})
    ->Args({8, 1, 1024 * 1000, 8, 100000, false})
    ->Args({8, 1, 1024 * 1000, 0, 4096, true}) // Test skew dataset.
    ->Args({8, 1, 1024 * 1000, 4, 4096, true})
    ->Args({8, 1, 1024 * 1000, 8, 4096, true})
    ->Args({8, 1, 1024 * 1000, 16, 4096, true});

BENCHMARK_DEFINE_F(WindowFunctionBench, partial_sort_skew_dataset)
(benchmark::State & state)
try
{
    const int concurrency = state.range(0);
    const int source_num = state.range(1);
    const int total_rows = state.range(2);
    const int fine_grained_shuffle_stream_count = state.range(3);
    const int fine_grained_shuffle_batch_size = state.range(4);
    Context context = TiFlashTestEnv::getContext();

    std::vector<Block> * blocks = &skew_blocks;

    for (auto _ : state)
    {
        std::shared_ptr<SenderHelper> sender_helper;
        std::shared_ptr<ReceiverHelper> receiver_helper;
        BlockInputStreamPtr sender_stream;
        BlockInputStreamPtr receiver_stream;

        // Only build partial sort.
        prepareWindowStream(context, concurrency, source_num, total_rows, fine_grained_shuffle_stream_count, fine_grained_shuffle_batch_size, *blocks, sender_stream, receiver_stream, sender_helper, receiver_helper, /*build_window=*/false);

        runAndWait(receiver_helper, receiver_stream, sender_helper, sender_stream);
    }
}
CATCH
BENCHMARK_REGISTER_F(WindowFunctionBench, partial_sort_skew_dataset)
    ->Args({1, 1, 1024 * 10000, 0, 4096}) // Test how much multi-threading improves partial sort performance.
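    // Args layout (matching state.range(0..4) above): {concurrency, source_num,
    // total_rows, fine_grained_shuffle_stream_count, fine_grained_shuffle_batch_size};
    // only the concurrency argument varies across these registrations.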
+ ->Args({2, 1, 1024 * 10000, 0, 4096}) + ->Args({4, 1, 1024 * 10000, 0, 4096}) + ->Args({8, 1, 1024 * 10000, 0, 4096}); +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/exchange_perftest.cpp b/dbms/src/Flash/tests/exchange_perftest.cpp deleted file mode 100644 index c2e047bec62..00000000000 --- a/dbms/src/Flash/tests/exchange_perftest.cpp +++ /dev/null @@ -1,699 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include // to include the implementation of StreamingDAGResponseWriter -#include // to include the implementation of ExchangeReceiver -#include // to include the implementation of MPPTunnel -#include // to include the implementation of MPPTunnelSet -#include -#include -#include - -namespace DB::tests -{ -namespace -{ -std::random_device rd; - -using Packet = mpp::MPPDataPacket; -using PacketPtr = std::shared_ptr; -using PacketQueue = MPMCQueue; -using PacketQueuePtr = std::shared_ptr; -using StopFlag = std::atomic; - -std::atomic received_data_size{0}; - -struct MockReceiverContext -{ - struct Status - { - int status_code = 0; - String error_msg; - - bool ok() const - { - return status_code == 0; - } - - const String & error_message() const - { - return error_msg; - } - - int error_code() const - { - return status_code; - } - }; - - struct Request - { - String debugString() const - { - return "{Request}"; - } - - int source_index = 0; - int send_task_id = 0; - int recv_task_id = -1; - }; - - struct Reader - { - explicit Reader(const PacketQueuePtr & queue_) - : queue(queue_) - {} - - void initialize() const - { - } - - bool read(PacketPtr & packet [[maybe_unused]]) const - { - PacketPtr res; - if (queue->pop(res)) - { - received_data_size.fetch_add(res->ByteSizeLong()); - *packet = *res; // avoid change shared packets - return true; - } - return false; - } - - Status finish() const - { - return {0, ""}; - } - - PacketQueuePtr queue; - }; - - MockReceiverContext( - const std::vector & queues_, - const std::vector & field_types_) - : queues(queues_) - , field_types(field_types_) - { - } - - void fillSchema(DAGSchema & schema) const - { - schema.clear(); - for (size_t i = 0; i < field_types.size(); ++i) - { - String name = "exchange_receiver_" + std::to_string(i); - ColumnInfo info = TiDB::fieldTypeToColumnInfo(field_types[i]); - schema.emplace_back(std::move(name), std::move(info)); - } - } - - Request makeRequest(int index) const - { - return {index, index, -1}; - } - - std::shared_ptr makeReader(const Request & request) - { - return std::make_shared(queues[request.send_task_id]); - } - - static Status getStatusOK() - { - return {0, ""}; - } - - std::vector queues; - std::vector field_types; -}; - -using MockExchangeReceiver = ExchangeReceiverBase; -using MockExchangeReceiverPtr = std::shared_ptr; -using MockExchangeReceiverInputStream = TiRemoteBlockInputStream; - -struct MockWriter -{ - explicit 
MockWriter(PacketQueuePtr queue_) - : queue(std::move(queue_)) - {} - - bool Write(const Packet & packet) - { - queue->push(std::make_shared(packet)); - return true; - } - - void finish() - { - queue->finish(); - } - - PacketQueuePtr queue; -}; - -using MockWriterPtr = std::shared_ptr; -using MockTunnel = MPPTunnelBase; -using MockTunnelPtr = std::shared_ptr; -using MockTunnelSet = MPPTunnelSetBase; -using MockTunnelSetPtr = std::shared_ptr; - -struct MockBlockInputStream : public IProfilingBlockInputStream -{ - const std::vector & blocks; - Block header; - std::mt19937 mt; - std::uniform_int_distribution dist; - StopFlag & stop_flag; - - MockBlockInputStream(const std::vector & blocks_, StopFlag & stop_flag_) - : blocks(blocks_) - , header(blocks[0].cloneEmpty()) - , mt(rd()) - , dist(0, blocks.size() - 1) - , stop_flag(stop_flag_) - {} - - String getName() const override { return "MockBlockInputStream"; } - Block getHeader() const override { return header; } - - Block readImpl() override - { - if (stop_flag.load(std::memory_order_relaxed)) - return Block{}; - return blocks[dist(mt)]; - } -}; - -Block makeBlock(int row_num) -{ - std::mt19937 mt(rd()); - std::uniform_int_distribution int64_dist; - std::uniform_int_distribution len_dist(10, 20); - std::uniform_int_distribution char_dist; - - InferredDataVector> int64_vec; - InferredDataVector> int64_vec2; - for (int i = 0; i < row_num; ++i) - { - int64_vec.emplace_back(int64_dist(mt)); - int64_vec2.emplace_back(int64_dist(mt)); - } - - InferredDataVector> string_vec; - for (int i = 0; i < row_num; ++i) - { - int len = len_dist(mt); - String s; - for (int j = 0; j < len; ++j) - s.push_back(char_dist(mt)); - string_vec.push_back(std::move(s)); - } - - auto int64_data_type = makeDataType>(); - ColumnWithTypeAndName int64_column(makeColumn>(int64_data_type, int64_vec), int64_data_type, "int64_1"); - ColumnWithTypeAndName int64_column2(makeColumn>(int64_data_type, int64_vec2), int64_data_type, "int64_2"); - - auto string_data_type = makeDataType>(); - ColumnWithTypeAndName string_column(makeColumn>(string_data_type, string_vec), string_data_type, "string"); - - return Block({int64_column, string_column, int64_column2}); -} - -std::vector makeBlocks(int block_num, int row_num) -{ - std::vector blocks; - for (int i = 0; i < block_num; ++i) - blocks.push_back(makeBlock(row_num)); - return blocks; -} - -mpp::MPPDataPacket makePacket(ChunkCodecStream & codec, int row_num) -{ - auto block = makeBlock(row_num); - codec.encode(block, 0, row_num); - - mpp::MPPDataPacket packet; - packet.add_chunks(codec.getString()); - codec.clear(); - - return packet; -} - -std::vector makePackets(ChunkCodecStream & codec, int packet_num, int row_num) -{ - std::vector packets; - for (int i = 0; i < packet_num; ++i) - packets.push_back(std::make_shared(makePacket(codec, row_num))); - return packets; -} - -std::vector makePacketQueues(int source_num, int queue_size) -{ - std::vector queues; - for (int i = 0; i < source_num; ++i) - queues.push_back(std::make_shared(queue_size)); - return queues; -} - -std::vector makeFields() -{ - std::vector fields(3); - fields[0].set_tp(TiDB::TypeLongLong); - fields[1].set_tp(TiDB::TypeString); - fields[2].set_tp(TiDB::TypeLongLong); - return fields; -} - -void printException(const Exception & e) -{ - std::string text = e.displayText(); - - auto embedded_stack_trace_pos = text.find("Stack trace"); - std::cerr << "Code: " << e.code() << ". 
" << text << std::endl - << std::endl; - if (std::string::npos == embedded_stack_trace_pos) - std::cerr << "Stack trace:" << std::endl - << e.getStackTrace().toString() << std::endl; -} - -void sendPacket(const std::vector & packets, const PacketQueuePtr & queue, StopFlag & stop_flag) -{ - std::mt19937 mt(rd()); - std::uniform_int_distribution dist(0, packets.size() - 1); - - while (!stop_flag.load()) - { - int i = dist(mt); - queue->tryPush(packets[i], std::chrono::milliseconds(10)); - } - queue->finish(); -} - -void receivePacket(const PacketQueuePtr & queue) -{ - while (true) - { - PacketPtr packet; - if (queue->pop(packet)) - received_data_size.fetch_add(packet->ByteSizeLong()); - else - break; - } -} - -template -void readBlock(BlockInputStreamPtr stream) -{ - [[maybe_unused]] auto get_rate = [](auto count, auto duration) { - return count * 1000 / duration.count(); - }; - - [[maybe_unused]] auto get_mib = [](auto v) { - return v / 1024 / 1024; - }; - - [[maybe_unused]] auto start = std::chrono::high_resolution_clock::now(); - [[maybe_unused]] auto second_ago = start; - [[maybe_unused]] Int64 block_count = 0; - [[maybe_unused]] Int64 last_block_count = 0; - [[maybe_unused]] Int64 last_data_size = received_data_size.load(); - try - { - stream->readPrefix(); - while (auto block = stream->read()) - { - if constexpr (print_progress) - { - ++block_count; - auto cur = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(cur - second_ago); - if (duration.count() >= 1000) - { - Int64 data_size = received_data_size.load(); - std::cout - << fmt::format( - "Blocks: {:<10} Data(MiB): {:<8} Block/s: {:<6} Data/s(MiB): {:<6}", - block_count, - get_mib(data_size), - get_rate(block_count - last_block_count, duration), - get_mib(get_rate(data_size - last_data_size, duration))) - << std::endl; - second_ago = cur; - last_block_count = block_count; - last_data_size = data_size; - } - } - } - stream->readSuffix(); - - if constexpr (print_progress) - { - auto cur = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(cur - start); - Int64 data_size = received_data_size.load(); - std::cout - << fmt::format( - "End. 
Blocks: {:<10} Data(MiB): {:<8} Block/s: {:<6} Data/s(MiB): {:<6}", - block_count, - get_mib(data_size), - get_rate(block_count, duration), - get_mib(get_rate(data_size, duration))) - << std::endl; - } - } - catch (const Exception & e) - { - printException(e); - throw; - } -} - -struct ReceiverHelper -{ - const int source_num; - tipb::ExchangeReceiver pb_exchange_receiver; - std::vector fields; - mpp::TaskMeta task_meta; - std::vector queues; - std::shared_ptr join_ptr; - - explicit ReceiverHelper(int source_num_) - : source_num(source_num_) - { - pb_exchange_receiver.set_tp(tipb::Hash); - for (int i = 0; i < source_num; ++i) - { - mpp::TaskMeta task; - task.set_start_ts(0); - task.set_task_id(i); - task.set_partition_id(i); - task.set_address(""); - - String encoded_task; - task.SerializeToString(&encoded_task); - - pb_exchange_receiver.add_encoded_task_meta(encoded_task); - } - - fields = makeFields(); - *pb_exchange_receiver.add_field_types() = fields[0]; - *pb_exchange_receiver.add_field_types() = fields[1]; - *pb_exchange_receiver.add_field_types() = fields[2]; - - task_meta.set_task_id(100); - - queues = makePacketQueues(source_num, 10); - } - - MockExchangeReceiverPtr buildReceiver() - { - return std::make_shared( - std::make_shared(queues, fields), - source_num, - source_num * 5, - nullptr); - } - - BlockInputStreamPtr buildUnionStream(int concurrency) - { - auto receiver = buildReceiver(); - std::vector streams; - for (int i = 0; i < concurrency; ++i) - streams.push_back(std::make_shared(receiver, nullptr)); - return std::make_shared>(streams, nullptr, concurrency, /*req_id=*/""); - } - - BlockInputStreamPtr buildUnionStreamWithHashJoinBuildStream(int concurrency) - { - auto receiver = buildReceiver(); - std::vector streams; - for (int i = 0; i < concurrency; ++i) - streams.push_back(std::make_shared(receiver, nullptr)); - - auto receiver_header = streams.front()->getHeader(); - auto key_name = receiver_header.getByPosition(0).name; - - join_ptr = std::make_shared( - Names{key_name}, - Names{key_name}, - true, - SizeLimits(0, 0, OverflowMode::THROW), - ASTTableJoin::Kind::Inner, - ASTTableJoin::Strictness::All, - /*req_id=*/"", - TiDB::TiDBCollators{nullptr}, - "", - "", - "", - "", - nullptr, - 65536); - - join_ptr->init(receiver_header, concurrency); - - for (int i = 0; i < concurrency; ++i) - streams[i] = std::make_shared(streams[i], join_ptr, i, /*req_id=*/""); - - return std::make_shared>(streams, nullptr, concurrency, /*req_id=*/""); - } - - void finish() - { - if (join_ptr) - { - join_ptr->setBuildTableState(Join::BuildTableState::SUCCEED); - std::cout << fmt::format("Hash table size: {} bytes", join_ptr->getTotalByteCount()) << std::endl; - } - } -}; - -struct SenderHelper -{ - const int source_num; - const int concurrency; - - std::vector queues; - std::vector mock_writers; - std::vector tunnels; - MockTunnelSetPtr tunnel_set; - std::unique_ptr dag_context; - - SenderHelper( - int source_num_, - int concurrency_, - const std::vector & queues_, - const std::vector & fields) - : source_num(source_num_) - , concurrency(concurrency_) - , queues(queues_) - { - mpp::TaskMeta task_meta; - tunnel_set = std::make_shared(); - for (int i = 0; i < source_num; ++i) - { - auto writer = std::make_shared(queues[i]); - mock_writers.push_back(writer); - - auto tunnel = std::make_shared( - task_meta, - task_meta, - std::chrono::seconds(60), - concurrency, - false); - tunnel->connect(writer.get()); - tunnels.push_back(tunnel); - tunnel_set->addTunnel(tunnel); - } - - tipb::DAGRequest 
dag_request; - tipb::Executor root_executor; - root_executor.set_executor_id("ExchangeSender_100"); - *dag_request.mutable_root_executor() = root_executor; - - dag_context = std::make_unique(dag_request); - dag_context->is_mpp_task = true; - dag_context->is_root_mpp_task = false; - dag_context->encode_type = tipb::EncodeType::TypeCHBlock; - dag_context->result_field_types = fields; - } - - BlockInputStreamPtr buildUnionStream( - StopFlag & stop_flag, - const std::vector & blocks) - { - std::vector send_streams; - for (int i = 0; i < concurrency; ++i) - { - BlockInputStreamPtr stream = std::make_shared(blocks, stop_flag); - std::unique_ptr response_writer( - new StreamingDAGResponseWriter( - tunnel_set, - {0, 1, 2}, - TiDB::TiDBCollators(3), - tipb::Hash, - -1, - -1, - true, - *dag_context)); - send_streams.push_back(std::make_shared(stream, std::move(response_writer), /*req_id=*/"")); - } - - return std::make_shared>(send_streams, nullptr, concurrency, /*req_id=*/""); - } - - void finish() - { - for (size_t i = 0; i < tunnels.size(); ++i) - { - tunnels[i]->writeDone(); - tunnels[i]->waitForFinish(); - mock_writers[i]->finish(); - } - } -}; - -void testOnlyReceiver(int concurrency, int source_num, int block_rows, int seconds) -{ - ReceiverHelper receiver_helper(source_num); - auto union_input_stream = receiver_helper.buildUnionStream(concurrency); - - auto chunk_codec_stream = CHBlockChunkCodec().newCodecStream(receiver_helper.fields); - auto packets = makePackets(*chunk_codec_stream, 100, block_rows); - - StopFlag stop_flag(false); - - std::vector threads; - for (const auto & queue : receiver_helper.queues) - threads.emplace_back(sendPacket, std::cref(packets), queue, std::ref(stop_flag)); - threads.emplace_back(readBlock, union_input_stream); - - std::this_thread::sleep_for(std::chrono::seconds(seconds)); - stop_flag.store(true); - for (auto & thread : threads) - thread.join(); - - receiver_helper.finish(); -} - -template -void testSenderReceiver(int concurrency, int source_num, int block_rows, int seconds) -{ - ReceiverHelper receiver_helper(source_num); - BlockInputStreamPtr union_receive_stream; - if constexpr (with_join) - union_receive_stream = receiver_helper.buildUnionStreamWithHashJoinBuildStream(concurrency); - else - union_receive_stream = receiver_helper.buildUnionStream(concurrency); - - StopFlag stop_flag(false); - auto blocks = makeBlocks(100, block_rows); - - SenderHelper sender_helper(source_num, concurrency, receiver_helper.queues, receiver_helper.fields); - auto union_send_stream = sender_helper.buildUnionStream(stop_flag, blocks); - - auto write_thread = std::thread(readBlock, union_send_stream); - auto read_thread = std::thread(readBlock, union_receive_stream); - - std::this_thread::sleep_for(std::chrono::seconds(seconds)); - stop_flag.store(true); - - write_thread.join(); - sender_helper.finish(); - - read_thread.join(); - receiver_helper.finish(); -} - -void testOnlySender(int concurrency, int source_num, int block_rows, int seconds) -{ - auto queues = makePacketQueues(source_num, 10); - auto fields = makeFields(); - - StopFlag stop_flag(false); - auto blocks = makeBlocks(100, block_rows); - - SenderHelper sender_helper(source_num, concurrency, queues, fields); - auto union_send_stream = sender_helper.buildUnionStream(stop_flag, blocks); - - auto write_thread = std::thread(readBlock, union_send_stream); - std::vector read_threads; - for (int i = 0; i < source_num; ++i) - read_threads.emplace_back(receivePacket, queues[i]); - - 
std::this_thread::sleep_for(std::chrono::seconds(seconds)); - stop_flag.store(true); - - write_thread.join(); - sender_helper.finish(); - - for (auto & t : read_threads) - t.join(); -} - -} // namespace -} // namespace DB::tests - -int main(int argc [[maybe_unused]], char ** argv [[maybe_unused]]) -{ - if (argc < 2 || argc > 6) - { - std::cerr << fmt::format("Usage: {} [receiver|sender|sender_receiver|sender_receiver_join] ", argv[0]) << std::endl; - exit(1); - } - - String method = argv[1]; - int concurrency = argc >= 3 ? atoi(argv[2]) : 5; - int source_num = argc >= 4 ? atoi(argv[3]) : 2; - int block_rows = argc >= 5 ? atoi(argv[4]) : 5000; - int seconds = argc >= 6 ? atoi(argv[5]) : 10; - - using TestHandler = std::function; - std::unordered_map handlers = { - {"receiver", DB::tests::testOnlyReceiver}, - {"sender", DB::tests::testOnlySender}, - {"sender_receiver", DB::tests::testSenderReceiver}, - {"sender_receiver_join", DB::tests::testSenderReceiver}, - }; - - auto it = handlers.find(method); - if (it != handlers.end()) - { - std::cout - << fmt::format( - "{}. concurrency = {}. source_num = {}. block_rows = {}. seconds = {}", - method, - concurrency, - source_num, - block_rows, - seconds) - << std::endl; - it->second(concurrency, source_num, block_rows, seconds); - } - else - { - std::cerr << "Unknown method: " << method << std::endl; - exit(1); - } -} diff --git a/dbms/src/Flash/tests/gtest_executor.cpp b/dbms/src/Flash/tests/gtest_executor.cpp index 49512b9271f..ee35af0c03d 100644 --- a/dbms/src/Flash/tests/gtest_executor.cpp +++ b/dbms/src/Flash/tests/gtest_executor.cpp @@ -59,180 +59,170 @@ class ExecutorTestRunner : public DB::tests::ExecutorTest {toVec("s", {"banana", "banana"}), toVec("join_c", {"apple", "banana"})}); } - - void executeExecutor(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns, size_t concurrency = 1) - { - std::vector enable_planners{"true", "false"}; - for (auto enable : enable_planners) - { - context.context.setSetting("enable_planner", enable); - executeStreams(request, expect_columns, concurrency); - } - } }; TEST_F(ExecutorTestRunner, Filter) try { - auto request = context - .scan("test_db", "test_table") - .filter(eq(col("s1"), col("s2"))) - .build(context); - { - executeExecutor(request, - {toNullableVec({"banana"}), - toNullableVec({"banana"})}); - } + wrapForDisEnablePlanner([&]() { + auto request = context + .scan("test_db", "test_table") + .filter(eq(col("s1"), col("s2"))) + .build(context); + { + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana"}), + toNullableVec({"banana"})})); + } - request = context.receive("exchange1") - .filter(eq(col("s1"), col("s2"))) - .build(context); - { - executeExecutor(request, - {toNullableVec({"banana"}), - toNullableVec({"banana"})}); - } + request = context.receive("exchange1") + .filter(eq(col("s1"), col("s2"))) + .build(context); + { + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana"}), + toNullableVec({"banana"})})); + } + }); } CATCH TEST_F(ExecutorTestRunner, JoinWithTableScan) try { - auto request = context - .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) - .topN("join_c", false, 2) - .build(context); - { - String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" - " Join_2 | LeftOuterJoin, HashJoin. 
left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " table_scan_0 | {<0, String>, <1, String>}\n" - " table_scan_1 | {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - executeExecutor(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 2); - - executeExecutor(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 5); - - executeExecutor(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}); - } - request = context - .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) - .project({"s", "join_c"}) - .topN("join_c", false, 2) - .build(context); - { - String expected = "topn_4 | order_by: {(<1, String>, desc: false)}, limit: 2\n" - " project_3 | {<0, String>, <1, String>}\n" - " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " table_scan_0 | {<0, String>, <1, String>}\n" - " table_scan_1 | {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - executeExecutor(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 2); - } + wrapForDisEnablePlanner([&]() { + auto request = context + .scan("test_db", "l_table") + .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .topN("join_c", false, 2) + .build(context); + { + String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" + " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" + " table_scan_0 | {<0, String>, <1, String>}\n" + " table_scan_1 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + + ASSERT_COLUMNS_EQ_R(executeStreams(request, 5), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + } + request = context + .scan("test_db", "l_table") + .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .project({"s", "join_c"}) + .topN("join_c", false, 2) + .build(context); + { + String expected = "topn_4 | order_by: {(<1, String>, desc: false)}, limit: 2\n" + " project_3 | {<0, String>, <1, String>}\n" + " Join_2 | LeftOuterJoin, HashJoin. 
left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" + " table_scan_0 | {<0, String>, <1, String>}\n" + " table_scan_1 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + } - request = context - .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table_2"), {col("join_c")}, ASTTableJoin::Kind::Left) - .topN("join_c", false, 4) - .build(context); - { - String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 4\n" - " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " table_scan_0 | {<0, String>, <1, String>}\n" - " table_scan_1 | {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - executeExecutor(request, - {toNullableVec({"banana", "banana", "banana", "banana"}), - toNullableVec({"apple", "apple", "apple", "banana"}), - toNullableVec({"banana", "banana", "banana", {}}), - toNullableVec({"apple", "apple", "apple", {}})}, - 2); - executeExecutor(request, - {toNullableVec({"banana", "banana", "banana", "banana"}), - toNullableVec({"apple", "apple", "apple", "banana"}), - toNullableVec({"banana", "banana", "banana", {}}), - toNullableVec({"apple", "apple", "apple", {}})}, - 3); - } + request = context + .scan("test_db", "l_table") + .join(context.scan("test_db", "r_table_2"), {col("join_c")}, ASTTableJoin::Kind::Left) + .topN("join_c", false, 4) + .build(context); + { + String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 4\n" + " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" + " table_scan_0 | {<0, String>, <1, String>}\n" + " table_scan_1 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana", "banana", "banana"}), + toNullableVec({"apple", "apple", "apple", "banana"}), + toNullableVec({"banana", "banana", "banana", {}}), + toNullableVec({"apple", "apple", "apple", {}})})); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 3), + createColumns({toNullableVec({"banana", "banana", "banana", "banana"}), + toNullableVec({"apple", "apple", "apple", "banana"}), + toNullableVec({"banana", "banana", "banana", {}}), + toNullableVec({"apple", "apple", "apple", {}})})); + } + }); } CATCH TEST_F(ExecutorTestRunner, JoinWithExchangeReceiver) try { - auto request = context - .receive("exchange_l_table") - .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) - .topN("join_c", false, 2) - .build(context); - { - String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" - " Join_2 | LeftOuterJoin, HashJoin. 
left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>}\n" - " exchange_receiver_1 | type:PassThrough, {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - executeExecutor(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 2); - - executeExecutor(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 5); - - executeExecutor(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}); - } + wrapForDisEnablePlanner([&]() { + auto request = context + .receive("exchange_l_table") + .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .topN("join_c", false, 2) + .build(context); + { + String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" + " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" + " exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>}\n" + " exchange_receiver_1 | type:PassThrough, {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + + ASSERT_COLUMNS_EQ_R(executeStreams(request, 5), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + } + }); } CATCH TEST_F(ExecutorTestRunner, JoinWithTableScanAndReceiver) try { - auto request = context - .scan("test_db", "l_table") - .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) - .topN("join_c", false, 2) - .build(context); - { - String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" - " Join_2 | LeftOuterJoin, HashJoin. left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" - " table_scan_0 | {<0, String>, <1, String>}\n" - " exchange_receiver_1 | type:PassThrough, {<0, String>, <1, String>}\n"; - ASSERT_DAGREQUEST_EQAUL(expected, request); - executeExecutor(request, - {toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"}), - toNullableVec({"banana", "banana"}), - toNullableVec({"apple", "banana"})}, - 2); - } + wrapForDisEnablePlanner([&]() { + auto request = context + .scan("test_db", "l_table") + .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .topN("join_c", false, 2) + .build(context); + { + String expected = "topn_3 | order_by: {(<1, String>, desc: false)}, limit: 2\n" + " Join_2 | LeftOuterJoin, HashJoin. 
left_join_keys: {<0, String>}, right_join_keys: {<0, String>}\n" + " table_scan_0 | {<0, String>, <1, String>}\n" + " exchange_receiver_1 | type:PassThrough, {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), + createColumns({toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"}), + toNullableVec({"banana", "banana"}), + toNullableVec({"apple", "banana"})})); + } + }); } CATCH diff --git a/dbms/src/Flash/tests/gtest_qb_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp similarity index 56% rename from dbms/src/Flash/tests/gtest_qb_interpreter.cpp rename to dbms/src/Flash/tests/gtest_interpreter.cpp index 9c4c15857d3..d681cfaf66b 100644 --- a/dbms/src/Flash/tests/gtest_qb_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -19,26 +19,26 @@ namespace DB { namespace tests { -class QBInterpreterExecuteTest : public DB::tests::ExecutorTest +class InterpreterExecuteTest : public DB::tests::ExecutorTest { public: void initializeContext() override { ExecutorTest::initializeContext(); - context.context.setSetting("enable_planner", "false"); + enablePlanner(false); context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); context.addExchangeRelationSchema("sender_1", {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); - context.addExchangeRelationSchema("sender_l", {{"l_a", TiDB::TP::TypeString}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); - context.addExchangeRelationSchema("sender_r", {{"r_a", TiDB::TP::TypeString}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + context.addExchangeRelationSchema("sender_l", {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + context.addExchangeRelationSchema("sender_r", {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); } }; -TEST_F(QBInterpreterExecuteTest, SingleQueryBlock) +TEST_F(InterpreterExecuteTest, SingleQueryBlock) try { auto request = context.scan("test_db", "test_table_1") @@ -92,13 +92,13 @@ Union: } CATCH -TEST_F(QBInterpreterExecuteTest, MultipleQueryBlockWithSource) +TEST_F(InterpreterExecuteTest, MultipleQueryBlockWithSource) try { auto request = context.scan("test_db", "test_table_1") .project({"s1", "s2", "s3"}) .project({"s1", "s2"}) - .project("s1") + .project({"s1"}) .build(context); { String expected = R"( @@ -200,51 +200,10 @@ Union: ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } - // Join Source. 
- DAGRequestBuilder table1 = context.scan("test_db", "r_table"); - DAGRequestBuilder table2 = context.scan("test_db", "l_table"); - DAGRequestBuilder table3 = context.scan("test_db", "r_table"); - DAGRequestBuilder table4 = context.scan("test_db", "l_table"); - - request = table1.join( - table2.join( - table3.join(table4, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) - .build(context); - { - String expected = R"( -CreatingSets - Union: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - MockTableScan - Union x 2: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - Expression: - HashJoinProbe: - Expression: - MockTableScan - Union: - Expression x 10: - Expression: - HashJoinProbe: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - request = context.receive("sender_1") .project({"s1", "s2", "s3"}) .project({"s1", "s2"}) - .project("s1") + .project({"s1"}) .build(context); { String expected = R"( @@ -263,7 +222,7 @@ Union: request = context.receive("sender_1") .project({"s1", "s2", "s3"}) .project({"s1", "s2"}) - .project("s1") + .project({"s1"}) .exchangeSender(tipb::Broadcast) .build(context); { @@ -280,387 +239,394 @@ Union: MockExchangeReceiver)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } - - // only join + ExchangeReceiver - DAGRequestBuilder receiver1 = context.receive("sender_l"); - DAGRequestBuilder receiver2 = context.receive("sender_r"); - DAGRequestBuilder receiver3 = context.receive("sender_l"); - DAGRequestBuilder receiver4 = context.receive("sender_r"); - - request = receiver1.join( - receiver2.join( - receiver3.join(receiver4, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) - .build(context); - { - String expected = R"( -CreatingSets - Union: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - MockExchangeReceiver - Union x 2: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - Expression: - HashJoinProbe: - Expression: - MockExchangeReceiver - Union: - Expression x 10: - Expression: - HashJoinProbe: - Expression: - MockExchangeReceiver)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - // join + receiver + sender - // TODO: Find a way to write the request easier. 
- DAGRequestBuilder receiver5 = context.receive("sender_l"); - DAGRequestBuilder receiver6 = context.receive("sender_r"); - DAGRequestBuilder receiver7 = context.receive("sender_l"); - DAGRequestBuilder receiver8 = context.receive("sender_r"); - request = receiver5.join( - receiver6.join( - receiver7.join(receiver8, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) - .exchangeSender(tipb::PassThrough) - .build(context); - { - String expected = R"( -CreatingSets - Union: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - MockExchangeReceiver - Union x 2: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - Expression: - HashJoinProbe: - Expression: - MockExchangeReceiver - Union: - MockExchangeSender x 10 - Expression: - Expression: - HashJoinProbe: - Expression: - MockExchangeReceiver)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } } CATCH -TEST_F(QBInterpreterExecuteTest, ParallelQuery) +TEST_F(InterpreterExecuteTest, Window) try { - /// executor with table scan - auto request = context.scan("test_db", "test_table_1") - .limit(10) + auto request = context + .scan("test_db", "test_table") + .sort({{"s1", true}, {"s2", false}}, true) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) .build(context); { String expected = R"( -Limit, limit = 10 - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); - - expected = R"( Union: - SharedQuery x 5: - Limit, limit = 10 - Union: - Limit x 5, limit = 10 + Expression x 10: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 5); + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } - request = context.scan("test_db", "test_table_1") - .project({"s1", "s2", "s3"}) + request = context.scan("test_db", "test_table") + .sort({{"s1", true}, {"s2", false}}, true) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) + .project({"s1", "s2", "RowNumber()"}) .build(context); { String expected = R"( -Expression: - Expression: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); - - expected = R"( Union: - Expression x 5: + Expression x 10: Expression: Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 5); + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } request = context.scan("test_db", "test_table_1") - .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) + .sort({{"s1", true}, {"s2", false}}, true) + .project({"s1", "s2", "s3"}) + .window(RowNumber(), {"s1", true}, {"s1", false}, buildDefaultRowsFrame()) + .project({"s1", "s2", "s3", "RowNumber()"}) .build(context); { String expected = R"( -Expression: - Aggregating - Concat - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); - - expected = R"( Union: - Expression x 5: - SharedQuery: - ParallelAggregating, max_threads: 5, final: true - MockTableScan x 5)"; - 
ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 5); + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Union: + Expression x 10: + Expression: + SharedQuery: + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } +} +CATCH - request = context.scan("test_db", "test_table_1") - .topN("s2", false, 10) - .build(context); +TEST_F(InterpreterExecuteTest, FineGrainedShuffle) +try +{ + // fine-grained shuffle is enabled. + const uint64_t enable = 8; + const uint64_t disable = 0; + auto request = context + .receive("sender_1", enable) + .sort({{"s1", true}, {"s2", false}}, true, enable) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame(), enable) + .build(context); { String expected = R"( -Expression: - MergeSorting, limit = 10 - PartialSorting: limit = 10 - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); +Union: + Expression x 10: + Expression: + Window: , function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting: , limit = 0 + PartialSorting: : limit = 0 + Expression: + MockExchangeReceiver + )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } - expected = R"( + auto topn_request = context + .receive("sender_1") + .topN("s2", false, 10) + .build(context); + String topn_expected = R"( Union: - SharedQuery x 5: + SharedQuery x 10: Expression: MergeSorting, limit = 10 Union: - PartialSorting x 5: limit = 10 - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 5); - } - - request = context.scan("test_db", "test_table_1") - .filter(eq(col("s2"), col("s3"))) - .build(context); - { - String expected = R"( -Expression: - Expression: - Filter: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); - - expected = R"( -Union: - Expression x 5: - Expression: - Filter: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 5); - } - - /// other cases - request = context.scan("test_db", "test_table_1") - .limit(10) - .project({"s1", "s2", "s3"}) - .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) + PartialSorting x 10: limit = 10 + MockExchangeReceiver + )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(topn_expected, topn_request, 10); + + // fine-grained shuffle is disabled. 
+ request = context + .receive("sender_1", disable) + .sort({{"s1", true}, {"s2", false}}, true, disable) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame(), disable) .build(context); { String expected = R"( Union: Expression x 10: SharedQuery: - ParallelAggregating, max_threads: 10, final: true - Expression x 10: - SharedQuery: - Limit, limit = 10 - Union: - Limit x 10, limit = 10 + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 Expression: - MockTableScan)"; + MockExchangeReceiver + )"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - - expected = R"(Expression: - Aggregating - Concat - Expression: - Limit, limit = 10 - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); } - request = context.scan("test_db", "test_table_1") - .topN("s2", false, 10) - .project({"s1", "s2", "s3"}) - .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) - .build(context); + topn_request = context + .receive("sender_1") + .topN("s2", false, 10) + .build(context); + ASSERT_BLOCKINPUTSTREAM_EQAUL(topn_expected, topn_request, 10); +} +CATCH + +TEST_F(InterpreterExecuteTest, Join) +try +{ + // TODO: Find a way to write the request easier. { - String expected = R"( -Union: - Expression x 10: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - Expression x 10: - SharedQuery: - Expression: - MergeSorting, limit = 10 - Union: - PartialSorting x 10: limit = 10 - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + // Join Source. + DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + DAGRequestBuilder table3 = context.scan("test_db", "r_table"); + DAGRequestBuilder table4 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2.join( + table3.join(table4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .build(context); - expected = R"( -Expression: - Aggregating - Concat - Expression: + String expected = R"( +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: Expression: - MergeSorting, limit = 10 - PartialSorting: limit = 10 - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); + MockTableScan + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + MockTableScan + Union: + Expression x 10: + Expression: + HashJoinProbe: + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } - request = context.scan("test_db", "test_table_1") - .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) - .project({"s2", "s3"}) - .aggregation({Max(col("s2"))}, {col("s3")}) - .build(context); { + // only join + ExchangeReceiver + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + DAGRequestBuilder receiver3 = context.receive("sender_l"); + DAGRequestBuilder receiver4 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2.join( + receiver3.join(receiver4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .build(context); + String 
expected = R"( -Union: - Expression x 10: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - Expression x 10: +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockExchangeReceiver + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + MockExchangeReceiver + Union: + Expression x 10: + Expression: + HashJoinProbe: Expression: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - MockTableScan x 10)"; + MockExchangeReceiver)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - - expected = R"( -Expression: - Aggregating - Concat - Expression: - Expression: - Aggregating - Concat - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); } - request = context.scan("test_db", "test_table_1") - .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) - .exchangeSender(tipb::PassThrough) - .build(context); { + // join + receiver + sender + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + DAGRequestBuilder receiver3 = context.receive("sender_l"); + DAGRequestBuilder receiver4 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2.join( + receiver3.join(receiver4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .exchangeSender(tipb::PassThrough) + .build(context); + String expected = R"( -Union: - MockExchangeSender x 10 - Expression: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - MockTableScan x 10)"; +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockExchangeReceiver + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + MockExchangeReceiver + Union: + MockExchangeSender x 10 + Expression: + Expression: + HashJoinProbe: + Expression: + MockExchangeReceiver)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - - expected = R"( -MockExchangeSender - Expression: - Aggregating - Concat - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); } +} +CATCH - request = context.scan("test_db", "test_table_1") - .topN("s2", false, 10) - .exchangeSender(tipb::PassThrough) - .build(context); +TEST_F(InterpreterExecuteTest, JoinThenAgg) +try +{ { + // Left Join. 
+ DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2, + {col("join_c")}, + ASTTableJoin::Kind::Left) + .aggregation({Max(col("r_a"))}, {col("join_c")}) + .build(context); String expected = R"( -Union: - MockExchangeSender x 10 - SharedQuery: - Expression: - MergeSorting, limit = 10 - Union: - PartialSorting x 10: limit = 10 - MockTableScan)"; +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + HashJoinProbe: + Expression: + MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - - expected = R"( -MockExchangeSender - Expression: - MergeSorting, limit = 10 - PartialSorting: limit = 10 - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); } - request = context.scan("test_db", "test_table_1") - .limit(10) - .exchangeSender(tipb::PassThrough) - .build(context); { + // Right Join + DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2, + {col("join_c")}, + ASTTableJoin::Kind::Right) + .aggregation({Max(col("r_a"))}, {col("join_c")}) + .build(context); String expected = R"( -Union: - MockExchangeSender x 10 - SharedQuery: - Limit, limit = 10 - Union: - Limit x 10, limit = 10 - Expression: - MockTableScan)"; +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Right + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + HashJoinProbe: + Expression: + Expression: + MockTableScan + Expression x 10: + NonJoined: )"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - - expected = R"( -MockExchangeSender - Limit, limit = 10 - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 1); } - DAGRequestBuilder table1 = context.scan("test_db", "r_table"); - DAGRequestBuilder table2 = context.scan("test_db", "l_table"); - request = table1.join(table2.limit(1), {col("join_c")}, ASTTableJoin::Kind::Left).build(context); { + // Right join + receiver + sender + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2, + {col("join_c")}, + ASTTableJoin::Kind::Right) + .aggregation({Sum(col("r_a"))}, {col("join_c")}) + .exchangeSender(tipb::PassThrough) + .limit(10) + .build(context); String expected = R"( CreatingSets Union: - HashJoinBuildBlockInputStream x 10: , join_kind = Left + HashJoinBuildBlockInputStream x 20: , join_kind = Right Expression: - SharedQuery: - Limit, limit = 1 - Union: - Limit x 10, limit = 1 - Expression: - MockTableScan + Expression: + MockExchangeReceiver Union: - Expression x 10: - Expression: - HashJoinProbe: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + MockExchangeSender x 20 + SharedQuery: + Limit, limit = 10 + Union: + Limit x 20, limit = 10 + Expression: + Expression: + SharedQuery: + ParallelAggregating, max_threads: 20, final: true + Expression x 20: + HashJoinProbe: + Expression: + Expression: + MockExchangeReceiver + Expression x 20: + NonJoined: )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 20); } } CATCH diff 
--git a/dbms/src/Flash/tests/gtest_limit_executor.cpp b/dbms/src/Flash/tests/gtest_limit_executor.cpp new file mode 100644 index 00000000000..a3ddf341525 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_limit_executor.cpp @@ -0,0 +1,79 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorLimitTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColDataType = std::optional::FieldType>; + using ColumnWithData = std::vector; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_name}, + {{col_name, TiDB::TP::TypeString}}, + {toNullableVec(col_name, col0)}); + } + + std::shared_ptr buildDAGRequest(size_t limit_num) + { + return context.scan(db_name, table_name).limit(limit_num).build(context); + } + + /// Prepare some names + const String db_name{"test_db"}; + const String table_name{"projection_test_table"}; + const String col_name{"limit_col"}; + const ColumnWithData col0{"col0-0", {}, "col0-2", "col0-3", {}, "col0-5", "col0-6", "col0-7"}; +}; + +TEST_F(ExecutorLimitTestRunner, Limit) +try +{ + wrapForDisEnablePlanner([&]() { + std::shared_ptr request; + ColumnsWithTypeAndName expect_cols; + + /// Check limit result with various parameters + const size_t col_data_num = col0.size(); + for (size_t limit_num = 0; limit_num <= col_data_num + 3; ++limit_num) + { + if (limit_num == col_data_num + 3) + limit_num = INT_MAX; + request = buildDAGRequest(limit_num); + + if (limit_num == 0) + expect_cols = {}; + else if (limit_num > col_data_num) + expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.end()))}; + else + expect_cols = {toNullableVec(col_name, ColumnWithData(col0.begin(), col0.begin() + limit_num))}; + + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols); + } + }); +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_planner_interpreter.cpp b/dbms/src/Flash/tests/gtest_planner_interpreter.cpp index 3840b858340..aa4950a2b22 100644 --- a/dbms/src/Flash/tests/gtest_planner_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_planner_interpreter.cpp @@ -26,19 +26,19 @@ class PlannerInterpreterExecuteTest : public DB::tests::ExecutorTest { ExecutorTest::initializeContext(); - context.context.setSetting("enable_planner", "true"); + enablePlanner(true); context.addMockTable({"test_db", "test_table"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "test_table_1"}, {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "r_table"}, {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); context.addMockTable({"test_db", "l_table"}, {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); context.addExchangeRelationSchema("sender_1", {{"s1", 
TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}, {"s3", TiDB::TP::TypeString}}); - context.addExchangeRelationSchema("sender_l", {{"l_a", TiDB::TP::TypeString}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); - context.addExchangeRelationSchema("sender_r", {{"r_a", TiDB::TP::TypeString}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + context.addExchangeRelationSchema("sender_l", {{"l_a", TiDB::TP::TypeLong}, {"l_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); + context.addExchangeRelationSchema("sender_r", {{"r_a", TiDB::TP::TypeLong}, {"r_b", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}); } }; -TEST_F(PlannerInterpreterExecuteTest, SimpleQuery) +TEST_F(PlannerInterpreterExecuteTest, SingleQueryBlock) try { auto request = context.scan("test_db", "test_table_1") @@ -92,282 +92,6 @@ Union: } CATCH -TEST_F(PlannerInterpreterExecuteTest, ComplexQuery) -try -{ - auto request = context.scan("test_db", "test_table_1") - .project({"s1", "s2", "s3"}) - .project({"s1", "s2"}) - .project("s1") - .build(context); - { - String expected = R"( -Union: - Expression x 10: - Expression: - Expression: - Expression: - Expression: - Expression: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - request = context.scan("test_db", "test_table_1") - .project({"s1", "s2", "s3"}) - .topN({{"s1", true}, {"s2", false}}, 10) - .project({"s1", "s2"}) - .build(context); - { - String expected = R"( -Union: - Expression x 10: - Expression: - Expression: - SharedQuery: - MergeSorting, limit = 10 - Union: - PartialSorting x 10: limit = 10 - Expression: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - request = context.scan("test_db", "test_table_1") - .project({"s1", "s2", "s3"}) - .topN({{"s1", true}, {"s2", false}}, 10) - .project({"s1", "s2"}) - .aggregation({Max(col("s1"))}, {col("s1"), col("s2")}) - .project({"max(s1)", "s1", "s2"}) - .build(context); - { - String expected = R"( -Union: - Expression x 10: - Expression: - Expression: - Expression: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - Expression x 10: - Expression: - SharedQuery: - MergeSorting, limit = 10 - Union: - PartialSorting x 10: limit = 10 - Expression: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - request = context.scan("test_db", "test_table_1") - .project({"s1", "s2", "s3"}) - .topN({{"s1", true}, {"s2", false}}, 10) - .project({"s1", "s2"}) - .aggregation({Max(col("s1"))}, {col("s1"), col("s2")}) - .project({"max(s1)", "s1", "s2"}) - .filter(eq(col("s1"), col("s2"))) - .project({"max(s1)", "s1"}) - .limit(10) - .build(context); - { - String expected = R"( -Union: - Expression x 10: - SharedQuery: - Limit, limit = 10 - Union: - Limit x 10, limit = 10 - Expression: - Expression: - Filter - Expression: - Expression: - Expression: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - Expression x 10: - Expression: - SharedQuery: - MergeSorting, limit = 10 - Union: - PartialSorting x 10: limit = 10 - Expression: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - // Join Source. 
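The interpreter tests in this file all follow one pattern: compose a request with `DAGRequestBuilder`, then assert the exact `BlockInputStream` tree the interpreter produces. A hedged reading of the assertion, inferred from the expected strings themselves (indentation encodes the parent/child relation, `x N` marks N parallel streams, and the trailing argument is the concurrency the interpreter is asked for):

```cpp
// Sketch only, not part of the diff: with concurrency 10, the mock scan is
// split into ten parallel Expression streams merged by the enclosing Union.
String expected = R"(
Union:
 Expression x 10:
  MockTableScan)";
ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); // 10 = requested concurrency
```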
- DAGRequestBuilder table1 = context.scan("test_db", "r_table"); - DAGRequestBuilder table2 = context.scan("test_db", "l_table"); - DAGRequestBuilder table3 = context.scan("test_db", "r_table"); - DAGRequestBuilder table4 = context.scan("test_db", "l_table"); - - request = table1.join( - table2.join( - table3.join(table4, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) - .build(context); - { - String expected = R"( -CreatingSets - Union: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - MockTableScan - Union x 2: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - Expression: - HashJoinProbe: - Expression: - MockTableScan - Union: - Expression x 10: - Expression: - HashJoinProbe: - Expression: - MockTableScan)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - request = context.receive("sender_1") - .project({"s1", "s2", "s3"}) - .project({"s1", "s2"}) - .project("s1") - .build(context); - { - String expected = R"( -Union: - Expression x 10: - Expression: - Expression: - Expression: - Expression: - Expression: - Expression: - MockExchangeReceiver)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - request = context.receive("sender_1") - .project({"s1", "s2", "s3"}) - .project({"s1", "s2"}) - .project("s1") - .exchangeSender(tipb::Broadcast) - .build(context); - { - String expected = R"( -Union: - MockExchangeSender x 10 - Expression: - Expression: - Expression: - Expression: - Expression: - Expression: - Expression: - MockExchangeReceiver)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - // only join + ExchangeReceiver - DAGRequestBuilder receiver1 = context.receive("sender_l"); - DAGRequestBuilder receiver2 = context.receive("sender_r"); - DAGRequestBuilder receiver3 = context.receive("sender_l"); - DAGRequestBuilder receiver4 = context.receive("sender_r"); - - request = receiver1.join( - receiver2.join( - receiver3.join(receiver4, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) - .build(context); - { - String expected = R"( -CreatingSets - Union: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - MockExchangeReceiver - Union x 2: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - Expression: - HashJoinProbe: - Expression: - MockExchangeReceiver - Union: - Expression x 10: - Expression: - HashJoinProbe: - Expression: - MockExchangeReceiver)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } - - // join + receiver + sender - // TODO: Find a way to write the request easier. 
- DAGRequestBuilder receiver5 = context.receive("sender_l"); - DAGRequestBuilder receiver6 = context.receive("sender_r"); - DAGRequestBuilder receiver7 = context.receive("sender_l"); - DAGRequestBuilder receiver8 = context.receive("sender_r"); - request = receiver5.join( - receiver6.join( - receiver7.join(receiver8, - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left), - {col("join_c")}, - ASTTableJoin::Kind::Left) - .exchangeSender(tipb::PassThrough) - .build(context); - { - String expected = R"( -CreatingSets - Union: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - MockExchangeReceiver - Union x 2: - HashJoinBuildBlockInputStream x 10: , join_kind = Left - Expression: - Expression: - Expression: - HashJoinProbe: - Expression: - MockExchangeReceiver - Union: - MockExchangeSender x 10 - Expression: - Expression: - HashJoinProbe: - Expression: - MockExchangeReceiver)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); - } -} -CATCH - TEST_F(PlannerInterpreterExecuteTest, ParallelQuery) try { @@ -673,5 +397,548 @@ CreatingSets } CATCH +TEST_F(PlannerInterpreterExecuteTest, MultipleQueryBlockWithSource) +try +{ + auto request = context.scan("test_db", "test_table_1") + .project({"s1", "s2", "s3"}) + .project({"s1", "s2"}) + .project({"s1"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .project({"s1", "s2", "s3"}) + .topN({{"s1", true}, {"s2", false}}, 10) + .project({"s1", "s2"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + SharedQuery: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .project({"s1", "s2", "s3"}) + .topN({{"s1", true}, {"s2", false}}, 10) + .project({"s1", "s2"}) + .aggregation({Max(col("s1"))}, {col("s1"), col("s2")}) + .project({"max(s1)", "s1", "s2"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + Expression: + SharedQuery: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .project({"s1", "s2", "s3"}) + .topN({{"s1", true}, {"s2", false}}, 10) + .project({"s1", "s2"}) + .aggregation({Max(col("s1"))}, {col("s1"), col("s2")}) + .project({"max(s1)", "s1", "s2"}) + .filter(eq(col("s1"), col("s2"))) + .project({"max(s1)", "s1"}) + .limit(10) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + SharedQuery: + Limit, limit = 10 + Union: + Limit x 10, limit = 10 + Expression: + Expression: + Filter + Expression: + Expression: + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + Expression: + SharedQuery: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = 
context.receive("sender_1") + .project({"s1", "s2", "s3"}) + .project({"s1", "s2"}) + .project({"s1"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + MockExchangeReceiver)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.receive("sender_1") + .project({"s1", "s2", "s3"}) + .project({"s1", "s2"}) + .project({"s1"}) + .exchangeSender(tipb::Broadcast) + .build(context); + { + String expected = R"( +Union: + MockExchangeSender x 10 + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + MockExchangeReceiver)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } +} +CATCH + +TEST_F(PlannerInterpreterExecuteTest, Window) +try +{ + auto request = context + .scan("test_db", "test_table") + .sort({{"s1", true}, {"s2", false}}, true) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table") + .sort({{"s1", true}, {"s2", false}}, true) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()) + .project({"s1", "s2", "RowNumber()"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + request = context.scan("test_db", "test_table_1") + .sort({{"s1", true}, {"s2", false}}, true) + .project({"s1", "s2", "s3"}) + .window(RowNumber(), {"s1", true}, {"s1", false}, buildDefaultRowsFrame()) + .project({"s1", "s2", "s3", "RowNumber()"}) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Union: + Expression x 10: + Expression: + SharedQuery: + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } +} +CATCH + +/// todo support FineGrainedShuffle +/* +TEST_F(PlannerInterpreterExecuteTest, FineGrainedShuffle) +try +{ + // fine-grained shuffle is enabled. 
+ const uint64_t enable = 8; + const uint64_t disable = 0; + auto request = context + .receive("sender_1", enable) + .sort({{"s1", true}, {"s2", false}}, true, enable) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame(), enable) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + Expression: + Window: , function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting: , limit = 0 + PartialSorting: : limit = 0 + Expression: + MockExchangeReceiver + )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + auto topn_request = context + .receive("sender_1") + .topN("s2", false, 10) + .build(context); + String topn_expected = R"( +Union: + SharedQuery x 10: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + MockExchangeReceiver + )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(topn_expected, topn_request, 10); + + // fine-grained shuffle is disabled. + request = context + .receive("sender_1", disable) + .sort({{"s1", true}, {"s2", false}}, true, disable) + .window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame(), disable) + .build(context); + { + String expected = R"( +Union: + Expression x 10: + SharedQuery: + Expression: + Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} + Expression: + MergeSorting, limit = 0 + Union: + PartialSorting x 10: limit = 0 + Expression: + MockExchangeReceiver + )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + topn_request = context + .receive("sender_1") + .topN("s2", false, 10) + .build(context); + ASSERT_BLOCKINPUTSTREAM_EQAUL(topn_expected, topn_request, 10); +} +CATCH +*/ + +TEST_F(PlannerInterpreterExecuteTest, Join) +try +{ + // TODO: Find a way to write the request easier. + { + // Join Source. 
+ DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + DAGRequestBuilder table3 = context.scan("test_db", "r_table"); + DAGRequestBuilder table4 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2.join( + table3.join(table4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .build(context); + + String expected = R"( +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockTableScan + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + MockTableScan + Union: + Expression x 10: + Expression: + HashJoinProbe: + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + { + // only join + ExchangeReceiver + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + DAGRequestBuilder receiver3 = context.receive("sender_l"); + DAGRequestBuilder receiver4 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2.join( + receiver3.join(receiver4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .build(context); + + String expected = R"( +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockExchangeReceiver + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + MockExchangeReceiver + Union: + Expression x 10: + Expression: + HashJoinProbe: + Expression: + MockExchangeReceiver)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + { + // join + receiver + sender + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + DAGRequestBuilder receiver3 = context.receive("sender_l"); + DAGRequestBuilder receiver4 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2.join( + receiver3.join(receiver4, + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left), + {col("join_c")}, + ASTTableJoin::Kind::Left) + .exchangeSender(tipb::PassThrough) + .build(context); + + String expected = R"( +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockExchangeReceiver + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + MockExchangeReceiver + Union: + MockExchangeSender x 10 + Expression: + Expression: + HashJoinProbe: + Expression: + MockExchangeReceiver)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } +} +CATCH + +TEST_F(PlannerInterpreterExecuteTest, JoinThenAgg) +try +{ + { + // Left Join. 
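The join trees here and in `JoinThenAgg` below are easiest to read with the printed operators decoded. A hedged legend, inferred from the expected trees rather than from the interpreter sources:

```cpp
// CreatingSets                        -- runs build-side subqueries first
//   HashJoinBuildBlockInputStream x N -- N streams filling the join hash table
//   HashJoinProbe                     -- probe side streamed against the table
//   NonJoined                         -- right join only: build-side rows that
//                                        matched nothing, appended to the output
// ParallelAggregating then merges the probe streams into the final groups.
```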
+ DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2, + {col("join_c")}, + ASTTableJoin::Kind::Left) + .aggregation({Max(col("r_a"))}, {col("join_c")}) + .build(context); + String expected = R"( +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + HashJoinProbe: + Expression: + MockTableScan)"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + { + // Right Join + DAGRequestBuilder table1 = context.scan("test_db", "r_table"); + DAGRequestBuilder table2 = context.scan("test_db", "l_table"); + + auto request = table1.join( + table2, + {col("join_c")}, + ASTTableJoin::Kind::Right) + .aggregation({Max(col("r_a"))}, {col("join_c")}) + .build(context); + String expected = R"( +CreatingSets + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Right + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + HashJoinProbe: + Expression: + Expression: + MockTableScan + Expression x 10: + NonJoined: )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); + } + + { + // Right join + receiver + sender + DAGRequestBuilder receiver1 = context.receive("sender_l"); + DAGRequestBuilder receiver2 = context.receive("sender_r"); + + auto request = receiver1.join( + receiver2, + {col("join_c")}, + ASTTableJoin::Kind::Right) + .aggregation({Sum(col("r_a"))}, {col("join_c")}) + .exchangeSender(tipb::PassThrough) + .limit(10) + .build(context); + String expected = R"( +CreatingSets + Union: + HashJoinBuildBlockInputStream x 20: , join_kind = Right + Expression: + Expression: + MockExchangeReceiver + Union: + MockExchangeSender x 20 + SharedQuery: + Limit, limit = 10 + Union: + Limit x 20, limit = 10 + Expression: + Expression: + SharedQuery: + ParallelAggregating, max_threads: 20, final: true + Expression x 20: + HashJoinProbe: + Expression: + Expression: + MockExchangeReceiver + Expression x 20: + NonJoined: )"; + ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 20); + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_projection_executor.cpp b/dbms/src/Flash/tests/gtest_projection_executor.cpp new file mode 100644 index 00000000000..65b75bad9ab --- /dev/null +++ b/dbms/src/Flash/tests/gtest_projection_executor.cpp @@ -0,0 +1,228 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
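The new projection executor test below drives `scan -> project` requests over a five-column mock table and checks the result at several concurrency levels with an order-insensitive comparison. A minimal sketch of that pattern, assuming the fixture members the file defines (`db_name`, `table_name`, `col_names`, `col4_sorted_asc`):

```cpp
// Hedged sketch mirroring executeWithConcurrency() below: build
// `SELECT col4 FROM test_db.projection_test_table`, then compare rows
// regardless of order at concurrency 1, 3, 5, 7 and 9.
auto request = context.scan(db_name, table_name).project({col_names[4]}).build(context);
for (size_t concurrency = 1; concurrency < 10; concurrency += 2)
    ASSERT_COLUMNS_EQ_UR(executeStreams(request, concurrency), {toNullableVec<Int32>(col_names[4], col4_sorted_asc)});
```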
+ +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorProjectionTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColDataString = std::vector::FieldType>>; + using ColDataInt32 = std::vector::FieldType>>; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_name}, + {{col_names[0], TiDB::TP::TypeString}, + {col_names[1], TiDB::TP::TypeString}, + {col_names[2], TiDB::TP::TypeString}, + {col_names[3], TiDB::TP::TypeLong}, + {col_names[4], TiDB::TP::TypeLong}}, + {toNullableVec(col_names[0], col0), + toNullableVec(col_names[1], col1), + toNullableVec(col_names[2], col2), + toNullableVec(col_names[3], col3), + toNullableVec(col_names[4], col4)}); + } + + template + std::shared_ptr buildDAGRequest(T param) + { + return context.scan(db_name, table_name).project(param).build(context); + }; + + void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) + { + for (size_t i = 1; i < 10; i += 2) + { + ASSERT_COLUMNS_EQ_UR(executeStreams(request, i), expect_columns); + } + } + + /// Prepare column data + const ColDataString col0{"col0-0", "col0-1", "", "col0-2", {}, "col0-3", ""}; + const ColDataString col1{"col1-0", {}, "", "col1-1", "", "col1-2", "col1-3"}; + const ColDataString col2{"", "col2-0", "col2-1", {}, "col2-3", {}, "col2-4"}; + const ColDataInt32 col3{1, {}, 0, -111111, {}, 0, 9999}; + + /** Each value in col4 should be different from each other so that topn + * could sort the columns into an unique result, or multi-results could + * be right. + */ + const ColDataInt32 col4{0, 5, -123, -234, {}, 24353, 9999}; + + /// Results after sorted by col4 + const ColDataString col0_sorted_asc{{}, "col0-2", "", "col0-0", "col0-1", "", "col0-3"}; + const ColDataString col1_sorted_asc{"", "col1-1", "", "col1-0", {}, "col1-3", "col1-2"}; + const ColDataString col2_sorted_asc{"col2-3", {}, "col2-1", "", "col2-0", "col2-4", {}}; + const ColDataInt32 col3_sorted_asc{{}, -111111, 0, 1, {}, 9999, 0}; + const ColDataInt32 col4_sorted_asc{{}, -234, -123, 0, 5, 9999, 24353}; + + /// Prepare some names + std::vector col_names{"col0", "col1", "col2", "col3", "col4"}; + const String db_name{"test_db"}; + const String table_name{"projection_test_table"}; +}; + +TEST_F(ExecutorProjectionTestRunner, Projection) +try +{ + wrapForDisEnablePlanner([&]() { + /// Check single column + auto request = buildDAGRequest({col_names[4]}); + executeWithConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Check multi columns + request = buildDAGRequest({col_names[0], col_names[4]}); + executeWithConcurrency(request, + { + toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + }); + + /// Check multi columns + request = buildDAGRequest({col_names[0], col_names[1], col_names[4]}); + executeWithConcurrency(request, + {toNullableVec(col_names[0], col0_sorted_asc), + toNullableVec(col_names[1], col1_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Check duplicate columns + request = buildDAGRequest({col_names[4], col_names[4], col_names[4]}); + executeWithConcurrency(request, + {toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc), + toNullableVec(col_names[4], col4_sorted_asc)}); + + { + /// Check large number of columns + const size_t col_num = 100; + MockColumnNameVec projection_input; + ColumnsWithTypeAndName columns; + auto expect_column = 
toNullableVec(col_names[4], col4_sorted_asc); + + for (size_t i = 0; i < col_num; ++i) + { + projection_input.push_back(col_names[4]); + columns.push_back(expect_column); + } + + request = buildDAGRequest(projection_input); + executeWithConcurrency(request, columns); + } + }); +} +CATCH + +TEST_F(ExecutorProjectionTestRunner, ProjectionFunction) +try +{ + wrapForDisEnablePlanner([&]() { + std::shared_ptr request; + + /// Test "equal" function + + /// Data type: TypeString + request = buildDAGRequest({eq(col(col_names[0]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({eq(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({eq(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + + /// Test "greater" function + + /// Data type: TypeString + request = buildDAGRequest({gt(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({gt(col(col_names[1]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 1, {}, 1, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({gt(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 1, {}, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({gt(col(col_names[4]), col(col_names[3])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 0, {}, 0, 1}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + + /// Test "and" function + + /// Data type: TypeString + request = buildDAGRequest({And(col(col_names[0]), col(col_names[0])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + request = buildDAGRequest({And(col(col_names[0]), col(col_names[1])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({0, 0, 0, 0, 0, 0, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({And(col(col_names[3]), col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 0, 0, {}, 1, 0}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Test "not" function + + /// Data type: TypeString + request = buildDAGRequest({NOT(col(col_names[0])), NOT(col(col_names[1])), NOT(col(col_names[2])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), + toNullableVec({1, 1, 1, 1, {}, 1, 1}), + toNullableVec({1, {}, 1, 1, 1, 1, {}}), + toNullableVec(col_names[4], col4_sorted_asc)}); + + /// Data type: TypeLong + request = buildDAGRequest({NOT(col(col_names[3])), NOT(col(col_names[4])), col(col_names[4])}); + executeWithConcurrency(request, + {toNullableVec({{}, 0, 1, 0, {}, 0, 1}), + toNullableVec({{}, 0, 0, 1, 0, 0, 0}), + toNullableVec(col_names[4], 
col4_sorted_asc)}); + + /// TODO more functions... + }); +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_topn_executor.cpp b/dbms/src/Flash/tests/gtest_topn_executor.cpp new file mode 100644 index 00000000000..a6ba3183118 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_topn_executor.cpp @@ -0,0 +1,225 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorTopNTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColStringType = std::optional::FieldType>; + using ColInt32Type = std::optional::FieldType>; + using ColumnWithString = std::vector; + using ColumnWithInt32 = std::vector; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_single_name}, + {{single_col_name, TiDB::TP::TypeString}}, + {toNullableVec(single_col_name, col0)}); + + context.addMockTable({db_name, table_name}, + {{col_name[0], TiDB::TP::TypeLong}, + {col_name[1], TiDB::TP::TypeString}, + {col_name[2], TiDB::TP::TypeString}, + {col_name[3], TiDB::TP::TypeLong}}, + {toNullableVec(col_name[0], col_age), + toNullableVec(col_name[1], col_gender), + toNullableVec(col_name[2], col_country), + toNullableVec(col_name[3], col_salary)}); + } + + std::shared_ptr buildDAGRequest(const String & table_name, const String & col_name, bool is_desc, int limit_num) + { + return context.scan(db_name, table_name).topN(col_name, is_desc, limit_num).build(context); + } + + std::shared_ptr buildDAGRequest(const String & table_name, MockOrderByItemVec order_by_items, int limit, MockAstVec func_proj_ast = {}, MockColumnNameVec out_proj_ast = {}) + { + if (func_proj_ast.size() == 0) + return context.scan(db_name, table_name).topN(order_by_items, limit).build(context); + else + return context.scan(db_name, table_name).project(func_proj_ast).topN(order_by_items, limit).project(out_proj_ast).build(context); + } + + /// Prepare some names + const String db_name{"test_db"}; + + const String table_single_name{"topn_single_table"}; /// For single column test + const String single_col_name{"single_col"}; + ColumnWithString col0{"col0-0", "col0-1", "col0-2", {}, "col0-4", {}, "col0-6", "col0-7"}; + + const String table_name{"clerk"}; + const std::vector col_name{"age", "gender", "country", "salary"}; + ColumnWithInt32 col_age{{}, 27, 32, 36, {}, 34}; + ColumnWithString col_gender{"female", "female", "male", "female", "male", "male"}; + ColumnWithString col_country{"korea", "usa", "usa", "china", "china", "china"}; + ColumnWithInt32 col_salary{1300, 0, {}, 900, {}, -300}; +}; + +TEST_F(ExecutorTopNTestRunner, TopN) +try +{ + wrapForDisEnablePlanner([&]() { + std::shared_ptr request; + std::vector expect_cols; + + { + /// Test single column + size_t col_data_num = col0.size(); + for (size_t i = 0; i <= 1; ++i) + { + bool is_desc; + is_desc = static_cast(i); /// Set descending or ascending order + if (is_desc) + sort(col0.begin(),
col0.end(), std::greater()); /// Sort col0 for the following comparison + else + sort(col0.begin(), col0.end()); + + for (size_t limit_num = 0; limit_num <= col_data_num + 5; ++limit_num) + { + request = buildDAGRequest(table_single_name, single_col_name, is_desc, limit_num); + + expect_cols.clear(); + if (limit_num == 0 || limit_num > col_data_num) + expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.end()))}); + else + expect_cols.push_back({toNullableVec(single_col_name, ColumnWithString(col0.begin(), col0.begin() + limit_num))}); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 2), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 4), expect_cols[0]); + ASSERT_COLUMNS_EQ_R(executeStreams(request, 8), expect_cols[0]); + } + } + } + + { + /// Test multi-columns + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{36, 34, 32, 27, {}, {}}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "male", "female"}), + toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "usa", "china", "korea"}), + toNullableVec(col_name[3], ColumnWithInt32{900, -300, {}, 0, {}, 1300})}, + {toNullableVec(col_name[0], ColumnWithInt32{32, {}, 34, 27, 36, {}}), + toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), + toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "china", "korea"}), + toNullableVec(col_name[3], ColumnWithInt32{{}, {}, -300, 0, 900, 1300})}, + {toNullableVec(col_name[0], ColumnWithInt32{34, {}, 32, 36, {}, 27}), + toNullableVec(col_name[1], ColumnWithString{"male", "male", "male", "female", "female", "female"}), + toNullableVec(col_name[2], ColumnWithString{"china", "china", "usa", "china", "korea", "usa"}), + toNullableVec(col_name[3], ColumnWithInt32{-300, {}, {}, 900, 1300, 0})}}; + + std::vector order_by_items{ + /// select * from clerk order by age DESC, gender DESC; + {MockOrderByItem(col_name[0], true), MockOrderByItem(col_name[1], true)}, + /// select * from clerk order by gender DESC, salary ASC; + {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[3], false)}, + /// select * from clerk order by gender DESC, country ASC, salary DESC; + {MockOrderByItem(col_name[1], true), MockOrderByItem(col_name[2], false), MockOrderByItem(col_name[3], true)}}; + + size_t test_num = expect_cols.size(); + + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(table_name, order_by_items[i], 100); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[i]); + } + } + }); +} +CATCH + +TEST_F(ExecutorTopNTestRunner, TopNFunction) +try +{ + wrapForDisEnablePlanner([&]() { + std::shared_ptr request; + std::vector expect_cols; + MockColumnNameVec output_projection{col_name[0], col_name[1], col_name[2], col_name[3]}; + MockAstVec func_projection; // Do function operation for topn + MockOrderByItemVec order_by_items; + ASTPtr col0_ast = col(col_name[0]); + ASTPtr col1_ast = col(col_name[1]); + ASTPtr col2_ast = col(col_name[2]); + ASTPtr col3_ast = col(col_name[3]); + ASTPtr func_ast; + + { + /// "and" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, {}, 32, 27, 36, 34}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"korea", "china", "usa", "usa", "china", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{1300, 
{}, {}, 0, 900, -300})}}; + + { + /// select * from clerk order by age and salary ASC limit 100; + order_by_items = {MockOrderByItem("and(age, salary)", false)}; + func_ast = And(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + } + } + + { + /// "equal" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{27, 36, 34, 32, {}, {}}), + toNullableVec(col_name[1], ColumnWithString{"female", "female", "male", "male", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"usa", "china", "china", "usa", "korea", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{0, 900, -300, {}, 1300, {}})}}; + + { + /// select age, salary from clerk order by age = salary DESC limit 100; + order_by_items = {MockOrderByItem("equals(age, salary)", true)}; + func_ast = eq(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + } + } + + { + /// "greater" function + expect_cols = {{toNullableVec(col_name[0], ColumnWithInt32{{}, 32, {}, 36, 27, 34}), + toNullableVec(col_name[1], ColumnWithString{"female", "male", "male", "female", "female", "male"}), + toNullableVec(col_name[2], ColumnWithString{"korea", "usa", "china", "china", "usa", "china"}), + toNullableVec(col_name[3], ColumnWithInt32{1300, {}, {}, 900, 0, -300})}}; + + { + /// select age, gender, country, salary from clerk order by age > salary ASC limit 100; + order_by_items = {MockOrderByItem("greater(age, salary)", false)}; + func_ast = gt(col(col_name[0]), col(col_name[3])); + func_projection = {col0_ast, col1_ast, col2_ast, col3_ast, func_ast}; + + request = buildDAGRequest(table_name, order_by_items, 100, func_projection, output_projection); + ASSERT_COLUMNS_EQ_R(executeStreams(request), expect_cols[0]); + } + } + + /// TODO more functions... + }); +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Functions/CollationOperatorOptimized.h b/dbms/src/Functions/CollationOperatorOptimized.h new file mode 100644 index 00000000000..395ecc5b9eb --- /dev/null +++ b/dbms/src/Functions/CollationOperatorOptimized.h @@ -0,0 +1,210 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +template +ALWAYS_INLINE inline int signum(T val) +{ + return (0 < val) - (val < 0); +} + +// Check equality is much faster than other comparison. 
+// - check size first +// - return 0 if equal else 1 +__attribute__((flatten, always_inline, pure)) inline uint8_t RawStrEqualCompare(const std::string_view & lhs, const std::string_view & rhs) +{ + return StringRef(lhs) == StringRef(rhs) ? 0 : 1; +} + +// Compare str view by memcmp +__attribute__((flatten, always_inline, pure)) inline int RawStrCompare(const std::string_view & v1, const std::string_view & v2) +{ + return signum(v1.compare(v2)); +} + +constexpr char SPACE = ' '; + +// Remove tail space +__attribute__((flatten, always_inline, pure)) inline std::string_view RightTrim(const std::string_view & v) +{ + if (likely(v.empty() || v.back() != SPACE)) + return v; + size_t end = v.find_last_not_of(SPACE); + return end == std::string_view::npos ? std::string_view{} : std::string_view(v.data(), end + 1); +} + +__attribute__((flatten, always_inline, pure)) inline int RtrimStrCompare(const std::string_view & va, const std::string_view & vb) +{ + return RawStrCompare(RightTrim(va), RightTrim(vb)); +} + +// If true, only need to check equal or not. +template +struct IsEqualRelated +{ + static constexpr const bool value = false; +}; + +// For `EqualsOp` and `NotEqualsOp`, value is true. +template +struct IsEqualRelated> +{ + static constexpr const bool value = true; +}; +template +struct IsEqualRelated> +{ + static constexpr const bool value = true; +}; + +// Loop columns and invoke callback for each pair. +template +__attribute__((flatten, always_inline)) inline void LoopTwoColumns( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const ColumnString::Chars_t & b_data, + const ColumnString::Offsets & b_offsets, + size_t size, + F && func) +{ + for (size_t i = 0; i < size; ++i) + { + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + size_t b_size = StringUtil::sizeAt(b_offsets, i) - 1; + const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); + const auto * b_ptr = reinterpret_cast(&b_data[StringUtil::offsetAt(b_offsets, i)]); + + func({a_ptr, a_size}, {b_ptr, b_size}, i); + } +} + +// Loop one column and invoke callback for each pair. +template +__attribute__((flatten, always_inline)) inline void LoopOneColumn( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + size_t size, + F && func) +{ + for (size_t i = 0; i < size; ++i) + { + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); + + func({a_ptr, a_size}, i); + } +} + +// Handle str-column compare str-column. 
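The str-vs-str entry point this comment introduces only fast-paths the binary collations; everything else falls back to the generic loop in `FunctionsComparison.h`. The trimming helpers above exist because `UTF8_BIN`/`UTF8MB4_BIN` are PAD SPACE collations, where trailing spaces must not influence comparison. A hedged usage sketch of those helpers:

```cpp
// Both views compare equal once trailing spaces are trimmed away.
std::string_view a = "abc  ";
std::string_view b = "abc";
uint8_t eq = RawStrEqualCompare(RightTrim(a), RightTrim(b)); // 0 => equal
int ord = RtrimStrCompare(a, b);                             // 0 => same ordering
```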
+// - Optimize UTF8_BIN and UTF8MB4_BIN +// - Check if columns do NOT contain tail space +// - If Op is `EqualsOp` or `NotEqualsOp`, optimize comparison by faster way +template +ALWAYS_INLINE inline bool StringVectorStringVector( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const ColumnString::Chars_t & b_data, + const ColumnString::Offsets & b_offsets, + const TiDB::TiDBCollatorPtr & collator, + Result & c) +{ + bool use_optimized_path = false; + + switch (collator->getCollatorId()) + { + case TiDB::ITiDBCollator::UTF8MB4_BIN: + case TiDB::ITiDBCollator::UTF8_BIN: + { + size_t size = a_offsets.size(); + + LoopTwoColumns(a_data, a_offsets, b_data, b_offsets, size, [&c](const std::string_view & va, const std::string_view & vb, size_t i) { + if constexpr (IsEqualRelated::value) + { + c[i] = Op::apply(RawStrEqualCompare(RightTrim(va), RightTrim(vb)), 0); + } + else + { + c[i] = Op::apply(RtrimStrCompare(va, vb), 0); + } + }); + + use_optimized_path = true; + + break; + } + default: + break; + } + return use_optimized_path; +} + +// Handle str-column compare const-str. +// - Optimize UTF8_BIN and UTF8MB4_BIN +// - Right trim const-str first +// - Check if column does NOT contain tail space +// - If Op is `EqualsOp` or `NotEqualsOp`, optimize comparison by faster way +template +ALWAYS_INLINE inline bool StringVectorConstant( + const ColumnString::Chars_t & a_data, + const ColumnString::Offsets & a_offsets, + const std::string_view & b, + const TiDB::TiDBCollatorPtr & collator, + Result & c) +{ + bool use_optimized_path = false; + + switch (collator->getCollatorId()) + { + case TiDB::ITiDBCollator::UTF8MB4_BIN: + case TiDB::ITiDBCollator::UTF8_BIN: + { + size_t size = a_offsets.size(); + + std::string_view tar_str_view = RightTrim(b); // right trim const-str first + + LoopOneColumn(a_data, a_offsets, size, [&c, &tar_str_view](const std::string_view & view, size_t i) { + if constexpr (IsEqualRelated::value) + { + c[i] = Op::apply(RawStrEqualCompare(RightTrim(view), tar_str_view), 0); + } + else + { + c[i] = Op::apply(RawStrCompare(RightTrim(view), tar_str_view), 0); + } + }); + + use_optimized_path = true; + break; + } + default: + break; + } + return use_optimized_path; +} + +} // namespace DB diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 1c63a286452..8f7502fba85 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -301,6 +302,12 @@ struct StringComparisonWithCollatorImpl const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & c) { + bool optimized_path = StringVectorStringVector(a_data, a_offsets, b_data, b_offsets, collator, c); + if (optimized_path) + { + return; + } + size_t size = a_offsets.size(); for (size_t i = 0; i < size; ++i) @@ -317,10 +324,17 @@ struct StringComparisonWithCollatorImpl static void NO_INLINE stringVectorConstant( const ColumnString::Chars_t & a_data, const ColumnString::Offsets & a_offsets, - const std::string & b, + const std::string_view & b, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & c) { + bool optimized_path = StringVectorConstant(a_data, a_offsets, b, collator, c); + + if (optimized_path) + { + return; + } + size_t size = a_offsets.size(); ColumnString::Offset b_size = b.size(); const char * b_data = reinterpret_cast(b.data()); @@ -332,7 +346,7 @@ struct StringComparisonWithCollatorImpl } static void constantStringVector( 
- const std::string & a, + const std::string_view & a, const ColumnString::Chars_t & b_data, const ColumnString::Offsets & b_offsets, const TiDB::TiDBCollatorPtr & collator, @@ -342,8 +356,8 @@ struct StringComparisonWithCollatorImpl } static void constantConstant( - const std::string & a, - const std::string & b, + const std::string_view & a, + const std::string_view & b, const TiDB::TiDBCollatorPtr & collator, ResultType & c) { @@ -706,6 +720,25 @@ class FunctionComparison : public IFunction } } + static inline std::string_view genConstStrRef(const ColumnConst * c0_const) + { + std::string_view c0_const_str_ref{}; + if (c0_const) + { + if (const auto * c0_const_string = checkAndGetColumn(&c0_const->getDataColumn()); c0_const_string) + { + c0_const_str_ref = std::string_view(c0_const_string->getDataAt(0)); + } + else if (const auto * c0_const_fixed_string = checkAndGetColumn(&c0_const->getDataColumn()); c0_const_fixed_string) + { + c0_const_str_ref = std::string_view(c0_const_fixed_string->getDataAt(0)); + } + else + throw Exception("Logical error: ColumnConst contains not String nor FixedString column", ErrorCodes::ILLEGAL_COLUMN); + } + return c0_const_str_ref; + } + template bool executeStringWithCollator( Block & block, @@ -720,10 +753,13 @@ class FunctionComparison : public IFunction using ResultType = typename ResultColumnType::value_type; using StringImpl = StringComparisonWithCollatorImpl, ResultType>; + std::string_view c0_const_str_ref = genConstStrRef(c0_const); + std::string_view c1_const_str_ref = genConstStrRef(c1_const); + if (c0_const && c1_const) { ResultType res = 0; - StringImpl::constantConstant(c0_const->getValue(), c1_const->getValue(), collator, res); + StringImpl::constantConstant(c0_const_str_ref, c1_const_str_ref, collator, res); block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(c0_const->size(), toField(res)); return true; } @@ -745,12 +781,12 @@ class FunctionComparison : public IFunction StringImpl::stringVectorConstant( c0_string->getChars(), c0_string->getOffsets(), - c1_const->getValue(), + c1_const_str_ref, collator, c_res->getData()); else if (c0_const && c1_string) StringImpl::constantStringVector( - c0_const->getValue(), + c0_const_str_ref, c1_string->getChars(), c1_string->getOffsets(), collator, @@ -770,8 +806,8 @@ class FunctionComparison : public IFunction template bool executeString(Block & block, size_t result, const IColumn * c0, const IColumn * c1) const { - const ColumnString * c0_string = checkAndGetColumn(c0); - const ColumnString * c1_string = checkAndGetColumn(c1); + const auto * c0_string = checkAndGetColumn(c0); + const auto * c1_string = checkAndGetColumn(c1); const ColumnConst * c0_const = checkAndGetColumnConstStringOrFixedString(c0); const ColumnConst * c1_const = checkAndGetColumnConstStringOrFixedString(c1); diff --git a/dbms/src/Functions/FunctionsConversion.cpp b/dbms/src/Functions/FunctionsConversion.cpp index 118574ed33d..0446f76bd51 100644 --- a/dbms/src/Functions/FunctionsConversion.cpp +++ b/dbms/src/Functions/FunctionsConversion.cpp @@ -240,6 +240,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index ddf64a70ca1..e8333ceeeea 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ 
b/dbms/src/Functions/FunctionsConversion.h @@ -1751,6 +1751,120 @@ class FunctionDateFormat : public IFunction } }; +class FunctionGetFormat : public IFunction +{ +private: + static String get_format(const StringRef & time_type, const StringRef & location) + { + if (time_type == "DATE") + { + if (location == "USA") + return "%m.%d.%Y"; + else if (location == "JIS") + return "%Y-%m-%d"; + else if (location == "ISO") + return "%Y-%m-%d"; + else if (location == "EUR") + return "%d.%m.%Y"; + else if (location == "INTERNAL") + return "%Y%m%d"; + } + else if (time_type == "DATETIME" || time_type == "TIMESTAMP") + { + if (location == "USA") + return "%Y-%m-%d %H.%i.%s"; + else if (location == "JIS") + return "%Y-%m-%d %H:%i:%s"; + else if (location == "ISO") + return "%Y-%m-%d %H:%i:%s"; + else if (location == "EUR") + return "%Y-%m-%d %H.%i.%s"; + else if (location == "INTERNAL") + return "%Y%m%d%H%i%s"; + } + else if (time_type == "TIME") + { + if (location == "USA") + return "%h:%i:%s %p"; + else if (location == "JIS") + return "%H:%i:%s"; + else if (location == "ISO") + return "%H:%i:%s"; + else if (location == "EUR") + return "%H.%i.%s"; + else if (location == "INTERNAL") + return "%H%i%s"; + } + return ""; + } + +public: + static constexpr auto name = "getFormat"; + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (!arguments[0].type->isString()) + throw Exception("First argument for function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN); + if (!arguments[1].type->isString()) + throw Exception("Second argument for function " + getName() + " must be String", ErrorCodes::ILLEGAL_COLUMN); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + /** + * @brief The first argument is designed as a MySQL reserved word. You would encounter a syntax error when wrapping it in quotes in SQL. + * For example, select GET_FORMAT("DATE", "USA") will fail, while removing the quotes solves the problem. + * Thus the first argument should always be a ColumnConst.
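A few hedged input/output pairs for `get_format` above, matching MySQL's documented `GET_FORMAT` behavior, make the branch table quicker to scan:

```cpp
// Illustrative pairs; unknown combinations fall through to the empty string.
//   get_format("DATE",     "USA") -> "%m.%d.%Y"
//   get_format("DATETIME", "JIS") -> "%Y-%m-%d %H:%i:%s"
//   get_format("TIME",     "EUR") -> "%H.%i.%s"
```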
See details in the link below: + * https://dev.mysql.com/doc/refman/5.7/en/date-and-time-functions.html#function_get-format + * + * @return ColumnNumbers + */ + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) const override + { + const auto * location_col = checkAndGetColumn(block.getByPosition(arguments[1]).column.get()); + assert(location_col); + size_t size = location_col->size(); + const auto & time_type_col = block.getByPosition(arguments[0]).column; + auto col_to = ColumnString::create(); + + if (time_type_col->isColumnConst()) + { + const auto & time_type_col_const = checkAndGetColumnConst(time_type_col.get()); + const auto & time_type = time_type_col_const->getValue(); + + ColumnString::Chars_t & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + auto max_length = 18; + data_to.resize(size * max_length); + offsets_to.resize(size); + WriteBufferFromVector write_buffer(data_to); + for (size_t i = 0; i < size; ++i) + { + const auto & location = location_col->getDataAt(i); + const auto & result = get_format(StringRef(time_type), location); + write_buffer.write(result.c_str(), result.size()); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + data_to.resize(write_buffer.count()); + block.getByPosition(result).column = std::move(col_to); + } + else + { + throw Exception("First argument for function " + getName() + " must be String constant", ErrorCodes::ILLEGAL_COLUMN); + } + } +}; + struct NameStrToDateDate { static constexpr auto name = "strToDateDate"; diff --git a/dbms/src/Functions/FunctionsDuration.cpp b/dbms/src/Functions/FunctionsDuration.cpp index ea7b86ac670..9ccafd2794d 100644 --- a/dbms/src/Functions/FunctionsDuration.cpp +++ b/dbms/src/Functions/FunctionsDuration.cpp @@ -97,6 +97,57 @@ void FunctionDurationSplit::executeImpl(Block & block, const ColumnNumbers ErrorCodes::ILLEGAL_COLUMN); }; +template +DataTypePtr FunctionMyDurationToSec::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +{ + if (!arguments[0].type->isMyTime()) + { + throw Exception( + fmt::format("Illegal type {} of the first argument of function {}", arguments[0].type->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + return std::make_shared(); +} + +template +void FunctionMyDurationToSec::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const +{ + const auto * from_type = checkAndGetDataType(block.getByPosition(arguments[0]).type.get()); + if (from_type == nullptr) + { + throw Exception( + fmt::format( + "Illegal column {} of the first argument of function {}", + block.getByPosition(arguments[0]).column->getName(), + name), + ErrorCodes::ILLEGAL_COLUMN); + } + + using FromFieldType = typename DataTypeMyDuration::FieldType; + const auto * col_from = checkAndGetColumn>(block.getByPosition(arguments[0]).column.get()); + if (col_from != nullptr) + { + const typename ColumnVector::Container & vec_from = col_from->getData(); + const size_t size = vec_from.size(); + auto col_to = ColumnVector::create(size); + typename ColumnVector::Container & vec_to = col_to->getData(); + + for (size_t i = 0; i < size; ++i) + { + MyDuration val(vec_from[i], from_type->getFsp()); + vec_to[i] = Impl::apply(val); + } + block.getByPosition(result).column = std::move(col_to); + } + else + throw Exception( + fmt::format( + "Illegal column {} of the first argument of function {}", + 
block.getByPosition(arguments[0]).column->getName(), + name), + ErrorCodes::ILLEGAL_COLUMN); +} + struct DurationSplitHourImpl { static constexpr auto name = "hour"; @@ -133,11 +184,27 @@ struct DurationSplitMicroSecondImpl } }; +struct TiDBTimeToSecTransformerImpl +{ + static constexpr auto name = "tidbTimeToSec"; + static Int64 apply(const MyDuration & val) + { + Int64 sign = 1; + if (val.isNeg()) + { + sign = -1; + } + return sign * (val.hours() * 3600 + val.minutes() * 60 + val.seconds()); + } +}; + using FunctionDurationHour = FunctionDurationSplit; using FunctionDurationMinute = FunctionDurationSplit; using FunctionDurationSecond = FunctionDurationSplit; using FunctionDurationMicroSecond = FunctionDurationSplit; +using FunctionToTiDBTimeToSec = FunctionMyDurationToSec; + void registerFunctionsDuration(FunctionFactory & factory) { factory.registerFunction(); @@ -146,5 +213,7 @@ void registerFunctionsDuration(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + + factory.registerFunction(); } } // namespace DB diff --git a/dbms/src/Functions/FunctionsDuration.h b/dbms/src/Functions/FunctionsDuration.h index 4247cde03ff..5bc54d425f4 100644 --- a/dbms/src/Functions/FunctionsDuration.h +++ b/dbms/src/Functions/FunctionsDuration.h @@ -69,4 +69,23 @@ class FunctionDurationSplit : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override; }; +template +class FunctionMyDurationToSec : public IFunction +{ +public: + static constexpr auto name = Impl::name; + + static FunctionPtr create(const Context &) { return std::make_shared(); }; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override; +}; + } // namespace DB \ No newline at end of file diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/FunctionsGeo.cpp index a6fd2ff522e..02e11b66d77 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/FunctionsGeo.cpp @@ -28,13 +28,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event PolygonsAddedToPool; -extern const Event PolygonsInPoolAllocatedBytes; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -60,9 +53,6 @@ ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, P /// To allocate memory. 
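    /// init() builds the polygon's internal lookup structures up front; because this impl comes from
    /// an object pool (see callPointInPolygonImplWithPool), later calls with the same constant
    /// polygon can reuse that work.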
ptr->init(); - ProfileEvents::increment(ProfileEvents::PolygonsAddedToPool); - ProfileEvents::increment(ProfileEvents::PolygonsInPoolAllocatedBytes, ptr->getAllocatedBytes()); - return ptr.release(); }; @@ -121,30 +111,30 @@ class FunctionPointInPolygon : public IFunction throw Exception("Too few arguments", ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION); } - auto getMsgPrefix = [this](size_t i) { + auto get_msg_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); }; for (size_t i = 1; i < arguments.size(); ++i) { - auto * array = checkAndGetDataType(arguments[i].get()); + const auto * array = checkAndGetDataType(arguments[i].get()); if (array == nullptr && i != 1) - throw Exception(getMsgPrefix(i) + " must be array of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(get_msg_prefix(i) + " must be array of tuples.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - auto * tuple = checkAndGetDataType(array ? array->getNestedType().get() : arguments[i].get()); + const auto * tuple = checkAndGetDataType(array ? array->getNestedType().get() : arguments[i].get()); if (tuple == nullptr) - throw Exception(getMsgPrefix(i) + " must contains tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(get_msg_prefix(i) + " must contains tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const DataTypes & elements = tuple->getElements(); if (elements.size() != 2) - throw Exception(getMsgPrefix(i) + " must have exactly two elements.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(get_msg_prefix(i) + " must have exactly two elements.", ErrorCodes::BAD_ARGUMENTS); for (auto j : ext::range(0, elements.size())) { if (!elements[j]->isNumber()) { - throw Exception(getMsgPrefix(i) + " must contains numeric tuple at position " + toString(j + 1), + throw Exception(get_msg_prefix(i) + " must contains numeric tuple at position " + toString(j + 1), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } @@ -156,10 +146,10 @@ class FunctionPointInPolygon : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * point_col = block.getByPosition(arguments[0]).column.get(); - auto const_tuple_col = checkAndGetColumn(point_col); + const auto * const_tuple_col = checkAndGetColumn(point_col); if (const_tuple_col) point_col = &const_tuple_col->getDataColumn(); - auto tuple_col = checkAndGetColumn(point_col); + const auto * tuple_col = checkAndGetColumn(point_col); if (!tuple_col) { @@ -207,18 +197,18 @@ class FunctionPointInPolygon : public IFunction { Polygon polygon; - auto getMsgPrefix = [this](size_t i) { + auto get_msg_prefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); }; for (size_t i = 1; i < arguments.size(); ++i) { - auto const_col = checkAndGetColumn(block.getByPosition(arguments[i]).column.get()); - auto array_col = const_col ? checkAndGetColumn(&const_col->getDataColumn()) : nullptr; - auto tuple_col = array_col ? checkAndGetColumn(&array_col->getData()) : nullptr; + const auto * const_col = checkAndGetColumn(block.getByPosition(arguments[i]).column.get()); + const auto * array_col = const_col ? checkAndGetColumn(&const_col->getDataColumn()) : nullptr; + const auto * tuple_col = array_col ? 
checkAndGetColumn(&array_col->getData()) : nullptr; if (!tuple_col) - throw Exception(getMsgPrefix(i) + " must be constant array of tuples.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(get_msg_prefix(i) + " must be constant array of tuples.", ErrorCodes::ILLEGAL_COLUMN); const auto & tuple_columns = tuple_col->getColumns(); const auto & column_x = tuple_columns[0]; @@ -232,7 +222,7 @@ class FunctionPointInPolygon : public IFunction auto size = column_x->size(); if (size == 0) - throw Exception(getMsgPrefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(get_msg_prefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN); for (auto j : ext::range(0, size)) { @@ -246,11 +236,11 @@ class FunctionPointInPolygon : public IFunction container.push_back(container.front()); } - auto callImpl = use_object_pool + auto call_impl = use_object_pool ? FunctionPointInPolygonDetail::callPointInPolygonImplWithPool, PointInPolygonImpl> : FunctionPointInPolygonDetail::callPointInPolygonImpl, PointInPolygonImpl>; - return callImpl(x, y, polygon); + return call_impl(x, y, polygon); } }; diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index b9f20e45134..76022b983ad 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -992,7 +992,7 @@ class FunctionStringOrArrayToT : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const ColumnPtr column = block.getByPosition(arguments[0]).column; - if (const ColumnString * col = checkAndGetColumn(column.get())) + if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnVector::create(); @@ -1002,7 +1002,7 @@ class FunctionStringOrArrayToT : public IFunction block.getByPosition(result).column = std::move(col_res); } - else if (const ColumnFixedString * col = checkAndGetColumn(column.get())) + else if (const auto * col = checkAndGetColumn(column.get())) { if (Impl::is_fixed_to_constant) { @@ -1022,7 +1022,7 @@ class FunctionStringOrArrayToT : public IFunction block.getByPosition(result).column = std::move(col_res); } } - else if (const ColumnArray * col = checkAndGetColumn(column.get())) + else if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnVector::create(); @@ -1081,13 +1081,13 @@ class FunctionReverse : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const ColumnPtr column = block.getByPosition(arguments[0]).column; - if (const ColumnString * col = checkAndGetColumn(column.get())) + if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnString::create(); ReverseImpl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); block.getByPosition(result).column = std::move(col_res); } - else if (const ColumnFixedString * col = checkAndGetColumn(column.get())) + else if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = ColumnFixedString::create(col->getN()); ReverseImpl::vectorFixed(col->getChars(), col->getN(), col_res->getChars()); @@ -1131,7 +1131,7 @@ class FunctionJsonLength : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const ColumnPtr column = block.getByPosition(arguments[0]).column; - if (const ColumnString * col = checkAndGetColumn(column.get())) + if (const auto * col = checkAndGetColumn(column.get())) { auto col_res = 
ColumnUInt64::create(); typename ColumnUInt64::Container & vec_col_res = col_res->getData(); @@ -1232,8 +1232,8 @@ class ConcatImpl : public IFunction const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); const IColumn * c1 = block.getByPosition(arguments[1]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); - const ColumnString * c1_string = checkAndGetColumn(c1); + const auto * c0_string = checkAndGetColumn(c0); + const auto * c1_string = checkAndGetColumn(c1); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); const ColumnConst * c1_const_string = checkAndGetColumnConst(c1); @@ -1552,7 +1552,7 @@ class FunctionSubstring : public IFunction if (number_of_arguments == 3) column_length = block.getByPosition(arguments[2]).column; - const ColumnConst * column_start_const = checkAndGetColumn(column_start.get()); + const auto * column_start_const = checkAndGetColumn(column_start.get()); const ColumnConst * column_length_const = nullptr; if (number_of_arguments == 3) @@ -1572,9 +1572,9 @@ class FunctionSubstring : public IFunction throw Exception("Third argument provided for function substring could not be negative.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); } - if (const ColumnString * col = checkAndGetColumn(column_string.get())) + if (const auto * col = checkAndGetColumn(column_string.get())) executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, StringSource(*col)); - else if (const ColumnFixedString * col = checkAndGetColumn(column_string.get())) + else if (const auto * col = checkAndGetColumn(column_string.get())) executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, FixedStringSource(*col)); else if (const ColumnConst * col = checkAndGetColumnConst(column_string.get())) executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, block, result, ConstSource(*col)); @@ -1676,7 +1676,7 @@ class FunctionSubstringUTF8 : public IFunction return true; } - const ColumnString * col = checkAndGetColumn(column_string.get()); + const auto * col = checkAndGetColumn(column_string.get()); assert(col); auto col_res = ColumnString::create(); getVectorConstConstFunc(implicit_length, is_positive)(col->getChars(), col->getOffsets(), start_abs, length, col_res->getChars(), col_res->getOffsets()); @@ -1732,7 +1732,7 @@ class FunctionSubstringUTF8 : public IFunction // convert to vector if string is const. ColumnPtr full_column_string = column_string->isColumnConst() ? 
column_string->convertToFullColumnIfConst() : column_string; - const ColumnString * col = checkAndGetColumn(full_column_string.get()); + const auto * col = checkAndGetColumn(full_column_string.get()); assert(col); auto col_res = ColumnString::create(); if (implicit_length) @@ -1869,7 +1869,7 @@ class FunctionRightUTF8 : public IFunction using LengthFieldType = typename LengthType::FieldType; auto col_res = ColumnString::create(); - if (const ColumnString * col_string = checkAndGetColumn(column_string.get())) + if (const auto * col_string = checkAndGetColumn(column_string.get())) { if (column_length->isColumnConst()) { @@ -1897,7 +1897,7 @@ class FunctionRightUTF8 : public IFunction else if (const ColumnConst * col_const_string = checkAndGetColumnConst(column_string.get())) { // const vector - const ColumnString * col_string_from_const = checkAndGetColumn(col_const_string->getDataColumnPtr().get()); + const auto * col_string_from_const = checkAndGetColumn(col_const_string->getDataColumnPtr().get()); assert(col_string_from_const); // When useDefaultImplementationForConstants is true, string and length are not both constants assert(!column_length->isColumnConst()); @@ -1993,7 +1993,7 @@ class FunctionAppendTrailingCharIfAbsent : public IFunction if (!checkColumnConst(column_char.get())) throw Exception(fmt::format("Second argument of function {} must be a constant string", getName()), ErrorCodes::ILLEGAL_COLUMN); - String trailing_char_str = static_cast(*column_char).getValue(); + auto trailing_char_str = static_cast(*column_char).getValue(); if (trailing_char_str.size() != 1) throw Exception(fmt::format("Second argument of function {} must be a one-character string", getName()), ErrorCodes::BAD_ARGUMENTS); @@ -2101,7 +2101,7 @@ class TrimImpl : public IFunction void executeTrim(Block & block, const ColumnNumbers & arguments, const size_t result) const { const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); + const auto * c0_string = checkAndGetColumn(c0); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); auto c_res = ColumnString::create(); @@ -2121,8 +2121,8 @@ class TrimImpl : public IFunction const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); const IColumn * c1 = block.getByPosition(arguments[1]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); - const ColumnString * c1_string = checkAndGetColumn(c1); + const auto * c0_string = checkAndGetColumn(c0); + const auto * c1_string = checkAndGetColumn(c1); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); const ColumnConst * c1_const_string = checkAndGetColumnConst(c1); @@ -2202,7 +2202,7 @@ class TrimUTF8Impl : public IFunction void executeTrim(Block & block, const ColumnNumbers & arguments, const size_t result) const { const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); + const auto * c0_string = checkAndGetColumn(c0); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); auto c_res = ColumnString::create(); @@ -2225,7 +2225,7 @@ class TrimUTF8Impl : public IFunction const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); const IColumn * c1 = block.getByPosition(arguments[1]).column.get(); - const ColumnString * c0_string = checkAndGetColumn(c0); + const auto * c0_string = checkAndGetColumn(c0); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); const ColumnConst * c1_const_string = 
checkAndGetColumnConst(c1); const auto * column_trim_string = checkAndGetColumn(c1_const_string->getDataColumnPtr().get()); @@ -2716,7 +2716,7 @@ class FunctionTiDBTrim : public IFunction ColumnPtr & column_data = block.getByPosition(arguments[0]).column; auto res_col = ColumnString::create(); - const ColumnString * data_col = checkAndGetColumn(column_data.get()); + const auto * data_col = checkAndGetColumn(column_data.get()); static constexpr std::string_view default_rem = " "; static const auto * remstr_ptr = reinterpret_cast(default_rem.data()); @@ -2738,25 +2738,25 @@ class FunctionTiDBTrim : public IFunction if (data_const && !remstr_const) { const ColumnConst * data_col = checkAndGetColumnConst(column_data.get()); - const ColumnString * remstr_col = checkAndGetColumn(column_remstr.get()); + const auto * remstr_col = checkAndGetColumn(column_remstr.get()); - const std::string data = data_col->getValue(); + const auto data = data_col->getValue(); const auto * data_ptr = reinterpret_cast(data.c_str()); constVector(is_ltrim, is_rtrim, data_ptr, data.size() + 1, remstr_col->getChars(), remstr_col->getOffsets(), res_col->getChars(), res_col->getOffsets()); } else if (remstr_const && !data_const) { const ColumnConst * remstr_col = checkAndGetColumnConst(column_remstr.get()); - const ColumnString * data_col = checkAndGetColumn(column_data.get()); + const auto * data_col = checkAndGetColumn(column_data.get()); - const std::string remstr = remstr_col->getValue(); + const auto remstr = remstr_col->getValue(); const auto * remstr_ptr = reinterpret_cast(remstr.c_str()); vectorConst(is_ltrim, is_rtrim, data_col->getChars(), data_col->getOffsets(), remstr_ptr, remstr.size() + 1, res_col->getChars(), res_col->getOffsets()); } else { - const ColumnString * data_col = checkAndGetColumn(column_data.get()); - const ColumnString * remstr_col = checkAndGetColumn(column_remstr.get()); + const auto * data_col = checkAndGetColumn(column_data.get()); + const auto * remstr_col = checkAndGetColumn(column_remstr.get()); vectorVector(is_ltrim, is_rtrim, data_col->getChars(), data_col->getOffsets(), remstr_col->getChars(), remstr_col->getOffsets(), res_col->getChars(), res_col->getOffsets()); } @@ -2769,7 +2769,7 @@ class FunctionTiDBTrim : public IFunction ColumnPtr & column_direction = block.getByPosition(arguments[2]).column; if (!column_direction->isColumnConst()) throw Exception(fmt::format("3nd argument of function {} must be constant.", getName())); - const ColumnConst * direction_col = checkAndGetColumn(column_direction.get()); + const auto * direction_col = checkAndGetColumn(column_direction.get()); static constexpr Int64 trim_both_default = 0; // trims from both direction by default static constexpr Int64 trim_both = 1; // trims from both direction with explicit notation @@ -2989,7 +2989,7 @@ class TidbPadImpl { continue; } - int32_t len = static_cast(column_length->getInt(i)); + auto len = static_cast(column_length->getInt(i)); if (len <= 0) { len = 0; @@ -3051,7 +3051,7 @@ class TidbPadImpl } else { - const ColumnString * column_string = checkAndGetColumn(column_string_ptr.get()); + const auto * column_string = checkAndGetColumn(column_string_ptr.get()); const ColumnString::Offsets & string_offsets = column_string->getOffsets(); const ColumnString::Chars_t & string_data = column_string->getChars(); @@ -3233,7 +3233,7 @@ class TidbPadImpl return true; } - ColumnString::Offset tmp_target_len = static_cast(target_len); + auto tmp_target_len = static_cast(target_len); ColumnString::Offset per_pad_offset = 
0; ColumnString::Offset pad_bytes = 0; ColumnString::Offset left = 0; @@ -3300,7 +3300,7 @@ class TidbPadImpl return true; } - ColumnString::Offset tmp_target_len = static_cast(target_len); + auto tmp_target_len = static_cast(target_len); if (data_len < tmp_target_len) { ColumnString::Offset left = tmp_target_len - data_len; @@ -3421,7 +3421,7 @@ class PadImpl : public IFunction ColumnPtr column_length = block.getByPosition(arguments[1]).column; ColumnPtr column_padding = block.getByPosition(arguments[2]).column; - const ColumnConst * column_length_const = checkAndGetColumn(column_length.get()); + const auto * column_length_const = checkAndGetColumn(column_length.get()); const ColumnConst * column_padding_const = checkAndGetColumnConst(column_padding.get()); Int64 length_value = 0; @@ -3441,7 +3441,7 @@ class PadImpl : public IFunction auto c_res = ColumnString::create(); - if (const ColumnString * col = checkAndGetColumn(column_string.get())) + if (const auto * col = checkAndGetColumn(column_string.get())) pad, StringSink>( StringSource(*col), ConstSource(*column_padding_const), @@ -3548,7 +3548,7 @@ class PadUTF8Impl : public IFunction ColumnPtr column_length = block.getByPosition(arguments[1]).column; ColumnPtr column_padding = block.getByPosition(arguments[2]).column; - const ColumnConst * column_length_const = checkAndGetColumn(column_length.get()); + const auto * column_length_const = checkAndGetColumn(column_length.get()); const ColumnConst * column_padding_const = checkAndGetColumnConst(column_padding.get()); Int64 length_value = 0; @@ -3568,7 +3568,7 @@ class PadUTF8Impl : public IFunction auto c_res = ColumnString::create(); const auto * column_padding_string = checkAndGetColumn(column_padding_const->getDataColumnPtr().get()); - if (const ColumnString * col = checkAndGetColumn(column_string.get())) + if (const auto * col = checkAndGetColumn(column_string.get())) vector(col->getChars(), col->getOffsets(), length_value, column_padding_string->getChars(), column_padding_string->getOffsets(), c_res->getChars(), c_res->getOffsets()); else if (const ColumnConst * col = checkAndGetColumnConst(column_string.get())) { @@ -4114,8 +4114,8 @@ class FunctionASCII : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); - const ColumnConst * c0_const = checkAndGetColumn(c0_col); - const ColumnString * c0_string = checkAndGetColumn(c0_col); + const auto * c0_const = checkAndGetColumn(c0_col); + const auto * c0_string = checkAndGetColumn(c0_col); Field res_field; int val_num = c0_col->size(); @@ -4165,8 +4165,8 @@ class FunctionLength : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); - const ColumnConst * c0_const = checkAndGetColumn(c0_col); - const ColumnString * c0_string = checkAndGetColumn(c0_col); + const auto * c0_const = checkAndGetColumn(c0_col); + const auto * c0_string = checkAndGetColumn(c0_col); Field res_field; int val_num = c0_col->size(); @@ -4215,13 +4215,13 @@ class FunctionPosition : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { const IColumn * c0_col = block.getByPosition(arguments[0]).column.get(); - const ColumnConst * c0_const = checkAndGetColumn(c0_col); - const ColumnString * c0_string = checkAndGetColumn(c0_col); + const auto * c0_const 
= checkAndGetColumn(c0_col); + const auto * c0_string = checkAndGetColumn(c0_col); Field c0_field; const IColumn * c1_col = block.getByPosition(arguments[1]).column.get(); - const ColumnConst * c1_const = checkAndGetColumn(c1_col); - const ColumnString * c1_string = checkAndGetColumn(c1_col); + const auto * c1_const = checkAndGetColumn(c1_col); + const auto * c1_string = checkAndGetColumn(c1_col); Field c1_field; if ((c0_const == nullptr && c0_string == nullptr) || (c1_const == nullptr && c1_string == nullptr)) @@ -4331,7 +4331,7 @@ class FunctionSubStringIndex : public IFunction column_str = column_str->isColumnConst() ? column_str->convertToFullColumnIfConst() : column_str; if (delim_const && count_const) { - const ColumnString * str_col = checkAndGetColumn(column_str.get()); + const auto * str_col = checkAndGetColumn(column_str.get()); const ColumnConst * delim_col = checkAndGetColumnConst(column_delim.get()); const ColumnConst * count_col = checkAndGetColumnConst>(column_count.get()); if (str_col == nullptr || delim_col == nullptr || count_col == nullptr) @@ -4339,7 +4339,7 @@ class FunctionSubStringIndex : public IFunction return false; } auto col_res = ColumnString::create(); - IntType count = count_col->getValue(); + auto count = count_col->getValue(); vectorConstConst( str_col->getChars(), str_col->getOffsets(), @@ -4353,9 +4353,9 @@ class FunctionSubStringIndex : public IFunction { column_delim = column_delim->isColumnConst() ? column_delim->convertToFullColumnIfConst() : column_delim; column_count = column_count->isColumnConst() ? column_count->convertToFullColumnIfConst() : column_count; - const ColumnString * str_col = checkAndGetColumn(column_str.get()); - const ColumnString * delim_col = checkAndGetColumn(column_delim.get()); - const ColumnVector * count_col = checkAndGetColumn>(column_count.get()); + const auto * str_col = checkAndGetColumn(column_str.get()); + const auto * delim_col = checkAndGetColumn(column_delim.get()); + const auto * count_col = checkAndGetColumn>(column_count.get()); if (str_col == nullptr || delim_col == nullptr || count_col == nullptr) { return false; @@ -4573,7 +4573,9 @@ class FormatImpl : public IFunction using NumberFieldType = typename NumberType::FieldType; using NumberColVec = std::conditional_t, ColumnDecimal, ColumnVector>; const auto * number_raw = block.getByPosition(arguments[0]).column.get(); + TiDBDecimalRoundInfo info{number_type, number_type}; + info.output_prec = info.output_prec < 65 ? 
info.output_prec + 1 : 65;
 
     return getPrecisionType(precision_base_type, [&](const auto & precision_type, bool) {
         using PrecisionType = std::decay_t<decltype(precision_type)>;
@@ -4723,10 +4725,11 @@ class FormatImpl : public IFunction
     static void format(
         T number,
         size_t max_num_decimals,
-        const TiDBDecimalRoundInfo & info,
+        TiDBDecimalRoundInfo & info,
         ColumnString::Chars_t & res_data,
         ColumnString::Offsets & res_offsets)
     {
+        info.output_scale = std::min(max_num_decimals, static_cast<size_t>(info.input_scale));
         auto round_number = round(number, max_num_decimals, info);
         std::string round_number_str = number2Str(round_number, info);
         std::string buffer = Format::apply(round_number_str, max_num_decimals);
@@ -4870,7 +4873,7 @@ class FunctionFormatWithLocale : public IFunction
         }
         else
         {
-            const String value = locale_const->getValue<String>();
+            const auto value = locale_const->getValue<String>();
             if (!boost::iequals(value, supported_locale))
             {
                 const auto & msg = genWarningMsg(value);
diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h
index 119169be8b5..3eddd383cfb 100644
--- a/dbms/src/Functions/Regexps.h
+++ b/dbms/src/Functions/Regexps.h
@@ -18,13 +18,6 @@
 #include
 #include
 
-
-namespace ProfileEvents
-{
-extern const Event RegexpCreated;
-}
-
-
 namespace DB
 {
 namespace Regexps
@@ -54,7 +47,6 @@ inline Pool::Pointer get(const std::string & pattern, int flags)
         if (no_capture)
             flags |= OptimizedRegularExpression::RE_NO_CAPTURE;
 
-        ProfileEvents::increment(ProfileEvents::RegexpCreated);
         return new Regexp{createRegexp(pattern, flags)};
     });
 }
diff --git a/dbms/src/Functions/bitShiftRight.cpp b/dbms/src/Functions/bitShiftRight.cpp
index 961f7459f68..90b365771de 100644
--- a/dbms/src/Functions/bitShiftRight.cpp
+++ b/dbms/src/Functions/bitShiftRight.cpp
@@ -13,6 +13,9 @@
 // limitations under the License.
 
 #include
+#include
+
+#include
 
 namespace DB
 {
@@ -29,7 +32,18 @@ struct BitShiftRightImpl
     template <typename Result = ResultType>
     static Result apply(A a, B b)
     {
-        return static_cast<Result>(a) >> static_cast<Result>(b);
+        // In C++ a shift is undefined behavior when the right operand is negative, or greater than
+        // or equal to the number of bits in the (promoted) left operand.
+        // See https://en.cppreference.com/w/cpp/language/operator_arithmetic for details.
+        // For example, UInt64 >> 64 is UB in C++, while TiDB defines the result to be 0.
+        if (static_cast<UInt64>(b) >= std::numeric_limits<decltype(static_cast<Result>(a))>::digits)
+        {
+            return static_cast<Result>(0);
+        }
+        // Note that we do not consider the case where the right operand is negative,
+        // since all other types are cast to UInt64 before the shift operation
+        // according to DAGExpressionAnalyzerHelper::buildBitwiseFunction.
+        // Therefore, we simply suppress the clang-tidy check here.
+        return static_cast<Result>(a) >> static_cast<Result>(b); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
     }
     template <typename Result = ResultType>
     static Result apply(A, B, UInt8 &)
@@ -87,4 +101,4 @@ void registerFunctionBitShiftRight(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionBitShiftRight>();
 }
-} // namespace DB
\ No newline at end of file
+} // namespace DB
diff --git a/dbms/src/Functions/tests/gtest_bitshiftright.cpp b/dbms/src/Functions/tests/gtest_bitshiftright.cpp
new file mode 100644
index 00000000000..a4af6336099
--- /dev/null
+++ b/dbms/src/Functions/tests/gtest_bitshiftright.cpp
@@ -0,0 +1,273 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ +class TestFunctionBitShiftRight : public DB::tests::FunctionTest +{ +}; + +#define ASSERT_BITSHIFTRIGHT(t1, t2, result) \ + ASSERT_COLUMN_EQ(result, executeFunction("bitShiftRight", {t1, t2})) + +TEST_F(TestFunctionBitShiftRight, Simple) +try +{ + ASSERT_BITSHIFTRIGHT(createColumn>({8}), + createColumn>({2}), + createColumn>({2})); +} +CATCH + +/// Note: Only IntX and UIntX will be received by BitShiftRight, others will be casted by TiDB planner. +/// Note: BitShiftRight will further cast other types to UInt64 before doing shift. +TEST_F(TestFunctionBitShiftRight, TypePromotion) +try +{ + // Type Promotion + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({1}), createColumn>({9223372036854775807ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({1}), createColumn>({9223372036854775807ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({1}), createColumn>({9223372036854775807ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({1}), createColumn>({9223372036854775807ull})); + + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + + // Type Promotion across signed/unsigned + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({0}), createColumn>({18446744073709551615ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({-1}), createColumn>({0}), createColumn>({18446744073709551615ull})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn>({0}), createColumn>({1})); +} +CATCH + +TEST_F(TestFunctionBitShiftRight, Nullable) +try +{ + // Non Nullable + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn({0}), createColumn({1})); + + // Across Nullable and non-Nullable + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); 
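+    // A worked example of the promotion rule verified in TypePromotion above: Int8(-1) is first
+    // reinterpreted as UInt64 (0xFFFFFFFFFFFFFFFF), so shifting right by 1 yields
+    // 0x7FFFFFFFFFFFFFFF, i.e. 9223372036854775807.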
+ ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn({1}), createColumn>({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); + ASSERT_BITSHIFTRIGHT(createColumn>({1}), createColumn({0}), createColumn>({1})); +} +CATCH + +TEST_F(TestFunctionBitShiftRight, TypeCastWithConst) +try +{ + /// need test these kinds of columns: + /// 1. ColumnVector + /// 2. ColumnVector + /// 3. ColumnConst + /// 4. ColumnConst, value != null + /// 5. 
ColumnConst, value = null + + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createColumn({0, 1, 0, 1}), createColumn({0, 0, 1, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({0, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createConstColumn(4, 0), createColumn({0, 0, 1, 1})); + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createConstColumn>(4, 0), createColumn({0, 0, 1, 1})); + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1}), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); // become const in wrapInNullable + + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn({0, 1, 0, 1}), createColumn>({0, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({0, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createConstColumn(4, 0), createColumn>({0, 1, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createConstColumn(4, 0), createColumn>({0, 1, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createColumn>({0, 1, std::nullopt, std::nullopt}), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); + + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createColumn({0, 1, 0, 1}), createColumn({1, 0, 1, 0})); + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({1, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createConstColumn(4, 0), createConstColumn(4, 1)); + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createConstColumn>(4, 0), createConstColumn(4, 1)); + ASSERT_BITSHIFTRIGHT(createConstColumn(4, 1), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); + + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createColumn({0, 1, 0, 1}), createColumn({1, 0, 1, 0})); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createColumn>({0, 1, std::nullopt, std::nullopt}), createColumn>({1, 0, std::nullopt, std::nullopt})); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createConstColumn(4, 0), createConstColumn(4, 1)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createConstColumn>(4, 0), createConstColumn(4, 1)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, 1), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); + + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createColumn({0, 1, 0, 1}), createConstColumn>(4, std::nullopt)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createColumn>({0, 1, std::nullopt, std::nullopt}), createConstColumn>(4, std::nullopt)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createConstColumn(4, 0), createConstColumn>(4, std::nullopt)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createConstColumn(4, 0), createConstColumn>(4, std::nullopt)); + ASSERT_BITSHIFTRIGHT(createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt), createConstColumn>(4, std::nullopt)); +} +CATCH + +TEST_F(TestFunctionBitShiftRight, Boundary) +try +{ + ASSERT_BITSHIFTRIGHT(createColumn({127, 127, -128, -128}), createColumn({0, 7, 0, 7}), createColumn({127, 0, 18446744073709551488ull, 144115188075855871ull})); + ASSERT_BITSHIFTRIGHT(createColumn({127, 127, -128, 
-128}), createColumn({0, 7, 0, 7}), createColumn({127, 0, 18446744073709551488ull, 144115188075855871ull})); + ASSERT_BITSHIFTRIGHT(createColumn({32767, 32767, -32768, -32768}), createColumn({0, 15, 0, 15}), createColumn({32767, 0, 18446744073709518848ull, 562949953421311ull})); + + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1, -1, -1, INT64_MAX, INT64_MAX, INT64_MIN, INT64_MIN}), + createColumn({0, 63, 0, 63, 0, 63, 0, 63, 0, 63}), + createColumn({0, 0, 1, 0, 18446744073709551615ull, 1, INT64_MAX, 0, 9223372036854775808ull, 1})); +} +CATCH + +TEST_F(TestFunctionBitShiftRight, UINT64) +try +{ + ASSERT_BITSHIFTRIGHT(createColumn({0, UINT64_MAX}), + createColumn({63, 63}), + createColumn({0, 1})); + + ASSERT_BITSHIFTRIGHT(createColumn>({0, UINT64_MAX, std::nullopt}), + createColumn>({63, 63, 63}), + createColumn>({0, 1, std::nullopt})); + + ASSERT_BITSHIFTRIGHT(createColumn>({0, UINT64_MAX, std::nullopt}), + createColumn({63, 63, 63}), + createColumn>({0, 1, std::nullopt})); + + ASSERT_BITSHIFTRIGHT(createColumn({0, UINT64_MAX}), + createColumn>({63, 63}), + createColumn>({0, 1})); + + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, 1, 1, -1, -1, INT64_MAX, INT64_MAX, INT64_MIN, INT64_MIN}), + createColumn({0, UINT64_MAX, 0, UINT64_MAX, 0, UINT64_MAX, 0, UINT64_MAX, 0, UINT64_MAX}), + createColumn({0, 0, 1, 0, 18446744073709551615ull, 0, INT64_MAX, 0, 9223372036854775808ull, 0})); + + + ASSERT_BITSHIFTRIGHT(createColumn({0, 0, UINT64_MAX, UINT64_MAX}), + createColumn({0, UINT64_MAX, 0, UINT64_MAX}), + createColumn({0, 0, UINT64_MAX, 0})); + + ASSERT_BITSHIFTRIGHT(createColumn>({0, 0, UINT64_MAX, UINT64_MAX, 0, std::nullopt}), + createColumn>({0, UINT64_MAX, 0, UINT64_MAX, std::nullopt, 0}), + createColumn>({0, 0, UINT64_MAX, 0, std::nullopt, std::nullopt})); + + ASSERT_BITSHIFTRIGHT(createColumn>({0, 0, UINT64_MAX, UINT64_MAX, std::nullopt}), + createColumn({0, UINT64_MAX, 0, UINT64_MAX, 0}), + createColumn>({0, 0, UINT64_MAX, 0, std::nullopt})); + + ASSERT_BITSHIFTRIGHT(createColumn({0, UINT64_MAX, 0, UINT64_MAX, 0}), + createColumn>({0, 0, UINT64_MAX, UINT64_MAX, std::nullopt}), + createColumn>({0, UINT64_MAX, 0, 0, std::nullopt})); + + /* + std::mt19937 gen(std::random_device{}()); + std::uniform_int_distribution dis( + std::numeric_limits::min(), + std::numeric_limits::max() + ); + size_t count = 100; + std::vector v1(count), v2(count), res(count); + for (size_t i=0; i> v2[i]; + } + */ + // clang-format off + 
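+    // The three columns below hold 100 random (value, shift) pairs produced by the generator
+    // sketched in the comment above; the expected results follow the same UInt64 semantics as
+    // the implementation.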
ASSERT_BITSHIFTRIGHT(createColumn({4286230172992429668ull,11550684080080434735ull,775195682263841867ull,18390588538388462661ull,15578761645824658314ull,20662948907547635ull,8403266546632871011ull,10316916867086714284ull,14494183568060929367ull,11741337603037632348ull,10803264694948981380ull,2181969932373516503ull,9673801579564730047ull,12998855911221966916ull,13852157931865274857ull,9203926828777338586ull,8903261359104369984ull,3296258311466476456ull,14658801806079697908ull,7542518003247963618ull,7751150277360944372ull,12225694156629117269ull,3173837214287201256ull,10555082060194839563ull,14202570947308501213ull,13841194359225980123ull,9085267378073816945ull,15975493157631073381ull,1890233386459299033ull,2368634323417847398ull,691423931511513606ull,986000479038857169ull,6676906740954304741ull,2841686799872009560ull,6483676442160212821ull,12550114481083571140ull,1973026146580965947ull,15006687639313690830ull,6443617813685195609ull,13648732879238232658ull,173820604016606515ull,2669428687588070677ull,15361476519767969236ull,8957522718906827285ull,10484385204137290737ull,12390466571993898199ull,13655746682011856065ull,4183302523705398003ull,9898692767945122925ull,16701902679050716746ull,15003324714492513897ull,15554724240808081962ull,7754458312088240871ull,16060968032680196798ull,12619581440986221928ull,15462661961676206824ull,2991773628650321635ull,16341599119345297909ull,14943939970889580769ull,17589764776976679210ull,15274914527536421890ull,16268454608136611433ull,14617646699124891378ull,466927094873143934ull,10558583305251737283ull,255559140356160501ull,5962789691899784330ull,8004603198837555992ull,1881892337023478820ull,6549167700870881840ull,17551996157828573642ull,3349744237253314638ull,2876698686583880568ull,16792783373922568330ull,16231348759981899800ull,17731631990557975899ull,1305376485657663531ull,3568754485566225727ull,10076204423028931225ull,1206238310176455071ull,4297062324543635867ull,5116785256928623516ull,4216305034157620433ull,412817651268481791ull,11256299741838589766ull,10786197076871163667ull,8588357635228913652ull,6361409982074778071ull,4750871994764527580ull,12851835128796581697ull,13871712051825681122ull,12445309465661589227ull,1668617678034382020ull,10152918068481134781ull,16242941973571224246ull,12988338226657152812ull,2352083670492692674ull,10735026236980245779ull,14986388012066843516ull,17651064432466444102ull}), + createColumn({0,58,55,24,5,35,34,54,43,45,17,36,51,54,19,55,55,8,37,49,15,11,36,0,5,41,46,54,2,59,11,25,43,29,31,8,59,2,11,19,56,35,57,13,2,35,6,54,17,0,49,5,15,3,60,44,16,6,57,44,58,54,26,23,58,23,26,29,56,40,45,2,21,9,57,40,4,46,17,15,62,21,5,54,22,47,10,24,53,61,43,52,23,10,61,43,26,31,38,2}), + 
createColumn({4286230172992429668ull,40,21,1096164497041ull,486836301432020572ull,601370,489134489,572,1647797,333708,82422368583289ull,31751841,4296,721,26420894492846ull,255,247,12876009029165923ull,106656820,13398,236546334147978ull,5969577224916561ull,46185410,10555082060194839563ull,443830342103390662ull,6294246,129109,886,472558346614824758ull,4,337609341558356ull,29385104150ull,759076,5293054133ull,3019197118ull,49023884691732699ull,3,3751671909828422707ull,3146297760588474ull,26032891996838ull,2,77690599,106,1093447597522806ull,2621096301034322684ull,360610038,213371041906435251ull,232,75521032470284ull,16701902679050716746ull,26651,486085132525252561ull,236647287356208ull,2007621004085024599ull,10,878950,45650842722325ull,255337486239770279ull,103,999862,52,903,217819909738ull,55662047251ull,36,30465023560ull,88852490364ull,14909735319ull,26,5956433,498857,837436059313328659ull,1371716826717ull,32798405027192516ull,112,16126825,81586030353603970ull,50715,76875338920813ull,36811471868177ull,0,2439873341049ull,131759532317425638ull,22,2683710990390ull,76640,8387068003153235ull,379169582252ull,527,5,1577031,2763,198914727930ull,9914959051251108ull,7,1476603,35048777915ull,4998886136ull,54520161,4412766108116611025ull})); + // clang-format on +} +CATCH + +TEST_F(TestFunctionBitShiftRight, UB) +try +{ + ASSERT_BITSHIFTRIGHT(createColumn({127, -128}), createColumn({64, 64}), createColumn({0, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({127, -128}), createColumn({64, 64}), createColumn({0, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({32767, -32768}), createColumn({64, 64}), createColumn({0, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({INT32_MAX, INT32_MIN}), createColumn({64, 64}), createColumn({0, 0})); + ASSERT_BITSHIFTRIGHT(createColumn({INT64_MAX, INT64_MIN}), createColumn({64, 64}), createColumn({0, 0})); + + ASSERT_BITSHIFTRIGHT(createColumn({255}), createColumn({64}), createColumn({0})); + ASSERT_BITSHIFTRIGHT(createColumn({255}), createColumn({64}), createColumn({0})); + ASSERT_BITSHIFTRIGHT(createColumn({65535}), createColumn({64}), createColumn({0})); + ASSERT_BITSHIFTRIGHT(createColumn({UINT32_MAX}), createColumn({64}), createColumn({0})); + ASSERT_BITSHIFTRIGHT(createColumn({UINT64_MAX}), createColumn({64}), createColumn({0})); + + /* + std::mt19937 gen(std::random_device{}()); + std::uniform_int_distribution dis1( + std::numeric_limits::min(), + std::numeric_limits::max() + ); + std::uniform_int_distribution dis2( + 64, + std::numeric_limits::max() + ); + size_t count = 100; + std::vector v1(count), v2(count), res(count); + for (size_t i=0; 
i({17563387625296433369ull,5842891814427459261ull,15074502074821508463ull,386435802999553003ull,5487893274931198395ull,8125923807366590570ull,13340330062727071249ull,14908193031091561411ull,296805448857369387ull,8684453485792353774ull,13117933444495098288ull,3225762988982100714ull,11290506757949810556ull,14617912756126856962ull,9479575714707174581ull,11720728318194739598ull,14410575429605211363ull,12068356718035872518ull,80682389916710599ull,11003236134534292734ull,4412447398096224810ull,5331184707993902906ull,13827083432789678788ull,958142831027309576ull,16716461997317184701ull,17128750834581527743ull,11590434571174666313ull,10204342520615148287ull,11067791415848657283ull,17583875436196878829ull,186304014359496415ull,9381729025189804702ull,11502205568225715300ull,16472133582690439104ull,3743303387826342067ull,12860029445868505658ull,2244056593742923769ull,3275687468466891223ull,1545828456957460699ull,14187252460708728077ull,7551907967738536187ull,9754400233340010491ull,16293183350230169116ull,6298812696728711031ull,5915538565572009956ull,2284684518775825662ull,1130711226902262476ull,17158957721471765323ull,4220824385439711070ull,16559772875254313109ull,15397179690017513678ull,6300413832999049491ull,13787530251307637715ull,10132349060092695582ull,10446586881482901699ull,15759779838283537085ull,14402587207027333363ull,5546051719872960161ull,6545031029710296628ull,17407295406267098658ull,4259019625544816073ull,791895457880289787ull,8549227257401578066ull,15246278171168501125ull,1674668228908076954ull,849762797502000057ull,13302651500925764574ull,12438174880334092333ull,17701249772557033303ull,10742459186038873636ull,15671491258945407856ull,9352557101631889001ull,8914093883925002585ull,17935292744735591949ull,606989231583658922ull,6528503454270721815ull,14980539549624989095ull,13765196438235456668ull,3058323869228644592ull,14346577759191739044ull,1543206286382906519ull,1025562312317433790ull,17052896445025268012ull,18349597294988935754ull,17174604730104962524ull,11924965352621110201ull,502032511104181724ull,13845633389643139332ull,15436039204445155412ull,17809579006694175565ull,15166364145138562881ull,14062748599121933798ull,1777457178576774356ull,4985224560472716170ull,3881603168175384251ull,11555031280550342082ull,1252677486917153396ull,8744807353133366467ull,2048964426549800495ull,11945831330508218140ull}), + 
createColumn({7570379165150948640ull,2086259313016069849ull,3606689596671293211ull,14039117280692395662ull,13678665403528829741ull,16069000531561010558ull,18229345530821449414ull,433464578739092378ull,6298872104011095934ull,4518228872693063137ull,14988726875963869472ull,9568218424260764817ull,5383191468426384555ull,8698762658876708752ull,9487599666567205013ull,14370091126330876161ull,10702068376663045773ull,8045701071228357739ull,10878469353312437370ull,3183167829827610494ull,5928881618833110378ull,10410530709181481816ull,249988564503361262ull,13482614555530280987ull,5522946068620734806ull,12797173590813112894ull,14133419908717831141ull,10825732602137508628ull,13271177233899692778ull,1157753039017783757ull,3370600557036147696ull,2957689395775524062ull,11963898745206689513ull,4828931188614542720ull,15157289330857160797ull,369467010700905309ull,6278071805692607460ull,17817858137511910604ull,17789013631125929528ull,2861684947245777353ull,2583152408663154190ull,7935135702156687355ull,3033127046167579202ull,14224256960933395097ull,10838403249753694181ull,2154089102842257532ull,7860358918492191001ull,2982010253383852617ull,16385171982396620123ull,12241857497176342828ull,2080931105225959532ull,1046322072991155713ull,6146917059052005252ull,17411786298437646544ull,5497869583209795613ull,11701448129764809247ull,12642962700918363620ull,15936842187305218463ull,7811510447588439153ull,3558405966224377785ull,977960926168429540ull,9505800334935014018ull,12114068456102275321ull,5141880021314950000ull,6719615890604904521ull,1341445859098821585ull,3883912906202435997ull,2107770591867486616ull,2657186337437393032ull,2640917573672927653ull,3746140861437224253ull,15057648507099656234ull,12051189681068107042ull,2259769676757597701ull,2935229535510718769ull,6368233316971463582ull,14384644474340782197ull,2553547617837260603ull,14238122466576902747ull,9555765226032904481ull,15522640015319979866ull,10274396157562093026ull,5996101113505388770ull,16915812546351047056ull,4956089714130804219ull,17126605744801075545ull,12036643325202409080ull,11257234688654558199ull,375338337104024778ull,11152980243617851986ull,12325805905403174063ull,8653948654121626815ull,15348912598299408338ull,6883296938248095081ull,6484642948886870833ull,16936141613107270500ull,17012171815528507292ull,2574129622316042070ull,17178726110735453748ull,16578303277501346489ull}), + createColumn({0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0})); + // clang-format on +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Functions/tests/gtest_duration_pushdown.cpp b/dbms/src/Functions/tests/gtest_duration_pushdown.cpp index 4501a4c9fae..106f3d84642 100644 --- a/dbms/src/Functions/tests/gtest_duration_pushdown.cpp +++ b/dbms/src/Functions/tests/gtest_duration_pushdown.cpp @@ -166,5 +166,85 @@ try ASSERT_COLUMN_EQ(microSecond_out, executeFunction("microSecond", input4)); } CATCH + +TEST_F(DurationPushDown, timeToSecPushDownTest) +try +{ + ColumnWithTypeAndName input( + createColumn>({(838 * 3600 + 59 * 60 + 59) * 1000000000L + 999999000L, + -(838 * 3600 + 59 * 60 + 59) * 1000000000L - 123456000L, + 0, + (1 * 3600 + 2 * 60 + 3) * 1000000000L + 4000L}) + .column, + makeNullable(std::make_shared(6)), + "input"); + auto second_output = createColumn>({3020399, -3020399, 0, 3723}); + ASSERT_COLUMN_EQ(second_output, executeFunction("tidbTimeToSec", input)); + + // Test Overflow + 
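+    // MyDuration is bounded by +-838:59:59.999999, i.e.
+    // +-((838 * 3600 + 59 * 60 + 59) * 1000000000 + 999999000) = +-3020399999999000 nanoseconds;
+    // the inputs below sit one microsecond past that bound and must throw.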
ColumnWithTypeAndName input2( + createColumn>({(838 * 3600 + 59 * 60 + 59) * 1000000000L + 999999000L + 1000L}).column, + makeNullable(std::make_shared(6)), + "result"); + try + { + auto result = executeFunction("tidbTimeToSec", input2); + FAIL() << "Expected overflow"; + } + catch (DB::Exception & e) + { + ASSERT_EQ(e.message(), std::string("nanos must >= -3020399999999000 and <= 3020399999999000")); + } + catch (...) + { + FAIL() << "Expected overflow"; + }; + + ColumnWithTypeAndName input3( + createColumn>({-(838 * 3600 + 59 * 60 + 59) * 1000000000L - 999999000L - 1000L}).column, + makeNullable(std::make_shared(6)), + "result"); + try + { + auto result = executeFunction("tidbTimeToSec", input3); + FAIL() << "Expected overflow"; + } + catch (DB::Exception & e) + { + ASSERT_EQ(e.message(), std::string("nanos must >= -3020399999999000 and <= 3020399999999000")); + } + catch (...) + { + FAIL() << "Expected overflow"; + }; + + // Random Test + constexpr int rowNum = 1000; + auto dur_column = ColumnVector::create(); + auto & dur_data = dur_column->getData(); + auto second_column = ColumnVector::create(); + auto & second_data = second_column->getData(); + dur_data.resize(rowNum); + second_data.resize(rowNum); + + std::random_device rd; + std::default_random_engine gen = std::default_random_engine(rd()); + std::uniform_int_distribution sign_dis(0, 1), hour_dis(0, 838), minute_dis(0, 59), second_dis(0, 59), microSecond_dis(0, 999999); + for (int i = 0; i < rowNum; ++i) + { + auto sign = (sign_dis(gen) == 0) ? 1 : -1; + auto hour = hour_dis(gen); + auto minute = minute_dis(gen); + auto second = second_dis(gen); + auto microSecond = microSecond_dis(gen); + dur_data[i] = sign * ((hour * 3600 + minute * 60 + second) * 1000000000L + microSecond * 1000L); + second_data[i] = sign * (hour * 3600 + minute * 60 + second); + } + + ColumnWithTypeAndName input4(std::move(dur_column), std::make_shared(6), "duration"); + ColumnWithTypeAndName second_out(std::move(second_column), std::make_shared(), "time_to_sec"); + ASSERT_COLUMN_EQ(second_out, executeFunction("tidbTimeToSec", input4)); +} +CATCH } // namespace tests } // namespace DB \ No newline at end of file diff --git a/dbms/src/Functions/tests/gtest_get_format.cpp b/dbms/src/Functions/tests/gtest_get_format.cpp new file mode 100644 index 00000000000..61a8d80e7b4 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_get_format.cpp @@ -0,0 +1,153 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
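+
+// Tests for the getFormat function added in FunctionsConversion.h. It mirrors MySQL's GET_FORMAT():
+// a constant time type ("DATE", "DATETIME", "TIMESTAMP" or "TIME") combined with a location string
+// is mapped to a format template, and unknown inputs yield an empty string.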
+ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" +#include + +#pragma GCC diagnostic pop + +namespace DB::tests +{ +class GetFormatTest : public DB::tests::FunctionTest +{ +public: + static constexpr auto funcName = "getFormat"; +}; + +TEST_F(GetFormatTest, testBoundary) +try +{ + // const(non-null), vector + // time_type is a const with non null value + // location is a vector containing null + ASSERT_COLUMN_EQ( + createColumn>({"%m.%d.%Y", {}}), + executeFunction( + funcName, + createConstColumn>(2, "DATE"), + createColumn>({"USA", {}}))); + + // const(null), vector + // time_type is a const with null value + // location is a vector containing null + ASSERT_COLUMN_EQ( + createConstColumn>(2, {}), + executeFunction( + funcName, + createConstColumn>(2, {}), + createColumn>({"USA", {}}))); + + // const(non-null), const(non-null) + // time_type is a const with non null value + // location is a const with non null value + ASSERT_COLUMN_EQ( + createConstColumn(2, "%m.%d.%Y"), + executeFunction( + funcName, + createConstColumn>(2, "DATE"), + createConstColumn>(2, "USA"))); + + // const(non-null), const(null) + // time_type is a const with non null value + // location is a const with null value + ASSERT_COLUMN_EQ( + createConstColumn>(2, {}), + executeFunction( + funcName, + createConstColumn>(2, "DATE"), + createConstColumn>(2, {}))); + + // The time_type is a system pre_defined macro, thus assume time_type column is const + // Throw an exception is time_type is not ColumnConst + ASSERT_THROW( + executeFunction( + funcName, + createColumn>({"DATE", "TIME"}), + createColumn>({"USA", {}})), + DB::Exception); +} +CATCH + +TEST_F(GetFormatTest, testMoreCases) +try +{ + // time_type: DATE + // all locations + ASSERT_COLUMN_EQ( + createColumn>({"%m.%d.%Y", "%Y-%m-%d", "%Y-%m-%d", "%d.%m.%Y", "%Y%m%d"}), + executeFunction( + funcName, + createConstColumn>(5, "DATE"), + createColumn>({"USA", "JIS", "ISO", "EUR", "INTERNAL"}))); + + // time_type: DATETIME + // all locations + ASSERT_COLUMN_EQ( + createColumn>({"%Y-%m-%d %H.%i.%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H.%i.%s", "%Y%m%d%H%i%s"}), + executeFunction( + funcName, + createConstColumn>(5, "DATETIME"), + createColumn>({"USA", "JIS", "ISO", "EUR", "INTERNAL"}))); + + // time_type: TIMESTAMP + // all locations + ASSERT_COLUMN_EQ( + createColumn>({"%Y-%m-%d %H.%i.%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H:%i:%s", "%Y-%m-%d %H.%i.%s", "%Y%m%d%H%i%s"}), + executeFunction( + funcName, + createConstColumn>(5, "TIMESTAMP"), + createColumn>({"USA", "JIS", "ISO", "EUR", "INTERNAL"}))); + + // time_type: TIME + // all locations + ASSERT_COLUMN_EQ( + createColumn>({"%h:%i:%s %p", "%H:%i:%s", "%H:%i:%s", "%H.%i.%s", "%H%i%s"}), + executeFunction( + funcName, + createConstColumn>(5, "TIME"), + createColumn>({"USA", "JIS", "ISO", "EUR", "INTERNAL"}))); + + // the location is not in ("USA", "JIS", "ISO", "EUR", "INTERNAL") + ASSERT_COLUMN_EQ( + createColumn>({"", ""}), + executeFunction( + funcName, + createConstColumn>(2, "TIME"), + createColumn>({"CAN", ""}))); + + // the time_type is not in ("DATE", "DATETIME", "TIMESTAMP", "TIME") + ASSERT_COLUMN_EQ( + createColumn>({"", ""}), + executeFunction( + funcName, + createConstColumn>(2, "TIMEINUTC"), + createColumn>({"USA", "ISO"}))); +} +CATCH + +} // namespace DB::tests diff --git a/dbms/src/Functions/tests/gtest_strings_format.cpp 
b/dbms/src/Functions/tests/gtest_strings_format.cpp index 2d571a9bb1b..8f3b899316e 100644 --- a/dbms/src/Functions/tests/gtest_strings_format.cpp +++ b/dbms/src/Functions/tests/gtest_strings_format.cpp @@ -34,7 +34,7 @@ class StringFormat : public DB::tests::FunctionTest using FieldType = DecimalField; using NullableDecimal = Nullable; ASSERT_COLUMN_EQ( - createColumn>({"0.0000", "-0.0120", "0.0120", "12,332.1000", "12,332", "12,332", "12,332.300000000000000000000000000000", "-12,332.30000", "-1,000.0", "-333.33", {}}), + createColumn>({"0.0000", "-0.0120", "0.0120", "12,332.1000", "12,332", "12,332", "12,332.300000000000000000000000000000", "-12,332.30000", "-1,000.0", "-333.33", {}, "99,999.9999000000", "100,000.000", "100,000"}), executeFunction( func_name, createColumn( @@ -49,8 +49,11 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-123323000), 4), FieldType(static_cast(-9999999), 4), FieldType(static_cast(-3333330), 4), - FieldType(static_cast(0), 0)}), - createColumn>({4, 4, 4, 4, 0, -1, 31, 5, 1, 2, {}}))); + FieldType(static_cast(0), 0), + FieldType(static_cast(999999999), 4), + FieldType(static_cast(999999999), 4), + FieldType(static_cast(999999999), 4)}), + createColumn>({4, 4, 4, 4, 0, -1, 31, 5, 1, 2, {}, 10, 3, -5}))); ASSERT_COLUMN_EQ( createColumn>({"12,332.100", "-12,332.300", "-1,000.000", "-333.333"}), executeFunction( @@ -62,8 +65,6 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-9999999), 4), FieldType(static_cast(-3333330), 4)}), createConstColumn>(4, 3))); - /// known issue https://github.com/pingcap/tiflash/issues/4891 - /* ASSERT_COLUMN_EQ( createColumn>({"-999.9999", "-1,000", "-1,000", "-999.999900000000000000000000000000", "-999.99990", "-1,000.0", "-1,000.00"}), executeFunction( @@ -74,7 +75,7 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-9999999), 4)), createColumn>({4, 0, -1, 31, 5, 1, 2}))); ASSERT_COLUMN_EQ( - createConstColumn>(1, "-1,000.000"), + createConstColumn(1, "-1,000.000"), executeFunction( func_name, createConstColumn( @@ -82,7 +83,6 @@ class StringFormat : public DB::tests::FunctionTest 1, FieldType(static_cast(-9999999), 4)), createConstColumn>(1, 3))); - */ ASSERT_COLUMN_EQ( createColumn>({"12,332.1000", "12,332", "12,332.300000000000000000000000000000", "-12,332.30000", "-1,000.0", "-333.33", {}}), executeFunction( @@ -108,8 +108,6 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-9999999), 4), FieldType(static_cast(-3333330), 4)}), createConstColumn>(4, 3))); - /// known issue https://github.com/pingcap/tiflash/issues/4891 - /* ASSERT_COLUMN_EQ( createColumn>({"-999.9999", "-1,000", "-999.999900000000000000000000000000", "-999.99990", "-1,000.0", "-1,000.00"}), executeFunction( @@ -120,7 +118,7 @@ class StringFormat : public DB::tests::FunctionTest FieldType(static_cast(-9999999), 4)), createColumn>({4, 0, 31, 5, 1, 2}))); ASSERT_COLUMN_EQ( - createConstColumn>(1, "-1,000.000"), + createConstColumn(1, "-1,000.000"), executeFunction( func_name, createConstColumn( @@ -128,7 +126,6 @@ class StringFormat : public DB::tests::FunctionTest 1, FieldType(static_cast(-9999999), 4)), createConstColumn>(1, 3))); - */ } template diff --git a/dbms/src/Functions/tests/gtest_strings_reverse.cpp b/dbms/src/Functions/tests/gtest_strings_reverse.cpp new file mode 100644 index 00000000000..304a403db83 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_strings_reverse.cpp @@ -0,0 +1,120 @@ +// Copyright 2022 PingCAP, Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include +#include + +#pragma GCC diagnostic pop + +namespace DB::tests +{ +class StringReverse : public DB::tests::FunctionTest +{ +protected: + static ColumnWithTypeAndName toVec(const std::vector & v) + { + return createColumn(v); + } + + static ColumnWithTypeAndName toNullableVec(const std::vector> & v) + { + return createColumn>(v); + } + + static ColumnWithTypeAndName toConst(const String & s) + { + return createConstColumn(1, s); + } +}; +// test reverse +TEST_F(StringReverse, stringReverseTest) +try +{ + std::vector candidate_strings = {"one week's time test", "abcdef", "abcabc", "moc.pacgnip"}; + std::vector reversed_strings = {"tset emit s'keew eno", "fedcba", "cbacba", "pingcap.com"}; + + // test vector + ASSERT_COLUMN_EQ( + toVec(reversed_strings), + executeFunction( + "reverse", + toVec(candidate_strings))); + + // test nullable + ASSERT_COLUMN_EQ( + toNullableVec({"", " ", {}, "pacgnip"}), + executeFunction( + "reverse", + toNullableVec({"", " ", {}, "pingcap"}))); + + // test const + ASSERT_COLUMN_EQ( + toConst("pacgnip"), + executeFunction( + "reverse", + toConst("pingcap"))); + + // test null + ASSERT_COLUMN_EQ( + toConst({}), + executeFunction( + "reverse", + toConst({}))); +} +CATCH + +// test reverseUTF8 +TEST_F(StringReverse, stringReverseUTF8Test) +try +{ + std::vector candidate_strings = {"one week's time test", "abc测试def", "abcテストabc", "ѐёђѓєѕіїјљњћќѝўџ", "+ѐ-ё*ђ/ѓ!є@ѕ#і$@ї%ј……љ&њ(ћ)ќ¥ѝ#ў@џ!^", "αβγδεζηθικλμνξοπρστυφχψωσ", "▲α▼βγ➨δε☎ζη✂θι€κλ♫μν✓ξο✚πρ℉στ♥υφ♖χψ♘ω★σ✕", "թփձջրչճժծքոեռտըւիօպասդֆգհյկլխզղցվբնմշ"}; + std::vector reversed_strings = {"tset emit s'keew eno", "fed试测cba", "cbaトステcba", "џўѝќћњљјїіѕєѓђёѐ", "^!џ@ў#ѝ¥ќ)ћ(њ&љ……ј%ї@$і#ѕ@є!ѓ/ђ*ё-ѐ+", "σωψχφυτσρποξνμλκιθηζεδγβα", "✕σ★ω♘ψχ♖φυ♥τσ℉ρπ✚οξ✓νμ♫λκ€ιθ✂ηζ☎εδ➨γβ▼α▲", "շմնբվցղզխլկյհգֆդսապօիւըտռեոքծժճչրջձփթ"}; + + // test vector + ASSERT_COLUMN_EQ( + toVec(reversed_strings), + executeFunction( + "reverseUTF8", + toVec(candidate_strings))); + + // test nullable + ASSERT_COLUMN_EQ( + toNullableVec({"", " ", {}, "pacgnip"}), + executeFunction( + "reverseUTF8", + toNullableVec({"", " ", {}, "pingcap"}))); + + // test const + ASSERT_COLUMN_EQ( + toConst("pacgnip"), + executeFunction( + "reverseUTF8", + toConst("pingcap"))); + + // test null + ASSERT_COLUMN_EQ( + toConst({}), + executeFunction( + "reverseUTF8", + toConst({}))); +} +CATCH + +} // namespace DB::tests \ No newline at end of file diff --git a/dbms/src/IO/BufferWithOwnMemory.h b/dbms/src/IO/BufferWithOwnMemory.h index 272f4fc5c01..babe2541b33 100644 --- a/dbms/src/IO/BufferWithOwnMemory.h +++ b/dbms/src/IO/BufferWithOwnMemory.h @@ -21,14 +21,6 @@ #include - -namespace ProfileEvents -{ -extern const Event IOBufferAllocs; -extern const Event IOBufferAllocBytes; -} // namespace ProfileEvents - - namespace DB { /** Replacement for std::vector to use in buffers. 
@@ -119,9 +111,6 @@ struct Memory return; } - ProfileEvents::increment(ProfileEvents::IOBufferAllocs); - ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, m_capacity); - size_t new_capacity = align(m_capacity, alignment); m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; diff --git a/dbms/src/IO/ChecksumBuffer.h b/dbms/src/IO/ChecksumBuffer.h index f6d60677a12..b095545ea6e 100644 --- a/dbms/src/IO/ChecksumBuffer.h +++ b/dbms/src/IO/ChecksumBuffer.h @@ -27,7 +27,6 @@ namespace ProfileEvents { // no need to update sync, since write buffers inherit that directly from `WriteBufferFromFileDescriptor` extern const Event WriteBufferFromFileDescriptorWrite; -extern const Event WriteBufferFromFileDescriptorWriteFailed; extern const Event WriteBufferFromFileDescriptorWriteBytes; extern const Event ReadBufferFromFileDescriptorRead; extern const Event ReadBufferFromFileDescriptorReadBytes; @@ -107,7 +106,6 @@ class FramedChecksumWriteBuffer : public WriteBufferFromFileDescriptor } if (unlikely(count == -1)) { - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); if (errno == EINTR) continue; else @@ -386,8 +384,6 @@ class FramedChecksumReadBuffer : public ReadBufferFromFileDescriptor off_t doSeek(off_t offset, int whence) override { - ProfileEvents::increment(ProfileEvents::Seek); - auto & frame = reinterpret_cast &>( *(this->working_buffer.begin() - sizeof(ChecksumFrame))); // align should not fail diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index dd54c1b47a8..58bf47a9298 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -28,14 +28,6 @@ #include - -namespace ProfileEvents -{ -extern const Event ReadCompressedBytes; -extern const Event CompressedReadBufferBlocks; -extern const Event CompressedReadBufferBytes; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -83,8 +75,6 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_ if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) throw Exception("Too large size_compressed. Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); - ProfileEvents::increment(ProfileEvents::ReadCompressedBytes, size_compressed + sizeof(checksum)); - /// Is whole compressed block located in 'compressed_in' buffer? 
if (compressed_in->offset() >= COMPRESSED_BLOCK_HEADER_SIZE && compressed_in->position() + size_compressed - COMPRESSED_BLOCK_HEADER_SIZE <= compressed_in->buffer().end()) @@ -115,9 +105,6 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_ template void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum) { - ProfileEvents::increment(ProfileEvents::CompressedReadBufferBlocks); - ProfileEvents::increment(ProfileEvents::CompressedReadBufferBytes, size_decompressed); - UInt8 method = compressed_buffer[0]; /// See CompressedWriteBuffer.h if (method == static_cast(CompressionMethodByte::LZ4)) diff --git a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp index 90cc6e3ca76..4b3d52f3741 100644 --- a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp @@ -77,7 +77,7 @@ bool ReadBufferFromFileDescriptor::nextImpl() if (profile_callback) { - ProfileInfo info; + ProfileInfo info; // NOLINT info.bytes_requested = internal_buffer.size(); info.bytes_read = res; info.nanoseconds = watch->elapsed(); @@ -120,8 +120,6 @@ off_t ReadBufferFromFileDescriptor::doSeek(off_t offset, int whence) } else { - ProfileEvents::increment(ProfileEvents::Seek); - pos = working_buffer.end(); off_t res = doSeekInFile(new_pos, SEEK_SET); if (-1 == res) @@ -145,7 +143,7 @@ bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds) FD_SET(fd, &fds); timeval timeout = {time_t(timeout_microseconds / 1000000), suseconds_t(timeout_microseconds % 1000000)}; - int res = select(1, &fds, 0, 0, &timeout); + int res = select(1, &fds, nullptr, nullptr, &timeout); if (-1 == res) throwFromErrno("Cannot select", ErrorCodes::CANNOT_SELECT); diff --git a/dbms/src/IO/WriteBuffer.h b/dbms/src/IO/WriteBuffer.h index 361081d1176..0c0fa2cb545 100644 --- a/dbms/src/IO/WriteBuffer.h +++ b/dbms/src/IO/WriteBuffer.h @@ -96,6 +96,24 @@ class WriteBuffer : public BufferBase } } + template + __attribute__((always_inline)) void writeFixed(const T * __restrict from) + { + if (likely(working_buffer.end() - pos >= static_cast(sizeof(T)))) + { + tiflash_compiler_builtin_memcpy(pos, from, sizeof(T)); + pos += sizeof(T); + } + else + { + [&]() __attribute__((noinline)) + { + write(reinterpret_cast(from), sizeof(T)); + } + (); + } + } + inline void write(char x) { diff --git a/dbms/src/IO/WriteBufferFromFileDescriptor.cpp b/dbms/src/IO/WriteBufferFromFileDescriptor.cpp index c18337497b7..49b6d871870 100644 --- a/dbms/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/WriteBufferFromFileDescriptor.cpp @@ -24,7 +24,6 @@ namespace ProfileEvents { extern const Event FileFSync; extern const Event WriteBufferFromFileDescriptorWrite; -extern const Event WriteBufferFromFileDescriptorWriteFailed; extern const Event WriteBufferFromFileDescriptorWriteBytes; } // namespace ProfileEvents @@ -57,7 +56,6 @@ void WriteBufferFromFileDescriptor::nextImpl() if ((-1 == res || 0 == res) && errno != EINTR) { - ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); throwFromErrno("Cannot write to file " + getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } diff --git a/dbms/src/IO/createReadBufferFromFileBase.cpp b/dbms/src/IO/createReadBufferFromFileBase.cpp index 24c9dfb204c..0d129d03a1a 100644 --- a/dbms/src/IO/createReadBufferFromFileBase.cpp +++ b/dbms/src/IO/createReadBufferFromFileBase.cpp @@ -20,13 +20,6 @@ #endif #include - -namespace ProfileEvents -{ 
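Stepping back to the `WriteBuffer::writeFixed` helper added above: it takes a compile-time-sized fast path when the buffer has room, and pushes the rare refill case through the generic `write` kept out of line via a noinline lambda. Below is a rough standalone model of the same fast/slow split, using `std::memcpy` where the patch uses the internal `tiflash_compiler_builtin_memcpy` macro (`MiniBuffer` is illustrative, not TiFlash code):

```cpp
#include <cassert>
#include <cstddef>
#include <cstring>

// A rough model of the fast/slow split in the new WriteBuffer::writeFixed.
struct MiniBuffer
{
    char storage[16] = {};
    char * pos = storage;
    char * end = storage + sizeof(storage);

    // Stand-in for the generic write(): the real one may flush and refill the buffer.
    void writeSlow(const char * data, std::size_t size)
    {
        assert(static_cast<std::size_t>(end - pos) >= size && "toy slow path: no flushing here");
        std::memcpy(pos, data, size);
        pos += size;
    }

    template <typename T>
    void writeFixed(const T * from)
    {
        if (static_cast<std::size_t>(end - pos) >= sizeof(T))
        {
            // Fast path: sizeof(T) is a compile-time constant, so the copy inlines to a few moves.
            std::memcpy(pos, from, sizeof(T));
            pos += sizeof(T);
        }
        else
        {
            // Rare path, kept out of the hot code in the real implementation via a noinline lambda.
            writeSlow(reinterpret_cast<const char *>(from), sizeof(T));
        }
    }
};

int main()
{
    MiniBuffer buf;
    long long x = 42;
    buf.writeFixed(&x); // takes the fast path while 8 bytes still fit
    assert(buf.pos == buf.storage + sizeof(x));
}
```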
-extern const Event CreatedReadBufferOrdinary; -extern const Event CreatedReadBufferAIO; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -46,13 +39,11 @@ createReadBufferFromFileBase( { if ((aio_threshold == 0) || (estimated_size < aio_threshold)) { - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); return std::make_unique(filename_, buffer_size_, flags_, existing_memory_, alignment); } else { #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(_MSC_VER) - ProfileEvents::increment(ProfileEvents::CreatedReadBufferAIO); return std::make_unique(filename_, buffer_size_, flags_, existing_memory_); #else throw Exception("AIO is not implemented yet on MacOS X", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/IO/createWriteBufferFromFileBase.cpp b/dbms/src/IO/createWriteBufferFromFileBase.cpp index 96bf3e65558..0e741eb3e5d 100644 --- a/dbms/src/IO/createWriteBufferFromFileBase.cpp +++ b/dbms/src/IO/createWriteBufferFromFileBase.cpp @@ -19,13 +19,6 @@ #endif #include - -namespace ProfileEvents -{ -extern const Event CreatedWriteBufferOrdinary; -extern const Event CreatedWriteBufferAIO; -} // namespace ProfileEvents - namespace DB { namespace ErrorCodes @@ -45,13 +38,11 @@ WriteBufferFromFileBase * createWriteBufferFromFileBase( { if ((aio_threshold == 0) || (estimated_size < aio_threshold)) { - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferOrdinary); return new WriteBufferFromFile(filename_, buffer_size_, flags_, mode, existing_memory_, alignment); } else { #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(_MSC_VER) - ProfileEvents::increment(ProfileEvents::CreatedWriteBufferAIO); return new WriteBufferAIO(filename_, buffer_size_, flags_, mode, existing_memory_); #else throw Exception("AIO is not implemented yet on MacOS X", ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 6e067b88d81..6cb947a1bfa 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -37,19 +38,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event ExternalAggregationWritePart; -extern const Event ExternalAggregationCompressedBytes; -extern const Event ExternalAggregationUncompressedBytes; -} // namespace ProfileEvents - -namespace CurrentMetrics -{ -extern const Metric QueryThread; -} - namespace DB { namespace ErrorCodes @@ -61,6 +49,11 @@ extern const int CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS; extern const int LOGICAL_ERROR; } // namespace ErrorCodes +namespace FailPoints +{ +extern const char random_aggregate_create_state_failpoint[]; +extern const char random_aggregate_merge_failpoint[]; +} // namespace FailPoints AggregatedDataVariants::~AggregatedDataVariants() { @@ -330,6 +323,7 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const * In order that then everything is properly destroyed, we "roll back" some of the created states. * The code is not very convenient. */ + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_aggregate_create_state_failpoint); aggregate_functions[j]->create(aggregate_data + offsets_of_aggregate_states[j]); } catch (...) 
@@ -658,7 +652,6 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, co NativeBlockOutputStream block_out(compressed_buf, ClickHouseRevision::get(), getHeader(false)); LOG_FMT_DEBUG(log, "Writing part of aggregation data into temporary file {}.", path); - ProfileEvents::increment(ProfileEvents::ExternalAggregationWritePart); /// Flush only two-level data and possibly overflow data. @@ -694,9 +687,6 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, co temporary_files.sum_size_compressed += compressed_bytes; } - ProfileEvents::increment(ProfileEvents::ExternalAggregationCompressedBytes, compressed_bytes); - ProfileEvents::increment(ProfileEvents::ExternalAggregationUncompressedBytes, uncompressed_bytes); - LOG_FMT_TRACE( log, "Written part in {:.3f} sec., {} rows, " @@ -1016,7 +1006,7 @@ Block Aggregator::prepareBlockAndFill( aggregate_columns[i] = header.getByName(aggregate_column_name).type->createColumn(); /// The ColumnAggregateFunction column captures the shared ownership of the arena with the aggregate function states. - ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); + auto & column_aggregate_func = assert_cast(*aggregate_columns[i]); for (auto & pool : data_variants.aggregates_pools) column_aggregate_func.addArena(pool); @@ -1502,7 +1492,7 @@ class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream Block getHeader() const override { return aggregator.getHeader(final); } - ~MergingAndConvertingBlockInputStream() + ~MergingAndConvertingBlockInputStream() override { LOG_FMT_TRACE(&Poco::Logger::get(__PRETTY_FUNCTION__), "Waiting for threads to finish"); @@ -1521,6 +1511,8 @@ class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream if (current_bucket_num >= NUM_BUCKETS) return {}; + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_aggregate_merge_failpoint); + AggregatedDataVariantsPtr & first = data[0]; if (current_bucket_num == -1) @@ -1636,8 +1628,6 @@ class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream void thread(Int32 bucket_num) { - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryThread}; - try { /// TODO: add no_more_keys support maybe diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index a0adef5b50d..44699a324f4 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -77,10 +78,10 @@ namespace ProfileEvents extern const Event ContextLock; } +#include + namespace CurrentMetrics { -extern const Metric ContextLockWait; -extern const Metric MemoryTrackingForMerges; extern const Metric GlobalStorageRunMode; } // namespace CurrentMetrics @@ -308,8 +309,6 @@ Context::~Context() std::unique_lock Context::getLock() const { - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; return std::unique_lock(shared->mutex); } @@ -1879,6 +1878,30 @@ SharedQueriesPtr Context::getSharedQueries() return shared->shared_queries; } +size_t Context::getMaxStreams() const +{ + size_t max_streams = settings.max_threads; + bool is_cop_request = false; + if (dag_context != nullptr) + { + if (dag_context->isTest()) + max_streams = dag_context->initialize_concurrency; + else if (!dag_context->isBatchCop() && !dag_context->isMPPTask()) + { + is_cop_request = true; + max_streams = 1; + } + } + if (max_streams > 
1) + max_streams *= settings.max_streams_to_max_threads_ratio; + if (max_streams == 0) + max_streams = 1; + if (unlikely(max_streams != 1 && is_cop_request)) + /// for cop request, the max_streams should be 1 + throw Exception("Cop request only support running with max_streams = 1"); + return max_streams; +} + SessionCleaner::~SessionCleaner() { try diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 5d5c39263c6..b6e759e364b 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -459,6 +459,8 @@ class Context void reloadDeltaTreeConfig(const Poco::Util::AbstractConfiguration & config); + size_t getMaxStreams() const; + private: /** Check if the current client has access to the specified database. * If access is denied, throw an exception. diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 8e75a64427c..0ab8519e4d0 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -27,12 +27,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event FunctionExecute; -} - namespace DB { namespace ErrorCodes @@ -339,7 +333,6 @@ void ExpressionAction::execute(Block & block) const size_t num_columns_without_result = block.columns(); block.insert({nullptr, result_type, result_name}); - ProfileEvents::increment(ProfileEvents::FunctionExecute); function->execute(block, arguments, num_columns_without_result); break; diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index aa64cf8ca94..782a254925a 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -30,12 +30,6 @@ #include #include - -namespace ProfileEvents -{ -extern const Event InsertQuery; -} - namespace DB { namespace ErrorCodes @@ -54,7 +48,6 @@ InterpreterInsertQuery::InterpreterInsertQuery( , context(context_) , allow_materialized(allow_materialized_) { - ProfileEvents::increment(ProfileEvents::InsertQuery); } @@ -62,7 +55,7 @@ StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) { if (query.table_function) { - auto table_function = typeid_cast(query.table_function.get()); + const auto * table_function = typeid_cast(query.table_function.get()); const auto & factory = TableFunctionFactory::instance(); return factory.get(table_function->name, context)->execute(query.table_function, context); } @@ -71,7 +64,7 @@ StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) return context.getTable(query.database, query.table); } -Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) +Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) // NOLINT { Block table_sample_non_materialized; if (query.is_import) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 51b55f65bd4..3514f915626 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
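For readers skimming the hunk above: the new `Context::getMaxStreams()` encodes a small policy. Here it is restated as a hedged, self-contained sketch (parameter names are mine; the real method reads `settings` and `dag_context` directly):

```cpp
#include <cstddef>
#include <stdexcept>

// Hedged restatement of Context::getMaxStreams(): cop requests are single-streamed,
// tests pin the concurrency, and everything else scales max_threads by the configured ratio.
std::size_t computeMaxStreams(
    std::size_t max_threads,
    double max_streams_to_max_threads_ratio,
    bool has_dag_context,
    bool is_test,
    std::size_t initialize_concurrency,
    bool is_batch_cop_or_mpp)
{
    std::size_t max_streams = max_threads;
    bool is_cop_request = false;
    if (has_dag_context)
    {
        if (is_test)
            max_streams = initialize_concurrency;
        else if (!is_batch_cop_or_mpp)
        {
            is_cop_request = true;
            max_streams = 1; // a plain cop request must run with a single stream
        }
    }
    if (max_streams > 1)
        max_streams = static_cast<std::size_t>(max_streams * max_streams_to_max_threads_ratio);
    if (max_streams == 0)
        max_streams = 1;
    if (max_streams != 1 && is_cop_request)
        throw std::runtime_error("Cop request only support running with max_streams = 1");
    return max_streams;
}
```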
#include +#include #include #include @@ -93,6 +94,12 @@ extern const int SCHEMA_VERSION_ERROR; extern const int UNKNOWN_EXCEPTION; } // namespace ErrorCodes + +namespace FailPoints +{ +extern const char pause_query_init[]; +} // namespace FailPoints + InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, @@ -131,7 +138,14 @@ InterpreterSelectQuery::~InterpreterSelectQuery() = default; void InterpreterSelectQuery::init(const Names & required_result_column_names) { - ProfileEvents::increment(ProfileEvents::SelectQuery); + /// The failpoint pause_query_init should be used together with the failpoint unblock_query_init_after_write, + /// so that the select query is blocked before its init stage until the write action has finished. + /// In tests, we need to enable unblock_query_init_after_write in the test code, + /// and enable pause_query_init before each write statement takes effect. + /// When the write action finishes, pause_query_init is disabled automatically, + /// and then the select query can continue. + /// See multi_alter_with_write.test for an example. + FAIL_POINT_PAUSE(FailPoints::pause_query_init); if (!context.hasQueryContext()) context.setQueryContext(context); @@ -498,13 +512,13 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt { const auto & join = static_cast(*query.join()->table_join); if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) - pipeline.stream_with_non_joined_data = expressions.before_join->createStreamWithNonJoinedDataIfFullOrRightJoin( + pipeline.streams_with_non_joined_data.push_back(expressions.before_join->createStreamWithNonJoinedDataIfFullOrRightJoin( pipeline.firstStream()->getHeader(), 0, 1, - settings.max_block_size); + settings.max_block_size)); - for (auto & stream : pipeline.streams) /// Applies to all sources except stream_with_non_joined_data. + for (auto & stream : pipeline.streams) /// Applies to all sources except streams_with_non_joined_data. stream = std::make_shared(stream, expressions.before_join, /*req_id=*/""); } @@ -589,7 +603,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt if (need_second_distinct_pass || query.limit_length || query.limit_by_expression_list - || pipeline.stream_with_non_joined_data) + || !pipeline.streams_with_non_joined_data.empty()) { need_merge_streams = true; } @@ -973,11 +987,11 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre Aggregator::Params params(header, keys, aggregates, overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode, allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0), allow_to_use_two_level_group_by ? 
settings.group_by_two_level_threshold_bytes : SettingUInt64(0), settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, context.getTemporaryPath()); /// If there are several sources, then we perform parallel aggregation - if (pipeline.streams.size() > 1) + if (pipeline.streams.size() > 1 || pipeline.streams_with_non_joined_data.size() > 1) { - pipeline.firstStream() = std::make_shared( + auto stream = std::make_shared( pipeline.streams, - pipeline.stream_with_non_joined_data, + pipeline.streams_with_non_joined_data, params, file_provider, final, @@ -987,19 +1001,21 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre : static_cast(settings.max_threads), /*req_id=*/""); - pipeline.stream_with_non_joined_data = nullptr; pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); } else { BlockInputStreams inputs; if (!pipeline.streams.empty()) inputs.push_back(pipeline.firstStream()); - else - pipeline.streams.resize(1); - if (pipeline.stream_with_non_joined_data) - inputs.push_back(pipeline.stream_with_non_joined_data); + if (!pipeline.streams_with_non_joined_data.empty()) + inputs.push_back(pipeline.streams_with_non_joined_data.at(0)); + + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); pipeline.firstStream() = std::make_shared( std::make_shared(inputs, /*req_id=*/""), @@ -1007,8 +1023,6 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre file_provider, final, /*req_id=*/""); - - pipeline.stream_with_non_joined_data = nullptr; } } @@ -1230,21 +1244,33 @@ void InterpreterSelectQuery::executeDistinct(Pipeline & pipeline, bool before_or void InterpreterSelectQuery::executeUnion(Pipeline & pipeline) { - /// If there are still several streams, then we combine them into one - if (pipeline.hasMoreThanOneStream()) + switch (pipeline.streams.size() + pipeline.streams_with_non_joined_data.size()) + { + case 0: + break; + case 1: + { + if (pipeline.streams.size() == 1) + break; + // streams_with_non_joined_data's size is 1. + pipeline.streams.push_back(pipeline.streams_with_non_joined_data.at(0)); + pipeline.streams_with_non_joined_data.clear(); + break; + } + default: { - pipeline.firstStream() = std::make_shared>( + BlockInputStreamPtr stream = std::make_shared>( pipeline.streams, - pipeline.stream_with_non_joined_data, + pipeline.streams_with_non_joined_data, max_streams, /*req_id=*/""); - pipeline.stream_with_non_joined_data = nullptr; + ; + pipeline.streams.resize(1); + pipeline.streams_with_non_joined_data.clear(); + pipeline.firstStream() = std::move(stream); + break; } - else if (pipeline.stream_with_non_joined_data) - { - pipeline.streams.push_back(pipeline.stream_with_non_joined_data); - pipeline.stream_with_non_joined_data = nullptr; } } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 474ace7ee84..d1bcec2a3dd 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -95,7 +95,7 @@ class InterpreterSelectQuery : public IInterpreter * It has a special meaning, since reading from it should be done after reading from the main streams. * It is appended to the main streams in UnionBlockInputStream or ParallelAggregatingBlockInputStream. 
*/ - BlockInputStreamPtr stream_with_non_joined_data; + BlockInputStreams streams_with_non_joined_data; BlockInputStreamPtr & firstStream() { return streams.at(0); } @@ -105,13 +105,13 @@ class InterpreterSelectQuery : public IInterpreter for (auto & stream : streams) transform(stream); - if (stream_with_non_joined_data) - transform(stream_with_non_joined_data); + for (auto & stream : streams_with_non_joined_data) + transform(stream); } bool hasMoreThanOneStream() const { - return streams.size() + (stream_with_non_joined_data ? 1 : 0) > 1; + return streams.size() + streams_with_non_joined_data.size() > 1; } }; diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 5e73b1e5f3e..076c290cc9d 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -224,7 +224,7 @@ BlockIO InterpreterSelectWithUnionQuery::execute() } else { - result_stream = std::make_shared>(nested_streams, nullptr, settings.max_threads, /*req_id=*/""); + result_stream = std::make_shared>(nested_streams, BlockInputStreams{}, settings.max_threads, /*req_id=*/""); nested_streams.clear(); } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 820618a6e8b..181ebcaaa64 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -26,9 +27,17 @@ #include #include #include +#include + namespace DB { +namespace FailPoints +{ +extern const char random_join_build_failpoint[]; +extern const char random_join_prob_failpoint[]; +} // namespace FailPoints + namespace ErrorCodes { extern const int UNKNOWN_SET_DATA_VARIANT; @@ -621,6 +630,7 @@ void NO_INLINE insertFromBlockImplTypeCaseWithLock( } for (size_t insert_index = 0; insert_index < segment_index_info.size(); insert_index++) { + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_build_failpoint); size_t segment_index = (insert_index + stream_index) % segment_index_info.size(); if (segment_index == segment_size) { @@ -1513,7 +1523,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const default: throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); } - + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint); for (size_t i = 0; i < num_columns_to_add; ++i) { const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i); diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index fdc009237aa..5ed586c263d 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -31,12 +31,6 @@ #include #include - -namespace CurrentMetrics -{ -extern const Metric Query; -} - namespace DB { class IStorage; @@ -90,8 +84,6 @@ class ProcessListElement QueryPriorities::Handle priority_handle; - CurrentMetrics::Increment num_queries{CurrentMetrics::Query}; - std::atomic is_killed{false}; /// Be careful using it. For example, queries field could be modified concurrently. 
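To summarize the pipeline change above: the single `stream_with_non_joined_data` becomes a vector, since a full/right join may now produce several non-joined sources, and `executeUnion` merges by total count: zero sources do nothing, a single source is promoted into `streams`, and multiple sources are combined through a union stream. A toy restatement of that rule (types here are placeholders for `BlockInputStreamPtr`):

```cpp
#include <memory>
#include <vector>

struct Stream // stand-in for the pointee of BlockInputStreamPtr
{
};
using StreamPtr = std::shared_ptr<Stream>;
using Streams = std::vector<StreamPtr>;

// Toy restatement of the new executeUnion() rule over the two stream lists.
void unionStreams(Streams & streams, Streams & streams_with_non_joined_data)
{
    switch (streams.size() + streams_with_non_joined_data.size())
    {
    case 0:
        break; // nothing to combine
    case 1:
    {
        if (streams.empty())
        {
            // The only source is a non-joined one: promote it into the main list.
            streams.push_back(streams_with_non_joined_data.front());
            streams_with_non_joined_data.clear();
        }
        break;
    }
    default:
    {
        // Several sources: combine them into one (a UnionBlockInputStream in the real code).
        StreamPtr combined = std::make_shared<Stream>(); // placeholder for the union stream
        streams.assign(1, combined);
        streams_with_non_joined_data.clear();
        break;
    }
    }
}
```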
diff --git a/dbms/src/Interpreters/QueryPriorities.h b/dbms/src/Interpreters/QueryPriorities.h index 5f34ae616c7..ca01e4f0a6c 100644 --- a/dbms/src/Interpreters/QueryPriorities.h +++ b/dbms/src/Interpreters/QueryPriorities.h @@ -23,13 +23,6 @@ #include #include - -namespace CurrentMetrics -{ -extern const Metric QueryPreempted; -} - - namespace DB { /** Implements query priorities in very primitive way. @@ -95,7 +88,6 @@ class QueryPriorities if (!found) return true; - CurrentMetrics::Increment metric_increment{CurrentMetrics::QueryPreempted}; if (std::cv_status::timeout == condvar.wait_for(lock, cur_timeout)) return false; else diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index b827cb8b6a0..21366c336a8 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -272,7 +272,7 @@ struct Settings M(SettingUInt64, dt_segment_delta_small_column_file_size, 8388608, "Determine whether a column file in delta is small or not. 8MB by default.") \ M(SettingUInt64, dt_segment_stable_pack_rows, DEFAULT_MERGE_BLOCK_SIZE, "Expected stable pack rows in DeltaTree Engine.") \ M(SettingFloat, dt_segment_wait_duration_factor, 1, "The factor of wait duration in a write stall.") \ - M(SettingUInt64, dt_bg_gc_check_interval, 60, "Background gc thread check interval, the unit is second.") \ + M(SettingUInt64, dt_bg_gc_check_interval, 60, "Background gc thread check interval, the unit is second.") \ M(SettingInt64, dt_bg_gc_max_segments_to_check_every_round, 100, "Max segments to check in every gc round, value less than or equal to 0 means gc no segments.") \ M(SettingFloat, dt_bg_gc_ratio_threhold_to_trigger_gc, 1.2, "Trigger segment's gc when the ratio of invalid version exceed this threhold. Values smaller than or equal to 1.0 means gc all " \ "segments") \ @@ -361,10 +361,12 @@ struct Settings M(SettingUInt64, async_pollers_per_cq, 200, "grpc async pollers per cqs") \ M(SettingUInt64, async_cqs, 1, "grpc async cqs") \ M(SettingUInt64, preallocated_request_count_per_poller, 20, "grpc preallocated_request_count_per_poller") \ + \ M(SettingUInt64, manual_compact_pool_size, 1, "The number of worker threads to handle manual compact requests.") \ M(SettingUInt64, manual_compact_max_concurrency, 10, "Max concurrent tasks. It should be larger than pool size.") \ M(SettingUInt64, manual_compact_more_until_ms, 60000, "Continuously compact more segments until reaching specified elapsed time. 
If 0 is specified, only one segment will be compacted each round.") \ M(SettingBool, enable_planner, true, "Enable planner") + // clang-format on #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) TYPE NAME{DEFAULT}; diff --git a/dbms/src/Interpreters/WindowDescription.cpp b/dbms/src/Interpreters/WindowDescription.cpp index 2ab407bb18e..09d96411673 100644 --- a/dbms/src/Interpreters/WindowDescription.cpp +++ b/dbms/src/Interpreters/WindowDescription.cpp @@ -44,7 +44,7 @@ WindowFrame::FrameType getFrameTypeFromTipb(const tipb::WindowFrameType & type) return WindowFrame::FrameType::Groups; default: throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Unknowed frame type {}", + "Unknown frame type {}", type); } } @@ -60,4 +60,38 @@ void WindowDescription::setWindowFrame(const tipb::WindowFrame & frame_) frame.end_preceding = (frame_.end().type() == tipb::WindowBoundType::Preceding); frame.is_default = false; } + +String frameTypeToString(const WindowFrame::FrameType & type) +{ + switch (type) + { + case WindowFrame::FrameType::Rows: + return "Rows"; + case WindowFrame::FrameType::Groups: + return "Groups"; + case WindowFrame::FrameType::Ranges: + return "Ranges"; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unknown frame type {}", + type); + } +} + +String boundaryTypeToString(const WindowFrame::BoundaryType & type) +{ + switch (type) + { + case WindowFrame::BoundaryType::Unbounded: + return "Unbounded"; + case WindowFrame::BoundaryType::Current: + return "Current"; + case WindowFrame::BoundaryType::Offset: + return "Offset"; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unknown boundary type {}", + type); + } +} } // namespace DB diff --git a/dbms/src/Interpreters/WindowDescription.h b/dbms/src/Interpreters/WindowDescription.h index cdcade1b750..a3c2bac5747 100644 --- a/dbms/src/Interpreters/WindowDescription.h +++ b/dbms/src/Interpreters/WindowDescription.h @@ -87,6 +87,10 @@ struct WindowFrame && other.end_preceding == end_preceding; } }; + +String frameTypeToString(const WindowFrame::FrameType & type); +String boundaryTypeToString(const WindowFrame::BoundaryType & type); + class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; struct WindowDescription diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 96cfc0a58ae..78ad4b41ce6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include #include @@ -53,7 +54,10 @@ extern const int LOGICAL_ERROR; extern const int QUERY_IS_TOO_LARGE; extern const int INTO_OUTFILE_NOT_ALLOWED; } // namespace ErrorCodes - +namespace FailPoints +{ +extern const char random_interpreter_failpoint[]; +} // namespace FailPoints namespace { void checkASTSizeLimits(const IAST & ast, const Settings & settings) @@ -226,6 +230,7 @@ std::tuple executeQueryImpl( context.setProcessListElement(&process_list_entry->get()); } + FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_interpreter_failpoint); auto interpreter = query_src.interpreter(context, stage); res = interpreter->execute(); diff --git a/dbms/src/Server/CLIService.h b/dbms/src/Server/CLIService.h index 9078fa991f3..0acffebb577 100644 --- a/dbms/src/Server/CLIService.h +++ b/dbms/src/Server/CLIService.h @@ -126,6 +126,8 @@ CLIService::TiFlashProxyConfig::TiFlashProxyConfig(Poco::Util::Layer args.push_back(v.first.data()); args.push_back(v.second.data()); } + // Start the decryption service without starting the raftstore service + args.push_back("--only-decryption"); is_proxy_runnable = true; } template diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index 63cf6d0e1f9..2948bb076db 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -22,6 +22,7 @@ option(ENABLE_CLICKHOUSE_SERVER "Enable server" ${ENABLE_CLICKHOUSE_ALL}) option(ENABLE_CLICKHOUSE_CLIENT "Enable client" ${ENABLE_CLICKHOUSE_ALL}) option(ENABLE_TIFLASH_DTTOOL "Enable dttool: tools to manage dmfile" ${ENABLE_CLICKHOUSE_ALL}) option(ENABLE_TIFLASH_DTWORKLOAD "Enable dtworkload: tools to test and stress DeltaTree" ${ENABLE_CLICKHOUSE_ALL}) +option(ENABLE_TIFLASH_PAGEWORKLOAD "Enable pageworkload: tools to test and stress PageStorage" ${ENABLE_CLICKHOUSE_ALL}) option(ENABLE_TIFLASH_PAGECTL "Enable pagectl: tools to debug page storage" ${ENABLE_CLICKHOUSE_ALL}) configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h) @@ -33,6 +34,7 @@ add_library (clickhouse-server-lib NotFoundHandler.cpp PingRequestHandler.cpp RootRequestHandler.cpp + ServerInfo.cpp Server.cpp StatusFile.cpp TCPHandler.cpp @@ -136,6 +138,9 @@ endif () if (ENABLE_TIFLASH_DTWORKLOAD) target_link_libraries(tiflash dt-workload-lib) endif () +if (ENABLE_TIFLASH_PAGEWORKLOAD) + target_link_libraries(tiflash page-workload-lib) +endif() if (ENABLE_TIFLASH_PAGECTL) target_link_libraries(tiflash page-ctl-lib) endif () diff --git a/dbms/src/Server/HTTPHandler.h b/dbms/src/Server/HTTPHandler.h index bd06d56bd4e..74b5cc4b4c7 100644 --- a/dbms/src/Server/HTTPHandler.h +++ b/dbms/src/Server/HTTPHandler.h @@ -14,24 +14,20 @@ #pragma once -#include "IServer.h" - -#include - #include #include +#include +#include "IServer.h" -namespace CurrentMetrics + +namespace Poco { - extern const Metric HTTPConnection; +class Logger; } -namespace Poco { class Logger; } - namespace DB { - class WriteBufferFromHTTPServerResponse; @@ -69,11 +65,9 @@ class HTTPHandler : public Poco::Net::HTTPRequestHandler IServer & server; Poco::Logger * log; - /// It is the name of the server that will be sent in an http-header X-ClickHouse-Server-Display-Name. + /// It is the name of the server that will be sent in an http-header X-ClickHouse-Server-Display-Name. String server_display_name; - CurrentMetrics::Increment metric_increment{CurrentMetrics::HTTPConnection}; - /// Also initializes 'used_output'. 
void processQuery( Poco::Net::HTTPServerRequest & request, @@ -91,4 +85,4 @@ class HTTPHandler : public Poco::Net::HTTPRequestHandler void pushDelayedResults(Output & used_output); }; -} +} // namespace DB diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 04676ef969d..a398aa9c74d 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -12,14 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "Server.h" - #include #include #include #include #include #include +#include #include #include #include @@ -56,6 +55,8 @@ #include #include #include +#include +#include #include #include #include @@ -72,7 +73,6 @@ #include #include #include -#include #include #include @@ -152,6 +152,7 @@ void loadMiConfig(Logger * log) } #undef TRY_LOAD_CONF #endif + namespace { [[maybe_unused]] void tryLoadBoolConfigFromEnv(Poco::Logger * log, bool & target, const char * name) @@ -179,8 +180,9 @@ namespace namespace CurrentMetrics { -extern const Metric Revision; -} +extern const Metric LogicalCPUCores; +extern const Metric MemoryCapacity; +} // namespace CurrentMetrics namespace DB { @@ -190,6 +192,7 @@ extern const int NO_ELEMENTS_IN_CONFIG; extern const int SUPPORT_IS_DISABLED; extern const int ARGUMENT_OUT_OF_BOUND; extern const int INVALID_CONFIG_PARAMETER; +extern const int IP_ADDRESS_NOT_ALLOWED; } // namespace ErrorCodes namespace Debug @@ -627,6 +630,10 @@ class Server::FlashGrpcServerHolder } } flash_grpc_server = builder.BuildAndStart(); + if (!flash_grpc_server) + { + throw Exception("Exception happened when starting the grpc server, flash.service_addr may be invalid, flash.service_addr is " + raft_config.flash_server_addr, ErrorCodes::IP_ADDRESS_NOT_ALLOWED); + } LOG_FMT_INFO(log, "Flash grpc server listening on [{}]", raft_config.flash_server_addr); Debug::setServiceAddr(raft_config.flash_server_addr); if (enable_async_server) @@ -967,7 +974,10 @@ class Server::TcpHttpServersHolder LOG_DEBUG(log, debug_msg); } - const std::vector> & getServers() const { return servers; } + const std::vector> & getServers() const + { + return servers; + } private: Server & server; @@ -983,6 +993,7 @@ int Server::main(const std::vector & /*args*/) Poco::Logger * log = &logger(); #ifdef FIU_ENABLE fiu_init(0); // init failpoint + FailPointHelper::initRandomFailPoints(config(), log); #endif UpdateMallocConfig(log); @@ -1002,7 +1013,6 @@ int Server::main(const std::vector & /*args*/) #ifdef TIFLASH_ENABLE_SVE_SUPPORT tryLoadBoolConfigFromEnv(log, simd_option::ENABLE_SVE, "TIFLASH_ENABLE_SVE"); #endif - registerFunctions(); registerAggregateFunctions(); registerWindowFunctions(); @@ -1049,7 +1059,22 @@ int Server::main(const std::vector & /*args*/) LOG_FMT_INFO(log, "tiflash proxy thread is joined"); }); - CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get()); + /// get CPU/memory/disk info of this server + if (tiflash_instance_wrap.proxy_helper) + { + diagnosticspb::ServerInfoRequest request; + request.set_tp(static_cast(1)); + diagnosticspb::ServerInfoResponse response; + std::string req = request.SerializeAsString(); + auto * helper = tiflash_instance_wrap.proxy_helper; + helper->fn_server_info(helper->proxy_ptr, strIntoView(&req), &response); + server_info.parseSysInfo(response); + LOG_FMT_INFO(log, "ServerInfo: {}", server_info.debugString()); + } + else + { + LOG_FMT_INFO(log, "TiFlashRaftProxyHelper is null, failed to get server info"); + } // print necessary grpc log.
grpc_log = &Poco::Logger::get("grpc"); @@ -1408,12 +1433,14 @@ int Server::main(const std::vector & /*args*/) { // on ARM processors it can show only enabled at current moment cores + CurrentMetrics::set(CurrentMetrics::LogicalCPUCores, server_info.cpu_info.logical_cores); + CurrentMetrics::set(CurrentMetrics::MemoryCapacity, server_info.memory_info.capacity); LOG_FMT_INFO( log, - "Available RAM = {}; physical cores = {}; threads = {}.", - formatReadableSizeWithBinarySuffix(getMemoryAmount()), - getNumberOfPhysicalCPUCores(), - std::thread::hardware_concurrency()); + "Available RAM = {}; physical cores = {}; logical cores = {}.", + server_info.memory_info.capacity, + server_info.cpu_info.physical_cores, + server_info.cpu_info.logical_cores); } LOG_FMT_INFO(log, "Ready for connections."); diff --git a/dbms/src/Server/Server.h b/dbms/src/Server/Server.h index 278349f2aa4..07c5b955a92 100644 --- a/dbms/src/Server/Server.h +++ b/dbms/src/Server/Server.h @@ -14,10 +14,10 @@ #pragma once +#include +#include #include -#include "IServer.h" - /** Server provides three interfaces: * 1. HTTP - simple interface for any applications. * 2. TCP - interface for native clickhouse-client and for server to server internal communications. @@ -39,7 +39,7 @@ class Server : public BaseDaemon return BaseDaemon::config(); } - virtual const TiFlashSecurityConfig & securityConfig() const override { return security_config; }; + const TiFlashSecurityConfig & securityConfig() const override { return security_config; }; Poco::Logger & logger() const override { @@ -70,6 +70,8 @@ class Server : public BaseDaemon TiFlashSecurityConfig security_config; + ServerInfo server_info; + class FlashGrpcServerHolder; class TcpHttpServersHolder; }; diff --git a/dbms/src/Server/ServerInfo.cpp b/dbms/src/Server/ServerInfo.cpp new file mode 100644 index 00000000000..9cba40c4775 --- /dev/null +++ b/dbms/src/Server/ServerInfo.cpp @@ -0,0 +1,199 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include + +namespace DB +{ +using diagnosticspb::ServerInfoItem; +using diagnosticspb::ServerInfoResponse; + +void ServerInfo::parseCPUInfo(const diagnosticspb::ServerInfoItem & cpu_info_item) +{ + for (const auto & pair : cpu_info_item.pairs()) + { + const auto & key = pair.key(); + if (key == "cpu-logical-cores") + { + cpu_info.logical_cores = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-physical-cores") + { + cpu_info.physical_cores = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-frequency") + { + cpu_info.frequency = pair.value(); + } + else if (key == "l1-cache-size") + { + cpu_info.l1_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l1-cache-line-size") + { + cpu_info.l1_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "l2-cache-size") + { + cpu_info.l2_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l2-cache-line-size") + { + cpu_info.l2_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "l3-cache-size") + { + cpu_info.l3_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l3-cache-line-size") + { + cpu_info.l3_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-arch") + { + cpu_info.arch = pair.value(); + } + } +} + +void ServerInfo::parseDiskInfo(const diagnosticspb::ServerInfoItem & disk_info_item) +{ + Disk disk; + disk.name = disk_info_item.name(); + for (const auto & pair : disk_info_item.pairs()) + { + const auto & key = pair.key(); + if (key == "type") + { + if (pair.value() == "HDD") + { + disk.disk_type = Disk::DiskType::HDD; + } + else if (pair.value() == "SSD") + { + disk.disk_type = Disk::DiskType::SSD; + } + else + { + disk.disk_type = Disk::DiskType::UNKNOWN; + } + } + else if (key == "total") + { + disk.total_space = static_cast(std::stoull(pair.value())); + } + else if (key == "free") + { + disk.free_space = static_cast(std::stoull(pair.value())); + } + else if (key == "path") + { + disk.mount_point = pair.value(); + } + else if (key == "fstype") + { + disk.fs_type = pair.value(); + } + } + disk_infos.push_back(disk); +} + +void ServerInfo::parseMemoryInfo(const diagnosticspb::ServerInfoItem & memory_info_item) +{ + for (const auto & pair : memory_info_item.pairs()) + { + if (pair.key() == "capacity") + { + memory_info.capacity = std::stoull(pair.value()); + } + } +} + +void ServerInfo::parseSysInfo(const diagnosticspb::ServerInfoResponse & sys_info_response) +{ + for (const auto & item : sys_info_response.items()) + { + const auto & tp = item.tp(); + if (tp == "cpu") + { + parseCPUInfo(item); + } + else if (tp == "disk") + { + parseDiskInfo(item); + } + else if (tp == "memory") + { + parseMemoryInfo(item); + } + } +} + +String ServerInfo::debugString() const +{ + FmtBuffer fmt_buf; + // append cpu info + fmt_buf.fmtAppend("CPU: \n" + " logical cores: {}\n" + " physical cores: {}\n" + " frequency: {}\n" + " l1 cache size: {}\n" + " l1 cache line size: {}\n" + " l2 cache size: {}\n" + " l2 cache line size: {}\n" + " l3 cache size: {}\n" + " l3 cache line size: {}\n" + " arch: {}\n", + cpu_info.logical_cores, + cpu_info.physical_cores, + cpu_info.frequency, + cpu_info.l1_cache_size, + cpu_info.l1_cache_line_size, + cpu_info.l2_cache_size, + cpu_info.l2_cache_line_size, + cpu_info.l3_cache_size, + cpu_info.l3_cache_line_size, + cpu_info.arch); + // append disk info + { + const static String disk_type_str[] = {"UNKNOWN", "HDD", "SSD"}; + for 
(const auto & disk_info : disk_infos) + { + fmt_buf.fmtAppend("Disk: \n" + " name: {}\n" + " type: {}\n" + " total space: {}\n" + " free space: {}\n" + " mount point: {}\n" + " fstype: {}\n", + disk_info.name, + disk_type_str[static_cast(disk_info.disk_type)], + disk_info.total_space, + disk_info.free_space, + disk_info.mount_point, + disk_info.fs_type); + } + } + // append memory info + fmt_buf.fmtAppend("Memory: \n" + " capacity: {}\n", + memory_info.capacity); + + return fmt_buf.toString(); +} + +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Server/ServerInfo.h b/dbms/src/Server/ServerInfo.h new file mode 100644 index 00000000000..9663bd37568 --- /dev/null +++ b/dbms/src/Server/ServerInfo.h @@ -0,0 +1,99 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#ifdef __clang__ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif +#include +#pragma GCC diagnostic pop + +namespace DB +{ +class ServerInfo +{ +public: + struct CPUInfo + { + /// number of logical CPU cores + UInt16 logical_cores = std::thread::hardware_concurrency(); + /// number of physical CPU cores + UInt16 physical_cores = getNumberOfPhysicalCPUCores(); + /// L1 cache size + /// units: Byte + UInt32 l1_cache_size = 16384; // 16KB (typical value) + /// L2 cache size + /// units: Byte + UInt32 l2_cache_size = 65536; // 64KB (typical value) + /// L3 cache size + /// units: Byte + UInt32 l3_cache_size = 2097152; // 2MB (typical value) + /// L1 cache line size + UInt8 l1_cache_line_size = 64; // 64B (typical value) + /// L2 cache line size + UInt8 l2_cache_line_size = 64; // 64B (typical value) + /// L3 cache line size + UInt8 l3_cache_line_size = 64; // 64B (typical value) + /// CPU architecture + String arch; + /// CPU frequency + String frequency; + }; + + struct Disk + { + String name; + enum DiskType + { + UNKNOWN = 0, + HDD = 1, + SSD = 2 + }; + DiskType disk_type; + UInt64 total_space = 0; + UInt64 free_space = 0; + String mount_point; + String fs_type; + }; + using DiskInfo = std::vector; + + struct MemoryInfo + { + /// total memory size + /// units: Byte + UInt64 capacity = getMemoryAmount(); + }; + + ServerInfo() = default; + ~ServerInfo() = default; + void parseCPUInfo(const diagnosticspb::ServerInfoItem & cpu_info_item); + void parseDiskInfo(const diagnosticspb::ServerInfoItem & disk_info_item); + void parseMemoryInfo(const diagnosticspb::ServerInfoItem & memory_info_item); + void parseSysInfo(const diagnosticspb::ServerInfoResponse & sys_info_response); + String debugString() const; + + CPUInfo cpu_info; + DiskInfo disk_infos; + MemoryInfo memory_info; +}; +} // namespace DB diff --git a/dbms/src/Server/TCPHandler.h b/dbms/src/Server/TCPHandler.h index ed0af52dc98..2fde0b11d9b 100644 --- a/dbms/src/Server/TCPHandler.h +++ 
b/dbms/src/Server/TCPHandler.h @@ -29,11 +29,6 @@ #include "IServer.h" -namespace CurrentMetrics -{ -extern const Metric TCPConnection; -} - namespace Poco { class Logger; @@ -131,8 +126,6 @@ class TCPHandler : public Poco::Net::TCPServerConnection /// At the moment, only one ongoing query in the connection is supported at a time. QueryState state; - CurrentMetrics::Increment metric_increment{CurrentMetrics::TCPConnection}; - /// It is the name of the server that will be sent to the client. String server_display_name; diff --git a/dbms/src/Server/config_tools.h.in b/dbms/src/Server/config_tools.h.in index 61aa3f41591..03a478a6473 100644 --- a/dbms/src/Server/config_tools.h.in +++ b/dbms/src/Server/config_tools.h.in @@ -6,4 +6,5 @@ #cmakedefine01 ENABLE_CLICKHOUSE_CLIENT #cmakedefine01 ENABLE_TIFLASH_DTTOOL #cmakedefine01 ENABLE_TIFLASH_DTWORKLOAD +#cmakedefine01 ENABLE_TIFLASH_PAGEWORKLOAD #cmakedefine01 ENABLE_TIFLASH_PAGECTL diff --git a/dbms/src/Server/main.cpp b/dbms/src/Server/main.cpp index 11cccf84729..dbcaa4f38fc 100644 --- a/dbms/src/Server/main.cpp +++ b/dbms/src/Server/main.cpp @@ -36,7 +36,10 @@ #include #endif #if ENABLE_TIFLASH_DTWORKLOAD -#include +#include +#endif +#if ENABLE_TIFLASH_PAGEWORKLOAD +#include #endif #if ENABLE_TIFLASH_PAGECTL #include @@ -107,6 +110,9 @@ std::pair clickhouse_applications[] = { #if ENABLE_TIFLASH_DTWORKLOAD {"dtworkload", DB::DM::tests::DTWorkload::mainEntry}, #endif +#if ENABLE_TIFLASH_PAGEWORKLOAD + {"pageworkload", DB::PS::tests::StressWorkload::mainEntry}, +#endif #if ENABLE_TIFLASH_PAGECTL {"pagectl", DB::PageStorageCtl::mainEntry}, #endif diff --git a/dbms/src/Storages/BackgroundProcessingPool.cpp b/dbms/src/Storages/BackgroundProcessingPool.cpp index 96c2c6cc622..45ba032bf53 100644 --- a/dbms/src/Storages/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/BackgroundProcessingPool.cpp @@ -42,7 +42,6 @@ inline static pid_t getTid() namespace CurrentMetrics { -extern const Metric BackgroundPoolTask; extern const Metric MemoryTrackingInBackgroundProcessingPool; } // namespace CurrentMetrics @@ -215,8 +214,6 @@ void BackgroundProcessingPool::threadFunction() continue; { - CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundPoolTask}; - bool done_work = false; if (!task->multi) { diff --git a/dbms/src/Storages/CMakeLists.txt b/dbms/src/Storages/CMakeLists.txt index 90cc7a01d5b..68a2e6c9a74 100644 --- a/dbms/src/Storages/CMakeLists.txt +++ b/dbms/src/Storages/CMakeLists.txt @@ -15,16 +15,15 @@ add_subdirectory (System) add_subdirectory (Page) add_subdirectory (DeltaMerge/File/dtpb) -add_subdirectory (DeltaMerge/tools) +add_subdirectory (DeltaMerge/workload) +add_subdirectory (Page/workload) if (ENABLE_TESTS) add_subdirectory (tests EXCLUDE_FROM_ALL) add_subdirectory (Transaction/tests EXCLUDE_FROM_ALL) add_subdirectory (Page/V2/tests EXCLUDE_FROM_ALL) - if (ENABLE_V3_PAGESTORAGE) - add_subdirectory (Page/V3 EXCLUDE_FROM_ALL) - add_subdirectory (Page/V3/tests EXCLUDE_FROM_ALL) - endif () + add_subdirectory (Page/V3 EXCLUDE_FROM_ALL) + add_subdirectory (Page/V3/tests EXCLUDE_FROM_ALL) add_subdirectory (DeltaMerge/tests EXCLUDE_FROM_ALL) endif () diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp index 132732d6989..8a69b7573e2 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.cpp @@ -141,6 +141,19 @@ bool DeltaValueSpace::ingestColumnFiles(DMContext & /*context*/, const RowKeyRan 
bool DeltaValueSpace::flush(DMContext & context) { + bool v = false; + if (!is_flushing.compare_exchange_strong(v, true)) + { + // another thread is flushing, just return. + LOG_FMT_DEBUG(log, "{}, Flush stop because other thread is flushing", simpleInfo()); + return false; + } + SCOPE_EXIT({ + bool v = true; + if (!is_flushing.compare_exchange_strong(v, false)) + throw Exception(simpleInfo() + " is expected to be flushing", ErrorCodes::LOGICAL_ERROR); + }); + LOG_FMT_DEBUG(log, "{}, Flush start", info()); /// We have two types of data needed to flush to disk: diff --git a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h index 8f14682caa8..04fb97b3004 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/Delta/DeltaValueSpace.h @@ -77,6 +77,11 @@ class DeltaValueSpace /// Note that those things can not be done at the same time. std::atomic_bool is_updating = false; + /// Note that it's safe to run multiple flushes concurrently, but only one of them can succeed, + /// and the other threads' work is just a waste of resources. + /// So we only allow one flush task to run at any time, to avoid wasting resources. + std::atomic_bool is_flushing = false; + std::atomic last_try_flush_rows = 0; std::atomic last_try_flush_bytes = 0; std::atomic last_try_compact_column_files = 0; @@ -159,6 +164,8 @@ class DeltaValueSpace size_t getTotalCacheBytes() const; size_t getValidCacheRows() const; + bool isFlushing() const { return is_flushing; } + bool isUpdating() const { return is_updating; } bool tryLockUpdating
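
For reference, the single-flusher guard introduced above can be reduced to a minimal standalone sketch, assuming only the C++ standard library (names are hypothetical; the real code resets the flag via `SCOPE_EXIT` and throws TiFlash's own `Exception` type):

```cpp
#include <atomic>

// One flush at a time: the first caller wins the CAS and does the work;
// concurrent callers return false immediately instead of duplicating it.
struct FlushGuard
{
    std::atomic_bool is_flushing{false};

    bool tryFlush()
    {
        bool expected = false;
        if (!is_flushing.compare_exchange_strong(expected, true))
            return false; // another thread is flushing; caller may retry later

        try
        {
            // ... persist cached column files to disk ...
        }
        catch (...)
        {
            is_flushing.store(false); // plays the role of SCOPE_EXIT above
            throw;
        }
        is_flushing.store(false);
        return true;
    }
};
```
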
diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index a74404f3dbb..73ad22d6d1f 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -979,14 +980,14 @@ void DeltaMergeStore::deleteRange(const Context & db_context, const DB::Settings checkSegmentUpdate(dm_context, segment, ThreadType::Write); } -void DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRange & range) +bool DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRange & range, bool try_until_succeed) { RowKeyRange cur_range = range; while (!cur_range.none()) { RowKeyRange segment_range; - // Keep trying until succeeded. + // Keep trying until it succeeds, if needed. while (true) { SegmentPtr segment; @@ -1009,10 +1010,15 @@ void DeltaMergeStore::flushCache(const DMContextPtr & dm_context, const RowKeyRa { break; } + else if (!try_until_succeed) + { + return false; + } } cur_range.setStart(segment_range.end); } + return true; } void DeltaMergeStore::mergeDeltaAll(const Context & context) @@ -1054,6 +1060,13 @@ std::optional DeltaMergeStore::mergeDeltaBySegment(const Contex segment = segment_it->second; } + if (!segment->flushCache(*dm_context)) + { + // If the flush failed, it means there are parallel updates to the segment in the background. + // In this case, we try again. + continue; + } + const auto new_segment = segmentMergeDelta(*dm_context, segment, run_thread); if (new_segment) { @@ -1137,9 +1150,11 @@ BlockInputStreams DeltaMergeStore::readRaw(const Context & db_context, } fiu_do_on(FailPoints::force_slow_page_storage_snapshot_release, { - std::thread thread_hold_snapshots([tasks]() { + std::thread thread_hold_snapshots([this, tasks]() { + LOG_FMT_WARNING(log, "failpoint force_slow_page_storage_snapshot_release begin"); std::this_thread::sleep_for(std::chrono::seconds(5 * 60)); (void)tasks; + LOG_FMT_WARNING(log, "failpoint force_slow_page_storage_snapshot_release end"); }); thread_hold_snapshots.detach(); }); @@ -1344,6 +1359,12 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const && (delta_rows - delta_last_try_flush_rows >= delta_cache_limit_rows || delta_bytes - delta_last_try_flush_bytes >= delta_cache_limit_bytes); bool should_foreground_flush = unsaved_rows >= delta_cache_limit_rows * 3 || unsaved_bytes >= delta_cache_limit_bytes * 3; + /// For write threads, we want to avoid a foreground flush blocking the process of applying raft commands. + /// So we increase the foreground flush threshold for write threads. + if (thread_type == ThreadType::Write) + { + should_foreground_flush = unsaved_rows >= delta_cache_limit_rows * 10 || unsaved_bytes >= delta_cache_limit_bytes * 10; + } bool should_background_merge_delta = ((delta_check_rows >= delta_limit_rows || delta_check_bytes >= delta_limit_bytes) // && (delta_rows - delta_last_try_merge_delta_rows >= delta_cache_limit_rows @@ -1401,9 +1422,16 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const } else if (should_background_flush) { - delta_last_try_flush_rows = delta_rows; - delta_last_try_flush_bytes = delta_bytes; - try_add_background_task(BackgroundTask{TaskType::Flush, dm_context, segment, {}}); + /// It's meaningless to add more flush tasks if the segment is already flushing, + /// because only one flush task can proceed at any time. + /// After the current flush task finishes, + /// it will call `checkSegmentUpdate` again to check whether there are more flush tasks to do. + if (!segment->isFlushing()) + { + delta_last_try_flush_rows = delta_rows; + delta_last_try_flush_bytes = delta_bytes; + try_add_background_task(BackgroundTask{TaskType::Flush, dm_context, segment, {}}); + } } } @@ -1499,7 +1527,12 @@ void DeltaMergeStore::checkSegmentUpdate(const DMContextPtr & dm_context, const return false; }; auto try_bg_compact = [&]() { - if (should_compact) + /// Compact should be a really low-priority task. + /// If the segment is flushing, + /// we should avoid adding a background compact task, to reduce lock contention on the segment and save disk throughput. + /// After the current flush task completes, + /// it will call `checkSegmentUpdate` again to check whether there are other kinds of tasks to do. + if (should_compact && !segment->isFlushing()) { delta_last_try_compact_column_files = column_file_count; try_add_background_task(BackgroundTask{TaskType::Compact, dm_context, segment, {}});
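
The flush-scheduling policy in `checkSegmentUpdate` above can be summarized by a hypothetical, simplified model (this is not TiFlash's actual API; the real code additionally tracks "last try" counters and delta-merge/split/compact conditions):

```cpp
#include <cstddef>

enum class ThreadType { Write, Read, BG };

struct FlushDecision
{
    bool foreground = false;
    bool background = false;
};

FlushDecision decideFlush(size_t unsaved_rows, size_t unsaved_bytes,
                          size_t cache_limit_rows, size_t cache_limit_bytes,
                          ThreadType thread_type, bool segment_is_flushing)
{
    FlushDecision d;
    // Write (raft-apply) threads tolerate 10x the cache limit before a
    // blocking foreground flush; other threads flush in the foreground at 3x.
    const size_t factor = (thread_type == ThreadType::Write) ? 10 : 3;
    d.foreground = unsaved_rows >= cache_limit_rows * factor
        || unsaved_bytes >= cache_limit_bytes * factor;
    // Scheduling a background flush while one is already running is wasted
    // work, since only one flush can hold the is_flushing guard at a time.
    if (!d.foreground && !segment_is_flushing)
        d.background = unsaved_rows >= cache_limit_rows || unsaved_bytes >= cache_limit_bytes;
    return d;
}
```
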
diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 705481ca107..57c2a42b807 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -367,14 +367,14 @@ class DeltaMergeStore : private boost::noncopyable const SegmentIdSet & read_segments = {}, size_t extra_table_id_index = InvalidColumnID); - /// Force flush all data to disk. - void flushCache(const Context & context, const RowKeyRange & range) + /// Try to flush all data in `range` to disk and return whether the task succeeds. + bool flushCache(const Context & context, const RowKeyRange & range, bool try_until_succeed = true) { auto dm_context = newDMContext(context, context.getSettingsRef()); - flushCache(dm_context, range); + return flushCache(dm_context, range, try_until_succeed); } - void flushCache(const DMContextPtr & dm_context, const RowKeyRange & range); + bool flushCache(const DMContextPtr & dm_context, const RowKeyRange & range, bool try_until_succeed = true); /// Merge delta into the stable layer for all segments. /// diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.h b/dbms/src/Storages/DeltaMerge/DeltaTree.h index 47674ab2cfc..29e127fe35f 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaTree.h +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -810,6 +811,20 @@ class DeltaTree template InternPtr afterNodeUpdated(T * node); +#ifdef __x86_64__ + template + InternPtr afterNodeUpdatedGeneric(T * node); + + template + InternPtr afterNodeUpdatedAVX512(T * node); + + template + InternPtr afterNodeUpdatedAVX(T * node); + + template + InternPtr afterNodeUpdatedSSE4(T * node); +#endif + inline void afterLeafUpdated(LeafPtr leaf) { if (leaf->count == 0 && isRootOnly()) @@ -1348,158 +1363,86 @@ typename DT_CLASS::InterAndSid DT_CLASS::submitMinSid(T * node, UInt64 subtree_m } } -DT_TEMPLATE -template -typename DT_CLASS::InternPtr DT_CLASS::afterNodeUpdated(T * node) +#ifndef __x86_64__ +#define TIFLASH_DT_IMPL_NAME afterNodeUpdated +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +#else + +// generic implementation +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedGeneric +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME + +// avx512 implementation +TIFLASH_BEGIN_AVX512_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedAVX512 +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +// avx implementation +TIFLASH_BEGIN_AVX_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedAVX +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +// sse4 implementation +TIFLASH_BEGIN_SSE4_SPECIFIC_CODE +#define TIFLASH_DT_IMPL_NAME afterNodeUpdatedSSE4 +#include "DeltaTree.ipp" +#undef TIFLASH_DT_IMPL_NAME +TIFLASH_END_TARGET_SPECIFIC_CODE + +namespace Impl { - if (!node) - return {}; - - constexpr bool is_leaf = std::is_same::value; +enum class DeltaTreeVariant +{ + Generic, + SSE4, + AVX, + AVX512 +}; - if (root == asNode(node) && !isLeaf(root) && node->count == 1) +static inline DeltaTreeVariant resolveDeltaTreeVariant() +{ + if (DB::TargetSpecific::AVX512Checker::runtimeSupport()) { - /// Decrease tree height. - root = as(Intern, root)->children[0]; - - --(node->count); - freeNode(node); - - if (isLeaf(root)) - as(Leaf, root)->parent = nullptr; - else - as(Intern, root)->parent = nullptr; - --height; - - LOG_FMT_TRACE(log, "height {} -> {}", (height + 1), height); - - return {}; + return DeltaTreeVariant::AVX512; } - - auto parent = node->parent; - bool parent_updated = false; - - if (T::overflow(node->count)) // split + if (DB::TargetSpecific::AVXChecker::runtimeSupport()) { - if (!parent) - { - /// Increase tree height.
- parent = createNode(); - root = asNode(parent); - - parent->deltas[0] = checkDelta(node->getDelta()); - parent->children[0] = asNode(node); - ++(parent->count); - parent->refreshChildParent(); - - ++height; - - LOG_FMT_TRACE(log, "height {} -> {}", (height - 1), height); - } - - auto pos = parent->searchChild(asNode(node)); - - T * next_n = createNode(); - - UInt64 sep_sid = node->split(next_n); - - // handle parent update - parent->shiftEntries(pos + 1, 1); - // for current node - parent->deltas[pos] = checkDelta(node->getDelta()); - // for next node - parent->sids[pos] = sep_sid; - parent->deltas[pos + 1] = checkDelta(next_n->getDelta()); - parent->children[pos + 1] = asNode(next_n); - - ++(parent->count); - - if constexpr (is_leaf) - { - if (as(Leaf, node) == right_leaf) - right_leaf = as(Leaf, next_n); - } - - parent_updated = true; + return DeltaTreeVariant::AVX; } - else if (T::underflow(node->count) && root != asNode(node)) // adopt or merge + if (DB::TargetSpecific::SSE4Checker::runtimeSupport()) { - auto pos = parent->searchChild(asNode(node)); - - // currently we always adopt from the right one if possible - bool is_sibling_left; - size_t sibling_pos; - T * sibling; - - if (unlikely(parent->count <= 1)) - throw Exception("Unexpected parent entry count: " + DB::toString(parent->count)); - - if (pos == parent->count - 1) - { - is_sibling_left = true; - sibling_pos = pos - 1; - sibling = as(T, parent->children[sibling_pos]); - } - else - { - is_sibling_left = false; - sibling_pos = pos + 1; - sibling = as(T, parent->children[sibling_pos]); - } - - if (unlikely(sibling->parent != node->parent)) - throw Exception("parent not the same"); - - auto after_adopt = (node->count + sibling->count) / 2; - if (T::underflow(after_adopt)) - { - // Do merge. - // adoption won't work because the sibling doesn't have enough entries. - - node->merge(sibling, is_sibling_left, pos); - freeNode(sibling); - - pos = std::min(pos, sibling_pos); - parent->deltas[pos] = checkDelta(node->getDelta()); - parent->children[pos] = asNode(node); - parent->shiftEntries(pos + 2, -1); - - if constexpr (is_leaf) - { - if (is_sibling_left && (as(Leaf, sibling) == left_leaf)) - left_leaf = as(Leaf, node); - else if (!is_sibling_left && as(Leaf, sibling) == right_leaf) - right_leaf = as(Leaf, node); - } - --(parent->count); - } - else - { - // Do adoption. 
- - auto adopt_count = after_adopt - node->count; - auto new_sep_sid = node->adopt(sibling, is_sibling_left, adopt_count, pos); + return DeltaTreeVariant::SSE4; + } + return DeltaTreeVariant::Generic; +} - parent->sids[std::min(pos, sibling_pos)] = new_sep_sid; - parent->deltas[pos] = checkDelta(node->getDelta()); - parent->deltas[sibling_pos] = checkDelta(sibling->getDelta()); - } +static inline DeltaTreeVariant DELTA_TREE_VARIANT = resolveDeltaTreeVariant(); +} // namespace Impl - parent_updated = true; - } - else if (parent) +DT_TEMPLATE +template +typename DT_CLASS::InternPtr DT_CLASS::afterNodeUpdated(T * node) +{ + switch (Impl::DELTA_TREE_VARIANT) { - auto pos = parent->searchChild(asNode(node)); - auto delta = node->getDelta(); - parent_updated = parent->deltas[pos] != delta; - parent->deltas[pos] = checkDelta(delta); + case Impl::DeltaTreeVariant::Generic: + return afterNodeUpdatedGeneric(node); + case Impl::DeltaTreeVariant::SSE4: + return afterNodeUpdatedSSE4(node); + case Impl::DeltaTreeVariant::AVX: + return afterNodeUpdatedAVX(node); + case Impl::DeltaTreeVariant::AVX512: + return afterNodeUpdatedAVX512(node); } - - if (parent_updated) - return parent; - else - return {}; } +#endif + #undef as #undef asNode diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.ipp b/dbms/src/Storages/DeltaMerge/DeltaTree.ipp new file mode 100644 index 00000000000..27b8a3b96f1 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.ipp @@ -0,0 +1,165 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +DT_TEMPLATE +template +__attribute__((noinline, flatten)) typename DT_CLASS::InternPtr DT_CLASS::TIFLASH_DT_IMPL_NAME(T * node) +{ + if (!node) + return {}; + + constexpr bool is_leaf = std::is_same::value; + + if (root == asNode(node) && !isLeaf(root) && node->count == 1) + { + /// Decrease tree height. + root = as(Intern, root)->children[0]; + + --(node->count); + freeNode(node); + + if (isLeaf(root)) + as(Leaf, root)->parent = nullptr; + else + as(Intern, root)->parent = nullptr; + --height; + + LOG_FMT_TRACE(log, "height {} -> {}", (height + 1), height); + + return {}; + } + + auto parent = node->parent; + bool parent_updated = false; + + if (T::overflow(node->count)) // split + { + if (!parent) + { + /// Increase tree height. 
+ parent = createNode(); + root = asNode(parent); + + parent->deltas[0] = checkDelta(node->getDelta()); + parent->children[0] = asNode(node); + ++(parent->count); + parent->refreshChildParent(); + + ++height; + + LOG_FMT_TRACE(log, "height {} -> {}", (height - 1), height); + } + + auto pos = parent->searchChild(asNode(node)); + + T * next_n = createNode(); + + UInt64 sep_sid = node->split(next_n); + + // handle parent update + parent->shiftEntries(pos + 1, 1); + // for current node + parent->deltas[pos] = checkDelta(node->getDelta()); + // for next node + parent->sids[pos] = sep_sid; + parent->deltas[pos + 1] = checkDelta(next_n->getDelta()); + parent->children[pos + 1] = asNode(next_n); + + ++(parent->count); + + if constexpr (is_leaf) + { + if (as(Leaf, node) == right_leaf) + right_leaf = as(Leaf, next_n); + } + + parent_updated = true; + } + else if (T::underflow(node->count) && root != asNode(node)) // adopt or merge + { + auto pos = parent->searchChild(asNode(node)); + + // currently we always adopt from the right one if possible + bool is_sibling_left; + size_t sibling_pos; + T * sibling; + + if (unlikely(parent->count <= 1)) + throw Exception("Unexpected parent entry count: " + DB::toString(parent->count)); + + if (pos == parent->count - 1) + { + is_sibling_left = true; + sibling_pos = pos - 1; + sibling = as(T, parent->children[sibling_pos]); + } + else + { + is_sibling_left = false; + sibling_pos = pos + 1; + sibling = as(T, parent->children[sibling_pos]); + } + + if (unlikely(sibling->parent != node->parent)) + throw Exception("parent not the same"); + + auto after_adopt = (node->count + sibling->count) / 2; + if (T::underflow(after_adopt)) + { + // Do merge. + // adoption won't work because the sibling doesn't have enough entries. + + node->merge(sibling, is_sibling_left, pos); + freeNode(sibling); + + pos = std::min(pos, sibling_pos); + parent->deltas[pos] = checkDelta(node->getDelta()); + parent->children[pos] = asNode(node); + parent->shiftEntries(pos + 2, -1); + + if constexpr (is_leaf) + { + if (is_sibling_left && (as(Leaf, sibling) == left_leaf)) + left_leaf = as(Leaf, node); + else if (!is_sibling_left && as(Leaf, sibling) == right_leaf) + right_leaf = as(Leaf, node); + } + --(parent->count); + } + else + { + // Do adoption. + + auto adopt_count = after_adopt - node->count; + auto new_sep_sid = node->adopt(sibling, is_sibling_left, adopt_count, pos); + + parent->sids[std::min(pos, sibling_pos)] = new_sep_sid; + parent->deltas[pos] = checkDelta(node->getDelta()); + parent->deltas[sibling_pos] = checkDelta(sibling->getDelta()); + } + + parent_updated = true; + } + else if (parent) + { + auto pos = parent->searchChild(asNode(node)); + auto delta = node->getDelta(); + parent_updated = parent->deltas[pos] != delta; + parent->deltas[pos] = checkDelta(delta); + } + + if (parent_updated) + return parent; + else + return {}; +} \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp index 3bff05ef19f..272d548eee1 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWriter.cpp @@ -72,10 +72,9 @@ DMFileWriter::DMFileWriter(const DMFilePtr & dmfile_, for (auto & cd : write_columns) { // TODO: currently we only generate index for Integers, Date, DateTime types, and this should be configurable by user. 
- // TODO: If column type is nullable, we won't generate index for it /// for handle column always generate index - bool do_index = cd.id == EXTRA_HANDLE_COLUMN_ID || cd.type->isInteger() || cd.type->isDateOrDateTime(); - + auto type = removeNullable(cd.type); + bool do_index = cd.id == EXTRA_HANDLE_COLUMN_ID || type->isInteger() || type->isDateOrDateTime(); if (options.flags.isSingleFile()) { if (do_index) @@ -122,7 +121,7 @@ void DMFileWriter::addStreams(ColId col_id, DataTypePtr type, bool do_index) void DMFileWriter::write(const Block & block, const BlockProperty & block_property) { is_empty_file = false; - DMFile::PackStat stat; + DMFile::PackStat stat{}; stat.rows = block.rows(); stat.not_clean = block_property.not_clean_rows; stat.bytes = block.bytes(); // This is bytes of pack data in memory. @@ -219,7 +218,7 @@ void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColu "Type shouldn't be nullable when substream_path's type is NullMap.", Errors::DeltaTree::Internal); - const ColumnNullable & col = static_cast(column); + const auto & col = static_cast(column); col.checkConsistency(); DataTypeUInt8().serializeBinaryBulk(col.getNullMapColumn(), single_file_stream->original_layer, 0, rows); } @@ -230,8 +229,8 @@ void DMFileWriter::writeColumn(ColId col_id, const IDataType & type, const IColu "Type shouldn't be nullable when substream_path's type is NullableElements.", Errors::DeltaTree::Internal); - const DataTypeNullable & nullable_type = static_cast(type); - const ColumnNullable & col = static_cast(column); + const auto & nullable_type = static_cast(type); + const auto & col = static_cast(column); nullable_type.getNestedType()->serializeBinaryBulk(col.getNestedColumn(), single_file_stream->original_layer, 0, rows); } else
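
The nullable-index change above boils down to unwrapping `Nullable(T)` before testing the nested type. A hedged restatement of that rule, reusing the same TiFlash helpers the diff itself relies on (`shouldBuildMinMaxIndex` is an illustrative name, not a real function, and the sketch assumes the surrounding DeltaMerge headers):

```cpp
// Nullable(Int64), Nullable(DateTime), ... now qualify for a minmax index,
// because the decision looks at the nested type rather than the wrapper.
bool shouldBuildMinMaxIndex(ColId column_id, const DataTypePtr & column_type)
{
    auto type = removeNullable(column_type); // Nullable(Int64) -> Int64
    return column_id == EXTRA_HANDLE_COLUMN_ID // the handle column is always indexed
        || type->isInteger()
        || type->isDateOrDateTime();
}
```
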
diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index 2681284948c..6229d54c169 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -61,7 +61,6 @@ inline std::pair minmax(const IColumn & column, const ColumnVect void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * del_mark) { - const IColumn * column_ptr = &column; auto size = column.size(); bool has_null = false; if (column.isColumnNullable()) @@ -70,7 +69,6 @@ void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * de const auto & nullable_column = static_cast(column); const auto & null_mark_data = nullable_column.getNullMapColumn().getData(); - column_ptr = &nullable_column.getNestedColumn(); for (size_t i = 0; i < size; ++i) { @@ -82,14 +80,13 @@ void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * de } } - const IColumn & updated_column = *column_ptr; - auto [min_index, max_index] = details::minmax(updated_column, del_mark, 0, updated_column.size()); + auto [min_index, max_index] = details::minmax(column, del_mark, 0, column.size()); if (min_index != NONE_EXIST) { has_null_marks->push_back(has_null); has_value_marks->push_back(1); - minmaxes->insertFrom(updated_column, min_index); - minmaxes->insertFrom(updated_column, max_index); + minmaxes->insertFrom(column, min_index); + minmaxes->insertFrom(column, max_index); } else { @@ -158,6 +155,62 @@ std::pair MinMaxIndex::getUInt64MinMax(size_t pack_index) return {minmaxes->get64(pack_index * 2), minmaxes->get64(pack_index * 2 + 1)}; } +RSResult MinMaxIndex::checkNullableEqual(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const auto & column_nullable = static_cast(*minmaxes); + + const auto * raw_type = type.get(); + +#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkEqual(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ? 0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + return RoughCheck::checkEqual(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const DataTypePtr & type) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -165,7 +218,13 @@ RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const D return RSResult::None; + // If minmaxes_data contains null values, the value of minmaxes_data[i] is meaningless and may be just a random value. + // But we have checked has_null_marks above and ensured that there are no null values in the MinMax index.
const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableEqual(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ @@ -215,6 +274,62 @@ RSResult MinMaxIndex::checkEqual(size_t pack_index, const Field & value, const D } return RSResult::Some; } + +RSResult MinMaxIndex::checkNullableGreater(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const auto & column_nullable = static_cast(*minmaxes); + const auto * raw_type = type.get(); + +#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkGreater(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreater(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + return RoughCheck::checkGreater(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const DataTypePtr & type, int /*nan_direction_hint*/) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -223,6 +338,10 @@ RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const return RSResult::None; const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableGreater(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ @@ -272,6 +391,62 @@ RSResult MinMaxIndex::checkGreater(size_t pack_index, const Field & value, const } return RSResult::Some; } + +RSResult MinMaxIndex::checkNullableGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type) +{ + const auto & column_nullable = static_cast(*minmaxes); + + const auto * raw_type = type.get(); +#define DISPATCH(TYPE) \ + if (typeid_cast(raw_type)) \ + { \ + auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); \ + auto min = minmaxes_data[pack_index * 2]; \ + auto max = minmaxes_data[pack_index * 2 + 1]; \ + return RoughCheck::checkGreaterEqual(value, type, min, max); \ + } + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type) || typeid_cast(raw_type)) + { + // For DataTypeMyDateTime / DataTypeMyDate, simply compare them as comparing UInt64 is OK. + // Check `struct MyTimeBase` for more details. + const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); + auto min = minmaxes_data[pack_index * 2]; + auto max = minmaxes_data[pack_index * 2 + 1]; + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + if (typeid_cast(raw_type)) + { + const auto * string_column = checkAndGetColumn(column_nullable.getNestedColumnPtr().get()); + const auto & chars = string_column->getChars(); + const auto & offsets = string_column->getOffsets(); + size_t pos = pack_index * 2; + size_t prev_offset = pos == 0 ?
0 : offsets[pos - 1]; + // todo use StringRef instead of String + auto min = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + pos = pack_index * 2 + 1; + prev_offset = offsets[pos - 1]; + auto max = String(reinterpret_cast(&chars[prev_offset]), offsets[pos] - prev_offset - 1); + return RoughCheck::checkGreaterEqual(value, type, min, max); + } + return RSResult::Some; +} + RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type, int /*nan_direction_hint*/) { if ((*has_null_marks)[pack_index] || value.isNull()) @@ -280,6 +455,10 @@ RSResult MinMaxIndex::checkGreaterEqual(size_t pack_index, const Field & value, return RSResult::None; const auto * raw_type = type.get(); + if (typeid_cast(raw_type)) + { + return checkNullableGreaterEqual(pack_index, value, removeNullable(type)); + } #define DISPATCH(TYPE) \ if (typeid_cast(raw_type)) \ { \ diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h index 7efd37fafa4..73284333c73 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h @@ -81,6 +81,9 @@ class MinMaxIndex RSResult checkGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type, int nan_direction); static String toString(); + RSResult checkNullableEqual(size_t pack_index, const Field & value, const DataTypePtr & type); + RSResult checkNullableGreater(size_t pack_index, const Field & value, const DataTypePtr & type); + RSResult checkNullableGreaterEqual(size_t pack_index, const Field & value, const DataTypePtr & type); }; diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index cccfc5091b9..8058329ae91 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -300,6 +300,8 @@ class Segment : private boost::noncopyable void drop(const FileProviderPtr & file_provider, WriteBatches & wbs); + bool isFlushing() const { return delta->isFlushing(); } + RowsAndBytes getRowsAndBytesInRange( DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index 752898f9c75..2791a74e9e3 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -624,8 +624,8 @@ PageId StoragePool::newDataPageIdForDTFile(StableDiskDelegator & delegator, cons auto existed_path = delegator.getDTFilePath(dtfile_id, /*throw_on_not_exist=*/false); fiu_do_on(FailPoints::force_set_dtfile_exist_when_acquire_id, { - static size_t fail_point_called = 0; - if (existed_path.empty() && fail_point_called % 10 == 0) + static std::atomic fail_point_called(0); + if (existed_path.empty() && fail_point_called.load() % 10 == 0) { existed_path = ""; } diff --git a/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h b/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h index b35dae0cbe2..84fafbc46ef 100644 --- a/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h +++ b/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h @@ -273,7 +273,8 @@ class DMTestEnv DataTypePtr pk_type = EXTRA_HANDLE_COLUMN_INT_TYPE, bool is_common_handle = false, size_t rowkey_column_size = 1, - bool with_internal_columns = true) + bool with_internal_columns = true, + bool is_deleted = false) { Block block; const size_t num_rows = (end - beg); @@ -324,7 +325,7 @@ class DMTestEnv VERSION_COLUMN_ID)); // tag_col 
block.insert(DB::tests::createColumn( - std::vector(num_rows, 0), + std::vector(num_rows, is_deleted), TAG_COLUMN_NAME, TAG_COLUMN_ID)); } diff --git a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h index 7c5b0b2416d..787a521ded3 100644 --- a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h +++ b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h @@ -88,6 +88,7 @@ class MultiSegmentTestUtil : private boost::noncopyable // Check there is only one segment ASSERT_EQ(store->segments.size(), 1); const auto & [_key, seg] = *store->segments.begin(); + (void)_key; ASSERT_EQ(seg->getDelta()->getRows(), n_avg_rows_per_segment * 4); ASSERT_EQ(seg->getStable()->getRows(), 0); @@ -108,6 +109,7 @@ class MultiSegmentTestUtil : private boost::noncopyable auto segment_idx = 0; for (auto & [_key, seg] : store->segments) { + (void)_key; LOG_FMT_INFO(log, "Segment #{}: Range = {}", segment_idx, seg->getRowKeyRange().toDebugString()); ASSERT_EQ(seg->getDelta()->getRows(), 0); ASSERT_GT(seg->getStable()->getRows(), 0); // We don't check the exact rows of each segment. @@ -147,6 +149,7 @@ class MultiSegmentTestUtil : private boost::noncopyable auto segment_idx = 0; for (auto & [_key, seg] : store->segments) { + (void)_key; ASSERT_EQ(seg->getDelta()->getRows(), expected_delta_rows[segment_idx]) << "Assert failed for segment #" << segment_idx; ASSERT_EQ(seg->getStable()->getRows(), expected_stable_rows[segment_idx]) << "Assert failed for segment #" << segment_idx; segment_idx++; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index e934f7a2049..b7913c44a2c 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -3564,7 +3564,6 @@ class DeltaMergeStoreMergeDeltaBySegmentTest public: DeltaMergeStoreMergeDeltaBySegmentTest() { - log = &Poco::Logger::get(DB::base::TiFlashStorageTestBasic::getCurrentFullTestName()); std::tie(ps_ver, pk_type) = GetParam(); } @@ -3607,8 +3606,6 @@ class DeltaMergeStoreMergeDeltaBySegmentTest UInt64 ps_ver; DMTestEnv::PkType pk_type; - - [[maybe_unused]] Poco::Logger * log; }; INSTANTIATE_TEST_CASE_P( @@ -3765,6 +3762,55 @@ try CATCH +// Verify that unflushed data will also be compacted. +TEST_P(DeltaMergeStoreMergeDeltaBySegmentTest, Flush) +try +{ + { + // Write data to first 3 segments and flush. + auto newly_written_rows = helper->rows_by_segments[0] + helper->rows_by_segments[1] + helper->rows_by_segments[2]; + Block block = DMTestEnv::prepareSimpleWriteBlock(0, newly_written_rows, false, pk_type, 5 /* new tso */); + store->write(*db_context, db_context->getSettingsRef(), block); + store->flushCache(dm_context, RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())); + + helper->expected_delta_rows[0] += helper->rows_by_segments[0]; + helper->expected_delta_rows[1] += helper->rows_by_segments[1]; + helper->expected_delta_rows[2] += helper->rows_by_segments[2]; + helper->verifyExpectedRowsForAllSegments(); + + auto segment1 = std::next(store->segments.begin())->second; + ASSERT_EQ(segment1->getDelta()->getUnsavedRows(), 0); + } + { + // Write new data to segment[1] without flush. 
+ auto newly_written_rows = helper->rows_by_segments[1]; + Block block = DMTestEnv::prepareSimpleWriteBlock(helper->rows_by_segments[0], helper->rows_by_segments[0] + newly_written_rows, false, pk_type, 10 /* new tso */); + store->write(*db_context, db_context->getSettingsRef(), block); + + helper->expected_delta_rows[1] += helper->rows_by_segments[1]; + helper->verifyExpectedRowsForAllSegments(); + + auto segment1 = std::next(store->segments.begin())->second; + ASSERT_GT(segment1->getDelta()->getUnsavedRows(), 0); + } + { + auto segment1 = std::next(store->segments.begin())->second; + auto result = store->mergeDeltaBySegment(*db_context, segment1->getRowKeyRange().start, DeltaMergeStore::TaskRunThread::Foreground); + ASSERT_NE(result, std::nullopt); + + segment1 = std::next(store->segments.begin())->second; + ASSERT_EQ(*result, segment1->getRowKeyRange()); + + helper->expected_stable_rows[1] += helper->expected_delta_rows[1]; + helper->expected_delta_rows[1] = 0; + helper->verifyExpectedRowsForAllSegments(); + + ASSERT_EQ(segment1->getDelta()->getUnsavedRows(), 0); + } +} +CATCH + + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 96c0070b73b..bb31b687186 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -214,14 +214,6 @@ try ASSERT_EQ(true, checkMatch(case_name, *context, "MyDateTime", "2020-09-27", createLessEqual(attr("MyDateTime"), parseMyDateTime("2020-09-27"), 0))); ASSERT_EQ(false, checkMatch(case_name, *context, "MyDateTime", "2020-09-27", createLessEqual(attr("MyDateTime"), parseMyDateTime("2020-09-26"), 0))); - /// Currently we don't do filtering for null values. i.e. if a pack contains any null values, then the pack will pass the filter. 
- ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); - ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); - ASSERT_EQ(false, checkDelMatch(case_name, *context, "Int64", "100", createEqual(attr("Int64"), Field((Int64)100)))); ASSERT_EQ(true, checkPkMatch(case_name, *context, "Int64", "100", createEqual(pkAttr(), Field((Int64)100)), true)); ASSERT_EQ(true, checkPkMatch(case_name, *context, "Int64", "100", createGreater(pkAttr(), Field((Int64)99), 0), true)); @@ -236,6 +228,80 @@ try } CATCH +TEST_F(DMMinMaxIndexTest, NullableToNullable) +try +{ + const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + // clang-format off + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createEqual(attr("Nullable(Int64)"), Field((Int64)100)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createIn(attr("Nullable(Int64)"), {Field((Int64)100)}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreater(attr("Nullable(Int64)"), Field((Int64)99), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLess(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLessEqual(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Int64)", "100", createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createEqual(attr("Nullable(Date)"), Field((String) "2020-09-27")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", 
createEqual(attr("Nullable(Date)"), Field((String) "2020-09-28")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createIn(attr("Nullable(Date)"), {Field((String) "2020-09-27")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createIn(attr("Nullable(Date)"), {Field((String) "2020-09-28")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreater(attr("Nullable(Date)"), Field((String) "2020-09-26"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreater(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreaterEqual(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createGreaterEqual(attr("Nullable(Date)"), Field((String) "2020-09-28"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLess(attr("Nullable(Date)"), Field((String) "2020-09-28"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLess(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLessEqual(attr("Nullable(Date)"), Field((String) "2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(Date)", "2020-09-27", createLessEqual(attr("Nullable(Date)"), Field((String) "2020-09-26"), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createIn(attr("Nullable(DateTime)"), {Field((String) "2020-01-01 05:00:01")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createIn(attr("Nullable(DateTime)"), {Field((String) "2020-01-01 05:00:02")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreater(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:00"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreater(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreaterEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createGreaterEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLess(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:02"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLess(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", 
createLessEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:01"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(DateTime)", "2020-01-01 05:00:01", createLessEqual(attr("Nullable(DateTime)"), Field((String) "2020-01-01 05:00:00"), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27")))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28")))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createIn(attr("Nullable(MyDateTime)"), {parseMyDateTime("2020-09-27")}))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createIn(attr("Nullable(MyDateTime)"), {parseMyDateTime("2020-09-28")}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreater(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-26"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreater(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreaterEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createGreaterEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLess(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-28"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLess(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLessEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-27"), 0))); + ASSERT_EQ(false, checkMatch(case_name, *context, "Nullable(MyDateTime)", "2020-09-27", createLessEqual(attr("Nullable(MyDateTime)"), parseMyDateTime("2020-09-26"), 0))); + + // has null + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "100"}, {"1", "1", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); + + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createEqual(attr("Nullable(Int64)"), 
Field((Int64)101)))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createIn(attr("Nullable(Int64)"), {Field((Int64)101)}))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createGreater(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createGreaterEqual(attr("Nullable(Int64)"), Field((Int64)101), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createLess(attr("Nullable(Int64)"), Field((Int64)100), 0))); + ASSERT_EQ(true, checkMatch(case_name, *context, "Nullable(Int64)", {{"0", "0", "0", "\\N"}}, createLessEqual(attr("Nullable(Int64)"), Field((Int64)99), 0))); +} +CATCH +
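
The null-handling behavior these tests pin down can be summarized in a small, self-contained model (illustrative only; the real `RSResult`/`MinMaxIndex` API differs and also has an `All` answer, omitted here for brevity):

```cpp
// Rough-check model: a pack whose minmax entry saw any NULL can never be
// excluded (the stored min/max may be garbage), so the filter answers Some
// and the pack is always read. The same applies to a NULL literal.
enum class RSResult { None, Some };

RSResult roughCheckEqual(bool pack_has_null, bool pack_has_value, bool value_is_null,
                         long value, long pack_min, long pack_max)
{
    if (pack_has_null || value_is_null)
        return RSResult::Some; // cannot safely skip the pack
    if (!pack_has_value)
        return RSResult::None; // pack holds no non-NULL rows at all
    if (value < pack_min || value > pack_max)
        return RSResult::None; // definitely no match: the pack can be skipped
    return RSResult::Some;     // possible match: the pack must be read
}
```
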
TEST_F(DMMinMaxIndexTest, Logical) try { diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp new file mode 100644 index 00000000000..dc43ef3713b --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment.cpp @@ -0,0 +1,100 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace DM +{ +namespace tests +{ +class SegmentOperationTest : public SegmentTestBasic +{ +protected: + static void SetUpTestCase() {} +}; + +TEST_F(SegmentOperationTest, Issue4956) +try +{ + SegmentTestOptions options; + reloadWithOptions(options); + + // flush data so that the segment can be split. + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + // write data to cache, to reproduce https://github.com/pingcap/tiflash/issues/4956 + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + deleteRangeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + auto segment_id = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + ASSERT_TRUE(segment_id.has_value()); + + mergeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, *segment_id); +} +CATCH + +TEST_F(SegmentOperationTest, TestSegment) +try +{ + SegmentTestOptions options; + reloadWithOptions(options); + writeSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + flushSegmentCache(DELTA_MERGE_FIRST_SEGMENT_ID); + mergeSegmentDelta(DELTA_MERGE_FIRST_SEGMENT_ID); + auto segment_id = splitSegment(DELTA_MERGE_FIRST_SEGMENT_ID); + ASSERT_TRUE(segment_id.has_value()); + + size_t origin_rows = getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID); + + writeSegment(*segment_id); + flushSegmentCache(*segment_id); + deleteRangeSegment(*segment_id); + writeSegmentWithDeletedPack(*segment_id); + mergeSegment(DELTA_MERGE_FIRST_SEGMENT_ID, *segment_id); + + EXPECT_EQ(getSegmentRowNum(DELTA_MERGE_FIRST_SEGMENT_ID), origin_rows); +} +CATCH + +TEST_F(SegmentOperationTest, TestSegmentRandom) +try +{ + srand(time(nullptr)); + SegmentTestOptions options; + options.is_common_handle = true; + reloadWithOptions(options); + randomSegmentTest(100); +} +CATCH + +// run in CI weekly +TEST_F(SegmentOperationTest, DISABLED_TestSegmentRandomForCI) +try +{ + srand(time(nullptr)); + SegmentTestOptions options; + options.is_common_handle = true; + reloadWithOptions(options); + randomSegmentTest(10000); +} +CATCH + +} // namespace tests +} // namespace DM +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp new file mode 100644 index 00000000000..c676f2e08d5 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -0,0 +1,430 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace DM +{ +namespace tests +{ +void SegmentTestBasic::reloadWithOptions(SegmentTestOptions config) +{ + TiFlashStorageTestBasic::SetUp(); + options = config; + table_columns = std::make_shared(); + + root_segment = reload(config.is_common_handle); + ASSERT_EQ(root_segment->segmentId(), DELTA_MERGE_FIRST_SEGMENT_ID); + segments.clear(); + segments[DELTA_MERGE_FIRST_SEGMENT_ID] = root_segment; +} + +PageId SegmentTestBasic::createNewSegmentWithSomeData() +{ + SegmentPtr new_segment; + std::tie(root_segment, new_segment) = root_segment->split(dmContext(), tableColumns()); + + const size_t num_rows_write_per_batch = 100; + { + // write to segment and flush + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write_per_batch, false); + new_segment->write(dmContext(), std::move(block), true); + } + { + // write to segment and don't flush + Block block = DMTestEnv::prepareSimpleWriteBlock(num_rows_write_per_batch, 2 * num_rows_write_per_batch, false); + new_segment->write(dmContext(), std::move(block), false); + } + return new_segment->segmentId(); +} + +size_t SegmentTestBasic::getSegmentRowNumWithoutMVCC(PageId segment_id) +{ + auto segment = segments[segment_id]; + auto in = segment->getInputStreamRaw(dmContext(), *tableColumns()); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + return num_rows_read; +} + +size_t SegmentTestBasic::getSegmentRowNum(PageId segment_id) +{ + auto segment = segments[segment_id]; + auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + return num_rows_read; +} + +void SegmentTestBasic::checkSegmentRow(PageId segment_id, size_t expected_row_num) +{ + auto segment = segments[segment_id]; + // read written data + auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + ASSERT_EQ(num_rows_read, expected_row_num); +} + +std::optional SegmentTestBasic::splitSegment(PageId segment_id) +{ + auto origin_segment = segments[segment_id]; + size_t origin_segment_row_num = getSegmentRowNum(segment_id); + SegmentPtr segment, new_segment; + std::tie(segment, new_segment) = origin_segment->split(dmContext(), tableColumns()); + if (new_segment) + { + segments[new_segment->segmentId()] = new_segment; + segments[segment_id] = segment; + + EXPECT_EQ(origin_segment_row_num, getSegmentRowNum(segment_id) + getSegmentRowNum(new_segment->segmentId())); + return new_segment->segmentId(); + } + return std::nullopt; +} + +void SegmentTestBasic::mergeSegment(PageId left_segment_id, PageId right_segment_id) +{ + auto left_segment = segments[left_segment_id]; + auto right_segment = segments[right_segment_id]; + + size_t left_segment_row_num = getSegmentRowNum(left_segment_id); + size_t right_segment_row_num = getSegmentRowNum(right_segment_id); + LOG_FMT_TRACE(&Poco::Logger::root(), "merge in segment:{}:{} and {}:{}", left_segment->segmentId(), left_segment_row_num, right_segment->segmentId(), right_segment_row_num); + + SegmentPtr merged_segment = Segment::merge(dmContext(), tableColumns(), 
+    segments[merged_segment->segmentId()] = merged_segment;
+    auto it = segments.find(right_segment->segmentId());
+    if (it != segments.end())
+    {
+        segments.erase(it);
+    }
+    EXPECT_EQ(getSegmentRowNum(merged_segment->segmentId()), left_segment_row_num + right_segment_row_num);
+}
+
+void SegmentTestBasic::mergeSegmentDelta(PageId segment_id)
+{
+    auto segment = segments[segment_id];
+    size_t segment_row_num = getSegmentRowNum(segment_id);
+    SegmentPtr merged_segment = segment->mergeDelta(dmContext(), tableColumns());
+    segments[merged_segment->segmentId()] = merged_segment;
+    EXPECT_EQ(getSegmentRowNum(merged_segment->segmentId()), segment_row_num);
+}
+
+void SegmentTestBasic::flushSegmentCache(PageId segment_id)
+{
+    auto segment = segments[segment_id];
+    size_t segment_row_num = getSegmentRowNum(segment_id);
+    segment->flushCache(dmContext());
+    EXPECT_EQ(getSegmentRowNum(segment_id), segment_row_num);
+}
+
+std::pair<Int64, Int64> SegmentTestBasic::getSegmentKeyRange(SegmentPtr segment)
+{
+    Int64 start_key, end_key;
+    if (!options.is_common_handle)
+    {
+        start_key = segment->getRowKeyRange().getStart().int_value;
+        end_key = segment->getRowKeyRange().getEnd().int_value;
+        return {start_key, end_key};
+    }
+    EXPECT_EQ(segment->getRowKeyRange().getStart().data[0], TiDB::CodecFlagInt);
+    EXPECT_EQ(segment->getRowKeyRange().getEnd().data[0], TiDB::CodecFlagInt);
+    {
+        size_t cursor = 1;
+        start_key = DecodeInt64(cursor, String(segment->getRowKeyRange().getStart().data, segment->getRowKeyRange().getStart().size));
+    }
+    {
+        size_t cursor = 1;
+        end_key = DecodeInt64(cursor, String(segment->getRowKeyRange().getEnd().data, segment->getRowKeyRange().getEnd().size));
+    }
+    return {start_key, end_key};
+}
+
+void SegmentTestBasic::writeSegment(PageId segment_id, UInt64 write_rows)
+{
+    if (write_rows == 0)
+    {
+        return;
+    }
+    auto segment = segments[segment_id];
+    size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id);
+    std::pair<Int64, Int64> keys = getSegmentKeyRange(segment);
+    Int64 start_key = keys.first;
+    Int64 end_key = keys.second;
+    UInt64 remain_row_num = 0;
+    if (static_cast<UInt64>(end_key - start_key) > write_rows)
+    {
+        end_key = start_key + write_rows;
+    }
+    else
+    {
+        remain_row_num = write_rows - static_cast<UInt64>(end_key - start_key);
+    }
+    {
+        // write to segment and not flush
+        Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle);
+        segment->write(dmContext(), std::move(block), false);
+        LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, end_key);
+        version++;
+    }
+    while (remain_row_num > 0)
+    {
+        UInt64 write_num = std::min(remain_row_num, static_cast<UInt64>(end_key - start_key));
+        Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle);
+        segment->write(dmContext(), std::move(block), false);
+        remain_row_num -= write_num;
+        LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, write_num + start_key);
+        version++;
+    }
+    EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows);
+}
+
+void SegmentTestBasic::writeSegmentWithDeletedPack(PageId segment_id)
+{
+    UInt64 write_rows = DEFAULT_MERGE_BLOCK_SIZE;
+    auto segment = segments[segment_id];
+    size_t segment_row_num = getSegmentRowNumWithoutMVCC(segment_id);
+    std::pair<Int64, Int64> keys = getSegmentKeyRange(segment);
+    Int64 start_key = keys.first;
+    Int64 end_key = keys.second;
+    UInt64 remain_row_num = 0;
+    if (static_cast<UInt64>(end_key - start_key) > write_rows)
+    {
+        end_key = start_key + write_rows;
+    }
+    else
+    {
+        remain_row_num = write_rows - static_cast<UInt64>(end_key - start_key);
+    }
+    {
+        // write to segment and not flush
+        Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, end_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true);
+        segment->write(dmContext(), std::move(block), true);
+        LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, end_key);
+        version++;
+    }
+    while (remain_row_num > 0)
+    {
+        UInt64 write_num = std::min(remain_row_num, static_cast<UInt64>(end_key - start_key));
+        Block block = DMTestEnv::prepareSimpleWriteBlock(start_key, write_num + start_key, false, version, DMTestEnv::pk_name, EXTRA_HANDLE_COLUMN_ID, options.is_common_handle ? EXTRA_HANDLE_COLUMN_STRING_TYPE : EXTRA_HANDLE_COLUMN_INT_TYPE, options.is_common_handle, 1, true, true);
+        segment->write(dmContext(), std::move(block), true);
+        remain_row_num -= write_num;
+        LOG_FMT_TRACE(&Poco::Logger::root(), "write key range [{}, {})", start_key, write_num + start_key);
+        version++;
+    }
+    EXPECT_EQ(getSegmentRowNumWithoutMVCC(segment_id), segment_row_num + write_rows);
+}
+
+void SegmentTestBasic::deleteRangeSegment(PageId segment_id)
+{
+    auto segment = segments[segment_id];
+    segment->write(dmContext(), /*delete_range*/ segment->getRowKeyRange());
+    EXPECT_EQ(getSegmentRowNum(segment_id), 0);
+}
+
+void SegmentTestBasic::writeRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start write segment:{}", random_segment_id);
+    writeSegment(random_segment_id);
+}
+
+void SegmentTestBasic::writeRandomSegmentWithDeletedPack()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start write segment with deleted pack:{}", random_segment_id);
+    writeSegmentWithDeletedPack(random_segment_id);
+}
+
+void SegmentTestBasic::deleteRangeRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start delete range segment:{}", random_segment_id);
+    deleteRangeSegment(random_segment_id);
+}
+
+void SegmentTestBasic::splitRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start split segment:{}", random_segment_id);
+    splitSegment(random_segment_id);
+}
+
+void SegmentTestBasic::mergeRandomSegment()
+{
+    if (segments.empty() || segments.size() == 1)
+    {
+        return;
+    }
+    std::pair<PageId, PageId> segment_pair;
+    segment_pair = getRandomMergeablePair();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start merge segment:{} and {}", segment_pair.first, segment_pair.second);
+    mergeSegment(segment_pair.first, segment_pair.second);
+}
+
+void SegmentTestBasic::mergeDeltaRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start merge delta in segment:{}", random_segment_id);
+    mergeSegmentDelta(random_segment_id);
+}
+
+void SegmentTestBasic::flushCacheRandomSegment()
+{
+    if (segments.empty())
+    {
+        return;
+    }
+    PageId random_segment_id = getRandomSegmentId();
+    LOG_FMT_TRACE(&Poco::Logger::root(), "start flush cache in segment:{}", random_segment_id);
+    flushSegmentCache(random_segment_id);
+}
+
+void SegmentTestBasic::randomSegmentTest(size_t operator_count)
+{
+    for (size_t i = 0; i < operator_count; i++)
+    {
+        auto op = static_cast<SegmentOperaterType>(random() % SegmentOperaterMax);
+        segment_operator_entries[op]();
+    }
+}
+
+PageId SegmentTestBasic::getRandomSegmentId()
+{
+    auto max_segment_id = segments.rbegin()->first;
+    PageId random_segment_id = random() % (max_segment_id + 1);
+    auto it = segments.find(random_segment_id);
+    while (it == segments.end())
+    {
+        random_segment_id = random() % (max_segment_id + 1);
+        it = segments.find(random_segment_id);
+    }
+    return random_segment_id;
+}
+
+std::pair<PageId, PageId> SegmentTestBasic::getRandomMergeablePair()
+{
+    while (true)
+    {
+        PageId random_left_segment_id = getRandomSegmentId();
+        PageId random_right_segment_id = random_left_segment_id;
+        while (random_right_segment_id == random_left_segment_id)
+        {
+            random_right_segment_id = getRandomSegmentId();
+        }
+        auto left_segment = segments[random_left_segment_id];
+        auto right_segment = segments[random_right_segment_id];
+        if (compare(left_segment->getRowKeyRange().getEnd(), right_segment->getRowKeyRange().getStart()) != 0 || left_segment->nextSegmentId() != right_segment->segmentId())
+        {
+            continue;
+        }
+        return {random_left_segment_id, random_right_segment_id};
+    }
+}
+
+RowKeyRange SegmentTestBasic::commanHandleKeyRange()
+{
+    String start_key, end_key;
+    {
+        WriteBufferFromOwnString ss;
+        ::DB::EncodeUInt(static_cast<UInt8>(TiDB::CodecFlagInt), ss);
+        ::DB::EncodeInt64(std::numeric_limits<Int64>::min(), ss);
+        start_key = ss.releaseStr();
+    }
+    {
+        WriteBufferFromOwnString ss;
+        ::DB::EncodeUInt(static_cast<UInt8>(TiDB::CodecFlagInt), ss);
+        ::DB::EncodeInt64(std::numeric_limits<Int64>::max(), ss);
+        end_key = ss.releaseStr();
+    }
+    return RowKeyRange(RowKeyValue(true, std::make_shared<String>(start_key), 0), RowKeyValue(true, std::make_shared<String>(end_key), 0), true, 1);
+}
+
+SegmentPtr SegmentTestBasic::reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns, DB::Settings && db_settings)
+{
+    TiFlashStorageTestBasic::reload(std::move(db_settings));
+    storage_path_pool = std::make_unique<StoragePathPool>(db_context->getPathPool().withTable("test", "t1", false));
+    storage_pool = std::make_unique<StoragePool>(*db_context, /*ns_id*/ 100, *storage_path_pool, "test.t1");
+    storage_pool->restore();
+    ColumnDefinesPtr cols = (!pre_define_columns) ? DMTestEnv::getDefaultColumns(is_common_handle ? DMTestEnv::PkType::CommonHandle : DMTestEnv::PkType::HiddenTiDBRowID) : pre_define_columns;
+    setColumns(cols);
+
+    return Segment::newSegment(*dm_context, table_columns, is_common_handle ? commanHandleKeyRange() : RowKeyRange::newAll(is_common_handle, 1), storage_pool->newMetaPageId(), 0);
+}
+
+void SegmentTestBasic::setColumns(const ColumnDefinesPtr & columns)
+{
+    *table_columns = *columns;
+
+    dm_context = std::make_unique<DMContext>(*db_context,
+                                             *storage_path_pool,
+                                             *storage_pool,
+                                             0,
+                                             /*min_version_*/ 0,
+                                             settings.not_compress_columns,
+                                             options.is_common_handle,
+                                             1,
+                                             db_context->getSettingsRef());
+}
+} // namespace tests
+} // namespace DM
+} // namespace DB
diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
new file mode 100644
index 00000000000..ab0c7d6d0be
--- /dev/null
+++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.h
@@ -0,0 +1,123 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+namespace DM
+{
+namespace tests
+{
+class SegmentTestBasic : public DB::base::TiFlashStorageTestBasic
+{
+public:
+    struct SegmentTestOptions
+    {
+        bool is_common_handle = false;
+    };
+
+public:
+    void reloadWithOptions(SegmentTestOptions config);
+
+    std::optional<PageId> splitSegment(PageId segment_id);
+    void mergeSegment(PageId left_segment_id, PageId right_segment_id);
+    void mergeSegmentDelta(PageId segment_id);
+    void flushSegmentCache(PageId segment_id);
+    void writeSegment(PageId segment_id, UInt64 write_rows = 100);
+    void writeSegmentWithDeletedPack(PageId segment_id);
+    void deleteRangeSegment(PageId segment_id);
+
+    void writeRandomSegment();
+    void writeRandomSegmentWithDeletedPack();
+    void deleteRangeRandomSegment();
+    void splitRandomSegment();
+    void mergeRandomSegment();
+    void mergeDeltaRandomSegment();
+    void flushCacheRandomSegment();
+
+    void randomSegmentTest(size_t operator_count);
+
+    PageId createNewSegmentWithSomeData();
+    size_t getSegmentRowNumWithoutMVCC(PageId segment_id);
+    size_t getSegmentRowNum(PageId segment_id);
+    void checkSegmentRow(PageId segment_id, size_t expected_row_num);
+    std::pair<Int64, Int64> getSegmentKeyRange(SegmentPtr segment);
+
+protected:
+    // segment_id -> segment
+    std::map<PageId, SegmentPtr> segments;
+
+    enum SegmentOperaterType
+    {
+        Write = 0,
+        DeleteRange,
+        Split,
+        Merge,
+        MergeDelta,
+        FlushCache,
+        WriteDeletedPack,
+        SegmentOperaterMax
+    };
+
+    const std::vector<std::function<void()>> segment_operator_entries = {
+        [this] { writeRandomSegment(); },
+        [this] { deleteRangeRandomSegment(); },
+        [this] { splitRandomSegment(); },
+        [this] { mergeRandomSegment(); },
+        [this] { mergeDeltaRandomSegment(); },
+        [this] { flushCacheRandomSegment(); },
+        [this] { writeRandomSegmentWithDeletedPack(); }};
+
+    PageId getRandomSegmentId();
+
+    std::pair<PageId, PageId> getRandomMergeablePair();
+
+    RowKeyRange commanHandleKeyRange();
+
+    SegmentPtr reload(bool is_common_handle, const ColumnDefinesPtr & pre_define_columns = {}, DB::Settings && db_settings = DB::Settings());
+
+    // setColumns should update dm_context at the same time
+    void setColumns(const ColumnDefinesPtr & columns);
+
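+    // Accessors shared by the helpers above. Note that dm_context is rebuilt
+    // whenever setColumns() installs a new set of column defines.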
+    const ColumnDefinesPtr & tableColumns() const { return table_columns; }
+
+    DMContext & dmContext() { return *dm_context; }
+
+protected:
+    /// All these vars live as references in dm_context.
+    std::unique_ptr<StoragePathPool> storage_path_pool;
+    std::unique_ptr<StoragePool> storage_pool;
+    /// dm_context
+    std::unique_ptr<DMContext> dm_context;
+    ColumnDefinesPtr table_columns;
+    DM::DeltaMergeStore::Settings settings;
+
+    SegmentPtr root_segment;
+    UInt64 version = 0;
+    SegmentTestOptions options;
+};
+} // namespace tests
+} // namespace DM
+} // namespace DB
\ No newline at end of file
diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Main.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/Main.cpp
deleted file mode 100644
index 092c8a89a42..00000000000
--- a/dbms/src/Storages/DeltaMerge/tools/workload/Main.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2022 PingCAP, Ltd.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include
-
-using namespace DB::DM::tests;
-
-int main(int argc, char ** argv)
-{
-    return DTWorkload::mainEntry(argc, argv);
-}
diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/CMakeLists.txt b/dbms/src/Storages/DeltaMerge/workload/CMakeLists.txt
similarity index 86%
rename from dbms/src/Storages/DeltaMerge/tools/workload/CMakeLists.txt
rename to dbms/src/Storages/DeltaMerge/workload/CMakeLists.txt
index 7227f1cf563..7a83cbec57c 100644
--- a/dbms/src/Storages/DeltaMerge/tools/workload/CMakeLists.txt
+++ b/dbms/src/Storages/DeltaMerge/workload/CMakeLists.txt
@@ -18,6 +18,3 @@ set(dt-workload-src MainEntry.cpp DTWorkload.cpp KeyGenerator.cpp TableGenerator
 add_library(dt-workload-lib ${dt-workload-src})
 target_link_libraries(dt-workload-lib dbms clickhouse_functions clickhouse-server-lib)
-
-add_executable(dt-workload Main.cpp ${dt-workload-src})
-target_link_libraries(dt-workload dbms gtest clickhouse_functions clickhouse-server-lib)
diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp
similarity index 94%
rename from dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp
rename to dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp
index a6113f91d91..a53a1b9ebbd 100644
--- a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp
+++ b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.cpp
@@ -19,16 +19,16 @@
 #include
 #include
 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.h b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.h
similarity index 97%
rename from dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.h
rename to dbms/src/Storages/DeltaMerge/workload/DTWorkload.h
index 26cc5b6e07c..1ee5ba6b871 100644
--- a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.h
+++ b/dbms/src/Storages/DeltaMerge/workload/DTWorkload.h
@@ -73,7 +73,7 @@ class ThreadStat
class Statistics { public: - Statistics(int write_thread_count = 0, int read_thread_count = 0) + explicit Statistics(int write_thread_count = 0, int read_thread_count = 0) : init_ms(0) , write_stats(write_thread_count) , read_stats(read_thread_count) diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.cpp b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp similarity index 95% rename from dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.cpp rename to dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp index be6ff1dcbbe..479977d46d1 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp @@ -13,11 +13,11 @@ // limitations under the License. #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -33,7 +33,7 @@ class RandomDataGenerator : public DataGenerator , rand_gen(std::random_device()()) {} - virtual std::tuple get(uint64_t key) override + std::tuple get(uint64_t key) override { Block block; // Generate 'rowkeys'. @@ -227,7 +227,9 @@ class RandomDataGenerator : public DataGenerator struct tm randomLocalTime() { time_t t = randomUTCTimestamp(); - struct tm res; + struct tm res + { + }; if (localtime_r(&t, &res) == nullptr) { throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", t, strerror(errno))); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.h b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.h similarity index 96% rename from dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/DataGenerator.h index e32de4591e6..cd29f1a3a80 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/DataGenerator.h +++ b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.h @@ -27,7 +27,7 @@ class DataGenerator public: static std::unique_ptr create(const WorkloadOptions & opts, const TableInfo & table_info, TimestampGenerator & ts_gen); virtual std::tuple get(uint64_t key) = 0; - virtual ~DataGenerator() {} + virtual ~DataGenerator() = default; }; std::string blockToString(const Block & block); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Handle.h b/dbms/src/Storages/DeltaMerge/workload/Handle.h similarity index 90% rename from dbms/src/Storages/DeltaMerge/tools/workload/Handle.h rename to dbms/src/Storages/DeltaMerge/workload/Handle.h index eb117a4fddd..2bbd1bd409d 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Handle.h +++ b/dbms/src/Storages/DeltaMerge/workload/Handle.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include @@ -40,7 +40,7 @@ class HandleLock static constexpr uint64_t default_lock_count = 4096; static std::unique_ptr create(const TableInfo & table_info); - HandleLock(uint64_t lock_count = default_lock_count) + explicit HandleLock(uint64_t lock_count = default_lock_count) : rmtxs(lock_count) {} @@ -52,6 +52,7 @@ class HandleLock std::vector> getLocks(const std::vector & handles) { std::vector indexes; + indexes.reserve(handles.size()); for (const auto & h : handles) { indexes.push_back(index(h)); @@ -59,6 +60,7 @@ class HandleLock // Sort mutex indexes to avoid dead lock. 
 sort(indexes.begin(), indexes.end());
         std::vector<std::unique_lock<std::recursive_mutex>> locks;
+        locks.reserve(indexes.size());
         for (auto i : indexes)
         {
             locks.push_back(getLockByIndex(i));
@@ -105,7 +107,7 @@ class HandleTable
         std::lock_guard lock(mtx);
         handle_to_ts[handle] = ts;
         Record r{handle, ts};
-        if (wal != nullptr && wal->write((char *)&r, sizeof(r)) != sizeof(r))
+        if (wal != nullptr && wal->write(reinterpret_cast<char *>(&r), sizeof(r)) != sizeof(r))
         {
             throw std::runtime_error(fmt::format("write ret {}", strerror(errno)));
         }
@@ -134,8 +136,8 @@ class HandleTable
         try
         {
             PosixRandomAccessFile f(fname, -1);
-            Record r;
-            while (f.read((char *)&r, sizeof(r)) == sizeof(r))
+            Record r{};
+            while (f.read(reinterpret_cast<char *>(&r), sizeof(r)) == sizeof(r))
             {
                 handle_to_ts[r.handle] = r.ts;
             }
@@ -156,7 +158,7 @@ class HandleTable
         for (const auto & pa : handle_to_ts)
         {
             Record r{pa.first, pa.second};
-            if (f.write((char *)&r, sizeof(r)) != sizeof(r))
+            if (f.write(reinterpret_cast<char *>(&r), sizeof(r)) != sizeof(r))
             {
                 throw std::runtime_error(fmt::format("write ret {}", strerror(errno)));
             }
@@ -191,7 +193,7 @@ class SharedHandleTable
 public:
     static constexpr uint64_t default_shared_count = 4096;
 
-    SharedHandleTable(uint64_t max_key_count, const std::string & waldir = "", uint64_t shared_cnt = default_shared_count)
+    explicit SharedHandleTable(uint64_t max_key_count, const std::string & waldir = "", uint64_t shared_cnt = default_shared_count)
         : tables(shared_cnt)
     {
         uint64_t max_key_count_per_shared = max_key_count / default_shared_count + 1;
diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.cpp b/dbms/src/Storages/DeltaMerge/workload/KeyGenerator.cpp
similarity index 92%
rename from dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.cpp
rename to dbms/src/Storages/DeltaMerge/workload/KeyGenerator.cpp
index bb2f2253279..f899ec71b4b 100644
--- a/dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.cpp
+++ b/dbms/src/Storages/DeltaMerge/workload/KeyGenerator.cpp
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include -#include +#include +#include #include #include @@ -31,7 +31,7 @@ class IncrementalKeyGenerator : public KeyGenerator , key(0) {} - virtual uint64_t get64() override + uint64_t get64() override { return key.fetch_add(1, std::memory_order_relaxed) % key_count + start_key; } @@ -54,7 +54,7 @@ class UniformDistributionKeyGenerator : public KeyGenerator , uniform_dist(0, key_count) {} - virtual uint64_t get64() override + uint64_t get64() override { std::lock_guard lock(mtx); return uniform_dist(rand_gen); @@ -78,7 +78,7 @@ class NormalDistributionKeyGenerator : public KeyGenerator , normal_dist(key_count / 2.0, key_count / 20.0) {} - virtual uint64_t get64() override + uint64_t get64() override { std::lock_guard lock(mtx); return normal_dist(rand_gen); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.h b/dbms/src/Storages/DeltaMerge/workload/KeyGenerator.h similarity index 92% rename from dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/KeyGenerator.h index 447f3ffc27a..7c8b8fd0080 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/KeyGenerator.h +++ b/dbms/src/Storages/DeltaMerge/workload/KeyGenerator.h @@ -23,8 +23,8 @@ class KeyGenerator public: static std::unique_ptr create(const WorkloadOptions & opts); - KeyGenerator() {} - virtual ~KeyGenerator() {} + KeyGenerator() = default; + virtual ~KeyGenerator() = default; virtual uint64_t get64() = 0; }; diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Limiter.cpp b/dbms/src/Storages/DeltaMerge/workload/Limiter.cpp similarity index 77% rename from dbms/src/Storages/DeltaMerge/tools/workload/Limiter.cpp rename to dbms/src/Storages/DeltaMerge/workload/Limiter.cpp index 73764d27bc5..65f9e3ce72c 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Limiter.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/Limiter.cpp @@ -13,8 +13,8 @@ // limitations under the License. 
#include -#include -#include +#include +#include #include #include @@ -24,10 +24,10 @@ namespace DB::DM::tests class ConstantLimiter : public Limiter { public: - ConstantLimiter(uint64_t rate_per_sec) + explicit ConstantLimiter(uint64_t rate_per_sec) : limiter(rate_per_sec, LimiterType::UNKNOW) {} - virtual void request() override + void request() override { limiter.request(1); } @@ -38,7 +38,7 @@ class ConstantLimiter : public Limiter std::unique_ptr Limiter::create(const WorkloadOptions & opts) { - uint64_t per_sec = std::ceil(static_cast(opts.max_write_per_sec / opts.write_thread_count)); + uint64_t per_sec = std::ceil(opts.max_write_per_sec * 1.0 / opts.write_thread_count); return std::make_unique(per_sec); } diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Limiter.h b/dbms/src/Storages/DeltaMerge/workload/Limiter.h similarity index 96% rename from dbms/src/Storages/DeltaMerge/tools/workload/Limiter.h rename to dbms/src/Storages/DeltaMerge/workload/Limiter.h index e2892b178a2..da2d31c7915 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Limiter.h +++ b/dbms/src/Storages/DeltaMerge/workload/Limiter.h @@ -23,6 +23,6 @@ class Limiter public: static std::unique_ptr create(const WorkloadOptions & opts); virtual void request() = 0; - virtual ~Limiter() {} + virtual ~Limiter() = default; }; } // namespace DB::DM::tests \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp b/dbms/src/Storages/DeltaMerge/workload/MainEntry.cpp similarity index 97% rename from dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp rename to dbms/src/Storages/DeltaMerge/workload/MainEntry.cpp index f79d414f20b..88cf0b6322f 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/MainEntry.cpp @@ -14,10 +14,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp b/dbms/src/Storages/DeltaMerge/workload/Options.cpp similarity index 98% rename from dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp rename to dbms/src/Storages/DeltaMerge/workload/Options.cpp index 1c6409f3c53..8545d22ca8d 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/Options.cpp @@ -13,8 +13,8 @@ // limitations under the License. 
#include -#include -#include +#include +#include #include #include diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Options.h b/dbms/src/Storages/DeltaMerge/workload/Options.h similarity index 100% rename from dbms/src/Storages/DeltaMerge/tools/workload/Options.h rename to dbms/src/Storages/DeltaMerge/workload/Options.h diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/ReadColumnsGenerator.h b/dbms/src/Storages/DeltaMerge/workload/ReadColumnsGenerator.h similarity index 93% rename from dbms/src/Storages/DeltaMerge/tools/workload/ReadColumnsGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/ReadColumnsGenerator.h index 180409f89e1..c881bb148a2 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/ReadColumnsGenerator.h +++ b/dbms/src/Storages/DeltaMerge/workload/ReadColumnsGenerator.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include @@ -28,7 +28,7 @@ class ReadColumnsGenerator return std::make_unique(table_info); } - ReadColumnsGenerator(const TableInfo & table_info_) + explicit ReadColumnsGenerator(const TableInfo & table_info_) : table_info(table_info_) , rand_gen(std::random_device()()) , uniform_dist(0, table_info_.columns->size() - 1) diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp b/dbms/src/Storages/DeltaMerge/workload/TableGenerator.cpp similarity index 97% rename from dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp rename to dbms/src/Storages/DeltaMerge/workload/TableGenerator.cpp index cf52e808ab1..ec29a476d6a 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/TableGenerator.cpp @@ -15,8 +15,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -237,7 +237,7 @@ class RandomTableGenerator : public TableGenerator , rand_gen(std::random_device()()) {} - virtual TableInfo get(int64_t table_id, std::string table_name) override + TableInfo get(int64_t table_id, std::string table_name) override { TableInfo table_info; @@ -293,7 +293,7 @@ class RandomTableGenerator : public TableGenerator class ConstantTableGenerator : public TableGenerator { - virtual TableInfo get(int64_t table_id, std::string table_name) override + TableInfo get(int64_t table_id, std::string table_name) override { TableInfo table_info; diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h b/dbms/src/Storages/DeltaMerge/workload/TableGenerator.h similarity index 96% rename from dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/TableGenerator.h index aba5c1590b7..b88bf2b72e2 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/TableGenerator.h +++ b/dbms/src/Storages/DeltaMerge/workload/TableGenerator.h @@ -38,6 +38,6 @@ class TableGenerator virtual TableInfo get(int64_t table_id, std::string table_name) = 0; - virtual ~TableGenerator() {} + virtual ~TableGenerator() = default; }; } // namespace DB::DM::tests \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/TimestampGenerator.h b/dbms/src/Storages/DeltaMerge/workload/TimestampGenerator.h similarity index 100% rename from dbms/src/Storages/DeltaMerge/tools/workload/TimestampGenerator.h rename to dbms/src/Storages/DeltaMerge/workload/TimestampGenerator.h diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Utils.cpp b/dbms/src/Storages/DeltaMerge/workload/Utils.cpp similarity index 94% rename from dbms/src/Storages/DeltaMerge/tools/workload/Utils.cpp rename to 
dbms/src/Storages/DeltaMerge/workload/Utils.cpp index 1cefae724c6..80d9f788016 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Utils.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/Utils.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include #include @@ -83,7 +83,7 @@ std::string fieldToString(const DataTypePtr & data_type, const Field & f) } else if (t == Field::Types::Which::Decimal256) { - auto i = f.get(); + const auto & i = f.get(); auto scale = dynamic_cast(data_type.get())->getScale(); return i.toString(scale); } @@ -105,8 +105,8 @@ std::vector colToVec(const DataTypePtr & data_type, const ColumnPtr std::string blockToString(const Block & block) { std::string s = "id name type values\n"; - auto & cols = block.getColumnsWithTypeAndName(); - for (auto & col : cols) + const auto & cols = block.getColumnsWithTypeAndName(); + for (const auto & col : cols) { s += fmt::format("{} {} {} {}\n", col.column_id, col.name, col.type->getFamilyName(), colToVec(col.type, col.column)); } diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Utils.h b/dbms/src/Storages/DeltaMerge/workload/Utils.h similarity index 100% rename from dbms/src/Storages/DeltaMerge/tools/workload/Utils.h rename to dbms/src/Storages/DeltaMerge/workload/Utils.h diff --git a/dbms/src/Storages/IManageableStorage.h b/dbms/src/Storages/IManageableStorage.h index ebf84c592e4..2ff766a9c6d 100644 --- a/dbms/src/Storages/IManageableStorage.h +++ b/dbms/src/Storages/IManageableStorage.h @@ -68,7 +68,7 @@ class IManageableStorage : public IStorage virtual void flushCache(const Context & /*context*/) {} - virtual void flushCache(const Context & /*context*/, const DM::RowKeyRange & /*range_to_flush*/) {} + virtual bool flushCache(const Context & /*context*/, const DM::RowKeyRange & /*range_to_flush*/, [[maybe_unused]] bool try_until_succeed = true) { return true; } virtual BlockInputStreamPtr status() { return {}; } diff --git a/dbms/src/Storages/MarkCache.h b/dbms/src/Storages/MarkCache.h index 5816b0c1bba..728f830e0d0 100644 --- a/dbms/src/Storages/MarkCache.h +++ b/dbms/src/Storages/MarkCache.h @@ -14,24 +14,23 @@ #pragma once -#include - #include #include #include -#include #include +#include + +#include namespace ProfileEvents { - extern const Event MarkCacheHits; - extern const Event MarkCacheMisses; -} +extern const Event MarkCacheHits; +extern const Event MarkCacheMisses; +} // namespace ProfileEvents namespace DB { - /// Estimate of number of bytes in cache for marks. 
struct MarksWeightFunction { @@ -53,7 +52,8 @@ class MarkCache : public LRUCache MappedPtr getOrSet(const Key & key, LoadFunc && load) @@ -70,4 +70,4 @@ class MarkCache : public LRUCache; -} +} // namespace DB diff --git a/dbms/src/Storages/Page/CMakeLists.txt b/dbms/src/Storages/Page/CMakeLists.txt index cead83fa126..f208dc84be2 100644 --- a/dbms/src/Storages/Page/CMakeLists.txt +++ b/dbms/src/Storages/Page/CMakeLists.txt @@ -14,13 +14,3 @@ add_subdirectory(V2) add_subdirectory(tools) - -# PageStorage Stress test -if (ENABLE_V3_PAGESTORAGE) - add_headers_and_sources(page_stress_testing stress) - add_headers_and_sources(page_stress_testing stress/workload) - add_executable(page_stress_testing EXCLUDE_FROM_ALL ${page_stress_testing_sources}) - target_link_libraries(page_stress_testing dbms page_storage_v3) - target_include_directories(page_stress_testing PRIVATE stress) - target_compile_options(page_stress_testing PRIVATE -Wno-format -lc++) # turn off printf format check -endif() \ No newline at end of file diff --git a/dbms/src/Storages/Page/PageUtil.h b/dbms/src/Storages/Page/PageUtil.h index cebcbdb27f2..b0d8f0f88c8 100644 --- a/dbms/src/Storages/Page/PageUtil.h +++ b/dbms/src/Storages/Page/PageUtil.h @@ -281,7 +281,7 @@ void readFile(T & file, } if (unlikely(bytes_read != expected_bytes)) - throw DB::TiFlashException(fmt::format("No enough data in file {}, read bytes: {} , expected bytes: {}", file->getFileName(), bytes_read, expected_bytes), + throw DB::TiFlashException(fmt::format("No enough data in file {}, read bytes: {}, expected bytes: {}, offset: {}", file->getFileName(), bytes_read, expected_bytes, offset), Errors::PageStorage::FileSizeNotMatch); } diff --git a/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp b/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp index e72c7a87541..c4dd2178eb9 100644 --- a/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp +++ b/dbms/src/Storages/Page/V2/tests/gtest_page_util.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB { @@ -30,6 +31,7 @@ namespace tests static const std::string FileName = "page_util_test"; TEST(PageUtilsTest, ReadWriteFile) +try { ::remove(FileName.c_str()); @@ -52,6 +54,7 @@ TEST(PageUtilsTest, ReadWriteFile) ::remove(FileName.c_str()); } +CATCH TEST(PageUtilsTest, FileNotExists) { diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index d5f71841b91..3bd0bd9c4fa 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -555,7 +556,7 @@ void BlobStore::read(PageIDAndEntriesV3 & entries, const PageHandler & handler, for (const auto & [page_id_v3, entry] : entries) { - auto blob_file = read(entry.file_id, entry.offset, data_buf, entry.size, read_limiter); + auto blob_file = read(page_id_v3, entry.file_id, entry.offset, data_buf, entry.size, read_limiter); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) { @@ -635,7 +636,7 @@ PageMap BlobStore::read(FieldReadInfos & to_read, const ReadLimiterPtr & read_li // TODO: Continuously fields can read by one system call. 
const auto [beg_offset, end_offset] = entry.getFieldOffsets(field_index); const auto size_to_read = end_offset - beg_offset; - auto blob_file = read(entry.file_id, entry.offset + beg_offset, write_offset, size_to_read, read_limiter); + auto blob_file = read(page_id_v3, entry.file_id, entry.offset + beg_offset, write_offset, size_to_read, read_limiter); fields_offset_in_page.emplace(field_index, read_size_this_entry); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) @@ -732,7 +733,7 @@ PageMap BlobStore::read(PageIDAndEntriesV3 & entries, const ReadLimiterPtr & rea PageMap page_map; for (const auto & [page_id_v3, entry] : entries) { - auto blob_file = read(entry.file_id, entry.offset, pos, entry.size, read_limiter); + auto blob_file = read(page_id_v3, entry.file_id, entry.offset, pos, entry.size, read_limiter); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) { @@ -797,7 +798,7 @@ Page BlobStore::read(const PageIDAndEntryV3 & id_entry, const ReadLimiterPtr & r free(p, buf_size); }); - auto blob_file = read(entry.file_id, entry.offset, data_buf, buf_size, read_limiter); + auto blob_file = read(page_id_v3, entry.file_id, entry.offset, data_buf, buf_size, read_limiter); if constexpr (BLOBSTORE_CHECKSUM_ON_READ) { ChecksumClass digest; @@ -824,11 +825,20 @@ Page BlobStore::read(const PageIDAndEntryV3 & id_entry, const ReadLimiterPtr & r return page; } -BlobFilePtr BlobStore::read(BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter, bool background) +BlobFilePtr BlobStore::read(const PageIdV3Internal & page_id_v3, BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter, bool background) { assert(buffers != nullptr); - auto blob_file = getBlobFile(blob_id); - blob_file->read(buffers, offset, size, read_limiter, background); + BlobFilePtr blob_file = getBlobFile(blob_id); + try + { + blob_file->read(buffers, offset, size, read_limiter, background); + } + catch (DB::Exception & e) + { + // add debug message + e.addMessage(fmt::format("(error while reading page data [page_id={}] [blob_id={}] [offset={}] [size={}] [background={}])", page_id_v3, blob_id, offset, size, background)); + e.rethrow(); + } return blob_file; } @@ -841,8 +851,8 @@ struct BlobStoreGCInfo toTypeString("Read-Only Blob", 0), toTypeString("No GC Blob", 1), toTypeString("Full GC Blob", 2), - toTypeString("Truncated Blob", 3), - toTypeString("Big Blob", 4)); + toTypeString("Big Blob", 3), + toTypeTruncateString("Truncated Blob")); } void appendToReadOnlyBlob(const BlobFileId blob_id, double valid_rate) @@ -860,23 +870,24 @@ struct BlobStoreGCInfo blob_gc_info[2].emplace_back(std::make_pair(blob_id, valid_rate)); } - void appendToTruncatedBlob(const BlobFileId blob_id, double valid_rate) + void appendToBigBlob(const BlobFileId blob_id, double valid_rate) { blob_gc_info[3].emplace_back(std::make_pair(blob_id, valid_rate)); } - void appendToBigBlob(const BlobFileId blob_id, double valid_rate) + void appendToTruncatedBlob(const BlobFileId blob_id, UInt64 origin_size, UInt64 truncated_size, double valid_rate) { - blob_gc_info[4].emplace_back(std::make_pair(blob_id, valid_rate)); + blob_gc_truncate_info.emplace_back(std::make_tuple(blob_id, origin_size, truncated_size, valid_rate)); } private: // 1. read only blob // 2. no need gc blob // 3. full gc blob - // 4. need truncate blob - // 5. big blob - std::vector> blob_gc_info[5]; + // 4. 
big blob
+    std::vector<std::pair<BlobFileId, double>> blob_gc_info[4];
+
+    std::vector<std::tuple<BlobFileId, UInt64, UInt64, double>> blob_gc_truncate_info;
 
     String toTypeString(const std::string_view prefix, const size_t index) const
     {
@@ -901,6 +912,32 @@ struct BlobStoreGCInfo
 
         return fmt_buf.toString();
     }
+
+    String toTypeTruncateString(const std::string_view prefix) const
+    {
+        FmtBuffer fmt_buf;
+        if (blob_gc_truncate_info.empty())
+        {
+            fmt_buf.fmtAppend("{}: [null]", prefix);
+        }
+        else
+        {
+            fmt_buf.fmtAppend("{}: [", prefix);
+            fmt_buf.joinStr(
+                blob_gc_truncate_info.begin(),
+                blob_gc_truncate_info.end(),
+                [](const auto arg, FmtBuffer & fb) {
+                    fb.fmtAppend("{} origin: {} truncate: {} rate: {:.2f}", //
+                                 std::get<0>(arg), // blob id
+                                 std::get<1>(arg), // origin size
+                                 std::get<2>(arg), // truncated size
+                                 std::get<3>(arg)); // valid rate
+                },
+                ", ");
+            fmt_buf.append("]");
+        }
+        return fmt_buf.toString();
+    }
 };
 
 std::vector<BlobFileId> BlobStore::getGCStats()
@@ -943,7 +980,7 @@ std::vector<BlobFileId> BlobStore::getGCStats()
         }
 
         auto lock = stat->lock();
-        auto right_margin = stat->smap->getRightMargin();
+        auto right_margin = stat->smap->getUsedBoundary();
 
         // Avoid divide by zero
         if (right_margin == 0)
@@ -956,14 +993,13 @@ std::vector<BlobFileId> BlobStore::getGCStats()
                 stat->sm_valid_rate));
         }
 
-        LOG_FMT_TRACE(log, "Current blob is empty [blob_id={}, total size(all invalid)={}] [valid_rate={}].", stat->id, stat->sm_total_size, stat->sm_valid_rate);
-
         // If current blob empty, the size of in disk blob may not empty
         // So we need truncate current blob, and let it be reused.
         auto blobfile = getBlobFile(stat->id);
-        LOG_FMT_TRACE(log, "Truncate empty blob file [blob_id={}] to 0.", stat->id);
+        LOG_FMT_INFO(log, "Current blob file is empty, truncated to zero [blob_id={}] [total_size={}] [valid_rate={}]", stat->id, stat->sm_total_size, stat->sm_valid_rate);
         blobfile->truncate(right_margin);
-        blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate);
+        blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate);
+        stat->sm_total_size = right_margin;
         continue;
     }
@@ -1004,9 +1040,10 @@ std::vector<BlobFileId> BlobStore::getGCStats()
             auto blobfile = getBlobFile(stat->id);
             LOG_FMT_TRACE(log, "Truncate blob file [blob_id={}] [origin size={}] [truncated size={}]", stat->id, stat->sm_total_size, right_margin);
             blobfile->truncate(right_margin);
+            blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate);
+            stat->sm_total_size = right_margin;
 
             stat->sm_valid_rate = stat->sm_valid_size * 1.0 / stat->sm_total_size;
-            blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate);
         }
     }
 }
@@ -1117,21 +1154,15 @@ PageEntriesEdit BlobStore::gc(std::map<BlobFileId, PageIdAndVersionedEntries> &
             std::tie(blobfile_id, file_offset_beg) = getPosFromStats(next_alloc_size);
         }
 
-        PageEntryV3 new_entry;
-
-        read(file_id, entry.offset, data_pos, entry.size, read_limiter, /*background*/ true);
-
-        // No need do crc again, crc won't be changed.
-        new_entry.checksum = entry.checksum;
-
-        // Need copy the field_offsets
-        new_entry.field_offsets = entry.field_offsets;
-
-        // Entry size won't be changed.
-        new_entry.size = entry.size;
+        // Read the data into the buffer by the old entry
+        read(page_id, file_id, entry.offset, data_pos, entry.size, read_limiter, /*background*/ true);
+        // Most vars of the entry are not changed, but the file id and offset
+        // need to be updated.
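+        // (The checksum, size and field_offsets are copied over unchanged;
+        // padded_size is reset below since the data is rewritten compactly
+        // at the new offset.)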
+ PageEntryV3 new_entry = entry; new_entry.file_id = blobfile_id; new_entry.offset = file_offset_beg + offset_in_data; + new_entry.padded_size = 0; // reset padded size to be zero offset_in_data += new_entry.size; data_pos += new_entry.size; diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index 24bf4652123..6b139b98557 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -296,7 +296,7 @@ class BlobStore : private Allocator PageEntriesEdit handleLargeWrite(DB::WriteBatch & wb, const WriteLimiterPtr & write_limiter = nullptr); - BlobFilePtr read(BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter = nullptr, bool background = false); + BlobFilePtr read(const PageIdV3Internal & page_id_v3, BlobFileId blob_id, BlobFileOffset offset, char * buffers, size_t size, const ReadLimiterPtr & read_limiter = nullptr, bool background = false); /** * Ask BlobStats to get a span from BlobStat. diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index e9b754854b8..951da42de1c 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -478,7 +478,7 @@ PageSize VersionedPageEntries::getEntriesByBlobIds( bool VersionedPageEntries::cleanOutdatedEntries( UInt64 lowest_seq, std::map> * normal_entries_to_deref, - PageEntriesV3 & entries_removed, + PageEntriesV3 * entries_removed, const PageLock & /*page_lock*/) { if (type == EditRecordType::VAR_EXTERNAL) @@ -541,7 +541,10 @@ bool VersionedPageEntries::cleanOutdatedEntries( { if (iter->second.being_ref_count == 1) { - entries_removed.emplace_back(iter->second.entry); + if (entries_removed) + { + entries_removed->emplace_back(iter->second.entry); + } iter = entries.erase(iter); } // The `being_ref_count` for this version is valid. While for older versions, @@ -551,7 +554,10 @@ bool VersionedPageEntries::cleanOutdatedEntries( else { // else there are newer "entry" in the version list, the outdated entries should be removed - entries_removed.emplace_back(iter->second.entry); + if (entries_removed) + { + entries_removed->emplace_back(iter->second.entry); + } iter = entries.erase(iter); } } @@ -564,7 +570,7 @@ bool VersionedPageEntries::cleanOutdatedEntries( return entries.empty() || (entries.size() == 1 && entries.begin()->second.isDelete()); } -bool VersionedPageEntries::derefAndClean(UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, const Int64 deref_count, PageEntriesV3 & entries_removed) +bool VersionedPageEntries::derefAndClean(UInt64 lowest_seq, PageIdV3Internal page_id, const PageVersion & deref_ver, const Int64 deref_count, PageEntriesV3 * entries_removed) { auto page_lock = acquireLock(); if (type == EditRecordType::VAR_EXTERNAL) @@ -1223,7 +1229,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W // `being_ref_count` by the function `createSnapshot()`. 
 assert(!files_snap.persisted_log_files.empty()); // should not be empty when `needSave` return true
     auto log_num = files_snap.persisted_log_files.rbegin()->log_num;
-    auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num);
+    auto identifier = fmt::format("{}.dump_{}", wal->name(), log_num);
     auto snapshot_reader = wal->createReaderForFiles(identifier, files_snap.persisted_log_files, read_limiter);
     PageDirectoryFactory factory;
     // we just use the `collapsed_dir` to dump edit of the snapshot, should never call functions like `apply` that
@@ -1239,7 +1245,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W
     return done_any_io;
 }
 
-PageEntriesV3 PageDirectory::gcInMemEntries()
+PageEntriesV3 PageDirectory::gcInMemEntries(bool return_removed_entries)
 {
     UInt64 lowest_seq = sequence.load();
 
@@ -1303,7 +1309,7 @@ PageEntriesV3 PageDirectory::gcInMemEntries()
         const bool all_deleted = iter->second->cleanOutdatedEntries(
             lowest_seq,
             &normal_entries_to_deref,
-            all_del_entries,
+            return_removed_entries ? &all_del_entries : nullptr,
             iter->second->acquireLock());
 
         {
@@ -1342,7 +1348,7 @@ PageEntriesV3 PageDirectory::gcInMemEntries()
             page_id,
             /*deref_ver=*/deref_counter.first,
             /*deref_count=*/deref_counter.second,
-            all_del_entries);
+            return_removed_entries ? &all_del_entries : nullptr);
 
         if (all_deleted)
         {
diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h
index bd7c433022f..2f0f09f4e42 100644
--- a/dbms/src/Storages/Page/V3/PageDirectory.h
+++ b/dbms/src/Storages/Page/V3/PageDirectory.h
@@ -223,14 +223,14 @@ class VersionedPageEntries
     bool cleanOutdatedEntries(
         UInt64 lowest_seq,
         std::map<PageIdV3Internal, std::pair<PageVersion, Int64>> * normal_entries_to_deref,
-        PageEntriesV3 & entries_removed,
+        PageEntriesV3 * entries_removed,
         const PageLock & page_lock);
     bool derefAndClean(
         UInt64 lowest_seq,
         PageIdV3Internal page_id,
         const PageVersion & deref_ver,
         Int64 deref_count,
-        PageEntriesV3 & entries_removed);
+        PageEntriesV3 * entries_removed);
 
     void collapseTo(UInt64 seq, PageIdV3Internal page_id, PageEntriesEdit & edit);
 
@@ -360,7 +360,9 @@ class PageDirectory
     bool tryDumpSnapshot(const ReadLimiterPtr & read_limiter = nullptr, const WriteLimiterPtr & write_limiter = nullptr);
 
-    PageEntriesV3 gcInMemEntries();
+    // Perform a GC for in-memory entries and return the removed entries.
+    // If `return_removed_entries` is false, then just return an empty set.
+    PageEntriesV3 gcInMemEntries(bool return_removed_entries = true);
 
     std::set<PageId> getAliveExternalIds(NamespaceId ns_id) const;
 
diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp
index 483c5073ab5..968049a3273 100644
--- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp
+++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp
@@ -44,7 +44,8 @@ PageDirectoryPtr PageDirectoryFactory::createFromReader(String storage_name, WAL
     // After restoring from the disk, we need cleanup all invalid entries in memory, or it will
     // try to run GC again on some entries that are already marked as invalid in BlobStore.
-    dir->gcInMemEntries();
+    // There is no need to remove the expired entries in BlobStore here, so skip filling removed_entries to improve performance.
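+    // (With return_removed_entries=false the collected set would be discarded
+    // anyway; see the doc of PageDirectory::gcInMemEntries above.)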
+    dir->gcInMemEntries(/*return_removed_entries=*/false);
 
     LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory", storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence);
 
     if (blob_stats)
@@ -84,7 +85,8 @@ PageDirectoryPtr PageDirectoryFactory::createFromEdit(String storage_name, FileP
     // After restoring from the disk, we need cleanup all invalid entries in memory, or it will
     // try to run GC again on some entries that are already marked as invalid in BlobStore.
-    dir->gcInMemEntries();
+    // There is no need to remove the expired entries in BlobStore when restoring, so no need to fill removed_entries.
+    dir->gcInMemEntries(/*return_removed_entries=*/false);
 
     if (blob_stats)
     {
diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h
index ae44b608de0..d230b2f3e35 100644
--- a/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h
+++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMap.h
@@ -95,9 +95,11 @@ class SpaceMap
     virtual std::tuple searchInsertOffset(size_t size) = 0;
 
     /**
-     * Get the offset of the last free block. `[margin_offset, +∞)` is not used at all.
+     * Get the used boundary of this SpaceMap.
+     * The return value (`used_boundary`) means that `[used_boundary + 1, +∞)` is safe to be truncated.
+     * If the `used_boundary` is equal to the `end` of this SpaceMap, it means that there is no space to be truncated.
      */
-    virtual UInt64 getRightMargin() = 0;
+    virtual UInt64 getUsedBoundary() = 0;
 
     /**
      * Get the accurate max capacity of the space map.
diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h
index 22128a09f30..81c2a5cb786 100644
--- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h
+++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h
@@ -74,7 +74,7 @@ class BigSpaceMap
         return std::make_pair(size_in_used, size_in_used);
     }
 
-    UInt64 getRightMargin() override
+    UInt64 getUsedBoundary() override
     {
         return end;
     }
diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp
index 54275574060..4bd53b93e07 100644
--- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp
+++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp
@@ -84,7 +84,7 @@ static void rb_get_new_entry(struct SmapRbEntry ** entry, UInt64 start, UInt64 c
 {
     struct SmapRbEntry * new_entry;
 
-    new_entry = static_cast<struct SmapRbEntry *>(calloc(1, sizeof(struct SmapRbEntry)));
+    new_entry = static_cast<struct SmapRbEntry *>(calloc(1, sizeof(struct SmapRbEntry))); // NOLINT
     if (new_entry == nullptr)
     {
         return;
@@ -115,7 +115,7 @@ inline static void rb_free_entry(struct RbPrivate * private_data, struct SmapRbE
         private_data->read_index_next = nullptr;
     }
 
-    free(entry);
+    free(entry); // NOLINT
 }
@@ -419,7 +419,7 @@ std::shared_ptr<RBTreeSpaceMap> RBTreeSpaceMap::create(UInt64 start, UInt64 end)
 {
     auto ptr = std::shared_ptr<RBTreeSpaceMap>(new RBTreeSpaceMap(start, end));
 
-    ptr->rb_tree = static_cast<struct RbPrivate *>(calloc(1, sizeof(struct RbPrivate)));
+    ptr->rb_tree = static_cast<struct RbPrivate *>(calloc(1, sizeof(struct RbPrivate))); // NOLINT
     if (ptr->rb_tree == nullptr)
     {
         return nullptr;
@@ -435,7 +435,7 @@ std::shared_ptr<RBTreeSpaceMap> RBTreeSpaceMap::create(UInt64 start, UInt64 end)
     if (!rb_insert_entry(start, end, ptr->rb_tree, ptr->log))
     {
         LOG_FMT_ERROR(ptr->log, "Erorr happend, when mark all space free.
[start={}] , [end={}]", start, end); - free(ptr->rb_tree); + free(ptr->rb_tree); // NOLINT return nullptr; } return ptr; @@ -451,7 +451,7 @@ static void rb_free_tree(struct rb_root * root) next = rb_tree_next(node); entry = node_to_entry(node); rb_node_remove(node, root); - free(entry); + free(entry); // NOLINT } } @@ -460,7 +460,7 @@ void RBTreeSpaceMap::freeSmap() if (rb_tree) { rb_free_tree(&rb_tree->root); - free(rb_tree); + free(rb_tree); // NOLINT } } @@ -734,7 +734,7 @@ std::pair RBTreeSpaceMap::getSizes() const } } -UInt64 RBTreeSpaceMap::getRightMargin() +UInt64 RBTreeSpaceMap::getUsedBoundary() { struct rb_node * node = rb_tree_last(&rb_tree->root); if (node == nullptr) @@ -743,6 +743,20 @@ UInt64 RBTreeSpaceMap::getRightMargin() } auto * entry = node_to_entry(node); + + // If the `offset+size` of the last free node is not equal to `end`, it means the range `[last_node.offset, end)` is marked as used, + // then we should return `end` as the used boundary. + // + // eg. + // 1. The spacemap manage a space of `[0, 100]` + // 2. A span {offset=90, size=10} is marked as used, then the free range in SpaceMap is `[0, 90)` + // 3. The return value should be 100 + if (entry->start + entry->count != end) + { + return end; + } + + // Else we should return the offset of last free node return entry->start; } diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h index 0393fda081b..04691007a47 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h @@ -46,7 +46,7 @@ class RBTreeSpaceMap std::pair getSizes() const override; - UInt64 getRightMargin() override; + UInt64 getUsedBoundary() override; protected: RBTreeSpaceMap(UInt64 start, UInt64 end) diff --git a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h index b6ff8797f0f..41ddd77d03a 100644 --- a/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h +++ b/dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h @@ -111,13 +111,29 @@ class STDMapSpaceMap } } - UInt64 getRightMargin() override + UInt64 getUsedBoundary() override { if (free_map.empty()) { - return end - start; + return end; } - return free_map.rbegin()->first; + + const auto & last_node_it = free_map.rbegin(); + + // If the `offset+size` of the last free node is not equal to `end`, it means the range `[last_node.offset, end)` is marked as used, + // then we should return `end` as the used boundary. + // + // eg. + // 1. The spacemap manage a space of `[0, 100]` + // 2. A span {offset=90, size=10} is marked as used, then the free range in SpaceMap is `[0, 90)` + // 3. 
The return value should be 100 + if (last_node_it->first + last_node_it->second != end) + { + return end; + } + + // Else we should return the offset of last free node + return last_node_it->first; } bool isMarkUnused(UInt64 offset, size_t length) override diff --git a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp index 048140ed04f..f9daacc4cce 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_blob_store.cpp @@ -82,6 +82,7 @@ try stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, .size = 128, + .padded_size = 0, .tag = 0, .offset = 1024, .checksum = 0x4567, @@ -89,6 +90,7 @@ try stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, .size = 512, + .padded_size = 0, .tag = 0, .offset = 2048, .checksum = 0x4567, @@ -96,6 +98,7 @@ try stats.restoreByEntry(PageEntryV3{ .file_id = file_id2, .size = 512, + .padded_size = 0, .tag = 0, .offset = 2048, .checksum = 0x4567, @@ -303,6 +306,7 @@ try blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, .size = 128, + .padded_size = 0, .tag = 0, .offset = 1024, .checksum = 0x4567, @@ -310,6 +314,7 @@ try blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = file_id1, .size = 512, + .padded_size = 0, .tag = 0, .offset = 2048, .checksum = 0x4567, @@ -317,6 +322,7 @@ try blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = file_id2, .size = 512, + .padded_size = 0, .tag = 0, .offset = 2048, .checksum = 0x4567, @@ -399,6 +405,7 @@ try blob_store.blob_stats.restoreByEntry(PageEntryV3{ .file_id = id, .size = 1024, + .padded_size = 0, .tag = 0, .offset = 0, .checksum = 0x4567, @@ -531,7 +538,8 @@ TEST_F(BlobStoreTest, testWriteRead) ASSERT_EQ(record.entry.file_id, 1); // Read directly from the file - blob_store.read(record.entry.file_id, + blob_store.read(buildV3Id(TEST_NAMESPACE_ID, page_id), + record.entry.file_id, record.entry.offset, c_buff_read + index * buff_size, record.entry.size, @@ -631,7 +639,8 @@ TEST_F(BlobStoreTest, testWriteReadWithIOLimiter) { for (const auto & record : edits[i].getRecords()) { - blob_store.read(record.entry.file_id, + blob_store.read(buildV3Id(TEST_NAMESPACE_ID, page_id), + record.entry.file_id, record.entry.offset, c_buff_read + i * buff_size, record.entry.size, @@ -809,7 +818,8 @@ TEST_F(BlobStoreTest, testFeildOffsetWriteRead) ASSERT_EQ(check_field_sizes, offsets); // Read - blob_store.read(record.entry.file_id, + blob_store.read(buildV3Id(TEST_NAMESPACE_ID, page_id), + record.entry.file_id, record.entry.offset, c_buff_read + index * buff_size, record.entry.size, diff --git a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp index f7120f000b2..faec139920b 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp @@ -427,6 +427,43 @@ TEST_P(SpaceMapTest, TestGetMaxCap) } } + +TEST_P(SpaceMapTest, TestGetUsedBoundary) +{ + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + ASSERT_TRUE(smap->markUsed(50, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 60); + ASSERT_TRUE(smap->markUsed(80, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 90); + + ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } + + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + + ASSERT_TRUE(smap->markUsed(20, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + 
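+        // Freeing the tail range [90, 100) leaves [20, 30) as the last used
+        // range, so the used boundary falls back to 30.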
+ ASSERT_TRUE(smap->markFree(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 30); + + ASSERT_TRUE(smap->markUsed(90, 10)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } + + { + auto smap = SpaceMap::createSpaceMap(test_type, 0, 100); + ASSERT_EQ(smap->getUsedBoundary(), 0); + ASSERT_TRUE(smap->markUsed(0, 100)); + ASSERT_EQ(smap->getUsedBoundary(), 100); + } +} + INSTANTIATE_TEST_CASE_P( Type, SpaceMapTest, diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 6e2b0efa1ea..6d6ef41630f 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -75,7 +75,7 @@ try auto snap0 = dir->createSnapshot(); EXPECT_ENTRY_NOT_EXIST(dir, 1, snap0); - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -85,7 +85,7 @@ try auto snap1 = dir->createSnapshot(); EXPECT_ENTRY_EQ(entry1, dir, 1, snap1); - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(2, entry2); @@ -102,7 +102,7 @@ try EXPECT_ENTRIES_EQ(expected_entries, dir, ids, snap2); } - PageEntryV3 entry2_v2{.file_id = 2 + 102, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2_v2{.file_id = 2 + 102, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.del(2); @@ -123,7 +123,7 @@ try auto snap0 = dir->createSnapshot(); EXPECT_ENTRY_NOT_EXIST(dir, page_id, snap0); - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(page_id, entry1); @@ -133,7 +133,7 @@ try auto snap1 = dir->createSnapshot(); EXPECT_ENTRY_EQ(entry1, dir, page_id, snap1); - PageEntryV3 entry2{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x1234, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x1234, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(page_id, entry2); @@ -151,7 +151,7 @@ try // Put identical page within one `edit` page_id++; - PageEntryV3 entry3{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x12345, .checksum = 0x4567}; + PageEntryV3 entry3{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x12345, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(page_id, entry1); @@ -172,8 +172,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyPutDelRead) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -185,8 +185,8 @@ try EXPECT_ENTRY_EQ(entry1, dir, 1, snap1); EXPECT_ENTRY_EQ(entry2, dir, 2, snap1); - PageEntryV3 entry3{.file_id = 3, .size = 1024, .tag = 0, 
.offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry4{.file_id = 4, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry3{.file_id = 3, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry4{.file_id = 4, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.del(2); @@ -217,8 +217,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyUpdateOnRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -236,14 +236,14 @@ try EXPECT_ENTRY_EQ(entry2, dir, 3, snap1); // Update on ref page is not allowed - PageEntryV3 entry_updated{.file_id = 999, .size = 16, .tag = 0, .offset = 0x123, .checksum = 0x123}; + PageEntryV3 entry_updated{.file_id = 999, .size = 16, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x123}; { PageEntriesEdit edit; edit.put(3, entry_updated); ASSERT_ANY_THROW(dir->apply(std::move(edit))); } - PageEntryV3 entry_updated2{.file_id = 777, .size = 16, .tag = 0, .offset = 0x123, .checksum = 0x123}; + PageEntryV3 entry_updated2{.file_id = 777, .size = 16, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x123}; { PageEntriesEdit edit; edit.put(2, entry_updated2); @@ -255,8 +255,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyDeleteOnRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -305,8 +305,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyRefOnRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -343,8 +343,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyDuplicatedRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -410,8 +410,8 @@ CATCH TEST_F(PageDirectoryTest, ApplyCollapseDuplicatedRefEntries) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, 
.tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -447,9 +447,9 @@ CATCH TEST_F(PageDirectoryTest, ApplyRefToNotExistEntry) try { - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry3{.file_id = 3, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry3{.file_id = 3, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry1); @@ -628,12 +628,12 @@ try } CATCH -#define INSERT_BLOBID_ENTRY(BLOBID, VERSION) \ - PageEntryV3 entry_v##VERSION{.file_id = (BLOBID), .size = (VERSION), .tag = 0, .offset = 0x123, .checksum = 0x4567}; \ +#define INSERT_BLOBID_ENTRY(BLOBID, VERSION) \ + PageEntryV3 entry_v##VERSION{.file_id = (BLOBID), .size = (VERSION), .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; \ entries.createNewEntry(PageVersion(VERSION), entry_v##VERSION); #define INSERT_ENTRY(VERSION) INSERT_BLOBID_ENTRY(1, VERSION) -#define INSERT_GC_ENTRY(VERSION, EPOCH) \ - PageEntryV3 entry_gc_v##VERSION##_##EPOCH{.file_id = 2, .size = 100 * (VERSION) + (EPOCH), .tag = 0, .offset = 0x234, .checksum = 0x5678}; \ +#define INSERT_GC_ENTRY(VERSION, EPOCH) \ + PageEntryV3 entry_gc_v##VERSION##_##EPOCH{.file_id = 2, .size = 100 * (VERSION) + (EPOCH), .padded_size = 0, .tag = 0, .offset = 0x234, .checksum = 0x5678}; \ entries.createNewEntry(PageVersion((VERSION), (EPOCH)), entry_gc_v##VERSION##_##EPOCH); class VersionedEntriesTest : public ::testing::Test @@ -644,14 +644,14 @@ class VersionedEntriesTest : public ::testing::Test { DerefCounter deref_counter; PageEntriesV3 removed_entries; - bool all_removed = entries.cleanOutdatedEntries(seq, &deref_counter, removed_entries, entries.acquireLock()); + bool all_removed = entries.cleanOutdatedEntries(seq, &deref_counter, &removed_entries, entries.acquireLock()); return {all_removed, removed_entries, deref_counter}; } std::tuple runDeref(UInt64 seq, PageVersion ver, Int64 decrease_num) { PageEntriesV3 removed_entries; - bool all_removed = entries.derefAndClean(seq, buildV3Id(TEST_NAMESPACE_ID, page_id), ver, decrease_num, removed_entries); + bool all_removed = entries.derefAndClean(seq, buildV3Id(TEST_NAMESPACE_ID, page_id), ver, decrease_num, &removed_entries); return {all_removed, removed_entries}; } @@ -1271,12 +1271,12 @@ class PageDirectoryGCTest : public PageDirectoryTest { }; -#define INSERT_ENTRY_TO(PAGE_ID, VERSION, BLOB_FILE_ID) \ - PageEntryV3 entry_v##VERSION{.file_id = (BLOB_FILE_ID), .size = (VERSION), .tag = 0, .offset = 0x123, .checksum = 0x4567}; \ - { \ - PageEntriesEdit edit; \ - edit.put((PAGE_ID), entry_v##VERSION); \ - dir->apply(std::move(edit)); \ +#define INSERT_ENTRY_TO(PAGE_ID, VERSION, BLOB_FILE_ID) \ + PageEntryV3 entry_v##VERSION{.file_id = (BLOB_FILE_ID), .size = (VERSION), .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; \ + { \ + PageEntriesEdit edit; \ + 
edit.put((PAGE_ID), entry_v##VERSION); \ + dir->apply(std::move(edit)); \ } // Insert an entry into mvcc directory #define INSERT_ENTRY(PAGE_ID, VERSION) INSERT_ENTRY_TO(PAGE_ID, VERSION, 1) @@ -1566,7 +1566,7 @@ try INSERT_ENTRY_ACQ_SNAP(page_id, 5); INSERT_ENTRY(another_page_id, 6); INSERT_ENTRY(another_page_id, 7); - PageEntryV3 entry_v8{.file_id = 1, .size = 8, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_v8{.file_id = 1, .size = 8, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.del(page_id); @@ -1756,7 +1756,7 @@ TEST_F(PageDirectoryGCTest, GCOnRefedEntries) try { // 10->entry1, 11->10=>11->entry1; del 10->entry1 - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(10, entry1); @@ -1793,7 +1793,7 @@ TEST_F(PageDirectoryGCTest, GCOnRefedEntries2) try { // 10->entry1, 11->10=>11->entry1; del 10->entry1 - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(10, entry1); @@ -1836,7 +1836,7 @@ TEST_F(PageDirectoryGCTest, UpsertOnRefedEntries) try { // 10->entry1, 11->10, 12->10 - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry1{.file_id = 1, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(10, entry1); @@ -1860,7 +1860,7 @@ try } // upsert 10->entry2 - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry2{.file_id = 2, .size = 1024, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; auto full_gc_entries = dir->getEntriesByBlobIds({1}); @@ -2024,10 +2024,10 @@ try return d; }; - PageEntryV3 entry_1_v1{.file_id = 1, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_1_v2{.file_id = 1, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_2_v1{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_2_v2{.file_id = 2, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_1_v1{.file_id = 1, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_1_v2{.file_id = 1, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_2_v1{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_2_v2{.file_id = 2, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry_1_v1); @@ -2055,8 +2055,8 @@ try // 10->ext, 11->10, del 10->ext // 50->entry, 51->50, 52->51=>50, del 50 - PageEntryV3 entry_50{.file_id = 1, .size = 50, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_60{.file_id = 1, .size = 90, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_50{.file_id = 1, .size = 50, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_60{.file_id = 1, .size = 90, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; { PageEntriesEdit edit; edit.del(2); @@ 
-2218,9 +2218,9 @@ try Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id1)).createFile(); Poco::File(fmt::format("{}/{}{}", path, BlobFile::BLOB_PREFIX_NAME, file_id2)).createFile(); - PageEntryV3 entry_1_v1{.file_id = file_id1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_5_v1{.file_id = file_id2, .size = 255, .tag = 0, .offset = 0x100, .checksum = 0x4567}; - PageEntryV3 entry_5_v2{.file_id = file_id2, .size = 255, .tag = 0, .offset = 0x400, .checksum = 0x4567}; + PageEntryV3 entry_1_v1{.file_id = file_id1, .size = 7890, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_5_v1{.file_id = file_id2, .size = 255, .padded_size = 0, .tag = 0, .offset = 0x100, .checksum = 0x4567}; + PageEntryV3 entry_5_v2{.file_id = file_id2, .size = 255, .padded_size = 0, .tag = 0, .offset = 0x400, .checksum = 0x4567}; { PageEntriesEdit edit; edit.put(1, entry_1_v1); @@ -2275,8 +2275,8 @@ CATCH TEST_F(PageDirectoryGCTest, CleanAfterDecreaseRef) try { - PageEntryV3 entry_50_1{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_50_2{.file_id = 2, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_50_1{.file_id = 1, .size = 7890, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_50_2{.file_id = 2, .size = 7890, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; auto restore_from_edit = [](const PageEntriesEdit & edit) { auto ctx = ::DB::tests::TiFlashTestEnv::getContext(); diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index f7ba33c46c8..f9ef25cb973 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -1441,6 +1441,55 @@ try } CATCH +TEST_F(PageStorageTest, EntryTagAfterFullGC) +try +{ + { + PageStorage::Config config; + config.blob_heavy_gc_valid_rate = 1.0; /// always run full gc + page_storage = reopenWithConfig(config); + } + + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + PageId page_id = 120; + UInt64 tag = 12345; + { + WriteBatch batch; + batch.putPage(page_id, tag, std::make_shared(c_buff, buf_sz), buf_sz, {}); + page_storage->write(std::move(batch)); + } + + { + auto entry = page_storage->getEntry(page_id); + ASSERT_EQ(entry.tag, tag); + auto page = page_storage->read(page_id); + for (size_t i = 0; i < buf_sz; ++i) + { + EXPECT_EQ(*(page.data.begin() + i), static_cast(i % 0xff)); + } + } + + auto done_full_gc = page_storage->gc(); + EXPECT_TRUE(done_full_gc); + + { + auto entry = page_storage->getEntry(page_id); + ASSERT_EQ(entry.tag, tag); + auto page = page_storage->read(page_id); + for (size_t i = 0; i < buf_sz; ++i) + { + EXPECT_EQ(*(page.data.begin() + i), static_cast(i % 0xff)); + } + } +} +CATCH } // namespace PS::V3::tests } // namespace DB diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp index 078daa3e5b4..74e56c929d8 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp @@ -85,6 +85,16 @@ class PageStorageMixedTest : public DB::base::TiFlashStorageTestBasic return run_mode; } + PageReader newMixedPageReader(PageStorage::SnapshotPtr & snapshot) + { + return 
storage_pool_mix->newLogReader(nullptr, snapshot); + } + + PageReader newMixedPageReader() + { + return storage_pool_mix->newLogReader(nullptr, true, "PageStorageMixedTest"); + } + void reloadV2StoragePool() { db_context->setPageStorageRunMode(PageStorageRunMode::ONLY_V2); @@ -1035,7 +1045,7 @@ try // Thread A create snapshot for read auto snapshot_mix_before_merge_delta = page_reader_mix->getSnapshot("ReadWithSnapshotAfterMergeDelta"); { - auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_before_merge_delta); + auto page_reader_mix_with_snap = newMixedPageReader(snapshot_mix_before_merge_delta); const auto & page1 = page_reader_mix_with_snap.read(1); const auto & page2 = page_reader_mix_with_snap.read(2); const auto & page3 = page_reader_mix_with_snap.read(3); @@ -1044,7 +1054,7 @@ try ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); } { - auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, true, "ReadWithSnapshotAfterMergeDelta"); + auto page_reader_mix_with_snap = newMixedPageReader(); const auto & page1 = page_reader_mix_with_snap.read(1); const auto & page2 = page_reader_mix_with_snap.read(2); const auto & page3 = page_reader_mix_with_snap.read(3); @@ -1063,7 +1073,7 @@ try } // Thread A continue to read 1, 3 { - auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_before_merge_delta); + auto page_reader_mix_with_snap = newMixedPageReader(snapshot_mix_before_merge_delta); // read 1, 3 with snapshot, should be success const auto & page1 = page_reader_mix_with_snap.read(1); const auto & page3 = page_reader_mix_with_snap.read(3); @@ -1071,6 +1081,7 @@ try ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); ASSERT_THROW(page_reader_mix_with_snap.read(4), DB::Exception); } + { // Revert v3 WriteBatch batch; @@ -1081,6 +1092,290 @@ try } CATCH +TEST_F(PageStorageMixedTest, refWithSnapshot2) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + auto snapshot_mix = page_reader_mix->getSnapshot(""); + { + WriteBatch batch; + batch.delPage(1); + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page_maps = newMixedPageReader(snapshot_mix).read({1, 2}); + ASSERT_EQ(page_maps.size(), 2); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page_maps[1], 1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page_maps[2], 2); + } +} +CATCH + +TEST_F(PageStorageMixedTest, refWithSnapshot3) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + // to keep mix mode + batch.putExternal(10, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.delPage(1); + batch.delPage(2); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + 
ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_mix->write(std::move(batch), nullptr); + } + + auto snapshot_mix = page_reader_mix->getSnapshot(""); + { + WriteBatch batch; + batch.delPage(1); + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page_maps = newMixedPageReader(snapshot_mix).read({1, 2}); + ASSERT_EQ(page_maps.size(), 2); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page_maps[1], 1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page_maps[2], 2); + } +} +CATCH + +TEST_F(PageStorageMixedTest, refWithSnapshot4) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + WriteBatch batch; + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page1 = page_reader_mix->read(1); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + } +} +CATCH + +TEST_F(PageStorageMixedTest, refWithSnapshot5) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.delPage(1); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + auto page1 = page_reader_mix->read(2); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 2); + } +} +CATCH + +TEST_F(PageStorageMixedTest, refWithSnapshot6) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + // Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + WriteBatch batch; + batch.delPage(1); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page1 = page_reader_mix->read(2); + + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 2); + } +} +CATCH + +TEST_F(PageStorageMixedTest, ReadWithSnapshot2) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1; + char c_buff1[buf_sz]; + c_buff1[0] = 1; + + char c_buff2[buf_sz]; + c_buff2[0] = 2; + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff1, buf_sz); + batch.putPage(1, tag, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + // 
Change to mix mode here + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + auto snapshot_mix = page_reader_mix->getSnapshot(""); + { + WriteBatch batch; + batch.delPage(1); + ReadBufferPtr buff = std::make_shared(c_buff2, buf_sz); + batch.putPage(1, tag, buff, buf_sz); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page1 = newMixedPageReader(snapshot_mix).read(1); + ASSERT_PAGE_EQ(c_buff1, buf_sz, page1, 1); + } + + { + auto page1 = page_reader_mix->read(1); + ASSERT_PAGE_EQ(c_buff2, buf_sz, page1, 1); + } + + { + // Revert v3 + WriteBatch batch; + batch.delPage(1); + page_writer_mix->write(std::move(batch), nullptr); + } +} +CATCH + } // namespace PS::V3::tests } // namespace DB diff --git a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp index 6d47adabbc5..b4e6c2d9204 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp @@ -34,8 +34,8 @@ namespace DB::PS::V3::tests { TEST(WALSeriTest, AllPuts) { - PageEntryV3 entry_p1{.file_id = 1, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p2{.file_id = 1, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1{.file_id = 1, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p2{.file_id = 1, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20(/*seq=*/20); PageEntriesEdit edit; edit.put(1, entry_p1); @@ -56,8 +56,8 @@ TEST(WALSeriTest, AllPuts) TEST(WALSeriTest, PutsAndRefsAndDels) try { - PageEntryV3 entry_p3{.file_id = 1, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5{.file_id = 1, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3{.file_id = 1, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5{.file_id = 1, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver21(/*seq=*/21); PageEntriesEdit edit; edit.put(3, entry_p3); @@ -104,9 +104,9 @@ CATCH TEST(WALSeriTest, Upserts) { - PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20_1(/*seq=*/20, /*epoch*/ 1); PageVersion ver21_1(/*seq=*/21, /*epoch*/ 1); PageEntriesEdit edit; @@ -164,7 +164,7 @@ TEST(WALSeriTest, RefExternalAndEntry) { PageEntriesEdit edit; - PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; edit.varEntry(1, ver1_0, entry_p1_2, 2); edit.varDel(1, ver2_0); edit.varRef(2, ver3_0, 1); @@ -405,8 +405,8 @@ try ASSERT_NE(wal, nullptr); // Stage 2. 
Apply with only puts - PageEntryV3 entry_p1{.file_id = 1, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p2{.file_id = 1, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1{.file_id = 1, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p2{.file_id = 1, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20(/*seq=*/20); { PageEntriesEdit edit; @@ -435,8 +435,8 @@ try } // Stage 3. Apply with puts and refs - PageEntryV3 entry_p3{.file_id = 1, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5{.file_id = 1, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3{.file_id = 1, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5{.file_id = 1, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver21(/*seq=*/21); { PageEntriesEdit edit; @@ -468,9 +468,9 @@ try // Stage 4. Apply with delete and upsert - PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20_1(/*seq=*/20, /*epoch*/ 1); PageVersion ver21_1(/*seq=*/21, /*epoch*/ 1); { @@ -514,8 +514,8 @@ try std::vector size_each_edit; // Stage 1. Apply with only puts - PageEntryV3 entry_p1{.file_id = 1, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p2{.file_id = 1, .size = 2, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1{.file_id = 1, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p2{.file_id = 1, .size = 2, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20(/*seq=*/20); { PageEntriesEdit edit; @@ -526,8 +526,8 @@ try } // Stage 2. Apply with puts and refs - PageEntryV3 entry_p3{.file_id = 1, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5{.file_id = 1, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3{.file_id = 1, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5{.file_id = 1, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver21(/*seq=*/21); { PageEntriesEdit edit; @@ -540,9 +540,9 @@ try } // Stage 3. 
Apply with delete and upsert - PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p1_2{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p3_2{.file_id = 2, .size = 3, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_p5_2{.file_id = 2, .size = 5, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageVersion ver20_1(/*seq=*/20, /*epoch*/ 1); PageVersion ver21_1(/*seq=*/21, /*epoch*/ 1); { @@ -615,7 +615,7 @@ try PageVersion ver(/*seq*/ 32); for (size_t i = 0; i < num_edits_test; ++i) { - PageEntryV3 entry{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; PageEntriesEdit edit; const size_t num_pages_put = d_20(rd); for (size_t p = 0; p < num_pages_put; ++p) @@ -660,7 +660,7 @@ try .persisted_log_files = persisted_log_files}; PageEntriesEdit snap_edit; - PageEntryV3 entry{.file_id = 2, .size = 1, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry{.file_id = 2, .size = 1, .padded_size = 0, .tag = 0, .offset = 0x123, .checksum = 0x4567}; std::uniform_int_distribution<> d_10000(0, 10000); // just fill in some random entry for (size_t i = 0; i < 70; ++i) diff --git a/dbms/src/Storages/Page/stress/stress_page_storage.cpp b/dbms/src/Storages/Page/stress/stress_page_storage.cpp deleted file mode 100644 index 818be710363..00000000000 --- a/dbms/src/Storages/Page/stress/stress_page_storage.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -namespace DB -{ -// Define is_background_thread for this binary -// It is required for `RateLimiter` but we do not link with `BackgroundProcessingPool`. -#if __APPLE__ && __clang__ -__thread bool is_background_thread = false; -#else -thread_local bool is_background_thread = false; -#endif -} // namespace DB - -int main(int argc, char ** argv) -try -{ - StressEnv::initGlobalLogger(); - auto env = StressEnv::parse(argc, argv); - env.setup(); - - auto & mamager = StressWorkloadManger::getInstance(); - mamager.setEnv(env); - mamager.runWorkload(); - - return StressEnvStatus::getInstance().isSuccess(); -} -catch (...) -{ - DB::tryLogCurrentException(""); - exit(-1); -} diff --git a/dbms/src/Storages/Page/workload/CMakeLists.txt b/dbms/src/Storages/Page/workload/CMakeLists.txt new file mode 100644 index 00000000000..adf94c75f11 --- /dev/null +++ b/dbms/src/Storages/Page/workload/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright 2022 PingCAP, Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include_directories (${CMAKE_CURRENT_BINARY_DIR}) + +set (page-workload-src MainEntry.cpp PSBackground.cpp PSRunnable.cpp PSStressEnv.cpp PSWorkload.cpp) + +add_library (page-workload-lib ${page-workload-src}) +target_link_libraries (page-workload-lib dbms clickhouse_functions clickhouse-server-lib) +target_compile_options (page-workload-lib PRIVATE -Wno-format -lc++) \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HeavyMemoryCostInGC.cpp b/dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.h similarity index 95% rename from dbms/src/Storages/Page/stress/workload/HeavyMemoryCostInGC.cpp rename to dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.h index 40595f0cb59..3daaf10ffb3 100644 --- a/dbms/src/Storages/Page/stress/workload/HeavyMemoryCostInGC.cpp +++ b/dbms/src/Storages/Page/workload/HeavyMemoryCostInGC.h @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HeavyMemoryCostInGC : public StressWorkload , public StressWorkloadFunc @@ -79,5 +81,4 @@ class HeavyMemoryCostInGC fmt::format("Memory Peak is {} , it should not bigger than {} ", metrics_dumper->getMemoryPeak(), 5 * 1024 * 1024)); } }; - -REGISTER_WORKLOAD(HeavyMemoryCostInGC) +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/workload/HeavyRead.cpp b/dbms/src/Storages/Page/workload/HeavyRead.h similarity index 95% rename from dbms/src/Storages/Page/stress/workload/HeavyRead.cpp rename to dbms/src/Storages/Page/workload/HeavyRead.h index 15aeb1320cf..80023f95988 100644 --- a/dbms/src/Storages/Page/stress/workload/HeavyRead.cpp +++ b/dbms/src/Storages/Page/workload/HeavyRead.h @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HeavyRead : public StressWorkload , public StressWorkloadFunc { @@ -68,5 +70,4 @@ class HeavyRead : public StressWorkload } } }; - -REGISTER_WORKLOAD(HeavyRead) \ No newline at end of file +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HeavySkewWriteRead.cpp b/dbms/src/Storages/Page/workload/HeavySkewWriteRead.h similarity index 96% rename from dbms/src/Storages/Page/stress/workload/HeavySkewWriteRead.cpp rename to dbms/src/Storages/Page/workload/HeavySkewWriteRead.h index 78ffa5b60e0..0e75bc0d3e5 100644 --- a/dbms/src/Storages/Page/stress/workload/HeavySkewWriteRead.cpp +++ b/dbms/src/Storages/Page/workload/HeavySkewWriteRead.h @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include +namespace DB::PS::tests +{ class HeavySkewWriteRead : public StressWorkload , public StressWorkloadFunc { @@ -83,5 +85,4 @@ class HeavySkewWriteRead : public StressWorkload return true; } }; - -REGISTER_WORKLOAD(HeavySkewWriteRead) \ No newline at end of file +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HeavyWrite.cpp b/dbms/src/Storages/Page/workload/HeavyWrite.h similarity index 95% rename from dbms/src/Storages/Page/stress/workload/HeavyWrite.cpp rename to dbms/src/Storages/Page/workload/HeavyWrite.h index 265b289db56..54b7585ee20 100644 --- a/dbms/src/Storages/Page/stress/workload/HeavyWrite.cpp +++ b/dbms/src/Storages/Page/workload/HeavyWrite.h @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HeavyWrite : public StressWorkload , public StressWorkloadFunc { @@ -70,5 +72,4 @@ class HeavyWrite : public StressWorkload return true; } }; - -REGISTER_WORKLOAD(HeavyWrite) \ No newline at end of file +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HighValidBigFileGC.cpp b/dbms/src/Storages/Page/workload/HighValidBigFileGC.h similarity index 97% rename from dbms/src/Storages/Page/stress/workload/HighValidBigFileGC.cpp rename to dbms/src/Storages/Page/workload/HighValidBigFileGC.h index 866782c9578..cc3b5b45135 100644 --- a/dbms/src/Storages/Page/stress/workload/HighValidBigFileGC.cpp +++ b/dbms/src/Storages/Page/workload/HighValidBigFileGC.h @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HighValidBigFileGCWorkload : public StressWorkload , public StressWorkloadFunc @@ -127,5 +129,4 @@ class HighValidBigFileGCWorkload private: UInt64 gc_time_ms = 0; }; - -REGISTER_WORKLOAD(HighValidBigFileGCWorkload) +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/HoldSnapshotsLongTime.cpp b/dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.h similarity index 96% rename from dbms/src/Storages/Page/stress/workload/HoldSnapshotsLongTime.cpp rename to dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.h index b49347fc858..071a104010c 100644 --- a/dbms/src/Storages/Page/stress/workload/HoldSnapshotsLongTime.cpp +++ b/dbms/src/Storages/Page/workload/HoldSnapshotsLongTime.h @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class HoldSnapshotsLongTime : public StressWorkload , public StressWorkloadFunc { @@ -92,5 +94,4 @@ class HoldSnapshotsLongTime : public StressWorkload return true; } }; - -REGISTER_WORKLOAD(HoldSnapshotsLongTime) \ No newline at end of file +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/workload/MainEntry.cpp b/dbms/src/Storages/Page/workload/MainEntry.cpp new file mode 100644 index 00000000000..18e42106c90 --- /dev/null +++ b/dbms/src/Storages/Page/workload/MainEntry.cpp @@ -0,0 +1,58 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB::PS::tests; + +int StressWorkload::mainEntry(int argc, char ** argv) +{ + { + work_load_register<HeavyMemoryCostInGC>(); + work_load_register<HeavyRead>(); + work_load_register<HeavySkewWriteRead>(); + work_load_register<HeavyWrite>(); + work_load_register<HighValidBigFileGCWorkload>(); + work_load_register<HoldSnapshotsLongTime>(); + work_load_register<NormalWorkload>(); + work_load_register<PageStorageInMemoryCapacity>(); + work_load_register<ThousandsOfOffset>(); + } + try + { + StressEnv::initGlobalLogger(); + auto env = StressEnv::parse(argc, argv); + env.setup(); + + auto & manager = StressWorkloadManger::getInstance(); + manager.setEnv(env); + manager.runWorkload(); + + return StressEnvStatus::getInstance().isSuccess(); + } + catch (...) + { + DB::tryLogCurrentException(""); + exit(-1); + } +} \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/Normal.cpp b/dbms/src/Storages/Page/workload/Normal.h similarity index 95% rename from dbms/src/Storages/Page/stress/workload/Normal.cpp rename to dbms/src/Storages/Page/workload/Normal.h index 0323b857613..164f17b9d61 100644 --- a/dbms/src/Storages/Page/stress/workload/Normal.cpp +++ b/dbms/src/Storages/Page/workload/Normal.h @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +namespace DB::PS::tests +{ class NormalWorkload : public StressWorkload , public StressWorkloadFunc @@ -75,5 +77,4 @@ class NormalWorkload stop_watch.stop(); } }; - -REGISTER_WORKLOAD(NormalWorkload) +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSBackground.cpp b/dbms/src/Storages/Page/workload/PSBackground.cpp similarity index 96% rename from dbms/src/Storages/Page/stress/PSBackground.cpp rename to dbms/src/Storages/Page/workload/PSBackground.cpp index af7329e8348..247bea23dcc 100644 --- a/dbms/src/Storages/Page/stress/PSBackground.cpp +++ b/dbms/src/Storages/Page/workload/PSBackground.cpp @@ -13,11 +13,14 @@ // limitations under the License. 
#include -#include #include #include +#include #include + +namespace DB::PS::tests +{ void PSMetricsDumper::onTime(Poco::Timer & /*timer*/) { for (auto & metric : metrics) @@ -107,3 +110,4 @@ void StressTimeout::start() { timeout_timer.start(Poco::TimerCallback(*this, &StressTimeout::onTime)); } +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSBackground.h b/dbms/src/Storages/Page/workload/PSBackground.h similarity index 97% rename from dbms/src/Storages/Page/stress/PSBackground.h rename to dbms/src/Storages/Page/workload/PSBackground.h index 8c22458c5e8..c91dad1361f 100644 --- a/dbms/src/Storages/Page/stress/PSBackground.h +++ b/dbms/src/Storages/Page/workload/PSBackground.h @@ -15,14 +15,16 @@ #pragma once #include #include -#include #include +#include namespace CurrentMetrics { extern const Metric PSMVCCSnapshotsList; } +namespace DB::PS::tests +{ class PSMetricsDumper { public: @@ -162,3 +164,4 @@ class StressTimeout Poco::Timer timeout_timer; }; using StressTimeoutPtr = std::shared_ptr; +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSRunnable.cpp b/dbms/src/Storages/Page/workload/PSRunnable.cpp similarity index 97% rename from dbms/src/Storages/Page/stress/PSRunnable.cpp rename to dbms/src/Storages/Page/workload/PSRunnable.cpp index 5d6c8ecc5c6..5e9774ccc99 100644 --- a/dbms/src/Storages/Page/stress/PSRunnable.cpp +++ b/dbms/src/Storages/Page/workload/PSRunnable.cpp @@ -16,14 +16,16 @@ #include #include #include -#include #include #include +#include #include #include #include +namespace DB::PS::tests +{ void PSRunnable::run() try { @@ -69,7 +71,7 @@ DB::ReadBufferPtr PSWriter::genRandomData(const DB::PageId pageId, DB::MemHolder std::uniform_int_distribution<> dist(0, 3000); const size_t buff_sz = approx_page_mb * DB::MB + dist(size_gen); - char * buff = static_cast(malloc(buff_sz)); + char * buff = static_cast(malloc(buff_sz)); // NOLINT if (buff == nullptr) { throw DB::Exception("Alloc fix memory failed.", DB::ErrorCodes::LOGICAL_ERROR); @@ -78,7 +80,7 @@ DB::ReadBufferPtr PSWriter::genRandomData(const DB::PageId pageId, DB::MemHolder const char buff_ch = pageId % 0xFF; memset(buff, buff_ch, buff_sz); - holder = DB::createMemHolder(buff, [&](char * p) { free(p); }); + holder = DB::createMemHolder(buff, [&](char * p) { free(p); }); // NOLINT return std::make_shared(const_cast(buff), buff_sz); } @@ -88,7 +90,7 @@ void PSWriter::updatedRandomData() size_t memory_size = approx_page_mb * DB::MB * 2; if (memory == nullptr) { - memory = static_cast(malloc(memory_size)); + memory = static_cast(malloc(memory_size)); // NOLINT if (memory == nullptr) { throw DB::Exception("Alloc fix memory failed.", DB::ErrorCodes::LOGICAL_ERROR); @@ -147,7 +149,7 @@ void PSCommonWriter::updatedRandomData() if (memory == nullptr) { - memory = static_cast(malloc(memory_size)); + memory = static_cast(malloc(memory_size)); // NOLINT if (memory == nullptr) { throw DB::Exception("Alloc fix memory failed.", DB::ErrorCodes::LOGICAL_ERROR); @@ -415,3 +417,4 @@ DB::PageId PSIncreaseWriter::genRandomPageId() { return static_cast(begin_page_id++); } +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSRunnable.h b/dbms/src/Storages/Page/workload/PSRunnable.h similarity index 90% rename from dbms/src/Storages/Page/stress/PSRunnable.h rename to dbms/src/Storages/Page/workload/PSRunnable.h index 3ddcd73c093..b723236391d 100644 --- a/dbms/src/Storages/Page/stress/PSRunnable.h +++ b/dbms/src/Storages/Page/workload/PSRunnable.h @@ -13,12 +13,14 @@ // 
limitations under the License. #pragma once -#include #include #include +#include const DB::PageId MAX_PAGE_ID_DEFAULT = 1000; +namespace DB::PS::tests +{ class PSRunnable : public Poco::Runnable { public: @@ -46,7 +48,7 @@ class PSWriter : public PSRunnable gen.seed(time(nullptr)); } - virtual ~PSWriter() + ~PSWriter() override { if (memory != nullptr) { @@ -54,7 +56,7 @@ class PSWriter : public PSRunnable } } - virtual String description() override + String description() override { return fmt::format("(Stress Test Writer {})", index); } @@ -67,7 +69,7 @@ class PSWriter : public PSRunnable static void fillAllPages(const PSPtr & ps); - virtual bool runImpl() override; + bool runImpl() override; protected: virtual DB::PageId genRandomPageId(); @@ -91,11 +93,11 @@ class PSCommonWriter : public PSWriter : PSWriter(ps_, index_) {} - virtual void updatedRandomData() override; + void updatedRandomData() override; - virtual String description() override { return fmt::format("(Stress Test Common Writer {})", index); } + String description() override { return fmt::format("(Stress Test Common Writer {})", index); } - virtual bool runImpl() override; + bool runImpl() override; void setBatchBufferNums(size_t numbers); @@ -120,7 +122,7 @@ class PSCommonWriter : public PSWriter DB::PageFieldSizes data_sizes = {}; - virtual DB::PageId genRandomPageId() override; + DB::PageId genRandomPageId() override; virtual size_t genBufferSize(); }; @@ -154,7 +156,7 @@ class PSWindowWriter : public PSCommonWriter void setNormalDistributionSigma(size_t sigma); protected: - virtual DB::PageId genRandomPageId() override; + DB::PageId genRandomPageId() override; protected: size_t window_size = 100; @@ -170,12 +172,12 @@ class PSIncreaseWriter : public PSCommonWriter String description() override { return fmt::format("(Stress Test Increase Writer {})", index); } - virtual bool runImpl() override; + bool runImpl() override; void setPageRange(size_t page_range); protected: - virtual DB::PageId genRandomPageId() override; + DB::PageId genRandomPageId() override; protected: size_t begin_page_id = 1; @@ -192,9 +194,9 @@ class PSReader : public PSRunnable gen.seed(time(nullptr)); } - virtual String description() override { return fmt::format("(Stress Test PSReader {})", index); } + String description() override { return fmt::format("(Stress Test PSReader {})", index); } - virtual bool runImpl() override; + bool runImpl() override; void setPageReadOnce(size_t page_read_once); @@ -242,7 +244,7 @@ class PSWindowReader : public PSReader void setWriterNums(size_t writer_nums); protected: - virtual DB::PageIds genRandomPageIds() override; + DB::PageIds genRandomPageIds() override; protected: size_t window_size = 100; @@ -261,12 +263,13 @@ class PSSnapshotReader : public PSReader : PSReader(ps_, index_) {} - virtual bool runImpl() override; + bool runImpl() override; void setSnapshotGetIntervalMs(size_t snapshot_get_interval_ms_); protected: - size_t snapshots_hold_num; + size_t snapshots_hold_num = 0; size_t snapshot_get_interval_ms = 0; std::list snapshots; -}; \ No newline at end of file +}; +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSStressEnv.cpp b/dbms/src/Storages/Page/workload/PSStressEnv.cpp similarity index 97% rename from dbms/src/Storages/Page/stress/PSStressEnv.cpp rename to dbms/src/Storages/Page/workload/PSStressEnv.cpp index 7d680cd43c0..f5cead0a158 100644 --- a/dbms/src/Storages/Page/stress/PSStressEnv.cpp +++ b/dbms/src/Storages/Page/workload/PSStressEnv.cpp @@ -16,18 +16,20 @@ #include 
#include #include -#include -#include #include #include #include #include #include #include +#include +#include #include #include +namespace DB::PS::tests +{ Poco::Logger * StressEnv::logger; void StressEnv::initGlobalLogger() { @@ -146,3 +148,4 @@ void StressEnv::setup() init_pages = true; setupSignal(); } +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSStressEnv.h b/dbms/src/Storages/Page/workload/PSStressEnv.h similarity index 98% rename from dbms/src/Storages/Page/stress/PSStressEnv.h rename to dbms/src/Storages/Page/workload/PSStressEnv.h index 1c7d8ee761f..e67cb325430 100644 --- a/dbms/src/Storages/Page/stress/PSStressEnv.h +++ b/dbms/src/Storages/Page/workload/PSStressEnv.h @@ -25,6 +25,8 @@ namespace Poco class Logger; } +namespace DB::PS::tests +{ using PSPtr = std::shared_ptr; enum StressEnvStat @@ -124,3 +126,4 @@ struct StressEnv void setup(); }; +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSWorkload.cpp b/dbms/src/Storages/Page/workload/PSWorkload.cpp similarity index 98% rename from dbms/src/Storages/Page/stress/PSWorkload.cpp rename to dbms/src/Storages/Page/workload/PSWorkload.cpp index ce1f8d92ce0..81f13527f48 100644 --- a/dbms/src/Storages/Page/stress/PSWorkload.cpp +++ b/dbms/src/Storages/Page/workload/PSWorkload.cpp @@ -14,12 +14,14 @@ #include #include -#include #include #include #include +#include #include +namespace DB::PS::tests +{ void StressWorkload::onDumpResult() { UInt64 time_interval = stop_watch.elapsedMilliseconds(); @@ -177,3 +179,4 @@ void StressWorkloadManger::runWorkload() } } } +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/PSWorkload.h b/dbms/src/Storages/Page/workload/PSWorkload.h similarity index 85% rename from dbms/src/Storages/Page/stress/PSWorkload.h rename to dbms/src/Storages/Page/workload/PSWorkload.h index cb099b4203a..26a9c24d6da 100644 --- a/dbms/src/Storages/Page/stress/PSWorkload.h +++ b/dbms/src/Storages/Page/workload/PSWorkload.h @@ -16,15 +16,17 @@ #include #include -#include -#include -#include #include #include #include +#include +#include +#include #include #define NORMAL_WORKLOAD 0 +namespace DB::PS::tests +{ template class StressWorkloadFunc { @@ -45,6 +47,8 @@ class StressWorkloadFunc class StressWorkload { public: + static int mainEntry(int argc, char ** argv); + explicit StressWorkload(StressEnv options_) : options(options_) {} @@ -189,13 +193,15 @@ class StressWorkloadManger StressEnv options; }; -#define REGISTER_WORKLOAD(WORKLOAD) \ - static void __attribute__((constructor)) _work_load_register_named_##WORKLOAD(void) \ - { \ - StressWorkloadManger::getInstance().reg( \ - WORKLOAD::nameFunc(), \ - WORKLOAD::maskFunc(), \ - [](const StressEnv & opts) -> std::shared_ptr { \ - return std::make_shared(opts); \ - }); \ - } +template +void work_load_register() +{ + StressWorkloadManger::getInstance().reg( + Workload::nameFunc(), + Workload::maskFunc(), + [](const StressEnv & opts) -> std::shared_ptr { + return std::make_shared(opts); + }); +} + +} // namespace DB::PS::tests diff --git a/dbms/src/Storages/Page/stress/workload/PageStorageInMemoryCapacity.cpp b/dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.h similarity index 97% rename from dbms/src/Storages/Page/stress/workload/PageStorageInMemoryCapacity.cpp rename to dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.h index 190cbf6b323..337c732e6f7 100644 --- a/dbms/src/Storages/Page/stress/workload/PageStorageInMemoryCapacity.cpp +++ 
b/dbms/src/Storages/Page/workload/PageStorageInMemoryCapacity.h @@ -12,13 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include #include #include #include - #ifdef __APPLE__ #include @@ -27,6 +26,8 @@ #include #endif +namespace DB::PS::tests +{ class PageStorageInMemoryCapacity : public StressWorkload , public StressWorkloadFunc { @@ -89,14 +90,14 @@ class PageStorageInMemoryCapacity : public StressWorkload } FILE * file = fopen("/proc/meminfo", "r"); - if (file != NULL) + if (file != nullptr) { char buffer[128]; #define MEMORY_TOTAL_LABEL "MemTotal:" while (fgets(buffer, 128, file)) { if ((strncmp((buffer), (MEMORY_TOTAL_LABEL), strlen(MEMORY_TOTAL_LABEL)) == 0) - && sscanf(buffer + strlen(MEMORY_TOTAL_LABEL), " %32llu kB", &total_mem)) + && sscanf(buffer + strlen(MEMORY_TOTAL_LABEL), " %32llu kB", &total_mem)) // NOLINT { break; } @@ -173,5 +174,4 @@ class PageStorageInMemoryCapacity : public StressWorkload std::round(resident_used) ? (total_mem / ((double)resident_used / page_writen)) : 0)); } }; - -REGISTER_WORKLOAD(PageStorageInMemoryCapacity) \ No newline at end of file +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/Page/stress/workload/ThousandsOfOffset.cpp b/dbms/src/Storages/Page/workload/ThousandsOfOffset.h similarity index 98% rename from dbms/src/Storages/Page/stress/workload/ThousandsOfOffset.cpp rename to dbms/src/Storages/Page/workload/ThousandsOfOffset.h index 3a215f76769..0232ea235f1 100644 --- a/dbms/src/Storages/Page/stress/workload/ThousandsOfOffset.cpp +++ b/dbms/src/Storages/Page/workload/ThousandsOfOffset.h @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include +namespace DB::PS::tests +{ class ThousandsOfOffset : public StressWorkload , public StressWorkloadFunc { @@ -167,5 +169,4 @@ class ThousandsOfOffset : public StressWorkload return true; } }; - -REGISTER_WORKLOAD(ThousandsOfOffset) \ No newline at end of file +} // namespace DB::PS::tests \ No newline at end of file diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 0dc05674696..1d7c0ace57f 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -34,24 +34,6 @@ #include - -namespace ProfileEvents -{ -extern const Event StorageBufferFlush; -extern const Event StorageBufferErrorOnFlush; -extern const Event StorageBufferPassedAllMinThresholds; -extern const Event StorageBufferPassedTimeMaxThreshold; -extern const Event StorageBufferPassedRowsMaxThreshold; -extern const Event StorageBufferPassedBytesMaxThreshold; -} // namespace ProfileEvents - -namespace CurrentMetrics -{ -extern const Metric StorageBufferRows; -extern const Metric StorageBufferBytes; -} // namespace CurrentMetrics - - namespace DB { namespace ErrorCodes @@ -170,10 +152,6 @@ static void appendBlock(const Block & from, Block & to) to.checkNumberOfRows(); size_t rows = from.rows(); - size_t bytes = from.bytes(); - - CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows); - CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, bytes); size_t old_rows = to.rows(); @@ -430,25 +408,21 @@ bool StorageBuffer::checkThresholdsImpl(size_t rows, size_t bytes, time_t time_p { if (time_passed > min_thresholds.time && rows > min_thresholds.rows && bytes > min_thresholds.bytes) { - ProfileEvents::increment(ProfileEvents::StorageBufferPassedAllMinThresholds); return true; } if (time_passed > max_thresholds.time) { - ProfileEvents::increment(ProfileEvents::StorageBufferPassedTimeMaxThreshold); return true; } if (rows > max_thresholds.rows) { - ProfileEvents::increment(ProfileEvents::StorageBufferPassedRowsMaxThreshold); return true; } if (bytes > max_thresholds.bytes) { - ProfileEvents::increment(ProfileEvents::StorageBufferPassedBytesMaxThreshold); return true; } @@ -495,11 +469,6 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds) buffer.data.swap(block_to_write); buffer.first_write_time = 0; - CurrentMetrics::sub(CurrentMetrics::StorageBufferRows, block_to_write.rows()); - CurrentMetrics::sub(CurrentMetrics::StorageBufferBytes, block_to_write.bytes()); - - ProfileEvents::increment(ProfileEvents::StorageBufferFlush); - LOG_FMT_TRACE(log, "Flushing buffer with {} rows, {} bytes, age {} seconds.", rows, bytes, time_passed); if (no_destination) @@ -517,13 +486,7 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds) } catch (...) { - ProfileEvents::increment(ProfileEvents::StorageBufferErrorOnFlush); - /// Return the block to its place in the buffer. - - CurrentMetrics::add(CurrentMetrics::StorageBufferRows, block_to_write.rows()); - CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, block_to_write.bytes()); - buffer.data.swap(block_to_write); if (!buffer.first_write_time) diff --git a/dbms/src/Storages/StorageCatBoostPool.cpp b/dbms/src/Storages/StorageCatBoostPool.cpp deleted file mode 100644 index 317cac21d52..00000000000 --- a/dbms/src/Storages/StorageCatBoostPool.cpp +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_PARSE_TEXT; - extern const int DATABASE_ACCESS_DENIED; -} - -namespace -{ -class CatBoostDatasetBlockInputStream : public IProfilingBlockInputStream -{ -public: - - CatBoostDatasetBlockInputStream(const std::string & file_name, const std::string & format_name, - const Block & sample_block, const Context & context, size_t max_block_size) - : file_name(file_name), format_name(format_name) - { - read_buf = std::make_unique(file_name); - reader = FormatFactory().getInput(format_name, *read_buf, sample_block, context, max_block_size); - } - - String getName() const override - { - return "CatBoostDataset"; - } - - Block readImpl() override - { - return reader->read(); - } - - void readPrefixImpl() override - { - reader->readPrefix(); - } - - void readSuffixImpl() override - { - reader->readSuffix(); - } - - Block getHeader() const override { return sample_block; }; - -private: - Block sample_block; - std::unique_ptr read_buf; - BlockInputStreamPtr reader; - std::string file_name; - std::string format_name; -}; - -} - -static boost::filesystem::path canonicalPath(std::string && path) -{ - return boost::filesystem::canonical(boost::filesystem::path(path)); -} - -static std::string resolvePath(const boost::filesystem::path & base_path, std::string && path) -{ - boost::filesystem::path resolved_path(path); - if (!resolved_path.is_absolute()) - return (base_path / resolved_path).string(); - return resolved_path.string(); -} - -static void checkCreationIsAllowed(const String & base_path, const String & path) -{ - if (base_path != path.substr(0, base_path.size())) - throw Exception( - "Using file descriptor or user specified path as source of storage isn't allowed for server daemons", - ErrorCodes::DATABASE_ACCESS_DENIED); -} - - -StorageCatBoostPool::StorageCatBoostPool(const Context & context, - String column_description_file_name_, - String data_description_file_name_) - : column_description_file_name(std::move(column_description_file_name_)), - data_description_file_name(std::move(data_description_file_name_)) -{ - auto base_path = canonicalPath(context.getPath()); - column_description_file_name = resolvePath(base_path, std::move(column_description_file_name)); - data_description_file_name = resolvePath(base_path, std::move(data_description_file_name)); - if (context.getApplicationType() == Context::ApplicationType::SERVER) - { - const auto & base_path_str = base_path.string(); - checkCreationIsAllowed(base_path_str, column_description_file_name); - checkCreationIsAllowed(base_path_str, data_description_file_name); - } - - parseColumnDescription(); - createSampleBlockAndColumns(); -} - -std::string StorageCatBoostPool::getColumnTypesString(const ColumnTypesMap & columnTypesMap) -{ - std::string types_string; - bool first = true; - for (const auto & value : columnTypesMap) - { - if (!first) - types_string.append(", "); - - first = false; - types_string += 
value.first; - } - - return types_string; -} - -void StorageCatBoostPool::checkDatasetDescription() -{ - std::ifstream in(data_description_file_name); - if (!in.good()) - throw Exception("Cannot open file: " + data_description_file_name, ErrorCodes::CANNOT_OPEN_FILE); - - std::string line; - if (!std::getline(in, line)) - throw Exception("File is empty: " + data_description_file_name, ErrorCodes::CANNOT_PARSE_TEXT); - - size_t columns_count = 1; - for (char sym : line) - if (sym == '\t') - ++columns_count; - - columns_description.resize(columns_count); -} - -void StorageCatBoostPool::parseColumnDescription() -{ - /// NOTE: simple parsing - /// TODO: use ReadBufferFromFile - - checkDatasetDescription(); - - std::ifstream in(column_description_file_name); - if (!in.good()) - throw Exception("Cannot open file: " + column_description_file_name, ErrorCodes::CANNOT_OPEN_FILE); - - std::string line; - size_t line_num = 0; - auto column_types_map = getColumnTypesMap(); - auto column_types_string = getColumnTypesString(column_types_map); - - /// Enumerate default names for columns as Auxiliary, Auxiliary1, Auxiliary2, ... - std::map columns_per_type_count; - - while (std::getline(in, line)) - { - ++line_num; - std::string str_line_num = std::to_string(line_num); - - if (line.empty()) - continue; - - std::istringstream iss(line); - std::vector tokens; - std::string token; - while (std::getline(iss, token, '\t')) - tokens.push_back(token); - - if (tokens.size() != 2 && tokens.size() != 3) - throw Exception("Cannot parse column description at line " + str_line_num + " '" + line + "' " - + ": expected 2 or 3 columns, got " + std::to_string(tokens.size()), - ErrorCodes::CANNOT_PARSE_TEXT); - - std::string str_id = tokens[0]; - std::string col_type = tokens[1]; - std::string col_alias = tokens.size() > 2 ? tokens[2] : ""; - - size_t num_id; - try - { - num_id = std::stoull(str_id); - } - catch (std::exception & e) - { - throw Exception("Cannot parse column index at row " + str_line_num + ": " + e.what(), - ErrorCodes::CANNOT_PARSE_TEXT); - } - - if (num_id >= columns_description.size()) - throw Exception("Invalid index at row " + str_line_num + ": " + str_id - + ", expected in range [0, " + std::to_string(columns_description.size()) + ")", - ErrorCodes::CANNOT_PARSE_TEXT); - - if (column_types_map.count(col_type) == 0) - throw Exception("Invalid column type: " + col_type + ", expected: " + column_types_string, - ErrorCodes::CANNOT_PARSE_TEXT); - - auto type = column_types_map[col_type]; - - std::string col_name; - - bool is_feature_column = type == DatasetColumnType::Num || type == DatasetColumnType::Categ; - auto & col_number = columns_per_type_count[type]; - /// If column is not feature skip '0' after the name (to use 'Target' instead of 'Target0'). - col_name = col_type + (is_feature_column || col_number ? 
std::to_string(col_number) : ""); - ++col_number; - - columns_description[num_id] = ColumnDescription(col_name, col_alias, type); - } -} - -void StorageCatBoostPool::createSampleBlockAndColumns() -{ - ColumnsDescription columns; - NamesAndTypesList cat_columns; - NamesAndTypesList num_columns; - sample_block.clear(); - for (auto & desc : columns_description) - { - DataTypePtr type; - if (desc.column_type == DatasetColumnType::Categ - || desc.column_type == DatasetColumnType::Auxiliary - || desc.column_type == DatasetColumnType::DocId) - type = std::make_shared(); - else - type = std::make_shared(); - - if (desc.column_type == DatasetColumnType::Categ) - cat_columns.emplace_back(desc.column_name, type); - else if (desc.column_type == DatasetColumnType::Num) - num_columns.emplace_back(desc.column_name, type); - else - columns.materialized.emplace_back(desc.column_name, type); - - if (!desc.alias.empty()) - { - auto alias = std::make_shared(desc.column_name); - columns.defaults[desc.alias] = {ColumnDefaultKind::Alias, alias}; - columns.aliases.emplace_back(desc.alias, type); - } - - sample_block.insert(ColumnWithTypeAndName(type, desc.column_name)); - } - columns.ordinary.insert(columns.ordinary.end(), num_columns.begin(), num_columns.end()); - columns.ordinary.insert(columns.ordinary.end(), cat_columns.begin(), cat_columns.end()); - - setColumns(columns); -} - -BlockInputStreams StorageCatBoostPool::read(const Names & column_names, - const SelectQueryInfo & /*query_info*/, - const Context & context, - QueryProcessingStage::Enum & /*processed_stage*/, - size_t max_block_size, - unsigned /*threads*/) -{ - auto stream = std::make_shared( - data_description_file_name, "TSV", sample_block, context, max_block_size); - - auto filter_stream = std::make_shared(stream, column_names, false); - return { filter_stream }; -} - -} diff --git a/dbms/src/Storages/StorageCatBoostPool.h b/dbms/src/Storages/StorageCatBoostPool.h deleted file mode 100644 index 0f4f7c2cede..00000000000 --- a/dbms/src/Storages/StorageCatBoostPool.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include - -#include - -namespace DB -{ -class StorageCatBoostPool : public ext::SharedPtrHelper - , public IStorage -{ -public: - std::string getName() const override { return "CatBoostPool"; } - - std::string getTableName() const override { return table_name; } - - BlockInputStreams read(const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - size_t max_block_size, - unsigned threads) override; - -private: - String table_name; - - String column_description_file_name; - String data_description_file_name; - Block sample_block; - - enum class DatasetColumnType - { - Target, - Num, - Categ, - Auxiliary, - DocId, - Weight, - Baseline - }; - - using ColumnTypesMap = std::map; - - ColumnTypesMap getColumnTypesMap() const - { - return { - {"Target", DatasetColumnType::Target}, - {"Num", DatasetColumnType::Num}, - {"Categ", DatasetColumnType::Categ}, - {"Auxiliary", DatasetColumnType::Auxiliary}, - {"DocId", DatasetColumnType::DocId}, - {"Weight", DatasetColumnType::Weight}, - {"Baseline", DatasetColumnType::Baseline}, - }; - }; - - std::string getColumnTypesString(const ColumnTypesMap & columnTypesMap); - - struct ColumnDescription - { - std::string column_name; - std::string alias; - DatasetColumnType column_type; - - ColumnDescription() - : column_type(DatasetColumnType::Num) - {} - ColumnDescription(std::string column_name, std::string alias, DatasetColumnType column_type) - : column_name(std::move(column_name)) - , alias(std::move(alias)) - , column_type(column_type) - {} - }; - - std::vector columns_description; - - void checkDatasetDescription(); - void parseColumnDescription(); - void createSampleBlockAndColumns(); - -protected: - StorageCatBoostPool(const Context & context, String column_description_file_name, String data_description_file_name); -}; - -} // namespace DB diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 67d32c73a05..a6de4efb3ac 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -775,12 +775,12 @@ void StorageDeltaMerge::checkStatus(const Context & context) void StorageDeltaMerge::flushCache(const Context & context) { - flushCache(context, DM::RowKeyRange::newAll(is_common_handle, rowkey_column_size)); + flushCache(context, DM::RowKeyRange::newAll(is_common_handle, rowkey_column_size), /* try_until_succeed */ true); } -void StorageDeltaMerge::flushCache(const Context & context, const DM::RowKeyRange & range_to_flush) +bool StorageDeltaMerge::flushCache(const Context & context, const DM::RowKeyRange & range_to_flush, bool try_until_succeed) { - getAndMaybeInitStore()->flushCache(context, range_to_flush); + return getAndMaybeInitStore()->flushCache(context, range_to_flush, try_until_succeed); } void StorageDeltaMerge::mergeDelta(const Context & context) diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index 79ee225d237..9e4ab12ad4f 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -73,7 +73,7 @@ class StorageDeltaMerge void flushCache(const Context & context) override; - void flushCache(const Context & context, const DM::RowKeyRange & range_to_flush) override; + bool flushCache(const Context & context, const DM::RowKeyRange & range_to_flush, bool try_until_succeed) override; /// Merge delta into the stable layer for all segments. 
/// diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp deleted file mode 100644 index 4dec4fd5ea0..00000000000 --- a/dbms/src/Storages/StorageFile.cpp +++ /dev/null @@ -1,348 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; - extern const int CANNOT_SEEK_THROUGH_FILE; - extern const int DATABASE_ACCESS_DENIED; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNKNOWN_IDENTIFIER; - extern const int INCORRECT_FILE_NAME; - extern const int FILE_DOESNT_EXIST; - extern const int EMPTY_LIST_OF_COLUMNS_PASSED; -}; - - -static std::string getTablePath(const std::string & db_dir_path, const std::string & table_name, const std::string & format_name) -{ - return db_dir_path + escapeForFileName(table_name) + "/data." + escapeForFileName(format_name); -} - -/// Both db_dir_path and table_path must be converted to absolute paths (in particular, path cannot contain '..'). -static void checkCreationIsAllowed(Context & context_global, const std::string & db_dir_path, const std::string & table_path, int table_fd) -{ - if (context_global.getApplicationType() != Context::ApplicationType::SERVER) - return; - - if (table_fd >= 0) - throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - else if (!startsWith(table_path, db_dir_path)) - throw Exception("Part path " + table_path + " is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); - - Poco::File table_path_poco_file = Poco::File(table_path); - if (!table_path_poco_file.exists()) - throw Exception("File " + table_path + " is not exist", ErrorCodes::FILE_DOESNT_EXIST); - else if (table_path_poco_file.isDirectory()) - throw Exception("File " + table_path + " must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); -} - - -StorageFile::StorageFile( - const std::string & table_path_, - int table_fd_, - const std::string & db_dir_path, - const std::string & table_name_, - const std::string & format_name_, - const ColumnsDescription & columns_, - Context & context_) - : IStorage(columns_), - table_name(table_name_), format_name(format_name_), context_global(context_), table_fd(table_fd_) -{ - if (table_fd < 0) /// Will use file - { - use_table_fd = false; - - if (!table_path_.empty()) /// Is user's file - { - Poco::Path poco_path = Poco::Path(table_path_); - if (poco_path.isRelative()) - poco_path = Poco::Path(db_dir_path, poco_path); - - path = poco_path.absolute().toString(); - checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); - is_db_table = false; - } - else /// Is DB's file - { - if (db_dir_path.empty()) - throw Exception("Storage " + getName() + " requires data path", 
ErrorCodes::INCORRECT_FILE_NAME); - - path = getTablePath(db_dir_path, table_name, format_name); - is_db_table = true; - Poco::File(Poco::Path(path).parent()).createDirectories(); - } - } - else /// Will use FD - { - checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); - - is_db_table = false; - use_table_fd = true; - - /// Save initial offset, it will be used for repeating SELECTs - /// If FD isn't seekable (lseek returns -1), then the second and subsequent SELECTs will fail. - table_fd_init_offset = lseek(table_fd, 0, SEEK_CUR); - } -} - - -class StorageFileBlockInputStream : public IProfilingBlockInputStream -{ -public: - StorageFileBlockInputStream(StorageFile & storage_, const Context & context, size_t max_block_size) - : storage(storage_) - { - if (storage.use_table_fd) - { - storage.rwlock.lock(); - - /// We could use common ReadBuffer and WriteBuffer in storage to leverage cache - /// and add ability to seek unseekable files, but cache sync isn't supported. - - if (storage.table_fd_was_used) /// We need seek to initial position - { - if (storage.table_fd_init_offset < 0) - throw Exception("File descriptor isn't seekable, inside " + storage.getName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - - /// ReadBuffer's seek() doesn't make sence, since cache is empty - if (lseek(storage.table_fd, storage.table_fd_init_offset, SEEK_SET) < 0) - throwFromErrno("Cannot seek file descriptor, inside " + storage.getName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - } - - storage.table_fd_was_used = true; - read_buf = std::make_unique(storage.table_fd); - } - else - { - storage.rwlock.lock_shared(); - - read_buf = std::make_unique(storage.path); - } - - reader = FormatFactory().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); - } - - ~StorageFileBlockInputStream() override - { - if (storage.use_table_fd) - storage.rwlock.unlock(); - else - storage.rwlock.unlock_shared(); - } - - String getName() const override - { - return storage.getName(); - } - - Block readImpl() override - { - return reader->read(); - } - - Block getHeader() const override { return reader->getHeader(); }; - - void readPrefixImpl() override - { - reader->readPrefix(); - } - - void readSuffixImpl() override - { - reader->readSuffix(); - } - -private: - StorageFile & storage; - Block sample_block; - std::unique_ptr read_buf; - BlockInputStreamPtr reader; -}; - - -BlockInputStreams StorageFile::read( - const Names & /*column_names*/, - const SelectQueryInfo & /*query_info*/, - const Context & context, - QueryProcessingStage::Enum & /*processed_stage*/, - size_t max_block_size, - unsigned /*num_streams*/) -{ - return BlockInputStreams(1, std::make_shared(*this, context, max_block_size)); -} - - -class StorageFileBlockOutputStream : public IBlockOutputStream -{ -public: - explicit StorageFileBlockOutputStream(StorageFile & storage_) - : storage(storage_), lock(storage.rwlock) - { - if (storage.use_table_fd) - { - /** NOTE: Using real file binded to FD may be misleading: - * SELECT *; INSERT insert_data; SELECT *; last SELECT returns initil_fd_data + insert_data - * INSERT data; SELECT *; last SELECT returns only insert_data - */ - storage.table_fd_was_used = true; - write_buf = std::make_unique(storage.table_fd); - } - else - { - write_buf = std::make_unique(storage.path, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT); - } - - writer = FormatFactory().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), storage.context_global); - } - - Block getHeader() 
const override { return storage.getSampleBlock(); } - - void write(const Block & block) override - { - writer->write(block); - } - - void writePrefix() override - { - writer->writePrefix(); - } - - void writeSuffix() override - { - writer->writeSuffix(); - } - - void flush() override - { - writer->flush(); - } - -private: - StorageFile & storage; - std::unique_lock lock; - std::unique_ptr write_buf; - BlockOutputStreamPtr writer; -}; - -BlockOutputStreamPtr StorageFile::write( - const ASTPtr & /*query*/, - const Settings & /*settings*/) -{ - return std::make_shared(*this); -} - - -void StorageFile::drop() -{ - /// Extra actions are not required. -} - - -void StorageFile::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name) -{ - if (!is_db_table) - throw Exception("Can't rename table '" + table_name + "' binded to user-defined file (or FD)", ErrorCodes::DATABASE_ACCESS_DENIED); - - std::unique_lock lock(rwlock); - - std::string path_new = getTablePath(new_path_to_db, new_table_name, format_name); - Poco::File(Poco::Path(path_new).parent()).createDirectories(); - Poco::File(path).renameTo(path_new); - - path = std::move(path_new); -} - - -void registerStorageFile(StorageFactory & factory) -{ - factory.registerStorage("File", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (!(engine_args.size() == 1 || engine_args.size() == 2)) - throw Exception( - "Storage File requires 1 or 2 arguments: name of used format and source.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context); - String format_name = static_cast(*engine_args[0]).value.safeGet(); - - int source_fd = -1; - String source_path; - if (engine_args.size() >= 2) - { - /// Will use FD if engine_args[1] is int literal or identifier with std* name - - if (const ASTIdentifier * identifier = typeid_cast(engine_args[1].get())) - { - if (identifier->name == "stdin") - source_fd = STDIN_FILENO; - else if (identifier->name == "stdout") - source_fd = STDOUT_FILENO; - else if (identifier->name == "stderr") - source_fd = STDERR_FILENO; - else - throw Exception("Unknown identifier '" + identifier->name + "' in second arg of File storage constructor", - ErrorCodes::UNKNOWN_IDENTIFIER); - } - else if (const ASTLiteral * literal = typeid_cast(engine_args[1].get())) - { - auto type = literal->value.getType(); - if (type == Field::Types::Int64) - source_fd = static_cast(literal->value.get()); - else if (type == Field::Types::UInt64) - source_fd = static_cast(literal->value.get()); - else if (type == Field::Types::String) - source_path = literal->value.get(); - } - } - - return StorageFile::create( - source_path, source_fd, - args.data_path, - args.table_name, format_name, args.columns, - args.context); - }); -} - -} diff --git a/dbms/src/Storages/StorageFile.h b/dbms/src/Storages/StorageFile.h deleted file mode 100644 index ca46f7f366e..00000000000 --- a/dbms/src/Storages/StorageFile.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ -class StorageFileBlockInputStream; -class StorageFileBlockOutputStream; - -class StorageFile : public ext::SharedPtrHelper - , public IStorage -{ -public: - std::string getName() const override - { - return "File"; - } - - std::string getTableName() const override - { - return table_name; - } - - BlockInputStreams read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - size_t max_block_size, - unsigned num_streams) override; - - BlockOutputStreamPtr write( - const ASTPtr & query, - const Settings & settings) override; - - void drop() override; - - void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; - - String getDataPath() const override { return path; } - -protected: - friend class StorageFileBlockInputStream; - friend class StorageFileBlockOutputStream; - - /** there are three options (ordered by priority): - - use specified file descriptor if (fd >= 0) - - use specified table_path if it isn't empty - - create own table inside data/db/table/ - */ - StorageFile( - const std::string & table_path_, - int table_fd_, - const std::string & db_dir_path, - const std::string & table_name_, - const std::string & format_name_, - const ColumnsDescription & columns_, - Context & context_); - -private: - std::string table_name; - std::string format_name; - Context & context_global; - - std::string path; - int table_fd = -1; - - bool is_db_table = true; /// Table is stored in real database, not user's file - bool use_table_fd = false; /// Use table_fd insted of path - std::atomic table_fd_was_used{false}; /// To detect repeating reads from stdin - off_t table_fd_init_offset = -1; /// Initial position of fd, used for repeating reads - - mutable std::shared_mutex rwlock; - - Poco::Logger * log = &Poco::Logger::get("StorageFile"); -}; - -} // namespace DB diff --git a/dbms/src/Storages/Transaction/Collator.cpp b/dbms/src/Storages/Transaction/Collator.cpp index a9b4d0784be..1b0221a6829 100644 --- a/dbms/src/Storages/Transaction/Collator.cpp +++ b/dbms/src/Storages/Transaction/Collator.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include @@ -29,17 +30,10 @@ TiDBCollators dummy_collators; std::vector dummy_sort_key_contaners; std::string dummy_sort_key_contaner; -std::string_view rtrim(const char * s, size_t length) +ALWAYS_INLINE std::string_view rtrim(const char * s, size_t length) { auto v = std::string_view(s, length); - size_t end = v.find_last_not_of(' '); - return end == std::string_view::npos ? 
"" : v.substr(0, end + 1); -} - -template -int signum(T val) -{ - return (0 < val) - (val < 0); + return DB::RightTrim(v); } using Rune = int32_t; @@ -183,26 +177,26 @@ class Pattern : public ITiDBCollator::IPattern }; template -class BinCollator : public ITiDBCollator +class BinCollator final : public ITiDBCollator { public: explicit BinCollator(int32_t id) : ITiDBCollator(id) {} + int compare(const char * s1, size_t length1, const char * s2, size_t length2) const override { if constexpr (padding) - return signum(rtrim(s1, length1).compare(rtrim(s2, length2))); + return DB::RtrimStrCompare({s1, length1}, {s2, length2}); else - return signum(std::string_view(s1, length1).compare(std::string_view(s2, length2))); + return DB::RawStrCompare({s1, length1}, {s2, length2}); } StringRef sortKey(const char * s, size_t length, std::string &) const override { if constexpr (padding) { - auto v = rtrim(s, length); - return StringRef(v.data(), v.length()); + return StringRef(rtrim(s, length)); } else { @@ -249,7 +243,7 @@ using WeightType = uint16_t; extern const std::array weight_lut; } // namespace GeneralCI -class GeneralCICollator : public ITiDBCollator +class GeneralCICollator final : public ITiDBCollator { public: explicit GeneralCICollator(int32_t id) @@ -270,7 +264,7 @@ class GeneralCICollator : public ITiDBCollator auto sk2 = weight(c2); auto cmp = sk1 - sk2; if (cmp != 0) - return signum(cmp); + return DB::signum(cmp); } return (offset1 < v1.length()) - (offset2 < v2.length()); @@ -365,7 +359,7 @@ const std::array weight_lut_long = { } // namespace UnicodeCI -class UnicodeCICollator : public ITiDBCollator +class UnicodeCICollator final : public ITiDBCollator { public: explicit UnicodeCICollator(int32_t id) @@ -420,7 +414,7 @@ class UnicodeCICollator : public ITiDBCollator } else { - return signum(static_cast(s1_first & 0xFFFF) - static_cast(s2_first & 0xFFFF)); + return DB::signum(static_cast(s1_first & 0xFFFF) - static_cast(s2_first & 0xFFFF)); } } } @@ -593,6 +587,8 @@ class UnicodeCICollator : public ITiDBCollator friend class Pattern; }; +using UTF8MB4_BIN_TYPE = BinCollator; + TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id) { switch (id) @@ -607,10 +603,10 @@ TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id) static const auto latin1_collator = BinCollator(LATIN1_BIN); return &latin1_collator; case ITiDBCollator::UTF8MB4_BIN: - static const auto utf8mb4_collator = BinCollator(UTF8MB4_BIN); + static const auto utf8mb4_collator = UTF8MB4_BIN_TYPE(UTF8MB4_BIN); return &utf8mb4_collator; case ITiDBCollator::UTF8_BIN: - static const auto utf8_collator = BinCollator(UTF8_BIN); + static const auto utf8_collator = UTF8MB4_BIN_TYPE(UTF8_BIN); return &utf8_collator; case ITiDBCollator::UTF8_GENERAL_CI: static const auto utf8_general_ci_collator = GeneralCICollator(UTF8_GENERAL_CI); diff --git a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h index c636d9e60ab..b0cacefe6f4 100644 --- a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h +++ b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h @@ -77,10 +77,12 @@ struct DecodingStorageSchemaSnapshot , decoding_schema_version{decoding_schema_version_} { std::unordered_map column_lut; + std::unordered_map column_name_id_map; for (size_t i = 0; i < table_info_.columns.size(); i++) { const auto & ci = table_info_.columns[i]; column_lut.emplace(ci.id, i); + column_name_id_map.emplace(ci.name, ci.id); } for (size_t i = 0; i < 
column_defines->size(); i++)
        {
@@ -88,7 +90,7 @@ struct DecodingStorageSchemaSnapshot
            sorted_column_id_with_pos.insert({cd.id, i});
            if (cd.id != TiDBPkColumnID && cd.id != VersionColumnID && cd.id != DelMarkColumnID)
            {
-                auto & columns = table_info_.columns;
+                const auto & columns = table_info_.columns;
                column_infos.push_back(columns[column_lut.at(cd.id)]);
            }
            else
@@ -100,10 +102,14 @@
        // create pk related metadata if needed
        if (is_common_handle)
        {
-            const auto & primary_index_info = table_info_.getPrimaryIndexInfo();
-            for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++)
+            /// We will not update the IndexInfo except for the Rename DDL action.
+            /// When an add column / drop column action happens, the offset of each column may change.
+            /// Thus, we should not use the offset to get the column we want,
+            /// but compare the column names to get the column id.
+            const auto & primary_index_cols = table_info_.getPrimaryIndexInfo().idx_cols;
+            for (const auto & col : primary_index_cols)
            {
-                auto pk_column_id = table_info_.columns[primary_index_info.idx_cols[i].offset].id;
+                auto pk_column_id = column_name_id_map[col.name];
                pk_column_ids.emplace_back(pk_column_id);
                pk_pos_map.emplace(pk_column_id, reinterpret_cast(std::numeric_limits::max()));
            }
@@ -125,11 +131,11 @@
    {
        auto pk_pos_iter = pk_pos_map.begin();
        size_t column_pos_in_block = 0;
-        for (auto iter = sorted_column_id_with_pos.begin(); iter != sorted_column_id_with_pos.end(); iter++)
+        for (auto & column_id_with_pos : sorted_column_id_with_pos)
        {
            if (pk_pos_iter == pk_pos_map.end())
                break;
-            if (pk_pos_iter->first == iter->first)
+            if (pk_pos_iter->first == column_id_with_pos.first)
            {
                pk_pos_iter->second = column_pos_in_block;
                pk_pos_iter++;
diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp
index 318a04c6ed9..fb31e4476bb 100644
--- a/dbms/src/Storages/Transaction/KVStore.cpp
+++ b/dbms/src/Storages/Transaction/KVStore.cpp
@@ -129,7 +129,7 @@ void KVStore::traverseRegions(std::function & callback
        callback(region.first, region.second);
}

-void KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & region, Poco::Logger * log)
+bool KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & region, Poco::Logger * log, bool try_until_succeed)
{
    auto table_id = region.getMappedTableID();
    auto storage = tmt.getStorages().get(table_id);
@@ -139,7 +139,7 @@ void KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & regi
            "tryFlushRegionCacheInStorage can not get table for region {} with table id {}, ignored",
            region.toString(),
            table_id);
-        return;
+        return true;
    }

    try
@@ -151,7 +151,7 @@ void KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & regi
            region.getRange()->getMappedTableID(),
            storage->isCommonHandle(),
            storage->getRowKeyColumnSize());
-        storage->flushCache(tmt.getContext(), rowkey_range);
+        return storage->flushCache(tmt.getContext(), rowkey_range, try_until_succeed);
    }
    catch (DB::Exception & e)
    {
@@ -159,6 +159,7 @@ void KVStore::tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & regi
        if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
            throw;
    }
+    return true;
}

void KVStore::tryPersist(RegionID region_id)
@@ -326,6 +327,64 @@ void KVStore::persistRegion(const Region & region, const RegionTaskLock & region
    LOG_FMT_DEBUG(log, "Persist {} done", region.toString(false));
}

+bool KVStore::needFlushRegionData(UInt64 region_id, TMTContext & tmt)
+{
+    auto region_task_lock = region_manager.genRegionTaskLock(region_id);
+    const RegionPtr curr_region_ptr = getRegion(region_id);
+    return canFlushRegionDataImpl(curr_region_ptr, false, false, tmt, region_task_lock);
+}
+
+bool KVStore::tryFlushRegionData(UInt64 region_id, bool try_until_succeed, TMTContext & tmt)
+{
+    auto region_task_lock = region_manager.genRegionTaskLock(region_id);
+    const RegionPtr curr_region_ptr = getRegion(region_id);
+    return canFlushRegionDataImpl(curr_region_ptr, true, try_until_succeed, tmt, region_task_lock);
+}
+
+bool KVStore::canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 flush_if_possible, bool try_until_succeed, TMTContext & tmt, const RegionTaskLock & region_task_lock)
+{
+    if (curr_region_ptr == nullptr)
+    {
+        throw Exception("region not found when trying to flush", ErrorCodes::LOGICAL_ERROR);
+    }
+    auto & curr_region = *curr_region_ptr;
+
+    auto [rows, size_bytes] = curr_region.getApproxMemCacheInfo();
+
+    LOG_FMT_DEBUG(log, "{} approx mem cache info: rows {}, bytes {}", curr_region.toString(false), rows, size_bytes);
+
+    bool can_flush = false;
+    if (rows >= region_compact_log_min_rows.load(std::memory_order_relaxed)
+        || size_bytes >= region_compact_log_min_bytes.load(std::memory_order_relaxed))
+    {
+        // If rows or bytes exceed the threshold, flush the cache and persist the data in memory.
+        can_flush = true;
+    }
+    else
+    {
+        // If there is little data in memory, wait until the time interval reaches the threshold.
+        // Use a random period so that many regions will not be persisted at the same time.
+        auto compact_log_period = std::rand() % region_compact_log_period.load(std::memory_order_relaxed); // NOLINT
+        can_flush = !(curr_region.lastCompactLogTime() + Seconds{compact_log_period} > Clock::now());
+    }
+    if (can_flush && flush_if_possible)
+    {
+        LOG_FMT_DEBUG(log, "{} flush region due to can_flush_data", curr_region.toString(false));
+        if (tryFlushRegionCacheInStorage(tmt, curr_region, log, try_until_succeed))
+        {
+            persistRegion(curr_region, region_task_lock, "compact raft log");
+            curr_region.markCompactLog();
+            curr_region.cleanApproxMemCacheInfo();
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+    return can_flush;
+}
+
EngineStoreApplyRes KVStore::handleUselessAdminRaftCmd(
    raft_cmdpb::AdminCmdType cmd_type,
    UInt64 curr_region_id,
@@ -359,32 +418,12 @@
        }
        else
        {
-            auto [rows, size_bytes] = curr_region.getApproxMemCacheInfo();
-
-            LOG_FMT_DEBUG(log, "{} approx mem cache info: rows {}, bytes {}", curr_region.toString(false), rows, size_bytes);
-
-            if (rows >= region_compact_log_min_rows.load(std::memory_order_relaxed)
-                || size_bytes >= region_compact_log_min_bytes.load(std::memory_order_relaxed))
-            {
-                // if rows or bytes more than threshold, flush cache and perist mem data.
-                return true;
-            }
-            else
-            {
-                // if thhere is little data in mem, wait until time interval reached threshold.
-                // use random period so that lots of regions will not be persisted at same time.
-                auto compact_log_period = std::rand() % region_compact_log_period.load(std::memory_order_relaxed); // NOLINT
-                return !(curr_region.lastCompactLogTime() + Seconds{compact_log_period} > Clock::now());
-            }
+            return canFlushRegionDataImpl(curr_region_ptr, true, /* try_until_succeed */ false, tmt, region_task_lock);
        }
    };

    if (check_sync_log())
    {
-        tryFlushRegionCacheInStorage(tmt, curr_region, log);
-        persistRegion(curr_region, region_task_lock, "compact raft log");
-        curr_region.markCompactLog();
-        curr_region.cleanApproxMemCacheInfo();
        return EngineStoreApplyRes::Persist;
    }
    return EngineStoreApplyRes::None;
diff --git a/dbms/src/Storages/Transaction/KVStore.h b/dbms/src/Storages/Transaction/KVStore.h
index bb45e65d18b..b58083557a1 100644
--- a/dbms/src/Storages/Transaction/KVStore.h
+++ b/dbms/src/Storages/Transaction/KVStore.h
@@ -91,7 +91,7 @@ class KVStore final : private boost::noncopyable
    void tryPersist(RegionID region_id);

-    static void tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & region, Poco::Logger * log);
+    static bool tryFlushRegionCacheInStorage(TMTContext & tmt, const Region & region, Poco::Logger * log, bool try_until_succeed = true);

    size_t regionSize() const;
    EngineStoreApplyRes handleAdminRaftCmd(raft_cmdpb::AdminRequest && request,
@@ -108,6 +108,9 @@ class KVStore final : private boost::noncopyable
        TMTContext & tmt);
    EngineStoreApplyRes handleWriteRaftCmd(const WriteCmdsView & cmds, UInt64 region_id, UInt64 index, UInt64 term, TMTContext & tmt);

+    bool needFlushRegionData(UInt64 region_id, TMTContext & tmt);
+    bool tryFlushRegionData(UInt64 region_id, bool try_until_succeed, TMTContext & tmt);
+
    void handleApplySnapshot(metapb::Region && region, uint64_t peer_id, const SSTViewVec, uint64_t index, uint64_t term, TMTContext & tmt);

    std::vector /* */ preHandleSnapshotToFiles(
@@ -219,6 +222,11 @@ class KVStore final : private boost::noncopyable
        UInt64 term,
        TMTContext & tmt);

+    /// Note that if flush_if_possible is set to false, we only check whether a flush is allowed by the rows/bytes/interval thresholds;
+    /// we do not check whether the flush would eventually succeed.
+    /// In other words, `canFlushRegionDataImpl(flush_if_possible=true)` can still return false
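+    /// (e.g. when try_until_succeed is false and the underlying storage fails to flush the region cache, nothing is persisted and the raft log is not compacted).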
+    bool canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 flush_if_possible, bool try_until_succeed, TMTContext & tmt, const RegionTaskLock & region_task_lock);
+
    void persistRegion(const Region & region, const RegionTaskLock & region_task_lock, const char * caller);
    void releaseReadIndexWorkers();
    void handleDestroy(UInt64 region_id, TMTContext & tmt, const KVStoreTaskLock &);
diff --git a/dbms/src/Storages/Transaction/PDTiKVClient.cpp b/dbms/src/Storages/Transaction/PDTiKVClient.cpp
index 5a4b751fd9c..a06f1a3ae64 100644
--- a/dbms/src/Storages/Transaction/PDTiKVClient.cpp
+++ b/dbms/src/Storages/Transaction/PDTiKVClient.cpp
@@ -22,7 +22,7 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

-Timestamp PDClientHelper::cached_gc_safe_point = 0;
-std::chrono::time_point PDClientHelper::safe_point_last_update_time;
+std::atomic PDClientHelper::cached_gc_safe_point = 0;
+std::atomic> PDClientHelper::safe_point_last_update_time;

} // namespace DB
diff --git a/dbms/src/Storages/Transaction/PDTiKVClient.h b/dbms/src/Storages/Transaction/PDTiKVClient.h
index 4986c28f4ac..e5801cc7fae 100644
--- a/dbms/src/Storages/Transaction/PDTiKVClient.h
+++ b/dbms/src/Storages/Transaction/PDTiKVClient.h
@@ -29,6 +29,8 @@
 #include
 #include

+#include
+
// We define a shared ptr here, because TMTContext / SchemaSyncer / IndexReader all need to
// `share` the resource of cluster.
using KVClusterPtr = std::shared_ptr;
@@ -49,7 +51,7 @@ struct PDClientHelper
    {
        // In case we cost too much to update safe point from PD.
        std::chrono::time_point now = std::chrono::system_clock::now();
-        const auto duration = std::chrono::duration_cast(now - safe_point_last_update_time);
+        const auto duration = std::chrono::duration_cast(now - safe_point_last_update_time.load());
        const auto min_interval = std::max(Int64(1), safe_point_update_interval_seconds); // at least one second
        if (duration.count() < min_interval)
            return cached_gc_safe_point;
@@ -73,8 +75,8 @@ struct PDClientHelper
    }

private:
-    static Timestamp cached_gc_safe_point;
-    static std::chrono::time_point safe_point_last_update_time;
+    static std::atomic cached_gc_safe_point;
+    static std::atomic> safe_point_last_update_time;
};
diff --git a/dbms/src/Storages/Transaction/PartitionStreams.cpp b/dbms/src/Storages/Transaction/PartitionStreams.cpp
index 4b2ca6c07a8..cf151c4270d 100644
--- a/dbms/src/Storages/Transaction/PartitionStreams.cpp
+++ b/dbms/src/Storages/Transaction/PartitionStreams.cpp
@@ -13,6 +13,7 @@
// limitations under the License.

 #include
+#include
 #include
 #include
 #include
@@ -39,6 +40,8 @@ namespace FailPoints
extern const char pause_before_apply_raft_cmd[];
extern const char pause_before_apply_raft_snapshot[];
extern const char force_set_safepoint_when_decode_block[];
+extern const char unblock_query_init_after_write[];
+extern const char pause_query_init[];
} // namespace FailPoints

namespace ErrorCodes
@@ -150,6 +153,7 @@ static void writeRegionDataToStorage(
    default:
        throw Exception("Unknown StorageEngine: " + toString(static_cast(storage->engineType())), ErrorCodes::LOGICAL_ERROR);
    }
+
    write_part_cost = watch.elapsedMilliseconds();
    GET_METRIC(tiflash_raft_write_data_to_storage_duration_seconds, type_write).Observe(write_part_cost / 1000.0);
    if (need_decode)
@@ -164,10 +168,20 @@
    /// decoding data. Check the test case for more details.
    FAIL_POINT_PAUSE(FailPoints::pause_before_apply_raft_cmd);

+    /// Disable pause_query_init when the write action finishes, so that the query action can continue.
+    /// For the usage of unblock_query_init_after_write and pause_query_init, refer to InterpreterSelectQuery::init.
+    SCOPE_EXIT({
+        fiu_do_on(FailPoints::unblock_query_init_after_write, {
+            FailPointHelper::disableFailPoint(FailPoints::pause_query_init);
+        });
+    });
+
    /// Try read then write once.
    {
        if (atomic_read_write(false))
+        {
            return;
+        }
    }

    /// If first try failed, sync schema and force read then write.
    {
@@ -176,10 +190,12 @@
        tmt.getSchemaSyncer()->syncSchemas(context);

        if (!atomic_read_write(true))
+        {
            // Failure won't be tolerated this time.
            // TODO: Enrich exception message.
            throw Exception("Write region " + std::to_string(region->id()) + " to table " + std::to_string(table_id) + " failed",
                            ErrorCodes::LOGICAL_ERROR);
+        }
    }
}
diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp b/dbms/src/Storages/Transaction/ProxyFFI.cpp
index 8a40ca9b15e..d4ba50d5714 100644
--- a/dbms/src/Storages/Transaction/ProxyFFI.cpp
+++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp
@@ -128,6 +128,34 @@ EngineStoreApplyRes HandleAdminRaftCmd(
    }
}

+uint8_t NeedFlushData(EngineStoreServerWrap * server, uint64_t region_id)
+{
+    try
+    {
+        auto & kvstore = server->tmt->getKVStore();
+        return kvstore->needFlushRegionData(region_id, *server->tmt);
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+        exit(-1);
+    }
+}
+
+uint8_t TryFlushData(EngineStoreServerWrap * server, uint64_t region_id, uint8_t until_succeed)
+{
+    try
+    {
+        auto & kvstore = server->tmt->getKVStore();
+        return kvstore->tryFlushRegionData(region_id, until_succeed, *server->tmt);
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+        exit(-1);
+    }
+}
+
static_assert(sizeof(RaftStoreProxyFFIHelper) == sizeof(TiFlashRaftProxyHelper));
static_assert(alignof(RaftStoreProxyFFIHelper) == alignof(TiFlashRaftProxyHelper));
diff --git a/dbms/src/Storages/Transaction/ProxyFFI.h b/dbms/src/Storages/Transaction/ProxyFFI.h
index e1c01599275..aafe4b375eb 100644
--- a/dbms/src/Storages/Transaction/ProxyFFI.h
+++ b/dbms/src/Storages/Transaction/ProxyFFI.h
@@ -125,6 +125,8 @@ EngineStoreApplyRes HandleAdminRaftCmd(
EngineStoreApplyRes HandleWriteRaftCmd(const EngineStoreServerWrap * server, WriteCmdsView cmds, RaftCmdHeader header);
+uint8_t NeedFlushData(EngineStoreServerWrap * server, uint64_t region_id);
+uint8_t TryFlushData(EngineStoreServerWrap * server, uint64_t region_id, uint8_t until_succeed);
void AtomicUpdateProxy(EngineStoreServerWrap * server, RaftStoreProxyFFIHelper * proxy);
void HandleDestroy(EngineStoreServerWrap * server, uint64_t region_id);
EngineStoreApplyRes HandleIngestSST(EngineStoreServerWrap * server, SSTViewVec snaps, RaftCmdHeader header);
@@ -158,6 +160,8 @@ inline EngineStoreServerHelper GetEngineStoreServerHelper(
        .fn_gen_cpp_string = GenCppRawString,
        .fn_handle_write_raft_cmd = HandleWriteRaftCmd,
        .fn_handle_admin_raft_cmd = HandleAdminRaftCmd,
+        .fn_need_flush_data = NeedFlushData,
+        .fn_try_flush_data = TryFlushData,
        .fn_atomic_update_proxy = AtomicUpdateProxy,
        .fn_handle_destroy = HandleDestroy,
        .fn_handle_ingest_sst = HandleIngestSST,
diff --git a/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp b/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp
index dafacd8947d..792f149f588 100644
--- a/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp
+++ b/dbms/src/Storages/Transaction/ProxyFFIStatusService.cpp
@@ -22,26 +22,6 @@
namespace DB
{
-HttpRequestRes HandleHttpRequestTestShow(
-    EngineStoreServerWrap *,
-    std::string_view path,
-    const std::string & api_name,
-    std::string_view query,
-    std::string_view body)
-{
-    auto * res = RawCppString::New(fmt::format(
-        "api_name: {}\npath: {}\nquery: {}\nbody: {}",
-        api_name,
-        path,
-        query,
-        body));
-    return HttpRequestRes{
-        .status = HttpRequestStatus::Ok,
-        .res = CppStrWithView{
-            .inner = GenRawCppPtr(res, RawCppPtrTypeImpl::String),
-            .view = BaseBuffView{res->data(), res->size()}}};
-}
-
HttpRequestRes HandleHttpRequestSyncStatus(
    EngineStoreServerWrap * server,
    std::string_view path,
@@ -112,8 +92,7 @@ using HANDLE_HTTP_URI_METHOD = HttpRequestRes (*)(EngineStoreServerWrap *, std::
static const std::map AVAILABLE_HTTP_URI = {
    {"/tiflash/sync-status/", HandleHttpRequestSyncStatus},
-    {"/tiflash/store-status", HandleHttpRequestStoreStatus},
-    {"/tiflash/test-show", HandleHttpRequestTestShow}};
+    {"/tiflash/store-status", HandleHttpRequestStoreStatus}};

uint8_t CheckHttpUriAvailable(BaseBuffView path_)
{
diff --git a/dbms/src/Storages/Transaction/ReadIndexWorker.cpp b/dbms/src/Storages/Transaction/ReadIndexWorker.cpp
index 3223c815989..7de79dd5c6d 100644
--- a/dbms/src/Storages/Transaction/ReadIndexWorker.cpp
+++ b/dbms/src/Storages/Transaction/ReadIndexWorker.cpp
@@ -880,7 +880,7 @@ BatchReadIndexRes ReadIndexWorkerManager::batchReadIndex(
        }
    }
    { // if meet timeout, which means part of regions can not get response from leader, try to poll rest tasks
-        TEST_LOG_FMT("rest {}, poll rest tasks onece", tasks.size());
+        TEST_LOG_FMT("rest {}, poll rest tasks once", tasks.size());

        while (!tasks.empty())
        {
diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp
index af351f4a6b0..2ec690c467b 100644
--- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp
+++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp
@@ -186,6 +186,7 @@
        }
        else
        {
+            // For a common handle, sometimes we need to decode the value from the encoded key instead of the encoded value
            auto * raw_extra_column = const_cast((block.getByPosition(extra_handle_column_pos)).column.get());
            raw_extra_column->insertData(pk->data(), pk->size());
            /// decode key and insert pk columns if needed
@@ -207,6 +208,8 @@
        }
        index++;
    }
+    block.checkNumberOfRows();
+
    return true;
}
diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.h b/dbms/src/Storages/Transaction/RegionBlockReader.h
index ec633e805c0..004d9f40447 100644
--- a/dbms/src/Storages/Transaction/RegionBlockReader.h
+++ b/dbms/src/Storages/Transaction/RegionBlockReader.h
@@ -41,7 +41,7 @@ class Block;
class RegionBlockReader : private boost::noncopyable
{
public:
-    RegionBlockReader(DecodingStorageSchemaSnapshotConstPtr schema_snapshot_);
+    explicit RegionBlockReader(DecodingStorageSchemaSnapshotConstPtr schema_snapshot_);

    /// Read `data_list` as a block.
///
diff --git a/dbms/src/Storages/Transaction/RegionTable.cpp b/dbms/src/Storages/Transaction/RegionTable.cpp
index c855d5b3226..5ae36a4bd64 100644
--- a/dbms/src/Storages/Transaction/RegionTable.cpp
+++ b/dbms/src/Storages/Transaction/RegionTable.cpp
@@ -230,7 +230,7 @@ void removeObsoleteDataInStorage(
        auto rowkey_range = DM::RowKeyRange::fromRegionRange(handle_range, table_id, table_id, storage->isCommonHandle(), storage->getRowKeyColumnSize());
        dm_storage->deleteRange(rowkey_range, context->getSettingsRef());
-        dm_storage->flushCache(*context, rowkey_range); // flush to disk
+        dm_storage->flushCache(*context, rowkey_range, /*try_until_succeed*/ true); // flush to disk
    }
    catch (DB::Exception & e)
    {
diff --git a/dbms/src/Storages/Transaction/RowCodec.cpp b/dbms/src/Storages/Transaction/RowCodec.cpp
index 427544a0467..ea7f6b7c2da 100644
--- a/dbms/src/Storages/Transaction/RowCodec.cpp
+++ b/dbms/src/Storages/Transaction/RowCodec.cpp
@@ -314,7 +314,7 @@ bool appendRowV2ToBlock(
    ColumnID pk_handle_id,
    bool force_decode)
{
-    UInt8 row_flag = readLittleEndian(&raw_value[1]);
+    auto row_flag = readLittleEndian(&raw_value[1]);
    bool is_big = row_flag & RowV2::BigRowMask;
    return is_big ? appendRowV2ToBlockImpl(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, force_decode)
                  : appendRowV2ToBlockImpl(raw_value, column_ids_iter, column_ids_iter_end, block, block_column_pos, column_infos, pk_handle_id, force_decode);
@@ -360,9 +360,10 @@ bool appendRowV2ToBlockImpl(
    decodeUInts::ColumnIDType>(cursor, raw_value, num_null_columns, null_column_ids);
    decodeUInts::ValueOffsetType>(cursor, raw_value, num_not_null_columns, value_offsets);
    size_t values_start_pos = cursor;
-    size_t id_not_null = 0, id_null = 0;
+    size_t idx_not_null = 0;
+    size_t idx_null = 0;
    // Merge ordered not null/null columns to keep order.
-    while (id_not_null < not_null_column_ids.size() || id_null < null_column_ids.size())
+    while (idx_not_null < not_null_column_ids.size() || idx_null < null_column_ids.size())
    {
        if (column_ids_iter == column_ids_iter_end)
        {
@@ -371,24 +372,31 @@
        }

        bool is_null;
-        if (id_not_null < not_null_column_ids.size() && id_null < null_column_ids.size())
-            is_null = not_null_column_ids[id_not_null] > null_column_ids[id_null];
+        if (idx_not_null < not_null_column_ids.size() && idx_null < null_column_ids.size())
+            is_null = not_null_column_ids[idx_not_null] > null_column_ids[idx_null];
        else
-            is_null = id_null < null_column_ids.size();
+            is_null = idx_null < null_column_ids.size();

-        auto next_datum_column_id = is_null ? null_column_ids[id_null] : not_null_column_ids[id_not_null];
+        auto next_datum_column_id = is_null ? null_column_ids[idx_null] : not_null_column_ids[idx_not_null];
        if (column_ids_iter->first > next_datum_column_id)
        {
-            // extra column
+            // The next column id to read is bigger than the column id of the next datum
+            // in the encoded row. It means this datum belongs to an extra column; this may
+            // happen when reading after dropping a column.
            if (!force_decode)
                return false;
+            // Ignore the extra column and continue to parse the remaining datums
            if (is_null)
-                id_null++;
+                idx_null++;
            else
-                id_not_null++;
+                idx_not_null++;
        }
        else if (column_ids_iter->first < next_datum_column_id)
        {
+            // The next column id to read is less than the column id of the next datum
+            // in the encoded row. It means the datum of this column is missing; this may
+            // happen when reading after adding a column.
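+            // (e.g. a row written before an `ALTER TABLE ... ADD COLUMN` carries no datum for the new column).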
+            // Fill with the default value and continue to read data for the next column id.
            const auto & column_info = column_infos[column_ids_iter->second];
            if (!addDefaultValueToColumnIfPossible(column_info, block, block_column_pos, force_decode))
                return false;
@@ -397,7 +405,7 @@
        }
        else
        {
-            // if pk_handle_id is a valid column id, then it means the table's pk_is_handle is true
+            // If pk_handle_id is a valid column id, then it means the table's pk_is_handle is true,
            // we can just ignore the pk value encoded in value part
            if (unlikely(column_ids_iter->first == pk_handle_id))
            {
@@ -405,15 +413,16 @@
                block_column_pos++;
                if (is_null)
                {
-                    id_null++;
+                    idx_null++;
                }
                else
                {
-                    id_not_null++;
+                    idx_not_null++;
                }
                continue;
            }
+
+            // Parse the datum.
            auto * raw_column = const_cast((block.getByPosition(block_column_pos)).column.get());
            const auto & column_info = column_infos[column_ids_iter->second];
            if (is_null)
@@ -432,15 +441,15 @@
                }
                // ColumnNullable::insertDefault just insert a null value
                raw_column->insertDefault();
-                id_null++;
+                idx_null++;
            }
            else
            {
-                size_t start = id_not_null ? value_offsets[id_not_null - 1] : 0;
-                size_t length = value_offsets[id_not_null] - start;
+                size_t start = idx_not_null ? value_offsets[idx_not_null - 1] : 0;
+                size_t length = value_offsets[idx_not_null] - start;
                if (!raw_column->decodeTiDBRowV2Datum(values_start_pos + start, raw_value, length, force_decode))
                    return false;
-                id_not_null++;
+                idx_not_null++;
            }
            column_ids_iter++;
            block_column_pos++;
diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp
index 15bf2a3fb58..6d07c47f235 100644
--- a/dbms/src/Storages/Transaction/TiDB.cpp
+++ b/dbms/src/Storages/Transaction/TiDB.cpp
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include

 #include
@@ -631,8 +632,8 @@ catch (const Poco::Exception & e)
///////////////////////
IndexColumnInfo::IndexColumnInfo(Poco::JSON::Object::Ptr json)
-    : offset(0)
-    , length(0)
+    : length(0)
+    , offset(0)
{
    deserialize(json);
}
@@ -772,6 +773,37 @@ catch (const Poco::Exception & e)
{
        DB::Exception(e));
}

+String TiFlashModeToString(TiFlashMode tiflash_mode)
+{
+    switch (tiflash_mode)
+    {
+    case TiFlashMode::Normal:
+        return "";
+    case TiFlashMode::Fast:
+        return "fast";
+    default:
+        LOG_FMT_WARNING(&Poco::Logger::get("TiDB"), "TiFlashModeToString with invalid tiflash mode {}", tiflash_mode);
+        return "";
+    }
+}
+
+TiFlashMode parseTiFlashMode(String mode_str)
+{
+    if (mode_str.empty())
+    {
+        return TiFlashMode::Normal;
+    }
+    else if (mode_str == "fast")
+    {
+        return TiFlashMode::Fast;
+    }
+    else
+    {
+        throw DB::Exception(
+            std::string(__PRETTY_FUNCTION__)
+            + " ParseTiFlashMode failed: mode " + mode_str + " is invalid, please set the mode to fast/normal");
+    }
+}
///////////////////////
////// TableInfo //////
///////////////////////
@@ -840,6 +872,8 @@ try
    json->set("tiflash_replica", replica_info.getJSONObject());

+    json->set("tiflash_mode", std::string(TiFlashModeToString(tiflash_mode)));
+
    json->stringify(buf);

    return buf.str();
@@ -926,6 +960,14 @@ try
            replica_info.deserialize(replica_obj);
        }
    }
+    if (obj->has("tiflash_mode"))
+    {
+        auto mode = obj->getValue("tiflash_mode");
+        if (!mode.empty())
+        {
+            tiflash_mode = parseTiFlashMode(mode);
+        }
+    }
    if (is_common_handle && index_infos.size() != 1)
    {
        throw DB::Exception(
diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h
index f67bfb332c7..a9d46b60c13 100644
--- a/dbms/src/Storages/Transaction/TiDB.h
+++ b/dbms/src/Storages/Transaction/TiDB.h
@@ -179,7 +179,6 @@ struct ColumnInfo
    ColumnID id = -1;
    String name;
-    Int32 offset = -1;
    Poco::Dynamic::Var origin_default_value;
    Poco::Dynamic::Var default_value;
    Poco::Dynamic::Var default_bit_value;
@@ -212,6 +211,12 @@ struct ColumnInfo
    static Int64 getTimeValue(const String &);
    static Int64 getYearValue(const String &);
    static UInt64 getBitValue(const String &);
+
+private:
+    /// Please be very careful when you have to use offset;
+    /// we never update it when a DDL action changes the columns,
+    /// so it may not exactly correspond to the current order of columns.
+    Int32 offset = -1;
};

enum PartitionType
@@ -298,8 +303,13 @@ struct IndexColumnInfo
    void deserialize(Poco::JSON::Object::Ptr json);

    String name;
-    Int32 offset;
    Int32 length;
+
+private:
+    /// Please be very careful when you have to use offset;
+    /// we never update it when a DDL action changes the columns,
+    /// so it may not exactly correspond to the current order of columns.
+    Int32 offset;
};
struct IndexInfo
{
@@ -323,6 +333,12 @@
    bool is_global;
};

+enum class TiFlashMode
+{
+    Normal,
+    Fast,
+};
+
struct TableInfo
{
    TableInfo() = default;
@@ -372,6 +388,8 @@
    // The TiFlash replica info persisted by TiDB
    TiFlashReplicaInfo replica_info;

+    TiFlashMode tiflash_mode = TiFlashMode::Normal;
+
    ::TiDB::StorageEngine engine_type = ::TiDB::StorageEngine::UNSPECIFIED;

    ColumnID getColumnID(const String & name) const;
@@ -385,7 +403,12 @@
    bool isLogicalPartitionTable() const { return is_partition_table && belonging_table_id == DB::InvalidTableID && partition.enable; }

-    /// should not be called if is_common_handle = false
+    /// should not be called if is_common_handle = false.
+    /// When using IndexInfo, avoid relying on the offset info:
+    /// the offset value may be wrong in some cases, because we do not update
+    /// IndexInfo except for the RENAME DDL action, while DDL actions like
+    /// add column / drop column may change the offset of columns.
+    /// Thus, be very careful when you must use the offset information!
     const IndexInfo & getPrimaryIndexInfo() const { return index_infos[0]; }
     IndexInfo & getPrimaryIndexInfo() { return index_infos[0]; }
@@ -398,4 +421,7 @@
 String genJsonNull();
 tipb::FieldType columnInfoToFieldType(const ColumnInfo & ci);
 ColumnInfo fieldTypeToColumnInfo(const tipb::FieldType & field_type);
+String TiFlashModeToString(TiFlashMode tiflash_mode);
+TiFlashMode parseTiFlashMode(String mode_str);
+
 } // namespace TiDB
diff --git a/dbms/src/Storages/Transaction/TiKVRecordFormat.h b/dbms/src/Storages/Transaction/TiKVRecordFormat.h
index 4a25b6d9292..10a7f7220e9 100644
--- a/dbms/src/Storages/Transaction/TiKVRecordFormat.h
+++ b/dbms/src/Storages/Transaction/TiKVRecordFormat.h
@@ -30,7 +30,6 @@
 namespace DB
 {
-
 namespace ErrorCodes
 {
 extern const int LOGICAL_ERROR;
@@ -38,7 +37,6 @@ extern const int LOGICAL_ERROR;
 namespace RecordKVFormat
 {
-
 enum CFModifyFlag : UInt8
 {
     PutFlag = 'P',
@@ -83,17 +81,35 @@ inline TiKVKey encodeAsTiKVKey(const String & ori_str)
     return TiKVKey(ss.releaseStr());
 }
-inline UInt64 encodeUInt64(const UInt64 x) { return toBigEndian(x); }
+inline UInt64 encodeUInt64(const UInt64 x)
+{
+    return toBigEndian(x);
+}

-inline UInt64 encodeInt64(const Int64 x) { return encodeUInt64(static_cast<UInt64>(x) ^ SIGN_MASK); }
+inline UInt64 encodeInt64(const Int64 x)
+{
+    return encodeUInt64(static_cast<UInt64>(x) ^ SIGN_MASK);
+}

-inline UInt64 encodeUInt64Desc(const UInt64 x) { return encodeUInt64(~x); }
+inline UInt64 encodeUInt64Desc(const UInt64 x)
+{
+    return encodeUInt64(~x);
+}

-inline UInt64 decodeUInt64(const UInt64 x) { return toBigEndian(x); }
+inline UInt64 decodeUInt64(const UInt64 x)
+{
+    return toBigEndian(x);
+}

-inline UInt64 decodeUInt64Desc(const UInt64 x) { return ~decodeUInt64(x); }
+inline UInt64 decodeUInt64Desc(const UInt64 x)
+{
+    return ~decodeUInt64(x);
+}

-inline Int64 decodeInt64(const UInt64 x) { return static_cast<Int64>(decodeUInt64(x) ^ SIGN_MASK); }
+inline Int64 decodeInt64(const UInt64 x)
+{
+    return static_cast<Int64>(decodeUInt64(x) ^ SIGN_MASK);
+}
 inline void encodeInt64(const Int64 x, WriteBuffer & ss)
 {
@@ -125,7 +141,10 @@ inline DecodedTiKVKey genRawKey(const TableID tableId, const HandleID handleId)
     return key;
 }
-inline TiKVKey genKey(const TableID tableId, const HandleID handleId) { return encodeAsTiKVKey(genRawKey(tableId, handleId)); }
+inline TiKVKey genKey(const TableID tableId, const HandleID handleId)
+{
+    return encodeAsTiKVKey(genRawKey(tableId, handleId));
+}
 inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector<Field> keys)
 {
@@ -135,9 +154,16 @@ inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector<Field> keys)
     memcpy(key.data() + 1, reinterpret_cast<const char *>(&big_endian_table_id), 8);
     memcpy(key.data() + 1 + 8, RecordKVFormat::RECORD_PREFIX_SEP, 2);
     WriteBufferFromOwnString ss;
+
+    std::unordered_map<String, size_t> column_name_columns_index_map;
+    for (size_t i = 0; i < table_info.columns.size(); i++)
+    {
+        column_name_columns_index_map.emplace(table_info.columns[i].name, i);
+    }
     for (size_t i = 0; i < keys.size(); i++)
     {
-        DB::EncodeDatum(keys[i], table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset].getCodecFlag(), ss);
+        auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name];
+        DB::EncodeDatum(keys[i], table_info.columns[idx].getCodecFlag(), ss);
     }
     return encodeAsTiKVKey(key + ss.releaseStr());
 }
@@ -176,29 +202,50 @@ inline std::tuple decodeTiKVKeyFull(const TiKVKey & key)
     }
 }
-inline DecodedTiKVKey decodeTiKVKey(const TiKVKey & key) { return std::get<0>(decodeTiKVKeyFull(key)); }
+inline DecodedTiKVKey decodeTiKVKey(const TiKVKey & key)
+{
+    return std::get<0>(decodeTiKVKeyFull(key));
+}

-inline Timestamp getTs(const TiKVKey & key) { return decodeUInt64Desc(read<UInt64>(key.data() + key.dataSize() - 8)); }
+inline Timestamp getTs(const TiKVKey & key)
+{
+    return decodeUInt64Desc(read<UInt64>(key.data() + key.dataSize() - 8));
+}

-inline TableID getTableId(const DecodedTiKVKey & key) { return decodeInt64(read<UInt64>(key.data() + 1)); }
+inline TableID getTableId(const DecodedTiKVKey & key)
+{
+    return decodeInt64(read<UInt64>(key.data() + 1));
+}

-inline HandleID getHandle(const DecodedTiKVKey & key) { return decodeInt64(read<UInt64>(key.data() + RAW_KEY_NO_HANDLE_SIZE)); }
+inline HandleID getHandle(const DecodedTiKVKey & key)
+{
+    return decodeInt64(read<UInt64>(key.data() + RAW_KEY_NO_HANDLE_SIZE));
+}
 inline RawTiDBPK getRawTiDBPK(const DecodedTiKVKey & key)
 {
     return std::make_shared(key.begin() + RAW_KEY_NO_HANDLE_SIZE, key.end());
 }
-inline TableID getTableId(const TiKVKey & key) { return getTableId(decodeTiKVKey(key)); }
+inline TableID getTableId(const TiKVKey & key)
+{
+    return getTableId(decodeTiKVKey(key));
+}

-inline HandleID getHandle(const TiKVKey & key) { return getHandle(decodeTiKVKey(key)); }
+inline HandleID getHandle(const TiKVKey & key)
+{
+    return getHandle(decodeTiKVKey(key));
+}
 inline bool isRecord(const DecodedTiKVKey & raw_key)
 {
     return raw_key.size() >= RAW_KEY_SIZE && raw_key[0] == TABLE_PREFIX && memcmp(raw_key.data() + 9, RECORD_PREFIX_SEP, 2) == 0;
 }
-inline TiKVKey truncateTs(const TiKVKey & key) { return TiKVKey(String(key.data(), key.dataSize() - sizeof(Timestamp))); }
+inline TiKVKey truncateTs(const TiKVKey & key)
+{
+    return TiKVKey(String(key.data(), key.dataSize() - sizeof(Timestamp)));
+}
 inline TiKVKey appendTs(const TiKVKey & key, Timestamp ts)
 {
@@ -215,7 +262,12 @@ inline TiKVKey genKey(TableID tableId, HandleID handleId, Timestamp ts)
 }
 inline TiKVValue encodeLockCfValue(
-    UInt8 lock_type, const String & primary, Timestamp ts, UInt64 ttl, const String * short_value = nullptr, Timestamp min_commit_ts = 0)
+    UInt8 lock_type,
+    const String & primary,
+    Timestamp ts,
+    UInt64 ttl,
+    const String * short_value = nullptr,
+    Timestamp min_commit_ts = 0)
 {
     WriteBufferFromOwnString res;
     res.write(lock_type);
@@ -275,7 +327,10 @@ inline R readVarInt(const char *& data, size_t & len)
     return res;
 }
-inline UInt64 readVarUInt(const char *& data, size_t & len) { return readVarInt<UInt64>(data, len); }
+inline UInt64 readVarUInt(const char *& data, size_t & len)
+{
+    return readVarInt<UInt64>(data, len);
+}
 inline UInt8 readUInt8(const char *& data, size_t & len)
 {
@@ -347,30 +402,29 @@ inline DecodedWriteCFValue decodeWriteCfValue(const TiKVValue & value)
     auto flag = RecordKVFormat::readUInt8(data, len);
     switch (flag)
     {
-    case RecordKVFormat::SHORT_VALUE_PREFIX:
-    {
-        size_t slen = RecordKVFormat::readUInt8(data, len);
-        if (slen > len)
-            throw Exception("content len not equal to short value len", ErrorCodes::LOGICAL_ERROR);
-        short_value = RecordKVFormat::readRawString(data, len, slen);
-        break;
-    }
-    case RecordKVFormat::FLAG_OVERLAPPED_ROLLBACK:
-        // ignore
-        break;
-    case RecordKVFormat::GC_FENCE_PREFIX:
-    /**
+    case RecordKVFormat::SHORT_VALUE_PREFIX:
+    {
+        size_t slen = RecordKVFormat::readUInt8(data, len);
+        if (slen > len)
+            throw Exception("content len not equal to short value len", ErrorCodes::LOGICAL_ERROR);
+        short_value = RecordKVFormat::readRawString(data, len, slen);
+        break;
+    }
+    case RecordKVFormat::FLAG_OVERLAPPED_ROLLBACK:
+        // ignore
+        break;
+    case RecordKVFormat::GC_FENCE_PREFIX:
+        /**
         * according to https://github.com/tikv/tikv/pull/9207, when we meet the `GC fence` flag, it is definitely a
        * rewriting record and there must be a complete row written to tikv, so just ignore it in tiflash.
        */
-        return std::nullopt;
-    default:
-        throw Exception("invalid flag " + std::to_string(flag) + " in write cf", ErrorCodes::LOGICAL_ERROR);
+        return std::nullopt;
+    default:
+        throw Exception("invalid flag " + std::to_string(flag) + " in write cf", ErrorCodes::LOGICAL_ERROR);
     }
 }
-    return InnerDecodedWriteCFValue{write_type, prewrite_ts,
-        short_value.empty() ? nullptr : std::make_shared(short_value.data(), short_value.length())};
+    return InnerDecodedWriteCFValue{write_type, prewrite_ts, short_value.empty() ? nullptr : std::make_shared(short_value.data(), short_value.length())};
 }
 inline TiKVValue encodeWriteCfValue(UInt8 write_type, Timestamp ts, std::string_view short_value = {}, bool gc_fence = false)
diff --git a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h
index 20b395a9952..34e0d3d4104 100644
--- a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h
+++ b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h
@@ -237,14 +237,14 @@ std::pair> getTableInfoAndFields(ColumnIDs handle_
 {
     table_info.is_common_handle = true;
     TiDB::IndexInfo index_info;
-    for (size_t i = 0; i < handle_ids.size(); i++)
+    for (auto handle_id : handle_ids)
     {
         TiDB::IndexColumnInfo index_column_info;
-        for (size_t pos = 0; pos < table_info.columns.size(); pos++)
+        for (auto & column : table_info.columns)
         {
-            if (table_info.columns[pos].id == handle_ids[i])
+            if (column.id == handle_id)
             {
-                index_column_info.offset = pos;
+                index_column_info.name = column.name;
                 break;
             }
         }
diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp
new file mode 100644
index 00000000000..05ab637de7f
--- /dev/null
+++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp
@@ -0,0 +1,171 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
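Before the benchmark body, a short aside on the `TiKVRecordFormat.h` hunks above: `encodeInt64` flips the sign bit with `SIGN_MASK` and stores the result big-endian, so that byte-wise (memcmp) comparison of encoded keys agrees with signed numeric order. A self-contained illustration of the trick follows; `toBigEndian` and `SIGN_MASK` here are local stand-ins for TiFlash's helpers, not the real ones.

```cpp
// Standalone sketch of the memcomparable Int64 codec used for TiKV keys:
// flip the sign bit, then store big-endian, so memcmp order == numeric order.
#include <cassert>
#include <cstdint>
#include <cstring>

static constexpr uint64_t SIGN_MASK = 0x8000000000000000ULL; // assumed to match TiFlash's constant

static uint64_t toBigEndian(uint64_t x)
{
    // Lay the bytes out most-significant first, regardless of host endianness.
    uint8_t out[8];
    for (int i = 0; i < 8; ++i)
        out[i] = static_cast<uint8_t>(x >> (56 - 8 * i));
    uint64_t be;
    std::memcpy(&be, out, 8);
    return be;
}

static uint64_t encodeInt64(int64_t x)
{
    return toBigEndian(static_cast<uint64_t>(x) ^ SIGN_MASK);
}

int main()
{
    // -5 < 3 numerically, and the encodings compare the same way byte-wise.
    uint64_t a = encodeInt64(-5), b = encodeInt64(3);
    assert(std::memcmp(&a, &b, 8) < 0);
    return 0;
}
```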
+ +#include +#include +#include +#include + +#include "RowCodecTestUtils.h" + +using TableInfo = TiDB::TableInfo; +namespace DB::tests +{ +using ColumnIDs = std::vector; +class RegionBlockReaderBenchTest : public benchmark::Fixture +{ +protected: + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; + + enum RowEncodeVersion + { + RowV1, + RowV2 + }; + +protected: + void SetUp(const benchmark::State & /*state*/) override + { + data_list_read.clear(); + fields_map.clear(); + } + + void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version, size_t num_rows) + { + // for later check + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + + std::vector value_fields; + std::vector pk_fields; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + if (!table_info.columns[i].hasPriKeyFlag()) + value_fields.emplace_back(fields[i]); + else + pk_fields.emplace_back(fields[i]); + } + + // create PK + WriteBufferFromOwnString pk_buf; + if (table_info.is_common_handle) + { + auto & primary_index_info = table_info.getPrimaryIndexInfo(); + for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) + { + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); + } + } + else + { + DB::EncodeInt64(handle_value, pk_buf); + } + RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; + // create value + WriteBufferFromOwnString value_buf; + if (row_version == RowEncodeVersion::RowV1) + { + encodeRowV1(table_info, value_fields, value_buf); + } + else if (row_version == RowEncodeVersion::RowV2) + { + encodeRowV2(table_info, value_fields, value_buf); + } + else + { + throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); + } + auto row_value = std::make_shared(std::move(value_buf.str())); + for (size_t i = 0; i < num_rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); + } + + bool decodeColumns(DecodingStorageSchemaSnapshotConstPtr decoding_schema, bool force_decode) const + { + RegionBlockReader reader{decoding_schema}; + Block block = createBlockSortByColumnID(decoding_schema); + return reader.read(block, data_list_read, force_decode); + } + + std::pair> getNormalTableInfoFields(const ColumnIDs & handle_ids, bool is_common_handle) const + { + return getTableInfoAndFields( + handle_ids, + is_common_handle, + ColumnIDValue(2, handle_value), + ColumnIDValue(3, std::numeric_limits::max()), + ColumnIDValue(4, std::numeric_limits::min()), + ColumnIDValue(9, String("aaa")), + ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4)), + ColumnIDValueNull(11)); + } +}; + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandle) +(benchmark::State & state) +{ + size_t num_rows = 
state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({2}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + +constexpr size_t num_iterations_test = 1000; + +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); + +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp new file mode 100644 index 00000000000..1de9809ecad --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp @@ -0,0 +1,65 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
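In the registrations above, Google Benchmark hands each `->Arg(...)` value to the fixture through `state.range(0)`, so every benchmark body runs once per row count (1, 10, and 100), capped at `num_iterations_test` iterations. A minimal standalone example of the same pattern, independent of the TiFlash fixtures:

```cpp
// Minimal example of the ->Arg()/state.range(0) pattern used by the benchmark above.
#include <benchmark/benchmark.h>
#include <vector>

static void BM_FillVector(benchmark::State & state)
{
    const size_t num_rows = state.range(0); // receives 1, 10, 100 in turn
    for (auto _ : state)
    {
        std::vector<int> rows(num_rows, 42);
        benchmark::DoNotOptimize(rows.data()); // keep the work from being optimized away
    }
}
BENCHMARK(BM_FillVector)->Arg(1)->Arg(10)->Arg(100);

BENCHMARK_MAIN();
```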
+
+#include
+#include
+#include
+
+#include "RowCodecTestUtils.h"
+
+namespace DB::tests
+{
+static TableInfo getTableInfoByJson(const String & json_table_info)
+{
+    return TableInfo(json_table_info);
+}
+TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex)
+{
+    // table with columns [A,B,C,D], primary keys [A,C]
+    const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":2,"name":{"O":"B","L":"b"},"offset":1,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":0,"Flen":20,"Decimal":0,"Charset":"utf8mb4","Collate":"utf8mb4_bin","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json";
+    auto table_info = getTableInfoByJson(json_table_info);
+    auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info);
+
+    // check decoding_schema->pk_column_ids info
+    ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2);
+    ASSERT_EQ(decoding_schema->pk_column_ids[0], 1);
+    ASSERT_EQ(decoding_schema->pk_column_ids[1], 3);
+
+    // check decoding_schema->pk_pos_map info
+    ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size());
+    // there are three hidden columns in the decoded block, so the positions of A and C are 3 and 5
+    ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3);
+    ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 5);
+}
+
+TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndexAfterDropColumn)
+{
+    // after dropping column B from [A,B,C,D], the table has columns [A,C,D], primary keys [A,C]
+    const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json";
+    auto table_info = getTableInfoByJson(json_table_info);
+    auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info);
+
+    // check decoding_schema->pk_column_ids info
+    ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2);
+    ASSERT_EQ(decoding_schema->pk_column_ids[0], 1);
+    ASSERT_EQ(decoding_schema->pk_column_ids[1], 3);
+
+    // check decoding_schema->pk_pos_map info
+    ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size());
+    // there are three hidden columns in the decoded block, so the positions of A and C are 3 and 4
+    ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3);
+    ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 4);
+}
+
+} // namespace DB::tests
diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
index 
f0cafce3914..77aab06f6cf 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -18,10 +18,9 @@ #include #include #include +#include #include -#include "region_helper.h" - namespace DB { namespace RegionBench @@ -37,13 +36,6 @@ extern void ChangeRegionStateRange(RegionState & region_state, bool source_at_le namespace tests { -RegionPtr makeRegion(UInt64 id, const std::string start_key, const std::string end_key, const TiFlashRaftProxyHelper * proxy_helper = nullptr) -{ - return std::make_shared( - RegionMeta(createPeer(2, true), createRegionInfo(id, std::move(start_key), std::move(end_key)), initialApplyState()), - proxy_helper); -} - class RegionKVStoreTest : public ::testing::Test { public: @@ -1187,6 +1179,12 @@ void RegionKVStoreTest::testKVStore() ASSERT_EQ(e.message(), "unsupported admin command type InvalidAdmin"); } } + { + // There shall be data to flush. + ASSERT_EQ(kvs.needFlushRegionData(19, ctx.getTMTContext()), true); + // Force flush until succeed only for testing. + ASSERT_EQ(kvs.tryFlushRegionData(19, true, ctx.getTMTContext()), true); + } } void test_mergeresult() diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp index 6a883230854..d08b4dd3738 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp @@ -26,13 +26,13 @@ using ColumnIDs = std::vector; class RegionBlockReaderTestFixture : public ::testing::Test { protected: - Int64 handle_value_ = 100; - UInt8 del_mark_value_ = 0; - UInt64 version_value_ = 100; - size_t rows_ = 3; + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + size_t rows = 3; - RegionDataReadInfoList data_list_read_; - std::unordered_map fields_map_; + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; enum RowEncodeVersion { @@ -43,8 +43,8 @@ class RegionBlockReaderTestFixture : public ::testing::Test protected: void SetUp() override { - data_list_read_.clear(); - fields_map_.clear(); + data_list_read.clear(); + fields_map.clear(); } void TearDown() override {} @@ -52,8 +52,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) { // for later check + std::unordered_map column_name_columns_index_map; for (size_t i = 0; i < table_info.columns.size(); i++) - fields_map_.emplace(table_info.columns[i].id, fields[i]); + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } std::vector value_fields; std::vector pk_fields; @@ -72,13 +76,13 @@ class RegionBlockReaderTestFixture : public ::testing::Test auto & primary_index_info = table_info.getPrimaryIndexInfo(); for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) { - size_t pk_offset = primary_index_info.idx_cols[i].offset; - EncodeDatum(pk_fields[i], table_info.columns[pk_offset].getCodecFlag(), pk_buf); + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); } } else { - DB::EncodeInt64(handle_value_, pk_buf); + DB::EncodeInt64(handle_value, pk_buf); } RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; // create value @@ -96,44 +100,44 @@ class RegionBlockReaderTestFixture : public 
::testing::Test throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); } auto row_value = std::make_shared(std::move(value_buf.str())); - for (size_t i = 0; i < rows_; i++) - data_list_read_.emplace_back(pk, del_mark_value_, version_value_, row_value); + for (size_t i = 0; i < rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); } void checkBlock(DecodingStorageSchemaSnapshotConstPtr decoding_schema, const Block & block) const { ASSERT_EQ(block.columns(), decoding_schema->column_defines->size()); - for (size_t row = 0; row < rows_; row++) + for (size_t row = 0; row < rows; row++) { for (size_t pos = 0; pos < block.columns(); pos++) { - auto & column_element = block.getByPosition(pos); + const auto & column_element = block.getByPosition(pos); if (row == 0) { - ASSERT_EQ(column_element.column->size(), rows_); + ASSERT_EQ(column_element.column->size(), rows); } if (column_element.name == EXTRA_HANDLE_COLUMN_NAME) { if (decoding_schema->is_common_handle) { - ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read_[row]))); + ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))); } else { - ASSERT_EQ((*column_element.column)[row], Field(handle_value_)); + ASSERT_EQ((*column_element.column)[row], Field(handle_value)); } } else if (column_element.name == VERSION_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(version_value_)); + ASSERT_EQ((*column_element.column)[row], Field(version_value)); } else if (column_element.name == TAG_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value_))); + ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))); } else { - ASSERT_EQ((*column_element.column)[row], fields_map_.at(column_element.column_id)); + ASSERT_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)); } } } @@ -143,7 +147,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test { RegionBlockReader reader{decoding_schema}; Block block = createBlockSortByColumnID(decoding_schema); - if (!reader.read(block, data_list_read_, force_decode)) + if (!reader.read(block, data_list_read, force_decode)) return false; checkBlock(decoding_schema, block); @@ -155,7 +159,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test return getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -170,7 +174,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test handle_ids, is_common_handle, ColumnIDValue(1, String("")), - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(8, String("")), @@ -182,12 +186,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test // add default value for missing column std::vector missing_column_ids{1, 8, 13}; String missing_column_default_value = String("default"); - for (size_t i = 0; i < table_info.columns.size(); i++) + for (auto & column : table_info.columns) { - if (std::find(missing_column_ids.begin(), missing_column_ids.end(), table_info.columns[i].id) != missing_column_ids.end()) + if (std::find(missing_column_ids.begin(), missing_column_ids.end(), column.id) != missing_column_ids.end()) { - 
table_info.columns[i].origin_default_value = missing_column_default_value; - fields_map_.emplace(table_info.columns[i].id, Field(missing_column_default_value)); + column.origin_default_value = missing_column_default_value; + fields_map.emplace(column.id, Field(missing_column_default_value)); } } return table_info; @@ -199,7 +203,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4))); @@ -212,7 +216,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -227,7 +231,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), diff --git a/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp b/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp index 516a173b151..871153cb0e9 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp @@ -42,7 +42,7 @@ struct ParseCase std::function check; }; -TEST(TiDBTableInfo_test, ParseFromJSON) +TEST(TiDBTableInfoTest, ParseFromJSON) try { auto cases = { @@ -136,54 +136,54 @@ struct StmtCase } }; -TEST(TiDBTableInfo_test, GenCreateTableStatement) +TEST(TiDBTableInfoTest, GenCreateTableStatement) try { auto cases = // {StmtCase{ 1145, // R"json({"id":1939,"db_name":{"O":"customer","L":"customer"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":1145,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","partition":null})json", // - R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":1145,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":0}'))stmt", // + R"json({"id":1145,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","partition":null,"tiflash_mode":"fast"})json", // + R"stmt(CREATE TABLE 
`customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":1145,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_mode":"fast","tiflash_replica":{"Count":0},"update_timestamp":0}'))stmt", // }, StmtCase{ 2049, // R"json({"id":1939,"db_name":{"O":"customer","L":"customer"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":2049,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","update_timestamp":404545295996944390,"partition":null})json", // - R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":2049,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // + R"json({"id":2049,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","update_timestamp":404545295996944390,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":2049,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // }, StmtCase{ 31, // R"json({"id":1,"db_name":{"O":"db1","L":"db1"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":31,"name":{"O":"simple_t","L":"simple_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545295996944390,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE 
`db1`.`simple_t`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":31,"index_info":[],"is_common_handle":false,"name":{"L":"simple_t","O":"simple_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // + R"json({"id":31,"name":{"O":"simple_t","L":"simple_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545295996944390,"ShardRowIDBits":0,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `db1`.`simple_t`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":31,"index_info":[],"is_common_handle":false,"name":{"L":"simple_t","O":"simple_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // }, StmtCase{ 33, // R"json({"id":2,"db_name":{"O":"db2","L":"db2"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":33,"name":{"O":"pk_t","L":"pk_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":3,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545312978108418,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db2`.`pk_t`(`i` Int32) Engine = DeltaMerge((`i`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":3,"Flen":11,"Tp":3}}],"comment":"","id":33,"index_info":[],"is_common_handle":false,"name":{"L":"pk_t","O":"pk_t"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545312978108418}'))stmt", // + 
R"json({"id":33,"name":{"O":"pk_t","L":"pk_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":3,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545312978108418,"ShardRowIDBits":0,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `db2`.`pk_t`(`i` Int32) Engine = DeltaMerge((`i`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":3,"Flen":11,"Tp":3}}],"comment":"","id":33,"index_info":[],"is_common_handle":false,"name":{"L":"pk_t","O":"pk_t"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404545312978108418}'))stmt", // }, StmtCase{ 35, // R"json({"id":1,"db_name":{"O":"db1","L":"db1"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":35,"name":{"O":"not_null_t","L":"not_null_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4097,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545324922961926,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db1`.`not_null_t`(`i` Int32, `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":4097,"Flen":11,"Tp":3}}],"comment":"","id":35,"index_info":[],"is_common_handle":false,"name":{"L":"not_null_t","O":"not_null_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545324922961926}'))stmt", // + R"json({"id":35,"name":{"O":"not_null_t","L":"not_null_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4097,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545324922961926,"ShardRowIDBits":0,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `db1`.`not_null_t`(`i` Int32, `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), 
'{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":4097,"Flen":11,"Tp":3}}],"comment":"","id":35,"index_info":[],"is_common_handle":false,"name":{"L":"not_null_t","O":"not_null_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404545324922961926}'))stmt", // }, StmtCase{ 37, // R"json({"id":2,"db_name":{"O":"db2","L":"db2"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", - R"json({"id":37,"name":{"O":"mytable","L":"mytable"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"mycol","L":"mycol"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":4099,"Flen":256,"Decimal":0,"Charset":"utf8","Collate":"utf8_bin","Elems":null},"state":5,"comment":""}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"mycol","L":"mycol"},"offset":0,"length":-1}],"is_unique":true,"is_primary":true,"state":5,"comment":"","index_type":1}],"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":1,"update_timestamp":404566455285710853,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db2`.`mytable`(`mycol` String) Engine = DeltaMerge((`mycol`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"mycol","O":"mycol"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"utf8","Collate":"utf8_bin","Decimal":0,"Elems":null,"Flag":4099,"Flen":256,"Tp":15}}],"comment":"","id":37,"index_info":[],"is_common_handle":false,"name":{"L":"mytable","O":"mytable"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404566455285710853}'))stmt", // + R"json({"id":37,"name":{"O":"mytable","L":"mytable"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"mycol","L":"mycol"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":4099,"Flen":256,"Decimal":0,"Charset":"utf8","Collate":"utf8_bin","Elems":null},"state":5,"comment":""}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"mycol","L":"mycol"},"offset":0,"length":-1}],"is_unique":true,"is_primary":true,"state":5,"comment":"","index_type":1}],"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":1,"update_timestamp":404566455285710853,"ShardRowIDBits":0,"partition":null,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `db2`.`mytable`(`mycol` String) Engine = DeltaMerge((`mycol`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"mycol","O":"mycol"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"utf8","Collate":"utf8_bin","Decimal":0,"Elems":null,"Flag":4099,"Flen":256,"Tp":15}}],"comment":"","id":37,"index_info":[],"is_common_handle":false,"name":{"L":"mytable","O":"mytable"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":404566455285710853}'))stmt", // }, StmtCase{ 32, // 
R"json({"id":1,"db_name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":31,"name":{"O":"range_part_t","L":"range_part_t"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","version":0}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":407445773801488390,"ShardRowIDBits":0,"partition":{"type":1,"expr":"`i`","columns":null,"enable":true,"definitions":[{"id":32,"name":{"O":"p0","L":"p0"},"less_than":["0"]},{"id":33,"name":{"O":"p1","L":"p1"},"less_than":["100"]}],"num":0},"compression":"","version":1})json", // - R"stmt(CREATE TABLE `test`.`range_part_t_32`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"belonging_table_id":31,"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":32,"index_info":[],"is_common_handle":false,"is_partition_sub_table":true,"name":{"L":"range_part_t_32","O":"range_part_t_32"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":407445773801488390}'))stmt", // + R"json({"id":31,"name":{"O":"range_part_t","L":"range_part_t"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","version":0}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":407445773801488390,"ShardRowIDBits":0,"partition":{"type":1,"expr":"`i`","columns":null,"enable":true,"definitions":[{"id":32,"name":{"O":"p0","L":"p0"},"less_than":["0"]},{"id":33,"name":{"O":"p1","L":"p1"},"less_than":["100"]}],"num":0},"compression":"","version":1,"tiflash_mode":""})json", // + R"stmt(CREATE TABLE `test`.`range_part_t_32`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"belonging_table_id":31,"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":32,"index_info":[],"is_common_handle":false,"is_partition_sub_table":true,"name":{"L":"range_part_t_32","O":"range_part_t_32"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_mode":"","tiflash_replica":{"Count":0},"update_timestamp":407445773801488390}'))stmt", // }}; - for (auto & c : cases) + for (const auto & c : cases) { c.verifyTableInfo(); } diff --git a/dbms/src/Storages/Transaction/tests/region_helper.h b/dbms/src/Storages/Transaction/tests/region_helper.h index 2808ace0ecb..39bae2669ab 100644 --- a/dbms/src/Storages/Transaction/tests/region_helper.h +++ 
b/dbms/src/Storages/Transaction/tests/region_helper.h
@@ -18,8 +18,10 @@
 #include
-using namespace DB;
-
+namespace DB
+{
+namespace tests
+{
 #define ASSERT_CHECK(cond, res) \
     do \
     { \
@@ -37,7 +39,7 @@ using namespace DB;
 #define ASSERT_CHECK_EQUAL(a, b, res) \
     do \
     { \
-        if (!(a == b)) \
+        if (!((a) == (b))) \
         { \
             std::cerr << __FILE__ << ":" << __LINE__ << ":" \
                       << " Assertion " << #a << " == " << #b << " failed.\n"; \
@@ -76,3 +78,16 @@ inline RegionMeta createRegionMeta(UInt64 id, DB::TableID table_id, std::optiona
         /*region=*/createRegionInfo(id, RecordKVFormat::genKey(table_id, 0), RecordKVFormat::genKey(table_id, 300)),
         /*apply_state_=*/apply_state.value_or(initialApplyState()));
 }
+
+inline RegionPtr makeRegion(UInt64 id, std::string start_key, std::string end_key, const TiFlashRaftProxyHelper * proxy_helper = nullptr)
+{
+    return std::make_shared<Region>(
+        RegionMeta(
+            createPeer(2, true),
+            createRegionInfo(id, std::move(start_key), std::move(end_key)),
+            initialApplyState()),
+        proxy_helper);
+}
+
+} // namespace tests
+} // namespace DB
diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp
index a709be0b017..ddf815316ab 100644
--- a/dbms/src/Storages/registerStorages.cpp
+++ b/dbms/src/Storages/registerStorages.cpp
@@ -27,7 +27,6 @@ void registerStorageNull(StorageFactory & factory);
 void registerStorageMerge(StorageFactory & factory);
 void registerStorageBuffer(StorageFactory & factory);
 void registerStorageMemory(StorageFactory & factory);
-void registerStorageFile(StorageFactory & factory);
 void registerStorageDictionary(StorageFactory & factory);
 void registerStorageSet(StorageFactory & factory);
 void registerStorageJoin(StorageFactory & factory);
@@ -47,7 +46,6 @@ void registerStorages()
     registerStorageMerge(factory);
     registerStorageBuffer(factory);
     registerStorageMemory(factory);
-    registerStorageFile(factory);
     registerStorageDictionary(factory);
     registerStorageSet(factory);
     registerStorageJoin(factory);
diff --git a/dbms/src/Storages/tests/gtest_filter_parser.cpp b/dbms/src/Storages/tests/gtest_filter_parser.cpp
index 8820c05d2da..3a554fcf4b6 100644
--- a/dbms/src/Storages/tests/gtest_filter_parser.cpp
+++ b/dbms/src/Storages/tests/gtest_filter_parser.cpp
@@ -98,7 +98,7 @@ DM::RSOperatorPtr FilterParserTest::generateRsOperator(const String table_info_j
     DM::ColumnDefines columns_to_read;
     {
         NamesAndTypes source_columns;
-        std::tie(source_columns, std::ignore) = parseColumnsFromTableInfo(table_info, log->getLog());
+        std::tie(source_columns, std::ignore) = parseColumnsFromTableInfo(table_info);
         dag_query = std::make_unique(
             conditions,
             DAGPreparedSets(),
diff --git a/dbms/src/TableFunctions/ITableFunction.cpp b/dbms/src/TableFunctions/ITableFunction.cpp
index ca05075cac0..d262a5637f7 100644
--- a/dbms/src/TableFunctions/ITableFunction.cpp
+++ b/dbms/src/TableFunctions/ITableFunction.cpp
@@ -15,17 +15,10 @@
 #include
 #include
-
-namespace ProfileEvents
-{
-extern const Event TableFunctionExecute;
-}
-
 namespace DB
 {
 StoragePtr ITableFunction::execute(const ASTPtr & ast_function, const Context & context) const
 {
-    ProfileEvents::increment(ProfileEvents::TableFunctionExecute);
     return executeImpl(ast_function, context);
 }
diff --git a/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp b/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp
deleted file mode 100644
index ab5cd7e5849..00000000000
--- a/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2022 PingCAP, Ltd. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int BAD_ARGUMENTS; -} // namespace ErrorCodes - - -StoragePtr TableFunctionCatBoostPool::executeImpl(const ASTPtr & ast_function, const Context & context) const -{ - ASTs & args_func = typeid_cast(*ast_function).children; - - std::string err = "Table function '" + getName() + "' requires 2 parameters: " - + "column descriptions file, dataset description file"; - - if (args_func.size() != 1) - throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - ASTs & args = typeid_cast(*args_func.at(0)).children; - - if (args.size() != 2) - throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - auto getStringLiteral = [](const IAST & node, const char * description) { - auto lit = typeid_cast(&node); - if (!lit) - throw Exception(description + String(" must be string literal (in single quotes)."), ErrorCodes::BAD_ARGUMENTS); - - if (lit->value.getType() != Field::Types::String) - throw Exception(description + String(" must be string literal (in single quotes)."), ErrorCodes::BAD_ARGUMENTS); - - return safeGet(lit->value); - }; - String column_descriptions_file = getStringLiteral(*args[0], "Column descriptions file"); - String dataset_description_file = getStringLiteral(*args[1], "Dataset description file"); - - return StorageCatBoostPool::create(context, column_descriptions_file, dataset_description_file); -} - -void registerTableFunctionCatBoostPool(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} // namespace DB diff --git a/dbms/src/TableFunctions/TableFunctionCatBoostPool.h b/dbms/src/TableFunctions/TableFunctionCatBoostPool.h deleted file mode 100644 index 0b5b32dfffe..00000000000 --- a/dbms/src/TableFunctions/TableFunctionCatBoostPool.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - - -namespace DB -{ -/* catboostPool('column_descriptions_file', 'dataset_description_file') - * Create storage from CatBoost dataset. 
- */ -class TableFunctionCatBoostPool : public ITableFunction -{ -public: - static constexpr auto name = "catBoostPool"; - std::string getName() const override { return name; } - -private: - StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; -}; - -} // namespace DB diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp deleted file mode 100644 index 0ff1a5b443f..00000000000 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ -namespace ErrorCodes -{ -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int DATABASE_ACCESS_DENIED; -} // namespace ErrorCodes - -StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const -{ - // Parse args - ASTs & args_func = typeid_cast(*ast_function).children; - - if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); - - ASTs & args = typeid_cast(*args_func.at(0)).children; - - if (args.size() != 3) - throw Exception("Table function '" + getName() + "' requires exactly 3 arguments: path, format and structure.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - for (size_t i = 0; i < 3; ++i) - args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); - - std::string path = static_cast(*args[0]).value.safeGet(); - std::string format = static_cast(*args[1]).value.safeGet(); - std::string structure = static_cast(*args[2]).value.safeGet(); - - // Create sample block - std::vector structure_vals; - boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); - - if (structure_vals.size() % 2 != 0) - throw Exception("Odd number of elements in section structure: must be a list of name type pairs", ErrorCodes::LOGICAL_ERROR); - - Block sample_block; - const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - - for (size_t i = 0, size = structure_vals.size(); i < size; i += 2) - { - ColumnWithTypeAndName column; - column.name = structure_vals[i]; - column.type = data_type_factory.get(structure_vals[i + 1]); - column.column = column.type->createColumn(); - sample_block.insert(std::move(column)); - } - - // Create table - StoragePtr storage = StorageFile::create( - path, - -1, - context.getUserFilesPath(), - getName(), - format, - ColumnsDescription{sample_block.getNamesAndTypesList()}, - const_cast(context)); - - storage->startup(); - - return storage; -} - - -void registerTableFunctionFile(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} // namespace DB diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h deleted file mode 
100644 index dda367c2679..00000000000 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - - -namespace DB -{ -/* file(path, format, structure) - creates a temporary storage from file - * - * - * The file must be in the clickhouse data directory. - * The relative path begins with the clickhouse data directory. - */ -class TableFunctionFile : public ITableFunction -{ -public: - static constexpr auto name = "file"; - std::string getName() const override { return name; } - -private: - StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; -}; - - -} // namespace DB diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index 2eac0ce0548..8449077cc96 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -21,16 +21,13 @@ namespace DB { void registerTableFunctionMerge(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); -void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); -void registerTableFunctionFile(TableFunctionFactory & factory); + void registerTableFunctions() { auto & factory = TableFunctionFactory::instance(); registerTableFunctionMerge(factory); registerTableFunctionNumbers(factory); - registerTableFunctionCatBoostPool(factory); - registerTableFunctionFile(factory); } } // namespace DB diff --git a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp index dcf727614b1..ea19ff08dd3 100644 --- a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp +++ b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp @@ -36,6 +36,7 @@ void columnToTiPBExpr(tipb::Expr * expr, const ColumnWithTypeAndName column, siz if (column.column->isColumnNullable()) { auto [col, null_map] = removeNullable(column.column.get()); + (void)null_map; is_const = col->isColumnConst(); } } @@ -97,6 +98,7 @@ void columnsToTiPBExprForTiDBCast( if (type_column.column->isColumnNullable()) { auto [col, null_map] = removeNullable(type_column.column.get()); + (void)null_map; is_const = col->isColumnConst(); } } diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 91c1430f7a0..763c74b45b6 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -12,11 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. 
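// ----------------------------------------------------------------------------
// Editor's aside: the `(void)null_map;` lines added above use the usual idiom
// for discarding one member of a structured binding, which, unlike std::tie,
// cannot bind to std::ignore. A stand-alone model; removeNullableMock is a
// made-up stand-in for the real removeNullable helper:
#include <utility>

std::pair<int, bool> removeNullableMock() { return {42, true}; }

int main()
{
    auto [col, null_map] = removeNullableMock();
    (void)null_map; // only `col` is needed; mark `null_map` as intentionally unused
    return col == 42 ? 0 : 1;
}
// ----------------------------------------------------------------------------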
+#include #include #include #include #include #include + +#include + namespace DB::tests { DAGContext & ExecutorTest::getDAGContext() @@ -34,15 +38,20 @@ void ExecutorTest::initializeContext() void ExecutorTest::SetUpTestCase() { - try - { - DB::registerFunctions(); - DB::registerAggregateFunctions(); - } - catch (DB::Exception &) - { - // Maybe another test has already registered, ignore exception here. - } + auto register_func = [](std::function func) { + try + { + func(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. + } + }; + + register_func(DB::registerFunctions); + register_func(DB::registerAggregateFunctions); + register_func(DB::registerWindowFunctions); } void ExecutorTest::initializeClientInfo() @@ -96,33 +105,43 @@ Block mergeBlocks(Blocks blocks) } } // namespace -void ExecutorTest::readAndAssertBlock(BlockInputStreamPtr stream, const ColumnsWithTypeAndName & expect_columns) +DB::ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream) { Blocks actual_blocks; - Block except_block(expect_columns); stream->readPrefix(); while (auto block = stream->read()) { actual_blocks.push_back(block); } stream->readSuffix(); - Block actual_block = mergeBlocks(actual_blocks); - ASSERT_BLOCK_EQ(except_block, actual_block); + return mergeBlocks(actual_blocks).getColumnsWithTypeAndName(); +} + +void ExecutorTest::enablePlanner(bool is_enable) +{ + context.context.setSetting("enable_planner", is_enable ? "true" : "false"); } -void ExecutorTest::executeStreams(const std::shared_ptr & request, std::unordered_map & source_columns_map, const ColumnsWithTypeAndName & expect_columns, size_t concurrency) +DB::ColumnsWithTypeAndName ExecutorTest::executeStreams(const std::shared_ptr & request, std::unordered_map & source_columns_map, size_t concurrency) { DAGContext dag_context(*request, "executor_test", concurrency); dag_context.setColumnsForTest(source_columns_map); context.context.setDAGContext(&dag_context); // Currently, don't care about regions information in tests. 
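// ----------------------------------------------------------------------------
// Editor's aside: a hedged usage sketch of the executeStreams refactor above.
// The function now returns the materialized columns instead of asserting
// internally, so the caller picks the comparison macro. ASSERT_COLUMNS_EQ_UR
// and createColumns are introduced later in this patch; the table name and
// data below are made up for illustration.
TEST_F(ExecutorTest, ProjectionReadsBack)
try
{
    auto request = context.scan("test_db", "test_table")
                       .project({"s1"})
                       .build(context);
    ASSERT_COLUMNS_EQ_UR(
        createColumns({toNullableVec<String>("s1", {"banana", "apple"})}),
        executeStreams(request));
}
CATCH
// ----------------------------------------------------------------------------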
DAGQuerySource dag(context.context); - readAndAssertBlock(executeQuery(dag, context.context, false, QueryProcessingStage::Complete).in, expect_columns); + return readBlock(executeQuery(dag, context.context, false, QueryProcessingStage::Complete).in); +} + +DB::ColumnsWithTypeAndName ExecutorTest::executeStreams(const std::shared_ptr & request, size_t concurrency) +{ + return executeStreams(request, context.executorIdColumnsMap(), concurrency); } -void ExecutorTest::executeStreams(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns, size_t concurrency) +DB::ColumnsWithTypeAndName ExecutorTest::executeStreamsWithSingleSource(const std::shared_ptr & request, const ColumnsWithTypeAndName & source_columns, SourceType type, size_t concurrency) { - executeStreams(request, context.executorIdColumnsMap(), expect_columns, concurrency); + std::unordered_map source_columns_map; + source_columns_map[getSourceName(type)] = source_columns; + return executeStreams(request, source_columns_map, concurrency); } void ExecutorTest::dagRequestEqual(const String & expected_string, const std::shared_ptr & actual) diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 56a07085e50..54fed31f88d 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -15,18 +15,21 @@ #pragma once #include -#include -#include #include #include #include -#include -#include #include #include +#include + namespace DB::tests { void executeInterpreter(const std::shared_ptr & request, Context & context); + +::testing::AssertionResult check_columns_equality(const ColumnsWithTypeAndName & expected, const ColumnsWithTypeAndName & actual, bool _restrict); + +DB::ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream); + class ExecutorTest : public ::testing::Test { protected: @@ -48,45 +51,56 @@ class ExecutorTest : public ::testing::Test DAGContext & getDAGContext(); + /// for planner + void enablePlanner(bool is_enable); + template + void wrapForDisEnablePlanner(FF && ff) + { + enablePlanner(false); + ff(); + enablePlanner(true); + ff(); + } + static void dagRequestEqual(const String & expected_string, const std::shared_ptr & actual); void executeInterpreter(const String & expected_string, const std::shared_ptr & request, size_t concurrency); - void executeStreams( - const std::shared_ptr & request, - std::unordered_map & source_columns_map, - const ColumnsWithTypeAndName & expect_columns, - size_t concurrency = 1); - void executeStreams( - const std::shared_ptr & request, - const ColumnsWithTypeAndName & expect_columns, - size_t concurrency = 1); - - template - ColumnWithTypeAndName toNullableVec(const std::vector::FieldType>> & v) + enum SourceType { - return createColumn>(v); - } - - template - ColumnWithTypeAndName toVec(const std::vector::FieldType> & v) - { - return createColumn(v); - } + TableScan, + ExchangeReceiver + }; - template - ColumnWithTypeAndName toNullableVec(String name, const std::vector::FieldType>> & v) + // for single source query, the source executor name is ${type}_0 + static String getSourceName(SourceType type) { - return createColumn>(v, name); + switch (type) + { + case TableScan: + return "table_scan_0"; + case ExchangeReceiver: + return "exchange_receiver_0"; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unknown Executor Source type {}", + type); + } } - template - ColumnWithTypeAndName toVec(String name, const std::vector::FieldType> & v) - { - return createColumn(v, name); - } + 
ColumnsWithTypeAndName executeStreams( + const std::shared_ptr & request, + std::unordered_map & source_columns_map, + size_t concurrency = 1); + ColumnsWithTypeAndName executeStreams( + const std::shared_ptr & request, + size_t concurrency = 1); - static void readAndAssertBlock(BlockInputStreamPtr stream, const ColumnsWithTypeAndName & expect_columns); + ColumnsWithTypeAndName executeStreamsWithSingleSource( + const std::shared_ptr & request, + const ColumnsWithTypeAndName & source_columns, + SourceType type = TableScan, + size_t concurrency = 1); protected: MockDAGRequestContext context; @@ -95,4 +109,4 @@ class ExecutorTest : public ::testing::Test #define ASSERT_DAGREQUEST_EQAUL(str, request) dagRequestEqual((str), (request)); #define ASSERT_BLOCKINPUTSTREAM_EQAUL(str, request, concurrency) executeInterpreter((str), (request), (concurrency)) -} // namespace DB::tests \ No newline at end of file +} // namespace DB::tests diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index dae07f7123b..1c8b0242bfa 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -13,7 +13,9 @@ // limitations under the License. #include +#include #include +#include #include #include #include @@ -23,7 +25,10 @@ #include #include #include -#include + +#include +#include + namespace DB { @@ -103,21 +108,118 @@ ::testing::AssertionResult columnEqual( return columnEqual(expected.column, actual.column); } -void blockEqual( +::testing::AssertionResult blockEqual( const Block & expected, const Block & actual) { size_t columns = actual.columns(); + size_t expected_columns = expected.columns(); - ASSERT_TRUE(expected.columns() == columns); + ASSERT_EQUAL(expected_columns, columns, "Block size mismatch"); for (size_t i = 0; i < columns; ++i) { const auto & expected_col = expected.getByPosition(i); const auto & actual_col = actual.getByPosition(i); - ASSERT_TRUE(actual_col.type->getName() == expected_col.type->getName()); - ASSERT_COLUMN_EQ(expected_col.column, actual_col.column); + + auto cmp_res = columnEqual(expected_col, actual_col); + if (!cmp_res) + return cmp_res; + } + return ::testing::AssertionSuccess(); +} + +/// size of each column should be the same +std::multiset columnsToRowSet(const ColumnsWithTypeAndName & cols) +{ + if (cols.empty()) + return {}; + if (cols[0].column->empty()) + return {}; + + size_t cols_size = cols.size(); + std::vector rows{cols[0].column->size()}; + + for (auto & r : rows) + { + r.resize(cols_size, true); + } + + for (auto const & [col_id, col] : ext::enumerate(cols)) + { + for (size_t i = 0, size = col.column->size(); i < size; ++i) + { + new (rows[i].place(col_id)) Field((*col.column)[i]); + } + } + return {std::make_move_iterator(rows.begin()), std::make_move_iterator(rows.end())}; +} + +::testing::AssertionResult columnsEqual( + const ColumnsWithTypeAndName & expected, + const ColumnsWithTypeAndName & actual, + bool _restrict) +{ + if (_restrict) + return blockEqual(Block(expected), Block(actual)); + + auto expect_cols_size = expected.size(); + auto actual_cols_size = actual.size(); + + ASSERT_EQUAL(expect_cols_size, actual_cols_size, "Columns size mismatch"); + + for (size_t i = 0; i < expect_cols_size; ++i) + { + auto const & expect_col = expected[i]; + auto const & actual_col = actual[i]; + ASSERT_EQUAL(expect_col.column->getName(), actual_col.column->getName(), fmt::format("Column {} name mismatch", i)); + ASSERT_EQUAL(expect_col.column->size(), actual_col.column->size(), 
fmt::format("Column {} size mismatch", i)); + auto type_eq = dataTypeEqual(expected[i].type, actual[i].type); + if (!type_eq) + return type_eq; + } + + auto const expected_row_set = columnsToRowSet(expected); + auto const actual_row_set = columnsToRowSet(actual); + + if (expected_row_set != actual_row_set) + { + FmtBuffer buf; + + auto expect_it = expected_row_set.begin(); + auto actual_it = actual_row_set.begin(); + + buf.append("Columns row set mismatch\n").append("expected_row_set:\n"); + for (; expect_it != expected_row_set.end(); ++expect_it, ++actual_it) + { + buf.joinStr( + expect_it->begin(), + expect_it->end(), + [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, + " ") + .append("\n"); + if (*expect_it != *actual_it) + break; + } + + ++actual_it; + + buf.append("...\nactual_row_set:\n"); + for (auto it = actual_row_set.begin(); it != actual_it; ++it) + { + buf.joinStr( + it->begin(), + it->end(), + [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, + " ") + .append("\n"); + } + buf.append("...\n"); + + return testing::AssertionFailure() << buf.toString(); } + + return testing::AssertionSuccess(); } std::pair buildFunction( @@ -242,5 +344,96 @@ ColumnWithTypeAndName createOnlyNullColumn(size_t size, const String & name) return {std::move(col), data_type, name}; } +ColumnWithTypeAndName toDatetimeVec(String name, const std::vector & v, int fsp) +{ + std::vector::FieldType> vec; + vec.reserve(v.size()); + for (const auto & value_str : v) + { + Field value = parseMyDateTime(value_str, fsp); + vec.push_back(value.template safeGet()); + } + DataTypePtr data_type = std::make_shared(fsp); + return {makeColumn(data_type, vec), data_type, name, 0}; +} + +ColumnWithTypeAndName toNullableDatetimeVec(String name, const std::vector & v, int fsp) +{ + std::vector::FieldType>> vec; + vec.reserve(v.size()); + for (const auto & value_str : v) + { + if (!value_str.empty()) + { + Field value = parseMyDateTime(value_str, fsp); + vec.push_back(value.template safeGet()); + } + else + { + vec.push_back({}); + } + } + DataTypePtr data_type = makeNullable(std::make_shared(fsp)); + return {makeColumn>(data_type, vec), data_type, name, 0}; +} + +String getColumnsContent(const ColumnsWithTypeAndName & cols) +{ + if (cols.size() <= 0) + return ""; + return getColumnsContent(cols, 0, cols[0].column->size() - 1); +} + +String getColumnsContent(const ColumnsWithTypeAndName & cols, size_t begin, size_t end) +{ + const size_t col_num = cols.size(); + if (col_num <= 0) + return ""; + + const size_t col_size = cols[0].column->size(); + assert(begin <= end); + assert(col_size > end); + assert(col_size > begin); + + bool is_same = true; + + for (size_t i = 1; i < col_num; ++i) + { + if (cols[i].column->size() != col_size) + is_same = false; + } + + assert(is_same); /// Ensure the sizes of columns in cols are the same + + std::vector> col_content; + FmtBuffer fmt_buf; + for (size_t i = 0; i < col_num; ++i) + { + /// Push the column name + fmt_buf.append(fmt::format("{}: (", cols[i].name)); + for (size_t j = begin; j <= end; ++j) + col_content.push_back(std::make_pair(j, (*cols[i].column)[j].toString())); + + /// Add content + fmt_buf.joinStr( + col_content.begin(), + col_content.end(), + [](const auto & content, FmtBuffer & fmt_buf) { + fmt_buf.append(fmt::format("{}: {}", content.first, content.second)); + }, + ", "); + + fmt_buf.append(")\n"); + col_content.clear(); + } + + return fmt_buf.toString(); +} + +ColumnsWithTypeAndName createColumns(const ColumnsWithTypeAndName & cols) +{ + 
return cols; +} + } // namespace tests } // namespace DB diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h index 7704c69a89f..8680d1886b1 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.h +++ b/dbms/src/TestUtils/FunctionTestUtils.h @@ -514,6 +514,17 @@ ColumnWithTypeAndName createConstColumn( return createConstColumn(data_type_args, size, InferredFieldType(std::nullopt), name); } +String getColumnsContent(const ColumnsWithTypeAndName & cols); + +/// Prints only the rows in the range designated by begin and end; the range is inclusive: [begin, end] +String getColumnsContent(const ColumnsWithTypeAndName & cols, size_t begin, size_t end); + +// This wrapper function only serves to construct the columns input for function-like macros, +// since the preprocessor treats `{col1, col2, col3}` as three arguments instead of one. +// E.g. the preprocessor does not allow us to write `ASSERT_COLUMNS_EQ_R({col1, col2, col3}, actual_cols)`, +// but with this function we can write `ASSERT_COLUMNS_EQ_R(createColumns({col1, col2, col3}), actual_cols)` instead. +ColumnsWithTypeAndName createColumns(const ColumnsWithTypeAndName & cols); + ::testing::AssertionResult dataTypeEqual( const DataTypePtr & expected, const DataTypePtr & actual); @@ -527,10 +538,15 @@ ::testing::AssertionResult columnEqual( const ColumnWithTypeAndName & expected, const ColumnWithTypeAndName & actual); -void blockEqual( +::testing::AssertionResult blockEqual( const Block & expected, const Block & actual); +::testing::AssertionResult columnsEqual( + const ColumnsWithTypeAndName & expected, + const ColumnsWithTypeAndName & actual, + bool _restrict); + ColumnWithTypeAndName executeFunction( Context & context, const String & func_name, @@ -654,6 +670,33 @@ ColumnWithTypeAndName createNullableColumn( return createNullableColumn(data_type_args, vec, null_map, name, 0); } +template +ColumnWithTypeAndName toNullableVec(const std::vector::FieldType>> & v) +{ + return createColumn>(v); +} + +template +ColumnWithTypeAndName toVec(const std::vector::FieldType> & v) +{ + return createColumn(v); +} + +template +ColumnWithTypeAndName toNullableVec(String name, const std::vector::FieldType>> & v) +{ + return createColumn>(v, name); +} + +template +ColumnWithTypeAndName toVec(String name, const std::vector::FieldType> & v) +{ + return createColumn(v, name); +} + +ColumnWithTypeAndName toDatetimeVec(String name, const std::vector & v, int fsp); + +ColumnWithTypeAndName toNullableDatetimeVec(String name, const std::vector & v, int fsp); class FunctionTest : public ::testing::Test { protected: @@ -729,5 +772,10 @@ class FunctionTest : public ::testing::Test #define ASSERT_COLUMN_EQ(expected, actual) ASSERT_TRUE(DB::tests::columnEqual((expected), (actual))) #define ASSERT_BLOCK_EQ(expected, actual) DB::tests::blockEqual((expected), (actual)) + +/// Strictly check columns for equality: both the data set and the order of rows must match +#define ASSERT_COLUMNS_EQ_R(expected, actual) ASSERT_TRUE(DB::tests::columnsEqual((expected), (actual), true)) +/// Loosely check columns for equality: only the unordered data set must match +#define ASSERT_COLUMNS_EQ_UR(expected, actual) ASSERT_TRUE(DB::tests::columnsEqual((expected), (actual), false)) } // namespace tests } // namespace DB diff --git a/dbms/src/TestUtils/bench_dbms_main.cpp b/dbms/src/TestUtils/bench_dbms_main.cpp index 48bd02a71f7..092c45c35e2 100644 --- a/dbms/src/TestUtils/bench_dbms_main.cpp +++ b/dbms/src/TestUtils/bench_dbms_main.cpp @@ -20,6 +20,8 @@ int main(int argc, char *
argv[]) { benchmark::Initialize(&argc, argv); DB::tests::TiFlashTestEnv::setupLogger(); + // Each time TiFlashTestEnv::getContext() is called, some log will print, it's annoying. + Poco::Logger::root().setLevel("error"); DB::tests::TiFlashTestEnv::initializeGlobalContext(); if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index b8d2b039bd2..a0ae4b11270 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -204,6 +204,66 @@ void serializeExchangeReceiver(const String & executor_id, const tipb::ExchangeR buf.append("}\n"); } +void serializeWindow(const String & executor_id, const tipb::Window & window [[maybe_unused]], FmtBuffer & buf) +{ + buf.fmtAppend("{} | partition_by: {{", executor_id); + buf.joinStr( + window.partition_by().begin(), + window.partition_by().end(), + [&](const auto & partition_by, FmtBuffer & fb) { + fb.append("("); + serializeExpression(partition_by.expr(), buf); + fb.fmtAppend(", desc: {})", partition_by.desc()); + }, + ", "); + buf.append("}}, order_by: {"); + buf.joinStr( + window.order_by().begin(), + window.order_by().end(), + [&](const auto & order_by, FmtBuffer & fb) { + fb.append("("); + serializeExpression(order_by.expr(), buf); + fb.fmtAppend(", desc: {})", order_by.desc()); + }, + ", "); + buf.append("}, func_desc: {"); + buf.joinStr( + window.func_desc().begin(), + window.func_desc().end(), + [&](const auto & func, FmtBuffer &) { + serializeExpression(func, buf); + }, + ", "); + if (window.has_frame()) + { + buf.append("}, frame: {"); + if (window.frame().has_start()) + { + buf.fmtAppend("start<{}, {}, {}>", window.frame().start().type(), window.frame().start().unbounded(), window.frame().start().offset()); + } + if (window.frame().has_end()) + { + buf.fmtAppend(", end<{}, {}, {}>", window.frame().end().type(), window.frame().end().unbounded(), window.frame().end().offset()); + } + } + buf.append("}\n"); +} + +void serializeSort(const String & executor_id, const tipb::Sort & sort [[maybe_unused]], FmtBuffer & buf) +{ + buf.fmtAppend("{} | isPartialSort: {}, partition_by: {{", executor_id, sort.ispartialsort()); + buf.joinStr( + sort.byitems().begin(), + sort.byitems().end(), + [&](const auto & by, FmtBuffer & fb) { + fb.append("("); + serializeExpression(by.expr(), buf); + fb.fmtAppend(", desc: {})", by.desc()); + }, + ", "); + buf.append("}\n"); +} + void ExecutorSerializer::serialize(const tipb::Executor & root_executor, size_t level) { auto append_str = [&level, this](const tipb::Executor & executor) { @@ -248,9 +308,11 @@ void ExecutorSerializer::serialize(const tipb::Executor & root_executor, size_t serializeExchangeSender(executor.executor_id(), executor.exchange_sender(), buf); break; case tipb::ExecType::TypeSort: - throw TiFlashException("Sort executor is not supported", Errors::Coprocessor::Unimplemented); // todo support sort executor. + serializeSort(executor.executor_id(), executor.sort(), buf); + break; case tipb::ExecType::TypeWindow: - throw TiFlashException("Window executor is not supported", Errors::Coprocessor::Unimplemented); // todo support window executor. 
+ serializeWindow(executor.executor_id(), executor.window(), buf); + break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); } diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index af939002cff..30d05786c9a 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -23,8 +23,6 @@ #include #include -#include - namespace DB::tests { ASTPtr buildColumn(const String & column_name) @@ -37,7 +35,7 @@ ASTPtr buildLiteral(const Field & field) return std::make_shared(field); } -ASTPtr buildOrderByItemList(MockOrderByItems order_by_items) +ASTPtr buildOrderByItemVec(MockOrderByItemVec order_by_items) { std::vector vec(order_by_items.size()); size_t i = 0; @@ -54,6 +52,15 @@ ASTPtr buildOrderByItemList(MockOrderByItems order_by_items) return exp_list; } +MockWindowFrame buildDefaultRowsFrame() +{ + MockWindowFrame frame; + frame.type = tipb::WindowFrameType::Rows; + frame.end = {tipb::WindowBoundType::CurrentRow, false, 0}; + frame.start = {tipb::WindowBoundType::CurrentRow, false, 0}; + return frame; +} + // a mock DAGRequest should prepare its time_zone, flags, encode_type and output_schema. void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request) { @@ -85,7 +92,7 @@ std::shared_ptr DAGRequestBuilder::build(MockDAGRequestContext return dag_request_ptr; } -DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String & table, const MockColumnInfos & columns) +DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String & table, const MockColumnInfoVec & columns) { assert(!columns.empty()); TableInfo table_info; @@ -96,6 +103,9 @@ DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String TiDB::ColumnInfo ret; ret.tp = column.second; ret.name = column.first; + // TODO: find a way to assign decimal field's flen. 
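// Editor's note on the TODO just above: 65 is the maximum precision of a
// MySQL/TiDB DECIMAL, so the lines that follow use it as a safe upper-bound
// placeholder flen for mock decimal columns until the mock-table API lets
// tests set the precision explicitly.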
+ if (ret.tp == TiDB::TP::TypeNewDecimal) + ret.flen = 65; ret.id = i++; table_info.columns.push_back(std::move(ret)); } @@ -104,27 +114,17 @@ DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String return *this; } -DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfos & columns) +DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfoVec & columns) { return mockTable(name.first, name.second, columns); } -DAGRequestBuilder & DAGRequestBuilder::mockTable(const MockTableName & name, const MockColumnInfoList & columns) +DAGRequestBuilder & DAGRequestBuilder::exchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count) { - return mockTable(name.first, name.second, columns); + return buildExchangeReceiver(columns, fine_grained_shuffle_stream_count); } -DAGRequestBuilder & DAGRequestBuilder::exchangeReceiver(const MockColumnInfos & columns) -{ - return buildExchangeReceiver(columns); -} - -DAGRequestBuilder & DAGRequestBuilder::exchangeReceiver(const MockColumnInfoList & columns) -{ - return buildExchangeReceiver(columns); -} - -DAGRequestBuilder & DAGRequestBuilder::buildExchangeReceiver(const MockColumnInfos & columns) +DAGRequestBuilder & DAGRequestBuilder::buildExchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count) { DAGSchema schema; for (const auto & column : columns) @@ -135,7 +135,7 @@ DAGRequestBuilder & DAGRequestBuilder::buildExchangeReceiver(const MockColumnInf schema.push_back({column.first, info}); } - root = compileExchangeReceiver(getExecutorIndex(), schema); + root = compileExchangeReceiver(getExecutorIndex(), schema, fine_grained_shuffle_stream_count); return *this; } @@ -170,33 +170,23 @@ DAGRequestBuilder & DAGRequestBuilder::topN(ASTPtr order_exprs, ASTPtr limit_exp DAGRequestBuilder & DAGRequestBuilder::topN(const String & col_name, bool desc, int limit) { assert(root); - root = compileTopN(root, getExecutorIndex(), buildOrderByItemList({{col_name, desc}}), buildLiteral(Field(static_cast(limit)))); + root = compileTopN(root, getExecutorIndex(), buildOrderByItemVec({{col_name, desc}}), buildLiteral(Field(static_cast(limit)))); return *this; } -DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, int limit) +DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItemVec order_by_items, int limit) { return topN(order_by_items, buildLiteral(Field(static_cast(limit)))); } -DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItems order_by_items, ASTPtr limit_expr) +DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItemVec order_by_items, ASTPtr limit_expr) { assert(root); - root = compileTopN(root, getExecutorIndex(), buildOrderByItemList(order_by_items), limit_expr); + root = compileTopN(root, getExecutorIndex(), buildOrderByItemVec(order_by_items), limit_expr); return *this; } -DAGRequestBuilder & DAGRequestBuilder::project(const String & col_name) -{ - assert(root); - auto exp_list = std::make_shared(); - exp_list->children.push_back(buildColumn(col_name)); - - root = compileProject(root, getExecutorIndex(), exp_list); - return *this; -} - -DAGRequestBuilder & DAGRequestBuilder::project(MockAsts exprs) +DAGRequestBuilder & DAGRequestBuilder::project(MockAstVec exprs) { assert(root); auto exp_list = std::make_shared(); @@ -208,7 +198,7 @@ DAGRequestBuilder & DAGRequestBuilder::project(MockAsts exprs) return *this; } -DAGRequestBuilder & 
DAGRequestBuilder::project(MockColumnNames col_names) +DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNameVec col_names) { assert(root); auto exp_list = std::make_shared(); @@ -227,12 +217,12 @@ DAGRequestBuilder & DAGRequestBuilder::exchangeSender(tipb::ExchangeType exchang return *this; } -DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAsts exprs) +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAstVec exprs) { return join(right, exprs, ASTTableJoin::Kind::Inner); } -DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAsts exprs, ASTTableJoin::Kind kind) +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAstVec exprs, ASTTableJoin::Kind kind) { assert(root); assert(right.root); @@ -258,7 +248,7 @@ DAGRequestBuilder & DAGRequestBuilder::aggregation(ASTPtr agg_func, ASTPtr group return buildAggregation(agg_funcs, group_by_exprs); } -DAGRequestBuilder & DAGRequestBuilder::aggregation(MockAsts agg_funcs, MockAsts group_by_exprs) +DAGRequestBuilder & DAGRequestBuilder::aggregation(MockAstVec agg_funcs, MockAstVec group_by_exprs) { auto agg_func_list = std::make_shared(); auto group_by_expr_list = std::make_shared(); @@ -276,41 +266,61 @@ DAGRequestBuilder & DAGRequestBuilder::buildAggregation(ASTPtr agg_funcs, ASTPtr return *this; } -void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos) +DAGRequestBuilder & DAGRequestBuilder::window(ASTPtr window_func, MockOrderByItem order_by, MockPartitionByItem partition_by, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) { - std::vector v_column_info(columnInfos.size()); - size_t i = 0; - for (const auto & info : columnInfos) - { - v_column_info[i++] = std::move(info); - } - mock_tables[name.first + "." + name.second] = v_column_info; + assert(root); + auto window_func_list = std::make_shared(); + window_func_list->children.push_back(window_func); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemVec({partition_by}), buildOrderByItemVec({order_by}), frame, fine_grained_shuffle_stream_count); + return *this; } -void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfos & columnInfos) +DAGRequestBuilder & DAGRequestBuilder::window(ASTPtr window_func, MockOrderByItemVec order_by_vec, MockPartitionByItemVec partition_by_vec, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) { - mock_tables[db + "." + table] = columnInfos; + assert(root); + auto window_func_list = std::make_shared(); + window_func_list->children.push_back(window_func); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemVec(partition_by_vec), buildOrderByItemVec(order_by_vec), frame, fine_grained_shuffle_stream_count); + return *this; } -void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfos & columnInfos) +DAGRequestBuilder & DAGRequestBuilder::window(MockAstVec window_funcs, MockOrderByItemVec order_by_vec, MockPartitionByItemVec partition_by_vec, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) { - mock_tables[name.first + "." 
+ name.second] = columnInfos; + assert(root); + auto window_func_list = std::make_shared(); + for (const auto & func : window_funcs) + window_func_list->children.push_back(func); + root = compileWindow(root, getExecutorIndex(), window_func_list, buildOrderByItemVec(partition_by_vec), buildOrderByItemVec(order_by_vec), frame, fine_grained_shuffle_stream_count); + return *this; } -void MockDAGRequestContext::addExchangeRelationSchema(String name, const MockColumnInfos & columnInfos) +DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count) { - exchange_schemas[name] = columnInfos; + assert(root); + root = compileSort(root, getExecutorIndex(), buildOrderByItemVec({order_by}), is_partial_sort, fine_grained_shuffle_stream_count); + return *this; } -void MockDAGRequestContext::addExchangeRelationSchema(String name, const MockColumnInfoList & columnInfos) +DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count) { - std::vector v_column_info(columnInfos.size()); - size_t i = 0; - for (const auto & info : columnInfos) - { - v_column_info[i++] = std::move(info); - } - exchange_schemas[name] = v_column_info; + assert(root); + root = compileSort(root, getExecutorIndex(), buildOrderByItemVec(order_by_vec), is_partial_sort, fine_grained_shuffle_stream_count); + return *this; +} + +void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfoVec & columnInfos) +{ + mock_tables[db + "." + table] = columnInfos; +} + +void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoVec & columnInfos) +{ + mock_tables[name.first + "." + name.second] = columnInfos; +} + +void MockDAGRequestContext::addExchangeRelationSchema(String name, const MockColumnInfoVec & columnInfos) +{ + exchange_schemas[name] = columnInfos; } void MockDAGRequestContext::addMockTableColumnData(const String & db, const String & table, ColumnsWithTypeAndName columns) @@ -328,37 +338,19 @@ void MockDAGRequestContext::addExchangeReceiverColumnData(const String & name, C mock_exchange_columns[name] = columns; } -void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfoList & columnInfos, ColumnsWithTypeAndName columns) -{ - addMockTable(db, table, columnInfos); - addMockTableColumnData(db, table, columns); -} - -void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfos & columnInfos, ColumnsWithTypeAndName columns) +void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfoVec & columnInfos, ColumnsWithTypeAndName columns) { addMockTable(db, table, columnInfos); addMockTableColumnData(db, table, columns); } -void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos, ColumnsWithTypeAndName columns) -{ - addMockTable(name, columnInfos); - addMockTableColumnData(name, columns); -} - -void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfos & columnInfos, ColumnsWithTypeAndName columns) +void MockDAGRequestContext::addMockTable(const MockTableName & name, const MockColumnInfoVec & columnInfos, ColumnsWithTypeAndName columns) { addMockTable(name, columnInfos); addMockTableColumnData(name, columns); } -void MockDAGRequestContext::addExchangeReceiver(const String & name, MockColumnInfos columnInfos, 
ColumnsWithTypeAndName columns) -{ - addExchangeRelationSchema(name, columnInfos); - addExchangeReceiverColumnData(name, columns); -} - -void MockDAGRequestContext::addExchangeReceiver(const String & name, MockColumnInfoList columnInfos, ColumnsWithTypeAndName columns) +void MockDAGRequestContext::addExchangeReceiver(const String & name, MockColumnInfoVec columnInfos, ColumnsWithTypeAndName columns) { addExchangeRelationSchema(name, columnInfos); addExchangeReceiverColumnData(name, columns); @@ -376,9 +368,9 @@ DAGRequestBuilder MockDAGRequestContext::scan(String db_name, String table_name) return builder; } -DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name) +DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name, uint64_t fine_grained_shuffle_stream_count) { - auto builder = DAGRequestBuilder(index).exchangeReceiver(exchange_schemas[exchange_name]); + auto builder = DAGRequestBuilder(index).exchangeReceiver(exchange_schemas[exchange_name], fine_grained_shuffle_stream_count); receiver_source_task_ids_map[builder.getRoot()->name] = {}; // If don't have related columns, user must pass input columns as argument of executeStreams in order to run Executors Tests. // If user don't want to test executors, it will be safe to run Interpreter Tests. @@ -388,4 +380,4 @@ DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name) } return builder; } -} // namespace DB::tests \ No newline at end of file +} // namespace DB::tests diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 95551cdfc9e..8b5a6d300ff 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -20,19 +20,18 @@ #include #include -#include -#include - namespace DB::tests { using MockColumnInfo = std::pair; -using MockColumnInfos = std::vector; -using MockColumnInfoList = std::initializer_list; +using MockColumnInfoVec = std::vector; using MockTableName = std::pair; using MockOrderByItem = std::pair; -using MockOrderByItems = std::initializer_list; -using MockColumnNames = std::initializer_list; -using MockAsts = std::initializer_list; +using MockOrderByItemVec = std::vector; +using MockPartitionByItem = std::pair; +using MockPartitionByItemVec = std::vector; +using MockColumnNameVec = std::vector; +using MockAstVec = std::vector; +using MockWindowFrame = mock::MockWindowFrame; class MockDAGRequestContext; @@ -64,12 +63,10 @@ class DAGRequestBuilder std::shared_ptr build(MockDAGRequestContext & mock_context); - DAGRequestBuilder & mockTable(const String & db, const String & table, const MockColumnInfos & columns); - DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfos & columns); - DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfoList & columns); + DAGRequestBuilder & mockTable(const String & db, const String & table, const MockColumnInfoVec & columns); + DAGRequestBuilder & mockTable(const MockTableName & name, const MockColumnInfoVec & columns); - DAGRequestBuilder & exchangeReceiver(const MockColumnInfos & columns); - DAGRequestBuilder & exchangeReceiver(const MockColumnInfoList & columns); + DAGRequestBuilder & exchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count = 0); DAGRequestBuilder & filter(ASTPtr filter_expr); @@ -78,28 +75,34 @@ class DAGRequestBuilder DAGRequestBuilder & topN(ASTPtr order_exprs, ASTPtr limit_expr); DAGRequestBuilder & topN(const String & col_name, bool desc, int limit); - DAGRequestBuilder & 
topN(MockOrderByItems order_by_items, int limit); - DAGRequestBuilder & topN(MockOrderByItems order_by_items, ASTPtr limit_expr); + DAGRequestBuilder & topN(MockOrderByItemVec order_by_items, int limit); + DAGRequestBuilder & topN(MockOrderByItemVec order_by_items, ASTPtr limit_expr); - DAGRequestBuilder & project(const String & col_name); - DAGRequestBuilder & project(MockAsts expr); - DAGRequestBuilder & project(MockColumnNames col_names); + DAGRequestBuilder & project(MockAstVec exprs); + DAGRequestBuilder & project(MockColumnNameVec col_names); DAGRequestBuilder & exchangeSender(tipb::ExchangeType exchange_type); - // Currentlt only support inner join, left join and right join. + // Currently only support inner join, left join and right join. // TODO support more types of join. - DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAsts exprs); - DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAsts exprs, ASTTableJoin::Kind kind); + DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAstVec exprs); + DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAstVec exprs, ASTTableJoin::Kind kind); // aggregation DAGRequestBuilder & aggregation(ASTPtr agg_func, ASTPtr group_by_expr); - DAGRequestBuilder & aggregation(MockAsts agg_funcs, MockAsts group_by_exprs); + DAGRequestBuilder & aggregation(MockAstVec agg_funcs, MockAstVec group_by_exprs); + + // window + DAGRequestBuilder & window(ASTPtr window_func, MockOrderByItem order_by, MockPartitionByItem partition_by, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); + DAGRequestBuilder & window(MockAstVec window_funcs, MockOrderByItemVec order_by_vec, MockPartitionByItemVec partition_by_vec, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); + DAGRequestBuilder & window(ASTPtr window_func, MockOrderByItemVec order_by_vec, MockPartitionByItemVec partition_by_vec, MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); + DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); + DAGRequestBuilder & sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); private: void initDAGRequest(tipb::DAGRequest & dag_request); DAGRequestBuilder & buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs); - DAGRequestBuilder & buildExchangeReceiver(const MockColumnInfos & columns); + DAGRequestBuilder & buildExchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count = 0); ExecutorPtr root; DAGProperties properties; @@ -123,30 +126,25 @@ class MockDAGRequestContext return DAGRequestBuilder(index); } - void addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos); - void addMockTable(const String & db, const String & table, const MockColumnInfos & columnInfos); - void addMockTable(const MockTableName & name, const MockColumnInfos & columnInfos); - void addExchangeRelationSchema(String name, const MockColumnInfos & columnInfos); - void addExchangeRelationSchema(String name, const MockColumnInfoList & columnInfos); + void addMockTable(const String & db, const String & table, const MockColumnInfoVec & columnInfos); + void addMockTable(const MockTableName & name, const MockColumnInfoVec & columnInfos); + void addExchangeRelationSchema(String name, const MockColumnInfoVec & columnInfos); void addMockTableColumnData(const String & db, const String & table, ColumnsWithTypeAndName columns); - 
void addMockTable(const String & db, const String & table, const MockColumnInfoList & columnInfos, ColumnsWithTypeAndName columns); - void addMockTable(const String & db, const String & table, const MockColumnInfos & columnInfos, ColumnsWithTypeAndName columns); - void addMockTable(const MockTableName & name, const MockColumnInfoList & columnInfos, ColumnsWithTypeAndName columns); - void addMockTable(const MockTableName & name, const MockColumnInfos & columnInfos, ColumnsWithTypeAndName columns); + void addMockTable(const String & db, const String & table, const MockColumnInfoVec & columnInfos, ColumnsWithTypeAndName columns); + void addMockTable(const MockTableName & name, const MockColumnInfoVec & columnInfos, ColumnsWithTypeAndName columns); void addMockTableColumnData(const MockTableName & name, ColumnsWithTypeAndName columns); void addExchangeReceiverColumnData(const String & name, ColumnsWithTypeAndName columns); - void addExchangeReceiver(const String & name, MockColumnInfos columnInfos, ColumnsWithTypeAndName columns); - void addExchangeReceiver(const String & name, MockColumnInfoList columnInfos, ColumnsWithTypeAndName columns); + void addExchangeReceiver(const String & name, MockColumnInfoVec columnInfos, ColumnsWithTypeAndName columns); std::unordered_map & executorIdColumnsMap() { return executor_id_columns_map; } DAGRequestBuilder scan(String db_name, String table_name); - DAGRequestBuilder receive(String exchange_name); + DAGRequestBuilder receive(String exchange_name, uint64_t fine_grained_shuffle_stream_count = 0); private: size_t index; - std::unordered_map mock_tables; - std::unordered_map exchange_schemas; + std::unordered_map mock_tables; + std::unordered_map exchange_schemas; std::unordered_map mock_table_columns; std::unordered_map mock_exchange_columns; std::unordered_map executor_id_columns_map; /// @@ -161,8 +159,10 @@ class MockDAGRequestContext ASTPtr buildColumn(const String & column_name); ASTPtr buildLiteral(const Field & field); -ASTPtr buildFunction(MockAsts exprs, const String & name); -ASTPtr buildOrderByItemList(MockOrderByItems order_by_items); +ASTPtr buildFunction(MockAstVec exprs, const String & name); +ASTPtr buildOrderByItemVec(MockOrderByItemVec order_by_items); + +MockWindowFrame buildDefaultRowsFrame(); #define col(name) buildColumn((name)) #define lit(field) buildLiteral((field)) @@ -173,7 +173,12 @@ ASTPtr buildOrderByItemList(MockOrderByItems order_by_items); #define gt(expr1, expr2) makeASTFunction("greater", (expr1), (expr2)) #define And(expr1, expr2) makeASTFunction("and", (expr1), (expr2)) #define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2)) -#define NOT(expr) makeASTFunction("not", (expr1), (expr2)) -#define Max(expr) makeASTFunction("max", expr) - -} // namespace DB::tests \ No newline at end of file +#define NOT(expr) makeASTFunction("not", (expr)) +#define Max(expr) makeASTFunction("max", (expr)) +#define Sum(expr) makeASTFunction("sum", (expr)) +/// Window functions +#define RowNumber() makeASTFunction("RowNumber") +#define Rank() makeASTFunction("Rank") +#define DenseRank() makeASTFunction("DenseRank") + +} // namespace DB::tests diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index 214148fe47f..72f0bb505d1 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -76,7 +76,7 @@ TEST_F(MockDAGRequestTest, Projection) try { auto request = context.scan("test_db", "test_table") - .project("s1") + 
.project({"s1"}) .build(context); { String expected = "project_1 | {<0, String>}\n" @@ -252,5 +252,17 @@ try } CATCH +TEST_F(MockDAGRequestTest, MockWindow) +try +{ + auto request = context.scan("test_db", "test_table").sort({"s1", false}, true).window(RowNumber(), {"s1", true}, {"s2", false}, buildDefaultRowsFrame()).build(context); + { + String expected = "window_2 | partition_by: {(<1, String>, desc: false)}}, order_by: {(<0, String>, desc: true)}, func_desc: {row_number()}, frame: {start<2, false, 0>, end<2, false, 0>}\n" + " sort_1 | isPartialSort: true, partition_by: {(<0, String>, desc: false)}\n" + " table_scan_0 | {<0, String>, <1, String>}\n"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } +} +CATCH } // namespace tests } // namespace DB \ No newline at end of file diff --git a/dbms/src/TestUtils/tests/gtest_print_columns.cpp b/dbms/src/TestUtils/tests/gtest_print_columns.cpp new file mode 100644 index 00000000000..50631fc4f4a --- /dev/null +++ b/dbms/src/TestUtils/tests/gtest_print_columns.cpp @@ -0,0 +1,57 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ +namespace tests +{ + +class PrintColumnsTest : public DB::tests::ExecutorTest +{ +public: + using ColStringType = std::optional::FieldType>; + using ColInt32Type = std::optional::FieldType>; + using ColumnWithString = std::vector; + using ColumnWithInt32 = std::vector; + + void initializeContext() override + { + test_cols.push_back(toNullableVec("col1", ColumnWithInt32{36, 34, 32, 27, {}, {}})); + test_cols.push_back(toNullableVec("col2", ColumnWithString{"female", "male", "male", "female", "male", "female"})); + col_len = test_cols[0].column->size(); + } + + ColumnsWithTypeAndName test_cols; + size_t col_len; + const String result1{"col1: (0: Int64_36, 1: Int64_34, 2: Int64_32, 3: Int64_27, 4: NULL, 5: NULL)\ncol2: (0: 'female', 1: 'male', 2: 'male', 3: 'female', 4: 'male', 5: 'female')\n"}; + const String result2{"col1: (0: Int64_36, 1: Int64_34, 2: Int64_32, 3: Int64_27, 4: NULL, 5: NULL)\ncol2: (0: 'female', 1: 'male', 2: 'male', 3: 'female', 4: 'male', 5: 'female')\n"}; + const String result3{"col1: (0: Int64_36)\ncol2: (0: 'female')\n"}; + const String result4{"col1: (1: Int64_34, 2: Int64_32, 3: Int64_27, 4: NULL)\ncol2: (1: 'male', 2: 'male', 3: 'female', 4: 'male')\n"}; +}; + +TEST_F(PrintColumnsTest, SimpleTest) +try +{ + EXPECT_EQ(getColumnsContent(test_cols), result1); + EXPECT_EQ(getColumnsContent(test_cols, 0, col_len - 1), result2); + EXPECT_EQ(getColumnsContent(test_cols, 0, 0), result3); + EXPECT_EQ(getColumnsContent(test_cols, 1, col_len - 2), result4); +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/TiDB/Schema/SchemaBuilder-internal.h b/dbms/src/TiDB/Schema/SchemaBuilder-internal.h index a331205ce8c..94edcbea204 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder-internal.h +++ b/dbms/src/TiDB/Schema/SchemaBuilder-internal.h @@ -35,7 +35,7 @@ struct TableInfo; } namespace DB { -std::tuple 
parseColumnsFromTableInfo(const TiDB::TableInfo & table_info, Poco::Logger * log); +std::tuple parseColumnsFromTableInfo(const TiDB::TableInfo & table_info); constexpr char tmpNamePrefix[] = "_tiflash_tmp_"; diff --git a/dbms/src/TiDB/Schema/SchemaBuilder.cpp b/dbms/src/TiDB/Schema/SchemaBuilder.cpp index 99e540e6c95..6e4ad10e344 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder.cpp +++ b/dbms/src/TiDB/Schema/SchemaBuilder.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -320,7 +321,7 @@ inline SchemaChanges detectSchemaChanges( } template -void SchemaBuilder::applyAlterPhysicalTable(DBInfoPtr db_info, TableInfoPtr table_info, ManageableStoragePtr storage) +void SchemaBuilder::applyAlterPhysicalTable(const DBInfoPtr & db_info, const TableInfoPtr & table_info, const ManageableStoragePtr & storage) { LOG_FMT_INFO(log, "Altering table {}", name_mapper.debugCanonicalName(*db_info, *table_info)); @@ -370,7 +371,15 @@ void SchemaBuilder::applyAlterPhysicalTable(DBInfoPtr db_inf const auto & schema_change = schema_changes[i]; /// Update column infos by applying schema change in this step. schema_change.second(orig_table_info); - /// Update schema version aggressively for the sake of correctness. + /// Update schema version aggressively for the sake of correctness (on the read path). + /// On reads, we compare table_info.schema_version (storage_version) and TiDBSchemaSyncer.cur_version (global_version) with query_version to decide whether we can serve the read under this query_version or need to sync the schema first. + /// A query is only served when storage_version <= query_version <= global_version (for details, see the comments in DAGStorageInterpreter::getAndLockStorages). + /// When multiple diffs are applied here, global_version is only bumped after all of them have been applied. + /// So while schema changes are in flight, global_version may lag behind the actual "global_version" of the local schema. + /// If we did not also update storage_version ahead of time, applying multiple diffs could leave us with: storage_version <= global_version < actual "global_version". + /// A query arriving with query_version == global_version would then give: storage_version <= global_version == query_version < actual "global_version". + /// Because storage_version <= query_version <= global_version holds, the query would be served, yet query_version < actual "global_version" means a newer schema would serve an older query, which may cause inconsistency. + /// So we update storage_version aggressively to prevent this scenario.
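// ----------------------------------------------------------------------------
// Editor's aside: the serving criterion described in the comment above, as a
// stand-alone toy predicate (illustration only, not TiFlash code):
//
//     bool canServe(Int64 storage_ver, Int64 query_ver, Int64 global_ver)
//     {
//         return storage_ver <= query_ver && query_ver <= global_ver;
//     }
//
// Bumping storage_version to the target version first makes
// canServe(target_version, stale_query_version, lagging_global_version) return
// false, so a stale query triggers a schema sync instead of being served with
// a newer schema than it expects.
// ----------------------------------------------------------------------------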
orig_table_info.schema_version = target_version; auto alter_lock = storage->lockForAlter(getThreadName()); storage->alterFromTiDB( @@ -386,7 +395,7 @@ void SchemaBuilder::applyAlterPhysicalTable(DBInfoPtr db_inf } template -void SchemaBuilder::applyAlterTable(DBInfoPtr db_info, TableID table_id) +void SchemaBuilder::applyAlterTable(const DBInfoPtr & db_info, TableID table_id) { auto table_info = getter.getTableInfo(db_info->id, table_id); if (table_info == nullptr) @@ -405,7 +414,7 @@ void SchemaBuilder::applyAlterTable(DBInfoPtr db_info, Table } template -void SchemaBuilder::applyAlterLogicalTable(DBInfoPtr db_info, TableInfoPtr table_info, ManageableStoragePtr storage) +void SchemaBuilder::applyAlterLogicalTable(const DBInfoPtr & db_info, const TableInfoPtr & table_info, const ManageableStoragePtr & storage) { // Alter logical table first. applyAlterPhysicalTable(db_info, table_info, storage); @@ -534,6 +543,11 @@ void SchemaBuilder::applyDiff(const SchemaDiff & diff) applySetTiFlashReplica(db_info, diff.table_id); break; } + case SchemaActionType::SetTiFlashMode: + { + applySetTiFlashMode(db_info, diff.table_id); + break; + } default: { if (diff.type < SchemaActionType::MaxRecognizedType) @@ -561,7 +575,7 @@ void SchemaBuilder::applyDiff(const SchemaDiff & diff) } template -void SchemaBuilder::applyPartitionDiff(TiDB::DBInfoPtr db_info, TableID table_id) +void SchemaBuilder::applyPartitionDiff(const TiDB::DBInfoPtr & db_info, TableID table_id) { auto table_info = getter.getTableInfo(db_info->id, table_id); if (table_info == nullptr) @@ -585,7 +599,7 @@ void SchemaBuilder::applyPartitionDiff(TiDB::DBInfoPtr db_in } template -void SchemaBuilder::applyPartitionDiff(TiDB::DBInfoPtr db_info, TableInfoPtr table_info, ManageableStoragePtr storage) +void SchemaBuilder::applyPartitionDiff(const TiDB::DBInfoPtr & db_info, const TableInfoPtr & table_info, const ManageableStoragePtr & storage) { const auto & orig_table_info = storage->getTableInfo(); if (!orig_table_info.isLogicalPartitionTable()) @@ -651,7 +665,7 @@ void SchemaBuilder::applyPartitionDiff(TiDB::DBInfoPtr db_in } template -void SchemaBuilder::applyRenameTable(DBInfoPtr new_db_info, TableID table_id) +void SchemaBuilder::applyRenameTable(const DBInfoPtr & new_db_info, TableID table_id) { auto new_table_info = getter.getTableInfo(new_db_info->id, table_id); if (new_table_info == nullptr) @@ -671,9 +685,9 @@ void SchemaBuilder::applyRenameTable(DBInfoPtr new_db_info, template void SchemaBuilder::applyRenameLogicalTable( - DBInfoPtr new_db_info, - TableInfoPtr new_table_info, - ManageableStoragePtr storage) + const DBInfoPtr & new_db_info, + const TableInfoPtr & new_table_info, + const ManageableStoragePtr & storage) { applyRenamePhysicalTable(new_db_info, *new_table_info, storage); @@ -695,9 +709,9 @@ void SchemaBuilder::applyRenameLogicalTable( template void SchemaBuilder::applyRenamePhysicalTable( - DBInfoPtr new_db_info, - TableInfo & new_table_info, - ManageableStoragePtr storage) + const DBInfoPtr & new_db_info, + const TableInfo & new_table_info, + const ManageableStoragePtr & storage) { const auto old_mapped_db_name = storage->getDatabaseName(); const auto new_mapped_db_name = name_mapper.mapDatabaseName(*new_db_info); @@ -900,7 +914,7 @@ String createDatabaseStmt(Context & context, const DBInfo & db_info, const Schem } template -void SchemaBuilder::applyCreateSchema(TiDB::DBInfoPtr db_info) +void SchemaBuilder::applyCreateSchema(const TiDB::DBInfoPtr & db_info) { GET_METRIC(tiflash_schema_internal_ddl_count, 
type_create_db).Increment(); LOG_FMT_INFO(log, "Creating database {}", name_mapper.debugDatabaseName(*db_info)); @@ -963,13 +977,12 @@ void SchemaBuilder::applyDropSchema(const String & db_name) } std::tuple -parseColumnsFromTableInfo(const TiDB::TableInfo & table_info, Poco::Logger * log) +parseColumnsFromTableInfo(const TiDB::TableInfo & table_info) { NamesAndTypes columns; std::vector primary_keys; for (const auto & column : table_info.columns) { - LOG_FMT_DEBUG(log, "Analyzing column: {}, type: {}", column.name, static_cast(column.tp)); DataTypePtr type = getDataTypeByColumnInfo(column); columns.emplace_back(column.name, type); if (column.hasPriKeyFlag()) @@ -999,7 +1012,7 @@ String createTableStmt( Poco::Logger * log) { LOG_FMT_DEBUG(log, "Analyzing table info : {}", table_info.serialize()); - auto [columns, pks] = parseColumnsFromTableInfo(table_info, log); + auto [columns, pks] = parseColumnsFromTableInfo(table_info); String stmt; WriteBufferFromString stmt_buf(stmt); @@ -1040,7 +1053,7 @@ String createTableStmt( } template -void SchemaBuilder::applyCreatePhysicalTable(DBInfoPtr db_info, TableInfoPtr table_info) +void SchemaBuilder::applyCreatePhysicalTable(const DBInfoPtr & db_info, const TableInfoPtr & table_info) { GET_METRIC(tiflash_schema_internal_ddl_count, type_create_table).Increment(); LOG_FMT_INFO(log, "Creating table {}", name_mapper.debugCanonicalName(*db_info, *table_info)); @@ -1102,7 +1115,7 @@ void SchemaBuilder::applyCreatePhysicalTable(DBInfoPtr db_in } template -void SchemaBuilder::applyCreateTable(TiDB::DBInfoPtr db_info, TableID table_id) +void SchemaBuilder::applyCreateTable(const TiDB::DBInfoPtr & db_info, TableID table_id) { auto table_info = getter.getTableInfo(db_info->id, table_id); if (table_info == nullptr) @@ -1116,7 +1129,7 @@ void SchemaBuilder::applyCreateTable(TiDB::DBInfoPtr db_info } template -void SchemaBuilder::applyCreateLogicalTable(TiDB::DBInfoPtr db_info, TableInfoPtr table_info) +void SchemaBuilder::applyCreateLogicalTable(const TiDB::DBInfoPtr & db_info, const TableInfoPtr & table_info) { if (table_info->isLogicalPartitionTable()) { @@ -1162,7 +1175,7 @@ void SchemaBuilder::applyDropPhysicalTable(const String & db } template -void SchemaBuilder::applyDropTable(DBInfoPtr db_info, TableID table_id) +void SchemaBuilder::applyDropTable(const DBInfoPtr & db_info, TableID table_id) { auto & tmt_context = context.getTMTContext(); auto * storage = tmt_context.getStorages().get(table_id).get(); @@ -1186,13 +1199,14 @@ void SchemaBuilder::applyDropTable(DBInfoPtr db_info, TableI } template -void SchemaBuilder::applySetTiFlashReplica(TiDB::DBInfoPtr db_info, TableID table_id) +void SchemaBuilder::applySetTiFlashReplica(const TiDB::DBInfoPtr & db_info, TableID table_id) { auto latest_table_info = getter.getTableInfo(db_info->id, table_id); if (unlikely(latest_table_info == nullptr)) { throw TiFlashException(fmt::format("miss table in TiKV : {}", table_id), Errors::DDL::StaleSchema); } + auto & tmt_context = context.getTMTContext(); auto storage = tmt_context.getStorages().get(latest_table_info->id); if (unlikely(storage == nullptr)) @@ -1201,18 +1215,37 @@ void SchemaBuilder::applySetTiFlashReplica(TiDB::DBInfoPtr d Errors::DDL::MissingTable); } - auto managed_storage = std::dynamic_pointer_cast(storage); - if (unlikely(!managed_storage)) - throw Exception(fmt::format("{} is not a ManageableStorage", name_mapper.debugCanonicalName(*db_info, *latest_table_info))); + applySetTiFlashReplicaOnLogicalTable(db_info, latest_table_info, storage); +} - 
applySetTiFlashReplica(db_info, latest_table_info, managed_storage);
+template <typename Getter, typename NameMapper>
+void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplicaOnLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage)
+{
+    applySetTiFlashReplicaOnPhysicalTable(db_info, table_info, storage);
+
+    if (table_info->isLogicalPartitionTable())
+    {
+        auto & tmt_context = context.getTMTContext();
+
+        for (const auto & part_def : table_info->partition.definitions)
+        {
+            auto new_part_table_info = table_info->producePartitionTableInfo(part_def.id, name_mapper);
+            auto part_storage = tmt_context.getStorages().get(new_part_table_info->id);
+            if (unlikely(part_storage == nullptr))
+            {
+                throw TiFlashException(fmt::format("miss table in TiFlash : {}", name_mapper.debugCanonicalName(*db_info, *new_part_table_info)),
+                                       Errors::DDL::MissingTable);
+            }
+            applySetTiFlashReplicaOnPhysicalTable(db_info, new_part_table_info, part_storage);
+        }
+    }
 }
 template <typename Getter, typename NameMapper>
-void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplica(
-    TiDB::DBInfoPtr db_info,
-    TiDB::TableInfoPtr latest_table_info,
-    ManageableStoragePtr storage)
+void SchemaBuilder<Getter, NameMapper>::applySetTiFlashReplicaOnPhysicalTable(
+    const TiDB::DBInfoPtr & db_info,
+    const TiDB::TableInfoPtr & latest_table_info,
+    const ManageableStoragePtr & storage)
 {
     if (storage->getTableInfo().replica_info.count == latest_table_info->replica_info.count)
         return;
@@ -1231,6 +1264,75 @@ void SchemaBuilder::applySetTiFlashReplica(
     LOG_FMT_INFO(log, "Updated replica info for {}", name_mapper.debugCanonicalName(*db_info, table_info));
 }
+
+template <typename Getter, typename NameMapper>
+void SchemaBuilder<Getter, NameMapper>::applySetTiFlashMode(const TiDB::DBInfoPtr & db_info, TableID table_id)
+{
+    auto latest_table_info = getter.getTableInfo(db_info->id, table_id);
+
+    if (unlikely(latest_table_info == nullptr))
+    {
+        throw TiFlashException(fmt::format("miss table in TiKV : {}", table_id), Errors::DDL::StaleSchema);
+    }
+
+    auto & tmt_context = context.getTMTContext();
+    auto storage = tmt_context.getStorages().get(latest_table_info->id);
+    if (unlikely(storage == nullptr))
+    {
+        throw TiFlashException(fmt::format("miss table in TiFlash : {}", name_mapper.debugCanonicalName(*db_info, *latest_table_info)),
+                               Errors::DDL::MissingTable);
+    }
+
+    applySetTiFlashModeOnLogicalTable(db_info, latest_table_info, storage);
+}
+
+template <typename Getter, typename NameMapper>
+void SchemaBuilder<Getter, NameMapper>::applySetTiFlashModeOnLogicalTable(
+    const TiDB::DBInfoPtr & db_info,
+    const TiDB::TableInfoPtr & table_info,
+    const ManageableStoragePtr & storage)
+{
+    applySetTiFlashModeOnPhysicalTable(db_info, table_info, storage);
+
+    if (table_info->isLogicalPartitionTable())
+    {
+        auto & tmt_context = context.getTMTContext();
+        for (const auto & part_def : table_info->partition.definitions)
+        {
+            auto new_part_table_info = table_info->producePartitionTableInfo(part_def.id, name_mapper);
+            auto part_storage = tmt_context.getStorages().get(new_part_table_info->id);
+            if (unlikely(part_storage == nullptr))
+            {
+                throw TiFlashException(fmt::format("miss table in TiFlash : {}", name_mapper.debugCanonicalName(*db_info, *new_part_table_info)),
+                                       Errors::DDL::MissingTable);
+            }
+            applySetTiFlashModeOnPhysicalTable(db_info, new_part_table_info, part_storage);
+        }
+    }
+}
+
+
+template <typename Getter, typename NameMapper>
+void SchemaBuilder<Getter, NameMapper>::applySetTiFlashModeOnPhysicalTable(
+    const TiDB::DBInfoPtr & db_info,
+    const TiDB::TableInfoPtr & latest_table_info,
+    const ManageableStoragePtr & storage)
+{
+    if (storage->getTableInfo().tiflash_mode == latest_table_info->tiflash_mode)
+        return;
+
+    TiDB::TableInfo table_info = storage->getTableInfo();
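+    // Take a copy of the storage's current table info and overwrite only tiflash_mode;
+    // the updated table info is persisted by alterFromTiDB below (the AlterCommands list stays empty).
+    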
table_info.tiflash_mode = latest_table_info->tiflash_mode; + AlterCommands commands; + + LOG_FMT_INFO(log, "Updating tiflash mode for {} to {}", name_mapper.debugCanonicalName(*db_info, table_info), TiFlashModeToString(table_info.tiflash_mode)); + + auto alter_lock = storage->lockForAlter(getThreadName()); + storage->alterFromTiDB(alter_lock, commands, name_mapper.mapDatabaseName(*db_info), table_info, name_mapper, context); + LOG_FMT_INFO(log, "Updated tiflash mode for {} to {}", name_mapper.debugCanonicalName(*db_info, table_info), TiFlashModeToString(table_info.tiflash_mode)); +} + + template void SchemaBuilder::syncAllSchema() { @@ -1299,7 +1401,9 @@ void SchemaBuilder::syncAllSchema() /// Rename if needed. applyRenameLogicalTable(db, table, storage); /// Update replica info if needed. - applySetTiFlashReplica(db, table, storage); + applySetTiFlashReplicaOnLogicalTable(db, table, storage); + /// Update tiflash mode if needed. + applySetTiFlashModeOnLogicalTable(db, table, storage); /// Alter if needed. applyAlterLogicalTable(db, table, storage); LOG_FMT_DEBUG(log, "Table {} synced during sync all schemas", name_mapper.debugCanonicalName(*db, *table)); diff --git a/dbms/src/TiDB/Schema/SchemaBuilder.h b/dbms/src/TiDB/Schema/SchemaBuilder.h index 8446765f74a..827203a682f 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder.h +++ b/dbms/src/TiDB/Schema/SchemaBuilder.h @@ -55,39 +55,44 @@ struct SchemaBuilder bool applyCreateSchema(DatabaseID schema_id); - void applyCreateSchema(TiDB::DBInfoPtr db_info); + void applyCreateSchema(const TiDB::DBInfoPtr & db_info); - void applyCreateTable(TiDB::DBInfoPtr db_info, TableID table_id); + void applyCreateTable(const TiDB::DBInfoPtr & db_info, TableID table_id); - void applyCreateLogicalTable(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info); + void applyCreateLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info); - void applyCreatePhysicalTable(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info); + void applyCreatePhysicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info); - void applyDropTable(TiDB::DBInfoPtr db_info, TableID table_id); + void applyDropTable(const TiDB::DBInfoPtr & db_info, TableID table_id); /// Parameter schema_name should be mapped. 
void applyDropPhysicalTable(const String & db_name, TableID table_id); - void applyPartitionDiff(TiDB::DBInfoPtr db_info, TableID table_id); + void applyPartitionDiff(const TiDB::DBInfoPtr & db_info, TableID table_id); - void applyPartitionDiff(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info, ManageableStoragePtr storage); + void applyPartitionDiff(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); - void applyAlterTable(TiDB::DBInfoPtr db_info, TableID table_id); + void applyAlterTable(const TiDB::DBInfoPtr & db_info, TableID table_id); - void applyAlterLogicalTable(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info, ManageableStoragePtr storage); + void applyAlterLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); - void applyAlterPhysicalTable(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info, ManageableStoragePtr storage); + void applyAlterPhysicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); - void applyRenameTable(TiDB::DBInfoPtr new_db_info, TiDB::TableID table_id); + void applyRenameTable(const TiDB::DBInfoPtr & new_db_info, TiDB::TableID table_id); - void applyRenameLogicalTable(TiDB::DBInfoPtr new_db_info, TiDB::TableInfoPtr new_table_info, ManageableStoragePtr storage); + void applyRenameLogicalTable(const TiDB::DBInfoPtr & new_db_info, const TiDB::TableInfoPtr & new_table_info, const ManageableStoragePtr & storage); - void applyRenamePhysicalTable(TiDB::DBInfoPtr new_db_info, TiDB::TableInfo & new_table_info, ManageableStoragePtr storage); + void applyRenamePhysicalTable(const TiDB::DBInfoPtr & new_db_info, const TiDB::TableInfo & new_table_info, const ManageableStoragePtr & storage); void applyExchangeTablePartition(const SchemaDiff & diff); - void applySetTiFlashReplica(TiDB::DBInfoPtr db_info, TableID table_id); - void applySetTiFlashReplica(TiDB::DBInfoPtr db_info, TiDB::TableInfoPtr table_info, ManageableStoragePtr storage); + void applySetTiFlashReplica(const TiDB::DBInfoPtr & db_info, TableID table_id); + void applySetTiFlashReplicaOnLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); + void applySetTiFlashReplicaOnPhysicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); + + void applySetTiFlashMode(const TiDB::DBInfoPtr & db_info, TableID table_id); + void applySetTiFlashModeOnLogicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); + void applySetTiFlashModeOnPhysicalTable(const TiDB::DBInfoPtr & db_info, const TiDB::TableInfoPtr & table_info, const ManageableStoragePtr & storage); }; } // namespace DB diff --git a/dbms/src/TiDB/Schema/SchemaGetter.cpp b/dbms/src/TiDB/Schema/SchemaGetter.cpp index 7f52f9301b1..6e333d6ba87 100644 --- a/dbms/src/TiDB/Schema/SchemaGetter.cpp +++ b/dbms/src/TiDB/Schema/SchemaGetter.cpp @@ -19,7 +19,6 @@ namespace DB { - namespace ErrorCodes { extern const int SCHEMA_SYNC_ERROR; @@ -188,18 +187,26 @@ Int64 SchemaGetter::getVersion() return std::stoll(ver); } +bool SchemaGetter::checkSchemaDiffExists(Int64 ver) +{ + String key = getSchemaDiffKey(ver); + String data = TxnStructure::get(snap, key); + return !data.empty(); +} + String SchemaGetter::getSchemaDiffKey(Int64 ver) { return std::string(schemaDiffPrefix) + ":" + 
std::to_string(ver);
 }
-SchemaDiff SchemaGetter::getSchemaDiff(Int64 ver)
+std::optional<SchemaDiff> SchemaGetter::getSchemaDiff(Int64 ver)
 {
     String key = getSchemaDiffKey(ver);
     String data = TxnStructure::get(snap, key);
     if (data.empty())
     {
-        throw TiFlashException("cannot find schema diff for version: " + std::to_string(ver), Errors::Table::SyncError);
+        LOG_FMT_WARNING(log, "The schema diff for version {}, key {} is empty.", ver, key);
+        return std::nullopt;
     }
     SchemaDiff diff;
     diff.deserialize(data);
diff --git a/dbms/src/TiDB/Schema/SchemaGetter.h b/dbms/src/TiDB/Schema/SchemaGetter.h
index cfa5e1c6335..72fd00678f7 100644
--- a/dbms/src/TiDB/Schema/SchemaGetter.h
+++ b/dbms/src/TiDB/Schema/SchemaGetter.h
@@ -26,8 +26,11 @@
 #include
+#include <optional>
+
 namespace DB
 {
+// The enum values are exactly the same as the DDL Actions listed in "parser/model/ddl.go" of the TiDB codebase, and must be kept in sync with it.
 enum class SchemaActionType : Int8
 {
     None = 0,
@@ -91,11 +94,14 @@ enum class SchemaActionType : Int8
     AlterTableStatsOptions = 58,
     AlterNoCacheTable = 59,
     CreateTables = 60,
+    ActionMultiSchemaChange = 61,
+    SetTiFlashMode = 62,
+
     // If we support a new action type from TiDB,
     // MaxRecognizedType also needs to be changed.
     // It should always be equal to the maximum supported type + 1
-    MaxRecognizedType = 61,
+    MaxRecognizedType = 63,
 };
 struct AffectedOption
@@ -137,7 +143,9 @@ struct SchemaGetter
     Int64 getVersion();
-    SchemaDiff getSchemaDiff(Int64 ver);
+    bool checkSchemaDiffExists(Int64 ver);
+
+    std::optional<SchemaDiff> getSchemaDiff(Int64 ver);
     static String getSchemaDiffKey(Int64 ver);
diff --git a/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h b/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h
index 4fdba195acb..a23aeab139f 100644
--- a/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h
+++ b/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h
@@ -106,21 +106,31 @@ struct TiDBSchemaSyncer : public SchemaSyncer
         Stopwatch watch;
         SCOPE_EXIT({ GET_METRIC(tiflash_schema_apply_duration_seconds).Observe(watch.elapsedSeconds()); });
-        LOG_FMT_INFO(log, "start to sync schemas. current version is: {} and try to sync schema version to: {}", cur_version, version);
+        LOG_FMT_INFO(log, "Start to sync schemas. Current version is {}, try to sync schema version to {}", cur_version, version);
         // Show whether the schema mutex is held for a long time or not.
         GET_METRIC(tiflash_schema_applying).Set(1.0);
         SCOPE_EXIT({ GET_METRIC(tiflash_schema_applying).Set(0.0); });
         GET_METRIC(tiflash_schema_apply_count, type_diff).Increment();
-        if (!tryLoadSchemaDiffs(getter, version, context))
+        // Since the concurrent DDL feature, TiDB does `update schema version` before `set schema diff`, and they are done in separate transactions.
+        // So TiFlash may see a schema version X but no schema diff X, meaning that the transaction writing schema diff X has not been committed or has
+        // been aborted.
+        // However, TiDB guarantees that if we see schema version X, then schema diff X-1 must exist. Otherwise the transaction of schema diff
+        // X-1 was aborted and we can safely ignore it.
+        // Since TiDB cannot guarantee that the schema diff of the latest schema version X is non-empty, in this situation we should set `cur_version`
+        // to X-1 and try to fetch schema diff X again next time.
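+        // For example (illustrative numbers, not from the source): TiDB has bumped the schema version to 100, but the
+        // transaction writing schema diff 100 is still in flight; this round then syncs up to version 99, and the next
+        // round retries schema diff 100.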
+        Int64 version_after_load_diff = 0;
+        if (version_after_load_diff = tryLoadSchemaDiffs(getter, version, context); version_after_load_diff == -1)
        {
            GET_METRIC(tiflash_schema_apply_count, type_full).Increment();
            loadAllSchema(getter, version, context);
+            // After loadAllSchema, update `version_after_load_diff` according to whether the schema diff of the last version exists.
+            version_after_load_diff = getter.checkSchemaDiffExists(version) ? version : version - 1;
        }
-        cur_version = version;
+        cur_version = version_after_load_diff;
        GET_METRIC(tiflash_schema_version).Set(cur_version);
-        LOG_FMT_INFO(log, "end sync schema, version has been updated to {}", cur_version);
+        LOG_FMT_INFO(log, "End sync schema, version has been updated to {}{}", cur_version, cur_version == version ? "" : " (latest diff is empty)");
        return true;
    }
@@ -144,30 +154,60 @@ struct TiDBSchemaSyncer : public SchemaSyncer
        return it->second;
    }
-    bool tryLoadSchemaDiffs(Getter & getter, Int64 version, Context & context)
+    // Return values:
+    // - if the latest schema diff is not empty, return latest_version
+    // - if the latest schema diff is empty, return (latest_version - 1)
+    // - if an error happened, return -1
+    Int64 tryLoadSchemaDiffs(Getter & getter, Int64 latest_version, Context & context)
    {
-        if (isTooOldSchema(cur_version, version))
+        if (isTooOldSchema(cur_version, latest_version))
        {
-            return false;
+            return -1;
        }
-        LOG_FMT_DEBUG(log, "try load schema diffs.");
+        LOG_FMT_DEBUG(log, "Try to load schema diffs.");
-        SchemaBuilder<Getter, NameMapper> builder(getter, context, databases, version);
+        SchemaBuilder<Getter, NameMapper> builder(getter, context, databases, latest_version);
        Int64 used_version = cur_version;
-        std::vector<SchemaDiff> diffs;
-        while (used_version < version)
+        // First fetch all schema diffs from `cur_version` to `latest_version`. Only apply the diffs if all of
+        // them have been fetched without any exception.
+        std::vector<std::optional<SchemaDiff>> diffs;
+        while (used_version < latest_version)
        {
            used_version++;
            diffs.push_back(getter.getSchemaDiff(used_version));
        }
-        LOG_FMT_DEBUG(log, "end load schema diffs with total {} entries.", diffs.size());
+        LOG_FMT_DEBUG(log, "End loading schema diffs with total {} entries.", diffs.size());
+
        try
        {
-            for (const auto & diff : diffs)
            {
-                builder.applyDiff(diff);
+            for (size_t diff_index = 0; diff_index < diffs.size(); ++diff_index)
+            {
+                const auto & schema_diff = diffs[diff_index];
+
+                if (!schema_diff)
+                {
+                    // If the schema diff of `latest_version` is empty,
+                    // we do not apply `latest_version` but only apply up to `latest_version - 1`.
+                    // If a schema diff within [`cur_version`, `latest_version - 1`] is empty,
+                    // we just skip it.
+                    //
+                    // Example:
+                    // - `cur_version` is 1, `latest_version` is 10
+                    // - if the schema diffs of versions [2,4,6] are empty, we just skip them
+                    // - if the schema diff of version 10 is empty, we only apply up to version 9
+                    if (diff_index != diffs.size() - 1)
+                    {
+                        LOG_FMT_WARNING(log, "Skip the empty schema diff of version {}. 
", cur_version + diff_index + 1); + continue; + } + + // if diff_index == diffs.size() - 1, return used_version - 1; + return used_version - 1; + } + + builder.applyDiff(*schema_diff); } } catch (TiFlashException & e) @@ -177,7 +217,7 @@ struct TiDBSchemaSyncer : public SchemaSyncer GET_METRIC(tiflash_schema_apply_count, type_failed).Increment(); } LOG_FMT_WARNING(log, "apply diff meets exception : {} \n stack is {}", e.displayText(), e.getStackTrace().toString()); - return false; + return -1; } catch (Exception & e) { @@ -187,21 +227,22 @@ struct TiDBSchemaSyncer : public SchemaSyncer } GET_METRIC(tiflash_schema_apply_count, type_failed).Increment(); LOG_FMT_WARNING(log, "apply diff meets exception : {} \n stack is {}", e.displayText(), e.getStackTrace().toString()); - return false; + return -1; } catch (Poco::Exception & e) { GET_METRIC(tiflash_schema_apply_count, type_failed).Increment(); LOG_FMT_WARNING(log, "apply diff meets exception : {}", e.displayText()); - return false; + return -1; } catch (std::exception & e) { GET_METRIC(tiflash_schema_apply_count, type_failed).Increment(); LOG_FMT_WARNING(log, "apply diff meets exception : {}", e.what()); - return false; + return -1; } - return true; + + return used_version; } void loadAllSchema(Getter & getter, Int64 version, Context & context) diff --git a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp index e4205f6f938..06253cac66e 100644 --- a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp +++ b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp @@ -12,334 +12,180 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include -#include -#include -#include -#include +#include namespace DB::tests { -class WindowFunction : public DB::tests::FunctionTest +class WindowExecutorTestRunner : public DB::tests::ExecutorTest { -protected: - std::shared_ptr mock_interpreter; - - void SetUp() override - { - DB::tests::FunctionTest::SetUp(); - DB::registerWindowFunctions(); - } - - template - ColumnWithTypeAndName toNullableVec(String name, const std::vector::FieldType>> & v) - { - return createColumn>(v, name); - } - - template - ColumnWithTypeAndName toVec(String name, const std::vector::FieldType> & v) - { - return createColumn(v, name); - } - - template - static ColumnWithTypeAndName toConst(const T s) - { - return createConstColumn(1, s); - } - - static ColumnWithTypeAndName toDatetimeVec(String name, const std::vector & v, int fsp) - { - std::vector::FieldType> vec; - for (const auto & value_str : v) - { - Field value = parseMyDateTime(value_str, fsp); - vec.push_back(value.template safeGet()); - } - DataTypePtr data_type = std::make_shared(fsp); - return {makeColumn(data_type, vec), data_type, name, 0}; - } - - static ColumnWithTypeAndName toNullableDatetimeVec(String name, const std::vector & v, int fsp) - { - std::vector::FieldType>> vec; - for (const auto & value_str : v) - { - if (!value_str.empty()) - { - Field value = parseMyDateTime(value_str, fsp); - vec.push_back(value.template safeGet()); - } - else - { - vec.push_back({}); - } - } - DataTypePtr data_type = makeNullable(std::make_shared(fsp)); - return {makeColumn>(data_type, vec), data_type, name, 0}; - } - - void setMaxBlockSize(int size) - { - context.getSettingsRef().max_block_size.set(size); - } - - void mockInterpreter(std::vector source_columns, Context context) - { - std::vector mock_input_streams_vec = {}; 
- DAGQueryBlock mock_query_block(0, static_cast>(nullptr)); - std::vector mock_subqueries_for_sets = {}; - mock_interpreter = std::make_shared(context, - mock_input_streams_vec, - mock_query_block, - 1); - - mock_interpreter->analyzer = std::make_unique(std::move(source_columns), context); - } - - void mockExecuteTableScan(DAGPipeline & pipeline, ColumnsWithTypeAndName columns) - { - pipeline.streams.push_back(std::make_shared(columns, context.getSettingsRef().max_block_size)); - mock_interpreter->input_streams_vec.push_back(pipeline.streams); - } - - void mockExecuteWindowOrder(DAGPipeline & pipeline, std::string sort_json_str) +public: + void initializeContext() override { - tipb::Sort sort; - google::protobuf::util::JsonStringToMessage(sort_json_str, &sort); - mock_interpreter->handleWindowOrder(pipeline, sort); - mock_interpreter->input_streams_vec[0] = pipeline.streams; - NamesWithAliases final_project; - for (const auto & column : (*mock_interpreter->analyzer).source_columns) - { - final_project.push_back({column.name, ""}); - } - mockExecuteProject(pipeline, final_project); - } - - void mockExecuteWindow(DAGPipeline & pipeline, std::string window_json_str) - { - tipb::Window window; - google::protobuf::util::JsonStringToMessage(window_json_str, &window); - mock_interpreter->handleWindow(pipeline, window); - mock_interpreter->input_streams_vec[0] = pipeline.streams; - NamesWithAliases final_project; - for (const auto & column : (*mock_interpreter->analyzer).source_columns) - { - final_project.push_back({column.name, ""}); - } - mockExecuteProject(pipeline, final_project); - } - - void mockExecuteProject(DAGPipeline & pipeline, NamesWithAliases & final_project) - { - mock_interpreter->executeProject(pipeline, final_project); - } - - static Block mergeBlocks(Blocks blocks) - { - if (blocks.empty()) - { - return {}; - } - Block sample_block; - std::vector actual_cols; - - for (const auto & block : blocks) - { - if (!sample_block) - { - sample_block = block; - for (const auto & column : block.getColumnsWithTypeAndName()) - { - actual_cols.push_back(column.type->createColumn()); - } - } - - for (size_t i = 0; i < block.columns(); ++i) - { - for (size_t j = 0; j < block.rows(); ++j) - { - actual_cols[i]->insert((*(block.getColumnsWithTypeAndName())[i].column)[j]); - } - } - } - - ColumnsWithTypeAndName actual_columns; - - for (size_t i = 0; i < actual_cols.size(); ++i) - { - actual_columns.push_back({std::move(actual_cols[i]), sample_block.getColumnsWithTypeAndName()[i].type, sample_block.getColumnsWithTypeAndName()[i].name, sample_block.getColumnsWithTypeAndName()[i].column_id}); - } - return Block(actual_columns); - } - - void testOneWindowFunction(const std::vector & source_column_types, const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & expect_columns, const std::string window_json_str, const std::string sort_json_str) - { - mockInterpreter(source_column_types, context); - DAGPipeline pipeline; - ExpressionActionsChain chain; - Block except_block(expect_columns); - - mockExecuteTableScan(pipeline, source_columns); - - mockExecuteWindowOrder(pipeline, sort_json_str); - - mockExecuteWindow(pipeline, window_json_str); - - auto stream = pipeline.firstStream(); - - Blocks actual_blocks; - while (Block block = stream->read()) - { - actual_blocks.push_back(block); - } - - Block actual_block = mergeBlocks(actual_blocks); - - if (actual_block) - { - // Check that input columns is properly split to many blocks - ASSERT_EQ(actual_blocks.size(), (actual_block.rows() - 1) 
/ context.getSettingsRef().max_block_size + 1); - } - ASSERT_BLOCK_EQ(except_block, actual_block); + ExecutorTest::initializeContext(); + context.addMockTable( + {"test_db", "test_table"}, + {{"partition", TiDB::TP::TypeLongLong}, {"order", TiDB::TP::TypeLongLong}}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}); + context.addMockTable( + {"test_db", "test_table_string"}, + {{"partition", TiDB::TP::TypeString}, {"order", TiDB::TP::TypeString}}, + {toVec("partition", {"banana", "banana", "banana", "banana", "apple", "apple", "apple", "apple"}), + toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"})}); + + context.addMockTable( + {"test_db", "test_table_more_cols"}, + {{"partition1", TiDB::TP::TypeLongLong}, {"partition2", TiDB::TP::TypeLongLong}, {"order1", TiDB::TP::TypeLongLong}, {"order2", TiDB::TP::TypeLongLong}}, + {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), + toVec("order1", {2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1}), + toVec("order2", {2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1})}); + + context.addMockTable( + {"test_db", "test_table_float64"}, + {{"partition", TiDB::TP::TypeDouble}, {"order", TiDB::TP::TypeDouble}}, + {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}); + + context.addMockTable( + {"test_db", "test_table_datetime"}, + {{"partition", TiDB::TP::TypeDatetime}, {"order", TiDB::TP::TypeDatetime}}); + + context.addMockTable( + {"test_db", "test_table_for_rank"}, + {{"partition", TiDB::TP::TypeLongLong}, {"order", TiDB::TP::TypeLongLong}}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}); } }; -TEST_F(WindowFunction, testWindowFunctionByPartitionAndOrder) +TEST_F(WindowExecutorTestRunner, testWindowFunctionByPartitionAndOrder) try { - setMaxBlockSize(3); - - std::string window_json; - std::string sort_json; - /***** row_number with different types of input *****/ // int - sql : select *, row_number() over w1 from test1 window w1 as (partition by partition_int order by order_int) - window_json = 
R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAkMCV6NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAkMCV6NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + auto request = context + .scan("test_db", "test_table") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), 
{"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // null input - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, + executeStreamsWithSingleSource( + request, {toNullableVec("partition", {}), toNullableVec("order", {})}, - {}, - window_json, - sort_json); + {}); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), {toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}}), + createColumns({toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); // string - sql : select *, row_number() over w1 from test2 window w1 as (partition by partition_string order by order_string) - window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGA8Nz57tP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = 
R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGA8Nz57tP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"},{"tp":254,"flag":0,"flen":32,"decimal":0,"collate":46,"charset":"utf8mb4"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toVec("partition", {"banana", "banana", "banana", "banana", "apple", "apple", "apple", "apple"}), toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"})}, - {toVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + request = context + .scan("test_db", "test_table_string") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); - // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}, - {toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); - // decimal - sql : select *, row_number() over w1 from test3 window w1 as (partition by partition_float order by order_decimal) - window_json = 
R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAoN3M99P+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAoN3M99P+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"},{"tp":246,"flag":0,"flen":6,"decimal":2,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, - {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + // nullable + 
ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}), + createColumns({toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); + + // float64 - sql : select *, row_number() over w1 from test3 window w1 as (partition by partition_float order by order_float64) + request = context + .scan("test_db", "test_table_float64") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, - {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}), + createColumns({toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); // datetime - select *, row_number() over w1 from test4 window w1 as (partition by partition_datetime order by order_datetime); - window_json = 
R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsNmBhdT+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsNmBhdT+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"},{"tp":12,"flag":128,"flen":26,"decimal":6,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), - toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, - {toDatetimeVec("partition", {"20220101010101", 
"20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), - toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + request = context + .scan("test_db", "test_table_datetime") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}), + createColumns({toNullableDatetimeVec("partition", {"20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toNullableDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableDatetimeVec("partition", {"20220101010102", {}, "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), - toNullableDatetimeVec("order", {"20220101010101", {}, "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, - {toNullableDatetimeVec("partition", {{}, "20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), - toNullableDatetimeVec("order", {{}, "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), - toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})}, - window_json, - sort_json); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableDatetimeVec("partition", {"20220101010102", {}, "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + toNullableDatetimeVec("order", {"20220101010101", {}, "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}), + createColumns({toNullableDatetimeVec("partition", {{}, "20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toNullableDatetimeVec("order", {{}, "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); // 2 partiton key and 2 order key // sql : select *, row_number() over w1 from test6 window w1 as (partition by 
partition_int1, partition_int2 order by order_int1,order_int2) - window_json = R"({"funcDesc":[{"tp":"RowNumber","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"frame":{"type":"Rows","start":{"type":"CurrentRow","unbounded":false,"offset":"0"},"end":{"type":"CurrentRow","unbounded":false,"offset":"0"}},"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIKA0Img1If/BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = 
R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAI=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAM=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIKA0Img1If/BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition1", std::make_shared()), NameAndTypePair("partition2", std::make_shared()), NameAndTypePair("order1", std::make_shared()), NameAndTypePair("order2", std::make_shared())}, - {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), toVec("order1", {2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1}), toVec("order2", {2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1})}, - {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), toVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), toVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})}, - window_json, - sort_json); + request = context + .scan("test_db", "test_table_more_cols") + .sort({{"partition1", false}, {"partition2", false}, {"order1", false}, {"order2", false}}, true) + .window(RowNumber(), {{"order1", false}, {"order2", false}}, {{"partition1", false}, {"partition2", false}}, buildDefaultRowsFrame()) + .build(context); + + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + toNullableVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), + toNullableVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), + toNullableVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})})); /***** rank, dense_rank *****/ - window_json = 
R"({"funcDesc":[{"tp":"Rank","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false},{"tp":"DenseRank","sig":"Unspecified","fieldType":{"tp":8,"flag":128,"flen":21,"decimal":-1,"collate":63,"charset":"binary"},"hasDistinct":false}],"partitionBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"orderBy":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"child":{"tp":"TypeSort","executorId":"Sort_12","sort":{"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsOnl3NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}}}})"; - sort_json = R"({"byItems":[{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAA=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false},{"expr":{"tp":"ColumnRef","val":"gAAAAAAAAAE=","sig":"Unspecified","fieldType":{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},"hasDistinct":false},"desc":false}],"isPartialSort":true,"child":{"tp":"TypeExchangeReceiver","exchangeReceiver":{"encodedTaskMeta":["CIGAsOnl3NP+BRABIg4xMjcuMC4wLjE6MzkzMA=="],"fieldTypes":[{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"},{"tp":3,"flag":0,"flen":11,"decimal":0,"collate":63,"charset":"binary"}]},"executorId":"ExchangeReceiver_11"}})"; - testOneWindowFunction( - {NameAndTypePair("partition", std::make_shared()), NameAndTypePair("order", std::make_shared())}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), toVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})}, - window_json, - sort_json); + request = context.scan("test_db", "test_table_for_rank").sort({{"partition", false}, {"order", false}}, true).window({Rank(), 
DenseRank()}, {{"order", false}}, {{"partition", false}}, MockWindowFrame{}).build(context); + ASSERT_COLUMNS_EQ_R(executeStreams(request), + createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})})); // nullable - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})}, - window_json, - sort_json); - - testOneWindowFunction( - {NameAndTypePair("partition", makeNullable(std::make_shared())), NameAndTypePair("order", makeNullable(std::make_shared()))}, - {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}, - {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})}, - window_json, - sort_json); + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}), + createColumns({toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})})); + + ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource( + request, + {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}), + createColumns({toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})})); } CATCH + } // namespace DB::tests diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 5fd25c5d238..2bedb312d07 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -198,3 +198,7 @@ if (ARCH_AMD64) src/crc64_sse2_asimd.cpp APPEND COMPILE_FLAGS "-mpclmul") endif() + +if (ARCH_AARCH64 AND ARCH_LINUX) + target_link_libraries (common PUBLIC tiflash-aarch64-string tiflash-aarch64-math) +endif() diff --git a/libs/libcommon/include/common/getMemoryAmount.h b/libs/libcommon/include/common/getMemoryAmount.h index 98aa87661c3..0807c6f8e12 100644 --- a/libs/libcommon/include/common/getMemoryAmount.h +++ b/libs/libcommon/include/common/getMemoryAmount.h @@ -19,5 +19,6 @@ /** * Returns the size of physical memory (RAM) in bytes. * Returns 0 on unsupported platform +* Note: does not support environments under resource isolation mechanisms such as Docker or cgroups.
*/ uint64_t getMemoryAmount(); diff --git a/libs/libcommon/include/common/types.h b/libs/libcommon/include/common/types.h index 139fc10e980..87c7215d91f 100644 --- a/libs/libcommon/include/common/types.h +++ b/libs/libcommon/include/common/types.h @@ -25,6 +25,7 @@ #if defined(__clang__) #pragma GCC diagnostic ignored "-Wunknown-warning-option" #pragma GCC diagnostic ignored "-Wdeprecated-copy" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #pragma GCC diagnostic ignored "-Wtautological-constant-out-of-range-compare" #endif #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index f899a47ed10..0d72f950add 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -52,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1653635389238, + "iteration": 1654217728945, "links": [], "panels": [ { @@ -542,7 +542,14 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/limit/", + "fill": 0, + "nullPointMode": "null", + "color": "#C4162A" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -633,6 +640,13 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "K" + }, + { + "expr": "sum(tiflash_system_current_metric_MemoryCapacity{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "legendFormat": "limit-{{instance}}", + "exemplar": true, + "refId": "L", + "hide": false } ], "thresholds": [], @@ -701,15 +715,15 @@ "hiddenSeries": false, "id": 51, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -728,6 +742,12 @@ "alias": "total", "fill": 0, "lines": false + }, + { + "alias": "/limit/", + "fill": 0, + "nullPointMode": "null", + "color": "#C4162A" } ], "spaceLength": 10, @@ -742,6 +762,13 @@ "legendFormat": "{{instance}}", "refId": "A", "step": 40 + }, + { + "expr": "sum(tiflash_system_current_metric_LogicalCPUCores{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "legendFormat": "limit-{{instance}}", + "exemplar": true, + "refId": "B", + "intervalFactor": 1 } ], "thresholds": [], @@ -3878,7 +3905,7 @@ "fill": 0, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 5, "w": 12, "x": 0, "y": 21 @@ -3893,6 +3920,7 @@ "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": false }, @@ -3908,38 +3936,27 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/(delta_merge)|(seg_)/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, - "steppedLine": false, + "steppedLine": true, "targets": [ { - "expr": "sum(rate(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"delta_merge|delta_merge_fg|delta_merge_bg_gc|seg_merge|seg_split|seg_split_fg\"}[1m])) by (type)", + "exemplar": true, + "expr": "sum(rate(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (type)", "format": "time_series", "hide": false, - 
"intervalFactor": 1, + "interval": "", + "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A" - }, - { - "expr": "sum(increase(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"delta_merge|delta_merge_fg|delta_merge_bg_gc|seg_merge|seg_split|seg_split_fg\"}[1m])) by (type)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{type}}", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Tasks OPS", + "title": "Small Internal Tasks OPS", "tooltip": { "shared": true, "sort": 0, @@ -3955,7 +3972,7 @@ }, "yaxes": [ { - "decimals": null, + "decimals": 1, "format": "ops", "label": null, "logBase": 1, @@ -3969,7 +3986,7 @@ "logBase": 1, "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { @@ -3988,10 +4005,10 @@ "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 5, "w": 12, "x": 12, "y": 21 @@ -4023,58 +4040,233 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "/^.*-delta_merge/", - "yaxis": 2 + "exemplar": false, + "expr": "histogram_quantile(1, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (le,type))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max-{{type}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Small Internal Tasks Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true }, { - "alias": "/^.*-seg_split/", - "yaxis": 2 + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false } ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Total number of storage's internal sub tasks", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 130, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], "spaceLength": 10, "stack": false, - "steppedLine": false, + "steppedLine": true, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "exemplar": true, + "expr": 
"sum(rate(tiflash_storage_subtask_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (type)", "format": "time_series", "hide": false, - "intervalFactor": 1, - "legendFormat": "max-{{type}}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", "refId": "A" - }, + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Large Internal Tasks OPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "99-{{type}}", - "refId": "B" + "decimals": 1, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true }, { - "expr": "histogram_quantile(0.95, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "95-{{type}}", - "refId": "C" - }, + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Duration of storage's internal sub tasks", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 131, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "expr": "histogram_quantile(0.80, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le,type))", + "exemplar": true, + "expr": "histogram_quantile(1, sum(rate(tiflash_storage_subtask_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"(delta_merge|seg_merge|seg_split).*\"}[$__rate_interval])) by (le,type))", "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "80-{{type}}", - "refId": "D" + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max-{{type}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Internal Tasks Duration", + "title": "Large Internal Tasks Duration", "tooltip": { "shared": true, "sort": 0, @@ -4090,6 +4282,7 @@ }, "yaxes": [ { + "decimals": 
1, "format": "s", "label": null, "logBase": 1, @@ -4103,7 +4296,7 @@ "logBase": 1, "max": null, "min": "0", - "show": true + "show": false } ], "yaxis": { @@ -4128,7 +4321,7 @@ "h": 8, "w": 12, "x": 0, - "y": 29 + "y": 31 }, "hiddenSeries": false, "id": 43, @@ -4234,7 +4427,7 @@ "h": 8, "w": 12, "x": 12, - "y": 29 + "y": 31 }, "heatmap": {}, "hideZeroBuckets": true, @@ -4297,7 +4490,7 @@ "h": 8, "w": 12, "x": 0, - "y": 37 + "y": 39 }, "hiddenSeries": false, "id": 46, @@ -4420,7 +4613,7 @@ "h": 8, "w": 12, "x": 12, - "y": 37 + "y": 39 }, "hiddenSeries": false, "id": 47, @@ -4544,7 +4737,7 @@ "h": 8, "w": 12, "x": 0, - "y": 45 + "y": 47 }, "height": "", "hiddenSeries": false, @@ -4674,7 +4867,7 @@ "h": 8, "w": 12, "x": 12, - "y": 45 + "y": 47 }, "height": "", "hiddenSeries": false, @@ -4802,7 +4995,7 @@ "h": 8, "w": 12, "x": 0, - "y": 53 + "y": 55 }, "hiddenSeries": false, "id": 88, @@ -5002,7 +5195,7 @@ "h": 8, "w": 12, "x": 12, - "y": 53 + "y": 55 }, "hiddenSeries": false, "id": 67, @@ -5116,7 +5309,7 @@ "h": 8, "w": 12, "x": 0, - "y": 61 + "y": 63 }, "hiddenSeries": false, "id": 84, @@ -5216,7 +5409,7 @@ "h": 8, "w": 12, "x": 12, - "y": 61 + "y": 63 }, "hiddenSeries": false, "id": 86, @@ -8183,5 +8376,5 @@ "timezone": "", "title": "Test-Cluster-TiFlash-Summary", "uid": "SVbh2xUWk", - "version": 2 -} + "version": 1 +} \ No newline at end of file diff --git a/release-centos7-llvm/Makefile b/release-centos7-llvm/Makefile index 1b15df7ddc3..9c1bba42a53 100644 --- a/release-centos7-llvm/Makefile +++ b/release-centos7-llvm/Makefile @@ -23,6 +23,10 @@ image_tiflash_llvm_base_aarch64: build_tiflash_release_amd64: docker run --rm -v $(realpath ..):/build/tics hub.pingcap.net/tiflash/tiflash-llvm-base:amd64 /build/tics/release-centos7-llvm/scripts/build-release.sh +# Add build_tiflash_debug_amd64 target to enable FailPoints on x86. Since outputs are the same as release version, no new package targets added. +build_tiflash_debug_amd64: + docker run --rm -v $(realpath ..):/build/tics hub.pingcap.net/tiflash/tiflash-llvm-base:amd64 /build/tics/release-centos7-llvm/scripts/build-debug.sh + build_tiflash_ci_amd64: docker run --rm -v $(realpath ..):/build/tics hub.pingcap.net/tiflash/tiflash-llvm-base:amd64 /build/tics/release-centos7-llvm/scripts/build-tiflash-ci.sh diff --git a/dbms/src/Storages/DeltaMerge/tools/CMakeLists.txt b/release-centos7-llvm/scripts/build-debug.sh old mode 100644 new mode 100755 similarity index 76% rename from dbms/src/Storages/DeltaMerge/tools/CMakeLists.txt rename to release-centos7-llvm/scripts/build-debug.sh index 36270a0c8e4..59dc9b86a54 --- a/dbms/src/Storages/DeltaMerge/tools/CMakeLists.txt +++ b/release-centos7-llvm/scripts/build-debug.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright 2022 PingCAP, Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
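For context on what the `ENABLE_FAILPOINTS` toggle controls (the Makefile comment above adds a debug target precisely to turn it on, and the build script below wires `-DENABLE_FAILPOINTS=ON` for debug builds): a fail point is a compile-time-guarded hook that tests can switch on at runtime to inject failures, as the `__enable_fail_point` calls in the tests later in this patch do. A minimal sketch of the pattern, with illustrative names rather than TiFlash's actual macros:

```cpp
#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

// Hypothetical registry of enabled fail points; TiFlash's real machinery
// is richer than this sketch.
inline std::set<std::string> & enabledFailPoints()
{
    static std::set<std::string> s;
    return s;
}

#ifdef ENABLE_FAILPOINTS
// Debug builds compile the check in, so tests can inject failures.
#define FAIL_POINT_TRIGGER(name)                                                        \
    do                                                                                  \
    {                                                                                   \
        if (enabledFailPoints().count(name))                                            \
            throw std::runtime_error("Fail point " + std::string(name) + " is triggered."); \
    } while (false)
#else
// Release builds compile fail points away entirely, which is why a
// dedicated debug image is needed to run the fail-point tests.
#define FAIL_POINT_TRIGGER(name) \
    do                           \
    {                            \
    } while (false)
#endif

int main()
{
    enabledFailPoints().insert("exception_demo"); // hypothetical fail point name
    try
    {
        FAIL_POINT_TRIGGER("exception_demo");
    }
    catch (const std::runtime_error & e)
    {
        std::cout << e.what() << '\n'; // "Fail point exception_demo is triggered."
    }
}
```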
-include_directories (${CMAKE_CURRENT_BINARY_DIR}) -add_subdirectory (workload EXCLUDE_FROM_ALL) +CMAKE_PREFIX_PATH=$1 + +set -ueox pipefail + +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" + +${SCRIPTPATH}/build-tiflash-release.sh "DEBUG" "${CMAKE_PREFIX_PATH}" diff --git a/release-centos7-llvm/scripts/build-tiflash-release.sh b/release-centos7-llvm/scripts/build-tiflash-release.sh index 42993b51afe..01ca00e8706 100755 --- a/release-centos7-llvm/scripts/build-tiflash-release.sh +++ b/release-centos7-llvm/scripts/build-tiflash-release.sh @@ -47,7 +47,13 @@ ENABLE_PCH=${ENABLE_PCH:-ON} INSTALL_DIR="${SRCPATH}/release-centos7-llvm/tiflash" rm -rf ${INSTALL_DIR} && mkdir -p ${INSTALL_DIR} -BUILD_DIR="${SRCPATH}/release-centos7-llvm/build-release" +if [ $CMAKE_BUILD_TYPE == "RELWITHDEBINFO" ]; then + BUILD_DIR="$SRCPATH/release-centos7-llvm/build-release" + ENABLE_FAILPOINTS="OFF" +else + BUILD_DIR="$SRCPATH/release-centos7-llvm/build-debug" + ENABLE_FAILPOINTS="ON" +fi rm -rf ${BUILD_DIR} && mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} cmake -S "${SRCPATH}" \ @@ -55,6 +61,7 @@ cmake -S "${SRCPATH}" \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DENABLE_TESTING=OFF \ -DENABLE_TESTS=OFF \ + -DENABLE_FAILPOINTS=${ENABLE_FAILPOINTS} \ -Wno-dev \ -DUSE_CCACHE=OFF \ -DRUN_HAVE_STD_REGEX=0 \ diff --git a/tests/fullstack-test-dt/clustered_index/ddl.test b/tests/fullstack-test-dt/clustered_index/ddl.test index 8abe450c11a..6c4925c9619 100644 --- a/tests/fullstack-test-dt/clustered_index/ddl.test +++ b/tests/fullstack-test-dt/clustered_index/ddl.test @@ -66,3 +66,89 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t_2 mysql> drop table test.t_1; mysql> drop table test.t_2; + +### about issue 5154: check whether add column/drop column will affect clustered index decoding +### drop a column between two columns that are clustered index columns + +mysql> drop table if exists test.t_3; +mysql> create table test.t_3 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); +mysql> insert into test.t_3 values (1,'1',1,1),(2,'2',2,2); + +mysql> alter table test.t_3 set tiflash replica 1; + +func> wait_table test t_3 + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+---+ +| A | B | C | D | ++---+---+---+---+ +| 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | 2 | ++---+---+---+---+ + +mysql> alter table test.t_3 drop column B; + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+ +| A | C | D | ++---+---+---+ +| 1 | 1 | 1 | +| 2 | 2 | 2 | ++---+---+---+ + +# insert some rows +mysql> insert into test.t_3 values (3,3,3),(4,4,4); + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+ +| A | C | D | ++---+---+---+ +| 1 | 1 | 1 | +| 2 | 2 | 2 | +| 3 | 3 | 3 | +| 4 | 4 | 4 | ++---+---+---+ + +mysql> drop table test.t_3; + +### add a column between two columns that are clustered index columns +mysql> drop table if exists test.t_4 +mysql> create table test.t_4 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); + +mysql> insert into test.t_4 values (1,'1',1,1),(2,'2',2,2); + +mysql> alter table test.t_4 set tiflash replica 1; + +func> wait_table test t_4 + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+---+---+ +| A | B | C | D | ++---+---+---+---+ +| 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | 2 | ++---+---+---+---+ + +mysql> alter table test.t_4 Add column E int after B; + +mysql> set session 
tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+------+---+---+ +| A | B | E | C | D | ++---+---+------+---+---+ +| 1 | 1 | NULL | 1 | 1 | +| 2 | 2 | NULL | 2 | 2 | ++---+---+------+---+---+ + +mysql> insert into test.t_4 values (3,'3',3,3,3),(4,'4',4,4,4); + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+------+------+------+ +| A | B | E | C | D | ++---+---+------+------+------+ +| 1 | 1 | NULL | 1 | 1 | +| 2 | 2 | NULL | 2 | 2 | +| 3 | 3 | 3 | 3 | 3 | +| 4 | 4 | 4 | 4 | 4 | ++---+---+------+------+------+ + +mysql> drop table test.t_4; \ No newline at end of file diff --git a/tests/fullstack-test/expr/bitshift_operator.test b/tests/fullstack-test/expr/bitshift_operator.test new file mode 100644 index 00000000000..0d55a1b56a9 --- /dev/null +++ b/tests/fullstack-test/expr/bitshift_operator.test @@ -0,0 +1,43 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t; +mysql> create table test.t (a int); +mysql> alter table test.t set tiflash replica 1; +mysql> insert into test.t values(-1); + +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set @@session.tidb_isolation_read_engines = "tiflash"; select a>>0 as v1, a>>64 as v2, a>>10 as v3 from test.t; ++----------------------+------+-------------------+ +| v1 | v2 | v3 | ++----------------------+------+-------------------+ +| 18446744073709551615 | 0 | 18014398509481983 | ++----------------------+------+-------------------+ + +mysql> set tidb_enforce_mpp=1; set @@session.tidb_isolation_read_engines = "tiflash"; select a from test.t where a>>100000=0; ++------+ +| a | ++------+ +| -1 | ++------+ + +mysql> set tidb_enforce_mpp=1; set @@session.tidb_isolation_read_engines = "tiflash"; select a from test.t where a>>63=1; ++------+ +| a | ++------+ +| -1 | ++------+ + +mysql> drop table if exists test.t diff --git a/tests/fullstack-test/expr/duration_pushdown.test b/tests/fullstack-test/expr/duration_pushdown.test index 63106fa1788..442a708a802 100644 --- a/tests/fullstack-test/expr/duration_pushdown.test +++ b/tests/fullstack-test/expr/duration_pushdown.test @@ -106,6 +106,14 @@ mysql> use test; set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflas # | 123500 | # +----------------+ +mysql> use test; set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select time_to_sec(a) from t; ++----------------+ +| time_to_sec(a) | ++----------------+ +| 2520610 | +| -2520610 | ++----------------+ + mysql> drop table if exists test.time_test; mysql> create table test.time_test(id int(11),v1 time(3) not null, v2 time(3)); diff --git a/tests/fullstack-test/expr/format.test b/tests/fullstack-test/expr/format.test index 8cea75d6914..719e30c974d 100644 --- a/tests/fullstack-test/expr/format.test +++ b/tests/fullstack-test/expr/format.test @@ -44,3 +44,52 @@ int_val 1,234.000 mysql> drop table if exists test.t + +mysql> create table test.t(id int, value decimal(65,4)) +mysql> alter table test.t set 
tiflash replica 1 +mysql> insert into test.t values(1,9999999999999999999999999999999999999999999999999999999999999.9999) + +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,-3) as result from test.t +result +10,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,0) as result from test.t +result +10,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,3) as result from test.t +result +10,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000,000.000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,10) as result from test.t +result +9,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999,999.9999000000 + + +mysql> drop table if exists test.t + +mysql> create table test.t(id int, value decimal(7,4)) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values(1,999.9999) + +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,-2) as result from test.t +result +1,000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,0) as result from test.t +result +1,000 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,2) as result from test.t +result +1,000.00 + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select format(value,10) as result from test.t +result +999.9999000000 + +mysql> drop table if exists test.t diff --git a/tests/fullstack-test/expr/get_format.test b/tests/fullstack-test/expr/get_format.test new file mode 100644 index 00000000000..5409302c10a --- /dev/null +++ b/tests/fullstack-test/expr/get_format.test @@ -0,0 +1,60 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
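The format.test cases above pin down two behaviors of `FORMAT(X, D)`: a negative `D` is clamped to 0, and the integer part is grouped with commas after rounding (so `999.9999` at `D = 0` rounds up to `1,000`). A small model of the grouping step, assuming rounding has already produced the digit string:

```cpp
#include <cassert>
#include <string>

// Insert thousands separators into a non-negative integer digit string,
// mimicking MySQL FORMAT()'s default en_US grouping. Rounding to d
// decimal places (and clamping d < 0 to 0) happens before this step.
std::string groupDigits(const std::string & digits)
{
    std::string out;
    const size_t n = digits.size();
    for (size_t i = 0; i < n; ++i)
    {
        out += digits[i];
        const size_t remaining = n - i - 1;
        if (remaining > 0 && remaining % 3 == 0)
            out += ',';
    }
    return out;
}

int main()
{
    // 999.9999 rounded to 0 decimal places becomes 1000 -> "1,000",
    // matching the expected output in the test above.
    assert(groupDigits("1000") == "1,000");
    assert(groupDigits("1234567") == "1,234,567");
}
```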
+ +mysql> drop table if exists test.t; +mysql> create table test.t(location varchar(10)); +mysql> insert into test.t values('USA'), ('JIS'), ('ISO'), ('EUR'), ('INTERNAL'); +mysql> alter table test.t set tiflash replica 1; +func> wait_table test t +mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(DATE, location) from test.t; ++----------------------------+ +| GET_FORMAT(DATE, location) | ++----------------------------+ +| %m.%d.%Y | +| %Y-%m-%d | +| %Y-%m-%d | +| %d.%m.%Y | +| %Y%m%d | ++----------------------------+ +mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(DATETIME, location) from test.t; ++--------------------------------+ +| GET_FORMAT(DATETIME, location) | ++--------------------------------+ +| %Y-%m-%d %H.%i.%s | +| %Y-%m-%d %H:%i:%s | +| %Y-%m-%d %H:%i:%s | +| %Y-%m-%d %H.%i.%s | +| %Y%m%d%H%i%s | ++--------------------------------+ +mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(TIMESTAMP, location) from test.t; ++---------------------------------+ +| GET_FORMAT(TIMESTAMP, location) | ++---------------------------------+ +| %Y-%m-%d %H.%i.%s | +| %Y-%m-%d %H:%i:%s | +| %Y-%m-%d %H:%i:%s | +| %Y-%m-%d %H.%i.%s | +| %Y%m%d%H%i%s | ++---------------------------------+ +mysql> set @@tidb_enforce_mpp=1; set @@tidb_isolation_read_engines='tiflash'; select GET_FORMAT(TIME, location) from test.t; ++----------------------------+ +| GET_FORMAT(TIME, location) | ++----------------------------+ +| %h:%i:%s %p | +| %H:%i:%s | +| %H:%i:%s | +| %H.%i.%s | +| %H%i%s | ++----------------------------+ +mysql> drop table if exists test.t; diff --git a/tests/fullstack-test/expr/reverse.test b/tests/fullstack-test/expr/reverse.test new file mode 100644 index 00000000000..9195adf2b7d --- /dev/null +++ b/tests/fullstack-test/expr/reverse.test @@ -0,0 +1,44 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
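`GET_FORMAT`, exercised by get_format.test above, is essentially a constant lookup from (type, locale) to a format string. The entries below are copied from the test's expected output; the partial table and names are a sketch, not TiFlash's implementation:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>

// (type, locale) -> format string, per the expected results above.
// Only a subset of the table is reproduced here.
const std::map<std::pair<std::string, std::string>, std::string> kFormats = {
    {{"DATE", "USA"}, "%m.%d.%Y"},
    {{"DATE", "JIS"}, "%Y-%m-%d"},
    {{"DATE", "ISO"}, "%Y-%m-%d"},
    {{"DATE", "EUR"}, "%d.%m.%Y"},
    {{"DATE", "INTERNAL"}, "%Y%m%d"},
    {{"TIME", "USA"}, "%h:%i:%s %p"},
    {{"TIME", "EUR"}, "%H.%i.%s"},
    {{"TIME", "INTERNAL"}, "%H%i%s"},
};

int main()
{
    std::cout << kFormats.at({"DATE", "USA"}) << '\n'; // %m.%d.%Y
    std::cout << kFormats.at({"TIME", "EUR"}) << '\n'; // %H.%i.%s
}
```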
+ +mysql> drop table if exists test.t; +mysql> create table if not exists test.t(a varchar(256)); + + +mysql> insert into test.t values('one week’s time test'); +mysql> insert into test.t values('abc测试def'); +mysql> insert into test.t values('abcテストabc'); +mysql> insert into test.t values('ѐёђѓєѕіїјљњћќѝўџ'); +mysql> insert into test.t values('+ѐ-ё*ђ/ѓ!є@ѕ#і@ї%ј……љ&њ(ћ)ќ¥ѝ#ў@џ!^'); +mysql> insert into test.t values('αβγδεζηθικλμνξοπρστυφχψωσ'); +mysql> insert into test.t values('▲α▼βγ➨δε☎ζη✂θι€κλ♫μν✓ξο✚πρ℉στ♥υφ♖χψ♘ω★σ✕'); +mysql> insert into test.t values('թփձջրչճժծքոեռտըւիօպասդֆգհյկլխզղցվբնմշ'); +mysql> insert into test.t values(NULL); +mysql> alter table test.t set tiflash replica 1; +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select reverse(a) from test.t; ++-------------------------------------------------------------------------------------------------+ +| reverse(a) | ++-------------------------------------------------------------------------------------------------+ +| tset emit s’keew eno | +| fed试测cba | +| cbaトステcba | +| џўѝќћњљјїіѕєѓђёѐ | +| ^!џ@ў#ѝ¥ќ)ћ(њ&љ……ј%ї@і#ѕ@є!ѓ/ђ*ё-ѐ+ | +| σωψχφυτσρποξνμλκιθηζεδγβα | +| ✕σ★ω♘ψχ♖φυ♥τσ℉ρπ✚οξ✓νμ♫λκ€ιθ✂ηζ☎εδ➨γβ▼α▲ | +| շմնբվցղզխլկյհգֆդսապօիւըտռեոքծժճչրջձփթ | +| NULL | ++-------------------------------------------------------------------------------------------------+ diff --git a/tests/fullstack-test/mpp/issue_2471.test b/tests/fullstack-test/mpp/issue_2471.test index 4a1528595e8..9966eaadec3 100644 --- a/tests/fullstack-test/mpp/issue_2471.test +++ b/tests/fullstack-test/mpp/issue_2471.test @@ -35,7 +35,7 @@ mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_opt_bro => DBGInvoke __enable_fail_point(exception_in_creating_set_input_stream) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_opt_broadcast_cartesian_join=2; select * from a as t1 left join a as t2 on t1.id = t2.id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_in_creating_set_input_stream is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_in_creating_set_input_stream is triggered., e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_in_creating_set_input_stream) diff --git a/tests/fullstack-test/mpp/mpp_fail.test b/tests/fullstack-test/mpp/mpp_fail.test index 7af5fef3f89..0e272c0b621 100644 --- a/tests/fullstack-test/mpp/mpp_fail.test +++ b/tests/fullstack-test/mpp/mpp_fail.test @@ -71,20 +71,20 @@ ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_be ## exception during mpp run non root task => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered. 
+ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered., e.what() = DB::Exception,, e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) ## exception during mpp run root task => DBGInvoke __enable_fail_point(exception_during_mpp_root_task_run) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_during_mpp_root_task_run is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_root_task_run is triggered., e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_during_mpp_root_task_run) ## exception during mpp write err to tunnel => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __enable_fail_point(exception_during_mpp_write_err_to_tunnel) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : Failed to write error msg to tunnel +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Failed to write error msg to tunnel, e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __disable_fail_point(exception_during_mpp_write_err_to_tunnel) @@ -92,7 +92,7 @@ ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchang => DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __enable_fail_point(exception_during_mpp_close_tunnel) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchange receiver meet error : DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 0, e.displayText() = DB::Exception: Exchange receiver meet error : Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered., e.what() = DB::Exception,, e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) => DBGInvoke __disable_fail_point(exception_during_mpp_close_tunnel) @@ -125,7 +125,7 @@ ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Exchang ## ensure build1, build2-probe1, probe2 in the CreatingSets, test the bug where build1 throw exception but not change the build state, thus block the build2-probe1, at last this query hangs. 
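The hang described in the comment above is a lost-notification bug: build1 threw before publishing a terminal build state, so build2-probe1 waited forever. A minimal sketch of the invariant the test guards (a failing build must still wake its waiters), with illustrative names rather than TiFlash's actual classes; the commands just below reproduce it by forcing the hash build to throw:

```cpp
#include <condition_variable>
#include <exception>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <thread>

struct BuildState
{
    std::mutex mu;
    std::condition_variable cv;
    bool finished = false;
    std::exception_ptr error;

    void finish(std::exception_ptr e = nullptr)
    {
        {
            std::lock_guard lk(mu);
            finished = true;
            error = e;
        }
        cv.notify_all(); // wake waiters even when the build failed
    }

    void waitForBuild()
    {
        std::unique_lock lk(mu);
        cv.wait(lk, [&] { return finished; });
        if (error)
            std::rethrow_exception(error); // propagate instead of hanging
    }
};

int main()
{
    BuildState build1;
    std::thread probe([&] {
        try
        {
            build1.waitForBuild();
        }
        catch (const std::exception & e)
        {
            std::cout << "probe unblocked: " << e.what() << '\n';
        }
    });
    build1.finish(std::make_exception_ptr(std::runtime_error("exception_mpp_hash_build")));
    probe.join();
}
```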
=> DBGInvoke __enable_fail_point(exception_mpp_hash_build) mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; set @@tidb_broadcast_join_threshold_count=0; set @@tidb_broadcast_join_threshold_size=0; select t1.id from test.t t1 join test.t t2 on t1.id = t2.id and t1.id <2 join (select id from test.t group by id) t3 on t2.id=t3.id; -ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_mpp_hash_build is triggered. +ERROR 1105 (HY000) at line 1: other error for mpp stream: Code: 10007, e.displayText() = DB::Exception: Fail point FailPoints::exception_mpp_hash_build is triggered., e.what() = DB::Exception, => DBGInvoke __disable_fail_point(exception_mpp_hash_build) # Clean up. diff --git a/tests/fullstack-test/mpp/window.test b/tests/fullstack-test/mpp/window.test new file mode 100644 index 00000000000..698d39ef2ea --- /dev/null +++ b/tests/fullstack-test/mpp/window.test @@ -0,0 +1,32 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t1; +mysql> create table test.t1(c1 int, c2 int); +mysql> insert into test.t1 values(1, 1),(2, 2),(3, 3),(1, 1),(2, 2),(3, 3),(4, 4); +mysql> alter table test.t1 set tiflash replica 1; +func> wait_table test t1 +mysql> use test; set @@tidb_isolation_read_engines='tiflash'; select c1, c2, row_number() over w2, row_number() over w1 from test.t1 window w1 as(partition by c1), w2 as (partition by c1, c2) order by 1, 2, 3, 4; ++------+------+----------------------+----------------------+ +| c1 | c2 | row_number() over w2 | row_number() over w1 | ++------+------+----------------------+----------------------+ +| 1 | 1 | 1 | 1 | +| 1 | 1 | 2 | 2 | +| 2 | 2 | 1 | 1 | +| 2 | 2 | 2 | 2 | +| 3 | 3 | 1 | 1 | +| 3 | 3 | 2 | 2 | +| 4 | 4 | 1 | 1 | ++------+------+----------------------+----------------------+ +mysql> drop table if exists test.t1; diff --git a/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test b/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test new file mode 100644 index 00000000000..5e43936379b --- /dev/null +++ b/tests/fullstack-test2/ddl/alter_table_tiflash_replica_and_mode.test @@ -0,0 +1,89 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
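window.test above pins down `row_number()` semantics: numbering restarts at 1 for each distinct partition key, independently per window definition (w1 partitions by c1, w2 by c1 and c2). A scalar model of the computation:

```cpp
#include <cassert>
#include <map>
#include <utility>
#include <vector>

// row_number() restarts at 1 for each distinct partition key; the input
// is assumed to be already sorted by that key, as the window test is.
std::vector<int> rowNumber(const std::vector<std::pair<int, int>> & sorted_rows)
{
    std::vector<int> out;
    std::map<std::pair<int, int>, int> seen; // rows per partition so far
    for (const auto & key : sorted_rows)
        out.push_back(++seen[key]);
    return out;
}

int main()
{
    // (c1, c2) pairs from the test, in the order of the expected output.
    std::vector<std::pair<int, int>> rows
        = {{1, 1}, {1, 1}, {2, 2}, {2, 2}, {3, 3}, {3, 3}, {4, 4}};
    // Matches the "row_number() over w2" column above.
    assert((rowNumber(rows) == std::vector<int>{1, 2, 1, 2, 1, 2, 1}));
}
```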
+ +# test tiflash replica for normal case +mysql> drop table if exists test.t +mysql> create table test.t(a int) +mysql> alter table test.t set tiflash replica 1 + +func> wait_table test t + +>> DBGInvoke get_tiflash_replica_count("test", "t") +┌─get_tiflash_replica_count(test, t)─┐ +│ 1 │ +└────────────────────────────────────┘ + +# test tiflash mode in normal mode +>> DBGInvoke get_tiflash_mode("test", "t") +┌─get_tiflash_mode(test, t)─┐ +│ │ +└───────────────────────────┘ + +mysql> alter table test.t set tiflash mode fast + +>> DBGInvoke __refresh_schemas() + +# test tiflash mode in fast mode +>> DBGInvoke get_tiflash_mode("test", "t") +┌─get_tiflash_mode(test, t)───┐ +│ fast │ +└─────────────────────────────┘ + +# test replica for partition tables +mysql> drop table if exists test.t +mysql> create table test.t (x int) partition by range (x) (partition p0 values less than (5), partition p1 values less than (10)); +mysql> alter table test.t set tiflash mode fast +mysql> alter table test.t set tiflash replica 1 + +func> wait_table test t + +>> DBGInvoke get_tiflash_replica_count("test", "t") +┌─get_tiflash_replica_count(test, t)─┐ +│ 1 │ +└────────────────────────────────────┘ + +>> DBGInvoke get_tiflash_mode("test", "t") +┌─get_tiflash_mode(test, t)──────────┐ +│ fast │ +└────────────────────────────────────┘ + +>> DBGInvoke get_partition_tables_tiflash_replica_count("test", "t") +┌─get_partition_tables_tiflash_replica_count(test, t)─┐ +│ 1/1/ │ +└─────────────────────────────────────────────────────┘ + +# test tiflash mode for partition tables +>> DBGInvoke get_partition_tables_tiflash_mode("test", "t") +┌─get_partition_tables_tiflash_mode(test, t)─┐ +│ fast/fast/ │ +└────────────────────────────────────────────┘ + +# test replica for add partition tables after set replica +mysql> alter table test.t add partition (partition p2 values less than (2010)); + +>> DBGInvoke __refresh_schemas() + +>> DBGInvoke get_partition_tables_tiflash_replica_count("test", "t") +┌─get_partition_tables_tiflash_replica_count(test, t)─┐ +│ 1/1/1/ │ +└─────────────────────────────────────────────────────┘ + +# test tiflash mode for add partition tables after set replica +>> DBGInvoke get_partition_tables_tiflash_mode("test", "t") +┌─get_partition_tables_tiflash_mode(test, t)─┐ +│ fast/fast/fast/ │ +└────────────────────────────────────────────┘ + + + diff --git a/tests/fullstack-test2/ddl/alter_tiflash_mode.test b/tests/fullstack-test2/ddl/alter_tiflash_mode.test new file mode 100644 index 00000000000..c9f3ef488c4 --- /dev/null +++ b/tests/fullstack-test2/ddl/alter_tiflash_mode.test @@ -0,0 +1,48 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
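The `get_partition_tables_*` debug helpers above report one value per physical partition, each terminated by `/` (hence `1/1/` for two partitions, and `1/1/1/` or `fast/fast/fast/` once a third partition is added). A sketch of that formatting:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Join per-partition values with a trailing '/', matching the rendering
// of the DBGInvoke helpers in the tests above.
std::string joinWithSlash(const std::vector<std::string> & per_partition)
{
    std::string out;
    for (const auto & v : per_partition)
        out += v + "/";
    return out;
}

int main()
{
    assert(joinWithSlash({"1", "1"}) == "1/1/");
    assert(joinWithSlash({"fast", "fast", "fast"}) == "fast/fast/fast/");
}
```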
+ +mysql> drop table if exists test.t +mysql> create table test.t(a int, b int) +mysql> alter table test.t set tiflash replica 1 + +func> wait_table test t + +# check default mode of tiflash table +mysql> select table_schema,table_name,replica_count,available,table_mode from information_schema.tiflash_replica where table_schema='test' and table_name='t'; ++--------------+------------+---------------+-----------+-----------+ +| table_schema | table_name | replica_count | available | table_mode| ++--------------+------------+---------------+-----------+-----------+ +| test | t | 1 | 1 | NORMAL | ++--------------+------------+---------------+-----------+-----------+ + +# check changing mode to fast + +mysql> alter table test.t set tiflash mode fast +mysql> select table_schema,table_name,replica_count,available,table_mode from information_schema.tiflash_replica where table_schema='test' and table_name='t'; ++--------------+------------+---------------+-----------+-----------+ +| table_schema | table_name | replica_count | available | table_mode| ++--------------+------------+---------------+-----------+-----------+ +| test | t | 1 | 1 | FAST | ++--------------+------------+---------------+-----------+-----------+ + +# check changing mode back to normal +mysql> alter table test.t set tiflash mode normal +mysql> select table_schema,table_name,replica_count,available,table_mode from information_schema.tiflash_replica where table_schema='test' and table_name='t'; ++--------------+------------+---------------+-----------+-----------+ +| table_schema | table_name | replica_count | available | table_mode| ++--------------+------------+---------------+-----------+-----------+ +| test | t | 1 | 1 | NORMAL | ++--------------+------------+---------------+-----------+-----------+ + +mysql> drop table if exists test.t \ No newline at end of file diff --git a/tests/fullstack-test2/ddl/multi_alter_with_write.test b/tests/fullstack-test2/ddl/multi_alter_with_write.test new file mode 100644 index 00000000000..3284511d775 --- /dev/null +++ b/tests/fullstack-test2/ddl/multi_alter_with_write.test @@ -0,0 +1,880 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# this test focuses on the case when multiple DDL actions happen close together +# (and these DDL actions will be fetched in the same regular schema sync round), +# with corresponding insert (write) actions issued between these DDL actions. +# Considering that these write actions and schema changes may arrive at +# tiflash in different orders, we simulate the different orderings to check +# that our schema module works correctly.
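All of the orderings below reduce to one mechanism: when TiFlash cannot decode an incoming write with its cached schema, it syncs the schema (picking up any pending alters) and retries, so a DDL can take effect lazily at write time rather than only on the regular sync. A minimal model of that retry, under the simplifying assumption that schema freshness is a plain integer version:

```cpp
#include <iostream>
#include <stdexcept>

// Not TiFlash's real code: a write produced under a newer schema forces
// one schema sync, after which pending DDLs ("alter cmd 1", "alter cmd 2")
// become visible.
static long cached_schema_version = 1; // what this TiFlash node knows
static long write_schema_version = 2;  // version the write was encoded under

bool tryDecodeWrite() { return cached_schema_version >= write_schema_version; }
void syncSchemaFromTiDB() { cached_schema_version = write_schema_version; }

int main()
{
    if (!tryDecodeWrite())
    {
        syncSchemaFromTiDB(); // picks up the pending alters in one round
        if (!tryDecodeWrite())
            throw std::runtime_error("schema still mismatched after sync");
    }
    std::cout << "write applied at schema v" << cached_schema_version << '\n';
}
```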
+ +# TiDB Timeline : write cmd 1 | alter cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3 + +# stop regular schema sync +=> DBGInvoke __enable_schema_sync_service('false') + +# Enable the failpoint and make it pause before applying the raft cmd to write a row +>> DBGInvoke __init_fail_point() +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# Enable the failpoint to make our query only start when the write action finished +>> DBGInvoke __enable_fail_point(unblock_query_init_after_write) + +# ----------------------------------------------------------------------------- +# Order 1 : write cmd 1 | alter cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3 +# ----------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# --------------------------------------------------------------------------------------------- +# Order 2 : write cmd 1 | alter cmd 1 | write cmd 2 | write cmd 3 --> sync schema(alter cmd 2) +# --------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int 
primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +│ 3 │ 0.20 │ ccc │ 3 │ 0.1 │ +└─────┴──────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 3 --> call sync schema and get alter cmd 2 happen +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ----------------------------------------------------------------------------------------------- +# Order 3 : write cmd 1 | alter cmd 1 | alter cmd 2 | write cmd 2 -->sync schema() | write cmd 3 +# ----------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# make alter cmd 1 take effect +>> DBGInvoke __refresh_schemas() + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┬─e───┐ +│ 1 │ 4.50 │ abc │ 0 │ \N │ +└─────┴──────┴───────┴─────┴─────┘ + 
+# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# alter cmd 2 +mysql> alter table test.t drop column b; + +# make alter cmd 2 take effect +>> DBGInvoke __refresh_schemas() + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# write cmd 3 +mysql> insert into test.t values (4, 'abcd', 10, 0.2); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 3 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─c─────┬─d───┬─e───┐ +│ 1 │ abc │ 0 │ \N │ +│ 3 │ ccc │ 3 │ 0.1 │ +│ 4 │ abcd │ 10 │ 0.2 │ +└─────┴───────┴─────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# ----------------------------------------------------------------------------------------------- +# Order 4 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1) | alter cmd 2 | write cmd 3 +# ----------------------------------------------------------------------------------------------- + +mysql> drop table if exists test.t +mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0); + +mysql> alter table test.t set tiflash replica 1; + +func> wait_table test t + +# write cmd 1 +mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc'); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 1 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd) + +# alter cmd 1 +mysql> alter table test.t add column e decimal(6,1) NULL; + +# check no schema change before write cmd 2 take effect +=> DBGInvoke query_mapped('select * from \$d.\$t', test, t) +┌─a───┬─b────┬─c─────┬─d───┐ +│ 1 │ 4.50 │ abc │ 0 │ +└─────┴──────┴───────┴─────┘ + +# write cmd 2 +mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1); + +# enable pause_query_init make query start until write cmd finish +>> DBGInvoke __enable_fail_point(pause_query_init) + +# make write cmd 2 take effect +>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd) + +# check what happen after write cmd 2 --> should call sync schema, get the alter cmd 1 happened. 
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 1   │ 4.50 │ abc   │ 0   │ \N  │
+│ 3   │ 0.20 │ ccc   │ 3   │ 0.1 │
+└─────┴──────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# alter cmd 2
+mysql> alter table test.t drop column b;
+
+# make alter cmd 2 take effect
+>> DBGInvoke __refresh_schemas()
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+└─────┴───────┴─────┴─────┘
+
+# write cmd 3
+mysql> insert into test.t values (4, 'abcd', 10, 0.2);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 3 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+│ 4   │ abcd  │ 10  │ 0.2 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# -----------------------------------------------------------------------------------------------------------
+# Order 5 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1) | write cmd 3 --> sync schema(alter cmd 2)
+# -----------------------------------------------------------------------------------------------------------
+
+mysql> drop table if exists test.t
+mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0);
+
+mysql> alter table test.t set tiflash replica 1;
+
+func> wait_table test t
+
+# write cmd 1
+mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 1 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┐
+│ 1   │ 4.50 │ abc   │ 0   │
+└─────┴──────┴───────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# alter cmd 1
+mysql> alter table test.t add column e decimal(6,1) NULL;
+
+# write cmd 2
+mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 2 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+# check what happens after write cmd 2 --> it should trigger a schema sync and apply alter cmd 1.
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 1   │ 4.50 │ abc   │ 0   │ \N  │
+│ 3   │ 0.20 │ ccc   │ 3   │ 0.1 │
+└─────┴──────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# alter cmd 2
+mysql> alter table test.t drop column b;
+
+# write cmd 3
+mysql> insert into test.t values (4, 'abcd', 10, 0.2);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 3 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+# check what happens after write cmd 3 --> it should trigger a schema sync and apply alter cmd 2.
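+# (write cmd 3 carries no value for the dropped column b, so decoding it should force a sync up to alter cmd 2)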
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+│ 4   │ abcd  │ 10  │ 0.2 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# -----------------------------------------------------------------------------------------------
+# Order 6 : write cmd 1 | write cmd 2 --> sync schema(alter cmd 1, alter cmd 2) | write cmd 3
+# -----------------------------------------------------------------------------------------------
+
+mysql> drop table if exists test.t
+mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0);
+
+mysql> alter table test.t set tiflash replica 1;
+
+func> wait_table test t
+
+# write cmd 1
+mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 1 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┐
+│ 1   │ 4.50 │ abc   │ 0   │
+└─────┴──────┴───────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# alter cmd 1
+mysql> alter table test.t add column e decimal(6,1) NULL;
+
+# write cmd 2
+mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1);
+
+# alter cmd 2
+mysql> alter table test.t drop column b;
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 2 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+# check what happens after write cmd 2 --> it should trigger a schema sync and apply both alter cmd 1 and alter cmd 2.
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 3
+mysql> insert into test.t values (4, 'abcd', 10, 0.2);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 3 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+│ 4   │ abcd  │ 10  │ 0.2 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# -------------------------------------------------------------------------------
+# Order 7 : alter cmd 1 | write cmd 1 | write cmd 2 | alter cmd 2 | write cmd 3
+# -------------------------------------------------------------------------------
+
+mysql> drop table if exists test.t
+mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0);
+
+mysql> alter table test.t set tiflash replica 1;
+
+func> wait_table test t
+
+# add a pre-write row to make checking alter cmd 1 more convenient.
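+# (with a pre-existing row, the default \N of the column added by alter cmd 1 shows up directly in the next query result)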
+mysql> insert into test.t (a, b, c) values (0, 0, ' ');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┐
+│ 0   │ 0.00 │       │ 0   │
+└─────┴──────┴───────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+
+# alter cmd 1
+mysql> alter table test.t add column e decimal(6,1) NULL;
+
+# make alter cmd 1 take effect
+>> DBGInvoke __refresh_schemas()
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 0   │ 0.00 │       │ 0   │ \N  │
+└─────┴──────┴───────┴─────┴─────┘
+
+# write cmd 1
+mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 1 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 0   │ 0.00 │       │ 0   │ \N  │
+│ 1   │ 4.50 │ abc   │ 0   │ \N  │
+└─────┴──────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 2
+mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 2 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 0   │ 0.00 │       │ 0   │ \N  │
+│ 1   │ 4.50 │ abc   │ 0   │ \N  │
+│ 3   │ 0.20 │ ccc   │ 3   │ 0.1 │
+└─────┴──────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# alter cmd 2
+mysql> alter table test.t drop column b;
+
+# make alter cmd 2 take effect
+>> DBGInvoke __refresh_schemas()
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 0   │       │ 0   │ \N  │
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+└─────┴───────┴─────┴─────┘
+
+# write cmd 3
+mysql> insert into test.t values (4, 'abcd', 10, 0.2);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 3 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 0   │       │ 0   │ \N  │
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+│ 4   │ abcd  │ 10  │ 0.2 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# --------------------------------------------------------------------------------------------------
+# Order 8 : alter cmd 1 | write cmd 1 | write cmd 2 | write cmd 3 --> sync schema(alter cmd 2)
+# --------------------------------------------------------------------------------------------------
+
+mysql> drop table if exists test.t
+mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0);
+
+mysql> alter table test.t set tiflash replica 1;
+
+func> wait_table test t
+
+# add a pre-write row to make checking alter cmd 1 more convenient.
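+# (in this order no __refresh_schemas() follows alter cmd 2; the sync is expected to be triggered
+#  by decoding write cmd 3)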
+mysql> insert into test.t (a, b, c) values (0, 0, ' ');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┐
+│ 0   │ 0.00 │       │ 0   │
+└─────┴──────┴───────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# alter cmd 1
+mysql> alter table test.t add column e decimal(6,1) NULL;
+
+# make alter cmd 1 take effect
+>> DBGInvoke __refresh_schemas()
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 0   │ 0.00 │       │ 0   │ \N  │
+└─────┴──────┴───────┴─────┴─────┘
+
+# write cmd 1
+mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 1 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 0   │ 0.00 │       │ 0   │ \N  │
+│ 1   │ 4.50 │ abc   │ 0   │ \N  │
+└─────┴──────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 2
+mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 2 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 0   │ 0.00 │       │ 0   │ \N  │
+│ 1   │ 4.50 │ abc   │ 0   │ \N  │
+│ 3   │ 0.20 │ ccc   │ 3   │ 0.1 │
+└─────┴──────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# alter cmd 2
+mysql> alter table test.t drop column b;
+
+# write cmd 3
+mysql> insert into test.t values (4, 'abcd', 10, 0.2);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 3 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+# check what happens after write cmd 3 --> it should trigger a schema sync and apply alter cmd 2.
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 0   │       │ 0   │ \N  │
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+│ 4   │ abcd  │ 10  │ 0.2 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# --------------------------------------------------------------------------------------------------
+# Order 9 : alter cmd 1 | write cmd 1 | alter cmd 2 | write cmd 2 --> sync schema() | write cmd 3
+# --------------------------------------------------------------------------------------------------
+
+mysql> drop table if exists test.t
+mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0);
+
+mysql> alter table test.t set tiflash replica 1;
+
+func> wait_table test t
+
+# add a pre-write row to make checking alter cmd 1 more convenient.
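+# (alter cmd 1 takes effect before any numbered write cmd here, so every later row is decoded with column e present)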
+mysql> insert into test.t (a, b, c) values (0, 0, ' ');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┐
+│ 0   │ 0.00 │       │ 0   │
+└─────┴──────┴───────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# alter cmd 1
+mysql> alter table test.t add column e decimal(6,1) NULL;
+
+# make alter cmd 1 take effect
+>> DBGInvoke __refresh_schemas()
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 0   │ 0.00 │       │ 0   │ \N  │
+└─────┴──────┴───────┴─────┴─────┘
+
+# write cmd 1
+mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 1 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┬─e───┐
+│ 0   │ 0.00 │       │ 0   │ \N  │
+│ 1   │ 4.50 │ abc   │ 0   │ \N  │
+└─────┴──────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 2
+mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1);
+
+# alter cmd 2
+mysql> alter table test.t drop column b;
+
+# make alter cmd 2 take effect
+>> DBGInvoke __refresh_schemas()
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 2 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 0   │       │ 0   │ \N  │
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 3
+mysql> insert into test.t values (4, 'abcd', 10, 0.2);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 3 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 0   │       │ 0   │ \N  │
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+│ 4   │ abcd  │ 10  │ 0.2 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# ------------------------------------------------------------------------------------------------------------------
+# Order 10 : alter cmd 1 | alter cmd 2 | write cmd 1 --> sync schema() | write cmd 2 --> sync schema() | write cmd 3
+# ------------------------------------------------------------------------------------------------------------------
+
+mysql> drop table if exists test.t
+mysql> create table test.t(a int primary key, b decimal(5,2) not NULL, c varchar(10), d int default 0);
+
+mysql> alter table test.t set tiflash replica 1;
+
+func> wait_table test t
+
+# add a pre-write row to make checking alter cmd 1 more convenient.
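+# (both alter cmds are refreshed before the writes are unblocked, so a single
+#  __disable_fail_point releases write cmd 1 and write cmd 2 together under the final schema)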
+mysql> insert into test.t (a, b, c) values (0, 0, ' ');
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─b────┬─c─────┬─d───┐
+│ 0   │ 0.00 │       │ 0   │
+└─────┴──────┴───────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 1
+mysql> insert into test.t (a, b, c) values (1, 4.5, 'abc');
+
+# alter cmd 1
+mysql> alter table test.t add column e decimal(6,1) NULL;
+
+# make alter cmd 1 take effect
+>> DBGInvoke __refresh_schemas()
+
+# write cmd 2
+mysql> insert into test.t values (3, 0.2, 'ccc', 3, 0.1);
+
+# alter cmd 2
+mysql> alter table test.t drop column b;
+
+# make alter cmd 2 take effect
+>> DBGInvoke __refresh_schemas()
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 1 and write cmd 2 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 0   │       │ 0   │ \N  │
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+# write cmd 3
+mysql> insert into test.t values (4, 'abcd', 10, 0.2);
+
+# enable pause_query_init so that queries do not start until the write cmd finishes
+>> DBGInvoke __enable_fail_point(pause_query_init)
+
+# make write cmd 3 take effect
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+
+=> DBGInvoke query_mapped('select * from \$d.\$t', test, t)
+┌─a───┬─c─────┬─d───┬─e───┐
+│ 0   │       │ 0   │ \N  │
+│ 1   │ abc   │ 0   │ \N  │
+│ 3   │ ccc   │ 3   │ 0.1 │
+│ 4   │ abcd  │ 10  │ 0.2 │
+└─────┴───────┴─────┴─────┘
+
+>> DBGInvoke __enable_fail_point(pause_before_apply_raft_cmd)
+
+
+## cleanup: restore the schema sync service and disable the fail points used above
+
+=> DBGInvoke __enable_schema_sync_service('true')
+>> DBGInvoke __disable_fail_point(pause_before_apply_raft_cmd)
+>> DBGInvoke __disable_fail_point(unblock_query_init_after_write)
+>> DBGInvoke __disable_fail_point(pause_query_init)
\ No newline at end of file
diff --git a/tests/fullstack-test2/ddl/rename_table_across_databases.test b/tests/fullstack-test2/ddl/rename_table_across_databases.test
index c78c27138a0..bc27668bd0c 100644
--- a/tests/fullstack-test2/ddl/rename_table_across_databases.test
+++ b/tests/fullstack-test2/ddl/rename_table_across_databases.test
@@ -52,7 +52,7 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test_new
 +------+------+
 
 # check if table info updated.
->> select tidb_database,tidb_name from system.tables where is_tombstone = 0 and (tidb_database = 'test' and tidb_name='t') or (tidb_database='test_new' and tidb_name='t2')
+>> select tidb_database,tidb_name from system.tables where is_tombstone = 0 and ((tidb_database = 'test' and tidb_name='t') or (tidb_database='test_new' and tidb_name='t2'))
 ┌─tidb_database─┬─tidb_name─┐
 │ test_new      │ t2        │
 └───────────────┴───────────┘
diff --git a/tests/run-test.py b/tests/run-test.py
index 843fe7c79b4..a2bcee0ce99 100644
--- a/tests/run-test.py
+++ b/tests/run-test.py
@@ -29,6 +29,7 @@
 UNFINISHED_1_PREFIX = '\t'
 UNFINISHED_2_PREFIX = ' '
 WORD_PH = '{#WORD}'
+LINE_PH = '{#LINE}'
 CURL_TIDB_STATUS_PREFIX = 'curl_tidb> '
 
 verbose = False
@@ -138,18 +139,22 @@ def match_ph_word(line):
 
 # TODO: Support more place holders, eg: {#NUMBER}
 def compare_line(line, template):
-    while True:
-        i = template.find(WORD_PH)
-        if i < 0:
-            return line == template
-        else:
-            if line[:i] != template[:i]:
-                return False
-            j = match_ph_word(line[i:])
-            if j == 0:
-                return False
-            template = template[i + len(WORD_PH):]
-            line = line[i + j:]
+    l = template.find(LINE_PH)
+    if l >= 0:
+        return True
+    else:
+        while True:
+            i = template.find(WORD_PH)
+            if i < 0:
+                return line == template
+            else:
+                if line[:i] != template[:i]:
+                    return False
+                j = match_ph_word(line[i:])
+                if j == 0:
+                    return False
+                template = template[i + len(WORD_PH):]
+                line = line[i + j:]
 
 
 class MySQLCompare:
@@ -194,11 +199,14 @@ def matched(outputs, matches):
             b = MySQLCompare.parse_excepted_outputs(matches)
             return a == b
         else:
-            if len(outputs) != len(matches):
+            if len(outputs) > len(matches):
                 return False
             for i in range(0, len(outputs)):
                 if not compare_line(outputs[i], matches[i]):
                     return False
+            for i in range(len(outputs), len(matches)):
+                if not compare_line("", matches[i]):
+                    return False
             return True
 
 
@@ -212,11 +220,14 @@ def matched(outputs, matches, fuzz):
         b = parse_table_parts(matches, fuzz)
         return a == b
     else:
-        if len(outputs) != len(matches):
+        if len(outputs) > len(matches):
             return False
         for i in range(0, len(outputs)):
             if not compare_line(outputs[i], matches[i]):
                 return False
+        for i in range(len(outputs), len(matches)):
+            if not compare_line("", matches[i]):
+                return False
         return True
 
 
diff --git a/tests/sanitize/tsan.suppression b/tests/sanitize/tsan.suppression
new file mode 100644
index 00000000000..73824caa2b9
--- /dev/null
+++ b/tests/sanitize/tsan.suppression
@@ -0,0 +1 @@
+race:dbms/src/Common/TiFlashMetrics.h
diff --git a/tests/tidb-ci/new_collation_fullstack/expr.test b/tests/tidb-ci/new_collation_fullstack/expr.test
index 15ada0f335c..1e2135c4f2d 100644
--- a/tests/tidb-ci/new_collation_fullstack/expr.test
+++ b/tests/tidb-ci/new_collation_fullstack/expr.test
@@ -35,6 +35,13 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_s
 |    2 | abc   |
 +------+-------+
 
+mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 = 'abc ';
++------+-------+
+| id   | value1|
++------+-------+
+|    1 | abc   |
+|    2 | abc   |
++------+-------+
 
 mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value from test.t where value like 'aB%';
 +------+-------+
@@ -62,6 +69,13 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_s
 |    3 | def   |
 +------+-------+
 
+mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 = 'def ';
++------+-------+
+| id   | value1|
++------+-------+
+|    3 | def   |
++------+-------+
+
 mysql> set session tidb_isolation_read_engines='tiflash'; select /*+ read_from_storage(tiflash[t]) */ id, value1 from test.t where value1 in ('Abc','def');
 +------+-------+
 | id   | value1|