Skip to content

Commit

Permalink
[GLUTEN-1632][CH] Update ClickHouse Version (20230517)
Browse files (browse the repository at this point in the history)
  • Loading branch information
lwz9103 committed May 19, 2023
1 parent abb0950 commit 6d02cbf
Show file tree
Hide file tree
Showing 11 changed files with 29 additions and 23 deletions.
4 changes: 3 additions & 1 deletion cpp-ch/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Build prologue: locate the ClickHouse sources and read the pinned
# organisation / branch / commit from the clickhouse.version file that sits
# next to this CMakeLists.txt.
cmake_minimum_required(VERSION 3.20)
# CH_SOURCE_DIR is a cache entry defaulting to the ClickHouse directory beside
# this file. Without FORCE, a value already present in the cache is kept.
# NOTE(review): this exact statement appears twice in this view (here and after
# the file(READ) call); presumably the before/after position of one moved line.
set(CH_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ClickHouse CACHE STRING "ClickHouse source dir")

# Load the full text of clickhouse.version into CH_VERSION.
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse.version CH_VERSION)

set(CH_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ClickHouse CACHE STRING "ClickHouse source dir")
# Each call replaces the whole file text with capture group 1: the characters
# following KEY= up to (not including) the next newline.
# NOTE(review): string(REGEX REPLACE) leaves its input unchanged when the
# pattern does not match, so a missing key would put the entire file text into
# the variable. Confirm clickhouse.version always defines all three keys.
string(REGEX REPLACE ".*CH_ORG=([^\n]+).*" "\\1" CH_ORG "${CH_VERSION}")
string(REGEX REPLACE ".*CH_BRANCH=([^\n]+).*" "\\1" CH_BRANCH "${CH_VERSION}")
string(REGEX REPLACE ".*CH_COMMIT=([^\n]+).*" "\\1" CH_COMMIT "${CH_VERSION}")
Expand Down Expand Up @@ -46,6 +47,7 @@ add_custom_command(
-DENABLE_TESTS=OFF
-DENABLE_JEMALLOC=ON
-DENABLE_MULTITARGET_CODE=ON
-DENABLE_UTILS=ON
-DENABLE_EXTERN_LOCAL_ENGINE=ON
-DCOMPILER_FLAGS='-fvisibility=hidden -fvisibility-inlines-hidden'
-S ${CH_SOURCE_DIR} -G Ninja -B ${CH_BINARY_DIR} &&
Expand Down
4 changes: 2 additions & 2 deletions cpp-ch/clickhouse.version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
CH_BRANCH=clickhouse_backend
CH_COMMIT=52be833f
CH_BRANCH=rebase_ch_20230517
CH_COMMIT=eefee3cad7b
4 changes: 2 additions & 2 deletions cpp-ch/local-engine/Functions/regexpExtractAllSpark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <base/StringRef.h>
#include <Common/Documentation.h>
#include <Common/FunctionDocumentation.h>

namespace DB
{
Expand Down Expand Up @@ -365,7 +365,7 @@ namespace
/// Registers FunctionRegexpExtractAllSpark with the supplied function factory,
/// passing a one-sentence description of what the function extracts.
/// NOTE(review): two documentation arguments appear below (`Documentation{...}`
/// and `FunctionDocumentation{...}`). Presumably these are the before/after lines
/// of a diff; only one of them can be present in compilable code.
void registerFunctionRegexpExtractAllSpark(DB::FunctionFactory & factory)
{
factory.registerFunction<FunctionRegexpExtractAllSpark>(
Documentation{"Extracts all the fragments of a string that matches the regexp pattern and corresponds to the regex group index."});
FunctionDocumentation{.description = R"(Extracts all the fragments of a string that matches the regexp pattern and corresponds to the regex group index.)"});
}

}
7 changes: 3 additions & 4 deletions cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2148,7 +2148,7 @@ const ActionsDAG::Node * SerializedPlanParser::parseExpression(ActionsDAGPtr act
elem_set->insertFromBlock(elem_block.getColumnsWithTypeAndName());
elem_set->finishInsert();

auto arg = ColumnSet::create(elem_set->getTotalRowCount(), elem_set);
auto arg = ColumnSet::create(elem_set->getTotalRowCount(), FutureSet(elem_set));
args.emplace_back(&action_dag->addColumn(ColumnWithTypeAndName(std::move(arg), std::make_shared<DataTypeSet>(), name)));

const auto * function_node = toFunctionNode(action_dag, "in", args);
Expand Down Expand Up @@ -2248,7 +2248,7 @@ DB::QueryPlanPtr SerializedPlanParser::parseJoin(substrait::JoinRel join, DB::Qu
google::protobuf::StringValue optimization;
optimization.ParseFromString(join.advanced_extension().optimization().value());
auto join_opt_info = parseJoinOptimizationInfo(optimization.value());
auto table_join = std::make_shared<TableJoin>(global_context->getSettings(), global_context->getTemporaryVolume());
auto table_join = std::make_shared<TableJoin>(global_context->getSettings(), global_context->getGlobalTemporaryVolume());
if (join.type() == substrait::JoinRel_JoinType_JOIN_TYPE_INNER)
{
table_join->setKind(DB::JoinKind::Inner);
Expand Down Expand Up @@ -2503,7 +2503,7 @@ ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & name_and_typ
ColumnNumbersList aggregation_keys_indexes_list;
AggregationKeysInfo info(aggregation_keys, aggregation_keys_indexes_list, GroupByKind::NONE);
SizeLimits size_limits_for_set;
ActionsVisitor::Data visitor_data(
ActionsMatcher::Data visitor_data(
context,
size_limits_for_set,
size_t(0),
Expand All @@ -2513,7 +2513,6 @@ ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & name_and_typ
false /* no_subqueries */,
false /* no_makeset */,
false /* only_consts */,
false /* create_source_for_in */,
info);
ActionsVisitor(visitor_data).visit(ast);
return visitor_data.getActions();
Expand Down
2 changes: 2 additions & 0 deletions cpp-ch/local-engine/Shuffle/SelectorBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include <base/types.h>
#include <substrait/plan.pb.h>
#include <Common/BlockIterator.h>
#include <Common/PODArray.h>

namespace local_engine
{
struct PartitionInfo
Expand Down
5 changes: 1 addition & 4 deletions cpp-ch/local-engine/Storages/CustomStorageMergeTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,7 @@ bool CustomStorageMergeTree::partIsAssignedToBackgroundOperation(const MergeTree
{
throw std::runtime_error("not implement");
}
/// Returns a value-initialized MutationCommands (`{}`) for every part;
/// the part argument is unnamed and never read.
MutationCommands CustomStorageMergeTree::getFirstAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & /*part*/) const
{
return {};
}

std::string CustomStorageMergeTree::getName() const
{
throw std::runtime_error("not implement");
Expand Down
3 changes: 2 additions & 1 deletion cpp-ch/local-engine/Storages/CustomStorageMergeTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ class CustomStorageMergeTree final : public MergeTreeData
void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr context) override;
void movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, ContextPtr context) override;
bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const override;
MutationCommands getFirstAlterMutationCommandsForPart(const DataPartPtr & part) const override;
/// Always reports zero unfinished mutations.
size_t getNumberOfUnfinishedMutations() const override { return 0; }
/// Always returns an empty map; the part argument is not consulted.
std::map<int64_t, MutationCommands> getAlterMutationCommandsForPart(const DataPartPtr & part) const override { return {}; }
/// Not supported by this storage: unconditionally throws std::runtime_error("not implement").
void attachRestoredParts(MutableDataPartsVector && parts) override { throw std::runtime_error("not implement"); };
};

Expand Down
1 change: 1 addition & 0 deletions cpp-ch/local-engine/Storages/StorageJoinFromReadBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <Compression/CompressedReadBuffer.h>
#include <Formats/NativeReader.h>
#include <Interpreters/Context.h>
#include <Interpreters/HashJoin.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/castColumn.h>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ FormatFile::InputFormatPtr ParquetFormatFile::createInputFormat(const DB::Block
std::back_inserter(skip_row_group_indices));

format_settings.parquet.skip_row_groups = std::unordered_set<int>(skip_row_group_indices.begin(), skip_row_group_indices.end());
auto input_format = std::make_shared<DB::ParquetBlockInputFormat>(*(res->read_buffer), header, format_settings);
auto input_format = std::make_shared<DB::ParquetBlockInputFormat>(res->read_buffer.get(), nullptr, header, format_settings, 1, 8192);
// clang-format off
#endif
// clang-format on
Expand Down
19 changes: 11 additions & 8 deletions cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ class S3FileReadBufferBuilder : public ReadBufferBuilder
auto cache_base_path = context->getConfigRef().getString("s3.local_cache.cache_path", "/tmp/gluten/local_cache");
if (!fs::exists(cache_base_path))
fs::create_directories(cache_base_path);

file_cache_settings.base_path = cache_base_path;
new_settings = DB::ReadSettings();
new_settings.enable_filesystem_cache = context->getConfigRef().getBool("s3.local_cache.enabled", false);
if (new_settings.enable_filesystem_cache)
{
auto cache = DB::FileCacheFactory::instance().getOrCreate(cache_base_path, file_cache_settings, "s3_local_cache");
auto cache = DB::FileCacheFactory::instance().getOrCreate("s3_local_cache", file_cache_settings);
cache->initialize();

new_settings.remote_fs_cache = cache;
Expand All @@ -119,9 +119,9 @@ class S3FileReadBufferBuilder : public ReadBufferBuilder
size_t object_size = DB::S3::getObjectSize(*client, bucket, key, "");

auto read_buffer_creator
= [bucket, this](const std::string & path, size_t read_until_position) -> std::shared_ptr<DB::ReadBufferFromFileBase>
= [bucket, this](const std::string & path, size_t read_until_position) -> std::unique_ptr<DB::ReadBufferFromFileBase>
{
return std::make_shared<DB::ReadBufferFromS3>(
return std::make_unique<DB::ReadBufferFromS3>(
shared_client,
bucket,
path,
Expand All @@ -134,11 +134,13 @@ class S3FileReadBufferBuilder : public ReadBufferBuilder
/* restricted_seek */ true);
};

auto s3_impl = std::make_unique<DB::ReadBufferFromRemoteFSGather>(
std::move(read_buffer_creator), DB::StoredObjects{DB::StoredObject{key, object_size}}, new_settings);
DB::StoredObjects stored_objects{DB::StoredObject{key, object_size}};
auto s3_impl
= std::make_unique<DB::ReadBufferFromRemoteFSGather>(std::move(read_buffer_creator), stored_objects, new_settings, nullptr);

auto & pool_reader = context->getThreadPoolReader(DB::Context::FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER);
auto async_reader = std::make_unique<DB::AsynchronousReadIndirectBufferFromRemoteFS>(pool_reader, new_settings, std::move(s3_impl));
auto & pool_reader = context->getThreadPoolReader(DB::FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER);
auto async_reader = std::make_unique<DB::AsynchronousReadIndirectBufferFromRemoteFS>(
pool_reader, new_settings, std::move(s3_impl), nullptr, nullptr);

async_reader->setReadUntilEnd();
if (new_settings.remote_fs_prefetch)
Expand Down Expand Up @@ -195,6 +197,7 @@ class S3FileReadBufferBuilder : public ReadBufferBuilder
config.getString(config_prefix + ".secret_access_key", ""),
config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""),
{},
{},
{.use_environment_credentials
= config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false)),
.use_insecure_imds_request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Interpreters/castColumn.h>
#include <QueryPipeline/Pipe.h>
#include <Storages/SubstraitSource/FormatFile.h>
Expand Down

0 comments on commit 6d02cbf

Please sign in to comment.