Skip to content

Commit

Permalink
[Improve](complex-type)update orc reader for complex type and add reg…
Browse files Browse the repository at this point in the history
…ress tests (apache#22856)
  • Loading branch information
amorynan authored and airborne12 committed Aug 21, 2023
1 parent 54f1813 commit 616a31b
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 28 deletions.
20 changes: 0 additions & 20 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1291,11 +1291,6 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name,
reinterpret_cast<const DataTypeArray*>(remove_nullable(data_type).get())
->get_nested_type());
const orc::Type* nested_orc_type = orc_column_type->getSubtype(0);
if (nested_orc_type->getKind() == orc::TypeKind::MAP ||
nested_orc_type->getKind() == orc::TypeKind::STRUCT) {
return Status::InternalError(
"Array does not support nested map/struct type in column {}", col_name);
}
return _orc_column_to_doris_column<is_filter>(
col_name, static_cast<ColumnArray&>(*data_column).get_data_ptr(), nested_type,
nested_orc_type, orc_list->elements.get(), element_size);
Expand All @@ -1317,15 +1312,6 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name,
->get_value_type());
const orc::Type* orc_key_type = orc_column_type->getSubtype(0);
const orc::Type* orc_value_type = orc_column_type->getSubtype(1);
if (orc_key_type->getKind() == orc::TypeKind::LIST ||
orc_key_type->getKind() == orc::TypeKind::MAP ||
orc_key_type->getKind() == orc::TypeKind::STRUCT ||
orc_value_type->getKind() == orc::TypeKind::LIST ||
orc_value_type->getKind() == orc::TypeKind::MAP ||
orc_value_type->getKind() == orc::TypeKind::STRUCT) {
return Status::InternalError("Map does not support nested complex type in column {}",
col_name);
}
const ColumnPtr& doris_key_column = doris_map.get_keys_ptr();
const ColumnPtr& doris_value_column = doris_map.get_values_ptr();
RETURN_IF_ERROR(_orc_column_to_doris_column<is_filter>(col_name, doris_key_column,
Expand All @@ -1349,12 +1335,6 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name,
for (int i = 0; i < doris_struct.tuple_size(); ++i) {
orc::ColumnVectorBatch* orc_field = orc_struct->fields[i];
const orc::Type* orc_type = orc_column_type->getSubtype(i);
if (orc_type->getKind() == orc::TypeKind::LIST ||
orc_type->getKind() == orc::TypeKind::MAP ||
orc_type->getKind() == orc::TypeKind::STRUCT) {
return Status::InternalError(
"Struct does not support nested complex type in column {}", col_name);
}
const ColumnPtr& doris_field = doris_struct.get_column_ptr(i);
const DataTypePtr& doris_type = doris_struct_type->get_element(i);
RETURN_IF_ERROR(_orc_column_to_doris_column<is_filter>(
Expand Down
Binary file not shown.
Binary file not shown.

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions regression-test/pipeline/p0/conf/be.conf
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,5 @@ enable_fuzzy_mode=true
max_depth_of_expr_tree=200
enable_set_in_bitmap_value=true
enable_feature_binlog=true
max_sys_mem_available_low_water_mark_bytes=69206016
user_files_secure_path=/
20 changes: 12 additions & 8 deletions regression-test/suites/external_table_p0/tvf/test_local_tvf.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -21,33 +21,37 @@ suite("test_local_tvf") {
assertTrue(table.size() > 0)
def be_id = table[0][0]

List<List<Object>> doris_log = sql """ ADMIN SHOW FRONTEND CONFIG like "sys_log_dir"; """
assertTrue(doris_log.size() > 0)
def doris_log_path = doris_log[0][1]

table = sql """
select count(*) from local(
"file_path" = "log/be.out",
"file_path" = "${doris_log_path}/fe.out",
"backend_id" = "${be_id}",
"format" = "csv")
where c1 like "%start_time%";"""
where c1 like "%FE type%";"""

assertTrue(table.size() > 0)
assertTrue(Long.valueOf(table[0][0]) > 0)

table = sql """
select count(*) from local(
"file_path" = "log/*.out",
"file_path" = "${doris_log_path}/*.out",
"backend_id" = "${be_id}",
"format" = "csv")
where c1 like "%start_time%";"""
where c1 like "%FE type%";"""

assertTrue(table.size() > 0)
assertTrue(Long.valueOf(table[0][0]) > 0)

test {
sql """
select count(*) from local(
"file_path" = "../log/be.out",
"file_path" = "../fe.out",
"backend_id" = "${be_id}",
"format" = "csv")
where c1 like "%start_time%";
where c1 like "%FE type%";
"""
// check exception message contains
exception "can not contain '..' in path"
Expand All @@ -56,10 +60,10 @@ suite("test_local_tvf") {
test {
sql """
select count(*) from local(
"file_path" = "./log/xx.out",
"file_path" = "./xx.out",
"backend_id" = "${be_id}",
"format" = "csv")
where c1 like "%start_time%";
where c1 like "%FE type%";
"""
// check exception message contains
exception "No matches found"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// This suite tests the `local` tvf by reading an ORC file and a Parquet file (complex_type.orc / complex_type.parquet)
suite("test_local_tvf_with_complex_type", "p0") {
// Reads one ORC file and one Parquet file through the local() table function
// on a single backend. Judging by the file names they hold complex-type
// columns -- NOTE(review): confirm against the data files.

// Pick a backend: the first column of the first row returned by backends()
// is passed below as the "backend_id" property.
List<List<Object>> table = sql """ select * from backends(); """
assertTrue(table.size() > 0)
def be_id = table[0][0]
// Directory containing the data files. The value already ends with "/", so
// the paths built below contain "//" before the file name.
def dataFilePath = context.config.dataPath + "/external_table_p0/tvf/"

// Select every column of the ORC file via the chosen backend.
// NOTE(review): qt_sql presumably compares the result with the suite's
// recorded expected output -- confirm against the regression framework.
qt_sql """
select * from local(
"file_path" = "${dataFilePath}/complex_type.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""


// Same query against the Parquet file.
qt_sql """
select * from local(
"file_path" = "${dataFilePath}/complex_type.parquet",
"backend_id" = "${be_id}",
"format" = "parquet"); """

}

0 comments on commit 616a31b

Please sign in to comment.