Skip to content

Commit

Permalink
[Improve](complex-type) update for array/map element_at with nested c…
Browse files Browse the repository at this point in the history
…omplex type with local tvf (apache#22927)
  • Loading branch information
amorynan authored and airborne12 committed Aug 21, 2023
1 parent e221ef2 commit f8e4891
Show file tree
Hide file tree
Showing 8 changed files with 550 additions and 18 deletions.
1 change: 1 addition & 0 deletions be/src/vec/columns/column_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ void ColumnArray::insert(const Field& x) {
}

void ColumnArray::insert_from(const IColumn& src_, size_t n) {
DCHECK(n < src_.size());
const ColumnArray& src = assert_cast<const ColumnArray&>(src_);
size_t size = src.size_at(n);
size_t offset = src.offset_at(n);
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/columns/column_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ void ColumnMap::pop_back(size_t n) {
}

void ColumnMap::insert_from(const IColumn& src_, size_t n) {
DCHECK(n < src_.size());
const ColumnMap& src = assert_cast<const ColumnMap&>(src_);
size_t size = src.size_at(n);
size_t offset = src.offset_at(n);
Expand Down
42 changes: 42 additions & 0 deletions be/src/vec/functions/array/function_array_element.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "vec/columns/column_map.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_struct.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/core/block.h"
Expand Down Expand Up @@ -283,6 +284,44 @@ class FunctionArrayElement : public IFunction {
return _execute_nullable(args, input_rows_count, src_null_map, dst_null_map);
}

/// Generic element_at implementation used for nested column types that have no
/// specialised path; it copies the selected element with IColumn::insert_from.
///
/// @param offsets          cumulative end offsets of each array row in nested_column
/// @param nested_column    flattened element column of the array
/// @param arr_null_map     optional (may be nullptr) per-row null flags of the array column
/// @param indices          per-row 1-based index; a negative index counts from the array end
/// @param nested_null_map  optional (may be nullptr) per-element null flags of nested_column
/// @param dst_null_map     output null flags; one entry is written per row
///                         (assumes the caller sized it to offsets.size() — TODO confirm)
/// @return a new column of the nested type with one value per row; rows whose
///         result is NULL hold a default value and have dst_null_map[row] set.
ColumnPtr _execute_common(const ColumnArray::Offsets64& offsets, const IColumn& nested_column,
                          const UInt8* arr_null_map, const IColumn& indices,
                          const UInt8* nested_null_map, UInt8* dst_null_map) {
    // prepare return data
    auto dst_column = nested_column.clone_empty();
    dst_column->reserve(offsets.size());

    // Start offset of the current row. Tracked explicitly so that row 0 does not
    // need to read offsets[-1] (size_t underflow on `row - 1`).
    size_t off = 0;
    for (size_t row = 0; row < offsets.size(); ++row) {
        const size_t len = offsets[row] - off;
        const auto index = indices.get_int(row);

        // a NULL array always yields NULL
        bool null_flag = arr_null_map != nullptr && arr_null_map[row] != 0;

        // Translate the 1-based (or negative, from-the-end) index into a position
        // inside nested_column. All arithmetic is done on size_t after the sign has
        // been checked, so there is no signed/unsigned mixing and no overflow.
        size_t pos = 0;
        if (!null_flag) {
            if (index > 0 && static_cast<size_t>(index) <= len) {
                pos = off + static_cast<size_t>(index) - 1;
            } else if (index < 0 && static_cast<size_t>(-(index + 1)) < len) {
                // -(index + 1) is the distance from the last element; unlike a plain
                // -index it cannot overflow when index is the minimum Int64 value.
                pos = off + len - 1 - static_cast<size_t>(-(index + 1));
            } else {
                // index 0 or out of range
                null_flag = true;
            }
        }

        // the selected element itself may be NULL
        if (!null_flag && nested_null_map != nullptr && nested_null_map[pos] != 0) {
            null_flag = true;
        }

        // actual data copy
        if (null_flag) {
            dst_null_map[row] = true;
            dst_column->insert_default();
        } else {
            dst_null_map[row] = false;
            dst_column->insert_from(nested_column, pos);
        }

        off = offsets[row];
    }
    return dst_column;
}

ColumnPtr _execute_nullable(const ColumnsWithTypeAndName& arguments, size_t input_rows_count,
const UInt8* src_null_map, UInt8* dst_null_map) {
// check array nested column type and get data
Expand Down Expand Up @@ -356,6 +395,9 @@ class FunctionArrayElement : public IFunction {
} else if (check_column<ColumnString>(*nested_column)) {
res = _execute_string(offsets, *nested_column, src_null_map, *idx_col, nested_null_map,
dst_null_map);
} else {
res = _execute_common(offsets, *nested_column, src_null_map, *idx_col, nested_null_map,
dst_null_map);
}

return res;
Expand Down
19 changes: 1 addition & 18 deletions gensrc/script/doris_builtins_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,24 +112,7 @@
[['array'], 'ARRAY', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'],
[['array'], 'ARRAY', ['STRING', '...'], 'ALWAYS_NOT_NULLABLE'],

[['element_at', '%element_extract%'], 'BOOLEAN', ['ARRAY_BOOLEAN', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'TINYINT', ['ARRAY_TINYINT', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'SMALLINT', ['ARRAY_SMALLINT', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'INT', ['ARRAY_INT', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'BIGINT', ['ARRAY_BIGINT', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'LARGEINT', ['ARRAY_LARGEINT', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DATETIME', ['ARRAY_DATETIME', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DATE', ['ARRAY_DATE', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DATETIMEV2', ['ARRAY_DATETIMEV2', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DATEV2', ['ARRAY_DATEV2', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'FLOAT', ['ARRAY_FLOAT', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DOUBLE', ['ARRAY_DOUBLE', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DECIMALV2', ['ARRAY_DECIMALV2', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DECIMAL32', ['ARRAY_DECIMAL32', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DECIMAL64', ['ARRAY_DECIMAL64', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'DECIMAL128', ['ARRAY_DECIMAL128', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'VARCHAR', ['ARRAY_VARCHAR', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'STRING', ['ARRAY_STRING', 'BIGINT'], 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'T', ['ARRAY<T>', 'BIGINT'], 'ALWAYS_NULLABLE', ['T']],

[['arrays_overlap'], 'BOOLEAN', ['ARRAY_BOOLEAN', 'ARRAY_BOOLEAN'], 'ALWAYS_NULLABLE'],
[['arrays_overlap'], 'BOOLEAN', ['ARRAY_TINYINT', 'ARRAY_TINYINT'], 'ALWAYS_NULLABLE'],
Expand Down
Binary file added regression-test/data/external_table_p0/tvf/t.orc
Binary file not shown.
Binary file not shown.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// This suite tests element_at on nested array/map columns read through the `local` tvf
// Regression suite: reads t.orc and t.parquet through the `local` table-valued
// function and applies element_at ([] subscripts) to nested array/map columns.
// Every qt_sql result is compared against the recorded output in statement order,
// so the order of the statements below matters.
suite("test_local_tvf_with_complex_type_element_at", "p0") {
// The `local` tvf needs a backend id; use the first column of the first row
// returned by backends().
List<List<Object>> table = sql """ select * from backends(); """
assertTrue(table.size() > 0)
def be_id = table[0][0]
// Directory holding the data files. Note it already ends with "/", so the
// paths built below contain a double slash.
def dataFilePath = context.config.dataPath + "/external_table_p0/tvf/"

/**
 * Schema of the data file (Spark StructType notation).
 * NOTE(review): presumably t.orc and t.parquet share this schema — confirm.
 * var schema = StructType(
 StructField("id", IntegerType, true) ::
 StructField("arr_arr", ArrayType(ArrayType(StringType), true), true)::
 StructField("arr_map", ArrayType(MapType(StringType, DateType)), true) ::
 StructField("arr_struct", ArrayType(StructType(StructField("vin", StringType, true)::StructField("charge_id", IntegerType, true)::Nil))) ::
 StructField("map_map", MapType(StringType, MapType(StringType, DoubleType)), true)::
 StructField("map_arr", MapType(IntegerType, ArrayType(DoubleType)), true)::
 StructField("map_struct", MapType(TimestampType, StructType(StructField("vin", StringType, true)::StructField("charge_id", IntegerType, true)::StructField("start_time", DoubleType, true)::Nil), true))::
 StructField("struct_arr_map", StructType(StructField("aa", ArrayType(StringType), true)::StructField("mm", MapType(DateType, StringType), true)::Nil))::
 Nil
 )
 */

// ---- ORC ----
// full scan of every column
qt_sql """
select * from local(
"file_path" = "${dataFilePath}/t.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""

// row count
qt_sql """
select count(*) from local(
"file_path" = "${dataFilePath}/t.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""

// array<array<string>>: element of an element
qt_sql """ select arr_arr[1][1] from local (
"file_path" = "${dataFilePath}/t.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""

// array<map<string,date>>: first map, then a key lookup inside that map
qt_sql """ select arr_map[1] from local (
"file_path" = "${dataFilePath}/t.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""
qt_sql """ select arr_map[1]["WdTnFb-LHW8Nel-laB-HCQA"] from local (
"file_path" = "${dataFilePath}/t.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""

// map<string,map<string,double>>: nested key lookups
qt_sql """ select map_map["W1iF16-DE1gzJx-avC-Mrf6"]["HJVQSC-46l3xm7-J6c-moIH"] from local (
"file_path" = "${dataFilePath}/t.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""

// map<int,array<double>>: array value by key, then an element of that array
qt_sql """ select map_arr[1] from local (
"file_path" = "${dataFilePath}/t.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""
qt_sql """ select map_arr[1][7] from local (
"file_path" = "${dataFilePath}/t.orc",
"backend_id" = "${be_id}",
"format" = "orc");"""

// ---- Parquet: same queries as the ORC section, against t.parquet ----
// full scan of every column
qt_sql """
select * from local(
"file_path" = "${dataFilePath}/t.parquet",
"backend_id" = "${be_id}",
"format" = "parquet"); """

// row count
qt_sql """
select count(*) from local(
"file_path" = "${dataFilePath}/t.parquet",
"backend_id" = "${be_id}",
"format" = "parquet"); """


// array<array<string>>: element of an element
qt_sql """ select arr_arr[1][1] from local (
"file_path" = "${dataFilePath}/t.parquet",
"backend_id" = "${be_id}",
"format" = "parquet");"""

// array<map<string,date>>: first map, then a key lookup inside that map
qt_sql """ select arr_map[1] from local (
"file_path" = "${dataFilePath}/t.parquet",
"backend_id" = "${be_id}",
"format" = "parquet");"""
qt_sql """ select arr_map[1]["WdTnFb-LHW8Nel-laB-HCQA"] from local (
"file_path" = "${dataFilePath}/t.parquet",
"backend_id" = "${be_id}",
"format" = "parquet");"""

// map<string,map<string,double>>: nested key lookups
qt_sql """ select map_map["W1iF16-DE1gzJx-avC-Mrf6"]["HJVQSC-46l3xm7-J6c-moIH"] from local (
"file_path" = "${dataFilePath}/t.parquet",
"backend_id" = "${be_id}",
"format" = "parquet");"""

// map<int,array<double>>: array value by key, then an element of that array
qt_sql """ select map_arr[1] from local (
"file_path" = "${dataFilePath}/t.parquet",
"backend_id" = "${be_id}",
"format" = "parquet");"""
qt_sql """ select map_arr[1][7] from local (
"file_path" = "${dataFilePath}/t.parquet",
"backend_id" = "${be_id}",
"format" = "parquet");"""
}

0 comments on commit f8e4891

Please sign in to comment.