Skip to content

Commit

Permalink
revert array test
Browse files Browse the repository at this point in the history
  • Loading branch information
amorynan committed Nov 29, 2024
1 parent abf4400 commit c707b6a
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 319 deletions.
4 changes: 2 additions & 2 deletions be/src/http/action/compaction_score_action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ constexpr std::string_view TABLET_ID = "tablet_id";

template <typename T>
concept CompactionScoreAccessble = requires(T t) {
{ t.get_real_compaction_score() } -> std::same_as<uint32_t>;
};
{ t.get_real_compaction_score() } -> std::same_as<uint32_t>;
};

template <CompactionScoreAccessble T>
std::vector<CompactionScoreResult> calculate_compaction_scores(
Expand Down
147 changes: 33 additions & 114 deletions be/src/vec/columns/column_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest
offsets = ColumnOffsets::create();
}


// Delegates to the nested data column's shrink_padding_chars(); the array
// column itself does nothing else here.
void ColumnArray::shrink_padding_chars() {
data->shrink_padding_chars();
}
Expand Down Expand Up @@ -372,8 +371,7 @@ void ColumnArray::pop_back(size_t n) {
void ColumnArray::reserve(size_t n) {
get_offsets().reserve(n);
get_data().reserve(
get_offsets()
.back()); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
}

// Please check the size you actually need in the data column, because it may need to be larger when the data is a string column.
Expand Down Expand Up @@ -475,63 +473,30 @@ void ColumnArray::insert_range_from_ignore_overflow(const IColumn& src, size_t s
}

ColumnPtr ColumnArray::filter(const Filter& filt, ssize_t result_size_hint) const {
if (typeid_cast<const ColumnUInt8*>(data.get())) {
if (typeid_cast<const ColumnUInt8*>(data.get()))
return filter_number<UInt8>(filt, result_size_hint);
}
if (typeid_cast<const ColumnUInt16*>(data.get())) {
if (typeid_cast<const ColumnUInt16*>(data.get()))
return filter_number<UInt16>(filt, result_size_hint);
}
if (typeid_cast<const ColumnUInt32*>(data.get())) {
if (typeid_cast<const ColumnUInt32*>(data.get()))
return filter_number<UInt32>(filt, result_size_hint);
}
if (typeid_cast<const ColumnUInt64*>(data.get())) {
if (typeid_cast<const ColumnUInt64*>(data.get()))
return filter_number<UInt64>(filt, result_size_hint);
}
if (typeid_cast<const ColumnUInt128*>(data.get())) {
return filter_number<UInt128>(filt, result_size_hint);
}
if (typeid_cast<const ColumnInt8*>(data.get())) {
if (typeid_cast<const ColumnInt8*>(data.get()))
return filter_number<Int8>(filt, result_size_hint);
}
if (typeid_cast<const ColumnInt16*>(data.get())) {
if (typeid_cast<const ColumnInt16*>(data.get()))
return filter_number<Int16>(filt, result_size_hint);
}
if (typeid_cast<const ColumnInt32*>(data.get())) {
if (typeid_cast<const ColumnInt32*>(data.get()))
return filter_number<Int32>(filt, result_size_hint);
}
if (typeid_cast<const ColumnInt64*>(data.get())) {
if (typeid_cast<const ColumnInt64*>(data.get()))
return filter_number<Int64>(filt, result_size_hint);
}
if (typeid_cast<const ColumnInt128*>(data.get())) {
return filter_number<Int128>(filt, result_size_hint);
}
if (typeid_cast<const ColumnFloat32*>(data.get())) {
if (typeid_cast<const ColumnFloat32*>(data.get()))
return filter_number<Float32>(filt, result_size_hint);
}
if (typeid_cast<const ColumnFloat64*>(data.get())) {
if (typeid_cast<const ColumnFloat64*>(data.get()))
return filter_number<Float64>(filt, result_size_hint);
}
if (typeid_cast<const ColumnDecimal32*>(data.get())) {
return filter_number<Decimal32>(filt, result_size_hint);
}
if (typeid_cast<const ColumnDecimal64*>(data.get())) {
return filter_number<Decimal64>(filt, result_size_hint);
}
if (typeid_cast<const ColumnDecimal128V2*>(data.get())) {
return filter_number<Decimal128V2>(filt, result_size_hint);
}
if (typeid_cast<const ColumnDecimal128V3*>(data.get())) {
return filter_number<Decimal128V3>(filt, result_size_hint);
}
if (typeid_cast<const ColumnDecimal256*>(data.get())) {
return filter_number<Decimal256>(filt, result_size_hint);
}
if (typeid_cast<const ColumnString*>(data.get())) {
return filter_string(filt, result_size_hint);
}
if (typeid_cast<const ColumnNullable*>(data.get())) {
if (typeid_cast<const ColumnString*>(data.get())) return filter_string(filt, result_size_hint);
//if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint);
if (typeid_cast<const ColumnNullable*>(data.get()))
return filter_nullable(filt, result_size_hint);
}
return filter_generic(filt, result_size_hint);
}

Expand Down Expand Up @@ -570,13 +535,12 @@ ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hin
if (get_offsets().empty()) return ColumnArray::create(data);

auto res = ColumnArray::create(data->clone_empty());
using ColVecType = ColumnVectorOrDecimal<T>;

auto& res_elems = assert_cast<ColVecType&>(res->get_data()).get_data();
auto& res_elems = assert_cast<ColumnVector<T>&>(res->get_data()).get_data();
auto& res_offsets = res->get_offsets();

filter_arrays_impl<T, Offset64>(
assert_cast<const ColVecType&, TypeCheckOnRelease::DISABLE>(*data).get_data(),
assert_cast<const ColumnVector<T>&, TypeCheckOnRelease::DISABLE>(*data).get_data(),
get_offsets(), res_elems, res_offsets, filt, result_size_hint);
return res;
}
Expand Down Expand Up @@ -835,71 +799,27 @@ void ColumnArray::insert_many_from(const IColumn& src, size_t position, size_t l
}

ColumnPtr ColumnArray::replicate(const IColumn::Offsets& replicate_offsets) const {
if (replicate_offsets.empty()) {
return clone_empty();
}
if (replicate_offsets.empty()) return clone_empty();

// keep ColumnUInt8 for ColumnNullable::null_map
if (typeid_cast<const ColumnUInt8*>(data.get())) {
if (typeid_cast<const ColumnUInt8*>(data.get()))
return replicate_number<UInt8>(replicate_offsets);
}
if (typeid_cast<const ColumnUInt16*>(data.get())) {
return replicate_number<UInt16>(replicate_offsets);
}
if (typeid_cast<const ColumnUInt32*>(data.get())) {
return replicate_number<UInt32>(replicate_offsets);
}
if (typeid_cast<const ColumnUInt64*>(data.get())) {
return replicate_number<UInt64>(replicate_offsets);
}
if (typeid_cast<const ColumnUInt128*>(data.get())) {
return replicate_number<UInt128>(replicate_offsets);
}
if (typeid_cast<const ColumnInt8*>(data.get())) {
if (typeid_cast<const ColumnInt8*>(data.get()))
return replicate_number<Int8>(replicate_offsets);
}
if (typeid_cast<const ColumnInt16*>(data.get())) {
if (typeid_cast<const ColumnInt16*>(data.get()))
return replicate_number<Int16>(replicate_offsets);
}
if (typeid_cast<const ColumnInt32*>(data.get())) {
if (typeid_cast<const ColumnInt32*>(data.get()))
return replicate_number<Int32>(replicate_offsets);
}
if (typeid_cast<const ColumnInt64*>(data.get())) {
if (typeid_cast<const ColumnInt64*>(data.get()))
return replicate_number<Int64>(replicate_offsets);
}
if (typeid_cast<const ColumnInt128*>(data.get())) {
return replicate_number<Int128>(replicate_offsets);
}
if (typeid_cast<const ColumnFloat32*>(data.get())) {
if (typeid_cast<const ColumnFloat32*>(data.get()))
return replicate_number<Float32>(replicate_offsets);
}
if (typeid_cast<const ColumnFloat64*>(data.get())) {
if (typeid_cast<const ColumnFloat64*>(data.get()))
return replicate_number<Float64>(replicate_offsets);
}
if (typeid_cast<const ColumnDecimal32*>(data.get())) {
return replicate_number<Decimal32>(replicate_offsets);
}
if (typeid_cast<const ColumnDecimal64*>(data.get())) {
return replicate_number<Decimal64>(replicate_offsets);
}
if (typeid_cast<const ColumnDecimal128V2*>(data.get())) {
return replicate_number<Decimal128V2>(replicate_offsets);
}
if (typeid_cast<const ColumnDecimal128V3*>(data.get())) {
return replicate_number<Decimal128V3>(replicate_offsets);
}
if (typeid_cast<const ColumnDecimal256*>(data.get())) {
return replicate_number<Decimal256>(replicate_offsets);
}
if (typeid_cast<const ColumnString*>(data.get())) {
return replicate_string(replicate_offsets);
}
if (typeid_cast<const ColumnConst*>(data.get())) {
return replicate_const(replicate_offsets);
}
if (typeid_cast<const ColumnNullable*>(data.get())) {
if (typeid_cast<const ColumnString*>(data.get())) return replicate_string(replicate_offsets);
if (typeid_cast<const ColumnConst*>(data.get())) return replicate_const(replicate_offsets);
if (typeid_cast<const ColumnNullable*>(data.get()))
return replicate_nullable(replicate_offsets);
}
return replicate_generic(replicate_offsets);
}

Expand All @@ -909,20 +829,19 @@ ColumnPtr ColumnArray::replicate_number(const IColumn::Offsets& replicate_offset
column_match_offsets_size(col_size, replicate_offsets.size());

MutableColumnPtr res = clone_empty();
using ColVecType = ColumnVectorOrDecimal<T>;

if (!col_size) {
return res;
}

auto& res_arr = assert_cast<ColumnArray&>(*res);

const typename ColVecType::Container& src_data =
assert_cast<const ColVecType&>(*data).get_data();
const typename ColumnVector<T>::Container& src_data =
assert_cast<const ColumnVector<T>&>(*data).get_data();
const auto& src_offsets = get_offsets();

typename ColVecType::Container& res_data =
assert_cast<ColVecType&>(res_arr.get_data()).get_data();
typename ColumnVector<T>::Container& res_data =
assert_cast<ColumnVector<T>&>(res_arr.get_data()).get_data();
auto& res_offsets = res_arr.get_offsets();

res_data.reserve(data->size() / col_size * replicate_offsets.back());
Expand Down Expand Up @@ -1137,10 +1056,10 @@ ColumnPtr ColumnArray::permute(const Permutation& perm, size_t limit) const {
nested_perm.push_back(offset_at(perm[i]) + j);
}
}
if (!nested_perm.empty()) {
if (nested_perm.size() != 0) {
res->data = data->permute(nested_perm, nested_perm.size());
}
return res;
}

} // namespace doris::vectorized
} // namespace doris::vectorized
9 changes: 0 additions & 9 deletions be/src/vec/columns/column_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,6 @@ void ColumnVector<T>::get_permutation(bool reverse, size_t limit, int nan_direct
}
}

template <typename T>
const char* ColumnVector<T>::get_family_name() const {
// however we have a conflict type of number and other can store in number type such as ipv4 and uint32
if (std::is_same_v<T, IPv4>) {
return "IPv4";
}
return TypeName<T>::get();
}

template <typename T>
MutableColumnPtr ColumnVector<T>::clone_resized(size_t size) const {
auto res = this->create();
Expand Down
8 changes: 7 additions & 1 deletion be/src/vec/columns/column_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,13 @@ class ColumnVector final : public COWHelper<IColumn, ColumnVector<T>> {

// Resizes this column by forwarding n to the underlying `data` container's resize().
void resize(size_t n) override { data.resize(n); }

std::string get_name() const override { return TypeName<T>::get(); }
// Returns the name of this column's element type as a string.
//
// ColumnVector<IPv4> is special-cased to report "IPv4". The original note here
// says IPv4 is stored in a numeric type and conflicts with UInt32
// (presumably TypeName<IPv4> would otherwise yield the UInt32 name — TODO confirm).
// Every other element type reports TypeName<T>::get().
std::string get_name() const override {
    // The type test is a compile-time constant, so it is resolved with
    // `if constexpr` instead of a runtime branch.
    if constexpr (std::is_same_v<T, IPv4>) {
        return "IPv4";
    }
    return TypeName<T>::get();
}

MutableColumnPtr clone_resized(size_t size) const override;

Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/columns/columns_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,8 @@ INSTANTIATE(UInt32, IColumn::Offset)
INSTANTIATE(UInt32, ColumnArray::Offset64)
INSTANTIATE(UInt64, IColumn::Offset)
INSTANTIATE(UInt64, ColumnArray::Offset64)
INSTANTIATE(UInt128, IColumn::Offset)
INSTANTIATE(UInt128, ColumnArray::Offset64)
INSTANTIATE(Int8, IColumn::Offset)
INSTANTIATE(Int8, ColumnArray::Offset64)
INSTANTIATE(Int16, IColumn::Offset)
Expand All @@ -373,8 +375,6 @@ INSTANTIATE(Int64, IColumn::Offset)
INSTANTIATE(Int64, ColumnArray::Offset64)
INSTANTIATE(Int128, IColumn::Offset)
INSTANTIATE(Int128, ColumnArray::Offset64)
INSTANTIATE(UInt128, IColumn::Offset)
INSTANTIATE(UInt128, ColumnArray::Offset64)
INSTANTIATE(Float32, IColumn::Offset)
INSTANTIATE(Float32, ColumnArray::Offset64)
INSTANTIATE(Float64, IColumn::Offset)
Expand Down
2 changes: 0 additions & 2 deletions be/src/vec/columns/predicate_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,6 @@ class PredicateColumnType final : public COWHelper<IColumn, PredicateColumnType<
data.push_back_without_reserve(sv);
}

bool is_predicate_column() const override { return true; }

// used for int128
void insert_in_copy_way(const char* data_ptr, size_t length) {
T val {};
Expand Down
52 changes: 43 additions & 9 deletions be/src/vec/functions/function_ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -928,8 +928,18 @@ class FunctionIPv6CIDRToRange : public IFunction {
auto& vec_res_upper_range = col_res_upper_range->get_data();

static constexpr UInt8 max_cidr_mask = IPV6_BINARY_LENGTH * 8;
unsigned char ipv6_address_data[IPV6_BINARY_LENGTH];

if (is_addr_const) {
StringRef str_ref = from_column.get_data_at(0);
const char* value = str_ref.data;
size_t value_size = str_ref.size;
if (value_size > IPV6_BINARY_LENGTH || value == nullptr || value_size == 0) {
throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal ipv6 address '{}'",
std::string(value, value_size));
}
memcpy(ipv6_address_data, value, value_size);
memset(ipv6_address_data + value_size, 0, IPV6_BINARY_LENGTH - value_size);
for (size_t i = 0; i < input_rows_count; ++i) {
auto cidr = cidr_column.get_int(i);
if (cidr < 0 || cidr > max_cidr_mask) {
Expand All @@ -939,9 +949,13 @@ class FunctionIPv6CIDRToRange : public IFunction {
if constexpr (std::is_same_v<FromColumn, ColumnString>) {
// The 16-byte IPv6 string is stored in big-endian byte order,
// so convert it to little-endian first.
auto* src_data = const_cast<char*>(from_column.get_data_at(0).data);
std::reverse(src_data, src_data + IPV6_BINARY_LENGTH);
apply_cidr_mask(src_data, reinterpret_cast<char*>(&vec_res_lower_range[i]),
if (!IPv6Value::is_valid_string(value, value_size)) {
throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal ipv6 address '{}'",
std::string(value, value_size));
}
std::reverse(ipv6_address_data, ipv6_address_data + IPV6_BINARY_LENGTH);
apply_cidr_mask(reinterpret_cast<const char*>(&ipv6_address_data),
reinterpret_cast<char*>(&vec_res_lower_range[i]),
reinterpret_cast<char*>(&vec_res_upper_range[i]),
cast_set<UInt8>(cidr));
} else {
Expand All @@ -961,9 +975,19 @@ class FunctionIPv6CIDRToRange : public IFunction {
if constexpr (std::is_same_v<FromColumn, ColumnString>) {
// The 16-byte IPv6 string is stored in big-endian byte order,
// so convert it to little-endian first.
auto* src_data = const_cast<char*>(from_column.get_data_at(i).data);
std::reverse(src_data, src_data + IPV6_BINARY_LENGTH);
apply_cidr_mask(src_data, reinterpret_cast<char*>(&vec_res_lower_range[i]),
StringRef str_ref = from_column.get_data_at(i);
const char* value = str_ref.data;
size_t value_size = str_ref.size;
if (value_size > IPV6_BINARY_LENGTH || value == nullptr || value_size == 0
|| !IPv6Value::is_valid_string(value, value_size)) {
throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal ipv6 address '{}'",
std::string(value, value_size));
}
memcpy(ipv6_address_data, value, value_size);
memset(ipv6_address_data + value_size, 0, IPV6_BINARY_LENGTH - value_size);
std::reverse(ipv6_address_data, ipv6_address_data + IPV6_BINARY_LENGTH);
apply_cidr_mask(reinterpret_cast<const char*>(&ipv6_address_data),
reinterpret_cast<char*>(&vec_res_lower_range[i]),
reinterpret_cast<char*>(&vec_res_upper_range[i]),
cast_set<UInt8>(cidr));
} else {
Expand All @@ -983,9 +1007,19 @@ class FunctionIPv6CIDRToRange : public IFunction {
if constexpr (std::is_same_v<FromColumn, ColumnString>) {
// The 16-byte IPv6 string is stored in big-endian byte order,
// so convert it to little-endian first.
auto* src_data = const_cast<char*>(from_column.get_data_at(i).data);
std::reverse(src_data, src_data + IPV6_BINARY_LENGTH);
apply_cidr_mask(src_data, reinterpret_cast<char*>(&vec_res_lower_range[i]),
StringRef str_ref = from_column.get_data_at(i);
const char* value = str_ref.data;
size_t value_size = str_ref.size;
if (value_size > IPV6_BINARY_LENGTH || value == nullptr || value_size == 0
|| !IPv6Value::is_valid_string(value, value_size)) {
throw Exception(ErrorCode::INVALID_ARGUMENT, "Illegal ipv6 address '{}'",
std::string(value, value_size));
}
memcpy(ipv6_address_data, value, value_size);
memset(ipv6_address_data + value_size, 0, IPV6_BINARY_LENGTH - value_size);
std::reverse(ipv6_address_data, ipv6_address_data + IPV6_BINARY_LENGTH);
apply_cidr_mask(reinterpret_cast<const char*>(&ipv6_address_data),
reinterpret_cast<char*>(&vec_res_lower_range[i]),
reinterpret_cast<char*>(&vec_res_upper_range[i]),
cast_set<UInt8>(cidr));
} else {
Expand Down
Loading

0 comments on commit c707b6a

Please sign in to comment.