Skip to content

Commit

Permalink
[Improvement](function) optimization for substr with ascii string (#2…
Browse files Browse the repository at this point in the history
  • Loading branch information
BiteTheDDDDt authored Jan 11, 2024
1 parent ef64bf7 commit 090a342
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 118 deletions.
57 changes: 52 additions & 5 deletions be/src/util/simd/vstring_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,54 @@ inline uint8_t get_utf8_byte_length(uint8_t character) {
return UTF8_BYTE_LENGTH[character];
}

// copy from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h
// The function returns true (1) if all chars passed in src are
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
inline bool validate_ascii_fast(const char* src, size_t len) {
size_t i = 0;
__m128i has_error = _mm_setzero_si128();
if (len >= 16) {
for (; i <= len - 16; i += 16) {
__m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i));
has_error = _mm_or_si128(has_error, current_bytes);
}
}
int error_mask = _mm_movemask_epi8(has_error);

char tail_has_error = 0;
for (; i < len; i++) {
tail_has_error |= src[i];
}
error_mask |= (tail_has_error & 0x80);

return !error_mask;
}

#ifdef __AVX2__
#include <x86intrin.h>
// The function returns true (1) if all chars passed in src are
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
inline bool validate_ascii_fast_avx(const char* src, size_t len) {
size_t i = 0;
__m256i has_error = _mm256_setzero_si256();
if (len >= 32) {
for (; i <= len - 32; i += 32) {
__m256i current_bytes = _mm256_loadu_si256((const __m256i*)(src + i));
has_error = _mm256_or_si256(has_error, current_bytes);
}
}
int error_mask = _mm256_movemask_epi8(has_error);

char tail_has_error = 0;
for (; i < len; i++) {
tail_has_error |= src[i];
}
error_mask |= (tail_has_error & 0x80);

return !error_mask;
}
#endif

namespace simd {

class VStringFunctions {
Expand Down Expand Up @@ -219,11 +267,10 @@ class VStringFunctions {

// Gcc will do auto simd in this function
static bool is_ascii(const StringRef& str) {
char or_code = 0;
for (size_t i = 0; i < str.size; i++) {
or_code |= str.data[i];
}
return !(or_code & 0x80);
#ifdef __AVX2__
return validate_ascii_fast_avx(str.data, str.size);
#endif
return validate_ascii_fast(str.data, str.size);
}

static void reverse(const StringRef& str, StringRef dst) {
Expand Down
8 changes: 8 additions & 0 deletions be/src/vec/common/pod_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,14 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r
this->c_end += bytes_to_copy;
}

template <typename It1, typename It2>
void insert_assume_reserved_and_allow_overflow(It1 from_begin, It2 from_end) {
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
memcpy_small_allow_read_write_overflow15(
this->c_end, reinterpret_cast<const void*>(&*from_begin), bytes_to_copy);
this->c_end += bytes_to_copy;
}

void swap(PODArray& rhs) {
#ifndef NDEBUG
this->unprotect();
Expand Down
226 changes: 113 additions & 113 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#pragma once

#include <glog/logging.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -52,6 +51,7 @@
#include "vec/common/memcpy_small.h"
#include "vec/common/pod_array.h"
#include "vec/common/pod_array_fwd.h"
#include "vec/common/string_utils/string_utils.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
Expand Down Expand Up @@ -120,6 +120,14 @@ struct StringOP {
chars.insert(string_value.data(), string_value.data() + string_value.size());
offsets[index] = chars.size();
}

static void push_value_string_reserved_and_allow_overflow(const std::string_view& string_value,
int index, ColumnString::Chars& chars,
ColumnString::Offsets& offsets) {
chars.insert_assume_reserved_and_allow_overflow(string_value.data(),
string_value.data() + string_value.size());
offsets[index] = chars.size();
}
};

struct SubstringUtil {
Expand Down Expand Up @@ -147,30 +155,37 @@ struct SubstringUtil {
check_set_nullable(argument_columns[i], null_map, col_const[i]);
}

auto specific_str_column = assert_cast<const ColumnString*>(argument_columns[0].get());
auto specific_start_column =
const auto* specific_str_column =
assert_cast<const ColumnString*>(argument_columns[0].get());
const auto* specific_start_column =
assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get());
auto specific_len_column =
const auto* specific_len_column =
assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get());
if (col_const[1] && col_const[2]) {
vectors<true>(specific_str_column->get_chars(), specific_str_column->get_offsets(),
specific_start_column->get_data(), specific_len_column->get_data(),
null_map->get_data(), res->get_chars(), res->get_offsets());
} else {
vectors<false>(specific_str_column->get_chars(), specific_str_column->get_offsets(),
specific_start_column->get_data(), specific_len_column->get_data(),
null_map->get_data(), res->get_chars(), res->get_offsets());

auto vectors = vectors_utf8<false>;
bool is_ascii = simd::VStringFunctions::is_ascii(
{specific_str_column->get_chars().data(), specific_str_column->get_chars().size()});
if (col_const[1] && col_const[2] && is_ascii) {
vectors = vectors_ascii<true>;
} else if (col_const[1] && col_const[2]) {
vectors = vectors_utf8<true>;
} else if (is_ascii) {
vectors = vectors_ascii<false>;
}
vectors(specific_str_column->get_chars(), specific_str_column->get_offsets(),
specific_start_column->get_data(), specific_len_column->get_data(),
null_map->get_data(), res->get_chars(), res->get_offsets());

block.get_by_position(result).column =
ColumnNullable::create(std::move(res), std::move(null_map));
}

private:
template <bool Const>
static void vectors(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets,
const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
NullMap& null_map, ColumnString::Chars& res_chars,
ColumnString::Offsets& res_offsets) {
template <bool is_const>
static void vectors_utf8(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets,
const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
NullMap& null_map, ColumnString::Chars& res_chars,
ColumnString::Offsets& res_offsets) {
size_t size = offsets.size();
res_offsets.resize(size);
res_chars.reserve(chars.size());
Expand All @@ -179,119 +194,104 @@ struct SubstringUtil {
PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
PMR::vector<size_t> index {&pool};

auto* __restrict data_ptr = chars.data();
auto* __restrict offset_ptr = offsets.data();

if constexpr (Const) {
const auto start_value = start[0];
const auto len_value = len[0];
if (start_value == 0 || len_value <= 0) {
if constexpr (is_const) {
if (start[0] == 0 || len[0] <= 0) {
for (size_t i = 0; i < size; ++i) {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
} else {
for (size_t i = 0; i < size; ++i) {
const int str_size = offset_ptr[i] - offset_ptr[i - 1];
const uint8_t* raw_str = data_ptr + offset_ptr[i - 1];
// return empty string if start > src.length
if (start_value > str_size || start_value < -str_size || str_size == 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
// reference to string_function.cpp: substring
size_t byte_pos = 0;
index.clear();
for (size_t j = 0, char_size = 0;
j < str_size &&
(start_value <= 0 || index.size() <= start_value + len_value);
j += char_size) {
char_size = UTF8_BYTE_LENGTH[(unsigned char)(raw_str)[j]];
index.push_back(j);
}
return;
}
}

int fixed_pos = start_value;
if (fixed_pos < 0) {
fixed_pos = str_size + fixed_pos + 1;
} else if (fixed_pos > index.size()) {
StringOP::push_null_string(i, res_chars, res_offsets, null_map);
continue;
}
for (size_t i = 0; i < size; ++i) {
int str_size = offsets[i] - offsets[i - 1];
const char* str_data = (char*)chars.data() + offsets[i - 1];
int start_value = is_const ? start[0] : start[i];
int len_value = is_const ? len[0] : len[i];

byte_pos = index[fixed_pos - 1];
int fixed_len = str_size - byte_pos;
if (fixed_pos + len_value <= index.size()) {
fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
}
// return empty string if start > src.length
if (start_value > str_size || str_size == 0 || start_value == 0 || len_value <= 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}

if (byte_pos <= str_size && fixed_len > 0) {
// return StringRef(str.data + byte_pos, fixed_len);
StringOP::push_value_string(
std::string_view {reinterpret_cast<const char*>(raw_str + byte_pos),
(size_t)fixed_len},
i, res_chars, res_offsets);
} else {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
size_t byte_pos = 0;
index.clear();
for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
char_size = get_utf8_byte_length(str_data[j]);
index.push_back(j);
if (start_value > 0 && index.size() > start_value + len_value) {
break;
}
}
} else {
PMR::vector<std::pair<const unsigned char*, int>> strs(&pool);
strs.resize(size);
for (int i = 0; i < size; ++i) {
strs[i].first = data_ptr + offset_ptr[i - 1];
strs[i].second = offset_ptr[i] - offset_ptr[i - 1];

int fixed_pos = start_value;
if (fixed_pos < -(int)index.size()) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
if (fixed_pos < 0) {
fixed_pos = index.size() + fixed_pos + 1;
}
if (fixed_pos > index.size()) {
StringOP::push_null_string(i, res_chars, res_offsets, null_map);
continue;
}

for (size_t i = 0; i < size; ++i) {
auto [raw_str, str_size] = strs[i];
const auto& start_value = start[i];
const auto& len_value = len[i];
byte_pos = index[fixed_pos - 1];
size_t fixed_len = str_size - byte_pos;
if (fixed_pos + len_value <= index.size()) {
fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
}

// return empty string if start > src.length
if (start_value > str_size || str_size == 0 || start_value == 0 || len_value <= 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
// reference to string_function.cpp: substring
size_t byte_pos = 0;
index.clear();
for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
char_size = UTF8_BYTE_LENGTH[(unsigned char)(raw_str)[j]];
index.push_back(j);
if (start_value > 0 && index.size() > start_value + len_value) {
break;
}
}
if (byte_pos <= str_size && fixed_len > 0) {
StringOP::push_value_string_reserved_and_allow_overflow(
{str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
} else {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
}
}

int fixed_pos = start_value;
if (fixed_pos < -(int)index.size()) {
template <bool is_const>
static void vectors_ascii(const ColumnString::Chars& chars,
const ColumnString::Offsets& offsets,
const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
NullMap& null_map, ColumnString::Chars& res_chars,
ColumnString::Offsets& res_offsets) {
size_t size = offsets.size();
res_offsets.resize(size);

if constexpr (is_const) {
if (start[0] == 0 || len[0] <= 0) {
for (size_t i = 0; i < size; ++i) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
if (fixed_pos < 0) {
fixed_pos = index.size() + fixed_pos + 1;
}
if (fixed_pos > index.size()) {
StringOP::push_null_string(i, res_chars, res_offsets, null_map);
continue;
}
return;
}
res_chars.reserve(std::min(chars.size(), len[0] * size));
} else {
res_chars.reserve(chars.size());
}

byte_pos = index[fixed_pos - 1];
int fixed_len = str_size - byte_pos;
if (fixed_pos + len_value <= index.size()) {
fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
}
for (size_t i = 0; i < size; ++i) {
int str_size = offsets[i] - offsets[i - 1];
const char* str_data = (char*)chars.data() + offsets[i - 1];

if (byte_pos <= str_size && fixed_len > 0) {
// return StringRef(str.data + byte_pos, fixed_len);
StringOP::push_value_string(
std::string_view {reinterpret_cast<const char*>(raw_str + byte_pos),
(size_t)fixed_len},
i, res_chars, res_offsets);
} else {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
int start_value = is_const ? start[0] : start[i];
int len_value = is_const ? len[0] : len[i];

if (start_value > str_size || start_value < -str_size || str_size == 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
int fixed_pos = start_value - 1;
if (fixed_pos < 0) {
fixed_pos = str_size + fixed_pos + 1;
}
size_t fixed_len = std::min(str_size - fixed_pos, len_value);
StringOP::push_value_string_reserved_and_allow_overflow(
{str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
}
}
};
Expand Down

0 comments on commit 090a342

Please sign in to comment.