Skip to content

Commit

Permalink
Make Segment v2 use string's real length(#1943) (#1944)
Browse files Browse the repository at this point in the history
  • Loading branch information
wangbo authored and imay committed Oct 13, 2019
1 parent 8232261 commit 80e9b21
Show file tree
Hide file tree
Showing 14 changed files with 143 additions and 108 deletions.
37 changes: 31 additions & 6 deletions be/src/olap/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ class Field {
// Returns size() plus one byte.
// NOTE(review): the extra byte presumably holds the per-cell null indicator — confirm against the row layout.
inline size_t field_size() const { return size() + 1; }
// Returns the stored _index_size member unchanged.
inline size_t index_size() const { return _index_size; }

inline void set_to_max(char* buf) const { return _type_info->set_to_max(buf); }
virtual inline void set_to_max(char* buf) const { return _type_info->set_to_max(buf); }
// Sets the value at `buf` to this type's minimum by delegating to _type_info->set_to_min(buf).
inline void set_to_min(char* buf) const { return _type_info->set_to_min(buf); }
inline char* allocate_value_from_arena(Arena* arena) const { return _type_info->allocate_value_from_arena(arena); }
virtual inline char* allocate_value_from_arena(Arena* arena) const { return arena->Allocate(_type_info->size()); }

inline void agg_update(RowCursorCell* dest, const RowCursorCell& src, MemPool* mem_pool = nullptr) const {
_agg_info->update(dest, src, mem_pool);
Expand Down Expand Up @@ -200,10 +200,6 @@ class Field {
_type_info->deep_copy_with_arena(dest, src, arena);
}

inline void direct_copy_content(char* dest, const char* src) const {
_type_info->direct_copy(dest, src);
}

// Copy source content to destination in index format.
template<typename DstCellType, typename SrcCellType>
void to_index(DstCellType* dst, const SrcCellType& src) const;
Expand Down Expand Up @@ -259,6 +255,14 @@ class Field {
// Length in bytes.
// Fixed for every type except strings.
uint32_t _length;

// Allocates a string value from `arena` in two steps: first a Slice header,
// then a data buffer of `_length` bytes that the header points to.
// Returns the header as a raw char pointer. This function sets the Slice's
// size to `_length` but does not write anything into the data buffer.
char* allocate_string_value_from_arena(Arena* arena) const {
    char* const header = arena->Allocate(sizeof(Slice));
    Slice* const str = reinterpret_cast<Slice*>(header);
    str->size = _length;
    // The buffer is sized from the value just stored in the header.
    str->data = arena->Allocate(str->size);
    return header;
};
};

template<typename LhsCellType, typename RhsCellType>
Expand Down Expand Up @@ -378,6 +382,16 @@ class CharField: public Field {
// Returns a new CharField copy-constructed from this object, allocated with `new`.
// NOTE(review): a raw pointer is returned; presumably the caller takes ownership — confirm.
CharField* clone() const override {
return new CharField(*this);
}

// Allocates storage for one value of this field from `arena` by delegating to
// Field::allocate_string_value_from_arena, and returns the pointer it produces.
char* allocate_value_from_arena(Arena* arena) const override {
return Field::allocate_string_value_from_arena(arena);
}

void set_to_max(char* ch) const override {
auto slice = reinterpret_cast<Slice*>(ch);
slice->size = _length;
memset(slice->data, 0xFF, slice->size);
}
};

class VarcharField: public Field {
Expand All @@ -389,6 +403,7 @@ class VarcharField: public Field {
return _length - OLAP_STRING_MAX_BYTES;
}

// Subtract OLAP_STRING_MAX_BYTES here only for compatibility with the old storage format.
char* allocate_memory(char* cell_ptr, char* variable_ptr) const override {
auto slice = (Slice*)cell_ptr;
slice->data = variable_ptr;
Expand All @@ -400,6 +415,16 @@ class VarcharField: public Field {
// Returns a new VarcharField copy-constructed from this object, allocated with `new`.
// NOTE(review): a raw pointer is returned; presumably the caller takes ownership — confirm.
VarcharField* clone() const override {
return new VarcharField(*this);
}

// Allocates storage for one value of this field from `arena` by delegating to
// Field::allocate_string_value_from_arena, and returns the pointer it produces.
char* allocate_value_from_arena(Arena* arena) const override {
return Field::allocate_string_value_from_arena(arena);
}

void set_to_max(char* ch) const override {
auto slice = reinterpret_cast<Slice*>(ch);
slice->size = _length - OLAP_STRING_MAX_BYTES;
memset(slice->data, 0xFF, slice->size);
}
};

class BitmapAggField: public Field {
Expand Down
3 changes: 0 additions & 3 deletions be/src/olap/olap_define.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ static constexpr uint32_t OLAP_COMPACTION_DEFAULT_CANDIDATE_SIZE = 10;
// the max length supported for varchar type
static const uint16_t OLAP_STRING_MAX_LENGTH = 65535;

//the max length supported for char type
static const uint16_t OLAP_CHAR_MAX_LENGTH = 255;

static const int32_t PREFERRED_SNAPSHOT_VERSION = 3;

// the max bytes for stored string length
Expand Down
16 changes: 8 additions & 8 deletions be/src/olap/rowset/segment_v2/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@ class NullBitmapBuilder {
};

ColumnWriter::ColumnWriter(const ColumnWriterOptions& opts,
const TypeInfo* typeinfo,
std::unique_ptr<Field> field,
bool is_nullable,
WritableFile* output_file)
: _opts(opts),
_type_info(typeinfo),
_is_nullable(is_nullable),
_output_file(output_file) {
_output_file(output_file),
_field(std::move(field)) {
}

ColumnWriter::~ColumnWriter() {
Expand All @@ -92,7 +92,7 @@ ColumnWriter::~ColumnWriter() {
}

Status ColumnWriter::init() {
RETURN_IF_ERROR(EncodingInfo::get(_type_info, _opts.encoding_type, &_encoding_info));
RETURN_IF_ERROR(EncodingInfo::get(_field->type_info(), _opts.encoding_type, &_encoding_info));
if (_opts.compression_type != NO_COMPRESSION) {
RETURN_IF_ERROR(get_block_compression_codec(_opts.compression_type, &_compress_codec));
}
Expand All @@ -105,7 +105,7 @@ Status ColumnWriter::init() {
if (page_builder == nullptr) {
return Status::NotSupported(
Substitute("Failed to create page builder for type $0 and encoding $1",
_type_info->type(), _opts.encoding_type));
_field->type(), _opts.encoding_type));
}
_page_builder.reset(page_builder);
// create ordinal builder
Expand All @@ -115,7 +115,7 @@ Status ColumnWriter::init() {
_null_bitmap_builder.reset(new NullBitmapBuilder());
}
if (_opts.need_zone_map) {
_column_zone_map_builder.reset(new ColumnZoneMapBuilder(_type_info));
_column_zone_map_builder.reset(new ColumnZoneMapBuilder(_field.get()));
}
return Status::OK();
}
Expand Down Expand Up @@ -148,7 +148,7 @@ Status ColumnWriter::_append_data(const uint8_t** ptr, size_t num_rows) {
bool is_page_full = (num_written < remaining);
remaining -= num_written;
_next_rowid += num_written;
*ptr += _type_info->size() * num_written;
*ptr += _field->size() * num_written;
// we must write null bits after write data, because we don't
// know how many rows can be written into current page
if (_is_nullable) {
Expand Down Expand Up @@ -240,7 +240,7 @@ Status ColumnWriter::write_zone_map() {
}

void ColumnWriter::write_meta(ColumnMetaPB* meta) {
meta->set_type(_type_info->type());
meta->set_type(_field->type());
meta->set_encoding(_opts.encoding_type);
meta->set_compression(_opts.compression_type);
meta->set_is_nullable(_is_nullable);
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/column_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class PageBuilder;
class ColumnWriter {
public:
ColumnWriter(const ColumnWriterOptions& opts,
const TypeInfo* typeinfo,
std::unique_ptr<Field> field,
bool is_nullable,
WritableFile* output_file);
~ColumnWriter();
Expand Down Expand Up @@ -138,7 +138,6 @@ class ColumnWriter {

private:
ColumnWriterOptions _opts;
const TypeInfo* _type_info = nullptr;
bool _is_nullable;
WritableFile* _output_file = nullptr;

Expand All @@ -154,6 +153,7 @@ class ColumnWriter {
std::unique_ptr<NullBitmapBuilder> _null_bitmap_builder;
std::unique_ptr<OrdinalPageIndexBuilder> _ordinal_index_builder;
std::unique_ptr<ColumnZoneMapBuilder> _column_zone_map_builder;
std::unique_ptr<Field> _field;

PagePointer _ordinal_index_pp;
PagePointer _zone_map_pp;
Expand Down
9 changes: 4 additions & 5 deletions be/src/olap/rowset/segment_v2/column_zone_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,10 @@ namespace doris {

namespace segment_v2 {

ColumnZoneMapBuilder::ColumnZoneMapBuilder(const TypeInfo* type_info) : _type_info(type_info) {
ColumnZoneMapBuilder::ColumnZoneMapBuilder(Field* field) : _field(field) {
PageBuilderOptions options;
options.data_page_size = 0;
_page_builder.reset(new BinaryPlainPageBuilder(options));
_field.reset(FieldFactory::create_by_type(_type_info->type()));
_zone_map.min_value = _field->allocate_value_from_arena(&_arena);
_zone_map.max_value = _field->allocate_value_from_arena(&_arena);

Expand All @@ -38,12 +37,12 @@ Status ColumnZoneMapBuilder::add(const uint8_t *vals, size_t count) {
if (vals != nullptr) {
for (int i = 0; i < count; ++i) {
if (_field->compare(_zone_map.min_value, (char *)vals) > 0) {
_field->direct_copy_content(_zone_map.min_value, (const char *)vals);
_field->type_info()->direct_copy(_zone_map.min_value, (const char *)vals);
}
if (_field->compare(_zone_map.max_value, (char *)vals) < 0) {
_field->direct_copy_content(_zone_map.max_value, (const char *)vals);
_field->type_info()->direct_copy(_zone_map.max_value, (const char *)vals);
}
vals += _type_info->size();
vals += _field->size();
if (!_zone_map.has_not_null) {
_zone_map.has_not_null = true;
}
Expand Down
5 changes: 2 additions & 3 deletions be/src/olap/rowset/segment_v2/column_zone_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ struct ZoneMap {
// The binary is encoded by BinaryPlainPageBuilder
class ColumnZoneMapBuilder {
public:
ColumnZoneMapBuilder(const TypeInfo* type_info);
ColumnZoneMapBuilder(Field* field);

Status add(const uint8_t* vals, size_t count);

Expand All @@ -68,9 +68,8 @@ class ColumnZoneMapBuilder {
void _reset_zone_map();

private:
const TypeInfo* _type_info;
std::unique_ptr<BinaryPlainPageBuilder> _page_builder;
std::unique_ptr<Field> _field;
Field* _field;
// memory will be managed by arena
ZoneMap _zone_map;
Arena _arena;
Expand Down
10 changes: 5 additions & 5 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec) {
bool is_nullable = column.is_nullable();
column_meta->set_is_nullable(is_nullable);

// TODO(zc): we can add type_info into TabletColumn?
const TypeInfo* type_info = get_type_info(column.type());
DCHECK(type_info != nullptr);

ColumnWriterOptions opts;
opts.compression_type = segment_v2::CompressionTypePB::LZ4F;
// now we create zone map for key columns
if (column.is_key()) {
opts.need_zone_map = true;
}
std::unique_ptr<ColumnWriter> writer(new ColumnWriter(opts, type_info, is_nullable, _output_file.get()));

std::unique_ptr<Field> field(FieldFactory::create(column));
DCHECK(field.get() != nullptr);

std::unique_ptr<ColumnWriter> writer(new ColumnWriter(opts, std::move(field), is_nullable, _output_file.get()));
RETURN_IF_ERROR(writer->init());
_column_writers.push_back(std::move(writer));
}
Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

namespace doris {

// Out-of-class definition of the CHAR traits' static `set_to_max` function pointer, initialized to null.
// NOTE(review): calling through this pointer would dereference null; presumably CHAR/VARCHAR
// callers are expected to use the Field-level set_to_max instead — confirm.
void (*FieldTypeTraits<OLAP_FIELD_TYPE_CHAR>::set_to_max)(void*) = nullptr;

template<typename TypeTraitsClass>
TypeInfo::TypeInfo(TypeTraitsClass t)
: _equal(TypeTraitsClass::equal),
Expand All @@ -27,7 +29,6 @@ TypeInfo::TypeInfo(TypeTraitsClass t)
_deep_copy(TypeTraitsClass::deep_copy),
_deep_copy_with_arena(TypeTraitsClass::deep_copy_with_arena),
_direct_copy(TypeTraitsClass::direct_copy),
_allocate_value_from_arena(TypeTraitsClass::allocate_value_from_arena),
_from_string(TypeTraitsClass::from_string),
_to_string(TypeTraitsClass::to_string),
_set_to_max(TypeTraitsClass::set_to_max),
Expand Down
39 changes: 5 additions & 34 deletions be/src/olap/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ class TypeInfo {
_direct_copy(dest, src);
}

inline char* allocate_value_from_arena(Arena* arena) const {
return _allocate_value_from_arena(arena);
}

OLAPStatus from_string(void* buf, const std::string& scan_key) const {
return _from_string(buf, scan_key);
}
Expand All @@ -89,7 +85,6 @@ class TypeInfo {
void (*_deep_copy)(void* dest, const void* src, MemPool* mem_pool);
void (*_deep_copy_with_arena)(void* dest, const void* src, Arena* arena);
void (*_direct_copy)(void* dest, const void* src);
char* (*_allocate_value_from_arena)(Arena* arena);

OLAPStatus (*_from_string)(void* buf, const std::string& scan_key);
std::string (*_to_string)(const void* src);
Expand Down Expand Up @@ -218,10 +213,6 @@ struct BaseFieldtypeTraits : public CppTypeTraits<field_type> {
return HashUtil::hash(data, sizeof(CppType), seed);
}

static inline char* allocate_value_from_arena(Arena* arena) {
return arena->Allocate(sizeof(CppType));
}

static std::string to_string(const void* src) {
std::stringstream stream;
stream << *reinterpret_cast<const CppType*>(src);
Expand Down Expand Up @@ -563,12 +554,10 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_CHAR> : public BaseFieldtypeTraits<OLAP_F
memory_copy(l_slice->data, r_slice->data, r_slice->size);
l_slice->size = r_slice->size;
}
static void set_to_max(void* buf) {
// this function is used by scan key,
// the size may be greater than length in schema.
auto slice = reinterpret_cast<Slice*>(buf);
memset(slice->data, 0xff, slice->size);
}

// CHAR/VARCHAR values are set to max through the Field's set_to_max, not here.
static void (*set_to_max)(void*);

static void set_to_min(void* buf) {
auto slice = reinterpret_cast<Slice*>(buf);
memset(slice->data, 0, slice->size);
Expand All @@ -577,13 +566,6 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_CHAR> : public BaseFieldtypeTraits<OLAP_F
auto slice = reinterpret_cast<const Slice*>(data);
return HashUtil::hash(slice->data, slice->size, seed);
}
static char* allocate_value_from_arena(Arena* arena) {
char* type_value = arena->Allocate(sizeof(Slice));
auto slice = reinterpret_cast<Slice*>(type_value);
slice->size = OLAP_CHAR_MAX_LENGTH;
slice->data = arena->Allocate(OLAP_CHAR_MAX_LENGTH);
return type_value;
}
};

template<>
Expand All @@ -601,22 +583,11 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_VARCHAR> : public FieldTypeTraits<OLAP_FI
slice->size = value_len;
return OLAP_SUCCESS;
}
static void set_to_max(void* buf) {
auto slice = reinterpret_cast<Slice*>(buf);
slice->size = 1;
memset(slice->data, 0xFF, 1);
}

static void set_to_min(void* buf) {
auto slice = reinterpret_cast<Slice*>(buf);
slice->size = 0;
}
static char* allocate_value_from_arena(Arena* arena) {
char* type_value = arena->Allocate(sizeof(Slice));
auto slice = reinterpret_cast<Slice*>(type_value);
slice->size = OLAP_STRING_MAX_LENGTH;
slice->data = arena->Allocate(OLAP_STRING_MAX_LENGTH);
return type_value;
}
};

template<>
Expand Down
Loading

0 comments on commit 80e9b21

Please sign in to comment.