Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: refactor row format with slice format and single/multi slice row format #1381

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
c15f63c
Refactor row format with slice format and single/multi slice row format
tobegit3hub Mar 5, 2022
8da7721
Use slice index instead of schema index for ir builder
tobegit3hub Mar 5, 2022
c210917
Fix check for row format object
tobegit3hub Mar 5, 2022
1bb5889
Fix unit test cases
tobegit3hub Mar 5, 2022
0d4d8aa
Add destructor and check pointer
tobegit3hub Mar 5, 2022
c4cd525
Enable single slice row format if enable unsafe row opt
tobegit3hub Mar 5, 2022
25fb442
Support encode single slice row format when enable unsaferow opt in o…
tobegit3hub Mar 5, 2022
42b3be6
Fix use slice idx instead of schema index
tobegit3hub Mar 5, 2022
dae74fe
Correct the column index from ColInfo instead of passed parameter
tobegit3hub Mar 5, 2022
4687156
Get row format corrected col index
tobegit3hub Mar 5, 2022
33f0dfc
Refine for comments
tobegit3hub Mar 7, 2022
abad45d
Add unit test for window without select
tobegit3hub Mar 7, 2022
3a3b92c
Update batch config
tobegit3hub Mar 8, 2022
34c10ea
Merge branch 'main' of github.com:4paradigm/OpenMLDB into feat/refact…
tobegit3hub Mar 8, 2022
337decb
Merge branch 'main' of github.com:4paradigm/OpenMLDB into feat/refact…
tobegit3hub Mar 9, 2022
6b12abf
Support new codec and unsafe row codec for window with append slice
tobegit3hub Mar 9, 2022
28f369c
Add more unit test for unsafe row opt
tobegit3hub Mar 9, 2022
c4d358a
Release empty row for window computer
tobegit3hub Mar 10, 2022
6130aa2
Support convert timestamp for unsaferow and openmldb row format
tobegit3hub Mar 10, 2022
2289ff7
Support set long in joined row with unsafe row
tobegit3hub Mar 10, 2022
ed86c0b
Format the scala code
tobegit3hub Mar 10, 2022
7158c3f
Fix get incorrect row from joined row
tobegit3hub Mar 10, 2022
4705165
Check schema context for cpp unit tests
tobegit3hub Mar 10, 2022
28d8d85
Fix cpplint
tobegit3hub Mar 10, 2022
7445958
Merge branch 'main' of github.com:4paradigm/OpenMLDB into feat/refact…
tobegit3hub Mar 10, 2022
e8d596c
Update ut for SliceFormat and check row format is nullptr
tobegit3hub Mar 11, 2022
dd2b295
Merge branch 'main' into feat/refactor_row_format_and_support_single_…
tobegit3hub Mar 11, 2022
c55e128
Merge branch 'main' of github.com:4paradigm/OpenMLDB into feat/refact…
tobegit3hub Mar 11, 2022
f0ceb3c
Fix syntax error after resolve conflict
tobegit3hub Mar 11, 2022
1aa51d4
Reset unsaferow gflag for cpp tests
tobegit3hub Mar 12, 2022
34b5027
Ignore toydb engine test for cicd
tobegit3hub Mar 12, 2022
e5f4627
Merge branch 'main' of github.com:4paradigm/OpenMLDB into feat/refact…
tobegit3hub Mar 14, 2022
ec24d5e
Merge branch 'main' of github.com:4paradigm/OpenMLDB into feat/refact…
tobegit3hub Mar 14, 2022
013b450
Revert to run hybridse toydb engine test
tobegit3hub Mar 14, 2022
334fe34
Ignore failed test for toydb
tobegit3hub Mar 14, 2022
d0c88fd
Init function def for const project node
tobegit3hub Mar 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 79 additions & 3 deletions hybridse/include/codec/fe_row_codec.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,12 @@ struct StringColInfo : public ColInfo {
str_start_offset(str_start_offset) {}
};

class RowFormat {


class SliceFormat {
public:
explicit RowFormat(const hybridse::codec::Schema* schema);
virtual ~RowFormat() {}
explicit SliceFormat(const hybridse::codec::Schema* schema);
virtual ~SliceFormat() {}

bool GetStringColumnInfo(size_t idx, StringColInfo* res) const;

Expand All @@ -217,6 +219,80 @@ class RowFormat {
uint32_t str_field_start_offset_;
};


class RowFormat {
public:
tobegit3hub marked this conversation as resolved.
Show resolved Hide resolved
virtual bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const = 0;
virtual const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const = 0;
virtual size_t GetSliceId(size_t schema_idx) const = 0;
};

class MultiSlicesRowFormat : public RowFormat {
public:
MultiSlicesRowFormat(const Schema* schema) {
slice_formats_.push_back(SliceFormat(schema));
}

MultiSlicesRowFormat(std::vector<const Schema*> schemas) {
for (auto schema: schemas) {
slice_formats_.push_back(SliceFormat(schema));
}

}
bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const override {
return slice_formats_[schema_idx].GetStringColumnInfo(idx, res);
}

const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const override {
return slice_formats_[schema_idx].GetColumnInfo(idx);
}

size_t GetSliceId(size_t schema_idx) const override {
return schema_idx;
}

private:
std::vector<SliceFormat> slice_formats_;
};

class SingleSliceRowFormat : public RowFormat {
public:
// TODO: Add deconstructor
SingleSliceRowFormat(const Schema* schema) {
slice_format_ = new SliceFormat(schema);
}

SingleSliceRowFormat(std::vector<const Schema*> schemas) {
Schema merge_schema;
tobegit3hub marked this conversation as resolved.
Show resolved Hide resolved
int offset = 0;
for (auto schema: schemas) {
offsets_.push_back(offset);
offset += schema->size();
// TODO: Merge schema
//merge_schema.MergeFrom(schema);
}

slice_format_ = new SliceFormat(&merge_schema);
}

bool GetStringColumnInfo(size_t schema_idx, size_t idx, StringColInfo* res) const override {
return slice_format_->GetStringColumnInfo(offsets_[schema_idx] + idx, res);
}

const ColInfo* GetColumnInfo(size_t schema_idx, size_t idx) const override {
return slice_format_->GetColumnInfo(offsets_[schema_idx] + idx);
}

size_t GetSliceId(size_t schema_idx) const override {
return 0;
}

private:
std::vector<size_t> offsets_;
SliceFormat* slice_format_;
};


} // namespace codec
} // namespace hybridse
#endif // HYBRIDSE_INCLUDE_CODEC_FE_ROW_CODEC_H_
6 changes: 3 additions & 3 deletions hybridse/include/vm/schemas_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ class SchemasContext {
const PhysicalOpNode* GetRoot() const;

/**
* Get detailed format for `idx`th schema source.
* Get detailed format.
*/
const codec::RowFormat* GetRowFormat(size_t idx) const;
const codec::RowFormat* GetRowFormat() const;

/**
* Get `idx`th schema source.
Expand Down Expand Up @@ -267,7 +267,7 @@ class SchemasContext {
std::vector<SchemaSource*> schema_sources_;

// detailed schema format info
std::vector<codec::RowFormat> row_formats_;
const codec::RowFormat* row_format_ = nullptr;

// owned schema object
codec::Schema owned_concat_output_schema_;
Expand Down
8 changes: 4 additions & 4 deletions hybridse/src/codec/fe_row_codec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,7 @@ int32_t RowView::GetString(uint32_t idx, const char** val, uint32_t* length) {
length);
}

RowFormat::RowFormat(const hybridse::codec::Schema* schema)
SliceFormat::SliceFormat(const hybridse::codec::Schema* schema)
: schema_(schema), infos_(), next_str_pos_(), str_field_start_offset_(0) {
if (nullptr == schema) {
return;
Expand Down Expand Up @@ -959,11 +959,11 @@ RowFormat::RowFormat(const hybridse::codec::Schema* schema)
str_field_start_offset_ = offset;
}

const ColInfo* RowFormat::GetColumnInfo(size_t idx) const {
const ColInfo* SliceFormat::GetColumnInfo(size_t idx) const {
return idx < infos_.size() ? &infos_[idx] : nullptr;
}

bool RowFormat::GetStringColumnInfo(size_t idx, StringColInfo* res) const {
bool SliceFormat::GetStringColumnInfo(size_t idx, StringColInfo* res) const {
if (nullptr == res) {
LOG(WARNING) << "input args have null";
return false;
Expand All @@ -982,7 +982,7 @@ bool RowFormat::GetStringColumnInfo(size_t idx, StringColInfo* res) const {
next_offset = nit->second;
} else {
if (FLAGS_enable_spark_unsaferow_format) {
// Do not need to get next offset for UnsafeRowOpt
// No need to get next offset for UnsafeRowOpt and ignore the warning
} else {
LOG(WARNING) << "fail to get string field next offset";
return false;
Expand Down
6 changes: 3 additions & 3 deletions hybridse/src/codegen/aggregate_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ bool AggregateIRBuilder::CollectAggColumn(const hybridse::node::ExprNode* expr,
return false;
}
const codec::ColInfo& col_info =
*schema_context_->GetRowFormat(schema_idx)
->GetColumnInfo(col_idx);
*schema_context_->GetRowFormat()
->GetColumnInfo(schema_idx, col_idx);
auto col_type = col_info.type;
uint32_t offset = col_info.offset;

Expand Down Expand Up @@ -700,7 +700,7 @@ base::Status AggregateIRBuilder::BuildMulti(const std::string& base_funcname,

ScopeVar dummy_scope_var;
BufNativeIRBuilder buf_builder(
schema_idx, schema_context_->GetRowFormat(schema_idx),
schema_idx, schema_context_->GetRowFormat(),
body_block, &dummy_scope_var);
NativeValue field_value;
CHECK_TRUE(buf_builder.BuildGetField(info.col_idx, slice_info.first, slice_info.second, &field_value),
Expand Down
4 changes: 2 additions & 2 deletions hybridse/src/codegen/buf_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ bool BufNativeIRBuilder::BuildGetField(size_t col_idx, ::llvm::Value* row_ptr, :
}

node::TypeNode data_type;
const codec::ColInfo* col_info = format_->GetColumnInfo(col_idx);
const codec::ColInfo* col_info = format_->GetColumnInfo(schema_idx_, col_idx);
if (col_info == nullptr) {
LOG(WARNING) << "fail to resolve field info at " << col_idx;
return false;
Expand Down Expand Up @@ -113,7 +113,7 @@ bool BufNativeIRBuilder::BuildGetField(size_t col_idx, ::llvm::Value* row_ptr, :

case ::hybridse::node::kVarchar: {
codec::StringColInfo str_info;
if (!format_->GetStringColumnInfo(col_idx, &str_info)) {
if (!format_->GetStringColumnInfo(schema_idx_, col_idx, &str_info)) {
LOG(WARNING) << "fail to get string filed offset and next offset" << col_info->name;
}
DLOG(INFO) << "get string with offset " << offset << " next offset " << str_info.str_next_offset
Expand Down
4 changes: 2 additions & 2 deletions hybridse/src/codegen/context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ CodeGenContext::CodeGenContext(::llvm::Module* module,
llvm_ir_builder_(*llvm_ctx_),
schemas_context_(schemas_context),
parameter_types_(parameter_types),
parameter_row_format_(parameter_types),
parameter_row_format_(new codec::MultiSlicesRowFormat(parameter_types)),
node_manager_(node_manager) {}

::llvm::Function* CodeGenContext::GetCurrentFunction() const {
Expand Down Expand Up @@ -193,7 +193,7 @@ const vm::SchemasContext* CodeGenContext::schemas_context() const {
const codec::Schema* CodeGenContext::parameter_types() const {
return parameter_types_;
}
const codec::RowFormat& CodeGenContext::parameter_row_format() const {
const codec::RowFormat* CodeGenContext::parameter_row_format() const {
return parameter_row_format_;
}
node::NodeManager* CodeGenContext::node_manager() const {
Expand Down
4 changes: 2 additions & 2 deletions hybridse/src/codegen/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class CodeGenContext {

const vm::SchemasContext* schemas_context() const;
const codec::Schema* parameter_types() const;
const codec::RowFormat& parameter_row_format() const;
const codec::RowFormat* parameter_row_format() const;
node::NodeManager* node_manager() const;

private:
Expand All @@ -172,7 +172,7 @@ class CodeGenContext {

const vm::SchemasContext* schemas_context_;
const codec::Schema* parameter_types_;
const codec::RowFormat parameter_row_format_;
codec::RowFormat* parameter_row_format_ = nullptr;
tobegit3hub marked this conversation as resolved.
Show resolved Hide resolved

std::unordered_map<std::string, CodeScope> function_scopes_;

Expand Down
4 changes: 2 additions & 2 deletions hybridse/src/codegen/expr_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ Status ExprIRBuilder::BuildParameterExpr(const ::hybridse::node::ParameterExpr*
size_t schema_idx = 0;
CHECK_STATUS(ExtractSliceFromRow(parameter_row, schema_idx, &slice_ptr, &slice_size))
BufNativeIRBuilder buf_builder(
schema_idx, &ctx_->parameter_row_format(),
schema_idx, ctx_->parameter_row_format(),
ctx_->GetCurrentBlock(), ctx_->GetCurrentScope()->sv());
CHECK_TRUE(
buf_builder.BuildGetField(parameter->position()-1, slice_ptr, slice_size, output),
Expand Down Expand Up @@ -924,7 +924,7 @@ Status ExprIRBuilder::BuildGetFieldExpr(
::llvm::Value* slice_size = nullptr;
CHECK_STATUS(ExtractSliceFromRow(input_value, schema_idx, &slice_ptr, &slice_size))
BufNativeIRBuilder buf_builder(
schema_idx, schemas_context->GetRowFormat(schema_idx),
schema_idx, schemas_context->GetRowFormat(),
ctx_->GetCurrentBlock(), ctx_->GetCurrentScope()->sv());
CHECK_TRUE(
buf_builder.BuildGetField(col_idx, slice_ptr, slice_size, output),
Expand Down
8 changes: 4 additions & 4 deletions hybridse/src/codegen/window_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ bool MemoryWindowDecodeIRBuilder::BuildGetCol(size_t schema_idx, size_t col_idx,
return false;
}
::hybridse::node::TypeNode data_type;
auto row_format = schemas_context_->GetRowFormat(schema_idx);
auto row_format = schemas_context_->GetRowFormat();
if (row_format == nullptr) {
LOG(WARNING) << "fail to get row format at " << schema_idx;
return false;
}
const codec::ColInfo* col_info = row_format->GetColumnInfo(col_idx);
const codec::ColInfo* col_info = row_format->GetColumnInfo(schema_idx, col_idx);
if (col_info == nullptr) {
LOG(WARNING) << "fail to get column info at " << schema_idx << ":"
<< col_idx;
Expand All @@ -148,8 +148,8 @@ bool MemoryWindowDecodeIRBuilder::BuildGetCol(size_t schema_idx, size_t col_idx,
}
case ::hybridse::node::kVarchar: {
codec::StringColInfo str_col_info;
if (!schemas_context_->GetRowFormat(schema_idx)
->GetStringColumnInfo(col_idx, &str_col_info)) {
if (!schemas_context_->GetRowFormat()
->GetStringColumnInfo(schema_idx, col_idx, &str_col_info)) {
LOG(WARNING)
<< "fail to get string filed offset and next offset"
<< " at " << col_idx;
Expand Down
18 changes: 12 additions & 6 deletions hybridse/src/vm/schemas_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ void SchemasContext::Clear() {
delete ptr;
}
schema_sources_.clear();
row_formats_.clear();
// TODO(tobe): release row_format
//row_format_.clear();
delete row_format_;
tobegit3hub marked this conversation as resolved.
Show resolved Hide resolved
owned_concat_output_schema_.Clear();
}

Expand Down Expand Up @@ -475,8 +477,8 @@ bool SchemasContext::IsColumnAmbiguous(const std::string& column_name) const {
return column_id_set.size() != 1;
}

const codec::RowFormat* SchemasContext::GetRowFormat(size_t idx) const {
return idx < row_formats_.size() ? &row_formats_[idx] : nullptr;
const codec::RowFormat* SchemasContext::GetRowFormat() const {
return row_format_;
}

const std::string& SchemasContext::GetName() const {
Expand Down Expand Up @@ -510,19 +512,23 @@ const codec::Schema* SchemasContext::GetOutputSchema() const {
}

// Report whether the row format has been created, i.e. whether row_format_
// is set. A null row_format_ means the context has not been built.
bool SchemasContext::CheckBuild() const {
    return row_format_ != nullptr;
}

void SchemasContext::Build() {
// initialize detailed formats
row_formats_.clear();
//row_formats_.clear();
std::vector<const hybridse::codec::Schema*> schemas;
for (const auto& source : schema_sources_) {
if (source->GetSchema() == nullptr) {
LOG(WARNING) << "Source schema is null";
return;
}
row_formats_.emplace_back(codec::RowFormat(source->GetSchema()));
schemas.push_back(source->GetSchema());
}
row_format_ = new codec::MultiSlicesRowFormat(schemas);
tobegit3hub marked this conversation as resolved.
Show resolved Hide resolved

// initialize mappings
column_id_map_.clear();
column_name_map_.clear();
Expand Down