From b42a2a7f9e977b2f243dbcb794476c6b286eb057 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Thu, 1 Aug 2024 13:53:36 +0800 Subject: [PATCH 1/3] Clean mis-public method --- dbms/src/Columns/ColumnArray.cpp | 5 +++++ dbms/src/Columns/ColumnArray.h | 1 + dbms/src/Flash/Coprocessor/ArrowColCodec.cpp | 11 ++++++++--- dbms/src/Flash/Coprocessor/TiDBColumn.cpp | 16 ++++++++++++++++ dbms/src/Flash/Coprocessor/TiDBColumn.h | 10 +++++----- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index c81d1787cc5..f16b82a88e2 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -1110,6 +1110,11 @@ void ColumnArray::insertFromDatumData(const char * data, size_t length) insertData(data, precise_data_size); } +std::pair ColumnArray::getElementRef(size_t element_idx) const +{ + return {static_cast(sizeAt(element_idx)), getDataAt(element_idx)}; +} + size_t ColumnArray::encodeIntoDatumData(size_t element_idx, WriteBuffer & writer) const { RUNTIME_CHECK(boost::endian::order::native == boost::endian::order::little); diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 91e7e824cdc..b2a5f00deaf 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -175,6 +175,7 @@ class ColumnArray final : public COWPtrHelper void insertFromDatumData(const char * data, size_t length) override; size_t encodeIntoDatumData(size_t element_idx, WriteBuffer & writer) const; + std::pair getElementRef(size_t element_idx) const; private: ColumnPtr data; diff --git a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp index 750ec173814..c25569479d0 100644 --- a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp +++ b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp @@ -304,6 +304,12 @@ void flashArrayFloat32ColToArrowCol( // We only unwrap the NULLABLE() part. const IColumn * nested_col = getNestedCol(flash_col_untyped); const auto * flash_col = checkAndGetColumn(nested_col); + + RUNTIME_CHECK(boost::endian::order::native == boost::endian::order::little); + + RUNTIME_CHECK(checkAndGetColumn>(&flash_col->getData())); + RUNTIME_CHECK(flash_col->getData().isFixedAndContiguous()); + for (size_t i = start_index; i < end_index; i++) { // todo check if we can convert flash_col to DAG col directly since the internal representation is almost the same @@ -316,9 +322,8 @@ void flashArrayFloat32ColToArrowCol( } } - auto encoded_size = flash_col->encodeIntoDatumData(i, *dag_column.data); - RUNTIME_CHECK(encoded_size > 0); - dag_column.finishAppendVar(encoded_size); + auto [num_elems, elem_bytes] = flash_col->getElementRef(i); + dag_column.appendVectorF32(num_elems, elem_bytes); } } diff --git a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp index 373e2a7320a..abbcf162167 100644 --- a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp +++ b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp @@ -12,10 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include +#include #include +#include namespace DB { @@ -118,6 +121,19 @@ void TiDBColumn::append(const TiDBEnum & ti_enum) finishAppendVar(size); } +void TiDBColumn::appendVectorF32(UInt32 num_elem, StringRef elem_bytes) +{ + writeIntBinary(num_elem, *data); + size_t encoded_size = sizeof(UInt32); + + RUNTIME_CHECK(elem_bytes.size == num_elem * sizeof(Float32)); + data->write(elem_bytes.data, elem_bytes.size); + encoded_size += elem_bytes.size; + + RUNTIME_CHECK(encoded_size > 0); + finishAppendVar(encoded_size); +} + void TiDBColumn::append(const TiDBBit & bit) { data->write(bit.val.data, bit.val.size); diff --git a/dbms/src/Flash/Coprocessor/TiDBColumn.h b/dbms/src/Flash/Coprocessor/TiDBColumn.h index 2095daa9f6a..ea24d286dba 100644 --- a/dbms/src/Flash/Coprocessor/TiDBColumn.h +++ b/dbms/src/Flash/Coprocessor/TiDBColumn.h @@ -41,20 +41,20 @@ class TiDBColumn void append(const TiDBDecimal & decimal); void append(const TiDBBit & bit); void append(const TiDBEnum & ti_enum); + void appendVectorF32(UInt32 num_elem, StringRef elem_bytes); void encodeColumn(WriteBuffer & ss); void clear(); - // FIXME: expose for ColumnArray::encodeIntoDatumData, find a better way to implement it. - void finishAppendVar(UInt32 size); - // WriteBufferFromOwnString is not moveable. - std::unique_ptr data; - private: + void finishAppendVar(UInt32 size); void finishAppendFixed(); bool isFixed() const { return fixed_size != VAR_SIZE; } void appendNullBitMap(bool value); + // WriteBufferFromOwnString is not moveable. + std::unique_ptr data; + UInt32 length; UInt32 null_cnt; std::vector null_bitmap; From b77ee6af906e0d962f13f36df3f9923d26cb3122 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Thu, 1 Aug 2024 13:56:37 +0800 Subject: [PATCH 2/3] Remove endian check --- dbms/src/Columns/ColumnArray.cpp | 21 -------------------- dbms/src/Columns/ColumnArray.h | 1 - dbms/src/Flash/Coprocessor/ArrowColCodec.cpp | 2 -- dbms/src/Flash/Coprocessor/TiDBColumn.cpp | 2 +- 4 files changed, 1 insertion(+), 25 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index f16b82a88e2..6c5d57e3006 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -1115,25 +1115,4 @@ std::pair ColumnArray::getElementRef(size_t element_idx) cons return {static_cast(sizeAt(element_idx)), getDataAt(element_idx)}; } -size_t ColumnArray::encodeIntoDatumData(size_t element_idx, WriteBuffer & writer) const -{ - RUNTIME_CHECK(boost::endian::order::native == boost::endian::order::little); - - RUNTIME_CHECK(checkAndGetColumn>(&getData())); - RUNTIME_CHECK(getData().isFixedAndContiguous()); - - auto n = static_cast(sizeAt(element_idx)); - - writeIntBinary(n, writer); - size_t encoded_size = sizeof(UInt32); - - auto data = getDataAt(element_idx); - RUNTIME_CHECK(data.size == n * sizeof(Float32)); - writer.write(data.data, data.size); - encoded_size += data.size; - - return encoded_size; -} - - } // namespace DB diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index b2a5f00deaf..852c15f6ada 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -174,7 +174,6 @@ class ColumnArray final : public COWPtrHelper void insertFromDatumData(const char * data, size_t length) override; - size_t encodeIntoDatumData(size_t element_idx, WriteBuffer & writer) const; std::pair getElementRef(size_t element_idx) const; private: diff --git a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp index c25569479d0..83fc585ee38 100644 --- a/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp +++ b/dbms/src/Flash/Coprocessor/ArrowColCodec.cpp @@ -305,8 +305,6 @@ void flashArrayFloat32ColToArrowCol( const IColumn * nested_col = getNestedCol(flash_col_untyped); const auto * flash_col = checkAndGetColumn(nested_col); - RUNTIME_CHECK(boost::endian::order::native == boost::endian::order::little); - RUNTIME_CHECK(checkAndGetColumn>(&flash_col->getData())); RUNTIME_CHECK(flash_col->getData().isFixedAndContiguous()); diff --git a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp index abbcf162167..169a264b1ba 100644 --- a/dbms/src/Flash/Coprocessor/TiDBColumn.cpp +++ b/dbms/src/Flash/Coprocessor/TiDBColumn.cpp @@ -123,7 +123,7 @@ void TiDBColumn::append(const TiDBEnum & ti_enum) void TiDBColumn::appendVectorF32(UInt32 num_elem, StringRef elem_bytes) { - writeIntBinary(num_elem, *data); + encodeLittleEndian(num_elem, *data); size_t encoded_size = sizeof(UInt32); RUNTIME_CHECK(elem_bytes.size == num_elem * sizeof(Float32)); From 77012af07853afaacfef8332fdf1f2f06518da2e Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Thu, 1 Aug 2024 14:17:47 +0800 Subject: [PATCH 3/3] Add some comments --- dbms/src/Flash/Coprocessor/TiDBChunk.cpp | 2 +- dbms/src/Flash/Coprocessor/tests/gtest_streaming_writer.cpp | 4 ++-- dbms/src/TiDB/Schema/TiDB.h | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/TiDBChunk.cpp b/dbms/src/Flash/Coprocessor/TiDBChunk.cpp index f3a59350bdb..30d7aaea537 100644 --- a/dbms/src/Flash/Coprocessor/TiDBChunk.cpp +++ b/dbms/src/Flash/Coprocessor/TiDBChunk.cpp @@ -31,7 +31,7 @@ namespace DB { TiDBChunk::TiDBChunk(const std::vector & field_types) { - for (auto & type : field_types) + for (const auto & type : field_types) { columns.emplace_back(getFieldLengthForArrowEncode(type.tp())); } diff --git a/dbms/src/Flash/Coprocessor/tests/gtest_streaming_writer.cpp b/dbms/src/Flash/Coprocessor/tests/gtest_streaming_writer.cpp index c337f72de60..5ea16759552 100644 --- a/dbms/src/Flash/Coprocessor/tests/gtest_streaming_writer.cpp +++ b/dbms/src/Flash/Coprocessor/tests/gtest_streaming_writer.cpp @@ -41,7 +41,7 @@ class TestStreamingWriter : public testing::Test } public: - TestStreamingWriter() {} + TestStreamingWriter() = default; // Return 10 Int64 column. static std::vector makeFields() @@ -93,7 +93,7 @@ struct MockStreamWriter {} void write(tipb::SelectResponse & response) { checker(response); } - bool isWritable() const { throw Exception("Unsupport async write"); } + static bool isWritable() { throw Exception("Unsupport async write"); } private: MockStreamWriterChecker checker; diff --git a/dbms/src/TiDB/Schema/TiDB.h b/dbms/src/TiDB/Schema/TiDB.h index 84efebea820..a51d56188c8 100644 --- a/dbms/src/TiDB/Schema/TiDB.h +++ b/dbms/src/TiDB/Schema/TiDB.h @@ -56,6 +56,7 @@ using DB::Timestamp; // Column types. // In format: // TiDB type, int value, codec flag, CH type. +// The int value is defined in pingcap/tidb pkg/parser/mysql/type.go #ifdef M #error "Please undefine macro M first." #endif