From b6316c091f416967c5e7c9a9284601fa4507aa72 Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 10 Sep 2024 13:23:26 +0800 Subject: [PATCH] GH-44036: [C++] IPC: ipc reader/writer code enhancement (#44019) ### Rationale for this change So minor ipc code enhancement when I read the code ### What changes are included in this PR? Avoid copying shared_ptr in some naive space ### Are these changes tested? Covered by existence ### Are there any user-facing changes? no * GitHub Issue: #44036 Authored-by: mwish Signed-off-by: Sutou Kouhei --- cpp/src/arrow/ipc/reader.cc | 12 ++++++------ cpp/src/arrow/ipc/writer.cc | 21 +++++++++------------ 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index da84f2f2dc87d..98214c1debb86 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -305,7 +305,7 @@ class ArrayLoader { RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1])); } else { buffer_index_++; - out_->buffers[1].reset(new Buffer(nullptr, 0)); + out_->buffers[1] = std::make_shared(nullptr, 0); } return Status::OK(); } @@ -644,11 +644,11 @@ Result> LoadRecordBatch( const flatbuf::RecordBatch* metadata, const std::shared_ptr& schema, const std::vector& inclusion_mask, const IpcReadContext& context, io::RandomAccessFile* file) { - if (inclusion_mask.size() > 0) { - return LoadRecordBatchSubset(metadata, schema, &inclusion_mask, context, file); - } else { + if (inclusion_mask.empty()) { return LoadRecordBatchSubset(metadata, schema, /*inclusion_mask=*/nullptr, context, file); + } else { + return LoadRecordBatchSubset(metadata, schema, &inclusion_mask, context, file); } } @@ -1447,7 +1447,7 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { // Prebuffering's read patterns are also slightly worse than the alternative // when doing whole-file reads because the logic is not in place to recognize // we can just read the entire file up-front - if (options_.included_fields.size() != 0 && + if (!options_.included_fields.empty() && options_.included_fields.size() != schema_->fields().size() && !file_->supports_zero_copy()) { RETURN_NOT_OK(state->PreBufferMetadata({})); @@ -1907,7 +1907,7 @@ Result> RecordBatchFileReader::Open( Future> RecordBatchFileReader::OpenAsync( const std::shared_ptr& file, const IpcReadOptions& options) { ARROW_ASSIGN_OR_RAISE(int64_t footer_offset, file->GetSize()); - return OpenAsync(std::move(file), footer_offset, options); + return OpenAsync(file, footer_offset, options); } Future> RecordBatchFileReader::OpenAsync( diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index f603e60c66555..88aa3f3f8a47a 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -86,7 +86,7 @@ bool HasNestedDict(const ArrayData& data) { } Status GetTruncatedBitmap(int64_t offset, int64_t length, - const std::shared_ptr input, MemoryPool* pool, + const std::shared_ptr& input, MemoryPool* pool, std::shared_ptr* buffer) { if (!input) { *buffer = input; @@ -103,7 +103,7 @@ Status GetTruncatedBitmap(int64_t offset, int64_t length, } Status GetTruncatedBuffer(int64_t offset, int64_t length, int32_t byte_width, - const std::shared_ptr input, MemoryPool* pool, + const std::shared_ptr& input, MemoryPool* pool, std::shared_ptr* buffer) { if (!input) { *buffer = input; @@ -252,7 +252,7 @@ class RecordBatchSerializer { } Status Assemble(const RecordBatch& batch) { - if (field_nodes_.size() > 0) { + if (!field_nodes_.empty()) { field_nodes_.clear(); buffer_meta_.clear(); out_->body_buffers.clear(); @@ -335,8 +335,7 @@ class RecordBatchSerializer { ARROW_ASSIGN_OR_RAISE(auto shifted_offsets, AllocateBuffer(required_bytes, options_.memory_pool)); - offset_type* dest_offsets = - reinterpret_cast(shifted_offsets->mutable_data()); + auto dest_offsets = shifted_offsets->mutable_span_as(); const offset_type start_offset = array.value_offset(0); for (int i = 0; i < array.length(); ++i) { @@ -362,7 +361,6 @@ class RecordBatchSerializer { offset_type* out_min_offset, offset_type* out_max_end) { auto offsets = array.value_offsets(); - auto sizes = array.value_sizes(); const int64_t required_bytes = sizeof(offset_type) * array.length(); if (array.offset() != 0) { @@ -572,7 +570,7 @@ class RecordBatchSerializer { Status Visit(const StructArray& array) { --max_recursion_depth_; for (int i = 0; i < array.num_fields(); ++i) { - std::shared_ptr field = array.field(i); + const auto& field = array.field(i); RETURN_NOT_OK(VisitArray(*field)); } ++max_recursion_depth_; @@ -641,8 +639,7 @@ class RecordBatchSerializer { ARROW_ASSIGN_OR_RAISE( auto shifted_offsets_buffer, AllocateBuffer(length * sizeof(int32_t), options_.memory_pool)); - int32_t* shifted_offsets = - reinterpret_cast(shifted_offsets_buffer->mutable_data()); + auto shifted_offsets = shifted_offsets_buffer->mutable_span_as(); // Offsets are guaranteed to be increasing according to the spec, so // the first offset we find for a child is the initial offset and @@ -899,7 +896,7 @@ Status GetContiguousTensor(const Tensor& tensor, MemoryPool* pool, RETURN_NOT_OK(WriteStridedTensorData(0, 0, elem_size, tensor, scratch_space->mutable_data(), &stream)); - out->reset(new Tensor(tensor.type(), contiguous_data, tensor.shape())); + *out = std::make_unique(tensor.type(), contiguous_data, tensor.shape()); return Status::OK(); } @@ -1005,7 +1002,7 @@ class SparseTensorSerializer { } Status Assemble(const SparseTensor& sparse_tensor) { - if (buffer_meta_.size() > 0) { + if (!buffer_meta_.empty()) { buffer_meta_.clear(); out_->body_buffers.clear(); } @@ -1169,7 +1166,7 @@ Status RecordBatchWriter::WriteTable(const Table& table) { return WriteTable(tab namespace internal { -IpcPayloadWriter::~IpcPayloadWriter() {} +IpcPayloadWriter::~IpcPayloadWriter() = default; Status IpcPayloadWriter::Start() { return Status::OK(); }