From 8cdf059d5db7f9c991e5fab93195872ef6ffbbc1 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 12 Dec 2016 13:01:22 -0500 Subject: [PATCH 1/5] Consolidate Array and Builder classes in array.h, builder.h. Remove arrow/types subdirectory Change-Id: I1d84f714b02a2e1abc0171663ca183b682be5907 --- cpp/CMakeLists.txt | 9 - cpp/src/arrow/CMakeLists.txt | 6 + cpp/src/arrow/api.h | 6 - .../decimal-test.cc => array-decimal-test.cc} | 2 +- .../list-test.cc => array-list-test.cc} | 4 - ...mitive-test.cc => array-primitive-test.cc} | 4 +- .../string-test.cc => array-string-test.cc} | 4 +- .../struct-test.cc => array-struct-test.cc} | 5 - cpp/src/arrow/array-test.cc | 1 - cpp/src/arrow/array.cc | 439 ++++++++++++++++++ cpp/src/arrow/array.h | 371 ++++++++++++++- cpp/src/arrow/builder.cc | 323 +++++++++++++ cpp/src/arrow/builder.h | 312 ++++++++++++- cpp/src/arrow/column-benchmark.cc | 2 +- cpp/src/arrow/column-test.cc | 1 - cpp/src/arrow/ipc/adapter.cc | 4 - cpp/src/arrow/ipc/ipc-adapter-test.cc | 4 - cpp/src/arrow/ipc/ipc-file-test.cc | 5 +- cpp/src/arrow/ipc/ipc-json-test.cc | 14 +- cpp/src/arrow/ipc/json-internal.cc | 6 +- cpp/src/arrow/ipc/test-common.h | 5 +- cpp/src/arrow/pretty_print-test.cc | 5 +- cpp/src/arrow/pretty_print.cc | 3 - cpp/src/arrow/table-test.cc | 2 +- cpp/src/arrow/test-util.h | 37 +- cpp/src/arrow/type.cc | 6 + cpp/src/arrow/types/CMakeLists.txt | 39 -- cpp/src/arrow/types/construct.cc | 124 ----- cpp/src/arrow/types/construct.h | 47 -- cpp/src/arrow/types/datetime.h | 27 -- cpp/src/arrow/types/decimal.cc | 31 -- cpp/src/arrow/types/decimal.h | 28 -- cpp/src/arrow/types/list.cc | 162 ------- cpp/src/arrow/types/list.h | 170 ------- cpp/src/arrow/types/primitive.cc | 294 ------------ cpp/src/arrow/types/primitive.h | 371 --------------- cpp/src/arrow/types/string.cc | 150 ------ cpp/src/arrow/types/string.h | 149 ------ cpp/src/arrow/types/struct.cc | 108 ----- cpp/src/arrow/types/struct.h | 116 ----- cpp/src/arrow/types/test-common.h | 70 --- 
cpp/src/arrow/types/union.cc | 27 -- cpp/src/arrow/types/union.h | 48 -- 43 files changed, 1506 insertions(+), 2035 deletions(-) rename cpp/src/arrow/{types/decimal-test.cc => array-decimal-test.cc} (97%) rename cpp/src/arrow/{types/list-test.cc => array-list-test.cc} (98%) rename cpp/src/arrow/{types/primitive-test.cc => array-primitive-test.cc} (99%) rename cpp/src/arrow/{types/string-test.cc => array-string-test.cc} (98%) rename cpp/src/arrow/{types/struct-test.cc => array-struct-test.cc} (98%) delete mode 100644 cpp/src/arrow/types/CMakeLists.txt delete mode 100644 cpp/src/arrow/types/construct.cc delete mode 100644 cpp/src/arrow/types/construct.h delete mode 100644 cpp/src/arrow/types/datetime.h delete mode 100644 cpp/src/arrow/types/decimal.cc delete mode 100644 cpp/src/arrow/types/decimal.h delete mode 100644 cpp/src/arrow/types/list.cc delete mode 100644 cpp/src/arrow/types/list.h delete mode 100644 cpp/src/arrow/types/primitive.cc delete mode 100644 cpp/src/arrow/types/primitive.h delete mode 100644 cpp/src/arrow/types/string.cc delete mode 100644 cpp/src/arrow/types/string.h delete mode 100644 cpp/src/arrow/types/struct.cc delete mode 100644 cpp/src/arrow/types/struct.h delete mode 100644 cpp/src/arrow/types/test-common.h delete mode 100644 cpp/src/arrow/types/union.cc delete mode 100644 cpp/src/arrow/types/union.h diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 798d75fe55643..d5a416168c65d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -750,14 +750,6 @@ set(ARROW_SRCS src/arrow/table.cc src/arrow/type.cc - src/arrow/types/construct.cc - src/arrow/types/decimal.cc - src/arrow/types/list.cc - src/arrow/types/primitive.cc - src/arrow/types/string.cc - src/arrow/types/struct.cc - src/arrow/types/union.cc - src/arrow/util/bit-util.cc src/arrow/util/buffer.cc src/arrow/util/memory-pool.cc @@ -823,7 +815,6 @@ endif() add_subdirectory(src/arrow) add_subdirectory(src/arrow/io) add_subdirectory(src/arrow/util) 
-add_subdirectory(src/arrow/types) #---------------------------------------------------------------------- # IPC library diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 6c0dea20ba7b5..ac88ddb363944 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -37,6 +37,12 @@ install(FILES set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS}) ADD_ARROW_TEST(array-test) +ADD_ARROW_TEST(array-decimal-test) +ADD_ARROW_TEST(array-list-test) +ADD_ARROW_TEST(array-primitive-test) +ADD_ARROW_TEST(array-string-test) +ADD_ARROW_TEST(array-struct-test) + ADD_ARROW_TEST(column-test) ADD_ARROW_TEST(pretty_print-test) ADD_ARROW_TEST(schema-test) diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index 2d317b49cb7b6..34574034c2ec5 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -27,12 +27,6 @@ #include "arrow/table.h" #include "arrow/type.h" -#include "arrow/types/construct.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" - #include "arrow/util/buffer.h" #include "arrow/util/memory-pool.h" #include "arrow/util/status.h" diff --git a/cpp/src/arrow/types/decimal-test.cc b/cpp/src/arrow/array-decimal-test.cc similarity index 97% rename from cpp/src/arrow/types/decimal-test.cc rename to cpp/src/arrow/array-decimal-test.cc index 7296ff8176113..9e00fd9a7dd49 100644 --- a/cpp/src/arrow/types/decimal-test.cc +++ b/cpp/src/arrow/array-decimal-test.cc @@ -17,7 +17,7 @@ #include "gtest/gtest.h" -#include "arrow/types/decimal.h" +#include "arrow/type.h" namespace arrow { diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/array-list-test.cc similarity index 98% rename from cpp/src/arrow/types/list-test.cc rename to cpp/src/arrow/array-list-test.cc index cb9a8c12d8ab9..926dcd62a5ec0 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/array-list-test.cc @@ -27,10 +27,6 @@ #include "arrow/builder.h" #include 
"arrow/test-util.h" #include "arrow/type.h" -#include "arrow/types/construct.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/test-common.h" #include "arrow/util/status.h" using std::shared_ptr; diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/array-primitive-test.cc similarity index 99% rename from cpp/src/arrow/types/primitive-test.cc rename to cpp/src/arrow/array-primitive-test.cc index bdc8ec00be02c..5fe237699e689 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/array-primitive-test.cc @@ -22,13 +22,11 @@ #include "gtest/gtest.h" +#include "arrow/array.h" #include "arrow/builder.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/types/construct.h" -#include "arrow/types/primitive.h" -#include "arrow/types/test-common.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" #include "arrow/util/status.h" diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/array-string-test.cc similarity index 98% rename from cpp/src/arrow/types/string-test.cc rename to cpp/src/arrow/array-string-test.cc index 3c4b12b7bc772..b144c632133d6 100644 --- a/cpp/src/arrow/types/string-test.cc +++ b/cpp/src/arrow/array-string-test.cc @@ -24,11 +24,9 @@ #include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/builder.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/test-common.h" namespace arrow { diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/array-struct-test.cc similarity index 98% rename from cpp/src/arrow/types/struct-test.cc rename to cpp/src/arrow/array-struct-test.cc index 197d7d4ad1f5e..8291d2ab22667 100644 --- a/cpp/src/arrow/types/struct-test.cc +++ b/cpp/src/arrow/array-struct-test.cc @@ -25,11 +25,6 @@ #include "arrow/builder.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include 
"arrow/types/construct.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/struct.h" -#include "arrow/types/test-common.h" #include "arrow/util/status.h" using std::shared_ptr; diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 158124468992a..ec5b9c81ec11b 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -26,7 +26,6 @@ #include "arrow/array.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/types/primitive.h" #include "arrow/util/buffer.h" #include "arrow/util/memory-pool.h" diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index 1f0bb66e91a3e..aff3d1634d311 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -19,9 +19,12 @@ #include #include +#include +#include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" +#include "arrow/util/logging.h" #include "arrow/util/status.h" namespace arrow { @@ -85,4 +88,440 @@ Status NullArray::Accept(ArrayVisitor* visitor) const { return visitor->Visit(*this); } +// ---------------------------------------------------------------------- +// Primitive array base + +PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length, + const std::shared_ptr& data, int32_t null_count, + const std::shared_ptr& null_bitmap) + : Array(type, length, null_count, null_bitmap) { + data_ = data; + raw_data_ = data == nullptr ? 
nullptr : data_->data(); +} + +bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const { + if (this == &other) { return true; } + if (null_count_ != other.null_count_) { return false; } + + if (null_count_ > 0) { + bool equal_bitmap = + null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8); + if (!equal_bitmap) { return false; } + + const uint8_t* this_data = raw_data_; + const uint8_t* other_data = other.raw_data_; + + auto size_meta = dynamic_cast(type_.get()); + int value_byte_size = size_meta->bit_width() / 8; + DCHECK_GT(value_byte_size, 0); + + for (int i = 0; i < length_; ++i) { + if (!IsNull(i) && memcmp(this_data, other_data, value_byte_size)) { return false; } + this_data += value_byte_size; + other_data += value_byte_size; + } + return true; + } else { + if (length_ == 0 && other.length_ == 0) { return true; } + return data_->Equals(*other.data_, length_); + } +} + +bool PrimitiveArray::Equals(const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + return EqualsExact(*static_cast(arr.get())); +} + +template +Status NumericArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; + +// ---------------------------------------------------------------------- +// BooleanArray + +BooleanArray::BooleanArray(int32_t length, const std::shared_ptr& data, + int32_t null_count, const std::shared_ptr& null_bitmap) + : PrimitiveArray( + std::make_shared(), length, data, null_count, null_bitmap) {} + +BooleanArray::BooleanArray(const TypePtr& 
type, int32_t length, + const std::shared_ptr& data, int32_t null_count, + const std::shared_ptr& null_bitmap) + : PrimitiveArray(type, length, data, null_count, null_bitmap) {} + +bool BooleanArray::EqualsExact(const BooleanArray& other) const { + if (this == &other) return true; + if (null_count_ != other.null_count_) { return false; } + + if (null_count_ > 0) { + bool equal_bitmap = + null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_)); + if (!equal_bitmap) { return false; } + + const uint8_t* this_data = raw_data_; + const uint8_t* other_data = other.raw_data_; + + for (int i = 0; i < length_; ++i) { + if (!IsNull(i) && BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data, i)) { + return false; + } + } + return true; + } else { + return data_->Equals(*other.data_, BitUtil::BytesForBits(length_)); + } +} + +bool BooleanArray::Equals(const ArrayPtr& arr) const { + if (this == arr.get()) return true; + if (Type::BOOL != arr->type_enum()) { return false; } + return EqualsExact(*static_cast(arr.get())); +} + +bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx, + int32_t other_start_idx, const ArrayPtr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + const auto other = static_cast(arr.get()); + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { + const bool is_null = IsNull(i); + if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) { + return false; + } + } + return true; +} + +Status BooleanArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + +// ---------------------------------------------------------------------- +// ListArray + +bool ListArray::EqualsExact(const ListArray& other) const { + if (this == &other) { return true; } + if (null_count_ != other.null_count_) { return false; } + + bool equal_offsets = + 
offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t)); + if (!equal_offsets) { return false; } + bool equal_null_bitmap = true; + if (null_count_ > 0) { + equal_null_bitmap = + null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_)); + } + + if (!equal_null_bitmap) { return false; } + + return values()->Equals(other.values()); +} + +bool ListArray::Equals(const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (this->type_enum() != arr->type_enum()) { return false; } + return EqualsExact(*static_cast(arr.get())); +} + +bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + const auto other = static_cast(arr.get()); + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { + const bool is_null = IsNull(i); + if (is_null != arr->IsNull(o_i)) { return false; } + if (is_null) continue; + const int32_t begin_offset = offset(i); + const int32_t end_offset = offset(i + 1); + const int32_t other_begin_offset = other->offset(o_i); + const int32_t other_end_offset = other->offset(o_i + 1); + // Underlying can't be equal if the size isn't equal + if (end_offset - begin_offset != other_end_offset - other_begin_offset) { + return false; + } + if (!values_->RangeEquals( + begin_offset, end_offset, other_begin_offset, other->values())) { + return false; + } + } + return true; +} + +Status ListArray::Validate() const { + if (length_ < 0) { return Status::Invalid("Length was negative"); } + if (!offset_buffer_) { return Status::Invalid("offset_buffer_ was null"); } + if (offset_buffer_->size() / static_cast(sizeof(int32_t)) < length_) { + std::stringstream ss; + ss << "offset buffer size (bytes): " << offset_buffer_->size() + << " isn't large enough for length: " << length_; + return 
Status::Invalid(ss.str()); + } + const int32_t last_offset = offset(length_); + if (last_offset > 0) { + if (!values_) { + return Status::Invalid("last offset was non-zero and values was null"); + } + if (values_->length() != last_offset) { + std::stringstream ss; + ss << "Final offset invariant not equal to values length: " << last_offset + << "!=" << values_->length(); + return Status::Invalid(ss.str()); + } + + const Status child_valid = values_->Validate(); + if (!child_valid.ok()) { + std::stringstream ss; + ss << "Child array invalid: " << child_valid.ToString(); + return Status::Invalid(ss.str()); + } + } + + int32_t prev_offset = offset(0); + if (prev_offset != 0) { return Status::Invalid("The first offset wasn't zero"); } + for (int32_t i = 1; i <= length_; ++i) { + int32_t current_offset = offset(i); + if (IsNull(i - 1) && current_offset != prev_offset) { + std::stringstream ss; + ss << "Offset invariant failure at: " << i << " inconsistent offsets for null slot" + << current_offset << "!=" << prev_offset; + return Status::Invalid(ss.str()); + } + if (current_offset < prev_offset) { + std::stringstream ss; + ss << "Offset invariant failure: " << i + << " inconsistent offset for non-null slot: " << current_offset << "<" + << prev_offset; + return Status::Invalid(ss.str()); + } + prev_offset = current_offset; + } + return Status::OK(); +} + +Status ListArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + +// ---------------------------------------------------------------------- +// String and binary + +static std::shared_ptr kBinary = std::make_shared(); +static std::shared_ptr kString = std::make_shared(); + +BinaryArray::BinaryArray(int32_t length, const std::shared_ptr& offsets, + const std::shared_ptr& data, int32_t null_count, + const std::shared_ptr& null_bitmap) + : BinaryArray(kBinary, length, offsets, data, null_count, null_bitmap) {} + +BinaryArray::BinaryArray(const TypePtr& type, int32_t length, + const 
std::shared_ptr& offsets, const std::shared_ptr& data, + int32_t null_count, const std::shared_ptr& null_bitmap) + : Array(type, length, null_count, null_bitmap), + offset_buffer_(offsets), + offsets_(reinterpret_cast(offset_buffer_->data())), + data_buffer_(data), + data_(nullptr) { + if (data_buffer_ != nullptr) { data_ = data_buffer_->data(); } +} + +Status BinaryArray::Validate() const { + // TODO(wesm): what to do here? + return Status::OK(); +} + +bool BinaryArray::EqualsExact(const BinaryArray& other) const { + if (!Array::EqualsExact(other)) { return false; } + + bool equal_offsets = + offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t)); + if (!equal_offsets) { return false; } + + if (!data_buffer_ && !(other.data_buffer_)) { return true; } + + return data_buffer_->Equals(*other.data_buffer_, data_buffer_->size()); +} + +bool BinaryArray::Equals(const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (this->type_enum() != arr->type_enum()) { return false; } + return EqualsExact(*static_cast(arr.get())); +} + +bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + const auto other = static_cast(arr.get()); + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { + const bool is_null = IsNull(i); + if (is_null != arr->IsNull(o_i)) { return false; } + if (is_null) continue; + const int32_t begin_offset = offset(i); + const int32_t end_offset = offset(i + 1); + const int32_t other_begin_offset = other->offset(o_i); + const int32_t other_end_offset = other->offset(o_i + 1); + // Underlying can't be equal if the size isn't equal + if (end_offset - begin_offset != other_end_offset - other_begin_offset) { + return false; + } + + if (std::memcmp(data_ + begin_offset, other->data_ + 
other_begin_offset, + end_offset - begin_offset)) { + return false; + } + } + return true; +} + +Status BinaryArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + +StringArray::StringArray(int32_t length, const std::shared_ptr& offsets, + const std::shared_ptr& data, int32_t null_count, + const std::shared_ptr& null_bitmap) + : BinaryArray(kString, length, offsets, data, null_count, null_bitmap) {} + +Status StringArray::Validate() const { + // TODO(emkornfield) Validate proper UTF8 code points? + return BinaryArray::Validate(); +} + +Status StringArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + +// ---------------------------------------------------------------------- +// Struct + +std::shared_ptr StructArray::field(int32_t pos) const { + DCHECK_GT(field_arrays_.size(), 0); + return field_arrays_[pos]; +} + +bool StructArray::Equals(const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + if (null_count_ != arr->null_count()) { return false; } + return RangeEquals(0, length_, 0, arr); +} + +bool StructArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (Type::STRUCT != arr->type_enum()) { return false; } + const auto other = static_cast(arr.get()); + + bool equal_fields = true; + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { + if (IsNull(i) != arr->IsNull(o_i)) { return false; } + if (IsNull(i)) continue; + for (size_t j = 0; j < field_arrays_.size(); ++j) { + // TODO: really we should be comparing stretches of non-null data rather + // than looking at one value at a time. 
+ equal_fields = field(j)->RangeEquals(i, i + 1, o_i, other->field(j)); + if (!equal_fields) { return false; } + } + } + + return true; +} + +Status StructArray::Validate() const { + if (length_ < 0) { return Status::Invalid("Length was negative"); } + + if (null_count() > length_) { + return Status::Invalid("Null count exceeds the length of this struct"); + } + + if (field_arrays_.size() > 0) { + // Validate fields + int32_t array_length = field_arrays_[0]->length(); + size_t idx = 0; + for (auto it : field_arrays_) { + if (it->length() != array_length) { + std::stringstream ss; + ss << "Length is not equal from field " << it->type()->ToString() + << " at position {" << idx << "}"; + return Status::Invalid(ss.str()); + } + + const Status child_valid = it->Validate(); + if (!child_valid.ok()) { + std::stringstream ss; + ss << "Child array invalid: " << child_valid.ToString() << " at position {" << idx + << "}"; + return Status::Invalid(ss.str()); + } + ++idx; + } + + if (array_length > 0 && array_length != length_) { + return Status::Invalid("Struct's length is not equal to its child arrays"); + } + } + return Status::OK(); +} + +Status StructArray::Accept(ArrayVisitor* visitor) const { + return visitor->Visit(*this); +} + +// ---------------------------------------------------------------------- + +#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType) \ + case Type::ENUM: \ + out->reset(new ArrayType(type, length, data, null_count, null_bitmap)); \ + break; + +Status MakePrimitiveArray(const TypePtr& type, int32_t length, + const std::shared_ptr& data, int32_t null_count, + const std::shared_ptr& null_bitmap, ArrayPtr* out) { + switch (type->type) { + MAKE_PRIMITIVE_ARRAY_CASE(BOOL, BooleanArray); + MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array); + MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array); + MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array); + MAKE_PRIMITIVE_ARRAY_CASE(INT16, Int16Array); + MAKE_PRIMITIVE_ARRAY_CASE(UINT32, UInt32Array); + 
MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array); + MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array); + MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array); + MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray); + MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray); + MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array); + MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray); + MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP_DOUBLE, DoubleArray); + default: + return Status::NotImplemented(type->ToString()); + } +#ifdef NDEBUG + return Status::OK(); +#else + return (*out)->Validate(); +#endif +} + } // namespace arrow diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 91fb93e625494..2fae734ce15bc 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -23,12 +23,12 @@ #include "arrow/type.h" #include "arrow/util/bit-util.h" +#include "arrow/util/buffer.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" namespace arrow { -class Buffer; class MemoryPool; class MutableBuffer; class Status; @@ -110,6 +110,375 @@ typedef std::shared_ptr ArrayPtr; Status ARROW_EXPORT GetEmptyBitmap( MemoryPool* pool, int32_t length, std::shared_ptr* result); +// Base class for fixed-size logical types. See MakePrimitiveArray +// (types/construct.h) for constructing a specific subclass. 
+class ARROW_EXPORT PrimitiveArray : public Array { + public: + virtual ~PrimitiveArray() {} + + std::shared_ptr data() const { return data_; } + + bool EqualsExact(const PrimitiveArray& other) const; + bool Equals(const std::shared_ptr& arr) const override; + + protected: + PrimitiveArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, + int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); + std::shared_ptr data_; + const uint8_t* raw_data_; +}; + +template +class ARROW_EXPORT NumericArray : public PrimitiveArray { + public: + using TypeClass = TYPE; + using value_type = typename TypeClass::c_type; + NumericArray(int32_t length, const std::shared_ptr& data, + int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr) + : PrimitiveArray( + std::make_shared(), length, data, null_count, null_bitmap) {} + NumericArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, + int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr) + : PrimitiveArray(type, length, data, null_count, null_bitmap) {} + + bool EqualsExact(const NumericArray& other) const { + return PrimitiveArray::EqualsExact(static_cast(other)); + } + + bool ApproxEquals(const std::shared_ptr& arr) const { return Equals(arr); } + + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const ArrayPtr& arr) const override { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + const auto other = static_cast*>(arr.get()); + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { + const bool is_null = IsNull(i); + if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) { + return false; + } + } + return true; + } + const value_type* raw_data() const { + return reinterpret_cast(raw_data_); + } + + Status Accept(ArrayVisitor* visitor) const override; + + value_type Value(int i) const { 
return raw_data()[i]; } +}; + +template <> +inline bool NumericArray::ApproxEquals( + const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + + const auto& other = *static_cast*>(arr.get()); + + if (this == &other) { return true; } + if (null_count_ != other.null_count_) { return false; } + + auto this_data = reinterpret_cast(raw_data_); + auto other_data = reinterpret_cast(other.raw_data_); + + static constexpr float EPSILON = 1E-5; + + if (length_ == 0 && other.length_ == 0) { return true; } + + if (null_count_ > 0) { + bool equal_bitmap = + null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8); + if (!equal_bitmap) { return false; } + + for (int i = 0; i < length_; ++i) { + if (IsNull(i)) continue; + if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; } + } + } else { + for (int i = 0; i < length_; ++i) { + if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; } + } + } + return true; +} + +template <> +inline bool NumericArray::ApproxEquals( + const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + + const auto& other = *static_cast*>(arr.get()); + + if (this == &other) { return true; } + if (null_count_ != other.null_count_) { return false; } + + auto this_data = reinterpret_cast(raw_data_); + auto other_data = reinterpret_cast(other.raw_data_); + + if (length_ == 0 && other.length_ == 0) { return true; } + + static constexpr double EPSILON = 1E-5; + + if (null_count_ > 0) { + bool equal_bitmap = + null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8); + if (!equal_bitmap) { return false; } + + for (int i = 0; i < length_; ++i) { + if (IsNull(i)) continue; + if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; } + } + } else { + for (int i = 0; i < 
length_; ++i) { + if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; } + } + } + return true; +} + +class ARROW_EXPORT BooleanArray : public PrimitiveArray { + public: + using TypeClass = BooleanType; + + BooleanArray(int32_t length, const std::shared_ptr& data, + int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); + BooleanArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, + int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); + + bool EqualsExact(const BooleanArray& other) const; + bool Equals(const ArrayPtr& arr) const override; + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const ArrayPtr& arr) const override; + + Status Accept(ArrayVisitor* visitor) const override; + + const uint8_t* raw_data() const { return reinterpret_cast(raw_data_); } + + bool Value(int i) const { return BitUtil::GetBit(raw_data(), i); } +}; + +// ---------------------------------------------------------------------- +// ListArray + +class ARROW_EXPORT ListArray : public Array { + public: + using TypeClass = ListType; + + ListArray(const TypePtr& type, int32_t length, std::shared_ptr offsets, + const ArrayPtr& values, int32_t null_count = 0, + std::shared_ptr null_bitmap = nullptr) + : Array(type, length, null_count, null_bitmap) { + offset_buffer_ = offsets; + offsets_ = offsets == nullptr ? nullptr : reinterpret_cast( + offset_buffer_->data()); + values_ = values; + } + + Status Validate() const override; + + virtual ~ListArray() = default; + + // Return a shared pointer in case the requestor desires to share ownership + // with this array. 
+ std::shared_ptr values() const { return values_; } + std::shared_ptr offsets() const { + return std::static_pointer_cast(offset_buffer_); + } + + std::shared_ptr value_type() const { return values_->type(); } + + const int32_t* raw_offsets() const { return offsets_; } + + int32_t offset(int i) const { return offsets_[i]; } + + // Neither of these functions will perform boundschecking + int32_t value_offset(int i) const { return offsets_[i]; } + int32_t value_length(int i) const { return offsets_[i + 1] - offsets_[i]; } + + bool EqualsExact(const ListArray& other) const; + bool Equals(const std::shared_ptr& arr) const override; + + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const ArrayPtr& arr) const override; + + Status Accept(ArrayVisitor* visitor) const override; + + protected: + std::shared_ptr offset_buffer_; + const int32_t* offsets_; + ArrayPtr values_; +}; + +// ---------------------------------------------------------------------- +// Binary and String + +class ARROW_EXPORT BinaryArray : public Array { + public: + using TypeClass = BinaryType; + + BinaryArray(int32_t length, const std::shared_ptr& offsets, + const std::shared_ptr& data, int32_t null_count = 0, + const std::shared_ptr& null_bitmap = nullptr); + + // Constructor that allows sub-classes/builders to propagate there logical type up the + // class hierarchy. 
+ BinaryArray(const TypePtr& type, int32_t length, const std::shared_ptr& offsets, + const std::shared_ptr& data, int32_t null_count = 0, + const std::shared_ptr& null_bitmap = nullptr); + + // Return the pointer to the given elements bytes + // TODO(emkornfield) introduce a StringPiece or something similar to capture zero-copy + // pointer + offset + const uint8_t* GetValue(int i, int32_t* out_length) const { + const int32_t pos = offsets_[i]; + *out_length = offsets_[i + 1] - pos; + return data_ + pos; + } + + std::shared_ptr data() const { return data_buffer_; } + std::shared_ptr offsets() const { return offset_buffer_; } + + const int32_t* raw_offsets() const { return offsets_; } + + int32_t offset(int i) const { return offsets_[i]; } + + // Neither of these functions will perform boundschecking + int32_t value_offset(int i) const { return offsets_[i]; } + int32_t value_length(int i) const { return offsets_[i + 1] - offsets_[i]; } + + bool EqualsExact(const BinaryArray& other) const; + bool Equals(const std::shared_ptr& arr) const override; + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const ArrayPtr& arr) const override; + + Status Validate() const override; + + Status Accept(ArrayVisitor* visitor) const override; + + private: + std::shared_ptr offset_buffer_; + const int32_t* offsets_; + + std::shared_ptr data_buffer_; + const uint8_t* data_; +}; + +class ARROW_EXPORT StringArray : public BinaryArray { + public: + using TypeClass = StringType; + + StringArray(int32_t length, const std::shared_ptr& offsets, + const std::shared_ptr& data, int32_t null_count = 0, + const std::shared_ptr& null_bitmap = nullptr); + + // Construct a std::string + // TODO: std::bad_alloc possibility + std::string GetString(int i) const { + int32_t nchars; + const uint8_t* str = GetValue(i, &nchars); + return std::string(reinterpret_cast(str), nchars); + } + + Status Validate() const override; + + Status Accept(ArrayVisitor* visitor) const 
override; +}; + +// ---------------------------------------------------------------------- +// Struct + +class ARROW_EXPORT StructArray : public Array { + public: + using TypeClass = StructType; + + StructArray(const TypePtr& type, int32_t length, std::vector& field_arrays, + int32_t null_count = 0, std::shared_ptr null_bitmap = nullptr) + : Array(type, length, null_count, null_bitmap) { + type_ = type; + field_arrays_ = field_arrays; + } + + Status Validate() const override; + + virtual ~StructArray() {} + + // Return a shared pointer in case the requestor desires to share ownership + // with this array. + std::shared_ptr field(int32_t pos) const; + + const std::vector& fields() const { return field_arrays_; } + + bool EqualsExact(const StructArray& other) const; + bool Equals(const std::shared_ptr& arr) const override; + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const std::shared_ptr& arr) const override; + + Status Accept(ArrayVisitor* visitor) const override; + + protected: + // The child arrays corresponding to each field of the struct data type. 
+ std::vector field_arrays_; +}; + +// ---------------------------------------------------------------------- +// Union + +class UnionArray : public Array { + protected: + // The data are types encoded as int16 + Buffer* types_; + std::vector> children_; +}; + +class DenseUnionArray : public UnionArray { + protected: + Buffer* offset_buf_; +}; + +class SparseUnionArray : public UnionArray {}; + +// ---------------------------------------------------------------------- +// extern templates and other details + +// gcc and clang disagree about how to handle template visibility when you have +// explicit specializations https://llvm.org/bugs/show_bug.cgi?id=24815 +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattributes" +#endif + +// Only instantiate these templates once +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; +extern template class ARROW_EXPORT NumericArray; + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + +// ---------------------------------------------------------------------- +// Helper functions + +// Create new arrays for logical types that are backed by primitive arrays. 
+Status ARROW_EXPORT MakePrimitiveArray(const std::shared_ptr& type, + int32_t length, const std::shared_ptr& data, int32_t null_count, + const std::shared_ptr& null_bitmap, std::shared_ptr* out); + } // namespace arrow #endif diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 151b257a3d894..6e2f02d36bbe0 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -19,8 +19,12 @@ #include +#include "arrow/array.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" +#include "arrow/util/logging.h" #include "arrow/util/status.h" namespace arrow { @@ -123,4 +127,323 @@ void ArrayBuilder::UnsafeSetNotNull(int32_t length) { length_ = new_length; } +template +Status PrimitiveBuilder::Init(int32_t capacity) { + RETURN_NOT_OK(ArrayBuilder::Init(capacity)); + data_ = std::make_shared(pool_); + + int64_t nbytes = TypeTraits::bytes_required(capacity); + RETURN_NOT_OK(data_->Resize(nbytes)); + // TODO(emkornfield) valgrind complains without this + memset(data_->mutable_data(), 0, nbytes); + + raw_data_ = reinterpret_cast(data_->mutable_data()); + return Status::OK(); +} + +template +Status PrimitiveBuilder::Resize(int32_t capacity) { + // XXX: Set floor size for now + if (capacity < kMinBuilderCapacity) { capacity = kMinBuilderCapacity; } + + if (capacity_ == 0) { + RETURN_NOT_OK(Init(capacity)); + } else { + RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); + const int64_t old_bytes = data_->size(); + const int64_t new_bytes = TypeTraits::bytes_required(capacity); + RETURN_NOT_OK(data_->Resize(new_bytes)); + raw_data_ = reinterpret_cast(data_->mutable_data()); + memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes); + } + return Status::OK(); +} + +template +Status PrimitiveBuilder::Append( + const value_type* values, int32_t length, const uint8_t* valid_bytes) { + RETURN_NOT_OK(Reserve(length)); + + if (length > 0) { + memcpy(raw_data_ + length_, values, 
TypeTraits::bytes_required(length)); + } + + // length_ is update by these + ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); + + return Status::OK(); +} + +template +Status PrimitiveBuilder::Finish(std::shared_ptr* out) { + const int64_t bytes_required = TypeTraits::bytes_required(length_); + if (bytes_required > 0 && bytes_required < data_->size()) { + // Trim buffers + RETURN_NOT_OK(data_->Resize(bytes_required)); + } + *out = std::make_shared::ArrayType>( + type_, length_, data_, null_count_, null_bitmap_); + + data_ = null_bitmap_ = nullptr; + capacity_ = length_ = null_count_ = 0; + return Status::OK(); +} + +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; + +Status BooleanBuilder::Init(int32_t capacity) { + RETURN_NOT_OK(ArrayBuilder::Init(capacity)); + data_ = std::make_shared(pool_); + + int64_t nbytes = BitUtil::BytesForBits(capacity); + RETURN_NOT_OK(data_->Resize(nbytes)); + // TODO(emkornfield) valgrind complains without this + memset(data_->mutable_data(), 0, nbytes); + + raw_data_ = reinterpret_cast(data_->mutable_data()); + return Status::OK(); +} + +Status BooleanBuilder::Resize(int32_t capacity) { + // XXX: Set floor size for now + if (capacity < kMinBuilderCapacity) { capacity = kMinBuilderCapacity; } + + if (capacity_ == 0) { + RETURN_NOT_OK(Init(capacity)); + } else { + RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); + const int64_t old_bytes = data_->size(); + const int64_t new_bytes = BitUtil::BytesForBits(capacity); + + RETURN_NOT_OK(data_->Resize(new_bytes)); + raw_data_ = reinterpret_cast(data_->mutable_data()); + memset(data_->mutable_data() + old_bytes, 0, new_bytes - 
old_bytes); + } + return Status::OK(); +} + +Status BooleanBuilder::Finish(std::shared_ptr* out) { + const int64_t bytes_required = BitUtil::BytesForBits(length_); + + if (bytes_required > 0 && bytes_required < data_->size()) { + // Trim buffers + RETURN_NOT_OK(data_->Resize(bytes_required)); + } + *out = std::make_shared(type_, length_, data_, null_count_, null_bitmap_); + + data_ = null_bitmap_ = nullptr; + capacity_ = length_ = null_count_ = 0; + return Status::OK(); +} + +Status BooleanBuilder::Append( + const uint8_t* values, int32_t length, const uint8_t* valid_bytes) { + RETURN_NOT_OK(Reserve(length)); + + for (int i = 0; i < length; ++i) { + // Skip reading from uninitialised memory + // TODO: This actually is only to keep valgrind happy but may or may not + // have a performance impact. + if ((valid_bytes != nullptr) && !valid_bytes[i]) continue; + + if (values[i] > 0) { + BitUtil::SetBit(raw_data_, length_ + i); + } else { + BitUtil::ClearBit(raw_data_, length_ + i); + } + } + + // this updates length_ + ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); +} + +// ---------------------------------------------------------------------- +// ListBuilder + +ListBuilder::ListBuilder( + MemoryPool* pool, std::shared_ptr value_builder, const TypePtr& type) + : ArrayBuilder( + pool, type ? type : std::static_pointer_cast( + std::make_shared(value_builder->type()))), + offset_builder_(pool), + value_builder_(value_builder) {} + +ListBuilder::ListBuilder( + MemoryPool* pool, std::shared_ptr values, const TypePtr& type) + : ArrayBuilder(pool, type ?
type : std::static_pointer_cast( + std::make_shared(values->type()))), + offset_builder_(pool), + values_(values) {} + +Status ListBuilder::Init(int32_t elements) { + DCHECK_LT(elements, std::numeric_limits::max()); + RETURN_NOT_OK(ArrayBuilder::Init(elements)); + // one more than requested for offsets + return offset_builder_.Resize((elements + 1) * sizeof(int32_t)); +} + +Status ListBuilder::Resize(int32_t capacity) { + DCHECK_LT(capacity, std::numeric_limits::max()); + // one more than requested for offsets + RETURN_NOT_OK(offset_builder_.Resize((capacity + 1) * sizeof(int32_t))); + return ArrayBuilder::Resize(capacity); +} + +Status ListBuilder::Finish(std::shared_ptr* out) { + std::shared_ptr items = values_; + if (!items) { RETURN_NOT_OK(value_builder_->Finish(&items)); } + + RETURN_NOT_OK(offset_builder_.Append(items->length())); + std::shared_ptr offsets = offset_builder_.Finish(); + + *out = std::make_shared( + type_, length_, offsets, items, null_count_, null_bitmap_); + + Reset(); + + return Status::OK(); +} + +void ListBuilder::Reset() { + capacity_ = length_ = null_count_ = 0; + null_bitmap_ = nullptr; +} + +std::shared_ptr ListBuilder::value_builder() const { + DCHECK(!values_) << "Using value builder is pointless when values_ is set"; + return value_builder_; +} + +// ---------------------------------------------------------------------- +// String and binary + +// This used to be a static member variable of BinaryBuilder, but it can cause +// valgrind to report a (spurious?) memory leak when needed in other shared +// libraries.
The problem came up while adding explicit visibility to libarrow +// and libparquet_arrow +static TypePtr kBinaryValueType = TypePtr(new UInt8Type()); + +BinaryBuilder::BinaryBuilder(MemoryPool* pool, const TypePtr& type) + : ListBuilder(pool, std::make_shared(pool, kBinaryValueType), type) { + byte_builder_ = static_cast(value_builder_.get()); +} + +Status BinaryBuilder::Finish(std::shared_ptr* out) { + std::shared_ptr result; + RETURN_NOT_OK(ListBuilder::Finish(&result)); + + const auto list = std::dynamic_pointer_cast(result); + auto values = std::dynamic_pointer_cast(list->values()); + + *out = std::make_shared(list->length(), list->offsets(), values->data(), + list->null_count(), list->null_bitmap()); + return Status::OK(); +} + +Status StringBuilder::Finish(std::shared_ptr* out) { + std::shared_ptr result; + RETURN_NOT_OK(ListBuilder::Finish(&result)); + + const auto list = std::dynamic_pointer_cast(result); + auto values = std::dynamic_pointer_cast(list->values()); + + *out = std::make_shared(list->length(), list->offsets(), values->data(), + list->null_count(), list->null_bitmap()); + return Status::OK(); +} + +// ---------------------------------------------------------------------- +// Struct + +Status StructBuilder::Finish(std::shared_ptr* out) { + std::vector> fields(field_builders_.size()); + for (size_t i = 0; i < field_builders_.size(); ++i) { + RETURN_NOT_OK(field_builders_[i]->Finish(&fields[i])); + } + + *out = std::make_shared(type_, length_, fields, null_count_, null_bitmap_); + + null_bitmap_ = nullptr; + capacity_ = length_ = null_count_ = 0; + + return Status::OK(); +} + +std::shared_ptr StructBuilder::field_builder(int pos) const { + DCHECK_GT(field_builders_.size(), 0); + return field_builders_[pos]; +} + +// ---------------------------------------------------------------------- +// Helper functions + +#define BUILDER_CASE(ENUM, BuilderType) \ + case Type::ENUM: \ + out->reset(new BuilderType(pool, type)); \ + return Status::OK(); + +// 
Initially looked at doing this with vtables, but shared pointers makes it +// difficult +// +// TODO(wesm): come up with a less monolithic strategy +Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, + std::shared_ptr* out) { + switch (type->type) { + BUILDER_CASE(UINT8, UInt8Builder); + BUILDER_CASE(INT8, Int8Builder); + BUILDER_CASE(UINT16, UInt16Builder); + BUILDER_CASE(INT16, Int16Builder); + BUILDER_CASE(UINT32, UInt32Builder); + BUILDER_CASE(INT32, Int32Builder); + BUILDER_CASE(UINT64, UInt64Builder); + BUILDER_CASE(INT64, Int64Builder); + BUILDER_CASE(TIMESTAMP, TimestampBuilder); + + BUILDER_CASE(BOOL, BooleanBuilder); + + BUILDER_CASE(FLOAT, FloatBuilder); + BUILDER_CASE(DOUBLE, DoubleBuilder); + + BUILDER_CASE(STRING, StringBuilder); + BUILDER_CASE(BINARY, BinaryBuilder); + + case Type::LIST: { + std::shared_ptr value_builder; + std::shared_ptr value_type = + static_cast(type.get())->value_type(); + RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder)); + out->reset(new ListBuilder(pool, value_builder)); + return Status::OK(); + } + + case Type::STRUCT: { + std::vector& fields = type->children_; + std::vector> values_builder; + + for (auto it : fields) { + std::shared_ptr builder; + RETURN_NOT_OK(MakeBuilder(pool, it->type, &builder)); + values_builder.push_back(builder); + } + out->reset(new StructBuilder(pool, type, values_builder)); + return Status::OK(); + } + + default: + return Status::NotImplemented(type->ToString()); + } +} + } // namespace arrow diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 73e49c0a69674..ad0b27568e185 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -23,6 +23,8 @@ #include #include "arrow/type.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/buffer.h" #include "arrow/util/macros.h" #include "arrow/util/status.h" #include "arrow/util/visibility.h" @@ -31,7 +33,6 @@ namespace arrow { class Array; class MemoryPool; -class PoolBuffer; static constexpr 
int32_t kMinBuilderCapacity = 1 << 5; @@ -130,6 +131,315 @@ class ARROW_EXPORT ArrayBuilder { DISALLOW_COPY_AND_ASSIGN(ArrayBuilder); }; +template +class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { + public: + using value_type = typename Type::c_type; + + explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) + : ArrayBuilder(pool, type), data_(nullptr) {} + + virtual ~PrimitiveBuilder() {} + + using ArrayBuilder::Advance; + + // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory + Status AppendNulls(const uint8_t* valid_bytes, int32_t length) { + RETURN_NOT_OK(Reserve(length)); + UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); + } + + Status AppendNull() { + RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(false); + return Status::OK(); + } + + std::shared_ptr data() const { return data_; } + + // Vector append + // + // If passed, valid_bytes is of equal length to values, and any zero byte + // will be considered as a null for that slot + Status Append( + const value_type* values, int32_t length, const uint8_t* valid_bytes = nullptr); + + Status Finish(std::shared_ptr* out) override; + Status Init(int32_t capacity) override; + + // Increase the capacity of the builder to accommodate at least the indicated + // number of elements + Status Resize(int32_t capacity) override; + + protected: + std::shared_ptr data_; + value_type* raw_data_; +}; + +template +class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder { + public: + using typename PrimitiveBuilder::value_type; + using PrimitiveBuilder::PrimitiveBuilder; + + using PrimitiveBuilder::Append; + using PrimitiveBuilder::Init; + using PrimitiveBuilder::Resize; + using PrimitiveBuilder::Reserve; + + // Scalar append. 
+ Status Append(value_type val) { + RETURN_NOT_OK(ArrayBuilder::Reserve(1)); + UnsafeAppend(val); + return Status::OK(); + } + + // Does not capacity-check; make sure to call Reserve beforehand + void UnsafeAppend(value_type val) { + BitUtil::SetBit(null_bitmap_data_, length_); + raw_data_[length_++] = val; + } + + protected: + using PrimitiveBuilder::length_; + using PrimitiveBuilder::null_bitmap_data_; + using PrimitiveBuilder::raw_data_; +}; + +// Builders + +using UInt8Builder = NumericBuilder; +using UInt16Builder = NumericBuilder; +using UInt32Builder = NumericBuilder; +using UInt64Builder = NumericBuilder; + +using Int8Builder = NumericBuilder; +using Int16Builder = NumericBuilder; +using Int32Builder = NumericBuilder; +using Int64Builder = NumericBuilder; +using TimestampBuilder = NumericBuilder; + +using HalfFloatBuilder = NumericBuilder; +using FloatBuilder = NumericBuilder; +using DoubleBuilder = NumericBuilder; + +class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { + public: + explicit BooleanBuilder(MemoryPool* pool, const TypePtr& type) + : ArrayBuilder(pool, type), data_(nullptr) {} + + virtual ~BooleanBuilder() {} + + using ArrayBuilder::Advance; + + // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory + Status AppendNulls(const uint8_t* valid_bytes, int32_t length) { + RETURN_NOT_OK(Reserve(length)); + UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); + } + + Status AppendNull() { + RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(false); + return Status::OK(); + } + + std::shared_ptr data() const { return data_; } + + // Scalar append + Status Append(bool val) { + Reserve(1); + BitUtil::SetBit(null_bitmap_data_, length_); + if (val) { + BitUtil::SetBit(raw_data_, length_); + } else { + BitUtil::ClearBit(raw_data_, length_); + } + ++length_; + return Status::OK(); + } + + // Vector append + // + // If passed, valid_bytes is of equal length to values, and any zero byte + // will be considered as a null 
for that slot + Status Append( + const uint8_t* values, int32_t length, const uint8_t* valid_bytes = nullptr); + + Status Finish(std::shared_ptr* out) override; + Status Init(int32_t capacity) override; + + // Increase the capacity of the builder to accommodate at least the indicated + // number of elements + Status Resize(int32_t capacity) override; + + protected: + std::shared_ptr data_; + uint8_t* raw_data_; +}; + +// ---------------------------------------------------------------------- +// List builder + +// Builder class for variable-length list array value types +// +// To use this class, you must append values to the child array builder and use +// the Append function to delimit each distinct list value (once the values +// have been appended to the child array) or use the bulk API to append +// a sequence of offsets and null values. +// +// A note on types. Per arrow/type.h all types in the c++ implementation are +// logical so even though this class always builds list array, this can +// represent multiple different logical types. If no logical type is provided +// at construction time, the class defaults to List where t is taken from the +// value_builder/values that the object is constructed with. +class ARROW_EXPORT ListBuilder : public ArrayBuilder { + public: + // Use this constructor to incrementally build the value array along with offsets and + // null bitmap.
+ ListBuilder(MemoryPool* pool, std::shared_ptr value_builder, + const TypePtr& type = nullptr); + + // Use this constructor to build the list with a pre-existing values array + ListBuilder( + MemoryPool* pool, std::shared_ptr values, const TypePtr& type = nullptr); + + virtual ~ListBuilder() {} + + Status Init(int32_t elements) override; + Status Resize(int32_t capacity) override; + Status Finish(std::shared_ptr* out) override; + + // Vector append + // + // If passed, valid_bytes is of equal length to values, and any zero byte + // will be considered as a null for that slot + Status Append( + const int32_t* offsets, int32_t length, const uint8_t* valid_bytes = nullptr) { + RETURN_NOT_OK(Reserve(length)); + UnsafeAppendToBitmap(valid_bytes, length); + offset_builder_.UnsafeAppend(offsets, length); + return Status::OK(); + } + + // Start a new variable-length list slot + // + // This function should be called before beginning to append elements to the + // value builder + Status Append(bool is_valid = true) { + RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(is_valid); + RETURN_NOT_OK(offset_builder_.Append(value_builder_->length())); + return Status::OK(); + } + + Status AppendNull() { return Append(false); } + + std::shared_ptr value_builder() const; + + protected: + BufferBuilder offset_builder_; + std::shared_ptr value_builder_; + std::shared_ptr values_; + + void Reset(); +}; + +// ---------------------------------------------------------------------- +// Binary and String + +// BinaryBuilder : public ListBuilder +class ARROW_EXPORT BinaryBuilder : public ListBuilder { + public: + explicit BinaryBuilder(MemoryPool* pool, const TypePtr& type); + virtual ~BinaryBuilder() {} + + Status Append(const uint8_t* value, int32_t length) { + RETURN_NOT_OK(ListBuilder::Append()); + return byte_builder_->Append(value, length); + } + + Status Append(const char* value, int32_t length) { + return Append(reinterpret_cast(value), length); + } + + Status Append(const 
std::string& value) { return Append(value.c_str(), value.size()); } + + Status Finish(std::shared_ptr* out) override; + + protected: + UInt8Builder* byte_builder_; +}; + +// String builder +class ARROW_EXPORT StringBuilder : public BinaryBuilder { + public: + explicit StringBuilder(MemoryPool* pool, const TypePtr& type) + : BinaryBuilder(pool, type) {} + + using BinaryBuilder::Append; + + Status Finish(std::shared_ptr* out) override; + + Status Append(const std::vector& values, uint8_t* null_bytes); +}; + +// ---------------------------------------------------------------------- +// Struct + +// --------------------------------------------------------------------------------- +// StructArray builder +// Append, Resize and Reserve methods are acting on StructBuilder. +// Please make sure all these methods of all child-builders' are consistently +// called to maintain data-structure consistency. +class ARROW_EXPORT StructBuilder : public ArrayBuilder { + public: + StructBuilder(MemoryPool* pool, const std::shared_ptr& type, + const std::vector>& field_builders) + : ArrayBuilder(pool, type) { + field_builders_ = field_builders; + } + + Status Finish(std::shared_ptr* out) override; + + // Null bitmap is of equal length to every child field, and any zero byte + // will be considered as a null for that field, but users must use the + // append or advance methods of the child builders independently to + // insert data. + Status Append(int32_t length, const uint8_t* valid_bytes) { + RETURN_NOT_OK(Reserve(length)); + UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); + } + + // Append an element to the Struct. All child-builders' Append method must + // be called independently to maintain data-structure consistency.
+ Status Append(bool is_valid = true) { + RETURN_NOT_OK(Reserve(1)); + UnsafeAppendToBitmap(is_valid); + return Status::OK(); + } + + Status AppendNull() { return Append(false); } + + std::shared_ptr field_builder(int pos) const; + + const std::vector>& field_builders() const { + return field_builders_; + } + + protected: + std::vector> field_builders_; +}; + +// ---------------------------------------------------------------------- +// Helper functions + +Status ARROW_EXPORT MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, + std::shared_ptr* out); + } // namespace arrow #endif // ARROW_BUILDER_H_ diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc index f429a813c6f20..aeb89763094e3 100644 --- a/cpp/src/arrow/column-benchmark.cc +++ b/cpp/src/arrow/column-benchmark.cc @@ -17,8 +17,8 @@ #include "benchmark/benchmark.h" +#include "arrow/array.h" #include "arrow/test-util.h" -#include "arrow/types/primitive.h" #include "arrow/util/memory-pool.h" namespace arrow { diff --git a/cpp/src/arrow/column-test.cc b/cpp/src/arrow/column-test.cc index ac3636d1b6dab..9005245b20419 100644 --- a/cpp/src/arrow/column-test.cc +++ b/cpp/src/arrow/column-test.cc @@ -27,7 +27,6 @@ #include "arrow/schema.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/types/primitive.h" using std::shared_ptr; using std::vector; diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc index edf716f662753..89d6bb39d9c09 100644 --- a/cpp/src/arrow/ipc/adapter.cc +++ b/cpp/src/arrow/ipc/adapter.cc @@ -33,10 +33,6 @@ #include "arrow/table.h" #include "arrow/type.h" #include "arrow/types/construct.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc index 
1accfde7c4842..0908c887f40e3 100644 --- a/cpp/src/arrow/ipc/ipc-adapter-test.cc +++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc @@ -31,10 +31,6 @@ #include "arrow/ipc/util.h" #include "arrow/test-util.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" #include "arrow/util/memory-pool.h" diff --git a/cpp/src/arrow/ipc/ipc-file-test.cc b/cpp/src/arrow/ipc/ipc-file-test.cc index a1feac401f24e..5c79238eb90e2 100644 --- a/cpp/src/arrow/ipc/ipc-file-test.cc +++ b/cpp/src/arrow/ipc/ipc-file-test.cc @@ -24,6 +24,7 @@ #include "gtest/gtest.h" +#include "arrow/array.h" #include "arrow/io/memory.h" #include "arrow/io/test-common.h" #include "arrow/ipc/adapter.h" @@ -32,10 +33,6 @@ #include "arrow/ipc/util.h" #include "arrow/test-util.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" #include "arrow/util/memory-pool.h" diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc index ba4d9ca982850..86a2a9b8ccdd2 100644 --- a/cpp/src/arrow/ipc/ipc-json-test.cc +++ b/cpp/src/arrow/ipc/ipc-json-test.cc @@ -26,15 +26,13 @@ #include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/builder.h" #include "arrow/ipc/json-internal.h" #include "arrow/ipc/json.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" #include "arrow/util/memory-pool.h" #include "arrow/util/status.h" @@ -147,7 +145,7 @@ TEST(TestJsonArrayWriter, NestedTypes) { std::vector values = {0, 1, 2, 3, 4, 5, 6}; std::shared_ptr values_array; - MakeArray(int32(), values_is_valid, values, &values_array); + ArrayFromVector(int32(), 
values_is_valid, values, &values_array); // List std::vector list_is_valid = {true, false, true, true, true}; @@ -188,10 +186,10 @@ void MakeBatchArrays(const std::shared_ptr& schema, const int num_rows, test::randint(num_rows, 0, 100, &v2_values); std::shared_ptr v1; - MakeArray(schema->field(0)->type, is_valid, v1_values, &v1); + ArrayFromVector(schema->field(0)->type, is_valid, v1_values, &v1); std::shared_ptr v2; - MakeArray(schema->field(1)->type, is_valid, v2_values, &v2); + ArrayFromVector(schema->field(1)->type, is_valid, v2_values, &v2); static const int kBufferSize = 10; static uint8_t buffer[kBufferSize]; @@ -323,13 +321,13 @@ TEST(TestJsonFileReadWrite, MinimalFormatExample) { std::vector foo_valid = {true, false, true, true, true}; std::vector foo_values = {1, 2, 3, 4, 5}; std::shared_ptr foo; - MakeArray(int32(), foo_valid, foo_values, &foo); + ArrayFromVector(int32(), foo_valid, foo_values, &foo); ASSERT_TRUE(batch->column(0)->Equals(foo)); std::vector bar_valid = {true, false, false, true, true}; std::vector bar_values = {1, 2, 3, 4, 5}; std::shared_ptr bar; - MakeArray(float64(), bar_valid, bar_values, &bar); + ArrayFromVector(float64(), bar_valid, bar_values, &bar); ASSERT_TRUE(batch->column(1)->Equals(bar)); } diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index ff9f59800be38..0ca48750e5ddf 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -28,14 +28,12 @@ #include "rapidjson/writer.h" #include "arrow/array.h" +#include "arrow/builder.h" #include "arrow/schema.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" #include "arrow/util/bit-util.h" +#include "arrow/util/logging.h" #include "arrow/util/memory-pool.h" #include "arrow/util/status.h" diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index 
65b378215222d..7a35a51ed20af 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -25,13 +25,10 @@ #include #include "arrow/array.h" +#include "arrow/builder.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" #include "arrow/util/memory-pool.h" diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc index b1e6a11cedd9b..c22d3aa632b9d 100644 --- a/cpp/src/arrow/pretty_print-test.cc +++ b/cpp/src/arrow/pretty_print-test.cc @@ -26,14 +26,11 @@ #include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/builder.h" #include "arrow/pretty_print.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" namespace arrow { diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index c63a9e93e6a63..fc9702eabcbae 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -25,9 +25,6 @@ #include "arrow/table.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/types/list.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" #include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc index 743fb669700ea..6a61735accf4d 100644 --- a/cpp/src/arrow/table-test.cc +++ b/cpp/src/arrow/table-test.cc @@ -21,12 +21,12 @@ #include "gtest/gtest.h" +#include "arrow/array.h" #include "arrow/column.h" #include "arrow/schema.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/types/primitive.h" #include "arrow/util/status.h" using std::shared_ptr; diff --git 
a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h index b86a1809cd0e9..049d8e59f8c99 100644 --- a/cpp/src/arrow/test-util.h +++ b/cpp/src/arrow/test-util.h @@ -28,6 +28,7 @@ #include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/builder.h" #include "arrow/column.h" #include "arrow/schema.h" #include "arrow/table.h" @@ -253,8 +254,9 @@ Status MakeRandomBytePoolBuffer(int32_t length, MemoryPool* pool, } // namespace test template -void MakeArray(const std::shared_ptr& type, const std::vector& is_valid, - const std::vector& values, std::shared_ptr* out) { +void ArrayFromVector(const std::shared_ptr& type, + const std::vector& is_valid, const std::vector& values, + std::shared_ptr* out) { std::shared_ptr values_buffer; std::shared_ptr values_bitmap; @@ -272,6 +274,37 @@ void MakeArray(const std::shared_ptr& type, const std::vector& i values_buffer, null_count, values_bitmap); } +class TestBuilder : public ::testing::Test { + public: + void SetUp() { + pool_ = default_memory_pool(); + type_ = TypePtr(new UInt8Type()); + builder_.reset(new UInt8Builder(pool_, type_)); + builder_nn_.reset(new UInt8Builder(pool_, type_)); + } + + protected: + MemoryPool* pool_; + + TypePtr type_; + std::unique_ptr builder_; + std::unique_ptr builder_nn_; +}; + +template +Status MakeArray(const std::vector& valid_bytes, const std::vector& values, + int size, Builder* builder, ArrayPtr* out) { + // Append the first `size` elements + for (int i = 0; i < size; ++i) { + if (valid_bytes[i] > 0) { + RETURN_NOT_OK(builder->Append(values[i])); + } else { + RETURN_NOT_OK(builder->AppendNull()); + } + } + return builder->Finish(out); +} + } // namespace arrow #endif // ARROW_TEST_UTIL_H_ diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 75f5086f37de0..193cb2eaf633f 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -220,6 +220,12 @@ std::vector UnionType::GetBufferLayout() const { } } +std::string DecimalType::ToString() const { + std::stringstream s; + s <<
"decimal(" << precision << ", " << scale << ")"; + return s.str(); +} + std::vector DecimalType::GetBufferLayout() const { // TODO(wesm) return {}; diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt deleted file mode 100644 index 6d59acfdf2eec..0000000000000 --- a/cpp/src/arrow/types/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -####################################### -# arrow_types -####################################### - -# Headers: top level -install(FILES - construct.h - datetime.h - decimal.h - list.h - primitive.h - string.h - struct.h - union.h - DESTINATION include/arrow/types) - - -ADD_ARROW_TEST(decimal-test) -ADD_ARROW_TEST(list-test) -ADD_ARROW_TEST(primitive-test) -ADD_ARROW_TEST(string-test) -ADD_ARROW_TEST(struct-test) diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc deleted file mode 100644 index ab9c59fd4639d..0000000000000 --- a/cpp/src/arrow/types/construct.cc +++ /dev/null @@ -1,124 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/types/construct.h" - -#include - -#include "arrow/type.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" - -namespace arrow { - -class ArrayBuilder; - -#define BUILDER_CASE(ENUM, BuilderType) \ - case Type::ENUM: \ - out->reset(new BuilderType(pool, type)); \ - return Status::OK(); - -// Initially looked at doing this with vtables, but shared pointers makes it -// difficult -// -// TODO(wesm): come up with a less monolithic strategy -Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, - std::shared_ptr* out) { - switch (type->type) { - BUILDER_CASE(UINT8, UInt8Builder); - BUILDER_CASE(INT8, Int8Builder); - BUILDER_CASE(UINT16, UInt16Builder); - BUILDER_CASE(INT16, Int16Builder); - BUILDER_CASE(UINT32, UInt32Builder); - BUILDER_CASE(INT32, Int32Builder); - BUILDER_CASE(UINT64, UInt64Builder); - BUILDER_CASE(INT64, Int64Builder); - BUILDER_CASE(TIMESTAMP, TimestampBuilder); - - BUILDER_CASE(BOOL, BooleanBuilder); - - BUILDER_CASE(FLOAT, FloatBuilder); - BUILDER_CASE(DOUBLE, DoubleBuilder); - - BUILDER_CASE(STRING, StringBuilder); - BUILDER_CASE(BINARY, BinaryBuilder); - - case Type::LIST: { - 
std::shared_ptr value_builder; - std::shared_ptr value_type = - static_cast(type.get())->value_type(); - RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder)); - out->reset(new ListBuilder(pool, value_builder)); - return Status::OK(); - } - - case Type::STRUCT: { - std::vector& fields = type->children_; - std::vector> values_builder; - - for (auto it : fields) { - std::shared_ptr builder; - RETURN_NOT_OK(MakeBuilder(pool, it->type, &builder)); - values_builder.push_back(builder); - } - out->reset(new StructBuilder(pool, type, values_builder)); - return Status::OK(); - } - - default: - return Status::NotImplemented(type->ToString()); - } -} - -#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType) \ - case Type::ENUM: \ - out->reset(new ArrayType(type, length, data, null_count, null_bitmap)); \ - break; - -Status MakePrimitiveArray(const TypePtr& type, int32_t length, - const std::shared_ptr& data, int32_t null_count, - const std::shared_ptr& null_bitmap, ArrayPtr* out) { - switch (type->type) { - MAKE_PRIMITIVE_ARRAY_CASE(BOOL, BooleanArray); - MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array); - MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array); - MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array); - MAKE_PRIMITIVE_ARRAY_CASE(INT16, Int16Array); - MAKE_PRIMITIVE_ARRAY_CASE(UINT32, UInt32Array); - MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array); - MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array); - MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array); - MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray); - MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray); - MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array); - MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray); - MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP_DOUBLE, DoubleArray); - default: - return Status::NotImplemented(type->ToString()); - } -#ifdef NDEBUG - return Status::OK(); -#else - return (*out)->Validate(); -#endif -} - -} // namespace arrow diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h deleted file mode 100644 index 
e18e946d1a64c..0000000000000 --- a/cpp/src/arrow/types/construct.h +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_CONSTRUCT_H -#define ARROW_TYPES_CONSTRUCT_H - -#include -#include -#include - -#include "arrow/util/visibility.h" - -namespace arrow { - -class Array; -class ArrayBuilder; -class Buffer; -struct DataType; -struct Field; -class MemoryPool; -class Status; - -Status ARROW_EXPORT MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, - std::shared_ptr* out); - -// Create new arrays for logical types that are backed by primitive arrays. -Status ARROW_EXPORT MakePrimitiveArray(const std::shared_ptr& type, - int32_t length, const std::shared_ptr& data, int32_t null_count, - const std::shared_ptr& null_bitmap, std::shared_ptr* out); - -} // namespace arrow - -#endif // ARROW_BUILDER_H_ diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h deleted file mode 100644 index a8f863923129a..0000000000000 --- a/cpp/src/arrow/types/datetime.h +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_DATETIME_H -#define ARROW_TYPES_DATETIME_H - -#include - -#include "arrow/type.h" - -namespace arrow {} // namespace arrow - -#endif // ARROW_TYPES_DATETIME_H diff --git a/cpp/src/arrow/types/decimal.cc b/cpp/src/arrow/types/decimal.cc deleted file mode 100644 index 1d9a5e50e460b..0000000000000 --- a/cpp/src/arrow/types/decimal.cc +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "arrow/types/decimal.h" - -#include -#include - -namespace arrow { - -std::string DecimalType::ToString() const { - std::stringstream s; - s << "decimal(" << precision << ", " << scale << ")"; - return s.str(); -} - -} // namespace arrow diff --git a/cpp/src/arrow/types/decimal.h b/cpp/src/arrow/types/decimal.h deleted file mode 100644 index b3ea3a56d8008..0000000000000 --- a/cpp/src/arrow/types/decimal.h +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_DECIMAL_H -#define ARROW_TYPES_DECIMAL_H - -#include - -#include "arrow/type.h" -#include "arrow/util/visibility.h" - -namespace arrow {} // namespace arrow - -#endif // ARROW_TYPES_DECIMAL_H diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc deleted file mode 100644 index d86563253bd5a..0000000000000 --- a/cpp/src/arrow/types/list.cc +++ /dev/null @@ -1,162 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -#include "arrow/types/list.h" - -#include - -namespace arrow { - -bool ListArray::EqualsExact(const ListArray& other) const { - if (this == &other) { return true; } - if (null_count_ != other.null_count_) { return false; } - - bool equal_offsets = - offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t)); - if (!equal_offsets) { return false; } - bool equal_null_bitmap = true; - if (null_count_ > 0) { - equal_null_bitmap = - null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_)); - } - - if (!equal_null_bitmap) { return false; } - - return values()->Equals(other.values()); -} - -bool ListArray::Equals(const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (this->type_enum() != arr->type_enum()) { return false; } - return EqualsExact(*static_cast(arr.get())); -} - -bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, - const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (this->type_enum() != arr->type_enum()) { return false; } - const auto other = static_cast(arr.get()); - for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { - const bool is_null = IsNull(i); - if (is_null != arr->IsNull(o_i)) { return false; } - if (is_null) continue; - const int32_t begin_offset = offset(i); - const int32_t 
end_offset = offset(i + 1); - const int32_t other_begin_offset = other->offset(o_i); - const int32_t other_end_offset = other->offset(o_i + 1); - // Underlying can't be equal if the size isn't equal - if (end_offset - begin_offset != other_end_offset - other_begin_offset) { - return false; - } - if (!values_->RangeEquals( - begin_offset, end_offset, other_begin_offset, other->values())) { - return false; - } - } - return true; -} - -Status ListArray::Validate() const { - if (length_ < 0) { return Status::Invalid("Length was negative"); } - if (!offset_buffer_) { return Status::Invalid("offset_buffer_ was null"); } - if (offset_buffer_->size() / static_cast(sizeof(int32_t)) < length_) { - std::stringstream ss; - ss << "offset buffer size (bytes): " << offset_buffer_->size() - << " isn't large enough for length: " << length_; - return Status::Invalid(ss.str()); - } - const int32_t last_offset = offset(length_); - if (last_offset > 0) { - if (!values_) { - return Status::Invalid("last offset was non-zero and values was null"); - } - if (values_->length() != last_offset) { - std::stringstream ss; - ss << "Final offset invariant not equal to values length: " << last_offset - << "!=" << values_->length(); - return Status::Invalid(ss.str()); - } - - const Status child_valid = values_->Validate(); - if (!child_valid.ok()) { - std::stringstream ss; - ss << "Child array invalid: " << child_valid.ToString(); - return Status::Invalid(ss.str()); - } - } - - int32_t prev_offset = offset(0); - if (prev_offset != 0) { return Status::Invalid("The first offset wasn't zero"); } - for (int32_t i = 1; i <= length_; ++i) { - int32_t current_offset = offset(i); - if (IsNull(i - 1) && current_offset != prev_offset) { - std::stringstream ss; - ss << "Offset invariant failure at: " << i << " inconsistent offsets for null slot" - << current_offset << "!=" << prev_offset; - return Status::Invalid(ss.str()); - } - if (current_offset < prev_offset) { - std::stringstream ss; - ss << "Offset 
invariant failure: " << i - << " inconsistent offset for non-null slot: " << current_offset << "<" - << prev_offset; - return Status::Invalid(ss.str()); - } - prev_offset = current_offset; - } - return Status::OK(); -} - -Status ListBuilder::Init(int32_t elements) { - DCHECK_LT(elements, std::numeric_limits::max()); - RETURN_NOT_OK(ArrayBuilder::Init(elements)); - // one more then requested for offsets - return offset_builder_.Resize((elements + 1) * sizeof(int32_t)); -} - -Status ListBuilder::Resize(int32_t capacity) { - DCHECK_LT(capacity, std::numeric_limits::max()); - // one more then requested for offsets - RETURN_NOT_OK(offset_builder_.Resize((capacity + 1) * sizeof(int32_t))); - return ArrayBuilder::Resize(capacity); -} - -Status ListBuilder::Finish(std::shared_ptr* out) { - std::shared_ptr items = values_; - if (!items) { RETURN_NOT_OK(value_builder_->Finish(&items)); } - - RETURN_NOT_OK(offset_builder_.Append(items->length())); - std::shared_ptr offsets = offset_builder_.Finish(); - - *out = std::make_shared( - type_, length_, offsets, items, null_count_, null_bitmap_); - - Reset(); - - return Status::OK(); -} - -void ListBuilder::Reset() { - capacity_ = length_ = null_count_ = 0; - null_bitmap_ = nullptr; -} - -Status ListArray::Accept(ArrayVisitor* visitor) const { - return visitor->Visit(*this); -} - -} // namespace arrow diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h deleted file mode 100644 index ec09a78afa66c..0000000000000 --- a/cpp/src/arrow/types/list.h +++ /dev/null @@ -1,170 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_LIST_H -#define ARROW_TYPES_LIST_H - -#include -#include -#include -#include - -#include "arrow/array.h" -#include "arrow/builder.h" -#include "arrow/type.h" -#include "arrow/types/primitive.h" -#include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/logging.h" -#include "arrow/util/status.h" -#include "arrow/util/visibility.h" - -namespace arrow { - -class MemoryPool; - -class ARROW_EXPORT ListArray : public Array { - public: - using TypeClass = ListType; - - ListArray(const TypePtr& type, int32_t length, std::shared_ptr offsets, - const ArrayPtr& values, int32_t null_count = 0, - std::shared_ptr null_bitmap = nullptr) - : Array(type, length, null_count, null_bitmap) { - offset_buffer_ = offsets; - offsets_ = offsets == nullptr ? nullptr : reinterpret_cast( - offset_buffer_->data()); - values_ = values; - } - - Status Validate() const override; - - virtual ~ListArray() = default; - - // Return a shared pointer in case the requestor desires to share ownership - // with this array. 
- std::shared_ptr values() const { return values_; } - std::shared_ptr offsets() const { - return std::static_pointer_cast(offset_buffer_); - } - - std::shared_ptr value_type() const { return values_->type(); } - - const int32_t* raw_offsets() const { return offsets_; } - - int32_t offset(int i) const { return offsets_[i]; } - - // Neither of these functions will perform boundschecking - int32_t value_offset(int i) const { return offsets_[i]; } - int32_t value_length(int i) const { return offsets_[i + 1] - offsets_[i]; } - - bool EqualsExact(const ListArray& other) const; - bool Equals(const std::shared_ptr& arr) const override; - - bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, - const ArrayPtr& arr) const override; - - Status Accept(ArrayVisitor* visitor) const override; - - protected: - std::shared_ptr offset_buffer_; - const int32_t* offsets_; - ArrayPtr values_; -}; - -// ---------------------------------------------------------------------- -// Array builder - -// Builder class for variable-length list array value types -// -// To use this class, you must append values to the child array builder and use -// the Append function to delimit each distinct list value (once the values -// have been appended to the child array) or use the bulk API to append -// a sequence of offests and null values. -// -// A note on types. Per arrow/type.h all types in the c++ implementation are -// logical so even though this class always builds list array, this can -// represent multiple different logical types. If no logical type is provided -// at construction time, the class defaults to List where t is taken from the -// value_builder/values that the object is constructed with. -class ARROW_EXPORT ListBuilder : public ArrayBuilder { - public: - // Use this constructor to incrementally build the value array along with offsets and - // null bitmap. 
- ListBuilder(MemoryPool* pool, std::shared_ptr value_builder, - const TypePtr& type = nullptr) - : ArrayBuilder( - pool, type ? type : std::static_pointer_cast( - std::make_shared(value_builder->type()))), - offset_builder_(pool), - value_builder_(value_builder) {} - - // Use this constructor to build the list with a pre-existing values array - ListBuilder( - MemoryPool* pool, std::shared_ptr values, const TypePtr& type = nullptr) - : ArrayBuilder(pool, type ? type : std::static_pointer_cast( - std::make_shared(values->type()))), - offset_builder_(pool), - values_(values) {} - - virtual ~ListBuilder() {} - - Status Init(int32_t elements) override; - Status Resize(int32_t capacity) override; - Status Finish(std::shared_ptr* out) override; - - // Vector append - // - // If passed, valid_bytes is of equal length to values, and any zero byte - // will be considered as a null for that slot - Status Append( - const int32_t* offsets, int32_t length, const uint8_t* valid_bytes = nullptr) { - RETURN_NOT_OK(Reserve(length)); - UnsafeAppendToBitmap(valid_bytes, length); - offset_builder_.UnsafeAppend(offsets, length); - return Status::OK(); - } - - // Start a new variable-length list slot - // - // This function should be called before beginning to append elements to the - // value builder - Status Append(bool is_valid = true) { - RETURN_NOT_OK(Reserve(1)); - UnsafeAppendToBitmap(is_valid); - RETURN_NOT_OK(offset_builder_.Append(value_builder_->length())); - return Status::OK(); - } - - Status AppendNull() { return Append(false); } - - std::shared_ptr value_builder() const { - DCHECK(!values_) << "Using value builder is pointless when values_ is set"; - return value_builder_; - } - - protected: - BufferBuilder offset_builder_; - std::shared_ptr value_builder_; - std::shared_ptr values_; - - void Reset(); -}; - -} // namespace arrow - -#endif // ARROW_TYPES_LIST_H diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc deleted file mode 100644 index 
75e5a9ff40e16..0000000000000 --- a/cpp/src/arrow/types/primitive.cc +++ /dev/null @@ -1,294 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/types/primitive.h" - -#include -#include - -#include "arrow/type_traits.h" -#include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/logging.h" - -namespace arrow { - -// ---------------------------------------------------------------------- -// Primitive array base - -PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length, - const std::shared_ptr& data, int32_t null_count, - const std::shared_ptr& null_bitmap) - : Array(type, length, null_count, null_bitmap) { - data_ = data; - raw_data_ = data == nullptr ? 
nullptr : data_->data(); -} - -bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const { - if (this == &other) { return true; } - if (null_count_ != other.null_count_) { return false; } - - if (null_count_ > 0) { - bool equal_bitmap = - null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8); - if (!equal_bitmap) { return false; } - - const uint8_t* this_data = raw_data_; - const uint8_t* other_data = other.raw_data_; - - auto size_meta = dynamic_cast(type_.get()); - int value_byte_size = size_meta->bit_width() / 8; - DCHECK_GT(value_byte_size, 0); - - for (int i = 0; i < length_; ++i) { - if (!IsNull(i) && memcmp(this_data, other_data, value_byte_size)) { return false; } - this_data += value_byte_size; - other_data += value_byte_size; - } - return true; - } else { - if (length_ == 0 && other.length_ == 0) { return true; } - return data_->Equals(*other.data_, length_); - } -} - -bool PrimitiveArray::Equals(const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (this->type_enum() != arr->type_enum()) { return false; } - return EqualsExact(*static_cast(arr.get())); -} - -template -Status NumericArray::Accept(ArrayVisitor* visitor) const { - return visitor->Visit(*this); -} - -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; -template class NumericArray; - -template -Status PrimitiveBuilder::Init(int32_t capacity) { - RETURN_NOT_OK(ArrayBuilder::Init(capacity)); - data_ = std::make_shared(pool_); - - int64_t nbytes = TypeTraits::bytes_required(capacity); - RETURN_NOT_OK(data_->Resize(nbytes)); - // TODO(emkornfield) valgrind complains without this - memset(data_->mutable_data(), 0, nbytes); - - raw_data_ = 
reinterpret_cast(data_->mutable_data()); - return Status::OK(); -} - -template -Status PrimitiveBuilder::Resize(int32_t capacity) { - // XXX: Set floor size for now - if (capacity < kMinBuilderCapacity) { capacity = kMinBuilderCapacity; } - - if (capacity_ == 0) { - RETURN_NOT_OK(Init(capacity)); - } else { - RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); - const int64_t old_bytes = data_->size(); - const int64_t new_bytes = TypeTraits::bytes_required(capacity); - RETURN_NOT_OK(data_->Resize(new_bytes)); - raw_data_ = reinterpret_cast(data_->mutable_data()); - memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes); - } - return Status::OK(); -} - -template -Status PrimitiveBuilder::Append( - const value_type* values, int32_t length, const uint8_t* valid_bytes) { - RETURN_NOT_OK(Reserve(length)); - - if (length > 0) { - memcpy(raw_data_ + length_, values, TypeTraits::bytes_required(length)); - } - - // length_ is update by these - ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); - - return Status::OK(); -} - -template -Status PrimitiveBuilder::Finish(std::shared_ptr* out) { - const int64_t bytes_required = TypeTraits::bytes_required(length_); - if (bytes_required > 0 && bytes_required < data_->size()) { - // Trim buffers - RETURN_NOT_OK(data_->Resize(bytes_required)); - } - *out = std::make_shared::ArrayType>( - type_, length_, data_, null_count_, null_bitmap_); - - data_ = null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - return Status::OK(); -} - -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; -template class PrimitiveBuilder; - -Status BooleanBuilder::Init(int32_t capacity) { - 
RETURN_NOT_OK(ArrayBuilder::Init(capacity)); - data_ = std::make_shared(pool_); - - int64_t nbytes = BitUtil::BytesForBits(capacity); - RETURN_NOT_OK(data_->Resize(nbytes)); - // TODO(emkornfield) valgrind complains without this - memset(data_->mutable_data(), 0, nbytes); - - raw_data_ = reinterpret_cast(data_->mutable_data()); - return Status::OK(); -} - -Status BooleanBuilder::Resize(int32_t capacity) { - // XXX: Set floor size for now - if (capacity < kMinBuilderCapacity) { capacity = kMinBuilderCapacity; } - - if (capacity_ == 0) { - RETURN_NOT_OK(Init(capacity)); - } else { - RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); - const int64_t old_bytes = data_->size(); - const int64_t new_bytes = BitUtil::BytesForBits(capacity); - - RETURN_NOT_OK(data_->Resize(new_bytes)); - raw_data_ = reinterpret_cast(data_->mutable_data()); - memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes); - } - return Status::OK(); -} - -Status BooleanBuilder::Finish(std::shared_ptr* out) { - const int64_t bytes_required = BitUtil::BytesForBits(length_); - - if (bytes_required > 0 && bytes_required < data_->size()) { - // Trim buffers - RETURN_NOT_OK(data_->Resize(bytes_required)); - } - *out = std::make_shared(type_, length_, data_, null_count_, null_bitmap_); - - data_ = null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - return Status::OK(); -} - -Status BooleanBuilder::Append( - const uint8_t* values, int32_t length, const uint8_t* valid_bytes) { - RETURN_NOT_OK(Reserve(length)); - - for (int i = 0; i < length; ++i) { - // Skip reading from unitialised memory - // TODO: This actually is only to keep valgrind happy but may or may not - // have a performance impact. 
- if ((valid_bytes != nullptr) && !valid_bytes[i]) continue; - - if (values[i] > 0) { - BitUtil::SetBit(raw_data_, length_ + i); - } else { - BitUtil::ClearBit(raw_data_, length_ + i); - } - } - - // this updates length_ - ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length); - return Status::OK(); -} - -BooleanArray::BooleanArray(int32_t length, const std::shared_ptr& data, - int32_t null_count, const std::shared_ptr& null_bitmap) - : PrimitiveArray( - std::make_shared(), length, data, null_count, null_bitmap) {} - -BooleanArray::BooleanArray(const TypePtr& type, int32_t length, - const std::shared_ptr& data, int32_t null_count, - const std::shared_ptr& null_bitmap) - : PrimitiveArray(type, length, data, null_count, null_bitmap) {} - -bool BooleanArray::EqualsExact(const BooleanArray& other) const { - if (this == &other) return true; - if (null_count_ != other.null_count_) { return false; } - - if (null_count_ > 0) { - bool equal_bitmap = - null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_)); - if (!equal_bitmap) { return false; } - - const uint8_t* this_data = raw_data_; - const uint8_t* other_data = other.raw_data_; - - for (int i = 0; i < length_; ++i) { - if (!IsNull(i) && BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data, i)) { - return false; - } - } - return true; - } else { - return data_->Equals(*other.data_, BitUtil::BytesForBits(length_)); - } -} - -bool BooleanArray::Equals(const ArrayPtr& arr) const { - if (this == arr.get()) return true; - if (Type::BOOL != arr->type_enum()) { return false; } - return EqualsExact(*static_cast(arr.get())); -} - -bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx, - int32_t other_start_idx, const ArrayPtr& arr) const { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (this->type_enum() != arr->type_enum()) { return false; } - const auto other = static_cast(arr.get()); - for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, 
++o_i) { - const bool is_null = IsNull(i); - if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) { - return false; - } - } - return true; -} - -Status BooleanArray::Accept(ArrayVisitor* visitor) const { - return visitor->Visit(*this); -} - -} // namespace arrow diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h deleted file mode 100644 index ec578e1e0aee7..0000000000000 --- a/cpp/src/arrow/types/primitive.h +++ /dev/null @@ -1,371 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_PRIMITIVE_H -#define ARROW_TYPES_PRIMITIVE_H - -#include -#include -#include -#include -#include -#include - -#include "arrow/array.h" -#include "arrow/builder.h" -#include "arrow/type.h" -#include "arrow/type_fwd.h" -#include "arrow/types/datetime.h" -#include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" -#include "arrow/util/visibility.h" - -namespace arrow { - -class MemoryPool; - -// Base class for fixed-size logical types. See MakePrimitiveArray -// (types/construct.h) for constructing a specific subclass. 
-class ARROW_EXPORT PrimitiveArray : public Array { - public: - virtual ~PrimitiveArray() {} - - std::shared_ptr data() const { return data_; } - - bool EqualsExact(const PrimitiveArray& other) const; - bool Equals(const std::shared_ptr& arr) const override; - - protected: - PrimitiveArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, - int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); - std::shared_ptr data_; - const uint8_t* raw_data_; -}; - -template -class ARROW_EXPORT NumericArray : public PrimitiveArray { - public: - using TypeClass = TYPE; - using value_type = typename TypeClass::c_type; - NumericArray(int32_t length, const std::shared_ptr& data, - int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr) - : PrimitiveArray( - std::make_shared(), length, data, null_count, null_bitmap) {} - NumericArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, - int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr) - : PrimitiveArray(type, length, data, null_count, null_bitmap) {} - - bool EqualsExact(const NumericArray& other) const { - return PrimitiveArray::EqualsExact(static_cast(other)); - } - - bool ApproxEquals(const std::shared_ptr& arr) const { return Equals(arr); } - - bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, - const ArrayPtr& arr) const override { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (this->type_enum() != arr->type_enum()) { return false; } - const auto other = static_cast*>(arr.get()); - for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { - const bool is_null = IsNull(i); - if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) { - return false; - } - } - return true; - } - const value_type* raw_data() const { - return reinterpret_cast(raw_data_); - } - - Status Accept(ArrayVisitor* visitor) const override; - - value_type Value(int i) const { 
return raw_data()[i]; } -}; - -template <> -inline bool NumericArray::ApproxEquals( - const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (this->type_enum() != arr->type_enum()) { return false; } - - const auto& other = *static_cast*>(arr.get()); - - if (this == &other) { return true; } - if (null_count_ != other.null_count_) { return false; } - - auto this_data = reinterpret_cast(raw_data_); - auto other_data = reinterpret_cast(other.raw_data_); - - static constexpr float EPSILON = 1E-5; - - if (length_ == 0 && other.length_ == 0) { return true; } - - if (null_count_ > 0) { - bool equal_bitmap = - null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8); - if (!equal_bitmap) { return false; } - - for (int i = 0; i < length_; ++i) { - if (IsNull(i)) continue; - if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; } - } - } else { - for (int i = 0; i < length_; ++i) { - if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; } - } - } - return true; -} - -template <> -inline bool NumericArray::ApproxEquals( - const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (this->type_enum() != arr->type_enum()) { return false; } - - const auto& other = *static_cast*>(arr.get()); - - if (this == &other) { return true; } - if (null_count_ != other.null_count_) { return false; } - - auto this_data = reinterpret_cast(raw_data_); - auto other_data = reinterpret_cast(other.raw_data_); - - if (length_ == 0 && other.length_ == 0) { return true; } - - static constexpr double EPSILON = 1E-5; - - if (null_count_ > 0) { - bool equal_bitmap = - null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8); - if (!equal_bitmap) { return false; } - - for (int i = 0; i < length_; ++i) { - if (IsNull(i)) continue; - if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; } - } - } else { - for (int i = 0; i < 
length_; ++i) { - if (fabs(this_data[i] - other_data[i]) > EPSILON) { return false; } - } - } - return true; -} - -template -class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { - public: - using value_type = typename Type::c_type; - - explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) - : ArrayBuilder(pool, type), data_(nullptr) {} - - virtual ~PrimitiveBuilder() {} - - using ArrayBuilder::Advance; - - // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory - Status AppendNulls(const uint8_t* valid_bytes, int32_t length) { - RETURN_NOT_OK(Reserve(length)); - UnsafeAppendToBitmap(valid_bytes, length); - return Status::OK(); - } - - Status AppendNull() { - RETURN_NOT_OK(Reserve(1)); - UnsafeAppendToBitmap(false); - return Status::OK(); - } - - std::shared_ptr data() const { return data_; } - - // Vector append - // - // If passed, valid_bytes is of equal length to values, and any zero byte - // will be considered as a null for that slot - Status Append( - const value_type* values, int32_t length, const uint8_t* valid_bytes = nullptr); - - Status Finish(std::shared_ptr* out) override; - Status Init(int32_t capacity) override; - - // Increase the capacity of the builder to accommodate at least the indicated - // number of elements - Status Resize(int32_t capacity) override; - - protected: - std::shared_ptr data_; - value_type* raw_data_; -}; - -template -class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder { - public: - using typename PrimitiveBuilder::value_type; - using PrimitiveBuilder::PrimitiveBuilder; - - using PrimitiveBuilder::Append; - using PrimitiveBuilder::Init; - using PrimitiveBuilder::Resize; - using PrimitiveBuilder::Reserve; - - // Scalar append. 
- Status Append(value_type val) { - RETURN_NOT_OK(ArrayBuilder::Reserve(1)); - UnsafeAppend(val); - return Status::OK(); - } - - // Does not capacity-check; make sure to call Reserve beforehand - void UnsafeAppend(value_type val) { - BitUtil::SetBit(null_bitmap_data_, length_); - raw_data_[length_++] = val; - } - - protected: - using PrimitiveBuilder::length_; - using PrimitiveBuilder::null_bitmap_data_; - using PrimitiveBuilder::raw_data_; -}; - -// Builders - -using UInt8Builder = NumericBuilder; -using UInt16Builder = NumericBuilder; -using UInt32Builder = NumericBuilder; -using UInt64Builder = NumericBuilder; - -using Int8Builder = NumericBuilder; -using Int16Builder = NumericBuilder; -using Int32Builder = NumericBuilder; -using Int64Builder = NumericBuilder; -using TimestampBuilder = NumericBuilder; - -using HalfFloatBuilder = NumericBuilder; -using FloatBuilder = NumericBuilder; -using DoubleBuilder = NumericBuilder; - -class ARROW_EXPORT BooleanArray : public PrimitiveArray { - public: - using TypeClass = BooleanType; - - BooleanArray(int32_t length, const std::shared_ptr& data, - int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); - BooleanArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, - int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); - - bool EqualsExact(const BooleanArray& other) const; - bool Equals(const ArrayPtr& arr) const override; - bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, - const ArrayPtr& arr) const override; - - Status Accept(ArrayVisitor* visitor) const override; - - const uint8_t* raw_data() const { return reinterpret_cast(raw_data_); } - - bool Value(int i) const { return BitUtil::GetBit(raw_data(), i); } -}; - -class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { - public: - explicit BooleanBuilder(MemoryPool* pool, const TypePtr& type) - : ArrayBuilder(pool, type), data_(nullptr) {} - - virtual ~BooleanBuilder() {} - - using 
ArrayBuilder::Advance; - - // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory - Status AppendNulls(const uint8_t* valid_bytes, int32_t length) { - RETURN_NOT_OK(Reserve(length)); - UnsafeAppendToBitmap(valid_bytes, length); - return Status::OK(); - } - - Status AppendNull() { - RETURN_NOT_OK(Reserve(1)); - UnsafeAppendToBitmap(false); - return Status::OK(); - } - - std::shared_ptr data() const { return data_; } - - // Scalar append - Status Append(bool val) { - Reserve(1); - BitUtil::SetBit(null_bitmap_data_, length_); - if (val) { - BitUtil::SetBit(raw_data_, length_); - } else { - BitUtil::ClearBit(raw_data_, length_); - } - ++length_; - return Status::OK(); - } - - // Vector append - // - // If passed, valid_bytes is of equal length to values, and any zero byte - // will be considered as a null for that slot - Status Append( - const uint8_t* values, int32_t length, const uint8_t* valid_bytes = nullptr); - - Status Finish(std::shared_ptr* out) override; - Status Init(int32_t capacity) override; - - // Increase the capacity of the builder to accommodate at least the indicated - // number of elements - Status Resize(int32_t capacity) override; - - protected: - std::shared_ptr data_; - uint8_t* raw_data_; -}; - -// gcc and clang disagree about how to handle template visibility when you have -// explicit specializations https://llvm.org/bugs/show_bug.cgi?id=24815 -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wattributes" -#endif - -// Only instantiate these templates once -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern 
template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; -extern template class ARROW_EXPORT NumericArray; - -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - -} // namespace arrow - -#endif // ARROW_TYPES_PRIMITIVE_H diff --git a/cpp/src/arrow/types/string.cc b/cpp/src/arrow/types/string.cc deleted file mode 100644 index db963dfa0de5f..0000000000000 --- a/cpp/src/arrow/types/string.cc +++ /dev/null @@ -1,150 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "arrow/types/string.h" - -#include -#include -#include - -#include "arrow/type.h" - -namespace arrow { - -static std::shared_ptr kBinary = std::make_shared(); -static std::shared_ptr kString = std::make_shared(); - -BinaryArray::BinaryArray(int32_t length, const std::shared_ptr& offsets, - const std::shared_ptr& data, int32_t null_count, - const std::shared_ptr& null_bitmap) - : BinaryArray(kBinary, length, offsets, data, null_count, null_bitmap) {} - -BinaryArray::BinaryArray(const TypePtr& type, int32_t length, - const std::shared_ptr& offsets, const std::shared_ptr& data, - int32_t null_count, const std::shared_ptr& null_bitmap) - : Array(type, length, null_count, null_bitmap), - offset_buffer_(offsets), - offsets_(reinterpret_cast(offset_buffer_->data())), - data_buffer_(data), - data_(nullptr) { - if (data_buffer_ != nullptr) { data_ = data_buffer_->data(); } -} - -Status BinaryArray::Validate() const { - // TODO(wesm): what to do here? - return Status::OK(); -} - -bool BinaryArray::EqualsExact(const BinaryArray& other) const { - if (!Array::EqualsExact(other)) { return false; } - - bool equal_offsets = - offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t)); - if (!equal_offsets) { return false; } - - if (!data_buffer_ && !(other.data_buffer_)) { return true; } - - return data_buffer_->Equals(*other.data_buffer_, data_buffer_->size()); -} - -bool BinaryArray::Equals(const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (this->type_enum() != arr->type_enum()) { return false; } - return EqualsExact(*static_cast(arr.get())); -} - -bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, - const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (this->type_enum() != arr->type_enum()) { return false; } - const auto other = static_cast(arr.get()); - for (int32_t i = start_idx, o_i = other_start_idx; i < 
end_idx; ++i, ++o_i) { - const bool is_null = IsNull(i); - if (is_null != arr->IsNull(o_i)) { return false; } - if (is_null) continue; - const int32_t begin_offset = offset(i); - const int32_t end_offset = offset(i + 1); - const int32_t other_begin_offset = other->offset(o_i); - const int32_t other_end_offset = other->offset(o_i + 1); - // Underlying can't be equal if the size isn't equal - if (end_offset - begin_offset != other_end_offset - other_begin_offset) { - return false; - } - - if (std::memcmp(data_ + begin_offset, other->data_ + other_begin_offset, - end_offset - begin_offset)) { - return false; - } - } - return true; -} - -Status BinaryArray::Accept(ArrayVisitor* visitor) const { - return visitor->Visit(*this); -} - -StringArray::StringArray(int32_t length, const std::shared_ptr& offsets, - const std::shared_ptr& data, int32_t null_count, - const std::shared_ptr& null_bitmap) - : BinaryArray(kString, length, offsets, data, null_count, null_bitmap) {} - -Status StringArray::Validate() const { - // TODO(emkornfield) Validate proper UTF8 code points? - return BinaryArray::Validate(); -} - -Status StringArray::Accept(ArrayVisitor* visitor) const { - return visitor->Visit(*this); -} - -// This used to be a static member variable of BinaryBuilder, but it can cause -// valgrind to report a (spurious?) memory leak when needed in other shared -// libraries. 
The problem came up while adding explicit visibility to libarrow -// and libparquet_arrow -static TypePtr kBinaryValueType = TypePtr(new UInt8Type()); - -BinaryBuilder::BinaryBuilder(MemoryPool* pool, const TypePtr& type) - : ListBuilder(pool, std::make_shared(pool, kBinaryValueType), type) { - byte_builder_ = static_cast(value_builder_.get()); -} - -Status BinaryBuilder::Finish(std::shared_ptr* out) { - std::shared_ptr result; - RETURN_NOT_OK(ListBuilder::Finish(&result)); - - const auto list = std::dynamic_pointer_cast(result); - auto values = std::dynamic_pointer_cast(list->values()); - - *out = std::make_shared(list->length(), list->offsets(), values->data(), - list->null_count(), list->null_bitmap()); - return Status::OK(); -} - -Status StringBuilder::Finish(std::shared_ptr* out) { - std::shared_ptr result; - RETURN_NOT_OK(ListBuilder::Finish(&result)); - - const auto list = std::dynamic_pointer_cast(result); - auto values = std::dynamic_pointer_cast(list->values()); - - *out = std::make_shared(list->length(), list->offsets(), values->data(), - list->null_count(), list->null_bitmap()); - return Status::OK(); -} - -} // namespace arrow diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h deleted file mode 100644 index c8752439f168c..0000000000000 --- a/cpp/src/arrow/types/string.h +++ /dev/null @@ -1,149 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_STRING_H -#define ARROW_TYPES_STRING_H - -#include -#include -#include -#include - -#include "arrow/array.h" -#include "arrow/type.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/util/status.h" -#include "arrow/util/visibility.h" - -namespace arrow { - -class Buffer; -class MemoryPool; - -class ARROW_EXPORT BinaryArray : public Array { - public: - using TypeClass = BinaryType; - - BinaryArray(int32_t length, const std::shared_ptr& offsets, - const std::shared_ptr& data, int32_t null_count = 0, - const std::shared_ptr& null_bitmap = nullptr); - - // Constructor that allows sub-classes/builders to propagate there logical type up the - // class hierarchy. 
- BinaryArray(const TypePtr& type, int32_t length, const std::shared_ptr& offsets, - const std::shared_ptr& data, int32_t null_count = 0, - const std::shared_ptr& null_bitmap = nullptr); - - // Return the pointer to the given elements bytes - // TODO(emkornfield) introduce a StringPiece or something similar to capture zero-copy - // pointer + offset - const uint8_t* GetValue(int i, int32_t* out_length) const { - DCHECK(out_length); - const int32_t pos = offsets_[i]; - *out_length = offsets_[i + 1] - pos; - return data_ + pos; - } - - std::shared_ptr data() const { return data_buffer_; } - std::shared_ptr offsets() const { return offset_buffer_; } - - const int32_t* raw_offsets() const { return offsets_; } - - int32_t offset(int i) const { return offsets_[i]; } - - // Neither of these functions will perform boundschecking - int32_t value_offset(int i) const { return offsets_[i]; } - int32_t value_length(int i) const { return offsets_[i + 1] - offsets_[i]; } - - bool EqualsExact(const BinaryArray& other) const; - bool Equals(const std::shared_ptr& arr) const override; - bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, - const ArrayPtr& arr) const override; - - Status Validate() const override; - - Status Accept(ArrayVisitor* visitor) const override; - - private: - std::shared_ptr offset_buffer_; - const int32_t* offsets_; - - std::shared_ptr data_buffer_; - const uint8_t* data_; -}; - -class ARROW_EXPORT StringArray : public BinaryArray { - public: - using TypeClass = StringType; - - StringArray(int32_t length, const std::shared_ptr& offsets, - const std::shared_ptr& data, int32_t null_count = 0, - const std::shared_ptr& null_bitmap = nullptr); - - // Construct a std::string - // TODO: std::bad_alloc possibility - std::string GetString(int i) const { - int32_t nchars; - const uint8_t* str = GetValue(i, &nchars); - return std::string(reinterpret_cast(str), nchars); - } - - Status Validate() const override; - - Status Accept(ArrayVisitor* 
visitor) const override; -}; - -// BinaryBuilder : public ListBuilder -class ARROW_EXPORT BinaryBuilder : public ListBuilder { - public: - explicit BinaryBuilder(MemoryPool* pool, const TypePtr& type); - virtual ~BinaryBuilder() {} - - Status Append(const uint8_t* value, int32_t length) { - RETURN_NOT_OK(ListBuilder::Append()); - return byte_builder_->Append(value, length); - } - - Status Append(const char* value, int32_t length) { - return Append(reinterpret_cast(value), length); - } - - Status Append(const std::string& value) { return Append(value.c_str(), value.size()); } - - Status Finish(std::shared_ptr* out) override; - - protected: - UInt8Builder* byte_builder_; -}; - -// String builder -class ARROW_EXPORT StringBuilder : public BinaryBuilder { - public: - explicit StringBuilder(MemoryPool* pool, const TypePtr& type) - : BinaryBuilder(pool, type) {} - - using BinaryBuilder::Append; - - Status Finish(std::shared_ptr* out) override; - - Status Append(const std::vector& values, uint8_t* null_bytes); -}; - -} // namespace arrow - -#endif // ARROW_TYPES_STRING_H diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc deleted file mode 100644 index 0e0db23544bf7..0000000000000 --- a/cpp/src/arrow/types/struct.cc +++ /dev/null @@ -1,108 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/types/struct.h" - -#include - -namespace arrow { - -bool StructArray::Equals(const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (this->type_enum() != arr->type_enum()) { return false; } - if (null_count_ != arr->null_count()) { return false; } - return RangeEquals(0, length_, 0, arr); -} - -bool StructArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, - const std::shared_ptr& arr) const { - if (this == arr.get()) { return true; } - if (!arr) { return false; } - if (Type::STRUCT != arr->type_enum()) { return false; } - const auto other = static_cast(arr.get()); - - bool equal_fields = true; - for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { - if (IsNull(i) != arr->IsNull(o_i)) { return false; } - if (IsNull(i)) continue; - for (size_t j = 0; j < field_arrays_.size(); ++j) { - // TODO: really we should be comparing stretches of non-null data rather - // than looking at one value at a time. 
- equal_fields = field(j)->RangeEquals(i, i + 1, o_i, other->field(j)); - if (!equal_fields) { return false; } - } - } - - return true; -} - -Status StructArray::Validate() const { - if (length_ < 0) { return Status::Invalid("Length was negative"); } - - if (null_count() > length_) { - return Status::Invalid("Null count exceeds the length of this struct"); - } - - if (field_arrays_.size() > 0) { - // Validate fields - int32_t array_length = field_arrays_[0]->length(); - size_t idx = 0; - for (auto it : field_arrays_) { - if (it->length() != array_length) { - std::stringstream ss; - ss << "Length is not equal from field " << it->type()->ToString() - << " at position {" << idx << "}"; - return Status::Invalid(ss.str()); - } - - const Status child_valid = it->Validate(); - if (!child_valid.ok()) { - std::stringstream ss; - ss << "Child array invalid: " << child_valid.ToString() << " at position {" << idx - << "}"; - return Status::Invalid(ss.str()); - } - ++idx; - } - - if (array_length > 0 && array_length != length_) { - return Status::Invalid("Struct's length is not equal to its child arrays"); - } - } - return Status::OK(); -} - -Status StructArray::Accept(ArrayVisitor* visitor) const { - return visitor->Visit(*this); -} - -Status StructBuilder::Finish(std::shared_ptr* out) { - std::vector> fields(field_builders_.size()); - for (size_t i = 0; i < field_builders_.size(); ++i) { - RETURN_NOT_OK(field_builders_[i]->Finish(&fields[i])); - } - - *out = std::make_shared(type_, length_, fields, null_count_, null_bitmap_); - - null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - - return Status::OK(); -} - -} // namespace arrow diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h deleted file mode 100644 index 1e2bf2d9a1223..0000000000000 --- a/cpp/src/arrow/types/struct.h +++ /dev/null @@ -1,116 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_STRUCT_H -#define ARROW_TYPES_STRUCT_H - -#include -#include -#include - -#include "arrow/type.h" -#include "arrow/types/list.h" -#include "arrow/types/primitive.h" -#include "arrow/util/visibility.h" - -namespace arrow { - -class ARROW_EXPORT StructArray : public Array { - public: - using TypeClass = StructType; - - StructArray(const TypePtr& type, int32_t length, std::vector& field_arrays, - int32_t null_count = 0, std::shared_ptr null_bitmap = nullptr) - : Array(type, length, null_count, null_bitmap) { - type_ = type; - field_arrays_ = field_arrays; - } - - Status Validate() const override; - - virtual ~StructArray() {} - - // Return a shared pointer in case the requestor desires to share ownership - // with this array. 
- std::shared_ptr field(int32_t pos) const { - DCHECK_GT(field_arrays_.size(), 0); - return field_arrays_[pos]; - } - const std::vector& fields() const { return field_arrays_; } - - bool EqualsExact(const StructArray& other) const; - bool Equals(const std::shared_ptr& arr) const override; - bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, - const std::shared_ptr& arr) const override; - - Status Accept(ArrayVisitor* visitor) const override; - - protected: - // The child arrays corresponding to each field of the struct data type. - std::vector field_arrays_; -}; - -// --------------------------------------------------------------------------------- -// StructArray builder -// Append, Resize and Reserve methods are acting on StructBuilder. -// Please make sure all these methods of all child-builders' are consistently -// called to maintain data-structure consistency. -class ARROW_EXPORT StructBuilder : public ArrayBuilder { - public: - StructBuilder(MemoryPool* pool, const std::shared_ptr& type, - const std::vector>& field_builders) - : ArrayBuilder(pool, type) { - field_builders_ = field_builders; - } - - Status Finish(std::shared_ptr* out) override; - - // Null bitmap is of equal length to every child field, and any zero byte - // will be considered as a null for that field, but users must using app- - // end methods or advance methods of the child builders' independently to - // insert data. - Status Append(int32_t length, const uint8_t* valid_bytes) { - RETURN_NOT_OK(Reserve(length)); - UnsafeAppendToBitmap(valid_bytes, length); - return Status::OK(); - } - - // Append an element to the Struct. All child-builders' Append method must - // be called independently to maintain data-structure consistency. 
- Status Append(bool is_valid = true) { - RETURN_NOT_OK(Reserve(1)); - UnsafeAppendToBitmap(is_valid); - return Status::OK(); - } - - Status AppendNull() { return Append(false); } - - std::shared_ptr field_builder(int pos) const { - DCHECK_GT(field_builders_.size(), 0); - return field_builders_[pos]; - } - const std::vector>& field_builders() const { - return field_builders_; - } - - protected: - std::vector> field_builders_; -}; - -} // namespace arrow - -#endif // ARROW_TYPES_STRUCT_H diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h deleted file mode 100644 index 6e6ab85ad4eb7..0000000000000 --- a/cpp/src/arrow/types/test-common.h +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#ifndef ARROW_TYPES_TEST_COMMON_H -#define ARROW_TYPES_TEST_COMMON_H - -#include -#include -#include - -#include "gtest/gtest.h" - -#include "arrow/array.h" -#include "arrow/builder.h" -#include "arrow/test-util.h" -#include "arrow/type.h" -#include "arrow/util/memory-pool.h" - -namespace arrow { - -using std::unique_ptr; - -class TestBuilder : public ::testing::Test { - public: - void SetUp() { - pool_ = default_memory_pool(); - type_ = TypePtr(new UInt8Type()); - builder_.reset(new UInt8Builder(pool_, type_)); - builder_nn_.reset(new UInt8Builder(pool_, type_)); - } - - protected: - MemoryPool* pool_; - - TypePtr type_; - unique_ptr builder_; - unique_ptr builder_nn_; -}; - -template -Status MakeArray(const std::vector& valid_bytes, const std::vector& values, - int size, Builder* builder, ArrayPtr* out) { - // Append the first 1000 - for (int i = 0; i < size; ++i) { - if (valid_bytes[i] > 0) { - RETURN_NOT_OK(builder->Append(values[i])); - } else { - RETURN_NOT_OK(builder->AppendNull()); - } - } - return builder->Finish(out); -} - -} // namespace arrow - -#endif // ARROW_TYPES_TEST_COMMON_H diff --git a/cpp/src/arrow/types/union.cc b/cpp/src/arrow/types/union.cc deleted file mode 100644 index cc2934b2e4adb..0000000000000 --- a/cpp/src/arrow/types/union.cc +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/types/union.h" - -#include -#include -#include -#include - -#include "arrow/type.h" - -namespace arrow {} // namespace arrow diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h deleted file mode 100644 index 44f39cc69942b..0000000000000 --- a/cpp/src/arrow/types/union.h +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#ifndef ARROW_TYPES_UNION_H -#define ARROW_TYPES_UNION_H - -#include -#include -#include - -#include "arrow/array.h" -#include "arrow/type.h" - -namespace arrow { - -class Buffer; - -class UnionArray : public Array { - protected: - // The data are types encoded as int16 - Buffer* types_; - std::vector> children_; -}; - -class DenseUnionArray : public UnionArray { - protected: - Buffer* offset_buf_; -}; - -class SparseUnionArray : public UnionArray {}; - -} // namespace arrow - -#endif // ARROW_TYPES_UNION_H From 66ac3f73002f549ba11d2a672c8954ae476db763 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 12 Dec 2016 14:23:57 -0500 Subject: [PATCH 2/5] Promote buffer.h/status.h/memory-pool.h to top level directory Change-Id: If499bab530929beed1e76e631a1512d44d8f4d62 --- cpp/CMakeLists.txt | 6 +++--- cpp/src/arrow/CMakeLists.txt | 7 ++++++- cpp/src/arrow/api.h | 7 +++---- cpp/src/arrow/array-list-test.cc | 2 +- cpp/src/arrow/array-primitive-test.cc | 4 ++-- cpp/src/arrow/array-struct-test.cc | 2 +- cpp/src/arrow/array-test.cc | 4 ++-- cpp/src/arrow/array.h | 5 ++--- cpp/src/arrow/{util => }/buffer-test.cc | 4 ++-- cpp/src/arrow/{util => }/buffer.cc | 0 cpp/src/arrow/{util => }/buffer.h | 2 +- cpp/src/arrow/builder.h | 4 ++-- cpp/src/arrow/io/file.cc | 6 +++--- cpp/src/arrow/io/hdfs.cc | 6 +++--- cpp/src/arrow/io/interfaces.cc | 4 ++-- cpp/src/arrow/io/io-file-test.cc | 2 +- cpp/src/arrow/io/io-hdfs-test.cc | 2 +- cpp/src/arrow/io/libhdfs_shim.cc | 2 +- cpp/src/arrow/io/memory.cc | 5 ++--- cpp/src/arrow/io/test-common.h | 4 ++-- cpp/src/arrow/ipc/adapter.cc | 1 - cpp/src/arrow/ipc/test-common.h | 4 ++-- cpp/src/arrow/ipc/util.h | 2 +- .../{util/memory-pool-test.cc => memory_pool-test.cc} | 4 ++-- cpp/src/arrow/{util/memory-pool.cc => memory_pool.cc} | 0 cpp/src/arrow/{util/memory-pool.h => memory_pool.h} | 0 cpp/src/arrow/{util => }/status-test.cc | 2 +- cpp/src/arrow/{util => }/status.cc | 0 cpp/src/arrow/{util => }/status.h | 0 cpp/src/arrow/table-test.cc | 2 
+- cpp/src/arrow/test-util.h | 6 +++--- cpp/src/arrow/type.h | 2 +- cpp/src/arrow/util/CMakeLists.txt | 6 ------ python/src/pyarrow/adapters/builtin.cc | 2 +- python/src/pyarrow/adapters/pandas.cc | 2 +- python/src/pyarrow/common.cc | 4 ++-- python/src/pyarrow/common.h | 5 +++-- python/src/pyarrow/io.cc | 4 ++-- 38 files changed, 60 insertions(+), 64 deletions(-) rename cpp/src/arrow/{util => }/buffer-test.cc (98%) rename cpp/src/arrow/{util => }/buffer.cc (100%) rename cpp/src/arrow/{util => }/buffer.h (99%) rename cpp/src/arrow/{util/memory-pool-test.cc => memory_pool-test.cc} (96%) rename cpp/src/arrow/{util/memory-pool.cc => memory_pool.cc} (100%) rename cpp/src/arrow/{util/memory-pool.h => memory_pool.h} (100%) rename cpp/src/arrow/{util => }/status-test.cc (97%) rename cpp/src/arrow/{util => }/status.cc (100%) rename cpp/src/arrow/{util => }/status.h (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d5a416168c65d..adcca0e0b49e8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -743,17 +743,17 @@ set(ARROW_PRIVATE_LINK_LIBS set(ARROW_SRCS src/arrow/array.cc + src/arrow/buffer.cc src/arrow/builder.cc src/arrow/column.cc + src/arrow/memory_pool.cc src/arrow/pretty_print.cc src/arrow/schema.cc + src/arrow/status.cc src/arrow/table.cc src/arrow/type.cc src/arrow/util/bit-util.cc - src/arrow/util/buffer.cc - src/arrow/util/memory-pool.cc - src/arrow/util/status.cc ) add_library(arrow_objlib OBJECT diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index ac88ddb363944..7d7bc29f4abd8 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -20,9 +20,12 @@ install(FILES api.h array.h column.h + buffer.h builder.h + memory_pool.h pretty_print.h schema.h + status.h table.h type.h type_fwd.h @@ -42,10 +45,12 @@ ADD_ARROW_TEST(array-list-test) ADD_ARROW_TEST(array-primitive-test) ADD_ARROW_TEST(array-string-test) ADD_ARROW_TEST(array-struct-test) - +ADD_ARROW_TEST(buffer-test) 
ADD_ARROW_TEST(column-test) +ADD_ARROW_TEST(memory_pool-test) ADD_ARROW_TEST(pretty_print-test) ADD_ARROW_TEST(schema-test) +ADD_ARROW_TEST(status-test) ADD_ARROW_TEST(table-test) ADD_ARROW_BENCHMARK(column-benchmark) diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index 34574034c2ec5..1de198df5988e 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -26,9 +26,8 @@ #include "arrow/schema.h" #include "arrow/table.h" #include "arrow/type.h" - -#include "arrow/util/buffer.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" #endif // ARROW_API_H diff --git a/cpp/src/arrow/array-list-test.cc b/cpp/src/arrow/array-list-test.cc index 926dcd62a5ec0..8baaf06a7dbcc 100644 --- a/cpp/src/arrow/array-list-test.cc +++ b/cpp/src/arrow/array-list-test.cc @@ -25,9 +25,9 @@ #include "arrow/array.h" #include "arrow/builder.h" +#include "arrow/status.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/util/status.h" using std::shared_ptr; using std::string; diff --git a/cpp/src/arrow/array-primitive-test.cc b/cpp/src/arrow/array-primitive-test.cc index 5fe237699e689..a10e2404f29c6 100644 --- a/cpp/src/arrow/array-primitive-test.cc +++ b/cpp/src/arrow/array-primitive-test.cc @@ -23,13 +23,13 @@ #include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/buffer.h" #include "arrow/builder.h" +#include "arrow/status.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" using std::string; using std::shared_ptr; diff --git a/cpp/src/arrow/array-struct-test.cc b/cpp/src/arrow/array-struct-test.cc index 8291d2ab22667..58386fe028fd2 100644 --- a/cpp/src/arrow/array-struct-test.cc +++ b/cpp/src/arrow/array-struct-test.cc @@ -23,9 +23,9 @@ #include "arrow/array.h" #include "arrow/builder.h" +#include "arrow/status.h" 
#include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/util/status.h" using std::shared_ptr; using std::string; diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index ec5b9c81ec11b..783104e874bb7 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -24,10 +24,10 @@ #include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/util/buffer.h" -#include "arrow/util/memory-pool.h" namespace arrow { diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 2fae734ce15bc..078c87b4713a0 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -21,9 +21,9 @@ #include #include +#include "arrow/buffer.h" #include "arrow/type.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" @@ -110,8 +110,7 @@ typedef std::shared_ptr ArrayPtr; Status ARROW_EXPORT GetEmptyBitmap( MemoryPool* pool, int32_t length, std::shared_ptr* result); -// Base class for fixed-size logical types. See MakePrimitiveArray -// (types/construct.h) for constructing a specific subclass. 
+// Base class for fixed-size logical types class ARROW_EXPORT PrimitiveArray : public Array { public: virtual ~PrimitiveArray() {} diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/buffer-test.cc similarity index 98% rename from cpp/src/arrow/util/buffer-test.cc rename to cpp/src/arrow/buffer-test.cc index 095b07b7ab309..c1d027bb653fe 100644 --- a/cpp/src/arrow/util/buffer-test.cc +++ b/cpp/src/arrow/buffer-test.cc @@ -21,9 +21,9 @@ #include "gtest/gtest.h" +#include "arrow/buffer.h" +#include "arrow/status.h" #include "arrow/test-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" using std::string; diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/buffer.cc similarity index 100% rename from cpp/src/arrow/util/buffer.cc rename to cpp/src/arrow/buffer.cc diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/buffer.h similarity index 99% rename from cpp/src/arrow/util/buffer.h rename to cpp/src/arrow/buffer.h index 5c87395deebb0..27437ca0486c3 100644 --- a/cpp/src/arrow/util/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -23,8 +23,8 @@ #include #include +#include "arrow/status.h" #include "arrow/util/macros.h" -#include "arrow/util/status.h" #include "arrow/util/visibility.h" namespace arrow { diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index ad0b27568e185..7ec0da7f1ecac 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -22,11 +22,11 @@ #include #include +#include "arrow/buffer.h" +#include "arrow/status.h" #include "arrow/type.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" #include "arrow/util/macros.h" -#include "arrow/util/status.h" #include "arrow/util/visibility.h" namespace arrow { diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index 05fa6663e335d..c50a9bba28e8e 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -107,9 +107,9 @@ #include "arrow/io/interfaces.h" -#include "arrow/util/buffer.h" -#include 
"arrow/util/memory-pool.h" -#include "arrow/util/status.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" namespace arrow { namespace io { diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc index 8c6d49f92e606..b8e212026b11c 100644 --- a/cpp/src/arrow/io/hdfs.cc +++ b/cpp/src/arrow/io/hdfs.cc @@ -22,10 +22,10 @@ #include #include +#include "arrow/buffer.h" #include "arrow/io/hdfs.h" -#include "arrow/util/buffer.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" namespace arrow { namespace io { diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc index 44986cee1afc9..68c1ac30f8250 100644 --- a/cpp/src/arrow/io/interfaces.cc +++ b/cpp/src/arrow/io/interfaces.cc @@ -20,8 +20,8 @@ #include #include -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" +#include "arrow/buffer.h" +#include "arrow/status.h" namespace arrow { namespace io { diff --git a/cpp/src/arrow/io/io-file-test.cc b/cpp/src/arrow/io/io-file-test.cc index fad49cef89908..f0ea7ec5e4dea 100644 --- a/cpp/src/arrow/io/io-file-test.cc +++ b/cpp/src/arrow/io/io-file-test.cc @@ -30,7 +30,7 @@ #include "arrow/io/file.h" #include "arrow/io/test-common.h" -#include "arrow/util/memory-pool.h" +#include "arrow/memory_pool.h" namespace arrow { namespace io { diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc index 8338de6d96a55..e07eaa3d1b487 100644 --- a/cpp/src/arrow/io/io-hdfs-test.cc +++ b/cpp/src/arrow/io/io-hdfs-test.cc @@ -25,8 +25,8 @@ #include // NOLINT #include "arrow/io/hdfs.h" +#include "arrow/status.h" #include "arrow/test-util.h" -#include "arrow/util/status.h" namespace arrow { namespace io { diff --git a/cpp/src/arrow/io/libhdfs_shim.cc b/cpp/src/arrow/io/libhdfs_shim.cc index 36b8a4ec980a9..3715376ebb95b 100644 --- a/cpp/src/arrow/io/libhdfs_shim.cc +++ b/cpp/src/arrow/io/libhdfs_shim.cc @@ -53,7 +53,7 @@ 
extern "C" { #include // NOLINT -#include "arrow/util/status.h" +#include "arrow/status.h" #include "arrow/util/visibility.h" namespace fs = boost::filesystem; diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc index af495e27e5642..b5cf4b77a980d 100644 --- a/cpp/src/arrow/io/memory.cc +++ b/cpp/src/arrow/io/memory.cc @@ -38,10 +38,9 @@ #include #include +#include "arrow/buffer.h" #include "arrow/io/interfaces.h" - -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" +#include "arrow/status.h" namespace arrow { namespace io { diff --git a/cpp/src/arrow/io/test-common.h b/cpp/src/arrow/io/test-common.h index f8fed883cf583..146808371d307 100644 --- a/cpp/src/arrow/io/test-common.h +++ b/cpp/src/arrow/io/test-common.h @@ -32,10 +32,10 @@ // nothing #endif +#include "arrow/buffer.h" #include "arrow/io/memory.h" +#include "arrow/memory_pool.h" #include "arrow/test-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/memory-pool.h" namespace arrow { namespace io { diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc index 89d6bb39d9c09..7af2fd05ae3df 100644 --- a/cpp/src/arrow/ipc/adapter.cc +++ b/cpp/src/arrow/ipc/adapter.cc @@ -32,7 +32,6 @@ #include "arrow/schema.h" #include "arrow/table.h" #include "arrow/type.h" -#include "arrow/types/construct.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index 7a35a51ed20af..8416f0df57364 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -25,13 +25,13 @@ #include #include "arrow/array.h" +#include "arrow/buffer.h" #include "arrow/builder.h" +#include "arrow/memory_pool.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/memory-pool.h" namespace arrow { namespace ipc { diff --git 
a/cpp/src/arrow/ipc/util.h b/cpp/src/arrow/ipc/util.h index 242d6624f1e7f..2000c61e7ed57 100644 --- a/cpp/src/arrow/ipc/util.h +++ b/cpp/src/arrow/ipc/util.h @@ -22,7 +22,7 @@ #include "arrow/array.h" #include "arrow/io/interfaces.h" -#include "arrow/util/status.h" +#include "arrow/status.h" namespace arrow { namespace ipc { diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/memory_pool-test.cc similarity index 96% rename from cpp/src/arrow/util/memory-pool-test.cc rename to cpp/src/arrow/memory_pool-test.cc index 5d60376f794ff..d6f323d276305 100644 --- a/cpp/src/arrow/util/memory-pool-test.cc +++ b/cpp/src/arrow/memory_pool-test.cc @@ -20,9 +20,9 @@ #include "gtest/gtest.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" #include "arrow/test-util.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/memory_pool.cc similarity index 100% rename from cpp/src/arrow/util/memory-pool.cc rename to cpp/src/arrow/memory_pool.cc diff --git a/cpp/src/arrow/util/memory-pool.h b/cpp/src/arrow/memory_pool.h similarity index 100% rename from cpp/src/arrow/util/memory-pool.h rename to cpp/src/arrow/memory_pool.h diff --git a/cpp/src/arrow/util/status-test.cc b/cpp/src/arrow/status-test.cc similarity index 97% rename from cpp/src/arrow/util/status-test.cc rename to cpp/src/arrow/status-test.cc index e0ff20fea1233..969ba970c154f 100644 --- a/cpp/src/arrow/util/status-test.cc +++ b/cpp/src/arrow/status-test.cc @@ -17,8 +17,8 @@ #include "gtest/gtest.h" +#include "arrow/status.h" #include "arrow/test-util.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/status.cc similarity index 100% rename from cpp/src/arrow/util/status.cc rename to cpp/src/arrow/status.cc diff --git a/cpp/src/arrow/util/status.h b/cpp/src/arrow/status.h similarity index 100% rename from cpp/src/arrow/util/status.h rename to 
cpp/src/arrow/status.h diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc index 6a61735accf4d..f62336d07f09a 100644 --- a/cpp/src/arrow/table-test.cc +++ b/cpp/src/arrow/table-test.cc @@ -24,10 +24,10 @@ #include "arrow/array.h" #include "arrow/column.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/util/status.h" using std::shared_ptr; using std::vector; diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h index 049d8e59f8c99..aa310b1a49ebe 100644 --- a/cpp/src/arrow/test-util.h +++ b/cpp/src/arrow/test-util.h @@ -28,18 +28,18 @@ #include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/column.h" +#include "arrow/memory_pool.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/table.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" #include "arrow/util/logging.h" -#include "arrow/util/memory-pool.h" #include "arrow/util/random.h" -#include "arrow/util/status.h" #define ASSERT_RAISES(ENUM, expr) \ do { \ diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 966706cb520b2..8637081acd9b7 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -23,9 +23,9 @@ #include #include +#include "arrow/status.h" #include "arrow/type_fwd.h" #include "arrow/util/macros.h" -#include "arrow/util/status.h" #include "arrow/util/visibility.h" namespace arrow { diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 6e19730219553..8d9afccf867df 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -22,12 +22,9 @@ # Headers: top level install(FILES bit-util.h - buffer.h logging.h macros.h - memory-pool.h random.h - status.h visibility.h DESTINATION include/arrow/util) @@ -72,6 +69,3 @@ if (ARROW_BUILD_BENCHMARKS) 
endif() ADD_ARROW_TEST(bit-util-test) -ADD_ARROW_TEST(buffer-test) -ADD_ARROW_TEST(memory-pool-test) -ADD_ARROW_TEST(status-test) diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index c034fbd977747..ac2f533c408c7 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -21,7 +21,7 @@ #include "pyarrow/adapters/builtin.h" #include "arrow/api.h" -#include "arrow/util/status.h" +#include "arrow/status.h" #include "pyarrow/helpers.h" diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index adb27e83ef120..64b708695194a 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -31,7 +31,7 @@ #include "arrow/api.h" #include "arrow/util/bit-util.h" -#include "arrow/util/status.h" +#include "arrow/status.h" #include "pyarrow/common.h" #include "pyarrow/config.h" diff --git a/python/src/pyarrow/common.cc b/python/src/pyarrow/common.cc index fa875f2b9aba1..fb4d3496ac79f 100644 --- a/python/src/pyarrow/common.cc +++ b/python/src/pyarrow/common.cc @@ -21,8 +21,8 @@ #include #include -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" using arrow::Status; diff --git a/python/src/pyarrow/common.h b/python/src/pyarrow/common.h index 7f3131ef03dd8..7e3382634a781 100644 --- a/python/src/pyarrow/common.h +++ b/python/src/pyarrow/common.h @@ -19,10 +19,11 @@ #define PYARROW_COMMON_H #include "pyarrow/config.h" -#include "arrow/util/buffer.h" -#include "arrow/util/macros.h" #include "pyarrow/visibility.h" +#include "arrow/buffer.h" +#include "arrow/util/macros.h" + namespace arrow { class MemoryPool; } namespace pyarrow { diff --git a/python/src/pyarrow/io.cc b/python/src/pyarrow/io.cc index e6dbc12d429b0..12f5ba0bf2b49 100644 --- a/python/src/pyarrow/io.cc +++ b/python/src/pyarrow/io.cc @@ -21,8 +21,8 @@ #include #include "arrow/io/memory.h" 
-#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" #include "pyarrow/common.h" From 6f7ae77ad8d6097d7b848a53bd87a288d7bbec2b Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 12 Dec 2016 14:31:39 -0500 Subject: [PATCH 3/5] Fixes, cpplint Change-Id: I598cabec287f87f254b9d5d58f8cf0f0a81e8610 --- cpp/src/arrow/api.h | 6 +++--- cpp/src/arrow/array.h | 2 ++ cpp/src/arrow/buffer.cc | 6 +++--- cpp/src/arrow/builder.cc | 2 ++ cpp/src/arrow/builder.h | 1 + cpp/src/arrow/memory_pool.cc | 2 +- cpp/src/arrow/status.cc | 2 +- 7 files changed, 13 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index 1de198df5988e..51437d863b8b9 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -21,13 +21,13 @@ #define ARROW_API_H #include "arrow/array.h" +#include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/column.h" +#include "arrow/memory_pool.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/table.h" #include "arrow/type.h" -#include "arrow/buffer.h" -#include "arrow/memory_pool.h" -#include "arrow/status.h" #endif // ARROW_API_H diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 078c87b4713a0..c34b165fdbbc9 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -20,6 +20,8 @@ #include #include +#include +#include #include "arrow/buffer.h" #include "arrow/type.h" diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index a230259e5930d..6ffa03a0b5663 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -15,15 +15,15 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/util/buffer.h" +#include "arrow/buffer.h" #include #include +#include "arrow/memory_pool.h" +#include "arrow/status.h" #include "arrow/util/bit-util.h" #include "arrow/util/logging.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 6e2f02d36bbe0..481ded73ab08c 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -17,7 +17,9 @@ #include "arrow/builder.h" +#include #include +#include #include "arrow/array.h" #include "arrow/type.h" diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 7ec0da7f1ecac..7162d31d2464a 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -20,6 +20,7 @@ #include #include +#include #include #include "arrow/buffer.h" diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 9aa706693e9f7..d27be44916a24 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/util/memory-pool.h" +#include "arrow/memory_pool.h" #include #include diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc index 08e9ae3946e51..e1a242721eccc 100644 --- a/cpp/src/arrow/status.cc +++ b/cpp/src/arrow/status.cc @@ -10,7 +10,7 @@ // non-const method, all threads accessing the same Status must use // external synchronization. 
-#include "arrow/util/status.h" +#include "arrow/status.h" #include From 9dc2e22f0b2bfa87523469ee1da02ab9fcb22d43 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 12 Dec 2016 15:00:51 -0500 Subject: [PATCH 4/5] Fix remaining old includes Change-Id: I5df4cfcb88e2db2c1b297c40bb43826cc5fe6945 --- cpp/src/arrow/array.cc | 4 ++-- cpp/src/arrow/builder.cc | 4 ++-- cpp/src/arrow/column-benchmark.cc | 2 +- cpp/src/arrow/column.cc | 2 +- cpp/src/arrow/ipc/adapter.cc | 4 ++-- cpp/src/arrow/ipc/file.cc | 4 ++-- cpp/src/arrow/ipc/ipc-adapter-test.cc | 6 +++--- cpp/src/arrow/ipc/ipc-file-test.cc | 6 +++--- cpp/src/arrow/ipc/ipc-json-test.cc | 4 ++-- cpp/src/arrow/ipc/ipc-metadata-test.cc | 2 +- cpp/src/arrow/ipc/json-integration-test.cc | 2 +- cpp/src/arrow/ipc/json-internal.cc | 4 ++-- cpp/src/arrow/ipc/json.cc | 6 +++--- cpp/src/arrow/ipc/metadata-internal.cc | 4 ++-- cpp/src/arrow/ipc/metadata.cc | 4 ++-- cpp/src/arrow/memory_pool.cc | 2 +- cpp/src/arrow/pretty_print.cc | 2 +- cpp/src/arrow/table.cc | 2 +- cpp/src/arrow/type.cc | 2 +- cpp/src/arrow/util/bit-util.cc | 4 ++-- 20 files changed, 35 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index aff3d1634d311..7ab61f59f551b 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -21,11 +21,11 @@ #include #include +#include "arrow/buffer.h" +#include "arrow/status.h" #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" #include "arrow/util/logging.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 481ded73ab08c..493b5e7ccab9e 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -22,12 +22,12 @@ #include #include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" #include "arrow/util/logging.h" 
-#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc index aeb89763094e3..650ec90fc0728 100644 --- a/cpp/src/arrow/column-benchmark.cc +++ b/cpp/src/arrow/column-benchmark.cc @@ -18,8 +18,8 @@ #include "benchmark/benchmark.h" #include "arrow/array.h" +#include "arrow/memory_pool.h" #include "arrow/test-util.h" -#include "arrow/util/memory-pool.h" namespace arrow { namespace { diff --git a/cpp/src/arrow/column.cc b/cpp/src/arrow/column.cc index eca5f4d30a698..1d136e7d95a55 100644 --- a/cpp/src/arrow/column.cc +++ b/cpp/src/arrow/column.cc @@ -21,8 +21,8 @@ #include #include "arrow/array.h" +#include "arrow/status.h" #include "arrow/type.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc index 7af2fd05ae3df..f813c1dbbc3b0 100644 --- a/cpp/src/arrow/ipc/adapter.cc +++ b/cpp/src/arrow/ipc/adapter.cc @@ -23,6 +23,7 @@ #include #include "arrow/array.h" +#include "arrow/buffer.h" #include "arrow/io/interfaces.h" #include "arrow/io/memory.h" #include "arrow/ipc/Message_generated.h" @@ -30,12 +31,11 @@ #include "arrow/ipc/metadata.h" #include "arrow/ipc/util.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/table.h" #include "arrow/type.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" #include "arrow/util/logging.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/ipc/file.cc b/cpp/src/arrow/ipc/file.cc index fa50058ea4200..d7d2e613f87db 100644 --- a/cpp/src/arrow/ipc/file.cc +++ b/cpp/src/arrow/ipc/file.cc @@ -22,14 +22,14 @@ #include #include +#include "arrow/buffer.h" #include "arrow/io/interfaces.h" #include "arrow/io/memory.h" #include "arrow/ipc/adapter.h" #include "arrow/ipc/metadata.h" #include "arrow/ipc/util.h" -#include "arrow/util/buffer.h" +#include "arrow/status.h" #include "arrow/util/logging.h" -#include "arrow/util/status.h" namespace 
arrow { namespace ipc { diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc index 0908c887f40e3..f309b8562f76a 100644 --- a/cpp/src/arrow/ipc/ipc-adapter-test.cc +++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc @@ -30,11 +30,11 @@ #include "arrow/ipc/test-common.h" #include "arrow/ipc/util.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" #include "arrow/test-util.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" namespace arrow { namespace ipc { diff --git a/cpp/src/arrow/ipc/ipc-file-test.cc b/cpp/src/arrow/ipc/ipc-file-test.cc index 5c79238eb90e2..0a9f677966389 100644 --- a/cpp/src/arrow/ipc/ipc-file-test.cc +++ b/cpp/src/arrow/ipc/ipc-file-test.cc @@ -32,11 +32,11 @@ #include "arrow/ipc/test-common.h" #include "arrow/ipc/util.h" +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" #include "arrow/test-util.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" namespace arrow { namespace ipc { diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc index 86a2a9b8ccdd2..f793a2659579c 100644 --- a/cpp/src/arrow/ipc/ipc-json-test.cc +++ b/cpp/src/arrow/ipc/ipc-json-test.cc @@ -29,12 +29,12 @@ #include "arrow/builder.h" #include "arrow/ipc/json-internal.h" #include "arrow/ipc/json.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" namespace arrow { namespace ipc { diff --git a/cpp/src/arrow/ipc/ipc-metadata-test.cc b/cpp/src/arrow/ipc/ipc-metadata-test.cc index de08e6dab73c6..7c5744a241068 100644 --- a/cpp/src/arrow/ipc/ipc-metadata-test.cc +++ 
b/cpp/src/arrow/ipc/ipc-metadata-test.cc @@ -24,9 +24,9 @@ #include "arrow/io/memory.h" #include "arrow/ipc/metadata.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index 291a719d4e58c..5e593560f8cfa 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -33,9 +33,9 @@ #include "arrow/ipc/json.h" #include "arrow/pretty_print.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/table.h" #include "arrow/test-util.h" -#include "arrow/util/status.h" DEFINE_string(arrow, "", "Arrow file name"); DEFINE_string(json, "", "JSON file name"); diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index 0ca48750e5ddf..db11b7d0372f7 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -29,13 +29,13 @@ #include "arrow/array.h" #include "arrow/builder.h" +#include "arrow/memory_pool.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/logging.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" namespace arrow { namespace ipc { diff --git a/cpp/src/arrow/ipc/json.cc b/cpp/src/arrow/ipc/json.cc index 2281611f8b879..6e3a9939730f4 100644 --- a/cpp/src/arrow/ipc/json.cc +++ b/cpp/src/arrow/ipc/json.cc @@ -23,14 +23,14 @@ #include #include "arrow/array.h" +#include "arrow/buffer.h" #include "arrow/ipc/json-internal.h" +#include "arrow/memory_pool.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/table.h" #include "arrow/type.h" -#include "arrow/util/buffer.h" #include "arrow/util/logging.h" -#include "arrow/util/memory-pool.h" -#include "arrow/util/status.h" namespace arrow { namespace 
ipc { diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 5a2758912b759..16069a8f9dcf0 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -25,11 +25,11 @@ #include "flatbuffers/flatbuffers.h" +#include "arrow/buffer.h" #include "arrow/ipc/Message_generated.h" #include "arrow/schema.h" +#include "arrow/status.h" #include "arrow/type.h" -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc index 44d3939c04f1d..f0674ff8d5aeb 100644 --- a/cpp/src/arrow/ipc/metadata.cc +++ b/cpp/src/arrow/ipc/metadata.cc @@ -28,9 +28,9 @@ #include "arrow/ipc/Message_generated.h" #include "arrow/ipc/metadata-internal.h" +#include "arrow/buffer.h" #include "arrow/schema.h" -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" +#include "arrow/status.h" namespace arrow { diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index d27be44916a24..f55b1ac668c7c 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -22,8 +22,8 @@ #include #include +#include "arrow/status.h" #include "arrow/util/logging.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index fc9702eabcbae..9c439c47eb82c 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -22,10 +22,10 @@ #include "arrow/array.h" #include "arrow/pretty_print.h" +#include "arrow/status.h" #include "arrow/table.h" #include "arrow/type.h" #include "arrow/type_traits.h" -#include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index eb1258a73038a..855d4ec04085d 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -24,7 +24,7 @@ #include "arrow/array.h" #include "arrow/column.h" #include "arrow/schema.h" -#include 
"arrow/util/status.h" +#include "arrow/status.h" namespace arrow { diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 193cb2eaf633f..5b172e41f6809 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -20,7 +20,7 @@ #include #include -#include "arrow/util/status.h" +#include "arrow/status.h" namespace arrow { diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc index 7e1cb1867171e..9c82407ecc092 100644 --- a/cpp/src/arrow/util/bit-util.cc +++ b/cpp/src/arrow/util/bit-util.cc @@ -18,9 +18,9 @@ #include #include +#include "arrow/buffer.h" +#include "arrow/status.h" #include "arrow/util/bit-util.h" -#include "arrow/util/buffer.h" -#include "arrow/util/status.h" namespace arrow { From 6f556ea7bdf808208aa0b70fc3e04254543e1c59 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 12 Dec 2016 16:11:47 -0500 Subject: [PATCH 5/5] Add missing math.h include for clang Change-Id: I5a7f2c324c1252a880531a7589f8a4afc8d55c65 --- cpp/src/arrow/array.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index c34b165fdbbc9..1a4a9237a1f79 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -18,6 +18,7 @@ #ifndef ARROW_ARRAY_H #define ARROW_ARRAY_H +#include #include #include #include