Skip to content

Commit

Permalink
reuse base binary converter with templating
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Jan 30, 2024
1 parent 8622dd3 commit 8de3cfc
Showing 1 changed file with 12 additions and 45 deletions.
57 changes: 12 additions & 45 deletions python/pyarrow/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -697,56 +697,23 @@ class PyPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::
PyBytesView view_;
};

template <typename T>
class PyPrimitiveConverter<T, enable_if_base_binary<T>>
: public PrimitiveConverter<T, PyConverter> {
public:
using OffsetType = typename T::offset_type;

Status Append(PyObject* value) override {
if (PyValue::IsNull(this->options_, value)) {
this->primitive_builder_->UnsafeAppendNull();
} else if (arrow::py::is_scalar(value)) {
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> scalar,
arrow::py::unwrap_scalar(value));
ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar));
} else {
ARROW_RETURN_NOT_OK(
PyValue::Convert(this->primitive_type_, this->options_, value, view_));
if (!view_.is_utf8) {
// observed binary value
observed_binary_ = true;
}
// Since we don't know the varying length input size in advance, we need to
// reserve space in the value builder one by one. ReserveData raises CapacityError
// if the value would not fit into the array.
ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size));
this->primitive_builder_->UnsafeAppend(view_.bytes,
static_cast<OffsetType>(view_.size));
}
return Status::OK();
}

Result<std::shared_ptr<Array>> ToArray() override {
ARROW_ASSIGN_OR_RAISE(auto array, (PrimitiveConverter<T, PyConverter>::ToArray()));
if (observed_binary_) {
// if we saw any non-unicode, cast results to BinaryArray
auto binary_type = TypeTraits<typename T::PhysicalType>::type_singleton();
return array->View(binary_type);
} else {
return array;
}
}
// Trait that names the integer type a converter should use when casting a
// value's byte length before appending it to the builder.
//
// Primary template: forwards to the nested `offset_type` member of `T`.
// The second parameter is only a hook for SFINAE-based partial
// specializations and defaults to `void`; callers spell the trait as
// `OffsetTypeTrait<T>::type`.
template <typename T, typename SfinaeHook = void>
struct OffsetTypeTrait {
  // Requires `T::offset_type` to exist; types without that member need a
  // dedicated specialization.
  using type = typename T::offset_type;
};

protected:
PyBytesView view_;
bool observed_binary_ = false;
// Partial specialization of OffsetTypeTrait selected when
// `enable_if_binary_view_like<T>` is well-formed for `T`.
// Instead of reading `T::offset_type`, it fixes the length type to int64_t.
// NOTE(review): presumably binary-view-like types do not declare a nested
// `offset_type`, which is why the primary template cannot be used for them —
// confirm against the Arrow type definitions.
template <typename T>
struct OffsetTypeTrait<T, enable_if_binary_view_like<T>> {
using type = int64_t;
};

template <typename T>
class PyPrimitiveConverter<T, enable_if_t<is_binary_view_like_type<T>::value>>
class PyPrimitiveConverter<
T, enable_if_t<is_base_binary_type<T>::value || is_binary_view_like_type<T>::value>>
: public PrimitiveConverter<T, PyConverter> {
public:
using OffsetType = typename OffsetTypeTrait<T>::type;

Status Append(PyObject* value) override {
if (PyValue::IsNull(this->options_, value)) {
this->primitive_builder_->UnsafeAppendNull();
Expand All @@ -766,7 +733,7 @@ class PyPrimitiveConverter<T, enable_if_t<is_binary_view_like_type<T>::value>>
// if the value would not fit into the array.
ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size));
this->primitive_builder_->UnsafeAppend(view_.bytes,
static_cast<int64_t>(view_.size));
static_cast<OffsetType>(view_.size));
}
return Status::OK();
}
Expand Down

0 comments on commit 8de3cfc

Please sign in to comment.