Skip to content

Commit

Permalink
ARROW-15277: [C++][Python] Use ChunkedArray::Make for chunked_array (#13950)
Browse files Browse the repository at this point in the history

Supersedes and will close #12096 

[ARROW-15277](https://issues.apache.org/jira/browse/ARROW-15277)

Lead-authored-by: Miles Granger <[email protected]>
Co-authored-by: Eduardo Ponce <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
3 people authored Aug 25, 2022
1 parent 897c186 commit dd0988b
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 25 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/chunked_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::Make(ArrayVector chunks,
}
for (const auto& chunk : chunks) {
if (!chunk->type()->Equals(*type)) {
return Status::Invalid("Array chunks must all be same type");
return Status::TypeError("Array chunks must all be same type");
}
}
return std::make_shared<ChunkedArray>(std::move(chunks), std::move(type));
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/chunked_array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ TEST_F(TestChunkedArray, Make) {
ASSERT_OK_AND_ASSIGN(auto result2, ChunkedArray::Make({chunk0, chunk0}, int8()));
AssertChunkedEqual(*result, *result2);

ASSERT_RAISES(Invalid, ChunkedArray::Make({chunk0, chunk1}));
ASSERT_RAISES(Invalid, ChunkedArray::Make({chunk0}, int16()));
ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0, chunk1}));
ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0}, int16()));
}

TEST_F(TestChunkedArray, MakeEmpty) {
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
CChunkedArray(const vector[shared_ptr[CArray]]& arrays)
CChunkedArray(const vector[shared_ptr[CArray]]& arrays,
const shared_ptr[CDataType]& type)

@staticmethod
CResult[shared_ptr[CChunkedArray]] Make(vector[shared_ptr[CArray]] chunks,
shared_ptr[CDataType] type)
int64_t length()
int64_t null_count()
int num_chunks()
Expand Down
21 changes: 5 additions & 16 deletions python/pyarrow/table.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,8 @@ def chunked_array(arrays, type=None):
cdef:
Array arr
vector[shared_ptr[CArray]] c_arrays
shared_ptr[CChunkedArray] sp_chunked_array
shared_ptr[CChunkedArray] c_result
shared_ptr[CDataType] c_type

type = ensure_type(type, allow_none=True)

Expand All @@ -1318,25 +1319,13 @@ def chunked_array(arrays, type=None):
# subsequent arrays to the firstly inferred array type
# it also spares the inference overhead after the first chunk
type = arr.type
else:
if arr.type != type:
raise TypeError(
"All array chunks must have type {}".format(type)
)

c_arrays.push_back(arr.sp_array)

if c_arrays.size() == 0 and type is None:
raise ValueError("When passing an empty collection of arrays "
"you must also pass the data type")

sp_chunked_array.reset(
new CChunkedArray(c_arrays, pyarrow_unwrap_data_type(type))
)
c_type = pyarrow_unwrap_data_type(type)
with nogil:
check_status(sp_chunked_array.get().Validate())

return pyarrow_wrap_chunked_array(sp_chunked_array)
c_result = GetResultValue(CChunkedArray.Make(c_arrays, c_type))
return pyarrow_wrap_chunked_array(c_result)


cdef _schema_from_arrays(arrays, names, metadata, shared_ptr[CSchema]* schema):
Expand Down
8 changes: 8 additions & 0 deletions python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,14 @@ def test_mode_chunked_array():
assert len(pc.mode(arr)) == 0


def test_empty_chunked_array():
    """Check ``pa.chunked_array`` on an empty list of chunks.

    Without a ``type`` argument the call must raise ``pa.ArrowInvalid``
    with the expected message; with an explicit ``int8`` type the same
    call is expected to complete without raising.
    """
    # `match` is applied as a regex search against the exception text.
    regex = "cannot construct ChunkedArray from empty vector and omitted type"

    # No chunks and no type: there is nothing to take the type from.
    with pytest.raises(pa.ArrowInvalid, match=regex):
        pa.chunked_array([])

    # No chunks but an explicit type: must not raise.
    pa.chunked_array([], type=pa.int8())


def test_variance():
data = [1, 2, 3, 4, 5, 6, 7, 8]
assert pc.variance(data).as_py() == 5.25
Expand Down
10 changes: 4 additions & 6 deletions python/pyarrow/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,7 @@ def test_chunked_array_construction():
assert len(arr) == 3
assert len(arr.chunks) == 2

msg = (
"When passing an empty collection of arrays you must also pass the "
"data type"
)
msg = "cannot construct ChunkedArray from empty vector and omitted type"
with pytest.raises(ValueError, match=msg):
assert pa.chunked_array([])

Expand Down Expand Up @@ -143,14 +140,15 @@ def test_chunked_array_to_numpy():


def test_chunked_array_mismatch_types():
with pytest.raises(TypeError):
msg = "chunks must all be same type"
with pytest.raises(TypeError, match=msg):
# Given array types are different
pa.chunked_array([
pa.array([1, 2, 3]),
pa.array([1., 2., 3.])
])

with pytest.raises(TypeError):
with pytest.raises(TypeError, match=msg):
# Given array type is different from explicit type argument
pa.chunked_array([pa.array([1, 2, 3])], type=pa.float64())

Expand Down

0 comments on commit dd0988b

Please sign in to comment.