From 10cdd5fc5dcfc73404ae825f5d4bcf357c69ff24 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 16 Aug 2024 12:49:28 -0700 Subject: [PATCH] Reenable arrow tests (#16556) This PR reenables the tests that were disabled in #16379, converting them to use the new C data interface functions instead of the old libarrow-based ones. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/16556 --- cpp/tests/CMakeLists.txt | 4 - cpp/tests/interop/arrow_utils.hpp | 5 +- cpp/tests/interop/from_arrow_test.cpp | 145 +++++++++++++------ cpp/tests/interop/to_arrow_test.cpp | 192 ++++++++++++++++---------- cpp/tests/streams/interop_test.cpp | 78 ----------- 5 files changed, 224 insertions(+), 200 deletions(-) delete mode 100644 cpp/tests/streams/interop_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 8c4b0f1e367..006b36add0e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -690,10 +690,6 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) -# Deprecation from 16297 and fixes in 16379 caused this test to be empty This will be reenabled once -# the deprecated APIs have been replaced in 24.10. -# -# ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LABELING_BINS_TEST streams/labeling_bins_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) diff --git a/cpp/tests/interop/arrow_utils.hpp b/cpp/tests/interop/arrow_utils.hpp index 1fdf02e02f1..08eada632a5 100644 --- a/cpp/tests/interop/arrow_utils.hpp +++ b/cpp/tests/interop/arrow_utils.hpp @@ -32,6 +32,8 @@ #include +#include + #pragma once template @@ -154,8 +156,9 @@ std::shared_ptr get_arrow_list_array(std::vector data, "Failed to append values to buffer builder"); CUDF_EXPECTS(buff_builder.Finish(&offset_buffer).ok(), "Failed to allocate buffer"); + auto nullable = std::accumulate(list_validity.begin(), list_validity.end(), 0) > 0; return std::make_shared( - arrow::list(data_array->type()), + arrow::list(arrow::field("", data_array->type(), nullable)), offsets.size() - 1, offset_buffer, data_array, diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 733e5814425..81c406c0faf 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -14,13 +14,6 @@ * limitations under the License. */ -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export the arrow C data -// interface which we consume with from_arrow_host. For now, the tests -// are commented out. - -#if 0 - #include #include @@ -43,6 +36,10 @@ #include +#include +#include +#include + std::unique_ptr get_cudf_table() { std::vector> columns; @@ -93,6 +90,45 @@ struct FromArrowTest : public cudf::test::BaseFixture {}; template struct FromArrowTestDurationsTest : public cudf::test::BaseFixture {}; +std::optional> export_table(std::shared_ptr arrow_table) +{ + ArrowSchema schema; + if (!arrow::ExportSchema(*arrow_table->schema(), &schema).ok()) { return std::nullopt; } + auto batch = arrow_table->CombineChunksToBatch().ValueOrDie(); + ArrowArray arr; + if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { return std::nullopt; } + auto ret = cudf::from_arrow(&schema, &arr); + arr.release(&arr); + schema.release(&schema); + return {std::move(ret)}; +} + +std::optional> export_scalar(arrow::Scalar const& arrow_scalar) +{ + auto maybe_array = arrow::MakeArrayFromScalar(arrow_scalar, 1); + if (!maybe_array.ok()) { return std::nullopt; } + auto array = *maybe_array; + + ArrowSchema schema; + if (!arrow::ExportType(*array->type(), &schema).ok()) { return std::nullopt; } + + ArrowArray arr; + if (!arrow::ExportArray(*array, &arr).ok()) { return std::nullopt; } + + auto col = cudf::from_arrow_column(&schema, &arr); + auto ret = cudf::get_element(col->view(), 0); + + arr.release(&arr); + schema.release(&schema); + return {std::move(ret)}; +} + +std::optional> export_scalar( + std::shared_ptr const arrow_scalar) +{ + return export_scalar(*arrow_scalar); +} + TYPED_TEST_SUITE(FromArrowTestDurationsTest, cudf::test::DurationTypes); TEST_F(FromArrowTest, EmptyTable) @@ -102,9 +138,10 @@ TEST_F(FromArrowTest, EmptyTable) auto expected_cudf_table = tables.first->view(); auto arrow_table = tables.second; - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, DateTimeTable) @@ -127,9 +164,10 @@ TEST_F(FromArrowTest, DateTimeTable) auto arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TYPED_TEST(FromArrowTestDurationsTest, DurationTable) @@ -160,9 +198,10 @@ TYPED_TEST(FromArrowTestDurationsTest, DurationTable) auto arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, NestedList) @@ -188,8 +227,9 @@ TEST_F(FromArrowTest, NestedList) auto arrow_table = arrow::Table::Make(schema, {nested_list_arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, StructColumn) @@ -274,9 +314,10 @@ TEST_F(FromArrowTest, StructColumn) auto schema = std::make_shared(schema_vector); auto input = arrow::Table::Make(schema, {struct_array}); - auto got_cudf_table = cudf::from_arrow(*input); + auto got_cudf_table = export_table(input); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, DictionaryIndicesType) @@ -304,9 +345,10 @@ TEST_F(FromArrowTest, DictionaryIndicesType) cudf::table expected_table(std::move(columns)); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.view(), got_cudf_table.value()->view()); } TEST_F(FromArrowTest, ChunkedArray) @@ -369,9 +411,10 @@ TEST_F(FromArrowTest, ChunkedArray) auto expected_cudf_table = get_cudf_table(); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table.value()->view()); } struct FromArrowTestSlice @@ -388,13 +431,14 @@ TEST_P(FromArrowTestSlice, SliceTest) auto sliced_cudf_table = cudf::slice(cudf_table_view, {start, end})[0]; auto expected_cudf_table = cudf::table{sliced_cudf_table}; auto sliced_arrow_table = arrow_table->Slice(start, end - start); - auto got_cudf_table = cudf::from_arrow(*sliced_arrow_table); + auto got_cudf_table = export_table(sliced_arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); // This has been added to take-care of empty string column issue with no children - if (got_cudf_table->num_rows() == 0 and expected_cudf_table.num_rows() == 0) { - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table->view()); + if (got_cudf_table.value()->num_rows() == 0 and expected_cudf_table.num_rows() == 0) { + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table.value()->view()); } else { - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table.value()->view()); } } @@ -417,9 +461,10 @@ TEST_F(FromArrowTest, FixedPoint128Table) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -441,9 +486,10 @@ TEST_F(FromArrowTest, FixedPoint128TableLarge) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -466,9 +512,10 @@ TEST_F(FromArrowTest, FixedPoint128TableNulls) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -493,9 +540,10 @@ TEST_F(FromArrowTest, FixedPoint128TableNullsLarge) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -519,9 +567,12 @@ TYPED_TEST(FromArrowNumericScalarTest, Basic) { TypeParam const value{42}; auto const arrow_scalar = arrow::MakeScalar(value); - auto const cudf_scalar = cudf::from_arrow(*arrow_scalar); + + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); + auto const cudf_numeric_scalar = - dynamic_cast*>(cudf_scalar.get()); + dynamic_cast*>(cudf_scalar.value().get()); if (cudf_numeric_scalar == nullptr) { CUDF_FAIL("Attempted to test with a non-numeric type."); } EXPECT_EQ(cudf_numeric_scalar->type(), cudf::data_type(cudf::type_to_id())); EXPECT_EQ(cudf_numeric_scalar->value(), value); @@ -535,12 +586,13 @@ TEST_F(FromArrowDecimalScalarTest, Basic) auto const value{42}; auto const precision{8}; auto const scale{4}; - auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); - auto cudf_scalar = cudf::from_arrow(arrow_scalar); + auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); // Arrow offers a minimum of 128 bits for the Decimal type. auto const cudf_decimal_scalar = - dynamic_cast*>(cudf_scalar.get()); + dynamic_cast*>(cudf_scalar.value().get()); EXPECT_EQ(cudf_decimal_scalar->type(), cudf::data_type(cudf::type_to_id(), scale)); EXPECT_EQ(cudf_decimal_scalar->value(), value); @@ -552,9 +604,10 @@ TEST_F(FromArrowStringScalarTest, Basic) { auto const value = std::string("hello world"); auto const arrow_scalar = arrow::StringScalar(value); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_string_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_string_scalar = dynamic_cast(cudf_scalar.value().get()); EXPECT_EQ(cudf_string_scalar->type(), cudf::data_type(cudf::type_id::STRING)); EXPECT_EQ(cudf_string_scalar->to_string(), value); } @@ -572,9 +625,10 @@ TEST_F(FromArrowListScalarTest, Basic) auto const array = *maybe_array; auto const arrow_scalar = arrow::ListScalar(array); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_list_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_list_scalar = dynamic_cast(cudf_scalar.value().get()); EXPECT_EQ(cudf_list_scalar->type(), cudf::data_type(cudf::type_id::LIST)); cudf::test::fixed_width_column_wrapper const lhs( @@ -592,9 +646,10 @@ TEST_F(FromArrowStructScalarTest, Basic) auto const field = arrow::field("", underlying_arrow_scalar->type); auto const arrow_type = arrow::struct_({field}); auto const arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_struct_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_struct_scalar = dynamic_cast(cudf_scalar.value().get()); EXPECT_EQ(cudf_struct_scalar->type(), cudf::data_type(cudf::type_id::STRUCT)); cudf::test::fixed_width_column_wrapper const col({value}); @@ -602,5 +657,3 @@ TEST_F(FromArrowStructScalarTest, Basic) CUDF_TEST_EXPECT_TABLES_EQUAL(lhs, cudf_struct_scalar->view()); } - -#endif diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 328ba210a3f..90ae12cdd90 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -14,13 +14,6 @@ * limitations under the License. */ -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export via the arrow C data -// interface with to_arrow_host which arrow can consume. For now, the -// test is commented out. - -#if 0 - #include #include @@ -38,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +39,8 @@ #include +#include + using vector_of_columns = std::vector>; std::pair, std::shared_ptr> get_tables( @@ -130,7 +126,7 @@ std::pair, std::shared_ptr> get_table auto keys = cudf::test::to_host(view.keys()).first; auto indices = cudf::test::to_host(view.indices()).first; auto dict_array = get_arrow_dict_array(std::vector(keys.begin(), keys.end()), - std::vector(indices.begin(), indices.end()), + std::vector(indices.begin(), indices.end()), validity); auto boolarray = get_arrow_array(bool_data, bool_validity); auto list_array = get_arrow_list_array( @@ -168,6 +164,21 @@ struct ToArrowTest : public cudf::test::BaseFixture {}; template struct ToArrowTestDurationsTest : public cudf::test::BaseFixture {}; +auto is_equal(cudf::table_view const& table, + cudf::host_span metadata, + std::shared_ptr expected_arrow_table) +{ + auto got_arrow_schema = cudf::to_arrow_schema(table, metadata); + auto got_arrow_table = cudf::to_arrow_host(table); + + for (auto i = 0; i < got_arrow_schema->n_children; ++i) { + auto arr = arrow::ImportArray(got_arrow_table->array.children[i], got_arrow_schema->children[i]) + .ValueOrDie(); + if (!expected_arrow_table->column(i)->Equals(arrow::ChunkedArray(arr))) { return false; } + } + return true; +} + TYPED_TEST_SUITE(ToArrowTestDurationsTest, cudf::test::DurationTypes); TEST_F(ToArrowTest, EmptyTable) @@ -179,10 +190,9 @@ TEST_F(ToArrowTest, EmptyTable) auto struct_meta = cudf::column_metadata{"f"}; struct_meta.children_meta = {{"integral"}, {"string"}}; - auto got_arrow_table = - cudf::to_arrow(cudf_table_view, {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = { + {"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}; + ASSERT_TRUE(is_equal(cudf_table_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, DateTimeTable) @@ -203,12 +213,10 @@ TEST_F(ToArrowTest, DateTimeTable) std::vector> schema_vector({arrow::field("a", arr->type())}); auto schema = std::make_shared(schema_vector); - auto expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TYPED_TEST(ToArrowTestDurationsTest, DurationTable) @@ -239,9 +247,8 @@ TYPED_TEST(ToArrowTestDurationsTest, DurationTable) auto expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, NestedList) @@ -255,20 +262,20 @@ TEST_F(ToArrowTest, NestedList) auto list_arr = get_arrow_list_array({6, 7, 8, 9}, {0, 1, 4}, {1, 0, 1, 1}); std::vector offset{0, 0, 2}; auto mask_buffer = arrow::internal::BytesToBits({0, 1}).ValueOrDie(); - auto nested_list_arr = std::make_shared(arrow::list(list(arrow::int64())), - offset.size() - 1, - arrow::Buffer::Wrap(offset), - list_arr, - mask_buffer); + auto nested_list_arr = std::make_shared( + arrow::list(arrow::field("a", arrow::list(arrow::int64()), false)), + offset.size() - 1, + arrow::Buffer::Wrap(offset), + list_arr, + mask_buffer); std::vector> schema_vector( {arrow::field("a", nested_list_arr->type())}); auto schema = std::make_shared(schema_vector); - auto expected_arrow_table = arrow::Table::Make(schema, {nested_list_arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + auto expected_arrow_table = arrow::Table::Make(schema, {nested_list_arr}); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, StructColumn) @@ -324,7 +331,10 @@ TEST_F(ToArrowTest, StructColumn) auto list_arr = get_arrow_list_array({1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 2, 4, 5, 6, 7, 9}); std::vector offset{0, 3, 4, 6}; auto nested_list_arr = std::make_shared( - arrow::list(list(arrow::int64())), offset.size() - 1, arrow::Buffer::Wrap(offset), list_arr); + arrow::list(arrow::field("a", arrow::list(arrow::field("a", arrow::int64(), false)), false)), + offset.size() - 1, + arrow::Buffer::Wrap(offset), + list_arr); std::vector> child_arrays2({str2_array, int2_array}); auto fields2 = std::vector>{ @@ -356,9 +366,8 @@ TEST_F(ToArrowTest, StructColumn) auto expected_arrow_table = arrow::Table::Make(schema, {struct_array}); - auto got_arrow_table = cudf::to_arrow(input_view, {metadata}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const meta = {metadata}; + ASSERT_TRUE(is_equal(input_view, meta, expected_arrow_table)); } template @@ -380,9 +389,8 @@ TEST_F(ToArrowTest, FixedPoint64Table) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -402,9 +410,8 @@ TEST_F(ToArrowTest, FixedPoint128Table) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -431,9 +438,8 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -455,9 +461,8 @@ TEST_F(ToArrowTest, FixedPoint128TableLarge) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -479,9 +484,8 @@ TEST_F(ToArrowTest, FixedPoint64TableNullsSimple) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, arrow_table)); } } @@ -503,9 +507,8 @@ TEST_F(ToArrowTest, FixedPoint128TableNullsSimple) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, arrow_table)); } } @@ -529,9 +532,8 @@ TEST_F(ToArrowTest, FixedPoint64TableNulls) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -554,9 +556,8 @@ TEST_F(ToArrowTest, FixedPoint128TableNulls) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto const got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -575,10 +576,10 @@ TEST_P(ToArrowTestSlice, SliceTest) auto expected_arrow_table = arrow_table->Slice(start, end - start); auto struct_meta = cudf::column_metadata{"f"}; struct_meta.children_meta = {{"integral"}, {"string"}}; - auto got_arrow_table = - cudf::to_arrow(sliced_cudf_table, {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}); - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = { + {"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}; + ASSERT_TRUE(is_equal(sliced_cudf_table, metadata, expected_arrow_table)); } INSTANTIATE_TEST_CASE_P(ToArrowTest, @@ -595,13 +596,58 @@ using NumericTypesNotBool = cudf::test::Concat; TYPED_TEST_SUITE(ToArrowNumericScalarTest, NumericTypesNotBool); +auto col_to_arrow_type(cudf::column_view const& col) +{ + switch (col.type().id()) { + case cudf::type_id::BOOL8: return arrow::boolean(); + case cudf::type_id::INT8: return arrow::int8(); + case cudf::type_id::INT16: return arrow::int16(); + case cudf::type_id::INT32: return arrow::int32(); + case cudf::type_id::INT64: return arrow::int64(); + case cudf::type_id::UINT8: return arrow::uint8(); + case cudf::type_id::UINT16: return arrow::uint16(); + case cudf::type_id::UINT32: return arrow::uint32(); + case cudf::type_id::UINT64: return arrow::uint64(); + case cudf::type_id::FLOAT32: return arrow::float32(); + case cudf::type_id::FLOAT64: return arrow::float64(); + case cudf::type_id::TIMESTAMP_DAYS: return arrow::date32(); + case cudf::type_id::STRING: return arrow::utf8(); + case cudf::type_id::LIST: + return arrow::list(col_to_arrow_type(col.child(cudf::lists_column_view::child_column_index))); + case cudf::type_id::DECIMAL128: return arrow::decimal(38, -col.type().scale()); + default: CUDF_FAIL("Unsupported type_id conversion to arrow type", cudf::data_type_error); + } +} + +std::optional> cudf_scalar_to_arrow( + cudf::scalar const& scalar, std::optional metadata = std::nullopt) +{ + auto const cudf_column = cudf::make_column_from_scalar(scalar, 1); + auto const c_arrow_array = cudf::to_arrow_host(*cudf_column); + auto const arrow_array = [&]() { + if (metadata.has_value()) { + auto const table = cudf::table_view({cudf_column->view()}); + std::vector const table_metadata = {metadata.value()}; + auto const arrow_schema = cudf::to_arrow_schema(table, table_metadata); + return arrow::ImportArray(&c_arrow_array->array, arrow_schema->children[0]).ValueOrDie(); + } else { + auto const arrow_type = col_to_arrow_type(cudf_column->view()); + return arrow::ImportArray(&c_arrow_array->array, arrow_type).ValueOrDie(); + } + }(); + auto const maybe_scalar = arrow_array->GetScalar(0); + if (!maybe_scalar.ok()) { return std::nullopt; } + return maybe_scalar.ValueOrDie(); +} + TYPED_TEST(ToArrowNumericScalarTest, Basic) { TypeParam const value{42}; auto const cudf_scalar = cudf::make_fixed_width_scalar(value); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const ref_arrow_scalar = arrow::MakeScalar(value); EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); @@ -621,8 +667,9 @@ TEST_F(ToArrowDecimalScalarTest, Basic) auto const cudf_scalar = cudf::make_fixed_point_scalar(value, numeric::scale_type{scale}); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const maybe_ref_arrow_scalar = arrow::MakeScalar(arrow::decimal128(precision, -scale), value); @@ -636,9 +683,10 @@ struct ToArrowStringScalarTest : public cudf::test::BaseFixture {}; TEST_F(ToArrowStringScalarTest, Basic) { std::string const value{"hello world"}; - auto const cudf_scalar = cudf::make_string_scalar(value); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const cudf_scalar = cudf::make_string_scalar(value); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const ref_arrow_scalar = arrow::MakeScalar(value); EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); @@ -656,8 +704,9 @@ TEST_F(ToArrowListScalarTest, Basic) auto const cudf_scalar = cudf::make_list_scalar(col); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; arrow::Int64Builder builder; auto const status = builder.AppendValues(host_values, host_validity); @@ -682,7 +731,10 @@ TEST_F(ToArrowStructScalarTest, Basic) cudf::column_metadata metadata{""}; metadata.children_meta.emplace_back(field_name); - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar, metadata); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const underlying_arrow_scalar = arrow::MakeScalar(value); auto const field = arrow::field(field_name, underlying_arrow_scalar->type, false); @@ -693,5 +745,3 @@ TEST_F(ToArrowStructScalarTest, Basic) } CUDF_TEST_PROGRAM_MAIN() - -#endif diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp deleted file mode 100644 index 9ba862585d0..00000000000 --- a/cpp/tests/streams/interop_test.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export via the arrow C data -// interface with to_arrow_host which arrow can consume. For now, the -// test is commented out. - -#if 0 - -#include -#include -#include - -#include -#include -#include -#include - -struct ArrowTest : public cudf::test::BaseFixture {}; - -TEST_F(ArrowTest, ToArrow) -{ - int32_t const value{42}; - auto col = cudf::test::fixed_width_column_wrapper{{value}}; - cudf::table_view tbl{{col}}; - - std::vector metadata{{""}}; - cudf::to_arrow(tbl, metadata, cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, FromArrow) -{ - std::vector host_values = {1, 2, 3, 5, 6, 7, 8}; - std::vector host_validity = {true, true, true, false, true, true, true}; - - arrow::Int64Builder builder; - auto status = builder.AppendValues(host_values, host_validity); - auto maybe_array = builder.Finish(); - auto array = *maybe_array; - - auto field = arrow::field("", arrow::int32()); - auto schema = arrow::schema({field}); - auto table = arrow::Table::Make(schema, {array}); - cudf::from_arrow(*table, cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, ToArrowScalar) -{ - int32_t const value{42}; - auto cudf_scalar = - cudf::make_fixed_width_scalar(value, cudf::test::get_default_stream()); - - cudf::column_metadata metadata{""}; - cudf::to_arrow(*cudf_scalar, metadata, cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, FromArrowScalar) -{ - int32_t const value{42}; - auto arrow_scalar = arrow::MakeScalar(value); - cudf::from_arrow(*arrow_scalar, cudf::test::get_default_stream()); -} - -#endif