diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp index d02305cf478..5d61d81bb71 100644 --- a/cpp/benchmarks/io/csv/csv_writer.cpp +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -26,8 +26,6 @@ constexpr size_t data_size = 256 << 20; constexpr cudf::size_type num_cols = 64; -namespace cudf_io = cudf::io; - class CsvWrite : public cudf::benchmark { }; @@ -44,9 +42,9 @@ void BM_csv_write_varying_inout(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf_io::csv_writer_options options = - cudf_io::csv_writer_options::builder(source_sink.make_sink_info(), view).include_header(true); - cudf_io::write_csv(options); + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view); + cudf::io::write_csv(options); } state.SetBytesProcessed(data_size * state.iterations()); @@ -74,12 +72,11 @@ void BM_csv_write_varying_options(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf_io::csv_writer_options options = - cudf_io::csv_writer_options::builder(source_sink.make_sink_info(), view) - .include_header(true) + cudf::io::csv_writer_options options = + cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view) .na_rep(na_per) .rows_per_chunk(rows_per_chunk); - cudf_io::write_csv(options); + cudf::io::write_csv(options); } state.SetBytesProcessed(data_size * state.iterations()); diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index da64c1bbf3c..1a9c7153644 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -23,8 +23,6 @@ #include -namespace cudf_io = cudf::io; - temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"}; std::string random_file_in_dir(std::string const& dir_path) @@ -43,21 +41,21 @@ cuio_source_sink_pair::cuio_source_sink_pair(io_type type) { } -cudf_io::source_info cuio_source_sink_pair::make_source_info() +cudf::io::source_info cuio_source_sink_pair::make_source_info() { switch (type) { - case io_type::FILEPATH: return cudf_io::source_info(file_name); - case io_type::HOST_BUFFER: return cudf_io::source_info(buffer.data(), buffer.size()); + case io_type::FILEPATH: return cudf::io::source_info(file_name); + case io_type::HOST_BUFFER: return cudf::io::source_info(buffer.data(), buffer.size()); default: CUDF_FAIL("invalid input type"); } } -cudf_io::sink_info cuio_source_sink_pair::make_sink_info() +cudf::io::sink_info cuio_source_sink_pair::make_sink_info() { switch (type) { - case io_type::VOID: return cudf_io::sink_info(&void_sink); - case io_type::FILEPATH: return cudf_io::sink_info(file_name); - case io_type::HOST_BUFFER: return cudf_io::sink_info(&buffer); + case io_type::VOID: return cudf::io::sink_info(&void_sink); + case io_type::FILEPATH: return cudf::io::sink_info(file_name); + case io_type::HOST_BUFFER: return cudf::io::sink_info(&buffer); default: CUDF_FAIL("invalid output type"); } } diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 4f0bdbd9b31..f532836ef95 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -49,8 +49,6 @@ #include #include -namespace cudf_io = cudf::io; - using cudf::data_type; using cudf::type_id; using cudf::type_to_id; @@ -113,12 +111,12 @@ struct CsvFixedPointReaderTest : public CsvReaderTest { return acc.empty() ? rhs : (acc + "\n" + rhs); }); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .dtypes({data_type{type_to_id(), scale}}) .header(-1); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto result_view = result.tbl->view(); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*input_column, result_view.column(0)); @@ -283,7 +281,7 @@ void write_csv_helper(std::string const& filename, std::vector const& names = {}) { // csv_writer_options only keeps a pointer to metadata (non-owning) - cudf_io::table_metadata metadata{}; + cudf::io::table_metadata metadata{}; if (not names.empty()) { metadata.column_names = names; @@ -297,14 +295,14 @@ void write_csv_helper(std::string const& filename, }); } - cudf_io::csv_writer_options writer_options = - cudf_io::csv_writer_options::builder(cudf_io::sink_info(filename), table) + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info(filename), table) .include_header(include_header) .rows_per_chunk( 1) // Note: this gets adjusted to multiple of 8 (per legacy code logic and requirements) .metadata(&metadata); - cudf_io::write_csv(writer_options); + cudf::io::write_csv(writer_options); } template @@ -352,9 +350,9 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumn) std::copy(sequence, sequence + num_rows, output_iterator); } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); expect_column_data_equal(std::vector(sequence, sequence + num_rows), view.column(0)); @@ -407,11 +405,11 @@ TYPED_TEST(CsvFixedPointWriterTest, SingleColumnNegativeScale) auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnNegativeScale.csv"; - cudf_io::csv_writer_options writer_options = - cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table) + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info(filepath), input_table) .include_header(false); - cudf_io::write_csv(writer_options); + cudf::io::write_csv(writer_options); std::vector result_strings; result_strings.reserve(reference_strings.size()); @@ -454,11 +452,11 @@ TYPED_TEST(CsvFixedPointWriterTest, SingleColumnPositiveScale) auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnPositiveScale.csv"; - cudf_io::csv_writer_options writer_options = - cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table) + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info(filepath), input_table) .include_header(false); - cudf_io::write_csv(writer_options); + cudf::io::write_csv(writer_options); std::vector result_strings; result_strings.reserve(reference_strings.size()); @@ -500,8 +498,8 @@ TEST_F(CsvReaderTest, MultiColumn) outfile << line.str(); } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .header(-1) .dtypes({dtype(), dtype(), @@ -513,7 +511,7 @@ TEST_F(CsvReaderTest, MultiColumn) dtype(), dtype(), dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); expect_column_data_equal(int8_values, view.column(0)); @@ -548,14 +546,14 @@ TEST_F(CsvReaderTest, RepeatColumn) } // repeats column in indexes and names, misses 1 column. - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype(), dtype(), dtype(), dtype()}) .names({"A", "B", "C", "D"}) .use_cols_indexes({1, 0, 0}) .use_cols_names({"D", "B", "B"}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(3, view.num_columns()); @@ -573,14 +571,14 @@ TEST_F(CsvReaderTest, Booleans) "true\nYes,5,foo,false\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A", "B", "C", "D"}) .dtypes({dtype(), dtype(), dtype(), dtype()}) .true_values({"yes", "Yes", "YES", "foo", "FOO"}) .false_values({"no", "No", "NO", "Bar", "bar"}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); // Booleans are the same (integer) data type, but valued at 0 or 1 const auto view = result.tbl->view(); @@ -605,13 +603,13 @@ TEST_F(CsvReaderTest, Dates) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -641,13 +639,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampSeconds) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_SECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -677,13 +675,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampMilliSeconds) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -713,13 +711,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampMicroSeconds) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MICROSECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -749,13 +747,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampNanoSeconds) outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_NANOSECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -789,12 +787,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampSeconds) } } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_SECONDS}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -817,12 +815,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampMilliSeconds) } } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -845,12 +843,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampMicroSeconds) } } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MICROSECONDS}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -873,12 +871,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampNanoSeconds) } } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_NANOSECONDS}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -897,13 +895,13 @@ TEST_F(CsvReaderTest, FloatingPoint) "98007199999998;"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .lineterminator(';') .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -930,12 +928,12 @@ TEST_F(CsvReaderTest, Strings) outfile << "30,stu \"\"vwx\"\" yz" << '\n'; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) - .quoting(cudf_io::quote_style::NONE); - auto result = cudf_io::read_csv(in_opts); + .quoting(cudf::io::quote_style::NONE); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(2, view.num_columns()); @@ -960,12 +958,12 @@ TEST_F(CsvReaderTest, StringsQuotes) outfile << "30,stu `vwx` yz" << '\n'; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) .quotechar('`'); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(2, view.num_columns()); @@ -989,13 +987,13 @@ TEST_F(CsvReaderTest, StringsQuotesIgnored) outfile << "30,stu \"vwx\" yz" << '\n'; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) - .quoting(cudf_io::quote_style::NONE) + .quoting(cudf::io::quote_style::NONE) .doublequote(false); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(2, view.num_columns()); @@ -1015,14 +1013,14 @@ TEST_F(CsvReaderTest, SkiprowsNrows) outfile << "1\n2\n3\n4\n5\n6\n7\n8\n9\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(1) .skiprows(2) .nrows(2); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1039,14 +1037,14 @@ TEST_F(CsvReaderTest, ByteRange) outfile << "1000\n2000\n3000\n4000\n5000\n6000\n7000\n8000\n9000\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1) .byte_range_offset(11) .byte_range_size(15); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1058,13 +1056,13 @@ TEST_F(CsvReaderTest, ByteRange) TEST_F(CsvReaderTest, ByteRangeStrings) { std::string input = "\"a\"\n\"b\"\n\"c\""; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{input.c_str(), input.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{input.c_str(), input.size()}) .names({"A"}) .dtypes({dtype()}) .header(-1) .byte_range_offset(4); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1081,13 +1079,13 @@ TEST_F(CsvReaderTest, BlanksAndComments) outfile << "1\n#blank\n3\n4\n5\n#blank\n\n\n8\n9\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1) .comment('#'); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1104,9 +1102,9 @@ TEST_F(CsvReaderTest, EmptyFile) outfile << ""; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); @@ -1120,9 +1118,9 @@ TEST_F(CsvReaderTest, NoDataFile) outfile << "\n\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); @@ -1136,9 +1134,9 @@ TEST_F(CsvReaderTest, HeaderOnlyFile) outfile << "\"a\",\"b\",\"c\"\n\n"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_rows()); @@ -1156,11 +1154,11 @@ TEST_F(CsvReaderTest, ArrowFileSource) std::shared_ptr infile; ASSERT_TRUE(arrow::io::ReadableFile::Open(filepath).Value(&infile).ok()); - auto arrow_source = cudf_io::arrow_io_source{infile}; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{&arrow_source}) + auto arrow_source = cudf::io::arrow_io_source{infile}; + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{&arrow_source}) .dtypes({dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1177,12 +1175,12 @@ TEST_F(CsvReaderTest, InvalidFloatingPoint) outfile << "1.2e1+\n3.4e2-\n5.6e3e\n7.8e3A\n9.0Be1\n1C.2"; } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); EXPECT_EQ(1, view.num_columns()); @@ -1199,10 +1197,10 @@ TEST_F(CsvReaderTest, InvalidFloatingPoint) TEST_F(CsvReaderTest, StringInference) { std::string buffer = "\"-1\"\n"; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), type_id::STRING); @@ -1211,11 +1209,11 @@ TEST_F(CsvReaderTest, StringInference) TEST_F(CsvReaderTest, TypeInferenceThousands) { std::string buffer = "1`400,123,1`234.56\n123`456,123456,12.34"; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1) .thousands('`'); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto result_view = result.tbl->view(); EXPECT_EQ(result_view.num_columns(), 3); @@ -1238,12 +1236,12 @@ TEST_F(CsvReaderTest, TypeInferenceWithDecimal) // col#1 => STRING (contains digits and period character, which is NOT the decimal point here) // col#2 => FLOAT64 (column contains digits and decimal point (i.e., ';')) std::string buffer = "1`400,1.23,1`234;56\n123`456,123.456,12;34"; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1) .thousands('`') .decimal(';'); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto result_view = result.tbl->view(); EXPECT_EQ(result_view.num_columns(), 3); @@ -1263,17 +1261,17 @@ TEST_F(CsvReaderTest, SkipRowsXorSkipFooter) { std::string buffer = "1,2,3"; - cudf_io::csv_reader_options skiprows_options = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options skiprows_options = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1) .skiprows(1); - EXPECT_NO_THROW(cudf_io::read_csv(skiprows_options)); + EXPECT_NO_THROW(cudf::io::read_csv(skiprows_options)); - cudf_io::csv_reader_options skipfooter_options = - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options skipfooter_options = + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .header(-1) .skipfooter(1); - EXPECT_NO_THROW(cudf_io::read_csv(skipfooter_options)); + EXPECT_NO_THROW(cudf::io::read_csv(skipfooter_options)); } TEST_F(CsvReaderTest, nullHandling) @@ -1286,13 +1284,13 @@ TEST_F(CsvReaderTest, nullHandling) // Test disabling na_filter { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .na_filter(false) .dtypes({dtype()}) .header(-1) .skip_blank_lines(false); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); auto expect = cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}); @@ -1301,12 +1299,12 @@ TEST_F(CsvReaderTest, nullHandling) // Test enabling na_filter { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) .header(-1) .skip_blank_lines(false); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); auto expect = cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}, @@ -1317,13 +1315,13 @@ TEST_F(CsvReaderTest, nullHandling) // Setting na_values with default values { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .na_values({"Null"}) .dtypes({dtype()}) .header(-1) .skip_blank_lines(false); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); auto expect = cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}, @@ -1334,14 +1332,14 @@ TEST_F(CsvReaderTest, nullHandling) // Setting na_values without default values { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .keep_default_na(false) .na_values({"Null"}) .dtypes({dtype()}) .header(-1) .skip_blank_lines(false); - const auto result = cudf_io::read_csv(in_opts); + const auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); auto expect = cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"}, @@ -1356,14 +1354,14 @@ TEST_F(CsvReaderTest, FailCases) std::string buffer = "1,2,3"; { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_offset(4) .skiprows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_offset(4) .skipfooter(1), cudf::logic_error); @@ -1371,77 +1369,77 @@ TEST_F(CsvReaderTest, FailCases) { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_offset(4) .nrows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_size(4) .skiprows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_size(4) .skipfooter(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .byte_range_size(4) .nrows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skiprows(1) .byte_range_offset(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skipfooter(1) .byte_range_offset(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .nrows(1) .byte_range_offset(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skiprows(1) .byte_range_size(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skipfooter(1) .byte_range_size(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .nrows(1) .byte_range_size(4), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .nrows(1) .skipfooter(1), cudf::logic_error); @@ -1449,14 +1447,14 @@ TEST_F(CsvReaderTest, FailCases) } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .skipfooter(1) .nrows(1), cudf::logic_error); } { EXPECT_THROW( - cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .na_filter(false) .na_values({"Null"}), cudf::logic_error); @@ -1472,13 +1470,13 @@ TEST_F(CsvReaderTest, HexTest) } // specify hex columns by name { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1) .parse_hex({"A"}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); expect_column_data_equal( std::vector{0, -4096, 16702650, 11259375, 11259375, 2501034507}, @@ -1487,13 +1485,13 @@ TEST_F(CsvReaderTest, HexTest) // specify hex columns by index { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1) .parse_hex(std::vector{0}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); expect_column_data_equal( std::vector{0, -4096, 16702650, 11259375, 11259375, 2501034507}, @@ -1513,9 +1511,9 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumnWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table); @@ -1581,8 +1579,8 @@ TEST_F(CsvReaderTest, MultiColumnWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .header(-1) .dtypes({dtype(), dtype(), @@ -1594,7 +1592,7 @@ TEST_F(CsvReaderTest, MultiColumnWithWriter) dtype(), dtype(), dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1629,13 +1627,13 @@ TEST_F(CsvReaderTest, DatesWithWriter) // TODO need to add a dayfirst flag? write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .dayfirst(true) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1654,9 +1652,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1675,9 +1675,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1696,9 +1698,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1718,9 +1722,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1739,9 +1745,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) + .names({"A"}) + .header(-1); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); @@ -1760,13 +1768,13 @@ TEST_F(CsvReaderTest, FloatingPointWithWriter) // TODO add lineterminator=";" write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names({"A"}) .dtypes({dtype()}) .header(-1); // in_opts.lineterminator = ';'; - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table); @@ -1786,12 +1794,12 @@ TEST_F(CsvReaderTest, StringsWithWriter) // TODO add quoting style flag? write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) - .quoting(cudf_io::quote_style::NONE); - auto result = cudf_io::read_csv(in_opts); + .quoting(cudf::io::quote_style::NONE); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_table.column(0), result_table.column(0)); @@ -1811,12 +1819,12 @@ TEST_F(CsvReaderTest, StringsWithWriterSimple) // TODO add quoting style flag? write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}) - .quoting(cudf_io::quote_style::NONE); - auto result = cudf_io::read_csv(in_opts); + .quoting(cudf::io::quote_style::NONE); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_table.column(0), result_table.column(0)); @@ -1835,11 +1843,11 @@ TEST_F(CsvReaderTest, StringsEmbeddedDelimiter) write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes(std::vector{dtype(), dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result.tbl->view()); } @@ -1858,15 +1866,15 @@ TEST_F(CsvReaderTest, HeaderEmbeddedDelimiter) write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes({dtype(), dtype(), dtype(), dtype(), dtype()}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result.tbl->view()); } @@ -1877,9 +1885,9 @@ TEST_F(CsvReaderTest, EmptyFileWithWriter) cudf::table_view empty_table; write_csv_helper(filepath, empty_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_csv(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty_table, result.tbl->view()); } @@ -1918,11 +1926,11 @@ TEST_F(CsvReaderTest, UserImplementedSource) << "\n"; } TestSource source{csv_data.str()}; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{&source}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{&source}) .dtypes({dtype(), dtype(), dtype()}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); auto const view = result.tbl->view(); expect_column_data_equal(int8_values, view.column(0)); @@ -1962,15 +1970,15 @@ TEST_F(CsvReaderTest, DurationsWithWriter) write_csv_helper(filepath, input_table, true, names); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .names(names) .dtypes({data_type{type_id::DURATION_DAYS}, data_type{type_id::DURATION_SECONDS}, data_type{type_id::DURATION_MILLISECONDS}, data_type{type_id::DURATION_MICROSECONDS}, data_type{type_id::DURATION_NANOSECONDS}}); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table); @@ -2038,9 +2046,9 @@ TEST_F(CsvReaderTest, ParseInRangeIntegers) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); @@ -2117,9 +2125,9 @@ TEST_F(CsvReaderTest, ParseOutOfRangeIntegers) write_csv_helper(filepath, input_table, false); - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); @@ -2148,9 +2156,9 @@ TEST_F(CsvReaderTest, ReadMaxNumericValue) std::copy(sequence, sequence + num_rows, output_iterator); } - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1); - auto result = cudf_io::read_csv(in_opts); + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); expect_column_data_equal(std::vector(sequence, sequence + num_rows), view.column(0)); @@ -2164,8 +2172,8 @@ TEST_F(CsvReaderTest, DefaultWriteChunkSize) auto input_column = column_wrapper(sequence, sequence + num_rows); auto input_table = cudf::table_view{std::vector{input_column}}; - cudf_io::csv_writer_options opts = - cudf_io::csv_writer_options::builder(cudf_io::sink_info{"unused.path"}, input_table); + cudf::io::csv_writer_options opts = + cudf::io::csv_writer_options::builder(cudf::io::sink_info{"unused.path"}, input_table); ASSERT_EQ(num_rows, opts.get_rows_per_chunk()); } } @@ -2174,12 +2182,12 @@ TEST_F(CsvReaderTest, DtypesMap) { std::string csv_in{"12,9\n34,8\n56,7"}; - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{csv_in.c_str(), csv_in.size()}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()}) .names({"A", "B"}) .dtypes({{"B", dtype()}, {"A", dtype()}}) .header(-1); - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto result_table = result.tbl->view(); ASSERT_EQ(result_table.num_columns(), 2); @@ -2191,12 +2199,12 @@ TEST_F(CsvReaderTest, DtypesMap) TEST_F(CsvReaderTest, DtypesMapPartial) { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{nullptr, 0}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{nullptr, 0}) .names({"A", "B"}) .dtypes({{"A", dtype()}}); { - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); ASSERT_EQ(type_id::INT16, view.column(0).type().id()); @@ -2206,7 +2214,7 @@ TEST_F(CsvReaderTest, DtypesMapPartial) in_opts.set_dtypes({{"B", dtype()}}); { - auto result = cudf_io::read_csv(in_opts); + auto result = cudf::io::read_csv(in_opts); const auto view = result.tbl->view(); ASSERT_EQ(type_id::STRING, view.column(0).type().id()); @@ -2216,12 +2224,12 @@ TEST_F(CsvReaderTest, DtypesMapPartial) TEST_F(CsvReaderTest, DtypesArrayInvalid) { - cudf_io::csv_reader_options in_opts = - cudf_io::csv_reader_options::builder(cudf_io::source_info{nullptr, 0}) + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{nullptr, 0}) .names({"A", "B", "C"}) .dtypes(std::vector{dtype(), dtype()}); - EXPECT_THROW(cudf_io::read_csv(in_opts), cudf::logic_error); + EXPECT_THROW(cudf::io::read_csv(in_opts), cudf::logic_error); } TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch) @@ -2234,16 +2242,16 @@ TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch) cudf::table_view input_table(std::vector{int_column, str_column}); // write that dataframe to a csv using default options to some temporary file - cudf_io::csv_writer_options writer_options = - cudf_io::csv_writer_options::builder(cudf_io::sink_info{filepath}, input_table); - cudf_io::write_csv(writer_options); + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info{filepath}, input_table); + cudf::io::write_csv(writer_options); // read the temp csv file using default options - cudf_io::csv_reader_options read_options = - cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::csv_reader_options read_options = + cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}) .dtypes(std::vector{dtype(), dtype()}); - cudf_io::table_with_metadata new_table_and_metadata = cudf_io::read_csv(read_options); + cudf::io::table_with_metadata new_table_and_metadata = cudf::io::read_csv(read_options); // verify that the tables are identical, or as identical as expected. const auto new_table_view = new_table_and_metadata.tbl->view(); diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp index 5a0db6e3c64..d7ab881861a 100644 --- a/cpp/tests/io/json_test.cpp +++ b/cpp/tests/io/json_test.cpp @@ -62,8 +62,6 @@ using column_wrapper = cudf::test::strings_column_wrapper, cudf::test::fixed_width_column_wrapper>::type; -namespace cudf_io = cudf::io; - cudf::test::TempDirTestEnvironment* const temp_env = static_cast( ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); @@ -239,12 +237,12 @@ TEST_P(JsonReaderParamTest, BasicJsonLines) {{{"0", "1"}, {"1", "1.1"}}, {{"0", "2"}, {"1", "2.2"}}, {{"0", "3"}, {"1", "3.3"}}}, "\n"); std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes(std::vector{dtype(), dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 3); @@ -286,13 +284,13 @@ TEST_P(JsonReaderParamTest, FloatingPoint) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::FLOAT32); @@ -318,13 +316,13 @@ TEST_P(JsonReaderParamTest, JsonLinesStrings) "\n"); std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes({{"2", dtype()}, {"0", dtype()}, {"1", dtype()}}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -386,8 +384,8 @@ TEST_P(JsonReaderParamTest, MultiColumn) outfile << line.str(); } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype(), dtype(), dtype(), @@ -396,7 +394,7 @@ TEST_P(JsonReaderParamTest, MultiColumn) dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); @@ -443,12 +441,12 @@ TEST_P(JsonReaderParamTest, Booleans) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); // Booleans are the same (integer) data type, but valued at 0 or 1 const auto view = result.tbl->view(); @@ -488,13 +486,13 @@ TEST_P(JsonReaderParamTest, Dates) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}}) .lines(true) .dayfirst(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -544,12 +542,12 @@ TEST_P(JsonReaderParamTest, Durations) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({data_type{type_id::DURATION_NANOSECONDS}}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(result.tbl->num_columns(), 1); @@ -583,12 +581,12 @@ TEST_P(JsonReaderParamTest, JsonLinesDtypeInference) "\n"); std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -623,12 +621,12 @@ TEST_P(JsonReaderParamTest, JsonLinesFileInput) outfile << data; outfile.close(); - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -652,13 +650,13 @@ TEST_F(JsonReaderTest, JsonLinesByteRange) outfile << "[1000]\n[2000]\n[3000]\n[4000]\n[5000]\n[6000]\n[7000]\n[8000]\n[9000]\n"; outfile.close(); - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) .lines(true) .byte_range_offset(11) .byte_range_size(20); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->num_rows(), 3); @@ -681,12 +679,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjects) outfile << " {\"co\\\"l1\" : 1, \"col2\" : 2.0} \n"; outfile.close(); - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 1); @@ -707,12 +705,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsStrings) auto const test_opt = GetParam(); bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient); auto test_json_objects = [test_experimental](std::string const& data) { - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -751,12 +749,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsMissingData) std::string const data = "{ \"col2\":1.1, \"col3\":\"aaa\"}\n" "{\"col1\":200, \"col3\":\"bbb\"}\n"; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -790,12 +788,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsOutOfOrder) "{\"col1\":100, \"col2\":1.1, \"col3\":\"aaa\"}\n" "{\"col3\":\"bbb\", \"col1\":200, \"col2\":2.2}\n"; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -825,9 +823,9 @@ TEST_F(JsonReaderTest, EmptyFile) outfile << ""; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}).lines(true); - auto result = cudf_io::read_json(in_options); + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true); + auto result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); @@ -842,9 +840,9 @@ TEST_F(JsonReaderTest, NoDataFile) outfile << "{}\n"; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}).lines(true); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); EXPECT_EQ(0, view.num_columns()); @@ -862,13 +860,13 @@ TEST_F(JsonReaderTest, ArrowFileSource) std::shared_ptr infile; ASSERT_TRUE(arrow::io::ReadableFile::Open(fname).Value(&infile).ok()); - auto arrow_source = cudf_io::arrow_io_source{infile}; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{&arrow_source}) + auto arrow_source = cudf::io::arrow_io_source{infile}; + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{&arrow_source}) .dtypes({dtype()}) .lines(true); ; - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT8); @@ -899,12 +897,12 @@ TEST_P(JsonReaderParamTest, InvalidFloatingPoint) outfile << data; } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .dtypes({dtype()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::FLOAT32); @@ -925,11 +923,11 @@ TEST_P(JsonReaderParamTest, StringInference) std::string record_orient = to_records_orient({{{"0", R"("-1")"}}}, "\n"); std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.c_str(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.c_str(), data.size()}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 1); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRING); @@ -1009,12 +1007,12 @@ TEST_P(JsonReaderParamTest, ParseInRangeIntegers) std::ofstream outfile(filepath, std::ofstream::out); outfile << line.str(); } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); @@ -1114,12 +1112,12 @@ TEST_P(JsonReaderParamTest, ParseOutOfRangeIntegers) std::ofstream outfile(filepath, std::ofstream::out); outfile << line.str(); } - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); const auto view = result.tbl->view(); @@ -1155,12 +1153,12 @@ TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputs) outfile2 << data[1]; outfile2.close(); - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{{file1, file2}}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{{file1, file2}}) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 4); @@ -1183,23 +1181,23 @@ TEST_F(JsonReaderTest, BadDtypeParams) { std::string buffer = "[1,2,3,4]"; - cudf_io::json_reader_options options_vec = - cudf_io::json_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::json_reader_options options_vec = + cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .lines(true) .dtypes({dtype()}); // should throw because there are four columns and only one dtype - EXPECT_THROW(cudf_io::read_json(options_vec), cudf::logic_error); + EXPECT_THROW(cudf::io::read_json(options_vec), cudf::logic_error); - cudf_io::json_reader_options options_map = - cudf_io::json_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()}) + cudf::io::json_reader_options options_map = + cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()}) .lines(true) .dtypes(std::map{{"0", dtype()}, {"1", dtype()}, {"2", dtype()}, {"wrong_name", dtype()}}); // should throw because one of the columns is not in the dtype map - EXPECT_THROW(cudf_io::read_json(options_map), cudf::logic_error); + EXPECT_THROW(cudf::io::read_json(options_map), cudf::logic_error); } TEST_F(JsonReaderTest, JsonExperimentalBasic) @@ -1209,9 +1207,9 @@ TEST_F(JsonReaderTest, JsonExperimentalBasic) outfile << R"([{"a":"11", "b":"1.1"},{"a":"22", "b":"2.2"}])"; outfile.close(); - cudf_io::json_reader_options options = - cudf_io::json_reader_options::builder(cudf_io::source_info{fname}).experimental(true); - auto result = cudf_io::read_json(options); + cudf::io::json_reader_options options = + cudf::io::json_reader_options::builder(cudf::io::source_info{fname}).experimental(true); + auto result = cudf::io::read_json(options); EXPECT_EQ(result.tbl->num_columns(), 2); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -1366,15 +1364,15 @@ TEST_P(JsonReaderParamTest, JsonDtypeSchema) std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient; - std::map dtype_schema{ + std::map dtype_schema{ {"2", {dtype()}}, {"0", {dtype()}}, {"1", {dtype()}}}; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .dtypes(dtype_schema) .lines(true) .experimental(test_experimental); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); EXPECT_EQ(result.tbl->num_columns(), 3); EXPECT_EQ(result.tbl->num_rows(), 2); @@ -1399,7 +1397,7 @@ TEST_F(JsonReaderTest, JsonNestedDtypeSchema) { std::string json_string = R"( [{"a":[123, {"0": 123}], "b":1.0}, {"b":1.1}, {"b":2.1}])"; - std::map dtype_schema{ + std::map dtype_schema{ {"a", { data_type{cudf::type_id::LIST}, @@ -1408,14 +1406,14 @@ TEST_F(JsonReaderTest, JsonNestedDtypeSchema) {"b", {dtype()}}, }; - cudf_io::json_reader_options in_options = - cudf_io::json_reader_options::builder( - cudf_io::source_info{json_string.data(), json_string.size()}) + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{json_string.data(), json_string.size()}) .dtypes(dtype_schema) .lines(false) .experimental(true); - cudf_io::table_with_metadata result = cudf_io::read_json(in_options); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); // Make sure we have columns "a" and "b" ASSERT_EQ(result.tbl->num_columns(), 2); diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index a658ed0a55d..2f761eeac66 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -42,8 +42,6 @@ #define ZSTD_SUPPORTED 0 #endif -namespace cudf_io = cudf::io; - template using column_wrapper = typename std::conditional, @@ -182,9 +180,9 @@ struct SkipRowTest { sequence, sequence + file_num_rows); table_view input_table({input_col}); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, input_table); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input_table); + cudf::io::write_orc(out_opts); auto begin_sequence = sequence, end_sequence = sequence; if (skip_rows < file_num_rows) { @@ -203,12 +201,12 @@ struct SkipRowTest { auto filepath = temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc"); auto expected_result = get_expected_result(filepath, skip_rows, file_num_rows, read_num_rows); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .skip_rows(skip_rows) .num_rows(read_num_rows); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view()); } @@ -218,11 +216,11 @@ struct SkipRowTest { temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc"); auto expected_result = get_expected_result(filepath, skip_rows, file_num_rows, file_num_rows - skip_rows); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .skip_rows(skip_rows); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view()); } }; @@ -239,13 +237,13 @@ TYPED_TEST(OrcWriterNumericTypeTest, SingleColumn) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -261,13 +259,13 @@ TYPED_TEST(OrcWriterNumericTypeTest, SingleColumnWithNulls) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcSingleColumnWithNulls.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -283,15 +281,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, Timestamps) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcTimestamps.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .timestamp_type(this->type()); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -309,15 +307,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, TimestampsWithNulls) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcTimestampsWithNulls.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .timestamp_type(this->type()); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -333,15 +331,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, TimestampOverflow) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcTimestampOverflow.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .timestamp_type(this->type()); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -381,7 +379,7 @@ TEST_F(OrcWriterTest, MultiColumn) table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8, col9}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("bools"); expected_metadata.column_metadata[1].set_name("int8s"); expected_metadata.column_metadata[2].set_name("int16s"); @@ -394,14 +392,14 @@ TEST_F(OrcWriterTest, MultiColumn) expected_metadata.column_metadata[9].set_name("structs"); auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -449,7 +447,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) struct_col col8{{ages_col}, {0, 1, 1, 0, 1, 1, 0, 1, 1, 0}}; table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("bools"); expected_metadata.column_metadata[1].set_name("int8s"); expected_metadata.column_metadata[2].set_name("int16s"); @@ -461,14 +459,14 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls) expected_metadata.column_metadata[8].set_name("structs"); auto filepath = temp_env->get_temp_filepath("OrcMultiColumnWithNulls.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -484,15 +482,15 @@ TEST_F(OrcWriterTest, ReadZeroRows) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .num_rows(0); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); EXPECT_EQ(0, result.tbl->num_rows()); EXPECT_EQ(1, result.tbl->num_columns()); @@ -513,20 +511,20 @@ TEST_F(OrcWriterTest, Strings) table_view expected({col0, col1, col2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); auto filepath = temp_env->get_temp_filepath("OrcStrings.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -560,7 +558,7 @@ TEST_F(OrcWriterTest, SlicedTable) table_view expected({col0, col1, col2, col3, col4, col5}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); @@ -571,14 +569,14 @@ TEST_F(OrcWriterTest, SlicedTable) auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows)}); auto filepath = temp_env->get_temp_filepath("SlicedTable.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected_slice) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -592,19 +590,20 @@ TEST_F(OrcWriterTest, HostBuffer) table_view expected{{col}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); std::vector out_buffer; - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info(out_buffer.data(), out_buffer.size())) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info(out_buffer.data(), out_buffer.size())) .use_index(false); - const auto result = cudf_io::read_orc(in_opts); + const auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -625,14 +624,14 @@ TEST_F(OrcWriterTest, negTimestampsNano) table_view expected({timestamps_ns}); auto filepath = temp_env->get_temp_filepath("OrcNegTimestamp.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL( expected.column(0), result.tbl->view().column(0), cudf::test::debug_output_level::ALL_ERRORS); @@ -647,13 +646,13 @@ TEST_F(OrcWriterTest, Slice) cudf::table_view tbl{result}; auto filepath = temp_env->get_temp_filepath("Slice.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto read_table = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto read_table = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); } @@ -664,13 +663,13 @@ TEST_F(OrcChunkedWriterTest, SingleTable) auto table1 = create_random_fixed_table(5, 5, true); auto filepath = temp_env->get_temp_filepath("ChunkedSingle.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); } @@ -684,13 +683,13 @@ TEST_F(OrcChunkedWriterTest, SimpleTable) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedSimple.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -704,13 +703,13 @@ TEST_F(OrcChunkedWriterTest, LargeTables) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedLarge.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -730,17 +729,17 @@ TEST_F(OrcChunkedWriterTest, ManyTables) auto expected = cudf::concatenate(table_views); auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer writer(opts); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { writer.write(tbl); }); writer.close(); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -760,20 +759,20 @@ TEST_F(OrcChunkedWriterTest, Metadata) table_view expected({col0, col1, col2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); auto filepath = temp_env->get_temp_filepath("ChunkedMetadata.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}) + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}) .metadata(&expected_metadata); - cudf_io::orc_chunked_writer(opts).write(expected).write(expected); + cudf::io::orc_chunked_writer(opts).write(expected).write(expected); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); } @@ -793,13 +792,13 @@ TEST_F(OrcChunkedWriterTest, Strings) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedStrings.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -811,9 +810,9 @@ TEST_F(OrcChunkedWriterTest, MismatchedTypes) auto table2 = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer writer(opts); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); writer.write(*table1); EXPECT_THROW(writer.write(*table2), cudf::logic_error); } @@ -824,9 +823,9 @@ TEST_F(OrcChunkedWriterTest, ChunkedWritingAfterClosing) auto table1 = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedWritingAfterClosing.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer writer(opts); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); writer.write(*table1); writer.close(); EXPECT_THROW(writer.write(*table1), cudf::logic_error); @@ -839,9 +838,9 @@ TEST_F(OrcChunkedWriterTest, MismatchedStructure) auto table2 = create_random_fixed_table(3, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer writer(opts); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer writer(opts); writer.write(*table1); EXPECT_THROW(writer.write(*table2), cudf::logic_error); } @@ -855,13 +854,13 @@ TEST_F(OrcChunkedWriterTest, ReadStripes) auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedStripes.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).stripes({{1, 0, 1}}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{1, 0, 1}}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -872,15 +871,15 @@ TEST_F(OrcChunkedWriterTest, ReadStripesError) auto table1 = create_random_fixed_table(5, 5, true); auto filepath = temp_env->get_temp_filepath("ChunkedStripesError.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(*table1); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(*table1); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).stripes({{0, 1}}); - EXPECT_THROW(cudf_io::read_orc(read_opts), cudf::logic_error); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{0, 1}}); + EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error); read_opts.set_stripes({{-1}}); - EXPECT_THROW(cudf_io::read_orc(read_opts), cudf::logic_error); + EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error); } TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) @@ -915,13 +914,13 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -958,13 +957,13 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.orc"); - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2); + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1002,30 +1001,30 @@ TEST_F(OrcStatisticsTest, Basic) auto filepath = temp_env->get_temp_filepath("OrcStatsMerge.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_orc(out_opts); - auto const stats = cudf_io::read_parsed_orc_statistics(cudf_io::source_info{filepath}); + auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); auto const expected_column_names = std::vector{"", "_col0", "_col1", "_col2", "_col3", "_col4"}; EXPECT_EQ(stats.column_names, expected_column_names); - auto validate_statistics = [&](std::vector const& stats) { + auto validate_statistics = [&](std::vector const& stats) { auto& s0 = stats[0]; EXPECT_EQ(*s0.number_of_values, 9ul); auto& s1 = stats[1]; EXPECT_EQ(*s1.number_of_values, 4ul); - auto& ts1 = std::get(s1.type_specific_stats); + auto& ts1 = std::get(s1.type_specific_stats); EXPECT_EQ(*ts1.minimum, 1); EXPECT_EQ(*ts1.maximum, 7); EXPECT_EQ(*ts1.sum, 16); auto& s2 = stats[2]; EXPECT_EQ(*s2.number_of_values, 4ul); - auto& ts2 = std::get(s2.type_specific_stats); + auto& ts2 = std::get(s2.type_specific_stats); EXPECT_EQ(*ts2.minimum, 1.); EXPECT_EQ(*ts2.maximum, 7.); // No sum ATM, filed #7087 @@ -1033,18 +1032,18 @@ TEST_F(OrcStatisticsTest, Basic) auto& s3 = stats[3]; EXPECT_EQ(*s3.number_of_values, 9ul); - auto& ts3 = std::get(s3.type_specific_stats); + auto& ts3 = std::get(s3.type_specific_stats); EXPECT_EQ(*ts3.minimum, "Friday"); EXPECT_EQ(*ts3.maximum, "Wednesday"); EXPECT_EQ(*ts3.sum, 58ul); auto& s4 = stats[4]; EXPECT_EQ(*s4.number_of_values, 9ul); - EXPECT_EQ(std::get(s4.type_specific_stats).count[0], 8ul); + EXPECT_EQ(std::get(s4.type_specific_stats).count[0], 8ul); auto& s5 = stats[5]; EXPECT_EQ(*s5.number_of_values, 4ul); - auto& ts5 = std::get(s5.type_specific_stats); + auto& ts5 = std::get(s5.type_specific_stats); EXPECT_EQ(*ts5.minimum_utc, 1000); EXPECT_EQ(*ts5.maximum_utc, 7000); ASSERT_FALSE(ts5.minimum); @@ -1070,18 +1069,18 @@ TEST_F(OrcWriterTest, SlicedValidMask) auto sliced_col = cudf::slice(static_cast(col), indices); cudf::table_view tbl{sliced_col}; - cudf_io::table_input_metadata expected_metadata(tbl); + cudf::io::table_input_metadata expected_metadata(tbl); expected_metadata.column_metadata[0].set_name("col_string"); auto filepath = temp_env->get_temp_filepath("OrcStrings.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(tbl, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1093,13 +1092,13 @@ TEST_F(OrcReaderTest, SingleInputs) auto table1 = create_random_fixed_table(5, 5, true); auto filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc"); - cudf_io::orc_writer_options write_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath1}, table1->view()); - cudf_io::write_orc(write_opts); + cudf::io::orc_writer_options write_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view()); + cudf::io::write_orc(write_opts); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{{filepath1}}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1}}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); } @@ -1125,11 +1124,11 @@ TEST_F(OrcReaderTest, zstdCompressionRegression) auto source = cudf::io::source_info(reinterpret_cast(input_buffer), sizeof(input_buffer)); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(source).use_index(false); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(source).use_index(false); cudf::io::table_with_metadata result; - CUDF_EXPECT_NO_THROW(result = cudf_io::read_orc(in_opts)); + CUDF_EXPECT_NO_THROW(result = cudf::io::read_orc(in_opts)); EXPECT_EQ(1920800, result.tbl->num_rows()); } @@ -1143,21 +1142,21 @@ TEST_F(OrcReaderTest, MultipleInputs) auto const filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc"); { - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath1}, table1->view()); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view()); + cudf::io::write_orc(out_opts); } auto const filepath2 = temp_env->get_temp_filepath("SimpleTable2.orc"); { - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath2}, table2->view()); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath2}, table2->view()); + cudf::io::write_orc(out_opts); } - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{{filepath1, filepath2}}); - auto result = cudf_io::read_orc(read_opts); + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1, filepath2}}); + auto result = cudf::io::read_orc(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -1180,14 +1179,14 @@ TEST_P(OrcWriterTestDecimal, Decimal64) cudf::table_view tbl({static_cast(col)}); auto filepath = temp_env->get_temp_filepath("Decimal64.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(tbl.column(0), result.tbl->view().column(0)); } @@ -1211,14 +1210,14 @@ TEST_F(OrcWriterTest, Decimal32) cudf::table_view expected({col}); auto filepath = temp_env->get_temp_filepath("Decimal32.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(col, result.tbl->view().column(0)); } @@ -1248,15 +1247,15 @@ TEST_F(OrcStatisticsTest, Overflow) auto filepath = temp_env->get_temp_filepath("OrcStatsOverflow.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_orc(out_opts); + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_orc(out_opts); - auto const stats = cudf_io::read_parsed_orc_statistics(cudf_io::source_info{filepath}); + auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath}); auto check_sum_exist = [&](int idx, bool expected) { auto const& s = stats.file_stats[idx]; - auto const& ts = std::get(s.type_specific_stats); + auto const& ts = std::get(s.type_specific_stats); EXPECT_EQ(ts.sum.has_value(), expected); }; check_sum_exist(1, false); @@ -1311,8 +1310,8 @@ TEST_F(OrcStatisticsTest, HasNull) 0x4F, 0x52, 0x43, 0x17, }; - auto const stats = cudf_io::read_parsed_orc_statistics( - cudf_io::source_info{reinterpret_cast(nulls_orc.data()), nulls_orc.size()}); + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info{reinterpret_cast(nulls_orc.data()), nulls_orc.size()}); EXPECT_EQ(stats.file_stats[1].has_null, true); EXPECT_EQ(stats.file_stats[2].has_null, false); @@ -1343,35 +1342,35 @@ TEST_P(OrcWriterTestStripes, StripeSize) auto validate = [&](std::vector const& orc_buffer) { auto const expected_stripe_num = std::max(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes); - auto const stats = cudf_io::read_parsed_orc_statistics( - cudf_io::source_info(orc_buffer.data(), orc_buffer.size())); + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder( - cudf_io::source_info(orc_buffer.data(), orc_buffer.size())) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())) .use_index(false); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); }; { std::vector out_buffer_chunked; - cudf_io::chunked_orc_writer_options opts = - cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info(&out_buffer_chunked)) + cudf::io::chunked_orc_writer_options opts = + cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info(&out_buffer_chunked)) .stripe_size_rows(size_rows) .stripe_size_bytes(size_bytes); - cudf_io::orc_chunked_writer(opts).write(expected->view()); + cudf::io::orc_chunked_writer(opts).write(expected->view()); validate(out_buffer_chunked); } { std::vector out_buffer; - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), expected->view()) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected->view()) .stripe_size_rows(size_rows) .stripe_size_bytes(size_bytes); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); validate(out_buffer); } } @@ -1392,15 +1391,15 @@ TEST_F(OrcWriterTest, StripeSizeInvalid) std::vector out_buffer; EXPECT_THROW( - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) .stripe_size_rows(511), cudf::logic_error); EXPECT_THROW( - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) .stripe_size_bytes(63 << 10), cudf::logic_error); EXPECT_THROW( - cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) + cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view()) .row_index_stride(511), cudf::logic_error); } @@ -1438,18 +1437,18 @@ TEST_F(OrcWriterTest, TestMap) table_view expected({*list_col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_list_column_as_map(); auto filepath = temp_env->get_temp_filepath("MapColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false); - auto result = cudf_io::read_orc(in_opts); + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false); + auto result = cudf::io::read_orc(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1466,22 +1465,22 @@ TEST_F(OrcReaderTest, NestedColumnSelection) struct_col s_col{child_col1, child_col2}; table_view expected({s_col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("struct_s"); expected_metadata.column_metadata[0].child(0).set_name("field_a"); expected_metadata.column_metadata[0].child(1).set_name("field_b"); auto filepath = temp_env->get_temp_filepath("OrcNestedSelection.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) .columns({"struct_s.field_b"}); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); // Verify that only one child column is included in the output table ASSERT_EQ(1, result.tbl->view().column(0).num_children()); @@ -1503,20 +1502,20 @@ TEST_F(OrcReaderTest, DecimalOptions) dec128_col col{col_data, col_data + num_rows, mask}; table_view expected({col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("dec"); auto filepath = temp_env->get_temp_filepath("OrcDecimalOptions.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options valid_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options valid_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .decimal128_columns({"dec", "fake_name"}); // Should not throw, even with "fake name" - EXPECT_NO_THROW(cudf_io::read_orc(valid_opts)); + EXPECT_NO_THROW(cudf::io::read_orc(valid_opts)); } TEST_F(OrcWriterTest, DecimalOptionsNested) @@ -1547,24 +1546,24 @@ TEST_F(OrcWriterTest, DecimalOptionsNested) table_view expected({*map_list_col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("maps"); expected_metadata.column_metadata[0].set_list_column_as_map(); expected_metadata.column_metadata[0].child(1).child(0).child(0).set_name("dec64"); expected_metadata.column_metadata[0].child(1).child(0).child(1).set_name("dec128"); auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::orc_writer_options out_opts = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_orc(out_opts); + cudf::io::write_orc(out_opts); - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}) .use_index(false) // One less level of nesting because children of map columns are the child struct's children .decimal128_columns({"maps.0.dec64"}); - auto result = cudf_io::read_orc(in_opts); + auto result = cudf::io::read_orc(in_opts); // Both columns should be read as decimal128 CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result.tbl->view().column(0).child(1).child(0).child(0), @@ -1577,14 +1576,15 @@ TEST_F(OrcReaderTest, EmptyColumnsParam) auto const expected = create_random_fixed_table(2, 4, false); std::vector out_buffer; - cudf_io::orc_writer_options args = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{&out_buffer}, *expected); - cudf_io::write_orc(args); + cudf::io::orc_writer_options args = + cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); + cudf::io::write_orc(args); - cudf_io::orc_reader_options read_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{out_buffer.data(), out_buffer.size()}) + cudf::io::orc_reader_options read_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}) .columns({}); - auto const result = cudf_io::read_orc(read_opts); + auto const result = cudf::io::read_orc(read_opts); EXPECT_EQ(result.tbl->num_columns(), 0); EXPECT_EQ(result.tbl->num_rows(), 0); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index cf22ab8a525..8a98efabcb5 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -45,8 +45,6 @@ #include #include -namespace cudf_io = cudf::io; - template using column_wrapper = typename std::conditional, @@ -430,13 +428,13 @@ TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumn) auto expected = table_view{{col}}; auto filepath = temp_env->get_temp_filepath("SingleColumn.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -453,13 +451,13 @@ TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumnWithNulls) auto expected = table_view{{col}}; auto filepath = temp_env->get_temp_filepath("SingleColumnWithNulls.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -477,14 +475,14 @@ TYPED_TEST(ParquetWriterChronoTypeTest, Chronos) auto expected = table_view{{col}}; auto filepath = temp_env->get_temp_filepath("Chronos.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .timestamp_type(this->type()); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -503,14 +501,14 @@ TYPED_TEST(ParquetWriterChronoTypeTest, ChronosWithNulls) auto expected = table_view{{col}}; auto filepath = temp_env->get_temp_filepath("ChronosWithNulls.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .timestamp_type(this->type()); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -527,14 +525,14 @@ TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampOverflow) table_view expected({col}); auto filepath = temp_env->get_temp_filepath("ParquetTimestampOverflow.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .timestamp_type(this->type()); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -576,7 +574,7 @@ TEST_F(ParquetWriterTest, MultiColumn) auto expected = table_view{{col1, col2, col3, col4, col5, col6, col7, col8}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); // expected_metadata.column_metadata[0].set_name( "bools"); expected_metadata.column_metadata[0].set_name("int8s"); expected_metadata.column_metadata[1].set_name("int16s"); @@ -588,14 +586,14 @@ TEST_F(ParquetWriterTest, MultiColumn) expected_metadata.column_metadata[7].set_name("decimal128s").set_decimal_precision(40); auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -647,7 +645,7 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) auto expected = table_view{{/*col0, */ col1, col2, col3, col4, col5, col6, col7}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); // expected_metadata.column_names.emplace_back("bools"); expected_metadata.column_metadata[0].set_name("int8s"); expected_metadata.column_metadata[1].set_name("int16s"); @@ -658,15 +656,15 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); auto filepath = temp_env->get_temp_filepath("MultiColumnWithNulls.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); // TODO: Need to be able to return metadata in tree form from reader so they can be compared. @@ -691,20 +689,20 @@ TEST_F(ParquetWriterTest, Strings) auto expected = table_view{{col0, col1, col2}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); auto filepath = temp_env->get_temp_filepath("Strings.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -740,7 +738,7 @@ TEST_F(ParquetWriterTest, StringsAsBinary) auto write_tbl = table_view{{col0, col1, col2, col3, col4}}; - cudf_io::table_input_metadata expected_metadata(write_tbl); + cudf::io::table_input_metadata expected_metadata(write_tbl); expected_metadata.column_metadata[0].set_name("col_single").set_output_as_binary(true); expected_metadata.column_metadata[1].set_name("col_string").set_output_as_binary(true); expected_metadata.column_metadata[2].set_name("col_another").set_output_as_binary(true); @@ -748,20 +746,20 @@ TEST_F(ParquetWriterTest, StringsAsBinary) expected_metadata.column_metadata[4].set_name("col_binary"); auto filepath = temp_env->get_temp_filepath("BinaryStrings.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, write_tbl) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .set_column_schema( - {cudf_io::reader_column_schema().set_convert_binary_to_strings(false), - cudf_io::reader_column_schema().set_convert_binary_to_strings(false), - cudf_io::reader_column_schema().set_convert_binary_to_strings(false), - cudf_io::reader_column_schema().add_child(cudf_io::reader_column_schema()), - cudf_io::reader_column_schema().add_child(cudf_io::reader_column_schema())}); - auto result = cudf_io::read_parquet(in_opts); + {cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema()), + cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema())}); + auto result = cudf::io::read_parquet(in_opts); auto expected = table_view{{col3, col4, col3, col3, col4}}; CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); @@ -855,7 +853,7 @@ TEST_F(ParquetWriterTest, SlicedTable) // auto expected_slice = expected; auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows) - 1}); - cudf_io::table_input_metadata expected_metadata(expected_slice); + cudf::io::table_input_metadata expected_metadata(expected_slice); expected_metadata.column_metadata[0].set_name("col_other"); expected_metadata.column_metadata[1].set_name("col_string"); expected_metadata.column_metadata[2].set_name("col_another"); @@ -869,14 +867,14 @@ TEST_F(ParquetWriterTest, SlicedTable) expected_metadata.column_metadata[6].child(1).child(1).set_name("flats"); auto filepath = temp_env->get_temp_filepath("SlicedTable.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected_slice) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -956,7 +954,7 @@ TEST_F(ParquetWriterTest, ListColumn) table_view expected({col0, col1, col2, col3, /* col4, */ col5, col6, col7}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_list_int_0"); expected_metadata.column_metadata[1].set_name("col_list_list_int_1"); expected_metadata.column_metadata[2].set_name("col_list_list_int_nullable_2"); @@ -967,14 +965,14 @@ TEST_F(ParquetWriterTest, ListColumn) expected_metadata.column_metadata[6].set_name("col_list_list_list_7"); auto filepath = temp_env->get_temp_filepath("ListColumn.parquet"); - auto out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata) - .compression(cudf_io::compression_type::NONE); + .compression(cudf::io::compression_type::NONE); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - auto in_opts = cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(in_opts); + auto in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -999,7 +997,7 @@ TEST_F(ParquetWriterTest, MultiIndex) auto expected = table_view{{col0, col1, col2, col3, col4}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("int8s"); expected_metadata.column_metadata[1].set_name("int16s"); expected_metadata.column_metadata[2].set_name("int32s"); @@ -1007,18 +1005,18 @@ TEST_F(ParquetWriterTest, MultiIndex) expected_metadata.column_metadata[4].set_name("doubles"); auto filepath = temp_env->get_temp_filepath("MultiIndex.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata) .key_value_metadata( {{{"pandas", "\"index_columns\": [\"int8s\", \"int16s\"], \"column1\": [\"int32s\"]"}}}); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .use_pandas_metadata(true) .columns({"int32s", "floats", "doubles"}); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1034,17 +1032,17 @@ TEST_F(ParquetWriterTest, HostBuffer) const auto expected = table_view{{col}}; - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("col_other"); std::vector out_buffer; - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) .metadata(&expected_metadata); - cudf_io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = cudf_io::parquet_reader_options::builder( - cudf_io::source_info(out_buffer.data(), out_buffer.size())); - const auto result = cudf_io::read_parquet(in_opts); + cudf::io::write_parquet(out_opts); + cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info(out_buffer.data(), out_buffer.size())); + const auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1056,13 +1054,13 @@ TEST_F(ParquetWriterTest, NonNullable) auto expected = create_random_fixed_table(9, 9, false); auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1095,13 +1093,13 @@ TEST_F(ParquetWriterTest, Struct) auto expected = table_view({*struct_2}); auto filepath = temp_env->get_temp_filepath("Struct.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); - cudf_io::read_parquet(read_args); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + cudf::io::read_parquet(read_args); } TEST_F(ParquetWriterTest, StructOfList) @@ -1156,7 +1154,7 @@ TEST_F(ParquetWriterTest, StructOfList) auto expected = table_view({*struct_2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("human?"); expected_metadata.column_metadata[0].child(1).set_name("particulars"); @@ -1166,14 +1164,14 @@ TEST_F(ParquetWriterTest, StructOfList) expected_metadata.column_metadata[0].child(1).child(3).set_name("flats"); auto filepath = temp_env->get_temp_filepath("StructOfList.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(args); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); - const auto result = cudf_io::read_parquet(read_args); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + const auto result = cudf::io::read_parquet(read_args); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1213,7 +1211,7 @@ TEST_F(ParquetWriterTest, ListOfStruct) auto expected = table_view({*list_col}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("family"); expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); @@ -1221,14 +1219,14 @@ TEST_F(ParquetWriterTest, ListOfStruct) expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("ListOfStruct.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&expected_metadata); - cudf_io::write_parquet(args); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)); - const auto result = cudf_io::read_parquet(read_args); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + const auto result = cudf::io::read_parquet(read_args); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1284,36 +1282,34 @@ TEST_F(ParquetWriterTest, CustomDataSink) auto filepath = temp_env->get_temp_filepath("CustomDataSink.parquet"); custom_test_data_sink custom_sink(filepath); - namespace cudf_io = cudf::io; - srand(31337); auto expected = create_random_fixed_table(5, 10, false); // write out using the custom sink { - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); } // write out using a memmapped sink std::vector buf_sink; { - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&buf_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buf_sink}, *expected); + cudf::io::write_parquet(args); } // read them back in and make sure everything matches - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); - cudf_io::parquet_reader_options buf_args = cudf_io::parquet_reader_options::builder( - cudf_io::source_info{buf_sink.data(), buf_sink.size()}); - auto buf_tbl = cudf_io::read_parquet(buf_args); + cudf::io::parquet_reader_options buf_args = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{buf_sink.data(), buf_sink.size()}); + auto buf_tbl = cudf::io::read_parquet(buf_args); CUDF_TEST_EXPECT_TABLES_EQUAL(buf_tbl.tbl->view(), expected->view()); } @@ -1322,20 +1318,18 @@ TEST_F(ParquetWriterTest, DeviceWriteLargeishFile) auto filepath = temp_env->get_temp_filepath("DeviceWriteLargeishFile.parquet"); custom_test_data_sink custom_sink(filepath); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_random_fixed_table(4, 4 * 1024 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -1354,19 +1348,19 @@ TEST_F(ParquetWriterTest, PartitionedWrite) auto expected2 = cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder( - cudf_io::sink_info(std::vector{filepath1, filepath2}), *source) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) .partitions({partition1, partition2}) - .compression(cudf_io::compression_type::NONE); - cudf_io::write_parquet(args); + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); - auto result1 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1))); + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - auto result2 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2))); + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); } @@ -1385,19 +1379,19 @@ TEST_F(ParquetWriterTest, PartitionedWriteEmptyPartitions) auto expected2 = cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder( - cudf_io::sink_info(std::vector{filepath1, filepath2}), *source) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) .partitions({partition1, partition2}) - .compression(cudf_io::compression_type::NONE); - cudf_io::write_parquet(args); + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); - auto result1 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1))); + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - auto result2 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2))); + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); } @@ -1416,19 +1410,19 @@ TEST_F(ParquetWriterTest, PartitionedWriteEmptyColumns) auto expected2 = cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder( - cudf_io::sink_info(std::vector{filepath1, filepath2}), *source) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) .partitions({partition1, partition2}) - .compression(cudf_io::compression_type::NONE); - cudf_io::write_parquet(args); + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); - auto result1 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1))); + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - auto result2 = cudf_io::read_parquet( - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2))); + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); } @@ -1439,9 +1433,9 @@ std::string create_parquet_file(int num_cols) auto const table = create_random_fixed_table(num_cols, 10, true); auto const filepath = temp_env->get_temp_filepath(typeid(T).name() + std::to_string(num_cols) + ".parquet"); - cudf_io::parquet_writer_options const out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, table->view()); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options const out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table->view()); + cudf::io::write_parquet(out_opts); return filepath; } @@ -1451,16 +1445,16 @@ TEST_F(ParquetWriterTest, MultipleMismatchedSources) { auto const float5file = create_parquet_file(5); std::vector files{int5file, float5file}; - cudf_io::parquet_reader_options const read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{files}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); } { auto const int10file = create_parquet_file(10); std::vector files{int5file, int10file}; - cudf_io::parquet_reader_options const read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{files}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); } } @@ -1473,13 +1467,13 @@ TEST_F(ParquetWriterTest, Slice) cudf::table_view tbl{result}; auto filepath = temp_env->get_temp_filepath("Slice.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_opts); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto read_table = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto read_table = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); } @@ -1490,13 +1484,13 @@ TEST_F(ParquetChunkedWriterTest, SingleTable) auto table1 = create_random_fixed_table(5, 5, true); auto filepath = temp_env->get_temp_filepath("ChunkedSingle.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(*table1); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); } @@ -1510,13 +1504,13 @@ TEST_F(ParquetChunkedWriterTest, SimpleTable) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -1530,14 +1524,14 @@ TEST_F(ParquetChunkedWriterTest, LargeTables) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - auto md = cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2).close(); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + auto md = cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2).close(); CUDF_EXPECTS(!md, "The return value should be null."); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -1557,18 +1551,18 @@ TEST_F(ParquetChunkedWriterTest, ManyTables) auto expected = cudf::concatenate(table_views); auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { writer.write(tbl); }); auto md = writer.close({"dummy/path"}); CUDF_EXPECTS(md, "The returned metadata should not be null."); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1592,13 +1586,13 @@ TEST_F(ParquetChunkedWriterTest, Strings) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1651,13 +1645,13 @@ TEST_F(ParquetChunkedWriterTest, ListColumn) auto expected = cudf::concatenate(std::vector({tbl0, tbl1})); auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(tbl0).write(tbl1); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl0).write(tbl1); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -1704,7 +1698,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct) auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - cudf_io::table_input_metadata expected_metadata(table_1); + cudf::io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("family"); expected_metadata.column_metadata[0].child(1).set_nullability(false); expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); @@ -1713,14 +1707,14 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct) expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("ChunkedListOfStruct.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); args.set_metadata(&expected_metadata); - cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1795,7 +1789,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - cudf_io::table_input_metadata expected_metadata(table_1); + cudf::io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("family"); expected_metadata.column_metadata[0].child(1).set_nullability(false); expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); @@ -1806,14 +1800,14 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) expected_metadata.column_metadata[0].child(1).child(1).child(3).set_name("flats"); auto filepath = temp_env->get_temp_filepath("ListOfStructOfStructOfListOfList.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); args.set_metadata(&expected_metadata); - cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -1831,9 +1825,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedTypes) auto table2 = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(*table1); EXPECT_THROW(writer.write(*table2), cudf::logic_error); writer.close(); @@ -1845,9 +1839,9 @@ TEST_F(ParquetChunkedWriterTest, ChunkedWriteAfterClosing) auto table = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedWriteAfterClosing.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(*table).close(); EXPECT_THROW(writer.write(*table), cudf::logic_error); } @@ -1858,14 +1852,14 @@ TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile) auto table = create_random_fixed_table(4, 4, true); auto filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(*table); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); } TEST_F(ParquetChunkedWriterTest, MismatchedStructure) @@ -1875,9 +1869,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedStructure) auto table2 = create_random_fixed_table(3, 4, true); auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(*table1); EXPECT_THROW(writer.write(*table2), cudf::logic_error); writer.close(); @@ -1915,9 +1909,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedStructureList) auto tbl1 = table_view({col01, col11}); auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer writer(args); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); writer.write(tbl0); EXPECT_THROW(writer.write(tbl1), cudf::logic_error); } @@ -1931,13 +1925,13 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullability) auto full_table = cudf::concatenate(std::vector({*table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -1969,7 +1963,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - cudf_io::table_input_metadata expected_metadata(table_1); + cudf::io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("human?"); expected_metadata.column_metadata[0].child(1).set_name("particulars"); @@ -1977,14 +1971,14 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); args.set_metadata(&expected_metadata); - cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -2000,7 +1994,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability) auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet"); - cudf_io::table_input_metadata metadata(*table1); + cudf::io::table_input_metadata metadata(*table1); // In the absence of prescribed per-column nullability in metadata, the writer assumes the worst // and considers all columns nullable. However cudf::concatenate will not force nulls in case no @@ -2010,14 +2004,14 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability) col_meta.set_nullability(false); } - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}) + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) .metadata(&metadata); - cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -2057,7 +2051,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) auto full_table = cudf::concatenate(std::vector({table1, table2})); - cudf_io::table_input_metadata metadata(table1); + cudf::io::table_input_metadata metadata(table1); metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level metadata.column_metadata[0].child(1).set_nullability( false); // non-nullable at second (leaf) level @@ -2065,14 +2059,14 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}) + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) .metadata(&metadata); - cudf_io::parquet_chunked_writer(args).write(table1).write(table2); + cudf::io::parquet_chunked_writer(args).write(table1).write(table2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -2102,7 +2096,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - cudf_io::table_input_metadata expected_metadata(table_1); + cudf::io::table_input_metadata expected_metadata(table_1); expected_metadata.column_metadata[0].set_name("being").set_nullability(false); expected_metadata.column_metadata[0].child(0).set_name("human?").set_nullability(false); expected_metadata.column_metadata[0].child(1).set_name("particulars"); @@ -2110,14 +2104,14 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); args.set_metadata(&expected_metadata); - cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); cudf::test::expect_metadata_equal(expected_metadata, result.metadata); @@ -2132,16 +2126,16 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroups) auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); { - cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); } - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .row_groups({{1, 0, 1}}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } @@ -2152,17 +2146,17 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroupsError) auto table1 = create_random_fixed_table(5, 5, true); auto filepath = temp_env->get_temp_filepath("ChunkedRowGroupsError.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(*table1); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).row_groups({{0, 1}}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).row_groups({{0, 1}}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); read_opts.set_row_groups({{-1}}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); read_opts.set_row_groups({{0}, {0}}); - EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); } TEST_F(ParquetWriterTest, DecimalWrite) @@ -2182,26 +2176,26 @@ TEST_F(ParquetWriterTest, DecimalWrite) auto table = table_view({col0, col1}); auto filepath = temp_env->get_temp_filepath("DecimalWrite.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, table); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table); - cudf_io::table_input_metadata expected_metadata(table); + cudf::io::table_input_metadata expected_metadata(table); // verify failure if too small a precision is given expected_metadata.column_metadata[0].set_decimal_precision(7); expected_metadata.column_metadata[1].set_decimal_precision(1); args.set_metadata(&expected_metadata); - EXPECT_THROW(cudf_io::write_parquet(args), cudf::logic_error); + EXPECT_THROW(cudf::io::write_parquet(args), cudf::logic_error); // verify success if equal precision is given expected_metadata.column_metadata[0].set_decimal_precision(7); expected_metadata.column_metadata[1].set_decimal_precision(9); args.set_metadata(&expected_metadata); - cudf_io::write_parquet(args); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, table); } @@ -2243,13 +2237,13 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -2291,13 +2285,13 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet"); - cudf_io::chunked_parquet_writer_options args = - cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); - cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); } @@ -2350,20 +2344,18 @@ TEST_F(ParquetWriterStressTest, LargeTableWeakCompression) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2373,20 +2365,18 @@ TEST_F(ParquetWriterStressTest, LargeTableGoodCompression) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2396,20 +2386,18 @@ TEST_F(ParquetWriterStressTest, LargeTableWithValids) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2419,20 +2407,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWeakCompression) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2442,20 +2428,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableGoodCompression) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2465,20 +2449,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWithValids) mm_buf.reserve(4 * 1024 * 1024 * 16); custom_test_memmap_sink custom_sink(&mm_buf); - namespace cudf_io = cudf::io; - // exercises multiple rowgroups srand(31337); auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); // write out using the custom sink (which uses device writes) - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options custom_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf_io::read_parquet(custom_args); + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); } @@ -2491,14 +2473,14 @@ TEST_F(ParquetReaderTest, UserBounds) auto expected = create_random_fixed_table(4, 4, false); auto filepath = temp_env->get_temp_filepath("TooManyRows.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); // attempt to read more rows than there actually are - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).num_rows(16); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(16); + auto result = cudf::io::read_parquet(read_opts); // we should only get back 4 rows EXPECT_EQ(result.tbl->view().column(0).size(), 4); @@ -2511,14 +2493,14 @@ TEST_F(ParquetReaderTest, UserBounds) auto expected = create_random_fixed_table(4, 4, false); auto filepath = temp_env->get_temp_filepath("PastBounds.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); // attempt to read more rows than there actually are - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).skip_rows(4); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).skip_rows(4); + auto result = cudf::io::read_parquet(read_opts); // we should get empty columns back EXPECT_EQ(result.tbl->view().num_columns(), 4); @@ -2533,14 +2515,14 @@ TEST_F(ParquetReaderTest, UserBounds) auto expected = create_random_fixed_table(4, 4, false); auto filepath = temp_env->get_temp_filepath("ZeroRows.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); // attempt to read more rows than there actually are - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).num_rows(0); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(0); + auto result = cudf::io::read_parquet(read_opts); EXPECT_EQ(result.tbl->view().num_columns(), 4); EXPECT_EQ(result.tbl->view().column(0).size(), 0); @@ -2553,16 +2535,16 @@ TEST_F(ParquetReaderTest, UserBounds) auto expected = create_random_fixed_table(4, 4, false); auto filepath = temp_env->get_temp_filepath("ZeroRowsPastBounds.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); // attempt to read more rows than there actually are - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .skip_rows(4) .num_rows(0); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); // we should get empty columns back EXPECT_EQ(result.tbl->view().num_columns(), 4); @@ -2578,9 +2560,9 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls) // clang-format on cudf::table_view tbl({col}); auto filepath = temp_env->get_temp_filepath("UserBoundsWithNulls.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_args); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); // skip_rows / num_rows // clang-format off @@ -2592,11 +2574,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls) {62, 2}, {63, 1}}; // clang-format on for (auto p : params) { - cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); if (p.first >= 0) { read_args.set_skip_rows(p.first); } if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf_io::read_parquet(read_args); + auto result = cudf::io::read_parquet(read_args); p.first = p.first < 0 ? 0 : p.first; p.second = p.second < 0 ? static_cast(col).size() - p.first : p.second; @@ -2622,9 +2604,9 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) // this file will have row groups of 1,000,000 each cudf::table_view tbl({col}); auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsLarge.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_args); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); // skip_rows / num_rows // clang-format off @@ -2636,11 +2618,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) {24001231, 17}, {29000001, 989999}, {29999999, 1} }; // clang-format on for (auto p : params) { - cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); if (p.first >= 0) { read_args.set_skip_rows(p.first); } if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf_io::read_parquet(read_args); + auto result = cudf::io::read_parquet(read_args); p.first = p.first < 0 ? 0 : p.first; p.second = p.second < 0 ? static_cast(col).size() - p.first : p.second; @@ -2660,9 +2642,9 @@ TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) // this file will have row groups of 1,000,000 each cudf::table_view tbl({col}); auto filepath = temp_env->get_temp_filepath("ListUserBoundsWithNullsLarge.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_args); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); // skip_rows / num_rows // clang-format off @@ -2674,11 +2656,11 @@ TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) {4001231, 17}, {1900000, 989999}, {4999999, 1} }; // clang-format on for (auto p : params) { - cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); if (p.first >= 0) { read_args.set_skip_rows(p.first); } if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf_io::read_parquet(read_args); + auto result = cudf::io::read_parquet(read_args); p.first = p.first < 0 ? 0 : p.first; p.second = p.second < 0 ? static_cast(col).size() - p.first : p.second; @@ -2697,17 +2679,18 @@ TEST_F(ParquetReaderTest, ReorderedColumns) cudf::table_view tbl{{a, b}}; auto filepath = temp_env->get_temp_filepath("ReorderedColumns.parquet"); - cudf_io::table_input_metadata md(tbl); + cudf::io::table_input_metadata md(tbl); md.column_metadata[0].set_name("a"); md.column_metadata[1].set_name("b"); - cudf_io::parquet_writer_options opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md); - cudf_io::write_parquet(opts); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).columns({"b", "a"}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"b", "a"}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); @@ -2719,17 +2702,18 @@ TEST_F(ParquetReaderTest, ReorderedColumns) cudf::table_view tbl{{a, b}}; auto filepath = temp_env->get_temp_filepath("ReorderedColumns2.parquet"); - cudf_io::table_input_metadata md(tbl); + cudf::io::table_input_metadata md(tbl); md.column_metadata[0].set_name("a"); md.column_metadata[1].set_name("b"); - cudf_io::parquet_writer_options opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md); - cudf_io::write_parquet(opts); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).columns({"b", "a"}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"b", "a"}); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); @@ -2744,21 +2728,21 @@ TEST_F(ParquetReaderTest, ReorderedColumns) cudf::table_view tbl{{a, b, c, d}}; auto filepath = temp_env->get_temp_filepath("ReorderedColumns3.parquet"); - cudf_io::table_input_metadata md(tbl); + cudf::io::table_input_metadata md(tbl); md.column_metadata[0].set_name("a"); md.column_metadata[1].set_name("b"); md.column_metadata[2].set_name("c"); md.column_metadata[3].set_name("d"); - cudf_io::parquet_writer_options opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md); - cudf_io::write_parquet(opts); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); { // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .columns({"d", "a", "b", "c"}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); @@ -2768,10 +2752,10 @@ TEST_F(ParquetReaderTest, ReorderedColumns) { // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .columns({"c", "d", "a", "b"}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), c); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), d); @@ -2781,10 +2765,10 @@ TEST_F(ParquetReaderTest, ReorderedColumns) { // read them out of order - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .columns({"d", "c", "b", "a"}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), c); @@ -2818,7 +2802,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) auto input = table_view({*struct_2}); - cudf_io::table_input_metadata input_metadata(input); + cudf::io::table_input_metadata input_metadata(input); input_metadata.column_metadata[0].set_name("being"); input_metadata.column_metadata[0].child(0).set_name("human?"); input_metadata.column_metadata[0].child(1).set_name("particulars"); @@ -2826,16 +2810,16 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) input_metadata.column_metadata[0].child(1).child(1).set_name("age"); auto filepath = temp_env->get_temp_filepath("SelectNestedColumn.parquet"); - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, input) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) .metadata(&input_metadata); - cudf_io::write_parquet(args); + cudf::io::write_parquet(args); { // Test selecting a single leaf from the table - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)) + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) .columns({"being.particulars.age"}); - const auto result = cudf_io::read_parquet(read_args); + const auto result = cudf::io::read_parquet(read_args); auto expect_ages_col = cudf::test::fixed_width_column_wrapper{ {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; @@ -2844,7 +2828,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) cudf::test::structs_column_wrapper{{expect_s_1}, {0, 1, 1, 1, 1, 1}}.release(); auto expected = table_view({*expect_s_2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("particulars"); expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); @@ -2854,10 +2838,10 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) } { // Test selecting a non-leaf and expecting all hierarchy from that node onwards - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)) + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) .columns({"being.particulars"}); - const auto result = cudf_io::read_parquet(read_args); + const auto result = cudf::io::read_parquet(read_args); auto expected_weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; @@ -2872,7 +2856,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) cudf::test::structs_column_wrapper{{expected_s_1}, {0, 1, 1, 1, 1, 1}}.release(); auto expected = table_view({*expect_s_2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("particulars"); expected_metadata.column_metadata[0].child(0).child(0).set_name("weight"); @@ -2883,10 +2867,10 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) } { // Test selecting struct children out of order - cudf_io::parquet_reader_options read_args = - cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath)) + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) .columns({"being.particulars.age", "being.particulars.weight", "being.human?"}); - const auto result = cudf_io::read_parquet(read_args); + const auto result = cudf::io::read_parquet(read_args); auto expected_weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; @@ -2906,7 +2890,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn) auto expected = table_view({*expect_s_2}); - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[0].set_name("being"); expected_metadata.column_metadata[0].child(0).set_name("particulars"); expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); @@ -3086,9 +3070,9 @@ TEST_F(ParquetReaderTest, DecimalRead) 0x00, 0x00, 0x00, 0xd3, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; unsigned int decimals_parquet_len = 2366; - cudf_io::parquet_reader_options read_opts = cudf_io::parquet_reader_options::builder( - cudf_io::source_info{reinterpret_cast(decimals_parquet), decimals_parquet_len}); - auto result = cudf_io::read_parquet(read_opts); + cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{reinterpret_cast(decimals_parquet), decimals_parquet_len}); + auto result = cudf::io::read_parquet(read_opts); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 50; }); @@ -3134,9 +3118,9 @@ TEST_F(ParquetReaderTest, DecimalRead) std::begin(col1_data), std::end(col1_data), validity, numeric::scale_type{-5}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); - cudf_io::parquet_reader_options read_strict_opts = read_opts; + cudf::io::parquet_reader_options read_strict_opts = read_opts; read_strict_opts.set_columns({"dec7p4", "dec14p5"}); - EXPECT_NO_THROW(cudf_io::read_parquet(read_strict_opts)); + EXPECT_NO_THROW(cudf::io::read_parquet(read_strict_opts)); } { // dec7p3: Decimal(precision=7, scale=3) backed by FIXED_LENGTH_BYTE_ARRAY(length = 4) @@ -3229,10 +3213,10 @@ TEST_F(ParquetReaderTest, DecimalRead) unsigned int parquet_len = 1226; - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{ + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{ reinterpret_cast(fixed_len_bytes_decimal_parquet), parquet_len}); - auto result = cudf_io::read_parquet(read_opts); + auto result = cudf::io::read_parquet(read_opts); EXPECT_EQ(result.tbl->view().num_columns(), 3); auto validity_c0 = cudf::test::iterators::nulls_at({19}); @@ -3324,18 +3308,18 @@ TEST_F(ParquetReaderTest, EmptyOutput) table_view expected({c0, c1, c2, *c3, c4}); // set precision on the decimal column - cudf_io::table_input_metadata expected_metadata(expected); + cudf::io::table_input_metadata expected_metadata(expected); expected_metadata.column_metadata[2].set_decimal_precision(1); auto filepath = temp_env->get_temp_filepath("EmptyOutput.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); out_args.set_metadata(&expected_metadata); - cudf_io::write_parquet(out_args); + cudf::io::write_parquet(out_args); - cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - auto result = cudf_io::read_parquet(read_args); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_args); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -3345,33 +3329,33 @@ TEST_F(ParquetWriterTest, RowGroupSizeInvalid) const auto unused_table = std::make_unique(); std::vector out_buffer; - EXPECT_THROW( - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .row_group_size_rows(4999), - cudf::logic_error); - EXPECT_THROW( - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .max_page_size_rows(4999), - cudf::logic_error); - EXPECT_THROW( - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .row_group_size_bytes(3 << 10), - cudf::logic_error); - EXPECT_THROW( - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .max_page_size_bytes(3 << 10), - cudf::logic_error); - - EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) .row_group_size_rows(4999), cudf::logic_error); - EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) .max_page_size_rows(4999), cudf::logic_error); - EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) .row_group_size_bytes(3 << 10), cudf::logic_error); - EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .max_page_size_bytes(3 << 10), + cudf::logic_error); + + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .row_group_size_rows(4999), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .max_page_size_rows(4999), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) + .row_group_size_bytes(3 << 10), + cudf::logic_error); + EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) .max_page_size_bytes(3 << 10), cudf::logic_error); } @@ -3381,13 +3365,13 @@ TEST_F(ParquetWriterTest, RowGroupPageSizeMatch) const auto unused_table = std::make_unique
(); std::vector out_buffer; - auto options = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view()) - .row_group_size_bytes(128 * 1024) - .max_page_size_bytes(512 * 1024) - .row_group_size_rows(10000) - .max_page_size_rows(20000) - .build(); + auto options = cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), + unused_table->view()) + .row_group_size_bytes(128 * 1024) + .max_page_size_bytes(512 * 1024) + .row_group_size_rows(10000) + .max_page_size_rows(20000) + .build(); EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes()); EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows()); } @@ -3396,7 +3380,7 @@ TEST_F(ParquetChunkedWriterTest, RowGroupPageSizeMatch) { std::vector out_buffer; - auto options = cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer)) + auto options = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) .row_group_size_bytes(128 * 1024) .max_page_size_bytes(512 * 1024) .row_group_size_rows(10000) @@ -3420,7 +3404,7 @@ TEST_F(ParquetWriterTest, EmptyList) cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), cudf::table_view({*L0}))); - auto result = cudf_io::read_parquet( + auto result = cudf::io::read_parquet( cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); using lcw = cudf::test::lists_column_wrapper; @@ -3447,7 +3431,7 @@ TEST_F(ParquetWriterTest, DeepEmptyList) cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), cudf::table_view({*L0}))); - auto result = cudf_io::read_parquet( + auto result = cudf::io::read_parquet( cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); @@ -3472,7 +3456,7 @@ TEST_F(ParquetWriterTest, EmptyListWithStruct) auto filepath = temp_env->get_temp_filepath("EmptyListWithStruct.parquet"); cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), cudf::table_view({*L0}))); - auto result = cudf_io::read_parquet( + auto result = cudf::io::read_parquet( cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); @@ -4362,15 +4346,15 @@ TEST_F(ParquetReaderTest, EmptyColumnsParam) auto const expected = create_random_fixed_table(2, 4, false); std::vector out_buffer; - cudf_io::parquet_writer_options args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&out_buffer}, *expected); - cudf_io::write_parquet(args); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); + cudf::io::write_parquet(args); - cudf_io::parquet_reader_options read_opts = - cudf_io::parquet_reader_options::builder( - cudf_io::source_info{out_buffer.data(), out_buffer.size()}) + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}) .columns({}); - auto const result = cudf_io::read_parquet(read_opts); + auto const result = cudf::io::read_parquet(read_opts); EXPECT_EQ(result.tbl->num_columns(), 0); EXPECT_EQ(result.tbl->num_rows(), 0); @@ -4401,7 +4385,7 @@ TEST_F(ParquetReaderTest, BinaryAsStrings) {'F', 'u', 'n', 'd', 'a', 'y'}}; auto output = table_view{{int_col, string_col, float_col, string_col, list_int_col}}; - cudf_io::table_input_metadata output_metadata(output); + cudf::io::table_input_metadata output_metadata(output); output_metadata.column_metadata[0].set_name("col_other"); output_metadata.column_metadata[1].set_name("col_string"); output_metadata.column_metadata[2].set_name("col_float"); @@ -4409,37 +4393,38 @@ TEST_F(ParquetReaderTest, BinaryAsStrings) output_metadata.column_metadata[4].set_name("col_binary").set_output_as_binary(true); auto filepath = temp_env->get_temp_filepath("BinaryReadStrings.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, output) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output) .metadata(&output_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); auto expected_string = table_view{{int_col, string_col, float_col, string_col, string_col}}; auto expected_mixed = table_view{{int_col, string_col, float_col, list_int_col, list_int_col}}; - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .set_column_schema({{}, {}, {}, {}, {}}); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); - cudf_io::parquet_reader_options default_in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}); - result = cudf_io::read_parquet(default_in_opts); + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + result = cudf::io::read_parquet(default_in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); - std::vector md{ + std::vector md{ {}, {}, {}, - cudf_io::reader_column_schema().set_convert_binary_to_strings(false), - cudf_io::reader_column_schema().set_convert_binary_to_strings(false)}; + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false)}; - cudf_io::parquet_reader_options mixed_in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md); - result = cudf_io::read_parquet(mixed_in_opts); + cudf::io::parquet_reader_options mixed_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + result = cudf::io::read_parquet(mixed_in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_mixed, result.tbl->view()); } @@ -4478,32 +4463,33 @@ TEST_F(ParquetReaderTest, NestedByteArray) {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}}; auto const expected = table_view{{int_col, float_col, list_list_int_col}}; - cudf_io::table_input_metadata output_metadata(expected); + cudf::io::table_input_metadata output_metadata(expected); output_metadata.column_metadata[0].set_name("col_other"); output_metadata.column_metadata[1].set_name("col_float"); output_metadata.column_metadata[2].set_name("col_binary").child(1).set_output_as_binary(true); auto filepath = temp_env->get_temp_filepath("NestedByteArray.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&output_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - auto source = cudf_io::datasource::create(filepath); - cudf_io::parquet::FileMetaData fmd; + auto source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; read_footer(source, &fmd); EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::Type::BYTE_ARRAY); - std::vector md{ + std::vector md{ {}, {}, - cudf_io::reader_column_schema().add_child( - cudf_io::reader_column_schema().set_convert_binary_to_strings(false))}; + cudf::io::reader_column_schema().add_child( + cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } @@ -4524,23 +4510,23 @@ TEST_F(ParquetWriterTest, ByteArrayStats) {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}}; auto expected = table_view{{list_int_col0, list_int_col1}}; - cudf_io::table_input_metadata output_metadata(expected); + cudf::io::table_input_metadata output_metadata(expected); output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); auto filepath = temp_env->get_temp_filepath("ByteArrayStats.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&output_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) .set_column_schema({{}, {}}); - auto result = cudf_io::read_parquet(in_opts); + auto result = cudf::io::read_parquet(in_opts); - auto source = cudf_io::datasource::create(filepath); - cudf_io::parquet::FileMetaData fmd; + auto source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; read_footer(source, &fmd); @@ -4570,7 +4556,7 @@ TEST_F(ParquetReaderTest, StructByteArray) auto const expected = table_view{{struct_col}}; EXPECT_EQ(1, expected.num_columns()); - cudf_io::table_input_metadata output_metadata(expected); + cudf::io::table_input_metadata output_metadata(expected); output_metadata.column_metadata[0] .set_name("struct_binary") .child(0) @@ -4578,17 +4564,18 @@ TEST_F(ParquetReaderTest, StructByteArray) .set_output_as_binary(true); auto filepath = temp_env->get_temp_filepath("StructByteArray.parquet"); - cudf_io::parquet_writer_options out_opts = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected) + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) .metadata(&output_metadata); - cudf_io::write_parquet(out_opts); + cudf::io::write_parquet(out_opts); - std::vector md{cudf_io::reader_column_schema().add_child( - cudf_io::reader_column_schema().set_convert_binary_to_strings(false))}; + std::vector md{cudf::io::reader_column_schema().add_child( + cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; - cudf_io::parquet_reader_options in_opts = - cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md); - auto result = cudf_io::read_parquet(in_opts); + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + auto result = cudf::io::read_parquet(in_opts); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); }