From 11f32faaa787173b71d329bba9c1c07f7658a936 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 8 Feb 2023 17:39:31 +0800 Subject: [PATCH] Switch to nested JSON reader Signed-off-by: Chong Gao --- java/src/main/native/src/TableJni.cpp | 6 ++-- .../test/java/ai/rapids/cudf/TableTest.java | 30 ++++++++++++++----- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 3d730ff61a1..680f8cd2cbb 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1334,8 +1334,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readAndInferJSON( cudf::io::json_reader_options_builder opts = cudf::io::json_reader_options::builder(source) .dayfirst(static_cast<bool>(day_first)) - .lines(static_cast<bool>(lines)) - .legacy(true); + .lines(static_cast<bool>(lines)); auto result = std::make_unique<cudf::io::table_with_metadata>(cudf::io::read_json(opts.build())); @@ -1441,8 +1440,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readJSON( cudf::io::json_reader_options_builder opts = cudf::io::json_reader_options::builder(source) .dayfirst(static_cast<bool>(day_first)) - .lines(static_cast<bool>(lines)) - .legacy(true); + .lines(static_cast<bool>(lines)); if (!n_col_names.is_null() && data_types.size() > 0) { if (n_col_names.size() != n_types.size()) { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 1656d871c2d..4f00bc7493d 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -335,22 +335,38 @@ void testReadJSONBufferInferred() { JSONOptions opts = JSONOptions.builder() .withDayFirst(true) .build(); - byte[] data = ("[false,A,1,2,05/03/2001]\n" + - "[true,B,2,3,31/10/2010]'\n" + - "[false,C,3,4,20/10/1994]\n" + - "[true,D,4,5,18/10/1990]").getBytes(StandardCharsets.UTF_8); + byte[] data = ("[false,A,1,2]\n" + + "[true,B,2,3]\n" + + "[false,C,3,4]\n" + 
"[true,D,4,5]").getBytes(StandardCharsets.UTF_8); try (Table expected = new Table.TestBuilder() .column(false, true, false, true) .column("A", "B", "C", "D") .column(1L, 2L, 3L, 4L) .column(2L, 3L, 4L, 5L) - .timestampMillisecondsColumn(983750400000L, 1288483200000L, 782611200000L, 656208000000L) .build(); Table table = Table.readJSON(Schema.INFERRED, opts, data)) { assertTablesAreEqual(expected, table); } } + @Test + void testReadJSONSubColumns() { + // JSON file has 2 columns, here only read 1 column + Schema schema = Schema.builder() + .column(DType.INT32, "age") + .build(); + JSONOptions opts = JSONOptions.builder() + .withLines(true) + .build(); + try (Table expected = new Table.TestBuilder() + .column(null, 30, 19) + .build(); + Table table = Table.readJSON(schema, opts, TEST_SIMPLE_JSON_FILE)) { + assertTablesAreEqual(expected, table); + } + } + @Test void testReadJSONBuffer() { // JSON reader will set the column according to the iterator if can't infer the name @@ -363,7 +379,7 @@ void testReadJSONBuffer() { JSONOptions opts = JSONOptions.builder() .build(); byte[] data = ("[A,1,2]\n" + - "[B,2,3]'\n" + + "[B,2,3]\n" + "[C,3,4]\n" + "[D,4,5]").getBytes(StandardCharsets.UTF_8); try (Table expected = new Table.TestBuilder() @@ -389,7 +405,7 @@ void testReadJSONBufferWithOffset() { .build(); int bytesToIgnore = 8; byte[] data = ("[A,1,2]\n" + - "[B,2,3]'\n" + + "[B,2,3]\n" + "[C,3,4]\n" + "[D,4,5]").getBytes(StandardCharsets.UTF_8); try (Table expected = new Table.TestBuilder()