Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Repro for issue 14282 #14291

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 63 additions & 8 deletions java/src/test/java/ai/rapids/cudf/TableTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,7 @@
import static ai.rapids.cudf.ParquetWriterOptions.structBuilder;
import static ai.rapids.cudf.Table.TestBuilder;
import static ai.rapids.cudf.Table.removeNullMasksIfNeeded;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.*;

public class TableTest extends CudfTestBase {
private static final HostMemoryAllocator hostMemoryAllocator = DefaultHostMemoryAllocator.get();
Expand All @@ -87,6 +80,7 @@ public class TableTest extends CudfTestBase {
private static final File TEST_SIMPLE_CSV_FILE = TestUtils.getResourceAsFile("simple.csv");
private static final File TEST_SIMPLE_JSON_FILE = TestUtils.getResourceAsFile("people.json");
private static final File TEST_JSON_ERROR_FILE = TestUtils.getResourceAsFile("people_with_invalid_lines.json");
private static final File TEST_TEACHERS_JSON = TestUtils.getResourceAsFile("teachers.json");

private static final Schema CSV_DATA_BUFFER_SCHEMA = Schema.builder()
.column(DType.INT32, "A")
Expand Down Expand Up @@ -309,6 +303,67 @@ void testGetNumberOfColumns() {
}
}

/**
 * Regression test for https://github.com/rapidsai/cudf/issues/14282.
 *
 * <p>Reads the teachers JSON resource line by line, records the expected value of the
 * "teacher" field for every line, then parses the same text with
 * {@code Table.readJSON} (with recover-with-null enabled) and checks the parsed
 * "teacher" column row by row against the expected values.
 *
 * @throws IOException if the JSON resource cannot be read
 */
@Test
void testReadTeachersJSON() throws IOException {
  // Sentinel used on both sides of the comparison to represent a null teacher.
  final String nullMarker = "NULL";
  // Index at which the teacher name is taken to start within a line.
  // NOTE(review): value carried over unchanged from the original test; it assumes a
  // fixed prefix in teachers.json - confirm against the resource file.
  final int nameStart = 13;

  // Read the JSON into memory and build the list of expected teacher names so that
  // they can be compared with the results of cuDF parsing the JSON.
  StringBuilder jsonText = new StringBuilder();
  List<String> expectedTeachers = new ArrayList<>();
  try (BufferedReader reader = new BufferedReader(new FileReader(TEST_TEACHERS_JSON))) {
    String rawLine;
    while ((rawLine = reader.readLine()) != null) {
      // Trim once and use the same text for both the buffer handed to cuDF and the
      // expected-value extraction, so the two can never disagree on whitespace.
      String line = rawLine.trim();
      jsonText.append(line).append('\n');
      if (line.equals("{\"teacher\":null}")) {
        expectedTeachers.add(nullMarker);
      } else {
        int nameEnd = line.indexOf("\"", nameStart);
        assertTrue(nameEnd >= nameStart,
            "Could not locate the end of the teacher name in line: " + line);
        expectedTeachers.add(line.substring(nameStart, nameEnd));
      }
    }
  }

  // Round-trip the text through a single-row string column so that the bytes are
  // available as a HostMemoryBuffer; the offsets of row 0 delimit the JSON text.
  try (ColumnVector cv = ColumnVector.fromStrings(jsonText.toString().trim());
       HostColumnVector hostCv = cv.copyToHost()) {

    HostMemoryBuffer data = hostCv.getData();
    long start = hostCv.getStartListOffset(0);
    long end = hostCv.getEndListOffset(0);
    long length = end - start;
    JSONOptions opts = JSONOptions.builder()
        .withRecoverWithNull(true)
        .build();

    try (TableWithMeta tableWithMeta = Table.readJSON(opts, data, start, length)) {
      // Use JUnit assertions rather than the `assert` keyword: the latter is a no-op
      // when the JVM runs without -ea and reports far less useful failures.
      String[] columnNames = tableWithMeta.getColumnNames();
      assertEquals(2, columnNames.length);
      assertEquals("teacher", columnNames[0]);
      assertEquals("student", columnNames[1]);

      try (Table table = tableWithMeta.releaseTable()) {
        ColumnVector teachers = table.getColumn(0);
        long rowCount = teachers.getRowCount();
        assertEquals(512, rowCount);

        // Verify every parsed value against the value extracted from the raw text.
        try (HostColumnVector hostTeachers = teachers.copyToHost()) {
          for (int i = 0; i < rowCount; i++) {
            String actual = hostTeachers.isNull(i) ? nullMarker : hostTeachers.getJavaString(i);
            assertEquals(expectedTeachers.get(i), actual, "Mismatch at row " + i);
          }
        }

        assertEquals(186, teachers.getNullCount());
      }
    }
  }
}

@Test
void testReadJSONFile() {
Schema schema = Schema.builder()
Expand Down
Loading