Skip to content

Commit

Permalink
Merge pull request #7275 from rapidsai/branch-0.18
Browse files Browse the repository at this point in the history
[gpuCI] Auto-merge branch-0.18 to branch-0.19 [skip ci]
  • Loading branch information
GPUtester authored Feb 1, 2021
2 parents 9718f5c + 0ee8004 commit d9ed2b1
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
6 changes: 5 additions & 1 deletion cpp/src/io/parquet/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,11 @@ class aggregate_metadata {
for (const auto &use_name : local_use_names) {
for (size_t schema_idx = 1; schema_idx < pfm.schema.size(); schema_idx++) {
auto const &schema = pfm.schema[schema_idx];
if (use_name == schema.name) { output_column_schemas.push_back(schema_idx); }
// We select only top level columns by name. Selecting nested columns by name is not
// supported. Top level columns are identified by their parent being the root (idx == 0)
if (use_name == schema.name and schema.parent_idx == 0) {
output_column_schemas.push_back(schema_idx);
}
}
}
}
Expand Down
Binary file not shown.
9 changes: 9 additions & 0 deletions python/cudf/cudf/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,15 @@ def test_parquet_reader_mixedcompression(datadir):
assert_eq(expect, got)


def test_parquet_reader_select_columns(datadir):
    """Check that selecting the "value" column at read time matches a full read.

    The file is first read in full, converted to pandas, and narrowed to the
    "value" column. That reference is then compared against a second read of
    the same file that passes ``columns=["value"]`` to the reader.
    """
    parquet_path = datadir / "nested_column_map.parquet"

    # Reference result: read everything, then keep only "value" on the pandas side.
    full_pdf = cudf.read_parquet(parquet_path).to_pandas()
    expected_pdf = full_pdf[["value"]]

    # Result under test: let the reader perform the column selection.
    selected_gdf = cudf.read_parquet(parquet_path, columns=["value"])

    assert_eq(expected_pdf, selected_gdf)


def test_parquet_reader_invalids(tmpdir):
test_pdf = make_pdf(nrows=1000, nvalids=1000 // 4, dtype=np.int64)

Expand Down

0 comments on commit d9ed2b1

Please sign in to comment.