Skip to content

Commit

Permalink
Drop support for skiprows and num_rows in cudf.read_parquet (#11480)
Browse files Browse the repository at this point in the history

This PR removes support for the `skiprows` and `num_rows` parameters in the Parquet reader. It is a continuation of #11218.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #11480
  • Loading branch information
galipremsagar authored Aug 5, 2022
1 parent 493d96b commit 2e13e5f
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 183 deletions.
6 changes: 0 additions & 6 deletions python/cudf/cudf/_lib/cpp/io/parquet.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,13 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
data_type get_timestamp_type() except +
bool is_enabled_convert_strings_to_categories() except +
bool is_enabled_use_pandas_metadata() except +
size_type get_skip_rows() except +
size_type get_num_rows() except +

# setter

void set_columns(vector[string] col_names) except +
void set_row_groups(vector[vector[size_type]] row_grp) except +
void enable_convert_strings_to_categories(bool val) except +
void enable_use_pandas_metadata(bool val) except +
void set_skip_rows(size_type val) except +
void set_num_rows(size_type val) except +
void set_timestamp_type(data_type type) except +

@staticmethod
Expand All @@ -55,8 +51,6 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
parquet_reader_options_builder& use_pandas_metadata(
bool val
) except +
parquet_reader_options_builder& skip_rows(size_type val) except +
parquet_reader_options_builder& num_rows(size_type val) except +
parquet_reader_options_builder& timestamp_type(
data_type type
) except +
Expand Down
12 changes: 3 additions & 9 deletions python/cudf/cudf/_lib/parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def _parse_metadata(meta):


cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
skiprows=None, num_rows=None, strings_to_categorical=False,
strings_to_categorical=False,
use_pandas_metadata=True):
"""
Cython function to call into libcudf API, see `read_parquet`.
Expand All @@ -151,8 +151,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,

cdef bool cpp_strings_to_categorical = strings_to_categorical
cdef bool cpp_use_pandas_metadata = use_pandas_metadata
cdef size_type cpp_skiprows = skiprows if skiprows is not None else 0
cdef size_type cpp_num_rows = num_rows if num_rows is not None else -1

cdef vector[vector[size_type]] cpp_row_groups
cdef data_type cpp_timestamp_type = cudf_types.data_type(
cudf_types.type_id.EMPTY
Expand All @@ -168,8 +167,6 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
.row_groups(cpp_row_groups)
.convert_strings_to_categories(cpp_strings_to_categorical)
.use_pandas_metadata(cpp_use_pandas_metadata)
.skip_rows(cpp_skiprows)
.num_rows(cpp_num_rows)
.timestamp_type(cpp_timestamp_type)
.build()
)
Expand Down Expand Up @@ -291,10 +288,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
step=range_index_meta['step'],
name=range_index_meta['name']
)
if skiprows is not None:
idx = idx[skiprows:]
if num_rows is not None:
idx = idx[:num_rows]

df._index = idx
elif set(index_col).issubset(column_names):
index_data = df[index_col]
Expand Down
20 changes: 0 additions & 20 deletions python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,6 @@ def read_parquet(
columns=None,
filters=None,
row_groups=None,
skiprows=None,
num_rows=None,
strings_to_categorical=False,
use_pandas_metadata=True,
use_python_file_object=True,
Expand All @@ -371,18 +369,6 @@ def read_parquet(
):
"""{docstring}"""

if skiprows is not None:
warnings.warn(
"skiprows is deprecated and will be removed.",
FutureWarning,
)

if num_rows is not None:
warnings.warn(
"num_rows is deprecated and will be removed.",
FutureWarning,
)

# Do not allow the user to set file-opening options
# when `use_python_file_object=False` is specified
if use_python_file_object is False:
Expand Down Expand Up @@ -485,8 +471,6 @@ def read_parquet(
*args,
columns=columns,
row_groups=row_groups,
skiprows=skiprows,
num_rows=num_rows,
strings_to_categorical=strings_to_categorical,
use_pandas_metadata=use_pandas_metadata,
partition_keys=partition_keys,
Expand Down Expand Up @@ -575,8 +559,6 @@ def _read_parquet(
engine,
columns=None,
row_groups=None,
skiprows=None,
num_rows=None,
strings_to_categorical=None,
use_pandas_metadata=None,
*args,
Expand All @@ -589,8 +571,6 @@ def _read_parquet(
filepaths_or_buffers,
columns=columns,
row_groups=row_groups,
skiprows=skiprows,
num_rows=num_rows,
strings_to_categorical=strings_to_categorical,
use_pandas_metadata=use_pandas_metadata,
)
Expand Down
Loading

0 comments on commit 2e13e5f

Please sign in to comment.