Drop support for skiprows and num_rows in cudf.read_parquet (#11480)

This PR removes support for `skiprows` and `num_rows` in the Parquet reader. It is a continuation of #11218.

Authors:
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)
- Vukasin Milovanovic (https://github.com/vuule)

URL: #11480
rapidsai · Aug 5, 2022 · 2e13e5f · 2e13e5f
1 parent 493d96b
commit 2e13e5f
Show file tree

Hide file tree

Showing 5 changed files with 32 additions and 183 deletions.
diff --git a/python/cudf/cudf/_lib/cpp/io/parquet.pxd b/python/cudf/cudf/_lib/cpp/io/parquet.pxd
@@ -20,17 +20,13 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         data_type get_timestamp_type() except +
         bool is_enabled_convert_strings_to_categories() except +
         bool is_enabled_use_pandas_metadata() except +
-        size_type get_skip_rows() except +
-        size_type get_num_rows() except +
 
         # setter
 
         void set_columns(vector[string] col_names) except +
         void set_row_groups(vector[vector[size_type]] row_grp) except +
         void enable_convert_strings_to_categories(bool val) except +
         void enable_use_pandas_metadata(bool val) except +
-        void set_skip_rows(size_type val) except +
-        void set_num_rows(size_type val) except +
         void set_timestamp_type(data_type type) except +
 
         @staticmethod
@@ -55,8 +51,6 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         parquet_reader_options_builder& use_pandas_metadata(
             bool val
         ) except +
-        parquet_reader_options_builder& skip_rows(size_type val) except +
-        parquet_reader_options_builder& num_rows(size_type val) except +
         parquet_reader_options_builder& timestamp_type(
             data_type type
         ) except +

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
@@ -125,7 +125,7 @@ def _parse_metadata(meta):
 
 
 cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
-                   skiprows=None, num_rows=None, strings_to_categorical=False,
+                   strings_to_categorical=False,
                    use_pandas_metadata=True):
     """
     Cython function to call into libcudf API, see `read_parquet`.
@@ -151,8 +151,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
 
     cdef bool cpp_strings_to_categorical = strings_to_categorical
     cdef bool cpp_use_pandas_metadata = use_pandas_metadata
-    cdef size_type cpp_skiprows = skiprows if skiprows is not None else 0
-    cdef size_type cpp_num_rows = num_rows if num_rows is not None else -1
+
     cdef vector[vector[size_type]] cpp_row_groups
     cdef data_type cpp_timestamp_type = cudf_types.data_type(
         cudf_types.type_id.EMPTY
@@ -168,8 +167,6 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
         .row_groups(cpp_row_groups)
         .convert_strings_to_categories(cpp_strings_to_categorical)
         .use_pandas_metadata(cpp_use_pandas_metadata)
-        .skip_rows(cpp_skiprows)
-        .num_rows(cpp_num_rows)
         .timestamp_type(cpp_timestamp_type)
         .build()
     )
@@ -291,10 +288,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
                     step=range_index_meta['step'],
                     name=range_index_meta['name']
                 )
-                if skiprows is not None:
-                    idx = idx[skiprows:]
-                if num_rows is not None:
-                    idx = idx[:num_rows]
+
             df._index = idx
         elif set(index_col).issubset(column_names):
             index_data = df[index_col]

diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
@@ -359,8 +359,6 @@ def read_parquet(
     columns=None,
     filters=None,
     row_groups=None,
-    skiprows=None,
-    num_rows=None,
     strings_to_categorical=False,
     use_pandas_metadata=True,
     use_python_file_object=True,
@@ -371,18 +369,6 @@ def read_parquet(
 ):
     """{docstring}"""
 
-    if skiprows is not None:
-        warnings.warn(
-            "skiprows is deprecated and will be removed.",
-            FutureWarning,
-        )
-
-    if num_rows is not None:
-        warnings.warn(
-            "num_rows is deprecated and will be removed.",
-            FutureWarning,
-        )
-
     # Do not allow the user to set file-opening options
     # when `use_python_file_object=False` is specified
     if use_python_file_object is False:
@@ -485,8 +471,6 @@ def read_parquet(
         *args,
         columns=columns,
         row_groups=row_groups,
-        skiprows=skiprows,
-        num_rows=num_rows,
         strings_to_categorical=strings_to_categorical,
         use_pandas_metadata=use_pandas_metadata,
         partition_keys=partition_keys,
@@ -575,8 +559,6 @@ def _read_parquet(
     engine,
     columns=None,
     row_groups=None,
-    skiprows=None,
-    num_rows=None,
     strings_to_categorical=None,
     use_pandas_metadata=None,
     *args,
@@ -589,8 +571,6 @@ def _read_parquet(
             filepaths_or_buffers,
             columns=columns,
             row_groups=row_groups,
-            skiprows=skiprows,
-            num_rows=num_rows,
             strings_to_categorical=strings_to_categorical,
             use_pandas_metadata=use_pandas_metadata,
         )