Introduce dedicated options for low memory readers (#16289)
This PR disables the low-memory readers by default in `cudf.pandas` and instead provides dedicated options (`io.parquet.low_memory` and `io.json.low_memory`) to enable them.
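As a minimal usage sketch (the file paths below are placeholders, not part of this PR), the new options can be turned on globally with `cudf.set_option` or scoped with `cudf.option_context`, as the updated tests do:

```python
import cudf

# Enable chunked, low-memory Parquet reading for the whole session
# (sketch only; "data.parquet" is a placeholder path).
cudf.set_option("io.parquet.low_memory", True)
pq_df = cudf.read_parquet("data.parquet")

# Or enable the chunked JSON reader only within a scope
# ("records.jsonl" is likewise a placeholder).
with cudf.option_context("io.json.low_memory", True):
    json_df = cudf.read_json("records.jsonl", lines=True)
```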

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #16289
galipremsagar authored Jul 16, 2024
1 parent a6de6cc commit 3418f91
Showing 5 changed files with 30 additions and 4 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/json.pyx
@@ -99,7 +99,7 @@ cpdef read_json(object filepaths_or_buffers,
         else:
             raise TypeError("`dtype` must be 'list like' or 'dict'")
 
-    if cudf.get_option("mode.pandas_compatible") and lines:
+    if cudf.get_option("io.json.low_memory") and lines:
         res_cols, res_col_names, res_child_names = plc.io.json.chunked_read_json(
             plc.io.SourceInfo(filepaths_or_buffers),
             processed_dtypes,
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/parquet.py
@@ -916,7 +916,7 @@ def _read_parquet(
                 "cudf engine doesn't support the "
                 f"following positional arguments: {list(args)}"
             )
-        if cudf.get_option("mode.pandas_compatible"):
+        if cudf.get_option("io.parquet.low_memory"):
             return libparquet.ParquetReader(
                 filepaths_or_buffers,
                 columns=columns,
26 changes: 26 additions & 0 deletions python/cudf/cudf/options.py
@@ -325,6 +325,32 @@ def _integer_and_none_validator(val):
     _make_contains_validator([False, True]),
 )
 
+_register_option(
+    "io.parquet.low_memory",
+    False,
+    textwrap.dedent(
+        """
+        If set to `False`, reads entire parquet in one go.
+        If set to `True`, reads parquet file in chunks.
+        \tValid values are True or False. Default is False.
+        """
+    ),
+    _make_contains_validator([False, True]),
+)
+
+_register_option(
+    "io.json.low_memory",
+    False,
+    textwrap.dedent(
+        """
+        If set to `False`, reads entire json in one go.
+        If set to `True`, reads json file in chunks.
+        \tValid values are True or False. Default is False.
+        """
+    ),
+    _make_contains_validator([False, True]),
+)
+
 
 class option_context(ContextDecorator):
     """
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_json.py
@@ -1441,6 +1441,6 @@ def test_chunked_json_reader():
     df.to_json(buf, lines=True, orient="records", engine="cudf")
     buf.seek(0)
     df = df.to_pandas()
-    with cudf.option_context("mode.pandas_compatible", True):
+    with cudf.option_context("io.json.low_memory", True):
         gdf = cudf.read_json(buf, lines=True)
     assert_eq(df, gdf)
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_parquet.py
@@ -3772,6 +3772,6 @@ def test_parquet_reader_pandas_compatibility():
     )
     buffer = BytesIO()
     df.to_parquet(buffer)
-    with cudf.option_context("mode.pandas_compatible", True):
+    with cudf.option_context("io.parquet.low_memory", True):
         expected = cudf.read_parquet(buffer)
     assert_eq(expected, df)
