From 3a0c843efac92818d29065e4a597c69925575f5e Mon Sep 17 00:00:00 2001
From: galipremsagar
Date: Tue, 16 Jul 2024 15:23:39 +0000
Subject: [PATCH] Introduce dedicated options

Stop selecting the chunked Parquet and JSON-lines readers through
`mode.pandas_compatible`. Register two boolean options,
`io.parquet.low_memory` and `io.json.low_memory` (both default to
False), and make `_read_parquet` and `read_json` consult them instead.
The existing chunked-reader tests are switched to the new options.

---
Reviewer note (ignored by `git am`): the line structure of this patch
was restored from a whitespace-collapsed copy. Context-line indentation
was reconstructed and should be confirmed against the target tree; if a
hunk is rejected for whitespace only, retry with
`git apply --ignore-whitespace`.

 python/cudf/cudf/_lib/json.pyx         |  2 +-
 python/cudf/cudf/io/parquet.py         |  2 +-
 python/cudf/cudf/options.py            | 26 ++++++++++++++++++++++++++
 python/cudf/cudf/tests/test_json.py    |  2 +-
 python/cudf/cudf/tests/test_parquet.py |  2 +-
 5 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index 853dd431099..03bf9ed8b75 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -99,7 +99,7 @@ cpdef read_json(object filepaths_or_buffers,
         else:
             raise TypeError("`dtype` must be 'list like' or 'dict'")
 
-    if cudf.get_option("mode.pandas_compatible") and lines:
+    if cudf.get_option("io.json.low_memory") and lines:
         res_cols, res_col_names, res_child_names = plc.io.json.chunked_read_json(
             plc.io.SourceInfo(filepaths_or_buffers),
             processed_dtypes,
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index fd0792b5edb..02b26ea1c01 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -916,7 +916,7 @@ def _read_parquet(
                 "cudf engine doesn't support the "
                 f"following positional arguments: {list(args)}"
             )
-        if cudf.get_option("mode.pandas_compatible"):
+        if cudf.get_option("io.parquet.low_memory"):
             return libparquet.ParquetReader(
                 filepaths_or_buffers,
                 columns=columns,
diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py
index 1f539e7f266..94e73021cec 100644
--- a/python/cudf/cudf/options.py
+++ b/python/cudf/cudf/options.py
@@ -325,6 +325,32 @@ def _integer_and_none_validator(val):
     _make_contains_validator([False, True]),
 )
 
+_register_option(
+    "io.parquet.low_memory",
+    False,
+    textwrap.dedent(
+        """
+        If set to `False`, reads entire parquet in one go.
+        If set to `True`, reads parquet file in chunks.
+        \tValid values are True or False. Default is False.
+    """
+    ),
+    _make_contains_validator([False, True]),
+)
+
+_register_option(
+    "io.json.low_memory",
+    False,
+    textwrap.dedent(
+        """
+        If set to `False`, reads entire json in one go.
+        If set to `True`, reads json file in chunks.
+        \tValid values are True or False. Default is False.
+    """
+    ),
+    _make_contains_validator([False, True]),
+)
+
 
 class option_context(ContextDecorator):
     """
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 7771afd692f..c81c2d1d94b 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -1441,6 +1441,6 @@ def test_chunked_json_reader():
     df.to_json(buf, lines=True, orient="records", engine="cudf")
     buf.seek(0)
     df = df.to_pandas()
-    with cudf.option_context("mode.pandas_compatible", True):
+    with cudf.option_context("io.json.low_memory", True):
         gdf = cudf.read_json(buf, lines=True)
     assert_eq(df, gdf)
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index ff0c9040737..ecb7fd44422 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -3772,6 +3772,6 @@ def test_parquet_reader_pandas_compatibility():
     )
     buffer = BytesIO()
     df.to_parquet(buffer)
-    with cudf.option_context("mode.pandas_compatible", True):
+    with cudf.option_context("io.parquet.low_memory", True):
         expected = cudf.read_parquet(buffer)
     assert_eq(expected, df)