From c824fee8181d06ba1c05a5de4d4ebc0a52027753 Mon Sep 17 00:00:00 2001 From: Gregory Kimball Date: Thu, 13 Oct 2022 16:21:09 -0700 Subject: [PATCH] Add clear indication of non-GPU accelerated parameters in read_json docstring (#11825) This PR moves the "pandas engine only" arguments to the end of the optional argument list of the docstring. This is what an `admonition` will look like: Screen Shot 2022-10-11 at 12 06 50 PM Authors: - Gregory Kimball (https://github.com/GregoryKimball) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Lawrence Mitchell (https://github.com/wence-) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11825 --- python/cudf/cudf/utils/ioutils.py | 80 ++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 366b2e0ebae..0a0647f1297 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -451,7 +451,7 @@ """ doc_to_orc = docfmt_partial(docstring=_docstring_to_orc) -_docstring_read_json = """ +_docstring_read_json = r""" Load a JSON dataset into a DataFrame Parameters @@ -466,8 +466,13 @@ engine : {{ 'auto', 'cudf', 'cudf_experimental', 'pandas' }}, default 'auto' Parser engine to use. If 'auto' is passed, the engine will be automatically selected based on the other parameters. -orient : string, - Indication of expected JSON string format (pandas engine only). +orient : string + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + Indication of expected JSON string format. Compatible JSON strings can be produced by ``to_json()`` with a corresponding orient value. The set of possible orients is: @@ -500,12 +505,23 @@ typ : type of object to recover (series or frame), default 'frame' With cudf engine, only frame output is supported. 
dtype : boolean or dict, default True - If True, infer dtypes, if a dict of column to dtype, then use those, - if False, then don't infer dtypes at all, applies only to the data. + If True, infer dtypes for all columns; if False, then don't infer dtypes at all; + if a dict, provide a mapping from column names to their respective dtype (any missing + columns will have their dtype inferred). Applies only to the data. convert_axes : boolean, default True - Try to convert the axes to the proper dtypes (pandas engine only). + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + Try to convert the axes to the proper dtypes. convert_dates : boolean, default True - List of columns to parse for dates (pandas engine only); If True, then try + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + List of columns to parse for dates; if True, then try to parse datelike columns default is True; a column label is datelike if * it ends with ``'_at'``, @@ -514,27 +530,57 @@ * it is ``'modified'``, or * it is ``'date'`` keep_default_dates : boolean, default True - If parsing dates, parse the default datelike columns (pandas engine only) + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + If parsing dates, parse the default datelike columns. numpy : boolean, default False - Direct decoding to numpy arrays (pandas engine only). Supports numeric + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + Direct decoding to numpy arrays. Supports numeric data only, but non-numeric column and index labels are supported. Note also that the JSON ordering MUST be the same for each term if numpy=True. precise_float : boolean, default False + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. 
+ Set to enable usage of higher precision (strtod) function when decoding string to double values (pandas engine only). Default (False) is to use fast but less precise builtin functionality date_unit : string, default None - The timestamp unit to detect if converting dates (pandas engine only). + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + The timestamp unit to detect if converting dates. The default behavior is to try and detect the correct precision, but if this is not desired then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds, milliseconds, microseconds or nanoseconds. encoding : str, default is 'utf-8' + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + The encoding to use to decode py3 bytes. With cudf engine, only utf-8 is supported. lines : boolean, default False Read the file as a json object per line. chunksize : integer, default None - Return JsonReader object for iteration (pandas engine only). + + .. admonition:: Not GPU-accelerated + + This parameter is only supported with ``engine='pandas'``. + + Return JsonReader object for iteration. See the `line-delimited json docs `_ for more information on ``chunksize``. @@ -547,12 +593,22 @@ otherwise. If using 'zip', the ZIP file must contain only one data file to be read in. Set to None for no decompression. byte_range : list or tuple, default None - Byte range within the input file to be read (cudf engine only). + + .. admonition:: GPU-accelerated + + This parameter is only supported with ``engine='cudf'``. + + Byte range within the input file to be read. The first number is the offset in bytes, the second number is the range size in bytes. Set the size to zero to read all data after the offset location. Reads the row that starts before or at the end of the range, even if it ends after the end of the range. keep_quotes : bool, default False + + .. 
admonition:: GPU-accelerated experimental feature + + This parameter is only supported with ``engine='cudf_experimental'``. + This parameter is only supported in ``cudf_experimental`` engine. If `True`, any string values are read literally (and wrapped in an additional set of quotes).