From e89017b9a543e75b2c86b7ed0cfd6b0e7bda6b65 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Sun, 24 Mar 2024 23:19:04 -0400 Subject: [PATCH 1/9] note on pandas compat in interleave_columns --- python/cudf/cudf/core/dataframe.py | 6 +++++- python/cudf/cudf/tests/test_reshape.py | 4 +--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 2a4f93c1716..3d913d74cd4 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4379,7 +4379,7 @@ def query(self, expr, local_dict=None): 1 2018-10-08 .. pandas-compat:: - **DataFrame.query** + :func:`pandas.DataFrame.query` One difference from pandas is that ``query`` currently only supports numeric, datetime, timedelta, or bool dtypes. @@ -7541,6 +7541,10 @@ def interleave_columns(self): Returns ------- The interleaved columns as a single column + + .. pandas-compat:: + This method does not exist in pandas but it can be run + as `df.T.melt()["value"]`. 
""" if ("category" == self.dtypes).any(): raise ValueError( diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index d618669755d..1d15d94f6f0 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -302,9 +302,7 @@ def test_interleave_columns(nulls, num_cols, num_rows, dtype): else: got = gdf.interleave_columns() - expect = pd.Series(np.vstack(pdf.to_numpy()).reshape((-1,))).astype( - dtype - ) + expect = pdf.T.melt()["value"].astype(dtype) assert_eq(expect, got) From 5606d0325945484cc491912f4e9a14c22f5d02dd Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Sun, 24 Mar 2024 23:36:47 -0400 Subject: [PATCH 2/9] use .to_numpy method --- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/tests/test_reshape.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 3d913d74cd4..98a8eee86d4 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7544,7 +7544,7 @@ def interleave_columns(self): .. pandas-compat:: This method does not exist in pandas but it can be run - as `df.T.melt()["value"]`. + as `pd.Series(np.vstack(df.to_numpy()).reshape((-1,)))`. 
""" if ("category" == self.dtypes).any(): raise ValueError( diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 1d15d94f6f0..d618669755d 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -302,7 +302,9 @@ def test_interleave_columns(nulls, num_cols, num_rows, dtype): else: got = gdf.interleave_columns() - expect = pdf.T.melt()["value"].astype(dtype) + expect = pd.Series(np.vstack(pdf.to_numpy()).reshape((-1,))).astype( + dtype + ) assert_eq(expect, got) From 26b237b2dbf007f887b9cc4c063612f58b47ac00 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Sun, 24 Mar 2024 23:51:39 -0400 Subject: [PATCH 3/9] add func link in couple other places --- docs/cudf/source/developer_guide/documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cudf/source/developer_guide/documentation.md b/docs/cudf/source/developer_guide/documentation.md index c8da689479c..f41c7ac88f9 100644 --- a/docs/cudf/source/developer_guide/documentation.md +++ b/docs/cudf/source/developer_guide/documentation.md @@ -164,7 +164,7 @@ The directive should be used inside docstrings like so: Docstring body .. pandas-compat:: - **$API_NAME** + :func:`pandas.API_NAME` Explanation of differences ``` From af752c6cd0e1c266a97c2ecf7d9f4056dad7e70b Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Mon, 25 Mar 2024 00:00:17 -0400 Subject: [PATCH 4/9] use intersphinx mapping at top of doc string --- python/cudf/cudf/core/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 98a8eee86d4..0974a53a925 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4320,7 +4320,7 @@ def query(self, expr, local_dict=None): """ Query with a boolean expression using Numba to compile a GPU kernel. - See pandas.DataFrame.query. + See :func:`pandas.DataFrame.query`. 
Parameters ---------- From ad372347aa0bec064864119d87427d59f36d7fb4 Mon Sep 17 00:00:00 2001 From: raybellwaves Date: Sun, 14 Apr 2024 05:02:36 +0000 Subject: [PATCH 5/9] add correct mapping path and use :meth: --- docs/cudf/source/conf.py | 5 ++++- python/cudf/cudf/core/dataframe.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index b891ff99d47..a93fc32c1a1 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -308,7 +308,10 @@ def clean_all_xml_files(path): "dlpack": ("https://dmlc.github.io/dlpack/latest/", None), "nanoarrow": ("https://arrow.apache.org/nanoarrow/latest", None), "numpy": ("https://numpy.org/doc/stable", None), - "pandas": ("https://pandas.pydata.org/docs/", None), + "pandas": ( + "https://pandas.pydata.org/pandas-docs/stable/", + "https://pandas.pydata.org/pandas-docs/stable/objects.inv", + ), "pyarrow": ("https://arrow.apache.org/docs/", None), "python": ("https://docs.python.org/3", None), "rmm": ("https://docs.rapids.ai/api/rmm/nightly/", None), diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0974a53a925..a1c718b6302 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4320,7 +4320,7 @@ def query(self, expr, local_dict=None): """ Query with a boolean expression using Numba to compile a GPU kernel. - See :func:`pandas.DataFrame.query`. + See :meth:`pandas.DataFrame.query`. 
Parameters ---------- From edf4a65d7bac41d624cae54d1ac1135a2bce08c3 Mon Sep 17 00:00:00 2001 From: raybellwaves Date: Sun, 14 Apr 2024 05:05:08 +0000 Subject: [PATCH 6/9] switch pandas-compat back in documentation.md --- docs/cudf/source/developer_guide/documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cudf/source/developer_guide/documentation.md b/docs/cudf/source/developer_guide/documentation.md index f41c7ac88f9..c8da689479c 100644 --- a/docs/cudf/source/developer_guide/documentation.md +++ b/docs/cudf/source/developer_guide/documentation.md @@ -164,7 +164,7 @@ The directive should be used inside docstrings like so: Docstring body .. pandas-compat:: - :func:`pandas.API_NAME` + **$API_NAME** Explanation of differences ``` From eccb53a9e1f7f99d0cd27054953203e7d2634ab8 Mon Sep 17 00:00:00 2001 From: raybellwaves Date: Sun, 14 Apr 2024 05:05:37 +0000 Subject: [PATCH 7/9] try method --- python/cudf/cudf/core/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index a1c718b6302..f8abdc5e7e3 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4379,7 +4379,7 @@ def query(self, expr, local_dict=None): 1 2018-10-08 .. pandas-compat:: - :func:`pandas.DataFrame.query` + :meth:`pandas.DataFrame.query` One difference from pandas is that ``query`` currently only supports numeric, datetime, timedelta, or bool dtypes. 
From f8832485c11103cf6b1239ad96f2f4f07c492991 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Sun, 21 Apr 2024 21:46:50 -0400 Subject: [PATCH 8/9] apply updated conf --- docs/cudf/source/conf.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index a93fc32c1a1..bcefa3fbdf8 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -142,8 +142,6 @@ def clean_all_xml_files(path): tree.write(fn) - - # Breathe Configuration breathe_projects = {"libcudf": "../../../cpp/doxygen/xml"} for project_path in breathe_projects.values(): @@ -187,7 +185,9 @@ def clean_all_xml_files(path): # The short X.Y version. version = f"{CUDF_VERSION.major:02}.{CUDF_VERSION.minor:02}" # The full version. -release = f"{CUDF_VERSION.major:02}.{CUDF_VERSION.minor:02}.{CUDF_VERSION.micro:02}" +release = ( + f"{CUDF_VERSION.major:02}.{CUDF_VERSION.minor:02}.{CUDF_VERSION.micro:02}" +) # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -310,7 +310,7 @@ def clean_all_xml_files(path): "numpy": ("https://numpy.org/doc/stable", None), "pandas": ( "https://pandas.pydata.org/pandas-docs/stable/", - "https://pandas.pydata.org/pandas-docs/stable/objects.inv", + None, ), "pyarrow": ("https://arrow.apache.org/docs/", None), "python": ("https://docs.python.org/3", None), @@ -383,7 +383,7 @@ def _generate_namespaces(namespaces): "type_id", # Unknown base types "int32_t", - "void" + "void", } @@ -451,9 +451,14 @@ def _cached_intersphinx_lookup(env, node, contnode): def on_missing_reference(app, env, node, contnode): # These variables are defined outside the function to speed up the build. 
- global _all_namespaces, _names_to_skip_in_cpp, \ - _names_to_skip_in_pylibcudf, _intersphinx_extra_prefixes, \ - _domain_objects, _prefixed_domain_objects, _intersphinx_cache + global \ + _all_namespaces, \ + _names_to_skip_in_cpp, \ + _names_to_skip_in_pylibcudf, \ + _intersphinx_extra_prefixes, \ + _domain_objects, \ + _prefixed_domain_objects, \ + _intersphinx_cache # Precompute and cache domains for faster lookups if _domain_objects is None: From aeab0ba8f1a3800482b1db20af19002ce30ea5a3 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Sun, 21 Apr 2024 21:53:50 -0400 Subject: [PATCH 9/9] DOC: add interleave_columns to pandas-compat --- python/cudf/cudf/core/dataframe.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f8abdc5e7e3..66090525bd2 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4379,7 +4379,7 @@ def query(self, expr, local_dict=None): 1 2018-10-08 .. pandas-compat:: - :meth:`pandas.DataFrame.query` + **DataFrame.query** One difference from pandas is that ``query`` currently only supports numeric, datetime, timedelta, or bool dtypes. @@ -7543,8 +7543,10 @@ def interleave_columns(self): The interleaved columns as a single column .. pandas-compat:: + **DataFrame.interleave_columns** + This method does not exist in pandas but it can be run - as `pd.Series(np.vstack(df.to_numpy()).reshape((-1,)))`. + as ``pd.Series(np.vstack(df.to_numpy()).reshape((-1,)))``. """ if ("category" == self.dtypes).any(): raise ValueError(