From 719a8a6934ae5eaeb22764d1bfdeb75893750bae Mon Sep 17 00:00:00 2001
From: Ray Bell <rayjohnbell0@gmail.com>
Date: Mon, 10 Jun 2024 15:57:17 -0400
Subject: [PATCH] Update PandasCompat.py to resolve references (#15704)

This PR allows the PandasCompat sphinx ext to contain resolved references. For example, you can now add intersphinx mapping to the content of the admonition.

### Motivation

I enjoy connecting the PyData communities and this PR allows for more opportunities to use intersphinx mapping to link back to the pandas docs.

### History

I first tried this in a previous PR (https://github.com/rapidsai/cudf/pull/15383#discussion_r1537888240) and commented here (https://github.com/rapidsai/cudf/pull/15383#issuecomment-2028451487) that I might get around to investigating this further. I finally had the time to work on this and made a bit of progress.

### Testing

I created a separate repo for this at https://github.com/raybellwaves/compatsphinxext, which deploys straight to https://raybellwaves.github.io/compatsphinxext. You can see it's working as expected here: https://raybellwaves.github.io/compatsphinxext/compat.html. You should be able to fork that and tinker pretty quickly.

### Further work

This could be cleaned up (for example, I couldn't get the [source] to display in the admonition, as I worked from the latest sphinx todo extension: https://github.com/sphinx-doc/sphinx/blob/master/sphinx/ext/todo.py). The existing pandas-compat admonitions could be switched to this if agreed. In addition, the documentation around how to write pandas-compat entries going forward (https://github.com/rapidsai/cudf/blob/branch-24.06/docs/cudf/source/developer_guide/documentation.md#comparing-to-pandas) will also have to be updated.

Longer term, the extension could be published and used across RAPIDS libraries where there are differences in compatibility with PyData libraries (e.g. pandas, networkx, scikit-learn) to simplify linking to those docs. I'm not sure if I'll have time to work on this, though.

Authors:
  - Ray Bell (https://github.com/raybellwaves)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/15704
---
 docs/cudf/source/_ext/PandasCompat.py | 143 +++++++++++++++++---------
 docs/cudf/source/conf.py              |   2 +
 2 files changed, 94 insertions(+), 51 deletions(-)

diff --git a/docs/cudf/source/_ext/PandasCompat.py b/docs/cudf/source/_ext/PandasCompat.py
index af2b16035c3..331495c981e 100644
--- a/docs/cudf/source/_ext/PandasCompat.py
+++ b/docs/cudf/source/_ext/PandasCompat.py
@@ -1,14 +1,20 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION
+# Copyright (c) 2021-2024, NVIDIA CORPORATION
 
 # This file is adapted from official sphinx tutorial for `todo` extension:
 # https://www.sphinx-doc.org/en/master/development/tutorials/todo.html
+from __future__ import annotations
+
+from typing import cast
 
 from docutils import nodes
+from docutils.nodes import Element
 from docutils.parsers.rst import Directive
-from sphinx.locale import get_translation
-from sphinx.util.docutils import SphinxDirective
-
-translator = get_translation("sphinx")
+from docutils.parsers.rst.directives.admonitions import BaseAdmonition
+from sphinx import addnodes
+from sphinx.domains import Domain
+from sphinx.errors import NoUri
+from sphinx.locale import _ as get_translation_sphinx
+from sphinx.util.docutils import SphinxDirective, new_document
 
 
 class PandasCompat(nodes.Admonition, nodes.Element):
@@ -32,7 +38,7 @@ def run(self):
         return [PandasCompatList("")]
 
 
-class PandasCompatDirective(SphinxDirective):
+class PandasCompatDirective(BaseAdmonition, SphinxDirective):
 
     # this enables content in the directive
     has_content = True
@@ -43,9 +49,11 @@ def run(self):
 
         PandasCompat_node = PandasCompat("\n".join(self.content))
         PandasCompat_node += nodes.title(
-            translator("Pandas Compatibility Note"),
-            translator("Pandas Compatibility Note"),
+            get_translation_sphinx("Pandas Compatibility Note"),
+            get_translation_sphinx("Pandas Compatibility Note"),
         )
+        PandasCompat_node["docname"] = self.env.docname
+        PandasCompat_node["target"] = targetnode
         self.state.nested_parse(
             self.content, self.content_offset, PandasCompat_node
         )
@@ -84,71 +92,104 @@ def merge_PandasCompats(app, env, docnames, other):
         )
 
 
-def process_PandasCompat_nodes(app, doctree, fromdocname):
-    if not app.config.include_pandas_compat:
-        for node in doctree.traverse(PandasCompat):
-            node.parent.remove(node)
+class PandasCompatDomain(Domain):
+    name = "pandascompat"
+    label = "pandascompat"
 
-    # Replace all PandasCompatList nodes with a list of the collected
-    # PandasCompats. Augment each PandasCompat with a backlink to the
-    # original location.
-    env = app.builder.env
+    @property
+    def pandascompats(self):
+        return self.data.setdefault("pandascompats", {})
 
-    if not hasattr(env, "PandasCompat_all_pandas_compat"):
-        env.PandasCompat_all_pandas_compat = []
+    def clear_doc(self, docname):
+        self.pandascompats.pop(docname, None)
+
+    def merge_domaindata(self, docnames, otherdata):
+        for docname in docnames:
+            self.pandascompats[docname] = otherdata["pandascompats"][docname]
+
+    def process_doc(self, env, docname, document):
+        pandascompats = self.pandascompats.setdefault(docname, [])
+        for pandascompat in document.findall(PandasCompat):
+            env.app.emit("pandascompat-defined", pandascompat)
+            pandascompats.append(pandascompat)
 
-    for node in doctree.traverse(PandasCompatList):
-        if not app.config.include_pandas_compat:
-            node.replace_self([])
-            continue
 
-        content = []
+class PandasCompatListProcessor:
+    def __init__(self, app, doctree, docname):
+        self.builder = app.builder
+        self.config = app.config
+        self.env = app.env
+        self.domain = cast(PandasCompatDomain, app.env.get_domain("pandascompat"))
+        self.document = new_document("")
+        self.process(doctree, docname)
 
-        for PandasCompat_info in env.PandasCompat_all_pandas_compat:
-            para = nodes.paragraph()
+    def process(self, doctree: nodes.document, docname: str) -> None:
+        pandascompats = [v for vals in self.domain.pandascompats.values() for v in vals]
+        for node in doctree.findall(PandasCompatList):
+            if not self.config.include_pandas_compat:
+                node.parent.remove(node)
+                continue
 
-            # Create a reference back to the original docstring
-            newnode = nodes.reference("", "")
-            innernode = nodes.emphasis(
-                translator("[source]"), translator("[source]")
-            )
-            newnode["refdocname"] = PandasCompat_info["docname"]
-            newnode["refuri"] = app.builder.get_relative_uri(
-                fromdocname, PandasCompat_info["docname"]
-            )
-            newnode["refuri"] += "#" + PandasCompat_info["target"]["refid"]
-            newnode.append(innernode)
-            para += newnode
+            content: list[Element | None] = [nodes.target()] if node.get("ids") else []
 
-            # Insert the reference node into PandasCompat node
-            # Note that this node is a deepcopy from the original copy
-            # in the docstring, so changing this does not affect that in the
-            # doc.
-            PandasCompat_info["PandasCompat"].append(para)
+            for pandascompat in pandascompats:
+                # Create a copy of the pandascompat node
+                new_pandascompat = pandascompat.deepcopy()
+                new_pandascompat["ids"].clear()
 
-            # Insert the PandasCompand node into the PandasCompatList Node
-            content.append(PandasCompat_info["PandasCompat"])
+                self.resolve_reference(new_pandascompat, docname)
+                content.append(new_pandascompat)
 
-        node.replace_self(content)
+                ref = self.create_reference(pandascompat, docname)
+                content.append(ref)
+
+            node.replace_self(content)
+
+    def create_reference(self, pandascompat, docname):
+        para = nodes.paragraph()
+        newnode = nodes.reference("", "")
+        innernode = nodes.emphasis(
+            get_translation_sphinx("[source]"), get_translation_sphinx("[source]")
+        )
+        newnode["refdocname"] = pandascompat["docname"]
+        try:
+            newnode["refuri"] = self.builder.get_relative_uri(
+                docname, pandascompat["docname"]
+            ) + "#" + pandascompat["target"]["refid"]
+        except NoUri:
+            # ignore if no URI can be determined, e.g. for LaTeX output
+            pass
+        newnode.append(innernode)
+        para += newnode
+        return para
+
+    def resolve_reference(self, todo, docname: str) -> None:
+        """Resolve references in the todo content."""
+        for node in todo.findall(addnodes.pending_xref):
+            if "refdoc" in node:
+                node["refdoc"] = docname
+
+        # Note: To resolve references, it is needed to wrap it with document node
+        self.document += todo
+        self.env.resolve_references(self.document, docname, self.builder)
+        self.document.remove(todo)
 
 
 def setup(app):
     app.add_config_value("include_pandas_compat", False, "html")
-
     app.add_node(PandasCompatList)
     app.add_node(
         PandasCompat,
         html=(visit_PandasCompat_node, depart_PandasCompat_node),
         latex=(visit_PandasCompat_node, depart_PandasCompat_node),
         text=(visit_PandasCompat_node, depart_PandasCompat_node),
+        man=(visit_PandasCompat_node, depart_PandasCompat_node),
+        texinfo=(visit_PandasCompat_node, depart_PandasCompat_node),
     )
-
-    # Sphinx directives are lower-cased
     app.add_directive("pandas-compat", PandasCompatDirective)
     app.add_directive("pandas-compat-list", PandasCompatListDirective)
-    app.connect("doctree-resolved", process_PandasCompat_nodes)
-    app.connect("env-purge-doc", purge_PandasCompats)
-    app.connect("env-merge-info", merge_PandasCompats)
+    app.add_domain(PandasCompatDomain)
+    app.connect("doctree-resolved", PandasCompatListProcessor)
 
     return {
         "version": "0.1",
diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
index 73d8b4445d3..e9c760e288e 100644
--- a/docs/cudf/source/conf.py
+++ b/docs/cudf/source/conf.py
@@ -617,6 +617,8 @@ def linkcode_resolve(domain, info) -> str | None:
         f"branch-{version}/python/cudf/cudf/{fn}{linespec}"
     )
 
+# Needed to avoid a build warning for the PandasCompat extension
+suppress_warnings = ["myst.domains"]
 
 def setup(app):
     app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")