From a859eee4c1eafa1b8440ac5f4c878ff40c285672 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Mon, 4 Dec 2023 10:45:17 -0500
Subject: [PATCH 01/21] move scripts over to cugraph repo

---
 python/cugraph-equivariant/LICENSE | 1 +
 python/cugraph-equivariant/README.md | 16 ++
 .../cugraph_equivariant/VERSION | 1 +
 .../cugraph_equivariant/__init__.py | 14 ++
 .../cugraph_equivariant/_version.py | 27 +++
 .../cugraph_equivariant/nn/__init__.py | 21 +++
 .../nn/tensor_product_conv.py | 170 ++++++++++++++++++
 .../cugraph_equivariant/tests/conftest.py | 30 ++++
 .../cugraph_equivariant/tests/test_scatter.py | 27 +++
 .../cugraph_equivariant/utils/__init__.py | 18 ++
 .../cugraph_equivariant/utils/scatter.py | 43 +++++
 python/cugraph-equivariant/pyproject.toml | 54 ++++++
 python/cugraph-equivariant/setup.py | 20 +++
 13 files changed, 442 insertions(+)
 create mode 120000 python/cugraph-equivariant/LICENSE
 create mode 100644 python/cugraph-equivariant/README.md
 create mode 120000 python/cugraph-equivariant/cugraph_equivariant/VERSION
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/__init__.py
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/_version.py
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/utils/__init__.py
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py
 create mode 100644 python/cugraph-equivariant/pyproject.toml
 create mode 100644 python/cugraph-equivariant/setup.py

diff --git a/python/cugraph-equivariant/LICENSE b/python/cugraph-equivariant/LICENSE
new file mode 120000
index 00000000000..30cff7403da
--- /dev/null
+++ b/python/cugraph-equivariant/LICENSE
@@ -0,0 +1 @@
+../../LICENSE
\ No newline at end of file
diff --git a/python/cugraph-equivariant/README.md b/python/cugraph-equivariant/README.md
new file mode 100644
index 00000000000..2875a4d8555
--- /dev/null
+++ b/python/cugraph-equivariant/README.md
@@ -0,0 +1,16 @@
+# cugraph-equivariant
+
+## Description
+
+The cugraph-equivariant library provides fast symmetry-preserving (equivariant) operations and convolutional layers to accelerate equivariant neural networks in drug discovery and other domains.
+
+## Build from source
+
+Developers are encouraged to create a conda environment that includes the runtime and test dependencies, and to install `cugraph-equivariant` with pip in editable mode.
+ +```bash +# for cuda 11.8 +mamba env create -n cugraph_equivariant -f python/cugraph-equivariant/conda/cugraph_equivariant_dev_cuda-118_arch-x86_64.yaml +conda activate cugraph_equivariant +./build_component.sh -n cugraph-equivariant +``` diff --git a/python/cugraph-equivariant/cugraph_equivariant/VERSION b/python/cugraph-equivariant/cugraph_equivariant/VERSION new file mode 120000 index 00000000000..d62dc733efd --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/VERSION @@ -0,0 +1 @@ +../../../VERSION \ No newline at end of file diff --git a/python/cugraph-equivariant/cugraph_equivariant/__init__.py b/python/cugraph-equivariant/cugraph_equivariant/__init__.py new file mode 100644 index 00000000000..59672a57073 --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph_equivariant._version import __git_commit__, __version__ diff --git a/python/cugraph-equivariant/cugraph_equivariant/_version.py b/python/cugraph-equivariant/cugraph_equivariant/_version.py new file mode 100644 index 00000000000..c2e2180a6a7 --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/_version.py @@ -0,0 +1,27 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.resources + +# Read VERSION file from the module that is symlinked to VERSION file +# in the root of the repo at build time or copied to the module at +# installation. VERSION is a separate file that allows CI build-time scripts +# to update version info (including commit hashes) without modifying +# source files. +__version__ = ( + importlib.resources.files("cugraph_equivariant") + .joinpath("VERSION") + .read_text() + .strip() +) +__git_commit__ = "" diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py b/python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py new file mode 100644 index 00000000000..79055c90b2b --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .tensor_product_conv import FullyConnectedTensorProductConv + +DiffDockTensorProductConv = FullyConnectedTensorProductConv + +__all__ = [ + "FullyConnectedTensorProductConv", + "DiffDockTensorProductConv", +] diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py new file mode 100644 index 00000000000..76020ed0703 --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py @@ -0,0 +1,170 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Callable, Sequence + +import torch +from torch import nn +from e3nn import o3 +from e3nn.nn import BatchNorm + +from cugraph_equivariant.utils import scatter_reduce + +try: + from pylibcugraphops.equivariant import TensorProduct + + HAS_TP_LIB = True +except ImportError: + HAS_TP_LIB = False + + +class FullyConnectedTensorProductConv(nn.Module): + r"""Message passing layer for tensor products in DiffDock-like architectures. + The left operand of tensor product is the spherical harmonic representation + of edge vector; the right operand consists of node features in irreps. + + .. math:: + \sum_{b \in \mathcal{N}_a} Y\left(\hat{r}_{a b}\right) + \otimes_{\psi_{a b}} \mathbf{h}_b + + where the path weights :math:`\psi_{a b}` are from user input to the forward() + function. For example, they can be constructed from edge embeddings and + scalar features: + + .. math:: + \psi_{a b} = \operatorname{MLP} + \left(e_{a b}, \mathbf{h}_a^0, \mathbf{h}_b^0\right) + + Parameters + ---------- + in_irreps : e3nn.o3.Irreps + Irreps for the input node features. + + sh_irreps : e3nn.o3.Irreps + Irreps for the spherical harmonic representations of edge vectors. + + out_irreps : e3nn.o3.Irreps + Irreps for the output. + + batch_norm : bool, optional (default=True) + If true, batch normalization is applied. + + mlp_channels : sequence of ints, optional (default=None) + A sequence of integers defining number of neurons in each layer in MLP + before the output layer. If `None`, no MLP will be added. + + mlp_activation : callable, optional (default=torch.nn.GELU) + Activation function in MLP. 
+ """ + + def __init__( + self, + in_irreps: o3.Irreps, + sh_irreps: o3.Irreps, + out_irreps: o3.Irreps, + batch_norm: bool = True, + mlp_channels: Optional[Sequence[int]] = None, + mlp_activation: Optional[Callable[..., nn.Module]] = nn.GELU, + ): + super().__init__() + self.in_irreps = in_irreps + self.out_irreps = out_irreps + self.sh_irreps = sh_irreps + + if HAS_TP_LIB: + self.tp = TensorProduct(str(in_irreps), str(sh_irreps), str(out_irreps)) + else: + self.tp = o3.FullyConnectedTensorProduct( + in_irreps, sh_irreps, out_irreps, shared_weights=False + ) + + self.batch_norm = BatchNorm(out_irreps) if batch_norm else None + + if mlp_channels is not None: + dims = list(mlp_channels) + [self.tp.weight_numel] + mlp = [] + for i in range(len(dims) - 1): + mlp.append(nn.Linear(dims[i], dims[i + 1])) + if mlp_activation is not None and i != len(dims) - 2: + mlp.append(mlp_activation) + self.mlp = nn.Sequential(mlp) + else: + self.mlp = None + + def forward( + self, + src_features: torch.Tensor, + edge_sh: torch.Tensor, + edge_scalars: torch.Tensor, # (n_edge, n_edge_scalars) + graph: tuple[torch.Tensor, tuple[int, int]], # COO, (n_src, n_dst) + src_scalars: Optional[torch.Tensor] = None, # (n_src, n_src_scalars) + dst_scalars: Optional[torch.Tensor] = None, # (n_dst, n_dst_scalars) + reduce: str = "mean", + edge_envelope: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """Forward pass. + + Parameters + ---------- + src_features : torch.Tensor + Source node features. + Shape: (num_src_nodes, in_irreps.dim) + + edge_sh : torch.Tensor + The spherical harmonic representations of the edge vectors. + Shape: (num_edges, sh_irreps.dim) + + edge_scalars: torch.Tensor + Edge embeddings. + Shape: (num_edges, self.tp.weight_numel) when the layer does not + contain MLP; (num_edges, self.mlp[0].in_features) when it does. + + graph : Any + The graph. + + src_scalars: + dst_scalars: + + reduce : str, optional (default="mean") + Reduction operator. Choose between "mean" and "sum". + + Returns + ------- + torch.Tensor + Output node features. + Shape: (num_dst_nodes, out_irreps.dim) + """ + if self.mlp is None: + assert self.tp.weight_numel == edge_scalars.size(-1) + else: + assert self.mlp[0].in_features == edge_scalars.size(-1) + + if reduce not in ["mean", "sum"]: + raise ValueError( + f"reduce argument must be either 'mean' or 'sum', got {reduce}." + ) + + (src, dst), (num_src_nodes, num_dst_nodes) = graph + + if self.mlp is not None: + tp_weights = self.mlp(edge_scalars) + else: + tp_weights = edge_scalars + + out_tp = self.tp(src_features[src], edge_sh, tp_weights) + out = scatter_reduce(out_tp, dst, dim=0, dim_size=num_dst_nodes, reduce=reduce) + + if self.batch_norm: + out = self.batch_norm(out) + + return out diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py b/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py new file mode 100644 index 00000000000..dea14ebf106 --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py @@ -0,0 +1,30 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch + +@pytest.fixture +def example_scatter_data(): + src_feat = torch.Tensor([3, 1, 0, 1, 1, 2]) + dst_indices = torch.Tensor([0, 1, 2, 2, 3, 1]) + + results = { + "sum": torch.Tensor([3., 3., 1., 1.]), + "mean": torch.Tensor([3., 1.5, 0.5, 1.]), + "prod": torch.Tensor([3., 2., 0., 1.]), + "amax": torch.Tensor([3., 2., 1., 1.]), + "amin": torch.Tensor([3., 1., 0., 1.]), + } + + return src_feat, dst_indices, results diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py new file mode 100644 index 00000000000..415632a2013 --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py @@ -0,0 +1,27 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch +from cugraph_equivariant.utils import scatter_reduce + +@pytest.mark.parametrize("reduce", ["sum", "mean", "prod", "amax", "amin"]) +def test_scatter_reduce(example_scatter_data, reduce): + device = torch.device("cuda:0") + src, index, out_true = example_scatter_data + src = src.to(device) + index = index.to(device) + + out = scatter_reduce(src, index, dim=0, dim_size=None, reduce=reduce) + + assert torch.allclose(out.cpu(), out_true[reduce]) diff --git a/python/cugraph-equivariant/cugraph_equivariant/utils/__init__.py b/python/cugraph-equivariant/cugraph_equivariant/utils/__init__.py new file mode 100644 index 00000000000..53bd64c3e6f --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/utils/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .scatter import scatter_reduce + +__all__ = [ + "scatter_reduce", +] diff --git a/python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py b/python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py new file mode 100644 index 00000000000..4e5a4fe474a --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py @@ -0,0 +1,43 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import torch + + +def broadcast(src: torch.Tensor, ref: torch.Tensor, dim: int) -> torch.Tensor: + size = ((1,) * dim) + (-1,) + ((1,) * (ref.dim() - dim - 1)) + return src.view(size).expand_as(ref) + + +def scatter_reduce( + src: torch.Tensor, + index: torch.Tensor, + dim: int = 0, + dim_size: Optional[int] = None, # value of out.size(dim) + reduce: str = "sum", # "sum", "prod", "mean", "amax", "amin" +): + # scatter() expects index to be int64 + index = broadcast(index, src, dim).to(torch.int64) + + size = list(src.size()) + + if dim_size is not None: + assert dim_size >= int(index.max()) + 1 + size[dim] = dim_size + else: + size[dim] = int(index.max()) + 1 + + out = torch.zeros(size, dtype=src.dtype, device=src.device) + return out.scatter_reduce_(dim, index, src, reduce, include_self=False) diff --git a/python/cugraph-equivariant/pyproject.toml b/python/cugraph-equivariant/pyproject.toml new file mode 100644 index 00000000000..9c7caa78f96 --- /dev/null +++ b/python/cugraph-equivariant/pyproject.toml @@ -0,0 +1,54 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +requires = [ + "setuptools", + "wheel", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project] +name = "cugraph-equivariant" +dynamic = ["version"] +description = "Fast GPU-based equivariant operations and convolutional layers." +readme = { file = "README.md", content-type = "text/markdown" } +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.9" +classifiers = [ + "Intended Audience :: Developers", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", +] +dependencies = [ + "pylibcugraphops==24.2.*", +] + +[project.urls] +Homepage = "https://github.com/rapidsai/cugraph" +Documentation = "https://docs.rapids.ai/api/cugraph/stable/api_docs/cugraph-ops/" + +[tool.setuptools] +license-files = ["LICENSE"] + +[tool.setuptools.dynamic] +version = {file = "cugraph_equivariant/VERSION"} + +[tool.setuptools.packages.find] +include = [ + "cugraph_equivariant*", + "cugraph_equivariant.*", +] diff --git a/python/cugraph-equivariant/setup.py b/python/cugraph-equivariant/setup.py new file mode 100644 index 00000000000..8aa97fec0f9 --- /dev/null +++ b/python/cugraph-equivariant/setup.py @@ -0,0 +1,20 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from setuptools import find_packages, setup + +if __name__ == "__main__": + packages = find_packages(include=["cugraph_equivariant*"]) + setup( + package_data={key: ["VERSION"] for key in packages}, + ) From d2186ee8f8deb48425546e3bb906d32e6424a113 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Tue, 12 Dec 2023 15:29:06 -0500 Subject: [PATCH 02/21] test conda build --- build.sh | 13 ++++++- ci/build_python.sh | 8 +++- conda/recipes/cugraph-equivariant/build.sh | 7 ++++ conda/recipes/cugraph-equivariant/meta.yaml | 39 +++++++++++++++++++ .../cugraph_equivariant/tests/conftest.py | 11 +++--- .../cugraph_equivariant/tests/test_scatter.py | 1 + 6 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 conda/recipes/cugraph-equivariant/build.sh create mode 100644 conda/recipes/cugraph-equivariant/meta.yaml diff --git a/build.sh b/build.sh index fa7a4f6f363..fa9574b66dc 100755 --- a/build.sh +++ b/build.sh @@ -31,6 +31,7 @@ VALIDARGS=" cugraph-service cugraph-pyg cugraph-dgl + cugraph-equivariant nx-cugraph cpp-mgtests cpp-mtmgtests @@ -60,6 +61,7 @@ HELP="$0 [ ...] [ ...] cugraph-service - build the cugraph-service_client and cugraph-service_server Python package cugraph-pyg - build the cugraph-pyg Python package cugraph-dgl - build the cugraph-dgl extensions for DGL + cugraph-equivariant - build the cugraph-equivariant Python package nx-cugraph - build the nx-cugraph Python package cpp-mgtests - build libcugraph and libcugraph_etl MG tests. Builds MPI communicator, adding MPI as a dependency. cpp-mtmgtests - build libcugraph MTMG tests. Adds UCX as a dependency (temporary). @@ -219,7 +221,7 @@ if hasArg uninstall; then # removes the latest one and leaves the others installed. build.sh uninstall # can be run multiple times to remove all of them, but that is not obvious. 
pip uninstall -y pylibcugraph cugraph cugraph-service-client cugraph-service-server \ - cugraph-dgl cugraph-pyg nx-cugraph + cugraph-dgl cugraph-pyg cugraph-equivariant nx-cugraph fi if hasArg clean; then @@ -394,6 +396,15 @@ if hasArg cugraph-dgl || hasArg all; then fi fi +# Build and install the cugraph-equivariant Python package +if hasArg cugraph-equivariant || hasArg all; then + if hasArg --clean; then + cleanPythonDir ${REPODIR}/python/cugraph-equivariant + else + python ${PYTHON_ARGS_FOR_INSTALL} ${REPODIR}/python/cugraph-equivariant + fi +fi + # Build and install the nx-cugraph Python package if hasArg nx-cugraph || hasArg all; then if hasArg --clean; then diff --git a/ci/build_python.sh b/ci/build_python.sh index 90a40c539ff..30a089d74f9 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -19,7 +19,7 @@ echo "${version}" > VERSION rapids-logger "Begin py build" package_dir="python" -for package_name in pylibcugraph cugraph nx-cugraph cugraph-pyg cugraph-dgl; do +for package_name in pylibcugraph cugraph nx-cugraph cugraph-pyg cugraph-dgl; do underscore_package_name=$(echo "${package_name}" | tr "-" "_") sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${underscore_package_name}/_version.py" done @@ -85,4 +85,10 @@ if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then conda/recipes/cugraph-dgl fi +rapids-conda-retry mambabuild \ + --no-test \ + --channel "${CPP_CHANNEL}" \ + --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ + conda/recipes/cugraph-equivariant + rapids-upload-conda-to-s3 python diff --git a/conda/recipes/cugraph-equivariant/build.sh b/conda/recipes/cugraph-equivariant/build.sh new file mode 100644 index 00000000000..453a093e471 --- /dev/null +++ b/conda/recipes/cugraph-equivariant/build.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +# Copyright (c) 2023, NVIDIA CORPORATION. + +# This assumes the script is executed from the root of the repo directory + +./build.sh cugraph-equivariant diff --git a/conda/recipes/cugraph-equivariant/meta.yaml b/conda/recipes/cugraph-equivariant/meta.yaml new file mode 100644 index 00000000000..cf8fcd64444 --- /dev/null +++ b/conda/recipes/cugraph-equivariant/meta.yaml @@ -0,0 +1,39 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: cugraph-equivariant + version: {{ version }} + +source: + path: ../../.. + +build: + number: {{ GIT_DESCRIBE_NUMBER }} + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + +requirements: + host: + - python + run: + - e3nn >=0.5.1 + - pylibcugraphops ={{ minor_version }} + - pytorch >=2.0 + - python + +tests: + imports: + - cugraph_equivariant + +about: + home: https://rapids.ai/ + dev_url: https://github.com/rapidsai/cugraph + license: Apache-2.0 + license_file: ../../../LICENSE + summary: GPU-accelerated equivariant convolutional layers. 
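For reference, here is a minimal sketch (not part of the patch) of what the `scatter_reduce` utility from patch 01 computes on the data in the `example_scatter_data` fixture reformatted below; it assumes `cugraph-equivariant` is installed and behaves the same on CPU and GPU:

```python
import torch
from cugraph_equivariant.utils import scatter_reduce

# Six source values scattered into bins given by `index`:
# bin 0 <- {3}, bin 1 <- {1, 2}, bin 2 <- {0, 1}, bin 3 <- {1}
src = torch.tensor([3.0, 1.0, 0.0, 1.0, 1.0, 2.0])
index = torch.tensor([0, 1, 2, 2, 3, 1])

# With dim_size=None, the output length is inferred as index.max() + 1 == 4.
print(scatter_reduce(src, index, dim=0, dim_size=None, reduce="sum"))
# tensor([3., 3., 1., 1.])
print(scatter_reduce(src, index, dim=0, dim_size=None, reduce="mean"))
# tensor([3.0000, 1.5000, 0.5000, 1.0000])
```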
diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py b/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py index dea14ebf106..61dd0f50d95 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py @@ -14,17 +14,18 @@ import pytest import torch + @pytest.fixture def example_scatter_data(): src_feat = torch.Tensor([3, 1, 0, 1, 1, 2]) dst_indices = torch.Tensor([0, 1, 2, 2, 3, 1]) results = { - "sum": torch.Tensor([3., 3., 1., 1.]), - "mean": torch.Tensor([3., 1.5, 0.5, 1.]), - "prod": torch.Tensor([3., 2., 0., 1.]), - "amax": torch.Tensor([3., 2., 1., 1.]), - "amin": torch.Tensor([3., 1., 0., 1.]), + "sum": torch.Tensor([3.0, 3.0, 1.0, 1.0]), + "mean": torch.Tensor([3.0, 1.5, 0.5, 1.0]), + "prod": torch.Tensor([3.0, 2.0, 0.0, 1.0]), + "amax": torch.Tensor([3.0, 2.0, 1.0, 1.0]), + "amin": torch.Tensor([3.0, 1.0, 0.0, 1.0]), } return src_feat, dst_indices, results diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py index 415632a2013..fdbbda52f8c 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py @@ -15,6 +15,7 @@ import torch from cugraph_equivariant.utils import scatter_reduce + @pytest.mark.parametrize("reduce", ["sum", "mean", "prod", "amax", "amin"]) def test_scatter_reduce(example_scatter_data, reduce): device = torch.device("cuda:0") From 148cb4e79efa8b28dec745d88b96235a3678dce2 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Tue, 12 Dec 2023 15:48:23 -0500 Subject: [PATCH 03/21] separate conda build into a new job --- .github/workflows/pr.yaml | 7 ++++++ ci/build_python.sh | 6 ----- ci/build_python_cugraph-equivariant.sh | 31 ++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 6 deletions(-) create mode 100755 ci/build_python_cugraph-equivariant.sh diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 82c71efffdb..a976354504e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -26,6 +26,7 @@ jobs: - wheel-build-nx-cugraph - wheel-tests-nx-cugraph - devcontainer + - conda-python-build-cugraph-equivariant secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 checks: @@ -52,6 +53,12 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 with: build_type: pull-request + conda-python-build-cugraph-equivariant: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + with: + build_type: pull-request + build_script: ci/build_python.sh conda-python-tests: needs: conda-python-build secrets: inherit diff --git a/ci/build_python.sh b/ci/build_python.sh index 30a089d74f9..31982648884 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -85,10 +85,4 @@ if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then conda/recipes/cugraph-dgl fi -rapids-conda-retry mambabuild \ - --no-test \ - --channel "${CPP_CHANNEL}" \ - --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ - conda/recipes/cugraph-equivariant - rapids-upload-conda-to-s3 python diff --git a/ci/build_python_cugraph-equivariant.sh b/ci/build_python_cugraph-equivariant.sh new file mode 100755 index 00000000000..b61c7de79f0 --- /dev/null +++ b/ci/build_python_cugraph-equivariant.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA 
CORPORATION. + +set -euo pipefail + +source rapids-env-update + +rapids-print-env + +CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) + +version=$(rapids-generate-version) +git_commit=$(git rev-parse HEAD) +export RAPIDS_PACKAGE_VERSION=${version} +echo "${version}" > VERSION + +rapids-logger "Begin py build" + +package_dir="python" +for package_name in cugraph-equivariant; do + underscore_package_name=$(echo "${package_name}" | tr "-" "_") + sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${underscore_package_name}/_version.py" +done + +rapids-conda-retry mambabuild \ + --no-test \ + --channel "${CPP_CHANNEL}" \ + --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ + conda/recipes/cugraph-equivariant + +rapids-upload-conda-to-s3 python From 15404b59dbe7163bfa154e702bcd44f3757d70a7 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Tue, 12 Dec 2023 16:02:08 -0500 Subject: [PATCH 04/21] remove cpp channel --- ci/build_python_cugraph-equivariant.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/build_python_cugraph-equivariant.sh b/ci/build_python_cugraph-equivariant.sh index b61c7de79f0..1eab2b09841 100755 --- a/ci/build_python_cugraph-equivariant.sh +++ b/ci/build_python_cugraph-equivariant.sh @@ -7,8 +7,6 @@ source rapids-env-update rapids-print-env -CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) - version=$(rapids-generate-version) git_commit=$(git rev-parse HEAD) export RAPIDS_PACKAGE_VERSION=${version} @@ -24,7 +22,6 @@ done rapids-conda-retry mambabuild \ --no-test \ - --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cugraph-equivariant From 3c58d7c5c197066411acc325f1da0335ae776547 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Tue, 12 Dec 2023 16:09:08 -0500 Subject: [PATCH 05/21] fix workflow --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a976354504e..f65181e092a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -58,7 +58,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 with: build_type: pull-request - build_script: ci/build_python.sh + build_script: ci/build_python_cugraph-equivariant.sh conda-python-tests: needs: conda-python-build secrets: inherit From 834c1ba5ad63ec89614c6dd34962a1c9bf2564b6 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Wed, 13 Dec 2023 19:53:04 -0500 Subject: [PATCH 06/21] build wheel --- .github/workflows/pr.yaml | 7 +++++++ ci/build_wheel.sh | 2 +- ci/build_wheel_cugraph-equivariant.sh | 6 ++++++ conda/recipes/cugraph-equivariant/meta.yaml | 2 -- 4 files changed, 14 insertions(+), 3 deletions(-) create mode 100755 ci/build_wheel_cugraph-equivariant.sh diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index f65181e092a..17ca3d5e333 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -27,6 +27,7 @@ jobs: - wheel-tests-nx-cugraph - devcontainer - conda-python-build-cugraph-equivariant + - wheel-build-cugraph-equivariant secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 checks: @@ -134,6 +135,12 @@ jobs: with: build_type: pull-request script: ci/test_wheel_nx-cugraph.sh + wheel-build-cugraph-equivariant: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/build_wheel_cugraph-equivariant.sh devcontainer: secrets: inherit 
uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 163520ea1da..c81f0ecf423 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -55,7 +55,7 @@ cd "${package_dir}" python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check # pure-python packages should not have auditwheel run on them. -if [[ ${package_name} == "nx-cugraph" ]]; then +if [[ ${package_name} == "nx-cugraph" || ${package_name} == "cugraph-equivariant" ]]; then RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 dist else mkdir -p final_dist diff --git a/ci/build_wheel_cugraph-equivariant.sh b/ci/build_wheel_cugraph-equivariant.sh new file mode 100755 index 00000000000..34cbab77297 --- /dev/null +++ b/ci/build_wheel_cugraph-equivariant.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Copyright (c) 2023, NVIDIA CORPORATION. + +set -euo pipefail + +./ci/build_wheel.sh cugraph-equivariant python/cugraph-equivariant diff --git a/conda/recipes/cugraph-equivariant/meta.yaml b/conda/recipes/cugraph-equivariant/meta.yaml index cf8fcd64444..f2a8f60eff8 100644 --- a/conda/recipes/cugraph-equivariant/meta.yaml +++ b/conda/recipes/cugraph-equivariant/meta.yaml @@ -22,9 +22,7 @@ requirements: host: - python run: - - e3nn >=0.5.1 - pylibcugraphops ={{ minor_version }} - - pytorch >=2.0 - python tests: From 1f4077adafd1fbb7e842ec103730953b2051dad9 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Wed, 10 Jan 2024 17:07:09 -0500 Subject: [PATCH 07/21] year --- build.sh | 2 +- ci/build_python_cugraph-equivariant.sh | 2 +- ci/build_wheel_cugraph-equivariant.sh | 2 +- conda/recipes/cugraph-equivariant/build.sh | 2 +- conda/recipes/cugraph-equivariant/meta.yaml | 2 +- python/cugraph-equivariant/cugraph_equivariant/__init__.py | 2 +- python/cugraph-equivariant/cugraph_equivariant/_version.py | 2 +- python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py | 2 +- .../cugraph_equivariant/nn/tensor_product_conv.py | 2 +- .../cugraph-equivariant/cugraph_equivariant/tests/conftest.py | 2 +- .../cugraph_equivariant/tests/test_scatter.py | 2 +- .../cugraph-equivariant/cugraph_equivariant/utils/__init__.py | 2 +- python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py | 2 +- python/cugraph-equivariant/setup.py | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/build.sh b/build.sh index a025f107977..82de45ca9fb 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # cugraph build script diff --git a/ci/build_python_cugraph-equivariant.sh b/ci/build_python_cugraph-equivariant.sh index 1eab2b09841..b48d203a210 100755 --- a/ci/build_python_cugraph-equivariant.sh +++ b/ci/build_python_cugraph-equivariant.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. set -euo pipefail diff --git a/ci/build_wheel_cugraph-equivariant.sh b/ci/build_wheel_cugraph-equivariant.sh index 34cbab77297..fcc8e0f774c 100755 --- a/ci/build_wheel_cugraph-equivariant.sh +++ b/ci/build_wheel_cugraph-equivariant.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. 
set -euo pipefail diff --git a/conda/recipes/cugraph-equivariant/build.sh b/conda/recipes/cugraph-equivariant/build.sh index 453a093e471..f0ff1688b55 100644 --- a/conda/recipes/cugraph-equivariant/build.sh +++ b/conda/recipes/cugraph-equivariant/build.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # This assumes the script is executed from the root of the repo directory diff --git a/conda/recipes/cugraph-equivariant/meta.yaml b/conda/recipes/cugraph-equivariant/meta.yaml index f2a8f60eff8..a952812f845 100644 --- a/conda/recipes/cugraph-equivariant/meta.yaml +++ b/conda/recipes/cugraph-equivariant/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} diff --git a/python/cugraph-equivariant/cugraph_equivariant/__init__.py b/python/cugraph-equivariant/cugraph_equivariant/__init__.py index 59672a57073..20507bd9329 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/__init__.py +++ b/python/cugraph-equivariant/cugraph_equivariant/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph-equivariant/cugraph_equivariant/_version.py b/python/cugraph-equivariant/cugraph_equivariant/_version.py index c2e2180a6a7..31a707bb17e 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/_version.py +++ b/python/cugraph-equivariant/cugraph_equivariant/_version.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py b/python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py index 79055c90b2b..8f4d8de0042 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py +++ b/python/cugraph-equivariant/cugraph_equivariant/nn/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py index 76020ed0703..1f74920a8bf 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py +++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py b/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py index 61dd0f50d95..c7c6bad07db 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py index fdbbda52f8c..ff8048468ee 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_scatter.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph-equivariant/cugraph_equivariant/utils/__init__.py b/python/cugraph-equivariant/cugraph_equivariant/utils/__init__.py index 53bd64c3e6f..b4acfe8d090 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/utils/__init__.py +++ b/python/cugraph-equivariant/cugraph_equivariant/utils/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py b/python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py index 4e5a4fe474a..45cc541fc7b 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py +++ b/python/cugraph-equivariant/cugraph_equivariant/utils/scatter.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph-equivariant/setup.py b/python/cugraph-equivariant/setup.py index 8aa97fec0f9..acd0df3f717 100644 --- a/python/cugraph-equivariant/setup.py +++ b/python/cugraph-equivariant/setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
 # You may obtain a copy of the License at

From 6462cc37b138d37c68da831773a94dc891624d46 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Fri, 12 Jan 2024 00:51:13 -0500
Subject: [PATCH 08/21] revise mlp support

---
 .../nn/tensor_product_conv.py                 | 77 +++++++++++++++----
 .../tests/test_tensor_product_conv.py         | 70 +++++++++++++++++
 2 files changed, 132 insertions(+), 15 deletions(-)
 create mode 100644 python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py

diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index 1f74920a8bf..9cabd519899 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -65,6 +65,14 @@ class FullyConnectedTensorProductConv(nn.Module):
 
     mlp_activation : callable, optional (default=torch.nn.GELU)
         Activation function in MLP.
+
+    mlp_fast_first_layer : bool, optional (default=False)
+        When enabled, the first MLP layer applies each chunk of its weight to
+        the corresponding input component (i.e., edge embeddings, scalar
+        features of source and destination nodes) before indexing and
+        concatenation, leading to a lower complexity in most use cases. This
+        option requires users to explicitly pass in `src_scalars` and
+        `dst_scalars` in the `forward()` call.
     """
 
     def __init__(
@@ -75,6 +83,7 @@ def __init__(
         batch_norm: bool = True,
         mlp_channels: Optional[Sequence[int]] = None,
         mlp_activation: Optional[Callable[..., nn.Module]] = nn.GELU,
+        mlp_fast_first_layer: bool = False,
     ):
         super().__init__()
         self.in_irreps = in_irreps
@@ -90,6 +99,14 @@ def __init__(
 
         self.batch_norm = BatchNorm(out_irreps) if batch_norm else None
 
+        if mlp_fast_first_layer:
+            assert mlp_channels is not None
+            assert mlp_channels[0] % 3 == 0
+            self.n_scalars = int(mlp_channels[0] / 3)
+        else:
+            self.n_scalars = None
+        self.mlp_fast_first_layer = mlp_fast_first_layer
+
         if mlp_channels is not None:
             dims = list(mlp_channels) + [self.tp.weight_numel]
             mlp = []
@@ -105,10 +122,10 @@ def forward(
         self,
         src_features: torch.Tensor,
         edge_sh: torch.Tensor,
-        edge_scalars: torch.Tensor,  # (n_edge, n_edge_scalars)
+        edge_emb: torch.Tensor,
         graph: tuple[torch.Tensor, tuple[int, int]],  # COO, (n_src, n_dst)
-        src_scalars: Optional[torch.Tensor] = None,  # (n_src, n_src_scalars)
-        dst_scalars: Optional[torch.Tensor] = None,  # (n_dst, n_dst_scalars)
+        src_scalars: Optional[torch.Tensor] = None,
+        dst_scalars: Optional[torch.Tensor] = None,
         reduce: str = "mean",
         edge_envelope: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
@@ -124,20 +141,31 @@ def forward(
             The spherical harmonic representations of the edge vectors.
             Shape: (num_edges, sh_irreps.dim)
 
-        edge_scalars: torch.Tensor
-            Edge embeddings.
-            Shape: (num_edges, self.tp.weight_numel) when the layer does not
-            contain MLP; (num_edges, self.mlp[0].in_features) when it does.
+        edge_emb: torch.Tensor
+            Edge embeddings that are fed into MLPs to generate tensor product weights.
+            Shape: (num_edges, dim), where `dim` should be:
+            - `tp.weight_numel` when the layer does not contain MLPs.
+            - `num_scalars` when `mlp_fast_first_layer` is enabled.
+            - `mlp_channels[0]` otherwise.
 
         graph : Any
             The graph.
 
-        src_scalars:
-        dst_scalars:
+        src_scalars: torch.Tensor, optional
+            Scalar features of source nodes.
+            Shape: (num_src_nodes, n_scalars)
+
+        dst_scalars: torch.Tensor, optional
+            Scalar features of destination nodes.
+ Shape: (num_dst_nodes, n_scalars) reduce : str, optional (default="mean") Reduction operator. Choose between "mean" and "sum". + edge_envelope: torch.Tensor, optional + Edge envelope. + Shape: (num_edges,) + Returns ------- torch.Tensor @@ -145,9 +173,14 @@ def forward( Shape: (num_dst_nodes, out_irreps.dim) """ if self.mlp is None: - assert self.tp.weight_numel == edge_scalars.size(-1) + assert self.tp.weight_numel == edge_emb.size(-1) else: - assert self.mlp[0].in_features == edge_scalars.size(-1) + if self.mlp_fast_first_layer: + assert edge_emb.size(-1) == self.n_scalars + assert src_scalars.size(-1) == self.n_scalars + assert dst_scalars.size(-1) == self.n_scalars + else: + assert self.mlp[0].in_features == edge_emb.size(-1) if reduce not in ["mean", "sum"]: raise ValueError( @@ -157,12 +190,26 @@ def forward( (src, dst), (num_src_nodes, num_dst_nodes) = graph if self.mlp is not None: - tp_weights = self.mlp(edge_scalars) + if self.mlp_fast_first_layer: + w_edge, w_src, w_dst = torch.chunk(self.mlp[0].weight, chunks=3, dim=-1) + tp_weights = ( + edge_emb @ w_edge.T + + (src_scalars @ w_src.T)[src] + + (dst_scalars @ w_dst.T)[dst] + + self.mlp[0].bias + ) + tp_weights = self.mlp[1:](tp_weights) + else: + tp_weights = self.mlp(edge_emb) else: - tp_weights = edge_scalars + tp_weights = edge_emb + + out = self.tp(src_features[src], edge_sh, tp_weights) + + if edge_envelope is not None: + out = out * edge_envelope.view(-1, 1) - out_tp = self.tp(src_features[src], edge_sh, tp_weights) - out = scatter_reduce(out_tp, dst, dim=0, dim_size=num_dst_nodes, reduce=reduce) + out = scatter_reduce(out, dst, dim=0, dim_size=num_dst_nodes, reduce=reduce) if self.batch_norm: out = self.batch_norm(out) diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py new file mode 100644 index 00000000000..2dd95242ae3 --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py @@ -0,0 +1,70 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import torch
+from e3nn import o3
+from cugraph_equivariant.nn import FullyConnectedTensorProductConv
+
+
+def test_tensor_product_conv_equivariance():
+    torch.manual_seed(12345)
+
+    in_irreps = o3.Irreps("10x0e + 10x1e")
+    out_irreps = o3.Irreps("20x0e + 10x1e")
+    sh_irreps = o3.Irreps.spherical_harmonics(lmax=2)
+
+    tp_conv = FullyConnectedTensorProductConv(
+        in_irreps=in_irreps, sh_irreps=sh_irreps, out_irreps=out_irreps
+    )
+
+    num_src_nodes, num_dst_nodes = 9, 7
+    num_edges = 40
+    src = torch.randint(num_src_nodes, (num_edges,))
+    dst = torch.randint(num_dst_nodes, (num_edges,))
+    edge_index = torch.vstack((src, dst))
+
+    src_pos = torch.randn(num_src_nodes, 3)
+    dst_pos = torch.randn(num_dst_nodes, 3)
+    edge_vec = dst_pos[dst] - src_pos[src]
+    edge_sh = o3.spherical_harmonics(
+        tp_conv.sh_irreps, edge_vec, normalize=True, normalization="component"
+    )
+    src_features = torch.randn(num_src_nodes, in_irreps.dim)
+
+    weights_tp = torch.randn(num_edges, tp_conv.tp.weight_numel)
+
+    rot = o3.rand_matrix()
+    D_in = tp_conv.in_irreps.D_from_matrix(rot)
+    D_sh = tp_conv.sh_irreps.D_from_matrix(rot)
+    D_out = tp_conv.out_irreps.D_from_matrix(rot)
+
+    # rotate before
+    out_before = tp_conv(
+        src_features=src_features @ D_in,
+        edge_sh=edge_sh @ D_sh,
+        edge_emb=weights_tp,
+        graph=(edge_index, (num_src_nodes, num_dst_nodes)),
+    )
+
+    # rotate after
+    out_after = (
+        tp_conv(
+            src_features=src_features,
+            edge_sh=edge_sh,
+            edge_emb=weights_tp,
+            graph=(edge_index, (num_src_nodes, num_dst_nodes)),
+        )
+        @ D_out
+    )
+
+    assert torch.allclose(out_before, out_after, rtol=1e-4, atol=1e-4)

From 2259a8d6d60501c0243d4d74ac5be2e71755a428 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Fri, 12 Jan 2024 13:40:36 -0500
Subject: [PATCH 09/21] test with mlps

---
 .../nn/tensor_product_conv.py                 | 18 +++----
 .../tests/test_tensor_product_conv.py         | 36 +++++++++++++++----
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index 9cabd519899..46f1eea019f 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -102,9 +102,9 @@ def __init__(
         if mlp_fast_first_layer:
             assert mlp_channels is not None
             assert mlp_channels[0] % 3 == 0
-            self.n_scalars = int(mlp_channels[0] / 3)
+            self.num_scalars = int(mlp_channels[0] / 3)
         else:
-            self.n_scalars = None
+            self.num_scalars = None
         self.mlp_fast_first_layer = mlp_fast_first_layer
 
         if mlp_channels is not None:
@@ -113,8 +113,8 @@ def __init__(
             for i in range(len(dims) - 1):
                 mlp.append(nn.Linear(dims[i], dims[i + 1]))
                 if mlp_activation is not None and i != len(dims) - 2:
-                    mlp.append(mlp_activation)
-            self.mlp = nn.Sequential(mlp)
+                    mlp.append(mlp_activation())
+            self.mlp = nn.Sequential(*mlp)
         else:
             self.mlp = None
 
@@ -153,11 +153,11 @@ def forward(
 
         src_scalars: torch.Tensor, optional
             Scalar features of source nodes.
-            Shape: (num_src_nodes, n_scalars)
+            Shape: (num_src_nodes, num_scalars)
 
         dst_scalars: torch.Tensor, optional
             Scalar features of destination nodes.
-            Shape: (num_dst_nodes, n_scalars)
+            Shape: (num_dst_nodes, num_scalars)
 
         reduce : str, optional (default="mean")
             Reduction operator. Choose between "mean" and "sum".
@@ -176,9 +176,9 @@ def forward( assert self.tp.weight_numel == edge_emb.size(-1) else: if self.mlp_fast_first_layer: - assert edge_emb.size(-1) == self.n_scalars - assert src_scalars.size(-1) == self.n_scalars - assert dst_scalars.size(-1) == self.n_scalars + assert edge_emb.size(-1) == self.num_scalars + assert src_scalars.size(-1) == self.num_scalars + assert dst_scalars.size(-1) == self.num_scalars else: assert self.mlp[0].in_features == edge_emb.size(-1) diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py index 2dd95242ae3..3e9c0ca3df6 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py @@ -11,12 +11,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest + import torch from e3nn import o3 from cugraph_equivariant.nn import FullyConnectedTensorProductConv -def test_tensor_product_conv_equivariance(): +@pytest.mark.parametrize( + "mlp_channels, mlp_fast_first_layer", + [[(3 * 10, 8, 8), True], [(7,), False], [None, False]], +) +def test_tensor_product_conv_equivariance(mlp_channels, mlp_fast_first_layer): torch.manual_seed(12345) in_irreps = o3.Irreps("10x0e + 10x1e") @@ -24,7 +30,11 @@ def test_tensor_product_conv_equivariance(): sh_irreps = o3.Irreps.spherical_harmonics(lmax=2) tp_conv = FullyConnectedTensorProductConv( - in_irreps=in_irreps, sh_irreps=sh_irreps, out_irreps=out_irreps + in_irreps=in_irreps, + sh_irreps=sh_irreps, + out_irreps=out_irreps, + mlp_channels=mlp_channels, + mlp_fast_first_layer=mlp_fast_first_layer, ) num_src_nodes, num_dst_nodes = 9, 7 @@ -41,19 +51,31 @@ def test_tensor_product_conv_equivariance(): ) src_features = torch.randn(num_src_nodes, in_irreps.dim) - weights_tp = torch.randn(num_edges, tp_conv.tp.weight_numel) - rot = o3.rand_matrix() D_in = tp_conv.in_irreps.D_from_matrix(rot) D_sh = tp_conv.sh_irreps.D_from_matrix(rot) D_out = tp_conv.out_irreps.D_from_matrix(rot) + if mlp_channels is None: + edge_emb = torch.randn(num_edges, tp_conv.tp.weight_numel) + src_scalars = dst_scalars = None + else: + if mlp_fast_first_layer: + edge_emb = torch.randn(num_edges, tp_conv.num_scalars) + src_scalars = torch.randn(num_src_nodes, tp_conv.num_scalars) + dst_scalars = torch.randn(num_dst_nodes, tp_conv.num_scalars) + else: + edge_emb = torch.randn(num_edges, tp_conv.mlp[0].in_features) + src_scalars = dst_scalars = None + # rotate before out_before = tp_conv( src_features=src_features @ D_in, edge_sh=edge_sh @ D_sh, - edge_emb=weights_tp, + edge_emb=edge_emb, graph=(edge_index, (num_src_nodes, num_dst_nodes)), + src_scalars=src_scalars, + dst_scalars=dst_scalars, ) # rotate after @@ -61,8 +83,10 @@ def test_tensor_product_conv_equivariance(): tp_conv( src_features=src_features, edge_sh=edge_sh, - edge_emb=weights_tp, + edge_emb=edge_emb, graph=(edge_index, (num_src_nodes, num_dst_nodes)), + src_scalars=src_scalars, + dst_scalars=dst_scalars, ) @ D_out ) From c525bef4fad2e03810fd58f74babedfe81f02064 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Fri, 12 Jan 2024 17:21:57 -0500 Subject: [PATCH 10/21] test wheel --- .github/workflows/pr.yaml | 11 +++++++- ci/test_wheel_cugraph-equivariant.sh | 33 +++++++++++++++++++++++ dependencies.yaml | 22 +++++++++++++++ python/cugraph-equivariant/pyproject.toml | 14 ++++++++-- 4 files changed, 77 
From c525bef4fad2e03810fd58f74babedfe81f02064 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Fri, 12 Jan 2024 17:21:57 -0500
Subject: [PATCH 10/21] test wheel

---
 .github/workflows/pr.yaml                 | 11 +++++++-
 ci/test_wheel_cugraph-equivariant.sh      | 33 +++++++++++++++++++++++
 dependencies.yaml                         | 22 +++++++++++++++
 python/cugraph-equivariant/pyproject.toml | 14 ++++++++--
 4 files changed, 77 insertions(+), 3 deletions(-)
 create mode 100755 ci/test_wheel_cugraph-equivariant.sh

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 455399f418d..080b7653580 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -29,9 +29,10 @@ jobs:
       - wheel-tests-cugraph-dgl
       - wheel-build-cugraph-pyg
       - wheel-tests-cugraph-pyg
-      - devcontainer
       - conda-python-build-cugraph-equivariant
       - wheel-build-cugraph-equivariant
+      - wheel-tests-cugraph-equivariant
+      - devcontainer
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02
   checks:
@@ -175,6 +176,14 @@ jobs:
     with:
       build_type: pull-request
       script: ci/build_wheel_cugraph-equivariant.sh
+  wheel-tests-cugraph-equivariant:
+    needs: wheel-build-cugraph-equivariant
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02
+    with:
+      build_type: pull-request
+      script: ci/test_wheel_cugraph-equivariant.sh
+      matrix_filter: map(select(.ARCH == "amd64"))
   devcontainer:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02
diff --git a/ci/test_wheel_cugraph-equivariant.sh b/ci/test_wheel_cugraph-equivariant.sh
new file mode 100755
index 00000000000..f054780b03a
--- /dev/null
+++ b/ci/test_wheel_cugraph-equivariant.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+set -eoxu pipefail
+
+package_name="cugraph-equivariant"
+package_dir="python/cugraph-equivariant"
+
+python_package_name=$(echo ${package_name}|sed 's/-/_/g')
+
+mkdir -p ./dist
+RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
+
+# use 'ls' to expand wildcard before adding `[extra]` requires for pip
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+# pip creates wheels using python package names
+python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]
+
+PKG_CUDA_VER="$(echo ${CUDA_VERSION} | cut -d '.' -f1,2 | tr -d '.')"
+PKG_CUDA_VER_MAJOR=${PKG_CUDA_VER:0:2}
+if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then
+    PYTORCH_CUDA_VER="121"
+else
+    PYTORCH_CUDA_VER=$PKG_CUDA_VER
+fi
+PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}"
+
+rapids-logger "Installing PyTorch and e3nn"
+rapids-retry python -m pip install torch --index-url ${PYTORCH_URL}
+rapids-retry python -m pip install e3nn
+
+python -m pytest python/cugraph-equivariant/cugraph_equivariant/tests
diff --git a/dependencies.yaml b/dependencies.yaml
index 3eed525bfe4..be413cf4c6a 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -197,6 +197,28 @@ files:
       key: test
       includes:
         - test_python_common
+  py_build_cugraph_equivariant:
+    output: pyproject
+    pyproject_dir: python/cugraph-equivariant
+    extras:
+      table: build-system
+    includes:
+      - python_build_wheel
+  py_run_cugraph_equivariant:
+    output: pyproject
+    pyproject_dir: python/cugraph-equivariant
+    extras:
+      table: project
+    includes:
+      - depends_on_pylibcugraphops
+  py_test_cugraph_equivariant:
+    output: pyproject
+    pyproject_dir: python/cugraph-equivariant
+    extras:
+      table: project.optional-dependencies
+      key: test
+    includes:
+      - test_python_common
   py_build_cugraph_service_client:
     output: pyproject
     pyproject_dir: python/cugraph-service/client
diff --git a/python/cugraph-equivariant/pyproject.toml b/python/cugraph-equivariant/pyproject.toml
index 9c7caa78f96..f261b0e3535 100644
--- a/python/cugraph-equivariant/pyproject.toml
+++ b/python/cugraph-equivariant/pyproject.toml
@@ -13,7 +13,7 @@
 
 [build-system]
 requires = [
-    "setuptools",
+    "setuptools>=61.0.0",
     "wheel",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
@@ -35,12 +35,22 @@ classifiers = [
 ]
 dependencies = [
     "pylibcugraphops==24.2.*",
-]
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
 Homepage = "https://github.com/rapidsai/cugraph"
 Documentation = "https://docs.rapids.ai/api/cugraph/stable/api_docs/cugraph-ops/"
 
+[project.optional-dependencies]
+test = [
+    "pandas",
+    "pytest",
+    "pytest-benchmark",
+    "pytest-cov",
+    "pytest-xdist",
+    "scipy",
+] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+
 [tool.setuptools]
 license-files = ["LICENSE"]
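The new wheel-test script derives the PyTorch wheel index from the CUDA toolkit version: any 12.x toolkit falls back to PyTorch's `cu121` wheels, while 11.x toolkits use their own major.minor suffix. The same logic, rewritten in Python purely for illustration (the shell script above remains the source of truth):

```python
# Illustration of the CUDA-toolkit -> PyTorch-wheel-index mapping from
# ci/test_wheel_cugraph-equivariant.sh, with sample version strings.
def pytorch_wheel_url(cuda_version: str) -> str:
    major, minor = cuda_version.split(".")[:2]
    suffix = major + minor        # "11.8.0" -> "118"
    if major == "12":
        suffix = "121"            # every CUDA 12.x job uses the cu121 wheels
    return f"https://download.pytorch.org/whl/cu{suffix}"

assert pytorch_wheel_url("11.8.0") == "https://download.pytorch.org/whl/cu118"
assert pytorch_wheel_url("12.0.1") == "https://download.pytorch.org/whl/cu121"
```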
From 3fd56dc4ab5cd33b9b72d37a3f4eb8dbc4885bac Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Fri, 12 Jan 2024 20:51:33 -0500
Subject: [PATCH 11/21] absorb conda build into build_python script

---
 .github/workflows/pr.yaml              |  7 ------
 ci/build_python.sh                     |  5 ++++
 ci/build_python_cugraph-equivariant.sh | 28 ---------------------
 ci/test_python.sh                      | 34 ++++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 35 deletions(-)
 delete mode 100755 ci/build_python_cugraph-equivariant.sh

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 080b7653580..1bb2e0ab0a7 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -29,7 +29,6 @@ jobs:
       - wheel-tests-cugraph-dgl
       - wheel-build-cugraph-pyg
       - wheel-tests-cugraph-pyg
-      - conda-python-build-cugraph-equivariant
       - wheel-build-cugraph-equivariant
       - wheel-tests-cugraph-equivariant
       - devcontainer
@@ -59,12 +58,6 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02
     with:
       build_type: pull-request
-  conda-python-build-cugraph-equivariant:
-    secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02
-    with:
-      build_type: pull-request
-      build_script: ci/build_python_cugraph-equivariant.sh
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
diff --git a/ci/build_python.sh b/ci/build_python.sh
index a99e5ce63e8..07a4f59396b 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -89,4 +89,9 @@ if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then
     conda/recipes/cugraph-dgl
 fi
 
+rapids-conda-retry mambabuild \
+  --no-test \
+  --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
+  conda/recipes/cugraph-equivariant
+
 rapids-upload-conda-to-s3 python
diff --git a/ci/build_python_cugraph-equivariant.sh b/ci/build_python_cugraph-equivariant.sh
deleted file mode 100755
index b48d203a210..00000000000
--- a/ci/build_python_cugraph-equivariant.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-set -euo pipefail
-
-source rapids-env-update
-
-rapids-print-env
-
-version=$(rapids-generate-version)
-git_commit=$(git rev-parse HEAD)
-export RAPIDS_PACKAGE_VERSION=${version}
-echo "${version}" > VERSION
-
-rapids-logger "Begin py build"
-
-package_dir="python"
-for package_name in cugraph-equivariant; do
-  underscore_package_name=$(echo "${package_name}" | tr "-" "_")
-  sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${underscore_package_name}/_version.py"
-done
-
-rapids-conda-retry mambabuild \
-  --no-test \
-  --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
-  conda/recipes/cugraph-equivariant
-
-rapids-upload-conda-to-s3 python
diff --git a/ci/test_python.sh b/ci/test_python.sh
index 7eb5a08edc8..f5a554e6414 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -247,5 +247,39 @@ else
     rapids-logger "skipping cugraph_pyg pytest on CUDA != 11.8"
 fi
 
+if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
+  if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
+    # Reuse cugraph-dgl's test env for cugraph-equivariant
+    set +u
+    conda activate test_cugraph_dgl
+    set -u
+    pip install e3nn==0.5.1
+
+    rapids-print-env
+
+    rapids-logger "pytest cugraph-equivariant"
+    pushd python/cugraph-equivariant/cugraph_equivariant
+    pytest \
+      --cache-clear \
+      --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-dgl.xml" \
+      --cov-config=../../.coveragerc \
+      --cov=cugraph_dgl \
+      --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-dgl-coverage.xml" \
+      --cov-report=term \
+      .
+    popd
+
+    # Reactivate the test environment back
+    set +u
+    conda deactivate
+    conda activate test
+    set -u
+  else
+    rapids-logger "skipping cugraph-equivariant pytest on ARM64"
+  fi
+else
+  rapids-logger "skipping cugraph-equivariant pytest on CUDA!=11.8"
+fi
+
 rapids-logger "Test script exiting with value: $EXITCODE"
 exit ${EXITCODE}
From 924e55b6316909c3c94b14def90782327aad9d0c Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Thu, 18 Jan 2024 18:29:57 -0500
Subject: [PATCH 12/21] update based on cugraph-ops pr

---
 .../nn/tensor_product_conv.py          | 54 +++++++++++++++----
 .../tests/test_tensor_product_conv.py   | 32 +++++------
 2 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index 46f1eea019f..b6a10d738a9 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -20,12 +20,11 @@
 
 from cugraph_equivariant.utils import scatter_reduce
 
-try:
-    from pylibcugraphops.equivariant import TensorProduct
-
-    HAS_TP_LIB = True
-except ImportError:
-    HAS_TP_LIB = False
+from pylibcugraphops.pytorch.operators import (
+    FusedFullyConnectedTensorProduct,
+    transpose_irrep_to_m_last,
+    transpose_irrep_to_channels_last,
+)
 
 
 class FullyConnectedTensorProductConv(nn.Module):
@@ -73,6 +72,33 @@ class FullyConnectedTensorProductConv(nn.Module):
         leading to a lower complexity in most use cases. This option requires
         users to explicitly pass in `src_scalars` and `dst_scalars` in
         `forward()` call.
+
+    use_e3nn_tp: bool, optional (default=False)
+        If `True`, use TensorProduct functions from e3nn.
+
+    Examples
+    --------
+    >>> # Case 1: MLP with the input layer having 6 channels and 2 hidden layers
+    >>> #         having 16 channels. edge_emb.size(1) must match the size of
+    >>> #         the input layer: 6
+    >>>
+    >>> conv1 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
+    >>>     mlp_channels=[6, 16, 16], mlp_activation=nn.ReLU).cuda()
+    >>> out = conv1(src_features, edge_sh, edge_emb, graph)
+    >>>
+    >>> # Case 2: No MLP, edge_emb will be directly used as the tensor product weights
+    >>>
+    >>> conv2 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
+    >>>     mlp_channels=None).cuda()
+    >>> out = conv2(src_features, edge_sh, edge_emb, graph)
+    >>>
+    >>> # Case 3: Same as case 1 but with `mlp_fast_first_layer=True`. The scalar features
+    >>> #         from edges, sources and destinations have to be passed in separately.
+    >>>
+    >>> conv3 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
+    >>>     mlp_channels=[6, 16, 16], mlp_fast_first_layer=True).cuda()
+    >>> out = conv3(src_features, edge_sh, edge_scalars, graph,
+    >>>     src_scalars=src_scalars, dst_scalars=dst_scalars)
     """
 
     def __init__(
@@ -84,18 +110,20 @@ def __init__(
         mlp_channels: Optional[Sequence[int]] = None,
         mlp_activation: Optional[Callable[..., nn.Module]] = nn.GELU,
         mlp_fast_first_layer: bool = False,
+        use_e3nn_tp: bool = False,
     ):
         super().__init__()
         self.in_irreps = in_irreps
         self.out_irreps = out_irreps
         self.sh_irreps = sh_irreps
 
-        if HAS_TP_LIB:
-            self.tp = TensorProduct(str(in_irreps), str(sh_irreps), str(out_irreps))
-        else:
+        if use_e3nn_tp:
             self.tp = o3.FullyConnectedTensorProduct(
                 in_irreps, sh_irreps, out_irreps, shared_weights=False
             )
+        else:
+            self.tp = FusedFullyConnectedTensorProduct(in_irreps, sh_irreps, out_irreps)
+        self.use_e3nn_tp = use_e3nn_tp
 
         self.batch_norm = BatchNorm(out_irreps) if batch_norm else None
 
@@ -204,7 +232,13 @@ def forward(
         else:
             tp_weights = edge_emb
 
-        out = self.tp(src_features[src], edge_sh, tp_weights)
+        if not self.use_e3nn_tp:
+            out = self.tp(src_features[src], edge_sh, tp_weights)
+        else:
+            src_features = transpose_irrep_to_m_last(src_features, self.in_irreps)
+            edge_sh = transpose_irrep_to_m_last(edge_sh, self.sh_irreps)
+            out = self.tp(src_features[src], edge_sh, tp_weights)
+            out = transpose_irrep_to_channels_last(out, self.out_irreps)
 
         if edge_envelope is not None:
             out = out * edge_envelope.view(-1, 1)
diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
index 3e9c0ca3df6..5dc90fe3f0d 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
@@ -17,6 +17,8 @@
 from e3nn import o3
 from cugraph_equivariant.nn import FullyConnectedTensorProductConv
 
+device = torch.device("cuda:0")
+
 
 @pytest.mark.parametrize(
     "mlp_channels, mlp_fast_first_layer",
@@ -35,37 +37,37 @@ def test_tensor_product_conv_equivariance(mlp_channels, mlp_fast_first_layer):
         out_irreps=out_irreps,
         mlp_channels=mlp_channels,
         mlp_fast_first_layer=mlp_fast_first_layer,
-    )
+    ).to(device)
 
     num_src_nodes, num_dst_nodes = 9, 7
     num_edges = 40
-    src = torch.randint(num_src_nodes, (num_edges,))
-    dst = torch.randint(num_dst_nodes, (num_edges,))
+    src = torch.randint(num_src_nodes, (num_edges,), device=device)
+    dst = torch.randint(num_dst_nodes, (num_edges,), device=device)
     edge_index = torch.vstack((src, dst))
 
-    src_pos = torch.randn(num_src_nodes, 3)
-    dst_pos = torch.randn(num_dst_nodes, 3)
+    src_pos = torch.randn(num_src_nodes, 3, device=device)
+    dst_pos = torch.randn(num_dst_nodes, 3, device=device)
     edge_vec = dst_pos[dst] - src_pos[src]
     edge_sh = o3.spherical_harmonics(
         tp_conv.sh_irreps, edge_vec, normalize=True, normalization="component"
-    )
-    src_features = torch.randn(num_src_nodes, in_irreps.dim)
+    ).to(device)
+    src_features = torch.randn(num_src_nodes, in_irreps.dim, device=device)
 
     rot = o3.rand_matrix()
-    D_in = tp_conv.in_irreps.D_from_matrix(rot)
-    D_sh = tp_conv.sh_irreps.D_from_matrix(rot)
-    D_out = tp_conv.out_irreps.D_from_matrix(rot)
+    D_in = tp_conv.in_irreps.D_from_matrix(rot).to(device)
+    D_sh = tp_conv.sh_irreps.D_from_matrix(rot).to(device)
+    D_out = tp_conv.out_irreps.D_from_matrix(rot).to(device)
 
     if mlp_channels is None:
-        edge_emb = torch.randn(num_edges, tp_conv.tp.weight_numel)
+        edge_emb = torch.randn(num_edges, tp_conv.tp.weight_numel, device=device)
         src_scalars = dst_scalars = None
     else:
         if mlp_fast_first_layer:
-            edge_emb = torch.randn(num_edges, tp_conv.num_scalars)
-            src_scalars = torch.randn(num_src_nodes, tp_conv.num_scalars)
-            dst_scalars = torch.randn(num_dst_nodes, tp_conv.num_scalars)
+            edge_emb = torch.randn(num_edges, tp_conv.num_scalars, device=device)
+            src_scalars = torch.randn(num_src_nodes, tp_conv.num_scalars, device=device)
+            dst_scalars = torch.randn(num_dst_nodes, tp_conv.num_scalars, device=device)
         else:
-            edge_emb = torch.randn(num_edges, tp_conv.mlp[0].in_features)
+            edge_emb = torch.randn(num_edges, tp_conv.mlp[0].in_features, device=device)
             src_scalars = dst_scalars = None
 
     # rotate before
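The docstring cases added in this commit leave the surrounding names undefined. A self-contained rendering of "Case 1" follows; it assumes a CUDA device and an installed `pylibcugraphops` backend, and all graph sizes and features are random placeholders:

```python
# Runnable sketch of docstring "Case 1": an MLP whose input layer has 6
# channels, so edge_emb must have 6 features per edge.
import torch
from e3nn import o3
from cugraph_equivariant.nn import FullyConnectedTensorProductConv

device = torch.device("cuda:0")
in_irreps = o3.Irreps("10x0e + 10x1e")
out_irreps = o3.Irreps("20x0e + 10x1e")
sh_irreps = o3.Irreps.spherical_harmonics(lmax=2)

conv = FullyConnectedTensorProductConv(
    in_irreps, sh_irreps, out_irreps, mlp_channels=[6, 16, 16]
).to(device)

num_src, num_dst, num_edges = 9, 7, 40
src = torch.randint(num_src, (num_edges,), device=device)
dst = torch.randint(num_dst, (num_edges,), device=device)

edge_vec = torch.randn(num_edges, 3, device=device)  # relative positions
edge_sh = o3.spherical_harmonics(
    sh_irreps, edge_vec, normalize=True, normalization="component"
)
src_features = torch.randn(num_src, in_irreps.dim, device=device)
edge_emb = torch.randn(num_edges, 6, device=device)  # = mlp_channels[0]

out = conv(
    src_features, edge_sh, edge_emb,
    graph=(torch.vstack((src, dst)), (num_src, num_dst)),
)
assert out.shape == (num_dst, out_irreps.dim)
```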
From 0a9c4b85a945d40ae3b2f62204facb0815a51795 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Thu, 18 Jan 2024 20:55:59 -0500
Subject: [PATCH 13/21] style

---
 .../cugraph_equivariant/nn/tensor_product_conv.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index b6a10d738a9..c4231fd8818 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -92,8 +92,9 @@ class FullyConnectedTensorProductConv(nn.Module):
     >>>     mlp_channels=None).cuda()
     >>> out = conv2(src_features, edge_sh, edge_emb, graph)
     >>>
-    >>> # Case 3: Same as case 1 but with `mlp_fast_first_layer=True`. The scalar features
-    >>> #         from edges, sources and destinations have to be passed in separately.
+    >>> # Case 3: Same as case 1 but with `mlp_fast_first_layer=True`. The scalar
+    >>> #         features from edges, sources and destinations have to be passed
+    >>> #         in separately.
     >>>
     >>> conv3 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
     >>>     mlp_channels=[6, 16, 16], mlp_fast_first_layer=True).cuda()

From b9ce1ca6bbf61ddeadab7ba03e7aa3af1900c2c5 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Mon, 22 Jan 2024 21:17:53 -0500
Subject: [PATCH 14/21] address comments

---
 .../nn/tensor_product_conv.py          | 79 +++++++++----------
 .../tests/test_tensor_product_conv.py  | 23 ++++--
 2 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index c4231fd8818..c8e5f29265e 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -20,11 +20,7 @@
 
 from cugraph_equivariant.utils import scatter_reduce
 
-from pylibcugraphops.pytorch.operators import (
-    FusedFullyConnectedTensorProduct,
-    transpose_irrep_to_m_last,
-    transpose_irrep_to_channels_last,
-)
+from pylibcugraphops.pytorch.operators import FusedFullyConnectedTensorProduct
 
 
 class FullyConnectedTensorProductConv(nn.Module):
@@ -110,32 +106,20 @@ def __init__(
         batch_norm: bool = True,
         mlp_channels: Optional[Sequence[int]] = None,
         mlp_activation: Optional[Callable[..., nn.Module]] = nn.GELU,
-        mlp_fast_first_layer: bool = False,
-        use_e3nn_tp: bool = False,
+        e3nn_compat_mode: bool = False,
     ):
         super().__init__()
         self.in_irreps = in_irreps
         self.out_irreps = out_irreps
         self.sh_irreps = sh_irreps
+        self.e3nn_compat_mode = e3nn_compat_mode
 
-        if use_e3nn_tp:
-            self.tp = o3.FullyConnectedTensorProduct(
-                in_irreps, sh_irreps, out_irreps, shared_weights=False
-            )
-        else:
-            self.tp = FusedFullyConnectedTensorProduct(in_irreps, sh_irreps, out_irreps)
-        self.use_e3nn_tp = use_e3nn_tp
+        self.tp = FusedFullyConnectedTensorProduct(
+            in_irreps, sh_irreps, out_irreps, e3nn_compat_mode=e3nn_compat_mode
+        )
 
         self.batch_norm = BatchNorm(out_irreps) if batch_norm else None
 
-        if mlp_fast_first_layer:
-            assert mlp_channels is not None
-            assert mlp_channels[0] % 3 == 0
-            self.num_scalars = int(mlp_channels[0] / 3)
-        else:
-            self.num_scalars = None
-        self.mlp_fast_first_layer = mlp_fast_first_layer
-
         if mlp_channels is not None:
             dims = list(mlp_channels) + [self.tp.weight_numel]
             mlp = []
@@ -152,7 +136,7 @@ def forward(
         src_features: torch.Tensor,
         edge_sh: torch.Tensor,
         edge_emb: torch.Tensor,
-        graph: tuple[torch.Tensor, tuple[int, int]],  # COO, (n_src, n_dst)
+        graph: tuple[torch.Tensor, tuple[int, int]],
         src_scalars: Optional[torch.Tensor] = None,
         dst_scalars: Optional[torch.Tensor] = None,
         reduce: str = "mean",
@@ -177,8 +161,10 @@ def forward(
         edge_emb: torch.Tensor
             Edge embeddings that are fed into MLPs to generate tensor product weights.
             Shape: (num_edges, dim), where `dim` should be:
            - `tp.weight_numel` when the layer does not contain MLPs.
            - `num_scalars` when `mlp_fast_first_layer` enabled.
            - `mlp_channels[0]` otherwise.
 
-        graph : Any
-            The graph.
+        graph : tuple
+            A tuple that stores the graph information, with the first element being
+            the adjacency matrix in COO, and the second element being its shape:
+            (num_src_nodes, num_dst_nodes).
 
         src_scalars: torch.Tensor, optional
             Scalar features of source nodes.
@@ -201,26 +187,40 @@ def forward(
             Output node features.
             Shape: (num_dst_nodes, out_irreps.dim)
         """
+        edge_emb_size = edge_emb.size(-1)
+        src_scalars_size = 0 if src_scalars is None else src_scalars.size(-1)
+        dst_scalars_size = 0 if dst_scalars is None else dst_scalars.size(-1)
+
         if self.mlp is None:
-            assert self.tp.weight_numel == edge_emb.size(-1)
+            if self.tp.weight_numel != edge_emb_size:
+                raise RuntimeError(
+                    f"When MLP is not present, edge_emb's last dimension must "
+                    f"equal tp.weight_numel (but got {edge_emb_size} and "
+                    f"{self.tp.weight_numel})"
+                )
         else:
-            if self.mlp_fast_first_layer:
-                assert edge_emb.size(-1) == self.num_scalars
-                assert src_scalars.size(-1) == self.num_scalars
-                assert dst_scalars.size(-1) == self.num_scalars
-            else:
-                assert self.mlp[0].in_features == edge_emb.size(-1)
+            total_size = edge_emb_size + src_scalars_size + dst_scalars_size
+            if self.mlp[0].in_features != total_size:
+                raise RuntimeError(
+                    f"The size of MLP's input layer ({self.mlp[0].in_features}) "
+                    f"does not match the total number of scalar features from "
+                    f"edge_emb, src_scalars and dst_scalars ({total_size})"
+                )
 
         if reduce not in ["mean", "sum"]:
-            raise ValueError(
+            raise RuntimeError(
                 f"reduce argument must be either 'mean' or 'sum', got {reduce}."
            )
 
         (src, dst), (num_src_nodes, num_dst_nodes) = graph
 
         if self.mlp is not None:
-            if self.mlp_fast_first_layer:
-                w_edge, w_src, w_dst = torch.chunk(self.mlp[0].weight, chunks=3, dim=-1)
+            if src_scalars is not None and dst_scalars is not None:
+                w_edge, w_src, w_dst = torch.split(
+                    self.mlp[0].weight,
+                    (edge_emb_size, src_scalars_size, dst_scalars_size),
+                    dim=-1,
+                )
                 tp_weights = (
                     edge_emb @ w_edge.T
                     + (src_scalars @ w_src.T)[src]
@@ -229,17 +229,12 @@ def forward(
                 )
                 tp_weights = self.mlp[1:](tp_weights)
             else:
+                assert src_scalars is None and dst_scalars is None
                 tp_weights = self.mlp(edge_emb)
         else:
             tp_weights = edge_emb
 
-        if not self.use_e3nn_tp:
-            out = self.tp(src_features[src], edge_sh, tp_weights)
-        else:
-            src_features = transpose_irrep_to_m_last(src_features, self.in_irreps)
-            edge_sh = transpose_irrep_to_m_last(edge_sh, self.sh_irreps)
-            out = self.tp(src_features[src], edge_sh, tp_weights)
-            out = transpose_irrep_to_channels_last(out, self.out_irreps)
+        out = self.tp(src_features[src], edge_sh, tp_weights)
 
         if edge_envelope is not None:
             out = out * edge_envelope.view(-1, 1)
diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
index 5dc90fe3f0d..7ec2227b3f1 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
@@ -20,11 +20,14 @@
 device = torch.device("cuda:0")
 
 
+@pytest.mark.parametrize("e3nn_compat_mode", [True, False])
 @pytest.mark.parametrize(
-    "mlp_channels, mlp_fast_first_layer",
-    [[(3 * 10, 8, 8), True], [(7,), False], [None, False]],
+    "mlp_channels, use_src_dst_scalars",
+    [[(30, 8, 8), True], [(7,), False], [None, False]],
 )
-def test_tensor_product_conv_equivariance(mlp_channels, mlp_fast_first_layer):
+def test_tensor_product_conv_equivariance(
+    mlp_channels, use_src_dst_scalars, e3nn_compat_mode
+):
     torch.manual_seed(12345)
 
@@ -39,6 +45,8 @@ def test_tensor_product_conv_equivariance(
         sh_irreps=sh_irreps,
         out_irreps=out_irreps,
         mlp_channels=mlp_channels,
-        mlp_fast_first_layer=mlp_fast_first_layer,
+        e3nn_compat_mode=e3nn_compat_mode,
     ).to(device)
 
     num_src_nodes, num_dst_nodes = 9, 7
@@ -62,10 +65,14 @@ def test_tensor_product_conv_equivariance(
     if mlp_channels is None:
         edge_emb = torch.randn(num_edges, tp_conv.tp.weight_numel, device=device)
         src_scalars = dst_scalars = None
     else:
-        if mlp_fast_first_layer:
-            edge_emb = torch.randn(num_edges, tp_conv.num_scalars, device=device)
-            src_scalars = torch.randn(num_src_nodes, tp_conv.num_scalars, device=device)
-            dst_scalars = torch.randn(num_dst_nodes, tp_conv.num_scalars, device=device)
+        if use_src_dst_scalars:
+            edge_emb_size, src_scalars_size = 2, 1
+            dst_scalars_size = (
+                tp_conv.mlp[0].in_features - edge_emb_size - src_scalars_size
+            )
+            edge_emb = torch.randn(num_edges, edge_emb_size, device=device)
+            src_scalars = torch.randn(num_src_nodes, src_scalars_size, device=device)
+            dst_scalars = torch.randn(num_dst_nodes, dst_scalars_size, device=device)
         else:
             edge_emb = torch.randn(num_edges, tp_conv.mlp[0].in_features, device=device)
             src_scalars = dst_scalars = None
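The `torch.split` path introduced in this commit is an optimization: instead of gathering and concatenating `[edge_emb, src_scalars[src], dst_scalars[dst]]` per edge, the first linear layer's weight is split column-wise so the node-level products are computed once per node and only indexed per edge. A minimal sketch checking that the decomposition matches the naive concatenation (sizes are illustrative):

```python
# Verify: splitting the first Linear layer's weight and summing the partial
# products equals applying the layer to the concatenated per-edge features.
import torch
from torch import nn

num_edges, num_src, num_dst = 40, 9, 7
e_size, s_size, d_size, out_size = 6, 4, 5, 16

lin = nn.Linear(e_size + s_size + d_size, out_size)
edge_emb = torch.randn(num_edges, e_size)
src_scalars = torch.randn(num_src, s_size)
dst_scalars = torch.randn(num_dst, d_size)
src = torch.randint(num_src, (num_edges,))
dst = torch.randint(num_dst, (num_edges,))

# Naive path: gather, concatenate per edge, then one big GEMM.
naive = lin(torch.cat([edge_emb, src_scalars[src], dst_scalars[dst]], dim=-1))

# Decomposed path: split the weight, run three smaller GEMMs (the node-level
# ones run once per node, not once per edge), then index and sum.
w_edge, w_src, w_dst = torch.split(lin.weight, (e_size, s_size, d_size), dim=-1)
decomposed = (
    edge_emb @ w_edge.T
    + (src_scalars @ w_src.T)[src]
    + (dst_scalars @ w_dst.T)[dst]
    + lin.bias
)

assert torch.allclose(naive, decomposed, atol=1e-5)
```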
From 4457efffa5eb700cacd7b982bf49580cb3102e69 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Tue, 23 Jan 2024 14:34:41 -0500
Subject: [PATCH 15/21] mlp_activation accept sequences, imp docstrings

---
 .../nn/tensor_product_conv.py          | 96 ++++++++++---------
 .../tests/test_tensor_product_conv.py  | 32 +++++--
 2 files changed, 74 insertions(+), 54 deletions(-)

diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index c8e5f29265e..aab8b07a1ba 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -11,7 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Callable, Sequence
+from typing import Optional, Sequence, Union
 
 import torch
 from torch import nn
@@ -32,14 +32,16 @@ class FullyConnectedTensorProductConv(nn.Module):
         \sum_{b \in \mathcal{N}_a} Y\left(\hat{r}_{a b}\right)
         \otimes_{\psi_{a b}} \mathbf{h}_b
 
-    where the path weights :math:`\psi_{a b}` are from user input to the forward()
-    function. For example, they can be constructed from edge embeddings and
-    scalar features:
+    where the path weights :math:`\psi_{a b}` can be constructed from edge
+    embeddings and scalar features using an MLP:
 
     .. math::
         \psi_{a b} = \operatorname{MLP}
        \left(e_{a b}, \mathbf{h}_a^0, \mathbf{h}_b^0\right)
 
+    Users have the option to either directly input the weights or provide the
+    MLP parameters and scalar features from edges and nodes.
+
     Parameters
     ----------
     in_irreps : e3nn.o3.Irreps
@@ -58,19 +60,15 @@ class FullyConnectedTensorProductConv(nn.Module):
     mlp_channels : sequence of ints, optional (default=None)
         A sequence of integers defining number of neurons in each layer in MLP
         before the output layer. If `None`, no MLP will be added.
 
-    mlp_activation : callable, optional (default=torch.nn.GELU)
-        Activation function in MLP.
-
-    mlp_fast_first_layer : bool, optional (default=False)
-        When enabled, for the first layer of MLP, the module performs GEMMs on
-        individual components (i.e., edge embeddings, scalar features of source
-        and destinations nodes) of the weights before indexing and concatenation,
-        leading to a lower complexity in most use cases. This option requires
-        users to explicitly pass in `src_scalars` and `dst_scalars` in
-        `forward()` call.
+    mlp_activation : nn.Module or sequence of nn.Module, optional (default=nn.GELU())
+        A sequence of functions to be applied in between linear layers in MLP,
+        e.g., `nn.Sequential(nn.ReLU(), nn.Dropout(0.4))`.
 
-    use_e3nn_tp: bool, optional (default=False)
-        If `True`, use TensorProduct functions from e3nn.
+    e3nn_compat_mode: bool, optional (default=False)
+        cugraph-ops and e3nn use different memory layout for Irreps-tensors.
+        The last (fastest moving) dimension is num_channels for cugraph-ops and
+        ir.dim for e3nn. When enabled, the input and output of this layer will
+        follow e3nn's memory layout.
 
     Examples
     --------
     >>> # Case 1: MLP with the input layer having 6 channels and 2 hidden layers
     >>> #         having 16 channels. edge_emb.size(1) must match the size of
     >>> #         the input layer: 6
     >>>
     >>> conv1 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
-    >>>     mlp_channels=[6, 16, 16], mlp_activation=nn.ReLU).cuda()
+    >>>     mlp_channels=[6, 16, 16], mlp_activation=nn.ReLU()).cuda()
     >>> out = conv1(src_features, edge_sh, edge_emb, graph)
     >>>
-    >>> # Case 2: No MLP, edge_emb will be directly used as the tensor product weights
+    >>> # Case 2: Same as case 1 but with the scalar features from edges, sources
+    >>> #         and destinations passed in separately.
     >>>
-    >>> conv2 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
-    >>>     mlp_channels=None).cuda()
-    >>> out = conv2(src_features, edge_sh, edge_emb, graph)
+    >>> conv2 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
+    >>>     mlp_channels=[6, 16, 16], mlp_activation=nn.ReLU()).cuda()
+    >>> out = conv2(src_features, edge_sh, edge_scalars, graph,
+    >>>     src_scalars=src_scalars, dst_scalars=dst_scalars)
     >>>
-    >>> # Case 3: Same as case 1 but with `mlp_fast_first_layer=True`. The scalar
-    >>> #         features from edges, sources and destinations have to be passed
-    >>> #         in separately.
+    >>> # Case 3: No MLP, edge_emb will be directly used as the tensor product weights
     >>>
-    >>> conv3 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
-    >>>     mlp_channels=[6, 16, 16], mlp_fast_first_layer=True).cuda()
-    >>> out = conv3(src_features, edge_sh, edge_scalars, graph,
-    >>>     src_scalars=src_scalars, dst_scalars=dst_scalars)
+    >>> conv3 = FullyConnectedTensorProductConv(in_irreps, sh_irreps, out_irreps,
+    >>>     mlp_channels=None).cuda()
+    >>> out = conv3(src_features, edge_sh, edge_emb, graph)
     """
 
     def __init__(
@@ -105,7 +103,7 @@ def __init__(
         out_irreps: o3.Irreps,
         batch_norm: bool = True,
         mlp_channels: Optional[Sequence[int]] = None,
-        mlp_activation: Optional[Callable[..., nn.Module]] = nn.GELU,
+        mlp_activation: Union[nn.Module, Sequence[nn.Module]] = nn.GELU(),
         e3nn_compat_mode: bool = False,
     ):
         super().__init__()
@@ -120,13 +118,22 @@ def __init__(
 
         self.batch_norm = BatchNorm(out_irreps) if batch_norm else None
 
+        if mlp_activation is None:
+            mlp_activation = []
+        elif hasattr(mlp_activation, "__len__") and hasattr(
+            mlp_activation, "__getitem__"
+        ):
+            mlp_activation = list(mlp_activation)
+        else:
+            mlp_activation = [mlp_activation]
+
         if mlp_channels is not None:
             dims = list(mlp_channels) + [self.tp.weight_numel]
             mlp = []
             for i in range(len(dims) - 1):
                 mlp.append(nn.Linear(dims[i], dims[i + 1]))
-                if mlp_activation is not None and i != len(dims) - 2:
-                    mlp.append(mlp_activation())
+                if i != len(dims) - 2:
+                    mlp.extend(mlp_activation)
             self.mlp = nn.Sequential(*mlp)
         else:
             self.mlp = None
@@ -156,10 +163,10 @@ def forward(
         edge_emb: torch.Tensor
             Edge embeddings that are fed into MLPs to generate tensor product weights.
             Shape: (num_edges, dim), where `dim` should be:
             - `tp.weight_numel` when the layer does not contain MLPs.
-            - `num_scalars` when `mlp_fast_first_layer` enabled.
-            - `mlp_channels[0]` otherwise.
+            - num_edge_scalars, with the sum of num_[edge/src/dst]_scalars being
+              mlp_channels[0]
 
         graph : tuple
             A tuple that stores the graph information, with the first element being
             the adjacency matrix in COO, and the second element being its shape:
             (num_src_nodes, num_dst_nodes).
 
         src_scalars: torch.Tensor, optional
             Scalar features of source nodes.
-            Shape: (num_src_nodes, num_scalars)
+            Shape: (num_src_nodes, num_src_scalars)
 
         dst_scalars: torch.Tensor, optional
             Scalar features of destination nodes.
-            Shape: (num_dst_nodes, num_scalars)
+            Shape: (num_dst_nodes, num_dst_scalars)
 
         reduce : str, optional (default="mean")
             Reduction operator. Choose between "mean" and "sum".
@@ -215,22 +222,23 @@ def forward(
         (src, dst), (num_src_nodes, num_dst_nodes) = graph
 
         if self.mlp is not None:
-            if src_scalars is not None and dst_scalars is not None:
+            if src_scalars is None and dst_scalars is None:
+                tp_weights = self.mlp(edge_emb)
+            else:
                 w_edge, w_src, w_dst = torch.split(
                     self.mlp[0].weight,
                     (edge_emb_size, src_scalars_size, dst_scalars_size),
                     dim=-1,
                 )
-                tp_weights = (
-                    edge_emb @ w_edge.T
-                    + (src_scalars @ w_src.T)[src]
-                    + (dst_scalars @ w_dst.T)[dst]
-                    + self.mlp[0].bias
-                )
+                tp_weights = edge_emb @ w_edge.T + self.mlp[0].bias
+
+                if src_scalars is not None:
+                    tp_weights += (src_scalars @ w_src.T)[src]
+
+                if dst_scalars is not None:
+                    tp_weights += (dst_scalars @ w_dst.T)[dst]
+
                 tp_weights = self.mlp[1:](tp_weights)
-            else:
-                assert src_scalars is None and dst_scalars is None
-                tp_weights = self.mlp(edge_emb)
         else:
             tp_weights = edge_emb
diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
index 7ec2227b3f1..6c0c54baafd 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
@@ -14,6 +14,7 @@
 import pytest
 
 import torch
+from torch import nn
 from e3nn import o3
 
 from cugraph_equivariant.nn import FullyConnectedTensorProductConv
@@ -21,12 +22,17 @@
 
 @pytest.mark.parametrize("e3nn_compat_mode", [True, False])
+@pytest.mark.parametrize("batch_norm", [True, False])
 @pytest.mark.parametrize(
-    "mlp_channels, use_src_dst_scalars",
-    [[(30, 8, 8), True], [(7,), False], [None, False]],
+    "mlp_channels, mlp_activation, scalar_sizes",
+    [
+        [(30, 8, 8), nn.Sequential(nn.Dropout(0.3), nn.ReLU()), (15, 15, 0)],
+        [(7,), nn.GELU(), (2, 3, 2)],
+        [None, None, None],
+    ],
 )
 def test_tensor_product_conv_equivariance(
-    mlp_channels, use_src_dst_scalars, e3nn_compat_mode
+    mlp_channels, mlp_activation, scalar_sizes, batch_norm, e3nn_compat_mode
 ):
     torch.manual_seed(12345)
 
@@ -39,6 +45,8 @@ def test_tensor_product_conv_equivariance(
         sh_irreps=sh_irreps,
         out_irreps=out_irreps,
         mlp_channels=mlp_channels,
+        mlp_activation=mlp_activation,
+        batch_norm=batch_norm,
         e3nn_compat_mode=e3nn_compat_mode,
     ).to(device)
 
@@ -65,14 +73,18 @@ def test_tensor_product_conv_equivariance(
         edge_emb = torch.randn(num_edges, tp_conv.tp.weight_numel, device=device)
         src_scalars = dst_scalars = None
     else:
-        if use_src_dst_scalars:
-            edge_emb_size, src_scalars_size = 2, 1
-            dst_scalars_size = (
-                tp_conv.mlp[0].in_features - edge_emb_size - src_scalars_size
-            )
-            edge_emb = torch.randn(num_edges, edge_emb_size, device=device)
-            src_scalars = torch.randn(num_src_nodes, src_scalars_size, device=device)
-            dst_scalars = torch.randn(num_dst_nodes, dst_scalars_size, device=device)
+        if scalar_sizes:
+            edge_emb = torch.randn(num_edges, scalar_sizes[0], device=device)
+            src_scalars = (
+                None
+                if scalar_sizes[1] == 0
+                else torch.randn(num_src_nodes, scalar_sizes[1], device=device)
+            )
+            dst_scalars = (
+                None
+                if scalar_sizes[2] == 0
+                else torch.randn(num_dst_nodes, scalar_sizes[2], device=device)
+            )
         else:
             edge_emb = torch.randn(num_edges, tp_conv.mlp[0].in_features, device=device)
             src_scalars = dst_scalars = None

From 65ceca319ae56aa58d2b8afca174a826473a715d Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Tue, 23 Jan 2024 20:03:46 -0500
Subject: [PATCH 16/21] clean up

---
 python/cugraph-equivariant/README.md              | 11 -----------
 .../cugraph_equivariant/nn/tensor_product_conv.py |  3 ++-
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/python/cugraph-equivariant/README.md b/python/cugraph-equivariant/README.md
index 2875a4d8555..d5de8852709 100644
--- a/python/cugraph-equivariant/README.md
+++ b/python/cugraph-equivariant/README.md
@@ -3,14 +3,3 @@
 ## Description
 
 cugraph-equivariant library provides fast symmetry-preserving (equivariant) operations and convolutional layers, to accelerate the equivariant neural networks in drug discovery and other domains.
-
-## Build from source
-
-Developers are suggested to create a conda environment that includes the runtime and test dependencies and pip install `cugraph-equivariant` in an editable mode.
-
-```bash
-# for cuda 11.8
-mamba env create -n cugraph_equivariant -f python/cugraph-equivariant/conda/cugraph_equivariant_dev_cuda-118_arch-x86_64.yaml
-conda activate cugraph_equivariant
-./build_component.sh -n cugraph-equivariant
-```
diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index aab8b07a1ba..998b7013446 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -58,7 +58,8 @@ class FullyConnectedTensorProductConv(nn.Module):
     mlp_channels : sequence of ints, optional (default=None)
         A sequence of integers defining number of neurons in each layer in MLP
-        before the output layer. If `None`, no MLP will be added.
+        before the output layer. If `None`, no MLP will be added. The input layer
+        contains edge embeddings and node scalar features.
 
     mlp_activation : nn.Module or sequence of nn.Module, optional (default=nn.GELU())
         A sequence of functions to be applied in between linear layers in MLP,
         e.g., `nn.Sequential(nn.ReLU(), nn.Dropout(0.4))`.
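With `mlp_activation` generalized to a single module or a sequence of modules, the MLP builder interleaves the given sequence after every linear layer except the last. A small sketch of the resulting structure; `weight_numel` is an assumed stand-in for the tensor-product weight count that the real layer computes:

```python
# Sketch: how the MLP is assembled from mlp_channels plus the tensor-product
# weight count, with the activation sequence interleaved between layers.
import torch
from torch import nn

mlp_channels = [6, 16, 16]
weight_numel = 31                            # illustrative stand-in value
mlp_activation = [nn.Dropout(0.3), nn.ReLU()]

dims = list(mlp_channels) + [weight_numel]   # [6, 16, 16, 31]
layers = []
for i in range(len(dims) - 1):
    layers.append(nn.Linear(dims[i], dims[i + 1]))
    if i != len(dims) - 2:                   # no activation after the last layer
        layers.extend(mlp_activation)
mlp = nn.Sequential(*layers)

out = mlp(torch.randn(40, mlp_channels[0]))
assert out.shape == (40, weight_numel)
```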
From f84885250f0e27bf665d2c6e1a94e73d55c038e0 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Tue, 23 Jan 2024 20:09:28 -0500
Subject: [PATCH 17/21] update workflow files

---
 .github/workflows/build.yaml | 19 +++++++++++++++++++
 .github/workflows/test.yaml  |  9 +++++++++
 2 files changed, 28 insertions(+)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 273a8902eae..243c5f23ec0 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -173,3 +173,22 @@ jobs:
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
       package-name: cugraph-pyg
+  wheel-build-cugraph-equivariant:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      script: ci/build_wheel_cugraph-equivariant.sh
+  wheel-publish-cugraph-equivariant:
+    needs: wheel-build-cugraph-equivariant
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02
+    with:
+      build_type: ${{ inputs.build_type || 'branch' }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+      package-name: cugraph-equivariant
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 773358ede8d..71051bcc529 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -75,3 +75,12 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/test_wheel_cugraph-pyg.sh
+  wheel-tests-cugraph-equivariant:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02
+    with:
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      script: ci/test_wheel_cugraph-equivariant.sh

From c0abf8c338fed9c3b40c576c1244322ce84c5315 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Tue, 23 Jan 2024 23:55:31 -0500
Subject: [PATCH 18/21] correct test script

---
 ci/test_python.sh | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/ci/test_python.sh b/ci/test_python.sh
index f5a554e6414..5892c37e35b 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -247,12 +247,19 @@ else
     rapids-logger "skipping cugraph_pyg pytest on CUDA != 11.8"
 fi
 
+# test cugraph-equivariant
 if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
   if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
     # Reuse cugraph-dgl's test env for cugraph-equivariant
     set +u
     conda activate test_cugraph_dgl
     set -u
+    rapids-mamba-retry install \
+      --channel "${CPP_CHANNEL}" \
+      --channel "${PYTHON_CHANNEL}" \
+      --channel pytorch \
+      --channel nvidia \
+      cugraph-equivariant
     pip install e3nn==0.5.1
 
     rapids-print-env
@@ -261,10 +268,10 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
     pushd python/cugraph-equivariant/cugraph_equivariant
     pytest \
       --cache-clear \
-      --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-dgl.xml" \
+      --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-equivariant.xml" \
       --cov-config=../../.coveragerc \
-      --cov=cugraph_dgl \
-      --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-dgl-coverage.xml" \
+      --cov=cugraph_equivariant \
+      --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-equivariant-coverage.xml" \
       --cov-report=term \
       .
     popd

From 40b972c99b4f6aee3c2b6fcd3300dceae49f8d5e Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Wed, 24 Jan 2024 11:19:32 -0500
Subject: [PATCH 19/21] Update
 python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py

Co-authored-by: Mario Geiger
---
 .../cugraph_equivariant/tests/test_tensor_product_conv.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
index 6c0c54baafd..a2a13b32cd2 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py
@@ -91,8 +91,8 @@ def test_tensor_product_conv_equivariance(
 
     # rotate before
     out_before = tp_conv(
-        src_features=src_features @ D_in,
-        edge_sh=edge_sh @ D_sh,
+        src_features=src_features @ D_in.T,
+        edge_sh=edge_sh @ D_sh.T,
         edge_emb=edge_emb,
         graph=(edge_index, (num_src_nodes, num_dst_nodes)),
         src_scalars=src_scalars,
@@ -109,7 +109,7 @@ def test_tensor_product_conv_equivariance(
             src_scalars=src_scalars,
             dst_scalars=dst_scalars,
         )
-        @ D_out
+        @ D_out.T
     )
 
     torch.allclose(out_before, out_after, rtol=1e-4, atol=1e-4)

From 6dff47ae7190ff57aeeea99d1d87269d500a0aad Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Wed, 24 Jan 2024 11:19:45 -0500
Subject: [PATCH 20/21] Update
 python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py

Co-authored-by: Mario Geiger
---
 .../cugraph_equivariant/nn/tensor_product_conv.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index 998b7013446..ea9979427d0 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -186,6 +186,7 @@ def forward(
             Reduction operator. Choose between "mean" and "sum".
 
         edge_envelope: torch.Tensor, optional
+            Optional, typically used as attenuation factors to fade out messages coming from nodes close to the cutoff distance used to create the graph. This is important to make the model smooth to the changes in node's coordinates.
             Edge envelope.
             Shape: (num_edges,)
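The `.T` corrections in PATCH 19 follow the column-vector convention of `D_from_matrix`: the returned matrix acts on a single feature vector from the left, so a batch of row-stacked features is rotated by right-multiplying with `D.T`. A short sketch of that convention (names local to the example):

```python
# Sketch: the row-vector convention behind the `@ D.T` fix. D_from_matrix
# returns a matrix that acts on a (column) feature vector.
import torch
from e3nn import o3

irreps = o3.Irreps("10x0e + 10x1e")
D = irreps.D_from_matrix(o3.rand_matrix())

x = torch.randn(4, irreps.dim)
rotated = x @ D.T

# Same as applying D to each feature individually. Note that torch.allclose
# only returns a bool; the assert is what makes the check effective.
per_row = torch.stack([D @ row for row in x])
assert torch.allclose(rotated, per_row, atol=1e-5)

# D is orthogonal for real irreps, so `x @ D` (the old form) applies the
# inverse rotation instead: undoing it with D.T recovers x.
assert torch.allclose((x @ D) @ D.T, x, atol=1e-5)
```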
From 3ee809bddd73982f4dcbd3534c50124b0a969816 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Wed, 24 Jan 2024 11:25:22 -0500
Subject: [PATCH 21/21] format

---
 .../cugraph_equivariant/nn/tensor_product_conv.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
index ea9979427d0..5120a23180d 100644
--- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
+++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py
@@ -186,8 +186,10 @@ def forward(
             Reduction operator. Choose between "mean" and "sum".
 
         edge_envelope: torch.Tensor, optional
-            Optional, typically used as attenuation factors to fade out messages coming from nodes close to the cutoff distance used to create the graph. This is important to make the model smooth to the changes in node's coordinates.
-            Edge envelope.
+            Typically used as attenuation factors to fade out messages coming
+            from nodes close to the cutoff distance used to create the graph.
+            This is important to make the model smooth to the changes in node's
+            coordinates.
             Shape: (num_edges,)
 
         Returns