From 4e3045fed370fa28b23faa7d10b9cfa955a62412 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Thu, 6 Jun 2024 11:15:13 -0700 Subject: [PATCH 01/47] c --- conda/recipes/cugraph-dgl/meta.yaml | 4 +- conda/recipes/cugraph-pyg/meta.yaml | 2 +- dependencies.yaml | 76 +++++++++++++++-------------- 3 files changed, 43 insertions(+), 39 deletions(-) diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index 5e28e69a0d7..abdd91e21e0 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -27,8 +27,10 @@ requirements: - numba >=0.57 - numpy >=1.23,<2.0a0 - pylibcugraphops ={{ minor_version }} + - tensordict >=0.1.2,<0.3.1 - python - - pytorch + - pytorch >=2.0 + - cupy >= 12.0.0 tests: imports: diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 64091ff4782..60b7df5efa2 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -34,7 +34,7 @@ requirements: - cupy >=12.0.0 - cugraph ={{ version }} - pylibcugraphops ={{ minor_version }} - - tensordict >=0.1.2 + - tensordict >=0.1.2,<0.3.1 - pyg >=2.5,<2.6 tests: diff --git a/dependencies.yaml b/dependencies.yaml index 93cb1bf35ac..2d26fbf1a6a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -73,6 +73,7 @@ files: table: build-system includes: - common_build + - python_build_wheel - depends_on_rmm - depends_on_pylibraft - depends_on_pylibcugraph @@ -107,6 +108,7 @@ files: table: build-system includes: - common_build + - python_build_wheel - depends_on_rmm - depends_on_pylibraft - python_build_cythonize @@ -357,11 +359,11 @@ dependencies: packages: - c-compiler - cxx-compiler - - libcudf==24.8.* - - libcugraphops==24.8.* - - libraft-headers==24.8.* - - libraft==24.8.* - - librmm==24.8.* + - libcudf==24.6.* + - libcugraphops==24.6.* + - libraft-headers==24.6.* + - libraft==24.6.* + - librmm==24.6.* - openmpi # Required for building cpp-mgtests (multi-GPU tests) specific: - output_types: [conda] @@ -446,11 +448,11 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - &dask rapids-dask-dependency==24.8.* - - &dask_cuda dask-cuda==24.8.* + - &dask rapids-dask-dependency==24.6.* + - &dask_cuda dask-cuda==24.6.* - &numba numba>=0.57 - &numpy numpy>=1.23,<2.0a0 - - &ucx_py ucx-py==0.39.* + - &ucx_py ucx-py==0.38.* - output_types: conda packages: - aiohttp @@ -475,15 +477,17 @@ dependencies: packages: - *numba - *numpy + - &tensordict tensordict>=0.1.2,<0.3.1 - output_types: [pyproject] packages: - - &cugraph cugraph==24.8.* + - &cugraph cugraph==24.6.* python_run_cugraph_pyg: common: - output_types: [conda, pyproject] packages: - *numba - *numpy + - *tensordict - output_types: [pyproject] packages: - *cugraph @@ -505,7 +509,7 @@ dependencies: - output_types: pyproject packages: - *cugraph - - cugraph-service-client==24.8.* + - cugraph-service-client==24.6.* test_cpp: common: - output_types: conda @@ -538,9 +542,6 @@ dependencies: - *numpy - python-louvain - scikit-learn>=0.23.1 - - output_types: [conda] - packages: - - pylibwholegraph==24.8.* test_python_pylibcugraph: common: @@ -558,18 +559,19 @@ dependencies: common: - output_types: [conda] packages: - - cugraph==24.8.* + - cugraph==24.6.* - pytorch>=2.0 - pytorch-cuda==11.8 + - *tensordict - dgl>=1.1.0.cu* cugraph_pyg_dev: common: - output_types: [conda] packages: - - cugraph==24.8.* + - cugraph==24.6.* - pytorch>=2.0 - pytorch-cuda==11.8 - - tensordict>=0.1.2 + - *tensordict - pyg>=2.5,<2.6 depends_on_pylibwholegraph: @@ 
-597,7 +599,7 @@ dependencies: common: - output_types: conda packages: - - &rmm_conda rmm==24.8.* + - &rmm_conda rmm==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -608,17 +610,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - rmm-cu12==24.8.* + - rmm-cu12==24.6.* - matrix: {cuda: "11.*"} packages: - - rmm-cu11==24.8.* + - rmm-cu11==24.6.* - {matrix: null, packages: [*rmm_conda]} depends_on_cudf: common: - output_types: conda packages: - - &cudf_conda cudf==24.8.* + - &cudf_conda cudf==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -629,17 +631,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cudf-cu12==24.8.* + - cudf-cu12==24.6.* - matrix: {cuda: "11.*"} packages: - - cudf-cu11==24.8.* + - cudf-cu11==24.6.* - {matrix: null, packages: [*cudf_conda]} depends_on_dask_cudf: common: - output_types: conda packages: - - &dask_cudf_conda dask-cudf==24.8.* + - &dask_cudf_conda dask-cudf==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -650,17 +652,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - dask-cudf-cu12==24.8.* + - dask-cudf-cu12==24.6.* - matrix: {cuda: "11.*"} packages: - - dask-cudf-cu11==24.8.* + - dask-cudf-cu11==24.6.* - {matrix: null, packages: [*dask_cudf_conda]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_conda pylibraft==24.8.* + - &pylibraft_conda pylibraft==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -671,17 +673,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - pylibraft-cu12==24.8.* + - pylibraft-cu12==24.6.* - matrix: {cuda: "11.*"} packages: - - pylibraft-cu11==24.8.* + - pylibraft-cu11==24.6.* - {matrix: null, packages: [*pylibraft_conda]} depends_on_raft_dask: common: - output_types: conda packages: - - &raft_dask_conda raft-dask==24.8.* + - &raft_dask_conda raft-dask==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -692,17 +694,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - raft-dask-cu12==24.8.* + - raft-dask-cu12==24.6.* - matrix: {cuda: "11.*"} packages: - - raft-dask-cu11==24.8.* + - raft-dask-cu11==24.6.* - {matrix: null, packages: [*raft_dask_conda]} depends_on_pylibcugraph: common: - output_types: conda packages: - - &pylibcugraph_conda pylibcugraph==24.8.* + - &pylibcugraph_conda pylibcugraph==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -713,17 +715,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - pylibcugraph-cu12==24.8.* + - pylibcugraph-cu12==24.6.* - matrix: {cuda: "11.*"} packages: - - pylibcugraph-cu11==24.8.* + - pylibcugraph-cu11==24.6.* - {matrix: null, packages: [*pylibcugraph_conda]} depends_on_pylibcugraphops: common: - output_types: conda packages: - - &pylibcugraphops_conda pylibcugraphops==24.8.* + - &pylibcugraphops_conda pylibcugraphops==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -734,10 +736,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - pylibcugraphops-cu12==24.8.* + - pylibcugraphops-cu12==24.6.* - matrix: {cuda: "11.*"} 
packages: - - pylibcugraphops-cu11==24.8.* + - pylibcugraphops-cu11==24.6.* - {matrix: null, packages: [*pylibcugraphops_conda]} depends_on_cupy: From f243351c419c194b613925a97c90b235ee22893c Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Thu, 6 Jun 2024 11:20:18 -0700 Subject: [PATCH 02/47] pull in dependency fixes --- ci/test_python.sh | 5 ++--- ci/test_wheel_cugraph-pyg.sh | 1 - conda/recipes/cugraph-dgl/meta.yaml | 2 +- conda/recipes/cugraph-pyg/meta.yaml | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index fdcf88d692a..90cdf48c46c 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -214,9 +214,8 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then # Install pyg dependencies (which requires pip) pip install \ - ogb \ - tensordict - + ogb + pip install \ pyg_lib \ torch_scatter \ diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 1004063cc38..c55ae033344 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -42,7 +42,6 @@ rapids-retry python -m pip install \ pyg_lib \ torch_scatter \ torch_sparse \ - tensordict \ -f ${PYG_URL} rapids-logger "pytest cugraph-pyg (single GPU)" diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index abdd91e21e0..8d4f5327092 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -27,7 +27,7 @@ requirements: - numba >=0.57 - numpy >=1.23,<2.0a0 - pylibcugraphops ={{ minor_version }} - - tensordict >=0.1.2,<0.3.1 + - tensordict >=0.1.2,<0.3.1a0 - python - pytorch >=2.0 - cupy >= 12.0.0 diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 60b7df5efa2..eef02994da9 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -34,7 +34,7 @@ requirements: - cupy >=12.0.0 - cugraph ={{ version }} - pylibcugraphops ={{ minor_version }} - - tensordict >=0.1.2,<0.3.1 + - tensordict >=0.1.2,<0.3.1a0 - pyg >=2.5,<2.6 tests: From 4c29329957dce8f0fe2c5225151051cf93634dd0 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Fri, 7 Jun 2024 11:03:26 -0700 Subject: [PATCH 03/47] w --- python/cugraph-dgl/cugraph_dgl/features.py | 24 +++++ python/cugraph-dgl/cugraph_dgl/graph.py | 95 ++++++++++++++++++- python/cugraph-dgl/cugraph_dgl/typing.py | 16 ++++ .../utils/cugraph_conversion_utils.py | 15 ++- 4 files changed, 147 insertions(+), 3 deletions(-) create mode 100644 python/cugraph-dgl/cugraph_dgl/features.py create mode 100644 python/cugraph-dgl/cugraph_dgl/typing.py diff --git a/python/cugraph-dgl/cugraph_dgl/features.py b/python/cugraph-dgl/cugraph_dgl/features.py new file mode 100644 index 00000000000..1e96a5ecc4a --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/features.py @@ -0,0 +1,24 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dgl = import_optional('dgl') + +class FeatureStore(dgl.FeatureStorage): + """ + Interface for feature storage. 
+ """ + + def requires_ddp(self) -> bool: + return False + + def fetch \ No newline at end of file diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py index 0b2f366f26d..726ceaa5405 100644 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -11,7 +11,98 @@ # See the License for the specific language governing permissions and # limitations under the License. +import cupy + +from cugraph_dgl.typing import TensorType +from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor + +from typing import Union, Optional, Dict, Tuple + +# Have to use import_optional even though these are required +# dependencies in order to build properly. +dgl = import_optional("dgl") +torch = import_optional("torch") +tensordict = import_optional("tensordict") + +HOMOGENEOUS_EDGE_TYPE = ('n','e','n') + class Graph: """ - Duck-typed version of dgl.DGLGraph. - """ \ No newline at end of file + cuGraph-backed duck-typed version of dgl.DGLGraph that distributes + the graph across workers. This object uses lazy graph creation. + Users can repeatedly call add_edges, and the tensors won't + be converted into a cuGraph graph until one is needed + (i.e. when creating a loader). Supports + single-node/single-GPU, single-node/multi-GPU, and + multi-node/multi-GPU graph storage. + + Each worker should have a slice of the graph locally, and + call put_edge_index with its slice. + """ + + def __init__(self, is_multi_gpu: bool=False): + """ + Parameters + ---------- + is_multi_gpu: bool (optional, default=False) + Specifies whether this graph is distributed across GPUs. + """ + + self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,)) + self.__sizes = {} + self.__graph = None + self.__vertex_offsets = None + self.__handle = None + self.__is_multi_gpu = is_multi_gpu + + def to_canonical_etype(self, etype: Union[str, Tuple[str, str, str]]) -> Tuple[str, str, str]: + if etype is None: + if len(self.__edge_indices.keys(leaves_only=True,include_nested=True)) > 1: + raise ValueError("Edge type is required for heterogeneous graphs.") + return HOMOGENEOUS_EDGE_TYPE + + if isinstance(etype, Tuple[str, str, str]): + return etype + + for src_type, rel_type, dst_type in self.__edge_indices.keys(leaves_only=True,include_nested=True): + if etype == rel_type: + return (src_type, rel_type, dst_type) + + raise ValueError( + "Unknown relation type " + etype + ) + + def add_edges(self, u: TensorType, v: TensorType, data:Optional[Dict[str, TensorType]]=None, etype:Optional[Union[str, Tuple[str, str, str]]]=None) -> None: + """ + Adds edges to this graph. + + Parameters + ---------- + u: TensorType + 1d tensor of source vertex ids. + v: TensorType + 1d tensor of destination vertex ids. + data: Dict[str, TensorType] (optional, default=None) + Dictionary containing edge features for the new edges. + etype: Union[str, Tuple[str, str, str]] + The edge type of the edges being inserted. Not required + for homogeneous graphs, which have only one edge type. 
+ """ + + dgl_can_edge_type = self.to_canonical_etype(etype) + + new_edges = torch.stack([ + _cast_to_torch_tensor(u), + _cast_to_torch_tensor(v), + ]) + + if dgl_can_edge_type in self.__edge_indices.keys(leaves_only=True, include_nested=True): + self.__edge_indices[dgl_can_edge_type] = torch.concat([ + self.__edge_indices[dgl_can_edge_type], + new_edges, + ], dim=1) + else: + self.__edge_indices[dgl_can_edge_type] = new_edges + + if data is not None: + \ No newline at end of file diff --git a/python/cugraph-dgl/cugraph_dgl/typing.py b/python/cugraph-dgl/cugraph_dgl/typing.py new file mode 100644 index 00000000000..7a16a1b3dfd --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/typing.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Union + +TensorType = Union["torch.Tensor", "cupy.ndarray", "numpy.ndarray", "cudf.Series", "pandas.Series", List[int]] diff --git a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py b/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py index 647dbd38a64..7ae1cba0263 100644 --- a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py +++ b/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py @@ -13,7 +13,9 @@ # Utils to convert b/w dgl heterograph to cugraph GraphStore from __future__ import annotations -from typing import Dict, Tuple, Union +from typing import Dict, Tuple, Union, List + +from cugraph_dgl.typing import TensorType import cudf import pandas as pd @@ -21,6 +23,7 @@ import dask_cudf from dask.distributed import get_client import cupy as cp +import numpy as np from cugraph.utilities.utils import import_optional from cugraph.gnn.dgl_extensions.dgl_uniform_sampler import src_n, dst_n @@ -115,3 +118,13 @@ def add_edata_from_dgl_HeteroGraph(gs, g): gs.edata_storage.add_data( feat_name=feat_name, type_name=etype, feat_obj=feat_t ) + + +def _cast_to_torch_tensor(t: TensorType) -> "torch.Tensor": + if isinstance(t, torch.Tensor): + return t + elif isinstance(t, (cp.ndarray, cudf.Series)): + return torch.as_tensor(t, device='cuda') + elif isinstance(t, pd.Series, np.ndarray): + return torch.as_tensor(t, device='cpu') + return torch.as_tensor(t) \ No newline at end of file From 265f5467d935b499d1fdae8e771ccea8871adfe0 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Fri, 7 Jun 2024 14:37:08 -0700 Subject: [PATCH 04/47] basic graph/fs --- python/cugraph-dgl/cugraph_dgl/features.py | 105 ++++++++- python/cugraph-dgl/cugraph_dgl/graph.py | 244 ++++++++++++++++++--- 2 files changed, 316 insertions(+), 33 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/features.py b/python/cugraph-dgl/cugraph_dgl/features.py index 1e96a5ecc4a..8037bd90454 100644 --- a/python/cugraph-dgl/cugraph_dgl/features.py +++ b/python/cugraph-dgl/cugraph_dgl/features.py @@ -11,14 +11,111 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-dgl = import_optional('dgl')
+import warnings
+
+from cugraph.utilities.utils import import_optional, MissingModule
+
+torch = import_optional("torch")
+dgl = import_optional("dgl")
+wgth = import_optional("pylibwholegraph.torch")
+
 
-class FeatureStore(dgl.FeatureStorage):
+class WholeFeatureStore(
+    object if isinstance(dgl, MissingModule) else dgl.storages.base.FeatureStorage
+):
     """
     Interface for feature storage.
     """
 
+    def __init__(
+        self,
+        tensor: "torch.Tensor",
+        memory_type: str = "distributed",
+        location: str = "cpu",
+    ):
+        """
+        Constructs a new WholeFeatureStore object that wraps a WholeGraph wholememory
+        distributed tensor.
+
+        Parameters
+        ----------
+        tensor: torch.Tensor
+            The local slice of the tensor being distributed. These should be in order
+            by rank (i.e. rank 0 contains elements 0-9, rank 1 contains elements 10-19,
+            rank 2 contains elements 20-29, etc.) The sizes do not need to be equal.
+        memory_type: str (optional, default='distributed')
+            The memory type of this store. Options are
+            'distributed', 'chunked', and 'continuous'.
+            For more information consult the WholeGraph
+            documentation.
+        location: str (optional, default='cpu')
+            The location ('cpu' or 'cuda') where data is stored.
+        """
+        self.__wg_comm = wgth.get_local_node_communicator()
+
+        if len(tensor.shape) > 2:
+            raise ValueError("Only 1-D or 2-D tensors are supported by WholeGraph.")
+
+        rank = torch.distributed.get_rank()
+        world_size = torch.distributed.get_world_size()
+
+        ld = torch.tensor(tensor.shape[0], device="cuda", dtype=torch.int64)
+        sizes = torch.empty((world_size,), device="cuda", dtype=torch.int64)
+        torch.distributed.all_gather_into_tensor(sizes, ld)
+
+        sizes = sizes.cpu()
+        ld = sizes.sum()
+
+        self.__td = -1 if len(tensor.shape) == 1 else tensor.shape[1]
+        global_shape = [
+            int(ld),
+            self.__td if self.__td > 0 else 1,
+        ]
+
+        if self.__td < 0:
+            tensor = tensor.reshape((tensor.shape[0], 1))
+
+        wg_tensor = wgth.create_wholememory_tensor(
+            self.__wg_comm,
+            memory_type,
+            location,
+            global_shape,
+            tensor.dtype,
+            [global_shape[1], 1],
+        )
+
+        offset = sizes[:rank].sum() if rank > 0 else 0
+
+        wg_tensor.scatter(
+            tensor.clone(memory_format=torch.contiguous_format).cuda(),
+            torch.arange(
+                offset, offset + tensor.shape[0], dtype=torch.int64, device="cuda"
+            ).contiguous(),
+        )
+
+        self.__wg_comm.barrier()
+
+        self.__wg_tensor = wg_tensor
+
     def requires_ddp(self) -> bool:
-        return False
+        return True
+
+    def fetch(
+        self,
+        indices: "torch.Tensor",
+        device: "torch.cuda.Device",
+        pin_memory=False,
+        **kwargs,
+    ):
+        if pin_memory:
+            warnings.warn("pin_memory has no effect for WholeFeatureStorage.")
+
+        t = self.__wg_tensor.gather(
+            indices.cuda(),
+            force_dtype=self.__wg_tensor.dtype,
+        )
+
+        if self.__td < 0:
+            t = t.reshape((t.shape[0],))
 
-    def fetch
\ No newline at end of file
+        return t
diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py
index 726ceaa5405..731aeab0b3f 100644
--- a/python/cugraph-dgl/cugraph_dgl/graph.py
+++ b/python/cugraph-dgl/cugraph_dgl/graph.py
@@ -11,7 +11,98 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import cupy +from cugraph.utilities.utils import import_optional from cugraph_dgl.typing import TensorType from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor +from cugraph_dgl.features import WholeFeatureStore from typing import Union, Optional, Dict, Tuple @@ -24,7 +25,9 @@ torch = import_optional("torch") tensordict = import_optional("tensordict") -HOMOGENEOUS_EDGE_TYPE = ('n','e','n') +HOMOGENEOUS_NODE_TYPE = "n" +HOMOGENEOUS_EDGE_TYPE = (HOMOGENEOUS_NODE_TYPE, "e", HOMOGENEOUS_NODE_TYPE) + class Graph: """ @@ -40,48 +43,208 @@ class Graph: call put_edge_index with its slice. """ - def __init__(self, is_multi_gpu: bool=False): + def __init__( + self, + is_multi_gpu: bool = False, + ndata_storage="torch", + edata_storage="torch", + **kwargs, + ): """ Parameters ---------- is_multi_gpu: bool (optional, default=False) Specifies whether this graph is distributed across GPUs. + ndata_storage: str (optional, default='torch') + Specifies where node data should be stored + (options are 'torch' and 'wholegraph'). + If using PyTorch tensors for storage ('torch') + then data will be replicated across workers and data + for all nodes should be provided when calling add_nodes. + If using WholeGraph wholememory tensors for storage, + then data will be distributed across workers and only + the local slice of the data should be provided when + calling add_nodes. + edata_storage: str (optional, default='torch') + If using PyTorch tensors for storage ('torch') + then data will be replicated across workers and data + for all nodes should be provided when calling add_edge. + If using WholeGraph wholememory tensors for storage, + then data will be distributed across workers and only + the local slice of the data should be provided when + calling add_edges. + kwargs: + Optional kwargs for WholeGraph feature storage. 
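+
+        Examples
+        --------
+        A sketch of the two storage modes; the multi-GPU variant assumes
+        torch.distributed and WholeGraph have already been initialized:
+
+        >>> from cugraph_dgl.graph import Graph
+        >>> g = Graph()  # replicated torch-tensor storage
+        >>> mg = Graph(is_multi_gpu=True, ndata_storage="wholegraph")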
""" + if ndata_storage not in ("torch", "wholegraph"): + raise ValueError( + "Invalid node storage type (valid types are 'torch' and 'wholegraph')" + ) + if edata_storage not in ("torch", "wholegraph"): + raise ValueError( + "Invalid edge storage type (valid types are 'torch' and 'wholegraph')" + ) + + self.__num_nodes_dict = {} self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,)) + self.__sizes = {} self.__graph = None self.__vertex_offsets = None self.__handle = None self.__is_multi_gpu = is_multi_gpu - def to_canonical_etype(self, etype: Union[str, Tuple[str, str, str]]) -> Tuple[str, str, str]: + self.__ndata_storage_type = ( + WholeFeatureStore + if ndata_storage == "wholegraph" + else dgl.storages.pytorch_tensor.PyTorchTensorStorage + ) + self.__edata_storage_type = ( + WholeFeatureStore + if edata_storage == "wholegraph" + else dgl.storages.pytorch_tensor.PyTorchTensorStorage + ) + self.__ndata_storage = {} + self.__edata_storage = {} + self.__wg_kwargs = kwargs + + @property + def is_multi_gpu(self): + return self.__is_multi_gpu + + def to_canonical_etype( + self, etype: Union[str, Tuple[str, str, str]] + ) -> Tuple[str, str, str]: if etype is None: - if len(self.__edge_indices.keys(leaves_only=True,include_nested=True)) > 1: + if len(self.__edge_indices.keys(leaves_only=True, include_nested=True)) > 1: raise ValueError("Edge type is required for heterogeneous graphs.") return HOMOGENEOUS_EDGE_TYPE if isinstance(etype, Tuple[str, str, str]): return etype - - for src_type, rel_type, dst_type in self.__edge_indices.keys(leaves_only=True,include_nested=True): + + for src_type, rel_type, dst_type in self.__edge_indices.keys( + leaves_only=True, include_nested=True + ): if etype == rel_type: return (src_type, rel_type, dst_type) - - raise ValueError( - "Unknown relation type " + etype - ) - - def add_edges(self, u: TensorType, v: TensorType, data:Optional[Dict[str, TensorType]]=None, etype:Optional[Union[str, Tuple[str, str, str]]]=None) -> None: + + raise ValueError("Unknown relation type " + etype) + + def add_nodes( + self, + global_num_nodes: int, + data: Optional[Dict[str, TensorType]] = None, + ntype: Optional[str] = None, + ): + """ + Adds the given number of nodes to this graph. Can only be called once + per node type. The number of nodes specified here refers to the total + number of nodes across workers. If the backing feature store is + distributed (i.e. wholegraph), then only local features should + be passed to the data argument. If the backing feature store is + replicated, then features for all nodes should be passed to the + data argument, including those for nodes not on the local worker. + + Parameters + ---------- + global_num_nodes: int + The total number of nodes of the given type in this graph. + The same number should be passed to every worker. + data: Dict[str, TensorType] (optional, default=None) + Node feature tensors. + ntype: str (optional, default=None) + The node type being modified. Required for heterogeneous graphs. 
+ """ + if ntype is None: + if len(self.__num_nodes_dict.keys()) > 1: + raise ValueError("Node type is required for heterogeneous graphs.") + ntype = HOMOGENEOUS_NODE_TYPE + + if ntype in self.__num_nodes_dict: + raise ValueError( + "Calling add_nodes multiple types for the same " + "node type is not allowed in cuGraph-DGL" + ) + + if self.is_multi_gpu: + # Ensure all nodes got the same number of nodes passed + world_size = torch.distributed.get_world_size() + local_size = torch.tensor( + [global_num_nodes], device="cuda", dtype=torch.int64 + ) + ns = torch.empty((world_size,), device="cuda", dtype=torch.int64) + torch.distributed.all_gather_into_tensor(ns, local_size) + if not (ns == global_num_nodes).all(): + raise ValueError("The global number of nodes must match on all workers") + + # Ensure the sum of the feature shapes equals the global number of nodes. + for feature_name, feature_tensor in data.items(): + features_size = torch.tensor( + [int(feature_tensor.shape[0])], device="cuda", dtype=torch.int64 + ) + torch.distributed.all_reduce( + features_size, op=torch.distributed.ReduceOp.SUM + ) + if features_size != global_num_nodes: + raise ValueError( + "The total length of the feature vector across workers must" + " match the global number of nodes but it does not match for " + f"{feature_name}." + ) + + self.__num_nodes_dict[ntype] = global_num_nodes + + for feature_name, feature_tensor in data.items(): + self.__ndata_storage[ntype, feature_name] = self.__ndata_storage_type( + feature_tensor, **self.__wg_kwargs + ) + + def __check_node_ids(self, ntype: str, ids: TensorType): + """ + Ensures all node ids in the provided id tensor are valid. + Raises a ValueError if any are invalid. + + Parameters + ---------- + ntype: str + The node type being validated against. + ids: + The tensor of ids being validated. """ - Adds edges to this graph. + if ntype in self.__num_nodes_dict: + if ids.max() + 1 > self.__num_nodes(ntype): + raise ValueError( + f"input tensor contains invalid node ids for type {ntype}" + ) + else: + raise ValueError( + f"add_nodes() must be called for type {ntype} before calling num_edges." + ) + + def add_edges( + self, + u: TensorType, + v: TensorType, + data: Optional[Dict[str, TensorType]] = None, + etype: Optional[Union[str, Tuple[str, str, str]]] = None, + ) -> None: + """ + Adds edges to this graph. Must be called after add_nodes + is called for the src/dst node type. If the backing feature + store is distributed (i.e. wholegraph), then only local + features should be passed to the data argument. If the + backing feature store is replicated, then features for + all edges should be passed to the data argument, + including those for edges not on the local worker. Parameters ---------- u: TensorType - 1d tensor of source vertex ids. + 1d tensor of source node ids (local slice of the distributed edgelist). v: TensorType - 1d tensor of destination vertex ids. + 1d tensor of destination node ids (local slice of the distributed edgelist). data: Dict[str, TensorType] (optional, default=None) Dictionary containing edge features for the new edges. etype: Union[str, Tuple[str, str, str]] @@ -89,20 +252,43 @@ def add_edges(self, u: TensorType, v: TensorType, data:Optional[Dict[str, Tensor for homogeneous graphs, which have only one edge type. """ + # Validate all inputs before proceeding + # The number of nodes for the src/dst type needs to be known and there cannot + # be any edges of this type in the graph. 
+        dgl_can_edge_type = self.to_canonical_etype(etype)
+        src_type, _, dst_type = dgl_can_edge_type
+        if dgl_can_edge_type in self.__edge_indices.keys(
+            leaves_only=True, include_nested=True
+        ):
+            raise ValueError(
+                "This cuGraph-DGL graph already contains edges of type"
+                f" {dgl_can_edge_type}. Calling add_edges multiple times"
+                " for the same edge type is not supported."
+            )
+        self.__check_node_ids(src_type, u)
+        self.__check_node_ids(dst_type, v)
+
+        self.__edge_indices[dgl_can_edge_type] = torch.stack(
+            [
+                _cast_to_torch_tensor(u),
+                _cast_to_torch_tensor(v),
+            ]
+        )
+
+        if data is not None:
+            for attr_name, attr_tensor in data.items():
+                self.__edata_storage[
+                    dgl_can_edge_type, attr_name
+                ] = self.__edata_storage_type(attr_tensor, **self.__wg_kwargs)
+
+    def num_nodes(self, ntype: str = None):
+        """
+        Returns the number of nodes of ntype, or if ntype is not provided,
+        the total number of nodes in the graph.
+        """
+        if ntype is None:
+            if len(self.__num_nodes_dict.keys()) > 1:
+                raise ValueError("ntype is required for heterogeneous graphs")
+            return self.__num_nodes_dict[HOMOGENEOUS_NODE_TYPE]
+
+        return self.__num_nodes_dict[ntype]

From b51eda4ba9127315b273581fb8a41c025382f9be Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Mon, 10 Jun 2024 12:47:00 -0700
Subject: [PATCH 05/47] dist sampling

---
 python/cugraph-dgl/cugraph_dgl/graph.py | 93 +++++++++++++++++++++++--
 1 file changed, 88 insertions(+), 5 deletions(-)

diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py
index 731aeab0b3f..343ba4c8670 100644
--- a/python/cugraph-dgl/cugraph_dgl/graph.py
+++ b/python/cugraph-dgl/cugraph_dgl/graph.py
@@ -11,13 +11,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import cupy
+
+import pylibcugraph
+
+from typing import Union, Optional, Dict, Tuple
+
 from cugraph.utilities.utils import import_optional
 
 from cugraph_dgl.typing import TensorType
 from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor
 from cugraph_dgl.features import WholeFeatureStore
-from typing import Union, Optional, Dict, Tuple
+
 
 # Have to use import_optional even though these are required
 # dependencies in order to build properly.
@@ -87,6 +93,7 @@ def __init__( ) self.__num_nodes_dict = {} + self.__num_edges_dict = {} self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,)) self.__sizes = {} @@ -200,6 +207,9 @@ def add_nodes( self.__ndata_storage[ntype, feature_name] = self.__ndata_storage_type( feature_tensor, **self.__wg_kwargs ) + + self.__graph = None + self.__vertex_offsets = None def __check_node_ids(self, ntype: str, ids: TensorType): """ @@ -281,14 +291,87 @@ def add_edges( dgl_can_edge_type, attr_name ] = self.__edata_storage_type(attr_tensor, **self.__wg_kwargs) - def num_nodes(self, ntype: str = None): + num_edges = self.__edge_indices[dgl_can_edge_type].shape[1] + if self.is_multi_gpu: + num_edges = torch.tensor([num_edges], device='cuda', dtype=torch.int64) + torch.distributed.all_reduce(num_edges, op=torch.distributed.ReduceOp.SUM) + + self.__num_edges_dict[dgl_can_edge_type] = int(num_edges) + + self.__graph = None + self.__vertex_offsets = None + + def num_nodes(self, ntype: str = None) -> int: """ Returns the number of nodes of ntype, or if ntype is not provided, the total number of nodes in the graph. """ if ntype is None: - if len(self.__num_nodes_dict.keys()) > 1: - raise ValueError("ntype is required for heterogeneous graphs") - return self.__num_nodes_dict[HOMOGENEOUS_NODE_TYPE] + return sum(self.__num_nodes_dict.values()) return self.__num_nodes_dict[ntype] + + def number_of_nodes(self, ntype: str = None) -> int: + """ + Alias for num_nodes. + """ + return self.num_nodes(ntype=ntype) + + def num_edges(self, etype: Union[str, Tuple[str, str, str]]=None) -> int: + """ + Returns the number of edges of etype, or if etype is not provided, + the total number of edges in the graph. + """ + if etype is None: + return sum(self.__num_edges_dict.values()) + + etype = self.to_canonical_etype(etype) + return self.__num_edges_dict[etype] + + def number_of_edges(self, etype: Union[str, Tuple[str, str, str]]=None) -> int: + """ + Alias for num_edges. 
+ """ + return self.num_edges(etype=etype) + + @property + def is_homogeneous(self): + return len(self.__num_edges_dict) <= 1 and len(self.__num_nodes_dict) <=1 + + @property + def _graph(self) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: + if self.__graph is None: + edgelist_dict = self.__get_edgelist() + + if self.is_multi_gpu: + rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + + vertices_array = cupy.arange( + sum(self._num_vertices().values()), dtype="int64" + ) + vertices_array = cupy.array_split(vertices_array, world_size)[rank] + + self.__graph = pylibcugraph.MGGraph( + self._resource_handle, + graph_properties, + [cupy.asarray(edgelist_dict["src"]).astype("int64")], + [cupy.asarray(edgelist_dict["dst"]).astype("int64")], + vertices_array=[vertices_array], + edge_id_array=[cupy.asarray(edgelist_dict["eid"])], + edge_type_array=[cupy.asarray(edgelist_dict["etp"])], + ) + else: + self.__graph = pylibcugraph.SGGraph( + self._resource_handle, + graph_properties, + cupy.asarray(edgelist_dict["src"]).astype("int64"), + cupy.asarray(edgelist_dict["dst"]).astype("int64"), + vertices_array=cupy.arange( + sum(self._num_vertices().values()), dtype="int64" + ), + edge_id_array=cupy.asarray(edgelist_dict["eid"]), + edge_type_array=cupy.asarray(edgelist_dict["etp"]), + ) + + return self.__graph \ No newline at end of file From 99432600df962efdf098cfa9a19bdc99926bde6a Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 12 Jun 2024 13:41:59 -0700 Subject: [PATCH 06/47] graph data views --- batch=00000.00000000-00000.00000002.parquet | Bin 0 -> 4768 bytes python/cugraph-dgl/cugraph_dgl/__init__.py | 1 + python/cugraph-dgl/cugraph_dgl/features.py | 2 +- python/cugraph-dgl/cugraph_dgl/graph.py | 300 ++++++++++++++++++-- python/cugraph-dgl/cugraph_dgl/view.py | 101 +++++++ 5 files changed, 378 insertions(+), 26 deletions(-) create mode 100644 batch=00000.00000000-00000.00000002.parquet create mode 100644 python/cugraph-dgl/cugraph_dgl/view.py diff --git a/batch=00000.00000000-00000.00000002.parquet b/batch=00000.00000000-00000.00000002.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a9962589e3be43900ec86c90d6eec24949d444cb GIT binary patch literal 4768 zcmb`LYfO_@7{}kXP+l(xMc&d%(OTsN6)QKZD0sh4L_nDr&{F7IsP+PF0Rb6;c$q_F z&grIG#&n5^x-IS!qh^WIEnBuF+e{V{jf)?)eb}UHQAUsO0>_+r5)7-&rp$`mIVV z>;CYcjhlb}xT-JV9KV0n)uF-c(fYk-t1eb&etY5ImyHvuZ|n(P`TEw6XS#m4k#^2B zI;FC0$%KCEnK>!Wrjq+VEln!vy?_0$4s{$m&7h98QsIbNexVgO@d?la_Jj0dnXumk z=>^xot_98DDzE~a2kr;ULCSzZ*b+Di8~`bk4!{n8d%@ixWtts!*7XMrohO0WZL z1t)_`!DAp*R2l4(AXV5w*bBkU;6`vC*aI#G4}))m)nEy@6Kn&!KsQL0*9)PISK>T`uK$_S~5p>3+pTQSr)`KZb>#R+U9neXEtxB4mI>l zu+P|$Q784~?3^=i{Pe71e$)Kj6~Ts{c8{@?jZK=nBg3#}oIZQyUd@42+w3x3N8+lb zxqWp~Z>M&NyvVHM@^+?c%O zZF{^$U7IQvH!ZL&OM^eQn**@{B{v z`^J9Id^fPH?B`nj^5UNTHM`3*?-XXgH+jQ#ZS(gP$%oU55?yk9neR^$SPT2+lHU)BW%8V<%%WJ!%&0SzsPWkHef+3YAeV zlSa6tyqJfjYe2SioG@8Wpxe?M;$0s_{CSl;0A)j_v+{-S2V*r4ZKYJdQfaV{h~~J=vcckbEuPIw@dh z%ZG<}dG9vK-Ow1~^aC2B$=EM)CLzw48K*VUsRVgQe@)Apcq~rjO*Sr@C!H{mpm91K zXUt)07bmFY%9FuyBfQbI6nBqO^^a1;M`_}B+OWTq2-|b`Q!A$%4vgZlMqeZTkuH9f zF8d41k0Jc?|IQ@`RPpd2UjHCo{M9TDnM}5as%)M_@Hq~}LdLO>J^3bz+gl0xTM6PZ zOR?>QI-93_ebT3iMU1`oaKvP1(T}M(Jgd^5Rf+ee729@evU$qY=WrM^3lyKN`d0D8 zjAGk9=8;nPSsr=izDOR$N4`E^9Ly=Uu_QMRUr2vBhvkpRGcrvznhe?|htKH<8ZPmj z7PrqS?X=hXJuO~e&@#_ly*0;dQA~SyOp53Yo1GS)!z;1r7Nu@1Y>~??d7Sp>eq}Se zTtM;yw=ZNXl;^W@Xljq{_DUg#(-Cs8xxN;Urz^)iycy~;dbbe+FM7N1xudT4SQF&4 zL~mJ;AM=Jv_p@%uN+J4o=#1VbRv=eXe4o9|2;qvgVZ`tY+&W4hqqmPaAo7raCPicU`~{XqRiTsofC> z_}k8 
PF&o#Gz;Wg5|EB%}$$;L! literal 0 HcmV?d00001 diff --git a/python/cugraph-dgl/cugraph_dgl/__init__.py b/python/cugraph-dgl/cugraph_dgl/__init__.py index 03ff50896a4..61b4142a871 100644 --- a/python/cugraph-dgl/cugraph_dgl/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/__init__.py @@ -15,6 +15,7 @@ # to prevent rapids context being created when importing cugraph_dgl os.environ["RAPIDS_NO_INITIALIZE"] = "1" +from cugraph_dgl.graph import Graph from cugraph_dgl.cugraph_storage import CuGraphStorage from cugraph_dgl.convert import cugraph_storage_from_heterograph import cugraph_dgl.dataloading diff --git a/python/cugraph-dgl/cugraph_dgl/features.py b/python/cugraph-dgl/cugraph_dgl/features.py index 8037bd90454..80885cf01aa 100644 --- a/python/cugraph-dgl/cugraph_dgl/features.py +++ b/python/cugraph-dgl/cugraph_dgl/features.py @@ -118,4 +118,4 @@ def fetch( if self.__td < 0: t = t.reshape((t.shape[0],)) - return t + return t.to(torch.device(device)) diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py index 343ba4c8670..c6978af877b 100644 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -11,13 +11,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cupy - -import pylibcugraph - -from typing import Union, Optional, Dict, Tuple +from typing import Union, Optional, Dict, Tuple, List from cugraph.utilities.utils import import_optional +from cugraph.gnn import cugraph_comms_get_raft_handle + +import cupy +import pylibcugraph from cugraph_dgl.typing import TensorType from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor @@ -34,7 +34,6 @@ HOMOGENEOUS_NODE_TYPE = "n" HOMOGENEOUS_EDGE_TYPE = (HOMOGENEOUS_NODE_TYPE, "e", HOMOGENEOUS_NODE_TYPE) - class Graph: """ cuGraph-backed duck-typed version of dgl.DGLGraph that distributes @@ -96,7 +95,6 @@ def __init__( self.__num_edges_dict = {} self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,)) - self.__sizes = {} self.__graph = None self.__vertex_offsets = None self.__handle = None @@ -148,11 +146,11 @@ def add_nodes( """ Adds the given number of nodes to this graph. Can only be called once per node type. The number of nodes specified here refers to the total - number of nodes across workers. If the backing feature store is - distributed (i.e. wholegraph), then only local features should - be passed to the data argument. If the backing feature store is - replicated, then features for all nodes should be passed to the - data argument, including those for nodes not on the local worker. + number of nodes across all workers (the entire graph). If the backing + feature store is distributed (i.e. wholegraph), then only local features + should be passed to the data argument. If the backing feature store is + replicated, then features for all nodes in the graph should be passed to + the data argument, including those for nodes not on the local worker. Parameters ---------- @@ -334,13 +332,192 @@ def number_of_edges(self, etype: Union[str, Tuple[str, str, str]]=None) -> int: """ return self.num_edges(etype=etype) + @property + def ntypes(self) -> List[str]: + """ + Returns the node type names in this graph. + """ + return list(self.__num_nodes_dict.keys()) + + @property + def etypes(self) -> List[str]: + """ + Returns the edge type names in this graph + (the second element of the canonical edge + type tuple). 
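+
+        A short sketch (the type names are illustrative):
+
+        >>> g.canonical_etypes
+        [('paper', 'cites', 'paper')]
+        >>> g.etypes
+        ['cites']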
+ """ + return [ + et[1] + for et in self.__num_edges_dict.keys() + ] + + @property + def canonical_etypes(self) -> List[str]: + """ + Returns the canonical edge type names in this + graph. + """ + return list(self.__num_edges_dict.keys()) + + @property + def _vertex_offsets(self) -> Dict[str, int]: + if self.__vertex_offsets is None: + ordered_keys = sorted(list(self.ntypes)) + self.__vertex_offsets = {} + offset = 0 + for vtype in ordered_keys: + self.__vertex_offsets[vtype] = offset + offset += self.num_nodes(vtype) + + return dict(self.__vertex_offsets) + + def __get_edgelist(self) -> Dict[str, "torch.Tensor"]: + """ + This function always returns src/dst labels with respect + to the out direction. + + Returns + ------- + Dict[str, torch.Tensor] with the following keys: + src: source vertices (int64) + Note that src is the 1st element of the DGL edge index. + dst: destination vertices (int64) + Note that dst is the 2nd element of the DGL edge index. + eid: edge ids for each edge (int64) + Note that these start from 0 for each edge type. + etp: edge types for each edge (int32) + Note that these are in lexicographic order. + """ + sorted_keys = sorted( + list(self.__edge_indices.keys(leaves_only=True, include_nested=True)) + ) + + # note that this still follows the DGL convention of (src, rel, dst) + # i.e. (author, writes, paper): [[0,1,2],[2,0,1]] is referring to a + # cuGraph graph where (paper 2) -> (author 0), (paper 0) -> (author 1), + # and (paper 1) -> (author 0) + edge_index = torch.concat( + [ + torch.stack( + [ + self.__edge_indices[src_type, rel_type, dst_type][0] + + self._vertex_offsets[src_type], + self.__edge_indices[src_type, rel_type, dst_type][1] + + self._vertex_offsets[dst_type], + ] + ) + for (src_type, rel_type, dst_type) in sorted_keys + ], + axis=1, + ).cuda() + + edge_type_array = torch.arange( + len(sorted_keys), dtype=torch.int32, device="cuda" + ).repeat_interleave( + torch.tensor( + [self.__edge_indices[et].shape[1] for et in sorted_keys], + device="cuda", + dtype=torch.int32, + ) + ) + + if self.is_multi_gpu: + rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + + num_edges_t = torch.tensor( + [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda" + ) + num_edges_all_t = torch.empty( + world_size, num_edges_t.numel(), dtype=torch.int64, device="cuda" + ) + torch.distributed.all_gather_into_tensor(num_edges_all_t, num_edges_t) + + if rank > 0: + start_offsets = num_edges_all_t[:rank].T.sum(axis=1) + edge_id_array = torch.concat( + [ + torch.arange( + start_offsets[i], + start_offsets[i] + num_edges_all_t[rank][i], + dtype=torch.int64, + device="cuda", + ) + for i in range(len(sorted_keys)) + ] + ) + else: + edge_id_array = torch.concat( + [ + torch.arange( + self.__edge_indices[et].shape[1], + dtype=torch.int64, + device="cuda", + ) + for et in sorted_keys + ] + ) + + else: + # single GPU + edge_id_array = torch.concat( + [ + torch.arange( + self.__edge_indices[et].shape[1], + dtype=torch.int64, + device="cuda", + ) + for et in sorted_keys + ] + ) + + return { + "src": edge_index[0], + "dst": edge_index[1], + "etp": edge_type_array, + "eid": edge_id_array, + } + @property def is_homogeneous(self): return len(self.__num_edges_dict) <= 1 and len(self.__num_nodes_dict) <=1 - + + @property + def idtype(self): + return torch.int64 + @property - def _graph(self) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: + def _resource_handle(self): + if self.__handle is None: + if self.is_multi_gpu: + self.__handle 
= pylibcugraph.ResourceHandle( + cugraph_comms_get_raft_handle().getHandle() + ) + else: + self.__handle = pylibcugraph.ResourceHandle() + return self.__handle + + def _graph(self, direction:str) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: + """ + Gets the pylibcugraph Graph object with edges pointing in the given direction + (i.e. 'out' is standard, 'in' is reverse). + """ + + if direction not in ['out','in']: + raise ValueError(f"Invalid direction {direction} (expected 'in' or 'out').") + + graph_properties = pylibcugraph.GraphProperties( + is_multigraph=True, is_symmetric=False + ) + + if self.__graph[1] != direction: + self.__graph = None + if self.__graph is None: + src_col, dst_col = ( + ('src','dst') if direction == 'out' + else ('dst','src') + ) edgelist_dict = self.__get_edgelist() if self.is_multi_gpu: @@ -348,30 +525,103 @@ def _graph(self) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: world_size = torch.distributed.get_world_size() vertices_array = cupy.arange( - sum(self._num_vertices().values()), dtype="int64" + self.num_nodes(), dtype="int64" ) vertices_array = cupy.array_split(vertices_array, world_size)[rank] - self.__graph = pylibcugraph.MGGraph( + self.__graph = (pylibcugraph.MGGraph( self._resource_handle, graph_properties, - [cupy.asarray(edgelist_dict["src"]).astype("int64")], - [cupy.asarray(edgelist_dict["dst"]).astype("int64")], + [cupy.asarray(edgelist_dict[src_col]).astype("int64")], + [cupy.asarray(edgelist_dict[dst_col]).astype("int64")], vertices_array=[vertices_array], edge_id_array=[cupy.asarray(edgelist_dict["eid"])], edge_type_array=[cupy.asarray(edgelist_dict["etp"])], - ) + ), direction) else: - self.__graph = pylibcugraph.SGGraph( + self.__graph = (pylibcugraph.SGGraph( self._resource_handle, graph_properties, - cupy.asarray(edgelist_dict["src"]).astype("int64"), - cupy.asarray(edgelist_dict["dst"]).astype("int64"), + cupy.asarray(edgelist_dict[src_col]).astype("int64"), + cupy.asarray(edgelist_dict[dst_col]).astype("int64"), vertices_array=cupy.arange( - sum(self._num_vertices().values()), dtype="int64" + self.num_nodes(), dtype="int64" ), edge_id_array=cupy.asarray(edgelist_dict["eid"]), edge_type_array=cupy.asarray(edgelist_dict["etp"]), - ) + ), direction) + + return self.__graph[0] + + def _get_n_emb(self, ntype:str, emb_name: str, u:Union[str, TensorType]): + """ + Gets the embedding of a single node type. + Unlike DGL, this function takes the string node + type name instead of an integer id. + + Parameters + ---------- + ntype: str + The node type to get the embedding of. + emb_name: str + The embedding name of the embedding to get. + u: Union[str, TensorType] + Nodes to get the representation of, or ALL + to get the representation of all nodes of + the given type. + """ + + if dgl.base.is_all(u): + u = torch.arange(self.num_nodes(ntype), dtype=torch.int64) + + return self.__ndata_storage[ntype, emb_name].fetch( + _cast_to_torch_tensor(u), + 'cuda' + ) + + def _set_n_emb(self, ntype:str, u:Union[str, TensorType], kv: Dict[str, TensorType]): + """ + Stores or updates the embedding(s) of a single node type. + Unlike DGL, this function takes the string node type name + instead of an integer id. + + The semantics of this function match those of add_nodes + with respect to whether or not the backing feature store + is distributed. + + Parameters + ---------- + ntype: str + The node type to store an embedding of. + u: Union[str, TensorType] + The indices to update, if updating the embedding. 
+            Currently, updating a slice of an embedding is
+            unsupported, so this should be ALL.
+        kv: Dict[str, TensorType]
+            A mapping of embedding names to embedding tensors.
+        """
+
+        if not dgl.base.is_all(u):
+            raise NotImplementedError(
+                "Updating a slice of an embedding is "
+                "currently unimplemented in cuGraph-DGL."
+            )
+
+        for k, v in kv.items():
+            self.__ndata_storage[ntype, k] = self.__ndata_storage_type(
+                v, **self.__wg_kwargs
+            )
+
+    def _pop_n_emb(self, ntype: str, key: str):
+        return self.__ndata_storage[ntype, key].pop(key)
+
+    def _get_n_emb_keys(self, ntype: str):
+        return [
+            k
+            for (t, k) in self.__ndata_storage
+            if ntype == t
+        ]
+
+    @property
+    def ndata(self):
+        
\ No newline at end of file
diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py
new file mode 100644
index 00000000000..39ce44bae0c
--- /dev/null
+++ b/python/cugraph-dgl/cugraph_dgl/view.py
@@ -0,0 +1,101 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import defaultdict
+from typing import Union, Dict, List
+
+from cugraph_dgl.typing import TensorType
+
+
+class HeteroNodeDataView:
+    """
+    Duck-typed version of DGL's HeteroNodeDataView.
+    Used for accessing and modifying node features.
+    """
+
+    def __init__(self, graph: "cugraph_dgl.Graph", ntype: Union[str, List[str]], nodes: TensorType):
+        self.__graph = graph
+        self.__ntype = ntype
+        self.__nodes = nodes
+
+    @property
+    def _ntype(self) -> str:
+        return self.__ntype
+
+    @property
+    def _graph(self) -> "cugraph_dgl.Graph":
+        return self.__graph
+
+    @property
+    def _nodes(self) -> TensorType:
+        return self.__nodes
+
+    def __getitem__(self, key: str):
+        if isinstance(self._ntype, list):
+            return {
+                t: self._graph._get_n_emb(t, key, self._nodes)
+                for t in self._ntype
+            }
+        else:
+            return self._graph._get_n_emb(self._ntype, key, self._nodes)
+
+    def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]):
+        if isinstance(self._ntype, list):
+            if not isinstance(val, dict):
+                raise ValueError(
+                    "There are multiple node types in this view. "
+                    "Expected a dictionary of values."
+                )
+            for t, v in val.items():
+                if t not in self._ntype:
+                    raise ValueError("Attempted to modify a type out of view.")
+                self._graph._set_n_emb(t, self._nodes, {key: v})
+        else:
+            if isinstance(val, dict):
+                raise ValueError(
+                    "There is only one node type in this view. "
+                    "Expected a single value tensor."
+                )
+            self._graph._set_n_emb(self._ntype, self._nodes, {key: val})
+
+    def __delitem__(self, key: str):
+        if isinstance(self._ntype, list):
+            for t in self._ntype:
+                self._graph._pop_n_emb(t, key)
+        else:
+            self._graph._pop_n_emb(self._ntype, key)
+
+    def _transpose(self, fetch_vals=True):
+        if isinstance(self._ntype, list):
+            tr = defaultdict(dict)
+            for ntype in self._ntype:
+                for key in self._graph._get_n_emb_keys(ntype):
+                    tr[key][ntype] = self._graph._get_n_emb(ntype, key, self._nodes) if fetch_vals else []
+        else:
+            tr = {}
+            for key in self._graph._get_n_emb_keys(self._ntype):
+                tr[key] = self._graph._get_n_emb(self._ntype, key, self._nodes) if fetch_vals else []
+
+        return tr
+
+    def __len__(self):
+        return len(self._transpose(fetch_vals=False))
+
+    def __iter__(self):
+        return iter(self._transpose())
+
+    def keys(self):
+        return self._transpose(fetch_vals=False).keys()
+
+    def values(self):
+        return self._transpose().values()
+
+    def __repr__(self):
+        return repr(self._transpose(fetch_vals=False))
\ No newline at end of file

From 055db0a782c5427d617d1b8fd688b86adfa3bf0d Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Wed, 12 Jun 2024 13:42:30 -0700
Subject: [PATCH 07/47] remove unwanted file

---
 batch=00000.00000000-00000.00000002.parquet | Bin 4768 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 batch=00000.00000000-00000.00000002.parquet

diff --git a/batch=00000.00000000-00000.00000002.parquet b/batch=00000.00000000-00000.00000002.parquet
deleted file mode 100644
index a9962589e3be43900ec86c90d6eec24949d444cb..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4768
zcmb`LYfO_@7{}kXP+l(xMc&d%(OTsN6)QKZD0sh4L_nDr&{F7IsP+PF0Rb6;c$q_F
z&grIG#&n5^x-IS!qh^WIEnBuF+e{V{jf)?)eb}UHQAUsO0>_+r5)7-&rp$`mIVV
z>;CYcjhlb}xT-JV9KV0n)uF-c(fYk-t1eb&etY5ImyHvuZ|n(P`TEw6XS#m4k#^2B
zI;FC0$%KCEnK>!Wrjq+VEln!vy?_0$4s{$m&7h98QsIbNexVgO@d?la_Jj0dnXumk
z=>^xot_98DDzE~a2kr;ULCSzZ*b+Di8~`bk4!{n8d%@ixWtts!*7XMrohO0WZL
z1t)_`!DAp*R2l4(AXV5w*bBkU;6`vC*aI#G4}))m)nEy@6Kn&!KsQL0*9)PISK>T`uK$_S~5p>3+pTQSr)`KZb>#R+U9neXEtxB4mI>l
zu+P|$Q784~?3^=i{Pe71e$)Kj6~Ts{c8{@?jZK=nBg3#}oIZQyUd@42+w3x3N8+lb
zxqWp~Z>M&NyvVHM@^+?c%O
zZF{^$U7IQvH!ZL&OM^eQn**@{B{v
z`^J9Id^fPH?B`nj^5UNTHM`3*?-XXgH+jQ#ZS(gP$%oU55?yk9neR^$SPT2+lHU)BW%8V<%%WJ!%&0SzsPWkHef+3YAeV
zlSa6tyqJfjYe2SioG@8Wpxe?M;$0s_{CSl;0A)j_v+{-S2V*r4ZKYJdQfaV{h~~J=vcckbEuPIw@dh
z%ZG<}dG9vK-Ow1~^aC2B$=EM)CLzw48K*VUsRVgQe@)Apcq~rjO*Sr@C!H{mpm91K
zXUt)07bmFY%9FuyBfQbI6nBqO^^a1;M`_}B+OWTq2-|b`Q!A$%4vgZlMqeZTkuH9f
zF8d41k0Jc?|IQ@`RPpd2UjHCo{M9TDnM}5as%)M_@Hq~}LdLO>J^3bz+gl0xTM6PZ
zOR?>QI-93_ebT3iMU1`oaKvP1(T}M(Jgd^5Rf+ee729@evU$qY=WrM^3lyKN`d0D8
zjAGk9=8;nPSsr=izDOR$N4`E^9Ly=Uu_QMRUr2vBhvkpRGcrvznhe?|htKH<8ZPmj
z7PrqS?X=hXJuO~e&@#_ly*0;dQA~SyOp53Yo1GS)!z;1r7Nu@1Y>~??d7Sp>eq}Se
zTtM;yw=ZNXl;^W@Xljq{_DUg#(-Cs8xxN;Urz^)iycy~;dbbe+FM7N1xudT4SQF&4
zL~mJ;AM=Jv_p@%uN+J4o=#1VbRv=eXe4o9|2;qvgVZ`tY+&W4hqqmPaAo7raCPicU`~{XqRiTsofC>
z_}k8
PF&o#Gz;Wg5|EB%}$$;L!
From 1f76898f6eb060e7a9df7c5526952a12c8ca5124 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Thu, 13 Jun 2024 14:30:11 -0700 Subject: [PATCH 08/47] revert devcontainer change --- .devcontainer/cuda11.8-pip/devcontainer.json | 1 - .devcontainer/cuda12.2-pip/devcontainer.json | 1 - python/cugraph-dgl/cugraph_dgl/graph.py | 348 +++++++++++++++---- python/cugraph-dgl/cugraph_dgl/view.py | 231 +++++++++++- 4 files changed, 499 insertions(+), 82 deletions(-) diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 523933c34fb..f044aa8fbbc 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -25,7 +25,6 @@ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} }, "overrideFeatureInstallOrder": [ - "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index 9b90398a29c..4a4bea7bbb0 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -25,7 +25,6 @@ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} }, "overrideFeatureInstallOrder": [ - "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py index c6978af877b..2bfa75e091c 100644 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -22,7 +22,12 @@ from cugraph_dgl.typing import TensorType from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor from cugraph_dgl.features import WholeFeatureStore - +from cugraph_dgl.view import ( + HeteroNodeView, + HeteroNodeDataView, + HeteroEdgeView, + HeteroEdgeDataView, +) # Have to use import_optional even though these are required @@ -34,6 +39,7 @@ HOMOGENEOUS_NODE_TYPE = "n" HOMOGENEOUS_EDGE_TYPE = (HOMOGENEOUS_NODE_TYPE, "e", HOMOGENEOUS_NODE_TYPE) + class Graph: """ cuGraph-backed duck-typed version of dgl.DGLGraph that distributes @@ -205,7 +211,7 @@ def add_nodes( self.__ndata_storage[ntype, feature_name] = self.__ndata_storage_type( feature_tensor, **self.__wg_kwargs ) - + self.__graph = None self.__vertex_offsets = None @@ -291,11 +297,11 @@ def add_edges( num_edges = self.__edge_indices[dgl_can_edge_type].shape[1] if self.is_multi_gpu: - num_edges = torch.tensor([num_edges], device='cuda', dtype=torch.int64) + num_edges = torch.tensor([num_edges], device="cuda", dtype=torch.int64) torch.distributed.all_reduce(num_edges, op=torch.distributed.ReduceOp.SUM) - + self.__num_edges_dict[dgl_can_edge_type] = int(num_edges) - + self.__graph = None self.__vertex_offsets = None @@ -314,19 +320,19 @@ def number_of_nodes(self, ntype: str = None) -> int: Alias for num_nodes. """ return self.num_nodes(ntype=ntype) - - def num_edges(self, etype: Union[str, Tuple[str, str, str]]=None) -> int: + + def num_edges(self, etype: Union[str, Tuple[str, str, str]] = None) -> int: """ Returns the number of edges of etype, or if etype is not provided, the total number of edges in the graph. 
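+
+        A short sketch (the count reflects whatever edges were added
+        earlier; the values here are illustrative):
+
+        >>> g.num_edges(('n', 'e', 'n'))
+        3
+        >>> g.num_edges()
+        3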
""" if etype is None: return sum(self.__num_edges_dict.values()) - + etype = self.to_canonical_etype(etype) return self.__num_edges_dict[etype] - - def number_of_edges(self, etype: Union[str, Tuple[str, str, str]]=None) -> int: + + def number_of_edges(self, etype: Union[str, Tuple[str, str, str]] = None) -> int: """ Alias for num_edges. """ @@ -346,10 +352,7 @@ def etypes(self) -> List[str]: (the second element of the canonical edge type tuple). """ - return [ - et[1] - for et in self.__num_edges_dict.keys() - ] + return [et[1] for et in self.__num_edges_dict.keys()] @property def canonical_etypes(self) -> List[str]: @@ -480,12 +483,12 @@ def __get_edgelist(self) -> Dict[str, "torch.Tensor"]: @property def is_homogeneous(self): - return len(self.__num_edges_dict) <= 1 and len(self.__num_nodes_dict) <=1 - + return len(self.__num_edges_dict) <= 1 and len(self.__num_nodes_dict) <= 1 + @property def idtype(self): return torch.int64 - + @property def _resource_handle(self): if self.__handle is None: @@ -497,13 +500,15 @@ def _resource_handle(self): self.__handle = pylibcugraph.ResourceHandle() return self.__handle - def _graph(self, direction:str) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: + def _graph( + self, direction: str + ) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: """ Gets the pylibcugraph Graph object with edges pointing in the given direction (i.e. 'out' is standard, 'in' is reverse). """ - if direction not in ['out','in']: + if direction not in ["out", "in"]: raise ValueError(f"Invalid direction {direction} (expected 'in' or 'out').") graph_properties = pylibcugraph.GraphProperties( @@ -514,46 +519,47 @@ def _graph(self, direction:str) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGr self.__graph = None if self.__graph is None: - src_col, dst_col = ( - ('src','dst') if direction == 'out' - else ('dst','src') - ) + src_col, dst_col = ("src", "dst") if direction == "out" else ("dst", "src") edgelist_dict = self.__get_edgelist() if self.is_multi_gpu: rank = torch.distributed.get_rank() world_size = torch.distributed.get_world_size() - vertices_array = cupy.arange( - self.num_nodes(), dtype="int64" - ) + vertices_array = cupy.arange(self.num_nodes(), dtype="int64") vertices_array = cupy.array_split(vertices_array, world_size)[rank] - self.__graph = (pylibcugraph.MGGraph( - self._resource_handle, - graph_properties, - [cupy.asarray(edgelist_dict[src_col]).astype("int64")], - [cupy.asarray(edgelist_dict[dst_col]).astype("int64")], - vertices_array=[vertices_array], - edge_id_array=[cupy.asarray(edgelist_dict["eid"])], - edge_type_array=[cupy.asarray(edgelist_dict["etp"])], - ), direction) + self.__graph = ( + pylibcugraph.MGGraph( + self._resource_handle, + graph_properties, + [cupy.asarray(edgelist_dict[src_col]).astype("int64")], + [cupy.asarray(edgelist_dict[dst_col]).astype("int64")], + vertices_array=[vertices_array], + edge_id_array=[cupy.asarray(edgelist_dict["eid"])], + edge_type_array=[cupy.asarray(edgelist_dict["etp"])], + ), + direction, + ) else: - self.__graph = (pylibcugraph.SGGraph( - self._resource_handle, - graph_properties, - cupy.asarray(edgelist_dict[src_col]).astype("int64"), - cupy.asarray(edgelist_dict[dst_col]).astype("int64"), - vertices_array=cupy.arange( - self.num_nodes(), dtype="int64" + self.__graph = ( + pylibcugraph.SGGraph( + self._resource_handle, + graph_properties, + cupy.asarray(edgelist_dict[src_col]).astype("int64"), + cupy.asarray(edgelist_dict[dst_col]).astype("int64"), + vertices_array=cupy.arange(self.num_nodes(), 
dtype="int64"), + edge_id_array=cupy.asarray(edgelist_dict["eid"]), + edge_type_array=cupy.asarray(edgelist_dict["etp"]), ), - edge_id_array=cupy.asarray(edgelist_dict["eid"]), - edge_type_array=cupy.asarray(edgelist_dict["etp"]), - ), direction) + direction, + ) return self.__graph[0] - def _get_n_emb(self, ntype:str, emb_name: str, u:Union[str, TensorType]): + def _get_n_emb( + self, ntype: str, emb_name: str, u: Union[str, TensorType] + ) -> "torch.Tensor": """ Gets the embedding of a single node type. Unlike DGL, this function takes the string node @@ -569,17 +575,55 @@ def _get_n_emb(self, ntype:str, emb_name: str, u:Union[str, TensorType]): Nodes to get the representation of, or ALL to get the representation of all nodes of the given type. + + Returns + ------- + torch.Tensor + The embedding of the given edge type with the given embedding name. """ if dgl.base.is_all(u): - u = torch.arange(self.num_nodes(ntype), dtype=torch.int64) - + u = torch.arange(self.num_nodes(ntype), dtype=self.idtype) + return self.__ndata_storage[ntype, emb_name].fetch( - _cast_to_torch_tensor(u), - 'cuda' + _cast_to_torch_tensor(u), "cuda" + ) + + def _get_e_emb( + self, etype: Tuple[str, str, str], emb_name: str, u: Union[str, TensorType] + ) -> "torch.Tensor": + """ + Gets the embedding of a single edge type. + Unlike DGL, this function takes the canonical edge type + instead of an integer id. + + Parameters + ---------- + etype: str + The edge type to get the embedding of. + emb_name: str + The embedding name of the embedding to get. + u: Union[str, TensorType] + Edges to get the representation of, or ALL to + get the representation of all nodes of the + given type. + + Returns + ------- + torch.Tensor + The embedding of the given edge type with the given embedding name. + """ + + if dgl.base.is_all(u): + u = torch.arange(self.num_edges(etype), dtype=self.idtype) + + return self.__edata_storage[etype, emb_name].fetch( + _cast_to_torch_tensor(u), "cuda" ) - def _set_n_emb(self, ntype:str, u:Union[str, TensorType], kv: Dict[str, TensorType]): + def _set_n_emb( + self, ntype: str, u: Union[str, TensorType], kv: Dict[str, TensorType] + ) -> None: """ Stores or updates the embedding(s) of a single node type. Unlike DGL, this function takes the string node type name @@ -606,22 +650,200 @@ def _set_n_emb(self, ntype:str, u:Union[str, TensorType], kv: Dict[str, TensorTy "Updating a slice of an embedding is " "currently unimplemented in cuGraph-DGL." ) - + for k, v in kv: self.__ndata_storage[ntype, k] = self.__ndata_storage_type( - v, **self.__wg_kwargs + v, + **self.__wg_kwargs, ) - - def _pop_n_emb(self, ntype:str, key: str): + + def _set_e_emb( + self, etype: str, u: Union[str, TensorType], kv: Dict[str, TensorType] + ) -> None: + """ + Stores or updates the embedding(s) of a single edge type. + Unlike DGL, this function takes the canonical edge type name + instead of an integer id. + + The semantics of this function match those of add_edges + with respect to whether or not the backing feature store + is distributed. + + Parameters + ---------- + etype: str + The edge type to store an embedding of. + u: Union[str, TensorType] + The indices to update, if updating the embedding. + Currently, updating a slice of an embedding is + unsupported, so this should be ALL. + kv: Dict[str, TensorType] + A mapping of embedding names to embedding tensors. + """ + + if not dgl.base.is_all(u): + raise NotImplementedError( + "Updating a slice of an embedding is " + "currently unimplemented in cuGraph-DGL." 
+ ) + + for k, v in kv: + self.__edata_storage[etype, k] = self.__edata_storage_type( + v, + **self.__wg_kwargs, + ) + + def _pop_n_emb(self, ntype: str, key: str) -> "torch.Tensor": + """ + Removes and returns the embedding of the given node + type with the given name. + + Parameters + ---------- + ntype:str + The node type. + key:str + The embedding name. + + Returns + ------- + The removed embedding. + """ return self.__ndata_storage[ntype, key].pop(key) - def _get_n_emb_keys(self, ntype:str): - return [ - k - for (t, k) in self.__ndata_storage - if ntype == t - ] + def _pop_e_emb(self, etype: str, key: str) -> "torch.Tensor": + """ + Removes and returns the embedding of the given edge + type with the given name. + + Parameters + ---------- + etype:str + The node type. + key:str + The embedding name. + + Returns + ------- + torch.Tensor + The removed embedding. + """ + return self.__edata_storage[etype, key].pop(key) + + def _get_n_emb_keys(self, ntype: str) -> List[str]: + """ + Gets a list of the embedding names for a given node + type. + + Parameters + ---------- + ntype: str + The node type to get embedding names for. + + Returns + ------- + List[str] + The list of embedding names for the given node type. + """ + return [k for (t, k) in self.__ndata_storage if ntype == t] + + def _get_e_emb_keys(self, etype: str) -> List[str]: + """ + Gets a list of the embedding names for a given edge + type. + + Parameters + ---------- + etype: str + The edge type to get embedding names for. + + Returns + ------- + List[str] + The list of embedding names for the given edge type. + """ + return [k for (t, k) in self.__ndata_storage if etype == t] + + def all_edges( + self, + form="uv", + order="eid", + etype: Union[str, Tuple[str, str, str]] = None, + device: Union[str, int, "torch.device"] = "cpu", + ): + """ + Returns all edges with the specified edge type. + cuGraph-DGL currently only supports 'eid' format and + 'eid' order. + + Parameters + ---------- + form: str (optional, default='uv') + The format to return ('uv', 'eid', 'all'). + cuGraph-DGL currently only supports 'eid'. + order: str (optional, default='eid') + The order to return edges in ('eid', 'srcdst') + cuGraph-DGL currently only supports 'eid'. + etype: Union[str, Tuple[str, str, str]] (optional, default=None) + The edge type to get. Not required if this is + a homogeneous graph. Can be the relation type if the + relation type is unique, or the canonical edge type. + device: Union[str, int, torch.device] (optional, default='cpu') + The device where returned edges should be stored + ('cpu', 'cuda', or device id). + """ + + if form != "eid": + raise NotImplementedError("cuGraph-DGL only supports eid format.") + + if order != "eid": + raise NotImplementedError("cugraph-DGL only supports eid order.") + + if etype is None and len(self.canonical_etypes) > 1: + raise ValueError("Edge type is required for heterogeneous graphs.") + + etype = self.to_canonical_etype(etype) + return torch.arange( + 0, + self.__num_edges_dict[etype], + dtype=self.idtype, + device=device, + ) + + @property + def ndata(self) -> HeteroNodeDataView: + """ + Returns a view of the node data in this graph which can be used to + access or modify node features. 
+ """ + + if len(self.ntypes) == 1: + ntype = self.ntypes[0] + return HeteroNodeDataView(self, ntype, dgl.base.ALL) + + return HeteroNodeDataView(self, self.ntypes, dgl.base.ALL) + + @property + def edata(self) -> HeteroEdgeDataView: + """ + Returns a view of the edge data in this graph which can be used to + access or modify edge features. + """ + if len(self.canonical_etypes) == 1: + return HeteroEdgeDataView(self, None, dgl.base.ALL) + + return HeteroEdgeDataView(self, self.canonical_etypes, dgl.base.ALL) + + @property + def nodes(self) -> HeteroNodeView: + """ + Returns a view of the nodes in this graph. + """ + return HeteroNodeView(self) @property - def ndata(self): - \ No newline at end of file + def edges(self) -> HeteroEdgeView: + """ + Returns a view of the edges in this graph. + """ + return HeteroEdgeView(self) diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py index 39ce44bae0c..49d8d2f69b9 100644 --- a/python/cugraph-dgl/cugraph_dgl/view.py +++ b/python/cugraph-dgl/cugraph_dgl/view.py @@ -12,17 +12,127 @@ # limitations under the License. from collections import defaultdict -from typing import Union, Dict, List +from collections.abc import MutableMapping +from typing import Union, Dict, List, Tuple +from cugraph.utilities.utils import import_optional + +import cugraph_dgl from cugraph_dgl.typing import TensorType -class HeteroNodeDataView: +torch = import_optional("torch") +dgl = import_optional("dgl") + + +class HeteroEdgeDataView(MutableMapping): + """ + Duck-typed version of DGL's HeteroEdgeDataView. + Used for accessing and modifying edge features. + """ + + def __init__( + self, + graph: "cugraph_dgl.Graph", + etype: Union[Tuple[str, str, str], List[Tuple[str, str, str]]], + edges: TensorType, + ): + self.__graph = graph + self.__etype = etype + self.__edges = edges + + @property + def _etype(self) -> Tuple[str, str, str]: + return self.__etype + + @property + def _graph(self) -> "cugraph_dgl.Graph": + return self.__graph + + @property + def _edges(self) -> TensorType: + return self.__edges + + def __getitem__(self, key: str): + if isinstance(self._etype, list): + return {t: self._graph._get_e_emb(t, key, self._nodes) for t in self._etype} + + return self._graph._get_e_emb(self._etype, key, self._nodes) + + def __setitem__(self, key: str, val: Union[TensorType, Dict[TensorType]]): + if isinstance(self._etype, list): + if not isinstance(val, dict): + raise ValueError( + "There are multiple edge types in this view. " + "Expected a dictionary of values." + ) + for t, v in val.items(): + if t not in self._etype: + raise ValueError("Attempted to modify a type out of view.") + self._graph.set_e_emb(t, self._edges, {key: v}) + else: + if isinstance(val, dict): + raise ValueError( + "There is only one edge type in this view. " + "Expected a single tensor." 
+ ) + self._graph.set_e_emb(self._etype, self._edges, {key: v}) + + def __delitem__(self, key: str): + if isinstance(self._etype, list): + for t in self._etype: + self._graph.pop_e_emb(t, key) + else: + self._graph.pop_e_emb(self._etype, key) + + def _transpose(self, fetch_vals=True): + if isinstance(self._etype, list): + tr = defaultdict(dict) + for etype in self._etype: + for key in self._graph._get_e_emb_keys(etype): + tr[key][etype] = ( + self._graph._get_e_emb(etype, key, self._edges) + if fetch_vals + else [] + ) + else: + tr = {} + for key in self._graph._get_e_emb_keys(self._etype): + tr[key] = ( + self._graph._get_e_emb(self._etype, key, self._edges) + if fetch_vals + else [] + ) + + return tr + + def __len__(self): + return len(self._transpose(fetch_vals=False)) + + def __iter__(self): + return iter(self._transpose()) + + def keys(self): + return self._transpose(fetch_vals=False).keys() + + def values(self): + return self._transpose().values() + + def __repr__(self): + return repr(self.__transpose(fetch_vals=False)) + + +class HeteroNodeDataView(MutableMapping): """ Duck-typed version of DGL's HeteroNodeDataView. Used for accessing and modifying node features. """ - def __init__(self, graph: "cugraph_dgl.Graph", ntype: Union[str, List[str]], nodes: TensorType): + def __init__( + self, + graph: "cugraph_dgl.Graph", + ntype: Union[str, List[str]], + nodes: TensorType, + ): self.__graph = graph self.__ntype = ntype self.__nodes = nodes @@ -30,24 +140,21 @@ def __init__(self, graph: "cugraph_dgl.Graph", ntype: Union[str, List[str]], nod @property def _ntype(self) -> str: return self.__ntype - + @property def _graph(self) -> "cugraph_dgl.Graph": return self.__graph - + @property def _nodes(self) -> TensorType: return self.__nodes def __getitem__(self, key: str): if isinstance(self._ntype, list): - return { - t: self._graph._get_n_emb(t, key, self._nodes) - for t in self._ntype - } + return {t: self._graph._get_n_emb(t, key, self._nodes) for t in self._ntype} else: return self._graph._get_n_emb(self._ntype, key, self._nodes) - + def __setitem__(self, key: str, val: Union[TensorType, Dict[TensorType]]): if isinstance(self._ntype, list): if not isinstance(val, dict): @@ -58,7 +165,7 @@ def __setitem__(self, key: str, val: Union[TensorType, Dict[TensorType]]): for t, v in val.items(): if t not in self._ntype: raise ValueError("Attempted to modify a type out of view.") - self._graph._set_n_emb(self._ntype, self._nodes, {key: v}) + self._graph._set_n_emb(t, self._nodes, {key: v}) else: if isinstance(val, dict): raise ValueError( @@ -71,18 +178,28 @@ def __delitem__(self, key: str): if isinstance(self._ntype, list): for t in self._ntype: self._graph._pop_n_emb(t, key) + else: + self._graph.pop_n_emb(self._ntype, key) def _transpose(self, fetch_vals=True): if isinstance(self._ntype, list): tr = defaultdict(dict) for ntype in self._ntype: for key in self._graph._get_n_emb_keys(ntype): - tr[key][ntype] = self._graph._get_n_emb(ntype, key, self._nodes) if fetch_vals else [] + tr[key][ntype] = ( + self._graph._get_n_emb(ntype, key, self._nodes) + if fetch_vals + else [] + ) else: tr = {} for key in self._graph._get_n_emb_keys(self._ntype): - tr[key] = self._graph._get_n_emb(ntype, key, self._nodes) if fetch_vals else [] - + tr[key] = ( + self._graph._get_n_emb(self._ntype, key, self._nodes) + if fetch_vals + else [] + ) + return tr def __len__(self): @@ -90,12 +207,92 @@ def __len__(self): def __iter__(self): return iter(self._transpose()) - + def keys(self): return 
self._transpose(fetch_vals=False).keys()

     def values(self):
         return self._transpose().values()

     def __repr__(self):
-        return repr(self.__transpose(fetch_vals=False))
\ No newline at end of file
+        return repr(self.__transpose(fetch_vals=False))
+
+
+class HeteroEdgeView:
+    """
+    Duck-typed version of DGL's HeteroEdgeView.
+    """
+
+    def __init__(self, graph):
+        self.__graph = graph
+
+    @property
+    def _graph(self) -> "cugraph_dgl.Graph":
+        return self.__graph
+
+    def __getitem__(self, key):
+        if isinstance(key, slice):
+            if not (key.start is None and key.stop is None and key.step is None):
+                raise ValueError("Only full slices are supported in DGL.")
+            edges = dgl.base.ALL
+            etype = None
+        elif key is None:
+            edges = dgl.base.ALL
+            etype = None
+        elif isinstance(key, tuple):
+            if len(key) == 3:
+                edges = dgl.base.ALL
+                etype = key
+            else:
+                edges = key
+                etype = None
+        elif isinstance(key, str):
+            edges = dgl.base.ALL
+            etype = key
+        else:
+            edges = key
+            etype = None
+
+        return HeteroEdgeDataView(
+            graph=self.__graph,
+            etype=etype,
+            edges=edges,
+        )
+
+    def __call__(self, *args, **kwargs):
+        return self.__graph.all_edges(*args, **kwargs)
+
+
+class HeteroNodeView:
+    """
+    Duck-typed version of DGL's HeteroNodeView.
+    """
+
+    def __init__(self, graph: "cugraph_dgl.Graph"):
+        self.__graph = graph
+
+    @property
+    def _graph(self) -> "cugraph_dgl.Graph":
+        return self.__graph
+
+    def __getitem__(self, key):
+        if isinstance(key, slice):
+            if not (key.start is None and key.stop is None and key.step is None):
+                raise ValueError("Only full slices are supported in DGL.")
+            nodes = dgl.base.ALL
+            ntype = None
+        elif isinstance(key, tuple):
+            nodes, ntype = key
+        elif key is None or isinstance(key, str):
+            nodes = dgl.base.ALL
+            ntype = key
+        else:
+            nodes = key
+            ntype = None
+
+        return HeteroNodeDataView(graph=self.__graph, ntype=ntype, nodes=nodes)
+
+    def __call__(self, ntype=None):
+        return torch.arange(
+            0, self.__graph.num_nodes(ntype), dtype=self.__graph.idtype, device="cuda"
+        )

From 927ee0908680aad90c887d1f69f5410e7c9a31ae Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Fri, 14 Jun 2024 12:43:43 -0700
Subject: [PATCH 09/47] tests, bugfixes, resolve indexing problem (sort of)

---
 python/cugraph-dgl/cugraph_dgl/graph.py       | 26 ++++--
 .../{ => cugraph_dgl}/tests/__init__.py       |  0
 .../{ => cugraph_dgl}/tests/conftest.py       |  0
 .../tests/mg/test_dataloader.py               |  0
 .../tests/nn/test_gatconv.py                  |  0
 .../tests/nn/test_gatv2conv.py                |  0
 .../tests/nn/test_relgraphconv.py             |  0
 .../tests/nn/test_sageconv.py                 |  0
 .../tests/nn/test_sparsegraph.py              |  0
 .../tests/nn/test_transformerconv.py          |  0
 .../tests/test_cugraph_storage.py             |  0
 .../tests/test_dataloader.py                  |  0
 .../{ => cugraph_dgl}/tests/test_dataset.py   |  0
 .../tests/test_from_dgl_heterograph.py        |  0
 .../cugraph_dgl/tests/test_graph.py           | 91 +++++++++++++++++++
 .../{ => cugraph_dgl}/tests/test_utils.py     |  0
 .../{ => cugraph_dgl}/tests/utils.py          |  0
 .../utils/cugraph_conversion_utils.py         | 12 +--
 python/cugraph-dgl/cugraph_dgl/view.py        | 12 +--
 19 files changed, 121 insertions(+), 20 deletions(-)
 rename python/cugraph-dgl/{ => cugraph_dgl}/tests/__init__.py (100%)
 rename python/cugraph-dgl/{ => cugraph_dgl}/tests/conftest.py (100%)
 rename python/cugraph-dgl/{ => cugraph_dgl}/tests/mg/test_dataloader.py (100%)
 rename python/cugraph-dgl/{ => cugraph_dgl}/tests/nn/test_gatconv.py (100%)
 rename python/cugraph-dgl/{ => cugraph_dgl}/tests/nn/test_gatv2conv.py (100%)
 rename python/cugraph-dgl/{ => cugraph_dgl}/tests/nn/test_relgraphconv.py (100%)
 rename
python/cugraph-dgl/{ => cugraph_dgl}/tests/nn/test_sageconv.py (100%) rename python/cugraph-dgl/{ => cugraph_dgl}/tests/nn/test_sparsegraph.py (100%) rename python/cugraph-dgl/{ => cugraph_dgl}/tests/nn/test_transformerconv.py (100%) rename python/cugraph-dgl/{ => cugraph_dgl}/tests/test_cugraph_storage.py (100%) rename python/cugraph-dgl/{ => cugraph_dgl}/tests/test_dataloader.py (100%) rename python/cugraph-dgl/{ => cugraph_dgl}/tests/test_dataset.py (100%) rename python/cugraph-dgl/{ => cugraph_dgl}/tests/test_from_dgl_heterograph.py (100%) create mode 100644 python/cugraph-dgl/cugraph_dgl/tests/test_graph.py rename python/cugraph-dgl/{ => cugraph_dgl}/tests/test_utils.py (100%) rename python/cugraph-dgl/{ => cugraph_dgl}/tests/utils.py (100%) diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py index 2bfa75e091c..142bf483cc3 100644 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -128,11 +128,11 @@ def to_canonical_etype( self, etype: Union[str, Tuple[str, str, str]] ) -> Tuple[str, str, str]: if etype is None: - if len(self.__edge_indices.keys(leaves_only=True, include_nested=True)) > 1: + if len(self.canonical_etypes) > 1: raise ValueError("Edge type is required for heterogeneous graphs.") return HOMOGENEOUS_EDGE_TYPE - if isinstance(etype, Tuple[str, str, str]): + if isinstance(etype, tuple) and len(etype) == 3: return etype for src_type, rel_type, dst_type in self.__edge_indices.keys( @@ -209,7 +209,7 @@ def add_nodes( for feature_name, feature_tensor in data.items(): self.__ndata_storage[ntype, feature_name] = self.__ndata_storage_type( - feature_tensor, **self.__wg_kwargs + _cast_to_torch_tensor(feature_tensor), **self.__wg_kwargs ) self.__graph = None @@ -228,7 +228,7 @@ def __check_node_ids(self, ntype: str, ids: TensorType): The tensor of ids being validated. """ if ntype in self.__num_nodes_dict: - if ids.max() + 1 > self.__num_nodes(ntype): + if ids.max() + 1 > self.num_nodes(ntype): raise ValueError( f"input tensor contains invalid node ids for type {ntype}" ) @@ -293,7 +293,9 @@ def add_edges( for attr_name, attr_tensor in data.items(): self.__edata_storage[ dgl_can_edge_type, attr_name - ] = self.__edata_storage_type(attr_tensor, **self.__wg_kwargs) + ] = self.__edata_storage_type( + _cast_to_torch_tensor(attr_tensor), **self.__wg_kwargs + ) num_edges = self.__edge_indices[dgl_can_edge_type].shape[1] if self.is_multi_gpu: @@ -515,7 +517,7 @@ def _graph( is_multigraph=True, is_symmetric=False ) - if self.__graph[1] != direction: + if self.__graph is not None and self.__graph[1] != direction: self.__graph = None if self.__graph is None: @@ -582,8 +584,14 @@ def _get_n_emb( The embedding of the given edge type with the given embedding name. """ + if ntype is None: + if len(self.ntypes) == 1: + ntype = HOMOGENEOUS_NODE_TYPE + else: + raise ValueError("Must provide the node type for a heterogeneous graph") + if dgl.base.is_all(u): - u = torch.arange(self.num_nodes(ntype), dtype=self.idtype) + u = torch.arange(self.num_nodes(ntype), dtype=self.idtype, device="cpu") return self.__ndata_storage[ntype, emb_name].fetch( _cast_to_torch_tensor(u), "cuda" @@ -614,8 +622,10 @@ def _get_e_emb( The embedding of the given edge type with the given embedding name. 
""" + etype = self.to_canonical_etype(etype) + if dgl.base.is_all(u): - u = torch.arange(self.num_edges(etype), dtype=self.idtype) + u = torch.arange(self.num_edges(etype), dtype=self.idtype, device="cpu") return self.__edata_storage[etype, emb_name].fetch( _cast_to_torch_tensor(u), "cuda" diff --git a/python/cugraph-dgl/tests/__init__.py b/python/cugraph-dgl/cugraph_dgl/tests/__init__.py similarity index 100% rename from python/cugraph-dgl/tests/__init__.py rename to python/cugraph-dgl/cugraph_dgl/tests/__init__.py diff --git a/python/cugraph-dgl/tests/conftest.py b/python/cugraph-dgl/cugraph_dgl/tests/conftest.py similarity index 100% rename from python/cugraph-dgl/tests/conftest.py rename to python/cugraph-dgl/cugraph_dgl/tests/conftest.py diff --git a/python/cugraph-dgl/tests/mg/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/mg/test_dataloader.py similarity index 100% rename from python/cugraph-dgl/tests/mg/test_dataloader.py rename to python/cugraph-dgl/cugraph_dgl/tests/mg/test_dataloader.py diff --git a/python/cugraph-dgl/tests/nn/test_gatconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_gatconv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py diff --git a/python/cugraph-dgl/tests/nn/test_gatv2conv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_gatv2conv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py diff --git a/python/cugraph-dgl/tests/nn/test_relgraphconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_relgraphconv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py diff --git a/python/cugraph-dgl/tests/nn/test_sageconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_sageconv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py diff --git a/python/cugraph-dgl/tests/nn/test_sparsegraph.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sparsegraph.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_sparsegraph.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_sparsegraph.py diff --git a/python/cugraph-dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_transformerconv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py diff --git a/python/cugraph-dgl/tests/test_cugraph_storage.py b/python/cugraph-dgl/cugraph_dgl/tests/test_cugraph_storage.py similarity index 100% rename from python/cugraph-dgl/tests/test_cugraph_storage.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_cugraph_storage.py diff --git a/python/cugraph-dgl/tests/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/test_dataloader.py similarity index 100% rename from python/cugraph-dgl/tests/test_dataloader.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_dataloader.py diff --git a/python/cugraph-dgl/tests/test_dataset.py b/python/cugraph-dgl/cugraph_dgl/tests/test_dataset.py similarity index 100% rename from python/cugraph-dgl/tests/test_dataset.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_dataset.py diff --git a/python/cugraph-dgl/tests/test_from_dgl_heterograph.py 
b/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py similarity index 100% rename from python/cugraph-dgl/tests/test_from_dgl_heterograph.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py new file mode 100644 index 00000000000..966d51d1d66 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py @@ -0,0 +1,91 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import cugraph_dgl +import pylibcugraph +import cupy +import numpy as np + +from cugraph.datasets import karate +from cugraph.utilities.utils import import_optional, MissingModule + +torch = import_optional("torch") + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.parametrize("direction", ["out", "in"]) +def test_graph_make_homogeneous_graph(direction): + df = karate.get_edgelist() + df.src = df.src.astype("int64") + df.dst = df.dst.astype("int64") + wgt = np.random.random((len(df),)) + + graph = cugraph_dgl.Graph() + num_nodes = max(df.src.max(), df.dst.max()) + 1 + node_x = np.random.random((num_nodes,)) + + graph.add_nodes( + num_nodes, data={"num": torch.arange(num_nodes, dtype=torch.int64), "x": node_x} + ) + graph.add_edges(df.src, df.dst, {"weight": wgt}) + plc_dgl_graph = graph._graph(direction=direction) + + assert graph.num_nodes() == num_nodes + assert graph.num_edges() == len(df) + assert graph.is_homogeneous + assert not graph.is_multi_gpu + + assert ( + graph.nodes() == torch.arange(num_nodes, dtype=torch.int64, device="cuda") + ).all() + assert (graph.nodes[None]["x"] == torch.as_tensor(node_x, device="cuda")).all() + assert ( + graph.nodes[None]["num"] + == torch.arange(num_nodes, dtype=torch.int64, device="cuda") + ).all() + + assert ( + graph.edges("eid", device="cuda") + == torch.arange(len(df), dtype=torch.int64, device="cuda") + ).all() + assert (graph.edges[None]["weight"] == torch.as_tensor(wgt, device="cuda")).all() + + plc_expected_graph = pylibcugraph.SGGraph( + pylibcugraph.ResourceHandle(), + pylibcugraph.GraphProperties(is_multigraph=True, is_symmetric=False), + df.src if direction == "out" else df.dst, + df.dst if direction == "out" else df.src, + vertices_array=cupy.arange(num_nodes, dtype="int64"), + ) + + # Do the expensive check to make sure this test fails if an invalid + # graph is constructed. 
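+    # pylibcugraph.degrees returns a (vertices, in_degrees, out_degrees)
+    # triple. Comparing the triple computed on the graph built through
+    # cugraph_dgl against the one computed on the directly-constructed
+    # SGGraph above verifies that both hold exactly the same edges.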
+ v_actual, d_in_actual, d_out_actual = pylibcugraph.degrees( + pylibcugraph.ResourceHandle(), + plc_dgl_graph, + source_vertices=cupy.arange(num_nodes, dtype="int64"), + do_expensive_check=True, + ) + + v_exp, d_in_exp, d_out_exp = pylibcugraph.degrees( + pylibcugraph.ResourceHandle(), + plc_expected_graph, + source_vertices=cupy.arange(num_nodes, dtype="int64"), + do_expensive_check=True, + ) + + assert (v_actual == v_exp).all() + assert (d_in_actual == d_in_exp).all() + assert (d_out_actual == d_out_exp).all() diff --git a/python/cugraph-dgl/tests/test_utils.py b/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py similarity index 100% rename from python/cugraph-dgl/tests/test_utils.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_utils.py diff --git a/python/cugraph-dgl/tests/utils.py b/python/cugraph-dgl/cugraph_dgl/tests/utils.py similarity index 100% rename from python/cugraph-dgl/tests/utils.py rename to python/cugraph-dgl/cugraph_dgl/tests/utils.py diff --git a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py b/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py index 7ae1cba0263..2ba04bd916f 100644 --- a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py +++ b/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,7 +13,7 @@ # Utils to convert b/w dgl heterograph to cugraph GraphStore from __future__ import annotations -from typing import Dict, Tuple, Union, List +from typing import Dict, Tuple, Union from cugraph_dgl.typing import TensorType @@ -124,7 +124,7 @@ def _cast_to_torch_tensor(t: TensorType) -> "torch.Tensor": if isinstance(t, torch.Tensor): return t elif isinstance(t, (cp.ndarray, cudf.Series)): - return torch.as_tensor(t, device='cuda') - elif isinstance(t, pd.Series, np.ndarray): - return torch.as_tensor(t, device='cpu') - return torch.as_tensor(t) \ No newline at end of file + return torch.as_tensor(t, device="cuda") + elif isinstance(t, (pd.Series, np.ndarray)): + return torch.as_tensor(t, device="cpu") + return torch.as_tensor(t) diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py index 49d8d2f69b9..2bd4c1f2540 100644 --- a/python/cugraph-dgl/cugraph_dgl/view.py +++ b/python/cugraph-dgl/cugraph_dgl/view.py @@ -54,11 +54,11 @@ def _edges(self) -> TensorType: def __getitem__(self, key: str): if isinstance(self._etype, list): - return {t: self._graph._get_e_emb(t, key, self._nodes) for t in self._etype} + return {t: self._graph._get_e_emb(t, key, self._edges) for t in self._etype} - return self._graph._get_e_emb(self._etype, key, self._nodes) + return self._graph._get_e_emb(self._etype, key, self._edges) - def __setitem__(self, key: str, val: Union[TensorType, Dict[TensorType]]): + def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): if isinstance(self._etype, list): if not isinstance(val, dict): raise ValueError( @@ -118,7 +118,7 @@ def values(self): return self._transpose().values() def __repr__(self): - return repr(self.__transpose(fetch_vals=False)) + return repr(self._transpose(fetch_vals=False)) class HeteroNodeDataView(MutableMapping): @@ -155,7 +155,7 @@ def __getitem__(self, key: str): else: return self._graph._get_n_emb(self._ntype, key, 
self._nodes)

-    def __setitem__(self, key: str, val: Union[TensorType, Dict[TensorType]]):
+    def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]):
         if isinstance(self._ntype, list):
             if not isinstance(val, dict):
                 raise ValueError(
@@ -215,7 +215,7 @@ def values(self):
         return self._transpose().values()

     def __repr__(self):
-        return repr(self.__transpose(fetch_vals=False))
+        return repr(self._transpose(fetch_vals=False))


 class HeteroEdgeView:

From 68129d999b063ccc12ee7d5374e8d31dd608cf51 Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Tue, 25 Jun 2024 13:26:01 -0700
Subject: [PATCH 10/47] add heterogeneous tests

---
 python/cugraph-dgl/cugraph_dgl/graph.py       |  46 ++++++--
 ...st_dataloader.py => test_dataloader_mg.py} |   0
 .../cugraph_dgl/tests/test_graph.py           | 102 ++++++++++++++++++
 .../cugraph_dgl/tests/test_graph_mg.py        |   0
 python/cugraph-dgl/cugraph_dgl/view.py        |   5 +-
 5 files changed, 141 insertions(+), 12 deletions(-)
 rename python/cugraph-dgl/cugraph_dgl/tests/{mg/test_dataloader.py => test_dataloader_mg.py} (100%)
 create mode 100644 python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py

diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py
index 142bf483cc3..02abc9bffe0 100644
--- a/python/cugraph-dgl/cugraph_dgl/graph.py
+++ b/python/cugraph-dgl/cugraph_dgl/graph.py
@@ -287,7 +287,7 @@ def add_edges(
                 _cast_to_torch_tensor(u),
                 _cast_to_torch_tensor(v),
             ]
-        )
+        ).to(self.idtype)

         if data is not None:
             for attr_name, attr_tensor in data.items():
@@ -790,7 +790,7 @@ def all_edges(
         ----------
         form: str (optional, default='uv')
             The format to return ('uv', 'eid', 'all').
-            cuGraph-DGL currently only supports 'eid'.
+
         order: str (optional, default='eid')
             The order to return edges in ('eid', 'srcdst')
             cuGraph-DGL currently only supports 'eid'.
         etype: Union[str, Tuple[str, str, str]] (optional, default=None)
             The edge type to get. Not required if this is
             a homogeneous graph. Can be the relation type if the
             relation type is unique, or the canonical edge type.
         device: Union[str, int, torch.device] (optional, default='cpu')
             The device where returned edges should be stored
             ('cpu', 'cuda', or device id).
""" - if form != "eid": - raise NotImplementedError("cuGraph-DGL only supports eid format.") - if order != "eid": raise NotImplementedError("cugraph-DGL only supports eid order.") @@ -813,12 +810,39 @@ def all_edges( raise ValueError("Edge type is required for heterogeneous graphs.") etype = self.to_canonical_etype(etype) - return torch.arange( - 0, - self.__num_edges_dict[etype], - dtype=self.idtype, - device=device, - ) + + if form == 'eid': + return torch.arange( + 0, + self.__num_edges_dict[etype], + dtype=self.idtype, + device=device, + ) + else: + if self.is_multi_gpu: + src = torch.empty((self.__num_edges_dict[etype], ), dtype=self.idtype, device='cuda') + dst = torch.empty((self.__num_edges_dict[etype], ), dtype=self.idtype, device='cuda') + + h1 = torch.distributed.all_gather_into_tensor(src, self.__edge_indices[etype][0].cuda(), async_op=True) + h2 = torch.distributed.all_gather_into_tensor(dst, self.__edge_indices[etype][1].cuda(), async_op=True) + + h1.wait() + h2.wait() + if form == 'uv': + return src.to(device), dst.to(device) + elif form == 'all': + return src.to(device), dst.to(device), torch.arange(self.__num_edges_dict[etype], dtype=self.idtype,device=device) + else: + raise ValueError(f"Invalid form {form}") + + else: + eix = self.__edge_indices[etype].to(device) + if form == 'uv': + return eix[0], eix[1] + elif form == 'all': + return eix[0], eix[1], torch.arange(self.__num_edges_dict[etype], dtype=self.idtype,device=device) + else: + raise ValueError(f"Invalid form {form}") @property def ndata(self) -> HeteroNodeDataView: diff --git a/python/cugraph-dgl/cugraph_dgl/tests/mg/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/test_dataloader_mg.py similarity index 100% rename from python/cugraph-dgl/cugraph_dgl/tests/mg/test_dataloader.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_dataloader_mg.py diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py index 966d51d1d66..89a74ff073c 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py @@ -89,3 +89,105 @@ def test_graph_make_homogeneous_graph(direction): assert (v_actual == v_exp).all() assert (d_in_actual == d_in_exp).all() assert (d_out_actual == d_out_exp).all() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.parametrize("direction", ["out", "in"]) +def test_graph_make_heterogeneous_graph(direction): + df = karate.get_edgelist() + df.src = df.src.astype("int64") + df.dst = df.dst.astype("int64") + wgt = np.random.random((len(df),)) + + graph = cugraph_dgl.Graph() + total_num_nodes = max(df.src.max(), df.dst.max()) + 1 + + num_nodes_group_1 = total_num_nodes // 2 + num_nodes_group_2 = total_num_nodes - num_nodes_group_1 + + node_x_1 = np.random.random((num_nodes_group_1,)) + node_x_2 = np.random.random((num_nodes_group_2,)) + + graph.add_nodes(num_nodes_group_1, {'x':node_x_1}, 'type1') + graph.add_nodes(num_nodes_group_2, {'x':node_x_2}, 'type2') + + edges_11 = df[(df.src < num_nodes_group_1) & (df.dst < num_nodes_group_1)] + edges_12 = df[(df.src < num_nodes_group_1) & (df.dst >= num_nodes_group_1)] + edges_21 = df[(df.src >= num_nodes_group_1) & (df.dst < num_nodes_group_1)] + edges_22 = df[(df.src >= num_nodes_group_1) & (df.dst >= num_nodes_group_1)] + + edges_12.dst -= num_nodes_group_1 + edges_21.src -= num_nodes_group_1 + edges_22.dst -= num_nodes_group_1 + edges_22.src -= num_nodes_group_1 + + 
graph.add_edges(edges_11.src, edges_11.dst, etype=('type1', 'e1', 'type1')) + graph.add_edges(edges_12.src, edges_12.dst, etype=('type1', 'e2', 'type2')) + graph.add_edges(edges_21.src, edges_21.dst, etype=('type2', 'e3', 'type1')) + graph.add_edges(edges_22.src, edges_22.dst, etype=('type2', 'e4', 'type2')) + + assert not graph.is_homogeneous + assert not graph.is_multi_gpu + + # Verify graph.nodes() + assert ( + graph.nodes() == torch.arange(total_num_nodes, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.nodes('type1') == torch.arange(num_nodes_group_1, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.nodes('type2') == torch.arange(num_nodes_group_2, dtype=torch.int64, device="cuda") + ).all() + + # Verify graph.edges() + assert((graph.edges('eid',etype=('type1','e1','type1')) == torch.arange(len(edges_11), dtype=torch.int64, device='cuda')).all()) + assert((graph.edges('eid',etype=('type1','e2','type2')) == torch.arange(len(edges_12), dtype=torch.int64, device='cuda')).all()) + assert((graph.edges('eid',etype=('type2','e3','type1')) == torch.arange(len(edges_21), dtype=torch.int64, device='cuda')).all()) + assert((graph.edges('eid',etype=('type2','e4','type2')) == torch.arange(len(edges_22), dtype=torch.int64, device='cuda')).all()) + + # Use sampling call to check graph creation + # This isn't a test of cuGraph sampling with DGL; the options are + # set to verify the graph only. + plc_graph = graph._graph(direction) + sampling_output = pylibcugraph.uniform_neighbor_sample( + pylibcugraph.ResourceHandle(), + plc_graph, + start_list=cupy.arange(total_num_nodes, dtype='int64'), + h_fan_out=np.array([1, 1], dtype='int32'), + with_replacement=False, + do_expensive_check=True, + with_edge_properties=True, + prior_sources_behavior='exclude', + return_dict=True, + ) + + expected_etypes = { + 0: 'e1', + 1: 'e2', + 2: 'e3', + 3: 'e4', + } + expected_offsets = { + 0: (0, 0), + 1: (0, num_nodes_group_1), + 2: (num_nodes_group_1, 0), + 3: (num_nodes_group_1, num_nodes_group_1), + } + if direction == 'in': + src_col = 'minors' + dst_col = 'majors' + else: + src_col = 'majors' + dst_col = 'minors' + + # Looping over the output verifies that all edges are valid + # (and therefore, the graph is valid) + for i, etype in enumerate(sampling_output['edge_type'].tolist()): + eid = int(sampling_output['edge_id'][i]) + + srcs, dsts, eids = graph.edges('all', etype=expected_etypes[etype], device='cpu') + + assert eids[eid] == eid + assert srcs[eid] == int(sampling_output[src_col][i]) - expected_offsets[etype][0] + assert dsts[eid] == int(sampling_output[dst_col][i]) - expected_offsets[etype][1] \ No newline at end of file diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py index 2bd4c1f2540..e2bf7c20a29 100644 --- a/python/cugraph-dgl/cugraph_dgl/view.py +++ b/python/cugraph-dgl/cugraph_dgl/view.py @@ -260,7 +260,10 @@ def __getitem__(self, key): ) def __call__(self, *args, **kwargs): - return self.__graph.all_edges(*args, **kwargs) + if 'device' in kwargs: + return self.__graph.all_edges(*args, **kwargs) + + return self.__graph.all_edges(*args, **kwargs, device='cuda') class HeteroNodeView: From 20450a37427881f641bd1b231284f4968be14246 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 26 Jun 2024 13:19:36 -0700 Subject: [PATCH 11/47] testing, fixing graph 
API --- python/cugraph-dgl/cugraph_dgl/features.py | 2 +- python/cugraph-dgl/cugraph_dgl/graph.py | 35 +- .../cugraph_dgl/tests/test_graph_mg.py | 340 ++++++++++++++++++ 3 files changed, 358 insertions(+), 19 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/features.py b/python/cugraph-dgl/cugraph_dgl/features.py index 80885cf01aa..b4ff0049494 100644 --- a/python/cugraph-dgl/cugraph_dgl/features.py +++ b/python/cugraph-dgl/cugraph_dgl/features.py @@ -76,7 +76,7 @@ def __init__( tensor = tensor.reshape((tensor.shape[0], 1)) wg_tensor = wgth.create_wholememory_tensor( - self.__wg_commm, + self.__wg_comm, memory_type, location, global_shape, diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py index 02abc9bffe0..00fa9a66be7 100644 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -811,7 +811,7 @@ def all_edges( etype = self.to_canonical_etype(etype) - if form == 'eid': + if form == "eid": return torch.arange( 0, self.__num_edges_dict[etype], @@ -820,27 +820,26 @@ def all_edges( ) else: if self.is_multi_gpu: - src = torch.empty((self.__num_edges_dict[etype], ), dtype=self.idtype, device='cuda') - dst = torch.empty((self.__num_edges_dict[etype], ), dtype=self.idtype, device='cuda') - - h1 = torch.distributed.all_gather_into_tensor(src, self.__edge_indices[etype][0].cuda(), async_op=True) - h2 = torch.distributed.all_gather_into_tensor(dst, self.__edge_indices[etype][1].cuda(), async_op=True) - - h1.wait() - h2.wait() - if form == 'uv': - return src.to(device), dst.to(device) - elif form == 'all': - return src.to(device), dst.to(device), torch.arange(self.__num_edges_dict[etype], dtype=self.idtype,device=device) - else: - raise ValueError(f"Invalid form {form}") + # This can't be done because it requires collective communication. + raise ValueError( + "Calling all_edges in a distributed graph with" + " form 'uv' or 'all' is unsupported." + ) else: eix = self.__edge_indices[etype].to(device) - if form == 'uv': + if form == "uv": return eix[0], eix[1] - elif form == 'all': - return eix[0], eix[1], torch.arange(self.__num_edges_dict[etype], dtype=self.idtype,device=device) + elif form == "all": + return ( + eix[0], + eix[1], + torch.arange( + self.__num_edges_dict[etype], + dtype=self.idtype, + device=device, + ), + ) else: raise ValueError(f"Invalid form {form}") diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py index e69de29bb2d..0dfde6b9715 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py @@ -0,0 +1,340 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
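+
+# Multi-GPU (MG) counterparts of the tests in test_graph.py. Each test
+# spawns one process per visible GPU, initializes NCCL, WholeGraph, and
+# the cugraph comms (see init_pytorch_worker below), and then builds the
+# same karate-club graphs from per-rank edgelist partitions.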
+ +import os + +import pytest + +import cugraph_dgl +import pylibcugraph +import cupy +import numpy as np + +import cudf + +from cugraph.datasets import karate +from cugraph.utilities.utils import import_optional, MissingModule + +from cugraph.gnn import ( + cugraph_comms_init, + cugraph_comms_shutdown, + cugraph_comms_create_unique_id, + cugraph_comms_get_raft_handle, +) + +pylibwholegraph = import_optional("pylibwholegraph") +torch = import_optional("torch") + + +def init_pytorch_worker(rank, world_size, cugraph_id): + import rmm + + rmm.reinitialize( + devices=rank, + ) + + import cupy + + cupy.cuda.Device(rank).use() + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + + from cugraph.testing.mg_utils import enable_spilling + + enable_spilling() + + torch.cuda.set_device(rank) + + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size) + + pylibwholegraph.torch.initialize.init( + rank, + world_size, + rank, + world_size, + ) + + cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) + + +def run_test_graph_make_homogeneous_graph_mg(rank, uid, world_size, direction): + init_pytorch_worker(rank, world_size, uid) + + df = karate.get_edgelist() + df.src = df.src.astype("int64") + df.dst = df.dst.astype("int64") + wgt = np.random.random((len(df),)) + + graph = cugraph_dgl.Graph( + is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph" + ) + + # The number of nodes is set globally but features can have + # any distribution across workers as long as they are in order. + global_num_nodes = max(df.src.max(), df.dst.max()) + 1 + node_x = np.array_split(np.arange(global_num_nodes, dtype="int64"), world_size)[ + rank + ] + + # Each worker gets a shuffled, permuted version of the edgelist + df = df.sample(frac=1.0) + df.src = (df.src + rank) % global_num_nodes + df.dst = (df.dst + rank + 1) % global_num_nodes + + graph.add_nodes(global_num_nodes, data={"x": node_x}) + graph.add_edges(df.src, df.dst, {"weight": wgt}) + plc_dgl_graph = graph._graph(direction=direction) + + assert graph.num_nodes() == global_num_nodes + assert graph.num_edges() == len(df) * world_size + assert graph.is_homogeneous + assert graph.is_multi_gpu + + assert ( + graph.nodes() + == torch.arange(global_num_nodes, dtype=torch.int64, device="cuda") + ).all() + ix = torch.arange(len(node_x) * rank, len(node_x) * (rank + 1), dtype=torch.int64) + assert (graph.nodes[ix]["x"] == torch.as_tensor(node_x, device="cuda")).all() + + assert ( + graph.edges("eid", device="cuda") + == torch.arange(world_size * len(df), dtype=torch.int64, device="cuda") + ).all() + ix = torch.arange(len(df) * rank, len(df) * (rank + 1), dtype=torch.int64) + assert (graph.edges[ix]["weight"] == torch.as_tensor(wgt, device="cuda")).all() + + plc_handle = pylibcugraph.ResourceHandle( + cugraph_comms_get_raft_handle().getHandle() + ) + + plc_expected_graph = pylibcugraph.MGGraph( + plc_handle, + pylibcugraph.GraphProperties(is_multigraph=True, is_symmetric=False), + [df.src] if direction == "out" else [df.dst], + [df.dst] if direction == "out" else [df.src], + vertices_array=[ + cupy.array_split(cupy.arange(global_num_nodes, dtype="int64"), world_size)[ + rank + ] + ], + ) + + # Do the expensive check to make sure this test fails if an invalid + # graph is constructed. 
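+    # Each rank passed only its local edge partition to MGGraph, but the
+    # degree counts below are computed across all partitions, so they
+    # describe the full distributed graph. do_expensive_check=True forces
+    # the extra validation pass; it is too slow for production use but is
+    # what makes this test meaningful.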
+ v_actual, d_in_actual, d_out_actual = pylibcugraph.degrees( + plc_handle, + plc_dgl_graph, + source_vertices=cupy.arange(global_num_nodes, dtype="int64"), + do_expensive_check=True, + ) + + v_exp, d_in_exp, d_out_exp = pylibcugraph.degrees( + plc_handle, + plc_expected_graph, + source_vertices=cupy.arange(global_num_nodes, dtype="int64"), + do_expensive_check=True, + ) + + assert (v_actual == v_exp).all() + assert (d_in_actual == d_in_exp).all() + assert (d_out_actual == d_out_exp).all() + + cugraph_comms_shutdown() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif( + isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" +) +@pytest.mark.parametrize("direction", ["out", "in"]) +def test_graph_make_homogeneous_graph_mg(direction): + uid = cugraph_comms_create_unique_id() + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_graph_make_homogeneous_graph_mg, + args=( + uid, + world_size, + direction, + ), + nprocs=world_size, + ) + + +def run_test_graph_make_heterogeneous_graph_mg(rank, uid, world_size, direction): + init_pytorch_worker(rank, world_size, uid) + + df = karate.get_edgelist() + df.src = df.src.astype("int64") + df.dst = df.dst.astype("int64") + + graph = cugraph_dgl.Graph(is_multi_gpu=True) + total_num_nodes = max(df.src.max(), df.dst.max()) + 1 + + # Each worker gets a shuffled, permuted version of the edgelist + df = df.sample(frac=1.0) + df.src = (df.src + rank) % total_num_nodes + df.dst = (df.dst + rank + 1) % total_num_nodes + + num_nodes_group_1 = total_num_nodes // 2 + num_nodes_group_2 = total_num_nodes - num_nodes_group_1 + + node_x_1 = np.array_split(np.random.random((num_nodes_group_1,)), world_size)[rank] + node_x_2 = np.array_split(np.random.random((num_nodes_group_2,)), world_size)[rank] + + graph.add_nodes(num_nodes_group_1, {"x": node_x_1}, "type1") + graph.add_nodes(num_nodes_group_2, {"x": node_x_2}, "type2") + + edges_11 = df[(df.src < num_nodes_group_1) & (df.dst < num_nodes_group_1)] + edges_12 = df[(df.src < num_nodes_group_1) & (df.dst >= num_nodes_group_1)] + edges_21 = df[(df.src >= num_nodes_group_1) & (df.dst < num_nodes_group_1)] + edges_22 = df[(df.src >= num_nodes_group_1) & (df.dst >= num_nodes_group_1)] + + edges_12.dst -= num_nodes_group_1 + edges_21.src -= num_nodes_group_1 + edges_22.dst -= num_nodes_group_1 + edges_22.src -= num_nodes_group_1 + + total_edges_11 = torch.tensor(len(edges_11), device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(total_edges_11, torch.distributed.ReduceOp.SUM) + total_edges_12 = torch.tensor(len(edges_12), device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(total_edges_12, torch.distributed.ReduceOp.SUM) + total_edges_21 = torch.tensor(len(edges_21), device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(total_edges_21, torch.distributed.ReduceOp.SUM) + total_edges_22 = torch.tensor(len(edges_22), device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(total_edges_22, torch.distributed.ReduceOp.SUM) + + graph.add_edges(edges_11.src, edges_11.dst, etype=("type1", "e1", "type1")) + graph.add_edges(edges_12.src, edges_12.dst, etype=("type1", "e2", "type2")) + graph.add_edges(edges_21.src, edges_21.dst, etype=("type2", "e3", "type1")) + graph.add_edges(edges_22.src, edges_22.dst, etype=("type2", "e4", "type2")) + + assert not graph.is_homogeneous + assert graph.is_multi_gpu + + # Verify graph.nodes() + assert ( + graph.nodes() == torch.arange(total_num_nodes, 
dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.nodes("type1") + == torch.arange(num_nodes_group_1, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.nodes("type2") + == torch.arange(num_nodes_group_2, dtype=torch.int64, device="cuda") + ).all() + + # Verify graph.edges() + assert ( + graph.edges("eid", etype=("type1", "e1", "type1")) + == torch.arange(total_edges_11, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type1", "e2", "type2")) + == torch.arange(total_edges_12, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type2", "e3", "type1")) + == torch.arange(total_edges_21, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type2", "e4", "type2")) + == torch.arange(total_edges_22, dtype=torch.int64, device="cuda") + ).all() + + # Use sampling call to check graph creation + # This isn't a test of cuGraph sampling with DGL; the options are + # set to verify the graph only. + plc_graph = graph._graph(direction) + assert isinstance(plc_graph, pylibcugraph.MGGraph) + sampling_output = pylibcugraph.uniform_neighbor_sample( + graph._resource_handle, + plc_graph, + start_list=cupy.arange(total_num_nodes, dtype="int64"), + batch_id_list=cupy.full(total_num_nodes, rank, dtype="int32"), + label_list=cupy.arange(world_size, dtype="int32"), + label_to_output_comm_rank=cupy.arange(world_size, dtype="int32"), + h_fan_out=np.array([-1], dtype="int32"), + with_replacement=False, + do_expensive_check=True, + with_edge_properties=True, + prior_sources_behavior="exclude", + return_dict=True, + ) + + sdf = cudf.DataFrame( + { + "majors": sampling_output["majors"], + "minors": sampling_output["minors"], + "edge_id": sampling_output["edge_id"], + "edge_type": sampling_output["edge_type"], + } + ) + + expected_offsets = { + 0: (0, 0), + 1: (0, num_nodes_group_1), + 2: (num_nodes_group_1, 0), + 3: (num_nodes_group_1, num_nodes_group_1), + } + if direction == "in": + src_col = "minors" + dst_col = "majors" + else: + src_col = "majors" + dst_col = "minors" + + edges_11["etype"] = 0 + edges_12["etype"] = 1 + edges_21["etype"] = 2 + edges_22["etype"] = 3 + + cdf = cudf.concat([edges_11, edges_12, edges_21, edges_22]) + for i in range(len(cdf)): + row = cdf.iloc[i] + etype = row["etype"] + src = row["src"] + expected_offsets[etype][0] + dst = row["dst"] + expected_offsets[etype][1] + + f = sdf[ + (sdf[src_col] == src) & (sdf[dst_col] == dst) & (sdf["edge_type"] == etype) + ] + assert len(f) > 0 # may be multiple, some could be on other GPU + + cugraph_comms_shutdown() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif( + isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" +) +@pytest.mark.parametrize("direction", ["out", "in"]) +def test_graph_make_heterogeneous_graph_mg(direction): + uid = cugraph_comms_create_unique_id() + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_graph_make_heterogeneous_graph_mg, + args=( + uid, + world_size, + direction, + ), + nprocs=world_size, + ) From 557d9aa03442c9f1e2d82eeda4d6c67db62533d0 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Thu, 27 Jun 2024 15:15:42 -0700 Subject: [PATCH 12/47] Loaders --- .../cugraph_dgl/dataloading/__init__.py | 11 +- .../dataloading/dask_dataloader.py | 321 +++++++++++++++ .../cugraph_dgl/dataloading/dataloader.py | 384 +++++------------- .../dataloading/neighbor_sampler.py | 123 +++++- 
.../cugraph_dgl/dataloading/sampler.py | 154 +++++++ .../dataloading/utils/sampling_helpers.py | 104 ++++- 6 files changed, 806 insertions(+), 291 deletions(-) create mode 100644 python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py create mode 100644 python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py index 2fd7d29bd49..5a775f0e88c 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,9 +11,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + from cugraph_dgl.dataloading.dataset import ( HomogenousBulkSamplerDataset, HeterogenousBulkSamplerDataset, ) from cugraph_dgl.dataloading.neighbor_sampler import NeighborSampler -from cugraph_dgl.dataloading.dataloader import DataLoader +from cugraph_dgl.dataloading.dask_dataloader import DaskDataLoader + + +def DataLoader(*args, **kwargs): + warnings.warn("DataLoader has been renamed to DaskDataLoader", FutureWarning) + return DaskDataLoader(*args, **kwargs) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py new file mode 100644 index 00000000000..7cd94a1be84 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py @@ -0,0 +1,321 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations +import os +import shutil +import cugraph_dgl +import cupy as cp +import cudf +from cugraph.utilities.utils import import_optional +from cugraph.gnn import BulkSampler +from dask.distributed import default_client, Event +from cugraph_dgl.dataloading import ( + HomogenousBulkSamplerDataset, + HeterogenousBulkSamplerDataset, +) +from cugraph_dgl.dataloading.utils.extract_graph_helpers import ( + create_cugraph_graph_from_edges_dict, +) + +dgl = import_optional("dgl") +torch = import_optional("torch") + + +class DaskDataLoader(torch.utils.data.DataLoader): + """ + Sampled graph data loader. Wrap a :class:`~cugraph_dgl.CuGraphStorage` and a + :class:`~cugraph_dgl.dataloading.NeighborSampler` into + an iterable over mini-batches of samples. cugraph_dgl's ``DataLoader`` extends + PyTorch's ``DataLoader`` by handling creation and + transmission of graph samples. 
+ """ + + def __init__( + self, + graph: cugraph_dgl.CuGraphStorage, + indices: torch.Tensor, + graph_sampler: cugraph_dgl.dataloading.NeighborSampler, + sampling_output_dir: str, + batches_per_partition: int = 50, + seeds_per_call: int = 200_000, + device: torch.device = None, + use_ddp: bool = False, + ddp_seed: int = 0, + batch_size: int = 1024, + drop_last: bool = False, + shuffle: bool = False, + sparse_format: str = "coo", + **kwargs, + ): + """ + Constructor for DaskDataLoader: + ------------------------------- + graph : CuGraphStorage + The graph. + indices : Tensor or dict[ntype, Tensor] + The set of indices. It can either be a tensor of + integer indices or a dictionary of types and indices. + The actual meaning of the indices is defined by the :meth:`sample` method of + :attr:`graph_sampler`. + graph_sampler : cugraph_dgl.dataloading.NeighborSampler + The subgraph sampler. + sampling_output_dir: str + Output directory to share sampling results in + batches_per_partition: int + The number of batches of sampling results to write/read + seeds_per_call: int + The number of seeds to sample at once + device : device context, optional + The device of the generated MFGs in each iteration, which should be a + PyTorch device object (e.g., ``torch.device``). + By default this returns the tenors on device with the current + cuda context + use_ddp : boolean, optional + If True, tells the DataLoader to split the training set for each + participating process appropriately using + :class:`torch.utils.data.distributed.DistributedSampler`. + Overrides the :attr:`sampler` argument of + :class:`torch.utils.data.DataLoader`. + ddp_seed : int, optional + The seed for shuffling the dataset in + :class:`torch.utils.data.distributed.DistributedSampler`. + Only effective when :attr:`use_ddp` is True. + batch_size: int + Batch size. + sparse_format: str, default = "coo" + The sparse format of the emitted sampled graphs. Choose between "csc" + and "coo". When using "csc", the graphs are of type + cugraph_dgl.nn.SparseGraph. + kwargs : dict + Key-word arguments to be passed to the parent PyTorch + :py:class:`torch.utils.data.DataLoader` class. Common arguments are: + - ``batch_size`` (int): The number of indices in each batch. + - ``drop_last`` (bool): Whether to drop the last incomplete + batch. + - ``shuffle`` (bool): Whether to randomly shuffle the + indices at each epoch + Examples + -------- + To train a 3-layer GNN for node classification on a set of nodes + ``train_nid`` on a homogeneous graph where each node takes messages + from 15 neighbors on the first layer, 10 neighbors on the second, and + 5 neighbors on the third: + >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) + >>> dataloader = cugraph_dgl.dataloading.DataLoader( + ... g, train_nid, sampler, + ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) + >>> for input_nodes, output_nodes, blocks in dataloader: + ... train_on(input_nodes, output_nodes, blocks) + **Using with Distributed Data Parallel** + If you are using PyTorch's distributed training (e.g. when using + :mod:`torch.nn.parallel.DistributedDataParallel`), + you can train the model by turning + on the `use_ddp` option: + >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) + >>> dataloader = cugraph_dgl.dataloading.DataLoader( + ... g, train_nid, sampler, use_ddp=True, + ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) + >>> for epoch in range(start_epoch, n_epochs): + ... 
for input_nodes, output_nodes, blocks in dataloader: + ... + """ + if sparse_format not in ["coo", "csc"]: + raise ValueError( + f"sparse_format must be one of 'coo', 'csc', " + f"but got {sparse_format}." + ) + self.sparse_format = sparse_format + + self.ddp_seed = ddp_seed + self.use_ddp = use_ddp + self.shuffle = shuffle + self.drop_last = drop_last + self.graph_sampler = graph_sampler + worker_init_fn = dgl.dataloading.WorkerInitWrapper( + kwargs.get("worker_init_fn", None) + ) + self.other_storages = {} + self.epoch_number = 0 + self._batch_size = batch_size + self._sampling_output_dir = sampling_output_dir + self._batches_per_partition = batches_per_partition + self._seeds_per_call = seeds_per_call + self._rank = None + + indices = _dgl_idx_to_cugraph_idx(indices, graph) + + self.tensorized_indices_ds = dgl.dataloading.create_tensorized_dataset( + indices, + batch_size, + drop_last, + use_ddp, + ddp_seed, + shuffle, + kwargs.get("persistent_workers", False), + ) + + if len(graph.ntypes) <= 1: + self.cugraph_dgl_dataset = HomogenousBulkSamplerDataset( + total_number_of_nodes=graph.total_number_of_nodes, + edge_dir=self.graph_sampler.edge_dir, + sparse_format=sparse_format, + ) + else: + etype_id_to_etype_str_dict = {v: k for k, v in graph._etype_id_dict.items()} + + self.cugraph_dgl_dataset = HeterogenousBulkSamplerDataset( + num_nodes_dict=graph.num_nodes_dict, + etype_id_dict=etype_id_to_etype_str_dict, + etype_offset_dict=graph._etype_offset_d, + ntype_offset_dict=graph._ntype_offset_d, + edge_dir=self.graph_sampler.edge_dir, + ) + + if use_ddp: + rank = torch.distributed.get_rank() + client = default_client() + self._graph_creation_event = Event("cugraph_dgl_load_mg_graph_event") + if rank == 0: + G = create_cugraph_graph_from_edges_dict( + edges_dict=graph._edges_dict, + etype_id_dict=graph._etype_id_dict, + edge_dir=graph_sampler.edge_dir, + ) + client.publish_dataset(cugraph_dgl_mg_graph_ds=G) + self._graph_creation_event.set() + else: + if self._graph_creation_event.wait(timeout=1000): + G = client.get_dataset("cugraph_dgl_mg_graph_ds") + else: + raise RuntimeError( + f"Fetch cugraph_dgl_mg_graph_ds to worker_id {rank}", + "from worker_id 0 failed", + ) + else: + rank = 0 + G = create_cugraph_graph_from_edges_dict( + edges_dict=graph._edges_dict, + etype_id_dict=graph._etype_id_dict, + edge_dir=graph_sampler.edge_dir, + ) + + self._rank = rank + self._cugraph_graph = G + super().__init__( + self.cugraph_dgl_dataset, + batch_size=None, + worker_init_fn=worker_init_fn, + collate_fn=lambda x: x, # Hack to prevent collating + **kwargs, + ) + + def __iter__(self): + output_dir = os.path.join( + self._sampling_output_dir, "epoch_" + str(self.epoch_number) + ) + kwargs = {} + if isinstance(self.cugraph_dgl_dataset, HomogenousBulkSamplerDataset): + kwargs["deduplicate_sources"] = True + kwargs["prior_sources_behavior"] = "carryover" + kwargs["renumber"] = True + + if self.sparse_format == "csc": + kwargs["compression"] = "CSR" + kwargs["compress_per_hop"] = True + # The following kwargs will be deprecated in uniform sampler. 
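+                # Note (assumed semantics of these BulkSampler flags): with
+                # legacy names disabled, the CSC output is keyed by
+                # "major_offsets"/"minors" (plus "map",
+                # "renumber_map_offsets", and "label_hop_offsets"), and hop
+                # membership is read from "label_hop_offsets" instead of a
+                # per-edge hop column; this matches what
+                # _process_sampled_df_csc() consumes downstream.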
+ kwargs["use_legacy_names"] = False + kwargs["include_hop_column"] = False + + else: + kwargs["deduplicate_sources"] = False + kwargs["prior_sources_behavior"] = None + kwargs["renumber"] = False + + bs = BulkSampler( + output_path=output_dir, + batch_size=self._batch_size, + graph=self._cugraph_graph, + batches_per_partition=self._batches_per_partition, + seeds_per_call=self._seeds_per_call, + fanout_vals=self.graph_sampler._reversed_fanout_vals, + with_replacement=self.graph_sampler.replace, + **kwargs, + ) + + if self.shuffle: + self.tensorized_indices_ds.shuffle() + + batch_df = create_batch_df(self.tensorized_indices_ds) + bs.add_batches(batch_df, start_col_name="start", batch_col_name="batch_id") + bs.flush() + self.cugraph_dgl_dataset.set_input_files(input_directory=output_dir) + self.epoch_number = self.epoch_number + 1 + return super().__iter__() + + def __del__(self): + if self.use_ddp: + torch.distributed.barrier() + if self._rank == 0: + if self.use_ddp: + client = default_client() + client.unpublish_dataset("cugraph_dgl_mg_graph_ds") + self._graph_creation_event.clear() + _clean_directory(self._sampling_output_dir) + + +def get_batch_id_series(n_output_rows: int, batch_size: int): + num_batches = (n_output_rows + batch_size - 1) // batch_size + print(f"Number of batches = {num_batches}".format(num_batches)) + batch_ar = cp.arange(0, num_batches).repeat(batch_size) + batch_ar = batch_ar[0:n_output_rows].astype(cp.int32) + return cudf.Series(batch_ar) + + +def create_batch_df(dataset: torch.Tensor): + batch_id_ls = [] + indices_ls = [] + for batch_id, b_indices in enumerate(dataset): + if isinstance(b_indices, dict): + b_indices = torch.cat(list(b_indices.values())) + batch_id_ar = cp.full(shape=len(b_indices), fill_value=batch_id, dtype=cp.int32) + batch_id_ls.append(batch_id_ar) + indices_ls.append(b_indices) + + batch_id_ar = cp.concatenate(batch_id_ls) + indices_ar = cp.asarray(torch.concat(indices_ls)) + batches_df = cudf.DataFrame( + { + "start": indices_ar, + "batch_id": batch_id_ar, + } + ) + return batches_df + + +def _dgl_idx_to_cugraph_idx(idx, cugraph_gs): + if not isinstance(idx, dict): + if len(cugraph_gs.ntypes) > 1: + raise dgl.DGLError( + "Must specify node type when the graph is not homogeneous." + ) + return idx + else: + return {k: cugraph_gs.dgl_n_id_to_cugraph_id(n, k) for k, n in idx.items()} + + +def _clean_directory(path): + """param could either be relative or absolute.""" + if os.path.isfile(path): + os.remove(path) # remove the file + elif os.path.isdir(path): + shutil.rmtree(path) # remove dir and all contains diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index 11139910931..73130e2dfb0 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,151 +10,121 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import annotations -import os -import shutil -import cugraph_dgl -import cupy as cp -import cudf + +import warnings + +from typing import Union, Optional, Dict + from cugraph.utilities.utils import import_optional -from cugraph.gnn import BulkSampler -from dask.distributed import default_client, Event -from cugraph_dgl.dataloading import ( - HomogenousBulkSamplerDataset, - HeterogenousBulkSamplerDataset, -) -from cugraph_dgl.dataloading.utils.extract_graph_helpers import ( - create_cugraph_graph_from_edges_dict, -) + +import cugraph_dgl +from cugraph_dgl.typing import TensorType +from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor dgl = import_optional("dgl") torch = import_optional("torch") -class DataLoader(torch.utils.data.DataLoader): +class DataLoader: """ - Sampled graph data loader. Wrap a :class:`~cugraph_dgl.CuGraphStorage` and a - :class:`~cugraph_dgl.dataloading.NeighborSampler` into - an iterable over mini-batches of samples. cugraph_dgl's ``DataLoader`` extends - PyTorch's ``DataLoader`` by handling creation and - transmission of graph samples. + Duck-typed version of dgl.dataloading.DataLoader """ def __init__( self, - graph: cugraph_dgl.CuGraphStorage, - indices: torch.Tensor, - graph_sampler: cugraph_dgl.dataloading.NeighborSampler, - sampling_output_dir: str, - batches_per_partition: int = 50, - seeds_per_call: int = 200_000, - device: torch.device = None, + graph: "cugraph_dgl.Graph", + indices: TensorType, + graph_sampler: "cugraph_dgl.dataloading.Sampler", + device: Union[int, str, "torch.device"] = None, use_ddp: bool = False, ddp_seed: int = 0, - batch_size: int = 1024, + batch_size=1, drop_last: bool = False, shuffle: bool = False, - sparse_format: str = "coo", + use_prefetch_thread: Optional[bool] = None, + use_alternate_streams: Optional[bool] = None, + pin_prefetcher: Optional[bool] = None, + use_uva=False, + gpu_cache: Dict[str, Dict[str, int]] = None, + output_format: str = "dgl.Block", **kwargs, ): """ - Constructor for CuGraphStorage: - ------------------------------- - graph : CuGraphStorage - The graph. - indices : Tensor or dict[ntype, Tensor] - The set of indices. It can either be a tensor of - integer indices or a dictionary of types and indices. - The actual meaning of the indices is defined by the :meth:`sample` method of - :attr:`graph_sampler`. - graph_sampler : cugraph_dgl.dataloading.NeighborSampler - The subgraph sampler. - sampling_output_dir: str - Output directory to share sampling results in - batches_per_partition: int - The number of batches of sampling results to write/read - seeds_per_call: int - The number of seeds to sample at once - device : device context, optional - The device of the generated MFGs in each iteration, which should be a - PyTorch device object (e.g., ``torch.device``). - By default this returns the tenors on device with the current - cuda context - use_ddp : boolean, optional - If True, tells the DataLoader to split the training set for each - participating process appropriately using - :class:`torch.utils.data.distributed.DistributedSampler`. - Overrides the :attr:`sampler` argument of - :class:`torch.utils.data.DataLoader`. - ddp_seed : int, optional - The seed for shuffling the dataset in - :class:`torch.utils.data.distributed.DistributedSampler`. - Only effective when :attr:`use_ddp` is True. - batch_size: int - Batch size. - sparse_format: str, default = "coo" - The sparse format of the emitted sampled graphs. Choose between "csc" - and "coo". 
When using "csc", the graphs are of type - cugraph_dgl.nn.SparseGraph. - kwargs : dict - Key-word arguments to be passed to the parent PyTorch - :py:class:`torch.utils.data.DataLoader` class. Common arguments are: - - ``batch_size`` (int): The number of indices in each batch. - - ``drop_last`` (bool): Whether to drop the last incomplete - batch. - - ``shuffle`` (bool): Whether to randomly shuffle the - indices at each epoch - Examples - -------- - To train a 3-layer GNN for node classification on a set of nodes - ``train_nid`` on a homogeneous graph where each node takes messages - from 15 neighbors on the first layer, 10 neighbors on the second, and - 5 neighbors on the third: - >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) - >>> dataloader = cugraph_dgl.dataloading.DataLoader( - ... g, train_nid, sampler, - ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) - >>> for input_nodes, output_nodes, blocks in dataloader: - ... train_on(input_nodes, output_nodes, blocks) - **Using with Distributed Data Parallel** - If you are using PyTorch's distributed training (e.g. when using - :mod:`torch.nn.parallel.DistributedDataParallel`), - you can train the model by turning - on the `use_ddp` option: - >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) - >>> dataloader = cugraph_dgl.dataloading.DataLoader( - ... g, train_nid, sampler, use_ddp=True, - ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) - >>> for epoch in range(start_epoch, n_epochs): - ... for input_nodes, output_nodes, blocks in dataloader: - ... + Parameters + ---------- + graph: cugraph_dgl.Graph + The graph being sampled. Can be a single-GPU or multi-GPU graph. + indices: TensorType + The seed nodes for sampling. If use_ddp=True, then all seed + nodes should be provided. If use_ddp=False, then only the seed + nodes assigned to this worker should be provided. + graph_sampler: cugraph_dgl.dataloading.Sampler + The sampler responsible for sampling the graph and producing + output minibatches. + device: Union[int, str, torch.device] + Optional. + The device assigned to this loader ('cpu', 'cuda' or device id). + Defaults to the current device. + use_ddp: bool + Optional (default=False). + If true, this argument will assume the entire list of input seed + nodes is being passed to each worker, and will appropriately + split and shuffle the list. + It false, then it is assumed that the list of input seed nodes + is comprised of the union of the lists provided to each worker. + ddp_seed: int + Optional (default=0). + The seed used for dividing and shuffling data if use_ddp=True. + Has no effect if use_ddp=False. + use_uva: bool + Optional (default=False). + Whether to use pinned memory and unified virtual addressing + to perform sampling. + This argument is ignored by cuGraph-DGL. + use_prefetch_thread: bool + Optional (default=False). + Whether to spawn a new thread for feature fetching. + This argument is ignored by cuGraph-DGL. + use_alternate_streams: bool + Optional (default=False). + Whether to perform feature fetching on a separate stream. + This argument is ignored by cuGraph-DGL. + pin_prefetcher: bool + Optional (default=False). + Whether to pin the feature tensors. + This argument is currently ignored by cuGraph-DGL. + gpu_cache: Dict[str, Dict[str, int]] + List of features to cache using HugeCTR. + This argument is not supported by cuGraph-DGL and + will result in an error. + output_format: str + Optional (default="dgl.Block"). + The output format for blocks. 
+ Can be either "dgl.Block" or "cugraph_dgl.nn.SparseGraph". """ - if sparse_format not in ["coo", "csc"]: + + if use_uva: + warnings.warn("The 'use_uva' argument is ignored by cuGraph-DGL.") + if use_prefetch_thread: + warnings.warn( + "The 'use_prefetch_thread' argument is ignored by cuGraph-DGL." + ) + if use_alternate_streams: + warnings.warn( + "The 'use_alternate_streams' argument is ignored by cuGraph-DGL." + ) + if pin_prefetcher: + warnings.warn("The 'pin_prefetcher' argument is ignored by cuGraph-DGL.") + if gpu_cache: raise ValueError( - f"sparse_format must be one of 'coo', 'csc', " - f"but got {sparse_format}." + "HugeCTR is not supported by cuGraph-DGL. " + "Consider using WholeGraph for feature storage" + " in cugraph_dgl.Graph instead." ) - self.sparse_format = sparse_format - self.ddp_seed = ddp_seed - self.use_ddp = use_ddp - self.shuffle = shuffle - self.drop_last = drop_last - self.graph_sampler = graph_sampler - worker_init_fn = dgl.dataloading.WorkerInitWrapper( - kwargs.get("worker_init_fn", None) - ) - self.other_storages = {} - self.epoch_number = 0 - self._batch_size = batch_size - self._sampling_output_dir = sampling_output_dir - self._batches_per_partition = batches_per_partition - self._seeds_per_call = seeds_per_call - self._rank = None - - indices = _dgl_idx_to_cugraph_idx(indices, graph) + indices = _cast_to_torch_tensor(indices) - self.tensorized_indices_ds = dgl.dataloading.create_tensorized_dataset( + self.__dataset = dgl.dataloading.create_tensorized_dataset( indices, batch_size, drop_last, @@ -164,158 +134,24 @@ def __init__( kwargs.get("persistent_workers", False), ) - if len(graph.ntypes) <= 1: - self.cugraph_dgl_dataset = HomogenousBulkSamplerDataset( - total_number_of_nodes=graph.total_number_of_nodes, - edge_dir=self.graph_sampler.edge_dir, - sparse_format=sparse_format, - ) - else: - etype_id_to_etype_str_dict = {v: k for k, v in graph._etype_id_dict.items()} - - self.cugraph_dgl_dataset = HeterogenousBulkSamplerDataset( - num_nodes_dict=graph.num_nodes_dict, - etype_id_dict=etype_id_to_etype_str_dict, - etype_offset_dict=graph._etype_offset_d, - ntype_offset_dict=graph._ntype_offset_d, - edge_dir=self.graph_sampler.edge_dir, - ) + self.__output_format = output_format + self.__sampler = graph_sampler + self.__batch_size = batch_size + self.__graph = graph + self.__device = device - if use_ddp: - rank = torch.distributed.get_rank() - client = default_client() - self._graph_creation_event = Event("cugraph_dgl_load_mg_graph_event") - if rank == 0: - G = create_cugraph_graph_from_edges_dict( - edges_dict=graph._edges_dict, - etype_id_dict=graph._etype_id_dict, - edge_dir=graph_sampler.edge_dir, - ) - client.publish_dataset(cugraph_dgl_mg_graph_ds=G) - self._graph_creation_event.set() - else: - if self._graph_creation_event.wait(timeout=1000): - G = client.get_dataset("cugraph_dgl_mg_graph_ds") - else: - raise RuntimeError( - f"Fetch cugraph_dgl_mg_graph_ds to worker_id {rank}", - "from worker_id 0 failed", - ) - else: - rank = 0 - G = create_cugraph_graph_from_edges_dict( - edges_dict=graph._edges_dict, - etype_id_dict=graph._etype_id_dict, - edge_dir=graph_sampler.edge_dir, - ) - - self._rank = rank - self._cugraph_graph = G - super().__init__( - self.cugraph_dgl_dataset, - batch_size=None, - worker_init_fn=worker_init_fn, - collate_fn=lambda x: x, # Hack to prevent collating - **kwargs, - ) + @property + def dataset( + self, + ) -> Union[ + "dgl.dataloading.dataloader.TensorizedDataset", + "dgl.dataloading.dataloader.DDPTensorizedDataset", + ]: + 
return self.__dataset def __iter__(self): - output_dir = os.path.join( - self._sampling_output_dir, "epoch_" + str(self.epoch_number) + return self.__sampler.sample( + self.__graph, + self.__dataset, + self.__batch_size, ) - kwargs = {} - if isinstance(self.cugraph_dgl_dataset, HomogenousBulkSamplerDataset): - kwargs["deduplicate_sources"] = True - kwargs["prior_sources_behavior"] = "carryover" - kwargs["renumber"] = True - - if self.sparse_format == "csc": - kwargs["compression"] = "CSR" - kwargs["compress_per_hop"] = True - # The following kwargs will be deprecated in uniform sampler. - kwargs["use_legacy_names"] = False - kwargs["include_hop_column"] = False - - else: - kwargs["deduplicate_sources"] = False - kwargs["prior_sources_behavior"] = None - kwargs["renumber"] = False - - bs = BulkSampler( - output_path=output_dir, - batch_size=self._batch_size, - graph=self._cugraph_graph, - batches_per_partition=self._batches_per_partition, - seeds_per_call=self._seeds_per_call, - fanout_vals=self.graph_sampler._reversed_fanout_vals, - with_replacement=self.graph_sampler.replace, - **kwargs, - ) - - if self.shuffle: - self.tensorized_indices_ds.shuffle() - - batch_df = create_batch_df(self.tensorized_indices_ds) - bs.add_batches(batch_df, start_col_name="start", batch_col_name="batch_id") - bs.flush() - self.cugraph_dgl_dataset.set_input_files(input_directory=output_dir) - self.epoch_number = self.epoch_number + 1 - return super().__iter__() - - def __del__(self): - if self.use_ddp: - torch.distributed.barrier() - if self._rank == 0: - if self.use_ddp: - client = default_client() - client.unpublish_dataset("cugraph_dgl_mg_graph_ds") - self._graph_creation_event.clear() - _clean_directory(self._sampling_output_dir) - - -def get_batch_id_series(n_output_rows: int, batch_size: int): - num_batches = (n_output_rows + batch_size - 1) // batch_size - print(f"Number of batches = {num_batches}".format(num_batches)) - batch_ar = cp.arange(0, num_batches).repeat(batch_size) - batch_ar = batch_ar[0:n_output_rows].astype(cp.int32) - return cudf.Series(batch_ar) - - -def create_batch_df(dataset: torch.Tensor): - batch_id_ls = [] - indices_ls = [] - for batch_id, b_indices in enumerate(dataset): - if isinstance(b_indices, dict): - b_indices = torch.cat(list(b_indices.values())) - batch_id_ar = cp.full(shape=len(b_indices), fill_value=batch_id, dtype=cp.int32) - batch_id_ls.append(batch_id_ar) - indices_ls.append(b_indices) - - batch_id_ar = cp.concatenate(batch_id_ls) - indices_ar = cp.asarray(torch.concat(indices_ls)) - batches_df = cudf.DataFrame( - { - "start": indices_ar, - "batch_id": batch_id_ar, - } - ) - return batches_df - - -def _dgl_idx_to_cugraph_idx(idx, cugraph_gs): - if not isinstance(idx, dict): - if len(cugraph_gs.ntypes) > 1: - raise dgl.DGLError( - "Must specify node type when the graph is not homogeneous." 
-            )
-        return idx
-    else:
-        return {k: cugraph_gs.dgl_n_id_to_cugraph_id(n, k) for k, n in idx.items()}
-
-
-def _clean_directory(path):
-    """param could either be relative or absolute."""
-    if os.path.isfile(path):
-        os.remove(path)  # remove the file
-    elif os.path.isdir(path):
-        shutil.rmtree(path)  # remove dir and all contains
diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
index b61f05f6379..f64eb0d8866 100644
--- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
+++ b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -10,8 +10,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from __future__ import annotations
-from typing import Sequence
+
+import warnings
+
+from typing import Sequence, Optional, Union, List, Tuple, Iterator
+
+from cugraph.gnn import UniformNeighborSampler, DistSampleWriter
+from cugraph.utilities.utils import import_optional
+
+
+import cugraph_dgl
+from cugraph_dgl.nn import SparseGraph
+from cugraph_dgl.typing import TensorType
+from cugraph_dgl.dataloading.sampler import HomogeneousSampleReader
+
+torch = import_optional("torch")
 
 
 class NeighborSampler:
@@ -50,7 +65,78 @@ def __init__(
         fanouts_per_layer: Sequence[int],
         edge_dir: str = "in",
         replace: bool = False,
+        prob: Optional[str] = None,
+        mask: Optional[str] = None,
+        prefetch_node_feats: Optional[Union[List[str], dict[str, List[str]]]] = None,
+        prefetch_edge_feats: Optional[
+            Union[List[str], dict[Tuple[str, str, str], List[str]]]
+        ] = None,
+        prefetch_labels: Optional[Union[List[str], dict[str, List[str]]]] = None,
+        output_device: Optional[Union["torch.device", int, str]] = None,
+        fused: bool = True,
+        **kwargs,
     ):
+        """
+        Parameters
+        ----------
+        fanouts_per_layer: Sequence[int]
+            The number of neighbors to sample per layer.
+        edge_dir: str
+            Optional (default='in').
+            The direction to traverse edges.
+        replace: bool
+            Optional (default=False).
+            Whether to sample with replacement.
+        prob: str
+            Optional.
+            If provided, the probability of each neighbor being
+            sampled is proportional to the edge feature
+            with the given name. Mutually exclusive with mask.
+            Currently unsupported.
+        mask: str
+            Optional.
+            If provided, only neighbors where the edge mask
+            with the given name is True can be selected.
+            Mutually exclusive with prob.
+            Currently unsupported.
+        prefetch_node_feats: Union[List[str], dict[str, List[str]]]
+            Optional.
+            Currently ignored by cuGraph-DGL.
+        prefetch_edge_feats: Union[List[str], dict[Tuple[str, str, str], List[str]]]
+            Optional.
+            Currently ignored by cuGraph-DGL.
+        prefetch_labels: Union[List[str], dict[str, List[str]]]
+            Optional.
+            Currently ignored by cuGraph-DGL.
+        output_device: Union[torch.device, int, str]
+            Optional.
+            Output device for samples. Defaults to the current device.
+        fused: bool
+            Optional (default=True).
+            This argument is ignored by cuGraph-DGL.
+        **kwargs
+            Keyword arguments for the underlying cuGraph distributed sampler
+            and writer (directory, batches_per_partition, format,
+            local_seeds_per_call).
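+
+        Example
+        -------
+        An illustrative sketch, not a tested recipe: it assumes a
+        homogeneous ``cugraph_dgl.Graph`` ``g`` and a seed-node tensor
+        ``train_nid``; the ``directory`` kwarg shown is optional.
+        >>> sampler = cugraph_dgl.dataloading.NeighborSampler(
+        ...     [15, 10, 5], directory="/tmp/samples")
+        >>> for in_nodes, out_nodes, blocks in sampler.sample(
+        ...         g, train_nid, batch_size=1024):
+        ...     ...  # one (input nodes, output nodes, MFGs) triple per batch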
+ """ + + if mask: + raise NotImplementedError( + "Edge masking is currently unsupported by cuGraph-DGL" + ) + if prob: + raise NotImplementedError( + "Edge masking is currently unsupported by cuGraph-DGL" + ) + if prefetch_edge_feats: + warnings.warn("'prefetch_edge_feats' is ignored by cuGraph-DGL") + if prefetch_node_feats: + warnings.warn("'prefetch_node_feats' is ignored by cuGraph-DGL") + if prefetch_labels: + warnings.warn("'prefetch_labels' is ignored by cuGraph-DGL") + if fused: + warnings.warn("'fused' is ignored by cuGraph-DGL") + self.fanouts = fanouts_per_layer reverse_fanouts = fanouts_per_layer.copy() reverse_fanouts.reverse() @@ -58,3 +144,36 @@ def __init__( self.edge_dir = edge_dir self.replace = replace + self.__kwargs = kwargs + + def sample( + self, g: "cugraph_dgl.Graph", indices: TensorType, batch_size: int = 1 + ) -> Iterator[Tuple["torch.Tensor", "torch.Tensor", List[SparseGraph]]]: + kwargs = dict(**self.__kwargs) + + writer = DistSampleWriter( + direction=kwargs.pop("directory", None), + batches_per_partition=kwargs.pop("batches_per_partition", 256), + format=kwargs.pop("format", "parquet"), + ) + + ds = UniformNeighborSampler( + g._graph(self.edge_dir), + writer, + compression="CSC", + fanout=self._reversed_fanout_vals, + prior_sources_behavior="carryover", + deduplicate_sources=True, + compress_per_hop=True, + with_replacement=self.replace, + **kwargs, + ) + + if g.is_homogeneous: + ds.sample_from_nodes(indices, batch_size=batch_size) + return HomogeneousSampleReader(ds.get_reader()) + + raise ValueError( + "Sampling heterogeneous graphs is currently" + " unsupported in the non-dask API" + ) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py new file mode 100644 index 00000000000..252ae2c36a0 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py @@ -0,0 +1,154 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Iterator, Dict, Tuple, List + +import cugraph_dgl +from cugraph_dgl.nn import SparseGraph +from cugraph_dgl.typing import TensorType +from cugraph_dgl.dataloading.utils.sampling_helpers import ( + create_homogeneous_sampled_graphs_from_tensors_csc, +) + +from cugraph.gnn import DistSampleReader + +from cugraph.utilities.utils import import_optional + +torch = import_optional("torch") + + +class SampleReader: + """ + Iterator that processes results from the cuGraph distributed sampler. + """ + + def __init__(self, base_reader: DistSampleReader): + """ + Constructs a new SampleReader. + + Parameters + ---------- + base_reader: DistSampleReader + The reader responsible for loading saved samples produced by + the cuGraph distributed sampler. 
+ """ + self.__base_reader = base_reader + self.__num_samples_remaining = 0 + self.__index = 0 + + def __next__(self): + if self._num_samples_remaining == 0: + # raw_sample_data is already a dict of tensors + self.__raw_sample_data, start_inclusive, end_inclusive = next( + self.__base_reader + ) + + self.__decoded_samples = self._decode_all(self.__raw_sample_data) + self.__num_samples_remaining = end_inclusive - start_inclusive + 1 + self.__index = 0 + + out = self.__decoded_samples[self.__index] + self.__index += 1 + self.__num_samples_remaining -= 1 + return out + + def _decode_all(self): + raise NotImplementedError("Must be implemented by subclass") + + def __iter__(self): + return self + + +class HomogeneousSampleReader(SampleReader): + """ + Subclass of SampleReader that reads DGL homogeneous output samples + produced by the cuGraph distributed sampler. + """ + + def __init__(self, base_reader: DistSampleReader): + """ + Constructs a new HomogeneousSampleReader + + Parameters + ---------- + base_reader: DistSampleReader + The reader responsible for loading saved samples produced by + the cuGraph distributed sampler. + """ + super().__init__(base_reader) + + def __decode_csc(self, raw_sample_data: Dict[str, "torch.Tensor"]): + create_homogeneous_sampled_graphs_from_tensors_csc( + raw_sample_data, + ) + + def __decode_coo(self, raw_sample_data: Dict[str, "torch.Tensor"]): + raise NotImplementedError( + "COO format is currently unsupported in the non-dask API" + ) + + def _decode_all(self, raw_sample_data: Dict[str, "torch.Tensor"]): + if "major_offsets" in raw_sample_data: + return self.__decode_csc(raw_sample_data) + else: + return self.__decode_coo(raw_sample_data) + + +class Sampler: + """ + Base sampler class for all cugraph-DGL samplers. + """ + + def __init__(self, sparse_format: str = "csc"): + """ + Parameters + ---------- + sparse_format: str + Optional (default = "coo"). + The sparse format of the emitted sampled graphs. + Currently, only "csc" is supported. + """ + + if sparse_format != "csc": + raise ValueError("Only CSC format is supported at this time") + + self.__sparse_format = sparse_format + + def sample( + self, g: cugraph_dgl.Graph, indices: TensorType, batch_size: int = 1 + ) -> Iterator[Tuple["torch.Tensor", "torch.Tensor", List[SparseGraph]]]: + """ + Samples the graph. + + Parameters + ---------- + g: cugraph_dgl.Graph + The graph being sampled. + indices: TensorType + The node ids of seed nodes where sampling will initiate from. + batch_size: int + The number of seed nodes per batch. + + Returns + ------- + Iterator[Tuple[torch.Tensor, torch.Tensor, List[cugraph_dgl.nn.SparseGraph]]] + Iterator over batches. Returns batches in the sparse + graph format, which can be converted upstream to DGL blocks + if needed. The returned tuples are in standard + DGL format: (input nodes, output nodes, blocks) where input + nodes are the renumbered input nodes, output nodes are + the renumbered output nodes, and blocks are the output graphs + for each hop. 
+ """ + + raise NotImplementedError("Must be implemented by subclass") diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index 10d851ebade..1286ddfe0dc 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -139,6 +139,28 @@ def _get_tensor_d_from_sampled_df(df): return result_tensor_d +def create_homogeneous_sampled_graphs_from_tensors_dist_coo( + tensors: Dict[str, "torch.Tensor"], return_type: str = "dgl.Block" +): + """ + Creates DGL MFGs for homogeneous graphs from output + tensors from a cuGraph DistSampler. + + Parameters + ---------- + tensors: Dict[str, torch.Tensor] + The dictionary of output tensors from the bulk sampler. + return_type: str + Optional (default="dgl.Block"). + The return type for the MFGs (either "dgl.Block" or + "cugraph_dgl.nn.SparseGraph") + """ + if return_type not in ["dgl.Block", "cugraph_dgl.nn.SparseGraph"]: + raise ValueError( + "return_type must be either dgl.Block or cugraph_dgl.nn.SparseGraph" + ) + + def create_homogeneous_sampled_graphs_from_dataframe( sampled_df: cudf.DataFrame, edge_dir: str = "in", @@ -404,21 +426,21 @@ def create_heterogenous_dgl_block_from_tensors_dict( return block -def _process_sampled_df_csc( - df: cudf.DataFrame, +def _process_sampled_tensors_csc( + tensors: Dict["torch.Tensor"], reverse_hop_id: bool = True, ) -> Tuple[ - Dict[int, Dict[int, Dict[str, torch.Tensor]]], - List[torch.Tensor], + Dict[int, Dict[int, Dict[str, "torch.Tensor"]]], + List["torch.Tensor"], List[List[int, int]], ]: """ - Convert a dataframe generated by BulkSampler to a dictionary of tensors, to + Convert tensors generated by BulkSampler to a dictionary of tensors, to facilitate MFG creation. The sampled graphs in the dataframe use CSC-format. Parameters ---------- - df: cudf.DataFrame + tensors: Dict[torch.Tensor] The output from BulkSampler compressed in CSC format. The dataframe should be generated with `compression="CSR"` in BulkSampler, since the sampling routine treats seed nodes as sources. @@ -442,12 +464,12 @@ def _process_sampled_df_csc( k-th hop, mfg_sizes[k] and mfg_sizes[k+1] is the number of sources and destinations, respectively. """ - # dropna - major_offsets = cast_to_tensor(df.major_offsets.dropna()) - label_hop_offsets = cast_to_tensor(df.label_hop_offsets.dropna()) - renumber_map_offsets = cast_to_tensor(df.renumber_map_offsets.dropna()) - renumber_map = cast_to_tensor(df["map"].dropna()) - minors = cast_to_tensor(df.minors.dropna()) + + major_offsets = tensors["major_offsets"] + minors = tensors["minors"] + label_hop_offsets = tensors["label_hop_offsets"] + renumber_map = tensors["map"] + renumber_map_offsets = tensors["renumber_map_offsets"] n_batches = len(renumber_map_offsets) - 1 n_hops = int((len(label_hop_offsets) - 1) / n_batches) @@ -511,10 +533,58 @@ def _process_sampled_df_csc( return tensors_dict, renumber_map_list, mfg_sizes.tolist() +def _process_sampled_df_csc( + df: cudf.DataFrame, + reverse_hop_id: bool = True, +): + """ + Convert a dataframe generated by BulkSampler to a dictionary of tensors, to + facilitate MFG creation. The sampled graphs in the dataframe use CSC-format. + + Parameters + ---------- + df: cudf.DataFrame + The output from BulkSampler compressed in CSC format. The dataframe + should be generated with `compression="CSR"` in BulkSampler, + since the sampling routine treats seed nodes as sources. 
+ + reverse_hop_id: bool (default=True) + Reverse hop id. + + Returns + ------- + tensors_dict: dict + A nested dictionary keyed by batch id and hop id. + `tensor_dict[batch_id][hop_id]` holds "minors" and "major_offsets" + values for CSC MFGs. + + renumber_map_list: list + List of renumbering maps for looking up global indices of nodes. One + map for each batch. + + mfg_sizes: list + List of the number of nodes in each message passing layer. For the + k-th hop, mfg_sizes[k] and mfg_sizes[k+1] is the number of sources and + destinations, respectively. + """ + + return _process_sampled_tensors_csc( + { + "major_offsets": cast_to_tensor(df.major_offsets.dropna()), + "label_hop_offsets": cast_to_tensor(df.label_hop_offsets.dropna()), + "renumber_map_offsets": cast_to_tensor(df.renumber_map_offsets.dropna()), + "map": cast_to_tensor(df["map"].dropna()), + "minors": cast_to_tensor(df.minors.dropna()), + }, + reverse_hop_id=reverse_hop_id, + ) + + def _create_homogeneous_sparse_graphs_from_csc( tensors_dict: Dict[int, Dict[int, Dict[str, torch.Tensor]]], renumber_map_list: List[torch.Tensor], mfg_sizes: List[int, int], + output_format: str = "dgl.Block", ) -> List[List[torch.Tensor, torch.Tensor, List[SparseGraph]]]: """Create mini-batches of MFGs. The input arguments are the outputs of the function `_process_sampled_df_csc`. @@ -553,5 +623,13 @@ def create_homogeneous_sampled_graphs_from_dataframe_csc(sampled_df: cudf.DataFr """Public API to create mini-batches of MFGs using a dataframe output by BulkSampler, where the sampled graph is compressed in CSC format.""" return _create_homogeneous_sparse_graphs_from_csc( - *(_process_sampled_df_csc(sampled_df)) + *(_process_sampled_df_csc(sampled_df)), + ) + + +def create_homogeneous_sampled_graphs_from_tensors_csc(tensors: Dict["torch.Tensor"]): + """Public API to create mini-batches of MFGs using a dataframe output by + BulkSampler, where the sampled graph is compressed in CSC format.""" + return _create_homogeneous_sparse_graphs_from_csc( + *(_process_sampled_tensors_csc(tensors)), ) From a8c0848b60b8f1f84c283b536ec6d2f8d5d99398 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Thu, 27 Jun 2024 15:16:21 -0700 Subject: [PATCH 13/47] add todo --- python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index 73130e2dfb0..924ab696199 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -22,7 +22,7 @@ from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor dgl = import_optional("dgl") -torch = import_optional("torch") +torch = import_optional('torch') class DataLoader: @@ -150,6 +150,7 @@ def dataset( return self.__dataset def __iter__(self): + # TODO convert to the right output format and device return self.__sampler.sample( self.__graph, self.__dataset, From 913b8cd372adb5498a6d439a3c57f30bb7b4207b Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Fri, 28 Jun 2024 12:35:17 -0700 Subject: [PATCH 14/47] fix block issue, typing --- .../cugraph_dgl/dataloading/dataloader.py | 4 +- .../dataloading/neighbor_sampler.py | 24 +++-- .../cugraph_dgl/dataloading/sampler.py | 70 ++++++++++----- .../dataloading/utils/sampling_helpers.py | 88 ++++++++++++------- python/cugraph-dgl/cugraph_dgl/typing.py | 28 +++++- 5 files changed, 153 insertions(+), 61 deletions(-) diff 
--git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index 924ab696199..862f58af45d 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -22,7 +22,7 @@ from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor dgl = import_optional("dgl") -torch = import_optional('torch') +torch = import_optional("torch") class DataLoader: @@ -150,7 +150,7 @@ def dataset( return self.__dataset def __iter__(self): - # TODO convert to the right output format and device + # TODO move to the correct device return self.__sampler.sample( self.__graph, self.__dataset, diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py index f64eb0d8866..398a44fb3b5 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py @@ -22,8 +22,7 @@ import cugraph_dgl -from cugraph_dgl.nn import SparseGraph -from cugraph_dgl.typing import TensorType +from cugraph_dgl.typing import TensorType, DGLSamplerOutput from cugraph_dgl.dataloading.sampler import HomogeneousSampleReader torch = import_optional("torch") @@ -74,6 +73,8 @@ def __init__( prefetch_labels: Optional[Union[List[str], dict[str, List[str]]]] = None, output_device: Optional[Union["torch.device", int, str]] = None, fused: bool = True, + sparse_format="csc", + output_format="dgl.Block", **kwargs, ): """ @@ -114,6 +115,14 @@ def __init__( fused: bool Optional (default=True). This argument is ignored by cuGraph-DGL. + sparse_format: str + Optional (default = "coo"). + The sparse format of the emitted sampled graphs. + Currently, only "csc" is supported. + output_format: str + Optional (default = "dgl.Block") + The output format of the emitted sampled graphs. + Can be either "dgl.Block" (default), or "cugraph_dgl.nn.SparseGraph". **kwargs Keyword arguments for the underlying cuGraph distributed sampler and writer (directory, batches_per_partition, format, @@ -146,9 +155,14 @@ def __init__( self.replace = replace self.__kwargs = kwargs + super( + sparse_format=sparse_format, + output_format=output_format, + ) + def sample( self, g: "cugraph_dgl.Graph", indices: TensorType, batch_size: int = 1 - ) -> Iterator[Tuple["torch.Tensor", "torch.Tensor", List[SparseGraph]]]: + ) -> Iterator[DGLSamplerOutput]: kwargs = dict(**self.__kwargs) writer = DistSampleWriter( @@ -160,7 +174,7 @@ def sample( ds = UniformNeighborSampler( g._graph(self.edge_dir), writer, - compression="CSC", + compression=self.sparse_format.upper(), fanout=self._reversed_fanout_vals, prior_sources_behavior="carryover", deduplicate_sources=True, @@ -171,7 +185,7 @@ def sample( if g.is_homogeneous: ds.sample_from_nodes(indices, batch_size=batch_size) - return HomogeneousSampleReader(ds.get_reader()) + return HomogeneousSampleReader(ds.get_reader(), self.output_format) raise ValueError( "Sampling heterogeneous graphs is currently" diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py index 252ae2c36a0..1ee76882bdd 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py @@ -11,11 +11,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Iterator, Dict, Tuple, List +from typing import Iterator, Dict, Tuple, List, Union import cugraph_dgl from cugraph_dgl.nn import SparseGraph -from cugraph_dgl.typing import TensorType +from cugraph_dgl.typing import TensorType, DGLSamplerOutput from cugraph_dgl.dataloading.utils.sampling_helpers import ( create_homogeneous_sampled_graphs_from_tensors_csc, ) @@ -25,6 +25,7 @@ from cugraph.utilities.utils import import_optional torch = import_optional("torch") +dgl = import_optional("dgl") class SampleReader: @@ -32,7 +33,7 @@ class SampleReader: Iterator that processes results from the cuGraph distributed sampler. """ - def __init__(self, base_reader: DistSampleReader): + def __init__(self, base_reader: DistSampleReader, output_format: str = "dgl.Block"): """ Constructs a new SampleReader. @@ -42,11 +43,16 @@ def __init__(self, base_reader: DistSampleReader): The reader responsible for loading saved samples produced by the cuGraph distributed sampler. """ + self.__output_format = output_format self.__base_reader = base_reader self.__num_samples_remaining = 0 self.__index = 0 - def __next__(self): + @property + def output_format(self) -> str: + return self.__output_format + + def __next__(self) -> DGLSamplerOutput: if self._num_samples_remaining == 0: # raw_sample_data is already a dict of tensors self.__raw_sample_data, start_inclusive, end_inclusive = next( @@ -62,10 +68,10 @@ def __next__(self): self.__num_samples_remaining -= 1 return out - def _decode_all(self): + def _decode_all(self) -> List[DGLSamplerOutput]: raise NotImplementedError("Must be implemented by subclass") - def __iter__(self): + def __iter__(self) -> DGLSamplerOutput: return self @@ -75,7 +81,7 @@ class HomogeneousSampleReader(SampleReader): produced by the cuGraph distributed sampler. """ - def __init__(self, base_reader: DistSampleReader): + def __init__(self, base_reader: DistSampleReader, output_format: str = "dgl.Block"): """ Constructs a new HomogeneousSampleReader @@ -84,20 +90,29 @@ def __init__(self, base_reader: DistSampleReader): base_reader: DistSampleReader The reader responsible for loading saved samples produced by the cuGraph distributed sampler. + output_format: str + The output format for blocks (either "dgl.Block" or + "cugraph_dgl.nn.SparseGraph"). """ - super().__init__(base_reader) + super().__init__(base_reader, output_format=output_format) - def __decode_csc(self, raw_sample_data: Dict[str, "torch.Tensor"]): + def __decode_csc( + self, raw_sample_data: Dict[str, "torch.Tensor"] + ) -> List[DGLSamplerOutput]: create_homogeneous_sampled_graphs_from_tensors_csc( - raw_sample_data, + raw_sample_data, output_format=self.output_format ) - def __decode_coo(self, raw_sample_data: Dict[str, "torch.Tensor"]): + def __decode_coo( + self, raw_sample_data: Dict[str, "torch.Tensor"] + ) -> List[DGLSamplerOutput]: raise NotImplementedError( "COO format is currently unsupported in the non-dask API" ) - def _decode_all(self, raw_sample_data: Dict[str, "torch.Tensor"]): + def _decode_all( + self, raw_sample_data: Dict[str, "torch.Tensor"] + ) -> List[DGLSamplerOutput]: if "major_offsets" in raw_sample_data: return self.__decode_csc(raw_sample_data) else: @@ -109,24 +124,39 @@ class Sampler: Base sampler class for all cugraph-DGL samplers. """ - def __init__(self, sparse_format: str = "csc"): + def __init__(self, sparse_format: str = "csc", output_format="dgl.Block"): """ Parameters ---------- sparse_format: str - Optional (default = "coo"). - The sparse format of the emitted sampled graphs. 
- Currently, only "csc" is supported. + Optional (default = "coo"). + The sparse format of the emitted sampled graphs. + Currently, only "csc" is supported. + output_format: str + Optional (default = "dgl.Block") + The output format of the emitted sampled graphs. + Can be either "dgl.Block" (default), or "cugraph_dgl.nn.SparseGraph". """ if sparse_format != "csc": raise ValueError("Only CSC format is supported at this time") self.__sparse_format = sparse_format + self.__output_format = output_format + + @property + def output_format(self): + return self.__output_format + + @property + def sparse_format(self): + return self.__sparse_format def sample( self, g: cugraph_dgl.Graph, indices: TensorType, batch_size: int = 1 - ) -> Iterator[Tuple["torch.Tensor", "torch.Tensor", List[SparseGraph]]]: + ) -> Iterator[ + Tuple["torch.Tensor", "torch.Tensor", List[Union[SparseGraph, "dgl.Block"]]] + ]: """ Samples the graph. @@ -141,10 +171,8 @@ def sample( Returns ------- - Iterator[Tuple[torch.Tensor, torch.Tensor, List[cugraph_dgl.nn.SparseGraph]]] - Iterator over batches. Returns batches in the sparse - graph format, which can be converted upstream to DGL blocks - if needed. The returned tuples are in standard + Iterator[DGLSamplerOutput] + Iterator over batches. The returned tuples are in standard DGL format: (input nodes, output nodes, blocks) where input nodes are the renumbered input nodes, output nodes are the renumbered output nodes, and blocks are the output graphs diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index 1286ddfe0dc..f98909450cd 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -139,28 +139,6 @@ def _get_tensor_d_from_sampled_df(df): return result_tensor_d -def create_homogeneous_sampled_graphs_from_tensors_dist_coo( - tensors: Dict[str, "torch.Tensor"], return_type: str = "dgl.Block" -): - """ - Creates DGL MFGs for homogeneous graphs from output - tensors from a cuGraph DistSampler. - - Parameters - ---------- - tensors: Dict[str, torch.Tensor] - The dictionary of output tensors from the bulk sampler. - return_type: str - Optional (default="dgl.Block"). - The return type for the MFGs (either "dgl.Block" or - "cugraph_dgl.nn.SparseGraph") - """ - if return_type not in ["dgl.Block", "cugraph_dgl.nn.SparseGraph"]: - raise ValueError( - "return_type must be either dgl.Block or cugraph_dgl.nn.SparseGraph" - ) - - def create_homogeneous_sampled_graphs_from_dataframe( sampled_df: cudf.DataFrame, edge_dir: str = "in", @@ -580,11 +558,41 @@ def _process_sampled_df_csc( ) +def _create_homogeneous_blocks_from_csc( + tensors_dict: Dict[int, Dict[int, Dict[str, torch.Tensor]]], + renumber_map_list: List[torch.Tensor], + mfg_sizes: List[int, int], +): + """Create mini-batches of MFGs in the dgl.Block format. + The input arguments are the outputs of + the function `_process_sampled_df_csc`. + + Returns + ------- + output: list + A list of mini-batches. Each mini-batch is a list that consists of + `input_nodes` tensor, `output_nodes` tensor and a list of MFGs. 
+ """ + n_batches = len(mfg_sizes) + output = [] + for b_id in range(n_batches): + output_batch = [] + output_batch.append(renumber_map_list[b_id]) + output_batch.append(renumber_map_list[b_id][: mfg_sizes[b_id][-1]]) + + mfgs = _create_homogeneous_sampled_graphs_from_tensors_perhop( + tensors_batch_d=tensors_dict[b_id], edge_dir="in", return_type="dgl.Block" + )[2] + + output_batch.append(mfgs) + + output.append(output_batch) + + def _create_homogeneous_sparse_graphs_from_csc( tensors_dict: Dict[int, Dict[int, Dict[str, torch.Tensor]]], renumber_map_list: List[torch.Tensor], mfg_sizes: List[int, int], - output_format: str = "dgl.Block", ) -> List[List[torch.Tensor, torch.Tensor, List[SparseGraph]]]: """Create mini-batches of MFGs. The input arguments are the outputs of the function `_process_sampled_df_csc`. @@ -619,17 +627,35 @@ def _create_homogeneous_sparse_graphs_from_csc( return output -def create_homogeneous_sampled_graphs_from_dataframe_csc(sampled_df: cudf.DataFrame): +def create_homogeneous_sampled_graphs_from_dataframe_csc( + sampled_df: cudf.DataFrame, output_format: str = "cugraph_dgl.nn.SparseGraph" +): """Public API to create mini-batches of MFGs using a dataframe output by BulkSampler, where the sampled graph is compressed in CSC format.""" - return _create_homogeneous_sparse_graphs_from_csc( - *(_process_sampled_df_csc(sampled_df)), - ) + if output_format == "cugraph_dgl.nn.SparseGraph": + return _create_homogeneous_sparse_graphs_from_csc( + *(_process_sampled_df_csc(sampled_df)), + ) + elif output_format == "dgl.Block": + return _create_homogeneous_blocks_from_csc( + *(_process_sampled_df_csc(sampled_df)), + ) + else: + raise ValueError(f"Invalid output format {output_format}") -def create_homogeneous_sampled_graphs_from_tensors_csc(tensors: Dict["torch.Tensor"]): +def create_homogeneous_sampled_graphs_from_tensors_csc( + tensors: Dict["torch.Tensor"], output_format: str = "cugraph_dgl.nn.SparseGraph" +): """Public API to create mini-batches of MFGs using a dataframe output by BulkSampler, where the sampled graph is compressed in CSC format.""" - return _create_homogeneous_sparse_graphs_from_csc( - *(_process_sampled_tensors_csc(tensors)), - ) + if output_format == "cugraph_dgl.nn.SparseGraph": + return _create_homogeneous_sparse_graphs_from_csc( + *(_process_sampled_tensors_csc(tensors)), + ) + elif output_format == "dgl.Block": + return _create_homogeneous_blocks_from_csc( + *(_process_sampled_tensors_csc(tensors)), + ) + else: + raise ValueError(f"Invalid output format {output_format}") diff --git a/python/cugraph-dgl/cugraph_dgl/typing.py b/python/cugraph-dgl/cugraph_dgl/typing.py index 7a16a1b3dfd..a68463c3fd9 100644 --- a/python/cugraph-dgl/cugraph_dgl/typing.py +++ b/python/cugraph-dgl/cugraph_dgl/typing.py @@ -11,6 +11,30 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List, Union +from typing import List, Union, Tuple +from cugraph.utilities.utils import import_optional -TensorType = Union["torch.Tensor", "cupy.ndarray", "numpy.ndarray", "cudf.Series", "pandas.Series", List[int]] +from cugraph_dgl.nn import SparseGraph + +import pandas +import numpy +import cupy +import cudf + +torch = import_optional("torch") +dgl = import_optional("dgl") + +TensorType = Union[ + "torch.Tensor", + "cupy.ndarray", + "numpy.ndarray", + "cudf.Series", + "pandas.Series", + List[int], +] + +DGLSamplerOutput = Tuple[ + "torch.Tensor", + "torch.Tensor", + List[Union["dgl.Block", SparseGraph]], +] From 79c8f7870adf734196c85f3d8b3f14ed1aa7adef Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 1 Jul 2024 10:12:37 -0700 Subject: [PATCH 15/47] reorganize tests --- python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py | 1 - .../{test_dataloader.py => dataloading/test_dask_dataloader.py} | 0 .../test_dask_dataloader_mg.py} | 0 .../cugraph_dgl/tests/{ => dataloading}/test_dataset.py | 0 4 files changed, 1 deletion(-) rename python/cugraph-dgl/cugraph_dgl/tests/{test_dataloader.py => dataloading/test_dask_dataloader.py} (100%) rename python/cugraph-dgl/cugraph_dgl/tests/{test_dataloader_mg.py => dataloading/test_dask_dataloader_mg.py} (100%) rename python/cugraph-dgl/cugraph_dgl/tests/{ => dataloading}/test_dataset.py (100%) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py index 398a44fb3b5..7c98bd3c301 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py @@ -20,7 +20,6 @@ from cugraph.gnn import UniformNeighborSampler, DistSampleWriter from cugraph.utilities.utils import import_optional - import cugraph_dgl from cugraph_dgl.typing import TensorType, DGLSamplerOutput from cugraph_dgl.dataloading.sampler import HomogeneousSampleReader diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py similarity index 100% rename from python/cugraph-dgl/cugraph_dgl/tests/test_dataloader.py rename to python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_dataloader_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py similarity index 100% rename from python/cugraph-dgl/cugraph_dgl/tests/test_dataloader_mg.py rename to python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_dataset.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataset.py similarity index 100% rename from python/cugraph-dgl/cugraph_dgl/tests/test_dataset.py rename to python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataset.py From a56b56d4e27938dd7650bc7bd8ba4c036cf1f7cb Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Tue, 2 Jul 2024 11:22:31 -0700 Subject: [PATCH 16/47] sampling --- .../cugraph_dgl/dataloading/__init__.py | 10 +++- .../cugraph_dgl/dataloading/dataloader.py | 8 +++ .../dataloading/neighbor_sampler.py | 22 ++++++--- .../cugraph_dgl/dataloading/sampler.py | 6 +-- .../dataloading/utils/sampling_helpers.py | 34 +++++++++++-- .../cugraph-dgl/cugraph_dgl/nn/conv/base.py | 25 +++++++++- .../tests/dataloading/test_dask_dataloader.py | 4 +- .../dataloading/test_dask_dataloader_mg.py | 4 +- 
.../tests/dataloading/test_dataloader.py | 49 +++++++++++++++++++ .../cugraph_dgl/tests/test_graph.py | 3 ++ .../cugraph_dgl/tests/test_graph_mg.py | 4 +- 11 files changed, 147 insertions(+), 22 deletions(-) create mode 100644 python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py index 5a775f0e88c..9ee5e6a970f 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py @@ -17,10 +17,18 @@ HomogenousBulkSamplerDataset, HeterogenousBulkSamplerDataset, ) + +from cugraph_dgl.dataloading.sampler import Sampler from cugraph_dgl.dataloading.neighbor_sampler import NeighborSampler + from cugraph_dgl.dataloading.dask_dataloader import DaskDataLoader def DataLoader(*args, **kwargs): - warnings.warn("DataLoader has been renamed to DaskDataLoader", FutureWarning) + warnings.warn( + "DataLoader has been renamed to DaskDataLoader. " + "In Release 24.10, cugraph_dgl.dataloading.dataloader.DataLoader " + "will take over the DataLoader name.", + FutureWarning + ) return DaskDataLoader(*args, **kwargs) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index 862f58af45d..b3af6fc3bc9 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -156,3 +156,11 @@ def __iter__(self): self.__dataset, self.__batch_size, ) + + """ + start, end, blocks = out + + start = start.to(self.__device) + end = end.to(self.__device) + blocks = [b.to(self.__device) for b in blocks] + """ \ No newline at end of file diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py index 7c98bd3c301..f77b00bbac8 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py @@ -14,6 +14,7 @@ from __future__ import annotations import warnings +import tempfile from typing import Sequence, Optional, Union, List, Tuple, Iterator @@ -22,12 +23,12 @@ import cugraph_dgl from cugraph_dgl.typing import TensorType, DGLSamplerOutput -from cugraph_dgl.dataloading.sampler import HomogeneousSampleReader +from cugraph_dgl.dataloading.sampler import Sampler, HomogeneousSampleReader torch = import_optional("torch") -class NeighborSampler: +class NeighborSampler(Sampler): """Sampler that builds computational dependency of node representations via neighbor sampling for multilayer GNN. This sampler will make every node gather messages from a fixed number of neighbors @@ -71,7 +72,7 @@ def __init__( ] = None, prefetch_labels: Optional[Union[List[str], dict[str, List[str]]]] = None, output_device: Optional[Union["torch.device", int, str]] = None, - fused: bool = True, + fused: Optional[bool] = None, sparse_format="csc", output_format="dgl.Block", **kwargs, @@ -112,7 +113,7 @@ def __init__( Optional. Output device for samples. Defaults to the current device. fused: bool - Optional (default=True). + Optional. This argument is ignored by cuGraph-DGL. sparse_format: str Optional (default = "coo"). 
@@ -154,18 +155,24 @@ def __init__( self.replace = replace self.__kwargs = kwargs - super( + super().__init__( sparse_format=sparse_format, output_format=output_format, ) def sample( - self, g: "cugraph_dgl.Graph", indices: TensorType, batch_size: int = 1 + self, g: "cugraph_dgl.Graph", indices: Iterator["torch.Tensor"], batch_size: int = 1 ) -> Iterator[DGLSamplerOutput]: kwargs = dict(**self.__kwargs) + directory = kwargs.pop('directory', None) + if directory is None: + warnings.warn("Setting a directory to store samples is recommended.") + self._tempdir = tempfile.TemporaryDirectory() + directory = self._tempdir.name + writer = DistSampleWriter( - direction=kwargs.pop("directory", None), + directory=directory, batches_per_partition=kwargs.pop("batches_per_partition", 256), format=kwargs.pop("format", "parquet"), ) @@ -183,6 +190,7 @@ def sample( ) if g.is_homogeneous: + indices = torch.concat(list(indices)) ds.sample_from_nodes(indices, batch_size=batch_size) return HomogeneousSampleReader(ds.get_reader(), self.output_format) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py index 1ee76882bdd..20bce365c7e 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py @@ -53,7 +53,7 @@ def output_format(self) -> str: return self.__output_format def __next__(self) -> DGLSamplerOutput: - if self._num_samples_remaining == 0: + if self.__num_samples_remaining == 0: # raw_sample_data is already a dict of tensors self.__raw_sample_data, start_inclusive, end_inclusive = next( self.__base_reader @@ -99,7 +99,7 @@ def __init__(self, base_reader: DistSampleReader, output_format: str = "dgl.Bloc def __decode_csc( self, raw_sample_data: Dict[str, "torch.Tensor"] ) -> List[DGLSamplerOutput]: - create_homogeneous_sampled_graphs_from_tensors_csc( + return create_homogeneous_sampled_graphs_from_tensors_csc( raw_sample_data, output_format=self.output_format ) @@ -153,7 +153,7 @@ def sparse_format(self): return self.__sparse_format def sample( - self, g: cugraph_dgl.Graph, indices: TensorType, batch_size: int = 1 + self, g: cugraph_dgl.Graph, indices: Iterator["torch.Tensor"], batch_size: int = 1 ) -> Iterator[ Tuple["torch.Tensor", "torch.Tensor", List[Union[SparseGraph, "dgl.Block"]]] ]: diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index f98909450cd..da40fb6f564 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -573,20 +573,44 @@ def _create_homogeneous_blocks_from_csc( A list of mini-batches. Each mini-batch is a list that consists of `input_nodes` tensor, `output_nodes` tensor and a list of MFGs. 
""" - n_batches = len(mfg_sizes) + n_batches, n_hops = len(mfg_sizes), len(mfg_sizes[0]) - 1 output = [] for b_id in range(n_batches): output_batch = [] output_batch.append(renumber_map_list[b_id]) output_batch.append(renumber_map_list[b_id][: mfg_sizes[b_id][-1]]) - mfgs = _create_homogeneous_sampled_graphs_from_tensors_perhop( - tensors_batch_d=tensors_dict[b_id], edge_dir="in", return_type="dgl.Block" - )[2] + mfgs = [ + SparseGraph( + size=(mfg_sizes[b_id][h_id], mfg_sizes[b_id][h_id + 1]), + src_ids=tensors_dict[b_id][h_id]["minors"], + cdst_ids=tensors_dict[b_id][h_id]["major_offsets"], + formats=["csc", "coo"], + reduce_memory=True, + ) + for h_id in range(n_hops) + ] - output_batch.append(mfgs) + blocks = [] + seednodes_range=None + for mfg in mfgs: + block_mfg = _create_homogeneous_dgl_block_from_tensor_d( + {'sources': mfg.src_ids(), 'destinations': mfg.dst_ids(), 'sources_range': mfg._num_src_nodes-1, 'destinations_range': mfg._num_dst_nodes-1}, + renumber_map=renumber_map_list[b_id], + seednodes_range=seednodes_range + ) + + seednodes_range = max( + mfg._num_src_nodes-1, + mfg._num_dst_nodes-1, + ) + blocks.append(block_mfg) + del mfgs + + output_batch.append(blocks) output.append(output_batch) + return output def _create_homogeneous_sparse_graphs_from_csc( diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py index ddd95a76366..d2460f814c9 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,6 +15,8 @@ from cugraph.utilities.utils import import_optional +import cugraph_dgl + torch = import_optional("torch") ops_torch = import_optional("pylibcugraphops.pytorch") dgl = import_optional("dgl") @@ -255,6 +257,27 @@ def __repr__(self) -> str: f"num_edges={self._src_ids.size(0)}, formats={self._formats})" ) + def to(self, device: Union[torch.device, str, int]) -> "cugraph_dgl.nn.SparseGraph": + sg = SparseGraph( + src_ids=None if self._src_ids is None else self._src_ids.to(device), + dst_ids=None if self._dst_ids is None else self._dst_ids.to(device), + csrc_ids=None if self._csrc_ids is None else self._csrc_ids.to(device), + cdst_ids=None if self._cdst_ids is None else self._cdst_ids.to(device), + values=None if self._values is None else self._values.to(device), + is_sorted=self._is_sorted, + formats=self._formats, + reduce_memory=self._reduce_memory, + ) + + sg._perm_coo2csc = ( + None if self._perm_coo2csc is None else self._perm_coo2csc.to(device) + ) + sg._perm_csc2csr = ( + None if self._perm_csc2csr is None else self._perm_csc2csr.to(device) + ) + + return sg + class BaseConv(torch.nn.Module): r"""An abstract base class for cugraph-ops nn module.""" diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py index cc473cd0ad6..e2542657de4 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -52,7 +52,7 @@ def sample_cugraph_dgl_graphs(cugraph_gs, train_nid, fanouts): sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) tempdir_object = tempfile.TemporaryDirectory() sampling_output_dir = tempdir_object - dataloader = cugraph_dgl.dataloading.DataLoader( + dataloader = cugraph_dgl.dataloading.DaskDataLoader( cugraph_gs, train_nid, sampler, diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py index 29b7e1c3412..d49e1293e77 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -51,7 +51,7 @@ def sample_cugraph_dgl_graphs(cugraph_gs, train_nid, fanouts): sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) tempdir_object = tempfile.TemporaryDirectory() sampling_output_dir = tempdir_object - dataloader = cugraph_dgl.dataloading.DataLoader( + dataloader = cugraph_dgl.dataloading.DaskDataLoader( cugraph_gs, train_nid, sampler, diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py new file mode 100644 index 00000000000..d29f1c8ea41 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py @@ -0,0 +1,49 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
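The rename above leaves the old spelling working but deprecated. A hedged sketch of the resulting behavior; `gs`, `train_nid`, and `sampler` are hypothetical stand-ins for objects built as in the tests above:

import warnings
import cugraph_dgl.dataloading as dl

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    loader = dl.DataLoader(gs, train_nid, sampler)  # deprecated spelling

# The shim warns once, then forwards its arguments to DaskDataLoader.
assert any(issubclass(w.category, FutureWarning) for w in caught)
assert isinstance(loader, dl.DaskDataLoader)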
+ +import pytest + +import cugraph_dgl +from cugraph_dgl.dataloading.dataloader import DataLoader +from cugraph_dgl.dataloading import NeighborSampler + +from cugraph.datasets import karate +from cugraph.utilities.utils import import_optional, MissingModule + +torch = import_optional('torch') +dgl = import_optional('dgl') + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +def test_dataloader_basic_homogeneous(): + graph = cugraph_dgl.Graph( + is_multi_gpu=False + ) + + num_nodes = karate.number_of_nodes() + graph.add_nodes( + num_nodes, + data={'z': torch.arange(num_nodes)} + ) + + edf = karate.get_edgelist() + graph.add_edges( + u=edf['src'], + v=edf['dst'], + data={'q': torch.arange(karate.number_of_edges())} + ) + + sampler = NeighborSampler([5, 5, 5]) + loader = DataLoader(graph, torch.arange(num_nodes), sampler, batch_size=2) + + print(next(iter(loader))) \ No newline at end of file diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py index 89a74ff073c..e47e280c48e 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py @@ -22,9 +22,11 @@ from cugraph.utilities.utils import import_optional, MissingModule torch = import_optional("torch") +dgl = import_optional("dgl") @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") @pytest.mark.parametrize("direction", ["out", "in"]) def test_graph_make_homogeneous_graph(direction): df = karate.get_edgelist() @@ -92,6 +94,7 @@ def test_graph_make_homogeneous_graph(direction): @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") @pytest.mark.parametrize("direction", ["out", "in"]) def test_graph_make_heterogeneous_graph(direction): df = karate.get_edgelist() diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py index 0dfde6b9715..f0561c41095 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py @@ -34,7 +34,7 @@ pylibwholegraph = import_optional("pylibwholegraph") torch = import_optional("torch") - +dgl = import_optional('dgl') def init_pytorch_worker(rank, world_size, cugraph_id): import rmm @@ -160,6 +160,7 @@ def run_test_graph_make_homogeneous_graph_mg(rank, uid, world_size, direction): @pytest.mark.skipif( isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" ) +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") @pytest.mark.parametrize("direction", ["out", "in"]) def test_graph_make_homogeneous_graph_mg(direction): uid = cugraph_comms_create_unique_id() @@ -324,6 +325,7 @@ def run_test_graph_make_heterogeneous_graph_mg(rank, uid, world_size, direction) @pytest.mark.skipif( isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" ) +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") @pytest.mark.parametrize("direction", ["out", "in"]) def test_graph_make_heterogeneous_graph_mg(direction): uid = cugraph_comms_create_unique_id() From 8f14f88a7f5acb2a42e10c7db6a19c3e20d9f3f1 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Tue, 2 Jul 2024 
12:11:15 -0700 Subject: [PATCH 17/47] revert dependencies.yaml --- dependencies.yaml | 184 +++++++++++++++++++++++++++++++++------------- 1 file changed, 133 insertions(+), 51 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 7b42c666792..fdb6f278265 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -12,6 +12,7 @@ files: - cuda - cuda_version - docs + - python_build_rapids - python_build_wheel - python_build_cythonize - depends_on_rmm @@ -73,13 +74,20 @@ files: pyproject_dir: python/cugraph extras: table: build-system + includes: + - python_build_rapids + - python_build_cythonize + py_rapids_build_cugraph: + output: pyproject + pyproject_dir: python/cugraph + extras: + table: tool.rapids-build-backend + key: requires includes: - common_build - - python_build_wheel - depends_on_rmm - depends_on_pylibraft - depends_on_pylibcugraph - - python_build_cythonize py_run_cugraph: output: pyproject pyproject_dir: python/cugraph @@ -108,12 +116,19 @@ files: pyproject_dir: python/pylibcugraph extras: table: build-system + includes: + - python_build_rapids + - python_build_cythonize + py_rapids_build_pylibcugraph: + output: pyproject + pyproject_dir: python/pylibcugraph + extras: + table: tool.rapids-build-backend + key: requires includes: - common_build - - python_build_wheel - depends_on_rmm - depends_on_pylibraft - - python_build_cythonize py_run_pylibcugraph: output: pyproject pyproject_dir: python/pylibcugraph @@ -138,6 +153,7 @@ files: extras: table: build-system includes: + - python_build_rapids - python_build_wheel py_run_nx_cugraph: output: pyproject @@ -163,6 +179,7 @@ files: extras: table: build-system includes: + - python_build_rapids - python_build_wheel py_run_cugraph_dgl: output: pyproject @@ -188,6 +205,7 @@ files: extras: table: build-system includes: + - python_build_rapids - python_build_wheel py_run_cugraph_pyg: output: pyproject @@ -213,6 +231,7 @@ files: extras: table: build-system includes: + - python_build_rapids - python_build_wheel py_run_cugraph_equivariant: output: pyproject @@ -235,6 +254,7 @@ files: extras: table: build-system includes: + - python_build_rapids - python_build_wheel py_run_cugraph_service_client: output: pyproject @@ -249,6 +269,7 @@ files: extras: table: build-system includes: + - python_build_rapids - python_build_wheel py_run_cugraph_service_server: output: pyproject @@ -363,11 +384,11 @@ dependencies: packages: - c-compiler - cxx-compiler - - libcudf==24.8.* - - libcugraphops==24.8.* - - libraft-headers==24.8.* - - libraft==24.8.* - - librmm==24.8.* + - libcudf==24.8.*,>=0.0.0a0 + - libcugraphops==24.8.*,>=0.0.0a0 + - libraft-headers==24.8.*,>=0.0.0a0 + - libraft==24.8.*,>=0.0.0a0 + - librmm==24.8.*,>=0.0.0a0 - openmpi # Required for building cpp-mgtests (multi-GPU tests) specific: - output_types: [conda] @@ -431,6 +452,11 @@ dependencies: - matrix: packages: - python>=3.9,<3.12 + python_build_rapids: + common: + - output_types: [conda, pyproject, requirements] + packages: + - rapids-build-backend>=0.3.1,<0.4.0.dev0 python_build_wheel: common: - output_types: [conda, pyproject, requirements] @@ -452,11 +478,10 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - &dask rapids-dask-dependency==24.6.* - - &dask_cuda dask-cuda==24.6.* + - &dask rapids-dask-dependency==24.8.*,>=0.0.0a0 + - &dask_cuda dask-cuda==24.8.*,>=0.0.0a0 - &numba numba>=0.57 - &numpy numpy>=1.23,<2.0a0 - - &ucx_py ucx-py==0.38.* - output_types: conda packages: - aiohttp @@ -464,11 +489,26 @@ dependencies: - requests - nccl>=2.9.9 - 
ucx-proc=*=gpu + - &ucx_py ucx-py==0.39.*,>=0.0.0a0 - output_types: pyproject packages: # cudf uses fsspec but is protocol independent. cugraph # dataset APIs require [http] extras for use with cudf. - fsspec[http]>=0.6.0 + specific: + - output_types: pyproject + matrices: + - matrix: + cuda: "11.*" + packages: + - &ucx_py_cu11 ucx-py-cu11==0.39.*,>=0.0.0a0 + - matrix: + cuda: "12.*" + packages: + - &ucx_py_cu12 ucx-py-cu12==0.39.*,>=0.0.0a0 + - matrix: + packages: + - *ucx_py python_run_nx_cugraph: common: - output_types: [conda, pyproject] @@ -481,20 +521,40 @@ dependencies: packages: - *numba - *numpy - - &tensordict tensordict>=0.1.2,<0.3.1 + specific: - output_types: [pyproject] - packages: - - &cugraph cugraph==24.6.* + matrices: + - matrix: + cuda: "11.*" + packages: + - &cugraph_cu11 cugraph-cu11==24.8.*,>=0.0.0a0 + - matrix: + cuda: "12.*" + packages: + - &cugraph_cu12 cugraph-cu12==24.8.*,>=0.0.0a0 + - matrix: + packages: + - &cugraph cugraph==24.8.*,>=0.0.0a0 python_run_cugraph_pyg: common: - output_types: [conda, pyproject] packages: - *numba - *numpy - - *tensordict + specific: - output_types: [pyproject] - packages: - - *cugraph + matrices: + - matrix: + cuda: "11.*" + packages: + - *cugraph_cu11 + - matrix: + cuda: "12.*" + packages: + - *cugraph_cu12 + - matrix: + packages: + - *cugraph python_run_cugraph_service_client: common: - output_types: [conda, pyproject] @@ -509,11 +569,27 @@ dependencies: - *numba - *numpy - *thrift - - *ucx_py - output_types: pyproject packages: - *cugraph - - cugraph-service-client==24.6.* + - cugraph-service-client==24.8.*,>=0.0.0a0 + - output_types: conda + packages: + - *ucx_py + specific: + - output_types: pyproject + matrices: + - matrix: + cuda: "11.*" + packages: + - *ucx_py_cu11 + - matrix: + cuda: "12.*" + packages: + - *ucx_py_cu12 + - matrix: + packages: + - *ucx_py test_cpp: common: - output_types: conda @@ -548,7 +624,7 @@ dependencies: - scikit-learn>=0.23.1 - output_types: [conda] packages: - - pylibwholegraph==24.8.* + - &pylibwholegraph_conda pylibwholegraph==24.8.*,>=0.0.0a0 # this thriftpy2 entry can be removed entirely (or switched to a '!=') # once a new release of that project resolves https://github.com/Thriftpy/thriftpy2/issues/281 - thriftpy2<=0.5.0 @@ -568,19 +644,18 @@ dependencies: common: - output_types: [conda] packages: - - cugraph==24.6.* + - cugraph==24.8.*,>=0.0.0a0 - pytorch>=2.0 - pytorch-cuda==11.8 - - *tensordict - dgl>=1.1.0.cu* cugraph_pyg_dev: common: - output_types: [conda] packages: - - cugraph==24.6.* + - cugraph==24.8.*,>=0.0.0a0 - pytorch>=2.0 - pytorch-cuda==11.8 - - *tensordict + - &tensordict tensordict>=0.1.2 - pyg>=2.5,<2.6 depends_on_pytorch: @@ -590,25 +665,32 @@ dependencies: - &pytorch_conda pytorch>=2.0,<2.2.0a0 specific: + - output_types: [requirements] + matrices: + - matrix: {cuda: "12.*"} + packages: + - --extra-index-url=https://download.pytorch.org/whl/cu121 + - matrix: {cuda: "11.*"} + packages: + - --extra-index-url=https://download.pytorch.org/whl/cu118 + - {matrix: null, packages: null} - output_types: [requirements, pyproject] matrices: - matrix: {cuda: "12.*"} packages: - &pytorch_pip torch>=2.0,<2.2.0a0 - *tensordict - - --extra-index-url=https://download.pytorch.org/whl/cu121 - matrix: {cuda: "11.*"} packages: - *pytorch_pip - *tensordict - - --extra-index-url=https://download.pytorch.org/whl/cu118 - {matrix: null, packages: [*pytorch_pip, *tensordict]} depends_on_pylibwholegraph: common: - output_types: conda packages: - - &pylibwholegraph_conda 
pylibwholegraph==24.8.* + - *pylibwholegraph_conda - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -619,17 +701,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - pylibwholegraph-cu12==24.8.* + - pylibwholegraph-cu12==24.8.*,>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - pylibwholegraph-cu11==24.8.* + - pylibwholegraph-cu11==24.8.*,>=0.0.0a0 - {matrix: null, packages: [*pylibwholegraph_conda]} depends_on_rmm: common: - output_types: conda packages: - - &rmm_conda rmm==24.6.* + - &rmm_conda rmm==24.8.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -640,17 +722,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - rmm-cu12==24.6.* + - rmm-cu12==24.8.*,>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - rmm-cu11==24.6.* + - rmm-cu11==24.8.*,>=0.0.0a0 - {matrix: null, packages: [*rmm_conda]} depends_on_cudf: common: - output_types: conda packages: - - &cudf_conda cudf==24.6.* + - &cudf_conda cudf==24.8.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -661,17 +743,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cudf-cu12==24.6.* + - cudf-cu12==24.8.*,>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - cudf-cu11==24.6.* + - cudf-cu11==24.8.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_conda]} depends_on_dask_cudf: common: - output_types: conda packages: - - &dask_cudf_conda dask-cudf==24.6.* + - &dask_cudf_conda dask-cudf==24.8.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -682,17 +764,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - dask-cudf-cu12==24.6.* + - dask-cudf-cu12==24.8.*,>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - dask-cudf-cu11==24.6.* + - dask-cudf-cu11==24.8.*,>=0.0.0a0 - {matrix: null, packages: [*dask_cudf_conda]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_conda pylibraft==24.6.* + - &pylibraft_conda pylibraft==24.8.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -703,17 +785,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - pylibraft-cu12==24.6.* + - pylibraft-cu12==24.8.*,>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - pylibraft-cu11==24.6.* + - pylibraft-cu11==24.8.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_conda]} depends_on_raft_dask: common: - output_types: conda packages: - - &raft_dask_conda raft-dask==24.6.* + - &raft_dask_conda raft-dask==24.8.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -724,17 +806,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - raft-dask-cu12==24.6.* + - raft-dask-cu12==24.8.*,>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - raft-dask-cu11==24.6.* + - raft-dask-cu11==24.8.*,>=0.0.0a0 - {matrix: null, packages: [*raft_dask_conda]} depends_on_pylibcugraph: common: - output_types: conda packages: - - &pylibcugraph_conda pylibcugraph==24.6.* + - &pylibcugraph_conda pylibcugraph==24.8.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -745,17 +827,17 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - pylibcugraph-cu12==24.6.* + - 
pylibcugraph-cu12==24.8.*,>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - pylibcugraph-cu11==24.6.* + - pylibcugraph-cu11==24.8.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcugraph_conda]} depends_on_pylibcugraphops: common: - output_types: conda packages: - - &pylibcugraphops_conda pylibcugraphops==24.6.* + - &pylibcugraphops_conda pylibcugraphops==24.8.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -766,10 +848,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - pylibcugraphops-cu12==24.6.* + - pylibcugraphops-cu12==24.8.*,>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - - pylibcugraphops-cu11==24.6.* + - pylibcugraphops-cu11==24.8.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcugraphops_conda]} depends_on_cupy: From 5f74252c4b0e3fa395a11879e80696bb92372171 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Tue, 2 Jul 2024 12:13:15 -0700 Subject: [PATCH 18/47] update tensordict dependency --- dependencies.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index fdb6f278265..31683483967 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -647,6 +647,7 @@ dependencies: - cugraph==24.8.*,>=0.0.0a0 - pytorch>=2.0 - pytorch-cuda==11.8 + - &tensordict tensordict>=0.1.2 - dgl>=1.1.0.cu* cugraph_pyg_dev: common: @@ -655,7 +656,7 @@ dependencies: - cugraph==24.8.*,>=0.0.0a0 - pytorch>=2.0 - pytorch-cuda==11.8 - - &tensordict tensordict>=0.1.2 + - *tensordict - pyg>=2.5,<2.6 depends_on_pytorch: From b2fdef84931f5a5261d1158fef030dde5cbe18cb Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Tue, 2 Jul 2024 14:35:38 -0700 Subject: [PATCH 19/47] update dependencies --- dependencies.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 31683483967..4938b8f79e2 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -647,7 +647,7 @@ dependencies: - cugraph==24.8.*,>=0.0.0a0 - pytorch>=2.0 - pytorch-cuda==11.8 - - &tensordict tensordict>=0.1.2 + - tensordict>=0.1.2 - dgl>=1.1.0.cu* cugraph_pyg_dev: common: @@ -656,7 +656,7 @@ dependencies: - cugraph==24.8.*,>=0.0.0a0 - pytorch>=2.0 - pytorch-cuda==11.8 - - *tensordict + - tensordict>=0.1.2 - pyg>=2.5,<2.6 depends_on_pytorch: @@ -664,6 +664,8 @@ dependencies: - output_types: [conda] packages: - &pytorch_conda pytorch>=2.0,<2.2.0a0 + - torchdata + - pydantic specific: - output_types: [requirements] From 92fd8665b20f5ad2910cb9a24c7938c991a26bf4 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Tue, 2 Jul 2024 14:38:41 -0700 Subject: [PATCH 20/47] update meta files --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 ++ conda/environments/all_cuda-122_arch-x86_64.yaml | 2 ++ conda/recipes/cugraph-dgl/meta.yaml | 2 +- conda/recipes/cugraph-pyg/meta.yaml | 2 +- dependencies.yaml | 4 ++-- python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml | 1 + 6 files changed, 9 insertions(+), 4 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 177145cc44c..4de237a2b0e 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -46,6 +46,7 @@ dependencies: - packaging>=21 - pandas - pre-commit +- pydantic - pydata-sphinx-theme - pylibcugraphops==24.8.*,>=0.0.0a0 - pylibraft==24.8.*,>=0.0.0a0 @@ -72,6 +73,7 @@ dependencies: - sphinx<6 - sphinxcontrib-websupport - thriftpy2<=0.5.0 +- 
torchdata - ucx-proc=*=gpu - ucx-py==0.39.*,>=0.0.0a0 - wget diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index 9b77955e0fc..5c8c773d0d6 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -51,6 +51,7 @@ dependencies: - packaging>=21 - pandas - pre-commit +- pydantic - pydata-sphinx-theme - pylibcugraphops==24.8.*,>=0.0.0a0 - pylibraft==24.8.*,>=0.0.0a0 @@ -77,6 +78,7 @@ dependencies: - sphinx<6 - sphinxcontrib-websupport - thriftpy2<=0.5.0 +- torchdata - ucx-proc=*=gpu - ucx-py==0.39.*,>=0.0.0a0 - wget diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index 7346c9e6f94..7c30c9c602b 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -28,7 +28,7 @@ requirements: - numba >=0.57 - numpy >=1.23,<2.0a0 - pylibcugraphops ={{ minor_version }} - - tensordict >=0.1.2,<0.3.1a0 + - tensordict >=0.1.2 - python - pytorch >=2.0 - cupy >= 12.0.0 diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 17362893310..9833a78d88b 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -35,7 +35,7 @@ requirements: - cupy >=12.0.0 - cugraph ={{ version }} - pylibcugraphops ={{ minor_version }} - - tensordict >=0.1.2,<0.3.1a0 + - tensordict >=0.1.2 - pyg >=2.5,<2.6 tests: diff --git a/dependencies.yaml b/dependencies.yaml index 4938b8f79e2..4580130643d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -647,7 +647,7 @@ dependencies: - cugraph==24.8.*,>=0.0.0a0 - pytorch>=2.0 - pytorch-cuda==11.8 - - tensordict>=0.1.2 + - &tensordict tensordict>=0.1.2 - dgl>=1.1.0.cu* cugraph_pyg_dev: common: @@ -656,7 +656,7 @@ dependencies: - cugraph==24.8.*,>=0.0.0a0 - pytorch>=2.0 - pytorch-cuda==11.8 - - tensordict>=0.1.2 + - *tensordict - pyg>=2.5,<2.6 depends_on_pytorch: diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml index 63771a75064..28c2fc81eeb 100644 --- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml +++ b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml @@ -21,4 +21,5 @@ dependencies: - pytorch-cuda==11.8 - pytorch>=2.0 - scipy +- tensordict>=0.1.2 name: cugraph_dgl_dev_cuda-118 From 6107d8269de69ba3b41e0096d3964fc6e2d12268 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 3 Jul 2024 13:54:29 -0700 Subject: [PATCH 21/47] fix csr/csc issue, wrap up tests --- .../cugraph_dgl/dataloading/__init__.py | 5 +- .../cugraph_dgl/dataloading/dataloader.py | 6 +- .../dataloading/neighbor_sampler.py | 15 +- .../cugraph_dgl/dataloading/sampler.py | 19 +- .../dataloading/utils/sampling_helpers.py | 17 +- python/cugraph-dgl/cugraph_dgl/graph.py | 34 +-- .../tests/dataloading/test_dataloader.py | 113 ++++++++-- .../tests/dataloading/test_dataloader_mg.py | 208 ++++++++++++++++++ 8 files changed, 365 insertions(+), 52 deletions(-) create mode 100644 python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py index 9ee5e6a970f..8a2e9cd954d 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py @@ -22,13 +22,14 @@ from cugraph_dgl.dataloading.neighbor_sampler import NeighborSampler from cugraph_dgl.dataloading.dask_dataloader import 
DaskDataLoader +from cugraph_dgl.dataloading.dataloader import DataLoader as FutureDataLoader def DataLoader(*args, **kwargs): warnings.warn( "DataLoader has been renamed to DaskDataLoader. " - "In Release 24.10, cugraph_dgl.dataloading.dataloader.DataLoader " + "In Release 24.10, cugraph_dgl.dataloading.FutureDataLoader " "will take over the DataLoader name.", - FutureWarning + FutureWarning, ) return DaskDataLoader(*args, **kwargs) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index b3af6fc3bc9..39f43a5d805 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -154,13 +154,13 @@ def __iter__(self): return self.__sampler.sample( self.__graph, self.__dataset, - self.__batch_size, + batch_size=self.__batch_size, ) - + """ start, end, blocks = out start = start.to(self.__device) end = end.to(self.__device) blocks = [b.to(self.__device) for b in blocks] - """ \ No newline at end of file + """ diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py index f77b00bbac8..1a35c3ea027 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py @@ -22,7 +22,7 @@ from cugraph.utilities.utils import import_optional import cugraph_dgl -from cugraph_dgl.typing import TensorType, DGLSamplerOutput +from cugraph_dgl.typing import DGLSamplerOutput from cugraph_dgl.dataloading.sampler import Sampler, HomogeneousSampleReader torch = import_optional("torch") @@ -161,11 +161,14 @@ def __init__( ) def sample( - self, g: "cugraph_dgl.Graph", indices: Iterator["torch.Tensor"], batch_size: int = 1 + self, + g: "cugraph_dgl.Graph", + indices: Iterator["torch.Tensor"], + batch_size: int = 1, ) -> Iterator[DGLSamplerOutput]: kwargs = dict(**self.__kwargs) - directory = kwargs.pop('directory', None) + directory = kwargs.pop("directory", None) if directory is None: warnings.warn("Setting a directory to store samples is recommended.") self._tempdir = tempfile.TemporaryDirectory() @@ -180,7 +183,7 @@ def sample( ds = UniformNeighborSampler( g._graph(self.edge_dir), writer, - compression=self.sparse_format.upper(), + compression="CSR", fanout=self._reversed_fanout_vals, prior_sources_behavior="carryover", deduplicate_sources=True, @@ -192,7 +195,9 @@ def sample( if g.is_homogeneous: indices = torch.concat(list(indices)) ds.sample_from_nodes(indices, batch_size=batch_size) - return HomogeneousSampleReader(ds.get_reader(), self.output_format) + return HomogeneousSampleReader( + ds.get_reader(), self.output_format, self.edge_dir + ) raise ValueError( "Sampling heterogeneous graphs is currently" diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py index 20bce365c7e..731ec1b8d6f 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py @@ -15,7 +15,7 @@ import cugraph_dgl from cugraph_dgl.nn import SparseGraph -from cugraph_dgl.typing import TensorType, DGLSamplerOutput +from cugraph_dgl.typing import DGLSamplerOutput from cugraph_dgl.dataloading.utils.sampling_helpers import ( create_homogeneous_sampled_graphs_from_tensors_csc, ) @@ -81,7 +81,12 @@ class HomogeneousSampleReader(SampleReader): produced by the cuGraph distributed sampler. 
""" - def __init__(self, base_reader: DistSampleReader, output_format: str = "dgl.Block"): + def __init__( + self, + base_reader: DistSampleReader, + output_format: str = "dgl.Block", + edge_dir="in", + ): """ Constructs a new HomogeneousSampleReader @@ -93,7 +98,11 @@ def __init__(self, base_reader: DistSampleReader, output_format: str = "dgl.Bloc output_format: str The output format for blocks (either "dgl.Block" or "cugraph_dgl.nn.SparseGraph"). + edge_dir: str + The direction sampling was performed in ("in" or "out"). """ + + self.__edge_dir = edge_dir super().__init__(base_reader, output_format=output_format) def __decode_csc( @@ -141,7 +150,6 @@ def __init__(self, sparse_format: str = "csc", output_format="dgl.Block"): if sparse_format != "csc": raise ValueError("Only CSC format is supported at this time") - self.__sparse_format = sparse_format self.__output_format = output_format @property @@ -153,7 +161,10 @@ def sparse_format(self): return self.__sparse_format def sample( - self, g: cugraph_dgl.Graph, indices: Iterator["torch.Tensor"], batch_size: int = 1 + self, + g: cugraph_dgl.Graph, + indices: Iterator["torch.Tensor"], + batch_size: int = 1, ) -> Iterator[ Tuple["torch.Tensor", "torch.Tensor", List[Union[SparseGraph, "dgl.Block"]]] ]: diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index da40fb6f564..e8c305b6ba1 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -592,21 +592,28 @@ def _create_homogeneous_blocks_from_csc( ] blocks = [] - seednodes_range=None + seednodes_range = None for mfg in mfgs: block_mfg = _create_homogeneous_dgl_block_from_tensor_d( - {'sources': mfg.src_ids(), 'destinations': mfg.dst_ids(), 'sources_range': mfg._num_src_nodes-1, 'destinations_range': mfg._num_dst_nodes-1}, + { + "sources": mfg.src_ids(), + "destinations": mfg.dst_ids(), + "sources_range": mfg._num_src_nodes - 1, + "destinations_range": mfg._num_dst_nodes - 1, + }, renumber_map=renumber_map_list[b_id], - seednodes_range=seednodes_range + seednodes_range=seednodes_range, ) seednodes_range = max( - mfg._num_src_nodes-1, - mfg._num_dst_nodes-1, + mfg._num_src_nodes - 1, + mfg._num_dst_nodes - 1, ) blocks.append(block_mfg) del mfgs + blocks.reverse() + output_batch.append(blocks) output.append(output_batch) diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py index 00fa9a66be7..5e18d5ea616 100644 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -191,26 +191,28 @@ def add_nodes( raise ValueError("The global number of nodes must match on all workers") # Ensure the sum of the feature shapes equals the global number of nodes. - for feature_name, feature_tensor in data.items(): - features_size = torch.tensor( - [int(feature_tensor.shape[0])], device="cuda", dtype=torch.int64 - ) - torch.distributed.all_reduce( - features_size, op=torch.distributed.ReduceOp.SUM - ) - if features_size != global_num_nodes: - raise ValueError( - "The total length of the feature vector across workers must" - " match the global number of nodes but it does not match for " - f"{feature_name}." 
+ if data is not None: + for feature_name, feature_tensor in data.items(): + features_size = torch.tensor( + [int(feature_tensor.shape[0])], device="cuda", dtype=torch.int64 ) + torch.distributed.all_reduce( + features_size, op=torch.distributed.ReduceOp.SUM + ) + if features_size != global_num_nodes: + raise ValueError( + "The total length of the feature vector across workers must" + " match the global number of nodes but it does not " + f"match for {feature_name}." + ) self.__num_nodes_dict[ntype] = global_num_nodes - for feature_name, feature_tensor in data.items(): - self.__ndata_storage[ntype, feature_name] = self.__ndata_storage_type( - _cast_to_torch_tensor(feature_tensor), **self.__wg_kwargs - ) + if data is not None: + for feature_name, feature_tensor in data.items(): + self.__ndata_storage[ntype, feature_name] = self.__ndata_storage_type( + _cast_to_torch_tensor(feature_tensor), **self.__wg_kwargs + ) self.__graph = None self.__vertex_offsets = None diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py index d29f1c8ea41..ef47875463d 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py @@ -11,39 +11,118 @@ # See the License for the specific language governing permissions and # limitations under the License. +import cugraph_dgl.dataloading import pytest import cugraph_dgl -from cugraph_dgl.dataloading.dataloader import DataLoader -from cugraph_dgl.dataloading import NeighborSampler from cugraph.datasets import karate from cugraph.utilities.utils import import_optional, MissingModule -torch = import_optional('torch') -dgl = import_optional('dgl') +import numpy as np + +torch = import_optional("torch") +dgl = import_optional("dgl") + @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") def test_dataloader_basic_homogeneous(): - graph = cugraph_dgl.Graph( - is_multi_gpu=False - ) + graph = cugraph_dgl.Graph(is_multi_gpu=False) num_nodes = karate.number_of_nodes() - graph.add_nodes( - num_nodes, - data={'z': torch.arange(num_nodes)} - ) + graph.add_nodes(num_nodes, data={"z": torch.arange(num_nodes)}) edf = karate.get_edgelist() graph.add_edges( - u=edf['src'], - v=edf['dst'], - data={'q': torch.arange(karate.number_of_edges())} + u=edf["src"], v=edf["dst"], data={"q": torch.arange(karate.number_of_edges())} + ) + + sampler = cugraph_dgl.dataloading.NeighborSampler([5, 5, 5]) + loader = cugraph_dgl.dataloading.FutureDataLoader( + graph, torch.arange(num_nodes), sampler, batch_size=2 + ) + + for in_t, out_t, blocks in loader: + assert len(blocks) == 3 + assert len(out_t) <= 2 + + +def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1): + # Single fanout to match cugraph + sampler = dgl.dataloading.NeighborSampler(fanouts) + dataloader = dgl.dataloading.DataLoader( + g, + train_nid, + sampler, + batch_size=batch_size, + shuffle=False, + drop_last=False, + num_workers=0, + ) + + dgl_output = {} + for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): + dgl_output[batch_id] = { + "input_nodes": input_nodes, + "output_nodes": output_nodes, + "blocks": blocks, + } + return dgl_output + + +def sample_cugraph_dgl_graphs(cugraph_g, train_nid, fanouts, batch_size=1): + sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) + + dataloader = 
cugraph_dgl.dataloading.FutureDataLoader(
+        cugraph_g,
+        train_nid,
+        sampler,
+        batch_size=batch_size,
+        drop_last=False,
+        shuffle=False,
     )
-    sampler = NeighborSampler([5, 5, 5])
-    loader = DataLoader(graph, torch.arange(num_nodes), sampler, batch_size=2)
+    cugraph_dgl_output = {}
+    for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader):
+        cugraph_dgl_output[batch_id] = {
+            "input_nodes": input_nodes,
+            "output_nodes": output_nodes,
+            "blocks": blocks,
+        }
+    return cugraph_dgl_output
+
+
+@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
+@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available")
+@pytest.mark.parametrize("ix", [[1], [1, 0]])
+@pytest.mark.parametrize("batch_size", [1, 2])
+def test_same_homogeneousgraph_results(ix, batch_size):
+    src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8])
+    dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1])
+
+    train_nid = torch.tensor(ix)
+    # Create a simple homogeneous graph.
+    dgl_g = dgl.graph((src, dst))
-    print(next(iter(loader)))
\ No newline at end of file
+    cugraph_g = cugraph_dgl.Graph(is_multi_gpu=False)
+    cugraph_g.add_nodes(9)
+    cugraph_g.add_edges(u=src, v=dst)
+
+    dgl_output = sample_dgl_graphs(dgl_g, train_nid, [2], batch_size=batch_size)
+    cugraph_output = sample_cugraph_dgl_graphs(cugraph_g, train_nid, [2], batch_size)
+
+    cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy()
+    dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy()
+
+    np.testing.assert_array_equal(
+        np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes)
+    )
+    assert (
+        dgl_output[0]["blocks"][0].num_dst_nodes()
+        == cugraph_output[0]["blocks"][0].num_dst_nodes()
+    )
+    assert (
+        dgl_output[0]["blocks"][0].num_edges()
+        == cugraph_output[0]["blocks"][0].num_edges()
+    )
diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py
new file mode 100644
index 00000000000..39ce55d7616
--- /dev/null
+++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py
@@ -0,0 +1,208 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
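The multi-GPU tests that follow spawn one worker per GPU. Conceptually, each rank adds only its shard of the edge list while reporting the same global node count; a sketch of that per-rank pattern, assuming torch.distributed and the cuGraph comms are already initialized (as init_pytorch_worker below does):

import torch
import cugraph_dgl

def build_graph_shard(rank, world_size, src, dst):
    g = cugraph_dgl.Graph(is_multi_gpu=True)
    # Global node count: identical on every rank.
    g.add_nodes(int(max(src.max(), dst.max())) + 1)
    # Edge shard: each rank adds a disjoint slice of the edge list.
    g.add_edges(
        u=torch.tensor_split(src, world_size)[rank],
        v=torch.tensor_split(dst, world_size)[rank],
    )
    return g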
+
+import pytest
+import os
+
+import numpy as np
+
+import cugraph_dgl
+
+from cugraph.datasets import karate
+from cugraph.utilities.utils import import_optional, MissingModule
+
+from cugraph.gnn import (
+    cugraph_comms_create_unique_id,
+    cugraph_comms_init,
+    cugraph_comms_shutdown,
+)
+
+torch = import_optional("torch")
+dgl = import_optional("dgl")
+
+
+def init_pytorch_worker(rank, world_size, cugraph_id):
+    import rmm
+
+    rmm.reinitialize(
+        devices=rank,
+    )
+
+    import cupy
+
+    cupy.cuda.Device(rank).use()
+    from rmm.allocators.cupy import rmm_cupy_allocator
+
+    cupy.cuda.set_allocator(rmm_cupy_allocator)
+
+    from cugraph.testing.mg_utils import enable_spilling
+
+    enable_spilling()
+
+    torch.cuda.set_device(rank)
+
+    os.environ["MASTER_ADDR"] = "localhost"
+    os.environ["MASTER_PORT"] = "12355"
+    torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size)
+
+    cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank)
+
+
+def run_test_dataloader_basic_homogeneous(rank, world_size, uid):
+    init_pytorch_worker(rank, world_size, uid)
+
+    graph = cugraph_dgl.Graph(is_multi_gpu=True)
+
+    num_nodes = karate.number_of_nodes()
+    graph.add_nodes(
+        num_nodes,
+    )
+
+    edf = karate.get_edgelist()
+    graph.add_edges(
+        u=torch.tensor_split(torch.as_tensor(edf["src"], device="cuda"), world_size)[
+            rank
+        ],
+        v=torch.tensor_split(torch.as_tensor(edf["dst"], device="cuda"), world_size)[
+            rank
+        ],
+    )
+
+    sampler = cugraph_dgl.dataloading.NeighborSampler([5, 5, 5])
+    loader = cugraph_dgl.dataloading.FutureDataLoader(
+        graph,
+        torch.arange(num_nodes),
+        sampler,
+        batch_size=2,
+        use_ddp=True,
+    )
+
+    for in_t, out_t, blocks in loader:
+        assert len(blocks) == 3
+        assert len(out_t) <= 2
+
+
+@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
+@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available")
+def test_dataloader_basic_homogeneous():
+    uid = cugraph_comms_create_unique_id()
+    # Limit the number of GPUs this test is run with
+    world_size = min(torch.cuda.device_count(), 4)
+
+    torch.multiprocessing.spawn(
+        run_test_dataloader_basic_homogeneous,
+        args=(
+            world_size,
+            uid,
+        ),
+        nprocs=world_size,
+    )
+
+
+def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1):
+    # Single fanout to match cugraph
+    sampler = dgl.dataloading.NeighborSampler(fanouts)
+    dataloader = dgl.dataloading.DataLoader(
+        g,
+        train_nid,
+        sampler,
+        batch_size=batch_size,
+        shuffle=False,
+        drop_last=False,
+        num_workers=0,
+    )
+
+    dgl_output = {}
+    for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader):
+        dgl_output[batch_id] = {
+            "input_nodes": input_nodes,
+            "output_nodes": output_nodes,
+            "blocks": blocks,
+        }
+    return dgl_output
+
+
+def sample_cugraph_dgl_graphs(cugraph_g, train_nid, fanouts, batch_size=1):
+    sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts)
+
+    dataloader = cugraph_dgl.dataloading.FutureDataLoader(
+        cugraph_g,
+        train_nid,
+        sampler,
+        batch_size=batch_size,
+        drop_last=False,
+        shuffle=False,
+    )
+
+    cugraph_dgl_output = {}
+    for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader):
+        cugraph_dgl_output[batch_id] = {
+            "input_nodes": input_nodes,
+            "output_nodes": output_nodes,
+            "blocks": blocks,
+        }
+    return cugraph_dgl_output
+
+
+def run_test_same_homogeneousgraph_results(rank, world_size, uid, ix, batch_size):
+    init_pytorch_worker(rank, world_size, uid)
+
+    src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8])
+    dst = torch.tensor([0, 0, 0, 0, 
1, 1, 1, 1])
+
+    local_src = torch.tensor_split(src, world_size)[rank]
+    local_dst = torch.tensor_split(dst, world_size)[rank]
+
+    train_nid = torch.tensor(ix)
+    # Create a simple homogeneous graph.
+    dgl_g = dgl.graph((src, dst))
+
+    cugraph_g = cugraph_dgl.Graph(is_multi_gpu=True)
+    cugraph_g.add_nodes(9)
+    cugraph_g.add_edges(u=local_src, v=local_dst)
+
+    dgl_output = sample_dgl_graphs(dgl_g, train_nid, [2], batch_size=batch_size)
+    cugraph_output = sample_cugraph_dgl_graphs(cugraph_g, train_nid, [2], batch_size)
+
+    cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy()
+    dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy()
+
+    np.testing.assert_array_equal(
+        np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes)
+    )
+    assert (
+        dgl_output[0]["blocks"][0].num_dst_nodes()
+        == cugraph_output[0]["blocks"][0].num_dst_nodes()
+    )
+    assert (
+        dgl_output[0]["blocks"][0].num_edges()
+        == cugraph_output[0]["blocks"][0].num_edges()
+    )
+
+    cugraph_comms_shutdown()
+
+
+@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
+@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available")
+@pytest.mark.parametrize("ix", [[1], [1, 0]])
+@pytest.mark.parametrize("batch_size", [1, 2])
+def test_same_homogeneousgraph_results_mg(ix, batch_size):
+    uid = cugraph_comms_create_unique_id()
+    # Limit the number of GPUs this test is run with
+    world_size = min(torch.cuda.device_count(), 4)
+
+    torch.multiprocessing.spawn(
+        run_test_same_homogeneousgraph_results,
+        args=(world_size, uid, ix, batch_size),
+        nprocs=world_size,
+    )
From 6bc4b4a6696931fe65a5d59c95feae3e229f51c9 Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Mon, 8 Jul 2024 10:21:19 -0700
Subject: [PATCH 22/47] m

---
 .../cugraph_dgl/tests/test_graph.py           | 85 ++++++++++++-------
 .../cugraph_dgl/tests/test_graph_mg.py        |  4 +-
 2 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py
index e47e280c48e..0c1fb088198 100644
--- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py
+++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py
@@ -100,19 +100,18 @@ def test_graph_make_heterogeneous_graph(direction):
     df = karate.get_edgelist()
     df.src = df.src.astype("int64")
     df.dst = df.dst.astype("int64")
-    wgt = np.random.random((len(df),))
     graph = cugraph_dgl.Graph()
     total_num_nodes = max(df.src.max(), df.dst.max()) + 1
-
+
     num_nodes_group_1 = total_num_nodes // 2
     num_nodes_group_2 = total_num_nodes - num_nodes_group_1
-
+
     node_x_1 = np.random.random((num_nodes_group_1,))
     node_x_2 = np.random.random((num_nodes_group_2,))
-    graph.add_nodes(num_nodes_group_1, {'x':node_x_1}, 'type1')
-    graph.add_nodes(num_nodes_group_2, {'x':node_x_2}, 'type2')
+    graph.add_nodes(num_nodes_group_1, {"x": node_x_1}, "type1")
+    graph.add_nodes(num_nodes_group_2, {"x": node_x_2}, "type2")
     edges_11 = df[(df.src < num_nodes_group_1) & (df.dst < num_nodes_group_1)]
     edges_12 = df[(df.src < num_nodes_group_1) & (df.dst >= num_nodes_group_1)]
     edges_21 = df[(df.src >= num_nodes_group_1) & (df.dst < num_nodes_group_1)]
     edges_22 = df[(df.src >= num_nodes_group_1) & (df.dst >= num_nodes_group_1)]
     edges_12.dst -= num_nodes_group_1
     edges_21.src -= num_nodes_group_1
     edges_22.dst -= num_nodes_group_1
     edges_22.src -= num_nodes_group_1
-    graph.add_edges(edges_11.src, edges_11.dst, etype=('type1', 'e1', 'type1'))
-    graph.add_edges(edges_12.src, edges_12.dst, etype=('type1', 'e2', 'type2'))
-    graph.add_edges(edges_21.src, edges_21.dst, etype=('type2', 'e3', 'type1'))
-    graph.add_edges(edges_22.src, edges_22.dst, etype=('type2', 
'e4', 'type2')) + graph.add_edges(edges_11.src, edges_11.dst, etype=("type1", "e1", "type1")) + graph.add_edges(edges_12.src, edges_12.dst, etype=("type1", "e2", "type2")) + graph.add_edges(edges_21.src, edges_21.dst, etype=("type2", "e3", "type1")) + graph.add_edges(edges_22.src, edges_22.dst, etype=("type2", "e4", "type2")) assert not graph.is_homogeneous assert not graph.is_multi_gpu @@ -137,17 +136,31 @@ def test_graph_make_heterogeneous_graph(direction): graph.nodes() == torch.arange(total_num_nodes, dtype=torch.int64, device="cuda") ).all() assert ( - graph.nodes('type1') == torch.arange(num_nodes_group_1, dtype=torch.int64, device="cuda") + graph.nodes("type1") + == torch.arange(num_nodes_group_1, dtype=torch.int64, device="cuda") ).all() assert ( - graph.nodes('type2') == torch.arange(num_nodes_group_2, dtype=torch.int64, device="cuda") + graph.nodes("type2") + == torch.arange(num_nodes_group_2, dtype=torch.int64, device="cuda") ).all() # Verify graph.edges() - assert((graph.edges('eid',etype=('type1','e1','type1')) == torch.arange(len(edges_11), dtype=torch.int64, device='cuda')).all()) - assert((graph.edges('eid',etype=('type1','e2','type2')) == torch.arange(len(edges_12), dtype=torch.int64, device='cuda')).all()) - assert((graph.edges('eid',etype=('type2','e3','type1')) == torch.arange(len(edges_21), dtype=torch.int64, device='cuda')).all()) - assert((graph.edges('eid',etype=('type2','e4','type2')) == torch.arange(len(edges_22), dtype=torch.int64, device='cuda')).all()) + assert ( + graph.edges("eid", etype=("type1", "e1", "type1")) + == torch.arange(len(edges_11), dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type1", "e2", "type2")) + == torch.arange(len(edges_12), dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type2", "e3", "type1")) + == torch.arange(len(edges_21), dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type2", "e4", "type2")) + == torch.arange(len(edges_22), dtype=torch.int64, device="cuda") + ).all() # Use sampling call to check graph creation # This isn't a test of cuGraph sampling with DGL; the options are @@ -156,20 +169,20 @@ def test_graph_make_heterogeneous_graph(direction): sampling_output = pylibcugraph.uniform_neighbor_sample( pylibcugraph.ResourceHandle(), plc_graph, - start_list=cupy.arange(total_num_nodes, dtype='int64'), - h_fan_out=np.array([1, 1], dtype='int32'), + start_list=cupy.arange(total_num_nodes, dtype="int64"), + h_fan_out=np.array([1, 1], dtype="int32"), with_replacement=False, do_expensive_check=True, with_edge_properties=True, - prior_sources_behavior='exclude', + prior_sources_behavior="exclude", return_dict=True, ) expected_etypes = { - 0: 'e1', - 1: 'e2', - 2: 'e3', - 3: 'e4', + 0: "e1", + 1: "e2", + 2: "e3", + 3: "e4", } expected_offsets = { 0: (0, 0), @@ -177,20 +190,26 @@ def test_graph_make_heterogeneous_graph(direction): 2: (num_nodes_group_1, 0), 3: (num_nodes_group_1, num_nodes_group_1), } - if direction == 'in': - src_col = 'minors' - dst_col = 'majors' + if direction == "in": + src_col = "minors" + dst_col = "majors" else: - src_col = 'majors' - dst_col = 'minors' + src_col = "majors" + dst_col = "minors" # Looping over the output verifies that all edges are valid # (and therefore, the graph is valid) - for i, etype in enumerate(sampling_output['edge_type'].tolist()): - eid = int(sampling_output['edge_id'][i]) + for i, etype in enumerate(sampling_output["edge_type"].tolist()): + eid = 
int(sampling_output["edge_id"][i]) + + srcs, dsts, eids = graph.edges( + "all", etype=expected_etypes[etype], device="cpu" + ) - srcs, dsts, eids = graph.edges('all', etype=expected_etypes[etype], device='cpu') - assert eids[eid] == eid - assert srcs[eid] == int(sampling_output[src_col][i]) - expected_offsets[etype][0] - assert dsts[eid] == int(sampling_output[dst_col][i]) - expected_offsets[etype][1] \ No newline at end of file + assert ( + srcs[eid] == int(sampling_output[src_col][i]) - expected_offsets[etype][0] + ) + assert ( + dsts[eid] == int(sampling_output[dst_col][i]) - expected_offsets[etype][1] + ) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py index f0561c41095..8e469519433 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py @@ -32,9 +32,11 @@ cugraph_comms_get_raft_handle, ) + pylibwholegraph = import_optional("pylibwholegraph") torch = import_optional("torch") -dgl = import_optional('dgl') +dgl = import_optional("dgl") + def init_pytorch_worker(rank, world_size, cugraph_id): import rmm From faeb4a52e280b11c736252540022a9e295c6ef63 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 8 Jul 2024 10:45:16 -0700 Subject: [PATCH 23/47] style --- python/cugraph-dgl/cugraph_dgl/__init__.py | 2 +- python/cugraph-dgl/cugraph_dgl/view.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/__init__.py b/python/cugraph-dgl/cugraph_dgl/__init__.py index 61b4142a871..bbae569c91b 100644 --- a/python/cugraph-dgl/cugraph_dgl/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py index e2bf7c20a29..e65af53a096 100644 --- a/python/cugraph-dgl/cugraph_dgl/view.py +++ b/python/cugraph-dgl/cugraph_dgl/view.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ from collections import defaultdict from collections.abc import MutableMapping from typing import Union, Dict, List, Tuple @@ -260,10 +261,10 @@ def __getitem__(self, key): ) def __call__(self, *args, **kwargs): - if 'device' in kwargs: + if "device" in kwargs: return self.__graph.all_edges(*args, **kwargs) - - return self.__graph.all_edges(*args, **kwargs, device='cuda') + + return self.__graph.all_edges(*args, **kwargs, device="cuda") class HeteroNodeView: From afb94522422d4ac867e941c3bb594073d810b973 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 8 Jul 2024 10:47:05 -0700 Subject: [PATCH 24/47] revert ci script --- ci/test_python.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 061a6459085..e8c8272e8d6 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -210,12 +210,9 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then --channel "${PYTHON_CHANNEL}" \ --channel pyg \ "cugraph-pyg" \ - "pytorch=2.1.0" \ - "pytorch-cuda=${CONDA_CUDA_VERSION}" + "ogb" - # Install pyg dependencies (which requires pip) pip install \ - ogb \ pyg_lib \ torch_scatter \ torch_sparse \ From 48ba6d42b9874f9612f6074af2f611acb0714c0e Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Tue, 9 Jul 2024 11:37:45 -0700 Subject: [PATCH 25/47] fix meta.yaml issue --- conda/recipes/cugraph-dgl/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index 7c30c9c602b..0affe456b73 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -31,7 +31,7 @@ requirements: - tensordict >=0.1.2 - python - pytorch >=2.0 - - cupy >= 12.0.0 + - cupy >=12.0.0 tests: imports: From 801de87154aa997c49bb92034e197e9692b0d439 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:16:28 -0400 Subject: [PATCH 26/47] add type hint Co-authored-by: Vibhu Jawa --- python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py index 7cd94a1be84..994e8609348 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py @@ -281,7 +281,7 @@ def get_batch_id_series(n_output_rows: int, batch_size: int): return cudf.Series(batch_ar) -def create_batch_df(dataset: torch.Tensor): +def create_batch_df(dataset: torch.Tensor) -> cudf.DataFrame: batch_id_ls = [] indices_ls = [] for batch_id, b_indices in enumerate(dataset): From 5e511cc77092350602bc8d19fcd2a23306e99089 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:16:49 -0400 Subject: [PATCH 27/47] add missing type hint Co-authored-by: Vibhu Jawa --- python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index 39f43a5d805..e01bd5b8d48 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -38,7 +38,7 @@ def __init__( device: Union[int, str, "torch.device"] = None, use_ddp: bool = False, ddp_seed: int = 0, - batch_size=1, + 
batch_size: int= 1, drop_last: bool = False, shuffle: bool = False, use_prefetch_thread: Optional[bool] = None, From 035b69ae10721c4781cadaf8a02d19a23f46583b Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 10 Jul 2024 12:28:49 -0700 Subject: [PATCH 28/47] remove comment, add issue reference --- .../cugraph-dgl/cugraph_dgl/dataloading/dataloader.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index 39f43a5d805..5ea27cc0a36 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -150,17 +150,9 @@ def dataset( return self.__dataset def __iter__(self): - # TODO move to the correct device + # TODO move to the correct device (rapidsai/cugraph-gnn#11) return self.__sampler.sample( self.__graph, self.__dataset, batch_size=self.__batch_size, ) - - """ - start, end, blocks = out - - start = start.to(self.__device) - end = end.to(self.__device) - blocks = [b.to(self.__device) for b in blocks] - """ From b412776cf99e20b88043f1eaeeec2d3edd0433d8 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Wed, 10 Jul 2024 15:30:38 -0400 Subject: [PATCH 29/47] Add type hint Co-authored-by: Vibhu Jawa --- python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py index 994e8609348..1b4233b14e4 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py @@ -273,7 +273,7 @@ def __del__(self): _clean_directory(self._sampling_output_dir) -def get_batch_id_series(n_output_rows: int, batch_size: int): +def get_batch_id_series(n_output_rows: int, batch_size: int) -> cudf.Series : num_batches = (n_output_rows + batch_size - 1) // batch_size print(f"Number of batches = {num_batches}".format(num_batches)) batch_ar = cp.arange(0, num_batches).repeat(batch_size) From 1c72bd6e1e054c8587506f4edae74b92e8d6b852 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 10 Jul 2024 14:48:22 -0700 Subject: [PATCH 30/47] add convert function, fix bugs --- python/cugraph-dgl/cugraph_dgl/__init__.py | 5 +- python/cugraph-dgl/cugraph_dgl/convert.py | 54 +++++++++++++++++- python/cugraph-dgl/cugraph_dgl/graph.py | 40 ++++++++++--- .../tests/test_from_dgl_heterograph.py | 41 +++++++++++++- python/cugraph-dgl/cugraph_dgl/tests/utils.py | 56 +++++++++++++++++-- python/cugraph-dgl/cugraph_dgl/view.py | 26 +++++++-- 6 files changed, 200 insertions(+), 22 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/__init__.py b/python/cugraph-dgl/cugraph_dgl/__init__.py index bbae569c91b..58850d47fba 100644 --- a/python/cugraph-dgl/cugraph_dgl/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/__init__.py @@ -17,7 +17,10 @@ os.environ["RAPIDS_NO_INITIALIZE"] = "1" from cugraph_dgl.graph import Graph from cugraph_dgl.cugraph_storage import CuGraphStorage -from cugraph_dgl.convert import cugraph_storage_from_heterograph +from cugraph_dgl.convert import ( + cugraph_storage_from_heterograph, + cugraph_dgl_graph_from_heterograph, +) import cugraph_dgl.dataloading import cugraph_dgl.nn diff --git a/python/cugraph-dgl/cugraph_dgl/convert.py b/python/cugraph-dgl/cugraph_dgl/convert.py index 
1235f07adf1..ae4b96dd391 100644 --- a/python/cugraph-dgl/cugraph_dgl/convert.py +++ b/python/cugraph-dgl/cugraph_dgl/convert.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,6 +12,8 @@ # limitations under the License. from __future__ import annotations from cugraph.utilities.utils import import_optional + +import cugraph_dgl from cugraph_dgl import CuGraphStorage from cugraph_dgl.utils.cugraph_conversion_utils import ( get_edges_dict_from_dgl_HeteroGraph, @@ -39,3 +41,53 @@ def cugraph_storage_from_heterograph( add_ndata_from_dgl_HeteroGraph(gs, g) add_edata_from_dgl_HeteroGraph(gs, g) return gs + + +def cugraph_dgl_graph_from_heterograph( + input_graph: dgl.DGLGraph, + single_gpu: bool = True, + ndata_storage: str = "torch", + edata_storage: str = "torch", + **kwargs, +) -> cugraph_dgl.Graph: + """ + Converts a DGL Graph to a cuGraph-DGL Graph. + """ + + output_graph = cugraph_dgl.Graph( + is_multi_gpu=(not single_gpu), + ndata_storage=ndata_storage, + edata_storage=edata_storage, + **kwargs, + ) + + # Calling is_homogeneous does not work here + if len(input_graph.ntypes) <= 1: + output_graph.add_nodes( + input_graph.num_nodes(), data=input_graph.ndata, ntype=input_graph.ntypes[0] + ) + else: + for ntype in input_graph.ntypes: + data = { + k: v_dict[ntype] + for k, v_dict in input_graph.ndata.items() + if ntype in v_dict + } + output_graph.add_nodes(input_graph.num_nodes(ntype), data=data, ntype=ntype) + + if len(input_graph.canonical_etypes) <= 1: + can_etype = input_graph.canonical_etypes[0] + src_t, dst_t = input_graph.edges(form="uv", etype=can_etype) + output_graph.add_edges(src_t, dst_t, input_graph.edata, etype=can_etype) + else: + for can_etype in input_graph.canonical_etypes: + data = { + k: v_dict[can_etype] + for k, v_dict in input_graph.edata.items() + if can_etype in v_dict + } + + src_t, dst_t = input_graph.edges(form="uv", etype=can_etype) + output_graph.add_edges(src_t, dst_t, data=data, etype=can_etype) + + return output_graph diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py index 5e18d5ea616..2eba13c6958 100644 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -11,6 +11,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
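A minimal usage sketch for the converter added above, for orientation (illustrative only, not part of this patch; assumes a CUDA device and a cugraph-dgl install):

import dgl
import torch

import cugraph_dgl

# Toy homogeneous graph with one node feature.
g = dgl.heterograph(
    {("user", "follows", "user"): (torch.tensor([0, 1]), torch.tensor([1, 2]))}
)
g.ndata["feat"] = torch.randn(3, 4)

# Copies the topology plus ndata/edata into a cugraph_dgl.Graph.
cg = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g.to("cuda"))
assert cg.num_nodes() == g.num_nodes()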
+import warnings + from typing import Union, Optional, Dict, Tuple, List from cugraph.utilities.utils import import_optional @@ -561,6 +563,9 @@ def _graph( return self.__graph[0] + def _has_n_emb(self, ntype: str, emb_name: str) -> bool: + return (ntype, emb_name) in self.__ndata_storage + def _get_n_emb( self, ntype: str, emb_name: str, u: Union[str, TensorType] ) -> "torch.Tensor": @@ -595,9 +600,21 @@ def _get_n_emb( if dgl.base.is_all(u): u = torch.arange(self.num_nodes(ntype), dtype=self.idtype, device="cpu") - return self.__ndata_storage[ntype, emb_name].fetch( - _cast_to_torch_tensor(u), "cuda" - ) + try: + return self.__ndata_storage[ntype, emb_name].fetch( + _cast_to_torch_tensor(u), "cuda" + ) + except RuntimeError as ex: + warnings.warn( + "Got error accessing data, trying again with index on device: " + + str(ex) + ) + return self.__ndata_storage[ntype, emb_name].fetch( + _cast_to_torch_tensor(u).cuda(), "cuda" + ) + + def _has_e_emb(self, etype: Tuple[str, str, str], emb_name: str) -> bool: + return (etype, emb_name) in self.__edata_storage def _get_e_emb( self, etype: Tuple[str, str, str], emb_name: str, u: Union[str, TensorType] @@ -629,9 +646,18 @@ def _get_e_emb( if dgl.base.is_all(u): u = torch.arange(self.num_edges(etype), dtype=self.idtype, device="cpu") - return self.__edata_storage[etype, emb_name].fetch( - _cast_to_torch_tensor(u), "cuda" - ) + try: + return self.__edata_storage[etype, emb_name].fetch( + _cast_to_torch_tensor(u), "cuda" + ) + except RuntimeError as ex: + warnings.warn( + "Got error accessing data, trying again with index on device: " + + str(ex) + ) + return self.__edata_storage[etype, emb_name].fetch( + _cast_to_torch_tensor(u).cuda(), "cuda" + ) def _set_n_emb( self, ntype: str, u: Union[str, TensorType], kv: Dict[str, TensorType] @@ -774,7 +800,7 @@ def _get_e_emb_keys(self, etype: str) -> List[str]: List[str] The list of embedding names for the given edge type. """ - return [k for (t, k) in self.__ndata_storage if etype == t] + return [k for (t, k) in self.__edata_storage if etype == t] def all_edges( self, diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py index 128d9bfaca5..667a4a2e66d 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
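The retry logic added to _get_n_emb/_get_e_emb above guards against feature storages (e.g., WholeGraph-backed ones) that reject host-resident index tensors. The same pattern in isolation, as a sketch (fetch_with_fallback is an illustrative name, not library API):

import warnings

import torch


def fetch_with_fallback(storage, index: "torch.Tensor") -> "torch.Tensor":
    # Try the index as given first; some backends only accept device tensors.
    try:
        return storage.fetch(index, "cuda")
    except RuntimeError as ex:
        warnings.warn("Retrying fetch with index on device: " + str(ex))
        return storage.fetch(index.cuda(), "cuda")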
# You may obtain a copy of the License at @@ -20,7 +20,9 @@ from cugraph.utilities.utils import import_optional from .utils import ( assert_same_edge_feats, + assert_same_edge_feats_daskapi, assert_same_node_feats, + assert_same_node_feats_daskapi, assert_same_num_edges_can_etypes, assert_same_num_edges_etypes, assert_same_num_nodes, @@ -134,7 +136,7 @@ def create_heterograph4(idtype): @pytest.mark.parametrize("idxtype", [th.int32, th.int64]) -def test_heterograph_conversion_nodes(idxtype): +def test_heterograph_conversion_nodes_daskapi(idxtype): graph_fs = [ create_heterograph1, create_heterograph2, @@ -145,6 +147,39 @@ def test_heterograph_conversion_nodes(idxtype): g = graph_f(idxtype) gs = cugraph_dgl.cugraph_storage_from_heterograph(g) + assert_same_num_nodes(gs, g) + assert_same_node_feats_daskapi(gs, g) + + +@pytest.mark.parametrize("idxtype", [th.int32, th.int64]) +def test_heterograph_conversion_edges_daskapi(idxtype): + graph_fs = [ + create_heterograph1, + create_heterograph2, + create_heterograph3, + create_heterograph4, + ] + for graph_f in graph_fs: + g = graph_f(idxtype) + gs = cugraph_dgl.cugraph_storage_from_heterograph(g) + + assert_same_num_edges_can_etypes(gs, g) + assert_same_num_edges_etypes(gs, g) + assert_same_edge_feats_daskapi(gs, g) + + +@pytest.mark.parametrize("idxtype", [th.int32, th.int64]) +def test_heterograph_conversion_nodes(idxtype): + graph_fs = [ + create_heterograph1, + create_heterograph2, + create_heterograph3, + create_heterograph4, + ] + for graph_f in graph_fs: + g = graph_f(idxtype) + gs = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g) + assert_same_num_nodes(gs, g) assert_same_node_feats(gs, g) @@ -159,7 +194,7 @@ def test_heterograph_conversion_edges(idxtype): ] for graph_f in graph_fs: g = graph_f(idxtype) - gs = cugraph_dgl.cugraph_storage_from_heterograph(g) + gs = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g) assert_same_num_edges_can_etypes(gs, g) assert_same_num_edges_etypes(gs, g) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/utils.py b/python/cugraph-dgl/cugraph_dgl/tests/utils.py index d6a90840b72..09c267099e5 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/utils.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,8 +15,8 @@ th = import_optional("torch") -def assert_same_node_feats(gs, g): - set(gs.ndata.keys()) == set(g.ndata.keys()) +def assert_same_node_feats_daskapi(gs, g): + assert set(gs.ndata.keys()) == set(g.ndata.keys()) for key in g.ndata.keys(): for ntype in g.ntypes: @@ -30,6 +30,29 @@ def assert_same_node_feats(gs, g): assert equal_t == 0 +def assert_same_node_feats(gs, g): + assert set(gs.ndata.keys()) == set(g.ndata.keys()) + assert set(gs.ntypes) == set(g.ntypes) + + for key in g.ndata.keys(): + for ntype in g.ntypes: + if len(g.ntypes) <= 1 or ntype in g.ndata[key]: + indices = th.arange(0, g.num_nodes(ntype), dtype=g.idtype) + + g_output = g.ndata[key] + gs_output = gs.ndata[key] + + if len(g.ntypes) > 1: + g_output = g_output[ntype] + gs_output = gs_output[ntype] + + g_output = g_output[indices] + gs_output = gs_output[indices] + + equal_t = (gs_output != g_output).sum() + assert equal_t == 0 + + def assert_same_num_nodes(gs, g): for ntype in g.ntypes: assert g.num_nodes(ntype) == gs.num_nodes(ntype) @@ -45,8 +68,8 @@ def assert_same_num_edges_etypes(gs, g): assert g.num_edges(etype) == gs.num_edges(etype) -def assert_same_edge_feats(gs, g): - set(gs.edata.keys()) == set(g.edata.keys()) +def assert_same_edge_feats_daskapi(gs, g): + assert set(gs.edata.keys()) == set(g.edata.keys()) for key in g.edata.keys(): for etype in g.canonical_etypes: indices = th.arange(0, g.num_edges(etype), dtype=g.idtype).cuda() @@ -59,6 +82,29 @@ def assert_same_edge_feats(gs, g): assert equal_t == 0 +def assert_same_edge_feats(gs, g): + assert set(gs.edata.keys()) == set(g.edata.keys()) + assert set(gs.canonical_etypes) == set(g.canonical_etypes) + assert set(gs.etypes) == set(g.etypes) + + for key in g.edata.keys(): + for etype in g.canonical_etypes: + if len(g.etypes) <= 1 or etype in g.edata[key]: + indices = th.arange(0, g.num_edges(etype), dtype=g.idtype).cuda() + g_output = g.edata[key] + gs_output = gs.edata[key] + + if len(g.etypes) > 1: + g_output = g_output[etype] + gs_output = gs_output[etype] + + g_output = g_output[indices] + gs_output = gs_output[indices] + + equal_t = (gs_output != g_output).sum().cpu() + assert equal_t == 0 + + def assert_same_sampling_len(dgl_g, cugraph_gs, nodes, fanout, edge_dir): dgl_o = dgl_g.sample_neighbors(nodes, fanout=fanout, edge_dir=edge_dir) cugraph_o = cugraph_gs.sample_neighbors(nodes, fanout=fanout, edge_dir=edge_dir) diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py index e65af53a096..2e836591757 100644 --- a/python/cugraph-dgl/cugraph_dgl/view.py +++ b/python/cugraph-dgl/cugraph_dgl/view.py @@ -55,9 +55,17 @@ def _edges(self) -> TensorType: def __getitem__(self, key: str): if isinstance(self._etype, list): - return {t: self._graph._get_e_emb(t, key, self._edges) for t in self._etype} - - return self._graph._get_e_emb(self._etype, key, self._edges) + return { + t: self._graph._get_e_emb(t, key, self._edges) + for t in self._etype + if self._graph._has_e_emb(t, key) + } + + return ( + self._graph._get_e_emb(self._etype, key, self._edges) + if self._graph._has_e_emb(self._etype, key) + else None + ) def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): if isinstance(self._etype, list): @@ -152,9 +160,17 @@ def _nodes(self) -> TensorType: def __getitem__(self, key: str): if isinstance(self._ntype, list): - return {t: self._graph._get_n_emb(t, key, self._nodes) for t in self._ntype} + return { + t: self._graph._get_n_emb(t, key, self._nodes) 
+ for t in self._ntype + if self._graph._has_n_emb(t, key) + } else: - return self._graph._get_n_emb(self._ntype, key, self._nodes) + return ( + self._graph._get_n_emb(self._ntype, key, self._nodes) + if self._graph._has_n_emb(self._ntype, key) + else None + ) def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): if isinstance(self._ntype, list): From 2d522b1c2b68e9888670efdf6f088faee23b4994 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 10 Jul 2024 14:54:42 -0700 Subject: [PATCH 31/47] move worker init to utility --- .../tests/dataloading/test_dataloader_mg.py | 31 +------------- .../cugraph_dgl/tests/test_graph_mg.py | 39 +----------------- python/cugraph-dgl/cugraph_dgl/tests/utils.py | 41 +++++++++++++++++++ 3 files changed, 45 insertions(+), 66 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py index 39ce55d7616..3eabdf454e2 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py @@ -12,7 +12,6 @@ # limitations under the License. import pytest -import os import numpy as np @@ -23,41 +22,15 @@ from cugraph.gnn import ( cugraph_comms_create_unique_id, - cugraph_comms_init, cugraph_comms_shutdown, ) +from utils import init_pytorch_worker + torch = import_optional("torch") dgl = import_optional("dgl") -def init_pytorch_worker(rank, world_size, cugraph_id): - import rmm - - rmm.reinitialize( - devices=rank, - ) - - import cupy - - cupy.cuda.Device(rank).use() - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - from cugraph.testing.mg_utils import enable_spilling - - enable_spilling() - - torch.cuda.set_device(rank) - - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size) - - cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) - - def run_test_dataloader_basic_homogeneous(rank, world_size, uid): init_pytorch_worker(rank, world_size, uid) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py index 8e469519433..cabeecff758 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py @@ -11,7 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import pytest @@ -26,54 +25,20 @@ from cugraph.utilities.utils import import_optional, MissingModule from cugraph.gnn import ( - cugraph_comms_init, cugraph_comms_shutdown, cugraph_comms_create_unique_id, cugraph_comms_get_raft_handle, ) +from utils import init_pytorch_worker pylibwholegraph = import_optional("pylibwholegraph") torch = import_optional("torch") dgl = import_optional("dgl") -def init_pytorch_worker(rank, world_size, cugraph_id): - import rmm - - rmm.reinitialize( - devices=rank, - ) - - import cupy - - cupy.cuda.Device(rank).use() - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - from cugraph.testing.mg_utils import enable_spilling - - enable_spilling() - - torch.cuda.set_device(rank) - - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size) - - pylibwholegraph.torch.initialize.init( - rank, - world_size, - rank, - world_size, - ) - - cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) - - def run_test_graph_make_homogeneous_graph_mg(rank, uid, world_size, direction): - init_pytorch_worker(rank, world_size, uid) + init_pytorch_worker(rank, world_size, uid, init_wholegraph=True) df = karate.get_edgelist() df.src = df.src.astype("int64") diff --git a/python/cugraph-dgl/cugraph_dgl/tests/utils.py b/python/cugraph-dgl/cugraph_dgl/tests/utils.py index 09c267099e5..fa4eb05f297 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/utils.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/utils.py @@ -10,7 +10,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
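For reference, the helper consolidated into tests/utils.py below is meant to be called from spawned per-GPU worker processes; a hedged sketch of a typical driver (assumes two GPUs; the worker body is elided):

import torch.multiprocessing as mp

from cugraph.gnn import cugraph_comms_create_unique_id
from cugraph_dgl.tests.utils import init_pytorch_worker


def worker(rank, world_size, uid):
    # Sets up RMM, CuPy, NCCL, and cuGraph comms for this rank.
    init_pytorch_worker(rank, world_size, uid)
    ...  # test body runs here


if __name__ == "__main__":
    uid = cugraph_comms_create_unique_id()
    world_size = 2  # assumption: two GPUs available
    mp.spawn(worker, args=(world_size, uid), nprocs=world_size)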
+ +import os + from cugraph.utilities.utils import import_optional +from cugraph.gnn import cugraph_comms_init th = import_optional("torch") @@ -111,3 +115,40 @@ def assert_same_sampling_len(dgl_g, cugraph_gs, nodes, fanout, edge_dir): assert cugraph_o.num_edges() == dgl_o.num_edges() for etype in dgl_o.canonical_etypes: assert dgl_o.num_edges(etype) == cugraph_o.num_edges(etype) + + +def init_pytorch_worker(rank, world_size, cugraph_id, init_wholegraph=False): + import rmm + + rmm.reinitialize( + devices=rank, + ) + + import cupy + + cupy.cuda.Device(rank).use() + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + + from cugraph.testing.mg_utils import enable_spilling + + enable_spilling() + + th.cuda.set_device(rank) + + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + th.distributed.init_process_group("nccl", rank=rank, world_size=world_size) + + if init_wholegraph: + import pylibwholegraph + + pylibwholegraph.torch.initialize.init( + rank, + world_size, + rank, + world_size, + ) + + cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) From e1fa6e06ac9bf64d7a76ff0e45b8c4382d8b403d Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 10 Jul 2024 15:17:43 -0700 Subject: [PATCH 32/47] revert none return, add check --- .../tests/dataloading/test_dataloader_mg.py | 2 +- python/cugraph-dgl/cugraph_dgl/tests/test_graph.py | 2 ++ .../cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py | 3 ++- python/cugraph-dgl/cugraph_dgl/view.py | 12 ++---------- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py index 3eabdf454e2..b32233f16a6 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py @@ -25,7 +25,7 @@ cugraph_comms_shutdown, ) -from utils import init_pytorch_worker +from cugraph_dgl.tests.utils import init_pytorch_worker torch = import_optional("torch") dgl = import_optional("dgl") diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py index 0c1fb088198..a60db97b8d6 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py @@ -52,6 +52,8 @@ def test_graph_make_homogeneous_graph(direction): assert ( graph.nodes() == torch.arange(num_nodes, dtype=torch.int64, device="cuda") ).all() + + assert graph.nodes[None]["x"] is not None assert (graph.nodes[None]["x"] == torch.as_tensor(node_x, device="cuda")).all() assert ( graph.nodes[None]["num"] diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py index cabeecff758..eedda664c52 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py @@ -30,7 +30,7 @@ cugraph_comms_get_raft_handle, ) -from utils import init_pytorch_worker +from .utils import init_pytorch_worker pylibwholegraph = import_optional("pylibwholegraph") torch = import_optional("torch") @@ -75,6 +75,7 @@ def run_test_graph_make_homogeneous_graph_mg(rank, uid, world_size, direction): == torch.arange(global_num_nodes, dtype=torch.int64, device="cuda") ).all() ix = torch.arange(len(node_x) * rank, len(node_x) * (rank + 1), dtype=torch.int64) + assert graph.nodes[ix]["x"] 
is not None assert (graph.nodes[ix]["x"] == torch.as_tensor(node_x, device="cuda")).all() assert ( diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py index 2e836591757..dbc53e73b6a 100644 --- a/python/cugraph-dgl/cugraph_dgl/view.py +++ b/python/cugraph-dgl/cugraph_dgl/view.py @@ -61,11 +61,7 @@ def __getitem__(self, key: str): if self._graph._has_e_emb(t, key) } - return ( - self._graph._get_e_emb(self._etype, key, self._edges) - if self._graph._has_e_emb(self._etype, key) - else None - ) + return self._graph._get_e_emb(self._etype, key, self._edges) def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): if isinstance(self._etype, list): @@ -166,11 +162,7 @@ def __getitem__(self, key: str): if self._graph._has_n_emb(t, key) } else: - return ( - self._graph._get_n_emb(self._ntype, key, self._nodes) - if self._graph._has_n_emb(self._ntype, key) - else None - ) + return self._graph._get_n_emb(self._ntype, key, self._nodes) def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): if isinstance(self._ntype, list): From 85299878533b4c80b39d57b2bda5ad50b336fed6 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 10 Jul 2024 16:11:49 -0700 Subject: [PATCH 33/47] style --- python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py | 2 +- python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py index 1b4233b14e4..e220b93f738 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py @@ -273,7 +273,7 @@ def __del__(self): _clean_directory(self._sampling_output_dir) -def get_batch_id_series(n_output_rows: int, batch_size: int) -> cudf.Series : +def get_batch_id_series(n_output_rows: int, batch_size: int) -> cudf.Series: num_batches = (n_output_rows + batch_size - 1) // batch_size print(f"Number of batches = {num_batches}".format(num_batches)) batch_ar = cp.arange(0, num_batches).repeat(batch_size) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index e1072e6a24d..21b70b05f3a 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -38,7 +38,7 @@ def __init__( device: Union[int, str, "torch.device"] = None, use_ddp: bool = False, ddp_seed: int = 0, - batch_size: int= 1, + batch_size: int = 1, drop_last: bool = False, shuffle: bool = False, use_prefetch_thread: Optional[bool] = None, From 89f4ef44bb3400b86f8022fbd0a1ffa008d4dfe5 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 22 Jul 2024 09:20:15 -0700 Subject: [PATCH 34/47] use global communicator --- python/cugraph-pyg/cugraph_pyg/data/feature_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py index a3715d3ddf4..ce5b186b2fa 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py @@ -169,7 +169,7 @@ def __init__(self, memory_type="distributed", location="cpu"): self.__features = {} - self.__wg_comm = wgth.get_local_node_communicator() + self.__wg_comm = wgth.get_global_node_communicator() self.__wg_type = 
memory_type self.__wg_location = location From 4d82ee0879efce80a605e0b7efabf1aaaacb1aa4 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 22 Jul 2024 12:44:07 -0700 Subject: [PATCH 35/47] global --- python/cugraph-pyg/cugraph_pyg/data/feature_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py index ce5b186b2fa..b6450e7b192 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py @@ -169,7 +169,7 @@ def __init__(self, memory_type="distributed", location="cpu"): self.__features = {} - self.__wg_comm = wgth.get_global_node_communicator() + self.__wg_comm = wgth.get_global_communicator() self.__wg_type = memory_type self.__wg_location = location From 2b160bfa37a2a346bd3553efcb2fd1b6d2cc4ab0 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Wed, 24 Jul 2024 12:08:30 -0700 Subject: [PATCH 36/47] use int64 to store # edges --- python/cugraph-pyg/cugraph_pyg/data/graph_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py index 622b68d37e2..e086bf07b1f 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py @@ -271,7 +271,7 @@ def __get_edgelist(self): torch.tensor( [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda", - dtype=torch.int32, + dtype=torch.int64, ) ) From 6db236c13149b7c53d1a05df8501130172d596c1 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Thu, 25 Jul 2024 13:05:50 -0700 Subject: [PATCH 37/47] example --- .../all_cuda-118_arch-x86_64.yaml | 2 + .../all_cuda-125_arch-x86_64.yaml | 2 + dependencies.yaml | 2 + .../dataloading/utils/sampling_helpers.py | 2 +- .../graphsage/node-classification-dask.py | 273 ++++++++++++++++++ .../examples/graphsage/node-classification.py | 37 ++- 6 files changed, 304 insertions(+), 14 deletions(-) create mode 100644 python/cugraph-dgl/examples/graphsage/node-classification-dask.py diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 5474c087532..24d59ba5865 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -42,6 +42,7 @@ dependencies: - numpy>=1.23,<2.0a0 - numpydoc - nvcc_linux-64=11.8 +- ogb - openmpi - packaging>=21 - pandas @@ -74,6 +75,7 @@ dependencies: - sphinxcontrib-websupport - thriftpy2!=0.5.0,!=0.5.1 - torchdata +- torchmetrics - ucx-proc=*=gpu - ucx-py==0.39.*,>=0.0.0a0 - wget diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 54049a92061..bf7bcc88c51 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -47,6 +47,7 @@ dependencies: - numba>=0.57 - numpy>=1.23,<2.0a0 - numpydoc +- ogb - openmpi - packaging>=21 - pandas @@ -79,6 +80,7 @@ dependencies: - sphinxcontrib-websupport - thriftpy2!=0.5.0,!=0.5.1 - torchdata +- torchmetrics - ucx-proc=*=gpu - ucx-py==0.39.*,>=0.0.0a0 - wget diff --git a/dependencies.yaml b/dependencies.yaml index 5ffbcbab5fc..cd7cda89884 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -670,6 +670,8 @@ dependencies: - &pytorch_conda pytorch>=2.0,<2.2.0a0 - torchdata - pydantic + - ogb + - torchmetrics specific: - 
output_types: [requirements] diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index e8c305b6ba1..3b7e4502134 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -593,7 +593,7 @@ def _create_homogeneous_blocks_from_csc( blocks = [] seednodes_range = None - for mfg in mfgs: + for mfg in reversed(mfgs): block_mfg = _create_homogeneous_dgl_block_from_tensor_d( { "sources": mfg.src_ids(), diff --git a/python/cugraph-dgl/examples/graphsage/node-classification-dask.py b/python/cugraph-dgl/examples/graphsage/node-classification-dask.py new file mode 100644 index 00000000000..03cf49bd939 --- /dev/null +++ b/python/cugraph-dgl/examples/graphsage/node-classification-dask.py @@ -0,0 +1,273 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Example modified from: +# https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/node_classification.py + +# Ignore Warning +import warnings +import time +import cugraph_dgl +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchmetrics.functional as MF +import dgl +import dgl.nn as dglnn +from dgl.data import AsNodePredDataset +from dgl.dataloading import ( + DataLoader, + NeighborSampler, + MultiLayerFullNeighborSampler, +) +from ogb.nodeproppred import DglNodePropPredDataset +import tqdm +import argparse + +warnings.filterwarnings("ignore") + + +def set_allocators(): + import rmm + import cudf + import cupy + from rmm.allocators.torch import rmm_torch_allocator + from rmm.allocators.cupy import rmm_cupy_allocator + + mr = rmm.mr.CudaAsyncMemoryResource() + rmm.mr.set_current_device_resource(mr) + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + cupy.cuda.set_allocator(rmm_cupy_allocator) + cudf.set_option("spill", True) + + +class SAGE(nn.Module): + def __init__(self, in_size, hid_size, out_size): + super().__init__() + self.layers = nn.ModuleList() + # three-layer GraphSAGE-mean + self.layers.append(dglnn.SAGEConv(in_size, hid_size, "mean")) + self.layers.append(dglnn.SAGEConv(hid_size, hid_size, "mean")) + self.layers.append(dglnn.SAGEConv(hid_size, out_size, "mean")) + self.dropout = nn.Dropout(0.5) + self.hid_size = hid_size + self.out_size = out_size + + def forward(self, blocks, x): + h = x + for l_id, (layer, block) in enumerate(zip(self.layers, blocks)): + h = layer(block, h) + if l_id != len(self.layers) - 1: + h = F.relu(h) + h = self.dropout(h) + return h + + def inference(self, g, device, batch_size): + """Conduct layer-wise inference to get all the node embeddings.""" + all_node_ids = torch.arange(0, g.num_nodes()).to(device) + feat = g.get_node_storage(key="feat", ntype="_N").fetch( + all_node_ids, device=device + ) + + sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) + dataloader = DataLoader( + g, + 
torch.arange(g.num_nodes()).to(g.device), + sampler, + device=device, + batch_size=batch_size, + shuffle=False, + drop_last=False, + num_workers=0, + ) + buffer_device = torch.device("cpu") + pin_memory = buffer_device != device + + for l_id, layer in enumerate(self.layers): + y = torch.empty( + g.num_nodes(), + self.hid_size if l_id != len(self.layers) - 1 else self.out_size, + device=buffer_device, + pin_memory=pin_memory, + ) + feat = feat.to(device) + for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): + x = feat[input_nodes] + h = layer(blocks[0], x) # len(blocks) = 1 + if l_id != len(self.layers) - 1: + h = F.relu(h) + h = self.dropout(h) + # by design, our output nodes are contiguous + y[output_nodes[0] : output_nodes[-1] + 1] = h.to(buffer_device) + feat = y + return y + + +def evaluate(model, graph, dataloader): + model.eval() + ys = [] + y_hats = [] + for it, (input_nodes, output_nodes, blocks) in enumerate(dataloader): + with torch.no_grad(): + if isinstance(graph.ndata["feat"], dict): + x = graph.ndata["feat"]["_N"][input_nodes] + label = graph.ndata["label"]["_N"][output_nodes] + else: + x = graph.ndata["feat"][input_nodes] + label = graph.ndata["label"][output_nodes] + ys.append(label) + y_hats.append(model(blocks, x)) + num_classes = y_hats[0].shape[1] + return MF.accuracy( + torch.cat(y_hats), + torch.cat(ys), + task="multiclass", + num_classes=num_classes, + ) + + +def layerwise_infer(device, graph, nid, model, batch_size): + model.eval() + with torch.no_grad(): + pred = model.inference(graph, device, batch_size) # pred in buffer_device + pred = pred[nid] + label = graph.ndata["label"] + if isinstance(label, dict): + label = label["_N"] + label = label[nid].to(device).to(pred.device) + num_classes = pred.shape[1] + return MF.accuracy(pred, label, task="multiclass", num_classes=num_classes) + + +def train(args, device, g, dataset, model): + # create sampler & dataloader + train_idx = dataset.train_idx.to(device) + val_idx = dataset.val_idx.to(device) + + use_uva = args.mode == "mixed" + batch_size = 1024 + fanouts = [5, 10, 15] + sampler = NeighborSampler(fanouts) + train_dataloader = DataLoader( + g, + train_idx, + sampler, + device=device, + batch_size=batch_size, + shuffle=True, + drop_last=False, + num_workers=0, + use_uva=use_uva, + ) + val_dataloader = DataLoader( + g, + val_idx, + sampler, + device=device, + batch_size=batch_size, + shuffle=True, + drop_last=False, + num_workers=0, + use_uva=use_uva, + ) + + opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4) + + for epoch in range(10): + model.train() + total_loss = 0 + st = time.time() + for it, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): + if isinstance(g.ndata["feat"], dict): + x = g.ndata["feat"]["_N"][input_nodes] + y = g.ndata["label"]["_N"][output_nodes] + else: + x = g.ndata["feat"][input_nodes] + y = g.ndata["label"][output_nodes] + + print(x.shape, input_nodes.shape, y.shape, output_nodes.shape) + print([b.num_nodes() for b in blocks]) + + y_hat = model(blocks, x) + loss = F.cross_entropy(y_hat, y) + opt.zero_grad() + loss.backward() + opt.step() + total_loss += loss.item() + + et = time.time() + + print(f"Time taken for epoch {epoch} with batch_size {batch_size} = {et-st} s") + acc = evaluate(model, g, val_dataloader) + print( + "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format( + epoch, total_loss / (it + 1), acc.item() + ) + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--mode", + 
default="gpu_cugraph_dgl", + choices=["cpu", "mixed", "gpu_dgl", "gpu_cugraph_dgl"], + help="Training mode." + " 'cpu' for CPU training," + " 'mixed' for CPU-GPU mixed training, " + " 'gpu_dgl' for pure-GPU training, " + " 'gpu_cugraph_dgl' for pure-GPU training.", + ) + args = parser.parse_args() + if not torch.cuda.is_available(): + args.mode = "cpu" + if args.mode == "gpu_cugraph_dgl": + set_allocators() + print(f"Training in {args.mode} mode.") + + # load and preprocess dataset + print("Loading data") + dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) + g = dataset[0] + g = dgl.add_self_loop(g) + if args.mode == "gpu_cugraph_dgl": + g = cugraph_dgl.cugraph_storage_from_heterograph(g.to("cuda")) + del dataset.g + + else: + g = g.to("cuda" if args.mode == "gpu_dgl" else "cpu") + device = torch.device( + "cpu" if args.mode == "cpu" or args.mode == "mixed" else "cuda" + ) + + # create GraphSAGE model + feat_shape = ( + g.get_node_storage(key="feat", ntype="_N") + .fetch(torch.LongTensor([0]).to(device), device=device) + .shape[1] + ) + print(feat_shape) + # no ndata in cugraph storage object + in_size = feat_shape + out_size = dataset.num_classes + model = SAGE(in_size, 256, out_size).to(device) + + # model training + print("Training...") + train(args, device, g, dataset, model) + + # test the model + print("Testing...") + acc = layerwise_infer(device, g, dataset.test_idx, model, batch_size=4096) + print("Test Accuracy {:.4f}".format(acc.item())) diff --git a/python/cugraph-dgl/examples/graphsage/node-classification.py b/python/cugraph-dgl/examples/graphsage/node-classification.py index 539fd86d136..a8a542f8017 100644 --- a/python/cugraph-dgl/examples/graphsage/node-classification.py +++ b/python/cugraph-dgl/examples/graphsage/node-classification.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,6 +19,7 @@ import warnings import time import cugraph_dgl +import cugraph_dgl.dataloading import torch import torch.nn as nn import torch.nn.functional as F @@ -80,8 +81,13 @@ def inference(self, g, device, batch_size): all_node_ids, device=device ) - sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) - dataloader = DataLoader( + if isinstance(g, cugraph_dgl.Graph): + sampler = cugraph_dgl.sampling.NeighborSampler(-1) + loader_cls = cugraph_dgl.dataloading.FutureDataLoader + else: + sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) + loader_cls = DataLoader + dataloader = loader_cls( g, torch.arange(g.num_nodes()).to(g.device), sampler, @@ -158,8 +164,13 @@ def train(args, device, g, dataset, model): use_uva = args.mode == "mixed" batch_size = 1024 fanouts = [5, 10, 15] - sampler = NeighborSampler(fanouts) - train_dataloader = DataLoader( + if isinstance(g, cugraph_dgl.Graph): + sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) + loader_cls = cugraph_dgl.dataloading.FutureDataLoader + else: + sampler = NeighborSampler(fanouts) + loader_cls = DataLoader + train_dataloader = loader_cls( g, train_idx, sampler, @@ -170,7 +181,7 @@ def train(args, device, g, dataset, model): num_workers=0, use_uva=use_uva, ) - val_dataloader = DataLoader( + val_dataloader = loader_cls( g, val_idx, sampler, @@ -195,6 +206,9 @@ def train(args, device, g, dataset, model): else: x = g.ndata["feat"][input_nodes] y = g.ndata["label"][output_nodes] + + print(x.shape, input_nodes.shape, y.shape, output_nodes.shape) + print([b.num_nodes() for b in blocks]) y_hat = model(blocks, x) loss = F.cross_entropy(y_hat, y) opt.zero_grad() @@ -238,7 +252,7 @@ def train(args, device, g, dataset, model): g = dataset[0] g = dgl.add_self_loop(g) if args.mode == "gpu_cugraph_dgl": - g = cugraph_dgl.cugraph_storage_from_heterograph(g.to("cuda")) + g = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g.to("cuda")) del dataset.g else: @@ -248,12 +262,9 @@ def train(args, device, g, dataset, model): ) # create GraphSAGE model - feat_shape = ( - g.get_node_storage(key="feat", ntype="_N") - .fetch(torch.LongTensor([0]).to(device), device=device) - .shape[1] - ) - # no ndata in cugraph storage object + feat_shape = g.ndata["feat"].shape[1] + print(feat_shape) + in_size = feat_shape out_size = dataset.num_classes model = SAGE(in_size, 256, out_size).to(device) From 7a3d38f3cddb5743120c91d28470b3ad1c85524a Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Thu, 25 Jul 2024 13:06:26 -0700 Subject: [PATCH 38/47] reverse mfgs --- .../cugraph_dgl/dataloading/utils/sampling_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index e8c305b6ba1..3b7e4502134 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -593,7 +593,7 @@ def _create_homogeneous_blocks_from_csc( blocks = [] seednodes_range = None - for mfg in mfgs: + for mfg in reversed(mfgs): block_mfg = _create_homogeneous_dgl_block_from_tensor_d( { "sources": mfg.src_ids(), From 710741c822dd3ac41e88a0e4860cdfa69728ab7d Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Tue, 30 Jul 2024 07:20:55 -0700 Subject: [PATCH 39/47] node classification --- python/cugraph-dgl/cugraph_dgl/graph.py | 16 +- 
 python/cugraph-dgl/cugraph_dgl/view.py        |  36 +++
 .../graphsage/node-classification-dask.py     |   3 -
 .../examples/graphsage/node-classification.py |   2 -
 .../multi_trainer_MG_example/model.py         |   6 +-
 .../{workflow.py => workflow_dask.py}         |   4 +
 .../multi_trainer_MG_example/workflow_mnmg.py |   0
 .../multi_trainer_MG_example/workflow_snmg.py | 228 ++++++++++++++++++
 8 files changed, 280 insertions(+), 15 deletions(-)
 rename python/cugraph-dgl/examples/multi_trainer_MG_example/{workflow.py => workflow_dask.py} (98%)
 create mode 100644 python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py
 create mode 100644 python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py

diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py
index 2eba13c6958..1341719a4b4 100644
--- a/python/cugraph-dgl/cugraph_dgl/graph.py
+++ b/python/cugraph-dgl/cugraph_dgl/graph.py
@@ -29,6 +29,7 @@
     HeteroNodeDataView,
     HeteroEdgeView,
     HeteroEdgeDataView,
+    EmbeddingView,
 )
@@ -567,8 +568,8 @@
     def _has_n_emb(self, ntype: str, emb_name: str) -> bool:
         return (ntype, emb_name) in self.__ndata_storage

     def _get_n_emb(
-        self, ntype: str, emb_name: str, u: Union[str, TensorType]
-    ) -> "torch.Tensor":
+        self, ntype: Union[str, None], emb_name: str, u: Union[str, TensorType]
+    ) -> Union["torch.Tensor", "cugraph_dgl.view.EmbeddingView"]:
         """
         Gets the embedding of a single node type.
         Unlike DGL, this function takes the string node
@@ -583,11 +584,11 @@ def _get_n_emb(
         u: Union[str, TensorType]
             Nodes to get the representation of, or ALL
            to get the representation of all nodes of
-            the given type.
+            the given type (returns embedding view).

         Returns
         -------
-        torch.Tensor
+        Union[torch.Tensor, cugraph_dgl.view.EmbeddingView]
            The embedding of the given edge type with the given embedding name.
        """

@@ -598,9 +599,11 @@ def _get_n_emb(
             raise ValueError("Must provide the node type for a heterogeneous graph")

         if dgl.base.is_all(u):
-            u = torch.arange(self.num_nodes(ntype), dtype=self.idtype, device="cpu")
+            return EmbeddingView(
+                self.__ndata_storage[ntype, emb_name], self.num_nodes(ntype)
+            )

         try:
             return self.__ndata_storage[ntype, emb_name].fetch(
                 _cast_to_torch_tensor(u), "cuda"
             )
@@ -644,7 +646,9 @@ def _get_e_emb(
         etype = self.to_canonical_etype(etype)

         if dgl.base.is_all(u):
-            u = torch.arange(self.num_edges(etype), dtype=self.idtype, device="cpu")
+            return EmbeddingView(
+                self.__edata_storage[etype, emb_name], self.num_edges(etype)
+            )

         try:
             return self.__edata_storage[etype, emb_name].fetch(
diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py
index dbc53e73b6a..4c980806ec7 100644
--- a/python/cugraph-dgl/cugraph_dgl/view.py
+++ b/python/cugraph-dgl/cugraph_dgl/view.py
@@ -12,6 +12,8 @@
 # limitations under the License.
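Net effect of the graph.py change above: requesting ALL rows of ndata/edata now returns a lazy EmbeddingView (defined in view.py below) instead of materializing the whole tensor. A small sketch of the intended behavior (assumes the single-GPU Graph API from this series and a CUDA device):

import torch

import cugraph_dgl

g = cugraph_dgl.Graph()
g.add_nodes(3, data={"feat": torch.randn(3, 4, device="cuda")})

feat = g.ndata["feat"]  # EmbeddingView; nothing is fetched yet
assert feat.shape == (3, 4)  # probes a single row to learn the width
x = feat[torch.tensor([0, 2])]  # fetches only the requested rows to the GPU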
+import warnings
+
 from collections import defaultdict
 from collections.abc import MutableMapping
 from typing import Union, Dict, List, Tuple
@@ -20,11 +22,45 @@

 import cugraph_dgl
 from cugraph_dgl.typing import TensorType
+from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor

 torch = import_optional("torch")
 dgl = import_optional("dgl")


+class EmbeddingView:
+    def __init__(self, storage: "dgl.storages.base.FeatureStorage", ld: int):
+        self.__ld = ld
+        self.__storage = storage
+
+    def __getitem__(self, u: TensorType) -> "torch.Tensor":
+        u = _cast_to_torch_tensor(u)
+        try:
+            return self.__storage.fetch(
+                u,
+                "cuda",
+            )
+        except RuntimeError as ex:
+            warnings.warn(
+                "Got error accessing data, trying again with index on device: "
+                + str(ex)
+            )
+            return self.__storage.fetch(
+                u.cuda(),
+                "cuda",
+            )
+
+    @property
+    def shape(self) -> "torch.Size":
+        try:
+            f = self.__storage.fetch(torch.tensor([0]), "cpu")
+        except RuntimeError:
+            f = self.__storage.fetch(torch.tensor([0], device="cuda"), "cuda")
+        sz = list(f.shape)
+        sz[0] = self.__ld
+        return torch.Size(tuple(sz))
+
+
 class HeteroEdgeDataView(MutableMapping):
     """
     Duck-typed version of DGL's HeteroEdgeDataView.
diff --git a/python/cugraph-dgl/examples/graphsage/node-classification-dask.py b/python/cugraph-dgl/examples/graphsage/node-classification-dask.py
index 03cf49bd939..992669e4284 100644
--- a/python/cugraph-dgl/examples/graphsage/node-classification-dask.py
+++ b/python/cugraph-dgl/examples/graphsage/node-classification-dask.py
@@ -196,9 +196,6 @@ def train(args, device, g, dataset, model):
                 x = g.ndata["feat"][input_nodes]
                 y = g.ndata["label"][output_nodes]

-            print(x.shape, input_nodes.shape, y.shape, output_nodes.shape)
-            print([b.num_nodes() for b in blocks])
-
             y_hat = model(blocks, x)
             loss = F.cross_entropy(y_hat, y)
             opt.zero_grad()
diff --git a/python/cugraph-dgl/examples/graphsage/node-classification.py b/python/cugraph-dgl/examples/graphsage/node-classification.py
index a8a542f8017..731cbcce97e 100644
--- a/python/cugraph-dgl/examples/graphsage/node-classification.py
+++ b/python/cugraph-dgl/examples/graphsage/node-classification.py
@@ -207,8 +207,6 @@ def train(args, device, g, dataset, model):
                 x = g.ndata["feat"][input_nodes]
                 y = g.ndata["label"][output_nodes]

-            print(x.shape, input_nodes.shape, y.shape, output_nodes.shape)
-            print([b.num_nodes() for b in blocks])
             y_hat = model(blocks, x)
             loss = F.cross_entropy(y_hat, y)
             opt.zero_grad()
diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py
index a6f771e4b51..acdd832424b 100644
--- a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py
+++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py
@@ -114,15 +114,13 @@ def layerwise_infer(graph, nid, model, batch_size, device):


 def train_model(model, g, opt, train_dataloader, num_epochs, rank, val_nid):
-    g.ndata["feat"]["_N"] = g.ndata["feat"]["_N"].to("cuda")
-    g.ndata["label"]["_N"] = g.ndata["label"]["_N"].to("cuda")
     st = time.time()
     model.train()
     for epoch in range(num_epochs):
         total_loss = 0
         for _, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader):
-            x = g.ndata["feat"]["_N"][input_nodes]
-            y = g.ndata["label"]["_N"][output_nodes]
+            x = g.ndata["feat"][input_nodes]
+            y = g.ndata["label"][output_nodes]
             y_hat = model(blocks, x)
             y = y.squeeze(1)
             loss = F.cross_entropy(y_hat, y)
diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow.py
b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_dask.py
similarity index 98%
rename from python/cugraph-dgl/examples/multi_trainer_MG_example/workflow.py
rename to python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_dask.py
index 474f17dc2bb..8ca40bd1e2a 100644
--- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow.py
+++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_dask.py
@@ -204,6 +204,10 @@ def run_workflow(rank, devices, scheduler_address):
     n_epochs = 10
     total_st = time.time()
     opt = torch.optim.Adam(model.parameters(), lr=0.01)
+
+    gs.ndata["feat"] = gs.ndata["feat"].to("cuda")
+    gs.ndata["label"] = gs.ndata["label"].to("cuda")
+
     train_model(model, gs, opt, dataloader, n_epochs, rank, valid_idx)
     torch.distributed.barrier()
     total_et = time.time()
diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py
new file mode 100644
index 00000000000..f043940486b
--- /dev/null
+++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py
@@ -0,0 +1,228 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
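A note on the workflow_dask.py hunk above: since model.py's train_model no longer moves data itself, the Dask workflow pins features and labels to the GPU once, before the epoch loop, and the per-batch gathers inside train_model then stay on device. Sketched (gs is the graph storage from this workflow):

# One-time host-to-device move before training starts:
gs.ndata["feat"] = gs.ndata["feat"].to("cuda")
gs.ndata["label"] = gs.ndata["label"].to("cuda")

# Each iteration then gathers rows that are already on device:
x = gs.ndata["feat"][input_nodes]
y = gs.ndata["label"][output_nodes]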
+
+import dgl
+import torch
+import time
+import tempfile
+import argparse
+import os
+
+import cugraph_dgl
+
+from cugraph.gnn import (
+    cugraph_comms_init,
+    cugraph_comms_shutdown,
+    cugraph_comms_create_unique_id,
+)
+
+from pylibwholegraph.torch.initialize import (
+    init as wm_init,
+    finalize as wm_finalize,
+)
+
+# Allow computation on objects that are larger than GPU memory
+# https://docs.rapids.ai/api/cudf/stable/developer_guide/library_design/#spilling-to-host-memory
+os.environ["CUDF_SPILL"] = "1"
+
+
+def initialize_pytorch_worker(dev_id):
+    import cupy as cp
+    import rmm
+    from rmm.allocators.cupy import rmm_cupy_allocator
+
+    dev = cp.cuda.Device(
+        dev_id
+    )  # Create cuda context on the right gpu, defaults to gpu-0
+    dev.use()
+    rmm.reinitialize(
+        pool_allocator=True,
+        initial_pool_size=10e9,
+        maximum_pool_size=15e9,
+        devices=[dev_id],
+    )
+
+    from cugraph.testing.mg_utils import enable_spilling
+
+    enable_spilling()
+
+    torch.cuda.set_device(dev_id)
+    cp.cuda.set_allocator(rmm_cupy_allocator)
+    print("device_id", dev_id, flush=True)
+
+
+def load_dgl_dataset(dataset_name="ogbn-products"):
+    from ogb.nodeproppred import DglNodePropPredDataset
+
+    dataset = DglNodePropPredDataset(name=dataset_name)
+    split_idx = dataset.get_idx_split()
+    train_idx, valid_idx, test_idx = (
+        split_idx["train"],
+        split_idx["valid"],
+        split_idx["test"],
+    )
+    g, label = dataset[0]
+    g.ndata["label"] = label
+    if len(g.etypes) <= 1:
+        g = dgl.add_self_loop(g)
+    else:
+        for etype in g.etypes:
+            if etype[0] == etype[2]:
+                # only add self loops for src->dst
+                g = dgl.add_self_loop(g, etype=etype)
+
+    g = g.int()
+    train_idx = train_idx.int()
+    valid_idx = valid_idx.int()
+    test_idx = test_idx.int()
+    return g, train_idx, valid_idx, test_idx, dataset.num_classes
+
+
+def create_cugraph_graphstore_from_dgl_dataset(dataset, rank, world_size):
+    (g, train_idx, valid_idx, test_idx, num_classes) = dataset
+    # Partition the data
+    cg = cugraph_dgl.Graph(
+        is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph"
+    )
+
+    nix = torch.tensor_split(torch.arange(g.num_nodes()), world_size)[rank]
+    ndata = {k: g.ndata[k][nix].cuda() for k in g.ndata.keys()}
+
+    eix = torch.tensor_split(torch.arange(g.num_edges()), world_size)[rank]
+    src, dst = g.all_edges(form="uv", order="eid")
+    edata = {k: g.edata[k][eix].cuda() for k in g.edata.keys()}
+
+    cg.add_nodes(g.num_nodes(), data=ndata)
+    cg.add_edges(
+        torch.tensor_split(src, world_size)[rank].cuda(),
+        torch.tensor_split(dst, world_size)[rank].cuda(),
+        data=edata,
+    )
+
+    return (
+        cg,
+        torch.tensor_split(train_idx, world_size)[rank].to(torch.int64),
+        torch.tensor_split(valid_idx, world_size)[rank].to(torch.int64),
+        torch.tensor_split(test_idx, world_size)[rank].to(torch.int64),
+        num_classes,
+    )
+
+
+def create_dataloader(gs, train_idx, device, temp_dir, stage):
+    import cugraph_dgl
+
+    temp_path = os.path.join(temp_dir, f"{stage}_{device}")
+    os.mkdir(temp_path)
+
+    sampler = cugraph_dgl.dataloading.NeighborSampler(
+        [10, 20],
+        directory=temp_path,
+        batches_per_partition=10,
+    )
+    dataloader = cugraph_dgl.dataloading.FutureDataLoader(
+        gs,
+        train_idx,
+        sampler,
+        device=device,  # Put the sampled MFGs on CPU or GPU
+        use_ddp=True,  # Make it work with distributed data parallel
+        batch_size=1024,
+        shuffle=False,  # Whether to shuffle the nodes for every epoch
+        drop_last=False,
+        num_workers=0,
+    )
+    return dataloader
+
+
+def run_workflow(rank, world_size, cugraph_id, dataset, temp_dir):
+    from model import Sage, train_model
+
+    # Below sets gpu_number
+    dev_id = rank
+    initialize_pytorch_worker(dev_id)
+    device = torch.device(f"cuda:{dev_id}")
+
+    # Pytorch training worker initialization
+    dist_init_method = "tcp://{master_ip}:{master_port}".format(
+        master_ip="127.0.0.1", master_port="12346"
+    )
+
+    torch.distributed.init_process_group(
+        backend="nccl",
+        init_method=dist_init_method,
+        world_size=world_size,
+        rank=rank,
+    )
+
+    cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank)
+    wm_init(rank, world_size, rank, world_size)
+
+    print(f"rank {rank}.", flush=True)
+    print("Initialized across GPUs.")
+
+    (
+        gs,
+        train_idx,
+        valid_idx,
+        test_idx,
+        num_classes,
+    ) = create_cugraph_graphstore_from_dgl_dataset(dataset, rank, world_size)
+    del dataset
+
+    torch.distributed.barrier()
+    print(f"Loading graph to worker {rank} is complete", flush=True)
+
+    dataloader = create_dataloader(gs, train_idx, device, temp_dir, "train")
+    print("Dataloader Creation Complete", flush=True)
+    num_feats = gs.ndata["feat"].shape[1]
+    hid_size = 256
+    # Load Training example
+    model = Sage(num_feats, hid_size, num_classes).to(device)
+    model = torch.nn.parallel.DistributedDataParallel(
+        model,
+        device_ids=[device],
+        output_device=device,
+    )
+    torch.distributed.barrier()
+    n_epochs = 10
+    total_st = time.time()
+    opt = torch.optim.Adam(model.parameters(), lr=0.01)
+    train_model(model, gs, opt, dataloader, n_epochs, rank, valid_idx)
+    torch.distributed.barrier()
+    total_et = time.time()
+    print(
+        f"Total time taken on n_epochs {n_epochs} = {total_et-total_st} s",
+        f"measured by worker = {rank}",
+    )
+
+    wm_finalize()
+    cugraph_comms_shutdown()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dataset", type=str, default="ogbn-products")
+    args = parser.parse_args()
+
+    from rmm.allocators.torch import rmm_torch_allocator
+
+    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+    # Create the uid needed for cuGraph comms
+    cugraph_id = cugraph_comms_create_unique_id()
+
+    ds = load_dgl_dataset(args.dataset)
+
+    world_size = torch.cuda.device_count()
+
+    with tempfile.TemporaryDirectory() as directory:
+        torch.multiprocessing.spawn(
+            run_workflow,
+            args=(world_size, cugraph_id, ds, directory),
+            nprocs=world_size,
+        )

From f943d9144afdb6d0409daed694454ee2ad6013f0 Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Tue, 30 Jul 2024 13:04:14 -0700
Subject: [PATCH 40/47] mnmg

---
 .../multi_trainer_MG_example/workflow_mnmg.py | 228 ++++++++++++++++++
 1 file changed, 228 insertions(+)

diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py
index e69de29bb2d..f043940486b 100644
--- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py
+++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py
@@ -0,0 +1,228 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import dgl +import torch +import time +import tempfile +import argparse +import os + +import cugraph_dgl + +from cugraph.gnn import ( + cugraph_comms_init, + cugraph_comms_shutdown, + cugraph_comms_create_unique_id, +) + +from pylibwholegraph.torch.initialize import ( + init as wm_init, + finalize as wm_finalize, +) + +# Allow computation on objects that are larger than GPU memory +# https://docs.rapids.ai/api/cudf/stable/developer_guide/library_design/#spilling-to-host-memory +os.environ["CUDF_SPILL"] = "1" + + +def initalize_pytorch_worker(dev_id): + import cupy as cp + import rmm + from rmm.allocators.cupy import rmm_cupy_allocator + + dev = cp.cuda.Device( + dev_id + ) # Create cuda context on the right gpu, defaults to gpu-0 + dev.use() + rmm.reinitialize( + pool_allocator=True, + initial_pool_size=10e9, + maximum_pool_size=15e9, + devices=[dev_id], + ) + + from cugraph.testing.mg_utils import enable_spilling + enable_spilling() + + torch.cuda.set_device(dev_id) + cp.cuda.set_allocator(rmm_cupy_allocator) + print("device_id", dev_id, flush=True) + + +def load_dgl_dataset(dataset_name="ogbn-products"): + from ogb.nodeproppred import DglNodePropPredDataset + + dataset = DglNodePropPredDataset(name=dataset_name) + split_idx = dataset.get_idx_split() + train_idx, valid_idx, test_idx = ( + split_idx["train"], + split_idx["valid"], + split_idx["test"], + ) + g, label = dataset[0] + g.ndata["label"] = label + if len(g.etypes) <= 1: + g = dgl.add_self_loop(g) + else: + for etype in g.etypes: + if etype[0] == etype[2]: + # only add self loops for src->dst + g = dgl.add_self_loop(g, etype=etype) + + g = g.int() + train_idx = train_idx.int() + valid_idx = valid_idx.int() + test_idx = test_idx.int() + return g, train_idx, valid_idx, test_idx, dataset.num_classes + + +def create_cugraph_graphstore_from_dgl_dataset( + dataset, rank, world_size +): + (g, train_idx, valid_idx, test_idx, num_classes) = dataset + # Partition the data + cg = cugraph_dgl.Graph(is_multi_gpu=True, ndata_storage='wholegraph', edata_storage='wholegraph') + + nix = torch.tensor_split(torch.arange(g.num_nodes()), world_size)[rank] + ndata = { + k: g.ndata[k][nix].cuda() + for k in g.ndata.keys() + } + + eix = torch.tensor_split(torch.arange(g.num_edges()), world_size)[rank] + src, dst = g.all_edges(form='uv', order='eid') + edata = { + k: g.edata[k][eix].cuda() + for k in g.edata.keys() + } + + cg.add_nodes(g.num_nodes(), data=ndata) + cg.add_edges( + torch.tensor_split(src, world_size)[rank].cuda(), + torch.tensor_split(dst, world_size)[rank].cuda(), + data=edata, + ) + + return (cg, torch.tensor_split(train_idx, world_size)[rank].to(torch.int64), torch.tensor_split(valid_idx, world_size)[rank].to(torch.int64), torch.tensor_split(test_idx, world_size)[rank].to(torch.int64), num_classes) + + +def create_dataloader(gs, train_idx, device, temp_dir, stage): + import cugraph_dgl + + temp_path = os.path.join(temp_dir, f'{stage}_{device}') + os.mkdir(temp_path) + + sampler = cugraph_dgl.dataloading.NeighborSampler([10, 20], directory=temp_path, batches_per_partition=10,) + dataloader = cugraph_dgl.dataloading.FutureDataLoader( + gs, + train_idx, + sampler, + device=device, # Put the sampled MFGs on CPU or GPU + use_ddp=True, # Make it work with distributed data parallel + batch_size=1024, + shuffle=False, # Whether to shuffle the nodes for every epoch + drop_last=False, + num_workers=0, + ) + return dataloader + + +def run_workflow(rank, world_size, cugraph_id, dataset, temp_dir): + from model import Sage, train_model + + # Below 
sets the GPU for this trainer process
+    dev_id = rank
+    initalize_pytorch_worker(dev_id)
+    device = torch.device(f"cuda:{dev_id}")
+
+    # PyTorch training worker initialization
+    dist_init_method = "tcp://{master_ip}:{master_port}".format(
+        master_ip="127.0.0.1", master_port="12346"
+    )
+
+    torch.distributed.init_process_group(
+        backend="nccl",
+        init_method=dist_init_method,
+        world_size=world_size,
+        rank=rank,
+    )
+
+    # Initialize cuGraph comms (using the shared uid) and WholeGraph on this rank
+    cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank)
+    wm_init(rank, world_size, rank, world_size)
+
+    print(f"rank {rank}.", flush=True)
+    print("Initialized across GPUs.")
+
+    (
+        gs,
+        train_idx,
+        valid_idx,
+        test_idx,
+        num_classes,
+    ) = create_cugraph_graphstore_from_dgl_dataset(
+        dataset, rank, world_size,
+    )
+    del dataset
+
+    torch.distributed.barrier()
+    print(f"Loading graph to worker {rank} is complete", flush=True)
+
+    dataloader = create_dataloader(gs, train_idx, device, temp_dir, 'train')
+    print("Dataloader Creation Complete", flush=True)
+    num_feats = gs.ndata["feat"].shape[1]
+    hid_size = 256
+    # Create the model and wrap it with DistributedDataParallel
+    model = Sage(num_feats, hid_size, num_classes).to(device)
+    model = torch.nn.parallel.DistributedDataParallel(
+        model,
+        device_ids=[device],
+        output_device=device,
+    )
+    torch.distributed.barrier()
+    n_epochs = 10
+    total_st = time.time()
+    opt = torch.optim.Adam(model.parameters(), lr=0.01)
+    train_model(model, gs, opt, dataloader, n_epochs, rank, valid_idx)
+    torch.distributed.barrier()
+    total_et = time.time()
+    print(
+        f"Total time taken on n_epochs {n_epochs} = {total_et-total_st} s",
+        f"measured by worker = {rank}",
+    )
+
+    wm_finalize()
+    cugraph_comms_shutdown()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dataset', type=str, default='ogbn-products')
+    args = parser.parse_args()
+
+    from rmm.allocators.torch import rmm_torch_allocator
+    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+    # Create the uid needed for cuGraph comms
+    cugraph_id = cugraph_comms_create_unique_id()
+
+    ds = load_dgl_dataset(args.dataset)
+
+    world_size = torch.cuda.device_count()
+
+    with tempfile.TemporaryDirectory() as directory:
+        torch.multiprocessing.spawn(
+            run_workflow,
+            args=(world_size, cugraph_id, ds, directory),
+            nprocs=world_size,
+        )
From 7ba4d898ec76c3e7d0e7f19c3669c12075d4d6a6 Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Tue, 30 Jul 2024 13:05:31 -0700
Subject: [PATCH 41/47] use global communicator

---
 python/cugraph-dgl/cugraph_dgl/features.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cugraph-dgl/cugraph_dgl/features.py b/python/cugraph-dgl/cugraph_dgl/features.py
index b4ff0049494..9dc009f4127 100644
--- a/python/cugraph-dgl/cugraph_dgl/features.py
+++ b/python/cugraph-dgl/cugraph_dgl/features.py
@@ -51,7 +51,7 @@ def __init__(
         location: str(optional, default='cpu')
             The location ('cpu' or 'cuda') where data is stored.
         """
-        self.__wg_comm = wgth.get_local_node_communicator()
+        self.__wg_comm = wgth.get_global_communicator()
+        # The global communicator spans every rank in the job, so feature
+        # tensors are shared across all nodes rather than only the local node.

         if len(tensor.shape) > 2:
             raise ValueError("Only 1-D or 2-D tensors are supported by WholeGraph.")
From 2d3a640ef042209c80ceeaab0c4fdb8438f83ee9 Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Thu, 1 Aug 2024 10:56:46 -0700
Subject: [PATCH 42/47] fix partition function

---
 .../examples/multi_trainer_MG_example/workflow_mnmg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py
index 741f2e4fa31..2bec1a3dbcf 100644
--- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py
+++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py
@@ -276,7 +276,7 @@ def run_workflow(
     meta_path = os.path.join(args.dataset_root, args.dataset + "_meta.json")

     if not args.skip_partition and global_rank == 0:
-        partition_data(*load_dgl_dataset(args.dataset_root, args.dataset))
+        partition_data(*load_dgl_dataset(args.dataset_root, args.dataset), edge_path, feature_path, label_path, meta_path)

     torch.distributed.barrier()
     print("loading partitions...")
From 994aca8975abdff7ab7c4a76d5e1270076e00bf9 Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Thu, 1 Aug 2024 13:04:02 -0700
Subject: [PATCH 43/47] fix minor issues

---
 .../cugraph_dgl/dataloading/dataloader.py     |  4 +++
 .../dataloading/neighbor_sampler.py           |  2 +-
 .../multi_trainer_MG_example/model.py         |  7 +++---
 .../multi_trainer_MG_example/workflow_mnmg.py | 25 ++++++++++++-------
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py
index 21b70b05f3a..4f36353cb18 100644
--- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py
+++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py
@@ -140,6 +140,10 @@ def __init__(
         self.__graph = graph
         self.__device = device

+    @property
+    def _batch_size(self):
+        return self.__batch_size
+
     @property
     def dataset(
         self,
diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
index 1a35c3ea027..87d111adcba 100644
--- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
+++ b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
@@ -194,7 +194,7 @@ def sample(
         if g.is_homogeneous:
             indices = torch.concat(list(indices))
-            ds.sample_from_nodes(indices, batch_size=batch_size)
+            ds.sample_from_nodes(indices.long(), batch_size=batch_size)
             return HomogeneousSampleReader(
                 ds.get_reader(), self.output_format, self.edge_dir
             )
diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py
index acdd832424b..1becd9682bb 100644
--- a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py
+++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -58,9 +58,8 @@ def inference(self, g, batch_size, device):
         # The nodes on each layer are of course split in batches.
+        # Layer-wise inference: each GNN layer is applied to every node with
+        # full (non-sampled) neighborhoods, reusing the previous layer's output,
+        # which avoids the neighborhood explosion of sampled inference.
all_node_ids = torch.arange(0, g.num_nodes()).to(device) - feat = g.get_node_storage(key="feat", ntype="_N").fetch( - all_node_ids, device=device - ) + feat = g.ndata["feat"][all_node_ids].to(device) + sampler = dgl.dataloading.MultiLayerFullNeighborSampler( 1, prefetch_node_feats=["feat"] ) diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py index 2bec1a3dbcf..b1878b37d4e 100644 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py +++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py @@ -13,7 +13,6 @@ import dgl import torch -import pandas import time import tempfile import argparse @@ -124,10 +123,12 @@ def partition_data( ) nix = torch.arange(g.num_nodes()) - ndata = pandas.DataFrame({k: g.ndata[k][nix] for k in g.ndata.keys()}) for (r, f) in enumerate(torch.tensor_split(nix, world_size)): - rank_path = os.path.join(feature_path, f"rank={r}_feat.parquet") - ndata.iloc[f].to_parquet(rank_path) + feat_path = os.path.join(feature_path, f"rank={r}_feat.pt") + torch.save(g.ndata["feat"][f], feat_path) + + label_f_path = os.path.join(feature_path, f"rank={r}_label.pt") + torch.save(g.ndata["label"][f], label_f_path) # Split and save labels os.makedirs( @@ -167,10 +168,9 @@ def load_partitioned_data(rank, edge_path, feature_path, label_path, meta_path): ) # Load features - ndata_df = pandas.read_parquet( - os.path.join(feature_path, f"rank={rank}_feat.parquet") - ) - ndata = {col: torch.as_tensor(s.values) for col, s in ndata_df.items()} + feat_t = torch.load(os.path.join(feature_path, f"rank={rank}_feat.pt")) + label_f_t = torch.load(os.path.join(feature_path, f"rank={rank}_label.pt")) + ndata = {"feat": feat_t, "label": label_f_t} g.add_nodes(meta["num_nodes"], data=ndata) # Load edge index @@ -191,6 +191,7 @@ def create_dataloader(gs, train_idx, device, temp_dir, stage): directory=temp_path, batches_per_partition=10, ) + dataloader = cugraph_dgl.dataloading.FutureDataLoader( gs, train_idx, @@ -276,7 +277,13 @@ def run_workflow( meta_path = os.path.join(args.dataset_root, args.dataset + "_meta.json") if not args.skip_partition and global_rank == 0: - partition_data(*load_dgl_dataset(args.dataset_root, args.dataset), edge_path, feature_path, label_path, meta_path) + partition_data( + *load_dgl_dataset(args.dataset_root, args.dataset), + edge_path, + feature_path, + label_path, + meta_path, + ) torch.distributed.barrier() print("loading partitions...") From d1c8494178cca2d0522be4fbac5078310706e97d Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Thu, 1 Aug 2024 13:39:42 -0700 Subject: [PATCH 44/47] remove dask example --- .../multi_trainer_MG_example/workflow_dask.py | 248 ------------------ 1 file changed, 248 deletions(-) delete mode 100644 python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_dask.py diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_dask.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_dask.py deleted file mode 100644 index 8ca40bd1e2a..00000000000 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_dask.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import dgl -import torch -import time -from distributed import Client, Event as Dask_Event -import tempfile -from cugraph.dask.comms import comms as Comms - - -def enable_spilling(): - import cudf - - cudf.set_option("spill", True) - - -def setup_cluster(dask_worker_devices): - dask_worker_devices_str = ",".join([str(i) for i in dask_worker_devices]) - from dask_cuda import LocalCUDACluster - - cluster = LocalCUDACluster( - protocol="tcp", - CUDA_VISIBLE_DEVICES=dask_worker_devices_str, - rmm_pool_size="25GB", - ) - - client = Client(cluster) - client.wait_for_workers(n_workers=len(dask_worker_devices)) - client.run(enable_spilling) - print("Dask Cluster Setup Complete") - del client - return cluster - - -def create_dask_client(scheduler_address): - from cugraph.dask.comms import comms as Comms - - client = Client(scheduler_address) - Comms.initialize(p2p=True) - return client - - -def initalize_pytorch_worker(dev_id): - import cupy as cp - import rmm - from rmm.allocators.torch import rmm_torch_allocator - from rmm.allocators.cupy import rmm_cupy_allocator - - dev = cp.cuda.Device( - dev_id - ) # Create cuda context on the right gpu, defaults to gpu-0 - dev.use() - rmm.reinitialize( - pool_allocator=True, - initial_pool_size=10e9, - maximum_pool_size=15e9, - devices=[dev_id], - ) - - if dev_id == 0: - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - - torch.cuda.set_device(dev_id) - cp.cuda.set_allocator(rmm_cupy_allocator) - enable_spilling() - print("device_id", dev_id, flush=True) - - -def load_dgl_dataset(dataset_name="ogbn-products"): - from ogb.nodeproppred import DglNodePropPredDataset - - dataset = DglNodePropPredDataset(name=dataset_name) - split_idx = dataset.get_idx_split() - train_idx, valid_idx, test_idx = ( - split_idx["train"], - split_idx["valid"], - split_idx["test"], - ) - g, label = dataset[0] - g.ndata["label"] = label - if len(g.etypes) <= 1: - g = dgl.add_self_loop(g) - else: - for etype in g.etypes: - if etype[0] == etype[2]: - # only add self loops for src->dst - g = dgl.add_self_loop(g, etype=etype) - - g = g.int() - train_idx = train_idx.int() - valid_idx = valid_idx.int() - test_idx = test_idx.int() - return g, train_idx, valid_idx, test_idx, dataset.num_classes - - -def create_cugraph_graphstore_from_dgl_dataset( - dataset_name="ogbn-products", single_gpu=False -): - from cugraph_dgl import cugraph_storage_from_heterograph - - dgl_g, train_idx, valid_idx, test_idx, num_classes = load_dgl_dataset(dataset_name) - cugraph_gs = cugraph_storage_from_heterograph(dgl_g, single_gpu=single_gpu) - return cugraph_gs, train_idx, valid_idx, test_idx, num_classes - - -def create_dataloader(gs, train_idx, device): - import cugraph_dgl - - temp_dir = tempfile.TemporaryDirectory() - sampler = cugraph_dgl.dataloading.NeighborSampler([10, 20]) - dataloader = cugraph_dgl.dataloading.DataLoader( - gs, - train_idx, - sampler, - sampling_output_dir=temp_dir.name, - batches_per_partition=10, - device=device, # Put the sampled MFGs on CPU or GPU - use_ddp=True, # Make it work with distributed data parallel - batch_size=1024, - shuffle=False, # Whether to 
shuffle the nodes for every epoch - drop_last=False, - num_workers=0, - ) - return dataloader - - -def run_workflow(rank, devices, scheduler_address): - from model import Sage, train_model - - # Below sets gpu_number - dev_id = devices[rank] - initalize_pytorch_worker(dev_id) - device = torch.device(f"cuda:{dev_id}") - # cugraph dask client initialization - client = create_dask_client(scheduler_address) - - # Pytorch training worker initialization - dist_init_method = "tcp://{master_ip}:{master_port}".format( - master_ip="127.0.0.1", master_port="12346" - ) - - torch.distributed.init_process_group( - backend="nccl", - init_method=dist_init_method, - world_size=len(devices), - rank=rank, - ) - - print(f"rank {rank}.", flush=True) - print("Initalized across GPUs.") - - event = Dask_Event("cugraph_gs_creation_event") - if rank == 0: - ( - gs, - train_idx, - valid_idx, - test_idx, - num_classes, - ) = create_cugraph_graphstore_from_dgl_dataset( - "ogbn-products", single_gpu=False - ) - client.publish_dataset(cugraph_gs=gs) - client.publish_dataset(train_idx=train_idx) - client.publish_dataset(valid_idx=valid_idx) - client.publish_dataset(test_idx=test_idx) - client.publish_dataset(num_classes=num_classes) - event.set() - else: - if event.wait(timeout=1000): - gs = client.get_dataset("cugraph_gs") - train_idx = client.get_dataset("train_idx") - valid_idx = client.get_dataset("valid_idx") - test_idx = client.get_dataset("test_idx") - num_classes = client.get_dataset("num_classes") - else: - raise RuntimeError(f"Fetch cugraph_gs to worker_id {rank} failed") - - torch.distributed.barrier() - print(f"Loading cugraph_store to worker {rank} is complete", flush=True) - dataloader = create_dataloader(gs, train_idx, device) - print("Data Loading Complete", flush=True) - num_feats = gs.ndata["feat"]["_N"].shape[1] - hid_size = 256 - # Load Training example - model = Sage(num_feats, hid_size, num_classes).to(device) - model = torch.nn.parallel.DistributedDataParallel( - model, - device_ids=[device], - output_device=device, - ) - torch.distributed.barrier() - n_epochs = 10 - total_st = time.time() - opt = torch.optim.Adam(model.parameters(), lr=0.01) - - gs.ndata["feat"] = gs.ndata["feat"].to("cuda") - gs.ndata["label"] = gs.ndata["label"].to("cuda") - - train_model(model, gs, opt, dataloader, n_epochs, rank, valid_idx) - torch.distributed.barrier() - total_et = time.time() - print( - f"Total time taken on n_epochs {n_epochs} = {total_et-total_st} s", - f"measured by worker = {rank}", - ) - - # cleanup dask cluster - if rank == 0: - client.unpublish_dataset("cugraph_gs") - client.unpublish_dataset("train_idx") - client.unpublish_dataset("valid_idx") - client.unpublish_dataset("test_idx") - event.clear() - print("Workflow completed") - print("---" * 10) - Comms.destroy() - - -if __name__ == "__main__": - # Load dummy first - # because new environments - # require dataset download - load_dgl_dataset() - dask_worker_devices = [5, 6] - cluster = setup_cluster(dask_worker_devices) - - trainer_devices = [0, 1, 2] - import torch.multiprocessing as mp - - mp.spawn( - run_workflow, - args=(trainer_devices, cluster.scheduler_address), - nprocs=len(trainer_devices), - ) - Comms.destroy() - cluster.close() From 05e1da42f8c8a4f90aea4fa83e0177f951973685 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Fri, 2 Aug 2024 13:46:40 -0700 Subject: [PATCH 45/47] use float64 --- python/cugraph-dgl/examples/multi_trainer_MG_example/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py index 1becd9682bb..3293f731336 100644 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py +++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py @@ -118,7 +118,7 @@ def train_model(model, g, opt, train_dataloader, num_epochs, rank, val_nid): for epoch in range(num_epochs): total_loss = 0 for _, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): - x = g.ndata["feat"][input_nodes] + x = g.ndata["feat"][input_nodes].to(torch.float64) y = g.ndata["label"][output_nodes] y_hat = model(blocks, x) y = y.squeeze(1) From 5b46f436ba69b544aaa501b7f8d7657cb28fbcd1 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Fri, 2 Aug 2024 14:02:15 -0700 Subject: [PATCH 46/47] set dtype --- python/cugraph-dgl/examples/multi_trainer_MG_example/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py index 3293f731336..d3aad2ab309 100644 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py +++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py @@ -118,8 +118,8 @@ def train_model(model, g, opt, train_dataloader, num_epochs, rank, val_nid): for epoch in range(num_epochs): total_loss = 0 for _, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): - x = g.ndata["feat"][input_nodes].to(torch.float64) - y = g.ndata["label"][output_nodes] + x = g.ndata["feat"][input_nodes].to(torch.float32) + y = g.ndata["label"][output_nodes].to(torch.int64) y_hat = model(blocks, x) y = y.squeeze(1) loss = F.cross_entropy(y_hat, y) From 139b3d64a5656c5f6f65bd102505c662ddeb39f3 Mon Sep 17 00:00:00 2001 From: Alexandria Barghi Date: Mon, 5 Aug 2024 14:42:43 -0700 Subject: [PATCH 47/47] allow setting directories --- .../examples/graphsage/node-classification.py | 14 ++-- .../multi_trainer_MG_example/workflow_snmg.py | 64 +++++++++++-------- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/python/cugraph-dgl/examples/graphsage/node-classification.py b/python/cugraph-dgl/examples/graphsage/node-classification.py index 5b4f8863e5d..2b8b687efab 100644 --- a/python/cugraph-dgl/examples/graphsage/node-classification.py +++ b/python/cugraph-dgl/examples/graphsage/node-classification.py @@ -17,6 +17,7 @@ # Ignore Warning import warnings +import tempfile import time import cugraph_dgl import cugraph_dgl.dataloading @@ -154,7 +155,7 @@ def layerwise_infer(device, graph, nid, model, batch_size): return MF.accuracy(pred, label, task="multiclass", num_classes=num_classes) -def train(args, device, g, dataset, model): +def train(args, device, g, dataset, model, directory): # create sampler & dataloader train_idx = dataset.train_idx.to(device) val_idx = dataset.val_idx.to(device) @@ -163,7 +164,7 @@ def train(args, device, g, dataset, model): batch_size = 1024 fanouts = [5, 10, 15] if isinstance(g, cugraph_dgl.Graph): - sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) + sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts, directory=directory) loader_cls = cugraph_dgl.dataloading.FutureDataLoader else: sampler = NeighborSampler(fanouts) @@ -235,6 +236,8 @@ def train(args, device, g, dataset, model): " 'gpu_dgl' for pure-GPU training, " " 'gpu_cugraph_dgl' for pure-GPU training.", ) + parser.add_argument("--dataset_root", type=str, default="dataset") + 
parser.add_argument("--tempdir_root", type=str, default=None) args = parser.parse_args() if not torch.cuda.is_available(): args.mode = "cpu" @@ -244,7 +247,9 @@ def train(args, device, g, dataset, model): # load and preprocess dataset print("Loading data") - dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) + dataset = AsNodePredDataset( + DglNodePropPredDataset("ogbn-products", root=args.dataset_root) + ) g = dataset[0] g = dgl.add_self_loop(g) if args.mode == "gpu_cugraph_dgl": @@ -267,7 +272,8 @@ def train(args, device, g, dataset, model): # model training print("Training...") - train(args, device, g, dataset, model) + with tempfile.TemporaryDirectory(dir=args.tempdir_root) as directory: + train(args, device, g, dataset, model, directory) # test the model print("Testing...") diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py index f043940486b..da5c2b4d64e 100644 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py +++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -53,6 +53,7 @@ def initalize_pytorch_worker(dev_id): ) from cugraph.testing.mg_utils import enable_spilling + enable_spilling() torch.cuda.set_device(dev_id) @@ -60,10 +61,13 @@ def initalize_pytorch_worker(dev_id): print("device_id", dev_id, flush=True) -def load_dgl_dataset(dataset_name="ogbn-products"): +def load_dgl_dataset( + dataset_name="ogbn-products", + dataset_root=None, +): from ogb.nodeproppred import DglNodePropPredDataset - dataset = DglNodePropPredDataset(name=dataset_name) + dataset = DglNodePropPredDataset(name=dataset_name, root=dataset_root) split_idx = dataset.get_idx_split() train_idx, valid_idx, test_idx = ( split_idx["train"], @@ -87,25 +91,19 @@ def load_dgl_dataset(dataset_name="ogbn-products"): return g, train_idx, valid_idx, test_idx, dataset.num_classes -def create_cugraph_graphstore_from_dgl_dataset( - dataset, rank, world_size -): +def create_cugraph_graphstore_from_dgl_dataset(dataset, rank, world_size): (g, train_idx, valid_idx, test_idx, num_classes) = dataset # Partition the data - cg = cugraph_dgl.Graph(is_multi_gpu=True, ndata_storage='wholegraph', edata_storage='wholegraph') - + cg = cugraph_dgl.Graph( + is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph" + ) + nix = torch.tensor_split(torch.arange(g.num_nodes()), world_size)[rank] - ndata = { - k: g.ndata[k][nix].cuda() - for k in g.ndata.keys() - } + ndata = {k: g.ndata[k][nix].cuda() for k in g.ndata.keys()} eix = torch.tensor_split(torch.arange(g.num_edges()), world_size)[rank] - src, dst = g.all_edges(form='uv', order='eid') - edata = { - k: g.edata[k][eix].cuda() - for k in g.edata.keys() - } + src, dst = g.all_edges(form="uv", order="eid") + edata = {k: g.edata[k][eix].cuda() for k in g.edata.keys()} cg.add_nodes(g.num_nodes(), data=ndata) cg.add_edges( @@ -114,16 +112,26 @@ def create_cugraph_graphstore_from_dgl_dataset( data=edata, ) - return (cg, torch.tensor_split(train_idx, world_size)[rank].to(torch.int64), torch.tensor_split(valid_idx, world_size)[rank].to(torch.int64), torch.tensor_split(test_idx, world_size)[rank].to(torch.int64), num_classes) + return ( + 
cg, + torch.tensor_split(train_idx, world_size)[rank].to(torch.int64), + torch.tensor_split(valid_idx, world_size)[rank].to(torch.int64), + torch.tensor_split(test_idx, world_size)[rank].to(torch.int64), + num_classes, + ) def create_dataloader(gs, train_idx, device, temp_dir, stage): import cugraph_dgl - temp_path = os.path.join(temp_dir, f'{stage}_{device}') + temp_path = os.path.join(temp_dir, f"{stage}_{device}") os.mkdir(temp_path) - sampler = cugraph_dgl.dataloading.NeighborSampler([10, 20], directory=temp_path, batches_per_partition=10,) + sampler = cugraph_dgl.dataloading.NeighborSampler( + [10, 20], + directory=temp_path, + batches_per_partition=10, + ) dataloader = cugraph_dgl.dataloading.FutureDataLoader( gs, train_idx, @@ -171,14 +179,16 @@ def run_workflow(rank, world_size, cugraph_id, dataset, temp_dir): test_idx, num_classes, ) = create_cugraph_graphstore_from_dgl_dataset( - dataset, rank, world_size, + dataset, + rank, + world_size, ) del dataset torch.distributed.barrier() print(f"Loading graph to worker {rank} is complete", flush=True) - dataloader = create_dataloader(gs, train_idx, device, temp_dir, 'train') + dataloader = create_dataloader(gs, train_idx, device, temp_dir, "train") print("Dataloader Creation Complete", flush=True) num_feats = gs.ndata["feat"].shape[1] hid_size = 256 @@ -201,26 +211,30 @@ def run_workflow(rank, world_size, cugraph_id, dataset, temp_dir): f"measured by worker = {rank}", ) + torch.cuda.synchronize() wm_finalize() cugraph_comms_shutdown() if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--dataset', type=str, default='ogbn-products') + parser.add_argument("--dataset_root", type=str, default="dataset") + parser.add_argument("--tempdir_root", type=str, default=None) + parser.add_argument("--dataset", type=str, default="ogbn-products") args = parser.parse_args() from rmm.allocators.torch import rmm_torch_allocator + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) # Create the uid needed for cuGraph comms cugraph_id = cugraph_comms_create_unique_id() - ds = load_dgl_dataset(args.dataset) + ds = load_dgl_dataset(args.dataset, args.dataset_root) world_size = torch.cuda.device_count() - with tempfile.TemporaryDirectory() as directory: + with tempfile.TemporaryDirectory(dir=args.tempdir_root) as directory: torch.multiprocessing.spawn( run_workflow, args=(world_size, cugraph_id, ds, directory),
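            nprocs=world_size,
        )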