diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 4ada4243a..b0f5bbefa 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -38,17 +38,6 @@ mkdir -p ../docs/wholegraph/_xml # xml is tarred up to s3 and used in cugraph mkdir -p "${RAPIDS_DOCS_DIR}/libwholegraph/xml_tar" tar -czf "${RAPIDS_DOCS_DIR}/libwholegraph/xml_tar"/xml.tar.gz -C xml . -# _xml is used for sphinx breathe project -mv xml/* "../docs/wholegraph/_xml" -popd - -rapids-logger "Build Sphinx docs" -pushd docs/wholegraph -sphinx-build -b dirhtml ./source _html -sphinx-build -b text ./source _text -mkdir -p "${RAPIDS_DOCS_DIR}/wholegraph/"{html,txt} -mv _html/* "${RAPIDS_DOCS_DIR}/wholegraph/html" -mv _text/* "${RAPIDS_DOCS_DIR}/wholegraph/txt" popd rapids-logger "Output temp dir: ${RAPIDS_DOCS_DIR}" diff --git a/docs/wholegraph/Makefile b/docs/wholegraph/Makefile deleted file mode 100644 index bfd6f2751..000000000 --- a/docs/wholegraph/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SPHINXPROJ = wholegraph -SOURCEDIR = source -BUILDDIR ?= build -IMGDIR = images - -gen_doc_dirs = ${BUILDDIR} source/api_docs/api -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile clean - -# clean to remove all the generated documentation files in build and source -clean: - rm -rf $(gen_doc_dirs) - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/wholegraph/README.md b/docs/wholegraph/README.md deleted file mode 100644 index 617698e06..000000000 --- a/docs/wholegraph/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# Building Documentation - -All prerequisite for building docs are in the WholeGraph development conda environment. -[See build instructions](source/installation/source_build.md) on how to create the development conda environment - -## Steps to follow: - -In order to build the docs, we need the conda dev environment and we need to build wholegraph from source. - -1. Create a conda env and build wholegraph from source. The dependencies to build from source are installed in that conda environment, and then wholegraph is built and installed into the same environment. - -2. Once wholegraph is built from source, navigate to `../docs/wholegraph/`. If you have your documentation written and want to turn it into HTML, run makefile: - - -```bash -# most be in the /docs/wholegraph directory -make html -``` - -This should run Sphinx in your shell, and outputs to `build/html/index.html` - - -## View docs web page by opening HTML in browser: - -First navigate to `/build/html/` folder, and then run the following command: - -```bash -python -m http.server -``` -Then, navigate a web browser to the IP address or hostname of the host machine at port 8000: - -``` -https://:8000 -``` -Now you can check if your docs edits formatted correctly, and read well. 
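For anyone who still needs to build these docs locally after this removal, the following is a minimal sketch of the workflow described by the deleted `README.md`, `Makefile`, and `ci/build_docs.sh` lines above. It assumes the WholeGraph conda development environment is active and the library has already been built from source; `<host>` is a placeholder for whatever machine serves the files.

```bash
# Build the Sphinx docs (the removed Makefile wrapped sphinx-build via "make mode").
cd docs/wholegraph
make html                        # output lands in build/html/index.html

# Equivalent direct invocations, as the removed ci/build_docs.sh step did:
sphinx-build -b dirhtml ./source _html
sphinx-build -b text    ./source _text

# Preview the HTML output in a browser.
cd build/html
python -m http.server            # then open http://<host>:8000
```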
diff --git a/docs/wholegraph/make.bat b/docs/wholegraph/make.bat deleted file mode 100644 index f92ef7811..000000000 --- a/docs/wholegraph/make.bat +++ /dev/null @@ -1,36 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build -set SPHINXPROJ=wholegraph - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd diff --git a/docs/wholegraph/source/_static/EMPTY b/docs/wholegraph/source/_static/EMPTY deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/wholegraph/source/_static/references.css b/docs/wholegraph/source/_static/references.css deleted file mode 100644 index d1f647233..000000000 --- a/docs/wholegraph/source/_static/references.css +++ /dev/null @@ -1,23 +0,0 @@ - -/* Fix references to not look like parameters */ -dl.citation > dt.label { - display: unset !important; - float: left !important; - border: unset !important; - background: unset !important; - padding: unset !important; - margin: unset !important; - font-size: unset !important; - line-height: unset !important; - padding-right: 0.5rem !important; -} - -/* Add opening bracket */ -dl.citation > dt.label > span::before { - content: "["; -} - -/* Add closing bracket */ -dl.citation > dt.label > span::after { - content: "]"; -} diff --git a/docs/wholegraph/source/api_docs/index.rst b/docs/wholegraph/source/api_docs/index.rst deleted file mode 100644 index 80e231d46..000000000 --- a/docs/wholegraph/source/api_docs/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -WholeGraph API reference -======================== - -This page provides WholeGraph API reference - -.. toctree:: - :maxdepth: 2 - :caption: WholeGraph API Documentation - - libwholegraph/index.rst - pylibwholegraph/index.rst diff --git a/docs/wholegraph/source/api_docs/libwholegraph/index.rst b/docs/wholegraph/source/api_docs/libwholegraph/index.rst deleted file mode 100644 index 4ef68abef..000000000 --- a/docs/wholegraph/source/api_docs/libwholegraph/index.rst +++ /dev/null @@ -1,228 +0,0 @@ -===================== -libwholegraph API doc -===================== - -Doxygen WholeGraph C API documentation --------------------------------------- -For doxygen documentation, please refer to `Doxygen Documentation <../../doxygen_docs/libwholegraph/html/index.html>`_ - -WholeGraph C API documentation ------------------------------- - -Library Level APIs -++++++++++++++++++ - -.. doxygenenum:: wholememory_error_code_t - :project: libwholegraph -.. doxygenfunction:: wholememory_init - :project: libwholegraph -.. doxygenfunction:: wholememory_finalize - :project: libwholegraph -.. doxygenfunction:: fork_get_device_count - :project: libwholegraph - -WholeMemory Communicator APIs -+++++++++++++++++++++++++++++ - -.. doxygentypedef:: wholememory_comm_t - :project: libwholegraph -.. doxygenstruct:: wholememory_unique_id_t - :project: libwholegraph -.. 
doxygenfunction:: wholememory_create_unique_id - :project: libwholegraph -.. doxygenfunction:: wholememory_create_communicator - :project: libwholegraph -.. doxygenfunction:: wholememory_destroy_communicator - :project: libwholegraph -.. doxygenfunction:: wholememory_communicator_get_rank - :project: libwholegraph -.. doxygenfunction:: wholememory_communicator_get_size - :project: libwholegraph -.. doxygenfunction:: wholememory_communicator_barrier - :project: libwholegraph - -WholeMemoryHandle APIs -++++++++++++++++++++++ - -.. doxygenenum:: wholememory_memory_type_t - :project: libwholegraph -.. doxygenenum:: wholememory_memory_location_t - :project: libwholegraph -.. doxygentypedef:: wholememory_handle_t - :project: libwholegraph -.. doxygenstruct:: wholememory_gref_t - :project: libwholegraph -.. doxygenfunction:: wholememory_malloc - :project: libwholegraph -.. doxygenfunction:: wholememory_free - :project: libwholegraph -.. doxygenfunction:: wholememory_get_communicator - :project: libwholegraph -.. doxygenfunction:: wholememory_get_memory_type - :project: libwholegraph -.. doxygenfunction:: wholememory_get_memory_location - :project: libwholegraph -.. doxygenfunction:: wholememory_get_total_size - :project: libwholegraph -.. doxygenfunction:: wholememory_get_data_granularity - :project: libwholegraph -.. doxygenfunction:: wholememory_get_local_memory - :project: libwholegraph -.. doxygenfunction:: wholememory_get_rank_memory - :project: libwholegraph -.. doxygenfunction:: wholememory_get_global_pointer - :project: libwholegraph -.. doxygenfunction:: wholememory_get_global_reference - :project: libwholegraph -.. doxygenfunction:: wholememory_determine_partition_plan - :project: libwholegraph -.. doxygenfunction:: wholememory_determine_entry_partition_plan - :project: libwholegraph -.. doxygenfunction:: wholememory_get_partition_plan - :project: libwholegraph -.. doxygenfunction:: wholememory_load_from_file - :project: libwholegraph -.. doxygenfunction:: wholememory_store_to_file - :project: libwholegraph - -WholeMemoryTensor APIs -++++++++++++++++++++++ - -.. doxygenenum:: wholememory_dtype_t - :project: libwholegraph -.. doxygenstruct:: wholememory_array_description_t - :project: libwholegraph -.. doxygenstruct:: wholememory_matrix_description_t - :project: libwholegraph -.. doxygenstruct:: wholememory_tensor_description_t - :project: libwholegraph -.. doxygentypedef:: wholememory_tensor_t - :project: libwholegraph -.. doxygenfunction:: wholememory_dtype_get_element_size - :project: libwholegraph -.. doxygenfunction:: wholememory_dtype_is_floating_number - :project: libwholegraph -.. doxygenfunction:: wholememory_dtype_is_integer_number - :project: libwholegraph -.. doxygenfunction:: wholememory_create_array_desc - :project: libwholegraph -.. doxygenfunction:: wholememory_create_matrix_desc - :project: libwholegraph -.. doxygenfunction:: wholememory_initialize_tensor_desc - :project: libwholegraph -.. doxygenfunction:: wholememory_copy_array_desc_to_matrix - :project: libwholegraph -.. doxygenfunction:: wholememory_copy_array_desc_to_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_copy_matrix_desc_to_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_convert_tensor_desc_to_array - :project: libwholegraph -.. doxygenfunction:: wholememory_convert_tensor_desc_to_matrix - :project: libwholegraph -.. doxygenfunction:: wholememory_get_memory_element_count_from_array - :project: libwholegraph -.. 
doxygenfunction:: wholememory_get_memory_size_from_array - :project: libwholegraph -.. doxygenfunction:: wholememory_get_memory_element_count_from_matrix - :project: libwholegraph -.. doxygenfunction:: wholememory_get_memory_size_from_matrix - :project: libwholegraph -.. doxygenfunction:: wholememory_get_memory_element_count_from_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_get_memory_size_from_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_unsqueeze_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_create_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_destroy_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_make_tensor_from_pointer - :project: libwholegraph -.. doxygenfunction:: wholememory_make_tensor_from_handle - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_has_handle - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_get_memory_handle - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_get_tensor_description - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_get_global_reference - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_map_local_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_get_data_pointer - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_get_entry_per_partition - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_get_subtensor - :project: libwholegraph -.. doxygenfunction:: wholememory_tensor_get_root - :project: libwholegraph - -Ops on WholeMemory Tensors -++++++++++++++++++++++++++ - -.. doxygenfunction:: wholememory_gather - :project: libwholegraph -.. doxygenfunction:: wholememory_scatter - :project: libwholegraph - -WholeTensorEmbedding APIs -+++++++++++++++++++++++++ - -.. doxygentypedef:: wholememory_embedding_cache_policy_t - :project: libwholegraph -.. doxygentypedef:: wholememory_embedding_optimizer_t - :project: libwholegraph -.. doxygentypedef:: wholememory_embedding_t - :project: libwholegraph -.. doxygenenum:: wholememory_access_type_t - :project: libwholegraph -.. doxygenenum:: wholememory_optimizer_type_t - :project: libwholegraph -.. doxygenfunction:: wholememory_create_embedding_optimizer - :project: libwholegraph -.. doxygenfunction:: wholememory_optimizer_set_parameter - :project: libwholegraph -.. doxygenfunction:: wholememory_destroy_embedding_optimizer - :project: libwholegraph -.. doxygenfunction:: wholememory_create_embedding_cache_policy - :project: libwholegraph -.. doxygenfunction:: wholememory_destroy_embedding_cache_policy - :project: libwholegraph -.. doxygenfunction:: wholememory_create_embedding - :project: libwholegraph -.. doxygenfunction:: wholememory_destroy_embedding - :project: libwholegraph -.. doxygenfunction:: wholememory_embedding_get_embedding_tensor - :project: libwholegraph -.. doxygenfunction:: wholememory_embedding_gather - :project: libwholegraph -.. doxygenfunction:: wholememory_embedding_gather_gradient_apply - :project: libwholegraph -.. doxygenfunction:: wholememory_embedding_get_optimizer_state_names - :project: libwholegraph -.. doxygenfunction:: wholememory_embedding_get_optimizer_state - :project: libwholegraph -.. doxygenfunction:: wholememory_embedding_writeback_cache - :project: libwholegraph -.. doxygenfunction:: wholememory_embedding_drop_all_cache - :project: libwholegraph - -Ops on graphs stored in WholeMemory -+++++++++++++++++++++++++++++++++++ - -.. 
doxygenfunction:: wholegraph_csr_unweighted_sample_without_replacement - :project: libwholegraph -.. doxygenfunction:: wholegraph_csr_weighted_sample_without_replacement - :project: libwholegraph - -Miscellaneous Ops for graph -+++++++++++++++++++++++++++ - -.. doxygenfunction:: graph_append_unique - :project: libwholegraph -.. doxygenfunction:: csr_add_self_loop - :project: libwholegraph diff --git a/docs/wholegraph/source/api_docs/pylibwholegraph/index.rst b/docs/wholegraph/source/api_docs/pylibwholegraph/index.rst deleted file mode 100644 index 3f905b11f..000000000 --- a/docs/wholegraph/source/api_docs/pylibwholegraph/index.rst +++ /dev/null @@ -1,38 +0,0 @@ -======================= -pylibwholegraph API doc -======================= - -.. currentmodule:: pylibwholegraph - -APIs ----- -.. autosummary:: - :toctree: ../api/pylibwholegraph - - torch.initialize.init_torch_env - torch.initialize.init_torch_env_and_create_wm_comm - torch.initialize.finalize - torch.comm.WholeMemoryCommunicator - torch.comm.set_world_info - torch.comm.create_group_communicator - torch.comm.destroy_communicator - torch.comm.get_global_communicator - torch.comm.get_local_node_communicator - torch.comm.get_local_device_communicator - torch.tensor.WholeMemoryTensor - torch.tensor.create_wholememory_tensor - torch.tensor.create_wholememory_tensor_from_filelist - torch.tensor.destroy_wholememory_tensor - torch.embedding.WholeMemoryOptimizer - torch.embedding.create_wholememory_optimizer - torch.embedding.destroy_wholememory_optimizer - torch.embedding.WholeMemoryCachePolicy - torch.embedding.create_wholememory_cache_policy - torch.embedding.create_builtin_cache_policy - torch.embedding.destroy_wholememory_cache_policy - torch.embedding.WholeMemoryEmbedding - torch.embedding.create_embedding - torch.embedding.create_embedding_from_filelist - torch.embedding.destroy_embedding - torch.embedding.WholeMemoryEmbeddingModule - torch.graph_structure.GraphStructure diff --git a/docs/wholegraph/source/basics/index.rst b/docs/wholegraph/source/basics/index.rst deleted file mode 100644 index 429fe35d6..000000000 --- a/docs/wholegraph/source/basics/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -====== -Basics -====== - - -.. toctree:: - :maxdepth: 2 - - wholegraph_intro - wholememory_intro - wholememory_implementation_details diff --git a/docs/wholegraph/source/basics/wholegraph_intro.md b/docs/wholegraph/source/basics/wholegraph_intro.md deleted file mode 100644 index ee3c49a69..000000000 --- a/docs/wholegraph/source/basics/wholegraph_intro.md +++ /dev/null @@ -1,138 +0,0 @@ -# WholeGraph Introduction -WholeGraph is developed to help train large-scale Graph Neural Networks(GNN). -WholeGraph provides underlying storage structure called WholeMemory. -WholeMemory is a Tensor like storage and provide multi-GPU support. -It is optimized for NVLink systems like DGX A100 servers. -By working together with cuGraph, cuGraph-Ops, cuGraph-DGL, cuGraph-PyG, and upstream DGL and PyG, -it will be easy to build GNN applications. - -## WholeMemory -WholeMemory can be regarded as a whole view of GPU memory. -WholeMemory exposes a handle of the memory instance no matter how the underlying data is stored across multiple GPUs. -WholeMemory assumes that separate process is used to control each GPU. - -### WholeMemory Basics -To define WholeMemory, we need to specify following: - -#### 1. Specify the set of GPU to handle the Memory - -As WholeMemory is owned by a set of GPUs, so the set of GPUs need to be specified. 
-This is done by creating a [WholeMemory Communicator](#wholememory-communicator) and specifying that communicator
-when creating the WholeMemory.
-
-#### 2. Specify the location of the memory
-
-Although WholeMemory is owned by a set of GPUs, the memory itself can be located in host memory or in device memory,
-so the location of the memory needs to be specified. Two types of location are supported:
-
-- **Host memory**: uses pinned host memory as the underlying storage.
-- **Device memory**: uses GPU device memory as the underlying storage.
-
-#### 3. Specify the address mapping mode of the memory
-
-As WholeMemory is owned by multiple GPUs and each GPU accesses the whole memory space, address mapping is needed.
-There are three address mapping modes (also known as WholeMemory types):
-
-- **Continuous**: Memory from all GPUs is mapped into a single continuous address space on each GPU.
-  In this mode, each GPU can directly access the whole memory using a single pointer and an offset, just like normal
-  device memory. Software sees no difference, and hardware peer-to-peer access handles the underlying communication.
-
-- **Chunked**: Memory from each GPU is mapped into a separate chunk, one chunk per GPU.
-  In this mode, direct access is still supported, but not through a single pointer; software sees the chunked memory,
-  although an abstraction layer can help hide this.
-
-- **Distributed**: Memory from other GPUs is not mapped into the current GPU, so no direct access is supported.
-  Accessing another GPU's memory requires explicit communication.
-
-If you would like to know more about WholeMemory locations and WholeMemory types, please refer to
-[WholeMemory Implementation Details](wholememory_implementation_details.md).
-
-### WholeMemory Communicator
-A WholeMemory Communicator has two main purposes:
-
-- **Defining the set of GPUs that work together on a WholeMemory.** A WholeMemory Communicator is created by all GPUs
-  that want to work together, and it can be reused as long as the required GPU set stays the same.
-- **Providing the underlying communication channel needed by WholeMemory.** WholeMemory may need to communicate between
-  GPUs during WholeMemory creation and during some ops on some types of WholeMemory.
-
-To create a WholeMemory Communicator, a WholeMemory Unique ID needs to be created first. It is usually created by the
-first GPU in the set and then broadcast to all GPUs that want to work together. All GPUs in the communicator then call
-the communicator creation function with this WholeMemory Unique ID, the rank of the current GPU, and the total GPU count.
-
-### WholeMemory Granularity
-As the underlying storage may be physically partitioned across multiple GPUs, a split is usually not wanted inside a
-single user data block. To help with this, the data granularity can be specified when creating WholeMemory. The
-WholeMemory is then treated as multiple blocks of the same granularity and is never split inside a granularity block.
-
-### WholeMemory Mapping
-As WholeMemory provides a whole view of memory to each GPU, mapping is usually needed to access WholeMemory.
-Different types of WholeMemory support different mapping methods, as their names suggest.
-The supported mappings include:
-- All WholeMemory types support mapping the memory range that the local GPU is responsible for.
-  That is, each rank can directly access its "Local" memory in all types of WholeMemory.
-  Here the "Local" memory doesn't have to be in the current GPU's memory; it can be in host memory or even on another
-  GPU, but it is guaranteed to be directly accessible by the current GPU.
-- Chunked and Continuous WholeMemory also support chunked mapping. That is, the memory of all GPUs can be mapped into
-  the current GPU, one continuous chunk per GPU. Each chunk can be directly accessed by the current GPU, but the memory
-  of different chunks is not guaranteed to be continuous.
-- Continuous WholeMemory can be mapped into a single continuous memory space. That is, the memory of all GPUs is mapped
-  into one range of virtual memory, and accessing different positions in this range physically accesses different
-  GPUs. This mapping is handled by hardware (CPU or GPU page tables).
-
-### Operations on WholeMemory
-Several operations can be performed on WholeMemory. They are based on the WholeMemory mappings.
-#### Local Operation
-As all WholeMemory types support mapping of local memory, operations on local memory are supported. The operation can
-be either a read or a write; simply use the mapping as GPU memory of the current device.
-#### Load and Store
-To facilitate file I/O, loading and storing WholeMemory from or to files is supported. WholeMemory uses a raw binary
-file format on disk. For a load, the input can be a single file or a list of files; if it is a list, the files are
-logically concatenated and then loaded. For a store, each GPU stores its local memory to a file, producing a list of
-files.
-#### Gather and Scatter
-WholeMemory also supports Gather / Scatter operations, which usually operate on a
-[WholeMemory Tensor](#wholememory-tensor).
-
-### WholeMemory Tensor
-Compared with PyTorch, WholeMemory is like PyTorch Storage, while a WholeMemory Tensor is like a PyTorch Tensor.
-For now, WholeMemory supports only 1D and 2D tensors, i.e. arrays and matrices. Only the first dimension is partitioned.
-
-### WholeMemory Embedding
-A WholeMemory Embedding is just like a 2D WholeMemory Tensor, with two features added: cache support and sparse
-optimizer support.
-#### Cache Support
-WholeMemory Embedding supports caching. To create a WholeMemory Embedding with a cache, a WholeMemory CachePolicy needs
-to be created first. A WholeMemoryCachePolicy is created from the following fields:
-- **WholeMemory Communicator**: The WholeMemory CachePolicy also needs a WholeMemory Communicator, which defines the
-  set of GPUs that cache all the embeddings.
-  It can be the same as the WholeMemory Communicator used to create the WholeMemory Embedding.
-- **WholeMemory type**: specifies the WholeMemory type of the cache.
-- **WholeMemory location**: specifies the location of the cache.
-- **Access type**: can be readonly or readwrite.
-- **Cache ratio**: specifies how much memory the cache will use. This ratio is computed per GPU set that caches the
-  whole embedding.
-
-The two most commonly used cache configurations are:
-- **Device cached host memory**: When the WholeMemory Communicator of the cache policy is the same as the WholeMemory
-  Communicator used to create the WholeMemory Embedding, the cache has the same GPU set as the WholeMemory Embedding,
-  so each GPU just caches its own part of the raw embedding.
-  The typical case is a raw WholeMemory Embedding located in host memory with the cache in device memory: each GPU
-  caches its own part of the host memory.
-- **Local cached global memory**: The WholeMemory Communicator of the WholeMemory CachePolicy can also be a subset of
-  the WholeMemory Communicator of the WholeMemory Embedding. In this case, the subset of GPUs together caches all the
-  embeddings. The typical case is a raw WholeMemory Embedding partitioned across different machine nodes, where we want
-  to cache some embeddings on the local machine or local GPU; the subset can then be all the GPUs of the local machine.
-  For local cached global memory, only readonly access is supported.
-
-#### WholeMemory Embedding Sparse Optimizer
-The other added feature of WholeMemory Embedding is support for embedding training.
-To train large embedding tables efficiently, a sparse optimizer is needed.
-The WholeMemory Embedding Sparse Optimizer can run on cached or noncached WholeMemory Embeddings.
-Currently supported optimizers include SGD, Adam, RMSProp and AdaGrad.
-
-## Graph Structure
-Graph structure in WholeGraph is also based on WholeMemory.
-In WholeGraph, the graph is stored in [CSR format](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)).
-Both ROW_INDEX (denoted `csr_row_ptr`) and COL_INDEX (denoted `csr_col_ind`) are stored in WholeMemory Tensors, so the
-graph structure can be loaded using the [WholeMemory Tensor loading mechanism](#load-and-store).
diff --git a/docs/wholegraph/source/basics/wholememory_implementation_details.md b/docs/wholegraph/source/basics/wholememory_implementation_details.md
deleted file mode 100644
index eee72e7a4..000000000
--- a/docs/wholegraph/source/basics/wholememory_implementation_details.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# WholeMemory Implementation Details
-As described in the [WholeMemory Introduction](wholegraph_intro.md), there are two WholeMemory locations and three
-WholeMemory types, so there are six WholeMemory combinations in total.
-
-| Type          | CONTINUOUS  | CONTINUOUS | CHUNKED   | CHUNKED   | DISTRIBUTED | DISTRIBUTED |
-|:-------------:|:-----------:|:----------:|:---------:|:---------:|:-----------:|:-----------:|
-| Location      | DEVICE      | HOST       | DEVICE    | HOST      | DEVICE      | HOST        |
-| Allocated by  | EACH        | FIRST      | EACH      | FIRST     | EACH        | EACH        |
-| Allocate API  | Driver      | Host       | Runtime   | Host      | Runtime     | Runtime     |
-| IPC Mapping   | Unix fd     | mmap       | cudaIpc   | mmap      | No IPC map  | No IPC map  |
-
-For the "Continuous" and "Chunked" types of WholeMemory, all memory is mapped to each GPU, so these two types are both
-"Mapped" WholeMemory, in contrast to "Distributed" WholeMemory, which is not fully mapped.
-
-## WholeMemory Layout
-As the underlying memory of a single WholeMemory object may be on multiple GPU devices, the WholeGraph library
-partitions the data across these GPU devices.
-The partition method guarantees that each GPU can access one continuous part of the entire memory.
-Here "can access" means the memory can be accessed directly from CUDA kernels, but it doesn't have to be physically on
-that GPU; for example, it may be in host memory or in another GPU's device memory that can be accessed over P2P.
-In case the stored data has its own granularity that should not be split, the data granularity can be specified when
-creating WholeMemory. Each granularity unit can then be considered as a block of data.
-
-The following figure shows the layout of 15 data blocks over 4 GPUs.
-![WholeMemory Layout](../imgs/general_wholememory.png)
-
-WholeMemory Tensors can be 1D or 2D.
-For a 1D tensor, the data granularity is one element. For a 2D tensor, the data granularity is one row.
-Their layout will be like this:
-![WholeMemory Tensor Layout](../imgs/wholememory_tensor.png)
-
-## WholeMemory Allocation
-As there are six types of WholeMemory, the allocation process for each type is as follows:
-
-### Device Continuous WholeMemory
-For Device Continuous WholeMemory, a range of virtual address space covering the entire memory range is first reserved
-on each GPU. Then a part of the physical memory is allocated on each GPU, as shown in the following figure.
-![Device Continuous WholeMemory Allocation Step 1](../imgs/device_continuous_wholememory_step1.png)
-After that, each GPU gathers the memory handles from all GPUs and maps them into the reserved address space.
-![Device Continuous WholeMemory Allocation Step 2](../imgs/device_continuous_wholememory_step2.png)
-
-### Device Chunked WholeMemory
-For Device Chunked WholeMemory, each GPU first allocates its own part of memory using the CUDA runtime API, which
-creates both the virtual address space and the physical memory for that part.
-![Device Chunked WholeMemory Allocation Step 1](../imgs/device_chunked_wholememory_step1.png)
-Then each GPU gathers the IPC handles of the memory from all other GPUs and maps them into its own virtual address space.
-![Device Chunked WholeMemory Allocation Step 2](../imgs/device_chunked_wholememory_step2.png)
-
-### Host Mapped WholeMemory
-For host memory, Continuous and Chunked use the same method. The first rank allocates the host physical memory and
-shares it with all ranks.
-![Host Mapped WholeMemory Allocation Step 1](../imgs/host_mapped_wholememory_step1.png)
-Then each rank registers that host memory into the GPU address space.
-![Host Mapped WholeMemory Allocation Step 2](../imgs/host_mapped_wholememory_step2.png)
-
-### Distributed WholeMemory
-For Distributed WholeMemory, each GPU simply allocates its own part of memory; nothing needs to be shared with other GPUs.
-![Distributed WholeMemory Allocation](../imgs/distributed_wholememory.png)
diff --git a/docs/wholegraph/source/basics/wholememory_intro.md b/docs/wholegraph/source/basics/wholememory_intro.md
deleted file mode 100644
index f4b813768..000000000
--- a/docs/wholegraph/source/basics/wholememory_intro.md
+++ /dev/null
@@ -1,124 +0,0 @@
-## WholeMemory
-WholeMemory can be regarded as a whole view of GPU memory.
-WholeMemory exposes a handle to the memory instance no matter how the underlying data is stored across multiple GPUs.
-WholeMemory assumes that a separate process is used to control each GPU.
-
-### WholeMemory Basics
-To define WholeMemory, we need to specify the following:
-
-#### 1. Specify the set of GPUs that handle the memory
-
-As WholeMemory is owned by a set of GPUs, that set of GPUs needs to be specified.
-This is done by creating a [WholeMemory Communicator](#wholememory-communicator) and specifying that communicator
-when creating the WholeMemory.
-
-#### 2. Specify the location of the memory
-
-Although WholeMemory is owned by a set of GPUs, the memory itself can be located in host memory or in device memory,
-so the location of the memory needs to be specified. Two types of location are supported:
-
-- **Host memory**: uses pinned host memory as the underlying storage.
-- **Device memory**: uses GPU device memory as the underlying storage.
-
-#### 3. Specify the address mapping mode of the memory
-
-As WholeMemory is owned by multiple GPUs and each GPU accesses the whole memory space, address mapping is needed.
-There are three types of address mapping modes (also known as WholeMemory types), they are: - -- **Continuous**: All memory from each GPU will be mapped into a single continuous memory address space for each GPU. - In this mode, each GPU can directly access the whole memory using a single pointer and offset, just like using normal - device memory. Software will see no difference. Hardware peer to peer access will handle the underlying communication. - -- **Chunked**: Memory from each GPU will be mapped into different memory chunks, one chunk for each GPU. - In this mode, direct access is also supported, but not using a single pointer. Software will see the chunked memory. - However, an abstract layer may help to hide this. - -- **Distributed**: Memory from other GPUs are not mapped into current GPU, so no direct access is supported. - To access memory of other GPU, explicit communication is needed. - -If you would like to know more details about WholeMemory locations and WholeMemory types, please refer to -[WholeMemory Implementation Details](wholememory_implementation_details.md) - -### WholeMemory Communicator -WholeMemory Communicator has two main purpose: - -- **Defines a set of GPUs which works together on WholeMemory.** WholeMemory Communicator is created by all GPUs that - wants to work together. A WholeMemory Communicator can be reused as long as the GPU set needed is the same. -- **Provides underlying communication channel needed by WholeMemory.** WholeMemory may need commuincator between GPUs - during the WholeMemory creation and some OPs on some types of WholeMemory. - -To Create WholeMemory Communicator, a WholeMemory Unique ID need to be created first, it is usually created by the first -GPU in the set of GPUs, and then broadcasted to all GPUs that want to work together. Then all GPUs in this communicator -will call WholeMemory Communicator creation function using this WholeMemory Unique ID, and the rank of current GPU as -well as all GPU count. - -### WholeMemory Granularity -As underlying storage may be partitioned into multiple GPUs physically, this is usually not wanted inside one single -user data block. To help on this, when creating WholeMemory, the granularity of data can be specified. Then the -WholeMemory is considered as multiple block of the same granularity and will not get split inside the granularity. - -### WholeMemory Mapping -As WholeMemory provides a whole view of memory to GPU, to access WholeMemory, mapping is usually needed. -Different types of WholeMemory have different mapping methods supported as their names. -Some mappings supported include -- All the WholeMemory types support mapping the memory range that local GPU is responsible for. - That is, each rank can directly access "Local" memory in all types of WholeMemory. - Here "Local" memory doesn't have to be on current GPU's memory, it can be on host memory or even maybe on other GPU, - but it is guaranteed to be directly accessed by current GPU. -- Chunked and Continuous WholeMemory also support Chunked mapping. That is, memory of all GPUs can be mapped into - current GPU, one continuous chunk for one GPU. Each chunk can be directly accessed by current GPU. But the memory of - different chunks are not guaranteed to be continuous. -- Continuous WholeMemory can be mapped into continuous memory space. That is, memory of all GPUs are mapped into a - single range of virtual memory, accessing to different position of this memory will physically access to different - GPUs. 
This mapping will be handled by hardware (CPU pagetable or GPU pagetable). - -### Operations on WholeMemory -There are some operations that can be performed on WholeMemory. They are based on the mapping of WholeMemory. -#### Local Operation -As all WholeMemory supports mapping of local memory, so operation on local memory is supported. The operation can be -either read or write. Just use it as GPU memory of current device is OK. -#### Load / Store -To facilitate file operation, Load / Store WholeMemory from file or to file is supported. WholeMemory use raw binary -file format for disk operation. For Load, the input file can be single file or a list of files, if it is a list, they -will be logically concatenated together and then loaded. For store, each GPU stores its local memory to file, producing -a list of files. -#### Gather / Scatter -WholeMemory also supports Gather / Scatter operation, usually they operations on -[WholeMemory Tensor](#wholememory-tensor). - -### WholeMemory Tensor -If compare with PyTorch, WholeMemory is like PyTorch Storage while WholeMemory Tensor is like PyTorch Tensor. -For now, WholeMemory supports only 1D and 2D tensor, or array and matrix. Only first dimension is partitioned. - -### WholeMemory Embedding -WholeMemory Embedding is just like 2D WholeMemory Tensor, with two features added. They are cache support and sparse -optimizer support. -#### Cache Support -WholeMemory Embedding supports cache. To create WholeMemory Embedding with cache, WholeMemory CachePolicy need first be -created. WholeMemoryCachePolicy can be created with following fields: -- **WholeMemory Communicator**: WholeMemory CachePolicy also need WholeMemory Communicator. - This WholeMemory Communicator defines the set of GPUs that cache the all the Embedding. - It can be the same as the WholeMemory Communicator used to create WholeMemory Embedding. -- **WholeMemory type**: WholeMemory CachePolicy use WholeMemory type to specify the WholeMemory type of the cache. -- **WholeMemory location**: WholeMemory CachePolicy use WholeMemory location to specify the location of the cache. -- **Access type**: Access type can be readonly or readwrite. -- **Cache ratio**: Specify how much memory the cache will use. This ratio is computed for each GPU set that caches the - whole embedding. - -There may be two mostly used caches. They are: -- **Device cached host memory**: When the WholeMemory Communicator for Cache Policy is the same as the WholeMemory - Communicator used to create WholeMemory Embedding, it means that cache has same GPU set as WholeMemory Embedding. - So each GPU just cache its own part of raw Embedding. - Most situations of this case are when raw WholeMemory Embedding is located on host memory, and the cache is on device - memory, each GPU just cache its own part of host memory. -- **Local cached global memory**: The WholeMemory Communicator of WholeMemory CachePolicy can also be a subset of the - WholeMemory Communicator of WholeMemory Embedding. In this case, the subset of GPUs together cache all the embeddings. - Most situations of this case are when raw WholeMemory Embedding is partitioned on different machine nodes, and we - want to cache some embeddings in local machine or local GPU, then the subset of GPU can be all the GPUs in local - machine. For local cached global memory, only readonly is supported. - -#### WholeMemory Embedding Sparse Optimizer -Another feature of WholeMemory Embedding is that WholeMemory Embedding supports embedding training. 
-To efficiently train large embedding tables, sparse optimizer is needed. -WholeMemory Embedding Sparse Optimizer can run on cached or noncached WholeMemory Embedding. -Now supported optimizers include SGD, Adam, RMSProp and AdaGrad. diff --git a/docs/wholegraph/source/conf.py b/docs/wholegraph/source/conf.py deleted file mode 100644 index fe919ce6a..000000000 --- a/docs/wholegraph/source/conf.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# pygdf documentation build configuration file, created by -# sphinx-quickstart on Wed May 3 10:59:22 2017. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys - -# If extensions (or modules to document with autodoc) are in another -# directory, add these directories to sys.path here. If the directory -# is relative to the documentation root, use os.path.abspath to make it -# absolute, like shown here. -sys.path.insert(0, os.path.abspath('sphinxext')) - -from github_link import make_linkcode_resolve # noqa - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "breathe", - "sphinx.ext.intersphinx", - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "numpydoc", - "sphinx_markdown_tables", - 'sphinx.ext.doctest', - 'sphinx.ext.linkcode', - "IPython.sphinxext.ipython_console_highlighting", - "IPython.sphinxext.ipython_directive", - "nbsphinx", - "recommonmark", - "sphinx_copybutton", -] - - -ipython_mplbackend = 'str' - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = {".rst": "restructuredtext", ".md": "markdown"} - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = 'wholegraph' -copyright = '2022-2023, NVIDIA Corporation' -author = 'NVIDIA Corporation' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '23.10' -# The full version, including alpha/beta/rc tags. -release = '23.10.00' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = 'en' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. 
-# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = [] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# - -html_theme = 'pydata_sphinx_theme' - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -html_theme_options = { - "external_links": [], - "github_url": "https://github.com/rapidsai/wholegraph", - "twitter_url": "https://twitter.com/rapidsai", - "show_toc_level": 1, - "navbar_align": "right", -} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - - -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. -htmlhelp_basename = 'wholegraphdoc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'wholegraph.tex', 'wholegraph Documentation', - 'NVIDIA Corporation', 'manual'), -] - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'wholegraph', 'wholegraph Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'wholegraph', 'wholegraph Documentation', - author, 'wholegraph', 'One line description of project.', - 'Miscellaneous'), -] - -autodoc_mock_imports = [ - "numpy", - "torch", - "torch.distributed", - "torch.utils.dlpack", - "torch.utils.data.Dataset", - "pylibwholegraph.binding.wholememory_binding" -] - - -# Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'https://docs.python.org/': None} - - -# Config numpydoc -numpydoc_show_inherited_class_members = False -numpydoc_class_members_toctree = False - -breathe_projects = { - 'libwholegraph': '../_xml', -} - - -def setup(app): - app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") - app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer") - app.add_css_file("references.css") - - -source_suffix = ['.rst', '.md'] - -# The following is used by sphinx.ext.linkcode to provide links to github -linkcode_resolve = make_linkcode_resolve( - 'pylibwholegraph', 'https://github.com/rapidsai/' - 'wholegraph/blob/{revision}/python/' - '{package}/{path}#L{lineno}') diff --git a/docs/wholegraph/source/imgs/device_chunked_wholememory_step1.png b/docs/wholegraph/source/imgs/device_chunked_wholememory_step1.png deleted file mode 100644 index b8a0447e6..000000000 Binary files a/docs/wholegraph/source/imgs/device_chunked_wholememory_step1.png and /dev/null differ diff --git a/docs/wholegraph/source/imgs/device_chunked_wholememory_step2.png b/docs/wholegraph/source/imgs/device_chunked_wholememory_step2.png deleted file mode 100644 index 8b203ce22..000000000 Binary files a/docs/wholegraph/source/imgs/device_chunked_wholememory_step2.png and /dev/null differ diff --git a/docs/wholegraph/source/imgs/device_continuous_wholememory_step1.png b/docs/wholegraph/source/imgs/device_continuous_wholememory_step1.png deleted file mode 100644 index 46ecd1f14..000000000 Binary files a/docs/wholegraph/source/imgs/device_continuous_wholememory_step1.png and /dev/null differ diff --git a/docs/wholegraph/source/imgs/device_continuous_wholememory_step2.png b/docs/wholegraph/source/imgs/device_continuous_wholememory_step2.png deleted file mode 100644 index b773b1ef6..000000000 Binary files a/docs/wholegraph/source/imgs/device_continuous_wholememory_step2.png and /dev/null differ diff --git a/docs/wholegraph/source/imgs/distributed_wholememory.png b/docs/wholegraph/source/imgs/distributed_wholememory.png deleted file mode 100644 index e6bbe9f13..000000000 Binary files a/docs/wholegraph/source/imgs/distributed_wholememory.png and /dev/null differ diff --git a/docs/wholegraph/source/imgs/general_wholememory.png b/docs/wholegraph/source/imgs/general_wholememory.png deleted file mode 100644 index 3ece02b00..000000000 Binary files a/docs/wholegraph/source/imgs/general_wholememory.png and /dev/null differ diff --git a/docs/wholegraph/source/imgs/host_mapped_wholememory_step1.png b/docs/wholegraph/source/imgs/host_mapped_wholememory_step1.png deleted file mode 100644 index aad8caf0d..000000000 Binary files a/docs/wholegraph/source/imgs/host_mapped_wholememory_step1.png and /dev/null differ diff --git a/docs/wholegraph/source/imgs/host_mapped_wholememory_step2.png b/docs/wholegraph/source/imgs/host_mapped_wholememory_step2.png deleted file mode 100644 index 20597f3e5..000000000 Binary files a/docs/wholegraph/source/imgs/host_mapped_wholememory_step2.png and /dev/null differ diff --git a/docs/wholegraph/source/imgs/wholememory_tensor.png b/docs/wholegraph/source/imgs/wholememory_tensor.png deleted file mode 100644 index e725d6c28..000000000 Binary files a/docs/wholegraph/source/imgs/wholememory_tensor.png and /dev/null differ diff --git a/docs/wholegraph/source/index.rst b/docs/wholegraph/source/index.rst deleted file mode 100644 index b7262e9d1..000000000 --- a/docs/wholegraph/source/index.rst +++ /dev/null @@ -1,20 +0,0 @@ -Welcome to RAPIDS WholeGraph documentation 
-========================================== -RAPIDS WholeGraph has following package: - -* pylibwholegraph: shared memory-based GPU-accelerated GNN training - - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - basics/index - installation/index - api_docs/index - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`search` diff --git a/docs/wholegraph/source/installation/container.md b/docs/wholegraph/source/installation/container.md deleted file mode 100644 index 38a2601b1..000000000 --- a/docs/wholegraph/source/installation/container.md +++ /dev/null @@ -1,30 +0,0 @@ -# Build Container for WholeGraph -To run WholeGraph or build WholeGraph from source, the environment need to be setup first. -The recommended method to set up environment is to use Docker images. -For example, to build WholeGraph base image from NGC pytorch 22.10 image, you can follow `Dockerfile`, -it may be like this: -```dockerfile -FROM nvcr.io/nvidia/pytorch:22.10-py3 - -RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y lsb-core software-properties-common wget libspdlog-dev - -#RUN remove old cmake to update -RUN conda remove --force -y cmake -RUN rm -rf /usr/local/bin/cmake && rm -rf /usr/local/lib/cmake && rm -rf /usr/lib/cmake - -RUN apt-key adv --fetch-keys https://apt.kitware.com/keys/kitware-archive-latest.asc && \ - export LSB_CODENAME=$(lsb_release -cs) && \ - apt-add-repository -y "deb https://apt.kitware.com/ubuntu/ ${LSB_CODENAME} main" && \ - apt update && apt install -y cmake - -# update py for pytest -RUN pip3 install -U py -RUN pip3 install Cython setuputils3 scikit-build nanobind pytest-forked pytest -``` - -To run GNN applications, you may also need cuGraphOps, DGL or PyG library to run GNN layers. -You may refer to [DGL](https://www.dgl.ai/pages/start.html) or [PyG](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html) -For example, to install DGL, you may need to add: -```dockerfile -RUN pip3 install dgl -f https://data.dgl.ai/wheels/cu118/repo.html -``` diff --git a/docs/wholegraph/source/installation/getting_wholegraph.md b/docs/wholegraph/source/installation/getting_wholegraph.md deleted file mode 100644 index a9b563ab9..000000000 --- a/docs/wholegraph/source/installation/getting_wholegraph.md +++ /dev/null @@ -1,52 +0,0 @@ - -# Getting the WholeGraph Packages - -Start by reading the [RAPIDS Instalation guide](https://docs.rapids.ai/install) -and checkout the [RAPIDS install selector](https://rapids.ai/start.html) for a pick list of install options. - - -There are 4 ways to get WholeGraph packages: -1. [Quick start with Docker Repo](#docker) -2. [Conda Installation](#conda) -3. [Pip Installation](#pip) -4. [Build from Source](./source_build.md) - - -
- -## Docker -The RAPIDS Docker containers (as of Release 23.10) contain all RAPIDS packages, including WholeGraph, as well as all required supporting packages. To download a container, please see the [Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize all of the RAPIDS libraries. - -
- - -## Conda -It is easy to install WholeGraph using conda. You can get a minimal conda installation with [Miniconda](https://conda.io/miniconda.html) or get the full installation with [Anaconda](https://www.anaconda.com/download). - -WholeGraph conda packages - * libwholegraph - * pylibwholegraph - -Replace the package name in the example below to the one you want to install. - - -Install and update WholeGraph using the conda command: - -```bash -conda install -c rapidsai -c conda-forge -c nvidia wholegraph cudatoolkit=11.8 -``` - -Note: This conda installation only applies to Linux and Python versions 3.8/3.10. - -
- -## PIP -wholegraph, and all of RAPIDS, is available via pip. - -``` -pip install wholegraph-cu11 --extra-index-url=https://pypi.ngc.nvidia.com -``` - -pip packages for other packages are being worked and should be available in late 2023 - -
diff --git a/docs/wholegraph/source/installation/index.rst b/docs/wholegraph/source/installation/index.rst deleted file mode 100644 index 09f1cb44a..000000000 --- a/docs/wholegraph/source/installation/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -Installation -============ - -.. toctree:: - :maxdepth: 2 - - getting_wholegraph - container - source_build diff --git a/docs/wholegraph/source/installation/source_build.md b/docs/wholegraph/source/installation/source_build.md deleted file mode 100644 index 5fd32601e..000000000 --- a/docs/wholegraph/source/installation/source_build.md +++ /dev/null @@ -1,187 +0,0 @@ -# Building from Source - -The following instructions are for users wishing to build wholegraph from source code. These instructions are tested on supported distributions of Linux,CUDA, -and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. -Other operating systems _might be_ compatible, but are not currently tested. - -The wholegraph package include both a C/C++ CUDA portion and a python portion. Both libraries need to be installed in order for cuGraph to operate correctly. -The C/C++ CUDA library is `libwholegraph` and the python library is `pylibwholegraph`. - -## Prerequisites - -__Compiler__: -* `gcc` version 11.0+ -* `nvcc` version 11.0+ -* `cmake` version 3.26.4+ - -__CUDA__: -* CUDA 11.8+ -* NVIDIA driver 450.80.02+ -* Pascal architecture or better - -You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). - -__Other Packages__: -* ninja -* nccl -* cython -* setuputils3 -* scikit-learn -* scikit-build -* nanobind>=0.2.0 - -## Building wholegraph -To install wholegraph from source, ensure the dependencies are met. - -### Clone Repo and Configure Conda Environment -__GIT clone a version of the repository__ - - ```bash - # Set the localtion to wholegraph in an environment variable WHOLEGRAPH_HOME - export WHOLEGRAPH_HOME=$(pwd)/wholegraph - - # Download the wholegraph repo - if you have a folked version, use that path here instead - git clone https://github.com/rapidsai/wholegraph.git $WHOLEGRAPH_HOME - - cd $WHOLEGRAPH_HOME - ``` - -__Create the conda development environment__ - -```bash -# create the conda environment (assuming in base `wholegraph` directory) - -# for CUDA 11.x -conda env create --name wholegraph_dev --file conda/environments/all_cuda-118_arch-x86_64.yaml - -# activate the environment -conda activate wholegraph_dev - -# to deactivate an environment -conda deactivate -``` - - - The environment can be updated as development includes/changes the dependencies. To do so, run: - - -```bash - -# Where XXX is the CUDA 11 version -conda env update --name wholegraph_dev --file conda/environments/all_cuda-118_arch-x86_64.yaml - -conda activate wholegraph_dev -``` - - -### Build and Install Using the `build.sh` Script -Using the `build.sh` script make compiling and installing wholegraph a -breeze. To build and install, simply do: - -```bash -$ cd $WHOLEGRAPH_HOME -$ ./build.sh clean -$ ./build.sh libwholegraph -$ ./build.sh pylibwholegraph -``` - -There are several other options available on the build script for advanced users. -`build.sh` options: -```bash -build.sh [ ...] [ ...] - where is: - clean - remove all existing build artifacts and configuration (start over). - uninstall - uninstall libwholegraph and pylibwholegraph from a prior build/install (see also -n) - libwholegraph - build the libwholegraph C++ library. 
- pylibwholegraph - build the pylibwholegraph Python package. - tests - build the C++ (OPG) tests. - benchmarks - build benchmarks. - docs - build the docs - and is: - -v - verbose build mode - -g - build for debug - -n - no install step - --allgpuarch - build for all supported GPU architectures - --cmake-args=\\\"\\\" - add arbitrary CMake arguments to any cmake call - --compile-cmd - only output compile commands (invoke CMake without build) - --clean - clean an individual target (note: to do a complete rebuild, use the clean target described above) - -h | --h[elp] - print this text - - default action (no args) is to build and install 'libwholegraph' then 'pylibwholegraph' targets - -examples: -$ ./build.sh clean # remove prior build artifacts (start over) -$ ./build.sh - -# make parallelism options can also be defined: Example build jobs using 4 threads (make -j4) -$ PARALLEL_LEVEL=4 ./build.sh libwholegraph - -Note that the libraries will be installed to the location set in `$PREFIX` if set (i.e. `export PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`. -``` - - -## Building each section independently -### Build and Install the C++/CUDA `libwholegraph` Library -CMake depends on the `nvcc` executable being on your path or defined in `$CUDACXX`. - -This project uses cmake for building the C/C++ library. To configure cmake, run: - - ```bash - # Set the localtion to wholegraph in an environment variable WHOLEGRAPH_HOME - export WHOLEGRAPH_HOME=$(pwd)/wholegraph - - cd $WHOLEGRAPH_HOME - cd cpp # enter cpp directory - mkdir build # create build directory - cd build # enter the build directory - cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX - - # now build the code - make -j # "-j" starts multiple threads - make install # install the libraries - ``` -The default installation locations are `$CMAKE_INSTALL_PREFIX/lib` and `$CMAKE_INSTALL_PREFIX/include/wholegraph` respectively. - -### Building and installing the Python package - -Build and Install the Python packages to your Python path: - -```bash -cd $WHOLEGRAPH_HOME -cd python -cd pylibwholegraph -python setup.py build_ext --inplace -python setup.py install # install pylibwholegraph -``` - -## Run tests - -Run either the C++ or the Python tests with datasets - - - **Python tests with datasets** - - ```bash - cd $WHOLEGRAPH_HOME - cd python - pytest - ``` - - - **C++ stand alone tests** - - From the build directory : - - ```bash - # Run the tests - cd $WHOLEGRAPH_HOME - cd cpp/build - gtests/PARALLEL_UTILS_TESTS # this is an executable file - ``` - - -Note: This conda installation only applies to Linux and Python versions 3.8/3.10. - -## Creating documentation - -Python API documentation can be generated from _./docs/wholegraph directory_. Or through using "./build.sh docs" - -## Attribution -Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md diff --git a/docs/wholegraph/source/sphinxext/github_link.py b/docs/wholegraph/source/sphinxext/github_link.py deleted file mode 100644 index b36540f20..000000000 --- a/docs/wholegraph/source/sphinxext/github_link.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# NOTE: -# This contains code with copyright by the scikit-learn project, subject to the -# license in /thirdparty/LICENSES/LICENSE.scikit_learn - -import inspect -import os -import re -import subprocess -import sys -from functools import partial -from operator import attrgetter - -orig = inspect.isfunction - - -# See https://opendreamkit.org/2017/06/09/CythonSphinx/ -def isfunction(obj): - - orig_val = orig(obj) - - new_val = hasattr(type(obj), "__code__") - - if (orig_val != new_val): - return new_val - - return orig_val - - -inspect.isfunction = isfunction - -REVISION_CMD = 'git rev-parse --short HEAD' - -source_regex = re.compile(r"^File: (.*?) \(starting at line ([0-9]*?)\)$", - re.MULTILINE) - - -def _get_git_revision(): - try: - revision = subprocess.check_output(REVISION_CMD.split()).strip() - except (subprocess.CalledProcessError, OSError): - print('Failed to execute git to get revision') - return None - return revision.decode('utf-8') - - -def _linkcode_resolve(domain, info, package, url_fmt, revision): - """Determine a link to online source for a class/method/function - - This is called by sphinx.ext.linkcode - - An example with a long-untouched module that everyone has - >>> _linkcode_resolve('py', {'module': 'tty', - ... 'fullname': 'setraw'}, - ... package='tty', - ... url_fmt='http://hg.python.org/cpython/file/' - ... '{revision}/Lib/{package}/{path}#L{lineno}', - ... revision='xxxx') - 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' - """ - - if revision is None: - return - if domain not in ('py', 'pyx'): - return - if not info.get('module') or not info.get('fullname'): - return - - class_name = info['fullname'].split('.')[0] - module = __import__(info['module'], fromlist=[class_name]) - obj = attrgetter(info['fullname'])(module) - - # Unwrap the object to get the correct source - # file in case that is wrapped by a decorator - obj = inspect.unwrap(obj) - - fn: str = None - lineno: str = None - - try: - fn = inspect.getsourcefile(obj) - except Exception: - fn = None - if not fn: - try: - fn = inspect.getsourcefile(sys.modules[obj.__module__]) - except Exception: - fn = None - - if not fn: - # Possibly Cython code. Search docstring for source - m = source_regex.search(obj.__doc__) - - if (m is not None): - source_file = m.group(1) - lineno = m.group(2) - - # fn is expected to be the absolute path. - fn = os.path.relpath(source_file, start=package) - print("{}:{}".format( - os.path.abspath(os.path.join("..", "python", "cuml", fn)), - lineno)) - else: - return - else: - # Test if we are absolute or not (pyx are relative) - if (not os.path.isabs(fn)): - # Should be relative to docs right now - fn = os.path.abspath(os.path.join("..", "python", fn)) - - # Convert to relative from module root - fn = os.path.relpath(fn, - start=os.path.dirname( - __import__(package).__file__)) - - # Get the line number if we need it. (Can work without it) - if (lineno is None): - try: - lineno = inspect.getsourcelines(obj)[1] - except Exception: - - # Can happen if its a cyfunction. 
See if it has `__code__` - if (hasattr(obj, "__code__")): - lineno = obj.__code__.co_firstlineno - else: - lineno = '' - return url_fmt.format(revision=revision, - package=package, - path=fn, - lineno=lineno) - - -def make_linkcode_resolve(package, url_fmt): - """Returns a linkcode_resolve function for the given URL format - - revision is a git commit reference (hash or name) - - package is the name of the root module of the package - - url_fmt is along the lines of ('https://github.com/USER/PROJECT/' - 'blob/{revision}/{package}/' - '{path}#L{lineno}') - """ - revision = _get_git_revision() - return partial(_linkcode_resolve, - revision=revision, - package=package, - url_fmt=url_fmt)
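A side note on the "Graph Structure" section of the deleted `wholegraph_intro.md` above: the `csr_row_ptr` / `csr_col_ind` arrays it mentions follow the standard CSR convention. The sketch below is a library-agnostic illustration of that layout; the tiny example graph and the use of plain NumPy arrays (rather than WholeMemory Tensors) are only for illustration and are not part of the WholeGraph API.

```python
import numpy as np

# A tiny directed graph with 4 nodes; neighbor lists grouped by source node:
# 0 -> 1, 0 -> 2, 1 -> 2, 2 -> 0, 2 -> 3, and node 3 has no outgoing edges.
neighbors = [[1, 2], [2], [0, 3], []]

# csr_row_ptr[i]:csr_row_ptr[i+1] is the slice of csr_col_ind holding node i's neighbors.
csr_row_ptr = np.zeros(len(neighbors) + 1, dtype=np.int64)
csr_row_ptr[1:] = np.cumsum([len(n) for n in neighbors])
csr_col_ind = np.array([dst for n in neighbors for dst in n], dtype=np.int64)

print(csr_row_ptr)              # [0 2 3 5 5]
print(csr_col_ind)              # [1 2 2 0 3]

# Reading the neighbors of node 2, the same way a sampling kernel would:
start, end = csr_row_ptr[2], csr_row_ptr[3]
print(csr_col_ind[start:end])   # [0 3]
```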