This repository has been archived by the owner on Nov 25, 2024. It is now read-only.

Commit 474947c

Merge branch 'branch-24.06' into remove-extra-copyright-script

KyleFromNVIDIA committed Apr 15, 2024
2 parents df71c0b + 9d1dcb1
Showing 33 changed files with 853 additions and 176 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@
# To run: `pre-commit run --all-files`
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-added-large-files
- id: debug-statements
@@ -13,7 +13,7 @@ repos:
args: [--markdown-linebreak-ext=md]
- id: end-of-file-fixer
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
rev: 7.0.0
hooks:
- id: flake8
args: ["--config=.flake8"]
@@ -22,7 +22,7 @@
scripts
)
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.6
rev: v18.1.2
hooks:
- id: clang-format
exclude: |
@@ -32,12 +32,12 @@ repos:
types_or: [c, c++, cuda]
args: ["-fallback-style=none", "-style=file", "-i"]
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.8.0
rev: v1.11.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.0.1
rev: v0.0.3
hooks:
- id: verify-copyright
files: |
29 changes: 29 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,32 @@
# wholegraph 24.04.00 (10 Apr 2024)

## 🐛 Bug Fixes

- Update pre-commit-hooks to v0.0.3 ([#152](https://github.com/rapidsai/wholegraph/pull/152)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
- Fixed README links to point to cuGraph API ([#145](https://github.com/rapidsai/wholegraph/pull/145)) [@acostadon](https://github.com/acostadon)
- [Bugfix] Fix to compile when NVSHMEM is ON ([#142](https://github.com/rapidsai/wholegraph/pull/142)) [@chang-l](https://github.com/chang-l)
- handle more RAPIDS version formats in update-version.sh ([#122](https://github.com/rapidsai/wholegraph/pull/122)) [@jameslamb](https://github.com/jameslamb)

## 🚀 New Features

- Support CUDA 12.2 ([#116](https://github.com/rapidsai/wholegraph/pull/116)) [@jameslamb](https://github.com/jameslamb)

## 🛠️ Improvements

- Use `conda env create --yes` instead of `--force` ([#155](https://github.com/rapidsai/wholegraph/pull/155)) [@bdice](https://github.com/bdice)
- add round-robin shard strategy ([#154](https://github.com/rapidsai/wholegraph/pull/154)) [@linhu-nv](https://github.com/linhu-nv)
- Switch to scikit-build-core ([#150](https://github.com/rapidsai/wholegraph/pull/150)) [@vyasr](https://github.com/vyasr)
- Update script input name ([#147](https://github.com/rapidsai/wholegraph/pull/147)) [@AyodeAwe](https://github.com/AyodeAwe)
- Add upper bound to prevent usage of NumPy 2 ([#146](https://github.com/rapidsai/wholegraph/pull/146)) [@bdice](https://github.com/bdice)
- Replace local copyright check with pre-commit-hooks verify-copyright ([#144](https://github.com/rapidsai/wholegraph/pull/144)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
- remove an unnecessary sync in exchange_embeddings_nccl_func ([#143](https://github.com/rapidsai/wholegraph/pull/143)) [@linhu-nv](https://github.com/linhu-nv)
- Use default `rapids-cmake` CUDA_ARCHITECTURES ([#140](https://github.com/rapidsai/wholegraph/pull/140)) [@trxcllnt](https://github.com/trxcllnt)
- Add support for Python 3.11, require NumPy 1.23+ ([#139](https://github.com/rapidsai/wholegraph/pull/139)) [@jameslamb](https://github.com/jameslamb)
- [Bugfix] Host full-neighbor sampling returns wrong results in unit test ([#138](https://github.com/rapidsai/wholegraph/pull/138)) [@chang-l](https://github.com/chang-l)
- use enum to implement log_level in wholememory ([#136](https://github.com/rapidsai/wholegraph/pull/136)) [@linhu-nv](https://github.com/linhu-nv)
- target branch-24.04 for GitHub Actions workflows ([#135](https://github.com/rapidsai/wholegraph/pull/135)) [@jameslamb](https://github.com/jameslamb)
- Add environment-agnostic scripts for running ctests and pytests ([#128](https://github.com/rapidsai/wholegraph/pull/128)) [@trxcllnt](https://github.com/trxcllnt)

# wholegraph 24.02.00 (12 Feb 2024)

## 🐛 Bug Fixes
19 changes: 6 additions & 13 deletions build.sh
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

# wholegraph build script

@@ -49,7 +49,7 @@ HELP="$0 [<target> ...] [<flag> ...]
and <flag> is:
-v - verbose build mode
-g - build for debug
-n - no install step
-n - no install step (does not affect Python)
--allgpuarch - build for all supported GPU architectures
--cmake-args=\\\"<args>\\\" - add arbitrary CMake arguments to any cmake call
--compile-cmd - only output compile commands (invoke CMake without build)
@@ -271,19 +271,12 @@ if buildAll || hasArg pylibwholegraph; then
if ! hasArg --compile-cmd; then
cd ${REPODIR}/python/pylibwholegraph
env LIBWHOLEGRAPH_DIR=${LIBWHOLEGRAPH_DIR} \
${PYTHON} setup.py build_ext --inplace \
--build-type=${BUILD_TYPE} \
${EXTRA_CMAKE_ARGS}
if ! hasArg -n; then
env LIBWHOLEGRAPH_DIR=${LIBWHOLEGRAPH_DIR} \
${PYTHON} setup.py install \
--build-type=${BUILD_TYPE} \
${EXTRA_CMAKE_ARGS}
fi
SKBUILD_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${BUILD_TYPE};${EXTRA_CMAKE_ARGS/ /;}" ${PYTHON} -m pip install --no-build-isolation --no-deps .

else
# just invoke cmake without going through scikit-build
# just invoke cmake without going through scikit-build-core
env LIBWHOLEGRAPH_DIR=${LIBWHOLEGRAPH_DIR} \
cmake -S ${REPODIR}/python/pylibwholegraph -B ${REPODIR}/python/pylibwholegraph/_skbuild/build \
cmake -S ${REPODIR}/python/pylibwholegraph -B ${REPODIR}/python/pylibwholegraph/build \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
${EXTRA_CMAKE_ARGS}
fi
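The new single-step install above joins `EXTRA_CMAKE_ARGS` into `SKBUILD_CMAKE_ARGS` with the bash expansion `${EXTRA_CMAKE_ARGS/ /;}`, because scikit-build-core expects CMake arguments separated by semicolons rather than spaces. A minimal sketch of that expansion (the flag values here are hypothetical, not from the repo); note that a single `/` replaces only the first match, while `//` replaces every match:

```shell
#!/bin/bash
# Hypothetical extra CMake flags, space-separated as build.sh collects them.
EXTRA_CMAKE_ARGS="-DFOO=ON -DBAR=OFF -DBAZ=2"

# ${var/ /;} replaces only the FIRST space with a semicolon:
echo "${EXTRA_CMAKE_ARGS/ /;}"    # -DFOO=ON;-DBAR=OFF -DBAZ=2

# ${var// /;} replaces EVERY space:
echo "${EXTRA_CMAKE_ARGS// /;}"   # -DFOO=ON;-DBAR=OFF;-DBAZ=2
```

With exactly two flags the two forms coincide, which is why the single-slash form in the diff works for the common case.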
2 changes: 1 addition & 1 deletion ci/build_docs.sh
@@ -11,7 +11,7 @@ rapids-dependency-file-generator \
--file_key docs \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n docs
rapids-mamba-retry env create --yes -f env.yaml -n docs
# Temporarily allow unbound variables for conda activation.
set +u
conda activate docs
4 changes: 2 additions & 2 deletions ci/build_wheel.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

@@ -38,7 +38,7 @@ fi
cd "${package_dir}"

# Hardcode the output dir
SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_MESSAGE_LOG_LEVEL=VERBOSE -DCUDA_STATIC_RUNTIME=ON -DWHOLEGRAPH_BUILD_WHEELS=ON" \
SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DBUILD_SHARED_LIBS=OFF;-DCMAKE_MESSAGE_LOG_LEVEL=VERBOSE;-DCUDA_STATIC_RUNTIME=ON;-DWHOLEGRAPH_BUILD_WHEELS=ON" \
python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check

mkdir -p final_dist
4 changes: 2 additions & 2 deletions ci/check_style.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

set -euo pipefail

@@ -11,7 +11,7 @@ rapids-dependency-file-generator \
--file_key checks \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n checks
rapids-mamba-retry env create --yes -f env.yaml -n checks
conda activate checks

# Run pre-commit checks
4 changes: 2 additions & 2 deletions ci/test_clang_tidy.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

@@ -12,7 +12,7 @@ rapids-dependency-file-generator \
--file_key clang_tidy \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n clang_tidy
rapids-mamba-retry env create --yes -f env.yaml -n clang_tidy
# Temporarily allow unbound variables for conda activation.
set +u
conda activate clang_tidy
2 changes: 1 addition & 1 deletion ci/test_cpp.sh
@@ -14,7 +14,7 @@ rapids-dependency-file-generator \
--file_key test_cpp \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n test
rapids-mamba-retry env create --yes -f env.yaml -n test

# Temporarily allow unbound variables for conda activation.
set +u
2 changes: 1 addition & 1 deletion ci/test_python.sh
@@ -27,7 +27,7 @@ rapids-dependency-file-generator \
--file_key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=${ARCH};py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n test
rapids-mamba-retry env create --yes -f env.yaml -n test

# Temporarily allow unbound variables for conda activation.
set +u
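The `--matrix` strings in the CI scripts above all trim the CUDA version with `${RAPIDS_CUDA_VERSION%.*}`. A minimal sketch of that expansion (the version value is hypothetical): `%.*` strips the shortest trailing `.<component>`, reducing a full version to its major.minor form.

```shell
#!/bin/bash
# Hypothetical full CUDA version as the CI environment would provide it.
RAPIDS_CUDA_VERSION="12.2.1"

# %.* removes the shortest suffix matching ".*", i.e. the patch component.
echo "${RAPIDS_CUDA_VERSION%.*}"   # 12.2
```

Because `%` (not `%%`) matches the shortest suffix, only the last dotted component is removed, so a two-part version like `11.8` would be reduced to `11`.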
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -42,7 +42,7 @@ dependencies:
- pytorch-cuda=11.8
- pytorch=2.0.0
- recommonmark
- scikit-build
- scikit-build-core>=0.7.0
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -40,7 +40,7 @@ dependencies:
- pytest-xdist
- python>=3.9,<3.12
- recommonmark
- scikit-build
- scikit-build-core>=0.7.0
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
4 changes: 2 additions & 2 deletions conda/recipes/pylibwholegraph/conda_build_config.yaml
@@ -13,8 +13,8 @@ cuda11_compiler:
cmake_version:
- ">=3.26.4"

scikit_build_version:
- ">=0.13.1"
scikit_build_core_version:
- ">=0.7.0"

sysroot_version:
- "2.17"
4 changes: 2 additions & 2 deletions conda/recipes/pylibwholegraph/meta.yaml
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -63,7 +63,7 @@ requirements:
- cython
- libwholegraph ={{ version }}
- python
- scikit-build {{ scikit_build_version }}
- scikit-build-core {{ scikit_build_core_version }}
run:
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
{% if cuda_major == "11" %}
6 changes: 4 additions & 2 deletions cpp/include/wholememory/embedding.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -131,6 +131,7 @@ wholememory_error_code_t wholememory_destroy_embedding_cache_policy(
* @param optimizer : Optimizer to use for training, if don't train embedding, use nullptr
* @param cache_policy : Cache policy for this embedding, if don't use cache, use nullptr
* @param user_defined_sms : User-defined sms number for raw embedding gather/scatter
* @param round_robin_size : continuous embedding size in each rank under round-robin shard mode
* @return : wholememory_error_code_t
*/
wholememory_error_code_t wholememory_create_embedding(
@@ -141,7 +142,8 @@ wholememory_error_code_t wholememory_create_embedding(
wholememory_memory_location_t memory_location,
wholememory_embedding_optimizer_t optimizer,
wholememory_embedding_cache_policy_t cache_policy,
int user_defined_sms = -1);
int user_defined_sms = -1,
int round_robin_size = 0);

/**
* Destroy WholeMemory Embedding
6 changes: 4 additions & 2 deletions cpp/include/wholememory/wholememory.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -360,14 +360,16 @@ int fork_get_device_count();
* @param file_entry_size : entry size in file, should be less than or equal to memory_entry_size
* @param file_names : file names, all binary files will be logically concatenated and loaded.
* @param file_count : number of files.
* @param round_robin_size : continuous embedding number for a rank under round-robin shard mode
* @return : wholememory_error_code_t
*/
wholememory_error_code_t wholememory_load_from_file(wholememory_handle_t wholememory_handle,
size_t memory_offset,
size_t memory_entry_size,
size_t file_entry_size,
const char** file_names,
int file_count);
int file_count,
int round_robin_size);

/**
* Store local WholeMemory to file, this should be called by all ranks, with different
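The new `round_robin_size` parameter is documented above as the "continuous embedding number for a rank under round-robin shard mode". A hypothetical sketch of the idea, not the library's actual implementation: entries are grouped into contiguous blocks of `round_robin_size`, and the blocks are dealt out to ranks in turn.

```shell
#!/bin/bash
# Hypothetical illustration of round-robin sharding. All names and
# values here are made up for the sketch.
round_robin_size=2   # contiguous entries per block
world_size=3         # number of ranks

for entry in 0 1 2 3 4 5 6 7; do
  block=$(( entry / round_robin_size ))   # which contiguous block
  rank=$(( block % world_size ))          # block dealt round-robin to ranks
  echo "entry $entry -> rank $rank"
done
```

Under this scheme entries 0-1 land on rank 0, entries 2-3 on rank 1, entries 4-5 on rank 2, and entries 6-7 wrap back to rank 0, which matches the "continuous embedding size in each rank" wording in the header comments.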
