Merge pull request #5315 from vyasr/branch-23.06-merge-23.04

Branch 23.06 merge 23.04
rapidsai · Mar 31, 2023 · ae54384
2 parents a67bfdf + c06582b
commit ae54384
Show file tree

Hide file tree

Showing 184 changed files with 18,095 additions and 5,777 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -102,10 +102,10 @@ jobs:
       build_type: pull-request
       package-name: cuml
       # Always want to test against latest dask/distributed.
-      test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
+      test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       # On arm also need to install cupy from the specific webpage and CMake
       # because treelite needs to be compiled (no wheels available for arm).
-      test-before-arm64: "pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64 && pip install cmake && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
+      test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install cmake && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       # skipped test context: https://github.com/rapidsai/cuml/issues/5025
       # parallelization is based on current test memory usage
       test-unittest: "pytest -v ./python/cuml/tests -k 'not test_silhouette_score_batched and not test_sparse_pca_inputs' -n 8 --ignore=python/cuml/tests/dask && pytest -v ./python/cuml/tests -k 'test_sparse_pca_inputs' && pytest -v ./python/cuml/tests/dask"

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -49,10 +49,10 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       package-name: cuml
-      test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
+      test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       # On arm also need to install cupy from the specific webpage and CMake
       # because treelite needs to be compiled (no wheels available for arm).
-      test-before-arm64: "pip install cupy-cuda11x -f https://pip.cupy.dev/aarch64 && pip install cmake && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.04"
+      test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install cmake && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06"
       # skipped test context: https://github.com/rapidsai/cuml/issues/5025
       # parallelization is based on current test memory usage
       test-unittest: "pytest -v ./python/cuml/tests -k 'not test_silhouette_score_batched and not test_sparse_pca_inputs' -n 8 --ignore=python/cuml/tests/dask && pytest -v ./python/cuml/tests -k 'test_sparse_pca_inputs' && pytest -v ./python/cuml/tests/dask"
diff --git a/.gitignore b/.gitignore
@@ -60,3 +60,7 @@ docs/source/*.tl
 
 ## doxygen build check inside ci/checks/style.sh
 doxygen_check/
+
+## Doxygen
+cpp/html
+cpp/Doxyfile
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -61,7 +61,7 @@ repos:
             pass_filenames: false
             language: python
     - repo: https://github.com/rapidsai/dependency-file-generator
-      rev: v1.4.0
+      rev: v1.5.1
       hooks:
           - id: rapids-dependency-file-generator
             args: ["--clean"]

diff --git a/build.sh b/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 
 # cuml build script
 
@@ -47,7 +47,6 @@ HELP="$0 [<target> ...] [<flag> ...]
                        and profiling enabled (WARNING: Impacts performance)
    --ccache          - Use ccache to cache previous compilations
    --nocloneraft     - CMake will clone RAFT even if it is in the environment, use this flag to disable that behavior
-   --static-faiss    - Force CMake to use the FAISS static libs, cloning and building them if necessary
    --static-treelite - Force CMake to use the Treelite static libs, cloning and building them if necessary
 
  default action (no args) is to build and install 'libcuml', 'cuml', and 'prims' targets only for the detected GPU arch
@@ -78,7 +77,6 @@ BUILD_DISABLE_DEPRECATION_WARNINGS=ON
 BUILD_CUML_STD_COMMS=ON
 BUILD_CUML_TESTS=ON
 BUILD_CUML_MG_TESTS=OFF
-BUILD_STATIC_FAISS=OFF
 BUILD_STATIC_TREELITE=OFF
 CMAKE_LOG_LEVEL=WARNING
 
@@ -199,9 +197,6 @@ while true; do
         --nocloneraft )
             DISABLE_FORCE_CLONE_RAFT=ON
             ;;
-        --static-faiss )
-            BUILD_STATIC_FAISS=ON
-            ;;
         --static-treelite )
             BUILD_STATIC_TREELITE=ON
             ;;
@@ -256,7 +251,6 @@ if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg pri
           -DBUILD_CUML_TESTS=${BUILD_CUML_TESTS} \
           -DBUILD_CUML_MPI_COMMS=${BUILD_CUML_MG_TESTS} \
           -DBUILD_CUML_MG_TESTS=${BUILD_CUML_MG_TESTS} \
-          -DCUML_USE_FAISS_STATIC=${BUILD_STATIC_FAISS} \
           -DCUML_USE_TREELITE_STATIC=${BUILD_STATIC_TREELITE} \
           -DNVTX=${NVTX} \
           -DUSE_CCACHE=${CCACHE} \

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
 set -euo pipefail
 

diff --git a/ci/build_python.sh b/ci/build_python.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
 set -euo pipefail
 

diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py
@@ -36,7 +36,6 @@
 ]
 FILES_TO_EXCLUDE = [
     re.compile(r"cpp/src/tsne/cannylab/bh\.cu"),
-    re.compile(r"cpp/src/hdbscan/detail/faiss_mr\.hpp"),
 ]
 
 # this will break starting at year 10000, which is probably OK :)

diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
@@ -6,9 +6,6 @@
 VERSION=${1}
 CUDA_SUFFIX=${2}
 
-# __init__.py versions
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/cuml/__init__.py
-
 # pyproject.toml versions
 sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/pyproject.toml
 

diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
 set -euo pipefail
 

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -11,12 +11,12 @@ dependencies:
 - cmake>=3.23.1,!=3.25.0
 - cuda-python>=11.7.1,<12.0
 - cudatoolkit=11.8
-- cudf=23.06.*
-- cupy>=7.8.0,<12.0.0a0
+- cudf==23.6.*
+- cupy>=9.5.0,<12.0.0a0
 - cxx-compiler
 - cython>=0.29,<0.30
-- dask-cuda=23.06.*
-- dask-cudf=23.06.*
+- dask-cuda==23.6.*
+- dask-cudf==23.6.*
 - dask-ml
 - dask>=2023.1.1
 - distributed>=2023.1.1
@@ -33,42 +33,44 @@ dependencies:
 - libcublas=11.11.3.6
 - libcufft-dev=10.9.0.58
 - libcufft=10.9.0.58
-- libcumlprims=23.06.*
+- libcumlprims==23.6.*
 - libcurand-dev=10.3.0.86
 - libcurand=10.3.0.86
 - libcusolver-dev=11.4.1.48
 - libcusolver=11.4.1.48
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
 - libfaiss>=1.7.1
-- libraft-distance=23.06.*
-- libraft-headers=23.06.*
-- libraft-nn=23.06.*
+- libraft-headers==23.6.*
+- libraft==23.6.*
+- librmm==23.6.*
 - nbsphinx
 - ninja
 - nltk
+- numba
 - numpydoc
 - pip
 - pydata-sphinx-theme
-- pylibraft=23.06.*
+- pylibraft==23.6.*
 - pytest
+- pytest-benchmark
 - pytest-cases
 - pytest-cov
 - pytest-xdist
 - python>=3.8,<3.11
-- raft-dask=23.06.*
+- raft-dask==23.6.*
 - recommonmark
-- rmm=23.06.*
+- rmm==23.6.*
 - scikit-build>=0.13.1
-- scikit-learn=1.2
+- scikit-learn==1.2
+- scipy
 - seaborn
-- sparse
 - sphinx-copybutton
 - sphinx-markdown-tables
 - sphinx<6
 - statsmodels
 - sysroot_linux-64==2.17
-- treelite=3.1.0
+- treelite==3.1.0
 - umap-learn
 - pip:
   - git+https://github.com/dask/dask-glm@main

diff --git a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml
@@ -17,18 +17,17 @@ dependencies:
 - libcublas=11.11.3.6
 - libcufft-dev=10.9.0.58
 - libcufft=10.9.0.58
-- libcumlprims=23.06.*
+- libcumlprims==23.6.*
 - libcurand-dev=10.3.0.86
 - libcurand=10.3.0.86
 - libcusolver-dev=11.4.1.48
 - libcusolver=11.4.1.48
 - libcusparse-dev=11.7.5.86
 - libcusparse=11.7.5.86
 - libfaiss>=1.7.1
-- libraft-distance=23.06.*
-- libraft-headers=23.06.*
-- libraft-nn=23.06.*
+- libraft-headers==23.6.*
+- libraft==23.6.*
+- librmm==23.6.*
 - ninja
-- rmm=23.06.*
 - sysroot_linux-64==2.17
 name: cpp_all_cuda-118_arch-x86_64
diff --git a/conda/recipes/cuml/meta.yaml b/conda/recipes/cuml/meta.yaml
@@ -79,6 +79,7 @@ requirements:
     - python x.x
     - raft-dask ={{ minor_version }}
     - treelite {{ treelite_version }}
+    - seaborn
 
 tests:
   requirements:

diff --git a/conda/recipes/libcuml/meta.yaml b/conda/recipes/libcuml/meta.yaml
@@ -55,9 +55,8 @@ requirements:
     - libcusolver-dev {{ libcusolver_host_version }}
     - libcusparse {{ libcusparse_host_version }}
     - libcusparse-dev {{ libcusparse_host_version }}
-    - libraft-distance ={{ minor_version }}
+    - libraft ={{ minor_version }}
     - libraft-headers ={{ minor_version }}
-    - libraft-nn ={{ minor_version }}
     - treelite {{ treelite_version }}
     - libfaiss>=1.7.1
     - faiss-proc=*=cuda
@@ -84,9 +83,8 @@ outputs:
         - libcurand {{ libcurand_run_version }}
         - libcusparse {{ libcusparse_run_version }}
         - libcusparse-dev {{ libcusparse_run_version }}
-        - libraft-distance ={{ minor_version }}
+        - libraft ={{ minor_version }}
         - libraft-headers ={{ minor_version }}
-        - libraft-nn ={{ minor_version }}
         - treelite {{ treelite_version }}
         - libfaiss>=1.7.1
         - faiss-proc=*=cuda

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023 NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -43,6 +43,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 ##############################################################################
 # - User Options  ------------------------------------------------------------
 
+option(CUML_ENABLE_GPU "Enable building GPU-accelerated algorithms" ON)
 option(BUILD_SHARED_LIBS "Build cuML shared libraries" ON)
 option(BUILD_CUML_C_LIBRARY "Build libcuml_c shared library. Contains the cuML C API" ON)
 option(BUILD_CUML_CPP_LIBRARY "Build libcuml shared library" ON)
@@ -63,17 +64,15 @@ option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
 option(USE_CCACHE "Cache build artifacts with ccache" OFF)
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF)
 option(CUML_USE_RAFT_STATIC "Build and statically link the RAFT libraries" OFF)
-option(CUML_USE_FAISS_STATIC "Build and statically link the FAISS library for nearest neighbors search on GPU" OFF)
+option(CUML_RAFT_COMPILED "Use libraft shared library" ON)
 option(CUML_USE_TREELITE_STATIC "Build and statically link the treelite library" OFF)
 option(CUML_EXPORT_TREELITE_LINKAGE "Whether to publicly or privately link treelite to libcuml++" OFF)
 option(CUML_USE_CUMLPRIMS_MG_STATIC "Build and statically link the cumlprims_mg library" OFF)
-option(CUML_ENABLE_NN_DEPENDENCIES "Whether to enable FAISS dependency" ON)
 
 # The options below allow incorporating libcuml into another build process
 # without installing all its components. This is useful if total file size is
 # at a premium and we do not expect other consumers to use any APIs of the
 # dependency except those that are directly linked to by the dependent library.
-option(CUML_EXCLUDE_FAISS_FROM_ALL "Exclude FAISS targets from RAFT's 'all' target" ON)
 option(CUML_EXCLUDE_RAFT_FROM_ALL "Exclude RAFT targets from cuML's 'all' target" OFF)
 option(CUML_EXCLUDE_TREELITE_FROM_ALL "Exclude Treelite targets from cuML's 'all' target" OFF)
 option(CUML_EXCLUDE_CUMLPRIMS_MG_FROM_ALL "Exclude cumlprims_mg targets from cuML's 'all' target" OFF)
@@ -98,7 +97,6 @@ message(VERBOSE "CUML_CPP: Disabling all mnmg components and comms libraries: ${
 message(VERBOSE "CUML_CPP: Cache build artifacts with ccache: ${USE_CCACHE}")
 message(VERBOSE "CUML_CPP: Statically link the CUDA toolkit runtime and libraries: ${CUDA_STATIC_RUNTIME}")
 message(VERBOSE "CUML_CPP: Build and statically link RAFT libraries: ${CUML_USE_RAFT_STATIC}")
-message(VERBOSE "CUML_CPP: Build and statically link FAISS library: ${CUML_USE_FAISS_STATIC}")
 message(VERBOSE "CUML_CPP: Build and statically link Treelite library: ${CUML_USE_TREELITE_STATIC}")
 
 set(CUML_ALGORITHMS "ALL" CACHE STRING "Experimental: Choose which algorithms are built into libcuml++.so. Can specify individual algorithms or groups in a semicolon-separated list.")
@@ -151,6 +149,7 @@ if (NOT DISABLE_OPENMP)
   find_package(OpenMP)
   if(OpenMP_FOUND)
     message(STATUS "CUML_CPP: OpenMP found in ${OPENMP_INCLUDE_DIRS}")
+    list(APPEND CUML_CXX_FLAGS ${OpenMP_CXX_FLAGS})
   endif()
 endif()
 
@@ -171,7 +170,7 @@ include(cmake/modules/ConfigureCUDA.cmake)
 ##############################################################################
 # - Set options based on user defined one  -----------------------------------
 set(CUML_USE_RAFT_NN OFF)
-set(CUML_USE_RAFT_DIST OFF)
+set(CUML_RAFT_COMPILED OFF)
 set(LINK_TREELITE OFF)
 set(LINK_CUFFT OFF)
 include(cmake/modules/ConfigureAlgorithms.cmake)
@@ -223,10 +222,6 @@ endif()
 
 include(cmake/thirdparty/get_raft.cmake)
 
-if(CUML_USE_RAFT_NN)
-  include(cmake/thirdparty/get_faiss.cmake)
-endif()
-
 if(LINK_TREELITE)
   include(cmake/thirdparty/get_treelite.cmake)
 endif()
@@ -287,6 +282,9 @@ if(BUILD_CUML_CPP_LIBRARY)
   # common components
   add_library(${CUML_CPP_TARGET}
               src/common/logger.cpp)
+  if (CUML_ENABLE_GPU)
+    target_compile_definitions(${CUML_CPP_TARGET} PUBLIC CUML_ENABLE_GPU)
+  endif()
 
   if(all_algo OR arima_algo)
     target_sources(${CUML_CPP_TARGET}
@@ -348,6 +346,24 @@ if(BUILD_CUML_CPP_LIBRARY)
         src/fil/fil.cu
         src/fil/infer.cu
         src/fil/treelite_import.cu)
+    target_sources(${CUML_CPP_TARGET}
+      PRIVATE
+        src/experimental/fil/infer0.cpp
+        src/experimental/fil/infer0.cu
+        src/experimental/fil/infer1.cpp
+        src/experimental/fil/infer1.cu
+        src/experimental/fil/infer2.cpp
+        src/experimental/fil/infer2.cu
+        src/experimental/fil/infer3.cpp
+        src/experimental/fil/infer3.cu
+        src/experimental/fil/infer4.cpp
+        src/experimental/fil/infer4.cu
+        src/experimental/fil/infer5.cpp
+        src/experimental/fil/infer5.cu
+        src/experimental/fil/infer6.cpp
+        src/experimental/fil/infer6.cu
+        src/experimental/fil/infer7.cpp
+        src/experimental/fil/infer7.cu)
   endif()
 
   # todo: organize linear models better
@@ -586,7 +602,6 @@ if(BUILD_CUML_CPP_LIBRARY)
   # These are always private:
   list(APPEND _cuml_cpp_private_libs
     raft::raft
-    $<$<BOOL:${CUML_USE_RAFT_NN}>:faiss>
     $<TARGET_NAME_IF_EXISTS:GPUTreeShap::GPUTreeShap>
     $<$<BOOL:${LINK_CUFFT}>:CUDA::cufft${_ctk_static_suffix}>
     ${TREELITE_LIBS}
@@ -606,8 +621,7 @@ if(BUILD_CUML_CPP_LIBRARY)
   # because cumlprims_mg and cuML inherit their CUDA libs from the raft::raft
   # INTERFACE target.
   list(APPEND ${_cuml_cpp_libs_var_name}
-    $<$<BOOL:${CUML_USE_RAFT_NN}>:raft::nn>
-    $<$<BOOL:${CUML_USE_RAFT_DIST}>:raft::distance>
+    $<$<BOOL:${CUML_RAFT_COMPILED}>:raft::compiled>
     $<TARGET_NAME_IF_EXISTS:cumlprims_mg::cumlprims_mg>
   )
 
@@ -767,5 +781,5 @@ endif()
 
 include(cmake/doxygen.cmake)
 add_doxygen_target(IN_DOXYFILE Doxyfile.in
-  OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
-  CWD ${CMAKE_CURRENT_BINARY_DIR})
+  OUT_DOXYFILE ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile
+  CWD ${CMAKE_CURRENT_SOURCE_DIR})