From 372a9a54df42fc65b2dd3e9fc6c33e3a61ef8856 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Wed, 25 May 2022 12:41:16 -0500
Subject: [PATCH 1/4] Pin `dask` & `distributed` for release (#680)

Pinnings are in line with https://github.com/rapidsai/cudf/pull/10965

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/raft/pull/680
---
 ci/gpu/build.sh                          | 4 ++--
 conda/environments/raft_dev_cuda11.0.yml | 4 ++--
 conda/environments/raft_dev_cuda11.2.yml | 4 ++--
 conda/environments/raft_dev_cuda11.4.yml | 4 ++--
 conda/environments/raft_dev_cuda11.5.yml | 4 ++--
 conda/recipes/pyraft/meta.yaml           | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index cd4030e0b2..ba6bed2fc2 100644
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -73,8 +73,8 @@ fi
 # Install the master version of dask, distributed, and dask-ml
 gpuci_logger "Install the master version of dask and distributed"
 set -x
-pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
-pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+pip install "git+https://github.com/dask/distributed.git@2022.05.1" --upgrade --no-deps
+pip install "git+https://github.com/dask/dask.git@2022.05.1" --upgrade --no-deps
 set +x
 
 # Install pre-built conda packages from previous CI step
diff --git a/conda/environments/raft_dev_cuda11.0.yml b/conda/environments/raft_dev_cuda11.0.yml
index 30df28aa52..e34a9e0423 100644
--- a/conda/environments/raft_dev_cuda11.0.yml
+++ b/conda/environments/raft_dev_cuda11.0.yml
@@ -26,8 +26,8 @@ dependencies:
 - pip:
     - sphinx_markdown_tables
     - breathe
-    - git+https://github.com/dask/dask.git@main
-    - git+https://github.com/dask/distributed.git@main
+    - git+https://github.com/dask/dask.git@2022.05.1
+    - git+https://github.com/dask/distributed.git@2022.05.1
 
 # rapids-build-env, notebook-env and doc-env are defined in
 # https://docs.rapids.ai/maintainers/depmgmt/
diff --git a/conda/environments/raft_dev_cuda11.2.yml b/conda/environments/raft_dev_cuda11.2.yml
index 949be16dca..7d25499710 100644
--- a/conda/environments/raft_dev_cuda11.2.yml
+++ b/conda/environments/raft_dev_cuda11.2.yml
@@ -26,8 +26,8 @@ dependencies:
 - pip:
     - sphinx_markdown_tables
     - breathe
-    - git+https://github.com/dask/dask.git@main
-    - git+https://github.com/dask/distributed.git@main
+    - git+https://github.com/dask/dask.git@2022.05.1
+    - git+https://github.com/dask/distributed.git@2022.05.1
 
 # rapids-build-env, notebook-env and doc-env are defined in
 # https://docs.rapids.ai/maintainers/depmgmt/
diff --git a/conda/environments/raft_dev_cuda11.4.yml b/conda/environments/raft_dev_cuda11.4.yml
index 195e6fad2b..12e8cb0ab7 100644
--- a/conda/environments/raft_dev_cuda11.4.yml
+++ b/conda/environments/raft_dev_cuda11.4.yml
@@ -26,8 +26,8 @@ dependencies:
 - pip:
     - sphinx_markdown_tables
     - breathe
-    - git+https://github.com/dask/dask.git@main
-    - git+https://github.com/dask/distributed.git@main
+    - git+https://github.com/dask/dask.git@2022.05.1
+    - git+https://github.com/dask/distributed.git@2022.05.1
 
 # rapids-build-env, notebook-env and doc-env are defined in
 # https://docs.rapids.ai/maintainers/depmgmt/
diff --git a/conda/environments/raft_dev_cuda11.5.yml b/conda/environments/raft_dev_cuda11.5.yml
index 2fb2abbc85..bc03362e5d 100644
--- a/conda/environments/raft_dev_cuda11.5.yml
+++ b/conda/environments/raft_dev_cuda11.5.yml
@@ -27,8 +27,8 @@ dependencies:
 - pip:
     - sphinx_markdown_tables
     - breathe
-    - git+https://github.com/dask/dask.git@main
-    - git+https://github.com/dask/distributed.git@main
+    - git+https://github.com/dask/dask.git@2022.05.1
+    - git+https://github.com/dask/distributed.git@2022.05.1
 
 # rapids-build-env, notebook-env and doc-env are defined in
 # https://docs.rapids.ai/maintainers/depmgmt/
diff --git a/conda/recipes/pyraft/meta.yaml b/conda/recipes/pyraft/meta.yaml
index a640c4beb7..27e1153270 100644
--- a/conda/recipes/pyraft/meta.yaml
+++ b/conda/recipes/pyraft/meta.yaml
@@ -48,8 +48,8 @@ requirements:
     - ucx >={{ ucx_version }}
    - ucx-py {{ ucx_py_version }}
     - ucx-proc=*=gpu
-    - dask>=2022.03.0
-    - distributed>=2022.03.0
+    - dask==2022.05.1
+    - distributed==2022.05.1
     - cuda-python >=11.5,<12.0
     - joblib >=0.11
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
From 29b3a18947e83ce242556c39b9155d9761df390a Mon Sep 17 00:00:00 2001
From: Mahesh Doijade <36705640+mdoijade@users.noreply.github.com>
Date: Thu, 26 May 2022 01:50:08 +0530
Subject: [PATCH 2/4] fix race in fusedL2knn smem read/write by adding a
 syncwarp (#679)

-- fix a race in the fusedL2knn smem read/write by adding a syncwarp, as
   reported by the racecheck tool of compute-sanitizer.
-- this addresses issue https://github.com/rapidsai/raft/issues/676

Authors:
  - Mahesh Doijade (https://github.com/mdoijade)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/679
---
 cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh
index 7424a5ff81..f8532e52a0 100644
--- a/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh
+++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh
@@ -446,6 +446,7 @@ __global__ __launch_bounds__(Policy::Nthreads, 2) void fusedL2kNN(const DataT* x
         }
       }
     }
+    __syncwarp();
     const int finalNumVals = raft::shfl(numValsWarpTopK[i], 31);
     loadWarpQShmem(heapArr[i], &shDumpKV[0], rowId, numOfNN);
     updateSortedWarpQ(
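For illustration, a minimal self-contained sketch of the hazard class this one-line fix addresses; the kernel and names below are invented for the example and are not the fusedL2kNN code. Lanes of a warp publish values to shared memory and then read slots written by other lanes; with independent thread scheduling on Volta and newer GPUs the warp is not guaranteed to run in lockstep, so the cross-lane read can race the write unless a `__syncwarp()` re-converges the warp first:

```cuda
#include <cstdio>

// Each lane writes its own shared-memory slot, then reads a *different*
// lane's slot. Without the __syncwarp(), compute-sanitizer's racecheck
// tool reports a shared-memory race on smem.
__global__ void warpShareKernel(const int* in, int* out)
{
  __shared__ int smem[32];
  const int lane = threadIdx.x & 31;

  smem[lane] = in[lane];              // write own slot
  __syncwarp();                       // re-converge warp; make writes visible

  out[lane] = smem[(lane + 1) & 31];  // cross-lane read
}

int main()
{
  int h_in[32], h_out[32];
  for (int i = 0; i < 32; ++i) h_in[i] = i;

  int *d_in, *d_out;
  cudaMalloc(&d_in, sizeof(h_in));
  cudaMalloc(&d_out, sizeof(h_out));
  cudaMemcpy(d_in, h_in, sizeof(h_in), cudaMemcpyHostToDevice);

  warpShareKernel<<<1, 32>>>(d_in, d_out);

  cudaMemcpy(h_out, d_out, sizeof(h_out), cudaMemcpyDeviceToHost);
  printf("out[0] = %d (expect 1)\n", h_out[0]);

  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}
```

Deleting the `__syncwarp()` and running the binary under `compute-sanitizer --tool racecheck` reproduces the kind of hazard report referenced in #676; with the barrier in place the run is clean.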
From 88334f05c2c5db0338be272d3746765e19d62322 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet"
Date: Wed, 25 May 2022 17:30:40 -0400
Subject: [PATCH 3/4] Adding handle and stream to pylibraft (#683)

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)

URL: https://github.com/rapidsai/raft/pull/683
---
 python/pylibraft/pylibraft/common/cuda.pyx   | 84 ++++++++++++++++++
 python/pylibraft/pylibraft/common/handle.pxd |  2 +-
 python/pylibraft/pylibraft/common/handle.pyx | 89 ++++++++++++++++++++
 3 files changed, 174 insertions(+), 1 deletion(-)
 create mode 100644 python/pylibraft/pylibraft/common/cuda.pyx
 create mode 100644 python/pylibraft/pylibraft/common/handle.pyx

diff --git a/python/pylibraft/pylibraft/common/cuda.pyx b/python/pylibraft/pylibraft/common/cuda.pyx
new file mode 100644
index 0000000000..eb48f64cf1
--- /dev/null
+++ b/python/pylibraft/pylibraft/common/cuda.pyx
@@ -0,0 +1,84 @@
+#
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+from cuda.ccudart cimport(
+    cudaStream_t,
+    cudaError_t,
+    cudaSuccess,
+    cudaStreamCreate,
+    cudaStreamDestroy,
+    cudaStreamSynchronize,
+    cudaGetLastError,
+    cudaGetErrorString,
+    cudaGetErrorName
+)
+
+
+class CudaRuntimeError(RuntimeError):
+    def __init__(self, extraMsg=None):
+        cdef cudaError_t e = cudaGetLastError()
+        cdef bytes errMsg = cudaGetErrorString(e)
+        cdef bytes errName = cudaGetErrorName(e)
+        msg = "Error! %s reason='%s'" % (errName.decode(), errMsg.decode())
+        if extraMsg is not None:
+            msg += " extraMsg='%s'" % extraMsg
+        super(CudaRuntimeError, self).__init__(msg)
+
+
+cdef class Stream:
+    """
+    Stream represents a thin wrapper around cudaStream_t and its operations.
+
+    Examples
+    --------
+
+    .. code-block:: python
+
+        from raft.common.cuda import Stream
+        stream = Stream()
+        stream.sync()
+        del stream  # optional!
+    """
+    def __cinit__(self):
+        cdef cudaStream_t stream
+        cdef cudaError_t e = cudaStreamCreate(&stream)
+        if e != cudaSuccess:
+            raise CudaRuntimeError("Stream create")
+        self.s = stream
+
+    def __dealloc__(self):
+        self.sync()
+        cdef cudaError_t e = cudaStreamDestroy(self.s)
+        if e != cudaSuccess:
+            raise CudaRuntimeError("Stream destroy")
+
+    def sync(self):
+        """
+        Synchronize on the cudastream owned by this object. Note that this
+        could raise an exception due to issues with previous asynchronous
+        launches.
+        """
+        cdef cudaError_t e = cudaStreamSynchronize(self.s)
+        if e != cudaSuccess:
+            raise CudaRuntimeError("Stream sync")
+
+    cdef cudaStream_t getStream(self):
+        return self.s
diff --git a/python/pylibraft/pylibraft/common/handle.pxd b/python/pylibraft/pylibraft/common/handle.pxd
index bc248a335b..6504a122f7 100644
--- a/python/pylibraft/pylibraft/common/handle.pxd
+++ b/python/pylibraft/pylibraft/common/handle.pxd
@@ -25,7 +25,7 @@ from rmm._lib.cuda_stream_pool cimport cuda_stream_pool
 from libcpp.memory cimport shared_ptr
 from libcpp.memory cimport unique_ptr
 
-cdef extern from "raft/handle.hpp" namespace "raft" nogil:
+cdef extern from "raft/core/handle.hpp" namespace "raft" nogil:
     cdef cppclass handle_t:
         handle_t() except +
         handle_t(cuda_stream_view stream_view) except +
diff --git a/python/pylibraft/pylibraft/common/handle.pyx b/python/pylibraft/pylibraft/common/handle.pyx
new file mode 100644
index 0000000000..83a4676076
--- /dev/null
+++ b/python/pylibraft/pylibraft/common/handle.pyx
@@ -0,0 +1,89 @@
+#
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+# import raft
+from rmm._lib.cuda_stream_view cimport cuda_stream_per_thread
+from rmm._lib.cuda_stream_view cimport cuda_stream_view
+
+from .cuda cimport Stream
+from .cuda import CudaRuntimeError
+
+
+cdef class Handle:
+    """
+    Handle is a lightweight Python wrapper around the corresponding C++
+    class of handle_t exposed by RAFT's C++ interface. Refer to the header
+    file raft/core/handle.hpp for interface-level details of this struct.
+
+    Examples
+    --------
+
+    .. code-block:: python
+
+        from raft.common import Stream, Handle
+        stream = Stream()
+        handle = Handle(stream)
+
+        # call algos here
+
+        # final sync of all work launched in the stream of this handle
+        # this is the same as `raft.cuda.Stream.sync()` call, but safer in
+        # case the default stream inside the `handle_t` is being used
+        handle.sync()
+        del handle  # optional!
+    """
+
+    def __cinit__(self, stream: Stream = None, n_streams=0):
+        self.n_streams = n_streams
+        if n_streams > 0:
+            self.stream_pool.reset(new cuda_stream_pool(n_streams))
+
+        cdef cuda_stream_view c_stream
+        if stream is None:
+            # this constructor will construct a "main" handle on
+            # per-thread default stream, which is non-blocking
+            self.c_obj.reset(new handle_t(cuda_stream_per_thread,
+                                          self.stream_pool))
+        else:
+            # this constructor constructs a handle on user stream
+            c_stream = cuda_stream_view(stream.getStream())
+            self.c_obj.reset(new handle_t(c_stream,
+                                          self.stream_pool))
+
+    def sync(self):
+        """
+        Issues a sync on the stream set for this handle.
+        """
+        self.c_obj.get()[0].sync_stream()
+
+    def getHandle(self):
+        return self.c_obj.get()
+
+    def __getstate__(self):
+        return self.n_streams
+
+    def __setstate__(self, state):
+        self.n_streams = state
+        if self.n_streams > 0:
+            self.stream_pool.reset(new cuda_stream_pool(self.n_streams))
+
+        self.c_obj.reset(new handle_t(cuda_stream_per_thread,
+                                      self.stream_pool))
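For readers coming from the C++ side, a rough sketch of what the new wrappers drive underneath, assuming the RAFT and RMM headers are on the include path. It mirrors the `handle_t(cuda_stream_view)` constructor declared in handle.pxd and the `sync_stream()` call behind `Handle.sync()`, but it is a sketch, not the generated wrapper code:

```cuda
#include <raft/core/handle.hpp>      // handle_t, per the pxd change above
#include <rmm/cuda_stream_view.hpp>  // rmm::cuda_stream_view

int main()
{
  // Stream.__cinit__: create a raw CUDA stream
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // Handle(stream): wrap the user stream in a cuda_stream_view and
  // construct the handle on it
  raft::handle_t handle{rmm::cuda_stream_view{stream}};

  // ... launch RAFT algorithms that take `handle` here ...

  handle.sync_stream();       // Handle.sync()
  cudaStreamDestroy(stream);  // Stream.__dealloc__
  return 0;
}
```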
From 120d4780a1f00106e86e86aed4949a42c8119d39 Mon Sep 17 00:00:00 2001
From: Micka <9810050+lowener@users.noreply.github.com>
Date: Thu, 26 May 2022 03:08:25 +0200
Subject: [PATCH 4/4] Revert print vector changes because of std::vector<bool>
 (#681)

The specialization of `std::vector` for `T=bool` unfortunately causes a
compilation issue in cuml, because the `data()` member function is not
implemented and the elements may not be stored contiguously. (Link to the
CI failure:
https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cuml/job/prb/job/cuml-cpu-cuda-build-arm64/CUDA=11.5/1364/console)

cc @achirkin

Authors:
  - Micka (https://github.com/lowener)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/681
---
 cpp/include/raft/core/cudart_utils.hpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/cpp/include/raft/core/cudart_utils.hpp b/cpp/include/raft/core/cudart_utils.hpp
index a1e7e6bc32..ce0d2a7025 100644
--- a/cpp/include/raft/core/cudart_utils.hpp
+++ b/cpp/include/raft/core/cudart_utils.hpp
@@ -36,7 +36,6 @@
 #include
 #include
 #include
-#include <vector>
 
 ///@todo: enable once logging has been enabled in raft
 //#include "logger.hpp"
@@ -303,10 +302,10 @@ void print_device_vector(const char* variable_name,
                          size_t componentsCount,
                          OutStream& out)
 {
-  std::vector<T> host_mem(componentsCount);
-  CUDA_CHECK(
-    cudaMemcpy(host_mem.data(), devMem, componentsCount * sizeof(T), cudaMemcpyDeviceToHost));
-  print_host_vector(variable_name, host_mem.data(), componentsCount, out);
+  T* host_mem = new T[componentsCount];
+  CUDA_CHECK(cudaMemcpy(host_mem, devMem, componentsCount * sizeof(T), cudaMemcpyDeviceToHost));
+  print_host_vector(variable_name, host_mem, componentsCount, out);
+  delete[] host_mem;
 }
 
 /**
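A minimal sketch of the failure mode behind this revert; the helper names below are invented for illustration. `std::vector<bool>` is a bit-packed specialization that provides no `data()` member and need not store its elements contiguously, so the generic `host_mem.data()` pattern fails to compile as soon as `print_device_vector` is instantiated with `T = bool`, while a plain heap array, as in the diff above, is contiguous for every `T`:

```cuda
#include <cstddef>
#include <cstdio>
#include <vector>

// Shape of the old implementation: fine for most T, but instantiating it
// with T = bool fails to compile because std::vector<bool> has no data().
template <typename T>
const T* first_element(const std::vector<T>& v)
{
  return v.data();  // ill-formed for std::vector<bool>
}

// Shape of the reverted implementation: a raw heap array is contiguous for
// every T, including bool, at the cost of a manual delete[].
template <typename T>
void copy_and_print(const T* src, std::size_t n)
{
  T* host_mem = new T[n];
  for (std::size_t i = 0; i < n; ++i) host_mem[i] = src[i];  // stand-in for cudaMemcpy
  for (std::size_t i = 0; i < n; ++i) printf("%d ", static_cast<int>(host_mem[i]));
  printf("\n");
  delete[] host_mem;
}

int main()
{
  bool flags[4] = {true, false, true, true};
  copy_and_print(flags, 4);  // works with the raw-array version

  // std::vector<bool> bits(4);
  // first_element(bits);  // uncomment to see the compilation error
  return 0;
}
```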