Merge branch-21.10
viclafargue committed Aug 3, 2021
2 parents 192882a + 947e22f commit 7725a80
Showing 44 changed files with 1,739 additions and 204 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,7 @@
+# raft 21.10.00 (Date TBD)
+
+Please see https://github.com/rapidsai/raft/releases/tag/v21.10.00a for the latest changes to this development branch.
+
# raft 21.08.00 (Date TBD)

Please see https://github.com/rapidsai/raft/releases/tag/v21.08.00a for the latest changes to this development branch.
8 changes: 4 additions & 4 deletions ci/gpu/build.sh
@@ -39,13 +39,13 @@ env
gpuci_logger "Check GPU usage"
nvidia-smi

-# temporary usage of gpuci_conda_retry install with packages listed here, looking into
+# temporary usage of gpuci_mamba_retry install with packages listed here, looking into
# using the repos yaml files for this
gpuci_logger "Activate conda env"
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids
gpuci_logger "Installing packages needed for RAFT"
-gpuci_conda_retry install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvidia \
+gpuci_mamba_retry install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvidia \
"cudatoolkit=${CUDA_REL}" \
"cudf=${MINOR_VERSION}" \
"rmm=${MINOR_VERSION}" \
@@ -59,8 +59,8 @@ gpuci_conda_retry install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvid
# Install the master version of dask, distributed, and dask-ml
gpuci_logger "Install the master version of dask and distributed"
set -x
pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps
pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
set +x


4 changes: 2 additions & 2 deletions ci/local/old-gpubuild.sh
@@ -81,8 +81,8 @@ fi

# Install the master version of dask, distributed, and dask-ml
set -x
pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps
pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
set +x


12 changes: 4 additions & 8 deletions cpp/CMakeLists.txt
@@ -15,13 +15,9 @@
#=============================================================================

cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)
-include(FetchContent)
-FetchContent_Declare(
-rapids-cmake
-GIT_REPOSITORY https://github.com/rapidsai/rapids-cmake.git
-GIT_TAG origin/branch-21.08
-)
-FetchContent_MakeAvailable(rapids-cmake)
+file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.10/RAPIDS.cmake
+${CMAKE_BINARY_DIR}/RAPIDS.cmake)
+include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)
include(rapids-cmake)
include(rapids-cpm)
include(rapids-cuda)
@@ -30,7 +26,7 @@ include(rapids-find)

rapids_cuda_init_architectures(RAFT)

-project(RAFT VERSION 21.08.00 LANGUAGES CXX CUDA)
+project(RAFT VERSION 21.10.00 LANGUAGES CXX CUDA)

##############################################################################
# - build type ---------------------------------------------------------------
4 changes: 2 additions & 2 deletions cpp/cmake/modules/ConfigureCUDA.cmake
@@ -26,8 +26,8 @@ endif()
list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)

# set warnings as errors
-# list(APPEND RAFT_CUDA_FLAGS -Werror=cross-execution-space-call)
-# list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
+list(APPEND RAFT_CUDA_FLAGS -Werror=cross-execution-space-call)
+list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)

# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
if(CUDA_ENABLE_LINEINFO)
10 changes: 3 additions & 7 deletions cpp/cmake/thirdparty/get_cuco.cmake
@@ -21,21 +21,17 @@ function(find_and_configure_cuco VERSION)
endif()

rapids_cpm_find(cuco ${VERSION}
-GLOBAL_TARGETS cuco cuco::cuco
+GLOBAL_TARGETS cuco::cuco
BUILD_EXPORT_SET raft-exports
INSTALL_EXPORT_SET raft-exports
CPM_ARGS
-GIT_REPOSITORY https://github.com/trxcllnt/cuCollections.git
-GIT_TAG dev
+GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git
+GIT_TAG b1fea0cbe4c384160740af00f7c8760846539abb
OPTIONS "BUILD_TESTS OFF"
"BUILD_BENCHMARKS OFF"
"BUILD_EXAMPLES OFF"
)

-if(NOT TARGET cuco::cuco)
-add_library(cuco::cuco ALIAS cuco)
-endif()

endfunction()

find_and_configure_cuco(0.0.1)
5 changes: 4 additions & 1 deletion cpp/cmake/thirdparty/get_faiss.cmake
@@ -40,7 +40,10 @@ function(find_and_configure_faiss)

if(FAISS_ADDED)
set(FAISS_GPU_HEADERS ${FAISS_SOURCE_DIR} PARENT_SCOPE)
-add_library(FAISS::FAISS ALIAS faiss)
endif()

+if(TARGET faiss AND NOT TARGET FAISS::FAISS)
+add_library(FAISS::FAISS ALIAS faiss)
+endif()

endfunction()
161 changes: 161 additions & 0 deletions cpp/include/raft/distance/canberra.cuh
@@ -0,0 +1,161 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once
#include <raft/distance/pairwise_distance_base.cuh>

namespace raft {
namespace distance {

/**
* @brief the Canberra distance matrix calculation implementer
* It computes the following equation:
*   C(i, j) = sum_k |x(i, k) - y(j, k)| / (|x(i, k)| + |y(j, k)|)
* @tparam DataT input data-type (for A and B matrices)
* @tparam AccT accumulation data-type
* @tparam OutT output data-type (for C and D matrices)
* @tparam IdxT index data-type
* @tparam VecLen number of k-elements loaded by each thread
for every LDG call. details in contractions.cuh
* @tparam FinalLambda final lambda called on final distance value
* @tparam isRowMajor true if input/output is row major,
false for column major
* @param[in] x input matrix
* @param[in] y input matrix
* @param[in] m number of rows of A and C/D
* @param[in] n number of rows of B and cols of C/D
* @param[in] k number of cols of A and B
* @param[in] lda leading dimension of A
* @param[in] ldb leading dimension of B
* @param[in] ldd leading dimension of C/D
* @param[out] dOutput output matrix
* @param fin_op the final gemm epilogue lambda
* @param stream cuda stream to launch work
*/
template <typename DataT, typename AccT, typename OutT, typename IdxT,
int VecLen, typename FinalLambda, bool isRowMajor>
static void canberraImpl(const DataT *x, const DataT *y, IdxT m, IdxT n, IdxT k,
IdxT lda, IdxT ldb, IdxT ldd, OutT *dOutput,
FinalLambda fin_op, cudaStream_t stream) {
typedef typename raft::linalg::Policy4x4<DataT, VecLen>::Policy RowPolicy;
typedef typename raft::linalg::Policy4x4<DataT, VecLen>::ColPolicy ColPolicy;

typedef
typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;

dim3 blk(KPolicy::Nthreads);

// Accumulation operation lambda
auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) {
const auto diff = raft::L1Op<AccT, IdxT>()(x - y);
const auto add = raft::myAbs(x) + raft::myAbs(y);
// deal with potential for 0 in denominator by
// forcing 1/0 instead
acc += ((add != 0) * diff / (add + (add == 0)));
};

// epilogue operation lambda for final value calculation
auto epilog_lambda = [] __device__(
AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
DataT * regxn, DataT * regyn, IdxT gridStrideX,
IdxT gridStrideY) { return; };

if (isRowMajor) {
auto canberraRowMajor =
pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, true>;
dim3 grid =
launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize, canberraRowMajor);

canberraRowMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
} else {
auto canberraColMajor =
pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, false>;
dim3 grid =
launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize, canberraColMajor);
canberraColMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
}

CUDA_CHECK(cudaGetLastError());
}
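
// A minimal scalar sketch (illustration only, assuming float inputs and that
// fabsf is in scope via the CUDA math headers) of the per-element term
// accumulated by the core lambda above: (add != 0) zeroes a 0/0 term and
// (add == 0) bumps the denominator from 0 to 1 so the division never sees 0/0.
__host__ __device__ inline float canberra_term_sketch(float x, float y) {
  const float diff = fabsf(x - y);        // |x - y|
  const float add = fabsf(x) + fabsf(y);  // |x| + |y|
  return (add != 0.f) * diff / (add + (add == 0.f));
}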

template <typename DataT, typename AccT, typename OutT, typename IdxT,
typename FinalLambda, bool isRowMajor>
void canberra(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
const DataT *x, const DataT *y, OutT *dOutput, FinalLambda fin_op,
cudaStream_t stream) {
size_t bytesA = sizeof(DataT) * lda;
size_t bytesB = sizeof(DataT) * ldb;
if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
canberraImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), FinalLambda,
isRowMajor>(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op,
stream);
} else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
canberraImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), FinalLambda,
isRowMajor>(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op,
stream);
} else {
canberraImpl<DataT, AccT, OutT, IdxT, 1, FinalLambda, isRowMajor>(
x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream);
}
}
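
// A small worked sketch of the vector-length selection above (a hypothetical
// helper, not used by the dispatch): 16-byte aligned float rows give
// 16 / 4 = 4 elements per LDG, 8-byte aligned float rows give 2, and anything
// else falls back to scalar (VecLen = 1) loads.
template <typename DataT>
constexpr int pick_veclen_sketch(size_t bytesA, size_t bytesB) {
  return (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0)
           ? int(16 / sizeof(DataT))
           : (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0)
               ? int(8 / sizeof(DataT))
               : 1;
}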

/**
* @brief the Canberra distance matrix calculation
* It computes the following equation:
*   C(i, j) = sum_k |A(i, k) - B(j, k)| / (|A(i, k)| + |B(j, k)|)
* @tparam InType input data-type (for A and B matrices)
* @tparam AccType accumulation data-type
* @tparam OutType output data-type (for C and D matrices)
* @tparam FinalLambda user-defined epilogue lambda
* @tparam Index_ Index type
* @param[in] m number of rows of A and C/D
* @param[in] n number of rows of B and cols of C/D
* @param[in] k number of cols of A and B
* @param[in] pA input matrix
* @param[in] pB input matrix
* @param[out] pD output matrix
* @param[in] fin_op the final element-wise epilogue lambda
* @param[in] stream cuda stream to launch work
* @param[in] isRowMajor whether the input and output matrices are row major
*/
template <typename InType, typename AccType, typename OutType,
typename FinalLambda, typename Index_ = int>
void canberraImpl(int m, int n, int k, const InType *pA, const InType *pB,
OutType *pD, FinalLambda fin_op, cudaStream_t stream,
bool isRowMajor) {
typedef std::is_same<OutType, bool> is_bool;
typedef typename std::conditional<is_bool::value, OutType, AccType>::type
canberraOutType;
Index_ lda, ldb, ldd;
canberraOutType *pDcast = reinterpret_cast<canberraOutType *>(pD);
if (isRowMajor) {
lda = k, ldb = k, ldd = n;
canberra<InType, AccType, canberraOutType, Index_, FinalLambda, true>(
m, n, k, lda, ldb, ldd, pA, pB, pDcast, fin_op, stream);
} else {
lda = n, ldb = m, ldd = m;
canberra<InType, AccType, canberraOutType, Index_, FinalLambda, false>(
n, m, k, lda, ldb, ldd, pB, pA, pDcast, fin_op, stream);
}
}
} // namespace distance
} // namespace raft
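
// A plain C++ reference sketch (host-only illustration, assuming float inputs
// and an identity fin_op) of what the row-major path above computes: x is
// m x k, y is n x k, and the m x n output holds
// D[i][j] = sum_l |x[i][l] - y[j][l]| / (|x[i][l]| + |y[j][l]|),
// with 0/0 terms contributing 0.
#include <cmath>
#include <cstddef>

void canberra_reference_sketch(const float* x, const float* y, float* d,
                               std::size_t m, std::size_t n, std::size_t k) {
  for (std::size_t i = 0; i < m; ++i) {
    for (std::size_t j = 0; j < n; ++j) {
      float acc = 0.f;
      for (std::size_t l = 0; l < k; ++l) {
        const float diff = std::fabs(x[i * k + l] - y[j * k + l]);
        const float add = std::fabs(x[i * k + l]) + std::fabs(y[j * k + l]);
        if (add != 0.f) acc += diff / add;  // skip 0/0 terms
      }
      d[i * n + j] = acc;  // ldd == n in the row-major path
    }
  }
}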