Skip to content

Commit

Permalink
Promote trustworthiness_score to public header, add missing includes, update dependencies (#3968)
Browse files Browse the repository at this point in the history

* Updates dask/distributed versions to match cuDF (rapidsai/cudf#8458)
* Updates Thrust to v1.12.0 to align with cuDF and cuGraph
* Don't include the src and src_prims directories in `cuml::cuml++` target's public include paths
* Add missing `<cstddef>` and `<cstdint>` include directives
* Promote `trustworthiness_score` to public `cuml/metrics/metrics.hpp` header and update Cython
* Compile Cython with `-std=c++17`
* Remove `-Wstrict-prototypes` Cython warning
* Fixes linker error in debug builds
* Fixes #3885

Authors:
  - Paul Taylor (https://github.com/trxcllnt)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: #3968
  • Loading branch information
trxcllnt authored Jun 16, 2021
1 parent dc82e88 commit afdeda9
Show file tree
Hide file tree
Showing 17 changed files with 91 additions and 84 deletions.
4 changes: 4 additions & 0 deletions conda/environments/cuml_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ dependencies:
- umap-learn
- scikit-learn=0.23.1
- treelite=1.3.0
- statsmodels
- seaborn
- hdbscan
- nltk
- pip
- pip:
- sphinx_markdown_tables
Expand Down
4 changes: 4 additions & 0 deletions conda/environments/cuml_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ dependencies:
- umap-learn
- scikit-learn=0.23.1
- treelite=1.3.0
- statsmodels
- seaborn
- hdbscan
- nltk
- pip
- pip:
- sphinx_markdown_tables
Expand Down
6 changes: 3 additions & 3 deletions conda/recipes/cuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ requirements:
- python x.x
- setuptools
- cython>=0.29,<0.30
- cmake>=3.14
- cmake>=3.20.1
- treelite=1.3.0
- cudf {{ minor_version }}
- libcuml={{ version }}
Expand All @@ -46,8 +46,8 @@ requirements:
- nccl>=2.9.9
- ucx-py 0.21
- ucx-proc=*=gpu
- dask>=2.12.0
- distributed>=2.12.0
- dask>=2021.6.0
- distributed>=2021.6.0
- joblib >=0.11
- {{ pin_compatible('cudatoolkit', max_pin='x.x') }}

Expand Down
20 changes: 18 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,17 @@ endif()
##############################################################################
# - build libcuml++ shared library -------------------------------------------

# Write a small linker script, fatbin.ld, into the current binary directory.
# The script declares two output sections, .nvFatBinSegment and .nv_fatbin,
# each collecting the input sections of the same name. It is only generated
# when the C or the C++ library is being built; those targets pass the file
# to the linker through target_link_options() so that CUDA symbols aren't
# relocated to the middle of the debug build binaries.
if(BUILD_CUML_C_LIBRARY OR BUILD_CUML_CPP_LIBRARY)
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld"
[=[
SECTIONS
{
.nvFatBinSegment : { *(.nvFatBinSegment) }
.nv_fatbin : { *(.nv_fatbin) }
}
]=])
endif()

if(BUILD_CUML_CPP_LIBRARY)

# single GPU components
Expand Down Expand Up @@ -309,11 +320,11 @@ if(BUILD_CUML_CPP_LIBRARY)
target_include_directories(${CUML_CPP_TARGET}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src_prims>
$<BUILD_INTERFACE:$<$<BOOL:${ENABLE_CUMLPRIMS_MG}>:${cumlprims_mg_INCLUDE_DIRS}>>
$<$<BOOL:Treelite_ADDED>:${Treelite_SOURCE_DIR}/include>
PRIVATE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src_prims>
$<$<OR:$<BOOL:BUILD_CUML_STD_COMMS>,$<BOOL:BUILD_CUML_MPI_COMMS>>:${NCCL_INCLUDE_DIRS}>
$<$<BOOL:BUILD_CUML_MPI_COMMS>:${MPI_CXX_INCLUDE_PATH}>
INTERFACE
Expand Down Expand Up @@ -347,6 +358,8 @@ if(BUILD_CUML_CPP_LIBRARY)
target_link_options(${CUML_CPP_TARGET} PRIVATE "-Wl,--exclude-libs,libdmlc.a")
# same as above, but for protobuf library
target_link_options(${CUML_CPP_TARGET} PRIVATE "-Wl,--exclude-libs,libprotobuf.a")
# ensure CUDA symbols aren't relocated to the middle of the debug build binaries
target_link_options(${CUML_CPP_TARGET} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")

endif(BUILD_CUML_CPP_LIBRARY)

Expand Down Expand Up @@ -382,6 +395,9 @@ if(BUILD_CUML_C_LIBRARY)
FAISS::FAISS
)

# ensure CUDA symbols aren't relocated to the middle of the debug build binaries
target_link_options(${CUML_C_TARGET} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")

endif()

##############################################################################
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_thrust.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ function(find_and_configure_thrust VERSION)

endfunction()

find_and_configure_thrust(1.10.0)
find_and_configure_thrust(1.12.0)
3 changes: 3 additions & 0 deletions cpp/include/cuml/manifold/umap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

#pragma once

#include <cstddef>
#include <cstdint>

namespace raft {
class handle_t;
}
Expand Down
21 changes: 21 additions & 0 deletions cpp/include/cuml/metrics/metrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <raft/linalg/distance_type.h>

#include <cstdint>

namespace raft {
class handle_t;
}
Expand Down Expand Up @@ -337,5 +339,24 @@ void pairwiseDistance_sparse(const raft::handle_t &handle, float *x, float *y,
raft::distance::DistanceType metric,
float metric_arg);

/**
* @brief Compute the trustworthiness score
*
* @tparam math_t Element type of the X and X_embedded arrays
* @tparam distance_type Distance type to consider
* @param h Raft handle
* @param X Data in original dimension (taken as pointer-to-const)
* @param X_embedded Data in target dimension (embedding); taken as a
* non-const pointer
* @param n Number of samples
* @param m Number of features in high/original dimension
* @param d Number of features in low/embedded dimension
* @param n_neighbors Number of neighbors considered by trustworthiness score
* @param batchSize Batch size (defaults to 512 when omitted)
* @return Trustworthiness score
*/
template <typename math_t, raft::distance::DistanceType distance_type>
double trustworthiness_score(const raft::handle_t &h, const math_t *X,
math_t *X_embedded, int n, int m, int d,
int n_neighbors, int batchSize = 512);

} // namespace Metrics
} // namespace ML
20 changes: 12 additions & 8 deletions cpp/src/metrics/trustworthiness.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
*/

#include <metrics/trustworthiness_score.cuh>

#include <cuml/metrics/metrics.hpp>

#include <raft/distance/distance.cuh>
#include <raft/handle.hpp>

Expand All @@ -23,14 +26,15 @@ namespace Metrics {

/**
* @brief Compute the trustworthiness score
* @param h: Raft handle
* @param X[in]: Data in original dimension
* @param X_embedded[in]: Data in target dimension (embedding)
* @param n[in]: Number of samples
* @param m[in]: Number of features in high/original dimension
* @param d[in]: Number of features in low/embedded dimension
* @param n_neighbors[in]: Number of neighbors considered by
* trustworthiness score
*
* @param h Raft handle
* @param X Data in original dimension
* @param X_embedded Data in target dimension (embedding)
* @param n Number of samples
* @param m Number of features in high/original dimension
* @param d Number of features in low/embedded dimension
* @param n_neighbors Number of neighbors considered by trustworthiness score
* @param batchSize Batch size
* @tparam distance_type: Distance type to consider
* @return Trustworthiness score
*/
Expand Down
30 changes: 0 additions & 30 deletions cpp/src/metrics/trustworthiness.cuh

This file was deleted.

30 changes: 0 additions & 30 deletions cpp/src/metrics/trustworthiness_c.h

This file was deleted.

4 changes: 2 additions & 2 deletions cpp/src/svm/workingset.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,8 @@ class WorkingSet {
// Determine temporary device storage requirements for cub
size_t cub_bytes2 = 0;
cub::DeviceRadixSort::SortPairs(
NULL, cub_bytes, f_idx.data(), f_idx_sorted.data(), f_sorted.data(),
f_sorted.data(), n_train, 0, 8 * sizeof(int), stream);
NULL, cub_bytes, f_sorted.data(), f_sorted.data(), f_idx.data(),
f_idx_sorted.data(), n_train, 0, 8 * sizeof(math_t), stream);
cub::DeviceSelect::If(NULL, cub_bytes2, f_idx.data(), f_idx.data(),
d_num_selected, n_train, dummy_select_op, stream);
cub_bytes = max(cub_bytes, cub_bytes2);
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/tsa/auto_arima.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ void cumulative_sum_helper(
std::shared_ptr<raft::mr::device::allocator> allocator, cudaStream_t stream) {
// Determine temporary storage size
size_t temp_storage_bytes = 0;
cub::DeviceScan::InclusiveSum(NULL, temp_storage_bytes, mask, cumul,
cub::DeviceScan::InclusiveSum(NULL, temp_storage_bytes,
reinterpret_cast<const char*>(mask), cumul,
mask_size, stream);

// Allocate temporary storage
Expand All @@ -58,7 +59,8 @@ void cumulative_sum_helper(
void* d_temp_storage = (void*)temp_storage.data();

// Execute the scan
cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, mask, cumul,
cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes,
reinterpret_cast<const char*>(mask), cumul,
mask_size, stream);
}

Expand Down
5 changes: 4 additions & 1 deletion cpp/test/sg/trustworthiness_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@
* limitations under the License.
*/

#include <cuml/metrics/metrics.hpp>

#include <raft/handle.hpp>

#include <gtest/gtest.h>
#include <raft/cudart_utils.h>
#include <metrics/trustworthiness.cuh>
#include <raft/cuda_utils.cuh>
#include <vector>

Expand Down
5 changes: 3 additions & 2 deletions cpp/test/sg/tsne_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/

#include <cuml/manifold/tsne.h>
#include <cuml/metrics/metrics.hpp>

#include <datasets/boston.h>
#include <datasets/breast_cancer.h>
#include <datasets/diabetes.h>
Expand All @@ -26,7 +28,6 @@
#include <cuml/common/device_buffer.hpp>
#include <cuml/common/logger.hpp>
#include <iostream>
#include <metrics/trustworthiness.cuh>
#include <raft/mr/device/allocator.hpp>
#include <tsne/distances.cuh>
#include <vector>
Expand Down Expand Up @@ -174,4 +175,4 @@ TEST_P(TSNETestF, Result) {
assert_score(knn_score_fft, "knn_fft\n", trustworthiness_threshold);
}

INSTANTIATE_TEST_CASE_P(TSNETests, TSNETestF, ::testing::ValuesIn(inputs));
INSTANTIATE_TEST_CASE_P(TSNETests, TSNETestF, ::testing::ValuesIn(inputs));
3 changes: 2 additions & 1 deletion cpp/test/sg/umap_parametrizable_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <cuml/metrics/metrics.hpp>

#include <gtest/gtest.h>
#include <iostream>
#include <vector>
Expand All @@ -27,7 +29,6 @@
#include <cuml/manifold/umap.hpp>
#include <cuml/neighbors/knn.hpp>
#include <linalg/reduce_rows_by_key.cuh>
#include <metrics/trustworthiness.cuh>
#include <raft/cuda_utils.cuh>
#include <raft/distance/distance.cuh>
#include <raft/handle.hpp>
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/metrics/trustworthiness.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ cdef extern from "raft/linalg/distance_type.h" namespace "raft::distance":
ctypedef int DistanceType
ctypedef DistanceType euclidean "(raft::distance::DistanceType)5"

cdef extern from "metrics/trustworthiness_c.h" namespace "ML::Metrics":
cdef extern from "cuml/metrics/metrics.hpp" namespace "ML::Metrics":

cdef double trustworthiness_score[T, DistanceType](const handle_t& h,
T* X,
Expand Down
10 changes: 9 additions & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def finalize_options(self):
],
libraries=libs,
language='c++',
extra_compile_args=['-std=c++14'])
extra_compile_args=['-std=c++17'])
]

self.distribution.ext_modules = extensions
Expand All @@ -207,6 +207,14 @@ class cuml_build_ext(cython_build_ext, object):

boolean_options = ["singlegpu"] + cython_build_ext.boolean_options

def build_extensions(self):
# Remove "-Wstrict-prototypes" from self.compiler.compiler_so, then hand
# off to the parent cython_build_ext.build_extensions implementation.
# NOTE(review): presumably the flag is dropped because the extensions are
# compiled as C++ (language='c++'), where it only produces a warning —
# confirm against the toolchain in use.
try:
# Silence the '-Wstrict-prototypes' warning
self.compiler.compiler_so.remove("-Wstrict-prototypes")
except Exception:
# Best effort: any failure while removing the flag is deliberately
# ignored so the build proceeds with the unmodified flags.
pass
cython_build_ext.build_extensions(self)

def initialize_options(self):

self.singlegpu = None
Expand Down

0 comments on commit afdeda9

Please sign in to comment.