Skip to content

Commit

Permalink
Move NVTX range helpers to raft (#4445)
Browse files Browse the repository at this point in the history
Move NVTX range helpers to raft and extend them a little bit.
Corresponding raft PR: rapidsai/raft#416 .

Authors:
  - Artem M. Chirkin (https://github.com/achirkin)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #4445
  • Loading branch information
achirkin authored Dec 17, 2021
1 parent b9b97fc commit d630156
Show file tree
Hide file tree
Showing 24 changed files with 192 additions and 457 deletions.
5 changes: 1 addition & 4 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,7 @@ if(BUILD_CUML_CPP_LIBRARY)

# common components
add_library(${CUML_CPP_TARGET} SHARED
src/common/logger.cpp
src/common/nvtx.cu)
src/common/logger.cpp)

# FIL components
target_sources(${CUML_CPP_TARGET}
Expand Down Expand Up @@ -367,7 +366,6 @@ if(BUILD_CUML_CPP_LIBRARY)

target_compile_definitions(${CUML_CPP_TARGET}
PUBLIC
$<$<BOOL:${NVTX}>:NVTX_ENABLED>
DISABLE_CUSPARSE_DEPRECATED
PRIVATE
CUML_CPP_API
Expand Down Expand Up @@ -407,7 +405,6 @@ if(BUILD_CUML_CPP_LIBRARY)
CUDA::cudart
CUDA::cusparse
GPUTreeShap::GPUTreeShap
$<$<BOOL:${NVTX}>:CUDA::nvToolsExt>
$<$<BOOL:${LINK_FAISS}>:FAISS::FAISS>
$<IF:$<BOOL:${Treelite_ADDED}>,treelite::treelite_static,treelite::treelite>
$<IF:$<BOOL:${Treelite_ADDED}>,treelite::treelite_runtime_static,treelite::treelite_runtime>
Expand Down
7 changes: 4 additions & 3 deletions cpp/cmake/thirdparty/get_raft.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ function(find_and_configure_raft)
GIT_TAG ${PKG_PINNED_TAG}
SOURCE_SUBDIR cpp
OPTIONS
"BUILD_TESTS OFF"
)
"BUILD_TESTS OFF"
"NVTX ${NVTX}"
)

if(raft_ADDED)
message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}")
Expand All @@ -58,4 +59,4 @@ set(CUML_BRANCH_VERSION_raft "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}")
find_and_configure_raft(VERSION ${CUML_MIN_VERSION_raft}
FORK rapidsai
PINNED_TAG branch-${CUML_BRANCH_VERSION_raft}
)
)
28 changes: 8 additions & 20 deletions cpp/src/arima/batched_arima.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
#include <cuml/tsa/batched_kalman.hpp>

#include <raft/cudart_utils.h>
#include <common/nvtx.hpp>
#include <linalg/batched/matrix.cuh>
#include <metrics/batched/information_criterion.cuh>
#include <raft/common/nvtx.hpp>
#include <raft/cuda_utils.cuh>
#include <raft/handle.hpp>
#include <raft/linalg/matrix_vector_op.cuh>
Expand Down Expand Up @@ -104,7 +104,7 @@ void predict(raft::handle_t& handle,
double* d_lower,
double* d_upper)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);
const auto stream = handle.get_stream();

bool diff = order.need_diff() && pre_diff && level == 0;
Expand Down Expand Up @@ -245,8 +245,6 @@ void predict(raft::handle_t& handle,
});
/// TODO: 2D copy kernel?
}

ML::POP_RANGE();
}

/**
Expand Down Expand Up @@ -360,7 +358,7 @@ void conditional_sum_of_squares(raft::handle_t& handle,
double* d_loglike,
int truncate)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);
auto stream = handle.get_stream();

int n_phi = order.n_phi();
Expand Down Expand Up @@ -393,8 +391,6 @@ void conditional_sum_of_squares(raft::handle_t& handle,
start_y,
start_v);
CUDA_CHECK(cudaPeekAtLastError());

ML::POP_RANGE();
}

void batched_loglike(raft::handle_t& handle,
Expand All @@ -417,7 +413,7 @@ void batched_loglike(raft::handle_t& handle,
double* d_lower,
double* d_upper)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);

auto stream = handle.get_stream();

Expand Down Expand Up @@ -473,7 +469,6 @@ void batched_loglike(raft::handle_t& handle,
/* Tranfer log-likelihood device -> host */
raft::update_host(loglike, d_loglike, batch_size, stream);
}
ML::POP_RANGE();
}

void batched_loglike(raft::handle_t& handle,
Expand All @@ -490,7 +485,7 @@ void batched_loglike(raft::handle_t& handle,
LoglikeMethod method,
int truncate)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);

// unpack parameters
auto stream = handle.get_stream();
Expand Down Expand Up @@ -518,8 +513,6 @@ void batched_loglike(raft::handle_t& handle,
host_loglike,
method,
truncate);

ML::POP_RANGE();
}

void batched_loglike_grad(raft::handle_t& handle,
Expand All @@ -536,7 +529,7 @@ void batched_loglike_grad(raft::handle_t& handle,
LoglikeMethod method,
int truncate)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);
auto stream = handle.get_stream();
auto counting = thrust::make_counting_iterator(0);
int N = order.complexity();
Expand Down Expand Up @@ -597,8 +590,6 @@ void batched_loglike_grad(raft::handle_t& handle,
d_x_pert[N * bid + i] = d_x[N * bid + i];
});
}

ML::POP_RANGE();
}

void information_criterion(raft::handle_t& handle,
Expand All @@ -612,7 +603,7 @@ void information_criterion(raft::handle_t& handle,
double* d_ic,
int ic_type)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);
auto stream = handle.get_stream();

/* Compute log-likelihood in d_ic */
Expand All @@ -628,8 +619,6 @@ void information_criterion(raft::handle_t& handle,
batch_size,
n_obs - order.n_diff(),
stream);

ML::POP_RANGE();
}

/**
Expand Down Expand Up @@ -962,7 +951,7 @@ void estimate_x0(raft::handle_t& handle,
const ARIMAOrder& order,
bool missing)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);
const auto& handle_impl = handle;
auto stream = handle_impl.get_stream();
auto cublas_handle = handle_impl.get_cublas_handle();
Expand Down Expand Up @@ -1007,7 +996,6 @@ void estimate_x0(raft::handle_t& handle,

// Do the computation of the initial parameters
_start_params(handle, params, bm_yd, bm_exog_diff, order);
ML::POP_RANGE();
}

} // namespace ML
14 changes: 5 additions & 9 deletions cpp/src/arima/batched_kalman.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
#include <raft/linalg/binary_op.cuh>
#include <rmm/device_uvector.hpp>

#include <common/nvtx.hpp>
#include <linalg/batched/matrix.cuh>
#include <linalg/block.cuh>
#include <raft/common/nvtx.hpp>
#include <timeSeries/arima_helpers.cuh>

namespace ML {
Expand Down Expand Up @@ -1283,7 +1283,7 @@ void _batched_kalman_filter(raft::handle_t& handle,
MLCommon::LinAlg::Batched::b_gemm(false, true, rd, rd, 1, 1.0, RQb, Rb, 0.0, RQR);

// Durbin Koopman "Time Series Analysis" pg 138
ML::PUSH_RANGE("Init P");
raft::common::nvtx::push_range("Init P");
MLCommon::LinAlg::Batched::Matrix<double> P(
rd, rd, batch_size, cublasHandle, arima_mem.P_batches, arima_mem.P_dense, stream, true);
{
Expand Down Expand Up @@ -1326,7 +1326,7 @@ void _batched_kalman_filter(raft::handle_t& handle,
_lyapunov_wrapper(handle, arima_mem, Tb, RQR, P, rd);
}
}
ML::POP_RANGE();
raft::common::nvtx::pop_range();

// Initialize the state alpha by solving (I - T*) x* = c with:
// | mu |
Expand Down Expand Up @@ -1442,7 +1442,7 @@ void init_batched_kalman_matrices(raft::handle_t& handle,
double* d_R_b,
double* d_T_b)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);

auto stream = handle.get_stream();

Expand Down Expand Up @@ -1535,8 +1535,6 @@ void init_batched_kalman_matrices(raft::handle_t& handle,
// If rd=2 and phi_2=-1, I-TxT is singular
if (rd == 2 && order.p == 2 && abs(batch_T[1] + 1) < 0.01) { batch_T[1] = -0.99; }
});

ML::POP_RANGE();
}

void batched_kalman_filter(raft::handle_t& handle,
Expand All @@ -1556,7 +1554,7 @@ void batched_kalman_filter(raft::handle_t& handle,
double* d_lower,
double* d_upper)
{
ML::PUSH_RANGE(__func__);
raft::common::nvtx::range fun_scope(__func__);

auto cublasHandle = handle.get_cublas_handle();
auto stream = handle.get_stream();
Expand Down Expand Up @@ -1607,8 +1605,6 @@ void batched_kalman_filter(raft::handle_t& handle,
level,
d_lower,
d_upper);

ML::POP_RANGE();
}

void batched_jones_transform(raft::handle_t& handle,
Expand Down
Loading

0 comments on commit d630156

Please sign in to comment.