-
Notifications
You must be signed in to change notification settings - Fork 197
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Moving remaining stats prims from cuml #507
Merged
rapids-bot
merged 26 commits into
rapidsai:branch-22.04
from
cjnolet:imp-2204_moving_remaining_stats
Feb 11, 2022
Merged
Changes from all commits
Commits
Show all changes
26 commits
Select commit
Hold shift + click to select a range
f86d765
iMoving more linalg prims from cuml
cjnolet 6a0d70c
Fixing style
cjnolet 4909d2c
Updarting style
cjnolet df48d34
Fixing include for test utils
cjnolet 3c2fc7e
Updating lstsq
cjnolet 0b5ba54
Adding missing reduction test
cjnolet 1b613b2
Fixing lstsq
cjnolet c7f059f
Typo
cjnolet 3f207c5
Exposing convert_array
cjnolet 34f7bf3
Oops
cjnolet 68639a2
Adding proper doxygen docs to lstsq
cjnolet 34b54f4
Updating gtests
cjnolet 587c0f1
Moving remaining stats stuff over
cjnolet 3297a3d
Re-routing includes
cjnolet 510188d
Moving epsilon neighborhood. Need data generators for tests
cjnolet 42c6252
Merge remote-tracking branch 'rapidsai/branch-22.04' into imp-2204_mo…
cjnolet 5a770e9
Finishing out the move
cjnolet 895c1c4
Fixing style
cjnolet ce425d4
Updating year andeive
cjnolet 3d0b167
proper include
cjnolet 368513b
Proper filename
cjnolet 8b5e5c5
Adding missing include
cjnolet 5a99047
Merge remote-tracking branch 'rapidsai/branch-22.04' into imp-2204_mo…
cjnolet 413b0d4
Updating rsvd test
cjnolet 02341d6
iFixing style
cjnolet a3c59da
Fixing docs
cjnolet File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
/* | ||
* Copyright (c) 2019-2022, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <raft/cuda_utils.cuh> | ||
#include <vector> | ||
|
||
// Taken from: | ||
// https://github.com/teju85/programming/blob/master/euler/include/seive.h | ||
|
||
namespace raft { | ||
namespace common { | ||
|
||
/**
 * @brief Implementation of the 'Sieve of Eratosthenes'
 *
 * The primality bitmap is built once, at construction. Only odd numbers are
 * tracked: the odd number `num` owns bit `num >> 1` of the bitmap. The value 2
 * and all other even numbers are answered directly by `isPrime` without
 * touching the bitmap.
 */
class Seive {
 public:
  /**
   * @param _num number of integers for which seive is needed. This is the
   *             largest value that may later be passed to `isPrime`.
   */
  Seive(unsigned _num)
  {
    N = _num;
    generateSeive();
  }

  /**
   * @brief Check whether a number is prime or not
   * @param num number to be checked. It must not exceed the `_num` given to
   *            the constructor: larger values are not range-checked and are
   *            not covered by the sieving pass.
   * @return true if the 'num' is prime, else false
   */
  bool isPrime(unsigned num) const
  {
    if (num <= 1) { return false; }
    if (num == 2) { return true; }
    // every even number other than 2 is composite and has no bit in the bitmap
    if (!(num & 1)) { return false; }
    unsigned mask, pos;
    getMaskPos(num, mask, pos);
    return (seive[pos] & mask) != 0;
  }

 private:
  /**
   * @brief Allocate the bitmap and clear the bit of every odd composite <= N
   */
  void generateSeive()
  {
    const unsigned sqN = fastIntSqrt(N);
    // One bit per integer up to N, rounded up to whole words. Written as
    // quotient + remainder test (instead of `(N + bits - 1) / bits`) so that
    // the computation cannot wrap around when N is close to UINT_MAX.
    constexpr unsigned bitsPerWord = sizeof(unsigned) * 8;
    const unsigned size            = N / bitsPerWord + ((N % bitsPerWord) != 0 ? 1u : 0u);
    // assume all to be primes initially
    seive.assign(size, 0xffffffffu);
    // Every odd composite <= N has an odd prime factor <= sqrt(N), so only
    // those primes need to be swept.
    for (unsigned p = 3; p <= sqN; p += 2) {
      if (!isPrime(p)) { continue; }
      // 'unmark' the odd multiples of the current prime, starting at p * p
      // (smaller odd multiples were already cleared by a smaller prime).
      // The running multiple is kept in 64 bits: a 32-bit counter would wrap
      // past N when N is near UINT_MAX and the loop would never terminate.
      const unsigned long long step = 2ull * p;
      for (unsigned long long num = 1ull * p * p; num <= N; num += step) {
        unmark(static_cast<unsigned>(num));
      }
    }
  }

  /** bit index inside the bitmap that belongs to the (odd) number `num` */
  unsigned getId(unsigned num) const { return (num >> 1); }

  /**
   * @brief Locate the bit of `num` inside the bitmap
   * @param[in]  num  number whose bit is wanted
   * @param[out] mask single-bit mask selecting the bit inside its word
   * @param[out] pos  index of the word inside `seive` holding the bit
   */
  void getMaskPos(unsigned num, unsigned& mask, unsigned& pos) const
  {
    pos = getId(num);
    // unsigned literal: shifting a signed 1 by 31 would land in the sign bit
    mask = 1u << (pos & 0x1f);
    pos >>= 5;
  }

  /** clear the bit of `num`, i.e. record it as 'not prime' */
  void unmark(unsigned num)
  {
    unsigned mask, pos;
    getMaskPos(num, mask, pos);
    seive[pos] &= ~mask;
  }

  // REF: http://www.azillionmonkeys.com/qed/ulerysqroot.pdf
  /**
   * @brief Integer square root, computed bit by bit from the most significant
   *        result bit (bit 15) downwards
   * @param val input value
   * @return the value accumulated in `g`: each candidate bit `b` is kept
   *         whenever `(g + b)^2` still fits into the original `val`
   */
  static unsigned fastIntSqrt(unsigned val)
  {
    unsigned g = 0;
    auto bshft = 15u, b = 1u << bshft;
    do {
      // temp == (g + b)^2 - g^2, the cost of adding bit `b` to the result
      unsigned temp = ((g << 1) + b) << bshft--;
      if (val >= temp) {
        g += b;
        val -= temp;
      }
    } while (b >>= 1);
    return g;
  }

  /** find all primes till this number */
  unsigned N;
  /** the seive */
  std::vector<unsigned> seive;
};
}; // namespace common | ||
}; // namespace raft |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
/* | ||
* Copyright (c) 2021-2022, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <raft/cuda_utils.cuh> | ||
#include <utility> // pair | ||
|
||
namespace raft { | ||
|
||
// TODO move to raft https://github.com/rapidsai/raft/issues/90 | ||
/** helper method to get the compute capability version numbers */ | ||
inline std::pair<int, int> getDeviceCapability() | ||
{ | ||
int devId; | ||
RAFT_CUDA_TRY(cudaGetDevice(&devId)); | ||
int major, minor; | ||
RAFT_CUDA_TRY(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devId)); | ||
RAFT_CUDA_TRY(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devId)); | ||
return std::make_pair(major, minor); | ||
} | ||
|
||
/** | ||
* @brief Batched warp-level sum reduction | ||
* | ||
* @tparam T data type | ||
* @tparam NThreads Number of threads in the warp doing independent reductions | ||
* | ||
* @param[in] val input value | ||
* @return for the first "group" of threads, the reduced value. All | ||
* others will contain unusable values! | ||
* | ||
* @note Why not cub? Because cub doesn't seem to allow working with arbitrary | ||
* number of warps in a block and also doesn't support this kind of | ||
* batched reduction operation | ||
* @note All threads in the warp must enter this function together | ||
* | ||
* @todo Expand this to support arbitrary reduction ops | ||
*/ | ||
template <typename T, int NThreads> | ||
DI T batchedWarpReduce(T val) | ||
{ | ||
#pragma unroll | ||
for (int i = NThreads; i < raft::WarpSize; i <<= 1) { | ||
val += raft::shfl(val, raft::laneId() + i); | ||
} | ||
return val; | ||
} | ||
|
||
/** | ||
* @brief 1-D block-level batched sum reduction | ||
* | ||
* @tparam T data type | ||
* @tparam NThreads Number of threads in the warp doing independent reductions | ||
* | ||
* @param val input value | ||
* @param smem shared memory region needed for storing intermediate results. It | ||
* must alteast be of size: `sizeof(T) * nWarps * NThreads` | ||
* @return for the first "group" of threads in the block, the reduced value. | ||
* All others will contain unusable values! | ||
* | ||
* @note Why not cub? Because cub doesn't seem to allow working with arbitrary | ||
* number of warps in a block and also doesn't support this kind of | ||
* batched reduction operation | ||
* @note All threads in the block must enter this function together | ||
* | ||
* @todo Expand this to support arbitrary reduction ops | ||
*/ | ||
template <typename T, int NThreads> | ||
DI T batchedBlockReduce(T val, char* smem) | ||
{ | ||
auto* sTemp = reinterpret_cast<T*>(smem); | ||
constexpr int nGroupsPerWarp = raft::WarpSize / NThreads; | ||
static_assert(raft::isPo2(nGroupsPerWarp), "nGroupsPerWarp must be a PO2!"); | ||
const int nGroups = (blockDim.x + NThreads - 1) / NThreads; | ||
const int lid = raft::laneId(); | ||
const int lgid = lid % NThreads; | ||
const int gid = threadIdx.x / NThreads; | ||
const auto wrIdx = (gid / nGroupsPerWarp) * NThreads + lgid; | ||
const auto rdIdx = gid * NThreads + lgid; | ||
for (int i = nGroups; i > 0;) { | ||
auto iAligned = ((i + nGroupsPerWarp - 1) / nGroupsPerWarp) * nGroupsPerWarp; | ||
if (gid < iAligned) { | ||
val = batchedWarpReduce<T, NThreads>(val); | ||
if (lid < NThreads) sTemp[wrIdx] = val; | ||
} | ||
__syncthreads(); | ||
i /= nGroupsPerWarp; | ||
if (i > 0) { val = gid < i ? sTemp[rdIdx] : T(0); } | ||
__syncthreads(); | ||
} | ||
return val; | ||
} | ||
|
||
} // namespace raft |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this class have a `detail::SeiveImpl` which has the implementation, and the public API be `class Seive : public detail::SeiveImpl`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm still thinking through how to hide the common/utilities stuff, and whether we should. This Seive is used internally in one place so far and it might be that we just end up making it internal. For now, though, I don't think it should affect this PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(I'd love to gather your thoughts on the utilities as well)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think whether it's an internal or external common/utility source, the consistency of having the implementation hidden in detail helps a developer to know where, if ever, they need to look for source code of implementation. The API again just being a contract whether it's internal or external.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The benefit to hiding internals such as what we've been doing is so that we make a contract with the outside world and know that if we don't break that contract, consumers should be able to build against RAFT. I'm less concerned internally because we will get build errors when things change. I think there is still a level of caution that we can take, but it's not going to require we protect the contract as strongly as w/ the public (external) APIs.
The problem with many of these files w/ utility functions is that there are no "internal" bits - the functions themselves are all external, so we don't gain as much by hiding those bits. This is my perspective anyway. I'm still undecided on exactly what we should do. One part of me thinks we should eventually copy the public utils stuff into a `raft/utils` directory, deprecate the old ones, and call it a day. I think we should revisit after moving over the primitives, since those are more of the core focus.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good. It of course doesn't matter much when it's just an internal function, and my only point as a dev was that I would know where to find the implementation if I had to.
Apart from that, I'm fully on board with the raft/utils package. The utils in the root directory are a bit of a mess and they can be cleaned up significantly. But yes, not the highest priority!