Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update .clang-format to be consistent with all other RAPIDS repos #300

Merged
merged 6 commits into from
Nov 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 52 additions & 45 deletions cpp/.clang-format
Original file line number Diff line number Diff line change
@@ -1,72 +1,78 @@
---
# Refer to the following link for the explanation of each params:
# http://releases.llvm.org/8.0.1/tools/clang/docs/ClangFormatStyleOptions.html
Language: Cpp
# BasedOnStyle: Google
# http://releases.llvm.org/8.0.0/tools/clang/docs/ClangFormatStyleOptions.html
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveAssignments: true
AlignConsecutiveBitFields: true
AlignConsecutiveDeclarations: false
AlignConsecutiveMacros: true
AlignEscapedNewlines: Left
AlignOperands: true
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortBlocksOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: true
AllowShortEnumsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortLambdasOnASingleLine: true
AllowShortLoopsOnASingleLine: false
# This is deprecated
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterClass: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
# disabling the below splits, else, they'll just add to the vertical length of source files!
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BreakAfterJavaFieldAnnotations: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeBraces: WebKit
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakInheritanceList: BeforeColon
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
# Kept the below 2 to be the same as `IndentWidth` to keep everything uniform
ConstructorInitializerIndentWidth: 2
ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: true
DisableFormat: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Preserve
IncludeCategories:
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
- Regex: '^<.*\.h>'
Expand Down Expand Up @@ -100,9 +106,9 @@ PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
Expand All @@ -111,7 +117,7 @@ RawStringFormats:
- 'c++'
- 'C++'
CanonicalDelimiter: ''
- Language: TextProto
- Language: TextProto
Delimiters:
- pb
- PB
Expand All @@ -126,10 +132,10 @@ RawStringFormats:
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
BasedOnStyle: google
# Enabling comment reflow causes doxygen comments to be messed up in their formats!
ReflowComments: false
SortIncludes: true
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
Expand All @@ -139,19 +145,20 @@ SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
# We are C++14, but clang-format puts this under `Cpp11` itself
Standard: Cpp11
StatementMacros:
Standard: c++17
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
# Be consistent with indent-width, even for people who use tab for indentation!
TabWidth: 2
UseTab: Never
...
TabWidth: 2
UseTab: Never
3 changes: 2 additions & 1 deletion cpp/include/raft.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ namespace raft {
/* Function for testing RAFT include
*
* @return message indicating RAFT has been included succesfully*/
inline std::string test_raft() {
inline std::string test_raft()
{
std::string status = "RAFT Setup succesfully";
return status;
}
Expand Down
105 changes: 61 additions & 44 deletions cpp/include/raft/cache/cache_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,16 @@ namespace cache {
* @param [out] out vectors collected from the cache, size [n_vec * n]
*/
template <typename math_t, typename idx_t, typename int_t>
__global__ void get_vecs(const math_t *cache, int_t n_vec,
const idx_t *cache_idx, int_t n, math_t *out) {
__global__ void get_vecs(
const math_t* cache, int_t n_vec, const idx_t* cache_idx, int_t n, math_t* out)
{
int tid = threadIdx.x + blockIdx.x * blockDim.x;
int row = tid % n_vec; // row idx
if (tid < n_vec * n) {
size_t out_col = tid / n_vec; // col idx
size_t out_col = tid / n_vec; // col idx
size_t cache_col = cache_idx[out_col];
if (cache_idx[out_col] >= 0) {
if (row + out_col * n_vec < (size_t)n_vec * n) {
out[tid] = cache[row + cache_col * n_vec];
}
if (row + out_col * n_vec < (size_t)n_vec * n) { out[tid] = cache[row + cache_col * n_vec]; }
}
}
}
Expand Down Expand Up @@ -84,21 +83,26 @@ __global__ void get_vecs(const math_t *cache, int_t n_vec,
* @param [in] n_cache_vecs
*/
template <typename math_t>
__global__ void store_vecs(const math_t *tile, int n_tile, int n_vec,
const int *tile_idx, int n, const int *cache_idx,
math_t *cache, int n_cache_vecs) {
__global__ void store_vecs(const math_t* tile,
int n_tile,
int n_vec,
const int* tile_idx,
int n,
const int* cache_idx,
math_t* cache,
int n_cache_vecs)
{
int tid = threadIdx.x + blockIdx.x * blockDim.x;
int row = tid % n_vec; // row idx
if (tid < n_vec * n) {
int tile_col = tid / n_vec; // col idx
int data_col = tile_idx ? tile_idx[tile_col] : tile_col;
int tile_col = tid / n_vec; // col idx
int data_col = tile_idx ? tile_idx[tile_col] : tile_col;
int cache_col = cache_idx[tile_col];

// We ignore negative values. The rest of the checks should be fulfilled
// if the cache is used properly
if (cache_col >= 0 && cache_col < n_cache_vecs && data_col < n_tile) {
cache[row + (size_t)cache_col * n_vec] =
tile[row + (size_t)data_col * n_vec];
cache[row + (size_t)cache_col * n_vec] = tile[row + (size_t)data_col * n_vec];
}
}
}
Expand All @@ -121,14 +125,15 @@ int DI hash(int key, int n_cache_sets) { return key % n_cache_sets; }
* @return the index of the first element in the array for which
* array[idx] >= value. If there is no such value, then return n.
*/
int DI arg_first_ge(const int *array, int n, int val) {
int DI arg_first_ge(const int* array, int n, int val)
{
int start = 0;
int end = n - 1;
int end = n - 1;
if (array[0] == val) return 0;
if (array[end] < val) return n;
while (start + 1 < end) {
int q = (start + end + 1) / 2;
//invariants:
// invariants:
// start < end
// start < q <=end
// array[start] < val && array[end] <=val
Expand Down Expand Up @@ -157,7 +162,8 @@ int DI arg_first_ge(const int *array, int n, int val) {
* @return the idx of the k-th occurance of val in array, or -1 if
* the value is not found.
*/
int DI find_nth_occurrence(const int *array, int n, int val, int k) {
int DI find_nth_occurrence(const int* array, int n, int val, int k)
{
int q = arg_first_ge(array, n, val);
if (q + k < n && array[q + k] == val) {
q += k;
Expand Down Expand Up @@ -196,10 +202,10 @@ int DI find_nth_occurrence(const int *array, int n, int val, int k) {
* Each block should give a different pointer for rank.
*/
template <int nthreads, int associativity>
DI void rank_set_entries(const int *cache_time, int n_cache_sets, int *rank) {
DI void rank_set_entries(const int* cache_time, int n_cache_sets, int* rank)
{
const int items_per_thread = raft::ceildiv(associativity, nthreads);
typedef cub::BlockRadixSort<int, nthreads, items_per_thread, int>
BlockRadixSort;
typedef cub::BlockRadixSort<int, nthreads, items_per_thread, int> BlockRadixSort;
__shared__ typename BlockRadixSort::TempStorage temp_storage;

int key[items_per_thread];
Expand All @@ -208,18 +214,16 @@ DI void rank_set_entries(const int *cache_time, int n_cache_sets, int *rank) {
int block_offset = blockIdx.x * associativity;

for (int j = 0; j < items_per_thread; j++) {
int k = threadIdx.x + j * nthreads;
int t = (k < associativity) ? cache_time[block_offset + k] : 32768;
int k = threadIdx.x + j * nthreads;
int t = (k < associativity) ? cache_time[block_offset + k] : 32768;
key[j] = t;
val[j] = k;
}

BlockRadixSort(temp_storage).Sort(key, val);

for (int j = 0; j < items_per_thread; j++) {
if (val[j] < associativity) {
rank[val[j]] = threadIdx.x * items_per_thread + j;
}
if (val[j] < associativity) { rank[val[j]] = threadIdx.x * items_per_thread + j; }
}
__syncthreads();
}
Expand Down Expand Up @@ -252,9 +256,15 @@ DI void rank_set_entries(const int *cache_time, int n_cache_sets, int *rank) {
* not be cached, size [n]
*/
template <int nthreads, int associativity>
__global__ void assign_cache_idx(const int *keys, int n, const int *cache_set,
int *cached_keys, int n_cache_sets,
int *cache_time, int time, int *cache_idx) {
__global__ void assign_cache_idx(const int* keys,
int n,
const int* cache_set,
int* cached_keys,
int n_cache_sets,
int* cache_time,
int time,
int* cache_idx)
{
int block_offset = blockIdx.x * associativity;

const int items_per_thread = raft::ceildiv(associativity, nthreads);
Expand All @@ -273,7 +283,7 @@ __global__ void assign_cache_idx(const int *keys, int n, const int *cache_set,
// these elements are assigned -1.

for (int j = 0; j < items_per_thread; j++) {
int i = threadIdx.x + j * nthreads;
int i = threadIdx.x + j * nthreads;
int t_idx = block_offset + i;
bool mask = (i < associativity);
// whether this slot is available for writing
Expand All @@ -284,10 +294,10 @@ __global__ void assign_cache_idx(const int *keys, int n, const int *cache_set,
if (mask) {
int k = find_nth_occurrence(cache_set, n, blockIdx.x, rank[i]);
if (k > -1) {
int key_val = keys[k];
int key_val = keys[k];
cached_keys[t_idx] = key_val;
cache_idx[k] = t_idx;
cache_time[t_idx] = time;
cache_idx[k] = t_idx;
cache_time[t_idx] = time;
}
}
}
Expand Down Expand Up @@ -315,21 +325,28 @@ namespace {
* @param [inout] cached_keys keys stored in the cache, size [n_cache_sets * associativity]
* @param [in] n_cache_sets number of cache sets
* @param [in] associativity number of keys in cache set
* @param [inout] cache_time time stamp when the indices were cached, size [n_cache_sets * associativity]
* @param [inout] cache_time time stamp when the indices were cached, size [n_cache_sets *
* associativity]
* @param [out] cache_idx cache indices of the working set elements, size [n]
* @param [out] is_cached whether the element is cached size[n]
* @param [in] time iteration counter (used for time stamping)
*/
__global__ void get_cache_idx(int *keys, int n, int *cached_keys,
int n_cache_sets, int associativity,
int *cache_time, int *cache_idx, bool *is_cached,
int time) {
__global__ void get_cache_idx(int* keys,
int n,
int* cached_keys,
int n_cache_sets,
int associativity,
int* cache_time,
int* cache_idx,
bool* is_cached,
int time)
{
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < n) {
int widx = keys[tid];
int sidx = hash(widx, n_cache_sets);
int cidx = sidx * associativity;
int i = 0;
int widx = keys[tid];
int sidx = hash(widx, n_cache_sets);
int cidx = sidx * associativity;
int i = 0;
bool found = false;
// search for empty spot and the least recently used spot
while (i < associativity && !found) {
Expand All @@ -338,9 +355,9 @@ __global__ void get_cache_idx(int *keys, int n, int *cached_keys,
}
is_cached[tid] = found;
if (found) {
cidx = cidx + i - 1;
cache_time[cidx] = time; //update time stamp
cache_idx[tid] = cidx; //exact cache idx
cidx = cidx + i - 1;
cache_time[cidx] = time; // update time stamp
cache_idx[tid] = cidx; // exact cache idx
} else {
cache_idx[tid] = sidx; // assign cache set
}
Expand Down
Loading