Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Merge pull request #249 from allisonvacanti/enh/pedantic_flags/gh.cub228
Browse files Browse the repository at this point in the history
Enable more warning flags.
  • Loading branch information
alliepiper authored Feb 16, 2021
2 parents f5ef160 + e0a6736 commit b229817
Show file tree
Hide file tree
Showing 34 changed files with 359 additions and 230 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# 3.15 is the minimum.
# 3.17 for NVC++.
# 3.18 for C++17 + CUDA.
# 3.18.3 for C++17 + CUDA.
cmake_minimum_required(VERSION 3.15)

# Remove this when we use the new CUDA_ARCHITECTURES properties.
Expand Down
70 changes: 34 additions & 36 deletions cmake/CubBuildCompilerTargets.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,29 @@ function(cub_build_compiler_targets)
set(cxx_compile_options)

if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
# TODO Enable /Wall
append_option_if_available("/WX" cxx_compile_options)

# Disabled loss-of-data conversion warnings.
# TODO Re-enable.
append_option_if_available("/wd4244" cxx_compile_options)
append_option_if_available("/wd4267" cxx_compile_options)
append_option_if_available("/W4" cxx_compile_options)

# Suppress numeric conversion-to-bool warnings.
# TODO Re-enable.
append_option_if_available("/wd4800" cxx_compile_options)
append_option_if_available("/WX" cxx_compile_options)

# Disable warning about applying unary operator- to unsigned type.
append_option_if_available("/wd4146" cxx_compile_options)
# Suppress overly-pedantic/unavoidable warnings brought in with /W4:
# C4324: structure was padded due to alignment specifier
append_option_if_available("/wd4324" cxx_compile_options)
# C4127: conditional expression is constant
# This can be fixed with `if constexpr` when available, but there's no way
# to silence these pre-C++17.
# TODO We should have per-dialect interface targets so we can leave these
# warnings enabled on C++17:
append_option_if_available("/wd4127" cxx_compile_options)
# C4505: unreferenced local function has been removed
# The CUDA `host_runtime.h` header emits this for
# `__cudaUnregisterBinaryUtil`.
append_option_if_available("/wd4505" cxx_compile_options)
# C4706: assignment within conditional expression
# MSVC doesn't provide an opt-out for this warning when the assignment is
# intentional. Clang will warn for these, but suppresses the warning when
# double-parentheses are used around the assignment. We'll let Clang catch
# unintentional assignments and suppress all such warnings on MSVC.
append_option_if_available("/wd4706" cxx_compile_options)

# Some tests require /bigobj to fit everything into their object files:
append_option_if_available("/bigobj" cxx_compile_options)
Expand All @@ -35,45 +44,34 @@ function(cub_build_compiler_targets)
append_option_if_available("-Winit-self" cxx_compile_options)
append_option_if_available("-Woverloaded-virtual" cxx_compile_options)
append_option_if_available("-Wcast-qual" cxx_compile_options)
append_option_if_available("-Wno-cast-align" cxx_compile_options)
append_option_if_available("-Wno-long-long" cxx_compile_options)
append_option_if_available("-Wno-variadic-macros" cxx_compile_options)
append_option_if_available("-Wpointer-arith" cxx_compile_options)
append_option_if_available("-Wunused-local-typedef" cxx_compile_options)
append_option_if_available("-Wvla" cxx_compile_options)

# Warn about the use of GNU extensions (flag is clang only)
append_option_if_available("-Wgnu" cxx_compile_options)
# Calling a variadic macro with zero args is a GNU extension until C++20,
# but the THRUST_PP_ARITY macro is used with zero args. Need to see if this
# is a real problem worth fixing.
append_option_if_available("-Wno-gnu-zero-variadic-macro-arguments" cxx_compile_options)

# This complains about functions in CUDA system headers when used with nvcc.
append_option_if_available("-Wno-unused-function" cxx_compile_options)
append_option_if_available("-Wno-unused-variable" cxx_compile_options)

# CUB uses deprecated texture functions (cudaBindTexture, etc). These
# need to be replaced, but silence the warnings for now.
# This can be removed once NVIDIA/cub#191 is fixed.
append_option_if_available("-Wno-deprecated-declarations" cxx_compile_options)
endif()

if ("GNU" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 4.5)
# This isn't available until GCC 4.3, and misfires on TMP code until
# GCC 4.5.
append_option_if_available("-Wlogical-op" cxx_compile_options)
endif()

if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3)
# GCC 7.3 complains about name mangling changes due to `noexcept`
# becoming part of the type system; we don't care.
append_option_if_available("-Wno-noexcept-type" cxx_compile_options)
endif()
endif()

if (("Clang" STREQUAL "${CMAKE_CXX_COMPILER_ID}") OR
("XL" STREQUAL "${CMAKE_CXX_COMPILER_ID}"))
# xlC and Clang warn about unused parameters in uninstantiated templates.
# This causes xlC to choke on the OMP backend, which is mostly #ifdef'd out
# (and thus has unused parameters) when you aren't using it.
append_option_if_available("-Wno-unused-parameters" cxx_compile_options)
endif()

if ("Clang" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
# -Wunneeded-internal-declaration misfires in the unit test framework
# on older versions of Clang.
append_option_if_available("-Wno-unneeded-internal-declaration" cxx_compile_options)
endif()

if ("Intel" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
# Disable warning that inlining is inhibited by compiler thresholds.
append_option_if_available("-diag-disable=11074" cxx_compile_options)
Expand Down
4 changes: 2 additions & 2 deletions cmake/CubBuildTargetList.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,9 @@ function(cub_build_target_list)
)
endif()

# CMake added C++17 support for CUDA targets in 3.18:
# CMake fixed C++17 support for NVCC + MSVC targets in 3.18.3:
if (CUB_ENABLE_DIALECT_CPP17)
cmake_minimum_required(VERSION 3.18)
cmake_minimum_required(VERSION 3.18.3)
endif()

# Supported versions of MSVC do not distinguish between C++11 and C++14.
Expand Down
12 changes: 6 additions & 6 deletions cub/agent/agent_radix_sort_downsweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -190,11 +190,11 @@ struct AgentRadixSortDownsweep
typename BlockLoadValuesT::TempStorage load_values;
typename BlockRadixRankT::TempStorage radix_rank;

struct
struct KeysAndOffsets
{
UnsignedBits exchange_keys[TILE_ITEMS];
OffsetT relative_bin_offsets[RADIX_DIGITS];
};
} keys_and_offsets;

Uninitialized<ValueExchangeT> exchange_values;

Expand Down Expand Up @@ -246,17 +246,17 @@ struct AgentRadixSortDownsweep
#pragma unroll
for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM)
{
temp_storage.exchange_keys[ranks[ITEM]] = twiddled_keys[ITEM];
temp_storage.keys_and_offsets.exchange_keys[ranks[ITEM]] = twiddled_keys[ITEM];
}

CTA_SYNC();

#pragma unroll
for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM)
{
UnsignedBits key = temp_storage.exchange_keys[threadIdx.x + (ITEM * BLOCK_THREADS)];
UnsignedBits key = temp_storage.keys_and_offsets.exchange_keys[threadIdx.x + (ITEM * BLOCK_THREADS)];
UnsignedBits digit = digit_extractor.Digit(key);
relative_bin_offsets[ITEM] = temp_storage.relative_bin_offsets[digit];
relative_bin_offsets[ITEM] = temp_storage.keys_and_offsets.relative_bin_offsets[digit];

// Un-twiddle
key = Traits<KeyT>::TwiddleOut(key);
Expand Down Expand Up @@ -580,7 +580,7 @@ struct AgentRadixSortDownsweep
if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS))
{
bin_offset[track] -= exclusive_digit_prefix[track];
temp_storage.relative_bin_offsets[bin_idx] = bin_offset[track];
temp_storage.keys_and_offsets.relative_bin_offsets[bin_idx] = bin_offset[track];
bin_offset[track] += inclusive_digit_prefix[track];
}
}
Expand Down
14 changes: 7 additions & 7 deletions cub/agent/agent_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -226,12 +226,12 @@ struct AgentReduceByKey
// Shared memory type for this thread block
union _TempStorage
{
struct
struct ScanStorage
{
typename BlockScanT::TempStorage scan; // Smem needed for tile scanning
typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback
typename BlockDiscontinuityKeys::TempStorage discontinuity; // Smem needed for discontinuity detection
};
} scan_storage;

// Smem needed for loading keys
typename BlockLoadKeysT::TempStorage load_keys;
Expand Down Expand Up @@ -433,13 +433,13 @@ struct AgentReduceByKey
{
// Use custom flag operator to additionally flag the first out-of-bounds item
GuardedInequalityWrapper<EqualityOpT> flag_op(equality_op, num_remaining);
BlockDiscontinuityKeys(temp_storage.discontinuity).FlagHeads(
BlockDiscontinuityKeys(temp_storage.scan_storage.discontinuity).FlagHeads(
head_flags, keys, prev_keys, flag_op, tile_predecessor);
}
else
{
InequalityWrapper<EqualityOpT> flag_op(equality_op);
BlockDiscontinuityKeys(temp_storage.discontinuity).FlagHeads(
BlockDiscontinuityKeys(temp_storage.scan_storage.discontinuity).FlagHeads(
head_flags, keys, prev_keys, flag_op, tile_predecessor);
}

Expand All @@ -458,7 +458,7 @@ struct AgentReduceByKey
if (tile_idx == 0)
{
// Scan first tile
BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, block_aggregate);
BlockScanT(temp_storage.scan_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, block_aggregate);
num_segments_prefix = 0;
total_aggregate = block_aggregate;

Expand All @@ -469,8 +469,8 @@ struct AgentReduceByKey
else
{
// Scan non-first tile
TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx);
BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, prefix_op);
TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.scan_storage.prefix, scan_op, tile_idx);
BlockScanT(temp_storage.scan_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, prefix_op);

block_aggregate = prefix_op.GetBlockAggregate();
num_segments_prefix = prefix_op.GetExclusivePrefix().key;
Expand Down
24 changes: 12 additions & 12 deletions cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,13 @@ struct AgentRle
// Aliasable storage layout
union Aliasable
{
struct
struct ScanStorage
{
typename BlockDiscontinuityT::TempStorage discontinuity; // Smem needed for discontinuity detection
typename WarpScanPairs::TempStorage warp_scan[WARPS]; // Smem needed for warp-synchronous scans
Uninitialized<LengthOffsetPair[WARPS]> warp_aggregates; // Smem needed for sharing warp-wide aggregates
typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback
};
} scan_storage;

// Smem needed for input loading
typename BlockLoadT::TempStorage load;
Expand Down Expand Up @@ -305,7 +305,7 @@ struct AgentRle
{
// First-and-last-tile always head-flags the first item and tail-flags the last item

BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails(
BlockDiscontinuityT(temp_storage.aliasable.scan_storage.discontinuity).FlagHeadsAndTails(
head_flags, tail_flags, items, inequality_op);
}
else if (FIRST_TILE)
Expand All @@ -317,7 +317,7 @@ struct AgentRle
if (threadIdx.x == BLOCK_THREADS - 1)
tile_successor_item = d_in[tile_offset + TILE_ITEMS];

BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails(
BlockDiscontinuityT(temp_storage.aliasable.scan_storage.discontinuity).FlagHeadsAndTails(
head_flags, tail_flags, tile_successor_item, items, inequality_op);
}
else if (LAST_TILE)
Expand All @@ -329,7 +329,7 @@ struct AgentRle
if (threadIdx.x == 0)
tile_predecessor_item = d_in[tile_offset - 1];

BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails(
BlockDiscontinuityT(temp_storage.aliasable.scan_storage.discontinuity).FlagHeadsAndTails(
head_flags, tile_predecessor_item, tail_flags, items, inequality_op);
}
else
Expand All @@ -344,7 +344,7 @@ struct AgentRle
if (threadIdx.x == 0)
tile_predecessor_item = d_in[tile_offset - 1];

BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails(
BlockDiscontinuityT(temp_storage.aliasable.scan_storage.discontinuity).FlagHeadsAndTails(
head_flags, tile_predecessor_item, tail_flags, tile_successor_item, items, inequality_op);
}

Expand Down Expand Up @@ -381,7 +381,7 @@ struct AgentRle

LengthOffsetPair thread_inclusive;
LengthOffsetPair thread_aggregate = internal::ThreadReduce(lengths_and_num_runs, scan_op);
WarpScanPairs(temp_storage.aliasable.warp_scan[warp_id]).Scan(
WarpScanPairs(temp_storage.aliasable.scan_storage.warp_scan[warp_id]).Scan(
thread_aggregate,
thread_inclusive,
thread_exclusive_in_warp,
Expand All @@ -390,22 +390,22 @@ struct AgentRle

// Last lane in each warp shares its warp-aggregate
if (lane_id == WARP_THREADS - 1)
temp_storage.aliasable.warp_aggregates.Alias()[warp_id] = thread_inclusive;
temp_storage.aliasable.scan_storage.warp_aggregates.Alias()[warp_id] = thread_inclusive;

CTA_SYNC();

// Accumulate total selected and the warp-wide prefix
warp_exclusive_in_tile = identity;
warp_aggregate = temp_storage.aliasable.warp_aggregates.Alias()[warp_id];
tile_aggregate = temp_storage.aliasable.warp_aggregates.Alias()[0];
warp_aggregate = temp_storage.aliasable.scan_storage.warp_aggregates.Alias()[warp_id];
tile_aggregate = temp_storage.aliasable.scan_storage.warp_aggregates.Alias()[0];

#pragma unroll
for (int WARP = 1; WARP < WARPS; ++WARP)
{
if (warp_id == WARP)
warp_exclusive_in_tile = tile_aggregate;

tile_aggregate = scan_op(tile_aggregate, temp_storage.aliasable.warp_aggregates.Alias()[WARP]);
tile_aggregate = scan_op(tile_aggregate, temp_storage.aliasable.scan_storage.warp_aggregates.Alias()[WARP]);
}
}

Expand Down Expand Up @@ -738,7 +738,7 @@ struct AgentRle
lengths_and_num_runs);

// First warp computes tile prefix in lane 0
TilePrefixCallbackOpT prefix_op(tile_status, temp_storage.aliasable.prefix, Sum(), tile_idx);
TilePrefixCallbackOpT prefix_op(tile_status, temp_storage.aliasable.scan_storage.prefix, Sum(), tile_idx);
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
if (warp_id == 0)
{
Expand Down
14 changes: 7 additions & 7 deletions cub/agent/agent_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,11 @@ struct AgentScan
typename BlockLoadT::TempStorage load; // Smem needed for tile loading
typename BlockStoreT::TempStorage store; // Smem needed for tile storing

struct
struct ScanStorage
{
typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback
typename BlockScanT::TempStorage scan; // Smem needed for tile scanning
};
} scan_storage;
};

// Alias wrapper allowing storage to be unioned
Expand Down Expand Up @@ -205,7 +205,7 @@ struct AgentScan
OutputT &block_aggregate,
Int2Type<false> /*is_inclusive*/)
{
BlockScanT(temp_storage.scan).ExclusiveScan(items, items, init_value, scan_op, block_aggregate);
BlockScanT(temp_storage.scan_storage.scan).ExclusiveScan(items, items, init_value, scan_op, block_aggregate);
block_aggregate = scan_op(init_value, block_aggregate);
}

Expand All @@ -221,7 +221,7 @@ struct AgentScan
OutputT &block_aggregate,
Int2Type<true> /*is_inclusive*/)
{
BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, block_aggregate);
BlockScanT(temp_storage.scan_storage.scan).InclusiveScan(items, items, scan_op, block_aggregate);
}


Expand All @@ -236,7 +236,7 @@ struct AgentScan
PrefixCallback &prefix_op,
Int2Type<false> /*is_inclusive*/)
{
BlockScanT(temp_storage.scan).ExclusiveScan(items, items, scan_op, prefix_op);
BlockScanT(temp_storage.scan_storage.scan).ExclusiveScan(items, items, scan_op, prefix_op);
}


Expand All @@ -251,7 +251,7 @@ struct AgentScan
PrefixCallback &prefix_op,
Int2Type<true> /*is_inclusive*/)
{
BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, prefix_op);
BlockScanT(temp_storage.scan_storage.scan).InclusiveScan(items, items, scan_op, prefix_op);
}


Expand Down Expand Up @@ -322,7 +322,7 @@ struct AgentScan
else
{
// Scan non-first tile
TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx);
TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.scan_storage.prefix, scan_op, tile_idx);
ScanTile(items, scan_op, prefix_op, Int2Type<IS_INCLUSIVE>());
}

Expand Down
Loading

0 comments on commit b229817

Please sign in to comment.