From 44afc5109b342b653797a20db1e2654fa450417f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 14 Oct 2024 16:02:14 -0700 Subject: [PATCH] Add clang-tidy to CI (#16958) This PR adds clang-tidy checks to our CI. clang-tidy will be run in nightly CI via CMake. For now, only the parts of the code base that were already made compliant in the PRs leading up to this have been enabled, namely cudf source and test cpp files. Over time we can add more files like benchmarks and examples, add or subtract more rules, or enable linting of cu files (see https://gitlab.kitware.com/cmake/cmake/-/issues/25399). This PR is intended to be the starting point enabling systematic linting, at which point everything else should be significantly easier. Resolves #584 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/16958 --- .github/workflows/test.yaml | 14 +- ci/clang_tidy.sh | 29 +++ cpp/.clang-tidy | 2 +- cpp/CMakeLists.txt | 54 ++++++ cpp/scripts/run-clang-tidy.py | 253 -------------------------- cpp/tests/CMakeLists.txt | 1 + cpp/tests/interop/nanoarrow_utils.hpp | 3 +- dependencies.yaml | 30 ++- 8 files changed, 129 insertions(+), 257 deletions(-) create mode 100755 ci/clang_tidy.sh delete mode 100644 cpp/scripts/run-clang-tidy.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a22d3c5b9cc..1275aad757c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -47,11 +47,23 @@ jobs: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: - build_type: pull-request + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} # Use the wheel container so we can skip conda solves and since our # primary static consumers (Spark) are not in conda anyway. container_image: "rapidsai/ci-wheel:latest" run_script: "ci/configure_cpp_static.sh" + clang-tidy: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + run_script: "ci/clang_tidy.sh" conda-python-cudf-tests: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 diff --git a/ci/clang_tidy.sh b/ci/clang_tidy.sh new file mode 100755 index 00000000000..4d5d3fc3136 --- /dev/null +++ b/ci/clang_tidy.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +rapids-logger "Create clang-tidy conda environment" +. /opt/conda/etc/profile.d/conda.sh + +ENV_YAML_DIR="$(mktemp -d)" + +rapids-dependency-file-generator \ + --output conda \ + --file-key clang_tidy \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" + +rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n clang_tidy + +# Temporarily allow unbound variables for conda activation. +set +u +conda activate clang_tidy +set -u + +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + +source rapids-configure-sccache + +# Run the build via CMake, which will run clang-tidy when CUDF_CLANG_TIDY is enabled. +cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON -GNinja +cmake --build cpp/build diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy index 2d4f8c0d80e..12120a5c6d1 100644 --- a/cpp/.clang-tidy +++ b/cpp/.clang-tidy @@ -39,7 +39,7 @@ Checks: -clang-analyzer-optin.core.EnumCastOutOfRange, -clang-analyzer-optin.cplusplus.UninitializedObject' -WarningsAsErrors: '' +WarningsAsErrors: '*' HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*' ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*' FormatStyle: none diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c8f8ae2dcfe..32a753c9f40 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -88,6 +88,7 @@ option( ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL} ) mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL) +option(CUDF_CLANG_TIDY "Enable clang-tidy checking" OFF) message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}") message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") @@ -144,6 +145,58 @@ if(NOT CUDF_GENERATED_INCLUDE_DIR) set(CUDF_GENERATED_INCLUDE_DIR ${CUDF_BINARY_DIR}) endif() +# ################################################################################################## +# * clang-tidy configuration ---------------------------------------------------------------------- +if(CUDF_CLANG_TIDY) + find_program( + CLANG_TIDY_EXE + NAMES "clang-tidy" + DOC "Path to clang-tidy executable" REQUIRED + ) + + execute_process( + COMMAND ${CLANG_TIDY_EXE} --version + OUTPUT_VARIABLE CLANG_TIDY_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + string(REGEX MATCH "LLVM version ([0-9]+\\.[0-9]+)\\.[0-9]+" LLVM_VERSION_MATCH + "${CLANG_TIDY_OUTPUT}" + ) + # Discard the patch version and allow it to float. Empirically, results between patch versions are + # mostly stable, and looking at available packages on some package managers sometimes patch + # versions are skipped so we don't want to constrain to a patch version that the user can't + # install. + set(LLVM_VERSION "${CMAKE_MATCH_1}") + set(expected_clang_tidy_version 19.1) + if(NOT expected_clang_tidy_version VERSION_EQUAL LLVM_VERSION) + message( + FATAL_ERROR + "clang-tidy version ${expected_clang_tidy_version} is required, but found ${LLVM_VERSION}" + ) + endif() +endif() + +# Turn on the clang-tidy property for a target excluding the files specified in SKIPPED_FILES. +function(enable_clang_tidy target) + set(_tidy_options) + set(_tidy_one_value) + set(_tidy_multi_value SKIPPED_FILES) + cmake_parse_arguments( + _TIDY "${_tidy_options}" "${_tidy_one_value}" "${_tidy_multi_value}" ${ARGN} + ) + + if(CUDF_CLANG_TIDY) + # clang will complain about unused link libraries on the compile line unless we specify + # -Qunused-arguments. + set_target_properties( + ${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments" + ) + foreach(file IN LISTS _TIDY_SKIPPED_FILES) + set_source_files_properties(${file} PROPERTIES SKIP_LINTING ON) + endforeach() + endif() +endfunction() + # ################################################################################################## # * conda environment ----------------------------------------------------------------------------- rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) @@ -714,6 +767,7 @@ target_compile_options( cudf PRIVATE "$<$:${CUDF_CXX_FLAGS}>" "$<$:${CUDF_CUDA_FLAGS}>" ) +enable_clang_tidy(cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp) if(CUDF_BUILD_STACKTRACE_DEBUG) # Remove any optimization level to avoid nvcc warning "incompatible redefinition for option diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py deleted file mode 100644 index e5e57dbf562..00000000000 --- a/cpp/scripts/run-clang-tidy.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import re -import os -import subprocess -import argparse -import json -import multiprocessing as mp -import shutil - - -EXPECTED_VERSION = "16.0.6" -VERSION_REGEX = re.compile(r" LLVM version ([0-9.]+)") -GPU_ARCH_REGEX = re.compile(r"sm_(\d+)") -SPACES = re.compile(r"\s+") -SEPARATOR = "-" * 16 - - -def parse_args(): - argparser = argparse.ArgumentParser("Runs clang-tidy on a project") - argparser.add_argument("-cdb", type=str, - # TODO This is a hack, needs to be fixed - default="cpp/build/cuda-11.5.0/clang-tidy/release/compile_commands.clangd.json", - help="Path to cmake-generated compilation database" - " file. It is always found inside the root of the " - "cmake build folder. So make sure that `cmake` has " - "been run once before running this script!") - argparser.add_argument("-exe", type=str, default="clang-tidy", - help="Path to clang-tidy exe") - argparser.add_argument("-ignore", type=str, default="[.]cu$|examples/kmeans/", - help="Regex used to ignore files from checking") - argparser.add_argument("-select", type=str, default=None, - help="Regex used to select files for checking") - argparser.add_argument("-j", type=int, default=-1, - help="Number of parallel jobs to launch.") - args = argparser.parse_args() - if args.j <= 0: - args.j = mp.cpu_count() - args.ignore_compiled = re.compile(args.ignore) if args.ignore else None - args.select_compiled = re.compile(args.select) if args.select else None - ret = subprocess.check_output("%s --version" % args.exe, shell=True) - ret = ret.decode("utf-8") - version = VERSION_REGEX.search(ret) - if version is None: - raise Exception("Failed to figure out clang-tidy version!") - version = version.group(1) - if version != EXPECTED_VERSION: - raise Exception("clang-tidy exe must be v%s found '%s'" % \ - (EXPECTED_VERSION, version)) - if not os.path.exists(args.cdb): - raise Exception("Compilation database '%s' missing" % args.cdb) - return args - - -def get_all_commands(cdb): - with open(cdb) as fp: - return json.load(fp) - - -def get_gpu_archs(command): - archs = [] - for loc in range(len(command)): - if command[loc] != "-gencode": - continue - arch_flag = command[loc + 1] - match = GPU_ARCH_REGEX.search(arch_flag) - if match is not None: - archs.append("--cuda-gpu-arch=sm_%s" % match.group(1)) - return archs - - -def get_index(arr, item): - try: - return arr.index(item) - except: - return -1 - - -def remove_item(arr, item): - loc = get_index(arr, item) - if loc >= 0: - del arr[loc] - return loc - - -def remove_item_plus_one(arr, item): - loc = get_index(arr, item) - if loc >= 0: - del arr[loc + 1] - del arr[loc] - return loc - - -def get_clang_includes(exe): - dir = os.getenv("CONDA_PREFIX") - if dir is None: - ret = subprocess.check_output("which %s 2>&1" % exe, shell=True) - ret = ret.decode("utf-8") - dir = os.path.dirname(os.path.dirname(ret)) - header = os.path.join(dir, "include", "ClangHeaders") - return ["-I", header] - - -def get_tidy_args(cmd, exe): - command, file = cmd["command"], cmd["file"] - is_cuda = file.endswith(".cu") - command = re.split(SPACES, command) - # compiler is always clang++! - command[0] = "clang++" - # remove compilation and output targets from the original command - remove_item_plus_one(command, "-c") - remove_item_plus_one(command, "-o") - if is_cuda: - # replace nvcc's "-gencode ..." with clang's "--cuda-gpu-arch ..." - archs = get_gpu_archs(command) - command.extend(archs) - while True: - loc = remove_item_plus_one(command, "-gencode") - if loc < 0: - break - # "-x cuda" is the right usage in clang - loc = get_index(command, "-x") - if loc >= 0: - command[loc + 1] = "cuda" - remove_item_plus_one(command, "-ccbin") - remove_item(command, "--expt-extended-lambda") - remove_item(command, "--diag_suppress=unrecognized_gcc_pragma") - command.extend(get_clang_includes(exe)) - return command, is_cuda - - -def run_clang_tidy_command(tidy_cmd): - cmd = " ".join(tidy_cmd) - result = subprocess.run(cmd, check=False, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - status = result.returncode == 0 - if status: - out = "" - else: - out = "CMD: " + cmd - out += result.stdout.decode("utf-8").rstrip() - return status, out - - -def run_clang_tidy(cmd, args): - command, is_cuda = get_tidy_args(cmd, args.exe) - tidy_cmd = [args.exe, - "-header-filter='.*cudf/cpp/(src|include|bench|comms).*'", - cmd["file"], "--", ] - tidy_cmd.extend(command) - status = True - out = "" - if is_cuda: - tidy_cmd.append("--cuda-device-only") - tidy_cmd.append(cmd["file"]) - ret, out1 = run_clang_tidy_command(tidy_cmd) - out += out1 - out += "%s" % SEPARATOR - if not ret: - status = ret - tidy_cmd[-2] = "--cuda-host-only" - ret, out1 = run_clang_tidy_command(tidy_cmd) - if not ret: - status = ret - out += out1 - else: - tidy_cmd.append(cmd["file"]) - ret, out1 = run_clang_tidy_command(tidy_cmd) - if not ret: - status = ret - out += out1 - return status, out, cmd["file"] - - -# yikes! global var :( -results = [] -def collect_result(result): - global results - results.append(result) - - -def print_result(passed, stdout, file): - status_str = "PASSED" if passed else "FAILED" - print(f"{SEPARATOR} File:{file} {status_str} {SEPARATOR}") - if stdout: - print(stdout) - print(f"{SEPARATOR} File:{file} ENDS {SEPARATOR}") - - -def print_results(): - global results - status = True - for passed, stdout, file in results: - print_result(passed, stdout, file) - if not passed: - status = False - return status - - -def run_tidy_for_all_files(args, all_files): - pool = None if args.j == 1 else mp.Pool(args.j) - # actual tidy checker - for cmd in all_files: - # skip files that we don't want to look at - if args.ignore_compiled is not None and \ - re.search(args.ignore_compiled, cmd["file"]) is not None: - continue - if args.select_compiled is not None and \ - re.search(args.select_compiled, cmd["file"]) is None: - continue - if pool is not None: - pool.apply_async(run_clang_tidy, args=(cmd, args), - callback=collect_result) - else: - passed, stdout, file = run_clang_tidy(cmd, args) - collect_result((passed, stdout, file)) - if pool is not None: - pool.close() - pool.join() - return print_results() - - -def main(): - args = parse_args() - # Attempt to making sure that we run this script from root of repo always - if not os.path.exists(".git"): - raise Exception("This needs to always be run from the root of repo") - # Check whether clang-tidy exists - # print(args) - if "exe" not in args and shutil.which("clang-tidy") is not None: - print("clang-tidy not found. Exiting...") - return - all_files = get_all_commands(args.cdb) - status = run_tidy_for_all_files(args, all_files) - if not status: - raise Exception("clang-tidy failed! Refer to the errors above.") - - -if __name__ == "__main__": - main() diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 62189f76cae..799a84cbc37 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -83,6 +83,7 @@ function(ConfigureTest CMAKE_TEST_NAME) "GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$" ) endif() + enable_clang_tidy(${CMAKE_TEST_NAME}) endfunction() # ################################################################################################## diff --git a/cpp/tests/interop/nanoarrow_utils.hpp b/cpp/tests/interop/nanoarrow_utils.hpp index a961f73d955..8be7e087b6d 100644 --- a/cpp/tests/interop/nanoarrow_utils.hpp +++ b/cpp/tests/interop/nanoarrow_utils.hpp @@ -256,7 +256,8 @@ std::enable_if_t, nanoarrow::UniqueArray> get_nanoarrow_ ArrowBitmap out; ArrowBitmapInit(&out); NANOARROW_THROW_NOT_OK(ArrowBitmapResize(&out, b.size(), 1)); - std::memset(out.buffer.data, 0, out.buffer.size_bytes); + // TODO: Investigate clang-tidy issue further after nanoarrow is made compliant + std::memset(out.buffer.data, 0, out.buffer.size_bytes); // NOLINT for (size_t i = 0; i < b.size(); ++i) { ArrowBitSetTo(out.buffer.data, i, static_cast(b[i])); diff --git a/dependencies.yaml b/dependencies.yaml index ca17917c905..ff97b67f0ce 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -6,10 +6,18 @@ files: cuda: ["11.8", "12.5"] arch: [x86_64] includes: + # Note that clang-tidy is not included here because cudf's preferred + # version conflicts with the rest of RAPIDS as well as its own + # clang-format version. Until we update our clang-format version we will + # not be able to install both into the same environment. Moreover, using + # this version will break compatibility with other RAPIDS libraries that + # are still using 16.0.6, and as such will and that would break any + # unified environment like that used in unified devcontainers. - build_base - build_all - build_cpp - build_python_common + - clang_format - cuda - cuda_version - depends_on_cupy @@ -86,6 +94,16 @@ files: includes: - develop - py_version + clang_tidy: + output: none + includes: + - build_all + - build_base + - clang_tidy + - cuda + - cuda_version + - develop + - py_version docs: output: none includes: @@ -553,11 +571,21 @@ dependencies: # pre-commit requires identify minimum version 1.0, but clang-format requires textproto support and that was # added in 2.5.20, so we need to call out the minimum version needed for our plugins - identify>=2.5.20 + - output_types: conda + packages: + - &doxygen doxygen=1.9.1 # pre-commit hook needs a specific version. + clang_format: + common: - output_types: conda packages: - clang==16.0.6 - clang-tools=16.0.6 - - &doxygen doxygen=1.9.1 # pre-commit hook needs a specific version. + clang_tidy: + common: + - output_types: conda + packages: + - clang==19.1.0 + - clang-tools==19.1.0 docs: common: - output_types: [conda]