diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1275aad757c..ad3f5940b94 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -63,7 +63,8 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} - run_script: "ci/clang_tidy.sh" + run_script: "ci/cpp_linters.sh" + file_to_upload: iwyu_results.txt conda-python-cudf-tests: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 diff --git a/ci/clang_tidy.sh b/ci/clang_tidy.sh deleted file mode 100755 index 4d5d3fc3136..00000000000 --- a/ci/clang_tidy.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright (c) 2024, NVIDIA CORPORATION. - -set -euo pipefail - -rapids-logger "Create clang-tidy conda environment" -. /opt/conda/etc/profile.d/conda.sh - -ENV_YAML_DIR="$(mktemp -d)" - -rapids-dependency-file-generator \ - --output conda \ - --file-key clang_tidy \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" - -rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n clang_tidy - -# Temporarily allow unbound variables for conda activation. -set +u -conda activate clang_tidy -set -u - -RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" - -source rapids-configure-sccache - -# Run the build via CMake, which will run clang-tidy when CUDF_CLANG_TIDY is enabled. -cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON -GNinja -cmake --build cpp/build diff --git a/ci/cpp_linters.sh b/ci/cpp_linters.sh new file mode 100755 index 00000000000..a7c7255456f --- /dev/null +++ b/ci/cpp_linters.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +rapids-logger "Create checks conda environment" +. /opt/conda/etc/profile.d/conda.sh + +ENV_YAML_DIR="$(mktemp -d)" + +rapids-dependency-file-generator \ + --output conda \ + --file-key clang_tidy \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" + +rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n clang_tidy + +# Temporarily allow unbound variables for conda activation. +set +u +conda activate clang_tidy +set -u + +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + +source rapids-configure-sccache + +# TODO: For testing purposes, clone and build IWYU. We can switch to a release +# once a clang 19-compatible version is available, which should be soon +# (https://github.com/include-what-you-use/include-what-you-use/issues/1641). +git clone --depth 1 https://github.com/include-what-you-use/include-what-you-use.git +pushd include-what-you-use +# IWYU's CMake build uses some Python scripts that assume that the cwd is +# importable, so support that legacy behavior. +export PYTHONPATH=${PWD}:${PYTHONPATH:-} +cmake -S . -B build -GNinja --install-prefix=${CONDA_PREFIX} +cmake --build build +cmake --install build +popd + +# Run the build via CMake, which will run clang-tidy when CUDF_STATIC_LINTERS is enabled. +cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_STATIC_LINTERS=ON -GNinja +cmake --build cpp/build 2>&1 | python cpp/scripts/parse_iwyu_output.py + +# Remove invalid components of the path for local usage. The path below is +# valid in the CI due to where the project is cloned, but presumably the fixes +# will be applied locally from inside a clone of cudf. +sed -i 's/\/__w\/cudf\/cudf\///' iwyu_results.txt diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy index 12120a5c6d1..0e5699876fc 100644 --- a/cpp/.clang-tidy +++ b/cpp/.clang-tidy @@ -39,8 +39,8 @@ Checks: -clang-analyzer-optin.core.EnumCastOutOfRange, -clang-analyzer-optin.cplusplus.UninitializedObject' -WarningsAsErrors: '*' -HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*' +WarningsAsErrors: '' +HeaderFilterRegex: '.*cudf/cpp/(src|include).*' ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*' FormatStyle: none CheckOptions: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bfa4bf80724..d3bf7019e35 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -88,7 +88,7 @@ option( ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL} ) mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL) -option(CUDF_CLANG_TIDY "Enable clang-tidy checking" OFF) +option(CUDF_STATIC_LINTERS "Enable static linters during compilation" OFF) message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}") message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") @@ -146,8 +146,10 @@ if(NOT CUDF_GENERATED_INCLUDE_DIR) endif() # ################################################################################################## -# * clang-tidy configuration ---------------------------------------------------------------------- -if(CUDF_CLANG_TIDY) +# * linter configuration --------------------------------------------------------------------------- +if(CUDF_STATIC_LINTERS) + # For simplicity, for now we assume that all linters can be installed into an environment where + # any linter is being run. We could relax this requirement if desired. find_program( CLANG_TIDY_EXE NAMES "clang-tidy" @@ -174,24 +176,48 @@ if(CUDF_CLANG_TIDY) "clang-tidy version ${expected_clang_tidy_version} is required, but found ${LLVM_VERSION}" ) endif() + + find_program(IWYU_EXE NAMES include-what-you-use iwyu REQUIRED) endif() # Turn on the clang-tidy property for a target excluding the files specified in SKIPPED_FILES. -function(enable_clang_tidy target) - set(_tidy_options) +function(enable_static_checkers target) + set(_tidy_options IWYU CLANG_TIDY) set(_tidy_one_value) set(_tidy_multi_value SKIPPED_FILES) cmake_parse_arguments( - _TIDY "${_tidy_options}" "${_tidy_one_value}" "${_tidy_multi_value}" ${ARGN} + _LINT "${_tidy_options}" "${_tidy_one_value}" "${_tidy_multi_value}" ${ARGN} ) - if(CUDF_CLANG_TIDY) - # clang will complain about unused link libraries on the compile line unless we specify - # -Qunused-arguments. - set_target_properties( - ${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments" - ) - foreach(file IN LISTS _TIDY_SKIPPED_FILES) + if(CUDF_STATIC_LINTERS) + if(_LINT_CLANG_TIDY) + # clang will complain about unused link libraries on the compile line unless we specify + # -Qunused-arguments. + set_target_properties( + ${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments" + ) + endif() + if(_LINT_IWYU) + # A few extra warnings pop up when building with IWYU. I'm not sure why, but they are not + # relevant since they don't show up in any other build so it's better to suppress them until + # we can figure out the cause. Setting this as part of CXX_INCLUDE_WHAT_YOU_USE does not + # appear to be sufficient, we must also ensure that it is set to the underlying target's CXX + # compile flags. To do this completely cleanly we should modify the flags on the target rather + # than the global CUDF_CXX_FLAGS, but this solution is good enough for now since we never run + # the linters on real builds. + foreach(_flag -Wno-missing-braces -Wno-absolute-value -Wunneeded-internal-declaration) + list(FIND CUDF_CXX_FLAGS "${flag}" _flag_index) + if(_flag_index EQUAL -1) + list(APPEND CUDF_CXX_FLAGS ${flag}) + endif() + endforeach() + set(CUDF_CXX_FLAGS + "${CUDF_CXX_FLAGS}" + PARENT_SCOPE + ) + set_target_properties(${target} PROPERTIES CXX_INCLUDE_WHAT_YOU_USE "${IWYU_EXE}") + endif() + foreach(file IN LISTS _LINT_SKIPPED_FILES) set_source_files_properties(${file} PROPERTIES SKIP_LINTING ON) endforeach() endif() @@ -771,11 +797,15 @@ set_target_properties( INTERFACE_POSITION_INDEPENDENT_CODE ON ) +# Note: This must come before the target_compile_options below so that the function can modify the +# flags if necessary. +enable_static_checkers( + cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp CLANG_TIDY IWYU +) target_compile_options( cudf PRIVATE "$<$:${CUDF_CXX_FLAGS}>" "$<$:${CUDF_CUDA_FLAGS}>" ) -enable_clang_tidy(cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp) if(CUDF_BUILD_STACKTRACE_DEBUG) # Remove any optimization level to avoid nvcc warning "incompatible redefinition for option diff --git a/cpp/scripts/parse_iwyu_output.py b/cpp/scripts/parse_iwyu_output.py new file mode 100644 index 00000000000..822a980a1a8 --- /dev/null +++ b/cpp/scripts/parse_iwyu_output.py @@ -0,0 +1,170 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Helper script to modify IWYU output to only include removals. + +Lines that are not from include-what-you-use are removed from the output. +""" + +import argparse +import re +from enum import Enum + + +class Mode(Enum): + NORMAL = 0 + ADD = 1 + REMOVE = 2 + FULL_INCLUDE_LIST = 3 + + +def extract_include_file(include_line): + """Extract the core file path from an #include directive.""" + match = re.search(r'#include\s+[<"]([^">]+)[">]', include_line) + if match: + return match.group(1) + return None + + +def parse_output(input_stream): + include_modifications = {} + current_file = None + mode = Mode.NORMAL + + for line in input_stream: + if match := re.match(r"(\/\S+) should add these lines:", line): + current_file = match.group(1) + include_modifications.setdefault( + current_file, + { + "add_includes": [], + "remove_includes": [], + "full_include_list": [], + }, + ) + mode = Mode.ADD + elif match := re.match(r"(\/\S+) should remove these lines:", line): + mode = Mode.REMOVE + elif match := re.match(r"The full include-list for (\/\S+):", line): + mode = Mode.FULL_INCLUDE_LIST + elif line.strip() == "---": + current_file = None + mode = Mode.NORMAL + else: + if current_file: + if mode == Mode.ADD: + include_modifications[current_file]["add_includes"].append( + line.strip() + ) + elif mode == Mode.REMOVE: + include_modifications[current_file][ + "remove_includes" + ].append(line.strip()) + elif mode == Mode.FULL_INCLUDE_LIST: + include_modifications[current_file][ + "full_include_list" + ].append(line.strip()) + else: + if ( + line.strip() + and "include-what-you-use reported diagnostics" not in line + and "In file included from" not in line + and "has correct #includes/fwd-decls" not in line + ): + print(line, end="") + + return include_modifications + + +def post_process_includes(include_modifications): + """Deduplicate and remove redundant entries from add and remove includes.""" + for mods in include_modifications.values(): + # Deduplicate add_includes and remove_includes + mods["add_includes"] = list(set(mods["add_includes"])) + mods["remove_includes"] = list(set(mods["remove_includes"])) + + # Extract file paths from add_includes and remove_includes + add_files = { + extract_include_file(line) for line in mods["add_includes"] + } + remove_files = { + extract_include_file(line) for line in mods["remove_includes"] + } + + # Remove entries that exist in both add_includes and remove_includes + common_files = add_files & remove_files + mods["add_includes"] = [ + line + for line in mods["add_includes"] + if extract_include_file(line) not in common_files + ] + mods["remove_includes"] = [ + line + for line in mods["remove_includes"] + if extract_include_file(line) not in common_files + ] + + # Remove entries that exist in add_includes from full_include_list + mods["full_include_list"] = [ + include + for include in mods["full_include_list"] + if extract_include_file(include) not in add_files + ] + + +def write_output(include_modifications, output_stream): + for filename, mods in include_modifications.items(): + if mods["remove_includes"]: + # IWYU requires all sections to exist, so we write out this header even + # though we never write out any actual additions. + output_stream.write(f"{filename} should add these lines:\n\n") + + output_stream.write(f"{filename} should remove these lines:\n") + for line in mods["remove_includes"]: + output_stream.write(line + "\n") + output_stream.write("\n") + + output_stream.write(f"The full include-list for {filename}:\n") + for line in mods["full_include_list"]: + output_stream.write(line + "\n") + output_stream.write("---\n") + + +def main(): + parser = argparse.ArgumentParser( + description="Process include modifications from a build output log." + ) + parser.add_argument( + "input", + nargs="?", + type=argparse.FileType("r"), + default="-", + help="Input file to read (default: stdin)", + ) + parser.add_argument( + "--output", + type=argparse.FileType("w"), + default="iwyu_results.txt", + help="Output file to write (default: iwyu_output.txt)", + ) + args = parser.parse_args() + + include_modifications = parse_output(args.input) + post_process_includes(include_modifications) + write_output(include_modifications, args.output) + + +if __name__ == "__main__": + main() diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index e9ba58ba224..f502195aea4 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -83,7 +83,6 @@ function(ConfigureTest CMAKE_TEST_NAME) "GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$" ) endif() - enable_clang_tidy(${CMAKE_TEST_NAME}) endfunction() # ################################################################################################## diff --git a/dependencies.yaml b/dependencies.yaml index 93213172445..59f8f2fda49 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -587,6 +587,12 @@ dependencies: packages: - clang==19.1.0 - clang-tools==19.1.0 + # TODO: These are build requirements for IWYU and can be replaced + # with IWYU itself once a conda package of IWYU supporting clang 19 + # is available. + - clangdev==19.1.0 + - llvm==19.1.0 + - llvmdev==19.1.0 docs: common: - output_types: [conda]