From 93c63be3126539739f65059beef0350c51d024b3 Mon Sep 17 00:00:00 2001 From: "Gao, Xiang" Date: Wed, 5 Mar 2025 16:52:06 -0800 Subject: [PATCH] Version updates on many things (#3999) --- .github/workflows/build.yml | 7 ++--- .github/workflows/lint.yml | 11 +++----- .lintrunner.toml | 2 +- CMakeLists.txt | 7 ++++- README.md | 12 +++------ csrc/utils.h | 51 +++---------------------------------- setup.py | 2 ++ tests/cpp/test_utils.cpp | 9 +------ tools/apt-install-things.sh | 20 ++++++++++++--- tools/pip-install-things.sh | 2 +- tools/setup-env.sh | 3 +++ 11 files changed, 43 insertions(+), 83 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 11e58cd776d..0909af66095 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,7 @@ env: jobs: clang-build: - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -30,14 +30,11 @@ jobs: tools/apt-install-things.sh & tools/pip-install-things.sh & source tools/setup-env.sh - sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/13 - export CC=clang - export CXX=clang++ wait python setup.py build dynamic-type-meson: - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f20b1a88ae1..a9168522e11 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -15,7 +15,7 @@ env: jobs: check-license: - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -28,7 +28,7 @@ jobs: test ! 
-s missing-header-files.txt clang-tidy: - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -43,9 +43,6 @@ jobs: tools/pip-install-things.sh & source tools/setup-env.sh - # clang-tidy does not work well with gcc-13 headers, remove them - sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/13 - # Install lintrunner pip install lintrunner @@ -72,7 +69,7 @@ jobs: git --no-pager diff --diff-filter=d --name-only $head_commit | grep -e "csrc/.*\.cpp" -e "csrc/.*\.h" | xargs lintrunner --take CLANGTIDY --force-color lintrunner: - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -90,7 +87,7 @@ jobs: lintrunner init 2> /dev/null # Install dependencies - sudo apt install -y libtinfo5 + sudo apt install -y libtinfo6 # Run lintrunner except clang-tidy lintrunner --force-color --take FLAKE8,MYPY,CLANGFORMAT,NOQA,TYPEIGNORE,NEWLINE,MYPYSTRICT,TABS,SPACES,EXEC,BLACK,TORCH_INTERNAL_ASSERT,TORCH_CHECK,C10_ERROR,TORCH_CUDA_CU_API --all-files diff --git a/.lintrunner.toml b/.lintrunner.toml index cd4b08cc9b8..7fcac6c3c4d 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -179,7 +179,7 @@ init_command = [ 'python3', 'tools/linter/adapters/pip_init.py', '--dry-run={{DRYRUN}}', - 'clang-tidy==14.0.6', + 'clang-tidy==19.1.0.1', ] command = [ 'python3', diff --git a/CMakeLists.txt b/CMakeLists.txt index 00a7b3e9155..3c7f3515d63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,11 +39,15 @@ message(STATUS "Setting NVFUSER_DISTRIBUTED=${NVFUSER_DISTRIBUTED}") # are footguns that should generally be avoided, because they are difficult to # target where and *only* where they are needed. 
See e.g.: # https://cliutils.gitlab.io/modern-cmake/chapters/intro/dodonot.html -set(NVFUSER_C_STANDARD 17 CACHE STRING "C standard to use for C code") +set(NVFUSER_C_STANDARD 20 CACHE STRING "C standard to use for C code") set(NVFUSER_CPP_STANDARD 20 CACHE STRING "C++ standard to use for C++ code") set(NVFUSER_CUDA_STANDARD 17 CACHE STRING "C++ standard to use for CUDA code") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + # TODO: gcc 11.4 has reached end of life according to https://gcc.gnu.org/ + # I believe we should bump the minimum version below to 12.x. + # However, because gcc 11.4 is well tested and stable, let's defer this + # rejection until the day that we find a bug in gcc 11.4. if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.4) message(FATAL_ERROR "GCC < 11.4 has compiler bugs and can not compile nvFuser.") endif() @@ -342,6 +346,7 @@ set_target_properties(codegen_internal PROPERTIES CXX_STANDARD ${NVFUSER_CPP_STANDARD} CXX_STANDARD_REQUIRED ON CXX_VISIBILITY_PRESET hidden + # this is to find pip installed nvrtc.so INSTALL_RPATH "$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN/../../nvidia/cuda_cupti/lib:$ORIGIN/../../torch/lib" diff --git a/README.md b/README.md index f70fc8c091a..99cf2a89b77 100644 --- a/README.md +++ b/README.md @@ -43,20 +43,16 @@ Supported compilers: **GCC:** We support all "supported releases" of gcc as specified in [the official site](https://gcc.gnu.org/). 
-As of 6/20/2024, they are: +As of 3/2/2025, they are: -- gcc 11.4 - gcc 12.4 -- gcc 13.2 -- gcc 14.1 +- gcc 13.3 +- gcc 14.2 **Clang:** -- clang 14+ +- clang 16+ Supported C++ standard: -- C++17 - C++20 - -We are actively considering dropping C++17 support diff --git a/csrc/utils.h b/csrc/utils.h index 8db5bd3a3ec..c2a0111666b 100644 --- a/csrc/utils.h +++ b/csrc/utils.h @@ -626,55 +626,9 @@ using std::views::zip; #else namespace views { -#if !defined(__clang__) || (__clang_major__ > 14) -using std::views::iota; -#else -// Workaround for Clang 14 -class iota { - public: - class iterator { - public: - using value_type = int; - using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; - int64_t value; - iterator(int64_t start) : value(start) {} - int64_t operator*() const { - return value; - } - iterator& operator++() { - ++value; - return *this; - } - iterator operator++(int) { - iterator temp = *this; - ++value; - return temp; - } - template - bool operator==(T) const { - return false; - } - }; - - iterator begin() const { - return iterator(start); - } - auto end() const { - return std::unreachable_sentinel; - } - int64_t start; - iota(int64_t start) : start(start) {} -}; -#endif template -#if !defined(__clang__) || (__clang_major__ > 14) class zip_view : public std::ranges::view_interface> { -#else -// Workaround for Clang 14 -class zip_view { -#endif private: std::tuple bases; @@ -742,10 +696,11 @@ auto zip(Rs&&... 
rs) { } // namespace views using views::zip; -#endif +#endif // C++23 auto enumerate(auto&& range) { - return zip(views::iota((int64_t)0), std::forward(range)); + return zip( + std::views::iota((int64_t)0), std::forward(range)); } } // namespace nvfuser diff --git a/setup.py b/setup.py index ed4c7b1dfc7..49c7c4118c7 100644 --- a/setup.py +++ b/setup.py @@ -148,6 +148,8 @@ continue if arg.startswith("--cpp="): CPP_STANDARD = int(arg.split("=")[1]) + if CPP_STANDARD < 20: + raise ValueError("nvfuser requires C++20 standard or higher") continue if arg in ["clean"]: # only disables BUILD_SETUP, but keep the argument for setuptools diff --git a/tests/cpp/test_utils.cpp b/tests/cpp/test_utils.cpp index 7afe60ccf05..ae701bcdb61 100644 --- a/tests/cpp/test_utils.cpp +++ b/tests/cpp/test_utils.cpp @@ -1760,12 +1760,7 @@ TEST_F(TestCpp23BackPort, ZipDifferentWaysToSayZeroToTen) { } }; static_assert(std::input_iterator); -#if !defined(__clang__) || (__clang_major__ > 14) struct ZeroToInf : std::ranges::view_interface { -#else - // Workaround for Clang 14 - struct ZeroToInf { -#endif SetTheoreticNaturalNumber begin() { return SetTheoreticNaturalNumber(); } @@ -1773,10 +1768,8 @@ TEST_F(TestCpp23BackPort, ZipDifferentWaysToSayZeroToTen) { return std::unreachable_sentinel; } } set_theoretic_zero_to_inf; -#if !defined(__clang__) || (__clang_major__ > 14) static_assert(std::ranges::input_range); static_assert(std::ranges::view); -#endif int64_t counter = 0; auto english_it = english.begin(); @@ -1784,7 +1777,7 @@ TEST_F(TestCpp23BackPort, ZipDifferentWaysToSayZeroToTen) { zip(integer, english, set_theoretic_zero_to_inf, - views::iota((int64_t)0))) { + std::views::iota((int64_t)0))) { static_assert(std::is_same_v); static_assert(std::is_same_v); static_assert(std::is_same_v); diff --git a/tools/apt-install-things.sh b/tools/apt-install-things.sh index c303a85f3d6..c3895d7a70e 100755 --- a/tools/apt-install-things.sh +++ b/tools/apt-install-things.sh @@ -1,7 +1,19 @@ #!/bin/bash -# 
Install cuda -wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb -sudo dpkg -i cuda-keyring_1.0-1_all.deb +set -e + +# Install cuda keyring +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update -sudo apt-get -y install ninja-build cuda-compiler-12-1 cuda-command-line-tools-12-1 cuda-libraries-dev-12-1 libnccl-dev clang-14 nlohmann-json3-dev + +# Remove some old toolchains. By default, the GitHub Actions runner comes with multiple versions of gcc and clang installed. +# Multiple installed versions of gcc and clang interfere with each other, causing weird build and clang-tidy errors. +# We only keep one version of gcc and clang in the system, and remove the rest. +sudo apt-get -y remove gcc-13 libstdc++-13-dev gcc-12 libstdc++-12-dev + +# Install the latest version of clang and gcc. +sudo apt-get -y install --reinstall clang-19 gcc-14 nlohmann-json3-dev ninja-build + +# Install minimal cuda toolkit. +sudo apt-get -y install cuda-compiler-12-8 cuda-command-line-tools-12-8 cuda-libraries-dev-12-8 libnccl-dev diff --git a/tools/pip-install-things.sh b/tools/pip-install-things.sh index d38faf501fc..096806278da 100755 --- a/tools/pip-install-things.sh +++ b/tools/pip-install-things.sh @@ -1,6 +1,6 @@ #!/bin/bash -pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 +pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128 # PyTorch's has a buildin Modules_CUDA which is super old. Remove it. 
rm -rf $(python -c 'import torch; print(torch.__path__[0])')/share/cmake/Caffe2/Modules_CUDA_fix diff --git a/tools/setup-env.sh b/tools/setup-env.sh index ed26ab19aea..465e5547f03 100755 --- a/tools/setup-env.sh +++ b/tools/setup-env.sh @@ -3,3 +3,6 @@ export CUDACXX=/usr/local/cuda/bin/nvcc export PATH=/usr/local/cuda/bin:${PATH} export CUDA_INSTALL_PATH=/usr/local/cuda +export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda +export CC=clang-19 +export CXX=clang++-19