Merge branch 'subpart' of github.com:NVIDIA/Fuser into subpart

NVIDIA · Mar 6, 2025 · 1763d6b · 1763d6b
2 parents 6534a23 + 24ba947
commit 1763d6b
Show file tree

Hide file tree

Showing 11 changed files with 43 additions and 83 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -16,7 +16,7 @@ env:
 
 jobs:
   clang-build:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
         with:
@@ -30,14 +30,11 @@ jobs:
           tools/apt-install-things.sh &
           tools/pip-install-things.sh &
           source tools/setup-env.sh
-          sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/13
-          export CC=clang
-          export CXX=clang++
           wait
           python setup.py build
 
   dynamic-type-meson:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
         with:

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -15,7 +15,7 @@ env:
 
 jobs:
   check-license:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
         with:
@@ -28,7 +28,7 @@ jobs:
           test ! -s missing-header-files.txt
 
   clang-tidy:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
         with:
@@ -43,9 +43,6 @@ jobs:
           tools/pip-install-things.sh &
           source tools/setup-env.sh
 
-          # clang-tidy does not work well with gcc-13 headers, remove them
-          sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/13
-
           # Install lintrunner
           pip install lintrunner
 
@@ -72,7 +69,7 @@ jobs:
           git --no-pager diff --diff-filter=d --name-only $head_commit | grep -e "csrc/.*\.cpp" -e "csrc/.*\.h" | xargs lintrunner --take CLANGTIDY --force-color
 
   lintrunner:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
         with:
@@ -90,7 +87,7 @@ jobs:
           lintrunner init 2> /dev/null
 
           # Install dependencies
-          sudo apt install -y libtinfo5
+          sudo apt install -y libtinfo6
 
           # Run lintrunner except clang-tidy
           lintrunner --force-color --take FLAKE8,MYPY,CLANGFORMAT,NOQA,TYPEIGNORE,NEWLINE,MYPYSTRICT,TABS,SPACES,EXEC,BLACK,TORCH_INTERNAL_ASSERT,TORCH_CHECK,C10_ERROR,TORCH_CUDA_CU_API --all-files
diff --git a/.lintrunner.toml b/.lintrunner.toml
@@ -179,7 +179,7 @@ init_command = [
     'python3',
     'tools/linter/adapters/pip_init.py',
     '--dry-run={{DRYRUN}}',
-    'clang-tidy==14.0.6',
+    'clang-tidy==19.1.0.1',
 ]
 command = [
     'python3',

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -39,11 +39,15 @@ message(STATUS "Setting NVFUSER_DISTRIBUTED=${NVFUSER_DISTRIBUTED}")
 # are footguns that should generally be avoided, because they are difficult to
 # target where and *only* where they are needed. See e.g.:
 # https://cliutils.gitlab.io/modern-cmake/chapters/intro/dodonot.html
-set(NVFUSER_C_STANDARD 17 CACHE STRING "C standard to use for C code")
+set(NVFUSER_C_STANDARD 20 CACHE STRING "C standard to use for C code")
 set(NVFUSER_CPP_STANDARD 20 CACHE STRING "C++ standard to use for C++ code")
 set(NVFUSER_CUDA_STANDARD 17 CACHE STRING "C++ standard to use for CUDA code")
 
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+  # TODO: gcc 11.4 has reached end of life according to https://gcc.gnu.org/
+  # I believe we should bump up the version below to 12.x.
+  # However, because gcc 11.4 is well tested and stable, let's defer this
+  # rejection until the day that we find a bug in gcc 11.4.
   if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.4)
     message(FATAL_ERROR "GCC < 11.4 has compiler bugs and can not compile nvFuser.")
   endif()
@@ -342,6 +346,7 @@ set_target_properties(codegen_internal PROPERTIES
   CXX_STANDARD ${NVFUSER_CPP_STANDARD}
   CXX_STANDARD_REQUIRED ON
   CXX_VISIBILITY_PRESET hidden
+
   # this is to find pip installed nvrtc.so
   INSTALL_RPATH
   "$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN/../../nvidia/cuda_cupti/lib:$ORIGIN/../../torch/lib"

diff --git a/README.md b/README.md
@@ -43,20 +43,16 @@ Supported compilers:
 **GCC:**
 
 We support all "supported releases" of gcc as specified in [the official site](https://gcc.gnu.org/).
-As of 6/20/2024, they are:
+As of 3/2/2025, they are:
 
-- gcc 11.4
 - gcc 12.4
-- gcc 13.2
-- gcc 14.1
+- gcc 13.3
+- gcc 14.2
 
 **Clang:**
 
-- clang 14+
+- clang 16+
 
 Supported C++ standard:
 
-- C++17
 - C++20
-
-We are actively considering dropping C++17 support
diff --git a/csrc/utils.h b/csrc/utils.h
@@ -626,55 +626,9 @@ using std::views::zip;
 #else
 
 namespace views {
-#if !defined(__clang__) || (__clang_major__ > 14)
-using std::views::iota;
-#else
-// Workaround for Clang 14
-class iota {
- public:
-  class iterator {
-   public:
-    using value_type = int;
-    using difference_type = std::ptrdiff_t;
-    using iterator_category = std::input_iterator_tag;
-    int64_t value;
-    iterator(int64_t start) : value(start) {}
-    int64_t operator*() const {
-      return value;
-    }
-    iterator& operator++() {
-      ++value;
-      return *this;
-    }
-    iterator operator++(int) {
-      iterator temp = *this;
-      ++value;
-      return temp;
-    }
-    template <typename T>
-    bool operator==(T) const {
-      return false;
-    }
-  };
-
-  iterator begin() const {
-    return iterator(start);
-  }
-  auto end() const {
-    return std::unreachable_sentinel;
-  }
-  int64_t start;
-  iota(int64_t start) : start(start) {}
-};
-#endif
 
 template <std::ranges::input_range... Rs>
-#if !defined(__clang__) || (__clang_major__ > 14)
 class zip_view : public std::ranges::view_interface<zip_view<Rs...>> {
-#else
-// Workaround for Clang 14
-class zip_view {
-#endif
  private:
   std::tuple<Rs...> bases;
 
@@ -742,10 +696,11 @@ auto zip(Rs&&... rs) {
 } // namespace views
 using views::zip;
 
-#endif
+#endif // C++23
 
 auto enumerate(auto&& range) {
-  return zip(views::iota((int64_t)0), std::forward<decltype(range)>(range));
+  return zip(
+      std::views::iota((int64_t)0), std::forward<decltype(range)>(range));
 }
 
 } // namespace nvfuser
diff --git a/setup.py b/setup.py
@@ -148,6 +148,8 @@
         continue
     if arg.startswith("--cpp="):
         CPP_STANDARD = int(arg.split("=")[1])
+        if CPP_STANDARD < 20:
+            raise ValueError("nvfuser requires C++20 standard or higher")
         continue
     if arg in ["clean"]:
         # only disables BUILD_SETUP, but keep the argument for setuptools

diff --git a/tests/cpp/test_utils.cpp b/tests/cpp/test_utils.cpp
@@ -1760,31 +1760,24 @@ TEST_F(TestCpp23BackPort, ZipDifferentWaysToSayZeroToTen) {
     }
   };
   static_assert(std::input_iterator<SetTheoreticNaturalNumber>);
-#if !defined(__clang__) || (__clang_major__ > 14)
   struct ZeroToInf : std::ranges::view_interface<ZeroToInf> {
-#else
-  // Workaround for Clang 14
-  struct ZeroToInf {
-#endif
     SetTheoreticNaturalNumber begin() {
       return SetTheoreticNaturalNumber();
     }
     auto end() {
       return std::unreachable_sentinel;
     }
   } set_theoretic_zero_to_inf;
-#if !defined(__clang__) || (__clang_major__ > 14)
   static_assert(std::ranges::input_range<ZeroToInf>);
   static_assert(std::ranges::view<ZeroToInf>);
-#endif
 
   int64_t counter = 0;
   auto english_it = english.begin();
   for (auto&& [i, e, s, iota] :
        zip(integer,
            english,
            set_theoretic_zero_to_inf,
-           views::iota((int64_t)0))) {
+           std::views::iota((int64_t)0))) {
     static_assert(std::is_same_v<decltype(i), int64_t&>);
     static_assert(std::is_same_v<decltype(e), std::string&>);
     static_assert(std::is_same_v<decltype(s), SetTheoreticNaturalNumber>);

diff --git a/tools/apt-install-things.sh b/tools/apt-install-things.sh
@@ -1,7 +1,19 @@
 #!/bin/bash
 
-# Install cuda
-wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
-sudo dpkg -i cuda-keyring_1.0-1_all.deb
+set -e
+
+# Install cuda keyring
+wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
 sudo apt-get update
-sudo apt-get -y install ninja-build cuda-compiler-12-1 cuda-command-line-tools-12-1 cuda-libraries-dev-12-1 libnccl-dev clang-14 nlohmann-json3-dev
+
+# Remove some old toolchains. By default, the github action comes with multiple versions of gcc and clang installed.
+# Multiple installed versions of gcc and clang interfere with each other, causing weird build and clang-tidy errors.
+# We only keep one version of gcc and clang in the system, and remove the rest.
+sudo apt-get -y remove gcc-13 libstdc++-13-dev gcc-12 libstdc++-12-dev
+
+# Install the latest version of clang and gcc.
+sudo apt-get -y install --reinstall clang-19 gcc-14 nlohmann-json3-dev ninja-build
+
+# Install minimal cuda toolkit.
+sudo apt-get -y install cuda-compiler-12-8 cuda-command-line-tools-12-8 cuda-libraries-dev-12-8 libnccl-dev
diff --git a/tools/pip-install-things.sh b/tools/pip-install-things.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
+pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128
 
 # PyTorch has a built-in Modules_CUDA which is super old. Remove it.
 rm -rf $(python -c 'import torch; print(torch.__path__[0])')/share/cmake/Caffe2/Modules_CUDA_fix

diff --git a/tools/setup-env.sh b/tools/setup-env.sh
@@ -3,3 +3,6 @@
 export CUDACXX=/usr/local/cuda/bin/nvcc
 export PATH=/usr/local/cuda/bin:${PATH}
 export CUDA_INSTALL_PATH=/usr/local/cuda
+export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda
+export CC=clang-19
+export CXX=clang++-19