Skip to content

Commit

Permalink
Merge branch 'subpart' of github.com:NVIDIA/Fuser into subpart
Browse files Browse the repository at this point in the history
  • Loading branch information
zasdfgbnm committed Mar 6, 2025
2 parents 6534a23 + 24ba947 commit 1763d6b
Show file tree
Hide file tree
Showing 11 changed files with 43 additions and 83 deletions.
7 changes: 2 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env:

jobs:
clang-build:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -30,14 +30,11 @@ jobs:
tools/apt-install-things.sh &
tools/pip-install-things.sh &
source tools/setup-env.sh
sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/13
export CC=clang
export CXX=clang++
wait
python setup.py build
dynamic-type-meson:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
Expand Down
11 changes: 4 additions & 7 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ env:

jobs:
check-license:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -28,7 +28,7 @@ jobs:
test ! -s missing-header-files.txt
clang-tidy:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -43,9 +43,6 @@ jobs:
tools/pip-install-things.sh &
source tools/setup-env.sh
# clang-tidy does not work well with gcc-13 headers, remove them
sudo rm -rf /usr/lib/gcc/x86_64-linux-gnu/13
# Install lintrunner
pip install lintrunner
Expand All @@ -72,7 +69,7 @@ jobs:
git --no-pager diff --diff-filter=d --name-only $head_commit | grep -e "csrc/.*\.cpp" -e "csrc/.*\.h" | xargs lintrunner --take CLANGTIDY --force-color
lintrunner:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -90,7 +87,7 @@ jobs:
lintrunner init 2> /dev/null
# Install dependencies
sudo apt install -y libtinfo5
sudo apt install -y libtinfo6
# Run lintrunner except clang-tidy
lintrunner --force-color --take FLAKE8,MYPY,CLANGFORMAT,NOQA,TYPEIGNORE,NEWLINE,MYPYSTRICT,TABS,SPACES,EXEC,BLACK,TORCH_INTERNAL_ASSERT,TORCH_CHECK,C10_ERROR,TORCH_CUDA_CU_API --all-files
2 changes: 1 addition & 1 deletion .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'clang-tidy==14.0.6',
'clang-tidy==19.1.0.1',
]
command = [
'python3',
Expand Down
7 changes: 6 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@ message(STATUS "Setting NVFUSER_DISTRIBUTED=${NVFUSER_DISTRIBUTED}")
# are footguns that should generally be avoided, because they are difficult to
# target where and *only* where they are needed. See e.g.:
# https://cliutils.gitlab.io/modern-cmake/chapters/intro/dodonot.html
set(NVFUSER_C_STANDARD 17 CACHE STRING "C standard to use for C code")
set(NVFUSER_C_STANDARD 20 CACHE STRING "C standard to use for C code")
set(NVFUSER_CPP_STANDARD 20 CACHE STRING "C++ standard to use for C++ code")
set(NVFUSER_CUDA_STANDARD 17 CACHE STRING "C++ standard to use for CUDA code")

if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# TODO: gcc 11.4 has reached end of life according to https://gcc.gnu.org/,
# so we should arguably bump the minimum version below to 12.x.
# However, because gcc 11.4 is well tested and stable, let's defer this
# rejection until the day that we find a bug in gcc 11.4.
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.4)
message(FATAL_ERROR "GCC < 11.4 has compiler bugs and can not compile nvFuser.")
endif()
Expand Down Expand Up @@ -342,6 +346,7 @@ set_target_properties(codegen_internal PROPERTIES
CXX_STANDARD ${NVFUSER_CPP_STANDARD}
CXX_STANDARD_REQUIRED ON
CXX_VISIBILITY_PRESET hidden

# this is to find pip installed nvrtc.so
INSTALL_RPATH
"$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN/../../nvidia/cuda_cupti/lib:$ORIGIN/../../torch/lib"
Expand Down
12 changes: 4 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,16 @@ Supported compilers:
**GCC:**

We support all "supported releases" of gcc as specified in [the official site](https://gcc.gnu.org/).
As of 6/20/2024, they are:
As of 3/2/2025, they are:

- gcc 11.4
- gcc 12.4
- gcc 13.2
- gcc 14.1
- gcc 13.3
- gcc 14.2

**Clang:**

- clang 14+
- clang 16+

Supported C++ standard:

- C++17
- C++20

We are actively considering dropping C++17 support
51 changes: 3 additions & 48 deletions csrc/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -626,55 +626,9 @@ using std::views::zip;
#else

namespace views {
#if !defined(__clang__) || (__clang_major__ > 14)
using std::views::iota;
#else
// Workaround for Clang 14
class iota {
public:
class iterator {
public:
using value_type = int;
using difference_type = std::ptrdiff_t;
using iterator_category = std::input_iterator_tag;
int64_t value;
iterator(int64_t start) : value(start) {}
int64_t operator*() const {
return value;
}
iterator& operator++() {
++value;
return *this;
}
iterator operator++(int) {
iterator temp = *this;
++value;
return temp;
}
template <typename T>
bool operator==(T) const {
return false;
}
};

iterator begin() const {
return iterator(start);
}
auto end() const {
return std::unreachable_sentinel;
}
int64_t start;
iota(int64_t start) : start(start) {}
};
#endif

template <std::ranges::input_range... Rs>
#if !defined(__clang__) || (__clang_major__ > 14)
class zip_view : public std::ranges::view_interface<zip_view<Rs...>> {
#else
// Workaround for Clang 14
class zip_view {
#endif
private:
std::tuple<Rs...> bases;

Expand Down Expand Up @@ -742,10 +696,11 @@ auto zip(Rs&&... rs) {
} // namespace views
using views::zip;

#endif
#endif // C++23

auto enumerate(auto&& range) {
return zip(views::iota((int64_t)0), std::forward<decltype(range)>(range));
return zip(
std::views::iota((int64_t)0), std::forward<decltype(range)>(range));
}

} // namespace nvfuser
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@
continue
if arg.startswith("--cpp="):
CPP_STANDARD = int(arg.split("=")[1])
if CPP_STANDARD < 20:
raise ValueError("nvfuser requires C++20 standard or higher")
continue
if arg in ["clean"]:
# only disables BUILD_SETUP, but keep the argument for setuptools
Expand Down
9 changes: 1 addition & 8 deletions tests/cpp/test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1760,31 +1760,24 @@ TEST_F(TestCpp23BackPort, ZipDifferentWaysToSayZeroToTen) {
}
};
static_assert(std::input_iterator<SetTheoreticNaturalNumber>);
#if !defined(__clang__) || (__clang_major__ > 14)
struct ZeroToInf : std::ranges::view_interface<ZeroToInf> {
#else
// Workaround for Clang 14
struct ZeroToInf {
#endif
SetTheoreticNaturalNumber begin() {
return SetTheoreticNaturalNumber();
}
auto end() {
return std::unreachable_sentinel;
}
} set_theoretic_zero_to_inf;
#if !defined(__clang__) || (__clang_major__ > 14)
static_assert(std::ranges::input_range<ZeroToInf>);
static_assert(std::ranges::view<ZeroToInf>);
#endif

int64_t counter = 0;
auto english_it = english.begin();
for (auto&& [i, e, s, iota] :
zip(integer,
english,
set_theoretic_zero_to_inf,
views::iota((int64_t)0))) {
std::views::iota((int64_t)0))) {
static_assert(std::is_same_v<decltype(i), int64_t&>);
static_assert(std::is_same_v<decltype(e), std::string&>);
static_assert(std::is_same_v<decltype(s), SetTheoreticNaturalNumber>);
Expand Down
20 changes: 16 additions & 4 deletions tools/apt-install-things.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
#!/bin/bash

# Install cuda
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
sudo dpkg -i cuda-keyring_1.0-1_all.deb
set -e

# Install cuda keyring
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install ninja-build cuda-compiler-12-1 cuda-command-line-tools-12-1 cuda-libraries-dev-12-1 libnccl-dev clang-14 nlohmann-json3-dev

# Remove some old toolchains. By default, the GitHub Actions runner comes with multiple versions of gcc and clang installed.
# Multiple installed versions of gcc and clang interfere with each other, causing weird build and clang-tidy errors.
# We keep only one version of gcc and clang on the system, and remove the rest.
sudo apt-get -y remove gcc-13 libstdc++-13-dev gcc-12 libstdc++-12-dev

# Install the latest version of clang and gcc.
sudo apt-get -y install --reinstall clang-19 gcc-14 nlohmann-json3-dev ninja-build

# Install minimal cuda toolkit.
sudo apt-get -y install cuda-compiler-12-8 cuda-command-line-tools-12-8 cuda-libraries-dev-12-8 libnccl-dev
2 changes: 1 addition & 1 deletion tools/pip-install-things.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128

# PyTorch has a built-in Modules_CUDA which is super old. Remove it.
rm -rf $(python -c 'import torch; print(torch.__path__[0])')/share/cmake/Caffe2/Modules_CUDA_fix
Expand Down
3 changes: 3 additions & 0 deletions tools/setup-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
export CUDACXX=/usr/local/cuda/bin/nvcc
export PATH=/usr/local/cuda/bin:${PATH}
export CUDA_INSTALL_PATH=/usr/local/cuda
export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda
export CC=clang-19
export CXX=clang++-19

0 comments on commit 1763d6b

Please sign in to comment.