Skip to content

Commit

Permalink
[riscv] Update pytorch to version 2.4.0 (#9293)
Browse files Browse the repository at this point in the history
* Update pytorch to version 2.3.1

* Explicitly set TORCH_CUDA_ARCH_LIST in packages depending on pytorch

* Disable ROCm; patch for missing braces is still needed

* Fix one more instance of missing braces

* Update fmt to 10.2.1

* Update patch

* Update pytorch to 2.4.0 which has riscv support

* Changes from review

* Update patch for v2.4.0

* Update patch

---------

Co-authored-by: Malik Shahzad Muzaffar <[email protected]>
  • Loading branch information
iarspider and smuzaffar authored Jul 25, 2024
1 parent 4e6b81a commit e2ad566
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 33 deletions.
2 changes: 1 addition & 1 deletion fmt.spec
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
### RPM external fmt 8.0.1
### RPM external fmt 10.2.1
## INCLUDE compilation_flags
Source: https://github.com/fmtlib/fmt/archive/%{realversion}.tar.gz
BuildRequires: gmake cmake
Expand Down
19 changes: 16 additions & 3 deletions pytorch-cluster.spec
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
### RPM external pytorch-cluster 1.6.3
## INCLUDE compilation_flags
## INCLUDE cpp-standard
## INCLUDE cuda-flags

%define tag f2d99195a0003ca2d2ba9ed50d0117e2f23360e0
%define branch master
%define github_user rusty1s
Expand All @@ -9,19 +11,26 @@ Source: git+https://github.com/%{github_user}/pytorch_cluster.git?obj=%{branch}/

BuildRequires: cmake
Requires: pytorch
%define build_flags -Wall -Wextra -pedantic %{?arch_build_flags}
%define build_flags -Wall -Wextra -pedantic %{?arch_build_flags}
%define cuda_arch_float $(echo %{cuda_arch} | tr ' ' '\\n' | sed -E 's|([0-9])$|.\\1|' | tr '\\n' ' ')

%prep
%setup -n %{n}-%{realversion}
# Make sure the default C++ standard is c++14
grep -q 'CMAKE_CXX_STANDARD *14' CMakeLists.txt
sed -i -e 's|CMAKE_CXX_STANDARD *14|CMAKE_CXX_STANDARD %{cms_cxx_standard}|' CMakeLists.txt

USE_CUDA=OFF
%if 0%{!?without_cuda:1}
if [ "%{cuda_gcc_support}" = "true" ] ; then
USE_CUDA=ON
fi
%endif

%build

rm -rf ../build && mkdir ../build && cd ../build


cmake ../%{n}-%{realversion} \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=%{i} \
Expand All @@ -33,7 +42,11 @@ cmake ../%{n}-%{realversion} \
-DWITH_PYTHON=OFF \
-DWITH_CUDA=OFF \
-DBUILD_TEST=OFF \
-DBUILD_SHARED_LIBS=ON
%if 0%{!?without_cuda:1}
-DUSE_CUDA=${USE_CUDA} \
-DTORCH_CUDA_ARCH_LIST="%{cuda_arch_float}" \
%endif
-DBUILD_SHARED_LIBS=ON


make %{makeprocesses} VERBOSE=1
Expand Down
61 changes: 55 additions & 6 deletions pytorch-missing-braces.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/torch/csrc/profiler/events.h b/torch/csrc/profiler/events.h
index a1a956f1327..a4e02c795d9 100644
index 78bac1fea19..f08683223f0 100644
--- a/torch/csrc/profiler/events.h
+++ b/torch/csrc/profiler/events.h
@@ -11,7 +11,7 @@ namespace profiler {
@@ -11,7 +11,7 @@ namespace torch::profiler {
using perf_counters_t = std::vector<uint64_t>;

/* Standard list of performance events independent of hardware or backend */
Expand All @@ -11,11 +11,60 @@ index a1a956f1327..a4e02c795d9 100644
/*
* Number of Processing Elelement (PE) cycles between two points of interest
* in time. This should correlate positively with wall-time. Measured in
@@ -25,6 +25,6 @@ constexpr std::array<const char*, 2> ProfilerPerfEvents = {
@@ -25,5 +25,5 @@ constexpr std::array<const char*, 2> ProfilerPerfEvents = {
* (i.e. work). Across repeat executions, the number of instructions should
* be more or less invariant. Measured in uint64_t. PE can be non cpu.
*/
- "instructions"};
+ "instructions" }};
} // namespace profiler
} // namespace torch
+ "instructions"}};
} // namespace torch::profiler
diff --git a/c10/util/typeid.h b/c10/util/typeid.h
index 2c6ac38882f..d165059a956 100644
--- a/c10/util/typeid.h
+++ b/c10/util/typeid.h
@@ -303,12 +303,12 @@ class _Uninitialized final {};
//

// item sizes for TypeMeta::itemsize() fast path
-static constexpr std::array<uint8_t, NumScalarTypes> scalarTypeItemSizes = {
+static constexpr std::array<uint8_t, NumScalarTypes> scalarTypeItemSizes = {{
#define SCALAR_TYPE_SIZE(T, name) sizeof(T),
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(SCALAR_TYPE_SIZE)
#undef SCALAR_TYPE_SIZE
0, // Undefined
-};
+}};

/**
* TypeMeta is a thin class that allows us to store the type of a container such
--- a/c10/util/order_preserving_flat_hash_map.h
+++ b/c10/util/order_preserving_flat_hash_map.h
@@ -177,11 +177,11 @@ struct sherwood_v3_entry {
};

inline int8_t log2(uint64_t value) {
- static constexpr std::array<int8_t, 64> table = {
+ static constexpr std::array<int8_t, 64> table = {{
63, 0, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, 33, 42, 3,
61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4,
62, 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21,
- 56, 45, 25, 31, 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5};
+ 56, 45, 25, 31, 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5}};
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
--- a/torch/csrc/jit/runtime/operator.h
+++ b/torch/csrc/jit/runtime/operator.h
@@ -35,8 +35,8 @@ using ::c10::Symbol;
using OperationCreator = Operation (*)(const Node*);

namespace {
-const std::array<at::Tag, 1> kJitOnlyOperatorTags = {
- at::Tag::pt2_compliant_tag};
+const std::array<at::Tag, 1> kJitOnlyOperatorTags = {{
+ at::Tag::pt2_compliant_tag}};
}

/*


17 changes: 15 additions & 2 deletions pytorch-scatter.spec
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
### RPM external pytorch-scatter 2.1.2
## INCLUDE compilation_flags
## INCLUDE cpp-standard
## INCLUDE cuda-flags
%define tag c095c62e4334fcd05e4ac3c4bb09d285960d6be6
%define branch master
%define github_user rusty1s
Expand All @@ -9,14 +10,22 @@ Source: git+https://github.com/%{github_user}/pytorch_scatter.git?obj=%{branch}/

BuildRequires: cmake
Requires: pytorch
%define build_flags -Wall -Wextra -pedantic %{?arch_build_flags}
%define build_flags -Wall -Wextra -pedantic %{?arch_build_flags}
%define cuda_arch_float $(echo %{cuda_arch} | tr ' ' '\\n' | sed -E 's|([0-9])$|.\\1|' | tr '\\n' ' ')

%prep
%setup -n %{n}-%{realversion}
# Make sure the default C++ standard is c++14
grep -q 'CMAKE_CXX_STANDARD *14' CMakeLists.txt
sed -i -e 's|CMAKE_CXX_STANDARD *14|CMAKE_CXX_STANDARD %{cms_cxx_standard}|' CMakeLists.txt

USE_CUDA=OFF
%if "%{cmsos}" != "slc7_aarch64"
if [ "%{cuda_gcc_support}" = "true" ] ; then
USE_CUDA=%{!?without_cuda:ON}
fi
%endif

%build

rm -rf ../build && mkdir ../build && cd ../build
Expand All @@ -33,7 +42,11 @@ cmake ../%{n}-%{realversion} \
-DWITH_PYTHON=OFF \
-DWITH_CUDA=OFF \
-DBUILD_TEST=OFF \
-DBUILD_SHARED_LIBS=ON
%if 0%{!?without_cuda:1}
-DUSE_CUDA=${USE_CUDA} \
-DTORCH_CUDA_ARCH_LIST="%{cuda_arch_float}" \
%endif
-DBUILD_SHARED_LIBS=ON


make %{makeprocesses} VERBOSE=1
Expand Down
18 changes: 16 additions & 2 deletions pytorch-sparse.spec
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
### RPM external pytorch-sparse 0.6.18
## INCLUDE compilation_flags
## INCLUDE cpp-standard
## INCLUDE cuda-flags

%define tag 2d559810c6af7f8b2cf88553dd5a5824a667a07d
%define branch master
%define github_user rusty1s
Expand All @@ -9,14 +11,22 @@ Source: git+https://github.com/%{github_user}/pytorch_sparse.git?obj=%{branch}/%

BuildRequires: cmake
Requires: pytorch
%define build_flags -Wall -Wextra -pedantic %{?arch_build_flags}
%define build_flags -Wall -Wextra -pedantic %{?arch_build_flags}
%define cuda_arch_float $(echo %{cuda_arch} | tr ' ' '\\n' | sed -E 's|([0-9])$|.\\1|' | tr '\\n' ' ')

%prep
%setup -n %{n}-%{realversion}
# Make sure the default C++ standard is c++14
grep -q 'CMAKE_CXX_STANDARD *14' CMakeLists.txt
sed -i -e 's|CMAKE_CXX_STANDARD *14|CMAKE_CXX_STANDARD %{cms_cxx_standard}|' CMakeLists.txt

USE_CUDA=OFF
%if 0%{!?without_cuda:1}
if [ "%{cuda_gcc_support}" = "true" ] ; then
USE_CUDA=ON
fi
%endif

%build

rm -rf ../build && mkdir ../build && cd ../build
Expand All @@ -33,7 +43,11 @@ cmake ../%{n}-%{realversion} \
-DWITH_PYTHON=OFF \
-DWITH_CUDA=OFF \
-DBUILD_TEST=OFF \
-DBUILD_SHARED_LIBS=ON
%if 0%{!?without_cuda:1}
-DUSE_CUDA=${USE_CUDA} \
-DTORCH_CUDA_ARCH_LIST="%{cuda_arch_float}" \
%endif
-DBUILD_SHARED_LIBS=ON


make %{makeprocesses} VERBOSE=1
Expand Down
15 changes: 8 additions & 7 deletions pytorch-system-fmt.patch
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
index feebad7cbb7..8a2ad840286 100644
index 82eb9690383..2f40c80a66e 100644
--- a/c10/CMakeLists.txt
+++ b/c10/CMakeLists.txt
@@ -87,6 +87,7 @@ endif()
if(${USE_GLOG})
@@ -94,6 +94,7 @@ if(NOT BUILD_LIBTORCHLESS)
if(C10_USE_GLOG)
target_link_libraries(c10 PUBLIC glog::glog)
endif()
+find_package(fmt REQUIRED)
target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
endif()
+ find_package(fmt REQUIRED)
target_link_libraries(c10 PRIVATE fmt::fmt-header-only)

find_package(Backtrace)
if(C10_USE_NUMA)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 74d0d557190..ff3e94b1c4b 100644
--- a/caffe2/CMakeLists.txt
Expand Down Expand Up @@ -55,3 +55,4 @@ index c3abce52e4c..5e89d3eb43a 100644

# ---[ Kineto
# edge profiler depends on KinetoProfiler but it only does cpu

18 changes: 6 additions & 12 deletions pytorch.spec
Original file line number Diff line number Diff line change
@@ -1,27 +1,23 @@
### RPM external pytorch 2.1.1
### RPM external pytorch 2.4.0
## INCLUDE cuda-flags
## INCLUDE microarch_flags

%define cuda_arch_float $(echo %{cuda_arch} | tr ' ' '\\n' | sed -E 's|([0-9])$|.\\1|' | tr '\\n' ' ')
%define tag bb938bbe9f53414dda1e1159795b7536dbffd041
%define branch cms/v%{realversion}

Source: git+https://github.com/cms-externals/pytorch.git?obj=%{branch}/%{tag}&export=%{n}-%{realversion}&submodules=1&output=/%{n}-%{realversion}.tgz
Source: git+https://github.com/pytorch/pytorch.git?obj=main/v%{realversion}&export=%{n}-%{realversion}&submodules=1&output=/%{n}-%{realversion}.tgz
Source1: FindEigen3.cmake
Source2: FindFMT.cmake
Source99: scram-tools.file/tools/eigen/env
Patch0: pytorch-ignore-different-cuda-include-dir
Patch1: pytorch-missing-braces
Patch2: pytorch-system-fmt

BuildRequires: cmake ninja
Requires: eigen fxdiv numactl openmpi protobuf psimd python3 py3-PyYAML
Requires: OpenBLAS zlib protobuf fmt py3-pybind11
Requires: OpenBLAS zlib protobuf fmt py3-pybind11 py3-typing-extensions
%{!?without_cuda:Requires: cuda cudnn}

%prep
%setup -n %{n}-%{realversion}
%patch0 -p1
%patch1 -p1
%patch2 -p1

Expand All @@ -31,9 +27,9 @@ rm -rf ../build && mkdir ../build && cd ../build
source %{_sourcedir}/env

USE_CUDA=OFF
%if "%{cmsos}" != "slc7_aarch64"
%if 0%{!?without_cuda:1}
if [ "%{cuda_gcc_support}" = "true" ] ; then
USE_CUDA=%{!?without_cuda:ON}
USE_CUDA=ON
fi
%endif

Expand Down Expand Up @@ -94,6 +90,4 @@ ninja -v %{makeprocesses} install
%{relocateConfig}include/caffe2/core/macros.h
%{relocateConfig}share/cmake/ATen/ATenConfig.cmake

# For ROCm, pre-build
# NOTICE: can't build with both cuda and rocm
# python @{_builddir}/tools/amd_build/build_amd.py
# NOTICE: can't build with both cuda and rocm - see aten/CMakeLists.txt

0 comments on commit e2ad566

Please sign in to comment.