[fbgemm_gpu] Add documentation on building FBGEMM_GPU with Clang

- Add documentation on building FBGEMM_GPU with Clang
- Enable building with Clang for FBGEMM_GPU ROCm variant
pytorch · Feb 22, 2024 · 09de69c · 09de69c
1 parent 6717f1f
commit 09de69c
Show file tree

Hide file tree

Showing 8 changed files with 223 additions and 81 deletions.
diff --git a/.github/scripts/utils_build.bash b/.github/scripts/utils_build.bash
@@ -89,6 +89,7 @@ __conda_install_gcc () {
   # shellcheck disable=SC2155,SC2086
   local cxx_path=$(conda run ${env_prefix} printenv CXX)
 
+  # Set the symlinks, override if needed
   print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
   print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
   print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
@@ -103,7 +104,7 @@ __conda_install_clang () {
   local llvm_version=15.0.7
 
   echo "[INSTALL] Installing Clang and relevant libraries through Conda ..."
-  # NOTE: libcxx from conda-forge is outdated for linux-aarch64, we we cannot
+  # NOTE: libcxx from conda-forge is outdated for linux-aarch64, so we cannot
   # explicitly specify the version number
   #
   # shellcheck disable=SC2086
@@ -121,6 +122,7 @@ __conda_install_clang () {
   # shellcheck disable=SC2155,SC2086
   local cxx_path=$(conda run ${env_prefix} which clang++)
 
+  # Set the symlinks, override if needed
   print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
   print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
   print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
@@ -132,7 +134,7 @@ __conda_install_clang () {
   # shellcheck disable=SC2155,SC2086
   local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)
   # shellcheck disable=SC2086
-  print_exec conda env config vars set ${env_prefix} LD_LIBRARY_PATH="${ld_library_path}:${conda_prefix}/lib"
+  print_exec conda env config vars set ${env_prefix} LD_LIBRARY_PATH="${ld_library_path:+${ld_library_path}:}${conda_prefix}/lib"
 
   echo "[BUILD] Setting Clang (should already be symlinked as c++) as the host compiler for NVCC: ${cxx_path}"
   # When NVCC is used, set Clang to be the host compiler, but set GNU libstdc++
@@ -244,8 +246,10 @@ install_build_tools () {
   local env_prefix=$(env_name_or_prefix "${env_name}")
 
   echo "[INSTALL] Installing build tools ..."
-  # NOTE: Only the openblas package will install cblas.h directly into
-  # $CONDA_PREFIX/include directory
+  # NOTES:
+  # - Only the openblas package will install cblas.h directly into
+  #   $CONDA_PREFIX/include directory
+  # - ncurses is needed to silence bad libtinfo6.so errors for ROCm+Clang builds
   #
   # shellcheck disable=SC2086
   (exec_with_retries 3 conda install ${env_prefix} -c conda-forge -y \
@@ -254,6 +258,7 @@ install_build_tools () {
     hypothesis \
     jinja2 \
     make \
+    ncurses \
     ninja \
     numpy \
     openblas \

diff --git a/.github/workflows/fbgemm_gpu_ci_cpu.yml b/.github/workflows/fbgemm_gpu_ci_cpu.yml
@@ -112,7 +112,7 @@ jobs:
     - name: Upload Built Wheel as GHA Artifact
       uses: actions/upload-artifact@v4
       with:
-        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_${{ matrix.python-version }}.whl
+        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}.whl
         path: fbgemm_gpu/dist/fbgemm_gpu_nightly_cpu-*.whl
 
 
@@ -151,7 +151,7 @@ jobs:
     - name: Download Wheel Artifact from GHA
       uses: actions/download-artifact@v4
       with:
-        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_${{ matrix.python-version }}.whl
+        name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}.whl
 
     - name: Display System Info
       run: . $PRELUDE; print_system_info; print_ec2_info
@@ -165,8 +165,7 @@ jobs:
     - name: Create Conda Environment
       run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
 
-    - name: Install C/C++ Compilers
-      # CXX compiler is needed for inductor used by torchrec.
+    - name: Install C/C++ Compilers for Updated LIBGCC
       run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
 
     - name: Install PyTorch-CPU Nightly

diff --git a/.github/workflows/fbgemm_gpu_ci_cuda.yml b/.github/workflows/fbgemm_gpu_ci_cuda.yml
@@ -180,7 +180,7 @@ jobs:
     - name: Create Conda Environment
       run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
 
-    - name: Install C/C++ Compilers
+    - name: Install C/C++ Compilers for Updated LIBGCC
       run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
 
     - name: Install CUDA

diff --git a/.github/workflows/fbgemm_gpu_ci_rocm.yml b/.github/workflows/fbgemm_gpu_ci_rocm.yml
@@ -65,6 +65,7 @@ jobs:
         container-image: [ "ubuntu:20.04" ]
         python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
         rocm-version: [ "5.7" ]
+        compiler: [ "gcc", "clang" ]
 
     steps:
     - name: Setup Build Container
@@ -97,7 +98,7 @@ jobs:
       run: . $PRELUDE; install_rocm_ubuntu $BUILD_ENV ${{ matrix.rocm-version }}
 
     - name: Install C/C++ Compilers
-      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV
+      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
 
     - name: Install Build Tools
       run: . $PRELUDE; install_build_tools $BUILD_ENV
@@ -118,7 +119,7 @@ jobs:
     - name: Upload Built Wheel as GHA Artifact
       uses: actions/upload-artifact@v4
       with:
-        name: fbgemm_gpu_nightly_rocm_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_rocm${{ matrix.rocm-version }}.whl
+        name: fbgemm_gpu_nightly_rocm_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_rocm${{ matrix.rocm-version }}.whl
         path: fbgemm_gpu/dist/fbgemm_gpu_nightly_rocm-*.whl
 
 
@@ -142,8 +143,9 @@ jobs:
           { arch: x86, instance: "rocm" },
         ]
         # ROCm machines are limited, so we only test a subset of Python versions
-        python-version: [ "3.11", "3.12" ]
+        python-version: [ "3.12" ]
         rocm-version: [ "5.7" ]
+        compiler: [ "gcc", "clang" ]
     needs: build_artifact
 
     steps:
@@ -159,7 +161,7 @@ jobs:
     - name: Download Wheel Artifact from GHA
       uses: actions/download-artifact@v4
       with:
-        name: fbgemm_gpu_nightly_rocm_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_rocm${{ matrix.rocm-version }}.whl
+        name: fbgemm_gpu_nightly_rocm_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_rocm${{ matrix.rocm-version }}.whl
 
     - name: Display System Info
       run: . $PRELUDE; print_system_info

diff --git a/fbgemm_gpu/docs/src/fbgemm-development/BuildInstructions.rst b/fbgemm_gpu/docs/src/fbgemm-development/BuildInstructions.rst
@@ -73,32 +73,15 @@ Install the Build Tools
 C/C++ Compiler
 ~~~~~~~~~~~~~~
 
-For Linux and macOS platforms, Install a version of the GCC toolchain
-**that supports C++17**. The ``sysroot`` package will also need to be installed
-to avoid issues with missing versioned symbols with ``GLIBCXX`` when compiling FBGEMM:
-
-.. code:: sh
-
-  conda install -n "${env_name}" -y gxx_linux-64=10.4.0 sysroot_linux-64=2.17 -c conda-forge
-
-While newer versions of GCC can be used, binaries compiled under newer versions
-of GCC will not be compatible with older systems such as Ubuntu 20.04 or CentOS
-Stream 8, because the compiled library will reference symbols from versions of
-``GLIBCXX`` that the system’s ``libstdc++.so.6`` will not support. To see what
-versions of GLIBC and GLIBCXX the available ``libstdc++.so.6`` supports:
-
-.. code:: sh
-
-  libcxx_path=/path/to/libstdc++.so.6
-
-  # Print supported for GLIBC versions
-  objdump -TC "${libcxx_path}" | grep GLIBC_ | sed 's/.*GLIBC_\([.0-9]*\).*/GLIBC_\1/g' | sort -Vu | cat
-
-  # Print supported for GLIBCXX versions
-  objdump -TC "${libcxx_path}" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
+For Linux and macOS platforms, follow the instructions in
+:ref:`fbgemm-gpu.build.setup.tools.install.compiler.gcc` to install the GCC
+toolchain.  For Clang-based builds, follow the instructions in
+:ref:`fbgemm-gpu.build.setup.tools.install.compiler.clang` to install the Clang
+toolchain.
 
 For builds on Windows machines, Microsoft Visual Studio 2019 or newer is
-recommended.  Follow the installation instructions provided by Microsoft.
+recommended.  Follow the installation instructions provided by Microsoft
+`here <https://visualstudio.microsoft.com/vs/older-downloads/>`_.
 
 Other Build Tools
 ~~~~~~~~~~~~~~~~~
@@ -107,15 +90,16 @@ Install the other necessary build tools such as ``ninja``, ``cmake``, etc:
 
 .. code:: sh
 
-  conda install -n "${env_name}" -y \
+  conda install -n ${env_name} -y \
       bazel \
       cmake \
+      doxygen \
       make \
       ninja \
-      openblas-dev
+      openblas
 
-Note that the `bazel` package is only necessary for Bazel builds, and the
-`ninja` package is only necessary for Windows builds.
+Note that the ``bazel`` package is only necessary for Bazel builds, and the
+``ninja`` package is only necessary for Windows builds.
 
 
 Build the FBGEMM Library
@@ -134,8 +118,8 @@ Clone the repo along with its submodules:
   git clone --recurse-submodules https://github.com/pytorch/FBGEMM.git
   cd FBGEMM
 
-Building on Linux and macOS (CMake)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Building on Linux and macOS (CMake + GCC)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Assuming a Conda environment with all the tools installed, the CMake build
 process is straightforward:
@@ -148,9 +132,18 @@ process is straightforward:
   mkdir build
   cd build
 
+  # Set CMake build arguments
+  build_args=(
+    -DUSE_SANITIZER=address
+    -DFBGEMM_LIBRARY_TYPE=shared
+    -DPYTHON_EXECUTABLE=`which python3`
+
+    # OPTIONAL: Set to generate Doxygen documentation
+    -DFBGEMM_BUILD_DOCS=ON
+  )
+
   # Set up the build
-  # To generate Doxygen documentation, add `-DFBGEMM_BUILD_DOCS=ON`
-  cmake -DUSE_SANITIZER=address -DFBGEMM_LIBRARY_TYPE=shared -DPYTHON_EXECUTABLE=`which python3` ..
+  cmake ${build_args[@]} ..
 
   # Build the library
   make -j VERBOSE=1
@@ -161,6 +154,49 @@ process is straightforward:
   # Install the library
   make install
 
+Build Issues with GCC 12+
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+As of the time of writing, compilation of FBGEMM on GCC 12+ will fail due to a
+`known compiler regression <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593>`__.
+To work around the issue, append the following exports prior to running CMake:
+
+.. code:: sh
+
+  # !! Run inside the Conda environment !!
+
+  export CFLAGS+=" -Wno-error=maybe-uninitialized -Wno-error=uninitialized -Wno-error=restrict"
+  export CXXFLAGS+=" -Wno-error=maybe-uninitialized -Wno-error=uninitialized -Wno-error=restrict"
+
+Please see GitHub issues
+`77939 <https://github.com/pytorch/pytorch/issues/77939>`__,
+`1094 <https://github.com/pytorch/FBGEMM/issues/1094>`__, and
+`1666 <https://github.com/pytorch/FBGEMM/issues/1666>`__ for more details.
+
+Building on Linux and macOS (CMake + Clang)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The steps for building FBGEMM using Clang are exactly the same as those for
+building using GCC.  However, extra build arguments need to be added to the
+CMake invocation to specify the Clang path, the LLVM-based C++ standard library
+(``libc++``), and the LLVM-based OpenMP implementation (``libomp``):
+
+.. code:: sh
+
+  # !! Run inside the Conda environment !!
+
+  # Locate Clang
+  cc_path=$(which clang)
+  cxx_path=$(which clang++)
+
+  # Append to the CMake build arguments
+  build_args+=(
+    -DCMAKE_C_COMPILER="${cc_path}"
+    -DCMAKE_CXX_COMPILER="${cxx_path}"
+    -DCMAKE_C_FLAGS=\"-fopenmp=libomp -stdlib=libc++ -I $CONDA_PREFIX/include\"
+    -DCMAKE_CXX_FLAGS=\"-fopenmp=libomp -stdlib=libc++ -I $CONDA_PREFIX/include\"
+  )
+
 Building on Linux (Bazel)
 ~~~~~~~~~~~~~~~~~~~~~~~~~