From 88f030e9b3b786439e39c0492a6776549d3aac60 Mon Sep 17 00:00:00 2001 From: PENGUINLIONG Date: Mon, 19 Sep 2022 17:04:44 +0800 Subject: [PATCH] [aot] Support multi-target builds for Apple M1 (#6083) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR enables Taichi to be built into multi-target libraries. Recent releases of macOS support dynamically linking against the x86_64 or arm64 variant of the same `.dylib` on demand, depending on the arch of the parent process. Previously this was not possible because the CMake script forced `-march=nehalem`, which is not a valid arm64 target, and `CMAKE_OSX_ARCHITECTURES` was forced to `arm64`. The multi-target feature is only enabled with `-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"`. Python builds will select one of the archs, based on the arch of the skbuild Python process. 图片 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CMakeLists.txt | 14 -------------- cmake/TaichiCXXFlags.cmake | 11 +++++++++-- setup.py | 6 ++++++ taichi/program/program.cpp | 11 ++++++----- taichi/system/timer.cpp | 2 +- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 53a4bcc3b584b..0cc5703b7d5dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,20 +6,6 @@ cmake_minimum_required(VERSION 3.17) project(taichi) -# Taichi does not set target architecture explicitly, -# but rather rely on CMake to detect the host arch. -# -# However on Mac m1, there are two available architectures namely x86_64 and arm64. -# On some combination of "OSX version" and "CMake version", CMake will use x86_64 as default architecture even if it's on m1 chip. -# This causes conflicts with the precompiled LLVM/Clang binaries downloaded from Taichi's repo (pre-built for arm64) -# -# Therefore we force CMake to choose arm64 architecture on arm64 chips. 
-if (APPLE) - if( "${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "arm64" ) - set(CMAKE_OSX_ARCHITECTURES ${CMAKE_HOST_SYSTEM_PROCESSOR}) - endif() -endif() - if (NOT DEFINED TI_VERSION_MAJOR) message(WARNING "It seems that you are running cmake manually, which may cause issues. Please use setup.py to build taichi from source, see https://docs.taichi-lang.org/docs/dev_install for more details.") set(TI_VERSION_MAJOR 0) diff --git a/cmake/TaichiCXXFlags.cmake b/cmake/TaichiCXXFlags.cmake index 0b73ac4324c2c..687b705f37084 100644 --- a/cmake/TaichiCXXFlags.cmake +++ b/cmake/TaichiCXXFlags.cmake @@ -81,8 +81,15 @@ if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}" if (MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"TI_ARCH_x64\"") else() - message("Setting -march=nehalem for x86_64 processors") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=nehalem -DTI_ARCH_x64") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_ARCH_x64") + if ("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) + # TODO: (penguinliong) Will probably need this in a future version + # of Clang. Clang11 doesn't recognize this. 
+ #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=apple-m1") + else() + message("Setting -march=nehalem for x86_64 processors") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=nehalem") + endif() endif() set(ARCH "x64") elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64") diff --git a/setup.py b/setup.py index c7fe63fa51b17..7196db9694c8a 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,7 @@ import glob import multiprocessing import os +import platform import shutil import sys from distutils.command.clean import clean @@ -131,6 +132,11 @@ def get_cmake_args(): if sys.platform != 'win32': os.environ['SKBUILD_BUILD_OPTIONS'] = f'-j{num_threads}' + if sys.platform == "darwin": + if platform.machine() == "arm64": + cmake_args += ["-DCMAKE_OSX_ARCHITECTURES=arm64"] + else: + cmake_args += ["-DCMAKE_OSX_ARCHITECTURES=x86_64"] return cmake_args diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index e0af071f09cb3..57dcd27a0de56 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -40,10 +40,10 @@ #include "taichi/rhi/dx/dx_api.h" #endif -#if defined(TI_ARCH_x64) +#if defined(_M_X64) || defined(__x86_64) // For _MM_SET_FLUSH_ZERO_MODE #include -#endif +#endif // defined(_M_X64) || defined(__x86_64) namespace taichi { namespace lang { @@ -55,9 +55,10 @@ Program::Program(Arch desired_arch) : snode_rw_accessors_bank_(this) { // For performance considerations and correctness of QuantFloatType // operations, we force floating-point operations to flush to zero on all // backends (including CPUs). 
-#if defined(TI_ARCH_x64) +#if defined(_M_X64) || defined(__x86_64) _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); -#else +#endif // defined(_M_X64) || defined(__x86_64) +#if defined(__arm64__) || defined(__aarch64__) // Enforce flush to zero on arm64 CPUs // https://developer.arm.com/documentation/100403/0201/register-descriptions/advanced-simd-and-floating-point-registers/aarch64-register-descriptions/fpcr--floating-point-control-register?lang=en std::uint64_t fpcr; @@ -68,7 +69,7 @@ Program::Program(Arch desired_arch) : snode_rw_accessors_bank_(this) { : : "ri"(fpcr | (1 << 24))); // Bit 24 is FZ __asm__ __volatile__(""); -#endif +#endif // defined(__arm64__) || defined(__aarch64__) config = default_compile_config; config.arch = desired_arch; // TODO: allow users to run in debug mode without out-of-bound checks diff --git a/taichi/system/timer.cpp b/taichi/system/timer.cpp index 45491f4eea29a..fb0d61472e231 100644 --- a/taichi/system/timer.cpp +++ b/taichi/system/timer.cpp @@ -220,7 +220,7 @@ uint64 Time::get_cycles() { #else uint64 Time::get_cycles() { -#if defined(TI_ARCH_x64) +#if defined(TI_ARCH_x64) && !(defined(__arm64__) || defined(__aarch64__)) unsigned int lo, hi; __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); return ((uint64)hi << 32) | lo;