Skip to content

Commit

Permalink
[aot] Support multi-target builds for Apple M1 (#6083)
Browse files Browse the repository at this point in the history
This PR enables Taichi to be built into multi-target libraries. Recent
releases of macOS support dynamically linking against either the x86_64 or
the arm64 part of the same `.dylib` on demand, depending on the arch of the
parent process. Previously this was not possible because the CMake script
forced `-march=nehalem`, which is not a valid option for arm64, and because
`CMAKE_OSX_ARCHITECTURES` was forced to `arm64`.

The multi-target feature is only enabled with
`-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"`. Python builds select a single
arch, based on the arch of the Python process that runs skbuild.

<img width="1260" alt="图片"
src="https://user-images.githubusercontent.com/1487605/190690041-f4a04941-a0dc-440e-b083-c59ae00b111f.png">

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
PENGUINLIONG and pre-commit-ci[bot] authored Sep 19, 2022
1 parent 18c946e commit 88f030e
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 22 deletions.
14 changes: 0 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,6 @@ cmake_minimum_required(VERSION 3.17)

project(taichi)

# Taichi does not set target architecture explicitly,
# but rather rely on CMake to detect the host arch.
#
# However on Mac m1, there are two available architectures namely x86_64 and arm64.
# On some combination of "OSX version" and "CMake version", CMake will use x86_64 as default architecture even if it's on m1 chip.
# This causes conflicts with the precompiled LLVM/Clang binaries downloaded from Taichi's repo (pre-built for arm64)
#
# Therefore we force CMake to choose arm64 architecture on arm64 chips.
if (APPLE)
if( "${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "arm64" )
set(CMAKE_OSX_ARCHITECTURES ${CMAKE_HOST_SYSTEM_PROCESSOR})
endif()
endif()

if (NOT DEFINED TI_VERSION_MAJOR)
message(WARNING "It seems that you are running cmake manually, which may cause issues. Please use setup.py to build taichi from source, see https://docs.taichi-lang.org/docs/dev_install for more details.")
set(TI_VERSION_MAJOR 0)
Expand Down
11 changes: 9 additions & 2 deletions cmake/TaichiCXXFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,15 @@ if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}"
if (MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"TI_ARCH_x64\"")
else()
message("Setting -march=nehalem for x86_64 processors")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=nehalem -DTI_ARCH_x64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_ARCH_x64")
if ("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
# TODO: (penguinliong) Will probably need this in a future version
# of Clang. Clang11 doesn't recognize this.
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=apple-m1")
else()
message("Setting -march=nehalem for x86_64 processors")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=nehalem")
endif()
endif()
set(ARCH "x64")
elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
Expand Down
6 changes: 6 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import glob
import multiprocessing
import os
import platform
import shutil
import sys
from distutils.command.clean import clean
Expand Down Expand Up @@ -131,6 +132,11 @@ def get_cmake_args():

if sys.platform != 'win32':
os.environ['SKBUILD_BUILD_OPTIONS'] = f'-j{num_threads}'
if sys.platform == "darwin":
if platform.machine() == "arm64":
cmake_args += ["-DCMAKE_OSX_ARCHITECTURES=arm64"]
else:
cmake_args += ["-DCMAKE_OSX_ARCHITECTURES=x86_64"]
return cmake_args


Expand Down
11 changes: 6 additions & 5 deletions taichi/program/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
#include "taichi/rhi/dx/dx_api.h"
#endif

#if defined(TI_ARCH_x64)
#if defined(_M_X64) || defined(__x86_64)
// For _MM_SET_FLUSH_ZERO_MODE
#include <xmmintrin.h>
#endif
#endif // defined(_M_X64) || defined(__x86_64)

namespace taichi {
namespace lang {
Expand All @@ -55,9 +55,10 @@ Program::Program(Arch desired_arch) : snode_rw_accessors_bank_(this) {
// For performance considerations and correctness of QuantFloatType
// operations, we force floating-point operations to flush to zero on all
// backends (including CPUs).
#if defined(TI_ARCH_x64)
#if defined(_M_X64) || defined(__x86_64)
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
#else
#endif // defined(_M_X64) || defined(__x86_64)
#if defined(__arm64__) || defined(__aarch64__)
// Enforce flush to zero on arm64 CPUs
// https://developer.arm.com/documentation/100403/0201/register-descriptions/advanced-simd-and-floating-point-registers/aarch64-register-descriptions/fpcr--floating-point-control-register?lang=en
std::uint64_t fpcr;
Expand All @@ -68,7 +69,7 @@ Program::Program(Arch desired_arch) : snode_rw_accessors_bank_(this) {
:
: "ri"(fpcr | (1 << 24))); // Bit 24 is FZ
__asm__ __volatile__("");
#endif
#endif // defined(__arm64__) || defined(__aarch64__)
config = default_compile_config;
config.arch = desired_arch;
// TODO: allow users to run in debug mode without out-of-bound checks
Expand Down
2 changes: 1 addition & 1 deletion taichi/system/timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ uint64 Time::get_cycles() {
#else

uint64 Time::get_cycles() {
#if defined(TI_ARCH_x64)
#if defined(TI_ARCH_x64) && !(defined(__arm64__) || defined(__aarch64__))
unsigned int lo, hi;
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64)hi << 32) | lo;
Expand Down

0 comments on commit 88f030e

Please sign in to comment.