Summary of this patch (text before the first "diff --git" line is ignored by
`git apply` and `patch`):
  * CMakeLists.txt: drops the block that set CMAKE_OSX_ARCHITECTURES to the
    host processor on arm64 Apple hosts.
  * cmake/TaichiCXXFlags.cmake: in the non-MSVC x86_64 processor branch,
    always adds -DTI_ARCH_x64 and adds -march=nehalem only when "arm64" is
    not in CMAKE_OSX_ARCHITECTURES.
  * setup.py: on darwin, passes -DCMAKE_OSX_ARCHITECTURES=arm64 or =x86_64
    to CMake depending on platform.machine().
  * taichi/program/program.cpp: guards the x86 flush-to-zero call with
    _M_X64/__x86_64 and the FPCR inline asm with __arm64__/__aarch64__
    instead of TI_ARCH_x64 / #else.
  * taichi/system/timer.cpp: skips the rdtsc path when __arm64__ or
    __aarch64__ is defined.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks were rebuilt from the hunk headers; leading indentation and the
placement of blank context lines are reconstructed, so apply with
`git apply --ignore-whitespace` or `patch -l`. The `<xmmintrin.h>` operand
of the #include context line was missing from the copy and has been restored
from the adjacent comment -- confirm against the tree.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 53a4bcc3b584b..0cc5703b7d5dd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,20 +6,6 @@ cmake_minimum_required(VERSION 3.17)
 
 project(taichi)
 
-# Taichi does not set target architecture explicitly,
-# but rather rely on CMake to detect the host arch.
-#
-# However on Mac m1, there are two available architectures namely x86_64 and arm64.
-# On some combination of "OSX version" and "CMake version", CMake will use x86_64 as default architecture even if it's on m1 chip.
-# This causes conflicts with the precompiled LLVM/Clang binaries downloaded from Taichi's repo (pre-built for arm64)
-#
-# Therefore we force CMake to choose arm64 architecture on arm64 chips.
-if (APPLE)
-    if( "${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "arm64" )
-        set(CMAKE_OSX_ARCHITECTURES ${CMAKE_HOST_SYSTEM_PROCESSOR})
-    endif()
-endif()
-
 if (NOT DEFINED TI_VERSION_MAJOR)
     message(WARNING "It seems that you are running cmake manually, which may cause issues. Please use setup.py to build taichi from source, see https://docs.taichi-lang.org/docs/dev_install for more details.")
     set(TI_VERSION_MAJOR 0)
diff --git a/cmake/TaichiCXXFlags.cmake b/cmake/TaichiCXXFlags.cmake
index 0b73ac4324c2c..687b705f37084 100644
--- a/cmake/TaichiCXXFlags.cmake
+++ b/cmake/TaichiCXXFlags.cmake
@@ -81,8 +81,15 @@ if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}"
     if (MSVC)
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"TI_ARCH_x64\"")
     else()
-        message("Setting -march=nehalem for x86_64 processors")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=nehalem -DTI_ARCH_x64")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_ARCH_x64")
+        if ("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
+            # TODO: (penguinliong) Will probably need this in a future version
+            # of Clang. Clang11 doesn't recognize this.
+            #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=apple-m1")
+        else()
+            message("Setting -march=nehalem for x86_64 processors")
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=nehalem")
+        endif()
     endif()
     set(ARCH "x64")
 elseif ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
diff --git a/setup.py b/setup.py
index c7fe63fa51b17..7196db9694c8a 100644
--- a/setup.py
+++ b/setup.py
@@ -8,6 +8,7 @@
 import glob
 import multiprocessing
 import os
+import platform
 import shutil
 import sys
 from distutils.command.clean import clean
@@ -131,6 +132,11 @@ def get_cmake_args():
 
     if sys.platform != 'win32':
         os.environ['SKBUILD_BUILD_OPTIONS'] = f'-j{num_threads}'
+    if sys.platform == "darwin":
+        if platform.machine() == "arm64":
+            cmake_args += ["-DCMAKE_OSX_ARCHITECTURES=arm64"]
+        else:
+            cmake_args += ["-DCMAKE_OSX_ARCHITECTURES=x86_64"]
     return cmake_args
 
 
diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp
index e0af071f09cb3..57dcd27a0de56 100644
--- a/taichi/program/program.cpp
+++ b/taichi/program/program.cpp
@@ -40,10 +40,10 @@
 #include "taichi/rhi/dx/dx_api.h"
 #endif
 
-#if defined(TI_ARCH_x64)
+#if defined(_M_X64) || defined(__x86_64)
 // For _MM_SET_FLUSH_ZERO_MODE
 #include <xmmintrin.h>
-#endif
+#endif // defined(_M_X64) || defined(__x86_64)
 
 namespace taichi {
 namespace lang {
@@ -55,9 +55,10 @@ Program::Program(Arch desired_arch) : snode_rw_accessors_bank_(this) {
   // For performance considerations and correctness of QuantFloatType
   // operations, we force floating-point operations to flush to zero on all
   // backends (including CPUs).
-#if defined(TI_ARCH_x64)
+#if defined(_M_X64) || defined(__x86_64)
   _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-#else
+#endif // defined(_M_X64) || defined(__x86_64)
+#if defined(__arm64__) || defined(__aarch64__)
   // Enforce flush to zero on arm64 CPUs
   // https://developer.arm.com/documentation/100403/0201/register-descriptions/advanced-simd-and-floating-point-registers/aarch64-register-descriptions/fpcr--floating-point-control-register?lang=en
   std::uint64_t fpcr;
@@ -68,7 +69,7 @@ Program::Program(Arch desired_arch) : snode_rw_accessors_bank_(this) {
                        :
                        : "ri"(fpcr | (1 << 24))); // Bit 24 is FZ
   __asm__ __volatile__("");
-#endif
+#endif // defined(__arm64__) || defined(__aarch64__)
   config = default_compile_config;
   config.arch = desired_arch;
   // TODO: allow users to run in debug mode without out-of-bound checks
diff --git a/taichi/system/timer.cpp b/taichi/system/timer.cpp
index 45491f4eea29a..fb0d61472e231 100644
--- a/taichi/system/timer.cpp
+++ b/taichi/system/timer.cpp
@@ -220,7 +220,7 @@ uint64 Time::get_cycles() {
 #else
 
 uint64 Time::get_cycles() {
-#if defined(TI_ARCH_x64)
+#if defined(TI_ARCH_x64) && !(defined(__arm64__) || defined(__aarch64__))
   unsigned int lo, hi;
   __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
   return ((uint64)hi << 32) | lo;