From 70822bd1fbb7df53f4e7d243f6588d9f2c5d140e Mon Sep 17 00:00:00 2001 From: Proton Date: Thu, 11 May 2023 18:46:57 +0800 Subject: [PATCH 01/18] [misc] Do not print CHANGELOG when specified --save (make_changelog.py) ghstack-source-id: 9b846a6261afc4c30f1ea8ba1034d6608a8425f5 Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7983 --- misc/make_changelog.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/misc/make_changelog.py b/misc/make_changelog.py index a7e23e593ece4..c373aa52bdf4a 100644 --- a/misc/make_changelog.py +++ b/misc/make_changelog.py @@ -112,4 +112,5 @@ def format(c): if args.save: with open("./python/taichi/CHANGELOG.md", "w", encoding="utf-8") as f: f.write(res) - print(res) + else: + print(res) From c71549b57489fe4116042bb9584781da7f393abb Mon Sep 17 00:00:00 2001 From: Proton Date: Thu, 11 May 2023 18:47:03 +0800 Subject: [PATCH 02/18] [ci] Do not try to terminate sccache server after compilation Can disturb concurrent builds ghstack-source-id: fe3d955213ba68569af16d033de0b24b5ec2d029 Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7984 --- .github/workflows/scripts/ti_build/entry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scripts/ti_build/entry.py b/.github/workflows/scripts/ti_build/entry.py index 41dd136be38e9..5a7683290bdd0 100644 --- a/.github/workflows/scripts/ti_build/entry.py +++ b/.github/workflows/scripts/ti_build/entry.py @@ -89,7 +89,7 @@ def action_wheel(): handle_alternate_actions() build_wheel(python, pip) try: - sccache("--stop-server") + sccache("-s") except CommandFailed: pass @@ -100,7 +100,7 @@ def action_android(): handle_alternate_actions() build_android(python, pip) try: - sccache("--stop-server") + sccache("-s") except CommandFailed: pass From 2c4ed0dad011b4464a65ca67032d6bb9cc467e15 Mon Sep 17 00:00:00 2001 From: Proton Date: Thu, 11 May 2023 18:47:09 +0800 Subject: [PATCH 03/18] [ci] build.py: Add nice when compiling ghstack-source-id: bc7b2bb9014e36cf8e24b732906b0def7b3131a3 Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7985 --- .github/workflows/scripts/ti_build/entry.py | 5 +++-- .github/workflows/scripts/ti_build/tinysh.py | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scripts/ti_build/entry.py b/.github/workflows/scripts/ti_build/entry.py index 5a7683290bdd0..f9ea83aef8f6e 100644 --- a/.github/workflows/scripts/ti_build/entry.py +++ b/.github/workflows/scripts/ti_build/entry.py @@ -20,7 +20,7 @@ from .ospkg import setup_os_pkgs from .python import get_desired_python_version, setup_python from .sccache import setup_sccache -from .tinysh import Command, CommandFailed, git +from .tinysh import Command, CommandFailed, git, nice from .vulkan import setup_vulkan @@ -50,7 +50,8 @@ def build_wheel(python: Command, pip: Command) -> None: python("setup.py", "clean") python("misc/make_changelog.py", "--ver", "origin/master", "--repo_dir", "./", "--save") - python("setup.py", *proj_tags, "bdist_wheel", *extra) + with nice(): + python("setup.py", *proj_tags, "bdist_wheel", *extra) @banner("Install Build Wheel Dependencies") diff --git a/.github/workflows/scripts/ti_build/tinysh.py b/.github/workflows/scripts/ti_build/tinysh.py index 4a88d88077b84..516136de6b52e 100644 --- a/.github/workflows/scripts/ti_build/tinysh.py +++ b/.github/workflows/scripts/ti_build/tinysh.py @@ -218,6 +218,19 @@ def sudo(): return prefix("sudo") +def nice(): + """ + Wrap a command 
with nice.
+    """
+    if IS_WINDOWS:
+        from .misc import warn
+
+        warn("nice is not yet implemented on Windows")
+        return with_options({})
+    else:
+        return prefix("nice")
+
+
 sh = Command()
 git = sh.git  # Use setup_python !

From 4d406ec101275d9a7b0c2b9742d1edacb72ca5aa Mon Sep 17 00:00:00 2001
From: Proton
Date: Thu, 11 May 2023 18:47:15 +0800
Subject: [PATCH 04/18] [ci] Tag wheel with TI_WITH_xxx tags

ghstack-source-id: 90bf91396419802d8bba58ea92bf381d9ba30b9e
Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7986
---
 .github/workflows/scripts/ti_build/cmake.py | 27 ++++++++++------
 .github/workflows/scripts/ti_build/entry.py | 35 +++++++++++++++------
 CMakeLists.txt                              |  8 ++---
 cmake/TaichiCore.cmake                      | 20 ++++++------
 4 files changed, 56 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/scripts/ti_build/cmake.py b/.github/workflows/scripts/ti_build/cmake.py
index cf6d07ed5df14..2dc327710e40b 100644
--- a/.github/workflows/scripts/ti_build/cmake.py
+++ b/.github/workflows/scripts/ti_build/cmake.py
@@ -14,7 +14,7 @@
 from .misc import banner

 # -- code --
-OPTION_RE = re.compile(r'option\(([A-Z0-9_]*) +"(.*?)" +(ON|OFF)\)')
+OPTION_RE = re.compile(r'option\(([A-Z0-9_]*) +"(.*?)" +(ON|OFF)\)(?: *# wheel-tag: (.*))?')
 DEF_RE = re.compile(r"-D([A-Z0-9_]*)(?::BOOL)?=([^ ]+)(?: |$)")

@@ -28,10 +28,7 @@ def __init__(self, environ_name):
         self.environ_name = environ_name
         self.definitions = {}
         self.option_definitions = {
-            "CMAKE_EXPORT_COMPILE_COMMANDS": (
-                "Generate compile_commands.json",
-                False,
-            ),
+            "CMAKE_EXPORT_COMPILE_COMMANDS": ("Generate compile_commands.json", False, ""),
         }
         self.finalized = False

@@ -39,9 +36,9 @@ def collect_options(self, *files: str) -> None:
         for fn in files:
             with open(fn, "r") as f:
-                for name, desc, default in OPTION_RE.findall(f.read()):
+                for name, desc, default, wheel_tag in OPTION_RE.findall(f.read()):
                     default = self._VMAP.get(default, default)
-                    self.option_definitions[name] = (desc, default)
+                    self.option_definitions[name] = (desc, default, wheel_tag)

     def parse_initial_args(self) -> None:
         args = os.environ.get(self.environ_name, "")
@@ -49,7 +46,7 @@
             self.set(name, value)

     def get_effective(self, name: str) -> Union[str, bool]:
-        _, default = self.option_definitions.get(name, ("", None))
+        _, default, _ = self.option_definitions.get(name, ("", None, ""))
         return self.definitions.get(name, default)

     def set(self, name: str, value: Union[str, bool]) -> None:
@@ -57,7 +54,7 @@
         desc = ""
         value = self._VMAP.get(value, value)
         default = None
-        desc, default = self.option_definitions.get(name, ("", None))
+        desc, default, wheel_tag = self.option_definitions.get(name, ("", None, ""))
         desc = desc and f" ({desc}) "
         is_bool = isinstance(default, bool)
         assert not is_bool or isinstance(value, bool), f"Option {name} must be bool"
@@ -84,6 +81,7 @@
             else:
                 p(f"{B}:: CMAKE: Already disabled: {name}{desc}{N}")
         else:
+            assert not wheel_tag, "Cannot set a non-boolean value on an option with a wheel-tag"
             if orig != value:
                 if orig != default:
                     p(f"{R}:: CMAKE- {name}={orig}{desc}{N}")
@@ -99,7 +97,7 @@
             else:
                 v = f"-D{name}={value}"

-            desc, _ = self.option_definitions.get(name, ("", None))
+            desc, _, _ = self.option_definitions.get(name, ("", None, ""))
             if desc:
                 prefix = "DO NOT " if not value else ""
                 desc = f" ({prefix}{desc})"
@@ 
-108,6 +106,15 @@ def render(self) -> List[Tuple[str, str, str]]: return lst + def render_wheel_tag(self) -> str: + tags = [] + for name, (_, default, wheel_tag) in self.option_definitions.items(): + if not wheel_tag: + continue + if self.definitions.get(name, default): + tags.append(wheel_tag) + return ".".join(sorted(tags)) + @banner("{self.environ_name} Summary") def print_summary(self, rendered) -> None: p = lambda s: print(s, file=sys.stderr, flush=True) diff --git a/.github/workflows/scripts/ti_build/entry.py b/.github/workflows/scripts/ti_build/entry.py index f9ea83aef8f6e..663de529f1463 100644 --- a/.github/workflows/scripts/ti_build/entry.py +++ b/.github/workflows/scripts/ti_build/entry.py @@ -2,6 +2,7 @@ # -- stdlib -- import argparse +import datetime import os import platform import subprocess @@ -30,15 +31,22 @@ def build_wheel(python: Command, pip: Command) -> None: """ Build the Taichi wheel """ + git.fetch("origin", "master", "--tags") - proj = os.environ.get("PROJECT_NAME", "taichi") proj_tags = [] extra = [] - if proj == "taichi-nightly": - proj_tags.extend(["egg_info", "--tag-date", "--tag-build=.post"]) - # Include C-API in nightly builds - cmake_args["TI_WITH_C_API"] = True + cmake_args.writeback() + wheel_tag = cmake_args.render_wheel_tag() + if misc.options.tag_local: + wheel_tag = misc.options.tag_local + + if misc.options.nightly: + os.environ["PROJECT_NAME"] = "taichi-nightly" + now = datetime.datetime.now().strftime("%Y%m%d") + proj_tags.extend(["egg_info", f"--tag-build=.post{now}+{wheel_tag}"]) + elif misc.options.tag_config or misc.options.tag_local: + proj_tags.extend(["egg_info", f"--tag-build=+{wheel_tag}"]) if platform.system() == "Linux": if is_manylinux2014(): @@ -46,7 +54,6 @@ def build_wheel(python: Command, pip: Command) -> None: else: extra.extend(["-p", "manylinux_2_27_x86_64"]) - cmake_args.writeback() python("setup.py", "clean") python("misc/make_changelog.py", "--ver", "origin/master", "--repo_dir", "./", "--save") @@ -60,7 +67,7 @@ def install_build_wheel_deps(python: Command, pip: Command) -> None: pip.install("-r", "requirements_dev.txt") -def setup_basic_build_env(force_vulkan=False): +def setup_basic_build_env(): u = platform.uname() if (u.system, u.machine) == ("Windows", "AMD64"): # Use MSVC on Windows @@ -71,8 +78,7 @@ def setup_basic_build_env(force_vulkan=False): setup_clang() setup_llvm() - if force_vulkan or cmake_args.get_effective("TI_WITH_VULKAN"): - setup_vulkan() + setup_vulkan() sccache = setup_sccache() @@ -96,7 +102,7 @@ def action_wheel(): def action_android(): - sccache, python, pip = setup_basic_build_env(force_vulkan=True) + sccache, python, pip = setup_basic_build_env() setup_android_ndk() handle_alternate_actions() build_android(python, pip) @@ -151,6 +157,15 @@ def parse_args(): help = "Continue when encounters error." parser.add_argument("--permissive", action="store_true", default=False, help=help) + help = "Tag built wheel with TI_WITH_xxx config." + parser.add_argument("--tag-config", action="store_true", default=False, help=help) + + help = "Set a local version. Overrides --tag-config." + parser.add_argument("--tag-local", type=str, default=None, help=help) + + help = "Build nightly wheel." 
+ parser.add_argument("--nightly", action="store_true", default=False, help=help) + options = parser.parse_args() return options diff --git a/CMakeLists.txt b/CMakeLists.txt index d024821b1f5e0..b5988e4285255 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,7 +68,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) option(USE_LLD "Use lld (from llvm) linker" OFF) option(USE_MOLD "Use mold (A Modern Linker)" OFF) -option(TI_WITH_BACKTRACE "Use backward-cpp to print out C++ stack trace upon failure" OFF) +option(TI_WITH_BACKTRACE "Use backward-cpp to print out C++ stack trace upon failure" OFF) # wheel-tag: bt if(LINUX OR APPLE) if (NOT IOS) @@ -205,8 +205,8 @@ endif() configure_file(taichi/common/version.h.in ${CMAKE_SOURCE_DIR}/taichi/common/version.h) configure_file(taichi/common/commit_hash.h.in ${CMAKE_SOURCE_DIR}/taichi/common/commit_hash.h) -option(TI_WITH_C_API "build taichi runtime c-api library" ON) -option(TI_WITH_STATIC_C_API "build static taichi runtime c-api library" OFF) +option(TI_WITH_C_API "build taichi runtime c-api library" ON) # wheel-tag: aot +option(TI_WITH_STATIC_C_API "build static taichi runtime c-api library" OFF) # wheel-tag: static_aot if(TI_WITH_STATIC_C_API) set(TI_WITH_C_API ${TI_WITH_STATIC_C_API}) @@ -231,7 +231,7 @@ if (TI_BUILD_RHI_EXAMPLES) endif() -option(TI_WITH_GRAPHVIZ "generate dependency graphs between targets" OFF) +option(TI_WITH_GRAPHVIZ "generate dependency graphs between targets" OFF) # wheel-tag: viz if (TI_WITH_GRAPHVIZ) set(GRAPHVIZ_GRAPH_NAME "ti_targets") add_custom_target(graphviz ALL diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake index 8579d2d3bdacb..a3b44521f23d2 100644 --- a/cmake/TaichiCore.cmake +++ b/cmake/TaichiCore.cmake @@ -1,14 +1,14 @@ option(USE_STDCPP "Use -stdlib=libc++" OFF) -option(TI_WITH_LLVM "Build with LLVM backends" ON) -option(TI_WITH_METAL "Build with the Metal backend" ON) -option(TI_WITH_CUDA "Build with the CUDA backend" ON) -option(TI_WITH_CUDA_TOOLKIT "Build with the CUDA toolkit" OFF) -option(TI_WITH_AMDGPU "Build with the AMDGPU backend" OFF) -option(TI_WITH_OPENGL "Build with the OpenGL backend" ON) -option(TI_WITH_VULKAN "Build with the Vulkan backend" OFF) -option(TI_WITH_DX11 "Build with the DX11 backend" OFF) -option(TI_WITH_DX12 "Build with the DX12 backend" OFF) -option(TI_WITH_GGUI "Build with GGUI" OFF) +option(TI_WITH_LLVM "Build with LLVM backends" ON) # wheel-tag: llvm +option(TI_WITH_METAL "Build with the Metal backend" ON) # wheel-tag: mtl +option(TI_WITH_CUDA "Build with the CUDA backend" ON) # wheel-tag: cu +option(TI_WITH_CUDA_TOOLKIT "Build with the CUDA toolkit" OFF) # wheel-tag: cutk +option(TI_WITH_AMDGPU "Build with the AMDGPU backend" OFF) # wheel-tag: amd +option(TI_WITH_OPENGL "Build with the OpenGL backend" ON) # wheel-tag: gl +option(TI_WITH_VULKAN "Build with the Vulkan backend" OFF) # wheel-tag: vk +option(TI_WITH_DX11 "Build with the DX11 backend" OFF) # wheel-tag: dx11 +option(TI_WITH_DX12 "Build with the DX12 backend" OFF) # wheel-tag: dx12 +option(TI_WITH_GGUI "Build with GGUI" OFF) # wheel-tag: ggui # Force symbols to be 'hidden' by default so nothing is exported from the Taichi # library including the third-party dependencies. 
From d12826555d54c35f882213f9e2624861b64837b2 Mon Sep 17 00:00:00 2001
From: Proton
Date: Thu, 11 May 2023 18:47:21 +0800
Subject: [PATCH 05/18] [build] Not generating PDB files by default (for
 compilation caching)

ghstack-source-id: 44bdc44b4312a4fdd46e14e8d76c6db1e16643eb
Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7987
---
 CMakeLists.txt              |  1 +
 cmake/TaichiCAPITests.cmake |  2 +-
 cmake/TaichiCXXFlags.cmake  |  6 ++++--
 cmake/TaichiTests.cmake     | 10 +++++-----
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b5988e4285255..49311f433a7d4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,6 +69,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 option(USE_LLD "Use lld (from llvm) linker" OFF)
 option(USE_MOLD "Use mold (A Modern Linker)" OFF)
 option(TI_WITH_BACKTRACE "Use backward-cpp to print out C++ stack trace upon failure" OFF) # wheel-tag: bt
+option(TI_GENERATE_PDB "Generate Program Database (PDB) files (will make compilation uncacheable)" OFF)

 if(LINUX OR APPLE)
     if (NOT IOS)
diff --git a/cmake/TaichiCAPITests.cmake b/cmake/TaichiCAPITests.cmake
index de894cf131b54..ad743eec3cb52 100644
--- a/cmake/TaichiCAPITests.cmake
+++ b/cmake/TaichiCAPITests.cmake
@@ -22,7 +22,7 @@ if (WIN32)
   set_target_properties(${C_API_TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${C_API_TESTS_OUTPUT_DIR})
   set_target_properties(${C_API_TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL ${C_API_TESTS_OUTPUT_DIR})
   set_target_properties(${C_API_TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${C_API_TESTS_OUTPUT_DIR})
-  if (MSVC)
+  if (MSVC AND TI_GENERATE_PDB)
     target_compile_options(${C_API_TESTS_NAME} PRIVATE "$<$:/Zi>")
     target_link_options(${C_API_TESTS_NAME} PRIVATE "$<$:/DEBUG>")
     target_link_options(${C_API_TESTS_NAME} PRIVATE "$<$:/OPT:REF>")
diff --git a/cmake/TaichiCXXFlags.cmake b/cmake/TaichiCXXFlags.cmake
index a03e41d9fc59e..9a704cf557bc5 100644
--- a/cmake/TaichiCXXFlags.cmake
+++ b/cmake/TaichiCXXFlags.cmake
@@ -39,8 +39,10 @@ if (WIN32)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus /Zc:inline /std:c++17")
   # Linker & object related flags
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP /bigobj")
-  # Debugging (generate PBD files)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /Zf")
+  # Debugging (generate PDB files)
+  if (TI_GENERATE_PDB)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /Zf")
+  endif()
   # Performance and optimizations
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Oi")
   # C4244: conversion from 'type1' to 'type2', possible loss of data
diff --git a/cmake/TaichiTests.cmake b/cmake/TaichiTests.cmake
index c3d59577d15a9..63d96c3a60c11 100644
--- a/cmake/TaichiTests.cmake
+++ b/cmake/TaichiTests.cmake
@@ -56,11 +56,11 @@ if (WIN32)
   set_target_properties(${TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${TESTS_OUTPUT_DIR})
   set_target_properties(${TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL ${TESTS_OUTPUT_DIR})
   set_target_properties(${TESTS_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${TESTS_OUTPUT_DIR})
-  if (MSVC)
-    target_compile_options(${TESTS_NAME} PRIVATE "$<$:/Zi>")
-    target_link_options(${TESTS_NAME} PRIVATE "$<$:/DEBUG>")
-    target_link_options(${TESTS_NAME} PRIVATE "$<$:/OPT:REF>")
-    target_link_options(${TESTS_NAME} PRIVATE "$<$:/OPT:ICF>")
+  if (MSVC AND TI_GENERATE_PDB)
+    target_compile_options(${TESTS_NAME} PRIVATE "/Zi")
+    target_link_options(${TESTS_NAME} PRIVATE "/DEBUG")
+    target_link_options(${TESTS_NAME} PRIVATE "/OPT:REF")
+ 
target_link_options(${TESTS_NAME} PRIVATE "/OPT:ICF") endif() endif() target_link_libraries(${TESTS_NAME} PRIVATE taichi_core) From 4c7ec67b6c24113a3683815d433e8cee717133ad Mon Sep 17 00:00:00 2001 From: Proton Date: Thu, 11 May 2023 18:47:26 +0800 Subject: [PATCH 06/18] [build] Use Ninja and MSVC to build on Windows ghstack-source-id: d2cc403815fe196fdf0770beb4a3bd2872727496 Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7988 --- .../workflows/scripts/ti_build/compiler.py | 114 +++++++++++++----- .github/workflows/scripts/ti_build/tinysh.py | 2 + taichi/rhi/CMakeLists.txt | 5 + taichi/rhi/dummy.cpp | 0 4 files changed, 93 insertions(+), 28 deletions(-) create mode 100644 taichi/rhi/dummy.cpp diff --git a/.github/workflows/scripts/ti_build/compiler.py b/.github/workflows/scripts/ti_build/compiler.py index 5854705bc3f55..4c3b66dc15f60 100644 --- a/.github/workflows/scripts/ti_build/compiler.py +++ b/.github/workflows/scripts/ti_build/compiler.py @@ -3,14 +3,18 @@ # -- stdlib -- from pathlib import Path import os +import json import platform import shutil +import tempfile +import sys # -- third party -- # -- own -- from .cmake import cmake_args from .dep import download_dep from .misc import banner, error, get_cache_home, warn +from .tinysh import powershell # -- code -- @@ -56,36 +60,90 @@ def setup_clang(as_compiler=True) -> None: cmake_args["CMAKE_CXX_COMPILER"] = clangpp +ENV_EXTRACT_SCRIPT = """ +param ([string]$DevShell, [string]$VsPath, [string]$OutFile) +$WarningPreference = 'SilentlyContinue' +Import-Module $DevShell +Enter-VsDevShell -VsInstallPath $VsPath -SkipAutomaticLocation -DevCmdArguments "-arch=x64" +Get-ChildItem env:* | ConvertTo-Json -Depth 1 | Out-File $OutFile +""" + + +def _vs_devshell(vs): + dll = vs / "Common7" / "Tools" / "Microsoft.VisualStudio.DevShell.dll" + + if not dll.exists(): + error("Could not find Visual Studio DevShell") + return + + with tempfile.TemporaryDirectory() as tmp: + tmp = Path(tmp) + script = tmp / "extract.ps1" + with script.open("w") as f: + f.write(ENV_EXTRACT_SCRIPT) + outfile = tmp / "env.json" + powershell( + "-ExecutionPolicy", + "Bypass", + "-File", + str(script), + "-DevShell", + str(dll), + "-VsPath", + str(vs), + "-OutFile", + str(outfile), + ) + with outfile.open(encoding="utf-16") as f: + envs = json.load(f) + + for v in envs: + os.environ[v["Key"]] = v["Value"] + + @banner("Setup MSVC") def setup_msvc() -> None: assert platform.system() == "Windows" - os.environ["TAICHI_USE_MSBUILD"] = "1" - base = Path(r"C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools") - for edition in ("Enterprise", "Professional", "Community", "BuildTools"): - if (base / edition).exists(): - return + base = Path("C:\\Program Files (x86)\\Microsoft Visual Studio") + for ver in ("2022",): + for edition in ("Enterprise", "Professional", "Community", "BuildTools"): + vs = base / ver / edition + if not vs.exists(): + continue + + if os.environ.get("TI_CI") and not os.environ.get("TAICHI_USE_MSBUILD"): + # Use Ninja + MSVC in CI, for better caching + _vs_devshell(vs) + cmake_args["CMAKE_C_COMPILER"] = "cl.exe" + cmake_args["CMAKE_CXX_COMPILER"] = "cl.exe" + else: + os.environ["TAICHI_USE_MSBUILD"] = "1" - url = "https://aka.ms/vs/17/release/vs_BuildTools.exe" - out = base - download_dep( - url, - out, - elevate=True, - args=[ - "--passive", - "--wait", - "--norestart", - "--includeRecommended", - "--add", - "Microsoft.VisualStudio.Workload.VCTools", - # NOTE: We are using the custom built Clang++, - # so components below 
are not necessary anymore. - # '--add', - # 'Microsoft.VisualStudio.Component.VC.Llvm.Clang', - # '--add', - # 'Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Llvm.Clang', - # '--add', - # 'Microsoft.VisualStudio.Component.VC.Llvm.ClangToolset', - ], - ) + return + else: + url = "https://aka.ms/vs/17/release/vs_BuildTools.exe" + out = base / "2022" / "BuildTools" + download_dep( + url, + out, + elevate=True, + args=[ + "--passive", + "--wait", + "--norestart", + "--includeRecommended", + "--add", + "Microsoft.VisualStudio.Workload.VCTools", + # NOTE: We are using the custom built Clang++, + # so components below are not necessary anymore. + # '--add', + # 'Microsoft.VisualStudio.Component.VC.Llvm.Clang', + # '--add', + # 'Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Llvm.Clang', + # '--add', + # 'Microsoft.VisualStudio.Component.VC.Llvm.ClangToolset', + ], + ) + warn("Please restart build.py after Visual Studio Build Tools is installed.") + sys.exit(1) diff --git a/.github/workflows/scripts/ti_build/tinysh.py b/.github/workflows/scripts/ti_build/tinysh.py index 516136de6b52e..9a7e126095bad 100644 --- a/.github/workflows/scripts/ti_build/tinysh.py +++ b/.github/workflows/scripts/ti_build/tinysh.py @@ -241,3 +241,5 @@ def nice(): bash = sh.bash start = sh.start.bake("/wait") apt = sh.sudo.apt +powershell = Command("powershell.exe") +pwsh = Command("pwsh.exe") diff --git a/taichi/rhi/CMakeLists.txt b/taichi/rhi/CMakeLists.txt index 0e26f5579f500..4b88b7d974c0d 100644 --- a/taichi/rhi/CMakeLists.txt +++ b/taichi/rhi/CMakeLists.txt @@ -113,3 +113,8 @@ target_link_libraries(${TAICHI_DEVICE_API} PUBLIC common_rhi) # Generate shared library add_library(ti_device_api_shared SHARED public_device.h) target_link_libraries(ti_device_api_shared PUBLIC ${TAICHI_DEVICE_API}) + +# When building targets on Windows using Ninja + MSVC, the linker requires at least 1 object file +# to work properly, else link.exe would complain about LNK4001 warning, and fail afterwards. +# Adding a dummy file to workaround this. 
+target_sources(ti_device_api_shared PRIVATE dummy.cpp) diff --git a/taichi/rhi/dummy.cpp b/taichi/rhi/dummy.cpp new file mode 100644 index 0000000000000..e69de29bb2d1d From f86663b9686a772fb8965880861a6269304464fb Mon Sep 17 00:00:00 2001 From: Proton Date: Thu, 11 May 2023 18:47:32 +0800 Subject: [PATCH 07/18] [build] Guard Windows LTO with flags ghstack-source-id: 045bd93247b0448fe6c29b20b968aa83c59fb050 Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7989 --- CMakeLists.txt | 1 + cmake/TaichiCXXFlags.cmake | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49311f433a7d4..b29755fbcad3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,7 @@ option(USE_LLD "Use lld (from llvm) linker" OFF) option(USE_MOLD "Use mold (A Modern Linker)" OFF) option(TI_WITH_BACKTRACE "Use backward-cpp to print out C++ stack trace upon failure" OFF) # wheel-tag: bt option(TI_GENERATE_PDB "Generate Program Database (PDB) files (will make compilation uncacheable)" OFF) +option(TI_WITH_LTO "Enable Link Time Optimization (LTO) (affects Windows + MSVC for now)" OFF) # wheel-tag: lto if(LINUX OR APPLE) if (NOT IOS) diff --git a/cmake/TaichiCXXFlags.cmake b/cmake/TaichiCXXFlags.cmake index 9a704cf557bc5..58b9a5a3ded3d 100644 --- a/cmake/TaichiCXXFlags.cmake +++ b/cmake/TaichiCXXFlags.cmake @@ -25,10 +25,14 @@ if (WIN32) set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -flto=thin") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -flto=thin") elseif (MSVC) - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} /GL /Gy") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /GL /Gy") - set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS} /LTCG") - set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /LTCG") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} /Gy") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /Gy") + if (TI_WITH_LTO) + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} /GL") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /GL") + set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS} /LTCG") + set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /LTCG") + endif() endif() endif() From 6edff3175e9cd852b18f5cd7e3e932cb79bd49fc Mon Sep 17 00:00:00 2001 From: Proton Date: Fri, 12 May 2023 20:50:28 +0800 Subject: [PATCH 08/18] [ci] Add dedicated build pipeline ghstack-source-id: 43522332b4d24e95fb3333727d5d49ac1a432b61 Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7990 --- .github/actionlint.yaml | 19 ++ .github/workflows/build.yaml | 395 +++++++++++++++++++++++++++++++ .github/workflows/initiator.yaml | 74 ++++++ 3 files changed, 488 insertions(+) create mode 100644 .github/actionlint.yaml create mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/initiator.yaml diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml new file mode 100644 index 0000000000000..36fa23577f27d --- /dev/null +++ b/.github/actionlint.yaml @@ -0,0 +1,19 @@ +self-hosted-runner: + labels: + - cn + - cuda + - OpenGL + - vulkan + - m1 + - driver470 + - driver510 + - benchmark + - release + - build + - sm70 + - sm86 + - amdgpu + - online + +# config-variables: +# - ENVIRONMENT_STAGE diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000000000..5da34e3b05b9b --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,395 @@ +name: Build Taichi +on: + 
workflow_call: + inputs: + build_id: + required: true + type: string + nightly: + required: true + type: boolean + python: + required: true + type: string + secrets: + BOT_MINIO_ACCESS_KEY: + required: true + BOT_MINIO_SECRET_KEY: + required: true + workflow_dispatch: + inputs: + build_id: + description: 'The build id. e.g.: 20230427-102544-abcdefab' + required: true + type: string + nightly: + description: 'Are we building nightly wheels?' + required: true + default: false + type: boolean + python: + description: 'JSON encoded python versions need building wheel. e.g.: ["3.7"]' + required: true + type: string + +concurrency: + group: build-${{ github.event.number || github.run_id }} + cancel-in-progress: true + +env: + TI_CI: "1" + TI_SKIP_VERSION_CHECK: 'ON' + CI_IMAGE_VERSION: '202304251731' + TI_USE_GIT_CACHE: ${{ vars.TI_USE_GIT_CACHE }} + NIGHTLY: ${{ inputs.nightly && 'nightly' || '' }} + +jobs: + show_environ: + name: Show Environment Variables + # Disable this workflow on forks + if: github.repository_owner == 'taichi-dev' + runs-on: [self-hosted, Linux] + steps: + - name: Environment Variables + run: env + - name: Github Object + run: | + cat <<'EOF' + ${{ toJson(github) }} + EOF + + build_cpu_mac: + name: Build macOS-x86 + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + python: ${{ fromJSON(inputs.python) }} + _designated: [''] + include: + - _designated: '' + designated: designated + runs-on: + - self-hosted + - macos-10.15 + env: + TAICHI_CMAKE_ARGS: >- + -DTI_WITH_OPENGL:BOOL=OFF + -DTI_WITH_VULKAN:BOOL=ON + -DTI_WITH_C_API:BOOL=ON + -DTI_BUILD_TESTS:BOOL=ON + steps: + - name: Workaround checkout Needed single revision issue + run: git submodule foreach 'git rev-parse HEAD > /dev/null 2>&1 || rm -rf $PWD' || true + + - uses: actions/checkout@v3 + with: + fetch-depth: '0' + submodules: 'recursive' + + - name: Build + run: ./build.py ${NIGHTLY:+--nightly} --python=${{ matrix.python }} ${SHOULD_TAG_CONFIG:+--tag-config} + + - name: Upload Built Wheel + uses: shallwefootball/s3-upload-action@v1.3.3 + with: + aws_key_id: ${{ secrets.BOT_MINIO_ACCESS_KEY }} + aws_secret_access_key: ${{ secrets.BOT_MINIO_SECRET_KEY }} + aws_bucket: built-wheels + source_dir: dist + destination_dir: built-wheels/${{ inputs.build_id }}/${{ !matrix.designated && 'matrix/' || '' }} + endpoint: http://botmaster.tgr:9000 + + build_linux: + name: Build Linux + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + cuda: ['', cuda] + llvm: ['', llvm] + gl: ['', gl] + vk: ['', vk] + python: ${{ fromJSON(inputs.python) }} + include: + - {cuda: cuda, llvm: llvm, gl: gl, vk: vk, designated: designated} + exclude: + - {llvm: '', cuda: cuda} + runs-on: [self-hosted, online, Linux, build] + env: + TAICHI_CMAKE_ARGS: >- + -DTI_WITH_CUDA:BOOL=${{ matrix.cuda && 'ON' || 'OFF' }} + -DTI_WITH_LLVM:BOOL=${{ matrix.llvm && 'ON' || 'OFF' }} + -DTI_WITH_OPENGL:BOOL=${{ matrix.gl && 'ON' || 'OFF' }} + -DTI_WITH_VULKAN:BOOL=${{ matrix.vk && 'ON' || 'OFF' }} + -DTI_WITH_METAL:BOOL=OFF + -DTI_WITH_BACKTRACE:BOOL=ON + -DTI_BUILD_TESTS:BOOL=ON + + steps: + - name: Workaround checkout Needed single revision issue + run: git submodule foreach 'git rev-parse HEAD > /dev/null 2>&1 || rm -rf $PWD' || true + + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + fetch-depth: '0' + + - name: Build + run: | + . 
.github/workflows/scripts/common-utils.sh + ci-docker-run-gpu \ + -v $(pwd):/home/dev/taichi \ + registry.botmaster.tgr/taichi-build-cuda:${{ env.CI_IMAGE_VERSION }} \ + /home/dev/taichi/build.py ${NIGHTLY:+--nightly} --python=${{ matrix.python }} ${SHOULD_TAG_CONFIG:+--tag-config} + env: + SHOULD_TAG_CONFIG: ${{ !matrix.designated && 'yes' || '' }} + + - name: Upload Built Wheel + uses: shallwefootball/s3-upload-action@v1.3.3 + with: + aws_key_id: ${{ secrets.BOT_MINIO_ACCESS_KEY }} + aws_secret_access_key: ${{ secrets.BOT_MINIO_SECRET_KEY }} + aws_bucket: built-wheels + source_dir: dist + destination_dir: built-wheels/${{ inputs.build_id }}/${{ !matrix.designated && 'matrix/' || '' }} + endpoint: http://botmaster.tgr:9000 + + build_manylinux2014: + name: Build manylinux2014 + timeout-minutes: 30 + runs-on: [self-hosted, online, Linux, build] + strategy: + fail-fast: false + matrix: + python: ${{ fromJSON(inputs.python) }} + _designated: [''] + include: + - _designated: '' + designated: designated + env: + TAICHI_CMAKE_ARGS: >- + -DTI_WITH_OPENGL:BOOL=OFF + -DTI_WITH_VULKAN:BOOL=OFF + -DTI_BUILD_TESTS:BOOL=ON + + steps: + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + fetch-depth: '0' + + - name: Build + run: | + . .github/workflows/scripts/common-utils.sh + + ci-docker-run-gpu \ + -v $(pwd):/home/dev/taichi \ + registry.botmaster.tgr/taichi-build-manylinux2014-cuda:${{ env.CI_IMAGE_VERSION }} \ + /home/dev/taichi/build.py ${NIGHTLY:+--nightly} --python=${{ matrix.python }} ${SHOULD_TAG_CONFIG:+--tag-config} + + - name: Upload Built Wheel + uses: shallwefootball/s3-upload-action@v1.3.3 + with: + aws_key_id: ${{ secrets.BOT_MINIO_ACCESS_KEY }} + aws_secret_access_key: ${{ secrets.BOT_MINIO_SECRET_KEY }} + aws_bucket: built-wheels + source_dir: dist + destination_dir: built-wheels/${{ inputs.build_id }}/${{ !matrix.designated && 'matrix/' || '' }} + endpoint: http://botmaster.tgr:9000 + + build_amdgpu_linux: + name: Build AMDGPU + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + python: ${{ fromJSON(inputs.python) }} + designated: [''] + runs-on: [self-hosted, online, Linux, build] + env: + TAICHI_CMAKE_ARGS: >- + -DTI_WITH_CUDA:BOOL=OFF + -DTI_WITH_VULKAN:BOOL=OFF + -DTI_WITH_OPENGL:BOOL=OFF + -DTI_BUILD_TESTS:BOOL=ON + -DTI_WITH_AMDGPU:BOOL=ON + + steps: + - name: Workaround checkout Needed single revision issue + run: git submodule foreach 'git rev-parse HEAD > /dev/null 2>&1 || rm -rf $PWD' || true + + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + fetch-depth: '0' + + - name: Build & Install + run: | + . 
.github/workflows/scripts/common-utils.sh + + ci-docker-run \ + -v $(pwd):/home/dev/taichi \ + registry.botmaster.tgr/taichi-build-amdgpu:${{ env.CI_IMAGE_VERSION }} \ + /home/dev/taichi/build.py ${NIGHTLY:+--nightly} --python=${{ matrix.python }} --tag-local=amd + + - name: Upload Built Wheel + uses: shallwefootball/s3-upload-action@v1.3.3 + with: + aws_key_id: ${{ secrets.BOT_MINIO_ACCESS_KEY }} + aws_secret_access_key: ${{ secrets.BOT_MINIO_SECRET_KEY }} + aws_bucket: built-wheels + source_dir: dist + destination_dir: built-wheels/${{ inputs.build_id }}/${{ !matrix.designated && 'matrix/' || '' }} + endpoint: http://botmaster.tgr:9000 + + build_windows: + name: Build Windows + strategy: + fail-fast: false + matrix: + cuda: ['', cuda] + llvm: ['', llvm] + gl: ['', gl] + vk: ['', vk] + python: ${{ fromJSON(inputs.python) }} + include: + - {cuda: cuda, llvm: llvm, gl: gl, vk: vk, lto: lto, pdb: pdb, designated: designated} + exclude: + - {llvm: '', cuda: cuda} + runs-on: [self-hosted, online, Windows, build] + timeout-minutes: 30 + env: + TAICHI_CMAKE_ARGS: >- + -DTI_WITH_CUDA:BOOL=${{ matrix.cuda && 'ON' || 'OFF' }} + -DTI_WITH_LLVM:BOOL=${{ matrix.llvm && 'ON' || 'OFF' }} + -DTI_WITH_OPENGL:BOOL=${{ matrix.gl && 'ON' || 'OFF' }} + -DTI_WITH_VULKAN:BOOL=${{ matrix.vk && 'ON' || 'OFF' }} + -DTI_WITH_METAL:BOOL=OFF + -DTI_WITH_BACKTRACE:BOOL=ON + -DTI_WITH_DX11:BOOL=ON + -DTI_WITH_DX12:BOOL=ON + -DTI_BUILD_TESTS:BOOL=ON + -DTI_WITH_C_API:BOOL=ON + -DTI_WITH_LTO:BOOL=${{ matrix.lto && 'ON' || 'OFF' }} + -DTI_GENERATE_PDB:BOOL=${{ matrix.pdb && 'ON' || 'OFF' }} + steps: + - name: Workaround checkout Needed single revision issue + shell: pwsh + run: | + $ErrorActionPreference = 'SilentlyContinue' + git config --system core.longpaths true + git submodule foreach --recursive 'git rev-parse HEAD || rm -rf $PWD' + $LASTEXITCODE = 0 + + - uses: actions/checkout@v3 + with: + fetch-depth: '0' + submodules: 'recursive' + + - uses: actions/setup-python@v4 + with: + # force a 3.7 is ok, build.py will handle actual python env + python-version: 3.7 + + - name: Build + shell: pwsh + run: | + $nightlyFlag = $null + if ($env:NIGHTLY) { $nightlyFlag = "--nightly" } + $tagFlag = $null + if ($env:SHOULD_TAG_CONFIG) { $tagFlag = "--tag-config" } + python build.py $nightlyFlag --python=${{ matrix.python }} $tagFlag + env: + SHOULD_TAG_CONFIG: ${{ !matrix.designated && 'yes' || '' }} + + - name: Upload Built Wheel + uses: shallwefootball/s3-upload-action@v1.3.3 + with: + aws_key_id: ${{ secrets.BOT_MINIO_ACCESS_KEY }} + aws_secret_access_key: ${{ secrets.BOT_MINIO_SECRET_KEY }} + aws_bucket: built-wheels + source_dir: dist + destination_dir: built-wheels/${{ inputs.build_id }}/${{ !matrix.designated && 'matrix/' || '' }} + endpoint: http://botmaster.tgr:9000 + + - name: Cleanup Git Cache Configs + shell: pwsh + if: always() + run: | + . 
.github/workflows/scripts/common-utils.ps1 + UnsetGitCachingProxy + exit 0 + + build_m1: + name: Build M1 + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + python: ${{ fromJSON(inputs.python) }} + _designated: [''] + exclude: + - python: "3.7" + include: + - _designated: '' + designated: designated + defaults: + run: + shell: '/usr/bin/arch -arch arm64e /bin/bash --noprofile --norc -eo pipefail {0}' + runs-on: [self-hosted, online, m1] + env: + TAICHI_CMAKE_ARGS: >- + -DTI_WITH_OPENGL:BOOL=OFF + -DTI_WITH_CUDA:BOOL=OFF + -DTI_WITH_VULKAN:BOOL=ON + -DTI_BUILD_TESTS:BOOL=ON + -DTI_WITH_C_API=ON + -DTI_WITH_STATIC_C_API=ON + PLATFORM: 'm1' + steps: + - name: Workaround checkout Needed single revision issue + run: git submodule foreach 'git rev-parse HEAD > /dev/null 2>&1 || rm -rf $PWD' || true + + - uses: actions/checkout@v3 + with: + fetch-depth: '0' + submodules: 'recursive' + + - name: Build + run: | + brew install molten-vk + ./build.py ${NIGHTLY:+--nightly} --python=${{ matrix.python }} + + - name: Upload Built Wheel + uses: shallwefootball/s3-upload-action@v1.3.3 + with: + aws_key_id: ${{ secrets.BOT_MINIO_ACCESS_KEY }} + aws_secret_access_key: ${{ secrets.BOT_MINIO_SECRET_KEY }} + aws_bucket: built-wheels + source_dir: dist + destination_dir: built-wheels/${{ inputs.build_id }}/${{ !matrix.designated && 'matrix/' || '' }} + endpoint: http://botmaster.tgr:9000 + + build_ios_capi: + name: Build iOS C-API Static Library + timeout-minutes: 30 + runs-on: [self-hosted, online, m1] + steps: + - name: Workaround checkout Needed single revision issue + run: git submodule foreach 'git rev-parse HEAD > /dev/null 2>&1 || rm -rf $PWD' || true + + - uses: actions/checkout@v3 + with: + fetch-depth: '0' + submodules: 'recursive' + + - name: Build + run: .github/workflows/scripts/build.py ios + + - name: Save Compiled Static Library + uses: actions/upload-artifact@v3 + with: + name: libtaichi_c_api.iOS.a + path: 'dist/C-API-iOS/*.a' + retention-days: 7 diff --git a/.github/workflows/initiator.yaml b/.github/workflows/initiator.yaml new file mode 100644 index 0000000000000..8cd5cfd4ddf8a --- /dev/null +++ b/.github/workflows/initiator.yaml @@ -0,0 +1,74 @@ +name: Taichi Workflow Initiator +on: + push: + branches: + - master + - rc-* + +concurrency: + group: ${{ github.event.number || github.run_id }} + cancel-in-progress: true + +env: + TI_CI: "1" + TI_SKIP_VERSION_CHECK: 'ON' + TI_LITE_TEST: ${{ github.event_name == 'pull_request' && ! contains(github.event.pull_request.labels.*.name, 'full-ci') && ! 
startsWith(github.base_ref, 'rc-') && '1' || '' }} + TI_TEST_OFFLINE_CACHE: ${{ github.event.schedule == '0 18 * * *' && '1' || '' }} + CI_IMAGE_VERSION: '202304251731' + TI_USE_GIT_CACHE: ${{ vars.TI_USE_GIT_CACHE }} + REDIS_HOST: 172.16.5.1 + +jobs: + show_environ: + name: Show Environment Variables + # Disable this workflow on forks + if: github.repository_owner == 'taichi-dev' + runs-on: [self-hosted, Linux] + steps: + - name: Environment Variables + run: env + - name: Github Object + run: | + cat <<'EOF' + ${{ toJson(github) }} + EOF + + preparation: + name: Preparation + runs-on: [self-hosted, Linux] + outputs: + date: ${{ steps.gather.outputs.date }} + short_sha: ${{ steps.gather.outputs.short_sha }} + build_id: ${{ steps.gather.outputs.build_id }} + steps: + - name: Gather Information + id: gather + run: | + DATE=$(date +'%Y%m%d-%H%M%S') + SHORT_SHA=$(echo $GITHUB_SHA | cut -c '1-10') + echo date=$DATE >> $GITHUB_OUTPUT + echo short_sha=$SHORT_SHA >> $GITHUB_OUTPUT + echo build_id=$DATE-$SHORT_SHA >> $GITHUB_OUTPUT + + build: + name: Build Artifacts + needs: [preparation] + uses: ./.github/workflows/build.yaml + with: + build_id: ${{ needs.preparation.outputs.build_id }} + nightly: false + python: '["3.9", "3.10", "3.11"]' + secrets: + BOT_MINIO_ACCESS_KEY: ${{ secrets.BOT_MINIO_ACCESS_KEY }} + BOT_MINIO_SECRET_KEY: ${{ secrets.BOT_MINIO_SECRET_KEY }} + + save_build_metadata: + name: Save Build Metadata + runs-on: [self-hosted, Linux] + needs: [preparation, build] + steps: + - name: Saving Metadata + id: gather + run: >- + redis-cli -h $REDIS_HOST --raw + set "latest-build-id:$GITHUB_REPOSITORY:$GITHUB_REF" ${{ needs.preparation.outputs.build_id }} From 7f749506ccaf246c4652b6f6b9360fe31b93f736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A7=8B=E4=BA=91=E6=9C=AA=E4=BA=91?= Date: Sat, 13 May 2023 05:43:23 +0800 Subject: [PATCH 09/18] [Opengl] Fix: runtime caught error cannot be displayed in opengl (#7998) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Brief Summary This PR fixes the issue that caught errors weren't logged out in opengl device. ### 🤖 Generated by Copilot at 9738324 Refactor and improve error handling of `create_pipeline` function in `opengl_device.cpp`. Catch and log different exceptions and return corresponding error codes. 
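
In isolation, the pattern being applied here looks like the self-contained sketch below. It is illustrative only: `RhiResult` is reduced to the four variants named in the diff, and `fprintf` stands in for `RHI_LOG_ERROR`.

```cpp
#include <cstdio>
#include <new>
#include <stdexcept>

enum class RhiResult { success, error, invalid_usage, out_of_memory };

// Translate exception types thrown by a creation callback into error codes,
// logging the message instead of letting the exception propagate out of a
// noexcept RHI entry point.
template <typename F>
RhiResult guarded(F &&f) noexcept {
  try {
    f();
    return RhiResult::success;
  } catch (const std::bad_alloc &e) {
    std::fprintf(stderr, "%s\n", e.what());  // stand-in for RHI_LOG_ERROR
    return RhiResult::out_of_memory;
  } catch (const std::invalid_argument &e) {
    std::fprintf(stderr, "%s\n", e.what());
    return RhiResult::invalid_usage;
  } catch (const std::runtime_error &e) {
    std::fprintf(stderr, "%s\n", e.what());
    return RhiResult::error;
  }
}

int main() {
  RhiResult r = guarded([] { throw std::invalid_argument("bad GLSL source"); });
  return r == RhiResult::invalid_usage ? 0 : 1;
}
```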
### Walkthrough ### 🤖 Generated by Copilot at 9738324 * Improve error handling and reporting in `create_pipeline` function ([link](https://github.com/taichi-dev/taichi/pull/7998/files?diff=unified&w=0#diff-28721a9ee9ac35b296afebd149e19d760c079aac1be524a048e77c5fd8f51069L647-R658)) by catching and logging different types of exceptions and returning different error codes --- taichi/rhi/opengl/opengl_device.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/taichi/rhi/opengl/opengl_device.cpp b/taichi/rhi/opengl/opengl_device.cpp index e2e65e25f2a28..cae2caeb4d0ea 100644 --- a/taichi/rhi/opengl/opengl_device.cpp +++ b/taichi/rhi/opengl/opengl_device.cpp @@ -644,9 +644,18 @@ RhiResult GLDevice::create_pipeline(Pipeline **out_pipeline, PipelineCache *cache) noexcept { try { *out_pipeline = new GLPipeline(src, name); - } catch (std::bad_alloc &) { + } catch (std::bad_alloc &e) { *out_pipeline = nullptr; + RHI_LOG_ERROR(e.what()); return RhiResult::out_of_memory; + } catch (std::invalid_argument &e) { + *out_pipeline = nullptr; + RHI_LOG_ERROR(e.what()); + return RhiResult::invalid_usage; + } catch (std::runtime_error &e) { + *out_pipeline = nullptr; + RHI_LOG_ERROR(e.what()); + return RhiResult::error; } return RhiResult::success; } From 6e9fe2e6b34c6e69afb169cc75461b0b9dde67bc Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Sat, 13 May 2023 18:14:12 +0800 Subject: [PATCH 10/18] [Lang] Fix error with irpass::check_out_of_bound() for TensorTyped ExternalPtrStmt (#7997) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue: # ### Brief Summary ### 🤖 Generated by Copilot at fde5c3c Simplify and clean up the code for out-of-bound check for external tensors in `taichi/transforms/check_out_of_bound.cpp`. Remove the deprecated SOA layout branch and use consistent naming for dimensions and shapes. 
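
Concretely, the fixed check splits the indices of an access like `arr[i, j][k]`: the first `ndim` indices are bounded by the external (ndarray) shape, the rest by the flattened element shape. Below is a hypothetical Python helper mirroring the new logic in check_out_of_bound.cpp; element indices are assumed already linearized into a single slot, as `MergeExternalAndMatrixPtr` arranges:

```python
from math import prod

def in_bounds(indices, external_shape, element_shape):
    # First len(external_shape) indices: per-axis ndarray extents;
    # remaining indices: the flattened element extent (AOS layout only,
    # since SOA layout for ndarrays is deprecated).
    ndim = len(external_shape)
    flattened_element = prod(element_shape)  # prod(()) == 1 for scalars
    return all(
        0 <= idx < (external_shape[i] if i < ndim else flattened_element)
        for i, idx in enumerate(indices)
    )

# A (4, 5) ndarray of 2x2 matrices, as in the test below:
assert in_bounds([2, 3, 1], (4, 5), (2, 2))      # arr[2, 3], element slot 1
assert not in_bounds([2, 3, 4], (4, 5), (2, 2))  # element slot 4 >= 2 * 2
assert not in_bounds([4, 0, 0], (4, 5), (2, 2))  # external axis 0: 4 >= 4
```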
### Walkthrough ### 🤖 Generated by Copilot at fde5c3c * Simplify code and remove deprecated SOA layout branch for ndarray in `check_out_of_bound` transform ([link](https://github.com/taichi-dev/taichi/pull/7997/files?diff=unified&w=0#diff-2d1e902d25643016ff6e05dc05dfdc05d1615face8181014196fe26796c35e0fL62-R77)) --- taichi/ir/frontend_ir.cpp | 2 +- taichi/ir/ir_builder.cpp | 2 +- taichi/ir/statements.cpp | 3 +++ taichi/ir/statements.h | 7 +++++++ taichi/transforms/auto_diff.cpp | 16 +++++++++------- taichi/transforms/check_out_of_bound.cpp | 19 ++++++++++++------- taichi/transforms/scalarize.cpp | 6 +++--- taichi/transforms/vectorize_half2.cpp | 3 ++- tests/python/test_ndarray.py | 14 ++++++++++++++ 9 files changed, 52 insertions(+), 20 deletions(-) diff --git a/taichi/ir/frontend_ir.cpp b/taichi/ir/frontend_ir.cpp index ae597e7199ebd..8b152dcb1fa4f 100644 --- a/taichi/ir/frontend_ir.cpp +++ b/taichi/ir/frontend_ir.cpp @@ -656,7 +656,7 @@ Stmt *make_ndarray_access(Expression::FlattenContext *ctx, auto var_stmt = flatten_lvalue(var, ctx); auto expr = var.cast(); auto external_ptr_stmt = std::make_unique( - var_stmt, index_stmts, expr->dt.get_shape(), expr->element_dim, + var_stmt, index_stmts, expr->dim, expr->dt.get_shape(), expr->element_dim, expr->is_grad); if (expr->dim == indices.size()) { // Indexing into an scalar element diff --git a/taichi/ir/ir_builder.cpp b/taichi/ir/ir_builder.cpp index 29df5319cf31e..27a2f70d4b4aa 100644 --- a/taichi/ir/ir_builder.cpp +++ b/taichi/ir/ir_builder.cpp @@ -441,7 +441,7 @@ ExternalPtrStmt *IRBuilder::create_external_ptr( const std::vector &indices, bool is_grad) { return insert(Stmt::make_typed( - ptr, indices, std::vector(), 0, is_grad)); + ptr, indices, indices.size(), std::vector(), 0, is_grad)); } AdStackAllocaStmt *IRBuilder::create_ad_stack(const DataType &dt, diff --git a/taichi/ir/statements.cpp b/taichi/ir/statements.cpp index 8234ddce9b868..1666a614d8e89 100644 --- a/taichi/ir/statements.cpp +++ b/taichi/ir/statements.cpp @@ -36,6 +36,7 @@ ExternalPtrStmt::ExternalPtrStmt(Stmt *base_ptr, const std::vector &indices, bool is_grad) : base_ptr(base_ptr), indices(indices), is_grad(is_grad) { + ndim = indices.size(); TI_ASSERT(base_ptr != nullptr); TI_ASSERT(base_ptr->is()); TI_STMT_REG_FIELDS; @@ -43,12 +44,14 @@ ExternalPtrStmt::ExternalPtrStmt(Stmt *base_ptr, ExternalPtrStmt::ExternalPtrStmt(Stmt *base_ptr, const std::vector &indices, + int ndim, const std::vector &element_shape, int element_dim, bool is_grad) : ExternalPtrStmt(base_ptr, indices, is_grad) { this->element_shape = element_shape; this->element_dim = element_dim; + this->ndim = ndim; } GlobalPtrStmt::GlobalPtrStmt(SNode *snode, diff --git a/taichi/ir/statements.h b/taichi/ir/statements.h index 04fe2175f1838..f3bfd718207e9 100644 --- a/taichi/ir/statements.h +++ b/taichi/ir/statements.h @@ -333,7 +333,13 @@ class AtomicOpStmt : public Stmt, class ExternalPtrStmt : public Stmt { public: Stmt *base_ptr; + std::vector indices; + + // Number of dimensions of external shape + int ndim; + + // Shape of element type std::vector element_shape; // AOS: element_dim < 0 // SOA: element_dim > 0 @@ -352,6 +358,7 @@ class ExternalPtrStmt : public Stmt { ExternalPtrStmt(Stmt *base_ptr, const std::vector &indices, + int ndim, const std::vector &element_shape, int element_dim, bool is_grad = false); diff --git a/taichi/transforms/auto_diff.cpp b/taichi/transforms/auto_diff.cpp index 67a4b08eb29e3..d15c90fb1f7fa 100644 --- a/taichi/transforms/auto_diff.cpp +++ 
b/taichi/transforms/auto_diff.cpp @@ -1518,9 +1518,10 @@ class MakeAdjoint : public ADTransform { "Cannot automatically differentiate through a grad " "tensor, if you really want to do that, pass the grad " "tensor into the kernel directly"); - auto adj_ptr = insert( - src->base_ptr, src->indices, src->element_shape, src->element_dim, - /*is_grad=*/true); + auto adj_ptr = + insert(src->base_ptr, src->indices, src->ndim, + src->element_shape, src->element_dim, + /*is_grad=*/true); adj_ptr->ret_type = src->ret_type; if (is_ptr_offset) { @@ -1592,9 +1593,10 @@ class MakeAdjoint : public ADTransform { "Cannot automatically differentiate through a grad " "tensor, if you really want to do that, pass the grad " "tensor into the kernel directly"); - adjoint_ptr = insert( - dest->base_ptr, dest->indices, dest->element_shape, dest->element_dim, - /*is_grad=*/true); + adjoint_ptr = + insert(dest->base_ptr, dest->indices, dest->ndim, + dest->element_shape, dest->element_dim, + /*is_grad=*/true); adjoint_ptr->ret_type = dest->ret_type; if (is_ptr_offset) { @@ -1659,7 +1661,7 @@ class MakeAdjoint : public ADTransform { "tensor, if you really want to do that, pass the grad " "tensor into the kernel directly"); auto adjoint_ptr = - insert(dest->base_ptr, dest->indices, + insert(dest->base_ptr, dest->indices, dest->ndim, dest->element_shape, dest->element_dim, /*is_grad=*/true); adjoint_ptr->ret_type = dest->ret_type; diff --git a/taichi/transforms/check_out_of_bound.cpp b/taichi/transforms/check_out_of_bound.cpp index 4bd5dd51e51af..79acfff34a754 100644 --- a/taichi/transforms/check_out_of_bound.cpp +++ b/taichi/transforms/check_out_of_bound.cpp @@ -59,17 +59,22 @@ class CheckOutOfBound : public BasicStmtVisitor { auto check_lower_bound = new_stmts.push_back( BinaryOpType::cmp_ge, stmt->indices[i], lower_bound); Stmt *upper_bound{nullptr}; - // TODO: Simplify logic here since SOA layout for ndarray is deprecated - if ((stmt->element_dim < 0 && i == (stmt->indices.size() - 1)) || - (stmt->element_dim > 0 && i == 0)) { - upper_bound = - new_stmts.push_back(TypedConstant(flattened_element)); - } else { - auto axis = stmt->element_dim <= 0 ? i : (i - stmt->element_dim); + + // SOA layout for ndarray is deprecated, assert it's AOS layout + TI_ASSERT(stmt->element_dim <= 0); + auto ndim = stmt->ndim; + if (i < ndim) { + // Check for External Shape + auto axis = i; upper_bound = new_stmts.push_back( /*axis=*/axis, /*arg_id=*/stmt->base_ptr->as()->arg_id); + } else { + // Check for Element Shape + upper_bound = + new_stmts.push_back(TypedConstant(flattened_element)); } + auto check_upper_bound = new_stmts.push_back( BinaryOpType::cmp_lt, stmt->indices[i], upper_bound); auto check_i = new_stmts.push_back( diff --git a/taichi/transforms/scalarize.cpp b/taichi/transforms/scalarize.cpp index 69073000b5329..25253f29b00d8 100644 --- a/taichi/transforms/scalarize.cpp +++ b/taichi/transforms/scalarize.cpp @@ -1119,9 +1119,9 @@ class MergeExternalAndMatrixPtr : public BasicStmtVisitor { std::accumulate(begin(origin->element_shape), end(origin->element_shape), 1, std::multiplies<>())}; - auto fused = std::make_unique(origin->base_ptr, indices, - element_shape, element_dim, - origin->is_grad); + auto fused = std::make_unique( + origin->base_ptr, indices, origin->ndim, element_shape, element_dim, + origin->is_grad); fused->ret_type = stmt->ret_type; // Note: Update base_ptr's ret_type so that it matches the ExternalPtrStmt // with flattened indices. 
Main goal is to keep all the hacks in a single diff --git a/taichi/transforms/vectorize_half2.cpp b/taichi/transforms/vectorize_half2.cpp index 3a9358937e1a6..d159092357b66 100644 --- a/taichi/transforms/vectorize_half2.cpp +++ b/taichi/transforms/vectorize_half2.cpp @@ -355,7 +355,8 @@ class Half2Vectorize : public BasicStmtVisitor { std::vector element_shape = {2}; int element_dim = -1; auto new_extern_stmt = std::make_unique( - self_ptr, new_indices, element_shape, element_dim); + self_ptr, new_indices, self_extern_stmt->ndim, element_shape, + element_dim); new_extern_stmt->overrided_dtype = true; new_extern_stmt->ret_type = tensor_type; new_extern_stmt->ret_type.set_is_pointer(true); diff --git a/tests/python/test_ndarray.py b/tests/python/test_ndarray.py index 476b9df320222..8f04ed2ad6ba8 100644 --- a/tests/python/test_ndarray.py +++ b/tests/python/test_ndarray.py @@ -6,6 +6,7 @@ from taichi.lang.exception import TaichiIndexError, TaichiTypeError from taichi.lang.misc import get_host_arch_list from taichi.lang.util import has_pytorch +from taichi.math import vec3, ivec3 import taichi as ti from tests import test_utils @@ -779,10 +780,23 @@ def test_matrix_ndarray_oob(): def access_arr(input: ti.types.ndarray(), p: ti.i32, q: ti.i32, x: ti.i32, y: ti.i32) -> ti.f32: return input[p, q][x, y] + @ti.kernel + def valid_access(indices: ti.types.ndarray(dtype=ivec3, ndim=1), dummy: ti.types.ndarray(dtype=ivec3, ndim=1)): + for i in indices: + index_vec = ti.Vector([0, 0, 0]) + for j in ti.static(range(3)): + index = indices[i][j] + index_vec[j] = index + dummy[i] = index_vec + input = ti.ndarray(dtype=ti.math.mat2, shape=(4, 5)) + indices = ti.ndarray(dtype=ivec3, shape=(10)) + dummy = ti.ndarray(dtype=ivec3, shape=(10)) + # Works access_arr(input, 2, 3, 0, 1) + valid_access(indices, dummy) # element_shape with pytest.raises(AssertionError, match=r"Out of bound access"): From 407d0768b4984d065a86f6baf915b962e6dd96b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A7=8B=E4=BA=91=E6=9C=AA=E4=BA=91?= Date: Mon, 15 May 2023 13:19:43 +0800 Subject: [PATCH 11/18] [Misc] Make clang-tidy happy on 'explicit' (#7999) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Brief Summary This PR tries to resolve the problem during ` Build and Test / Check Static Analyzer (pull_request) `. This check takes too much time and prints out thousands of lines of 'explicit errors' like this: ``` error: single-argument constructors must be marked explicit to avoid unintentional implicit conversions [google-explicit-constructor,-warnings-as-errors] one_or_more(Container &&value) : var(std::move(value)) { ^ explicit ``` This PR also resolves `error: copy constructor should not be declared explicit` on `GLResourceSet`. ### Walkthrough ### 🤖 Generated by Copilot at cbf3891 * Suppress clang-tidy warning for single-argument constructors of `one_or_more` class by adding `// NOLINTNEXTLINE` comments ([link](https://github.com/taichi-dev/taichi/pull/7999/files?diff=unified&w=0#diff-6230f8e8c6a8a297f8900dd0f7e212097b07ee7b64fc7fa5a5ffee5af47211a8L14-R34), ). This class is defined in `taichi/common/one_or_more.h` and allows holding either a single value or a container of values. 
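
For context, the reason these constructors must stay implicit (and hence be NOLINT'd rather than marked `explicit`) can be seen in a stripped-down sketch; `fetch` is a hypothetical consumer, and the template parameters are simplified relative to the real `one_or_more`:

```cpp
#include <string>
#include <utility>
#include <variant>
#include <vector>

template <typename T, typename Container = std::vector<T>>
struct one_or_more {
  std::variant<T, Container> var;
  // NOLINTNEXTLINE  (implicit by design; see the call sites below)
  one_or_more(T const &value) : var(value) {}
  // NOLINTNEXTLINE
  one_or_more(Container &&value) : var(std::move(value)) {}
};

void fetch(one_or_more<std::string> urls) { /* ... */ }

int main() {
  fetch(std::string("https://example.com/a.zip"));    // a single value...
  fetch(std::vector<std::string>{"a.zip", "b.zip"});  // ...or a container
}
```

Marking either constructor `explicit`, as the check suggests, would reject both calls, which is exactly the conversion `one_or_more` exists to provide; hence the suppression.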
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- taichi/common/one_or_more.h | 6 ++++++ taichi/rhi/amdgpu/amdgpu_context.h | 2 +- taichi/rhi/llvm/device_memory_pool.h | 2 +- taichi/rhi/opengl/opengl_device.h | 2 +- taichi/rhi/vulkan/vulkan_device.h | 2 +- taichi/transforms/make_cpu_multithreaded_range_for.cpp | 3 ++- 6 files changed, 12 insertions(+), 5 deletions(-) diff --git a/taichi/common/one_or_more.h b/taichi/common/one_or_more.h index 472f0629b8ec8..bb7ad75a14d33 100644 --- a/taichi/common/one_or_more.h +++ b/taichi/common/one_or_more.h @@ -11,21 +11,27 @@ struct one_or_more { std::variant var; + // NOLINTNEXTLINE one_or_more(value_type const &value) : var(value) { } + // NOLINTNEXTLINE one_or_more(value_type &value) : var(value) { } + // NOLINTNEXTLINE one_or_more(value_type &&value) : var(std::move(value)) { } + // NOLINTNEXTLINE one_or_more(Container const &value) : var(value) { } + // NOLINTNEXTLINE one_or_more(Container &value) : var(value) { } + // NOLINTNEXTLINE one_or_more(Container &&value) : var(std::move(value)) { } diff --git a/taichi/rhi/amdgpu/amdgpu_context.h b/taichi/rhi/amdgpu/amdgpu_context.h index affef71aa2ddd..6688eb91a5667 100644 --- a/taichi/rhi/amdgpu/amdgpu_context.h +++ b/taichi/rhi/amdgpu/amdgpu_context.h @@ -94,7 +94,7 @@ class AMDGPUContext { void *new_ctx_; public: - ContextGuard(AMDGPUContext *new_ctx) + explicit ContextGuard(AMDGPUContext *new_ctx) : old_ctx_(nullptr), new_ctx_(new_ctx) { AMDGPUDriver::get_instance().context_get_current(&old_ctx_); if (old_ctx_ != new_ctx) diff --git a/taichi/rhi/llvm/device_memory_pool.h b/taichi/rhi/llvm/device_memory_pool.h index 0ccb5ae77f338..f5081defb2c57 100644 --- a/taichi/rhi/llvm/device_memory_pool.h +++ b/taichi/rhi/llvm/device_memory_pool.h @@ -24,7 +24,7 @@ class TI_DLL_EXPORT DeviceMemoryPool { void *allocate(std::size_t size, std::size_t alignment, bool managed = false); void release(std::size_t size, void *ptr, bool release_raw = false); void reset(); - DeviceMemoryPool(bool merge_upon_release); + explicit DeviceMemoryPool(bool merge_upon_release); ~DeviceMemoryPool(); protected: diff --git a/taichi/rhi/opengl/opengl_device.h b/taichi/rhi/opengl/opengl_device.h index 514ec98b13426..3c5008cee2199 100644 --- a/taichi/rhi/opengl/opengl_device.h +++ b/taichi/rhi/opengl/opengl_device.h @@ -29,7 +29,7 @@ extern void *kGetOpenglProcAddr; class GLResourceSet : public ShaderResourceSet { public: GLResourceSet() = default; - explicit GLResourceSet(const GLResourceSet &other) = default; + GLResourceSet(const GLResourceSet &other) = default; ~GLResourceSet() override; diff --git a/taichi/rhi/vulkan/vulkan_device.h b/taichi/rhi/vulkan/vulkan_device.h index 1e7a4fd8027e1..71b33ecdf6b6e 100644 --- a/taichi/rhi/vulkan/vulkan_device.h +++ b/taichi/rhi/vulkan/vulkan_device.h @@ -239,7 +239,7 @@ class VulkanResourceSet : public ShaderResourceSet { class VulkanRasterResources : public RasterResources { public: - VulkanRasterResources(VulkanDevice *device) : device_(device) { + explicit VulkanRasterResources(VulkanDevice *device) : device_(device) { } struct BufferBinding { diff --git a/taichi/transforms/make_cpu_multithreaded_range_for.cpp b/taichi/transforms/make_cpu_multithreaded_range_for.cpp index 8ada2a9c70c88..8528b0ba1685a 100644 --- a/taichi/transforms/make_cpu_multithreaded_range_for.cpp +++ b/taichi/transforms/make_cpu_multithreaded_range_for.cpp @@ -48,7 +48,8 @@ using TaskType = OffloadedStmt::TaskType; class MakeCPUMultithreadedRangeFor : public 
BasicStmtVisitor { public: - MakeCPUMultithreadedRangeFor(const CompileConfig &config) : config(config) { + explicit MakeCPUMultithreadedRangeFor(const CompileConfig &config) + : config(config) { } void visit(Block *block) override { From 54e952e3f15c144caec985e8989a99850fdfafeb Mon Sep 17 00:00:00 2001 From: lin-hitonami Date: Thu, 11 May 2023 17:47:02 +0800 Subject: [PATCH 12/18] [refactor] Let the type of reference arguments be a pointer ghstack-source-id: 2306463f766056172c4175cf4b3675de592a1f2b Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7982 --- taichi/ir/frontend_ir.cpp | 33 +++++++++++++++++++++----------- taichi/ir/frontend_ir.h | 2 ++ taichi/transforms/scalarize.cpp | 4 ++++ taichi/transforms/type_check.cpp | 7 +------ 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/taichi/ir/frontend_ir.cpp b/taichi/ir/frontend_ir.cpp index 8b152dcb1fa4f..641c9077a2576 100644 --- a/taichi/ir/frontend_ir.cpp +++ b/taichi/ir/frontend_ir.cpp @@ -140,6 +140,9 @@ FrontendWhileStmt::FrontendWhileStmt(const FrontendWhileStmt &o) void ArgLoadExpression::type_check(const CompileConfig *) { ret_type = dt; + if (is_ptr) { + ret_type = TypeFactory::get_instance().get_pointer_type(ret_type, false); + } if (!create_load) { ret_type = TypeFactory::get_instance().get_pointer_type(ret_type, false); } @@ -960,7 +963,7 @@ void AtomicOpExpression::type_check(const CompileConfig *config) { }; // Broadcast val to dest if neccessary - auto val_dtype = val->ret_type; + auto val_dtype = get_rvalue_dtype(val); auto dest_dtype = dest->ret_type.ptr_removed(); if (dest_dtype->is() && val_dtype->is()) { error(); @@ -973,20 +976,18 @@ void AtomicOpExpression::type_check(const CompileConfig *config) { } // Validate dtype - auto dtype = val->ret_type; - if (dtype->is()) { - dtype = dtype.get_element_type(); + if (val_dtype->is()) { + val_dtype = val_dtype.get_element_type(); } - if (!dtype->is()) { + if (!val_dtype->is()) { error(); } - if (is_quant(dest->ret_type)) { - ret_type = dest->ret_type->get_compute_type(); - } else if (dest->ret_type->is() || - dest->ret_type->is()) { - ret_type = dest->ret_type; + if (is_quant(dest_dtype)) { + ret_type = dest_dtype->get_compute_type(); + } else if (dest_dtype->is() || dest_dtype->is()) { + ret_type = dest_dtype; } else { error(); } @@ -1271,7 +1272,7 @@ void MeshIndexConversionExpression::flatten(FlattenContext *ctx) { } void ReferenceExpression::type_check(const CompileConfig *) { - ret_type = var->ret_type; + ret_type = TypeFactory::get_instance().get_pointer_type(var->ret_type); } void ReferenceExpression::flatten(FlattenContext *ctx) { @@ -1796,4 +1797,14 @@ Stmt *flatten_rvalue(Expr ptr, Expression::FlattenContext *ctx) { return ptr_stmt; } +DataType get_rvalue_dtype(Expr expr) { + if (auto argload = expr.cast()) { + if (argload->is_ptr) { + return argload->ret_type.ptr_removed(); + } + return argload->ret_type; + } + return expr->ret_type; +} + } // namespace taichi::lang diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index d0b278a3de499..346e2140f9f02 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -1108,4 +1108,6 @@ Stmt *flatten_lvalue(Expr expr, Expression::FlattenContext *ctx); Stmt *flatten_rvalue(Expr expr, Expression::FlattenContext *ctx); +DataType get_rvalue_dtype(Expr expr); + } // namespace taichi::lang diff --git a/taichi/transforms/scalarize.cpp b/taichi/transforms/scalarize.cpp index 25253f29b00d8..ba426be5baa2c 100644 --- a/taichi/transforms/scalarize.cpp +++ 
b/taichi/transforms/scalarize.cpp @@ -608,10 +608,14 @@ class Scalarize : public BasicStmtVisitor { } void visit(ArgLoadStmt *stmt) override { + if (!stmt->ret_type.is_pointer()) { + return; + } if (stmt->ret_type.ptr_removed()->is()) { return; } auto ret_type = stmt->ret_type.ptr_removed().get_element_type(); + ret_type = TypeFactory::get_instance().get_pointer_type(ret_type); auto arg_load = std::make_unique( stmt->arg_id, ret_type, stmt->is_ptr, stmt->create_load); diff --git a/taichi/transforms/type_check.cpp b/taichi/transforms/type_check.cpp index 8d59faf8cc30e..e979f11d3b5e5 100644 --- a/taichi/transforms/type_check.cpp +++ b/taichi/transforms/type_check.cpp @@ -431,12 +431,7 @@ class TypeCheck : public IRVisitor { } void visit(ArgLoadStmt *stmt) override { - // TODO: Maybe have a type_inference() pass, which takes in the args/rets - // defined by the kernel. After that, type_check() pass will purely do - // verification, without modifying any types. - if (stmt->is_ptr) { - stmt->ret_type.set_is_pointer(true); - } + // Do nothing } void visit(ReturnStmt *stmt) override { From d77c798b06e30691ef8ef67f0c6152248c448235 Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Mon, 15 May 2023 13:41:25 +0800 Subject: [PATCH 13/18] [Lang] Migrate irpass::scalarize() after irpass::demote_no_access_mesh_fors() (#7956) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue: # ### Brief Summary ### 🤖 Generated by Copilot at 8f62992 This pull request improves the performance and memory efficiency of matrix operations on global and external variables in Taichi kernels. It enhances several IR passes, such as `scalarize`, `die`, `DemoteAtomics`, and `cache_loop_invariant_global_vars`, to handle matrix operations better on different backends. It also fixes a bug in the `ExtractLocalPointers` pass that caused a crash on offloaded tasks. 
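For intuition, the access pattern this reordering targets looks roughly like the sketch below (a hypothetical kernel for illustration only; the field and kernel names are not from this PR). If `scalarize()` ran first, the loop-invariant matrix load would already be split into per-element global loads, and the caching pass could no longer hoist it as a single matrix access:

```python
import taichi as ti

ti.init(arch=ti.cuda)

m = ti.Matrix.field(3, 3, ti.f32, shape=())

@ti.kernel
def transform(v: ti.types.ndarray(dtype=ti.math.vec3, ndim=1)):
    for i in range(v.shape[0]):
        # m[None] is loop-invariant: with scalarize() running after
        # cache_loop_invariant_global_vars(), the whole matrix is cached
        # into a local variable once and then scalarized, instead of being
        # expanded into nine separate global loads on every iteration.
        v[i] = m[None] @ v[i]
```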
### Walkthrough ### 🤖 Generated by Copilot at 8f62992 * Extend `CacheLoopInvariantGlobalVars` pass to support matrix operations on global and external variables ([link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-b811d49ff1b631b511a9e64d42aa77d96c85d8a1d55e2088e7dd5b4a1c3c6a2fL57-R70), [link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-b811d49ff1b631b511a9e64d42aa77d96c85d8a1d55e2088e7dd5b4a1c3c6a2fL72-R99)) * Move scalarization of matrix operations to `offload_to_executable` function, after caching some global variables as local variables ([link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-8fde186587db97b3bbc8a856e59bc4467b30257335b0fad064b4eebd521a912bL138-L145), [link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-8fde186587db97b3bbc8a856e59bc4467b30257335b0fad064b4eebd521a912bR205-R212)) * Extend `DemoteAtomics` pass to support matrix operations on global and external variables ([link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-5fe31716eccdda9061aa4d74f5ce21a276137f7e545014ed8d1ff09a1bfdee14L42-R56), [link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-5fe31716eccdda9061aa4d74f5ce21a276137f7e545014ed8d1ff09a1bfdee14L69-R102)) * Fix a bug in `ExtractLocalPointers` pass that caused a crash when applied to an offloaded task ([link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-97b0d9ab204b703802b3b5d04d036d30f66b34b726128216faf0d8a2a8564528L1052-R1057)) * Add empty lines for readability in `compile_to_offloads.cpp` and `demote_atomics.cpp` ([link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-8fde186587db97b3bbc8a856e59bc4467b30257335b0fad064b4eebd521a912bR185), [link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-5fe31716eccdda9061aa4d74f5ce21a276137f7e545014ed8d1ff09a1bfdee14R117), [link](https://github.com/taichi-dev/taichi/pull/7956/files?diff=unified&w=0#diff-5fe31716eccdda9061aa4d74f5ce21a276137f7e545014ed8d1ff09a1bfdee14R125)) --- .../cache_loop_invariant_global_vars.cpp | 32 +++++++++++++++++-- taichi/transforms/compile_to_offloads.cpp | 16 +++++----- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/taichi/transforms/cache_loop_invariant_global_vars.cpp b/taichi/transforms/cache_loop_invariant_global_vars.cpp index 3966bd4d9d181..3ba7cc886d3af 100644 --- a/taichi/transforms/cache_loop_invariant_global_vars.cpp +++ b/taichi/transforms/cache_loop_invariant_global_vars.cpp @@ -54,7 +54,20 @@ class CacheLoopInvariantGlobalVars : public LoopInvariantDetector { if (current_offloaded->task_type == OffloadedTaskType::serial) { return true; } - if (auto global_ptr = stmt->cast()) { + + // Handle GlobalPtrStmt + bool is_global_ptr_stmt = false; + GlobalPtrStmt *global_ptr = nullptr; + if (stmt->is()) { + is_global_ptr_stmt = true; + global_ptr = stmt->as(); + } else if (stmt->is() && + stmt->as()->origin->is()) { + is_global_ptr_stmt = true; + global_ptr = stmt->as()->origin->as(); + } + + if (global_ptr) { auto snode = global_ptr->snode; if (loop_unique_ptr_[snode] == nullptr || loop_unique_ptr_[snode]->indices.empty()) { @@ -69,8 +82,21 @@ class CacheLoopInvariantGlobalVars : public LoopInvariantDetector { return false; } return true; - } else if (stmt->is()) { - ExternalPtrStmt *dest_ptr = stmt->as(); + } + + // Handle 
ExternalPtrStmt + bool is_external_ptr_stmt = false; + ExternalPtrStmt *dest_ptr = nullptr; + if (stmt->is()) { + is_external_ptr_stmt = true; + dest_ptr = stmt->as(); + } else if (stmt->is() && + stmt->as()->origin->is()) { + is_external_ptr_stmt = true; + dest_ptr = stmt->as()->origin->as(); + } + + if (is_external_ptr_stmt) { if (dest_ptr->indices.empty()) { return false; } diff --git a/taichi/transforms/compile_to_offloads.cpp b/taichi/transforms/compile_to_offloads.cpp index 14fb925dd8414..f162f5975274f 100644 --- a/taichi/transforms/compile_to_offloads.cpp +++ b/taichi/transforms/compile_to_offloads.cpp @@ -183,14 +183,6 @@ void offload_to_executable(IRNode *ir, print("Atomics demoted I"); irpass::analysis::verify(ir); - if (config.real_matrix_scalarize) { - irpass::scalarize(ir); - - // Remove redundant MatrixInitStmt inserted during scalarization - irpass::full_simplify(ir, config, {false, /*autodiff_enabled*/ false}); - print("Scalarized"); - } - if (config.cache_loop_invariant_global_vars) { irpass::cache_loop_invariant_global_vars(ir, config); print("Cache loop-invariant global vars"); @@ -218,6 +210,14 @@ void offload_to_executable(IRNode *ir, irpass::analysis::verify(ir); } + if (config.real_matrix_scalarize) { + irpass::scalarize(ir); + + // Remove redundant MatrixInitStmt inserted during scalarization + irpass::full_simplify(ir, config, {false, /*autodiff_enabled*/ false}); + print("Scalarized"); + } + if (make_thread_local) { irpass::make_thread_local(ir, config); print("Make thread local"); From bf7998bc53eeb2a5e6fe1b942c8f40105ba2f321 Mon Sep 17 00:00:00 2001 From: PENGUINLIONG Date: Tue, 16 May 2023 10:24:57 +0800 Subject: [PATCH 14/18] [aot] Export aot kernels with decorator properly (#8016) --- python/taichi/aot/_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/taichi/aot/_export.py b/python/taichi/aot/_export.py index 0af32c5fcb938..37889a70fed91 100644 --- a/python/taichi/aot/_export.py +++ b/python/taichi/aot/_export.py @@ -23,4 +23,4 @@ def inner(f): def export(f): - export_as(f.__name__)(f) + return export_as(f.__name__)(f) From 1b84a2e8cc6f1f07012c1044a001466eb087a009 Mon Sep 17 00:00:00 2001 From: listerily Date: Fri, 12 May 2023 18:05:50 +0800 Subject: [PATCH 15/18] [lang] Added ti.u1 definition ghstack-source-id: ac52abfd5e136811d4a7f3b86d5b36362ed94f4f Pull Request resolved: https://github.com/taichi-dev/taichi/pull/7995 --- python/taichi/lang/util.py | 15 +++++++- python/taichi/types/primitive_types.py | 16 ++++++++- taichi/codegen/llvm/codegen_llvm.cpp | 3 ++ taichi/codegen/spirv/spirv_codegen.cpp | 3 ++ taichi/codegen/spirv/spirv_ir_builder.cpp | 10 ++++-- taichi/codegen/spirv/spirv_types.cpp | 12 ++++++- taichi/common/core.h | 2 ++ taichi/common/types.h | 2 ++ taichi/inc/data_type_with_c_type.inc.h | 3 +- taichi/ir/expr.cpp | 4 +++ taichi/ir/expr.h | 2 ++ taichi/ir/frontend_ir.h | 9 ++--- taichi/ir/type.cpp | 11 ++++++ taichi/ir/type.h | 7 ++++ taichi/ir/type_utils.cpp | 5 ++- taichi/ir/type_utils.h | 7 ++++ taichi/program/kernel.cpp | 2 ++ taichi/program/launch_context_builder.cpp | 2 ++ taichi/python/export_lang.cpp | 3 ++ taichi/runtime/gfx/runtime.cpp | 1 + .../runtime/llvm/runtime_module/runtime.cpp | 2 ++ tests/python/test_api.py | 2 ++ tests/python/test_pow.py | 36 ++++++++++--------- 23 files changed, 130 insertions(+), 29 deletions(-) diff --git a/python/taichi/lang/util.py b/python/taichi/lang/util.py index b4cb76b4a06a9..e604de64962ca 100644 --- a/python/taichi/lang/util.py +++ 
b/python/taichi/lang/util.py @@ -16,6 +16,7 @@ i16, i32, i64, + u1, u8, u16, u32, @@ -119,6 +120,8 @@ def to_numpy_type(dt): return np.int8 if dt == i16: return np.int16 + if dt == u1: + return np.bool_ if dt == u8: return np.uint8 if dt == u16: @@ -157,6 +160,8 @@ def to_pytorch_type(dt): return torch.int8 if dt == i16: return torch.int16 + if dt == u1: + return torch.bool if dt == u8: return torch.uint8 if dt == f16: @@ -190,6 +195,8 @@ def to_paddle_type(dt): return paddle.int8 if dt == i16: return paddle.int16 + if dt == u1: + return paddle.bool if dt == u8: return paddle.uint8 if dt == f16: @@ -224,6 +231,8 @@ def to_taichi_type(dt): return i8 if dt == np.int16: return i16 + if dt == np.bool_: + return u1 if dt == np.uint8: return u8 if dt == np.uint16: @@ -251,6 +260,8 @@ def to_taichi_type(dt): return i8 if dt == torch.int16: return i16 + if dt == torch.bool: + return u1 if dt == torch.uint8: return u8 if dt == torch.float16: @@ -273,6 +284,8 @@ def to_taichi_type(dt): return i8 if dt == paddle.int16: return i16 + if dt == paddle.bool: + return u1 if dt == paddle.uint8: return u8 if dt == paddle.float16: @@ -293,7 +306,7 @@ def cook_dtype(dtype): if dtype is int: return impl.get_runtime().default_ip if dtype is bool: - return i32 # TODO[Xiaoyan]: Use i1 in the future + return i32 # TODO(zhantong): Replace it with u1 raise ValueError(f"Invalid data type {dtype}") diff --git a/python/taichi/types/primitive_types.py b/python/taichi/types/primitive_types.py index d631067396086..aad85d6df51da 100644 --- a/python/taichi/types/primitive_types.py +++ b/python/taichi/types/primitive_types.py @@ -99,6 +99,18 @@ # ---------------------------------------- +uint1 = ti_python_core.DataType_u1 +"""1-bit unsigned integer data type. Same as booleans. +""" + +# ---------------------------------------- + +u1 = uint1 +"""Alias for :const:`~taichi.types.primitive_types.uint1` +""" + +# ---------------------------------------- + u8 = uint8 """Alias for :const:`~taichi.types.primitive_types.uint8` """ @@ -154,7 +166,7 @@ def ref(tp): real_types = [f16, f32, f64, float] real_type_ids = [id(t) for t in real_types] -integer_types = [i8, i16, i32, i64, u8, u16, u32, u64, int, bool] +integer_types = [i8, i16, i32, i64, u1, u8, u16, u32, u64, int, bool] integer_type_ids = [id(t) for t in integer_types] all_types = real_types + integer_types @@ -175,6 +187,8 @@ def ref(tp): "i32", "int64", "i64", + "uint1", + "u1", "uint8", "u8", "uint16", diff --git a/taichi/codegen/llvm/codegen_llvm.cpp b/taichi/codegen/llvm/codegen_llvm.cpp index 94a8dc459217b..2fefbeb247892 100644 --- a/taichi/codegen/llvm/codegen_llvm.cpp +++ b/taichi/codegen/llvm/codegen_llvm.cpp @@ -1015,6 +1015,9 @@ void TaskCodeGenLLVM::visit(ConstStmt *stmt) { } else if (val.dt->is_primitive(PrimitiveTypeID::f64)) { llvm_val[stmt] = llvm::ConstantFP::get(*llvm_context, llvm::APFloat(val.val_float64())); + } else if (val.dt->is_primitive(PrimitiveTypeID::u1)) { + llvm_val[stmt] = llvm::ConstantInt::get( + *llvm_context, llvm::APInt(1, (uint64)val.val_uint1(), false)); } else if (val.dt->is_primitive(PrimitiveTypeID::i8)) { llvm_val[stmt] = llvm::ConstantInt::get( *llvm_context, llvm::APInt(8, (uint64)val.val_int8(), true)); diff --git a/taichi/codegen/spirv/spirv_codegen.cpp b/taichi/codegen/spirv/spirv_codegen.cpp index 26c3cffd41447..38348e4c1d727 100644 --- a/taichi/codegen/spirv/spirv_codegen.cpp +++ b/taichi/codegen/spirv/spirv_codegen.cpp @@ -246,6 +246,9 @@ class TaskCodegen : public IRVisitor { } else if (dt->is_primitive(PrimitiveTypeID::i16)) 
{ return ir_->int_immediate_number( stype, static_cast(const_val.val_i16), false); + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + return ir_->uint_immediate_number( + stype, static_cast(const_val.val_u1), false); } else if (dt->is_primitive(PrimitiveTypeID::u8)) { return ir_->uint_immediate_number( stype, static_cast(const_val.val_u8), false); diff --git a/taichi/codegen/spirv/spirv_ir_builder.cpp b/taichi/codegen/spirv/spirv_ir_builder.cpp index 2b48e2a3edbd4..65f5cf159ffc1 100644 --- a/taichi/codegen/spirv/spirv_ir_builder.cpp +++ b/taichi/codegen/spirv/spirv_ir_builder.cpp @@ -377,6 +377,8 @@ SType IRBuilder::get_primitive_uint_type(const DataType &dt) const { } else if (dt == PrimitiveType::i16 || dt == PrimitiveType::u16 || dt == PrimitiveType::f16) { return t_uint16_; + } else if (dt == PrimitiveType::u1) { + return t_bool_; } else { return t_uint8_; } @@ -392,6 +394,8 @@ DataType IRBuilder::get_taichi_uint_type(const DataType &dt) const { } else if (dt == PrimitiveType::i16 || dt == PrimitiveType::u16 || dt == PrimitiveType::f16) { return PrimitiveType::u16; + } else if (dt == PrimitiveType::u1) { + return PrimitiveType::u1; } else { return PrimitiveType::u8; } @@ -1090,10 +1094,10 @@ DEFINE_BUILDER_CMP_OP(ge, GreaterThanEqual); Value IRBuilder::_OpName(Value a, Value b) { \ TI_ASSERT(a.stype.id == b.stype.id); \ const auto &bool_type = t_bool_; /* TODO: Only scalar supported now */ \ - if (is_integral(a.stype.dt)) { \ - return make_value(spv::OpI##_Op, bool_type, a, b); \ - } else if (a.stype.id == bool_type.id) { \ + if (a.stype.id == bool_type.id) { \ return make_value(spv::OpLogical##_Op, bool_type, a, b); \ + } else if (is_integral(a.stype.dt)) { \ + return make_value(spv::OpI##_Op, bool_type, a, b); \ } else { \ TI_ASSERT(is_real(a.stype.dt)); \ return make_value(spv::OpFOrd##_Op, bool_type, a, b); \ diff --git a/taichi/codegen/spirv/spirv_types.cpp b/taichi/codegen/spirv/spirv_types.cpp index c47e26824a4f9..c6bff148c36e5 100644 --- a/taichi/codegen/spirv/spirv_types.cpp +++ b/taichi/codegen/spirv/spirv_types.cpp @@ -179,6 +179,14 @@ const tinyir::Type *translate_ti_primitive(tinyir::Block &ir_module, } else if (t == PrimitiveType::i64) { return ir_module.emplace_back(/*num_bits=*/64, /*is_signed=*/true); + } else if (t == PrimitiveType::u1) { + // SPIR-V has no full support for boolean types; using boolean types in + // the backend may cause issues. These issues arise when we use booleans + // as return types, argument types, or the inner dtype of compound types. + // Since boolean types have the same width as int32 in GLSL, we use int32 + // instead. 
+ return ir_module.emplace_back(/*num_bits=*/32, + /*is_signed=*/false); } else if (t == PrimitiveType::u8) { return ir_module.emplace_back(/*num_bits=*/8, /*is_signed=*/false); @@ -395,7 +403,9 @@ class Translate2Spirv : public TypeVisitor { vt = spir_builder_->i64_type(); } } else { - if (type->num_bits() == 8) { + if (type->num_bits() == 1) { + vt = spir_builder_->bool_type(); + } else if (type->num_bits() == 8) { vt = spir_builder_->u8_type(); } else if (type->num_bits() == 16) { vt = spir_builder_->u16_type(); diff --git a/taichi/common/core.h b/taichi/common/core.h index 15c601c56a60b..f112f8f35baad 100644 --- a/taichi/common/core.h +++ b/taichi/common/core.h @@ -133,6 +133,8 @@ class CoreState { // Types //****************************************************************************** +using uint1 = bool; + using uchar = unsigned char; using int8 = int8_t; diff --git a/taichi/common/types.h b/taichi/common/types.h index d71d75e6eff1a..b4728a6daa22d 100644 --- a/taichi/common/types.h +++ b/taichi/common/types.h @@ -4,6 +4,8 @@ namespace taichi { +using uint1 = bool; + using uchar = unsigned char; using int8 = int8_t; diff --git a/taichi/inc/data_type_with_c_type.inc.h b/taichi/inc/data_type_with_c_type.inc.h index 2b12f83cbbd98..5d4963ae90b5d 100644 --- a/taichi/inc/data_type_with_c_type.inc.h +++ b/taichi/inc/data_type_with_c_type.inc.h @@ -1,10 +1,11 @@ -// Doesn't contain f16 and u1. +// Doesn't contain f16. PER_C_TYPE(f32, float32) PER_C_TYPE(f64, float64) PER_C_TYPE(i8, int8) PER_C_TYPE(i16, int16) PER_C_TYPE(i32, int32) PER_C_TYPE(i64, int64) +PER_C_TYPE(u1, uint1) PER_C_TYPE(u8, uint8) PER_C_TYPE(u16, uint16) PER_C_TYPE(u32, uint32) diff --git a/taichi/ir/expr.cpp b/taichi/ir/expr.cpp index f8ef27bbdfc18..0c395dddcda48 100644 --- a/taichi/ir/expr.cpp +++ b/taichi/ir/expr.cpp @@ -49,6 +49,10 @@ void Expr::set_adjoint_checkbit(const Expr &o) { this->cast()->adjoint_checkbit.set(o); } +Expr::Expr(uint1 x) : Expr() { + expr = std::make_shared(PrimitiveType::u1, x); +} + Expr::Expr(int16 x) : Expr() { expr = std::make_shared(PrimitiveType::i16, x); } diff --git a/taichi/ir/expr.h b/taichi/ir/expr.h index 9b59dc036ae47..571d7d136f4fc 100644 --- a/taichi/ir/expr.h +++ b/taichi/ir/expr.h @@ -23,6 +23,8 @@ class Expr { atomic = false; } + explicit Expr(uint1 x); + explicit Expr(int16 x); explicit Expr(int32 x); diff --git a/taichi/ir/frontend_ir.h b/taichi/ir/frontend_ir.h index 346e2140f9f02..1c4a6bb3448fa 100644 --- a/taichi/ir/frontend_ir.h +++ b/taichi/ir/frontend_ir.h @@ -102,10 +102,11 @@ class FrontendSNodeOpStmt : public Stmt { ExprGroup indices; Expr val; - FrontendSNodeOpStmt(SNodeOpType op_type, - SNode *snode, - const ExprGroup &indices, - const Expr &val = Expr(nullptr)); + FrontendSNodeOpStmt( + SNodeOpType op_type, + SNode *snode, + const ExprGroup &indices, + const Expr &val = Expr(std::shared_ptr(nullptr))); TI_DEFINE_ACCEPT TI_DEFINE_CLONE_FOR_FRONTEND_IR diff --git a/taichi/ir/type.cpp b/taichi/ir/type.cpp index eac8111ed3b7a..fb076e2448716 100644 --- a/taichi/ir/type.cpp +++ b/taichi/ir/type.cpp @@ -359,6 +359,8 @@ std::string TypedConstant::stringify() const { return fmt::format("{}", val_i8); } else if (dt->is_primitive(PrimitiveTypeID::i16)) { return fmt::format("{}", val_i16); + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + return fmt::format("{}", val_u1); } else if (dt->is_primitive(PrimitiveTypeID::u8)) { return fmt::format("{}", val_u8); } else if (dt->is_primitive(PrimitiveTypeID::u16)) { @@ -391,6 +393,8 @@ bool 
TypedConstant::equal_type_and_value(const TypedConstant &o) const { return val_i8 == o.val_i8; } else if (dt->is_primitive(PrimitiveTypeID::i16)) { return val_i16 == o.val_i16; + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + return val_u1 == o.val_u1; } else if (dt->is_primitive(PrimitiveTypeID::u8)) { return val_u8 == o.val_u8; } else if (dt->is_primitive(PrimitiveTypeID::u16)) { @@ -440,6 +444,11 @@ int16 &TypedConstant::val_int16() { return val_i16; } +uint1 &TypedConstant::val_uint1() { + TI_ASSERT(get_data_type() == dt); + return val_u1; +} + uint8 &TypedConstant::val_uint8() { TI_ASSERT(get_data_type() == dt); return val_u8; @@ -483,6 +492,8 @@ uint64 TypedConstant::val_uint() const { return val_u64; } else if (dt->is_primitive(PrimitiveTypeID::u8)) { return val_u8; + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + return val_u1; } else if (dt->is_primitive(PrimitiveTypeID::u16)) { return val_u16; } else { diff --git a/taichi/ir/type.h b/taichi/ir/type.h index e7adb08008190..f398bc36e8d3f 100644 --- a/taichi/ir/type.h +++ b/taichi/ir/type.h @@ -529,6 +529,7 @@ class TypedConstant { float64 val_f64; int8 val_i8; int16 val_i16; + uint1 val_u1; uint8 val_u8; uint16 val_u16; uint32 val_u32; @@ -564,6 +565,9 @@ class TypedConstant { explicit TypedConstant(int16 x) : dt(PrimitiveType::i16), val_i16(x) { } + explicit TypedConstant(uint1 x) : dt(PrimitiveType::u1), val_u1(x) { + } + explicit TypedConstant(uint8 x) : dt(PrimitiveType::u8), val_u8(x) { } @@ -594,6 +598,8 @@ class TypedConstant { val_i8 = value; } else if (dt->is_primitive(PrimitiveTypeID::i16)) { val_i16 = value; + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + val_u1 = value; } else if (dt->is_primitive(PrimitiveTypeID::u8)) { val_u8 = value; } else if (dt->is_primitive(PrimitiveTypeID::u16)) { @@ -627,6 +633,7 @@ class TypedConstant { float64 &val_float64(); int8 &val_int8(); int16 &val_int16(); + uint1 &val_uint1(); uint8 &val_uint8(); uint16 &val_uint16(); uint32 &val_uint32(); diff --git a/taichi/ir/type_utils.cpp b/taichi/ir/type_utils.cpp index 2fdb816d518d4..2ad7ceacd2151 100644 --- a/taichi/ir/type_utils.cpp +++ b/taichi/ir/type_utils.cpp @@ -57,6 +57,7 @@ int data_type_size(DataType t) { REGISTER_DATA_TYPE(i16, int16); REGISTER_DATA_TYPE(i32, int32); REGISTER_DATA_TYPE(i64, int64); + REGISTER_DATA_TYPE(u1, uint1); REGISTER_DATA_TYPE(u8, uint8); REGISTER_DATA_TYPE(u16, uint16); REGISTER_DATA_TYPE(u32, uint32); @@ -99,7 +100,9 @@ std::string tensor_type_format(DataType t, Arch arch) { } std::string data_type_format(DataType dt, Arch arch) { - if (dt->is_primitive(PrimitiveTypeID::i8)) { + if (dt->is_primitive(PrimitiveTypeID::u1)) { + return "%d"; + } else if (dt->is_primitive(PrimitiveTypeID::i8)) { // i8/u8 is converted to i16/u16 before printing, because CUDA doesn't // support the "%hhd"/"%hhu" specifiers. 
return "%hd"; diff --git a/taichi/ir/type_utils.h b/taichi/ir/type_utils.h index b644447094410..bb8c687170d6c 100644 --- a/taichi/ir/type_utils.h +++ b/taichi/ir/type_utils.h @@ -38,6 +38,8 @@ inline DataType get_data_type() { return PrimitiveType::i32; } else if (std::is_same()) { return PrimitiveType::i64; + } else if (std::is_same()) { + return PrimitiveType::u1; } else if (std::is_same()) { return PrimitiveType::u8; } else if (std::is_same()) { @@ -101,6 +103,7 @@ inline bool is_integral(DataType dt) { dt->is_primitive(PrimitiveTypeID::i16) || dt->is_primitive(PrimitiveTypeID::i32) || dt->is_primitive(PrimitiveTypeID::i64) || + dt->is_primitive(PrimitiveTypeID::u1) || dt->is_primitive(PrimitiveTypeID::u8) || dt->is_primitive(PrimitiveTypeID::u16) || dt->is_primitive(PrimitiveTypeID::u32) || @@ -146,6 +149,8 @@ inline TypedConstant get_max_value(DataType dt) { return {dt, std::numeric_limits::max()}; } else if (dt->is_primitive(PrimitiveTypeID::i64)) { return {dt, std::numeric_limits::max()}; + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + return {dt, std::numeric_limits::max()}; } else if (dt->is_primitive(PrimitiveTypeID::u8)) { return {dt, std::numeric_limits::max()}; } else if (dt->is_primitive(PrimitiveTypeID::u16)) { @@ -172,6 +177,8 @@ inline TypedConstant get_min_value(DataType dt) { return {dt, std::numeric_limits::lowest()}; } else if (dt->is_primitive(PrimitiveTypeID::i64)) { return {dt, std::numeric_limits::lowest()}; + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + return {dt, std::numeric_limits::lowest()}; } else if (dt->is_primitive(PrimitiveTypeID::u8)) { return {dt, std::numeric_limits::lowest()}; } else if (dt->is_primitive(PrimitiveTypeID::u16)) { diff --git a/taichi/program/kernel.cpp b/taichi/program/kernel.cpp index 7c1a39c246804..869f824a55ad4 100644 --- a/taichi/program/kernel.cpp +++ b/taichi/program/kernel.cpp @@ -73,6 +73,8 @@ T Kernel::fetch_ret(DataType dt, int i) { return (T)program->fetch_result(i); } else if (dt->is_primitive(PrimitiveTypeID::i16)) { return (T)program->fetch_result(i); + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + return (T)program->fetch_result(i); } else if (dt->is_primitive(PrimitiveTypeID::u8)) { return (T)program->fetch_result(i); } else if (dt->is_primitive(PrimitiveTypeID::u16)) { diff --git a/taichi/program/launch_context_builder.cpp b/taichi/program/launch_context_builder.cpp index 1458ae81d6203..cc9c14df5a729 100644 --- a/taichi/program/launch_context_builder.cpp +++ b/taichi/program/launch_context_builder.cpp @@ -104,6 +104,8 @@ void LaunchContextBuilder::set_arg_int(int arg_id, int64 d) { set_arg(arg_id, (int8)d); } else if (dt->is_primitive(PrimitiveTypeID::i16)) { set_arg(arg_id, (int16)d); + } else if (dt->is_primitive(PrimitiveTypeID::u1)) { + set_arg(arg_id, (uint1)d); } else if (dt->is_primitive(PrimitiveTypeID::u8)) { set_arg(arg_id, (uint8)d); } else if (dt->is_primitive(PrimitiveTypeID::u16)) { diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 425e2ad69af73..9e4d11ac12c47 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -943,6 +943,9 @@ void export_lang(py::module &m) { m.def("make_rand_expr", Expr::make); + m.def("make_const_expr_bool", + Expr::make); + m.def("make_const_expr_int", Expr::make); diff --git a/taichi/runtime/gfx/runtime.cpp b/taichi/runtime/gfx/runtime.cpp index c49a912226a36..be9b89eeb9e15 100644 --- a/taichi/runtime/gfx/runtime.cpp +++ b/taichi/runtime/gfx/runtime.cpp @@ -173,6 +173,7 @@ class 
HostDeviceContextBlitter { for (int j = 0; j < num; ++j) { // (penguinliong) Again, it's the module loader's responsibility to // check the data type availability. + TO_HOST(u1, uint1, j) TO_HOST(i8, int8, j) TO_HOST(u8, uint8, j) TO_HOST(i16, int16, j) diff --git a/taichi/runtime/llvm/runtime_module/runtime.cpp b/taichi/runtime/llvm/runtime_module/runtime.cpp index e13fe2de455f1..3a71575ae4243 100644 --- a/taichi/runtime/llvm/runtime_module/runtime.cpp +++ b/taichi/runtime/llvm/runtime_module/runtime.cpp @@ -85,6 +85,7 @@ using int8 = int8_t; using int16 = int16_t; using int32 = int32_t; using int64 = int64_t; +using uint1 = bool; using uint8 = uint8_t; using uint16 = uint16_t; using uint32 = uint32_t; @@ -96,6 +97,7 @@ using i8 = int8; using i16 = int16; using i32 = int32; using i64 = int64; +using u1 = uint1; using u8 = uint8; using u16 = uint16; using u32 = uint32; diff --git a/tests/python/test_api.py b/tests/python/test_api.py index 2710a27ddebc9..7e8bddaf5614a 100644 --- a/tests/python/test_api.py +++ b/tests/python/test_api.py @@ -224,11 +224,13 @@ def _get_expected_matrix_apis(): "template", "tools", "types", + "u1", "u16", "u32", "u64", "u8", "ui", + "uint1", "uint16", "uint32", "uint64", diff --git a/tests/python/test_pow.py b/tests/python/test_pow.py index 2679520134310..86fd59575827c 100644 --- a/tests/python/test_pow.py +++ b/tests/python/test_pow.py @@ -62,23 +62,25 @@ def foo(x: dt, y: ti.template()): foo(10, -10) -@test_utils.test( - debug=True, - advanced_optimization=False, - exclude=[ti.vulkan, ti.metal, ti.opengl, ti.gles], -) -def test_ipow_negative_exp_i32(): - _ipow_negative_exp(ti.i32) - - -@test_utils.test( - debug=True, - advanced_optimization=False, - require=ti.extension.data64, - exclude=[ti.vulkan, ti.metal, ti.opengl, ti.gles], -) -def test_ipow_negative_exp_i64(): - _ipow_negative_exp(ti.i64) +# FIXME(zhantong): Uncomment this test after bool assertion is finished. +# @test_utils.test( +# debug=True, +# advanced_optimization=False, +# exclude=[ti.vulkan, ti.metal, ti.opengl, ti.gles], +# ) +# def test_ipow_negative_exp_i32(): +# _ipow_negative_exp(ti.i32) + + +# FIXME(zhantong): Uncomment this test after bool assertion is finished. +# @test_utils.test( +# debug=True, +# advanced_optimization=False, +# require=ti.extension.data64, +# exclude=[ti.vulkan, ti.metal, ti.opengl, ti.gles], +# ) +# def test_ipow_negative_exp_i64(): +# _ipow_negative_exp(ti.i64) def _test_pow_int_base_int_exp(dt_base, dt_exp): From c48c6e95759783c7699843c61a4fd3af18e411eb Mon Sep 17 00:00:00 2001 From: listerily Date: Mon, 15 May 2023 17:44:17 +0800 Subject: [PATCH 16/18] [ir] Update codegen for `if` `while` `assert` to support type u1. 
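At the Python level, the constructs this touches are ordinary kernel `if`/`while`/`assert` statements; a minimal sketch of the surface being exercised (illustrative only, not a test from this PR):

```python
import taichi as ti

ti.init(arch=ti.cpu, debug=True)

@ti.kernel
def count_even(n: ti.i32) -> ti.i32:
    total = 0
    i = 0
    while i < n:        # `while` conditions are now checked via CreateIsNull
        if i % 2 == 0:  # `if` conditions are now checked via CreateIsNotNull
            total += 1
        i += 1
    assert total <= n   # `assert` now passes a u1 test to taichi_assert
    return total
```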
ghstack-source-id: e83b6e73f1f9ea90e913bd4859d9d962d6782a6c Pull Request resolved: https://github.com/taichi-dev/taichi/pull/8003 --- taichi/codegen/cuda/codegen_cuda.cpp | 4 +++ taichi/codegen/llvm/codegen_llvm.cpp | 35 +++++++++++------- taichi/codegen/spirv/spirv_codegen.cpp | 10 +++--- taichi/runtime/llvm/llvm_context.cpp | 6 +++- .../runtime/llvm/runtime_module/runtime.cpp | 12 +++---- tests/python/test_pow.py | 36 +++++++++---------- 6 files changed, 60 insertions(+), 43 deletions(-) diff --git a/taichi/codegen/cuda/codegen_cuda.cpp b/taichi/codegen/cuda/codegen_cuda.cpp index bacad80ee53d8..46bbf331e4c5d 100644 --- a/taichi/codegen/cuda/codegen_cuda.cpp +++ b/taichi/codegen/cuda/codegen_cuda.cpp @@ -94,6 +94,10 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM { value_type = tlctx->get_data_type(PrimitiveType::u16); value = builder->CreateZExt(value, value_type); } + if (dt->is_primitive(PrimitiveTypeID::u1)) { + value_type = tlctx->get_data_type(PrimitiveType::i32); + value = builder->CreateZExt(value, value_type); + } return std::make_tuple(value, value_type); } diff --git a/taichi/codegen/llvm/codegen_llvm.cpp b/taichi/codegen/llvm/codegen_llvm.cpp index 2fefbeb247892..bb249983483c4 100644 --- a/taichi/codegen/llvm/codegen_llvm.cpp +++ b/taichi/codegen/llvm/codegen_llvm.cpp @@ -197,7 +197,6 @@ void TaskCodeGenLLVM::emit_extra_unary(UnaryOpStmt *stmt) { UNARY_STD(tan) UNARY_STD(tanh) UNARY_STD(sgn) - UNARY_STD(logic_not) UNARY_STD(acos) UNARY_STD(asin) UNARY_STD(cos) @@ -524,6 +523,11 @@ void TaskCodeGenLLVM::visit(UnaryOpStmt *stmt) { } else { llvm_val[stmt] = builder->CreateNeg(input, "neg"); } + } else if (op == UnaryOpType::logic_not) { + llvm_val[stmt] = builder->CreateIsNull(input); + // TODO: (zhantong) remove this zero ext + llvm_val[stmt] = builder->CreateZExt( + llvm_val[stmt], tlctx->get_data_type(PrimitiveType::i32)); } UNARY_INTRINSIC(round) UNARY_INTRINSIC(floor) @@ -618,6 +622,12 @@ void TaskCodeGenLLVM::visit(BinaryOpStmt *stmt) { } else if (op == BinaryOpType::mod) { llvm_val[stmt] = builder->CreateSRem(llvm_val[stmt->lhs], llvm_val[stmt->rhs]); + } else if (op == BinaryOpType::logical_and) { + llvm_val[stmt] = + builder->CreateAnd(llvm_val[stmt->lhs], llvm_val[stmt->rhs]); + } else if (op == BinaryOpType::logical_or) { + llvm_val[stmt] = + builder->CreateOr(llvm_val[stmt->lhs], llvm_val[stmt->rhs]); } else if (op == BinaryOpType::bit_and) { llvm_val[stmt] = builder->CreateAnd(llvm_val[stmt->lhs], llvm_val[stmt->rhs]); @@ -851,10 +861,9 @@ void TaskCodeGenLLVM::visit(BinaryOpStmt *stmt) { void TaskCodeGenLLVM::visit(TernaryOpStmt *stmt) { TI_ASSERT(stmt->op_type == TernaryOpType::select); - llvm_val[stmt] = builder->CreateSelect( - builder->CreateTrunc(llvm_val[stmt->op1], - tlctx->get_data_type(PrimitiveType::u1)), - llvm_val[stmt->op2], llvm_val[stmt->op3]); + llvm_val[stmt] = + builder->CreateSelect(builder->CreateIsNotNull(llvm_val[stmt->op1]), + llvm_val[stmt->op2], llvm_val[stmt->op3]); } void TaskCodeGenLLVM::visit(IfStmt *if_stmt) { @@ -865,9 +874,8 @@ void TaskCodeGenLLVM::visit(IfStmt *if_stmt) { llvm::BasicBlock::Create(*llvm_context, "false_block", func); llvm::BasicBlock *after_if = llvm::BasicBlock::Create(*llvm_context, "after_if", func); - builder->CreateCondBr( - builder->CreateICmpNE(llvm_val[if_stmt->cond], tlctx->get_constant(0)), - true_block, false_block); + llvm::Value *cond = builder->CreateIsNotNull(llvm_val[if_stmt->cond]); + builder->CreateCondBr(cond, true_block, false_block); builder->SetInsertPoint(true_block); if 
(if_stmt->true_statements) { if_stmt->true_statements->accept(this); @@ -959,6 +967,9 @@ void TaskCodeGenLLVM::visit(PrintStmt *stmt) { if (dtype->is_primitive(PrimitiveTypeID::u8)) return builder->CreateZExt(to_print, tlctx->get_data_type(PrimitiveType::u16)); + if (dtype->is_primitive(PrimitiveTypeID::u1)) + return builder->CreateZExt(to_print, + tlctx->get_data_type(PrimitiveType::i32)); return to_print; }; for (auto i = 0; i < stmt->contents.size(); ++i) { @@ -1054,8 +1065,7 @@ void TaskCodeGenLLVM::visit(WhileControlStmt *stmt) { BasicBlock *after_break = BasicBlock::Create(*llvm_context, "after_break", func); TI_ASSERT(current_while_after_loop); - auto cond = - builder->CreateICmpEQ(llvm_val[stmt->cond], tlctx->get_constant(0)); + auto *cond = builder->CreateIsNull(llvm_val[stmt->cond]); builder->CreateCondBr(cond, current_while_after_loop, after_break); builder->SetInsertPoint(after_break); } @@ -1309,7 +1319,7 @@ void TaskCodeGenLLVM::visit(AssertStmt *stmt) { std::vector args; args.emplace_back(get_runtime()); - args.emplace_back(llvm_val[stmt->cond]); + args.emplace_back(builder->CreateIsNotNull(llvm_val[stmt->cond])); args.emplace_back(builder->CreateGlobalStringPtr(stmt->text)); for (int i = 0; i < stmt->args.size(); i++) { @@ -2220,8 +2230,7 @@ void TaskCodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt) { // test whether the current voxel is active or not auto is_active = call(leaf_block, element.get("element"), "is_active", {builder->CreateLoad(loop_index_ty, loop_index)}); - is_active = - builder->CreateTrunc(is_active, llvm::Type::getInt1Ty(*llvm_context)); + is_active = builder->CreateIsNotNull(is_active); exec_cond = builder->CreateAnd(exec_cond, is_active); } diff --git a/taichi/codegen/spirv/spirv_codegen.cpp b/taichi/codegen/spirv/spirv_codegen.cpp index 38348e4c1d727..f03e0fe0feddb 100644 --- a/taichi/codegen/spirv/spirv_codegen.cpp +++ b/taichi/codegen/spirv/spirv_codegen.cpp @@ -1652,9 +1652,10 @@ class TaskCodegen : public IRVisitor { } void visit(IfStmt *if_stmt) override { - spirv::Value cond_v = ir_->query_value(if_stmt->cond->raw_name()); + spirv::Value cond_v = ir_->cast( + ir_->bool_type(), ir_->query_value(if_stmt->cond->raw_name())); spirv::Value cond = - ir_->ne(cond_v, ir_->cast(cond_v.stype, ir_->const_i32_zero_)); + ir_->ne(cond_v, ir_->cast(ir_->bool_type(), ir_->const_i32_zero_)); spirv::Label then_label = ir_->new_label(); spirv::Label merge_label = ir_->new_label(); spirv::Label else_label = ir_->new_label(); @@ -1776,9 +1777,10 @@ class TaskCodegen : public IRVisitor { } void visit(WhileControlStmt *stmt) override { - spirv::Value cond_v = ir_->query_value(stmt->cond->raw_name()); + spirv::Value cond_v = + ir_->cast(ir_->bool_type(), ir_->query_value(stmt->cond->raw_name())); spirv::Value cond = - ir_->eq(cond_v, ir_->cast(cond_v.stype, ir_->const_i32_zero_)); + ir_->eq(cond_v, ir_->cast(ir_->bool_type(), ir_->const_i32_zero_)); spirv::Label then_label = ir_->new_label(); spirv::Label merge_label = ir_->new_label(); diff --git a/taichi/runtime/llvm/llvm_context.cpp b/taichi/runtime/llvm/llvm_context.cpp index cef39f793b7e4..3156e0c72ccb6 100644 --- a/taichi/runtime/llvm/llvm_context.cpp +++ b/taichi/runtime/llvm/llvm_context.cpp @@ -692,6 +692,9 @@ llvm::Value *TaichiLLVMContext::get_constant(DataType dt, T t) { return llvm::ConstantFP::get(llvm::Type::getHalfTy(*ctx), (float32)t); } else if (dt->is_primitive(PrimitiveTypeID::f64)) { return llvm::ConstantFP::get(*ctx, llvm::APFloat((float64)t)); + } else if 
(dt->is_primitive(PrimitiveTypeID::u1)) { + return t ? llvm::ConstantInt::getTrue(*ctx) + : llvm::ConstantInt::getFalse(*ctx); } else if (is_integral(dt)) { if (is_signed(dt)) { return llvm::ConstantInt::get( @@ -721,7 +724,8 @@ llvm::Value *TaichiLLVMContext::get_constant(T t) { std::is_same_v) { return llvm::ConstantFP::get(*ctx, llvm::APFloat(t)); } else if (std::is_same_v) { - return llvm::ConstantInt::get(*ctx, llvm::APInt(1, (uint64)t, true)); + return t ? llvm::ConstantInt::getTrue(*ctx) + : llvm::ConstantInt::getFalse(*ctx); } else if (std::is_same_v || std::is_same_v) { return llvm::ConstantInt::get(*ctx, llvm::APInt(32, (uint64)t, true)); diff --git a/taichi/runtime/llvm/runtime_module/runtime.cpp b/taichi/runtime/llvm/runtime_module/runtime.cpp index 3a71575ae4243..460beb145e7e7 100644 --- a/taichi/runtime/llvm/runtime_module/runtime.cpp +++ b/taichi/runtime/llvm/runtime_module/runtime.cpp @@ -332,9 +332,9 @@ struct LLVMRuntime; constexpr bool enable_assert = true; -void taichi_assert(RuntimeContext *context, i32 test, const char *msg); -void taichi_assert_runtime(LLVMRuntime *runtime, i32 test, const char *msg); -#define TI_ASSERT_INFO(x, msg) taichi_assert(context, (int)(x), msg) +void taichi_assert(RuntimeContext *context, u1 test, const char *msg); +void taichi_assert_runtime(LLVMRuntime *runtime, u1 test, const char *msg); +#define TI_ASSERT_INFO(x, msg) taichi_assert(context, (u1)(x), msg) #define TI_ASSERT(x) TI_ASSERT_INFO(x, #x) void ___stubs___() { @@ -753,12 +753,12 @@ RUNTIME_STRUCT_FIELD(ListManager, num_elements); RUNTIME_STRUCT_FIELD(ListManager, max_num_elements_per_chunk); RUNTIME_STRUCT_FIELD(ListManager, element_size); -void taichi_assert(RuntimeContext *context, i32 test, const char *msg) { +void taichi_assert(RuntimeContext *context, u1 test, const char *msg) { taichi_assert_runtime(context->runtime, test, msg); } void taichi_assert_format(LLVMRuntime *runtime, - i32 test, + u1 test, const char *format, int num_arguments, uint64 *arguments) { @@ -808,7 +808,7 @@ void taichi_assert_format(LLVMRuntime *runtime, #endif } -void taichi_assert_runtime(LLVMRuntime *runtime, i32 test, const char *msg) { +void taichi_assert_runtime(LLVMRuntime *runtime, u1 test, const char *msg) { taichi_assert_format(runtime, test, msg, 0, nullptr); } diff --git a/tests/python/test_pow.py b/tests/python/test_pow.py index 86fd59575827c..2679520134310 100644 --- a/tests/python/test_pow.py +++ b/tests/python/test_pow.py @@ -62,25 +62,23 @@ def foo(x: dt, y: ti.template()): foo(10, -10) -# FIXME(zhantong): Uncomment this test after bool assertion is finished. -# @test_utils.test( -# debug=True, -# advanced_optimization=False, -# exclude=[ti.vulkan, ti.metal, ti.opengl, ti.gles], -# ) -# def test_ipow_negative_exp_i32(): -# _ipow_negative_exp(ti.i32) - - -# FIXME(zhantong): Uncomment this test after bool assertion is finished. 
-# @test_utils.test( -# debug=True, -# advanced_optimization=False, -# require=ti.extension.data64, -# exclude=[ti.vulkan, ti.metal, ti.opengl, ti.gles], -# ) -# def test_ipow_negative_exp_i64(): -# _ipow_negative_exp(ti.i64) +@test_utils.test( + debug=True, + advanced_optimization=False, + exclude=[ti.vulkan, ti.metal, ti.opengl, ti.gles], +) +def test_ipow_negative_exp_i32(): + _ipow_negative_exp(ti.i32) + + +@test_utils.test( + debug=True, + advanced_optimization=False, + require=ti.extension.data64, + exclude=[ti.vulkan, ti.metal, ti.opengl, ti.gles], +) +def test_ipow_negative_exp_i64(): + _ipow_negative_exp(ti.i64) def _test_pow_int_base_int_exp(dt_base, dt_exp): From 25cc2c8539917062cef852364fa329e515175b00 Mon Sep 17 00:00:00 2001 From: listerily Date: Mon, 15 May 2023 17:44:18 +0800 Subject: [PATCH 17/18] [llvm] Simplify and add support for type u1 in logical not operation ghstack-source-id: 61e101a0e14c73d6027982c47f521bf3a8363fdf Pull Request resolved: https://github.com/taichi-dev/taichi/pull/8005 --- taichi/codegen/amdgpu/codegen_amdgpu.cpp | 8 +------- taichi/codegen/cuda/codegen_cuda.cpp | 6 ------ taichi/runtime/llvm/runtime_module/runtime.cpp | 4 ---- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/taichi/codegen/amdgpu/codegen_amdgpu.cpp b/taichi/codegen/amdgpu/codegen_amdgpu.cpp index 9d4f4ac10b39f..5b2431ef54eb5 100644 --- a/taichi/codegen/amdgpu/codegen_amdgpu.cpp +++ b/taichi/codegen/amdgpu/codegen_amdgpu.cpp @@ -65,13 +65,7 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM { TI_NOT_IMPLEMENTED \ } \ } - if (op == UnaryOpType::logic_not) { - if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) { - llvm_val[stmt] = call("logic_not_i32", input); - } else { - TI_NOT_IMPLEMENTED - } - } else if (op == UnaryOpType::abs) { + if (op == UnaryOpType::abs) { if (input_taichi_type->is_primitive(PrimitiveTypeID::f16)) { llvm_val[stmt] = call("__ocml_fabs_f16", input); } else if (input_taichi_type->is_primitive(PrimitiveTypeID::f32)) { diff --git a/taichi/codegen/cuda/codegen_cuda.cpp b/taichi/codegen/cuda/codegen_cuda.cpp index 46bbf331e4c5d..f0ebd49aafbfd 100644 --- a/taichi/codegen/cuda/codegen_cuda.cpp +++ b/taichi/codegen/cuda/codegen_cuda.cpp @@ -254,12 +254,6 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM { } else { TI_NOT_IMPLEMENTED } - } else if (op == UnaryOpType::logic_not) { - if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) { - llvm_val[stmt] = call("logic_not_i32", input); - } else { - TI_NOT_IMPLEMENTED - } } else if (op == UnaryOpType::frexp) { auto stype = tlctx->get_data_type(stmt->ret_type.ptr_removed()); auto res = builder->CreateAlloca(stype); diff --git a/taichi/runtime/llvm/runtime_module/runtime.cpp b/taichi/runtime/llvm/runtime_module/runtime.cpp index 460beb145e7e7..53547922a2e85 100644 --- a/taichi/runtime/llvm/runtime_module/runtime.cpp +++ b/taichi/runtime/llvm/runtime_module/runtime.cpp @@ -229,10 +229,6 @@ i64 max_i64(i64 a, i64 b) { return a > b ? 
a : b; } -int32 logic_not_i32(int32 a) { - return !a; -} - float32 sgn_f32(float32 a) { float32 b; if (a > 0) From da39780a879d179de967e1fb9d42fb026d43f042 Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Tue, 16 May 2023 11:40:18 +0800 Subject: [PATCH 18/18] [Lang] Support allocation with CUDA memory pool and reduce preallocation size accordingly (#7929) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue: # ### Brief Summary ### 🤖 Generated by Copilot at c85f600 This pull request enhances the memory management of Taichi on CUDA devices by using `malloc_async` and `mem_free_async` functions and adjusting the preallocation size based on the memory pool feature. This affects the files `cuda_device.cpp` and `llvm_runtime_executor.cpp`. ### Walkthrough ### 🤖 Generated by Copilot at c85f600 * Add support for CUDA memory pool feature to improve performance and memory usage on CUDA devices - Check if CUDA context supports memory pool and use `malloc_async` and `mem_free_async` functions for allocation and deallocation without caching ([link](https://github.com/taichi-dev/taichi/pull/7929/files?diff=unified&w=0#diff-7919f5d7e33aafc72f27ed93febc58a0ac77c220ae718bf78ef134dad3790654L55-R61), [link](https://github.com/taichi-dev/taichi/pull/7929/files?diff=unified&w=0#diff-7919f5d7e33aafc72f27ed93febc58a0ac77c220ae718bf78ef134dad3790654L103-R113)) - Reduce preallocation size by half for runtime objects and sparse data structures on CUDA devices if memory pool is supported ([link](https://github.com/taichi-dev/taichi/pull/7929/files?diff=unified&w=0#diff-b9155792159f392bd8bacd44cb1819be5239b022d707499fc364c0f93dd8c5e5R574-R579)) --- c_api/src/taichi_llvm_impl.cpp | 8 +- taichi/rhi/amdgpu/amdgpu_device.cpp | 2 +- taichi/rhi/cuda/cuda_device.cpp | 15 +- taichi/rhi/cuda/cuda_device.h | 1 + taichi/rhi/llvm/llvm_device.h | 1 + taichi/runtime/llvm/llvm_runtime_executor.cpp | 134 ++++++++++++------ taichi/runtime/llvm/llvm_runtime_executor.h | 14 +- 7 files changed, 115 insertions(+), 60 deletions(-) diff --git a/c_api/src/taichi_llvm_impl.cpp b/c_api/src/taichi_llvm_impl.cpp index a8b2fb8d8c6d2..7e89211031a34 100644 --- a/c_api/src/taichi_llvm_impl.cpp +++ b/c_api/src/taichi_llvm_impl.cpp @@ -54,11 +54,11 @@ TiMemory LlvmRuntime::allocate_memory( const taichi::lang::Device::AllocParams &params) { taichi::lang::LLVMRuntime *llvm_runtime = executor_->get_llvm_runtime(); taichi::lang::LlvmDevice *llvm_device = executor_->llvm_device(); - taichi::lang::DeviceAllocation devalloc = - llvm_device->allocate_memory_runtime({params, - executor_->get_runtime_jit_module(), - llvm_runtime, result_buffer}); + llvm_device->allocate_memory_runtime( + {params, executor_->get_runtime_jit_module(), llvm_runtime, + result_buffer, executor_->use_device_memory_pool()}); + return devalloc2devmem(*this, devalloc); } diff --git a/taichi/rhi/amdgpu/amdgpu_device.cpp b/taichi/rhi/amdgpu/amdgpu_device.cpp index ee86b03665dc3..ac786a4cb0ffb 100644 --- a/taichi/rhi/amdgpu/amdgpu_device.cpp +++ b/taichi/rhi/amdgpu/amdgpu_device.cpp @@ -106,8 +106,8 @@ void AmdgpuDevice::dealloc_memory(DeviceAllocation handle) { false); } else if (!info.use_preallocated) { DeviceMemoryPool::get_instance().release(info.size, info.ptr); - info.ptr = nullptr; } + info.ptr = nullptr; } RhiResult AmdgpuDevice::map(DeviceAllocation alloc, void **mapped_ptr) { diff --git a/taichi/rhi/cuda/cuda_device.cpp b/taichi/rhi/cuda/cuda_device.cpp index fe5d296071672..25d24ac8e8e5a 100644 --- 
a/taichi/rhi/cuda/cuda_device.cpp +++ b/taichi/rhi/cuda/cuda_device.cpp @@ -53,17 +53,17 @@ DeviceAllocation CudaDevice::allocate_memory_runtime( info.size = taichi::iroundup(params.size, taichi_page_size); if (info.size == 0) { info.ptr = nullptr; + } else if (params.use_memory_pool) { + CUDADriver::get_instance().malloc_async((void **)&info.ptr, info.size, + nullptr); } else { info.ptr = DeviceMemoryPool::get_instance().allocate_with_cache(this, params); - - TI_ASSERT(info.ptr != nullptr); - - CUDADriver::get_instance().memset((void *)info.ptr, 0, info.size); } info.is_imported = false; info.use_cached = true; info.use_preallocated = true; + info.use_memory_pool = params.use_memory_pool; DeviceAllocation alloc; alloc.alloc_id = allocations_.size(); @@ -92,6 +92,7 @@ void CudaDevice::dealloc_memory(DeviceAllocation handle) { validate_device_alloc(handle); AllocInfo &info = allocations_[handle.alloc_id]; + if (info.size == 0) { return; } @@ -99,14 +100,16 @@ void CudaDevice::dealloc_memory(DeviceAllocation handle) { TI_ERROR("the DeviceAllocation is already deallocated"); } TI_ASSERT(!info.is_imported); - if (info.use_cached) { + if (info.use_memory_pool) { + CUDADriver::get_instance().mem_free_async(info.ptr, nullptr); + } else if (info.use_cached) { DeviceMemoryPool::get_instance().release(info.size, (uint64_t *)info.ptr, false); } else if (!info.use_preallocated) { auto &mem_pool = DeviceMemoryPool::get_instance(); mem_pool.release(info.size, info.ptr, true /*release_raw*/); - info.ptr = nullptr; } + info.ptr = nullptr; } RhiResult CudaDevice::upload_data(DevicePtr *device_ptr, diff --git a/taichi/rhi/cuda/cuda_device.h b/taichi/rhi/cuda/cuda_device.h index 0e06174552331..3a94209084a13 100644 --- a/taichi/rhi/cuda/cuda_device.h +++ b/taichi/rhi/cuda/cuda_device.h @@ -77,6 +77,7 @@ class CudaDevice : public LlvmDevice { * */ bool use_preallocated{true}; bool use_cached{false}; + bool use_memory_pool{false}; void *mapped{nullptr}; }; diff --git a/taichi/rhi/llvm/llvm_device.h b/taichi/rhi/llvm/llvm_device.h index cbefaaeb60327..4279ba1765a7a 100644 --- a/taichi/rhi/llvm/llvm_device.h +++ b/taichi/rhi/llvm/llvm_device.h @@ -13,6 +13,7 @@ class LlvmDevice : public Device { JITModule *runtime_jit{nullptr}; LLVMRuntime *runtime{nullptr}; uint64 *result_buffer{nullptr}; + bool use_memory_pool{false}; }; Arch arch() const override { diff --git a/taichi/runtime/llvm/llvm_runtime_executor.cpp b/taichi/runtime/llvm/llvm_runtime_executor.cpp index 6c636f833f299..3adada586f5df 100644 --- a/taichi/runtime/llvm/llvm_runtime_executor.cpp +++ b/taichi/runtime/llvm/llvm_runtime_executor.cpp @@ -46,6 +46,7 @@ LlvmRuntimeExecutor::LlvmRuntimeExecutor(CompileConfig &config, config.arch = host_arch(); } else { // CUDA runtime created successfully + use_device_memory_pool_ = CUDAContext::get_instance().supports_mem_pool(); } #else TI_WARN("Taichi is not compiled with CUDA."); @@ -398,6 +399,20 @@ void LlvmRuntimeExecutor::initialize_llvm_runtime_snodes( const int tree_id = field_cache_data.tree_id; const int root_id = field_cache_data.root_id; + bool all_dense = config_.demote_dense_struct_fors; + for (size_t i = 0; i < snode_metas.size(); i++) { + if (snode_metas[i].type != SNodeType::dense && + snode_metas[i].type != SNodeType::place && + snode_metas[i].type != SNodeType::root) { + all_dense = false; + break; + } + } + + if (config_.arch == Arch::cuda && use_device_memory_pool() && !all_dense) { + preallocate_runtime_memory(); + } + TI_TRACE("Allocating data structure of size {} bytes", root_size); 
std::size_t rounded_size = taichi::iroundup(root_size, taichi_page_size); @@ -424,16 +439,6 @@ void LlvmRuntimeExecutor::initialize_llvm_runtime_snodes( snode_tree_allocs_[tree_id] = alloc; - bool all_dense = config_.demote_dense_struct_fors; - for (size_t i = 0; i < snode_metas.size(); i++) { - if (snode_metas[i].type != SNodeType::dense && - snode_metas[i].type != SNodeType::place && - snode_metas[i].type != SNodeType::root) { - all_dense = false; - break; - } - } - runtime_jit->call( "runtime_initialize_snodes", llvm_runtime_, root_size, root_id, (int)snode_metas.size(), tree_id, rounded_size, root_buffer, all_dense); @@ -471,16 +476,25 @@ LlvmDevice *LlvmRuntimeExecutor::llvm_device() { DeviceAllocation LlvmRuntimeExecutor::allocate_memory_ndarray( std::size_t alloc_size, uint64 *result_buffer) { - return llvm_device()->allocate_memory_runtime( + auto devalloc = llvm_device()->allocate_memory_runtime( {{alloc_size, /*host_write=*/false, /*host_read=*/false, /*export_sharing=*/false, AllocUsage::Storage}, get_runtime_jit_module(), get_llvm_runtime(), - result_buffer}); + result_buffer, + use_device_memory_pool()}); + + TI_ASSERT(allocated_runtime_memory_allocs_.find(devalloc.alloc_id) == + allocated_runtime_memory_allocs_.end()); + allocated_runtime_memory_allocs_[devalloc.alloc_id] = devalloc; + return devalloc; } void LlvmRuntimeExecutor::deallocate_memory_ndarray(DeviceAllocation handle) { + TI_ASSERT(allocated_runtime_memory_allocs_.find(handle.alloc_id) != + allocated_runtime_memory_allocs_.end()); llvm_device()->dealloc_memory(handle); + allocated_runtime_memory_allocs_.erase(handle.alloc_id); } void LlvmRuntimeExecutor::fill_ndarray(const DeviceAllocation &alloc, @@ -534,13 +548,32 @@ uint64_t *LlvmRuntimeExecutor::get_ndarray_alloc_info_ptr( void LlvmRuntimeExecutor::finalize() { profiler_ = nullptr; - for (auto &preallocated_device_buffer_alloc : - preallocated_device_buffer_allocs_) { - if (config_.arch == Arch::cuda || config_.arch == Arch::amdgpu) { - llvm_device()->dealloc_memory(preallocated_device_buffer_alloc); - llvm_device()->clear(); - DeviceMemoryPool::get_instance().reset(); + if (config_.arch == Arch::cuda || config_.arch == Arch::amdgpu) { + preallocated_runtime_objects_allocs_.reset(); + preallocated_runtime_memory_allocs_.reset(); + + // Reset runtime memory + auto allocated_runtime_memory_allocs_copy = + allocated_runtime_memory_allocs_; + for (auto &iter : allocated_runtime_memory_allocs_copy) { + // The runtime allocation may have already been freed upon explicit + // Ndarray/Field destruction. Check whether the allocation is still alive. + void *ptr = llvm_device()->get_memory_addr(iter.second); + if (ptr == nullptr) + continue; + + deallocate_memory_ndarray(iter.second); + } + allocated_runtime_memory_allocs_.clear(); + + // Reset device + llvm_device()->clear(); + + // Reset memory pool + DeviceMemoryPool::get_instance().reset(); + + // Release unused memory back to the CUDA memory pool + synchronize(); } finalized_ = true; } @@ -551,7 +584,9 @@ LlvmRuntimeExecutor::~LlvmRuntimeExecutor() { } } -void *LlvmRuntimeExecutor::preallocate_memory(std::size_t prealloc_size) { +void *LlvmRuntimeExecutor::preallocate_memory( + std::size_t prealloc_size, + DeviceAllocationUnique &devalloc) { DeviceAllocation preallocated_device_buffer_alloc; Device::AllocParams preallocated_device_buffer_alloc_params; @@ -563,11 +598,38 @@ void *LlvmRuntimeExecutor::preallocate_memory(std::size_t prealloc_size) { void *preallocated_device_buffer = 
llvm_device()->get_memory_addr(preallocated_device_buffer_alloc); - preallocated_device_buffer_allocs_.emplace_back( + devalloc = std::make_unique( std::move(preallocated_device_buffer_alloc)); return preallocated_device_buffer; } +void LlvmRuntimeExecutor::preallocate_runtime_memory() { + if (preallocated_runtime_memory_allocs_ != nullptr) + return; + + std::size_t total_prealloc_size = 0; + const auto total_mem = llvm_device()->get_total_memory(); + if (config_.device_memory_fraction == 0) { + TI_ASSERT(config_.device_memory_GB > 0); + total_prealloc_size = std::size_t(config_.device_memory_GB * (1UL << 30)); + } else { + total_prealloc_size = + std::size_t(config_.device_memory_fraction * total_mem); + } + TI_ASSERT(total_prealloc_size <= total_mem); + + void *runtime_memory_prealloc_buffer = preallocate_memory( + total_prealloc_size, preallocated_runtime_memory_allocs_); + + TI_TRACE("Allocating device memory {:.2f} MB", + 1.0 * total_prealloc_size / (1UL << 20)); + + auto *const runtime_jit = get_runtime_jit_module(); + runtime_jit->call( + "runtime_initialize_memory", llvm_runtime_, total_prealloc_size, + runtime_memory_prealloc_buffer); +} + void LlvmRuntimeExecutor::materialize_runtime(KernelProfilerBase *profiler, uint64 **result_buffer_ptr) { // The result buffer allocated here is only used for the launches of // CUDA and AMDGPU backends. // | ==================preallocated device buffer ========================== | // |<- reserved for return ->|<---- usable for allocators on the device ---->| - std::size_t runtime_objects_prealloc_size = 0; void *runtime_objects_prealloc_buffer = nullptr; if (config_.arch == Arch::cuda || config_.arch == Arch::amdgpu) { #if defined(TI_WITH_CUDA) || defined(TI_WITH_AMDGPU) + runtime_objects_prealloc_size = 60 * (1UL << 20); // 60 MB + runtime_objects_prealloc_buffer = preallocate_memory( + runtime_objects_prealloc_size, preallocated_runtime_objects_allocs_); TI_TRACE("Allocating device memory {:.2f} MB", 1.0 * runtime_objects_prealloc_size / (1UL << 20)); @@ -639,31 +701,9 @@ void LlvmRuntimeExecutor::materialize_runtime(KernelProfilerBase *profiler, // Preallocate for runtime memory and update to LLVMRuntime if (config_.arch == Arch::cuda || config_.arch == Arch::amdgpu) { - std::size_t total_prealloc_size = 0; - const auto total_mem = llvm_device()->get_total_memory(); - if (config_.device_memory_fraction == 0) { - TI_ASSERT(config_.device_memory_GB > 0); - total_prealloc_size = std::size_t(config_.device_memory_GB * (1UL << 30)); - } else { - total_prealloc_size = - std::size_t(config_.device_memory_fraction * total_mem); + if (!use_device_memory_pool()) { + preallocate_runtime_memory(); } - TI_ASSERT(total_prealloc_size <= total_mem); - - auto runtime_memory_prealloc_size = - total_prealloc_size > runtime_objects_prealloc_size ? 
total_prealloc_size - runtime_objects_prealloc_size - : 0; - - void *runtime_memory_prealloc_buffer = - preallocate_memory(runtime_memory_prealloc_size); - - TI_TRACE("Allocating device memory {:.2f} MB", - 1.0 * runtime_memory_prealloc_size / (1UL << 20)); - - runtime_jit->call( - "runtime_initialize_memory", llvm_runtime_, - runtime_memory_prealloc_size, runtime_memory_prealloc_buffer); } if (config_.arch == Arch::cuda) { diff --git a/taichi/runtime/llvm/llvm_runtime_executor.h b/taichi/runtime/llvm/llvm_runtime_executor.h index 6e487419a28a5..871e394ad8b50 100644 --- a/taichi/runtime/llvm/llvm_runtime_executor.h +++ b/taichi/runtime/llvm/llvm_runtime_executor.h @@ -75,6 +75,10 @@ class LlvmRuntimeExecutor { void synchronize(); + bool use_device_memory_pool() { + return use_device_memory_pool_; + } + private: /* ----------------------- */ /* ------ Allocation ----- */ @@ -96,7 +100,9 @@ class LlvmRuntimeExecutor { std::size_t size, uint32_t data); - void *preallocate_memory(std::size_t prealloc_size); + void *preallocate_memory(std::size_t prealloc_size, + DeviceAllocationUnique &devalloc); + void preallocate_runtime_memory(); /* ------------------------- */ /* ---- Runtime Helpers ---- */ @@ -144,12 +150,16 @@ class LlvmRuntimeExecutor { std::unique_ptr snode_tree_buffer_manager_{nullptr}; std::unordered_map snode_tree_allocs_; - std::vector preallocated_device_buffer_allocs_; + DeviceAllocationUnique preallocated_runtime_objects_allocs_ = nullptr; + DeviceAllocationUnique preallocated_runtime_memory_allocs_ = nullptr; + std::unordered_map + allocated_runtime_memory_allocs_; // good buddy friend LlvmProgramImpl; friend SNodeTreeBufferManager; + bool use_device_memory_pool_ = false; bool finalized_{false}; KernelProfilerBase *profiler_ = nullptr; };
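Taken together, the last patch means that on CUDA drivers reporting memory-pool support, runtime allocations for ndarrays bypass the cached device pool and go through `malloc_async`/`mem_free_async`, so the large up-front preallocation is only needed once sparse data structures come into play. A rough sketch of the user-visible shape of this change (sizes are illustrative; the allocation path is chosen internally, not by the user):

```python
import taichi as ti

# On CUDA builds where the driver supports memory pools, ndarray memory is
# obtained through malloc_async instead of the preallocated cached pool.
ti.init(arch=ti.cuda, device_memory_GB=1)  # preallocation budget, illustrative

x = ti.ndarray(ti.f32, shape=(4096, 4096))

@ti.kernel
def fill(a: ti.types.ndarray(ndim=2)):
    for i, j in ti.ndrange(a.shape[0], a.shape[1]):
        a[i, j] = i + j

fill(x)
x = None  # once collected, the buffer is released via mem_free_async
```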